diff --git a/ogb_examples/graphproppred/main_pgl.py b/ogb_examples/graphproppred/main_pgl.py deleted file mode 100644 index 5b3c588b99b20b01652e9cb7d0054f69d244c222..0000000000000000000000000000000000000000 --- a/ogb_examples/graphproppred/main_pgl.py +++ /dev/null @@ -1,197 +0,0 @@ -# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -"""test ogb -""" -import argparse -import ssl -ssl._create_default_https_context = ssl._create_unverified_context -# SSL - -import torch -import pgl -import numpy as np -import paddle.fluid as fluid -from pgl.contrib.ogb.graphproppred.dataset_pgl import PglGraphPropPredDataset -from pgl.utils import paddle_helper -from ogb.graphproppred import Evaluator -from pgl.contrib.ogb.graphproppred.mol_encoder import AtomEncoder, BondEncoder - - -def train(exe, batch_size, graph_wrapper, train_program, splitted_idx, dataset, - evaluator, fetch_loss, fetch_pred): - """Train""" - graphs, labels = dataset[splitted_idx["train"]] - perm = np.arange(0, len(graphs)) - np.random.shuffle(perm) - start_batch = 0 - batch_no = 0 - pred_output = np.zeros_like(labels, dtype="float32") - while start_batch < len(perm): - batch_index = perm[start_batch:start_batch + batch_size] - start_batch += batch_size - batch_graph = pgl.graph.MultiGraph(graphs[batch_index]) - batch_label = labels[batch_index] - batch_valid = (batch_label == batch_label).astype("float32") - batch_label = np.nan_to_num(batch_label).astype("float32") - feed_dict = graph_wrapper.to_feed(batch_graph) - feed_dict["label"] = batch_label - feed_dict["weight"] = batch_valid - loss, pred = exe.run(train_program, - feed=feed_dict, - fetch_list=[fetch_loss, fetch_pred]) - - if batch_no % 100 == 0: - print("loss: %s" % loss[0]) - pred_output[batch_index] = pred - batch_no += 1 - print("train", evaluator.eval({"y_true": labels, "y_pred": pred_output})) - - -def evaluate(exe, batch_size, graph_wrapper, val_program, splitted_idx, - dataset, mode, evaluator, fetch_pred): - """Eval""" - graphs, labels = dataset[splitted_idx[mode]] - perm = np.arange(0, len(graphs)) - start_batch = 0 - batch_no = 0 - pred_output = np.zeros_like(labels, dtype="float32") - while start_batch < len(perm): - batch_index = perm[start_batch:start_batch + batch_size] - start_batch += batch_size - batch_graph = pgl.graph.MultiGraph(graphs[batch_index]) - feed_dict = graph_wrapper.to_feed(batch_graph) - pred = exe.run(val_program, feed=feed_dict, fetch_list=[fetch_pred]) - pred_output[batch_index] = pred[0] - batch_no += 1 - print(mode, evaluator.eval({"y_true": labels, "y_pred": pred_output})) - - -def send_func(src_feat, dst_feat, edge_feat): - """Send""" - return src_feat["h"] + edge_feat["h"] - - -class GNNModel(object): - """GNNModel""" - - def __init__(self, name, emb_dim, num_task, num_layers): - self.num_task = num_task - self.emb_dim = emb_dim - self.num_layers = num_layers - self.name = name - self.atom_encoder = AtomEncoder(name=name, emb_dim=emb_dim) - self.bond_encoder = BondEncoder(name=name, emb_dim=emb_dim) - - def forward(self, graph): - """foward""" - h_node = self.atom_encoder(graph.node_feat['feat']) - h_edge = self.bond_encoder(graph.edge_feat['feat']) - for layer in range(self.num_layers): - msg = graph.send( - send_func, - nfeat_list=[("h", h_node)], - efeat_list=[("h", h_edge)]) - h_node = graph.recv(msg, 'sum') + h_node - h_node = fluid.layers.fc(h_node, - size=self.emb_dim, - name=self.name + '_%s' % layer, - act="relu") - graph_nodes = pgl.layers.graph_pooling(graph, h_node, "average") - graph_pred = fluid.layers.fc(graph_nodes, self.num_task, name="final") - return graph_pred - - -def main(): - """main - """ - # Training settings - parser = argparse.ArgumentParser(description='Graph Dataset') - parser.add_argument('--use_cuda', action='store_true') - parser.add_argument( - '--epochs', - type=int, - default=100, - help='number of epochs to train (default: 100)') - parser.add_argument( - '--dataset', - type=str, - default="ogbg-molhiv", - help='dataset name (default: proteinfunc)') - args = parser.parse_args() - - place = fluid.CUDAPlace(0) if args.use_cuda else fluid.CPUPlace() - - ### automatic dataloading and splitting - dataset = PglGraphPropPredDataset(name=args.dataset) - splitted_idx = dataset.get_idx_split() - - ### automatic evaluator. takes dataset name as input - evaluator = Evaluator(args.dataset) - - graph_data, label = dataset[:2] - batch_graph = pgl.graph.MultiGraph(graph_data) - graph_data = batch_graph - - train_program = fluid.Program() - startup_program = fluid.Program() - test_program = fluid.Program() - # degree normalize - graph_data.edge_feat["feat"] = graph_data.edge_feat["feat"].astype("int64") - graph_data.node_feat["feat"] = graph_data.node_feat["feat"].astype("int64") - - model = GNNModel( - name="gnn", num_task=dataset.num_tasks, emb_dim=64, num_layers=2) - - with fluid.program_guard(train_program, startup_program): - gw = pgl.graph_wrapper.GraphWrapper( - "graph", - place=place, - node_feat=graph_data.node_feat_info(), - edge_feat=graph_data.edge_feat_info()) - pred = model.forward(gw) - sigmoid_pred = fluid.layers.sigmoid(pred) - - val_program = train_program.clone(for_test=True) - - initializer = [] - with fluid.program_guard(train_program, startup_program): - train_label = fluid.layers.data( - name="label", dtype="float32", shape=[None, dataset.num_tasks]) - train_weight = fluid.layers.data( - name="weight", dtype="float32", shape=[None, dataset.num_tasks]) - train_loss_t = fluid.layers.sigmoid_cross_entropy_with_logits( - x=pred, label=train_label) * train_weight - train_loss_t = fluid.layers.reduce_sum(train_loss_t) - - adam = fluid.optimizer.Adam( - learning_rate=1e-2, - regularization=fluid.regularizer.L2DecayRegularizer( - regularization_coeff=0.0005)) - adam.minimize(train_loss_t) - - exe = fluid.Executor(place) - exe.run(startup_program) - - for epoch in range(1, args.epochs + 1): - print("Epoch", epoch) - train(exe, 128, gw, train_program, splitted_idx, dataset, evaluator, - train_loss_t, sigmoid_pred) - evaluate(exe, 128, gw, val_program, splitted_idx, dataset, "valid", - evaluator, sigmoid_pred) - evaluate(exe, 128, gw, val_program, splitted_idx, dataset, "test", - evaluator, sigmoid_pred) - - -if __name__ == "__main__": - main() diff --git a/ogb_examples/graphproppred/mol/README.md b/ogb_examples/graphproppred/mol/README.md index b42cf070b5893d1bcd70e8fe4a8c3b28d7444f9e..56694ec862bf998e1aa804adbd1cd94e8bfeb991 100644 --- a/ogb_examples/graphproppred/mol/README.md +++ b/ogb_examples/graphproppred/mol/README.md @@ -4,8 +4,8 @@ ### Requirements -- paddlpaddle 1.7.1 -- pgl 1.0.1 +- paddlpaddle >= 1.7.1 +- pgl 1.0.2 - ogb NOTE: To install ogb that is fited for this project, run below command to install ogb diff --git a/ogb_examples/graphproppred/ogbg_ppi.py b/ogb_examples/graphproppred/ogbg_ppi.py deleted file mode 100644 index e9ff287fa3b2956f25383c0c7eee35373ddc16e0..0000000000000000000000000000000000000000 --- a/ogb_examples/graphproppred/ogbg_ppi.py +++ /dev/null @@ -1,216 +0,0 @@ -# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -"""test ogb -""" -import argparse -import ssl -ssl._create_default_https_context = ssl._create_unverified_context -# SSL - -import torch -import pgl -import numpy as np -import paddle.fluid as fluid -from pgl.contrib.ogb.graphproppred.dataset_pgl import PglGraphPropPredDataset -from pgl.utils import paddle_helper -from ogb.graphproppred import Evaluator -from pgl.contrib.ogb.graphproppred.mol_encoder import AtomEncoder, BondEncoder - - -def train(exe, batch_size, graph_wrapper, train_program, splitted_idx, dataset, - evaluator, fetch_loss, fetch_pred): - """Train""" - graphs, labels = dataset[splitted_idx["train"]] - perm = np.arange(0, len(graphs)) - np.random.shuffle(perm) - start_batch = 0 - batch_no = 0 - pred_output = np.zeros_like(labels, dtype="float32") - while start_batch < len(perm): - batch_index = perm[start_batch:start_batch + batch_size] - start_batch += batch_size - batch_graph = pgl.graph.MultiGraph(graphs[batch_index]) - batch_label = labels[batch_index] - batch_valid = (batch_label == batch_label).astype("float32") - batch_label = np.nan_to_num(batch_label).astype("float32") - feed_dict = graph_wrapper.to_feed(batch_graph) - feed_dict["label"] = batch_label - feed_dict["weight"] = batch_valid - loss, pred = exe.run(train_program, - feed=feed_dict, - fetch_list=[fetch_loss, fetch_pred]) - - if batch_no % 100 == 0: - print("loss: %s" % loss[0]) - pred_output[batch_index] = pred - batch_no += 1 - print("train", evaluator.eval({"y_true": labels, "y_pred": pred_output})) - - -def evaluate(exe, batch_size, graph_wrapper, val_program, splitted_idx, - dataset, mode, evaluator, fetch_pred): - """Eval""" - graphs, labels = dataset[splitted_idx[mode]] - perm = np.arange(0, len(graphs)) - start_batch = 0 - batch_no = 0 - pred_output = np.zeros_like(labels, dtype="float32") - while start_batch < len(perm): - batch_index = perm[start_batch:start_batch + batch_size] - start_batch += batch_size - batch_graph = pgl.graph.MultiGraph(graphs[batch_index]) - feed_dict = graph_wrapper.to_feed(batch_graph) - pred = exe.run(val_program, feed=feed_dict, fetch_list=[fetch_pred]) - pred_output[batch_index] = pred[0] - batch_no += 1 - print(mode, evaluator.eval({"y_true": labels, "y_pred": pred_output})) - - -def send_func(src_feat, dst_feat, edge_feat): - """Send""" - return src_feat["h"] + edge_feat["h"] - - -class GNNModel(object): - """GNNModel""" - - def __init__(self, name, emb_dim, num_task, num_layers): - self.num_task = num_task - self.emb_dim = emb_dim - self.num_layers = num_layers - self.name = name - self.atom_encoder = AtomEncoder(name=name, emb_dim=emb_dim) - self.bond_encoder = BondEncoder(name=name, emb_dim=emb_dim) - - def edges_encoder(self, inputs, name): - outputs = fluid.layers.fc(inputs, - size=self.emb_dim, - param_attr=fluid.ParamAttr(name=name)) - return outputs - - def forward(self, graph): - """foward""" - # h_node = self.atom_encoder(graph.node_feat['feat']) - h_node = fluid.layers.embedding( - graph.node_feat['feat'], size=[1, self.emb_dim]) - - # h_edge = self.bond_encoder(graph.edge_feat['feat']) - h_edge = self.edges_encoder( - graph.edge_feat['feat'], name="edges_encoder") - for layer in range(self.num_layers): - msg = graph.send( - send_func, - nfeat_list=[("h", h_node)], - efeat_list=[("h", h_edge)]) - h_node = graph.recv(msg, 'sum') + h_node - h_node = fluid.layers.fc(h_node, - size=self.emb_dim, - name=self.name + '_%s' % layer, - act="relu") - graph_nodes = pgl.layers.graph_pooling(graph, h_node, "average") - graph_pred = fluid.layers.fc(graph_nodes, self.num_task, name="final") - return graph_pred - - -def main(): - """main - """ - # Training settings - parser = argparse.ArgumentParser(description='Graph Dataset') - parser.add_argument('--use_cuda', action='store_true') - parser.add_argument( - '--epochs', - type=int, - default=100, - help='number of epochs to train (default: 100)') - parser.add_argument( - '--dataset', - type=str, - default="ogbg-molhiv", - help='dataset name (default: proteinfunc)') - args = parser.parse_args() - - place = fluid.CUDAPlace(0) if args.use_cuda else fluid.CPUPlace() - - ### automatic dataloading and splitting - dataset = PglGraphPropPredDataset(name=args.dataset) - - for i in range(len(dataset)): - g, l = dataset[i] - if not g.node_feat: - g.node_feat['feat'] = np.array( - [0 for _ in range(g.num_nodes)]).reshape(-1, 1) - - splitted_idx = dataset.get_idx_split() - - ### automatic evaluator. takes dataset name as input - evaluator = Evaluator(args.dataset) - - graph_data, label = dataset[:2] - batch_graph = pgl.graph.MultiGraph(graph_data) - graph_data = batch_graph - - train_program = fluid.Program() - startup_program = fluid.Program() - test_program = fluid.Program() - # degree normalize - graph_data.edge_feat["feat"] = graph_data.edge_feat["feat"].astype( - "float32") - graph_data.node_feat["feat"] = graph_data.node_feat["feat"].astype("int64") - - model = GNNModel( - name="gnn", num_task=dataset.num_tasks, emb_dim=64, num_layers=2) - - with fluid.program_guard(train_program, startup_program): - gw = pgl.graph_wrapper.GraphWrapper( - "graph", - place=place, - node_feat=graph_data.node_feat_info(), - edge_feat=graph_data.edge_feat_info()) - pred = model.forward(gw) - sigmoid_pred = fluid.layers.sigmoid(pred) - - val_program = train_program.clone(for_test=True) - - initializer = [] - with fluid.program_guard(train_program, startup_program): - train_label = fluid.layers.data( - name="label", dtype="float32", shape=[None, dataset.num_tasks]) - train_weight = fluid.layers.data( - name="weight", dtype="float32", shape=[None, dataset.num_tasks]) - train_loss_t = fluid.layers.sigmoid_cross_entropy_with_logits( - x=pred, label=train_label) * train_weight - train_loss_t = fluid.layers.reduce_sum(train_loss_t) - - adam = fluid.optimizer.Adam( - learning_rate=1e-2, - regularization=fluid.regularizer.L2DecayRegularizer( - regularization_coeff=0.0005)) - adam.minimize(train_loss_t) - - exe = fluid.Executor(place) - exe.run(startup_program) - - for epoch in range(1, args.epochs + 1): - print("Epoch", epoch) - train(exe, 128, gw, train_program, splitted_idx, dataset, evaluator, - train_loss_t, sigmoid_pred) - evaluate(exe, 128, gw, val_program, splitted_idx, dataset, "valid", - evaluator, sigmoid_pred) - evaluate(exe, 128, gw, val_program, splitted_idx, dataset, "test", - evaluator, sigmoid_pred) - - -if __name__ == "__main__": - main()