diff --git a/examples/deeper_gcn/README.md b/examples/deeper_gcn/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..e67e51e44cfbf575f7121f6d3d2987ad2b91e7fc
--- /dev/null
+++ b/examples/deeper_gcn/README.md
@@ -0,0 +1,33 @@
+# DeeperGCN: All You Need to Train Deeper GCNs
+
+See the paper for more information: https://arxiv.org/pdf/2006.07739.pdf
+
+
+### Datasets
+
+The datasets contain three citation networks: CORA, PUBMED, CITESEER. The details of these three datasets can be found in the [paper](https://arxiv.org/abs/1609.02907).
+
+### Dependencies
+
+- paddlepaddle>=1.6
+- pgl
+
+### Performance
+
+We train our models for 200 epochs and report the accuracy on the test dataset.
+
+| Dataset | Accuracy |
+| --- | --- |
+| Cora | ~77% |
+
+### How to run
+
+For example, to train DeeperGCN on the cora dataset with a GPU:
+```
+python train.py --dataset cora --use_cuda
+```
+
+#### Hyperparameters
+
+- dataset: The citation dataset: "cora", "citeseer" or "pubmed".
+- use_cuda: Use GPU for training if --use_cuda is specified.
diff --git a/examples/deeper_gcn/model.py b/examples/deeper_gcn/model.py
new file mode 100644
index 0000000000000000000000000000000000000000..f5f4126aab29a2900690ccd278ed101e5fb5aa6b
--- /dev/null
+++ b/examples/deeper_gcn/model.py
@@ -0,0 +1,89 @@
+import pgl
+import paddle.fluid as fluid
+
+def DeeperGCN(gw, feature, num_layers,
+              hidden_size, num_tasks, name, dropout_prob):
+    """Implementation of DeeperGCN, see the paper
+    "DeeperGCN: All You Need to Train Deeper GCNs" in
+    https://arxiv.org/pdf/2006.07739.pdf
+
+    Args:
+        gw: Graph wrapper object
+
+        feature: A tensor with shape (num_nodes, feature_size)
+
+        num_layers: number of GENConv layers in DeeperGCN
+
+        hidden_size: hidden size of each layer in DeeperGCN
+
+        num_tasks: number of output classes for the final prediction
+
+        name: parameter name prefix for the DeeperGCN layers
+
+        dropout_prob: dropout probability in DeeperGCN
+
+    Return:
+        A tensor with shape (num_nodes, num_tasks)
+    """
+
+    beta = "dynamic"
+    feature = fluid.layers.fc(feature,
+                              hidden_size,
+                              bias_attr=False,
+                              param_attr=fluid.ParamAttr(name=name + '_weight'))
+
+    output = pgl.layers.gen_conv(gw, feature, name=name+"_gen_conv_0", beta=beta)
+
+    for layer in range(num_layers):
+        # pre-activation residual block: LN/BN -> ReLU -> GraphConv -> Res
+        old_output = output
+        # 1. Layer Norm
+        output = fluid.layers.layer_norm(
+            output,
+            begin_norm_axis=1,
+            param_attr=fluid.ParamAttr(
+                name="norm_scale_%s_%d" % (name, layer),
+                initializer=fluid.initializer.Constant(1.0)),
+            bias_attr=fluid.ParamAttr(
+                name="norm_bias_%s_%d" % (name, layer),
+                initializer=fluid.initializer.Constant(0.0)))
+
+        # 2. ReLU
+        output = fluid.layers.relu(output)
+
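+        # Steps 3-5 below complete the pre-activation residual block
+        # (LayerNorm -> ReLU -> Dropout -> GENConv -> residual addition).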
+        # 3. Dropout
+        output = fluid.layers.dropout(output,
+                                      dropout_prob=dropout_prob,
+                                      dropout_implementation="upscale_in_train")
+
+        # 4. GENConv message passing
+        output = pgl.layers.gen_conv(gw, output,
+                                     name=name+"_gen_conv_%d"%layer, beta=beta)
+
+        # 5. Residual connection
+        output = output + old_output
+
+    # final layer: LN + ReLU + dropout
+    output = fluid.layers.layer_norm(
+        output,
+        begin_norm_axis=1,
+        param_attr=fluid.ParamAttr(
+            name="norm_scale_%s_%d" % (name, num_layers),
+            initializer=fluid.initializer.Constant(1.0)),
+        bias_attr=fluid.ParamAttr(
+            name="norm_bias_%s_%d" % (name, num_layers),
+            initializer=fluid.initializer.Constant(0.0)))
+    output = fluid.layers.relu(output)
+    output = fluid.layers.dropout(output,
+                                  dropout_prob=dropout_prob,
+                                  dropout_implementation="upscale_in_train")
+
+    # final prediction
+    output = fluid.layers.fc(output,
+                             num_tasks,
+                             bias_attr=False,
+                             param_attr=fluid.ParamAttr(name=name + '_final_weight'))
+
+    return output
+
+
diff --git a/examples/deeper_gcn/train.py b/examples/deeper_gcn/train.py
new file mode 100644
index 0000000000000000000000000000000000000000..35ed8ac4cfc83f6693770b47ddf4f99cdd1f07f6
--- /dev/null
+++ b/examples/deeper_gcn/train.py
@@ -0,0 +1,155 @@
+# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
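+#
+# Training script for the DeeperGCN example: build a GraphWrapper over a
+# citation dataset, stack num_layers GENConv blocks via model.DeeperGCN,
+# train with Adam plus L2 weight decay, log the learned beta parameters
+# with LogWriter, and report accuracy on the validation and test splits.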
+#-*- coding: utf-8 -*-
+import pgl
+from pgl import data_loader
+from pgl.utils.logger import log
+import paddle.fluid as fluid
+import numpy as np
+import time
+import argparse
+from pgl.utils.log_writer import LogWriter # vdl
+from model import DeeperGCN
+
+def load(name):
+    if name == 'cora':
+        dataset = data_loader.CoraDataset()
+    elif name == "pubmed":
+        dataset = data_loader.CitationDataset("pubmed", symmetry_edges=False)
+    elif name == "citeseer":
+        dataset = data_loader.CitationDataset("citeseer", symmetry_edges=False)
+    else:
+        raise ValueError(name + " dataset doesn't exist")
+    return dataset
+
+
+def main(args):
+    # vdl writer for logging training history
+    writer = LogWriter("checkpoints/train_history")
+
+    dataset = load(args.dataset)
+    place = fluid.CUDAPlace(0) if args.use_cuda else fluid.CPUPlace()
+    train_program = fluid.Program()
+    startup_program = fluid.Program()
+    test_program = fluid.Program()
+    hidden_size = 64
+    num_layers = 7
+
+    with fluid.program_guard(train_program, startup_program):
+        gw = pgl.graph_wrapper.GraphWrapper(
+            name="graph",
+            node_feat=dataset.graph.node_feat_info())
+
+        output = DeeperGCN(gw,
+                           gw.node_feat["words"],
+                           num_layers,
+                           hidden_size,
+                           dataset.num_classes,
+                           "deepergcn",
+                           0.1)
+
+        node_index = fluid.layers.data(
+            "node_index",
+            shape=[None, 1],
+            dtype="int64",
+            append_batch_size=False)
+        node_label = fluid.layers.data(
+            "node_label",
+            shape=[None, 1],
+            dtype="int64",
+            append_batch_size=False)
+
+        pred = fluid.layers.gather(output, node_index)
+        loss, pred = fluid.layers.softmax_with_cross_entropy(
+            logits=pred, label=node_label, return_softmax=True)
+        acc = fluid.layers.accuracy(input=pred, label=node_label, k=1)
+        loss = fluid.layers.mean(loss)
+
+    test_program = train_program.clone(for_test=True)
+    with fluid.program_guard(train_program, startup_program):
+        adam = fluid.optimizer.Adam(
+            regularization=fluid.regularizer.L2DecayRegularizer(
+                regularization_coeff=0.0005),
+            learning_rate=0.005)
+        adam.minimize(loss)
+
+    exe = fluid.Executor(place)
+    exe.run(startup_program)
+
+    feed_dict = gw.to_feed(dataset.graph)
+
+    train_index = dataset.train_index
+    train_label = np.expand_dims(dataset.y[train_index], -1)
+    train_index = np.expand_dims(train_index, -1)
+
+    val_index = dataset.val_index
+    val_label = np.expand_dims(dataset.y[val_index], -1)
+    val_index = np.expand_dims(val_index, -1)
+
+    test_index = dataset.test_index
+    test_label = np.expand_dims(dataset.y[test_index], -1)
+    test_index = np.expand_dims(test_index, -1)
+
+    # collect the learnable beta parameters created by gen_conv for logging
+    beta_param_list = []
+    for param in fluid.io.get_program_parameter(train_program):
+        if param.name.endswith("_beta"):
+            beta_param_list.append(param)
+
+    dur = []
+    for epoch in range(200):
+        if epoch >= 3:
+            t0 = time.time()
+        feed_dict["node_index"] = np.array(train_index, dtype="int64")
+        feed_dict["node_label"] = np.array(train_label, dtype="int64")
+        train_loss, train_acc = exe.run(train_program,
+                                        feed=feed_dict,
+                                        fetch_list=[loss, acc],
+                                        return_numpy=True)
+        for param in beta_param_list:
+            beta = np.array(fluid.global_scope().find_var(param.name).get_tensor())
+            writer.add_scalar("beta/"+param.name, beta, epoch)
+
+        if epoch >= 3:
+            time_per_epoch = 1.0 * (time.time() - t0)
+            dur.append(time_per_epoch)
+
+        feed_dict["node_index"] = np.array(val_index, dtype="int64")
+        feed_dict["node_label"] = np.array(val_label, dtype="int64")
+        val_loss, val_acc = exe.run(test_program,
+                                    feed=feed_dict,
+                                    fetch_list=[loss, acc],
+                                    return_numpy=True)
+
+        log.info("Epoch %d " % epoch + "(%.5lf sec) " % np.mean(dur) +
"Train Loss: %f " % train_loss + "Train Acc: %f " % train_acc + + "Val Loss: %f " % val_loss + "Val Acc: %f " % val_acc) + + feed_dict["node_index"] = np.array(test_index, dtype="int64") + feed_dict["node_label"] = np.array(test_label, dtype="int64") + test_loss, test_acc = exe.run(test_program, + feed=feed_dict, + fetch_list=[loss, acc], + return_numpy=True) + log.info("Accuracy: %f" % test_acc) + + +if __name__ == '__main__': + parser = argparse.ArgumentParser(description='DeeperGCN') + parser.add_argument( + "--dataset", type=str, default="cora", help="dataset (cora, pubmed)") + parser.add_argument("--use_cuda", action='store_true', help="use_cuda") + args = parser.parse_args() + log.info(args) + main(args) diff --git a/pgl/__init__.py b/pgl/__init__.py index 93375e9ec5c7913334b02a05f6681de3f1ee4069..7543265d30492f8d1fe7a898f948166ae89001ea 100644 --- a/pgl/__init__.py +++ b/pgl/__init__.py @@ -21,3 +21,4 @@ from pgl import data_loader from pgl import heter_graph from pgl import heter_graph_wrapper from pgl import contrib +from pgl import message_passing diff --git a/pgl/layers/conv.py b/pgl/layers/conv.py index 68a1d733ed1d297e7a20daa1fb7c14828ff8722b..4d17c323b1f338f185bd3c90d60ac36741664886 100644 --- a/pgl/layers/conv.py +++ b/pgl/layers/conv.py @@ -15,10 +15,10 @@ graph neural networks. """ import paddle.fluid as fluid -from pgl import graph_wrapper from pgl.utils import paddle_helper +from pgl import message_passing -__all__ = ['gcn', 'gat', 'gin', 'gaan'] +__all__ = ['gcn', 'gat', 'gin', 'gaan', 'gen_conv'] def gcn(gw, feature, hidden_size, activation, name, norm=None): @@ -352,3 +352,55 @@ def gaan(gw, feature, hidden_size_a, hidden_size_v, hidden_size_m, hidden_size_o output = fluid.layers.dropout(output, dropout_prob=0.1) return output + + +def gen_conv(gw, + feature, + name, + beta=None): + """Implementation of GENeralized Graph Convolution (GENConv), see the paper + "DeeperGCN: All You Need to Train Deeper GCNs" in + https://arxiv.org/pdf/2006.07739.pdf + + Args: + gw: Graph wrapper object (:code:`StaticGraphWrapper` or :code:`GraphWrapper`) + + feature: A tensor with shape (num_nodes, feature_size). + + beta: [0, +infinity] or "dynamic" or None + + name: deeper gcn layer names. + + Return: + A tensor with shape (num_nodes, feature_size) + """ + + if beta == "dynamic": + beta = fluid.layers.create_parameter( + shape=[1], + dtype='float32', + default_initializer= + fluid.initializer.ConstantInitializer(value=1.0), + name=name + '_beta') + + # message passing + msg = gw.send(message_passing.copy_send, nfeat_list=[("h", feature)]) + output = gw.recv(msg, message_passing.softmax_agg(beta)) + + # msg norm + output = message_passing.msg_norm(feature, output, name) + output = feature + output + + output = fluid.layers.fc(output, + feature.shape[-1], + bias_attr=False, + act="relu", + param_attr=fluid.ParamAttr(name=name + '_weight1')) + + output = fluid.layers.fc(output, + feature.shape[-1], + bias_attr=False, + param_attr=fluid.ParamAttr(name=name + '_weight2')) + + return output + diff --git a/pgl/message_passing.py b/pgl/message_passing.py new file mode 100644 index 0000000000000000000000000000000000000000..4046ad9c905fabd526dd084d7abb5924409ca07a --- /dev/null +++ b/pgl/message_passing.py @@ -0,0 +1,204 @@ +# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""This package implements some common message passing +functions to help building graph neural networks. +""" + +import numpy as np +import paddle +import paddle.fluid as fluid +import paddle.fluid.layers as L +from pgl.utils import paddle_helper + +__all__ = ['copy_send', 'weighted_copy_send', 'mean_recv', + 'sum_recv', 'max_recv', 'lstm_recv', 'graphsage_sum', + 'graphsage_mean', 'pinsage_mean', 'pinsage_sum', + 'softmax_agg', 'msg_norm'] + + +def copy_send(src_feat, dst_feat, edge_feat): + """doc""" + return src_feat["h"] + +def weighted_copy_send(src_feat, dst_feat, edge_feat): + """doc""" + return src_feat["h"] * edge_feat["weight"] + +def mean_recv(feat): + """doc""" + return fluid.layers.sequence_pool(feat, pool_type="average") + + +def sum_recv(feat): + """doc""" + return fluid.layers.sequence_pool(feat, pool_type="sum") + + +def max_recv(feat): + """doc""" + return fluid.layers.sequence_pool(feat, pool_type="max") + + +def lstm_recv(hidden_dim): + """doc""" + def lstm_recv_inside(feat): + forward, _ = fluid.layers.dynamic_lstm( + input=feat, size=hidden_dim * 4, use_peepholes=False) + output = fluid.layers.sequence_last_step(forward) + return output + return lstm_recv_inside + + +def graphsage_sum(gw, feature, hidden_size, act, initializer, learning_rate, name): + """doc""" + msg = gw.send(copy_send, nfeat_list=[("h", feature)]) + neigh_feature = gw.recv(msg, sum_recv) + self_feature = feature + self_feature = fluid.layers.fc(self_feature, + hidden_size, + act=act, + param_attr=fluid.ParamAttr(name=name + "_l.w_0", initializer=initializer, + learning_rate=learning_rate), + bias_attr=name+"_l.b_0" + ) + neigh_feature = fluid.layers.fc(neigh_feature, + hidden_size, + act=act, + param_attr=fluid.ParamAttr(name=name + "_r.w_0", initializer=initializer, + learning_rate=learning_rate), + bias_attr=name+"_r.b_0" + ) + output = fluid.layers.concat([self_feature, neigh_feature], axis=1) + output = fluid.layers.l2_normalize(output, axis=1) + return output + + +def graphsage_mean(gw, feature, hidden_size, act, initializer, learning_rate, name): + """doc""" + msg = gw.send(copy_send, nfeat_list=[("h", feature)]) + neigh_feature = gw.recv(msg, mean_recv) + self_feature = feature + self_feature = fluid.layers.fc(self_feature, + hidden_size, + act=act, + param_attr=fluid.ParamAttr(name=name + "_l.w_0", initializer=initializer, + learning_rate=learning_rate), + bias_attr=name+"_l.b_0" + ) + neigh_feature = fluid.layers.fc(neigh_feature, + hidden_size, + act=act, + param_attr=fluid.ParamAttr(name=name + "_r.w_0", initializer=initializer, + learning_rate=learning_rate), + bias_attr=name+"_r.b_0" + ) + output = fluid.layers.concat([self_feature, neigh_feature], axis=1) + output = fluid.layers.l2_normalize(output, axis=1) + return output + + +def pinsage_mean(gw, feature, hidden_size, act, initializer, learning_rate, name): + """doc""" + msg = gw.send(weighted_copy_send, nfeat_list=[("h", feature)], efeat_list=["weight"]) + neigh_feature = gw.recv(msg, mean_recv) + self_feature = feature + self_feature = fluid.layers.fc(self_feature, + hidden_size, + act=act, + 
param_attr=fluid.ParamAttr(name=name + "_l.w_0", initializer=initializer, + learning_rate=learning_rate), + bias_attr=name+"_l.b_0" + ) + neigh_feature = fluid.layers.fc(neigh_feature, + hidden_size, + act=act, + param_attr=fluid.ParamAttr(name=name + "_r.w_0", initializer=initializer, + learning_rate=learning_rate), + bias_attr=name+"_r.b_0" + ) + output = fluid.layers.concat([self_feature, neigh_feature], axis=1) + output = fluid.layers.l2_normalize(output, axis=1) + return output + + +def pinsage_sum(gw, feature, hidden_size, act, initializer, learning_rate, name): + """doc""" + msg = gw.send(weighted_copy_send, nfeat_list=[("h", feature)], efeat_list=["weight"]) + neigh_feature = gw.recv(msg, sum_recv) + self_feature = feature + self_feature = fluid.layers.fc(self_feature, + hidden_size, + act=act, + param_attr=fluid.ParamAttr(name=name + "_l.w_0", initializer=initializer, + learning_rate=learning_rate), + bias_attr=name+"_l.b_0" + ) + neigh_feature = fluid.layers.fc(neigh_feature, + hidden_size, + act=act, + param_attr=fluid.ParamAttr(name=name + "_r.w_0", initializer=initializer, + learning_rate=learning_rate), + bias_attr=name+"_r.b_0" + ) + output = fluid.layers.concat([self_feature, neigh_feature], axis=1) + output = fluid.layers.l2_normalize(output, axis=1) + return output + + +def softmax_agg(beta): + """Implementation of softmax_agg aggregator, see more information in the paper + "DeeperGCN: All You Need to Train Deeper GCNs" + (https://arxiv.org/pdf/2006.07739.pdf) + + Args: + msg: the received message, lod-tensor, (batch_size, seq_len, hidden_size) + beta: Inverse Temperature + + Return: + An output tensor with shape (num_nodes, hidden_size) + """ + + def softmax_agg_inside(msg): + alpha = paddle_helper.sequence_softmax(msg, beta) + msg = msg * alpha + return fluid.layers.sequence_pool(msg, "sum") + + return softmax_agg_inside + + +def msg_norm(x, msg, name): + """Implementation of message normalization, see more information in the paper + "DeeperGCN: All You Need to Train Deeper GCNs" + (https://arxiv.org/pdf/2006.07739.pdf) + + Args: + x: centre node feature (num_nodes, feature_size) + msg: neighbor node feature (num_nodes, feature_size) + name: name for s + + Return: + An output tensor with shape (num_nodes, feature_size) + """ + s = fluid.layers.create_parameter( + shape=[1], + dtype='float32', + default_initializer= + fluid.initializer.ConstantInitializer(value=1.0), + name=name + '_s_msg_norm') + + msg = fluid.layers.l2_normalize(msg, axis=1) + x_norm = fluid.layers.reduce_sum(x * x, dim=1, keep_dim=True) + msg = msg * x_norm * s + return msg + diff --git a/pgl/utils/paddle_helper.py b/pgl/utils/paddle_helper.py index adbece57a6580f266eba5e6158207127218b79e0..3570fac2c9da6b668108d4216cac9d415ce68dcd 100644 --- a/pgl/utils/paddle_helper.py +++ b/pgl/utils/paddle_helper.py @@ -185,7 +185,7 @@ def lod_constant(name, value, lod, dtype): return output, data_initializer -def sequence_softmax(x): +def sequence_softmax(x, beta=None): """Compute sequence softmax over paddle LodTensor This function compute softmax normalization along with the length of sequence. @@ -194,10 +194,15 @@ def sequence_softmax(x): Args: x: The input variable which is a LodTensor. + beta: Inverse Temperature Return: Output of sequence_softmax """ + + if beta is not None: + x = x * beta + x_max = fluid.layers.sequence_pool(x, "max") x_max = fluid.layers.sequence_expand_as(x_max, x) x = x - x_max
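For intuition, here is a minimal NumPy sketch of what the two new helpers in `pgl/message_passing.py` compute, viewed for a single destination node. The function names `softmax_agg_np` and `msg_norm_np` and the toy shapes are illustrative only, not part of the PGL API: `softmax_agg` weights incoming messages with a per-feature softmax controlled by the (possibly learnable) inverse temperature `beta`, and `msg_norm` l2-normalizes the aggregated message and rescales it by the squared norm of the centre feature times a learnable scalar `s` before `gen_conv` adds it back to the input feature.

```
import numpy as np

def softmax_agg_np(msgs, beta=1.0):
    # msgs: (num_neighbors, hidden) messages arriving at one node.
    # Mirrors sequence_softmax(msg, beta) followed by a "sum" sequence_pool.
    scores = beta * msgs
    scores = scores - scores.max(axis=0, keepdims=True)    # max-subtraction for stability
    alpha = np.exp(scores) / np.exp(scores).sum(axis=0, keepdims=True)
    return (alpha * msgs).sum(axis=0)                      # (hidden,)

def msg_norm_np(x, msg, s=1.0):
    # x: (num_nodes, hidden) centre features, msg: (num_nodes, hidden) aggregated messages.
    # l2-normalize the message, then rescale by sum(x * x) (the squared norm used above) and s.
    msg = msg / np.maximum(np.linalg.norm(msg, axis=1, keepdims=True), 1e-12)
    x_norm = (x * x).sum(axis=1, keepdims=True)
    return msg * x_norm * s

# toy check: one centre node with 3 neighbors and 4 feature channels
msgs = np.random.rand(3, 4).astype("float32")
x = np.random.rand(1, 4).astype("float32")
agg = softmax_agg_np(msgs, beta=1.0)                       # (4,)
out = x + msg_norm_np(x, agg[None, :], s=1.0)              # residual add, as in gen_conv
print(agg.shape, out.shape)
```

In `gen_conv` this residual sum then goes through two fully connected layers (ReLU on the first), which plays the role of the MLP in the GENConv update.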