From 388e99112018f283f01a6818cc0016b1c5b79617 Mon Sep 17 00:00:00 2001 From: wangwenjin Date: Mon, 25 May 2020 10:40:25 +0800 Subject: [PATCH] modify GaAN --- .gitignore | 6 +- examples/GaAN/model.py | 36 ++++++++ examples/GaAN/train.py | 58 ++++++------- pgl/layers/conv.py | 184 ++++++++++++++++++++++++++++++----------- 4 files changed, 207 insertions(+), 77 deletions(-) create mode 100644 examples/GaAN/model.py diff --git a/.gitignore b/.gitignore index 4c6c2d2..1058cfb 100644 --- a/.gitignore +++ b/.gitignore @@ -1,7 +1,7 @@ # data and log -.examples/GaAN/datase/t -.examples/GaAN/log/ -.examples/GaAN/__pycache__/ +/examples/GaAN/dataset/ +/examples/GaAN/log/ +/examples/GaAN/__pycache__/ # Virtualenv /.venv/ /venv/ diff --git a/examples/GaAN/model.py b/examples/GaAN/model.py new file mode 100644 index 0000000..5bcdf9c --- /dev/null +++ b/examples/GaAN/model.py @@ -0,0 +1,36 @@ +from paddle import fluid +from pgl.utils import paddle_helper +from pgl.layers import GaAN + +class GaANModel(object): + def __init__(self, num_class, num_layers, hidden_size_a=24, + hidden_size_v=32, hidden_size_m=64, hidden_size_o=128, + heads=8, act='relu', name="GaAN"): + self.num_class = num_class + self.num_layers = num_layers + self.hidden_size_a = hidden_size_a + self.hidden_size_v = hidden_size_v + self.hidden_size_m = hidden_size_m + self.hidden_size_o = hidden_size_o + self.act = act + self.name = name + self.heads = heads + + def forward(self, gw): + feature = gw.node_feat['node_feat'] + for i in range(self.num_layers): + feature = GaAN(gw, feature, self.hidden_size_a, self.hidden_size_v, + self.hidden_size_m, self.hidden_size_o, self.heads, + self.name+'_'+str(i)) + + pred = fluid.layers.fc( + feature, self.num_class, act=None, name=self.name + "_pred_output") + + return pred + + + + + + + \ No newline at end of file diff --git a/examples/GaAN/train.py b/examples/GaAN/train.py index bab31cc..16e82a6 100644 --- a/examples/GaAN/train.py +++ b/examples/GaAN/train.py @@ -1,17 +1,3 @@ -# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - from preprocess import get_graph_data import pgl import argparse @@ -21,17 +7,23 @@ from paddle import fluid from visualdl import LogWriter import reader -from train_tool import train_epoch, valid_epoch +from train_tool import train_epoch, valid_epoch + + +from model import GaANModel + if __name__ == "__main__": - parser = argparse.ArgumentParser(description="Training") + parser = argparse.ArgumentParser(description="ogb Training") parser.add_argument("--d_name", type=str, choices=["ogbn-proteins"], default="ogbn-proteins", help="the name of dataset in ogb") + parser.add_argument("--model", type=str, choices=["GaAN"], default="GaAN", + help="the name of model") parser.add_argument("--mini_data", type=str, choices=["True", "False"], default="False", help="use a small dataset to test the code") parser.add_argument("--use_gpu", type=bool, choices=[True, False], default=True, help="use gpu") - parser.add_argument("--gpu_id", type=int, default=0, + parser.add_argument("--gpu_id", type=int, default=4, help="the id of gpu") parser.add_argument("--exp_id", type=int, default=0, help="the id of experiment") @@ -57,8 +49,10 @@ if __name__ == "__main__": help="the hidden size of each layer in GaAN") args = parser.parse_args() + +# d_name = "ogbn-proteins" - print("setting".center(50, "=")) + print("超参数配置".center(50, "=")) print("lr = {}, rc = {}, epochs = {}, batch_size = {}".format(args.lr, args.rc, args.epochs, args.batch_size)) print("Experiment ID: {}".format(args.exp_id).center(50, "=")) @@ -66,13 +60,12 @@ if __name__ == "__main__": d_name = args.d_name # get data - g, label, train_idx, valid_idx, test_idx, evaluator = get_graph_data( - d_name=d_name, - mini_data=eval(args.mini_data)) + g, label, train_idx, valid_idx, test_idx, evaluator = get_graph_data(d_name=d_name, + mini_data=eval(args.mini_data)) # create log writer - log_writer = LogWriter(args.log_path, sync_cycle=10) + log_writer = LogWriter(args.log_path+'/'+str(args.exp_id), sync_cycle=10) with log_writer.mode("train") as logger: log_train_loss_epoch = logger.scalar("loss") log_train_rocauc_epoch = logger.scalar("rocauc") @@ -84,6 +77,9 @@ if __name__ == "__main__": log_test_loss = log_writer.scalar("test_loss") log_test_rocauc = log_writer.scalar("test_rocauc") + if args.model == "GaAN": + graph_model = GaANModel(112, 3, args.hidden_size_a, args.hidden_size_v, args.hidden_size_m, + args.hidden_size_o, args.heads) # training samples = [25, 10] # 2-hop sample size @@ -102,6 +98,7 @@ if __name__ == "__main__": edge_feat=g.edge_feat_info() ) + node_index = fluid.layers.data('node_index', shape=[None, 1], dtype="int64", append_batch_size=False) @@ -109,11 +106,8 @@ if __name__ == "__main__": append_batch_size=False) parent_node_index = fluid.layers.data('parent_node_index', shape=[None, 1], dtype="int64", append_batch_size=False) - feature = gw.node_feat['node_feat'] - for i in range(3): - feature = pgl.layers.GaAN(gw, feature, args.hidden_size_a, args.hidden_size_v, - args.hidden_size_m, args.hidden_size_o, args.heads, name='GaAN_'+str(i)) - output = fluid.layers.fc(feature, 112, act=None) + + output = graph_model.forward(gw) output = fluid.layers.gather(output, node_index) score = fluid.layers.sigmoid(output) @@ -125,6 +119,14 @@ if __name__ == "__main__": val_program = train_program.clone(for_test=True) with fluid.program_guard(train_program, startup_program): + # adam = fluid.optimizer.Adam( + # learning_rate=1e-2, + # regularization=fluid.regularizer.L2DecayRegularizer( + # regularization_coeff=0.0005)) +# lr = fluid.layers.natural_exp_decay(learning_rate=args.lr, +# decay_steps=1000, +# decay_rate=0.5, +# ) lr = args.lr adam = fluid.optimizer.Adam( learning_rate=lr, @@ -213,4 +215,4 @@ if __name__ == "__main__": )) log_text.add_record(args.epochs+3, "End".center(50, "=")) - + \ No newline at end of file diff --git a/pgl/layers/conv.py b/pgl/layers/conv.py index 17f4f2e..8e4c5d5 100644 --- a/pgl/layers/conv.py +++ b/pgl/layers/conv.py @@ -259,27 +259,97 @@ def gin(gw, return output -def GaAN(gw, feature, hidden_size_a, hidden_size_v, hidden_size_m, hidden_size_o, heads, - name): - """ - This is an implementation of the paper GaAN: Gated Attention Networks for Learning - on Large and Spatiotemporal Graphs(https://arxiv.org/abs/1803.07294) - """ - # project the feature of nodes into new vector spaces - feat_key = fluid.layers.fc(feature, hidden_size_a * heads, bias_attr=False, - param_attr=fluid.ParamAttr(name=name + '_project_key')) - feat_value = fluid.layers.fc(feature, hidden_size_v * heads, bias_attr=False, - param_attr=fluid.ParamAttr(name=name + '_project_value')) - feat_query = fluid.layers.fc(feature, hidden_size_a * heads, bias_attr=False, - param_attr=fluid.ParamAttr(name=name + '_project_query')) - feat_gate = fluid.layers.fc(feature, hidden_size_m, bias_attr=False, - param_attr=fluid.ParamAttr(name=name + '_project_gate')) +# def GaAN(gw, feature, hidden_size_a, hidden_size_v, hidden_size_m, hidden_size_o, heads, +# name): +# """ +# This is an implementation of the paper GaAN: Gated Attention Networks for Learning +# on Large and Spatiotemporal Graphs(https://arxiv.org/abs/1803.07294) +# """ +# # send function +# def send_func(src_feat, dst_feat, edge_feat): +# print("heads: {}, hidden_size_a: {}".format(heads, hidden_size_a)) +# feat_query, feat_key = dst_feat['feat_query'], src_feat['feat_key'] +# feat_query = fluid.layers.reshape(feat_query, [-1, heads, hidden_size_a]) +# feat_key = fluid.layers.reshape(feat_key, [-1, heads, hidden_size_a]) +# alpha = fluid.layers.reduce_sum(feat_key * feat_query, dim=-1) + +# return {'dst_node_feat': dst_feat['node_feat'], +# 'src_node_feat': src_feat['node_feat'], +# 'feat_value': src_feat['feat_value'], +# 'alpha': alpha, +# 'feat_gate': src_feat['feat_gate']} + +# # recv function +# def recv_func(message): +# dst_feat = message['dst_node_feat'] # feature of dst nodes on each edge +# src_feat = message['src_node_feat'] # feature of src nodes on each edge +# x = fluid.layers.sequence_pool(dst_feat, 'average') # feature of center nodes +# z = fluid.layers.sequence_pool(src_feat, 'average') # mean feature of neighbors + +# # compute gate +# feat_gate = message['feat_gate'] +# g_max = fluid.layers.sequence_pool(feat_gate, 'max') +# g = fluid.layers.concat([x, g_max, z], axis=1) +# g = fluid.layers.fc(g, heads, bias_attr=False, act='sigmoid') + +# # softmax of attention coefficient +# alpha = message['alpha'] +# alpha = paddle_helper.sequence_softmax(alpha) + +# feat_value = message['feat_value'] +# old = feat_value +# feat_value = fluid.layers.reshape(feat_value, [-1, heads, hidden_size_v]) +# feat_value = fluid.layers.elementwise_mul(feat_value, alpha, axis=0) +# feat_value = fluid.layers.reshape(feat_value, [-1, heads * hidden_size_v]) +# feat_value = fluid.layers.lod_reset(feat_value, old) + +# feat_value = fluid.layers.sequence_pool(feat_value, 'sum') +# feat_value = fluid.layers.reshape(feat_value, [-1, heads, hidden_size_v]) +# output = fluid.layers.elementwise_mul(feat_value, g, axis=0) +# output = fluid.layers.reshape(output, [-1, heads*hidden_size_v]) +# output = fluid.layers.concat([x, output], axis=1) + +# return output - # send function +# # project the feature of nodes into new vector spaces +# feat_key = fluid.layers.fc(feature, hidden_size_a * heads, bias_attr=False, +# param_attr=fluid.ParamAttr(name=name + '_project_key')) +# feat_value = fluid.layers.fc(feature, hidden_size_v * heads, bias_attr=False, +# param_attr=fluid.ParamAttr(name=name + '_project_value')) +# feat_query = fluid.layers.fc(feature, hidden_size_a * heads, bias_attr=False, +# param_attr=fluid.ParamAttr(name=name + '_project_query')) +# feat_gate = fluid.layers.fc(feature, hidden_size_m, bias_attr=False, +# param_attr=fluid.ParamAttr(name=name + '_project_gate')) + +# # send stage +# msg = gw.send(send_func, nfeat_list=[('node_feat', feature), +# ('feat_key', feat_key), ('feat_value', feat_value), +# ('feat_query', feat_query), ('feat_gate', feat_gate)], +# efeat_list=None, +# ) + +# # recv stage +# output = gw.recv(msg, recv_func) + +# # output +# output = fluid.layers.fc(output, hidden_size_o, bias_attr=False, +# param_attr=fluid.ParamAttr(name=name+'_project_output')) +# outout = fluid.layers.leaky_relu(output, alpha=0.1) +# output = fluid.layers.dropout(output, dropout_prob=0.1) + +# return output + +def GaAN(gw, feature, hidden_size_a, hidden_size_v, hidden_size_m, hidden_size_o, heads, name): + def send_func(src_feat, dst_feat, edge_feat): + # 计算每条边上的注意力分数 + # E * (M * D1), 每个 dst 点都查询它的全部邻边的 src 点 feat_query, feat_key = dst_feat['feat_query'], src_feat['feat_key'] + # E * M * D1 + old = feat_query feat_query = fluid.layers.reshape(feat_query, [-1, heads, hidden_size_a]) feat_key = fluid.layers.reshape(feat_key, [-1, heads, hidden_size_a]) + # E * M alpha = fluid.layers.reduce_sum(feat_key * feat_query, dim=-1) return {'dst_node_feat': dst_feat['node_feat'], @@ -288,53 +358,75 @@ def GaAN(gw, feature, hidden_size_a, hidden_size_v, hidden_size_m, hidden_size_o 'alpha': alpha, 'feat_gate': src_feat['feat_gate']} - # send stage - message = gw.send(send_func, nfeat_list=[('node_feat', feature), - ('feat_key', feat_key), ('feat_value', feat_value), - ('feat_query', feat_query), ('feat_gate', feat_gate)], - efeat_list=None, - ) - - # recv function def recv_func(message): - dst_feat = message['dst_node_feat'] # feature of dst nodes on each edge - src_feat = message['src_node_feat'] # feature of src nodes on each edge - x = fluid.layers.sequence_pool(dst_feat, 'average') # feature of center nodes - z = fluid.layers.sequence_pool(src_feat, 'average') # mean feature of neighbors - - # compute gate + # 每条边的终点的特征 + dst_feat = message['dst_node_feat'] + # 每条边的出发点的特征 + src_feat = message['src_node_feat'] + # 每个中心点自己的特征 + x = fluid.layers.sequence_pool(dst_feat, 'average') + # 每个中心点的邻居的特征的平均值 + z = fluid.layers.sequence_pool(src_feat, 'average') + + # 计算 gate feat_gate = message['feat_gate'] g_max = fluid.layers.sequence_pool(feat_gate, 'max') g = fluid.layers.concat([x, g_max, z], axis=1) - g = fluid.layers.fc(g, heads, bias_attr=False, act='sigmoid') + g = fluid.layers.fc(g, heads, bias_attr=False, act="sigmoid") - # softmax of attention coefficient + # softmax alpha = message['alpha'] - alpha = paddle_helper.sequence_softmax(alpha) + alpha = paddle_helper.sequence_softmax(alpha) # E * M - feat_value = message['feat_value'] + feat_value = message['feat_value'] # E * (M * D2) old = feat_value - feat_value = fluid.layers.reshape(feat_value, [-1, heads, hidden_size_v]) + feat_value = fluid.layers.reshape(feat_value, [-1, heads, hidden_size_v]) # E * M * D2 feat_value = fluid.layers.elementwise_mul(feat_value, alpha, axis=0) - feat_value = fluid.layers.reshape(feat_value, [-1, heads * hidden_size_v]) + feat_value = fluid.layers.reshape(feat_value, [-1, heads*hidden_size_v]) # E * (M * D2) feat_value = fluid.layers.lod_reset(feat_value, old) - - feat_value = fluid.layers.sequence_pool(feat_value, 'sum') - feat_value = fluid.layers.reshape(feat_value, [-1, heads, hidden_size_v]) + + feat_value = fluid.layers.sequence_pool(feat_value, 'sum') # N * (M * D2) + + feat_value = fluid.layers.reshape(feat_value, [-1, heads, hidden_size_v]) # N * M * D2 + output = fluid.layers.elementwise_mul(feat_value, g, axis=0) - output = fluid.layers.reshape(output, [-1, heads*hidden_size_v]) + output = fluid.layers.reshape(output, [-1, heads * hidden_size_v]) # N * (M * D2) + output = fluid.layers.concat([x, output], axis=1) return output - # recv stage + # feature N * D + + # 计算每个点自己需要发送出去的内容 + # 投影后的特征向量 + # N * (D1 * M) + feat_key = fluid.layers.fc(feature, hidden_size_a * heads, bias_attr=False, + param_attr=fluid.ParamAttr(name=name + '_project_key')) + # N * (D2 * M) + feat_value = fluid.layers.fc(feature, hidden_size_v * heads, bias_attr=False, + param_attr=fluid.ParamAttr(name=name + '_project_value')) + # N * (D1 * M) + feat_query = fluid.layers.fc(feature, hidden_size_a * heads, bias_attr=False, + param_attr=fluid.ParamAttr(name=name + '_project_query')) + # N * Dm + feat_gate = fluid.layers.fc(feature, hidden_size_m, bias_attr=False, + param_attr=fluid.ParamAttr(name=name + '_project_gate')) + + # send 阶段 + + message = gw.send( + send_func, + nfeat_list=[('node_feat', feature), ('feat_key', feat_key), ('feat_value', feat_value), + ('feat_query', feat_query), ('feat_gate', feat_gate)], + efeat_list=None, + ) + + # 聚合邻居特征 output = gw.recv(message, recv_func) - - # output output = fluid.layers.fc(output, hidden_size_o, bias_attr=False, - param_attr=fluid.ParamAttr(name=name+'_project_output')) - outout = fluid.layers.leaky_relu(output, alpha=0.1) + param_attr=fluid.ParamAttr(name=name + '_project_output')) + output = fluid.layers.leaky_relu(output, alpha=0.1) output = fluid.layers.dropout(output, dropout_prob=0.1) return output - -- GitLab