Commit 388e9911 authored by: W wangwenjin

modify GaAN

Parent af936913
# data and log
.examples/GaAN/dataset/
.examples/GaAN/log/
.examples/GaAN/__pycache__/
/examples/GaAN/dataset/
/examples/GaAN/log/
/examples/GaAN/__pycache__/
# Virtualenv
/.venv/
/venv/
......
from paddle import fluid
from pgl.utils import paddle_helper
from pgl.layers import GaAN
class GaANModel(object):
    def __init__(self, num_class, num_layers, hidden_size_a=24,
                 hidden_size_v=32, hidden_size_m=64, hidden_size_o=128,
                 heads=8, act='relu', name="GaAN"):
        self.num_class = num_class
        self.num_layers = num_layers
        self.hidden_size_a = hidden_size_a
        self.hidden_size_v = hidden_size_v
        self.hidden_size_m = hidden_size_m
        self.hidden_size_o = hidden_size_o
        self.act = act
        self.name = name
        self.heads = heads

    def forward(self, gw):
        feature = gw.node_feat['node_feat']
        for i in range(self.num_layers):
            feature = GaAN(gw, feature, self.hidden_size_a, self.hidden_size_v,
                           self.hidden_size_m, self.hidden_size_o, self.heads,
                           self.name + '_' + str(i))
        pred = fluid.layers.fc(
            feature, self.num_class, act=None, name=self.name + "_pred_output")
        return pred
\ No newline at end of file
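For orientation, here is a minimal sketch of wiring this wrapper into a fluid program. The node-feature width of 8 is a hypothetical value, and the exact GraphWrapper signature varies across PGL 1.x releases, so treat this as an illustration rather than the actual training entry point (which follows below):

    import paddle.fluid as fluid
    import pgl

    from model import GaANModel

    startup_program = fluid.Program()
    train_program = fluid.Program()
    with fluid.program_guard(train_program, startup_program):
        # hypothetical graph wrapper; node_feat entries are (name, shape, dtype)
        gw = pgl.graph_wrapper.GraphWrapper(
            name="graph",
            node_feat=[('node_feat', [None, 8], "float32")])
        model = GaANModel(num_class=112, num_layers=3)
        logits = model.forward(gw)  # raw per-node scores, shape [num_nodes, 112]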
# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from preprocess import get_graph_data
import pgl
import argparse
@@ -23,15 +9,21 @@ from visualdl import LogWriter
import reader
from train_tool import train_epoch, valid_epoch
from model import GaANModel
if __name__ == "__main__":
parser = argparse.ArgumentParser(description="Training")
parser = argparse.ArgumentParser(description="ogb Training")
parser.add_argument("--d_name", type=str, choices=["ogbn-proteins"], default="ogbn-proteins",
help="the name of dataset in ogb")
parser.add_argument("--model", type=str, choices=["GaAN"], default="GaAN",
help="the name of model")
parser.add_argument("--mini_data", type=str, choices=["True", "False"], default="False",
help="use a small dataset to test the code")
parser.add_argument("--use_gpu", type=bool, choices=[True, False], default=True,
help="use gpu")
parser.add_argument("--gpu_id", type=int, default=0,
parser.add_argument("--gpu_id", type=int, default=4,
help="the id of gpu")
parser.add_argument("--exp_id", type=int, default=0,
help="the id of experiment")
@@ -58,7 +50,9 @@ if __name__ == "__main__":
args = parser.parse_args()
print("setting".center(50, "="))
# d_name = "ogbn-proteins"
print("超参数配置".center(50, "="))
print("lr = {}, rc = {}, epochs = {}, batch_size = {}".format(args.lr, args.rc, args.epochs,
args.batch_size))
print("Experiment ID: {}".format(args.exp_id).center(50, "="))
@@ -66,13 +60,12 @@ if __name__ == "__main__":
d_name = args.d_name
# get data
g, label, train_idx, valid_idx, test_idx, evaluator = get_graph_data(
d_name=d_name,
g, label, train_idx, valid_idx, test_idx, evaluator = get_graph_data(d_name=d_name,
mini_data=eval(args.mini_data))
# create log writer
log_writer = LogWriter(args.log_path, sync_cycle=10)
log_writer = LogWriter(args.log_path+'/'+str(args.exp_id), sync_cycle=10)
with log_writer.mode("train") as logger:
log_train_loss_epoch = logger.scalar("loss")
log_train_rocauc_epoch = logger.scalar("rocauc")
@@ -84,6 +77,9 @@ if __name__ == "__main__":
log_test_loss = log_writer.scalar("test_loss")
log_test_rocauc = log_writer.scalar("test_rocauc")
if args.model == "GaAN":
graph_model = GaANModel(112, 3, args.hidden_size_a, args.hidden_size_v, args.hidden_size_m,
args.hidden_size_o, args.heads)
# training
samples = [25, 10] # 2-hop sample size
@@ -102,6 +98,7 @@ if __name__ == "__main__":
edge_feat=g.edge_feat_info()
)
node_index = fluid.layers.data('node_index', shape=[None, 1], dtype="int64",
append_batch_size=False)
@@ -109,11 +106,8 @@ if __name__ == "__main__":
append_batch_size=False)
parent_node_index = fluid.layers.data('parent_node_index', shape=[None, 1], dtype="int64",
append_batch_size=False)
feature = gw.node_feat['node_feat']
for i in range(3):
feature = pgl.layers.GaAN(gw, feature, args.hidden_size_a, args.hidden_size_v,
args.hidden_size_m, args.hidden_size_o, args.heads, name='GaAN_'+str(i))
output = fluid.layers.fc(feature, 112, act=None)
output = graph_model.forward(gw)
output = fluid.layers.gather(output, node_index)
score = fluid.layers.sigmoid(output)
@@ -125,6 +119,14 @@ if __name__ == "__main__":
val_program = train_program.clone(for_test=True)
with fluid.program_guard(train_program, startup_program):
# adam = fluid.optimizer.Adam(
# learning_rate=1e-2,
# regularization=fluid.regularizer.L2DecayRegularizer(
# regularization_coeff=0.0005))
# lr = fluid.layers.natural_exp_decay(learning_rate=args.lr,
# decay_steps=1000,
# decay_rate=0.5,
# )
lr = args.lr
adam = fluid.optimizer.Adam(
learning_rate=lr,
......
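The commented-out blocks above record two alternatives that were apparently tried: Adam with L2 regularization, and a natural-exponential-decay learning-rate schedule. A hedged sketch of re-enabling the decay schedule, reusing the values from the comment:

    # sketch only: swaps the constant lr for the commented-out decay schedule
    lr = fluid.layers.natural_exp_decay(learning_rate=args.lr,  # initial rate from the CLI
                                        decay_steps=1000,       # values from the comment above
                                        decay_rate=0.5)
    adam = fluid.optimizer.Adam(learning_rate=lr)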
@@ -259,27 +259,97 @@ def gin(gw,
return output
def GaAN(gw, feature, hidden_size_a, hidden_size_v, hidden_size_m, hidden_size_o, heads,
name):
"""
This is an implementation of the paper GaAN: Gated Attention Networks for Learning
on Large and Spatiotemporal Graphs (https://arxiv.org/abs/1803.07294)
"""
# project the feature of nodes into new vector spaces
feat_key = fluid.layers.fc(feature, hidden_size_a * heads, bias_attr=False,
param_attr=fluid.ParamAttr(name=name + '_project_key'))
feat_value = fluid.layers.fc(feature, hidden_size_v * heads, bias_attr=False,
param_attr=fluid.ParamAttr(name=name + '_project_value'))
feat_query = fluid.layers.fc(feature, hidden_size_a * heads, bias_attr=False,
param_attr=fluid.ParamAttr(name=name + '_project_query'))
feat_gate = fluid.layers.fc(feature, hidden_size_m, bias_attr=False,
param_attr=fluid.ParamAttr(name=name + '_project_gate'))
# def GaAN(gw, feature, hidden_size_a, hidden_size_v, hidden_size_m, hidden_size_o, heads,
# name):
# """
# This is an implementation of the paper GaAN: Gated Attention Networks for Learning
# on Large and Spatiotemporal Graphs(https://arxiv.org/abs/1803.07294)
# """
# # send function
# def send_func(src_feat, dst_feat, edge_feat):
# print("heads: {}, hidden_size_a: {}".format(heads, hidden_size_a))
# feat_query, feat_key = dst_feat['feat_query'], src_feat['feat_key']
# feat_query = fluid.layers.reshape(feat_query, [-1, heads, hidden_size_a])
# feat_key = fluid.layers.reshape(feat_key, [-1, heads, hidden_size_a])
# alpha = fluid.layers.reduce_sum(feat_key * feat_query, dim=-1)
# return {'dst_node_feat': dst_feat['node_feat'],
# 'src_node_feat': src_feat['node_feat'],
# 'feat_value': src_feat['feat_value'],
# 'alpha': alpha,
# 'feat_gate': src_feat['feat_gate']}
# # recv function
# def recv_func(message):
# dst_feat = message['dst_node_feat'] # feature of dst nodes on each edge
# src_feat = message['src_node_feat'] # feature of src nodes on each edge
# x = fluid.layers.sequence_pool(dst_feat, 'average') # feature of center nodes
# z = fluid.layers.sequence_pool(src_feat, 'average') # mean feature of neighbors
# # compute gate
# feat_gate = message['feat_gate']
# g_max = fluid.layers.sequence_pool(feat_gate, 'max')
# g = fluid.layers.concat([x, g_max, z], axis=1)
# g = fluid.layers.fc(g, heads, bias_attr=False, act='sigmoid')
# # softmax of attention coefficient
# alpha = message['alpha']
# alpha = paddle_helper.sequence_softmax(alpha)
# feat_value = message['feat_value']
# old = feat_value
# feat_value = fluid.layers.reshape(feat_value, [-1, heads, hidden_size_v])
# feat_value = fluid.layers.elementwise_mul(feat_value, alpha, axis=0)
# feat_value = fluid.layers.reshape(feat_value, [-1, heads * hidden_size_v])
# feat_value = fluid.layers.lod_reset(feat_value, old)
# feat_value = fluid.layers.sequence_pool(feat_value, 'sum')
# feat_value = fluid.layers.reshape(feat_value, [-1, heads, hidden_size_v])
# output = fluid.layers.elementwise_mul(feat_value, g, axis=0)
# output = fluid.layers.reshape(output, [-1, heads*hidden_size_v])
# output = fluid.layers.concat([x, output], axis=1)
# return output
# # project the feature of nodes into new vector spaces
# feat_key = fluid.layers.fc(feature, hidden_size_a * heads, bias_attr=False,
# param_attr=fluid.ParamAttr(name=name + '_project_key'))
# feat_value = fluid.layers.fc(feature, hidden_size_v * heads, bias_attr=False,
# param_attr=fluid.ParamAttr(name=name + '_project_value'))
# feat_query = fluid.layers.fc(feature, hidden_size_a * heads, bias_attr=False,
# param_attr=fluid.ParamAttr(name=name + '_project_query'))
# feat_gate = fluid.layers.fc(feature, hidden_size_m, bias_attr=False,
# param_attr=fluid.ParamAttr(name=name + '_project_gate'))
# # send stage
# msg = gw.send(send_func, nfeat_list=[('node_feat', feature),
# ('feat_key', feat_key), ('feat_value', feat_value),
# ('feat_query', feat_query), ('feat_gate', feat_gate)],
# efeat_list=None,
# )
# # recv stage
# output = gw.recv(msg, recv_func)
# # output
# output = fluid.layers.fc(output, hidden_size_o, bias_attr=False,
# param_attr=fluid.ParamAttr(name=name+'_project_output'))
# outout = fluid.layers.leaky_relu(output, alpha=0.1)
# output = fluid.layers.dropout(output, dropout_prob=0.1)
# return output
def GaAN(gw, feature, hidden_size_a, hidden_size_v, hidden_size_m, hidden_size_o, heads, name):
# send function
def send_func(src_feat, dst_feat, edge_feat):
# compute the attention score on each edge
# E * (M * D1): each dst node queries the src nodes of all its incident edges
feat_query, feat_key = dst_feat['feat_query'], src_feat['feat_key']
# E * M * D1
old = feat_query
feat_query = fluid.layers.reshape(feat_query, [-1, heads, hidden_size_a])
feat_key = fluid.layers.reshape(feat_key, [-1, heads, hidden_size_a])
# E * M
alpha = fluid.layers.reduce_sum(feat_key * feat_query, dim=-1)
return {'dst_node_feat': dst_feat['node_feat'],
@@ -288,53 +358,75 @@ def GaAN(gw, feature, hidden_size_a, hidden_size_v, hidden_size_m, hidden_size_o
'alpha': alpha,
'feat_gate': src_feat['feat_gate']}
# send stage
message = gw.send(send_func, nfeat_list=[('node_feat', feature),
('feat_key', feat_key), ('feat_value', feat_value),
('feat_query', feat_query), ('feat_gate', feat_gate)],
efeat_list=None,
)
# recv function
def recv_func(message):
dst_feat = message['dst_node_feat'] # feature of dst nodes on each edge
src_feat = message['src_node_feat'] # feature of src nodes on each edge
x = fluid.layers.sequence_pool(dst_feat, 'average') # feature of center nodes
z = fluid.layers.sequence_pool(src_feat, 'average') # mean feature of neighbors
# compute gate
# features of the destination node of each edge
dst_feat = message['dst_node_feat']
# features of the source node of each edge
src_feat = message['src_node_feat']
# each center node's own features
x = fluid.layers.sequence_pool(dst_feat, 'average')
# mean of the neighbors' features for each center node
z = fluid.layers.sequence_pool(src_feat, 'average')
# compute the gate
feat_gate = message['feat_gate']
g_max = fluid.layers.sequence_pool(feat_gate, 'max')
g = fluid.layers.concat([x, g_max, z], axis=1)
g = fluid.layers.fc(g, heads, bias_attr=False, act='sigmoid')
g = fluid.layers.fc(g, heads, bias_attr=False, act="sigmoid")
# softmax of attention coefficient
# softmax
alpha = message['alpha']
alpha = paddle_helper.sequence_softmax(alpha)
alpha = paddle_helper.sequence_softmax(alpha) # E * M
feat_value = message['feat_value']
feat_value = message['feat_value'] # E * (M * D2)
old = feat_value
feat_value = fluid.layers.reshape(feat_value, [-1, heads, hidden_size_v])
feat_value = fluid.layers.reshape(feat_value, [-1, heads, hidden_size_v]) # E * M * D2
feat_value = fluid.layers.elementwise_mul(feat_value, alpha, axis=0)
feat_value = fluid.layers.reshape(feat_value, [-1, heads * hidden_size_v])
feat_value = fluid.layers.reshape(feat_value, [-1, heads*hidden_size_v]) # E * (M * D2)
feat_value = fluid.layers.lod_reset(feat_value, old)
feat_value = fluid.layers.sequence_pool(feat_value, 'sum')
feat_value = fluid.layers.reshape(feat_value, [-1, heads, hidden_size_v])
feat_value = fluid.layers.sequence_pool(feat_value, 'sum') # N * (M * D2)
feat_value = fluid.layers.reshape(feat_value, [-1, heads, hidden_size_v]) # N * M * D2
output = fluid.layers.elementwise_mul(feat_value, g, axis=0)
output = fluid.layers.reshape(output, [-1, heads*hidden_size_v])
output = fluid.layers.reshape(output, [-1, heads * hidden_size_v]) # N * (M * D2)
output = fluid.layers.concat([x, output], axis=1)
return output
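# Note: recv_func relies on fluid's LoD (level-of-detail) sequence semantics.
# PGL hands recv_func the messages of each destination node as one variable-length
# sequence, and sequence_pool reduces every sequence to a single row. A hedged
# numpy sketch of that grouping (illustration only, not part of the layer):
#
#     import numpy as np
#     msgs = np.arange(10, dtype="float32").reshape(5, 2)  # 5 edge messages, width 2
#     lod = [0, 2, 5]  # node 0 receives edges 0..1, node 1 receives edges 2..4
#     pooled = np.stack([msgs[s:e].mean(0) for s, e in zip(lod[:-1], lod[1:])])
#     # pooled has one averaged message per destination node, shape (2, 2)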
# recv stage
output = gw.recv(message, recv_func)
# feature N * D
# compute what each node itself needs to send out
# projected feature vectors
# N * (D1 * M)
feat_key = fluid.layers.fc(feature, hidden_size_a * heads, bias_attr=False,
param_attr=fluid.ParamAttr(name=name + '_project_key'))
# N * (D2 * M)
feat_value = fluid.layers.fc(feature, hidden_size_v * heads, bias_attr=False,
param_attr=fluid.ParamAttr(name=name + '_project_value'))
# N * (D1 * M)
feat_query = fluid.layers.fc(feature, hidden_size_a * heads, bias_attr=False,
param_attr=fluid.ParamAttr(name=name + '_project_query'))
# N * Dm
feat_gate = fluid.layers.fc(feature, hidden_size_m, bias_attr=False,
param_attr=fluid.ParamAttr(name=name + '_project_gate'))
# send stage
# output
message = gw.send(
send_func,
nfeat_list=[('node_feat', feature), ('feat_key', feat_key), ('feat_value', feat_value),
('feat_query', feat_query), ('feat_gate', feat_gate)],
efeat_list=None,
)
# aggregate the neighbors' features
output = gw.recv(message, recv_func)
output = fluid.layers.fc(output, hidden_size_o, bias_attr=False,
param_attr=fluid.ParamAttr(name=name+'_project_output'))
outout = fluid.layers.leaky_relu(output, alpha=0.1)
param_attr=fluid.ParamAttr(name=name + '_project_output'))
output = fluid.layers.leaky_relu(output, alpha=0.1)
output = fluid.layers.dropout(output, dropout_prob=0.1)
return output
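Putting the two stages together: a GaAN layer scores each in-edge with multi-head dot-product attention, takes an attention-weighted sum of the projected neighbor values, and scales every head by a learned gate before the output projection. A hedged numpy reference of a single node's aggregation (M heads, key/query width D1, value width D2, matching the shape comments above; the function and variable names are illustrative only):

    import numpy as np

    def gaan_node(query, keys, values, gate):
        # query: [M, D1]; keys: [n, M, D1]; values: [n, M, D2]; gate: [M]
        logits = np.einsum('nmd,md->nm', keys, query)   # one score per neighbor and head
        alpha = np.exp(logits) / np.exp(logits).sum(axis=0, keepdims=True)  # softmax over neighbors
        agg = np.einsum('nm,nmd->md', alpha, values)    # attention-weighted value sum
        return (gate[:, None] * agg).reshape(-1)        # gate each head, flatten to M * D2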