Commit 388e9911 authored by: W wangwenjin

modify GaAN

Parent af936913
# data and log
.examples/GaAN/dataset/
.examples/GaAN/log/
.examples/GaAN/__pycache__/
/examples/GaAN/dataset/
/examples/GaAN/log/
/examples/GaAN/__pycache__/
# Virtualenv
/.venv/
/venv/
......
from paddle import fluid
from pgl.utils import paddle_helper
from pgl.layers import GaAN
class GaANModel(object):
    def __init__(self, num_class, num_layers, hidden_size_a=24,
                 hidden_size_v=32, hidden_size_m=64, hidden_size_o=128,
                 heads=8, act='relu', name="GaAN"):
        self.num_class = num_class
        self.num_layers = num_layers
        self.hidden_size_a = hidden_size_a
        self.hidden_size_v = hidden_size_v
        self.hidden_size_m = hidden_size_m
        self.hidden_size_o = hidden_size_o
        self.act = act
        self.name = name
        self.heads = heads

    def forward(self, gw):
        feature = gw.node_feat['node_feat']
        for i in range(self.num_layers):
            feature = GaAN(gw, feature, self.hidden_size_a, self.hidden_size_v,
                           self.hidden_size_m, self.hidden_size_o, self.heads,
                           self.name + '_' + str(i))
        pred = fluid.layers.fc(
            feature, self.num_class, act=None, name=self.name + "_pred_output")
        return pred
\ No newline at end of file
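For orientation, here is a minimal sketch of wiring this wrapper into a fluid program. The node-feature width of 8 is a hypothetical value, and the exact GraphWrapper signature varies across PGL 1.x releases, so treat this as an illustration rather than the actual training entry point (which follows below):

    import paddle.fluid as fluid
    import pgl

    from model import GaANModel

    startup_program = fluid.Program()
    train_program = fluid.Program()
    with fluid.program_guard(train_program, startup_program):
        # hypothetical graph wrapper; node_feat entries are (name, shape, dtype)
        gw = pgl.graph_wrapper.GraphWrapper(
            name="graph",
            node_feat=[('node_feat', [None, 8], "float32")])
        model = GaANModel(num_class=112, num_layers=3)
        logits = model.forward(gw)  # raw per-node scores, shape [num_nodes, 112]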
# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from preprocess import get_graph_data
import pgl
import argparse
@@ -23,15 +9,21 @@ from visualdl import LogWriter
import reader
from train_tool import train_epoch, valid_epoch
from model import GaANModel
if __name__ == "__main__":
parser = argparse.ArgumentParser(description="Training")
parser = argparse.ArgumentParser(description="ogb Training")
parser.add_argument("--d_name", type=str, choices=["ogbn-proteins"], default="ogbn-proteins",
help="the name of dataset in ogb")
parser.add_argument("--model", type=str, choices=["GaAN"], default="GaAN",
help="the name of model")
parser.add_argument("--mini_data", type=str, choices=["True", "False"], default="False",
help="use a small dataset to test the code")
parser.add_argument("--use_gpu", type=bool, choices=[True, False], default=True,
help="use gpu")
parser.add_argument("--gpu_id", type=int, default=0,
parser.add_argument("--gpu_id", type=int, default=4,
help="the id of gpu")
parser.add_argument("--exp_id", type=int, default=0,
help="the id of experiment")
@@ -58,7 +50,9 @@ if __name__ == "__main__":
args = parser.parse_args()
print("setting".center(50, "="))
# d_name = "ogbn-proteins"
print("超参数配置".center(50, "="))
print("lr = {}, rc = {}, epochs = {}, batch_size = {}".format(args.lr, args.rc, args.epochs,
args.batch_size))
print("Experiment ID: {}".format(args.exp_id).center(50, "="))
@@ -66,13 +60,12 @@ if __name__ == "__main__":
d_name = args.d_name
# get data
g, label, train_idx, valid_idx, test_idx, evaluator = get_graph_data(
d_name=d_name,
g, label, train_idx, valid_idx, test_idx, evaluator = get_graph_data(d_name=d_name,
mini_data=eval(args.mini_data))
# create log writer
log_writer = LogWriter(args.log_path, sync_cycle=10)
log_writer = LogWriter(args.log_path+'/'+str(args.exp_id), sync_cycle=10)
with log_writer.mode("train") as logger:
log_train_loss_epoch = logger.scalar("loss")
log_train_rocauc_epoch = logger.scalar("rocauc")
@@ -84,6 +77,9 @@ if __name__ == "__main__":
log_test_loss = log_writer.scalar("test_loss")
log_test_rocauc = log_writer.scalar("test_rocauc")
if args.model == "GaAN":
graph_model = GaANModel(112, 3, args.hidden_size_a, args.hidden_size_v, args.hidden_size_m,
args.hidden_size_o, args.heads)
# training
samples = [25, 10] # 2-hop sample size
@@ -102,6 +98,7 @@ if __name__ == "__main__":
edge_feat=g.edge_feat_info()
)
node_index = fluid.layers.data('node_index', shape=[None, 1], dtype="int64",
append_batch_size=False)
@@ -109,11 +106,8 @@ if __name__ == "__main__":
append_batch_size=False)
parent_node_index = fluid.layers.data('parent_node_index', shape=[None, 1], dtype="int64",
append_batch_size=False)
feature = gw.node_feat['node_feat']
for i in range(3):
feature = pgl.layers.GaAN(gw, feature, args.hidden_size_a, args.hidden_size_v,
args.hidden_size_m, args.hidden_size_o, args.heads, name='GaAN_'+str(i))
output = fluid.layers.fc(feature, 112, act=None)
output = graph_model.forward(gw)
output = fluid.layers.gather(output, node_index)
score = fluid.layers.sigmoid(output)
@@ -125,6 +119,14 @@ if __name__ == "__main__":
val_program = train_program.clone(for_test=True)
with fluid.program_guard(train_program, startup_program):
# adam = fluid.optimizer.Adam(
# learning_rate=1e-2,
# regularization=fluid.regularizer.L2DecayRegularizer(
# regularization_coeff=0.0005))
# lr = fluid.layers.natural_exp_decay(learning_rate=args.lr,
# decay_steps=1000,
# decay_rate=0.5,
# )
lr = args.lr
adam = fluid.optimizer.Adam(
learning_rate=lr,
......
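The commented-out blocks above record two alternatives that were apparently tried: Adam with L2 regularization, and a natural-exponential-decay learning-rate schedule. A hedged sketch of re-enabling the decay schedule, reusing the values from the comment:

    # sketch only: swaps the constant lr for the commented-out decay schedule
    lr = fluid.layers.natural_exp_decay(learning_rate=args.lr,  # initial rate from the CLI
                                        decay_steps=1000,       # values from the comment above
                                        decay_rate=0.5)
    adam = fluid.optimizer.Adam(learning_rate=lr)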
@@ -259,27 +259,97 @@ def gin(gw,
return output
def GaAN(gw, feature, hidden_size_a, hidden_size_v, hidden_size_m, hidden_size_o, heads,
name):
"""
This is an implementation of the paper GaAN: Gated Attention Networks for Learning
on Large and Spatiotemporal Graphs (https://arxiv.org/abs/1803.07294)
"""
# project the feature of nodes into new vector spaces
feat_key = fluid.layers.fc(feature, hidden_size_a * heads, bias_attr=False,
param_attr=fluid.ParamAttr(name=name + '_project_key'))
feat_value = fluid.layers.fc(feature, hidden_size_v * heads, bias_attr=False,
param_attr=fluid.ParamAttr(name=name + '_project_value'))
feat_query = fluid.layers.fc(feature, hidden_size_a * heads, bias_attr=False,
param_attr=fluid.ParamAttr(name=name + '_project_query'))
feat_gate = fluid.layers.fc(feature, hidden_size_m, bias_attr=False,
param_attr=fluid.ParamAttr(name=name + '_project_gate'))
# def GaAN(gw, feature, hidden_size_a, hidden_size_v, hidden_size_m, hidden_size_o, heads,
# name):
# """
# This is an implementation of the paper GaAN: Gated Attention Networks for Learning
# on Large and Spatiotemporal Graphs(https://arxiv.org/abs/1803.07294)
# """
# # send function
# def send_func(src_feat, dst_feat, edge_feat):
# print("heads: {}, hidden_size_a: {}".format(heads, hidden_size_a))
# feat_query, feat_key = dst_feat['feat_query'], src_feat['feat_key']
# feat_query = fluid.layers.reshape(feat_query, [-1, heads, hidden_size_a])
# feat_key = fluid.layers.reshape(feat_key, [-1, heads, hidden_size_a])
# alpha = fluid.layers.reduce_sum(feat_key * feat_query, dim=-1)
# return {'dst_node_feat': dst_feat['node_feat'],
# 'src_node_feat': src_feat['node_feat'],
# 'feat_value': src_feat['feat_value'],
# 'alpha': alpha,
# 'feat_gate': src_feat['feat_gate']}
# # recv function
# def recv_func(message):
# dst_feat = message['dst_node_feat'] # feature of dst nodes on each edge
# src_feat = message['src_node_feat'] # feature of src nodes on each edge
# x = fluid.layers.sequence_pool(dst_feat, 'average') # feature of center nodes
# z = fluid.layers.sequence_pool(src_feat, 'average') # mean feature of neighbors
# # compute gate
# feat_gate = message['feat_gate']
# g_max = fluid.layers.sequence_pool(feat_gate, 'max')
# g = fluid.layers.concat([x, g_max, z], axis=1)
# g = fluid.layers.fc(g, heads, bias_attr=False, act='sigmoid')
# # softmax of attention coefficient
# alpha = message['alpha']
# alpha = paddle_helper.sequence_softmax(alpha)
# feat_value = message['feat_value']
# old = feat_value
# feat_value = fluid.layers.reshape(feat_value, [-1, heads, hidden_size_v])
# feat_value = fluid.layers.elementwise_mul(feat_value, alpha, axis=0)
# feat_value = fluid.layers.reshape(feat_value, [-1, heads * hidden_size_v])
# feat_value = fluid.layers.lod_reset(feat_value, old)
# feat_value = fluid.layers.sequence_pool(feat_value, 'sum')
# feat_value = fluid.layers.reshape(feat_value, [-1, heads, hidden_size_v])
# output = fluid.layers.elementwise_mul(feat_value, g, axis=0)
# output = fluid.layers.reshape(output, [-1, heads*hidden_size_v])
# output = fluid.layers.concat([x, output], axis=1)
# return output
# # project the feature of nodes into new vector spaces
# feat_key = fluid.layers.fc(feature, hidden_size_a * heads, bias_attr=False,
# param_attr=fluid.ParamAttr(name=name + '_project_key'))
# feat_value = fluid.layers.fc(feature, hidden_size_v * heads, bias_attr=False,
# param_attr=fluid.ParamAttr(name=name + '_project_value'))
# feat_query = fluid.layers.fc(feature, hidden_size_a * heads, bias_attr=False,
# param_attr=fluid.ParamAttr(name=name + '_project_query'))
# feat_gate = fluid.layers.fc(feature, hidden_size_m, bias_attr=False,
# param_attr=fluid.ParamAttr(name=name + '_project_gate'))
# # send stage
# msg = gw.send(send_func, nfeat_list=[('node_feat', feature),
# ('feat_key', feat_key), ('feat_value', feat_value),
# ('feat_query', feat_query), ('feat_gate', feat_gate)],
# efeat_list=None,
# )
# # recv stage
# output = gw.recv(msg, recv_func)
# # output
# output = fluid.layers.fc(output, hidden_size_o, bias_attr=False,
# param_attr=fluid.ParamAttr(name=name+'_project_output'))
# outout = fluid.layers.leaky_relu(output, alpha=0.1)
# output = fluid.layers.dropout(output, dropout_prob=0.1)
# return output
def GaAN(gw, feature, hidden_size_a, hidden_size_v, hidden_size_m, hidden_size_o, heads, name):
# send function
def send_func(src_feat, dst_feat, edge_feat):
# compute the attention score on each edge
# E * (M * D1): each dst node queries the src nodes of all its incident edges
feat_query, feat_key = dst_feat['feat_query'], src_feat['feat_key']
# E * M * D1
old = feat_query
feat_query = fluid.layers.reshape(feat_query, [-1, heads, hidden_size_a])
feat_key = fluid.layers.reshape(feat_key, [-1, heads, hidden_size_a])
# E * M
alpha = fluid.layers.reduce_sum(feat_key * feat_query, dim=-1)
return {'dst_node_feat': dst_feat['node_feat'],
@@ -288,53 +358,75 @@ def GaAN(gw, feature, hidden_size_a, hidden_size_v, hidden_size_m, hidden_size_o
'alpha': alpha,
'feat_gate': src_feat['feat_gate']}
# send stage
message = gw.send(send_func, nfeat_list=[('node_feat', feature),
('feat_key', feat_key), ('feat_value', feat_value),
('feat_query', feat_query), ('feat_gate', feat_gate)],
efeat_list=None,
)
# recv function
def recv_func(message):
dst_feat = message['dst_node_feat'] # feature of dst nodes on each edge
src_feat = message['src_node_feat'] # feature of src nodes on each edge
x = fluid.layers.sequence_pool(dst_feat, 'average') # feature of center nodes
z = fluid.layers.sequence_pool(src_feat, 'average') # mean feature of neighbors
# compute gate
# features of the destination node of each edge
dst_feat = message['dst_node_feat']
# features of the source node of each edge
src_feat = message['src_node_feat']
# each center node's own features
x = fluid.layers.sequence_pool(dst_feat, 'average')
# mean of the neighbors' features for each center node
z = fluid.layers.sequence_pool(src_feat, 'average')
# compute the gate
feat_gate = message['feat_gate']
g_max = fluid.layers.sequence_pool(feat_gate, 'max')
g = fluid.layers.concat([x, g_max, z], axis=1)
g = fluid.layers.fc(g, heads, bias_attr=False, act='sigmoid')
g = fluid.layers.fc(g, heads, bias_attr=False, act="sigmoid")
# softmax of attention coefficient
# softmax
alpha = message['alpha']
alpha = paddle_helper.sequence_softmax(alpha)
alpha = paddle_helper.sequence_softmax(alpha) # E * M
feat_value = message['feat_value']
feat_value = message['feat_value'] # E * (M * D2)
old = feat_value
feat_value = fluid.layers.reshape(feat_value, [-1, heads, hidden_size_v])
feat_value = fluid.layers.reshape(feat_value, [-1, heads, hidden_size_v]) # E * M * D2
feat_value = fluid.layers.elementwise_mul(feat_value, alpha, axis=0)
feat_value = fluid.layers.reshape(feat_value, [-1, heads * hidden_size_v])
feat_value = fluid.layers.reshape(feat_value, [-1, heads*hidden_size_v]) # E * (M * D2)
feat_value = fluid.layers.lod_reset(feat_value, old)
feat_value = fluid.layers.sequence_pool(feat_value, 'sum')
feat_value = fluid.layers.reshape(feat_value, [-1, heads, hidden_size_v])
feat_value = fluid.layers.sequence_pool(feat_value, 'sum') # N * (M * D2)
feat_value = fluid.layers.reshape(feat_value, [-1, heads, hidden_size_v]) # N * M * D2
output = fluid.layers.elementwise_mul(feat_value, g, axis=0)
output = fluid.layers.reshape(output, [-1, heads*hidden_size_v])
output = fluid.layers.reshape(output, [-1, heads * hidden_size_v]) # N * (M * D2)
output = fluid.layers.concat([x, output], axis=1)
return output
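# Note: recv_func relies on fluid's LoD (level-of-detail) sequence semantics.
# PGL hands recv_func the messages of each destination node as one variable-length
# sequence, and sequence_pool reduces every sequence to a single row. A hedged
# numpy sketch of that grouping (illustration only, not part of the layer):
#
#     import numpy as np
#     msgs = np.arange(10, dtype="float32").reshape(5, 2)  # 5 edge messages, width 2
#     lod = [0, 2, 5]  # node 0 receives edges 0..1, node 1 receives edges 2..4
#     pooled = np.stack([msgs[s:e].mean(0) for s, e in zip(lod[:-1], lod[1:])])
#     # pooled has one averaged message per destination node, shape (2, 2)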
# recv stage
output = gw.recv(message, recv_func)
# feature N * D
# compute what each node itself needs to send out
# projected feature vectors
# N * (D1 * M)
feat_key = fluid.layers.fc(feature, hidden_size_a * heads, bias_attr=False,
param_attr=fluid.ParamAttr(name=name + '_project_key'))
# N * (D2 * M)
feat_value = fluid.layers.fc(feature, hidden_size_v * heads, bias_attr=False,
param_attr=fluid.ParamAttr(name=name + '_project_value'))
# N * (D1 * M)
feat_query = fluid.layers.fc(feature, hidden_size_a * heads, bias_attr=False,
param_attr=fluid.ParamAttr(name=name + '_project_query'))
# N * Dm
feat_gate = fluid.layers.fc(feature, hidden_size_m, bias_attr=False,
param_attr=fluid.ParamAttr(name=name + '_project_gate'))
# send stage
# output
message = gw.send(
send_func,
nfeat_list=[('node_feat', feature), ('feat_key', feat_key), ('feat_value', feat_value),
('feat_query', feat_query), ('feat_gate', feat_gate)],
efeat_list=None,
)
# aggregate the neighbors' features
output = gw.recv(message, recv_func)
output = fluid.layers.fc(output, hidden_size_o, bias_attr=False,
param_attr=fluid.ParamAttr(name=name+'_project_output'))
outout = fluid.layers.leaky_relu(output, alpha=0.1)
param_attr=fluid.ParamAttr(name=name + '_project_output'))
output = fluid.layers.leaky_relu(output, alpha=0.1)
output = fluid.layers.dropout(output, dropout_prob=0.1)
return output
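Putting the two stages together: a GaAN layer scores each in-edge with multi-head dot-product attention, takes an attention-weighted sum of the projected neighbor values, and scales every head by a learned gate before the output projection. A hedged numpy reference of a single node's aggregation (M heads, key/query width D1, value width D2, matching the shape comments above; the function and variable names are illustrative only):

    import numpy as np

    def gaan_node(query, keys, values, gate):
        # query: [M, D1]; keys: [n, M, D1]; values: [n, M, D2]; gate: [M]
        logits = np.einsum('nmd,md->nm', keys, query)   # one score per neighbor and head
        alpha = np.exp(logits) / np.exp(logits).sum(axis=0, keepdims=True)  # softmax over neighbors
        agg = np.einsum('nm,nmd->md', alpha, values)    # attention-weighted value sum
        return (gate[:, None] * agg).reshape(-1)        # gate each head, flatten to M * D2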