update examples

0bdc0da9 · liweibin · 35a9bda3 · 0bdc0da9 · 0bdc0da9 · 0bdc0da9
12 changed files
--- a/examples/dgi/README.md
+++ b/examples/dgi/README.md
-# PGL Examples for DGI
+# PGL Examples for Deep Graph Infomax 

 [Deep Graph Infomax \(DGI\)](https://arxiv.org/abs/1809.10341) is a general approach for learning node representations within graph-structured data in an unsupervised manner. DGI relies on maximizing mutual information between patch representations and corresponding high-level summaries of graphs---both derived using established graph convolutional network architectures.


--- a/examples/distribute_deepwalk/README.md
+++ b/examples/distribute_deepwalk/README.md
 # PGL Examples for distributed deepwalk
 [Deepwalk](https://arxiv.org/pdf/1403.6652.pdf) is an algorithmic framework for representation learning on graphs. Given any graph, it can learn continuous feature representations for the nodes, which can then be used for various downstream machine learning tasks. Based on PGL, we reproduce the distributed deepwalk algorithm and reach the same level of metrics as reported in the paper.
+
 ## Datasets
 The datasets contain two networks: [BlogCatalog](http://socialcomputing.asu.edu/datasets/BlogCatalog3). 
 ## Dependencies
@@ -8,7 +9,9 @@ The datasets contain two networks: [BlogCatalog](http://socialcomputing.asu.edu/

 ## How to run

-For examples, train deepwalk in distributed mode on cora dataset.
+We adopt [PaddlePaddle Fleet](https://github.com/PaddlePaddle/Fleet) as our distributed training framework. ```pgl_deepwalk.cfg``` is the config file for the deepwalk hyperparameters and ```local_config``` is the config file for the parameter servers. By default, we have 2 pservers and 2 trainers. You can use ```cloud_run.sh``` to start up the parameter servers and model trainers.
+
+For example, train deepwalk in distributed mode on the BlogCatalog dataset.
 ```sh
 # train deepwalk in distributed mode.
 sh cloud_run.sh

--- a/examples/distribute_graphsage/README.md
+++ b/examples/distribute_graphsage/README.md
@@ -55,3 +55,5 @@ python train.py --use_cuda --epoch 10 --graphsage_type graphsage_mean --sample_w
 - samples_1: The max neighbors for the first hop neighbor sampling. (default: 25)
 - samples_2: The max neighbors for the second hop neighbor sampling. (default: 10)
 - hidden_size: The hidden size of the GraphSAGE models.
+
+
--- a/examples/distribute_graphsage/reader.py
+++ b/examples/distribute_graphsage/reader.py
@@ -89,8 +89,7 @@ def worker(batch_info, graph_wrapper, samples):
                if len(start_nodes) == 0:
                    break

-            subgraph = graph.subgraph(
-                nodes=nodes, eid=eids, edges=[eid2edges[e] for e in eids])
+            subgraph = graph.subgraph(nodes=nodes, eid=eids, edges=[ eid2edges[e] for e in eids])
            sub_node_index = subgraph.reindex_from_parrent_nodes(
                batch_train_samples)
            feed_dict = graph_wrapper.to_feed(subgraph)
@@ -103,7 +102,8 @@ def worker(batch_info, graph_wrapper, samples):
    return work


-def multiprocess_graph_reader(graph_wrapper,
+def multiprocess_graph_reader(
+                              graph_wrapper,
                              samples,
                              node_index,
                              batch_size,
@@ -138,7 +138,7 @@ def multiprocess_graph_reader(graph_wrapper,
        reader_pool = []
        for i in range(num_workers):
            reader_pool.append(
-                worker(batch_info[block_size * i:block_size * (i + 1)],
+                worker(batch_info[block_size * i:block_size * (i + 1)], 
                       graph_wrapper, samples))
        multi_process_sample = mp_reader.multiprocess_reader(
            reader_pool, use_pipe=True, queue_size=1000)
@@ -146,3 +146,4 @@ def multiprocess_graph_reader(graph_wrapper,
        return paddle.reader.buffered(r, 1000)

    return reader()
+
--- a/examples/distribute_graphsage/requirements.txt
+++ b/examples/distribute_graphsage/requirements.txt
 scipy
 redis==2.10.6
 redis-py-cluster==1.3.6
+
--- a/examples/distribute_graphsage/train.py
+++ b/examples/distribute_graphsage/train.py
@@ -170,9 +170,7 @@ def main(args):

    with fluid.program_guard(train_program, startup_program):
        graph_wrapper = pgl.graph_wrapper.GraphWrapper(
-            "sub_graph",
-            fluid.CPUPlace(),
-            node_feat=[('feats', [None, 602], np.dtype('float32'))])
+            "sub_graph", fluid.CPUPlace(), node_feat=[('feats', [None, 602], np.dtype('float32'))])
        model_loss, model_acc = build_graph_model(
            graph_wrapper,
            num_class=data["num_class"],

--- a/examples/line/README.md
+++ b/examples/line/README.md
@@ -36,7 +36,7 @@ For examples, use gpu to train LINE on Flickr dataset.
 # multiclass task example
 python line.py --use_cuda --order first_order --data_path ./data/flickr/ --save_dir ./checkpoints/model/

-python multi_class.py --ckpt_path ./checkpoints/model/model_eopch_20 --percent 0.5
+python multi_class.py --ckpt_path ./checkpoints/model/model_epoch_20 --percent 0.5

 ```


--- a/examples/line/line.py
+++ b/examples/line/line.py
@@ -42,6 +42,16 @@ def make_dir(path):
            raise


+def save_param(dirname, var_name_list):
+    """save_param"""
+    if not os.path.exists(dirname):
+        os.makedirs(dirname)
+    for var_name in var_name_list:
+        var = fluid.global_scope().find_var(var_name)
+        var_tensor = var.get_tensor()
+        np.save(os.path.join(dirname, var_name + '.npy'), np.array(var_tensor))
+
+
 def set_seed(seed):
    """Set global random seed.
    """
@@ -153,9 +163,9 @@ def main(args):

        # save parameters in every epoch
        log.info("saving persistables parameters...")
-        fluid.io.save_persistables(exe,
-                                   os.path.join(args.save_dir, "model_epoch_%d"
-                                                % (epoch + 1)), main_program)
+        cur_save_path = os.path.join(args.save_dir,
+                                     "model_epoch_%d" % (epoch + 1))
+        save_param(cur_save_path, ['shared_w'])


 if __name__ == '__main__':

--- a/examples/line/multi_class.py
+++ b/examples/line/multi_class.py
@@ -33,6 +33,15 @@ from pgl.utils.logger import log
 from data_loader import FlickrDataset


+def load_param(dirname, var_name_list):
+    """load_param"""
+    for var_name in var_name_list:
+        var = fluid.global_scope().find_var(var_name)
+        var_tensor = var.get_tensor()
+        var_tmp = np.load(os.path.join(dirname, var_name + '.npy'))
+        var_tensor.set(var_tmp, fluid.CPUPlace())
+
+
 def set_seed(seed):
    """Set global random seed.
    """
@@ -200,12 +209,15 @@ def main(args):
            return False
        return os.path.exists(os.path.join(args.ckpt_path, var.name))

-    fluid.io.load_vars(
-        exe, args.ckpt_path, main_program=train_prog, predicate=existed_params)
+    log.info('loading pretrained parameters from npy')
+    load_param(args.ckpt_path, ['shared_w'])
+
    step = 0
    prev_time = time.time()
    train_model['pyreader'].start()

+    final_macro_f1 = 0.0
+    final_micro_f1 = 0.0
    while 1:
        try:
            train_loss_val, train_probs_val, train_labels_val, train_topk_val = exe.run(
@@ -257,8 +269,13 @@ def main(args):
                log.info("\t\tStep %d " % step + "Test Loss: %f " %
                         test_loss_val + "Test Macro F1: %f " % test_macro_f1 +
                         "Test Micro F1: %f " % test_micro_f1)
+                final_macro_f1 = max(test_macro_f1, final_macro_f1)
+                final_micro_f1 = max(test_micro_f1, final_micro_f1)
                break

+    log.info("\nFinal test Macro F1: %f " % final_macro_f1 +
+             "Final test Micro F1: %f " % final_micro_f1)
+

 if __name__ == '__main__':
    parser = argparse.ArgumentParser(description='LINE')
@@ -268,7 +285,7 @@ if __name__ == '__main__':
        default='./data/flickr/',
        help='dataset for training')
    parser.add_argument("--use_cuda", action='store_true', help="use_cuda")
-    parser.add_argument("--epochs", type=int, default=10)
+    parser.add_argument("--epochs", type=int, default=5)
    parser.add_argument("--seed", type=int, default=1667)
    parser.add_argument(
        "--lr", type=float, default=0.025, help='learning rate')

--- a/examples/node2vec/README.md
+++ b/examples/node2vec/README.md
@@ -16,7 +16,8 @@ python node2vec.py --use_cuda --dataset BlogCatalog --save_path ./tmp/node2vec_B
 python multi_class.py --use_cuda --ckpt_path ./tmp/node2vec_BlogCatalog/paddle_model --epoch 1000

 # link prediction task example
-python node2vec.py --use_cuda --dataset ArXiv --save_path ./tmp/node2vec_ArXiv --offline_learning --epoch 400
+python node2vec.py --use_cuda --dataset ArXiv --save_path \
+./tmp/node2vec_ArXiv --offline_learning --epoch 10

 python link_predict.py --use_cuda --ckpt_path ./tmp/node2vec_ArXiv/paddle_model --epoch 400
 ```

--- a/examples/strucvec/classify.py
+++ b/examples/strucvec/classify.py
-# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
+"""
+classify.py
+"""
+# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -11,7 +14,6 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-
 import numpy as np
 import paddle
 import paddle.fluid as fluid

--- a/examples/strucvec/sklearn_classify.py
+++ b/examples/strucvec/sklearn_classify.py
-# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
+"""
+sklearn_classify.py
+"""
+# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -36,7 +39,7 @@ def train_lr_l2_model(args, data):
            test_size=0.2,
            random_state=random_num + random_seed)

-        # use the one vs rest to train the lr model with l2
+        # use the one vs rest to train the lr model with l2 
        pred_test = []
        for i in range(0, args.num_class):
            y_train_relabel = np.where(y_train == i, 1, 0)