提交 0bdc0da9 编写于 作者: L liweibin

update examples

上级 35a9bda3
# PGL Examples for DGI
# PGL Examples for Deep Graph Infomax
[Deep Graph Infomax \(DGI\)](https://arxiv.org/abs/1809.10341) is a general approach for learning node representations within graph-structured data in an unsupervised manner. DGI relies on maximizing mutual information between patch representations and corresponding high-level summaries of graphs---both derived using established graph convolutional network architectures.
......
# PGL Examples for distributed deepwalk
[Deepwalk](https://arxiv.org/pdf/1403.6652.pdf) is an algorithmic framework for representation learning on graphs. Given any graph, it can learn continuous feature representations for the nodes, which can then be used for various downstream machine learning tasks. Based on PGL, we reproduce the distributed deepwalk algorithm and reach the same level of metrics as the paper.
## Datasets
The dataset used here is the [BlogCatalog](http://socialcomputing.asu.edu/datasets/BlogCatalog3) network.
## Dependencies
......@@ -8,7 +9,9 @@ The datasets contain two networks: [BlogCatalog](http://socialcomputing.asu.edu/
## How to run
For examples, train deepwalk in distributed mode on cora dataset.
We adopt [PaddlePaddle Fleet](https://github.com/PaddlePaddle/Fleet) as our distributed training framework. ```pgl_deepwalk.cfg``` is the config file for deepwalk hyperparameters and ```local_config``` is the config file for parameter servers. By default, we have 2 pservers and 2 trainers. You can use ```cloud_run.sh``` to start up the parameter servers and model trainers.
For example, train deepwalk in distributed mode on the BlogCatalog dataset.
```sh
# train deepwalk in distributed mode.
sh cloud_run.sh
......
......@@ -55,3 +55,5 @@ python train.py --use_cuda --epoch 10 --graphsage_type graphsage_mean --sample_w
- samples_1: The max neighbors for the first hop neighbor sampling. (default: 25)
- samples_2: The max neighbors for the second hop neighbor sampling. (default: 10)
- hidden_size: The hidden size of the GraphSAGE models.
......@@ -89,8 +89,7 @@ def worker(batch_info, graph_wrapper, samples):
if len(start_nodes) == 0:
break
subgraph = graph.subgraph(
nodes=nodes, eid=eids, edges=[eid2edges[e] for e in eids])
subgraph = graph.subgraph(nodes=nodes, eid=eids, edges=[ eid2edges[e] for e in eids])
sub_node_index = subgraph.reindex_from_parrent_nodes(
batch_train_samples)
feed_dict = graph_wrapper.to_feed(subgraph)
......@@ -103,7 +102,8 @@ def worker(batch_info, graph_wrapper, samples):
return work
def multiprocess_graph_reader(graph_wrapper,
def multiprocess_graph_reader(
graph_wrapper,
samples,
node_index,
batch_size,
......@@ -138,7 +138,7 @@ def multiprocess_graph_reader(graph_wrapper,
reader_pool = []
for i in range(num_workers):
reader_pool.append(
worker(batch_info[block_size * i:block_size * (i + 1)],
worker(batch_info[block_size * i:block_size * (i + 1)],
graph_wrapper, samples))
multi_process_sample = mp_reader.multiprocess_reader(
reader_pool, use_pipe=True, queue_size=1000)
......@@ -146,3 +146,4 @@ def multiprocess_graph_reader(graph_wrapper,
return paddle.reader.buffered(r, 1000)
return reader()
......@@ -170,9 +170,7 @@ def main(args):
with fluid.program_guard(train_program, startup_program):
graph_wrapper = pgl.graph_wrapper.GraphWrapper(
"sub_graph",
fluid.CPUPlace(),
node_feat=[('feats', [None, 602], np.dtype('float32'))])
"sub_graph", fluid.CPUPlace(), node_feat=[('feats', [None, 602], np.dtype('float32'))])
model_loss, model_acc = build_graph_model(
graph_wrapper,
num_class=data["num_class"],
......
......@@ -36,7 +36,7 @@ For examples, use gpu to train LINE on Flickr dataset.
# multiclass task example
python line.py --use_cuda --order first_order --data_path ./data/flickr/ --save_dir ./checkpoints/model/
python multi_class.py --ckpt_path ./checkpoints/model/model_eopch_20 --percent 0.5
python multi_class.py --ckpt_path ./checkpoints/model/model_epoch_20 --percent 0.5
```
......
......@@ -42,6 +42,16 @@ def make_dir(path):
raise
def save_param(dirname, var_name_list):
    """Save variables from the global scope as ``.npy`` files.

    Each name in ``var_name_list`` is looked up in ``fluid.global_scope()``
    and its tensor is written to ``<dirname>/<var_name>.npy`` via ``np.save``.

    Args:
        dirname: Output directory; created (including parents) if missing.
        var_name_list: Iterable of variable names to save.

    Raises:
        ValueError: If a name cannot be found in the global scope.
        OSError: If ``dirname`` cannot be created as a directory.
    """
    # Create first and inspect the failure afterwards: with several trainers
    # saving at once, another process may create the directory between an
    # ``os.path.exists`` check and the ``os.makedirs`` call.
    try:
        os.makedirs(dirname)
    except OSError:
        if not os.path.isdir(dirname):
            raise

    for var_name in var_name_list:
        var = fluid.global_scope().find_var(var_name)
        if var is None:
            # Fail with the offending name instead of an opaque
            # "'NoneType' object has no attribute 'get_tensor'".
            raise ValueError(
                "variable %r not found in the global scope" % var_name)
        var_tensor = var.get_tensor()
        np.save(os.path.join(dirname, var_name + '.npy'), np.array(var_tensor))
def set_seed(seed):
"""Set global random seed.
"""
......@@ -153,9 +163,9 @@ def main(args):
# save parameters in every epoch
log.info("saving persistables parameters...")
fluid.io.save_persistables(exe,
os.path.join(args.save_dir, "model_epoch_%d"
% (epoch + 1)), main_program)
cur_save_path = os.path.join(args.save_dir,
"model_epoch_%d" % (epoch + 1))
save_param(cur_save_path, ['shared_w'])
if __name__ == '__main__':
......
......@@ -33,6 +33,15 @@ from pgl.utils.logger import log
from data_loader import FlickrDataset
def load_param(dirname, var_name_list):
    """Load ``.npy`` files into variables of the global scope.

    For each name in ``var_name_list``, reads ``<dirname>/<var_name>.npy``
    with ``np.load`` and copies the array into the tensor of the variable
    with that name in ``fluid.global_scope()``, placed on ``fluid.CPUPlace()``.

    Args:
        dirname: Directory containing the ``.npy`` files.
        var_name_list: Iterable of variable names to restore.

    Raises:
        ValueError: If a name cannot be found in the global scope.
    """
    for var_name in var_name_list:
        var = fluid.global_scope().find_var(var_name)
        if var is None:
            # Fail with the offending name instead of an opaque
            # "'NoneType' object has no attribute 'get_tensor'".
            raise ValueError(
                "variable %r not found in the global scope" % var_name)
        var_tensor = var.get_tensor()
        var_tmp = np.load(os.path.join(dirname, var_name + '.npy'))
        var_tensor.set(var_tmp, fluid.CPUPlace())
def set_seed(seed):
"""Set global random seed.
"""
......@@ -200,12 +209,15 @@ def main(args):
return False
return os.path.exists(os.path.join(args.ckpt_path, var.name))
fluid.io.load_vars(
exe, args.ckpt_path, main_program=train_prog, predicate=existed_params)
log.info('loading pretrained parameters from npy')
load_param(args.ckpt_path, ['shared_w'])
step = 0
prev_time = time.time()
train_model['pyreader'].start()
final_macro_f1 = 0.0
final_micro_f1 = 0.0
while 1:
try:
train_loss_val, train_probs_val, train_labels_val, train_topk_val = exe.run(
......@@ -257,8 +269,13 @@ def main(args):
log.info("\t\tStep %d " % step + "Test Loss: %f " %
test_loss_val + "Test Macro F1: %f " % test_macro_f1 +
"Test Micro F1: %f " % test_micro_f1)
final_macro_f1 = max(test_macro_f1, final_macro_f1)
final_micro_f1 = max(test_micro_f1, final_micro_f1)
break
log.info("\nFinal test Macro F1: %f " % final_macro_f1 +
"Final test Micro F1: %f " % final_micro_f1)
if __name__ == '__main__':
parser = argparse.ArgumentParser(description='LINE')
......@@ -268,7 +285,7 @@ if __name__ == '__main__':
default='./data/flickr/',
help='dataset for training')
parser.add_argument("--use_cuda", action='store_true', help="use_cuda")
parser.add_argument("--epochs", type=int, default=10)
parser.add_argument("--epochs", type=int, default=5)
parser.add_argument("--seed", type=int, default=1667)
parser.add_argument(
"--lr", type=float, default=0.025, help='learning rate')
......
......@@ -16,7 +16,8 @@ python node2vec.py --use_cuda --dataset BlogCatalog --save_path ./tmp/node2vec_B
python multi_class.py --use_cuda --ckpt_path ./tmp/node2vec_BlogCatalog/paddle_model --epoch 1000
# link prediction task example
python node2vec.py --use_cuda --dataset ArXiv --save_path ./tmp/node2vec_ArXiv --offline_learning --epoch 400
python node2vec.py --use_cuda --dataset ArXiv --save_path ./tmp/node2vec_ArXiv --offline_learning --epoch 10
python link_predict.py --use_cuda --ckpt_path ./tmp/node2vec_ArXiv/paddle_model --epoch 400
```
......
# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
"""
classify.py
"""
# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
......@@ -11,7 +14,6 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import numpy as np
import paddle
import paddle.fluid as fluid
......
# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
"""
sklearn_classify.py
"""
# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
......@@ -36,7 +39,7 @@ def train_lr_l2_model(args, data):
test_size=0.2,
random_state=random_num + random_seed)
# use the one vs rest to train the lr model with l2
# use the one vs rest to train the lr model with l2
pred_test = []
for i in range(0, args.num_class):
y_train_relabel = np.where(y_train == i, 1, 0)
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册