From 98b071f8fe41860468185255ae29321852b6e17e Mon Sep 17 00:00:00 2001 From: Webbley Date: Tue, 21 Apr 2020 17:11:50 +0800 Subject: [PATCH] update distribute_metapath2vec --- examples/distribute_metapath2vec/README.md | 20 +++++++++++++++----- examples/distribute_metapath2vec/config.yaml | 2 +- examples/distribute_metapath2vec/walker.py | 7 +++++-- 3 files changed, 21 insertions(+), 8 deletions(-) diff --git a/examples/distribute_metapath2vec/README.md b/examples/distribute_metapath2vec/README.md index de31aa3..715850e 100644 --- a/examples/distribute_metapath2vec/README.md +++ b/examples/distribute_metapath2vec/README.md @@ -2,17 +2,17 @@ [metapath2vec](https://ericdongyx.github.io/papers/KDD17-dong-chawla-swami-metapath2vec.pdf) is a algorithm framework for representation learning in heterogeneous networks which contains multiple types of nodes and links. Given a heterogeneous graph, metapath2vec algorithm first generates meta-path-based random walks and then use skipgram model to train a language model. Based on PGL, we reproduce metapath2vec algorithm in distributed mode. -## Datasets +### Datasets DBLP: The dataset contains 14376 papers (P), 20 conferences (C), 14475 authors (A), and 8920 terms (T). There are 33791 nodes in this dataset. You can dowload datasets from [here](https://github.com/librahu/HIN-Datasets-for-Recommendation-and-Network-Embedding) We use the ```DBLP``` dataset for example. After downloading the dataset, put them, let's say, in ```./data/DBLP/``` . -## Dependencies +### Dependencies - paddlepaddle>=1.6 - pgl>=1.0.0 -## How to run +### How to run Before training, run the below command to do data preprocessing. 
```sh python data_process.py --data_path ./data/DBLP --output_path ./data/data_processed @@ -30,11 +30,21 @@ python multi_class.py --dataset ./data/data_processed/author_label.txt --ckpt_pa ``` +### Model Selection +Actually, there are 3 models in this example; they are ```metapath2vec```, ```metapath2vec++``` and ```multi_metapath2vec++```. You can select different models by modifying ```config.yaml```. -## Hyperparameters +In order to run the ```metapath2vec++``` model, you can easily rewrite the hyper parameter **neg_sample_type** to **m2v_plus**, and then the ```metapath2vec++``` model will be selected. + +```multi-metapath2vec++``` means that you do not use only a single metapath; instead, you can use several metapaths at the same time to train the model. For example, you might want to use ```c2p-p2a-a2p-p2c``` and ```p2a-a2p``` simultaneously. Then you can rewrite the hyper parameters below in ```config.yaml```. +- **neg_sample_type**: "m2v_plus" +- **walk_mode**: "multi_m2v" +- **meta_path**: "c2p-p2a-a2p-p2c;p2a-a2p" +- **first_node_type**: "c;p" + +### Hyperparameters All the hyper parameters are saved in ```config.yaml``` file. So before training, you can open the config.yaml to modify the hyper parameters as you like. -Some important hyper parameters in config.yaml: +Some important hyper parameters in ```config.yaml```: - **edge_path**: the directory of graph data that you want to load - **lr**: learning rate - **neg_num**: number of negative samples. 
diff --git a/examples/distribute_metapath2vec/config.yaml b/examples/distribute_metapath2vec/config.yaml index 30506df..db3f38c 100644 --- a/examples/distribute_metapath2vec/config.yaml +++ b/examples/distribute_metapath2vec/config.yaml @@ -31,7 +31,7 @@ is_distributed: False # trainging config epochs: 10 optimizer: "sgd" -lr: 1.0 +lr: 0.1 warm_start_from_dir: null walkpath_files: "None" train_files: "None" diff --git a/examples/distribute_metapath2vec/walker.py b/examples/distribute_metapath2vec/walker.py index db340f9..6503766 100644 --- a/examples/distribute_metapath2vec/walker.py +++ b/examples/distribute_metapath2vec/walker.py @@ -87,9 +87,12 @@ class NodeGenerator(object): idx = cc % num_n_type n_type = n_type_list[idx] try: - nodes = node_generators[n_type].next() + nodes = next(node_generators[n_type]) except StopIteration as e: - log.info("exception when iteration") + log.info("node type of %s iteration finished in one epoch" % + (n_type)) + node_generators[n_type] = \ + self.graph.node_batch_iter(self.batch_size, n_type=n_type) break yield (nodes, idx) cc += 1 -- GitLab