Unverified commit 03cb3621, authored by Huang Zhengjie, committed via GitHub

Merge pull request #4 from PaddlePaddle/develop

Develop
......@@ -21,7 +21,7 @@ import tqdm
import numpy as np
import logging
import random
from pgl.contrib import heter_graph
from pgl import heter_graph
import pickle as pkl
......
......@@ -21,7 +21,7 @@ import logging
import paddle.fluid as fluid
import paddle.fluid.layers as fl
from pgl.contrib import heter_graph_wrapper
from pgl import heter_graph_wrapper
class GATNE(object):
......
# Distributed metapath2vec in PGL
[metapath2vec](https://ericdongyx.github.io/papers/KDD17-dong-chawla-swami-metapath2vec.pdf) is an algorithmic framework for representation learning in heterogeneous networks, which contain multiple types of nodes and edges. Given a heterogeneous graph, metapath2vec first generates meta-path-based random walks and then trains a skip-gram model on them to learn node embeddings. Based on PGL, we reproduce the metapath2vec algorithm in distributed mode.
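The walk generation step can be sketched in a few lines of plain Python. The snippet below is only an illustration on a toy graph (the ```toy_graph``` dict and the helper function are hypothetical, not the PGL API); this repo actually uses ```pgl.sample.metapath_randomwalk``` on a ```HeterGraph```.
```python
import random

def metapath_walk(toy_graph, start_node, meta_path, walk_len):
    """Follow the edge types of meta_path (e.g. "c2p-p2a-a2p-p2c") in a cycle."""
    edge_types = meta_path.split('-')
    walk = [start_node]
    step = 0
    while len(walk) < walk_len:
        e_type = edge_types[step % len(edge_types)]
        candidates = toy_graph.get((walk[-1], e_type), [])
        if not candidates:
            break
        walk.append(random.choice(candidates))
        step += 1
    return walk

# toy heterogeneous graph: conference 0, papers 1-2, authors 3-4
toy_graph = {(0, 'c2p'): [1, 2], (1, 'p2a'): [3], (2, 'p2a'): [4],
             (3, 'a2p'): [1], (4, 'a2p'): [2], (1, 'p2c'): [0], (2, 'p2c'): [0]}
print(metapath_walk(toy_graph, 0, "c2p-p2a-a2p-p2c", walk_len=9))
```
Skip-gram (src, pos) pairs are then drawn from a window over each walk and trained against randomly sampled negatives, as in DeepWalk.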
## Datasets
DBLP: The dataset contains 14376 papers (P), 20 conferences (C), 14475 authors (A), and 8920 terms (T), for a total of 37791 nodes.
You can download the dataset from [here](https://github.com/librahu/HIN-Datasets-for-Recommendation-and-Network-Embedding).
We use the ```DBLP``` dataset as an example. After downloading, place the files in, say, ```./data/DBLP/``` .
## Dependencies
- paddlepaddle>=1.6
- pgl>=1.0.0
## How to run
Before training, run the command below to preprocess the data.
```sh
python data_process.py --data_path ./data/DBLP --output_path ./data/data_processed
```
We adopt [PaddlePaddle Fleet](https://github.com/PaddlePaddle/Fleet) as our distributed training framework. ```config.yaml``` is the configuration file for the metapath2vec hyperparameters and ```local_config``` is the configuration file for the PaddlePaddle parameter servers. By default there are 2 pservers and 2 trainers. You can use ```cloud_run.sh``` to start up the parameter servers and trainers.
For example, to train metapath2vec in distributed mode on the DBLP dataset:
```sh
# train metapath2vec in distributed mode.
sh cloud_run.sh
# multiclass task example
python multi_class.py --dataset ./data/data_processed/author_label.txt --ckpt_path ./checkpoints/2000 --num_nodes 37791
```
## Hyperparameters
All the hyperparameters are stored in the ```config.yaml``` file, so you can modify them there before training.
Some important hyperparameters in config.yaml:
- **edge_path**: the directory of graph data that you want to load
- **lr**: learning rate
- **neg_num**: number of negative samples.
- **num_walks**: number of walks started from each node
- **walk_len**: walk length
- **meta_path**: meta path scheme
#!/bin/bash
set -x
mode=${1}
source ./utils.sh
unset http_proxy https_proxy
source ./local_config
if [ ! -d ${log_dir} ]; then
mkdir ${log_dir}
fi
for((i=0;i<${PADDLE_PSERVERS_NUM};i++))
do
echo "start ps server: ${i}"
echo $log_dir
TRAINING_ROLE="PSERVER" PADDLE_TRAINER_ID=${i} sh job.sh &> $log_dir/pserver.$i.log &
done
sleep 10s
for((j=0;j<${PADDLE_TRAINERS_NUM};j++))
do
echo "start ps work: ${j}"
TRAINING_ROLE="TRAINER" PADDLE_TRAINER_ID=${j} sh job.sh &> $log_dir/worker.$j.log &
done
tail -f $log_dir/worker.0.log
# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import argparse
import time
import os
import math
import numpy as np
import paddle.fluid as F
import paddle.fluid.layers as L
from paddle.fluid.incubate.fleet.parameter_server.distribute_transpiler import fleet
from paddle.fluid.transpiler.distribute_transpiler import DistributeTranspilerConfig
import paddle.fluid.incubate.fleet.base.role_maker as role_maker
from pgl.utils.logger import log
from model import Metapath2vecModel
from graph import m2vGraph
from utils import load_config
from walker import multiprocess_data_generator
def init_role():
# choose the fleet role (worker or pserver) according to the TRAINING_ROLE environment variable
training_role = os.getenv("TRAINING_ROLE", "TRAINER")
paddle_role = role_maker.Role.WORKER
place = F.CPUPlace()
if training_role == "PSERVER":
paddle_role = role_maker.Role.SERVER
# build the fleet runtime environment from the environment variables
ports = os.getenv("PADDLE_PORT", "6174").split(",")
pserver_ips = os.getenv("PADDLE_PSERVERS").split(",") # ip,ip...
eplist = []
if len(ports) > 1:
# local debug mode, multi port
for port in ports:
eplist.append(':'.join([pserver_ips[0], port]))
else:
# distributed mode, multi ip
for ip in pserver_ips:
eplist.append(':'.join([ip, ports[0]]))
pserver_endpoints = eplist # ip:port,ip:port...
worker_num = int(os.getenv("PADDLE_TRAINERS_NUM", "0"))
trainer_id = int(os.getenv("PADDLE_TRAINER_ID", "0"))
role = role_maker.UserDefinedRoleMaker(
current_id=trainer_id,
role=paddle_role,
worker_num=worker_num,
server_endpoints=pserver_endpoints)
fleet.init(role)
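# Illustration (not executed here): with the local_config defaults
# PADDLE_PORT="6184,6185" and PADDLE_PSERVERS="127.0.0.1", the multi-port branch
# above builds eplist = ["127.0.0.1:6184", "127.0.0.1:6185"]; with a single port
# and a hypothetical PADDLE_PSERVERS="10.0.0.1,10.0.0.2" it would instead build
# ["10.0.0.1:6174", "10.0.0.2:6174"].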
def optimization(base_lr, loss, train_steps, optimizer='sgd'):
decayed_lr = L.learning_rate_scheduler.polynomial_decay(
learning_rate=base_lr,
decay_steps=train_steps,
end_learning_rate=0.0001 * base_lr,
power=1.0,
cycle=False)
if optimizer == 'sgd':
optimizer = F.optimizer.SGD(decayed_lr)
elif optimizer == 'adam':
optimizer = F.optimizer.Adam(decayed_lr, lazy_mode=True)
else:
raise ValueError
log.info('learning rate:%f' % (base_lr))
#create the DistributeTranspiler config
config = DistributeTranspilerConfig()
config.sync_mode = False
#config.runtime_split_send_recv = False
config.slice_var_up = False
#create the distributed optimizer
optimizer = fleet.distributed_optimizer(optimizer, config)
optimizer.minimize(loss)
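# Note: with power=1.0 and cycle=False, polynomial_decay above is a linear decay,
# roughly lr(step) = (base_lr - 0.0001 * base_lr) * (1 - step / train_steps)
#                    + 0.0001 * base_lr,
# i.e. the learning rate shrinks from base_lr to 0.0001 * base_lr over train_steps.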
def build_complied_prog(train_program, model_loss):
num_threads = int(os.getenv("CPU_NUM", 10))
trainer_id = int(os.getenv("PADDLE_TRAINER_ID", 0))
exec_strategy = F.ExecutionStrategy()
exec_strategy.num_threads = num_threads
#exec_strategy.use_experimental_executor = True
build_strategy = F.BuildStrategy()
build_strategy.enable_inplace = True
#build_strategy.memory_optimize = True
build_strategy.memory_optimize = False
build_strategy.remove_unnecessary_lock = False
if num_threads > 1:
build_strategy.reduce_strategy = F.BuildStrategy.ReduceStrategy.Reduce
compiled_prog = F.compiler.CompiledProgram(
train_program).with_data_parallel(loss_name=model_loss.name)
return compiled_prog
def train_prog(exe, program, loss, node2vec_pyreader, args, train_steps):
trainer_id = int(os.getenv("PADDLE_TRAINER_ID", "0"))
step = 0
if not os.path.exists(args.save_path):
os.makedirs(args.save_path)
while True:
try:
begin_time = time.time()
loss_val, = exe.run(program, fetch_list=[loss])
log.info("step %s: loss %.5f speed: %.5f s/step" %
(step, np.mean(loss_val), time.time() - begin_time))
step += 1
except F.core.EOFException:
node2vec_pyreader.reset()
if step % args.steps_per_save == 0 or step == train_steps:
save_path = args.save_path
if trainer_id == 0:
model_path = os.path.join(save_path, "%s" % step)
fleet.save_persistables(exe, model_path)
if step == train_steps:
break
def main(args):
log.info("start")
worker_num = int(os.getenv("PADDLE_TRAINERS_NUM", "0"))
num_devices = int(os.getenv("CPU_NUM", 10))
model = Metapath2vecModel(config=args)
pyreader = model.pyreader
loss = model.forward()
# init fleet
init_role()
train_steps = math.ceil(args.num_nodes * args.epochs / args.batch_size /
num_devices / worker_num)
log.info("Train step: %s" % train_steps)
real_batch_size = args.batch_size * args.walk_len * args.win_size
if args.optimizer == "sgd":
args.lr *= real_batch_size
optimization(args.lr, loss, train_steps, args.optimizer)
# init and run server or worker
if fleet.is_server():
fleet.init_server(args.warm_start_from_dir)
fleet.run_server()
if fleet.is_worker():
log.info("start init worker done")
fleet.init_worker()
# only the workers load the training samples
log.info("init worker done")
exe = F.Executor(F.CPUPlace())
exe.run(fleet.startup_program)
log.info("Startup done")
dataset = m2vGraph(args)
log.info("Build graph done.")
data_generator = multiprocess_data_generator(args, dataset)
cur_time = time.time()
for idx, _ in enumerate(data_generator()):
log.info("iter %s: %s s" % (idx, time.time() - cur_time))
cur_time = time.time()
if idx == 100:
break
pyreader.decorate_tensor_provider(data_generator)
pyreader.start()
compiled_prog = build_complied_prog(fleet.main_program, loss)
train_prog(exe, compiled_prog, loss, pyreader, args, train_steps)
if __name__ == '__main__':
parser = argparse.ArgumentParser(description='metapath2vec')
parser.add_argument("-c", "--config", type=str, default="./config.yaml")
args = parser.parse_args()
config = load_config(args.config)
log.info(config)
main(config)
# graph data config
edge_path: "./data/data_processed"
edge_files: "p2a:paper_author.txt,p2c:paper_conference.txt,p2t:paper_type.txt"
node_types_file: "node_types.txt"
num_nodes: 37791
symmetry: True
# skipgram pair data config
win_size: 5
neg_num: 5
# average; m2v_plus
neg_sample_type: "average"
# random walk config
# m2v; multi_m2v;
walk_mode: "m2v"
meta_path: "c2p-p2a-a2p-p2c"
first_node_type: "c"
walk_len: 24
batch_size: 4
node_shuffle: True
node_files: null
num_sample_workers: 2
# model config
embed_dim: 64
is_sparse: True
# only use when num_nodes > 100,000,000; slower than normal embedding
is_distributed: False
# training config
epochs: 10
optimizer: "sgd"
lr: 1.0
warm_start_from_dir: null
walkpath_files: "None"
train_files: "None"
steps_per_save: 1000
save_path: "./checkpoints"
log_dir: "./logs"
CPU_NUM: 16
# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Data preprocessing for DBLP dataset"""
import sys
import os
import argparse
import numpy as np
from collections import OrderedDict
AUTHOR = 14475
PAPER = 14376
CONF = 20
TYPE = 8920
LABEL = 4
def build_node_types(meta_node, outfile):
"""build_node_types"""
nt_ori2new = {}
with open(outfile, 'w') as writer:
offset = 0
for node_type, num_nodes in meta_node.items():
ori_id2new_id = {}
for i in range(num_nodes):
writer.write("%d\t%s\n" % (offset + i, node_type))
ori_id2new_id[i + 1] = offset + i
nt_ori2new[node_type] = ori_id2new_id
offset += num_nodes
return nt_ori2new
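# Illustration of the remapping (assuming the a/p/c/t order set in __main__ below):
# authors take global ids 0..14474, papers 14475..28850, conferences 28851..28870
# and terms 28871..37790, so an original 1-based author id k maps to k - 1 and an
# original paper id k maps to 14475 + k - 1; 37791 ids in total.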
def remapping_index(args, src_dict, dst_dict, ori_file, new_file):
"""remapping_index"""
ori_file = os.path.join(args.data_path, ori_file)
new_file = os.path.join(args.output_path, new_file)
with open(ori_file, 'r') as reader, open(new_file, 'w') as writer:
for line in reader:
slots = line.strip().split()
s = int(slots[0])
d = int(slots[1])
new_s = src_dict[s]
new_d = dst_dict[d]
writer.write("%d\t%d\n" % (new_s, new_d))
def author_label(args, ori_id2pgl_id, ori_file, real_file, new_file):
"""author_label"""
ori_file = os.path.join(args.data_path, ori_file)
real_file = os.path.join(args.data_path, real_file)
new_file = os.path.join(args.output_path, new_file)
real_id2pgl_id = {}
with open(ori_file, 'r') as reader:
for line in reader:
slots = line.strip().split()
ori_id = int(slots[0])
real_id = int(slots[1])
pgl_id = ori_id2pgl_id[ori_id]
real_id2pgl_id[real_id] = pgl_id
with open(real_file, 'r') as reader, open(new_file, 'w') as writer:
for line in reader:
slots = line.strip().split()
real_id = int(slots[0])
label = int(slots[1])
pgl_id = real_id2pgl_id[real_id]
writer.write("%d\t%d\n" % (pgl_id, label))
if __name__ == "__main__":
parser = argparse.ArgumentParser(description='DBLP data preprocessing')
parser.add_argument(
'--data_path',
default=None,
type=str,
help='original data path(default: None)')
parser.add_argument(
'--output_path',
default=None,
type=str,
help='output path(default: None)')
args = parser.parse_args()
meta_node = OrderedDict()
meta_node['a'] = AUTHOR
meta_node['p'] = PAPER
meta_node['c'] = CONF
meta_node['t'] = TYPE
if not os.path.exists(args.output_path):
os.makedirs(args.output_path)
node_types_file = os.path.join(args.output_path, "node_types.txt")
nt_ori2new = build_node_types(meta_node, node_types_file)
remapping_index(args, nt_ori2new['p'], nt_ori2new['a'], 'paper_author.dat',
'paper_author.txt')
remapping_index(args, nt_ori2new['p'], nt_ori2new['c'],
'paper_conference.dat', 'paper_conference.txt')
remapping_index(args, nt_ori2new['p'], nt_ori2new['t'], 'paper_type.dat',
'paper_type.txt')
author_label(args, nt_ori2new['a'], 'author_map_id.dat',
'author_label.dat', 'author_label.txt')
# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import argparse
import sys
import os
import numpy as np
import pickle as pkl
import tqdm
import time
import random
from pgl.utils.logger import log
from pgl import heter_graph
class m2vGraph(object):
"""Implemetation of graph in order to sample metapath random walk.
"""
def __init__(self, config):
self.edge_path = config.edge_path
self.num_nodes = config.num_nodes
self.symmetry = config.symmetry
edge_files = config.edge_files
node_types_file = config.node_types_file
self.edge_file_list = []
for pair in edge_files.split(','):
e_type, filename = pair.split(':')
filename = os.path.join(self.edge_path, filename)
self.edge_file_list.append((e_type, filename))
self.node_types_file = os.path.join(self.edge_path, node_types_file)
self.build_graph()
def build_graph(self):
"""Build pgl heterogeneous graph.
"""
edges_by_types = {}
npy = self.edge_file_list[0][1] + ".npy"
if os.path.exists(npy):
log.info("load data from numpy file")
for pair in self.edge_file_list:
edges_by_types[pair[0]] = np.load(pair[1] + ".npy")
else:
log.info("load data from txt file")
for pair in self.edge_file_list:
edges_by_types[pair[0]] = self.load_edges(pair[1])
# np.save(pair[1] + ".npy", edges_by_types[pair[0]])
for e_type, edges in edges_by_types.items():
log.info(["number of %s edges: " % e_type, len(edges)])
if self.symmetry:
tmp = {}
for key, edges in edges_by_types.items():
n_list = key.split('2')
re_key = n_list[1] + '2' + n_list[0]
tmp[re_key] = edges_by_types[key][:, [1, 0]]
edges_by_types.update(tmp)
log.info(["finished loadding symmetry edges."])
node_types = self.load_node_types(self.node_types_file)
assert len(node_types) == self.num_nodes, \
"num_nodes should be equal to the length of node_types"
log.info(["number of nodes: ", len(node_types)])
node_features = {
'index': np.array([i for i in range(self.num_nodes)]).reshape(
-1, 1).astype(np.int64)
}
self.graph = heter_graph.HeterGraph(
num_nodes=self.num_nodes,
edges=edges_by_types,
node_types=node_types,
node_feat=node_features)
def load_edges(self, file_, symmetry=False):
"""Load edges from file.
"""
edges = []
with open(file_, 'r') as reader:
for line in reader:
items = line.strip().split()
src, dst = int(items[0]), int(items[1])
edges.append((src, dst))
if symmetry:
edges.append((dst, src))
edges = np.array(list(set(edges)), dtype=np.int64)
# edges = list(set(edges))
return edges
def load_node_types(self, file_):
"""Load node types
"""
node_types = []
log.info("node_types_file name: %s" % file_)
with open(file_, 'r') as reader:
for line in reader:
items = line.strip().split()
node_id = int(items[0])
n_type = items[1]
node_types.append((node_id, n_type))
return node_types
#!/bin/bash
set -x
source ./utils.sh
export CPU_NUM=$CPU_NUM
export FLAGS_rpc_deadline=3000000
export FLAGS_communicator_send_queue_size=1
export FLAGS_communicator_min_send_grad_num_before_recv=0
export FLAGS_communicator_max_merge_var_num=1
export FLAGS_communicator_merge_sparse_grad=0
python -u cluster_train.py -c config.yaml
#!/bin/bash
export PADDLE_TRAINERS_NUM=2
export PADDLE_PSERVERS_NUM=2
export PADDLE_PORT=6184,6185
export PADDLE_PSERVERS="127.0.0.1"
# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
metapath2vec model.
"""
from __future__ import division
from __future__ import absolute_import
from __future__ import print_function
from __future__ import unicode_literals
import math
import paddle.fluid.layers as L
import paddle.fluid as F
def distributed_embedding(input,
dict_size,
hidden_size,
initializer,
name,
num_part=16,
is_sparse=False,
learning_rate=1.0):
_part_size = hidden_size // num_part
if hidden_size % num_part != 0:
_part_size += 1
output_embedding = []
p_num = 0
while hidden_size > 0:
_part_size = min(_part_size, hidden_size)
hidden_size -= _part_size
print("part", p_num, "size=", (dict_size, _part_size))
part_embedding = L.embedding(
input=input,
size=(dict_size, int(_part_size)),
is_sparse=is_sparse,
is_distributed=False,
param_attr=F.ParamAttr(
name=name + '_part%s' % p_num,
initializer=initializer,
learning_rate=learning_rate))
p_num += 1
output_embedding.append(part_embedding)
return L.concat(output_embedding, -1)
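# Illustration: with hidden_size=64 and num_part=16 the loop above creates 16
# embedding tables of width 4 and concatenates them back to 64 columns; splitting
# the table this way keeps each parameter block below roughly 2^31 bytes when
# dict_size is very large (see max_hidden_size in Metapath2vecModel below).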
class Metapath2vecModel(object):
def __init__(self, config, embedding_lr=1.0):
self.config = config
self.neg_num = self.config.neg_num
self.num_nodes = self.config.num_nodes
self.embed_dim = self.config.embed_dim
self.is_sparse = self.config.is_sparse
self.is_distributed = self.config.is_distributed
self.embedding_lr = embedding_lr
self.pyreader = L.py_reader(
capacity=70,
shapes=[[-1, 1, 1], [-1, self.neg_num + 1, 1]],
dtypes=['int64', 'int64'],
lod_levels=[0, 0],
name='train',
use_double_buffer=True)
bound = 1. / math.sqrt(self.embed_dim)
self.embed_init = F.initializer.Uniform(low=-bound, high=bound)
self.loss = None
max_hidden_size = int(math.pow(2, 31) / 4 / self.num_nodes)
self.num_part = int(math.ceil(1. * self.embed_dim / max_hidden_size))
def forward(self):
src, dsts = L.read_file(self.pyreader)
if self.is_sparse:
src = L.reshape(src, [-1, 1])
dsts = L.reshape(dsts, [-1, 1])
if self.num_part is not None and self.num_part != 1 and not self.is_distributed:
src_embed = distributed_embedding(
src,
self.num_nodes,
self.embed_dim,
self.embed_init,
"weight",
self.num_part,
self.is_sparse,
learning_rate=self.embedding_lr)
dsts_embed = distributed_embedding(
dsts,
self.num_nodes,
self.embed_dim,
self.embed_init,
"weight",
self.num_part,
self.is_sparse,
learning_rate=self.embedding_lr)
else:
src_embed = L.embedding(
src, (self.num_nodes, self.embed_dim),
self.is_sparse,
self.is_distributed,
param_attr=F.ParamAttr(
name="weight",
learning_rate=self.embedding_lr,
initializer=self.embed_init))
dsts_embed = L.embedding(
dsts, (self.num_nodes, self.embed_dim),
self.is_sparse,
self.is_distributed,
param_attr=F.ParamAttr(
name="weight",
learning_rate=self.embedding_lr,
initializer=self.embed_init))
if self.is_sparse:
src_embed = L.reshape(src_embed, [-1, 1, self.embed_dim])
dsts_embed = L.reshape(dsts_embed,
[-1, self.neg_num + 1, self.embed_dim])
logits = L.matmul(
src_embed, dsts_embed,
transpose_y=True) # [batch_size, 1, neg_num+1]
pos_label = L.fill_constant_batch_size_like(logits, [-1, 1, 1],
"float32", 1)
neg_label = L.fill_constant_batch_size_like(
logits, [-1, 1, self.neg_num], "float32", 0)
label = L.concat([pos_label, neg_label], -1)
pos_weight = L.fill_constant_batch_size_like(logits, [-1, 1, 1],
"float32", self.neg_num)
neg_weight = L.fill_constant_batch_size_like(
logits, [-1, 1, self.neg_num], "float32", 1)
weight = L.concat([pos_weight, neg_weight], -1)
weight.stop_gradient = True
label.stop_gradient = True
loss = L.sigmoid_cross_entropy_with_logits(logits, label)
loss = loss * weight
loss = L.reduce_mean(loss)
loss = loss * ((self.neg_num + 1) / 2 / self.neg_num)
loss.persistable = True
self.loss = loss
return loss
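# Note on the loss above: label is [1, 0, ..., 0] over the neg_num + 1 logits and
# weight is [neg_num, 1, ..., 1], so the single positive pair and the neg_num
# negatives contribute equally; the final (neg_num + 1) / 2 / neg_num factor turns
# the plain element mean into a weighted average (the weights sum to 2 * neg_num
# per sample).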
# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Optimized Multiprocessing Reader for PaddlePaddle
"""
import multiprocessing
import numpy as np
import time
import paddle.fluid as fluid
import pyarrow
def _serialize_serializable(obj):
"""Serialize Feed Dict
"""
return {"type": type(obj), "data": obj.__dict__}
def _deserialize_serializable(obj):
"""Deserialize Feed Dict
"""
val = obj["type"].__new__(obj["type"])
val.__dict__.update(obj["data"])
return val
context = pyarrow.default_serialization_context()
context.register_type(
object,
"object",
custom_serializer=_serialize_serializable,
custom_deserializer=_deserialize_serializable)
def serialize_data(data):
"""serialize_data"""
return pyarrow.serialize(data, context=context).to_buffer().to_pybytes()
def deserialize_data(data):
"""deserialize_data"""
return pyarrow.deserialize(data, context=context)
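# Example round trip (sketch):
#   buf = serialize_data({"src": np.zeros((2, 1, 1), dtype="int64")})
#   deserialize_data(buf)   # -> an equal dict of numpy arrays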
def multiprocess_reader(readers, use_pipe=True, queue_size=1000):
"""
multiprocess_reader use python multi process to read data from readers
and then use multiprocess.Queue or multiprocess.Pipe to merge all
data. The process number is equal to the number of input readers, each
process call one reader.
Multiprocess.Queue require the rw access right to /dev/shm, some
platform does not support.
you need to create multiple readers first, these readers should be independent
to each other so that each process can work independently.
An example:
.. code-block:: python
reader0 = reader(["file01", "file02"])
reader1 = reader(["file11", "file12"])
reader1 = reader(["file21", "file22"])
reader = multiprocess_reader([reader0, reader1, reader2],
queue_size=100, use_pipe=False)
"""
assert type(readers) is list and len(readers) > 0
def _read_into_queue(reader, queue):
"""read_into_queue"""
for sample in reader():
if sample is None:
raise ValueError("sample has None")
queue.put(serialize_data(sample))
queue.put(serialize_data(None))
def queue_reader():
"""queue_reader"""
queue = multiprocessing.Queue(queue_size)
for reader in readers:
p = multiprocessing.Process(
target=_read_into_queue, args=(reader, queue))
p.start()
reader_num = len(readers)
finish_num = 0
while finish_num < reader_num:
sample = deserialize_data(queue.get())
if sample is None:
finish_num += 1
else:
yield sample
def _read_into_pipe(reader, conn):
"""read_into_pipe"""
for sample in reader():
if sample is None:
raise ValueError("sample has None!")
conn.send(serialize_data(sample))
conn.send(serialize_data(None))
conn.close()
def pipe_reader():
"""pipe_reader"""
conns = []
for reader in readers:
parent_conn, child_conn = multiprocessing.Pipe()
conns.append(parent_conn)
p = multiprocessing.Process(
target=_read_into_pipe, args=(reader, child_conn))
p.start()
reader_num = len(readers)
finish_num = 0
conn_to_remove = []
finish_flag = np.zeros(len(conns), dtype="int32")
while finish_num < reader_num:
for conn_id, conn in enumerate(conns):
if finish_flag[conn_id] > 0:
continue
buff = conn.recv()
now = time.time()
sample = deserialize_data(buff)
out = time.time() - now
if sample is None:
finish_num += 1
conn.close()
finish_flag[conn_id] = 1
else:
yield sample
if use_pipe:
return pipe_reader
else:
return queue_reader
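# Usage sketch (hypothetical readers):
#   def make_reader(shard):
#       def reader():
#           for i in shard:
#               yield np.array([i])
#       return reader
#   merged = multiprocess_reader([make_reader([0, 1]), make_reader([2, 3])],
#                                use_pipe=True)
#   for sample in merged():
#       print(sample)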
# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
This file provides the multi-class classification task for evaluating the embeddings learned by the metapath2vec model.
"""
import argparse
import sys
import os
import tqdm
import time
import math
import logging
import random
import pickle as pkl
import numpy as np
import sklearn.metrics
from sklearn.metrics import f1_score
import pgl
import paddle.fluid as fluid
import paddle.fluid.layers as fl
def load_data(file_):
"""Load data for node classification.
"""
words_label = []
line_count = 0
with open(file_, 'r') as reader:
for line in reader:
line_count += 1
tokens = line.strip().split()
word, label = int(tokens[0]), int(tokens[1]) - 1
words_label.append((word, label))
words_label = np.array(words_label, dtype=np.int64)
np.random.shuffle(words_label)
logging.info('%d/%d word_label pairs have been loaded' %
(len(words_label), line_count))
return words_label
def node_classify_model(config):
"""Build node classify model.
"""
nodes = fl.data('nodes', shape=[None, 1], dtype='int64')
labels = fl.data('labels', shape=[None, 1], dtype='int64')
embed_nodes = fl.embedding(
input=nodes,
size=[config.num_nodes, config.embed_dim],
param_attr=fluid.ParamAttr(name='weight'))
embed_nodes.stop_gradient = True
probs = fl.fc(input=embed_nodes, size=config.num_labels, act='softmax')
predict = fl.argmax(probs, axis=-1)
loss = fl.cross_entropy(input=probs, label=labels)
loss = fl.reduce_mean(loss)
return {
'loss': loss,
'probs': probs,
'predict': predict,
'labels': labels,
}
def run_epoch(exe, prog, model, feed_dict, lr):
"""Run training process of every epoch.
"""
if lr is None:
loss, predict = exe.run(prog,
feed=feed_dict,
fetch_list=[model['loss'], model['predict']],
return_numpy=True)
lr_ = 0
else:
loss, predict, lr_ = exe.run(
prog,
feed=feed_dict,
fetch_list=[model['loss'], model['predict'], lr],
return_numpy=True)
macro_f1 = f1_score(feed_dict['labels'], predict, average="macro")
micro_f1 = f1_score(feed_dict['labels'], predict, average="micro")
return {
'loss': loss,
'pred': predict,
'lr': lr_,
'macro_f1': macro_f1,
'micro_f1': micro_f1
}
def main(args):
"""main function for training node classification task.
"""
words_label = load_data(args.dataset)
# split data for training and testing
split_position = int(words_label.shape[0] * args.train_percent)
train_words_label = words_label[0:split_position, :]
test_words_label = words_label[split_position:, :]
place = fluid.CUDAPlace(0) if args.use_cuda else fluid.CPUPlace()
train_prog = fluid.Program()
test_prog = fluid.Program()
startup_prog = fluid.Program()
with fluid.program_guard(train_prog, startup_prog):
with fluid.unique_name.guard():
model = node_classify_model(args)
test_prog = train_prog.clone(for_test=True)
with fluid.program_guard(train_prog, startup_prog):
lr = fl.polynomial_decay(args.lr, 1000, 0.001)
adam = fluid.optimizer.Adam(lr)
adam.minimize(model['loss'])
exe = fluid.Executor(place)
exe.run(startup_prog)
def existed_params(var):
if not isinstance(var, fluid.framework.Parameter):
return False
return os.path.exists(os.path.join(args.ckpt_path, var.name))
fluid.io.load_vars(
exe, args.ckpt_path, main_program=train_prog, predicate=existed_params)
# load_param(args.ckpt_path, ['content'])
feed_dict = {}
X = train_words_label[:, 0].reshape(-1, 1)
labels = train_words_label[:, 1].reshape(-1, 1)
logging.info('%d/%d data to train' %
(labels.shape[0], words_label.shape[0]))
test_feed_dict = {}
test_X = test_words_label[:, 0].reshape(-1, 1)
test_labels = test_words_label[:, 1].reshape(-1, 1)
logging.info('%d/%d data to test' %
(test_labels.shape[0], words_label.shape[0]))
for epoch in range(args.epochs):
feed_dict['nodes'] = X
feed_dict['labels'] = labels
train_result = run_epoch(exe, train_prog, model, feed_dict, lr)
test_feed_dict['nodes'] = test_X
test_feed_dict['labels'] = test_labels
test_result = run_epoch(exe, test_prog, model, test_feed_dict, lr=None)
logging.info(
'epoch %d | lr %.4f | train_loss %.5f | train_macro_F1 %.4f | train_micro_F1 %.4f | test_loss %.5f | test_macro_F1 %.4f | test_micro_F1 %.4f'
% (epoch, train_result['lr'], train_result['loss'],
train_result['macro_f1'], train_result['micro_f1'],
test_result['loss'], test_result['macro_f1'],
test_result['micro_f1']))
logging.info(
'final_test_macro_f1 score: %.4f | final_test_micro_f1 score: %.4f' %
(test_result['macro_f1'], test_result['micro_f1']))
if __name__ == "__main__":
parser = argparse.ArgumentParser(description='multi_class')
parser.add_argument(
'--dataset',
default=None,
type=str,
help='training and testing data file(default: None)')
parser.add_argument(
'--ckpt_path', default=None, type=str, help='checkpoint path(default: None)')
parser.add_argument("--use_cuda", action='store_true', help="use_cuda")
parser.add_argument(
'--train_percent',
default=0.5,
type=float,
help='train_percent(default: 0.5)')
parser.add_argument(
'--num_labels',
default=4,
type=int,
help='number of labels(default: 4)')
parser.add_argument(
'--epochs',
default=100,
type=int,
help='number of epochs for training(default: 100)')
parser.add_argument(
'--lr',
default=0.025,
type=float,
help='learning rate(default: 0.025)')
parser.add_argument(
'--num_nodes', default=0, type=int, help='number of nodes')
parser.add_argument(
'--embed_dim',
default=64,
type=int,
help='dimension of embedding(default: 64)')
args = parser.parse_args()
log_format = '%(asctime)s-%(levelname)s-%(name)s: %(message)s'
logging.basicConfig(level='INFO', format=log_format)
main(args)
# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Implementation of some helper functions"""
from __future__ import division
from __future__ import absolute_import
from __future__ import print_function
from __future__ import unicode_literals
import os
import time
import yaml
import numpy as np
from pgl.utils.logger import log
class AttrDict(dict):
"""Attr dict
"""
def __init__(self, d):
self.dict = d
def __getattr__(self, attr):
value = self.dict[attr]
if isinstance(value, dict):
return AttrDict(value)
else:
return value
def __str__(self):
return str(self.dict)
def load_config(config_file):
"""Load config file"""
with open(config_file) as f:
if hasattr(yaml, 'FullLoader'):
config = yaml.load(f, Loader=yaml.FullLoader)
else:
config = yaml.load(f)
return AttrDict(config)
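# Usage sketch: config = load_config("./config.yaml") exposes the yaml keys as
# attributes, e.g. config.lr -> 1.0 and config.walk_mode -> "m2v" for the config
# shipped with this example; nested dicts are wrapped so dotted access also works.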
# parse yaml file
function parse_yaml {
local prefix=$2
local s='[[:space:]]*' w='[a-zA-Z0-9_]*' fs=$(echo @|tr @ '\034')
sed -ne "s|^\($s\):|\1|" \
-e "s|^\($s\)\($w\)$s:$s[\"']\(.*\)[\"']$s\$|\1$fs\2$fs\3|p" \
-e "s|^\($s\)\($w\)$s:$s\(.*\)$s\$|\1$fs\2$fs\3|p" $1 |
awk -F$fs '{
indent = length($1)/2;
vname[indent] = $2;
for (i in vname) {if (i > indent) {delete vname[i]}}
if (length($3) > 0) {
vn=""; for (i=0; i<indent; i++) {vn=(vn)(vname[i])("_")}
printf("%s%s%s=\"%s\"\n", "'$prefix'",vn, $2, $3);
}
}'
}
eval $(parse_yaml "$(dirname "${BASH_SOURCE}")"/config.yaml)
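# For the config.yaml shipped with this example, the eval above exports flat
# shell variables such as edge_path="./data/data_processed", log_dir="./logs"
# and CPU_NUM="16", which cloud_run.sh and job.sh then read.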
# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""doc
"""
from __future__ import division
from __future__ import absolute_import
from __future__ import print_function
import time
import io
import os
import numpy as np
import random
from pgl.utils.logger import log
from pgl.sample import metapath_randomwalk
from pgl.graph_kernel import skip_gram_gen_pair
from pgl.graph_kernel import alias_sample_build_table
from utils import load_config
from graph import m2vGraph
import mp_reader
class NodeGenerator(object):
"""Node generator"""
def __init__(self, config, graph):
self.config = config
self.graph = graph
self.batch_size = self.config.batch_size
self.shuffle = self.config.node_shuffle
self.node_files = self.config.node_files
self.first_node_type = self.config.first_node_type
self.walk_mode = self.config.walk_mode
def __call__(self):
if self.walk_mode == "m2v":
generator = self.m2v_node_generate
log.info("node gen mode is : %s" % (self.walk_mode))
elif self.walk_mode == "multi_m2v":
generator = self.multi_m2v_node_generate
log.info("node gen mode is : %s" % (self.walk_mode))
elif self.walk_mode == "files":
generator = self.files_node_generate
log.info("node gen mode is : %s" % (self.walk_mode))
else:
generator = self.m2v_node_generate
log.info("node gen mode is : %s" % (self.walk_mode))
while True:
for nodes in generator():
yield nodes
def m2v_node_generate(self):
"""m2v_node_generate"""
for nodes in self.graph.node_batch_iter(
batch_size=self.batch_size,
n_type=self.first_node_type,
shuffle=self.shuffle):
yield nodes
def multi_m2v_node_generate(self):
"""multi_m2v_node_generate"""
n_type_list = self.first_node_type.split(';')
num_n_type = len(n_type_list)
node_types = np.unique(self.graph.node_types).tolist()
node_generators = {}
for n_type in node_types:
node_generators[n_type] = \
self.graph.node_batch_iter(self.batch_size, n_type=n_type)
cc = 0
while True:
idx = cc % num_n_type
n_type = n_type_list[idx]
try:
nodes = next(node_generators[n_type])
except StopIteration as e:
log.info("exception when iteration")
break
yield (nodes, idx)
cc += 1
def files_node_generate(self):
"""files_node_generate"""
nodes = []
for filename in self.node_files:
with io.open(filename) as inf:
for line in inf:
node = int(line.strip('\n\t'))
nodes.append(node)
if len(nodes) == self.batch_size:
yield nodes
nodes = []
if len(nodes):
yield nodes
class WalkGenerator(object):
"""Walk generator"""
def __init__(self, config, dataset):
self.config = config
self.dataset = dataset
self.graph = self.dataset.graph
self.walk_mode = self.config.walk_mode
self.node_generator = NodeGenerator(self.config, self.graph)
if self.walk_mode == "multi_m2v":
num_path = len(self.config.meta_path.split(';'))
num_first_node_type = len(self.config.first_node_type.split(';'))
assert num_first_node_type == num_path, \
"In [multi_m2v] walk_mode, the number of metapath should be the same \
as the number of first_node_type"
assert num_path > 1, "In [multi_m2v] walk_mode, the number of metapath\
should be greater than 1"
def __call__(self):
np.random.seed(os.getpid())
if self.walk_mode == "m2v":
walk_generator = self.m2v_walk
log.info("walk mode is : %s" % (self.walk_mode))
elif self.walk_mode == "multi_m2v":
walk_generator = self.multi_m2v_walk
log.info("walk mode is : %s" % (self.walk_mode))
else:
raise ValueError("walk_mode [%s] is not matched" % self.walk_mode)
for walks in walk_generator():
yield walks
def m2v_walk(self):
"""Metapath2vec walker"""
for nodes in self.node_generator():
walks = metapath_randomwalk(
self.graph, nodes, self.config.meta_path, self.config.walk_len)
yield walks
def multi_m2v_walk(self):
"""Multi metapath2vec walker"""
meta_paths = self.config.meta_path.split(';')
for nodes, idx in self.node_generator():
walks = metapath_randomwalk(self.graph, nodes, meta_paths[idx],
self.config.walk_len)
yield walks
class DataGenerator(object):
def __init__(self, config, dataset):
self.config = config
self.dataset = dataset
self.graph = self.dataset.graph
self.walk_generator = WalkGenerator(self.config, self.dataset)
def __call__(self):
generator = self.pair_generate
for src, pos, negs in generator():
dst = np.concatenate([pos, negs], 1)
yield src, dst
def pair_generate(self):
for walks in self.walk_generator():
try:
src_list, pos_list = [], []
for walk in walks:
s, p = skip_gram_gen_pair(walk, self.config.win_size)
src_list.append(s), pos_list.append(p)
src = [s for x in src_list for s in x]
pos = [s for x in pos_list for s in x]
if len(src) == 0:
continue
negs = self.negative_sample(
src,
pos,
neg_num=self.config.neg_num,
neg_sample_type=self.config.neg_sample_type)
src = np.array(src, dtype=np.int64).reshape(-1, 1, 1)
pos = np.array(pos, dtype=np.int64).reshape(-1, 1, 1)
yield src, pos, negs
except Exception as e:
log.exception(e)
def negative_sample(self, src, pos, neg_num, neg_sample_type):
if neg_sample_type == "average":
neg_sample_size = [len(pos), neg_num, 1]
negs = np.random.randint(
low=0, high=self.graph.num_nodes, size=neg_sample_size)
elif neg_sample_type == "m2v_plus":
negs = []
for s in src:
neg = self.graph.sample_nodes(
sample_num=neg_num, n_type=self.graph.node_types[s])
negs.append(neg)
negs = np.vstack(negs).reshape(-1, neg_num, 1)
else: # equal to "average"
neg_sample_size = [len(pos), neg_num, 1]
negs = np.random.randint(
low=0, high=self.graph.num_nodes, size=neg_sample_size)
negs = negs.astype(np.int64)
return negs
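# Shape note: for a batch of B (src, pos) pairs and neg_num=5, negs is [B, 5, 1];
# DataGenerator.__call__ concatenates pos ([B, 1, 1]) and negs along axis 1 into
# dst of shape [B, neg_num + 1, 1], which matches the py_reader shapes declared in
# Metapath2vecModel.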
def multiprocess_data_generator(config, dataset):
"""Multiprocess data generator.
"""
if config.num_sample_workers == 1:
data_generator = DataGenerator(config, dataset)
else:
pool = [
DataGenerator(config, dataset)
for i in range(config.num_sample_workers)
]
data_generator = mp_reader.multiprocess_reader(
pool, use_pipe=True, queue_size=100)
return data_generator
if __name__ == "__main__":
config_file = "./config.yaml"
config = load_config(config_file)
dataset = m2vGraph(config)
data_generator = multiprocess_data_generator(config, dataset)
start = time.time()
cc = 0
for src, dst in data_generator():
log.info(src.shape)
log.info("time: %.6f" % (time.time() - start))
start = time.time()
cc += 1
if cc == 100:
break
......@@ -19,8 +19,8 @@ import pgl
import time
from pgl.utils import mp_reader
from pgl.utils.logger import log
import train
import time
import copy
def node_batch_iter(nodes, node_label, batch_size):
......@@ -46,12 +46,11 @@ def traverse(item):
yield item
def flat_node_and_edge(nodes, eids):
def flat_node_and_edge(nodes):
"""flat_node_and_edge
"""
nodes = list(set(traverse(nodes)))
eids = list(set(traverse(eids)))
return nodes, eids
return nodes
def worker(batch_info, graph, graph_wrapper, samples):
......@@ -61,31 +60,42 @@ def worker(batch_info, graph, graph_wrapper, samples):
def work():
"""work
"""
first = True
_graph_wrapper = copy.copy(graph_wrapper)
_graph_wrapper.node_feat_tensor_dict = {}
for batch_train_samples, batch_train_labels in batch_info:
start_nodes = batch_train_samples
nodes = start_nodes
eids = []
edges = []
for max_deg in samples:
pred, pred_eid = graph.sample_predecessor(
start_nodes, max_degree=max_deg, return_eids=True)
pred_nodes = graph.sample_predecessor(
start_nodes, max_degree=max_deg)
for dst_node, src_nodes in zip(start_nodes, pred_nodes):
for src_node in src_nodes:
edges.append((src_node, dst_node))
last_nodes = nodes
nodes = [nodes, pred]
eids = [eids, pred_eid]
nodes, eids = flat_node_and_edge(nodes, eids)
nodes = [nodes, pred_nodes]
nodes = flat_node_and_edge(nodes)
# Find new nodes
start_nodes = list(set(nodes) - set(last_nodes))
if len(start_nodes) == 0:
break
subgraph = graph.subgraph(nodes=nodes, eid=eids)
subgraph = graph.subgraph(
nodes=nodes,
edges=edges,
with_node_feat=False,
with_edge_feat=False)
sub_node_index = subgraph.reindex_from_parrent_nodes(
batch_train_samples)
feed_dict = graph_wrapper.to_feed(subgraph)
feed_dict = _graph_wrapper.to_feed(subgraph)
feed_dict["node_label"] = np.expand_dims(
np.array(
batch_train_labels, dtype="int64"), -1)
feed_dict["node_index"] = sub_node_index
feed_dict["parent_node_index"] = np.array(nodes, dtype="int64")
yield feed_dict
return work
......@@ -97,23 +107,25 @@ def multiprocess_graph_reader(graph,
node_index,
batch_size,
node_label,
with_parent_node_index=False,
num_workers=4):
"""multiprocess_graph_reader
"""
def parse_to_subgraph(rd):
def parse_to_subgraph(rd, prefix, node_feat, _with_parent_node_index):
"""parse_to_subgraph
"""
def work():
"""work
"""
last = time.time()
for data in rd():
this = time.time()
feed_dict = data
now = time.time()
last = now
for key in node_feat:
feed_dict[prefix + '/node_feat/' + key] = node_feat[key][
feed_dict["parent_node_index"]]
if not _with_parent_node_index:
del feed_dict["parent_node_index"]
yield feed_dict
return work
......@@ -129,46 +141,17 @@ def multiprocess_graph_reader(graph,
reader_pool.append(
worker(batch_info[block_size * i:block_size * (i + 1)], graph,
graph_wrapper, samples))
if len(reader_pool) == 1:
r = parse_to_subgraph(reader_pool[0],
repr(graph_wrapper), graph.node_feat,
with_parent_node_index)
else:
multi_process_sample = mp_reader.multiprocess_reader(
reader_pool, use_pipe=True, queue_size=1000)
r = parse_to_subgraph(multi_process_sample)
return paddle.reader.buffered(r, 1000)
r = parse_to_subgraph(multi_process_sample,
repr(graph_wrapper), graph.node_feat,
with_parent_node_index)
return paddle.reader.buffered(r, num_workers)
return reader()
def graph_reader(graph, graph_wrapper, samples, node_index, batch_size,
node_label):
"""graph_reader"""
def reader():
"""reader"""
for batch_train_samples, batch_train_labels in node_batch_iter(
node_index, node_label, batch_size=batch_size):
start_nodes = batch_train_samples
nodes = start_nodes
eids = []
for max_deg in samples:
pred, pred_eid = graph.sample_predecessor(
start_nodes, max_degree=max_deg, return_eids=True)
last_nodes = nodes
nodes = [nodes, pred]
eids = [eids, pred_eid]
nodes, eids = flat_node_and_edge(nodes, eids)
# Find new nodes
start_nodes = list(set(nodes) - set(last_nodes))
if len(start_nodes) == 0:
break
subgraph = graph.subgraph(nodes=nodes, eid=eids)
feed_dict = graph_wrapper.to_feed(subgraph)
sub_node_index = subgraph.reindex_from_parrent_nodes(
batch_train_samples)
feed_dict["node_label"] = np.expand_dims(
np.array(
batch_train_labels, dtype="int64"), -1)
feed_dict["node_index"] = np.array(sub_node_index, dtype="int32")
yield feed_dict
return paddle.reader.buffered(reader, 1000)
......@@ -63,10 +63,7 @@ def load_data(normalize=True, symmetry=True):
log.info("Feature shape %s" % (repr(feature.shape)))
graph = pgl.graph.Graph(
num_nodes=feature.shape[0],
edges=list(zip(src, dst)),
node_feat={"index": np.arange(
0, len(feature), dtype="int64")})
num_nodes=feature.shape[0], edges=list(zip(src, dst)))
return {
"graph": graph,
......@@ -89,7 +86,13 @@ def build_graph_model(graph_wrapper, num_class, k_hop, graphsage_type,
node_label = fluid.layers.data(
"node_label", shape=[None, 1], dtype="int64", append_batch_size=False)
feature = fluid.layers.gather(feature, graph_wrapper.node_feat['index'])
parent_node_index = fluid.layers.data(
"parent_node_index",
shape=[None],
dtype="int64",
append_batch_size=False)
feature = fluid.layers.gather(feature, parent_node_index)
feature.stop_gradient = True
for i in range(k_hop):
......@@ -221,57 +224,33 @@ def main(args):
exe.run(startup_program)
feature_init(place)
if args.sample_workers > 1:
train_iter = reader.multiprocess_graph_reader(
data['graph'],
graph_wrapper,
samples=samples,
num_workers=args.sample_workers,
batch_size=args.batch_size,
node_index=data['train_index'],
node_label=data["train_label"])
else:
train_iter = reader.graph_reader(
data['graph'],
graph_wrapper,
samples=samples,
batch_size=args.batch_size,
with_parent_node_index=True,
node_index=data['train_index'],
node_label=data["train_label"])
if args.sample_workers > 1:
val_iter = reader.multiprocess_graph_reader(
data['graph'],
graph_wrapper,
samples=samples,
num_workers=args.sample_workers,
batch_size=args.batch_size,
node_index=data['val_index'],
node_label=data["val_label"])
else:
val_iter = reader.graph_reader(
data['graph'],
graph_wrapper,
samples=samples,
batch_size=args.batch_size,
with_parent_node_index=True,
node_index=data['val_index'],
node_label=data["val_label"])
if args.sample_workers > 1:
test_iter = reader.multiprocess_graph_reader(
data['graph'],
graph_wrapper,
samples=samples,
num_workers=args.sample_workers,
batch_size=args.batch_size,
node_index=data['test_index'],
node_label=data["test_label"])
else:
test_iter = reader.graph_reader(
data['graph'],
graph_wrapper,
samples=samples,
batch_size=args.batch_size,
with_parent_node_index=True,
node_index=data['test_index'],
node_label=data["test_label"])
......
......@@ -195,7 +195,7 @@ def run_epoch(batch_iter,
if num_trainer > 1:
num_samples = sum(
[len(batch["node_index"]) for batch in batch_feed_dict])
[len(_batch["node_index"]) for _batch in batch_feed_dict])
else:
num_samples = len(batch_feed_dict["node_index"])
total_loss += batch_loss * num_samples
......@@ -262,7 +262,6 @@ def main(args):
else:
train_exe = exe
if args.sample_workers > 1:
train_iter = reader.multiprocess_graph_reader(
data['graph'],
graph_wrapper,
......@@ -271,16 +270,7 @@ def main(args):
batch_size=args.batch_size,
node_index=data['train_index'],
node_label=data["train_label"])
else:
train_iter = reader.graph_reader(
data['graph'],
graph_wrapper,
samples=samples,
batch_size=args.batch_size,
node_index=data['train_index'],
node_label=data["train_label"])
if args.sample_workers > 1:
val_iter = reader.multiprocess_graph_reader(
data['graph'],
graph_wrapper,
......@@ -289,16 +279,7 @@ def main(args):
batch_size=args.batch_size,
node_index=data['val_index'],
node_label=data["val_label"])
else:
val_iter = reader.graph_reader(
data['graph'],
graph_wrapper,
samples=samples,
batch_size=args.batch_size,
node_index=data['val_index'],
node_label=data["val_label"])
if args.sample_workers > 1:
test_iter = reader.multiprocess_graph_reader(
data['graph'],
graph_wrapper,
......@@ -307,14 +288,6 @@ def main(args):
batch_size=args.batch_size,
node_index=data['test_index'],
node_label=data["test_label"])
else:
test_iter = reader.graph_reader(
data['graph'],
graph_wrapper,
samples=samples,
batch_size=args.batch_size,
node_index=data['test_index'],
node_label=data["test_label"])
for epoch in range(args.epoch):
run_epoch(
......
......@@ -97,11 +97,7 @@ def load_data(normalize=True, symmetry=True, scale=1):
graph = pgl.graph.Graph(
num_nodes=feature.shape[0],
edges=edges,
node_feat={
"index": np.arange(
0, len(feature), dtype="int64"),
"feature": feature
})
node_feat={"feature": feature})
return {
"graph": graph,
......@@ -244,7 +240,6 @@ def main(args):
test_program = train_program.clone(for_test=True)
if args.sample_workers > 1:
train_iter = reader.multiprocess_graph_reader(
data['graph'],
graph_wrapper,
......@@ -253,16 +248,7 @@ def main(args):
batch_size=args.batch_size,
node_index=data['train_index'],
node_label=data["train_label"])
else:
train_iter = reader.graph_reader(
data['graph'],
graph_wrapper,
samples=samples,
batch_size=args.batch_size,
node_index=data['train_index'],
node_label=data["train_label"])
if args.sample_workers > 1:
val_iter = reader.multiprocess_graph_reader(
data['graph'],
graph_wrapper,
......@@ -271,16 +257,7 @@ def main(args):
batch_size=args.batch_size,
node_index=data['val_index'],
node_label=data["val_label"])
else:
val_iter = reader.graph_reader(
data['graph'],
graph_wrapper,
samples=samples,
batch_size=args.batch_size,
node_index=data['val_index'],
node_label=data["val_label"])
if args.sample_workers > 1:
test_iter = reader.multiprocess_graph_reader(
data['graph'],
graph_wrapper,
......@@ -289,14 +266,6 @@ def main(args):
batch_size=args.batch_size,
node_index=data['test_index'],
node_label=data["test_label"])
else:
test_iter = reader.graph_reader(
data['graph'],
graph_wrapper,
samples=samples,
batch_size=args.batch_size,
node_index=data['test_index'],
node_label=data["test_label"])
with fluid.program_guard(train_program, startup_program):
adam = fluid.optimizer.Adam(learning_rate=args.lr)
......
......@@ -23,7 +23,7 @@ import tqdm
import time
import logging
import random
from pgl.contrib import heter_graph
from pgl import heter_graph
import pickle as pkl
......@@ -40,8 +40,10 @@ class Dataset(object):
def __init__(self, config):
self.config = config
self.walk_files = config['input_path'] + config['walk_path']
self.word2id_file = config['input_path'] + config['word2id_file']
self.walk_files = os.path.join(config['input_path'],
config['walk_path'])
self.word2id_file = os.path.join(config['input_path'],
config['word2id_file'])
self.word2freq = {}
self.word2id = {}
......@@ -65,10 +67,14 @@ class Dataset(object):
for walk_file in glob.glob(self.walk_files):
with open(walk_file, 'r') as reader:
for walk in reader:
walk = walk.strip().split(' ')
walk = walk.strip().split()
if len(walk) > 1:
self.sentences_count += 1
for word in walk:
if int(word) >= self.config[
'paper_start_index']: # remove paper
continue
else:
self.token_count += 1
word_freq[word] = word_freq.get(word, 0) + 1
......@@ -123,7 +129,11 @@ class Dataset(object):
for filename in walkpath_files:
with open(filename) as reader:
for line in reader:
words = line.strip().split(' ')
words = line.strip().split()
words = [
w for w in words
if int(w) < self.config['paper_start_index']
]
if len(words) > 1:
word_ids = [
self.word2id[w] for w in words if w in self.word2id
......
......@@ -13,9 +13,12 @@ sampler:
new_author_label_file: author_label.txt
new_venue_label_file: venue_label.txt
walk_saved_path: walks/
walk_batch_size: 1000
num_walks: 1000
walk_length: 100
metapath: conf-paper-author-paper-conf
num_sample_workers: 16
first_node_type: conf
metapath: c2p-p2a-a2p-p2c #conf-paper-author-paper-conf
optimizer:
type: Adam
......@@ -39,9 +42,10 @@ data_loader:
walk_path: walks/*
word2id_file: word2id.pkl
batch_size: 32
win_size: 7 # default: 7
win_size: 5 # default: 7
neg_num: 5
min_count: 10
paper_start_index: 1697414
model:
type: SkipgramModel
......
......@@ -101,7 +101,7 @@ class SkipgramModel(object):
pos_score = fl.squeeze(pos_logits, axes=[1])
pos_score = fl.clip(pos_score, min=-10, max=10)
pos_score = -1.0 * fl.logsigmoid(pos_score)
pos_score = -self.neg_num * fl.logsigmoid(pos_score)
neg_logits = fl.matmul(
embed_src, weight_negs,
......@@ -111,4 +111,4 @@ class SkipgramModel(object):
neg_score = -1.0 * fl.logsigmoid(-1.0 * neg_score)
neg_score = fl.reduce_sum(neg_score, dim=1, keep_dim=True)
self.loss = fl.reduce_mean(pos_score + neg_score)
self.loss = fl.reduce_mean(pos_score + neg_score) / self.neg_num / 2
......@@ -18,6 +18,7 @@ training metapath2vec model.
import multiprocessing
from multiprocessing import Pool
from multiprocessing import Process
import argparse
import sys
import os
......@@ -27,7 +28,7 @@ import tqdm
import time
import logging
import random
from pgl.contrib import heter_graph
from pgl import heter_graph
from pgl.sample import metapath_randomwalk
from utils import *
......@@ -77,9 +78,14 @@ class Sampler(object):
self.config['data_path'] + 'paper_conf.txt', self.paper_id2index,
self.conf_id2index)
edges_by_types['edge'] = paper_author_edges + paper_conf_edges
logging.info('%d edges have been loaded.' %
(len(edges_by_types['edge'])))
# edges_by_types['edge'] = paper_author_edges + paper_conf_edges
edges_by_types['p2c'] = paper_conf_edges
edges_by_types['c2p'] = [(dst, src) for src, dst in paper_conf_edges]
edges_by_types['p2a'] = paper_author_edges
edges_by_types['a2p'] = [(dst, src) for src, dst in paper_author_edges]
# logging.info('%d edges have been loaded.' %
# (len(edges_by_types['edge'])))
node_features = {
'index': np.array([i for i in range(num_nodes)]).reshape(
......@@ -110,7 +116,7 @@ class Sampler(object):
return id2index, name2index, node_types
def load_edges(self, file_, src2index, dst2index, symmetry=True):
def load_edges(self, file_, src2index, dst2index, symmetry=False):
"""Load edges from file.
"""
edges = []
......@@ -143,41 +149,65 @@ class Sampler(object):
return index_label_list
def generate_walks(args):
"""Generate metapath random walk and save to file.
def walk_generator(graph, batch_size, metapath, n_type, walk_length):
"""Generate metapath random walk.
"""
g, meta_path, filename, walk_length = args
walks = []
node_types = g._node_types
first_type = meta_path.split('-')[0]
nodes = np.where(node_types == first_type)[0]
if len(nodes) > 4000:
nodes = np.random.choice(nodes, 4000, replace=False)
logging.info('%d number of start nodes' % (len(nodes)))
logging.info('save walks in file: %s' % (filename))
np.random.seed(os.getpid())
while True:
for start_nodes in graph.node_batch_iter(
batch_size=batch_size, n_type=n_type):
walks = metapath_randomwalk(
graph=graph,
start_nodes=start_nodes,
metapath=metapath,
walk_length=walk_length)
yield walks
def walk_to_files(g, batch_size, metapath, n_type, walk_length, max_num,
filename):
"""Generate metapath randomwalk and save in files"""
# g, batch_size, metapath, n_type, walk_length, max_num, filename = args
with open(filename, 'w') as writer:
for start_node in nodes:
walk = metapath_randomwalk(g, start_node, meta_path, walk_length)
walk = [str(walk[i]) for i in range(0, len(walk), 2)] # skip paper
writer.write(' '.join(walk) + '\n')
cc = 0
for walks in walk_generator(g, batch_size, metapath, n_type,
walk_length):
for walk in walks:
writer.write("%s\n" % "\t".join([str(i) for i in walk]))
cc += 1
if cc == max_num:
return
return
def multiprocess_generate_walks_to_files(graph, n_type, meta_path, num_walks,
walk_length, batch_size,
num_sample_workers, saved_path):
"""Use multiprocess to generate metapath random walk to files.
"""
num_nodes_by_type = graph.num_nodes_by_type(n_type)
logging.info("num_nodes_by_type: %s" % num_nodes_by_type)
max_num = (num_walks * num_nodes_by_type // num_sample_workers) + 1
logging.info("max sample number of every worker: %s" % max_num)
def multiprocess_generate_walks(sampler, edge_type, meta_path, num_walks,
walk_length, saved_path):
"""Use multiprocess to generate metapath random walk.
"""
args = []
for i in range(num_walks):
filename = saved_path + '%04d' % (i)
args.append(
(sampler.graph[edge_type], meta_path, filename, walk_length))
pool = Pool(16)
pool.map(generate_walks, args)
pool.close()
pool.join()
for i in range(num_sample_workers):
filename = os.path.join(saved_path, 'part-%05d' % (i))
args.append((graph, batch_size, meta_path, n_type, walk_length,
max_num, filename))
ps = []
for i in range(num_sample_workers):
p = Process(target=walk_to_files, args=args[i])
p.start()
ps.append(p)
for i in range(num_sample_workers):
ps[i].join()
# pool = Pool(num_sample_workers)
# pool.map(walk_to_files, args)
# pool.close()
# pool.join()
if __name__ == "__main__":
......@@ -220,13 +250,15 @@ if __name__ == "__main__":
begin = time.time()
logging.info('multi process sampling')
multiprocess_generate_walks(
sampler=sampler,
edge_type='edge',
multiprocess_generate_walks_to_files(
graph=sampler.graph,
n_type=config['first_node_type'],
meta_path=config['metapath'],
num_walks=config['num_walks'],
walk_length=config['walk_length'],
saved_path=config['walk_saved_path'])
batch_size=config['walk_batch_size'],
num_sample_workers=config['num_sample_workers'],
saved_path=config['walk_saved_path'], )
logging.info('total time: %.4f' % (time.time() - begin))
logging.info('generating multi class data')
......
# STGCN: Spatio-Temporal Graph Convolutional Network
[Spatio-Temporal Graph Convolutional Network \(STGCN\)](https://arxiv.org/pdf/1709.04875.pdf) is a novel deep learning framework for time series prediction. Based on PGL, we reproduce the STGCN algorithm to predict newly confirmed patients in a set of cities from historical migration records.
### Datasets
You can build a customized dataset in the following format (a toy-data sketch is given below the list):
* input.csv: Historical migration records with shape [num\_time\_steps * num\_cities].
* output.csv: Newly confirmed patient records with shape [num\_time\_steps * num\_cities].
* W.csv: Weighted adjacency matrix with shape [num\_cities * num\_cities].
* city.csv: Each line contains a city index and the corresponding city name.
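The sketch below is only meant to illustrate the file layout; the sizes, random values, and the ```dataset/``` paths are assumptions for illustration. Note that the loader used in this example also expects a leading `date` column (dropped on load) and, in this particular example, a `武汉` column that is excluded from training, so adapt the headers to your own preprocessing.
```python
import os
import numpy as np
import pandas as pd

os.makedirs("dataset", exist_ok=True)
num_time_steps, num_cities = 40, 5                      # assumed toy sizes
cities = ["city_%d" % i for i in range(num_cities)]
dates = pd.date_range("2020-01-01", periods=num_time_steps)

# input.csv / output.csv: [num_time_steps * num_cities] plus a `date` column.
for name, high in [("input.csv", 1000), ("output.csv", 100)]:
    df = pd.DataFrame(
        np.random.randint(0, high, (num_time_steps, num_cities)), columns=cities)
    df.insert(0, "date", dates)
    df.to_csv("dataset/%s" % name, index=False)

# W.csv: [num_cities * num_cities] weighted adjacency matrix, no header.
np.savetxt("dataset/W.csv", np.random.rand(num_cities, num_cities), delimiter=",")

# city.csv: one line per city with its index and name.
pd.DataFrame({"num": range(1, num_cities + 1), "city": cities}).to_csv(
    "dataset/city.csv", index=False)
```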
### Dependencies
- paddlepaddle 1.6
- pgl 1.0.0
### How to run
For example, to train STGCN on your dataset with a GPU:
```
python main.py --use_cuda --input_file dataset/input.csv --label_file dataset/output.csv --adj_mat_file dataset/W.csv --city_file dataset/city.csv
```
#### Hyperparameters
- n\_route: Number of cities.
- n\_his: Number of previous time steps of migration records used as model input.
- n\_pred: Number of future time steps of newly confirmed patient records to predict.
- Ks: Number of GCN layers.
- Kt: Kernel size of the temporal convolution.
- use\_cuda: Train on GPU if this flag is set.
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""__init__"""
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""data processing
"""
import numpy as np
import pandas as pd
from utils.math_utils import z_score
class Dataset(object):
"""Dataset
"""
def __init__(self, data, stats):
self.__data = data
self.mean = stats['mean']
self.std = stats['std']
def get_data(self, type): # type: train, val or test
return self.__data[type]
def get_stats(self):
return {'mean': self.mean, 'std': self.std}
def get_len(self, type):
return len(self.__data[type])
def z_inverse(self, type):
return self.__data[type] * self.std + self.mean
def seq_gen(len_seq, data_seq, offset, n_frame, n_route, day_slot, C_0=1):
"""Generate data in the form of standard sequence unit."""
n_slot = day_slot - n_frame + 1
tmp_seq = np.zeros((len_seq * n_slot, n_frame, n_route, C_0))
for i in range(len_seq):
for j in range(n_slot):
sta = (i + offset) * day_slot + j
end = sta + n_frame
tmp_seq[i * n_slot + j, :, :, :] = np.reshape(
data_seq[sta:end, :], [n_frame, n_route, C_0])
return tmp_seq
def adj_matrx_gen_custom(input_file, city_file):
    """Generate the adjacency matrix from the raw migration records.
    """
    print("generate adj_matrix data (this may take a long time)...")
# data
df = pd.read_csv(
input_file,
sep='\t',
names=['date', '迁出省份', '迁出城市', '迁入省份', '迁入城市', '人数'])
    # keep only the data from 2020
df['date'] = pd.to_datetime(df['date'], format="%Y%m%d")
df = df.set_index('date')
df = df['2020']
city_df = pd.read_csv(city_file)
    # drop Wuhan (the first row)
city_df = city_df.drop(0)
num = len(city_df)
matrix = np.zeros([num, num])
for i in city_df['city']:
for j in city_df['city']:
if (i == j):
continue
            # select the daily number of people moving from i to j
cut = df[df['迁出城市'].str.contains(i)]
cut = cut[cut['迁入城市'].str.contains(j)]
            # use the mean as the edge weight
average = cut['人数'].mean()
            # write the weight into the matrix
i_index = int(city_df[city_df['city'] == i]['num']) - 1
j_index = int(city_df[city_df['city'] == j]['num']) - 1
matrix[i_index, j_index] = average
np.savetxt("dataset/W_74.csv", matrix, delimiter=",")
def data_gen_custom(input_file, output_file, city_file, n, n_his, n_pred,
n_config):
"""data_gen_custom"""
print("generate training data...")
# data
df = pd.read_csv(
input_file,
sep='\t',
names=['date', '迁出省份', '迁出城市', '迁入省份', '迁入城市', '人数'])
    # keep only the data from 2020
df['date'] = pd.to_datetime(df['date'], format="%Y%m%d")
df = df.set_index('date')
df = df['2020']
city_df = pd.read_csv(city_file)
input_df = pd.DataFrame()
out_df_wuhan = df[df['迁出城市'].str.contains('武汉')]
for i in city_df['city']:
        # select the rows whose destination city is i
        in_df_i = out_df_wuhan[out_df_wuhan['迁入城市'].str.contains(i)]
        # make sure the rows are in ascending time order
        # in_df_i.sort_values("date",inplace=True)
        # insert the series by date
in_df_i.reset_index(drop=True, inplace=True)
input_df[i] = in_df_i['人数']
    # replace NaN values with 0
input_df = input_df.replace(np.nan, 0)
x = input_df
y = pd.read_csv(output_file)
    # drop the unnamed index column
x.drop(
x.columns[x.columns.str.contains(
'unnamed', case=False)],
axis=1,
inplace=True)
y = y.drop(columns=['date'])
    # drop the column of migrations into Wuhan
x = x.drop(columns=['武汉'])
y = y.drop(columns=['武汉'])
# param
n_val, n_test = n_config
n_train = len(y) - n_val - n_test - 2
# (?,26,74,1)
df = pd.DataFrame(columns=x.columns)
for i in range(len(y) - n_pred + 1):
df = df.append(x[i:i + n_his])
df = df.append(y[i:i + n_pred])
data = df.values.reshape(-1, n_his + n_pred, n,
1) # n == num_nodes == city num
x_stats = {'mean': np.mean(data), 'std': np.std(data)}
x_train = data[:n_train]
x_val = data[n_train:n_train + n_val]
x_test = data[n_train + n_val:]
x_data = {'train': x_train, 'val': x_val, 'test': x_test}
dataset = Dataset(x_data, x_stats)
print("generate successfully!")
return dataset
def data_gen_mydata(input_file, label_file, n, n_his, n_pred, n_config):
"""data processing
"""
# data
x = pd.read_csv(input_file)
y = pd.read_csv(label_file)
x = x.drop(columns=['date'])
y = y.drop(columns=['date'])
x = x.drop(columns=['武汉'])
y = y.drop(columns=['武汉'])
# param
n_val, n_test = n_config
n_train = len(y) - n_val - n_test - 2
# (?,26,74,1)
df = pd.DataFrame(columns=x.columns)
for i in range(len(y) - n_pred + 1):
df = df.append(x[i:i + n_his])
df = df.append(y[i:i + n_pred])
data = df.values.reshape(-1, n_his + n_pred, n, 1)
x_stats = {'mean': np.mean(data), 'std': np.std(data)}
x_train = data[:n_train]
x_val = data[n_train:n_train + n_val]
x_test = data[n_train + n_val:]
x_data = {'train': x_train, 'val': x_val, 'test': x_test}
dataset = Dataset(x_data, x_stats)
return dataset
def data_gen(file_path, data_config, n_route, n_frame=21, day_slot=288):
"""Source file load and dataset generation."""
n_train, n_val, n_test = data_config
# generate training, validation and test data
    try:
        data_seq = pd.read_csv(file_path, header=None).values
    except FileNotFoundError:
        print(f'ERROR: input file was not found in {file_path}.')
        raise
seq_train = seq_gen(n_train, data_seq, 0, n_frame, n_route, day_slot)
seq_val = seq_gen(n_val, data_seq, n_train, n_frame, n_route, day_slot)
seq_test = seq_gen(n_test, data_seq, n_train + n_val, n_frame, n_route,
day_slot)
# x_stats: dict, the stats for the train dataset, including the value of mean and standard deviation.
x_stats = {'mean': np.mean(seq_train), 'std': np.std(seq_train)}
# x_train, x_val, x_test: np.array, [sample_size, n_frame, n_route, channel_size].
x_train = z_score(seq_train, x_stats['mean'], x_stats['std'])
x_val = z_score(seq_val, x_stats['mean'], x_stats['std'])
x_test = z_score(seq_test, x_stats['mean'], x_stats['std'])
x_data = {'train': x_train, 'val': x_val, 'test': x_test}
dataset = Dataset(x_data, x_stats)
return dataset
def gen_batch(inputs, batch_size, dynamic_batch=False, shuffle=False):
"""Data iterator in batch.
Args:
inputs: np.ndarray, [len_seq, n_frame, n_route, C_0], standard sequence units.
batch_size: int, size of batch.
dynamic_batch: bool, whether changes the batch size in the last batch
if its length is less than the default.
shuffle: bool, whether shuffle the batches.
"""
len_inputs = len(inputs)
if shuffle:
idx = np.arange(len_inputs)
np.random.shuffle(idx)
for start_idx in range(0, len_inputs, batch_size):
end_idx = start_idx + batch_size
if end_idx > len_inputs:
if dynamic_batch:
end_idx = len_inputs
else:
break
if shuffle:
slide = idx[start_idx:end_idx]
else:
slide = slice(start_idx, end_idx)
yield inputs[slide]
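# A hedged usage sketch (not part of the original file): iterating mini-batches over a
# toy standard-sequence array of shape [len_seq, n_frame, n_route, C_0].
if __name__ == "__main__":
    demo = np.random.rand(25, 26, 74, 1).astype("float32")
    for batch in gen_batch(demo, batch_size=10, dynamic_batch=True, shuffle=True):
        print(batch.shape)  # (10, 26, 74, 1) twice, then (5, 26, 74, 1)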
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""PGL Graph
"""
import sys
import os
import numpy as np
import pandas as pd
from pgl.graph import Graph
def weight_matrix(file_path, sigma2=0.1, epsilon=0.5, scaling=True):
"""Load weight matrix function."""
    try:
        W = pd.read_csv(file_path, header=None).values
    except FileNotFoundError:
        print(f'ERROR: input file was not found in {file_path}.')
        raise
# check whether W is a 0/1 matrix.
if set(np.unique(W)) == {0, 1}:
print('The input graph is a 0/1 matrix; set "scaling" to False.')
scaling = False
if scaling:
n = W.shape[0]
W = W / 10000.
W2, W_mask = W * W, np.ones([n, n]) - np.identity(n)
        # refer to Eq.10 of the STGCN paper: w_ij = exp(-W_ij^2 / sigma^2) if that value >= epsilon and i != j, else 0
return np.exp(-W2 / sigma2) * (
np.exp(-W2 / sigma2) >= epsilon) * W_mask
else:
return W
class GraphFactory(object):
"""GraphFactory"""
def __init__(self, args):
self.args = args
self.adj_matrix = weight_matrix(self.args.adj_mat_file)
L = np.eye(self.adj_matrix.shape[0]) + self.adj_matrix
D = np.sum(self.adj_matrix, axis=1)
# L = D - self.adj_matrix
edges = []
weights = []
for i in range(self.adj_matrix.shape[0]):
for j in range(self.adj_matrix.shape[1]):
edges.append([i, j])
weights.append(L[i][j])
self.edges = np.array(edges, dtype=np.int64)
self.weights = np.array(weights, dtype=np.float32).reshape(-1, 1)
self.norm = np.zeros_like(D, dtype=np.float32)
self.norm[D > 0] = np.power(D[D > 0], -0.5)
self.norm = self.norm.reshape(-1, 1)
def build_graph(self, x_batch):
"""build graph"""
B, T, n, _ = x_batch.shape
batch = B * T
        batch_edges = []
        # replicate the base edge list B * T times, offsetting node ids by i * n so that
        # each (sample, time-step) slice forms an independent copy of the city graph
        for i in range(batch):
            batch_edges.append(self.edges + (i * n))
        batch_edges = np.vstack(batch_edges)
num_nodes = B * T * n
node_feat = {'norm': np.tile(self.norm, [batch, 1])}
edge_feat = {'weights': np.tile(self.weights, [batch, 1])}
graph = Graph(
num_nodes=num_nodes,
edges=batch_edges,
node_feat=node_feat,
edge_feat=edge_feat)
return graph
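# A hedged usage sketch (not part of the original file): with parsed args providing
# `adj_mat_file`, a batched graph for an input slice x of shape [B, T, n, 1] is built as
#   gf = GraphFactory(args)
#   graph = gf.build_graph(x)      # B * T disjoint copies of the city graph
#   feed = graph_wrapper.to_feed(graph)
# which is how main.py feeds the graph into the PGL GraphWrapper at every training step.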
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
This file implement the training process of STGCN model.
"""
import os
import sys
import time
import argparse
import numpy as np
import paddle.fluid as fluid
import paddle.fluid.layers as fl
import pgl
from pgl.utils.logger import log
from data_loader.data_utils import data_gen_mydata, gen_batch
from data_loader.graph import GraphFactory
from models.model import STGCNModel
from models.tester import model_inference, model_test
def main(args):
"""main"""
PeMS = data_gen_mydata(args.input_file, args.label_file, args.n_route,
args.n_his, args.n_pred, (args.n_val, args.n_test))
log.info(PeMS.get_stats())
log.info(PeMS.get_len('train'))
gf = GraphFactory(args)
place = fluid.CUDAPlace(0) if args.use_cuda else fluid.CPUPlace()
train_program = fluid.Program()
startup_program = fluid.Program()
with fluid.program_guard(train_program, startup_program):
gw = pgl.graph_wrapper.GraphWrapper(
"gw",
place,
node_feat=[('norm', [None, 1], "float32")],
edge_feat=[('weights', [None, 1], "float32")])
model = STGCNModel(args, gw)
train_loss, y_pred = model.forward()
infer_program = train_program.clone(for_test=True)
with fluid.program_guard(train_program, startup_program):
epoch_step = int(PeMS.get_len('train') / args.batch_size) + 1
lr = fl.exponential_decay(
learning_rate=args.lr,
decay_steps=5 * epoch_step,
decay_rate=0.7,
staircase=True)
if args.opt == 'RMSProp':
train_op = fluid.optimizer.RMSPropOptimizer(lr).minimize(
train_loss)
elif args.opt == 'ADAM':
train_op = fluid.optimizer.Adam(lr).minimize(train_loss)
exe = fluid.Executor(place)
exe.run(startup_program)
if args.inf_mode == 'sep':
# for inference mode 'sep', the type of step index is int.
step_idx = args.n_pred - 1
tmp_idx = [step_idx]
min_val = min_va_val = np.array([4e1, 1e5, 1e5])
elif args.inf_mode == 'merge':
# for inference mode 'merge', the type of step index is np.ndarray.
step_idx = tmp_idx = np.arange(3, args.n_pred + 1, 3) - 1
min_val = min_va_val = np.array([4e1, 1e5, 1e5]) * len(step_idx)
else:
raise ValueError(f'ERROR: test mode "{args.inf_mode}" is not defined.')
step = 0
for epoch in range(1, args.epochs + 1):
for idx, x_batch in enumerate(
gen_batch(
PeMS.get_data('train'),
args.batch_size,
dynamic_batch=True,
shuffle=True)):
x = np.array(x_batch[:, 0:args.n_his, :, :], dtype=np.float32)
graph = gf.build_graph(x)
feed = gw.to_feed(graph)
feed['input'] = np.array(
x_batch[:, 0:args.n_his + 1, :, :], dtype=np.float32)
b_loss, b_lr = exe.run(train_program,
feed=feed,
fetch_list=[train_loss, lr])
if idx % 5 == 0:
log.info("epoch %d | step %d | lr %.6f | loss %.6f" %
(epoch, idx, b_lr[0], b_loss[0]))
min_va_val, min_val = \
model_inference(exe, gw, gf, infer_program, y_pred, PeMS, args, \
step_idx, min_va_val, min_val)
for ix in tmp_idx:
va, te = min_va_val[ix - 2:ix + 1], min_val[ix - 2:ix + 1]
print(f'Time Step {ix + 1}: '
f'MAPE {va[0]:7.3%}, {te[0]:7.3%}; '
f'MAE {va[1]:4.3f}, {te[1]:4.3f}; '
f'RMSE {va[2]:6.3f}, {te[2]:6.3f}.')
if epoch % 5 == 0:
model_test(exe, gw, gf, infer_program, y_pred, PeMS, args)
if __name__ == "__main__":
parser = argparse.ArgumentParser()
parser.add_argument('--n_route', type=int, default=74)
parser.add_argument('--n_his', type=int, default=23)
parser.add_argument('--n_pred', type=int, default=3)
parser.add_argument('--batch_size', type=int, default=10)
parser.add_argument('--epochs', type=int, default=100)
parser.add_argument('--save', type=int, default=10)
parser.add_argument('--Ks', type=int, default=3) #equal to num_layers
parser.add_argument('--Kt', type=int, default=3)
parser.add_argument('--lr', type=float, default=1e-2)
parser.add_argument('--keep_prob', type=float, default=1.0)
parser.add_argument('--opt', type=str, default='RMSProp')
parser.add_argument('--inf_mode', type=str, default='sep')
parser.add_argument('--input_file', type=str, default='dataset/input.csv')
parser.add_argument('--label_file', type=str, default='dataset/output.csv')
parser.add_argument(
'--city_file', type=str, default='dataset/crawl_list.csv')
parser.add_argument('--adj_mat_file', type=str, default='dataset/W_74.csv')
parser.add_argument('--output_path', type=str, default='./outputs/')
    parser.add_argument('--n_val', type=int, default=1)
    parser.add_argument('--n_test', type=int, default=1)
parser.add_argument('--use_cuda', action='store_true')
args = parser.parse_args()
blocks = [[1, 32, 64], [64, 32, 128]]
args.blocks = blocks
log.info(args)
if not os.path.exists(args.output_path):
os.makedirs(args.output_path)
main(args)
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""This file implement the STGCN model.
"""
import numpy as np
import paddle.fluid as fluid
import paddle.fluid.layers as fl
import pgl
class STGCNModel(object):
"""Implementation of Spatio-Temporal Graph Convolutional Networks"""
def __init__(self, args, gw):
self.args = args
self.gw = gw
self.input = fl.data(
name="input",
shape=[None, args.n_his + 1, args.n_route, 1],
dtype="float32")
def forward(self):
"""forward"""
x = self.input[:, 0:self.args.n_his, :, :]
# Ko>0: kernel size of temporal convolution in the output layer.
Ko = self.args.n_his
# ST-Block
for i, channels in enumerate(self.args.blocks):
x = self.st_conv_block(
x,
self.args.Ks,
self.args.Kt,
channels,
"st_conv_%d" % i,
self.args.keep_prob,
act_func='GLU')
# output layer
if Ko > 1:
y = self.output_layer(x, Ko, 'output_layer')
else:
raise ValueError(f'ERROR: kernel size Ko must be greater than 1, \
but received "{Ko}".')
label = self.input[:, self.args.n_his:self.args.n_his + 1, :, :]
train_loss = fl.reduce_sum((y - label) * (y - label))
single_pred = y[:, 0, :, :] # shape: [batch, n, 1]
return train_loss, single_pred
def st_conv_block(self,
x,
Ks,
Kt,
channels,
name,
keep_prob,
act_func='GLU'):
"""Spatio-Temporal convolution block"""
c_si, c_t, c_oo = channels
x_s = self.temporal_conv_layer(
x, Kt, c_si, c_t, "%s_tconv_in" % name, act_func=act_func)
x_t = self.spatio_conv_layer(x_s, Ks, c_t, c_t, "%s_sonv" % name)
x_o = self.temporal_conv_layer(x_t, Kt, c_t, c_oo,
"%s_tconv_out" % name)
x_ln = fl.layer_norm(x_o)
return fl.dropout(x_ln, dropout_prob=(1.0 - keep_prob))
def temporal_conv_layer(self, x, Kt, c_in, c_out, name, act_func='relu'):
"""Temporal convolution layer"""
_, T, n, _ = x.shape
if c_in > c_out:
x_input = fl.conv2d(
input=x,
num_filters=c_out,
filter_size=[1, 1],
stride=[1, 1],
padding="SAME",
data_format="NHWC",
param_attr=fluid.ParamAttr(name="%s_conv2d_1" % name))
elif c_in < c_out:
# if the size of input channel is less than the output,
# padding x to the same size of output channel.
pad = fl.fill_constant_batch_size_like(
input=x,
shape=[-1, T, n, c_out - c_in],
dtype="float32",
value=0.0)
x_input = fl.concat([x, pad], axis=3)
else:
x_input = x
# x_input = x_input[:, Kt - 1:T, :, :]
if act_func == 'GLU':
            # gated linear unit
bt_init = fluid.initializer.ConstantInitializer(value=0.0)
bt = fl.create_parameter(
shape=[2 * c_out],
dtype="float32",
attr=fluid.ParamAttr(
name="%s_bt" % name, trainable=True, initializer=bt_init),
)
x_conv = fl.conv2d(
input=x,
num_filters=2 * c_out,
filter_size=[Kt, 1],
stride=[1, 1],
padding="SAME",
data_format="NHWC",
param_attr=fluid.ParamAttr(name="%s_conv2d_wt" % name))
x_conv = x_conv + bt
return (x_conv[:, :, :, 0:c_out] + x_input
) * fl.sigmoid(x_conv[:, :, :, -c_out:])
else:
bt_init = fluid.initializer.ConstantInitializer(value=0.0)
bt = fl.create_parameter(
shape=[c_out],
dtype="float32",
attr=fluid.ParamAttr(
name="%s_bt" % name, trainable=True, initializer=bt_init),
)
x_conv = fl.conv2d(
input=x,
num_filters=c_out,
filter_size=[Kt, 1],
stride=[1, 1],
padding="SAME",
data_format="NHWC",
param_attr=fluid.ParamAttr(name="%s_conv2d_wt" % name))
x_conv = x_conv + bt
if act_func == "linear":
return x_conv
elif act_func == "sigmoid":
return fl.sigmoid(x_conv)
elif act_func == "relu":
return fl.relu(x_conv + x_input)
else:
raise ValueError(
f'ERROR: activation function "{act_func}" is not defined.')
def spatio_conv_layer(self, x, Ks, c_in, c_out, name):
"""Spatio convolution layer"""
_, T, n, _ = x.shape
if c_in > c_out:
x_input = fl.conv2d(
input=x,
num_filters=c_out,
filter_size=[1, 1],
stride=[1, 1],
padding="SAME",
data_format="NHWC",
param_attr=fluid.ParamAttr(name="%s_conv2d_1" % name))
elif c_in < c_out:
# if the size of input channel is less than the output,
# padding x to the same size of output channel.
pad = fl.fill_constant_batch_size_like(
input=x,
shape=[-1, T, n, c_out - c_in],
dtype="float32",
value=0.0)
x_input = fl.concat([x, pad], axis=3)
else:
x_input = x
for i in range(Ks):
# x_input shape: [B,T, num_nodes, c_out]
x_input = fl.reshape(x_input, [-1, c_out])
x_input = self.message_passing(
self.gw,
x_input,
name="%s_mp_%d" % (name, i),
norm=self.gw.node_feat["norm"])
x_input = fl.fc(x_input,
size=c_out,
bias_attr=False,
param_attr=fluid.ParamAttr(name="%s_gcn_fc_%d" %
(name, i)))
bias = fluid.layers.create_parameter(
shape=[c_out],
dtype='float32',
is_bias=True,
name='%s_gcn_bias_%d' % (name, i))
x_input = fluid.layers.elementwise_add(x_input, bias, act="relu")
x_input = fl.reshape(x_input, [-1, T, n, c_out])
return x_input
def message_passing(self, gw, feature, name, norm=None):
"""Message passing layer"""
def send_src_copy(src_feat, dst_feat, edge_feat):
"""send function"""
return src_feat["h"] * edge_feat['w']
if norm is not None:
feature = feature * norm
msg = gw.send(
send_src_copy,
nfeat_list=[("h", feature)],
efeat_list=[('w', gw.edge_feat['weights'])])
output = gw.recv(msg, "sum")
if norm is not None:
output = output * norm
return output
def output_layer(self, x, T, name, act_func='GLU'):
"""Output layer"""
_, _, n, channel = x.shape
# maps multi-steps to one.
x_i = self.temporal_conv_layer(
x=x,
Kt=T,
c_in=channel,
c_out=channel,
name="%s_in" % name,
act_func=act_func)
x_ln = fl.layer_norm(x_i)
x_o = self.temporal_conv_layer(
x=x_ln,
Kt=1,
c_in=channel,
c_out=channel,
name="%s_out" % name,
act_func='sigmoid')
# maps multi-channels to one.
x_fc = self.fully_con_layer(
x=x_o, n=n, channel=channel, name="%s_fc" % name)
return x_fc
def fully_con_layer(self, x, n, channel, name):
"""Fully connected layer"""
bt_init = fluid.initializer.ConstantInitializer(value=0.0)
bt = fl.create_parameter(
shape=[n, 1],
dtype="float32",
attr=fluid.ParamAttr(
name="%s_bt" % name, trainable=True, initializer=bt_init), )
x_conv = fl.conv2d(
input=x,
num_filters=1,
filter_size=[1, 1],
stride=[1, 1],
padding="SAME",
data_format="NHWC",
param_attr=fluid.ParamAttr(name="%s_conv2d" % name))
x_conv = x_conv + bt
return x_conv
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""This file implement the testing process of STGCN model.
"""
import os
import sys
import time
import argparse
import numpy as np
import pandas as pd
import paddle.fluid as fluid
import paddle.fluid.layers as fl
import pgl
from pgl.utils.logger import log
from data_loader.data_utils import gen_batch
from utils.math_utils import evaluation
def multi_pred(exe, gw, gf, program, y_pred, seq, batch_size, \
n_his, n_pred, step_idx, dynamic_batch=True):
"""multi step prediction"""
pred_list = []
for i in gen_batch(
seq, min(batch_size, len(seq)), dynamic_batch=dynamic_batch):
# Note: use np.copy() to avoid the modification of source data.
test_seq = np.copy(i[:, 0:n_his + 1, :, :]).astype(np.float32)
graph = gf.build_graph(i[:, 0:n_his, :, :])
feed = gw.to_feed(graph)
step_list = []
for j in range(n_pred):
feed['input'] = test_seq
pred = exe.run(program, feed=feed, fetch_list=[y_pred])
if isinstance(pred, list):
pred = np.array(pred[0])
test_seq[:, 0:n_his - 1, :, :] = test_seq[:, 1:n_his, :, :]
test_seq[:, n_his - 1, :, :] = pred
step_list.append(pred)
pred_list.append(step_list)
    # pred_array -> [n_pred, len(seq), n_route, C_0]
pred_array = np.concatenate(pred_list, axis=1)
return pred_array, pred_array.shape[1]
def model_inference(exe, gw, gf, program, pred, inputs, args, step_idx,
min_va_val, min_val):
"""inference model"""
x_val, x_test, x_stats = inputs.get_data('val'), inputs.get_data(
'test'), inputs.get_stats()
if args.n_his + args.n_pred > x_val.shape[1]:
raise ValueError(
f'ERROR: the value of n_pred "{args.n_pred}" exceeds the length limit.'
)
    # y_val shape: [n_pred, len(x_val), n_route, C_0]
y_val, len_val = multi_pred(exe, gw, gf, program, pred, \
x_val, args.batch_size, args.n_his, args.n_pred, step_idx)
evl_val = evaluation(x_val[0:len_val, step_idx + args.n_his, :, :],
y_val[step_idx], x_stats)
# chks: indicator that reflects the relationship of values between evl_val and min_va_val.
chks = evl_val < min_va_val
# update the metric on test set, if model's performance got improved on the validation.
if sum(chks):
min_va_val[chks] = evl_val[chks]
y_pred, len_pred = multi_pred(exe, gw, gf, program, pred, \
x_test, args.batch_size, args.n_his, args.n_pred, step_idx)
evl_pred = evaluation(x_test[0:len_pred, step_idx + args.n_his, :, :],
y_pred[step_idx], x_stats)
min_val = evl_pred
return min_va_val, min_val
def model_test(exe, gw, gf, program, pred, inputs, args):
"""test model"""
if args.inf_mode == 'sep':
# for inference mode 'sep', the type of step index is int.
step_idx = args.n_pred - 1
tmp_idx = [step_idx]
elif args.inf_mode == 'merge':
# for inference mode 'merge', the type of step index is np.ndarray.
step_idx = tmp_idx = np.arange(3, args.n_pred + 1, 3) - 1
print(step_idx)
else:
raise ValueError(f'ERROR: test mode "{args.inf_mode}" is not defined.')
x_test, x_stats = inputs.get_data('test'), inputs.get_stats()
y_test, len_test = multi_pred(exe, gw, gf, program, pred, \
x_test, args.batch_size, args.n_his, args.n_pred, step_idx)
# save result
gt = x_test[0:len_test, args.n_his:, :, :].reshape(-1, args.n_route)
y_pred = y_test.reshape(-1, args.n_route)
city_df = pd.read_csv(args.city_file)
city_df = city_df.drop(0)
np.savetxt(
os.path.join(args.output_path, "groundtruth.csv"),
gt.astype(np.int32),
fmt='%d',
delimiter=',',
header=",".join(city_df['city']))
np.savetxt(
os.path.join(args.output_path, "prediction.csv"),
y_pred.astype(np.int32),
fmt='%d',
delimiter=",",
header=",".join(city_df['city']))
for i in range(step_idx + 1):
evl = evaluation(x_test[0:len_test, step_idx + args.n_his, :, :],
y_test[i], x_stats)
for ix in tmp_idx:
te = evl[ix - 2:ix + 1]
print(
f'Time Step {i + 1}: MAPE {te[0]:7.3%}; MAE {te[1]:4.3f}; RMSE {te[2]:6.3f}.'
)
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Evaluation"""
import os
import sys
import time
import argparse
import numpy as np
def z_score(x, mean, std):
"""z_score"""
return (x - mean) / std
def z_inverse(x, mean, std):
"""The inverse of function z_score"""
return x * std + mean
def MAPE(v, v_):
"""Mean absolute percentage error."""
return np.mean(np.abs(v_ - v) / (v + 1e-5))
def RMSE(v, v_):
"""Mean squared error."""
return np.sqrt(np.mean((v_ - v)**2))
def MAE(v, v_):
"""Mean absolute error."""
return np.mean(np.abs(v_ - v))
def evaluation(y, y_, x_stats):
"""Calculate MAPE, MAE and RMSE between ground truth and prediction."""
dim = len(y_.shape)
if dim == 3:
# single_step case
v = z_inverse(y, x_stats['mean'], x_stats['std'])
v_ = z_inverse(y_, x_stats['mean'], x_stats['std'])
return np.array([MAPE(v, v_), MAE(v, v_), RMSE(v, v_)])
else:
# multi_step case
tmp_list = []
# y -> [time_step, batch_size, n_route, 1]
y = np.swapaxes(y, 0, 1)
# recursively call
for i in range(y_.shape[0]):
tmp_res = evaluation(y[i], y_[i], x_stats)
tmp_list.append(tmp_res)
return np.concatenate(tmp_list, axis=-1)
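# A hedged usage sketch (not part of the original file): computing the metrics for a toy
# single-step prediction; shapes follow the single-step case handled above.
if __name__ == "__main__":
    rng = np.random.RandomState(0)
    y_true = rng.rand(8, 74, 1)                  # [batch_size, n_route, 1], z-scored
    y_hat = y_true + 0.1 * rng.rand(8, 74, 1)
    stats = {'mean': 10.0, 'std': 2.0}           # assumed training-set statistics
    print(evaluation(y_true, y_hat, stats))      # -> np.array([MAPE, MAE, RMSE])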
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""test ogb
"""
import argparse
import pgl
import numpy as np
import paddle.fluid as fluid
from pgl.contrib.ogb.graphproppred.dataset_pgl import PglGraphPropPredDataset
from pgl.utils import paddle_helper
from ogb.graphproppred import Evaluator
from pgl.contrib.ogb.graphproppred.mol_encoder import AtomEncoder, BondEncoder
def train(exe, batch_size, graph_wrapper, train_program, splitted_idx, dataset,
evaluator, fetch_loss, fetch_pred):
"""Train"""
graphs, labels = dataset[splitted_idx["train"]]
perm = np.arange(0, len(graphs))
np.random.shuffle(perm)
start_batch = 0
batch_no = 0
pred_output = np.zeros_like(labels, dtype="float32")
while start_batch < len(perm):
batch_index = perm[start_batch:start_batch + batch_size]
start_batch += batch_size
batch_graph = pgl.graph.MultiGraph(graphs[batch_index])
batch_label = labels[batch_index]
batch_valid = (batch_label == batch_label).astype("float32")
batch_label = np.nan_to_num(batch_label).astype("float32")
feed_dict = graph_wrapper.to_feed(batch_graph)
feed_dict["label"] = batch_label
feed_dict["weight"] = batch_valid
loss, pred = exe.run(train_program,
feed=feed_dict,
fetch_list=[fetch_loss, fetch_pred])
pred_output[batch_index] = pred
batch_no += 1
print("train", evaluator.eval({"y_true": labels, "y_pred": pred_output}))
def evaluate(exe, batch_size, graph_wrapper, val_program, splitted_idx,
dataset, mode, evaluator, fetch_pred):
"""Eval"""
graphs, labels = dataset[splitted_idx[mode]]
perm = np.arange(0, len(graphs))
start_batch = 0
batch_no = 0
pred_output = np.zeros_like(labels, dtype="float32")
while start_batch < len(perm):
batch_index = perm[start_batch:start_batch + batch_size]
start_batch += batch_size
batch_graph = pgl.graph.MultiGraph(graphs[batch_index])
feed_dict = graph_wrapper.to_feed(batch_graph)
pred = exe.run(val_program, feed=feed_dict, fetch_list=[fetch_pred])
pred_output[batch_index] = pred[0]
batch_no += 1
print(mode, evaluator.eval({"y_true": labels, "y_pred": pred_output}))
def send_func(src_feat, dst_feat, edge_feat):
"""Send"""
return src_feat["h"] + edge_feat["h"]
class GNNModel(object):
"""GNNModel"""
def __init__(self, name, emb_dim, num_task, num_layers):
self.num_task = num_task
self.emb_dim = emb_dim
self.num_layers = num_layers
self.name = name
self.atom_encoder = AtomEncoder(name=name, emb_dim=emb_dim)
self.bond_encoder = BondEncoder(name=name, emb_dim=emb_dim)
def forward(self, graph):
"""foward"""
h_node = self.atom_encoder(graph.node_feat['feat'])
h_edge = self.bond_encoder(graph.edge_feat['feat'])
for layer in range(self.num_layers):
msg = graph.send(
send_func,
nfeat_list=[("h", h_node)],
efeat_list=[("h", h_edge)])
h_node = graph.recv(msg, 'sum') + h_node
h_node = fluid.layers.fc(h_node,
size=self.emb_dim,
name=self.name + '_%s' % layer,
act="relu")
graph_nodes = pgl.layers.graph_pooling(graph, h_node, "average")
graph_pred = fluid.layers.fc(graph_nodes, self.num_task, name="final")
return graph_pred
def main():
"""main
"""
# Training settings
parser = argparse.ArgumentParser(description='Graph Dataset')
parser.add_argument(
'--epochs',
type=int,
default=100,
help='number of epochs to train (default: 100)')
parser.add_argument(
'--dataset',
type=str,
default="ogbg-mol-tox21",
help='dataset name (default: proteinfunc)')
args = parser.parse_args()
place = fluid.CPUPlace() # Dataset too big to use GPU
### automatic dataloading and splitting
dataset = PglGraphPropPredDataset(name=args.dataset)
splitted_idx = dataset.get_idx_split()
### automatic evaluator. takes dataset name as input
evaluator = Evaluator(args.dataset)
graph_data, label = dataset[:2]
batch_graph = pgl.graph.MultiGraph(graph_data)
graph_data = batch_graph
train_program = fluid.Program()
startup_program = fluid.Program()
test_program = fluid.Program()
# degree normalize
graph_data.edge_feat["feat"] = graph_data.edge_feat["feat"].astype("int64")
graph_data.node_feat["feat"] = graph_data.node_feat["feat"].astype("int64")
model = GNNModel(
name="gnn", num_task=dataset.num_tasks, emb_dim=64, num_layers=2)
with fluid.program_guard(train_program, startup_program):
gw = pgl.graph_wrapper.GraphWrapper(
"graph",
place=place,
node_feat=graph_data.node_feat_info(),
edge_feat=graph_data.edge_feat_info())
pred = model.forward(gw)
sigmoid_pred = fluid.layers.sigmoid(pred)
val_program = train_program.clone(for_test=True)
initializer = []
with fluid.program_guard(train_program, startup_program):
train_label = fluid.layers.data(
name="label", dtype="float32", shape=[None, dataset.num_tasks])
train_weight = fluid.layers.data(
name="weight", dtype="float32", shape=[None, dataset.num_tasks])
train_loss_t = fluid.layers.sigmoid_cross_entropy_with_logits(
x=pred, label=train_label) * train_weight
train_loss_t = fluid.layers.reduce_sum(train_loss_t)
adam = fluid.optimizer.Adam(
learning_rate=1e-2,
regularization=fluid.regularizer.L2DecayRegularizer(
regularization_coeff=0.0005))
adam.minimize(train_loss_t)
exe = fluid.Executor(place)
exe.run(startup_program)
for epoch in range(1, args.epochs + 1):
print("Epoch", epoch)
train(exe, 128, gw, train_program, splitted_idx, dataset, evaluator,
train_loss_t, sigmoid_pred)
evaluate(exe, 128, gw, val_program, splitted_idx, dataset, "valid",
evaluator, sigmoid_pred)
evaluate(exe, 128, gw, val_program, splitted_idx, dataset, "test",
evaluator, sigmoid_pred)
if __name__ == "__main__":
main()
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""test ogb
"""
import argparse
import time
import logging
import numpy as np
import paddle.fluid as fluid
import pgl
from pgl.contrib.ogb.linkproppred.dataset_pgl import PglLinkPropPredDataset
from pgl.utils import paddle_helper
from ogb.linkproppred import Evaluator
def send_func(src_feat, dst_feat, edge_feat):
"""send_func"""
return src_feat["h"]
def recv_func(feat):
"""recv_func"""
return fluid.layers.sequence_pool(feat, pool_type="sum")
class GNNModel(object):
"""GNNModel"""
def __init__(self, name, num_nodes, emb_dim, num_layers):
self.num_nodes = num_nodes
self.emb_dim = emb_dim
self.num_layers = num_layers
self.name = name
self.src_nodes = fluid.layers.data(
name='src_nodes',
shape=[None],
dtype='int64', )
self.dst_nodes = fluid.layers.data(
name='dst_nodes',
shape=[None],
dtype='int64', )
self.edge_label = fluid.layers.data(
name='edge_label',
shape=[None, 1],
dtype='float32', )
def forward(self, graph):
"""forward"""
h = fluid.layers.create_parameter(
shape=[self.num_nodes, self.emb_dim],
dtype="float32",
name=self.name + "_embedding")
for layer in range(self.num_layers):
msg = graph.send(
send_func,
nfeat_list=[("h", h)], )
h = graph.recv(msg, recv_func)
h = fluid.layers.fc(
h,
size=self.emb_dim,
bias_attr=False,
param_attr=fluid.ParamAttr(name=self.name + '_%s' % layer))
h = h * graph.node_feat["norm"]
bias = fluid.layers.create_parameter(
shape=[self.emb_dim],
dtype='float32',
is_bias=True,
name=self.name + '_bias_%s' % layer)
h = fluid.layers.elementwise_add(h, bias, act="relu")
src = fluid.layers.gather(h, self.src_nodes, overwrite=False)
dst = fluid.layers.gather(h, self.dst_nodes, overwrite=False)
edge_embed = src * dst
pred = fluid.layers.fc(input=edge_embed,
size=1,
name=self.name + "_pred_output")
prob = fluid.layers.sigmoid(pred)
loss = fluid.layers.sigmoid_cross_entropy_with_logits(pred,
self.edge_label)
loss = fluid.layers.reduce_mean(loss)
return pred, prob, loss
def main():
"""main
"""
# Training settings
parser = argparse.ArgumentParser(description='Graph Dataset')
parser.add_argument(
'--epochs',
type=int,
default=4,
help='number of epochs to train (default: 100)')
parser.add_argument(
'--dataset',
type=str,
default="ogbl-ppa",
help='dataset name (default: protein protein associations)')
parser.add_argument('--use_cuda', action='store_true')
parser.add_argument('--batch_size', type=int, default=5120)
parser.add_argument('--embed_dim', type=int, default=64)
parser.add_argument('--num_layers', type=int, default=2)
parser.add_argument('--lr', type=float, default=0.001)
args = parser.parse_args()
print(args)
place = fluid.CUDAPlace(0) if args.use_cuda else fluid.CPUPlace()
### automatic dataloading and splitting
print("loadding dataset")
dataset = PglLinkPropPredDataset(name=args.dataset)
splitted_edge = dataset.get_edge_split()
print(splitted_edge['train_edge'].shape)
print(splitted_edge['train_edge_label'].shape)
print("building evaluator")
### automatic evaluator. takes dataset name as input
evaluator = Evaluator(args.dataset)
graph_data = dataset[0]
print("num_nodes: %d" % graph_data.num_nodes)
train_program = fluid.Program()
startup_program = fluid.Program()
# degree normalize
indegree = graph_data.indegree()
norm = np.zeros_like(indegree, dtype="float32")
norm[indegree > 0] = np.power(indegree[indegree > 0], -0.5)
graph_data.node_feat["norm"] = np.expand_dims(norm, -1).astype("float32")
# graph_data.node_feat["index"] = np.array([i for i in range(graph_data.num_nodes)], dtype=np.int64).reshape(-1,1)
with fluid.program_guard(train_program, startup_program):
model = GNNModel(
name="gnn",
num_nodes=graph_data.num_nodes,
emb_dim=args.embed_dim,
num_layers=args.num_layers)
gw = pgl.graph_wrapper.GraphWrapper(
"graph",
place,
node_feat=graph_data.node_feat_info(),
edge_feat=graph_data.edge_feat_info())
pred, prob, loss = model.forward(gw)
val_program = train_program.clone(for_test=True)
with fluid.program_guard(train_program, startup_program):
global_steps = int(splitted_edge['train_edge'].shape[0] /
args.batch_size * 2)
learning_rate = fluid.layers.polynomial_decay(args.lr, global_steps,
0.00005)
adam = fluid.optimizer.Adam(
learning_rate=learning_rate,
regularization=fluid.regularizer.L2DecayRegularizer(
regularization_coeff=0.0005))
adam.minimize(loss)
exe = fluid.Executor(place)
exe.run(startup_program)
feed = gw.to_feed(graph_data)
print("evaluate result before training: ")
result = test(exe, val_program, prob, evaluator, feed, splitted_edge)
print(result)
print("training")
cc = 0
for epoch in range(1, args.epochs + 1):
for batch_data, batch_label in data_generator(
graph_data,
splitted_edge["train_edge"],
splitted_edge["train_edge_label"],
batch_size=args.batch_size):
feed['src_nodes'] = batch_data[:, 0].reshape(-1, 1)
feed['dst_nodes'] = batch_data[:, 1].reshape(-1, 1)
feed['edge_label'] = batch_label.astype("float32")
res_loss, y_pred, b_lr = exe.run(
train_program,
feed=feed,
fetch_list=[loss, prob, learning_rate])
if cc % 1 == 0:
print("epoch %d | step %d | lr %s | Loss %s" %
(epoch, cc, b_lr[0], res_loss[0]))
cc += 1
if cc % 20 == 0:
print("Evaluating...")
result = test(exe, val_program, prob, evaluator, feed,
splitted_edge)
print("epoch %d | step %d" % (epoch, cc))
print(result)
def test(exe, val_program, prob, evaluator, feed, splitted_edge):
"""Evaluation"""
result = {}
feed['src_nodes'] = splitted_edge["valid_edge"][:, 0].reshape(-1, 1)
feed['dst_nodes'] = splitted_edge["valid_edge"][:, 1].reshape(-1, 1)
feed['edge_label'] = splitted_edge["valid_edge_label"].astype(
"float32").reshape(-1, 1)
y_pred = exe.run(val_program, feed=feed, fetch_list=[prob])[0]
input_dict = {
"y_true": splitted_edge["valid_edge_label"],
"y_pred": y_pred.reshape(-1, ),
}
result["valid"] = evaluator.eval(input_dict)
feed['src_nodes'] = splitted_edge["test_edge"][:, 0].reshape(-1, 1)
feed['dst_nodes'] = splitted_edge["test_edge"][:, 1].reshape(-1, 1)
feed['edge_label'] = splitted_edge["test_edge_label"].astype(
"float32").reshape(-1, 1)
y_pred = exe.run(val_program, feed=feed, fetch_list=[prob])[0]
input_dict = {
"y_true": splitted_edge["test_edge_label"],
"y_pred": y_pred.reshape(-1, ),
}
result["test"] = evaluator.eval(input_dict)
return result
def data_generator(graph, data, label_data, batch_size, shuffle=True):
"""Data Generator"""
perm = np.arange(0, len(data))
if shuffle:
np.random.shuffle(perm)
offset = 0
while offset < len(perm):
batch_index = perm[offset:(offset + batch_size)]
offset += batch_size
pos_data = data[batch_index]
pos_label = label_data[batch_index]
        # negative sampling: keep the source node and corrupt the destination node
        neg_src_node = pos_data[:, 0]
        neg_dst_node = np.random.choice(
            pos_data.reshape(-1, ), size=len(neg_src_node))
        neg_data = np.hstack(
            [neg_src_node.reshape(-1, 1), neg_dst_node.reshape(-1, 1)])
        # drop corrupted pairs that happen to be real edges in the graph
        exists = graph.has_edges_between(neg_src_node, neg_dst_node)
        neg_data = neg_data[np.invert(exists)]
neg_label = np.zeros(shape=len(neg_data), dtype=np.int64)
batch_data = np.vstack([pos_data, neg_data])
label = np.vstack([pos_label.reshape(-1, 1), neg_label.reshape(-1, 1)])
yield batch_data, label
if __name__ == "__main__":
main()
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""test ogb
"""
import argparse
import pgl
import numpy as np
import paddle.fluid as fluid
from pgl.contrib.ogb.nodeproppred.dataset_pgl import PglNodePropPredDataset
from pgl.utils import paddle_helper
from ogb.nodeproppred import Evaluator
def train():
pass
def send_func(src_feat, dst_feat, edge_feat):
return (src_feat["h"] + edge_feat["h"]) * src_feat["norm"]
class GNNModel(object):
def __init__(self, name, emb_dim, num_task, num_layers):
self.num_task = num_task
self.emb_dim = emb_dim
self.num_layers = num_layers
self.name = name
def forward(self, graph):
h = fluid.layers.embedding(
graph.node_feat["x"],
size=(2, self.emb_dim)) # name=self.name + "_embedding")
edge_attr = fluid.layers.fc(graph.edge_feat["feat"], size=self.emb_dim)
for layer in range(self.num_layers):
msg = graph.send(
send_func,
nfeat_list=[("h", h), ("norm", graph.node_feat["norm"])],
efeat_list=[("h", edge_attr)])
h = graph.recv(msg, "sum")
h = fluid.layers.fc(
h,
size=self.emb_dim,
bias_attr=False,
param_attr=fluid.ParamAttr(name=self.name + '_%s' % layer))
h = h * graph.node_feat["norm"]
bias = fluid.layers.create_parameter(
shape=[self.emb_dim],
dtype='float32',
is_bias=True,
name=self.name + '_bias_%s' % layer)
h = fluid.layers.elementwise_add(h, bias, act="relu")
pred = fluid.layers.fc(h,
self.num_task,
act=None,
name=self.name + "_pred_output")
return pred
def main():
"""main
"""
# Training settings
parser = argparse.ArgumentParser(description='Graph Dataset')
parser.add_argument(
'--epochs',
type=int,
default=100,
help='number of epochs to train (default: 100)')
parser.add_argument(
'--dataset',
type=str,
default="ogbn-proteins",
help='dataset name (default: proteinfunc)')
args = parser.parse_args()
#device = torch.device("cuda:" + str(args.device)) if torch.cuda.is_available() else torch.device("cpu")
#place = fluid.CUDAPlace(0)
place = fluid.CPUPlace() # Dataset too big to use GPU
### automatic dataloading and splitting
dataset = PglNodePropPredDataset(name=args.dataset)
splitted_idx = dataset.get_idx_split()
### automatic evaluator. takes dataset name as input
evaluator = Evaluator(args.dataset)
graph_data, label = dataset[0]
train_program = fluid.Program()
startup_program = fluid.Program()
test_program = fluid.Program()
# degree normalize
indegree = graph_data.indegree()
norm = np.zeros_like(indegree, dtype="float32")
norm[indegree > 0] = np.power(indegree[indegree > 0], -0.5)
graph_data.node_feat["norm"] = np.expand_dims(norm, -1).astype("float32")
graph_data.node_feat["x"] = np.zeros((len(indegree), 1), dtype="int64")
graph_data.edge_feat["feat"] = graph_data.edge_feat["feat"].astype(
"float32")
model = GNNModel(
name="gnn", num_task=dataset.num_tasks, emb_dim=64, num_layers=2)
with fluid.program_guard(train_program, startup_program):
gw = pgl.graph_wrapper.StaticGraphWrapper("graph", graph_data, place)
pred = model.forward(gw)
sigmoid_pred = fluid.layers.sigmoid(pred)
val_program = train_program.clone(for_test=True)
initializer = []
with fluid.program_guard(train_program, startup_program):
train_node_index, init = paddle_helper.constant(
"train_node_index", dtype="int64", value=splitted_idx["train"])
initializer.append(init)
train_node_label, init = paddle_helper.constant(
"train_node_label",
dtype="float32",
value=label[splitted_idx["train"]].astype("float32"))
initializer.append(init)
train_pred_t = fluid.layers.gather(pred, train_node_index)
train_loss_t = fluid.layers.sigmoid_cross_entropy_with_logits(
x=train_pred_t, label=train_node_label)
train_loss_t = fluid.layers.reduce_sum(train_loss_t)
train_pred_t = fluid.layers.sigmoid(train_pred_t)
adam = fluid.optimizer.Adam(
learning_rate=1e-2,
regularization=fluid.regularizer.L2DecayRegularizer(
regularization_coeff=0.0005))
adam.minimize(train_loss_t)
exe = fluid.Executor(place)
exe.run(startup_program)
gw.initialize(place)
for init in initializer:
init(place)
for epoch in range(1, args.epochs + 1):
loss = exe.run(train_program, feed={}, fetch_list=[train_loss_t])
print("Loss %s" % loss[0])
print("Evaluating...")
y_pred = exe.run(val_program, feed={}, fetch_list=[sigmoid_pred])[0]
result = {}
input_dict = {
"y_true": label[splitted_idx["train"]],
"y_pred": y_pred[splitted_idx["train"]]
}
result["train"] = evaluator.eval(input_dict)
input_dict = {
"y_true": label[splitted_idx["valid"]],
"y_pred": y_pred[splitted_idx["valid"]]
}
result["valid"] = evaluator.eval(input_dict)
input_dict = {
"y_true": label[splitted_idx["test"]],
"y_pred": y_pred[splitted_idx["test"]]
}
result["test"] = evaluator.eval(input_dict)
print(result)
if __name__ == "__main__":
main()
......@@ -13,9 +13,11 @@
# limitations under the License.
"""Generate pgl apis
"""
__version__ = "1.0.1"
__version__ = "1.0.2"
from pgl import layers
from pgl import graph_wrapper
from pgl import graph
from pgl import data_loader
from pgl import heter_graph
from pgl import heter_graph_wrapper
from pgl import contrib
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
......@@ -11,8 +11,3 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Generate Contrib api
"""
from pgl.contrib import heter_graph
from pgl.contrib import heter_graph_wrapper
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""__init__.py"""
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""PglGraphPropPredDataset
"""
import pandas as pd
import shutil, os
import os.path as osp
import numpy as np
from ogb.utils.url import decide_download, download_url, extract_zip
from ogb.graphproppred import make_master_file
from pgl.contrib.ogb.io.read_graph_pgl import read_csv_graph_pgl
def to_bool(value):
"""to_bool"""
return np.array([value], dtype="bool")[0]
class PglGraphPropPredDataset(object):
"""PglGraphPropPredDataset"""
def __init__(self, name, root="dataset"):
self.name = name ## original name, e.g., ogbg-mol-tox21
self.dir_name = "_".join(
name.split("-")
) + "_pgl" ## replace hyphen with underline, e.g., ogbg_mol_tox21_dgl
self.original_root = root
self.root = osp.join(root, self.dir_name)
self.meta_info = make_master_file.df #pd.read_csv(
#os.path.join(os.path.dirname(__file__), "master.csv"), index_col=0)
if not self.name in self.meta_info:
print(self.name)
error_mssg = "Invalid dataset name {}.\n".format(self.name)
error_mssg += "Available datasets are as follows:\n"
error_mssg += "\n".join(self.meta_info.keys())
raise ValueError(error_mssg)
self.download_name = self.meta_info[self.name][
"download_name"] ## name of downloaded file, e.g., tox21
self.num_tasks = int(self.meta_info[self.name]["num tasks"])
self.task_type = self.meta_info[self.name]["task type"]
super(PglGraphPropPredDataset, self).__init__()
self.pre_process()
def pre_process(self):
"""Pre-processing"""
processed_dir = osp.join(self.root, 'processed')
raw_dir = osp.join(self.root, 'raw')
pre_processed_file_path = osp.join(processed_dir, 'pgl_data_processed')
if os.path.exists(pre_processed_file_path):
# TODO: Load Preprocessed
pass
else:
### download
url = self.meta_info[self.name]["url"]
if decide_download(url):
path = download_url(url, self.original_root)
extract_zip(path, self.original_root)
os.unlink(path)
# delete folder if there exists
try:
shutil.rmtree(self.root)
except:
pass
shutil.move(
osp.join(self.original_root, self.download_name),
self.root)
else:
print("Stop download.")
exit(-1)
### preprocess
add_inverse_edge = to_bool(self.meta_info[self.name][
"add_inverse_edge"])
self.graphs = read_csv_graph_pgl(
raw_dir, add_inverse_edge=add_inverse_edge)
self.graphs = np.array(self.graphs)
self.labels = np.array(
pd.read_csv(
osp.join(raw_dir, "graph-label.csv.gz"),
compression="gzip",
header=None).values)
# TODO: Load Graph
### load preprocessed files
def get_idx_split(self):
"""Train/Valid/Test split"""
split_type = self.meta_info[self.name]["split"]
path = osp.join(self.root, "split", split_type)
train_idx = pd.read_csv(
osp.join(path, "train.csv.gz"), compression="gzip",
header=None).values.T[0]
valid_idx = pd.read_csv(
osp.join(path, "valid.csv.gz"), compression="gzip",
header=None).values.T[0]
test_idx = pd.read_csv(
osp.join(path, "test.csv.gz"), compression="gzip",
header=None).values.T[0]
return {
"train": np.array(
train_idx, dtype="int64"),
"valid": np.array(
valid_idx, dtype="int64"),
"test": np.array(
test_idx, dtype="int64")
}
def __getitem__(self, idx):
"""Get datapoint with index"""
return self.graphs[idx], self.labels[idx]
def __len__(self):
"""Length of the dataset
Returns
-------
int
Length of Dataset
"""
return len(self.graphs)
def __repr__(self): # pragma: no cover
return '{}({})'.format(self.__class__.__name__, len(self))
if __name__ == "__main__":
pgl_dataset = PglGraphPropPredDataset(name="ogbg-mol-bace")
splitted_index = pgl_dataset.get_idx_split()
print(pgl_dataset)
print(pgl_dataset[3:20])
#print(pgl_dataset[splitted_index["train"]])
#print(pgl_dataset[splitted_index["valid"]])
#print(pgl_dataset[splitted_index["test"]])
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""MolEncoder for ogb
"""
import paddle.fluid as fluid
from ogb.utils.features import get_atom_feature_dims, get_bond_feature_dims
class AtomEncoder(object):
"""AtomEncoder for encoding node features"""
def __init__(self, name, emb_dim):
self.emb_dim = emb_dim
self.name = name
def __call__(self, x):
atom_feature = get_atom_feature_dims()
atom_input = fluid.layers.split(
x, num_or_sections=len(atom_feature), dim=-1)
outputs = None
count = 0
for _x, _atom_input_dim in zip(atom_input, atom_feature):
count += 1
emb = fluid.layers.embedding(
_x,
size=(_atom_input_dim, self.emb_dim),
param_attr=fluid.ParamAttr(
name=self.name + '_atom_feat_%s' % count))
if outputs is None:
outputs = emb
else:
outputs = outputs + emb
return outputs
class BondEncoder(object):
"""Bond for encoding edge features"""
def __init__(self, name, emb_dim):
self.emb_dim = emb_dim
self.name = name
def __call__(self, x):
bond_feature = get_bond_feature_dims()
bond_input = fluid.layers.split(
x, num_or_sections=len(bond_feature), dim=-1)
outputs = None
count = 0
for _x, _bond_input_dim in zip(bond_input, bond_feature):
count += 1
emb = fluid.layers.embedding(
_x,
size=(_bond_input_dim, self.emb_dim),
param_attr=fluid.ParamAttr(
name=self.name + '_bond_feat_%s' % count))
if outputs is None:
outputs = emb
else:
outputs = outputs + emb
return outputs
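# A hedged usage sketch (not part of the original file): inside a fluid program, the
# integer atom/bond feature matrices exposed by a PGL graph wrapper can be encoded as
#   atom_encoder = AtomEncoder(name="gnn", emb_dim=64)
#   bond_encoder = BondEncoder(name="gnn", emb_dim=64)
#   h_node = atom_encoder(gw.node_feat["feat"])   # sums one embedding per feature column
#   h_edge = bond_encoder(gw.edge_feat["feat"])
# which matches how the graph property prediction example above builds its inputs.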
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""__init__.py
"""
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""pgl read_csv_graph for ogb
"""
import pandas as pd
import os.path as osp
import numpy as np
import pgl
from ogb.io.read_graph_raw import read_csv_graph_raw
def read_csv_graph_pgl(raw_dir, add_inverse_edge=False):
"""Read CSV data and build PGL Graph
"""
graph_list = read_csv_graph_raw(raw_dir, add_inverse_edge)
pgl_graph_list = []
for graph in graph_list:
edges = list(zip(graph["edge_index"][0], graph["edge_index"][1]))
g = pgl.graph.Graph(num_nodes=graph["num_nodes"], edges=edges)
if graph["edge_feat"] is not None:
g.edge_feat["feat"] = graph["edge_feat"]
if graph["node_feat"] is not None:
g.node_feat["feat"] = graph["node_feat"]
pgl_graph_list.append(g)
return pgl_graph_list
if __name__ == "__main__":
# graph_list = read_csv_graph_dgl('dataset/proteinfunc_v2/raw', add_inverse_edge = True)
graph_list = read_csv_graph_pgl(
'dataset/ogbn_proteins_pgl/raw', add_inverse_edge=True)
print(graph_list)
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""__init__.py
"""
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""LinkPropPredDataset for pgl
"""
import pandas as pd
import shutil, os
import os.path as osp
import numpy as np
from ogb.utils.url import decide_download, download_url, extract_zip
from ogb.linkproppred import make_master_file
from pgl.contrib.ogb.io.read_graph_pgl import read_csv_graph_pgl
def to_bool(value):
"""to_bool"""
return np.array([value], dtype="bool")[0]
class PglLinkPropPredDataset(object):
"""PglLinkPropPredDataset
"""
def __init__(self, name, root="dataset"):
self.name = name ## original name, e.g., ogbl-ppa
self.dir_name = "_".join(name.split(
"-")) + "_pgl" ## replace hyphen with underline, e.g., ogbl_ppa_pgl
self.original_root = root
self.root = osp.join(root, self.dir_name)
self.meta_info = make_master_file.df #pd.read_csv(os.path.join(os.path.dirname(__file__), "master.csv"), index_col=0)
if self.name not in self.meta_info:
print(self.name)
error_mssg = "Invalid dataset name {}.\n".format(self.name)
error_mssg += "Available datasets are as follows:\n"
error_mssg += "\n".join(self.meta_info.keys())
raise ValueError(error_mssg)
self.download_name = self.meta_info[self.name][
"download_name"] ## name of downloaded file, e.g., ppassoc
self.task_type = self.meta_info[self.name]["task type"]
super(PglLinkPropPredDataset, self).__init__()
self.pre_process()
def pre_process(self):
"""pre_process downlaoding data
"""
processed_dir = osp.join(self.root, 'processed')
pre_processed_file_path = osp.join(processed_dir, 'pgl_data_processed')
if osp.exists(pre_processed_file_path):
#TODO: Reload Preprocess files
pass
else:
### check download
if not osp.exists(osp.join(self.root, "raw", "edge.csv.gz")):
url = self.meta_info[self.name]["url"]
if decide_download(url):
path = download_url(url, self.original_root)
extract_zip(path, self.original_root)
os.unlink(path)
# delete the folder if it already exists
try:
shutil.rmtree(self.root)
except:
pass
shutil.move(
osp.join(self.original_root, self.download_name),
self.root)
else:
print("Stop download.")
exit(-1)
raw_dir = osp.join(self.root, "raw")
### pre-process and save
add_inverse_edge = to_bool(self.meta_info[self.name][
"add_inverse_edge"])
self.graph = read_csv_graph_pgl(
raw_dir, add_inverse_edge=add_inverse_edge)
#TODO: SAVE preprocess graph
def get_edge_split(self):
"""Train/Validation/Test split
"""
split_type = self.meta_info[self.name]["split"]
path = osp.join(self.root, "split", split_type)
train_idx = pd.read_csv(
osp.join(path, "train.csv.gz"), compression="gzip",
header=None).values
valid_idx = pd.read_csv(
osp.join(path, "valid.csv.gz"), compression="gzip",
header=None).values
test_idx = pd.read_csv(
osp.join(path, "test.csv.gz"), compression="gzip",
header=None).values
if self.task_type == "link prediction":
target_type = np.int64
else:
target_type = np.float32
return {
"train_edge": np.array(
train_idx[:, :2], dtype="int64"),
"train_edge_label": np.array(
train_idx[:, 2], dtype=target_type),
"valid_edge": np.array(
valid_idx[:, :2], dtype="int64"),
"valid_edge_label": np.array(
valid_idx[:, 2], dtype=target_type),
"test_edge": np.array(
test_idx[:, :2], dtype="int64"),
"test_edge_label": np.array(
test_idx[:, 2], dtype=target_type)
}
def __getitem__(self, idx):
assert idx == 0, "This dataset has only one graph"
return self.graph[0]
def __len__(self):
return 1
def __repr__(self): # pragma: no cover
return '{}({})'.format(self.__class__.__name__, len(self))
if __name__ == "__main__":
pgl_dataset = PglLinkPropPredDataset(name="ogbl-ppa")
splitted_edge = pgl_dataset.get_edge_split()
print(pgl_dataset[0])
print(splitted_edge)
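# Expected result sketch (illustrative): `splitted_edge` is a dict with the
# keys "train_edge", "valid_edge", "test_edge" (int64 arrays of shape [N, 2])
# and "train_edge_label", "valid_edge_label", "test_edge_label" (int64 for
# link prediction tasks, float32 otherwise), as built by `get_edge_split`.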
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""__init__.py
"""
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""NodePropPredDataset for pgl
"""
import pandas as pd
import shutil, os
import os.path as osp
import numpy as np
from ogb.utils.url import decide_download, download_url, extract_zip
from ogb.nodeproppred import make_master_file # create master.csv
from pgl.contrib.ogb.io.read_graph_pgl import read_csv_graph_pgl
def to_bool(value):
"""to_bool"""
return np.array([value], dtype="bool")[0]
class PglNodePropPredDataset(object):
"""PglNodePropPredDataset
"""
def __init__(self, name, root="dataset"):
self.name = name ## original name, e.g., ogbn-proteins
self.dir_name = "_".join(
name.split("-")
) + "_pgl" ## replace hyphen with underline, e.g., ogbn_proteins_pgl
self.original_root = root
self.root = osp.join(root, self.dir_name)
self.meta_info = make_master_file.df #pd.read_csv(
#os.path.join(os.path.dirname(__file__), "master.csv"), index_col=0)
if self.name not in self.meta_info:
error_mssg = "Invalid dataset name {}.\n".format(self.name)
error_mssg += "Available datasets are as follows:\n"
error_mssg += "\n".join(self.meta_info.keys())
raise ValueError(error_mssg)
self.download_name = self.meta_info[self.name][
"download_name"] ## name of downloaded file, e.g., tox21
self.num_tasks = int(self.meta_info[self.name]["num tasks"])
self.task_type = self.meta_info[self.name]["task type"]
super(PglNodePropPredDataset, self).__init__()
self.pre_process()
def pre_process(self):
"""pre_process downlaoding data
"""
processed_dir = osp.join(self.root, 'processed')
pre_processed_file_path = osp.join(processed_dir, 'pgl_data_processed')
if osp.exists(pre_processed_file_path):
# TODO: Reload Preprocess files
pass
else:
### check download
if not osp.exists(osp.join(self.root, "raw", "edge.csv.gz")):
url = self.meta_info[self.name]["url"]
if decide_download(url):
path = download_url(url, self.original_root)
extract_zip(path, self.original_root)
os.unlink(path)
# delete the folder if it already exists
try:
shutil.rmtree(self.root)
except:
pass
shutil.move(
osp.join(self.original_root, self.download_name),
self.root)
else:
print("Stop download.")
exit(-1)
raw_dir = osp.join(self.root, "raw")
### pre-process and save
add_inverse_edge = to_bool(self.meta_info[self.name][
"add_inverse_edge"])
self.graph = read_csv_graph_pgl(
raw_dir, add_inverse_edge=add_inverse_edge)
### adding prediction target
node_label = pd.read_csv(
osp.join(raw_dir, 'node-label.csv.gz'),
compression="gzip",
header=None).values
if "classification" in self.task_type:
node_label = np.array(node_label, dtype=np.int64)
else:
node_label = np.array(node_label, dtype=np.float32)
label_dict = {"labels": node_label}
# TODO: SAVE preprocess graph
self.labels = label_dict['labels']
def get_idx_split(self):
"""Train/Validation/Test split
"""
split_type = self.meta_info[self.name]["split"]
path = osp.join(self.root, "split", split_type)
train_idx = pd.read_csv(
osp.join(path, "train.csv.gz"), compression="gzip",
header=None).values.T[0]
valid_idx = pd.read_csv(
osp.join(path, "valid.csv.gz"), compression="gzip",
header=None).values.T[0]
test_idx = pd.read_csv(
osp.join(path, "test.csv.gz"), compression="gzip",
header=None).values.T[0]
return {
"train": np.array(
train_idx, dtype="int64"),
"valid": np.array(
valid_idx, dtype="int64"),
"test": np.array(
test_idx, dtype="int64")
}
def __getitem__(self, idx):
assert idx == 0, "This dataset has only one graph"
return self.graph[idx], self.labels
def __len__(self):
return 1
def __repr__(self): # pragma: no cover
return '{}({})'.format(self.__class__.__name__, len(self))
if __name__ == "__main__":
pgl_dataset = PglNodePropPredDataset(name="ogbn-proteins")
splitted_index = pgl_dataset.get_idx_split()
print(pgl_dataset[0])
print(splitted_index)
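# Expected result sketch (illustrative): `pgl_dataset[0]` returns a tuple of
# (pgl.graph.Graph, label array) and `splitted_index` is a dict of int64
# node-index arrays under the keys "train", "valid" and "test".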
......@@ -15,12 +15,14 @@
This package implements the Graph structure for handling graph data.
"""
import os
import numpy as np
import pickle as pkl
import time
import pgl.graph_kernel as graph_kernel
from collections import defaultdict
__all__ = ['Graph', 'SubGraph']
__all__ = ['Graph', 'SubGraph', 'MultiGraph']
def _hide_num_nodes(shape):
......@@ -43,8 +45,8 @@ class EdgeIndex(object):
"""
def __init__(self, u, v, num_nodes):
self._v, self._eid, self._degree, self._sorted_u,\
self._sorted_v, self._sorted_eid = graph_kernel.build_index(u, v, num_nodes)
self._degree, self._sorted_v, self._sorted_u, \
self._sorted_eid, self._indptr = graph_kernel.build_index(u, v, num_nodes)
@property
def degree(self):
......@@ -52,23 +54,40 @@ class EdgeIndex(object):
"""
return self._degree
@property
def v(self):
"""Return the compressed v.
def view_v(self, u=None):
"""Return the compressed v for given u.
"""
return self._v
if u is None:
return np.split(self._sorted_v, self._indptr[1:])
else:
u = np.array(u, dtype="int64")
return graph_kernel.slice_by_index(
self._sorted_v, self._indptr, index=u)
@property
def eid(self):
"""Return the edge id.
def view_eid(self, u=None):
"""Return the compressed edge id for given u.
"""
return self._eid
if u is None:
return np.split(self._sorted_eid, self._indptr[1:])
else:
u = np.array(u, dtype="int64")
return graph_kernel.slice_by_index(
self._sorted_eid, self._indptr, index=u)
def triples(self):
"""Return the sorted (u, v, eid) tuples.
"""
return self._sorted_u, self._sorted_v, self._sorted_eid
def dump(self, path):
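"""Dump the index arrays (degree, sorted_u, sorted_v, sorted_eid, indptr) as .npy files under `path`."""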
if not os.path.exists(path):
os.makedirs(path)
np.save(os.path.join(path, 'degree.npy'), self._degree)
np.save(os.path.join(path, 'sorted_u.npy'), self._sorted_u)
np.save(os.path.join(path, 'sorted_v.npy'), self._sorted_v)
np.save(os.path.join(path, 'sorted_eid.npy'), self._sorted_eid)
np.save(os.path.join(path, 'indptr.npy'), self._indptr)
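# Layout sketch (illustrative): the index stores destination ids and edge ids
# sorted by source node plus a CSR-style `indptr`, so the neighbours of node
# `i` are `sorted_v[indptr[i]:indptr[i + 1]]`. For example, the edges
# [(0, 1), (0, 2), (2, 0)] with num_nodes=3 give
#     sorted_u = [0, 0, 2]
#     sorted_v = [1, 2, 0]
#     indptr   = [0, 2, 2, 3]
#     degree   = [2, 0, 1]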
class Graph(object):
"""Implementation of graph structure in pgl.
......@@ -122,21 +141,51 @@ class Graph(object):
self._edges = edges
self._num_nodes = num_nodes
if len(edges) == 0:
raise ValueError("The Graph have no edges.")
self._adj_src_index = None
self._adj_dst_index = None
self.indegree()
self._num_graph = 1
self._graph_lod = np.array([0, self.num_nodes], dtype="int32")
def dump(self, path):
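"""Dump num_nodes, edges, the adjacency indexes (if built) and node/edge features as .npy files under `path`."""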
if not os.path.exists(path):
os.makedirs(path)
np.save(os.path.join(path, 'num_nodes.npy'), self._num_nodes)
np.save(os.path.join(path, 'edges.npy'), self._edges)
if self._adj_src_index:
self._adj_src_index.dump(os.path.join(path, 'adj_src'))
if self._adj_dst_index:
self._adj_dst_index.dump(os.path.join(path, 'adj_dst'))
def dump_feat(feat_path, feat):
"""Dump all features to .npy file.
"""
if len(feat) == 0:
return
if not os.path.exists(feat_path):
os.makedirs(feat_path)
for key in feat:
np.save(os.path.join(feat_path, key + ".npy"), feat[key])
dump_feat(os.path.join(path, "node_feat"), self.node_feat)
dump_feat(os.path.join(path, "edge_feat"), self.edge_feat)
@property
def adj_src_index(self):
"""Return an EdgeIndex object for src.
"""
if self._adj_src_index is None:
if len(self._edges) == 0:
u = np.array([], dtype="int64")
v = np.array([], dtype="int64")
else:
u = self._edges[:, 0]
v = self._edges[:, 1]
self._adj_src_index = EdgeIndex(
u=self._edges[:, 0],
v=self._edges[:, 1],
num_nodes=self._num_nodes)
u=u, v=v, num_nodes=self._num_nodes)
return self._adj_src_index
@property
......@@ -144,10 +193,15 @@ class Graph(object):
"""Return an EdgeIndex object for dst.
"""
if self._adj_dst_index is None:
if len(self._edges) == 0:
v = np.array([], dtype="int64")
u = np.array([], dtype="int64")
else:
v = self._edges[:, 0]
u = self._edges[:, 1]
self._adj_dst_index = EdgeIndex(
u=self._edges[:, 1],
v=self._edges[:, 0],
num_nodes=self._num_nodes)
u=u, v=v, num_nodes=self._num_nodes)
return self._adj_dst_index
@property
......@@ -287,17 +341,11 @@ class Graph(object):
[]]
"""
if nodes is None:
if return_eids:
return self.adj_src_index.v, self.adj_src_index.eid
else:
return self.adj_src_index.v
else:
if return_eids:
return self.adj_src_index.v[nodes], self.adj_src_index.eid[
nodes]
return self.adj_src_index.view_v(
nodes), self.adj_src_index.view_eid(nodes)
else:
return self.adj_src_index.v[nodes]
return self.adj_src_index.view_v(nodes)
def sample_successor(self,
nodes,
......@@ -385,17 +433,11 @@ class Graph(object):
[2]]
"""
if nodes is None:
if return_eids:
return self.adj_dst_index.v, self.adj_dst_index.eid
return self.adj_dst_index.view_v(
nodes), self.adj_dst_index.view_eid(nodes)
else:
return self.adj_dst_index.v
else:
if return_eids:
return self.adj_dst_index.v[nodes], self.adj_dst_index.eid[
nodes]
else:
return self.adj_dst_index.v[nodes]
return self.adj_dst_index.view_v(nodes)
def sample_predecessor(self,
nodes,
......@@ -510,7 +552,13 @@ class Graph(object):
(key, _hide_num_nodes(value.shape), value.dtype))
return edge_feat_info
def subgraph(self, nodes, eid=None, edges=None):
def subgraph(self,
nodes,
eid=None,
edges=None,
edge_feats=None,
with_node_feat=True,
with_edge_feat=True):
"""Generate subgraph with nodes and edge ids.
This function will generate a :code:`pgl.graph.Subgraph` object and
......@@ -526,6 +574,10 @@ class Graph(object):
edges (optional): Edge(src, dst) list which will be included in the subgraph.
with_node_feat: Whether to inherit node features from parent graph.
with_edge_feat: Whether to inherit edge features from parent graph.
Return:
A :code:`pgl.graph.Subgraph` object.
"""
......@@ -547,12 +599,18 @@ class Graph(object):
len(edges), dtype="int64"), edges, reindex)
sub_edge_feat = {}
if edges is None:
if with_edge_feat:
for key, value in self._edge_feat.items():
if eid is None:
raise ValueError("Eid can not be None with edge features.")
raise ValueError(
"Eid can not be None with edge features.")
sub_edge_feat[key] = value[eid]
else:
sub_edge_feat = edge_feats
sub_node_feat = {}
if with_node_feat:
for key, value in self._node_feat.items():
sub_node_feat[key] = value[nodes]
......@@ -730,6 +788,16 @@ class Graph(object):
cur_nodes = nxt_nodes
return walk
@property
def num_graph(self):
""" Return Number of Graphs"""
return self._num_graph
@property
def graph_lod(self):
""" Return Graph Lod Index for Paddle Computation"""
return self._graph_lod
class SubGraph(Graph):
"""Implementation of SubGraph in pgl.
......@@ -783,3 +851,120 @@ class SubGraph(Graph):
A list of node ids in parent graph.
"""
return graph_kernel.map_nodes(nodes, self._to_reindex)
class MultiGraph(Graph):
"""Implementation of multiple disjoint graph structure in pgl.
It merges a list of graphs into a single disjoint graph so that they can be processed as one batch.
Args:
graph_list : A list of Graph Instances
Examples:
.. code-block:: python
batch_graph = MultiGraph([graph1, graph2, graph3])
"""
def __init__(self, graph_list):
num_nodes = np.sum([g.num_nodes for g in graph_list])
node_feat = self._join_node_feature(graph_list)
edge_feat = self._join_edge_feature(graph_list)
edges = self._join_edges(graph_list)
super(MultiGraph, self).__init__(
num_nodes=num_nodes,
edges=edges,
node_feat=node_feat,
edge_feat=edge_feat)
self._num_graph = len(graph_list)
self._src_graph = graph_list
graph_lod = [g.num_nodes for g in graph_list]
graph_lod = np.cumsum(graph_lod, dtype="int32")
graph_lod = np.insert(graph_lod, 0, 0)
self._graph_lod = graph_lod
def __getitem__(self, index):
return self._src_graph[index]
def _join_node_feature(self, graph_list):
"""join node features for multiple graph"""
node_feat = defaultdict(lambda: [])
for graph in graph_list:
for key in graph.node_feat:
node_feat[key].append(graph.node_feat[key])
ret_node_feat = {}
for key in node_feat:
ret_node_feat[key] = np.vstack(node_feat[key])
return ret_node_feat
def _join_edge_feature(self, graph_list):
"""join edge features for multiple graph"""
edge_feat = defaultdict(lambda: [])
for graph in graph_list:
for key in graph.edge_feat:
efeat = graph.edge_feat[key]
if len(efeat) > 0:
edge_feat[key].append(efeat)
ret_edge_feat = {}
for key in edge_feat:
ret_edge_feat[key] = np.vstack(edge_feat[key])
return ret_edge_feat
def _join_edges(self, graph_list):
"""join edges for multiple graph"""
list_edges = []
start_offset = 0
for graph in graph_list:
edges = graph.edges
if len(edges) > 0:
edges = edges + start_offset
list_edges.append(edges)
start_offset += graph.num_nodes
edges = np.vstack(list_edges)
return edges
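# Batching sketch (illustrative): merging three graphs with 2, 3 and 4 nodes
# produces one disjoint graph with 9 nodes; the nodes of graph k occupy the
# half-open id range [graph_lod[k], graph_lod[k + 1]):
#
#     batch = MultiGraph([g1, g2, g3])
#     batch.num_graph   # 3
#     batch.graph_lod   # array([0, 2, 5, 9], dtype=int32)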
class MemmapEdgeIndex(EdgeIndex):
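"""EdgeIndex backed by memory-mapped .npy files written by EdgeIndex.dump."""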
def __init__(self, path):
self._degree = np.load(os.path.join(path, 'degree.npy'), mmap_mode="r")
self._sorted_u = np.load(
os.path.join(path, 'sorted_u.npy'), mmap_mode="r")
self._sorted_v = np.load(
os.path.join(path, 'sorted_v.npy'), mmap_mode="r")
self._sorted_eid = np.load(
os.path.join(path, 'sorted_eid.npy'), mmap_mode="r")
self._indptr = np.load(os.path.join(path, 'indptr.npy'), mmap_mode="r")
class MemmapGraph(Graph):
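"""Graph backed by memory-mapped .npy files written by Graph.dump, loading edges, features and adjacency indexes lazily."""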
def __init__(self, path):
self._num_nodes = np.load(os.path.join(path, 'num_nodes.npy'))
self._edges = np.load(os.path.join(path, 'edges.npy'), mmap_mode="r")
if os.path.isdir(os.path.join(path, 'adj_src')):
self._adj_src_index = MemmapEdgeIndex(
os.path.join(path, 'adj_src'))
else:
self._adj_src_index = None
if os.path.isdir(os.path.join(path, 'adj_dst')):
self._adj_dst_index = MemmapEdgeIndex(
os.path.join(path, 'adj_dst'))
else:
self._adj_dst_index = None
def load_feat(feat_path):
"""Load features from .npy file.
"""
feat = {}
if os.path.isdir(feat_path):
for feat_name in os.listdir(feat_path):
feat[os.path.splitext(feat_name)[0]] = np.load(
os.path.join(feat_path, feat_name), mmap_mode="r")
return feat
self._node_feat = load_feat(os.path.join(path, 'node_feat'))
self._edge_feat = load_feat(os.path.join(path, 'edge_feat'))
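# Round-trip sketch (illustrative): a graph written out with `Graph.dump` can
# be reopened lazily with `MemmapGraph`, which memory-maps the saved .npy
# arrays instead of loading them into memory:
#
#     g.dump("./tmp_graph")            # writes num_nodes, edges and features
#     g2 = MemmapGraph("./tmp_graph")  # edges, features and indexes mmap'ed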
......@@ -53,14 +53,21 @@ def build_index(np.ndarray[np.int64_t, ndim=1] u,
_tmp_eid[indptr[u[i]] + count[u[i]]] = i
_tmp_u[indptr[u[i]] + count[u[i]]] = u[i]
count[u[i]] += 1
return degree, _tmp_v, _tmp_u, _tmp_eid, indptr
cdef list output_eid = []
cdef list output_v = []
for i in xrange(n_size):
output_eid.append(_tmp_eid[indptr[i]:indptr[i+1]])
output_v.append(_tmp_v[indptr[i]:indptr[i+1]])
return np.array(output_v), np.array(output_eid), degree, _tmp_u, _tmp_v, _tmp_eid
@cython.boundscheck(False)
@cython.wraparound(False)
def slice_by_index(np.ndarray[np.int64_t, ndim=1] u,
np.ndarray[np.int64_t, ndim=1] indptr,
np.ndarray[np.int64_t, ndim=1] index):
cdef list output = []
cdef long long i
cdef long long h = len(index)
cdef long long j
for i in xrange(h):
j = index[i]
output.append(u[indptr[j]:indptr[j+1]])
return np.array(output)
@cython.boundscheck(False)
@cython.wraparound(False)
......@@ -212,7 +219,11 @@ def sample_subset(list nids, long long maxdegree, shuffle=False):
output.append(nids[inc])
else:
sample_size = buff_size if buff_size <= maxdegree else maxdegree
subset_choose_index(sample_size, nids[inc], rnd, buff_nid, offset)
if isinstance(nids[inc], list):
tmp = np.array(nids[inc], dtype=np.int64)
else:
tmp = nids[inc]
subset_choose_index(sample_size, tmp, rnd, buff_nid, offset)
output.append(buff_nid[offset:offset+sample_size])
offset += sample_size
return output
......@@ -245,7 +256,14 @@ def sample_subset_with_eid(list nids, list eids, long long maxdegree, shuffle=Fa
output_eid.append(eids[inc])
else:
sample_size = buff_size if buff_size <= maxdegree else maxdegree
subset_choose_index_eid(sample_size, nids[inc], eids[inc], rnd, buff_nid, buff_eid, offset)
if isinstance(nids[inc], list):
tmp = np.array(nids[inc], dtype=np.int64)
tmp_eids = np.array(eids[inc], dtype=np.int64)
else:
tmp = nids[inc]
tmp_eids = eids[inc]
subset_choose_index_eid(sample_size, tmp, tmp_eids, rnd, buff_nid, buff_eid, offset)
output.append(buff_nid[offset:offset+sample_size])
output_eid.append(buff_eid[offset:offset+sample_size])
offset += sample_size
......@@ -253,22 +271,10 @@ def sample_subset_with_eid(list nids, list eids, long long maxdegree, shuffle=Fa
@cython.boundscheck(False)
@cython.wraparound(False)
def skip_gram_gen_pair(vector[long long] walk_path, long win_size=5):
"""Return node paris generated by skip-gram algorithm.
This function will auto remove the pair which src node is the same
as dst node.
Args:
walk_path: List of nodes as a walk path.
win_size: the window size used in skip-gram.
Return:
A tuple of (src node list, dst node list).
"""
def skip_gram_gen_pair(vector[long long] walk, long win_size=5):
cdef vector[long long] src
cdef vector[long long] dst
cdef long long l = len(walk_path)
cdef long long l = len(walk)
cdef long long real_win_size, left, right, i
cdef np.ndarray[np.int64_t, ndim=1] rnd = np.random.randint(1, win_size+1,
dtype=np.int64, size=l)
......@@ -282,23 +288,15 @@ def skip_gram_gen_pair(vector[long long] walk_path, long win_size=5):
if right >= l:
right = l - 1
for j in xrange(left, right+1):
if walk_path[i] == walk_path[j]:
if walk[i] == walk[j]:
continue
src.push_back(walk_path[i])
dst.push_back(walk_path[j])
src.push_back(walk[i])
dst.push_back(walk[j])
return src, dst
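# Example (illustrative): with walk = [1, 2, 3] and win_size = 1 each node is
# paired with its immediate neighbours in the walk and self pairs are skipped,
# giving src = [1, 2, 2, 3], dst = [2, 1, 3, 2].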
@cython.boundscheck(False)
@cython.wraparound(False)
def alias_sample_build_table(np.ndarray[np.float64_t, ndim=1] probs):
"""Return the alias table and event table for alias sampling.
Args:
probs: A list of float numbers as the probabilities.
Return:
A tuple of (alias table, event table).
"""
cdef long long l = len(probs)
cdef np.ndarray[np.float64_t, ndim=1] alias = probs * l
cdef np.ndarray[np.int64_t, ndim=1] events = np.zeros(l, dtype=np.int64)
......
(This diff is collapsed.)
......@@ -64,8 +64,8 @@ class HeterGraphWrapper(object):
import paddle.fluid as fluid
import numpy as np
from pgl.contrib import heter_graph
from pgl.contrib import heter_graph_wrapper
from pgl import heter_graph
from pgl import heter_graph_wrapper
num_nodes = 4
node_types = [(0, 'user'), (1, 'item'), (2, 'item'), (3, 'user')]
edges = {
......
......@@ -18,7 +18,10 @@ from pgl.layers import conv
from pgl.layers.conv import *
from pgl.layers import set2set
from pgl.layers.set2set import *
from pgl.layers import graph_pool
from pgl.layers.graph_pool import *
__all__ = []
__all__ += conv.__all__
__all__ += set2set.__all__
__all__ += graph_pool.__all__
# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""This package implements common layers to help building
graph neural networks.
"""
import paddle.fluid as fluid
from pgl import graph_wrapper
from pgl.utils import paddle_helper
from pgl.utils import op
__all__ = ['graph_pooling']
def graph_pooling(gw, node_feat, pool_type):
"""Implementation of graph pooling
This is an implementation of graph pooling
Args:
gw: Graph wrapper object (:code:`StaticGraphWrapper` or :code:`GraphWrapper`)
node_feat: A tensor with shape (num_nodes, feature_size).
pool_type: The type of pooling ("sum", "average" , "min")
Return:
A tensor with shape (num_graph, hidden_size)
"""
graph_feat = op.nested_lod_reset(node_feat, gw.graph_lod)
graph_feat = fluid.layers.sequence_pool(graph_feat, pool_type)
return graph_feat
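# Usage sketch (illustrative): with a GraphWrapper `gw` fed from a MultiGraph
# (so that `gw.graph_lod` marks where each graph's nodes start and end), a
# whole-graph representation can be obtained as
#
#     graph_repr = graph_pooling(gw, node_feat, pool_type="sum")
#     # graph_repr: one pooled row per graph in the batch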
(This diff is collapsed.)
(This diff is collapsed.)
(This diff is collapsed.)
# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""test_metapath_randomwalk"""
import time
import unittest
import json
import os
import numpy as np
from pgl.sample import metapath_randomwalk
from pgl.graph import Graph
from pgl import heter_graph
np.random.seed(1)
class MetapathRandomwalkTest(unittest.TestCase):
"""metapath_randomwalk test
"""
def setUp(self):
edges = {}
# for test no successor
edges['c2p'] = [(1, 4), (0, 5), (1, 9), (1, 8), (2, 8), (2, 5), (3, 6),
(3, 7), (3, 4), (3, 8)]
edges['p2c'] = [(v, u) for u, v in edges['c2p']]
edges['p2a'] = [(4, 10), (4, 11), (4, 12), (4, 14), (4, 13), (6, 12),
(6, 11), (6, 14), (7, 12), (7, 11), (8, 14), (9, 10)]
edges['a2p'] = [(v, u) for u, v in edges['p2a']]
# for test speed
# edges['c2p'] = [(0, 4), (0, 5), (1, 9), (1,8), (2,8), (2,5), (3,6), (3,7), (3,4), (3,8)]
# edges['p2c'] = [(v,u) for u, v in edges['c2p']]
# edges['p2a'] = [(4,10), (4,11), (4,12), (4,14), (5,13), (6,13), (6,11), (6,14), (7,12), (7,11), (8,14), (9,13)]
# edges['a2p'] = [(v,u) for u, v in edges['p2a']]
self.node_types = ['c' for _ in range(4)] + [
'p' for _ in range(6)
] + ['a' for _ in range(5)]
node_types = [(i, t) for i, t in enumerate(self.node_types)]
self.graph = heter_graph.HeterGraph(
num_nodes=len(node_types), edges=edges, node_types=node_types)
def test_metapath_randomwalk(self):
meta_path = 'c2p-p2a-a2p-p2c'
path = ['c', 'p', 'a', 'p', 'c']
start_nodes = [0, 1, 2, 3]
walk_len = 10
walks = metapath_randomwalk(
graph=self.graph,
start_nodes=start_nodes,
metapath=meta_path,
walk_length=walk_len)
self.assertEqual(len(walks), 4)
for walk in walks:
for i in range(len(walk)):
idx = i % (len(path) - 1)
self.assertEqual(self.node_types[walk[i]], path[idx])
if __name__ == "__main__":
unittest.main()
(This diff is collapsed.)