# node_classification_train.py
# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import os
import argparse
import traceback

import yaml
import numpy as np
from easydict import EasyDict as edict
from pgl.utils.logger import log
from pgl.utils import paddle_helper

from learner import Learner
S
suweiyue 已提交
25
from models.model import LinkPredictModel
26 27
from models.model import NodeClassificationModel
from dataset.graph_reader import NodeClassificationGenerator 
W
weiyue.su 已提交
28 29 30


class TrainData(object):
    """Training samples for node classification, sharded per trainer.

    Loads ``train_data.npy`` from ``graph_work_path`` and keeps every
    ``PADDLE_TRAINERS_NUM``-th row starting at row ``PADDLE_TRAINER_ID``
    (both read from the environment, defaulting to a single trainer with
    id 0).  Column 0 of the kept rows is stored as the nodes and column 1
    as the labels.

    Indexing an instance returns ``[node, label]`` for that position and
    ``len()`` returns the number of kept rows.
    """

    def __init__(self, graph_work_path):
        rank = int(os.getenv("PADDLE_TRAINER_ID", "0"))
        world_size = int(os.getenv("PADDLE_TRAINERS_NUM", "1"))
        log.info("trainer_id: %s, trainer_count: %s." % (rank, world_size))

        # NOTE: allow_pickle=True lets numpy unpickle object arrays, so the
        # file is expected to come from a trusted source.
        npy_path = os.path.join(graph_work_path, "train_data.npy")
        samples = np.load(npy_path, allow_pickle=True)

        # Strided row selection gives each trainer a disjoint share of rows.
        own_rows = slice(rank, None, world_size)
        columns = [samples[own_rows, 0], samples[own_rows, 1]]

        log.info("Load train_data done.")
        self.data = {"train_data": columns}

    def __getitem__(self, index):
        """Return ``[node, label]`` taken at ``index`` from each column."""
        columns = self.data["train_data"]
        return [column[index] for column in columns]

    def __len__(self):
        """Return the number of samples held by this trainer."""
        node_column = self.data["train_data"][0]
        return len(node_column)


def main(config):
    """Build the model, the training reader and the learner, then train.

    Args:
        config: Attribute-style configuration object.  This function reads
            ``graph_work_path``, ``batch_size``, ``samples``,
            ``sample_workers``, ``use_pyreader``, ``neg_type`` and
            ``learner_type`` from it; the model and the learner are also
            handed the whole object.
    """
    model = NodeClassificationModel(config)

    # Training samples stored under the graph work directory.
    data = TrainData(config.graph_work_path)

    # Keyword arguments for the batch generator, gathered in one place.
    reader_options = dict(
        graph_wrappers=model.graph_wrappers,
        batch_size=config.batch_size,
        data=data,
        samples=config.samples,
        num_workers=config.sample_workers,
        feed_name_list=[var.name for var in model.feed_list],
        use_pyreader=config.use_pyreader,
        phase="train",
        graph_data_path=config.graph_work_path,
        shuffle=True,
        neg_type=config.neg_type,
    )
    train_iter = NodeClassificationGenerator(**reader_options)
    log.info("build graph reader done.")

    # The learner implementation is chosen by name from the configuration.
    learner = Learner.factory(config.learner_type)
    learner.build(model, train_iter, config)
    learner.start()
    learner.stop()


if __name__ == "__main__":
    # Command-line entry point: parse --conf, load the YAML configuration
    # into an attribute-style dict, echo it, and start training.
    parser = argparse.ArgumentParser(description='main')
    parser.add_argument("--conf", type=str, default="./config.yaml")
    args = parser.parse_args()

    # Use a context manager so the config file handle is closed as soon as
    # parsing finishes instead of being left open for the whole run.
    # NOTE(review): yaml.FullLoader is kept as-is; if the config file can
    # come from an untrusted source, prefer yaml.safe_load.
    with open(args.conf) as conf_file:
        config = edict(yaml.load(conf_file, Loader=yaml.FullLoader))

    print(config)
    main(config)