# link_predict_train.py
# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import os
import argparse
import traceback

import yaml
import numpy as np
from easydict import EasyDict as edict
from pgl.utils.logger import log
from pgl.utils import paddle_helper

from learner import Learner
S
suweiyue 已提交
25
from models.model import LinkPredictModel
W
weiyue.su 已提交
26 27 28 29
from dataset.graph_reader import GraphGenerator 


class TrainData(object):
    """Per-trainer shard of the training edges stored under a graph work dir.

    The shard is selected with the ``PADDLE_TRAINER_ID`` and
    ``PADDLE_TRAINERS_NUM`` environment variables (defaulting to ``0`` and
    ``1``): trainer ``i`` of ``n`` keeps rows ``i, i + n, i + 2n, ...``.

    ``self.data["train_data"]`` is a list ``[train_usr, train_ad]`` taken from
    columns 0 and 1 of ``train_data.npy``; when a non-empty
    ``neg_samples.npy`` is present, its shard is appended as a third entry.
    """

    def __init__(self, graph_work_path):
        """Load this trainer's slice of the training data.

        Args:
            graph_work_path: Directory containing ``train_data.npy`` and,
                optionally, ``neg_samples.npy``.
        """
        trainer_id = int(os.getenv("PADDLE_TRAINER_ID", "0"))
        trainer_count = int(os.getenv("PADDLE_TRAINERS_NUM", "1"))
        log.info("trainer_id: %s, trainer_count: %s." % (trainer_id, trainer_count))

        # NOTE: allow_pickle=True lets numpy unpickle object arrays, which can
        # execute arbitrary code -- graph_work_path must hold trusted files.
        edges = np.load(
            os.path.join(graph_work_path, "train_data.npy"), allow_pickle=True)
        # edges is bidirectional (original author's note).
        # Strided slicing gives each trainer a disjoint subset of the rows.
        train_usr = edges[trainer_id::trainer_count, 0]
        train_ad = edges[trainer_id::trainer_count, 1]
        returns = {
            "train_data": [train_usr, train_ad]
        }

        # Negative samples are optional; a missing or empty file is skipped.
        neg_path = os.path.join(graph_work_path, "neg_samples.npy")
        if os.path.exists(neg_path):
            neg_samples = np.load(neg_path, allow_pickle=True)
            if neg_samples.size != 0:
                # Shard with the same stride as the edges.
                train_negs = neg_samples[trainer_id::trainer_count]
                returns["train_data"].append(train_negs)
        log.info("Load train_data done.")
        self.data = returns

    def __getitem__(self, index):
        """Return the ``index``-th element of every train_data column as a list."""
        return [data[index] for data in self.data["train_data"]]

    def __len__(self):
        """Return the number of entries in the first train_data column."""
        return len(self.data["train_data"][0])


def main(config):
    """Build the model, data reader and learner from ``config`` and run training.

    Args:
        config: Attribute-style configuration object. This function reads
            ``graph_work_path``, ``batch_size``, ``samples``,
            ``sample_workers``, ``use_pyreader``, ``neg_type`` and
            ``learner_type`` from it, and also passes the whole object to
            ``LinkPredictModel`` and ``learner.build``.
    """
    # Select Model
    model = LinkPredictModel(config)

    # Build Train Edges
    data = TrainData(config.graph_work_path)

    # Build Train Data
    train_iter = GraphGenerator(
        graph_wrappers=model.graph_wrappers,
        batch_size=config.batch_size,
        data=data,
        samples=config.samples,
        num_workers=config.sample_workers,
        feed_name_list=[var.name for var in model.feed_list],
        use_pyreader=config.use_pyreader,
        phase="train",
        graph_data_path=config.graph_work_path,
        shuffle=True,
        neg_type=config.neg_type)

    log.info("build graph reader done.")

    # The learner implementation is chosen by name from the config.
    learner = Learner.factory(config.learner_type)
    learner.build(model, train_iter, config)

    learner.start()
    learner.stop()


if __name__ == "__main__":
    # Script entry point: read the YAML config named by --conf and train.
    parser = argparse.ArgumentParser(description='main')
    parser.add_argument("--conf", type=str, default="./config.yaml")
    args = parser.parse_args()
    # Use a context manager so the config file handle is closed after parsing.
    # NOTE(review): FullLoader accepts more than plain data types; if the
    # config can ever come from an untrusted source, prefer yaml.safe_load.
    with open(args.conf) as conf_file:
        config = edict(yaml.load(conf_file, Loader=yaml.FullLoader))
    print(config)
    main(config)