main.cc 2.8 KB
Newer Older
X
xiexionghang 已提交
1 2 3 4
#include <time.h>
#include <fstream>
#include <yaml-cpp/yaml.h>
#include "paddle/fluid/train/custom_trainer/feed/trainer_context.h"
X
xiexionghang 已提交
5
#include "paddle/fluid/platform/init.h"
X
xiexionghang 已提交
6 7
#include "paddle/fluid/train/custom_trainer/feed/process/process.h"
#include "paddle/fluid/train/custom_trainer/feed/process/init_env_process.h"
R
rensilin 已提交
8
#include "paddle/fluid/framework/op_registry.h"
R
rensilin 已提交
9
#include "paddle/fluid/pybind/pybind.h"
X
xiexionghang 已提交
10 11 12 13 14 15 16 17 18 19 20 21 22 23

// Bring the feed custom-trainer names into scope for this translation unit.
using namespace paddle::custom_trainer::feed;

// Command-line flag: path of the trainer YAML configuration that main() loads
// with YAML::LoadFile. Defaults to ./conf/trainer.yaml.
DEFINE_string(feed_trainer_conf_path, "./conf/trainer.yaml", "path of trainer conf");

int main(int argc, char* argv[]) {
    //gflags
    google::ParseCommandLineFlags(&argc, &argv, true);
    std::string gflag_conf = "./conf/gflags.conf";
    google::SetCommandLineOption("flagfile", gflag_conf.c_str()); 

    //load trainer config
    auto trainer_context_ptr = std::make_shared<TrainerContext>();
    trainer_context_ptr->trainer_config = YAML::LoadFile(FLAGS_feed_trainer_conf_path);    
X
xiexionghang 已提交
24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45

    //environment
    auto& config = trainer_context_ptr->trainer_config;
    std::string env_class = config["environment"]["environment_class"].as<std::string>();
    trainer_context_ptr->environment.reset(CREATE_INSTANCE(RuntimeEnvironment, env_class));
    if (trainer_context_ptr->environment->initialize(config["environment"]) != 0) {
        return -1;
    }
    EnvironmentRole role;
    auto* environment = trainer_context_ptr->environment.get();
    environment->wireup();
    if (environment->rank_id(EnvironmentRole::ALL) % 2 == 0) {
        role = EnvironmentRole::WORKER;
    } else {
        role = EnvironmentRole::PSERVER;
    } 
    environment->set_role(role);
    trainer_context_ptr->pslib.reset(new PSlib());
    std::string ps_config = config["environment"]["ps"].as<std::string>();
    trainer_context_ptr->pslib->initialize(ps_config, environment, role);
    //VLOG(3) << "Node Start With Role:" << role;    
     
X
xiexionghang 已提交
46
    std::vector<std::string> process_name_list = {
X
xiexionghang 已提交
47 48
        "InitEnvProcess",
        "LearnerProcess"
X
xiexionghang 已提交
49
    };
X
xiexionghang 已提交
50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65
    switch (role) {
    case EnvironmentRole::WORKER:
        for (const auto& process_name : process_name_list) {
            Process* process = CREATE_INSTANCE(Process, process_name);
            if (process == NULL) {
                VLOG(1) << "Process:" << process_name << " does not exist"; 
                return -1;
            }
            if (process->initialize(trainer_context_ptr) != 0) {
                VLOG(1) << "Process:" << process_name << " initialize failed"; 
                return -1;
            }
            trainer_context_ptr->process_list.push_back(std::shared_ptr<Process>(process));
        } 
        for (auto& process : trainer_context_ptr->process_list) {
            process->run();
X
xiexionghang 已提交
66
        }
X
xiexionghang 已提交
67 68 69 70 71
        break;
    case EnvironmentRole::PSERVER:
        //wait server done
        while (true) {
            sleep(10000);
X
xiexionghang 已提交
72
        }
X
xiexionghang 已提交
73
        break;
X
xiexionghang 已提交
74 75 76 77
    }

    return 0;
}