scheduler.cpp 4.2 KB
Newer Older
W
willzhang4a58 已提交
1 2 3
#include <gflags/gflags.h>
#include <glog/logging.h>
#include "oneflow/core/common/str_util.h"
W
willzhang4a58 已提交
4 5
#include "oneflow/core/control/ctrl_client.h"
#include "oneflow/core/control/ctrl_server.h"
W
willzhang4a58 已提交
6
#include "oneflow/core/job/job_desc.h"
W
willzhang4a58 已提交
7
#include "oneflow/core/job/plan.pb.h"
W
willzhang4a58 已提交
8
#include "oneflow/core/job/runtime_context.h"
W
willzhang4a58 已提交
9
#include "oneflow/core/persistence/file_system.h"
W
willzhang4a58 已提交
10 11 12

namespace oneflow {

W
willzhang4a58 已提交
13 14 15 16 17 18 19 20
class Scheduler final {
 public:
  OF_DISALLOW_COPY_AND_MOVE(Scheduler);
  ~Scheduler() = default;

  OF_SINGLETON(Scheduler);

  void Process(const std::string& job_conf_filepath,
W
willzhang4a58 已提交
21
               const std::string& this_machine_name, char** env);
W
willzhang4a58 已提交
22 23

 private:
W
willzhang4a58 已提交
24
  Scheduler() = default;
W
willzhang4a58 已提交
25
  void NewAllSingleton(const std::string& job_conf_filepath,
W
willzhang4a58 已提交
26
                       const std::string& this_machine_name, char** env);
W
willzhang4a58 已提交
27
  void DeleteAllSingleton();
W
willzhang4a58 已提交
28
  void SystemCall(const std::string& cmd);
W
willzhang4a58 已提交
29

W
willzhang4a58 已提交
30
  std::unique_ptr<CtrlServer> ctrl_server_;
W
willzhang4a58 已提交
31
  std::string env_prefix_;
W
willzhang4a58 已提交
32 33 34
};

void Scheduler::Process(const std::string& job_conf_filepath,
W
willzhang4a58 已提交
35 36
                        const std::string& this_machine_name, char** env) {
  NewAllSingleton(job_conf_filepath, this_machine_name, env);
W
willzhang4a58 已提交
37 38 39
  auto plan = of_make_unique<Plan>();
  std::string naive_plan_filepath = JoinPath(LogDir(), "naive_plan");
  // Compile
W
willzhang4a58 已提交
40
  if (RuntimeCtx::Singleton()->IsThisMachineMaster()) {
W
willzhang4a58 已提交
41
    std::stringstream compile_cmd;
C
run  
chengtbf 已提交
42
#ifdef PLATFORM_WINDOWS
J
jiyuan 已提交
43
    compile_cmd << "compiler.exe "
C
chengtbf 已提交
44 45
                << "-job_conf_filepath=\"" << job_conf_filepath << "\" "
                << "-plan_filepath=\"" << naive_plan_filepath << "\"";
C
run  
chengtbf 已提交
46
#else
W
willzhang4a58 已提交
47 48 49
    compile_cmd << "./compiler "
                << "-job_conf_filepath=" << job_conf_filepath << " "
                << "-plan_filepath=" << naive_plan_filepath;
C
chengtbf 已提交
50
#endif  // PLATFORM_WINDOWS
W
willzhang4a58 已提交
51 52
    SystemCall(compile_cmd.str());
    ParseProtoFromTextFile(naive_plan_filepath, plan.get());
W
willzhang4a58 已提交
53
    CtrlClient::Singleton()->PushPlan(*plan);
W
willzhang4a58 已提交
54
  } else {
W
willzhang4a58 已提交
55
    CtrlClient::Singleton()->PullPlan(plan.get());
W
willzhang4a58 已提交
56 57 58
  }
  OF_BARRIER();
  if (RuntimeCtx::Singleton()->IsThisMachineMaster()) {
W
willzhang4a58 已提交
59
    CtrlClient::Singleton()->ClearPlan();
W
willzhang4a58 已提交
60 61
  } else {
    PrintProtoToTextFile(*plan, naive_plan_filepath);
W
willzhang4a58 已提交
62
  }
J
jiyuan 已提交
63

W
willzhang4a58 已提交
64 65
  // Runtime
  std::stringstream runtime_cmd;
C
run  
chengtbf 已提交
66
#ifdef PLATFORM_WINDOWS
J
jiyuan 已提交
67
  runtime_cmd << "runtime.exe "
C
chengtbf 已提交
68 69
              << "-plan_filepath=\"" << naive_plan_filepath << "\" "
              << "-this_machine_name=\"" << this_machine_name << "\"";
C
run  
chengtbf 已提交
70
#else
W
willzhang4a58 已提交
71 72
  runtime_cmd << "./runtime "
              << "-plan_filepath=" << naive_plan_filepath << " "
W
willzhang4a58 已提交
73
              << "-this_machine_name=" << this_machine_name;
C
chengtbf 已提交
74
#endif  // PLATFORM_WINDOWS
W
willzhang4a58 已提交
75
  SystemCall(runtime_cmd.str());
W
willzhang4a58 已提交
76
  DeleteAllSingleton();
W
willzhang4a58 已提交
77
}
W
willzhang4a58 已提交
78 79

void Scheduler::NewAllSingleton(const std::string& job_conf_filepath,
W
willzhang4a58 已提交
80 81
                                const std::string& this_machine_name,
                                char** env) {
W
willzhang4a58 已提交
82 83 84 85 86
  oneflow::JobConf job_conf;
  oneflow::ParseProtoFromTextFile(job_conf_filepath, &job_conf);
  JobDesc::NewSingleton(job_conf);
  IDMgr::NewSingleton();
  RuntimeCtx::NewSingleton(this_machine_name);
W
willzhang4a58 已提交
87 88 89
  ctrl_server_.reset(
      new CtrlServer(RuntimeCtx::Singleton()->GetThisCtrlAddr()));
  CtrlClient::NewSingleton();
W
willzhang4a58 已提交
90
  env_prefix_ = "";
W
willzhang4a58 已提交
91
  std::stringstream ss;
W
willzhang4a58 已提交
92 93 94 95 96
  while (*env) {
    LOG(INFO) << *env;
    ss << (*env++) << " ";
  }
  env_prefix_ = ss.str();
W
willzhang4a58 已提交
97 98
}

W
willzhang4a58 已提交
99 100 101 102 103 104 105 106
// Releases what NewAllSingleton() set up, in exactly the reverse of the order
// in which it was created there: CtrlClient, the control server, RuntimeCtx,
// IDMgr, and finally JobDesc.
void Scheduler::DeleteAllSingleton() {
  CtrlClient::DeleteSingleton();
  // Destroys the CtrlServer instance owned by this scheduler.
  ctrl_server_.reset();
  RuntimeCtx::DeleteSingleton();
  IDMgr::DeleteSingleton();
  JobDesc::DeleteSingleton();
}

W
willzhang4a58 已提交
107
void Scheduler::SystemCall(const std::string& cmd) {
W
willzhang4a58 已提交
108
  LOG(INFO) << "SystemCall: [" << cmd << "]";
W
willzhang4a58 已提交
109
  CHECK_EQ(std::system(cmd.c_str()), 0);
W
willzhang4a58 已提交
110 111
}

W
willzhang4a58 已提交
112 113
}  // namespace oneflow

W
willzhang4a58 已提交
114
// Command-line flags consumed by main(). Both default to the empty string and
// carry no help text.
//   job_conf_filepath: handed to Scheduler::Process as the job configuration
//                      path.
//   this_machine_name: handed to Scheduler::Process as this machine's name.
DEFINE_string(job_conf_filepath, "", "");
DEFINE_string(this_machine_name, "", "");

W
willzhang4a58 已提交
117
int main(int argc, char** argv, char** env) {
W
willzhang4a58 已提交
118 119
  google::InitGoogleLogging(argv[0]);
  google::ParseCommandLineFlags(&argc, &argv, true);
W
willzhang4a58 已提交
120
  oneflow::LocalFS()->CreateDirIfNotExist(oneflow::LogDir());
W
willzhang4a58 已提交
121
  oneflow::RedirectStdoutAndStderrToGlogDir();
W
willzhang4a58 已提交
122 123 124
  LOG(INFO) << "Scheduler Start";
  oneflow::Scheduler::NewSingleton();
  oneflow::Scheduler::Singleton()->Process(FLAGS_job_conf_filepath,
W
willzhang4a58 已提交
125
                                           FLAGS_this_machine_name, env);
W
willzhang4a58 已提交
126
  oneflow::Scheduler::DeleteSingleton();
W
willzhang4a58 已提交
127
  oneflow::CloseStdoutAndStderr();
W
willzhang4a58 已提交
128
  LOG(INFO) << "Scheduler Stop";
W
willzhang4a58 已提交
129 130
  return 0;
}