async_executor.cc 18.5 KB
Newer Older
W
wangguibao 已提交
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */

#include "paddle/fluid/framework/async_executor.h"
#include <stdio.h>
#include <string.h>
#include <fcntl.h>
W
wangguibao 已提交
19 20
#include <sys/types.h>
#include <sys/stat.h>
W
wangguibao 已提交
21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37
#include <unistd.h>
#include <fstream>
#include <iostream>
#include <map>
#include <algorithm>
#include "google/protobuf/message.h"
#include "google/protobuf/text_format.h"
#include "google/protobuf/io/zero_copy_stream_impl.h"

#include "gflags/gflags.h"
#include "paddle/fluid/framework/feed_fetch_method.h"
#include "paddle/fluid/framework/feed_fetch_type.h"
#include "paddle/fluid/framework/lod_rank_table.h"
#include "paddle/fluid/framework/lod_tensor_array.h"
#include "paddle/fluid/framework/op_registry.h"
#include "paddle/fluid/framework/reader.h"
#include "paddle/fluid/platform/place.h"
W
wangguibao 已提交
38
#include "paddle/fluid/inference/io.h"
W
wangguibao 已提交
39 40 41 42
#include "paddle/fluid/pybind/pybind.h"

namespace paddle {
namespace framework {
W
wangguibao 已提交
43 44 45 46 47 48 49
// State shared by every ExecutorThreadWorker instance.
// Mutex held by PickOneFile() while it reads and advances the file cursor.
std::mutex ExecutorThreadWorker::s_locker_for_pick_file_;
// Index into s_thread_filelist_ of the next file PickOneFile() hands out.
unsigned int ExecutorThreadWorker::s_current_file_idx_ = 0;
// Number of files finished so far; UpdateEpochNum() resets it to zero once it
// reaches the size of s_thread_filelist_.
size_t ExecutorThreadWorker::s_current_finished_file_cnt_ = 0;
// Epoch counter advanced by UpdateEpochNum() and compared against in Train().
unsigned int ExecutorThreadWorker::s_current_epoch_ = 0;
// NOTE(review): not referenced in the visible part of this file.
int ExecutorThreadWorker::s_current_save_epoch_ = 0;
// NOTE(review): not referenced in the visible part of this file.
bool ExecutorThreadWorker::s_is_first_worker_ = false;
// Training files registered through AddTrainFile().
std::vector<std::string> ExecutorThreadWorker::s_thread_filelist_;
W
wangguibao 已提交
50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147

// Makes `var` hold an object of the C++ type that corresponds to `var_type`.
//
// RAW variables are left untouched; their payload is created later by the
// operator that uses them. Any type that is not handled below raises via
// PADDLE_THROW, and the message lists exactly the types handled here.
void CreateTensor(Variable* var, proto::VarType::Type var_type) {
  switch (var_type) {
    case proto::VarType::LOD_TENSOR:
      var->GetMutable<LoDTensor>();
      break;
    case proto::VarType::SELECTED_ROWS:
      var->GetMutable<SelectedRows>();
      break;
    case proto::VarType::FEED_MINIBATCH:
    case proto::VarType::FETCH_LIST:
      // Both feed and fetch variables are backed by a FeedFetchList.
      var->GetMutable<FeedFetchList>();
      break;
    case proto::VarType::STEP_SCOPES:
      var->GetMutable<std::vector<Scope>>();
      break;
    case proto::VarType::LOD_RANK_TABLE:
      var->GetMutable<LoDRankTable>();
      break;
    case proto::VarType::LOD_TENSOR_ARRAY:
      var->GetMutable<LoDTensorArray>();
      break;
    case proto::VarType::PLACE_LIST:
      var->GetMutable<platform::PlaceList>();
      break;
    case proto::VarType::READER:
      var->GetMutable<ReaderHolder>();
      break;
    case proto::VarType::RAW:
      // GetMutable will be called in operator
      break;
    default:
      PADDLE_THROW(
          "Variable type %d is not in "
          "[LOD_TENSOR, SELECTED_ROWS, FEED_MINIBATCH, FETCH_LIST, "
          "STEP_SCOPES, LOD_RANK_TABLE, LOD_TENSOR_ARRAY, PLACE_LIST, "
          "READER, RAW]",
          var_type);
  }
}

static void read_binary_file(const std::string& filename,
                             std::string* content) {
  std::string &contents = *content;
  std::ifstream fin(filename, std::ios::in | std::ios::binary);
  if (!fin.good()) {
    LOG(ERROR) << "Cannot open file " << filename.c_str();
  }
  fin.seekg(0, std::ios::end);
  contents.clear();
  contents.resize(fin.tellg());
  fin.seekg(0, std::ios::beg);
  fin.read(&contents[0], contents.size());
  fin.close();
}

static void save_model(
    const std::unique_ptr<ProgramDesc> & main_program,
    Scope* scope,
    const std::vector<std::string> & param_names,
    const std::string & model_name,
    bool save_combine) {
  auto place = platform::CPUPlace();
  const BlockDesc& global_block = main_program->Block(0);
  std::vector<std::string> paralist;

  for (auto* var : global_block.AllVars()) {
    bool is_model_param = false;
    for (auto param_name : param_names) {
      if (var->Name() == param_name) {
        is_model_param = true;
        break;
      }
    }

    if (!is_model_param)  continue;

    if (!save_combine) {
      LOG(ERROR) << "model var name: " << var->Name().c_str();

      paddle::framework::AttributeMap attrs;
      attrs.insert({"file_path", model_name + "/" + var->Name()});
      auto save_op = paddle::framework::OpRegistry::CreateOp(
                                                      "save",
                                                      {{"X", {var->Name()}}},
                                                      {},
                                                      attrs);

      save_op->Run(*scope, place);
    } else {
      paralist.push_back(var->Name());
    }
  }

  if (save_combine) {
    std::sort(paralist.begin(), paralist.end());
    paddle::framework::AttributeMap attrs;
    attrs.insert({"file_path", model_name});
    auto save_op = paddle::framework::OpRegistry::CreateOp(
                                                      "save_combine",
                                                      {{"X", paralist}},
                                                      {},
                                                      attrs);
    save_op->Run(*scope, place);
  }
}   // end save_model


W
wangguibao 已提交
148 149
// Appends `file` to the file list shared by all workers.
void ExecutorThreadWorker::AddTrainFile(const std::string& file) {
  s_thread_filelist_.emplace_back(file);
}

W
wangguibao 已提交
152
void ExecutorThreadWorker::CreateThreadOperators(const ProgramDesc& program) {
W
wangguibao 已提交
153
  auto& block = program.Block(0);
W
wangguibao 已提交
154
  op_names_.clear();
W
wangguibao 已提交
155 156
  for (auto& op_desc : block.AllOps()) {
    std::unique_ptr<OperatorBase> local_op = OpRegistry::CreateOp(*op_desc);
W
wangguibao 已提交
157
    op_names_.push_back(op_desc->Type());
W
wangguibao 已提交
158
    OperatorBase* local_op_ptr = local_op.release();
W
wangguibao 已提交
159
    ops_.push_back(local_op_ptr);
W
wangguibao 已提交
160 161 162 163
    continue;
  }
}

W
wangguibao 已提交
164
void ExecutorThreadWorker::CreateThreadScope(const ProgramDesc& program) {
W
wangguibao 已提交
165
  auto& block = program.Block(0);
W
wangguibao 已提交
166
  thread_scope_ = &root_scope_->NewScope();
W
wangguibao 已提交
167 168
  for (auto& var : block.AllVars()) {
    if (var->Persistable()) {
W
wangguibao 已提交
169
      auto* ptr = root_scope_->Var(var->Name());
W
wangguibao 已提交
170 171 172 173
      CreateTensor(ptr, var->GetType());
      // LOGERR("create Persistable var[%s] finished",
      //      var->Name().c_str());
    } else {
W
wangguibao 已提交
174
      auto* ptr = thread_scope_->Var(var->Name());
W
wangguibao 已提交
175 176 177 178 179 180 181
      CreateTensor(ptr, var->GetType());
      // LOGERR("create unpersistable var[%s] finished",
      //      var->Name().c_str());
    }
  }
}

W
wangguibao 已提交
182 183
void ExecutorThreadWorker::SetDataFeed(const std::shared_ptr<DataFeed>& datafeed) {
  local_reader_ = datafeed;
W
wangguibao 已提交
184 185
}

W
wangguibao 已提交
186 187
void ExecutorThreadWorker::BindingDataFeedMemory() {
  const std::vector<std::string>& input_feed = local_reader_->GetUseSlotAlias();
W
wangguibao 已提交
188
  for (auto name : input_feed) {
W
wangguibao 已提交
189
    local_reader_->AddFeedVar(thread_scope_->Var(name), name);
W
wangguibao 已提交
190 191 192
  }
}

W
wangguibao 已提交
193
void ExecutorThreadWorker::SetInspectVarName(
W
wangguibao 已提交
194
    const std::string& inspect_var_name) {
W
wangguibao 已提交
195
  inspect_var_name_ = inspect_var_name;
W
wangguibao 已提交
196 197
}

W
wangguibao 已提交
198
void ExecutorThreadWorker::SetModelParamNames(
W
wangguibao 已提交
199
    const std::vector<std::string>& param_names) {
W
wangguibao 已提交
200
  model_param_names_ = param_names;
W
wangguibao 已提交
201 202
}

W
wangguibao 已提交
203
void ExecutorThreadWorker::SetSparseCommData(
W
wangguibao 已提交
204
    const std::map<std::string, int>& param_names) {
W
wangguibao 已提交
205
  sparse_comm_data_ = param_names;
W
wangguibao 已提交
206 207
}

W
wangguibao 已提交
208
void ExecutorThreadWorker::SetDevice() {
W
wangguibao 已提交
209 210 211 212 213 214 215 216 217 218 219
  static unsigned priority[] = {
    0, 1, 2, 3, 4, 5,
    6, 7, 8, 9, 10, 11,
    12, 13, 14, 15, 16, 17,
    18, 19, 20, 21, 22, 23,
    24, 25, 26, 27, 28, 29,
    30, 31, 32, 33, 34, 35,
    36, 37, 38, 39, 40, 41,
    42, 43, 44, 45, 46, 47
  };

W
wangguibao 已提交
220
  unsigned int i = this->thread_id_;
W
wangguibao 已提交
221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240

  if (i < sizeof(priority) / sizeof(unsigned)) {
    unsigned proc = priority[i];

    cpu_set_t mask;
    CPU_ZERO(&mask);
    CPU_SET(proc, &mask);

    if (-1 == sched_setaffinity(0, sizeof(mask), &mask)) {
      LOG(ERROR) << "WARNING: Failed to set thread affinity for thread " << i;
    } else {
      CPU_ZERO(&mask);
      if ((0 == sched_getaffinity(0, sizeof(mask), &mask))
          && CPU_ISSET(proc, &mask)) {
        LOG(ERROR) << "TRACE: Thread " << i << " is running on processor " << proc << "...";
      }
    }
  }
}

W
wangguibao 已提交
241 242
void ExecutorThreadWorker::UpdateEpochNum() {
  s_current_finished_file_cnt_++;
W
wangguibao 已提交
243

W
wangguibao 已提交
244 245 246
  if (s_current_finished_file_cnt_ >= s_thread_filelist_.size()) {
    s_current_finished_file_cnt_ = 0;
    s_current_epoch_++;
W
wangguibao 已提交
247 248 249
  }
}

W
wangguibao 已提交
250
const char* ExecutorThreadWorker::PickOneFile() {
W
wangguibao 已提交
251
  std::string file_to_be_preocessed;
W
wangguibao 已提交
252 253 254 255 256 257 258 259 260
  std::lock_guard<std::mutex> lock(s_locker_for_pick_file_);

  if (s_current_file_idx_ >= s_thread_filelist_.size()) {
    std::random_shuffle(s_thread_filelist_.begin(),
    s_thread_filelist_.end());
    s_current_file_idx_ = 0;
    // s_current_epoch_++; //example: when one file, one thread, it's bug
    LOG(ERROR) << "thread " << thread_id_
               << ": finish traing for epoch " << s_current_epoch_ + 1;
W
wangguibao 已提交
261
  }
W
wangguibao 已提交
262
  file_to_be_preocessed = s_thread_filelist_[s_current_file_idx_];
W
wangguibao 已提交
263

W
wangguibao 已提交
264
  s_current_file_idx_++;
W
wangguibao 已提交
265 266 267
  return file_to_be_preocessed.c_str();
}

W
wangguibao 已提交
268
void ExecutorThreadWorker::Train() {
W
wangguibao 已提交
269
  LOG(ERROR) << "begin to train";
W
wangguibao 已提交
270
  SetDevice();
W
wangguibao 已提交
271 272 273 274
#ifdef LOCAL_PROF
  std::vector<double> op_total_time;
  std::vector<std::string> op_name;
  // int total_batch = 0;
W
wangguibao 已提交
275
  for (auto& op : ops_) {
W
wangguibao 已提交
276 277
    op_name.push_back(op->Type());
  }
W
wangguibao 已提交
278
  op_total_time.resize(ops_.size());
W
wangguibao 已提交
279 280 281 282 283
  for (int i = 0; i < op_total_time.size(); ++i) {
    op_total_time[i] = 0.0;
  }
#endif
  std::string inspect_key = "inspect";
W
wangguibao 已提交
284 285 286
  if (!inspect_var_name_.empty()) {
    inspect_key = inspect_var_name_.substr(0,
                                          inspect_var_name_.find_first_of('_'));
W
wangguibao 已提交
287 288
  }

W
wangguibao 已提交
289
  for (unsigned i = 0; i < max_epoch_; ++i) {
W
wangguibao 已提交
290 291 292 293 294 295 296 297
    LOG(ERROR) << "epoch: " << i;
#ifdef LOCAL_PROF
    Timer timeline;
    double total_time = 0.0;
    double read_time = 0.0;
#endif
    float total_inspect = 0;
    int batch_num = 1;
W
wangguibao 已提交
298 299 300
    while (i == s_current_epoch_) {
      const char* filename = PickOneFile();
      local_reader_->SetFile(filename);
W
wangguibao 已提交
301 302 303 304
      while (true) {
#ifdef LOCAL_PROF
        timeline.start();
#endif
W
wangguibao 已提交
305
        bool flag = local_reader_->ReadBatch();
W
wangguibao 已提交
306 307 308 309 310 311 312 313 314 315 316 317
        if (!flag) {
          break;
        }
#ifdef LOCAL_PROF
        timeline.pause();
        read_time += timeline.elapsed_sec();
        total_time += timeline.elapsed_sec();
#endif
        if (!flag) {
          break;
        }

W
wangguibao 已提交
318
        for (unsigned int i = 0; i < ops_.size(); ++i) {
W
wangguibao 已提交
319 320 321
#ifdef LOCAL_PROF
          timeline.start();
#endif
W
wangguibao 已提交
322
          ops_[i]->Run(*thread_scope_, place_);
W
wangguibao 已提交
323 324 325 326 327 328 329 330
#ifdef LOCAL_PROF
          timeline.pause();
          op_total_time[i] += timeline.elapsed_sec();
          total_time += timeline.elapsed_sec();
#endif
        }
        batch_num++;
        float avg_inspect = 0.0;
W
wangguibao 已提交
331 332
        if (!inspect_var_name_.empty()) {
          avg_inspect = thread_scope_->FindVar(inspect_var_name_)
W
wangguibao 已提交
333 334 335 336
                                     ->GetMutable<LoDTensor>()
                                     ->data<float>()[0];
        }
        total_inspect += avg_inspect;
W
wangguibao 已提交
337
        thread_scope_->DropKids();
W
wangguibao 已提交
338
      }
W
wangguibao 已提交
339
      UpdateEpochNum();
W
wangguibao 已提交
340
      LOG(ERROR) << "memory used after epoch " << i + 1
W
wangguibao 已提交
341
                 << " called: " << memory::memory_usage(place_);
W
wangguibao 已提交
342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359

#ifdef LOCAL_PROF
      for (int i = 0; i < op_total_time.size(); ++i) {
        std::cerr << "op_name:[" << i << "][" << op_name[i] << "]"
                  << " op_mean_time:[" << op_total_time[i] << "s]"
                  << std::endl;
      }
      std::cerr << "read time: " << read_time << "s" << std::endl;
#endif
    }
#ifdef LOCAL_PROF
    LOG(ERROR) << "mean " << inspect_key.c_str()
               << " of epoch " << i + 1 << ": " << total_inspect / batch_num
               << ", total_time: " << total_time;
#else
    LOG(ERROR) << "mean " << inspect_key.c_str()
               << " of epoch " << i + 1 << ": " << total_inspect / batch_num;
#endif
W
wangguibao 已提交
360
    if (thread_id_ == 0) {
W
wangguibao 已提交
361 362 363 364
      char modelfile[1024];
      snprintf(&modelfile[0],
              sizeof(modelfile),
              "%s_epoch%d.model",
W
wangguibao 已提交
365
              model_prefix_.c_str(),
W
wangguibao 已提交
366 367 368 369 370 371 372
              i);
      std::string model_filename = std::string(modelfile);
      // this save_inference_model can only save imdbtask, should make this
      // general
      //
      // currently comment it
      LOG(ERROR) << "Going to save model " << modelfile;
W
wangguibao 已提交
373 374 375
      save_model(main_program_,
          thread_scope_,
          model_param_names_,
W
wangguibao 已提交
376 377 378 379 380 381
          model_filename,
          true);
    }
  }
}

W
wangguibao 已提交
382 383
void ExecutorThreadWorker::SetThreadId(int tid) {
  thread_id_ = tid;
W
wangguibao 已提交
384 385
}

W
wangguibao 已提交
386 387
// Sets the place on which this worker runs its operators.
void ExecutorThreadWorker::SetPlace(const platform::Place& place) {
  this->place_ = place;
}

W
wangguibao 已提交
390
void ExecutorThreadWorker::SetMainProgram(
W
wangguibao 已提交
391
    const ProgramDesc& main_program_desc) {
W
wangguibao 已提交
392
  main_program_.reset(new ProgramDesc(main_program_desc));
W
wangguibao 已提交
393 394
}

W
wangguibao 已提交
395 396
// Sets the root scope; CreateThreadScope() derives the worker scope from it.
void ExecutorThreadWorker::SetRootScope(Scope* g_scope) {
  this->root_scope_ = g_scope;
}

W
wangguibao 已提交
399 400
void ExecutorThreadWorker::SetMaxTrainingEpoch(int max_epoch) {
  max_epoch_ = max_epoch;
W
wangguibao 已提交
401 402
}

W
wangguibao 已提交
403
// Constructs an executor bound to `place`; the place is later forwarded to
// every worker and used to run the startup operators.
AsyncExecutor::AsyncExecutor(const platform::Place& place) : place_(place) {}
W
wangguibao 已提交
404

W
wangguibao 已提交
405
// Sets the root scope that PrepareThreads() passes to every worker.
void AsyncExecutor::InitRootScope(Scope* scope) {
  this->root_scope_ = scope;
}

W
wangguibao 已提交
409
void AsyncExecutor::SetMaxTrainingEpoch(int max_epoch) {
W
wangguibao 已提交
410
  max_epoch_ = max_epoch;
W
wangguibao 已提交
411 412
}

W
wangguibao 已提交
413
void AsyncExecutor::SetDataFeedName(const char* feedname) {
W
wangguibao 已提交
414
  feed_name_ = std::string(feedname);
W
wangguibao 已提交
415 416
}

W
wangguibao 已提交
417
void AsyncExecutor::SetModelPrefix(const std::string& model_prefix) {
W
wangguibao 已提交
418
  model_prefix_ = model_prefix;
W
wangguibao 已提交
419 420
}

W
wangguibao 已提交
421
void AsyncExecutor::RunStartupProgram(const ProgramDesc& program,
W
wangguibao 已提交
422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452
                                        Scope* scope) {
  auto& block = program.Block(0);
  for (auto& var : block.AllVars()) {
    if (var->Persistable()) {
      auto* ptr = scope->Var(var->Name());
      CreateTensor(ptr, var->GetType());
      // LOGERR("Persistable Var Name:%s", var->Name().c_str());
    }
  }

  std::map<std::string, int> param_dict;
  std::vector<OperatorBase *> ops;
  for (auto& op_desc : block.AllOps()) {
    std::vector<std::string> param_name_vec = op_desc->OutputArgumentNames();
    bool need_to_run = false;
    for (auto& name : param_name_vec) {
      if (param_dict.find(name) == param_dict.end()) {
        param_dict[name] = 1;
        need_to_run = true;
      }
    }
    if (need_to_run) {
      std::unique_ptr<OperatorBase> local_op = OpRegistry::CreateOp(*op_desc);
      OperatorBase* local_op_ptr = local_op.release();
      ops.push_back(local_op_ptr);
    }
  }
  // LOGERR("There are %d parameters in startup program, %d op needs to run",
  //        param_dict.size(), ops.size());

  for (auto& op : ops) {
W
wangguibao 已提交
453
    op->Run(*scope, place_);
W
wangguibao 已提交
454 455 456 457 458 459 460 461
  }
  // LOGERR("total time for startup program: %fs", timeline.elapsed_sec());
  for (auto& op : ops) {
    delete op;
  }
  // LOGERR("run startup program done.");
}

W
wangguibao 已提交
462
std::unique_ptr<ProgramDesc> AsyncExecutor::LoadDescFromFile(
W
wangguibao 已提交
463 464 465 466 467 468 469
    const std::string& f) {
  std::string program_desc_str;
  read_binary_file(f, &program_desc_str);
  std::unique_ptr<ProgramDesc> program(new ProgramDesc(program_desc_str));
  return program;
}

W
wangguibao 已提交
470
void AsyncExecutor::SetDenseCommTensor(
W
wangguibao 已提交
471
    const std::vector<std::string>& dense_comm_tensor) {
W
wangguibao 已提交
472
  dense_comm_tensor_.resize(dense_comm_tensor.size());
W
wangguibao 已提交
473
  for (unsigned int i = 0; i < dense_comm_tensor.size(); ++i) {
W
wangguibao 已提交
474
    dense_comm_tensor_[i] = dense_comm_tensor[i];
W
wangguibao 已提交
475 476 477
  }
}

W
wangguibao 已提交
478
void AsyncExecutor::SetSparseCommTensor(
W
wangguibao 已提交
479
    const std::vector<std::string>& sparse_comm_tensor) {
W
wangguibao 已提交
480
  sparse_comm_tensor_.resize(sparse_comm_tensor.size());
W
wangguibao 已提交
481
  for (unsigned int i = 0; i < sparse_comm_tensor.size(); ++i) {
W
wangguibao 已提交
482
    sparse_comm_tensor_[i] = sparse_comm_tensor[i];
W
wangguibao 已提交
483 484 485
  }
}

W
wangguibao 已提交
486
void AsyncExecutor::SetSparseCommData(
W
wangguibao 已提交
487
    const std::map<std::string, int>& sparse_comm_data) {
W
wangguibao 已提交
488 489
  sparse_comm_data_ = sparse_comm_data;
  LOG(INFO) << "Sparse comm data: " << sparse_comm_data_.size();
W
wangguibao 已提交
490 491
}

W
wangguibao 已提交
492
void AsyncExecutor::SetFileList(const char* filelist) {
W
wangguibao 已提交
493
  filelist_.clear();
W
wangguibao 已提交
494 495 496 497
  std::ifstream fin(filelist);
  std::string filename;
  while (fin >> filename) {
    LOG(ERROR) << "add " << filename.c_str() << " to filelist";
W
wangguibao 已提交
498
    filelist_.push_back(filename);
W
wangguibao 已提交
499 500 501 502
  }
  fin.close();
}

W
wangguibao 已提交
503
// Replaces the file list with the names in `tfiles`.
void AsyncExecutor::SetFileList(std::vector<std::string> tfiles) {
  filelist_.assign(tfiles.begin(), tfiles.end());
}

W
wangguibao 已提交
509
void AsyncExecutor::SetInspectVarName(const std::string& inspect_var_name) {
W
wangguibao 已提交
510
  inspect_var_name_ = inspect_var_name;
W
wangguibao 已提交
511 512
}

W
wangguibao 已提交
513
void AsyncExecutor::SetParamNames(const std::vector<std::string>& param_names) {
W
wangguibao 已提交
514
  model_param_names_ = param_names;
W
wangguibao 已提交
515 516
}

W
wangguibao 已提交
517
void AsyncExecutor::SetThreadNum(const int thread_num) {
W
wangguibao 已提交
518
  thread_num_ = thread_num;
W
wangguibao 已提交
519 520
}

W
wangguibao 已提交
521
void AsyncExecutor::PrepareThreads(const ProgramDesc& host_program) {
W
wangguibao 已提交
522 523 524 525 526 527 528 529 530 531 532 533 534 535
  workers_.resize(thread_num_);
  for (unsigned i = 0; i < thread_num_; ++i) {
    workers_[i].reset(new ExecutorThreadWorker);
    workers_[i]->SetThreadId(i);
    workers_[i]->CreateThreadOperators(host_program);
    workers_[i]->SetRootScope(root_scope_);
    workers_[i]->SetPlace(place_);
    workers_[i]->SetMaxTrainingEpoch(max_epoch_);
    workers_[i]->CreateThreadScope(host_program);
    workers_[i]->SetInspectVarName(inspect_var_name_);
    workers_[i]->SetModelParamNames(model_param_names_);
    workers_[i]->SetSparseCommData(sparse_comm_data_);
    workers_[i]->SetMainProgram(host_program);
    workers_[i]->SetModelPrefix(model_prefix_);
W
wangguibao 已提交
536 537
  }

W
wangguibao 已提交
538
  for (unsigned i = 0; i < filelist_.size(); ++i) {
W
wangguibao 已提交
539 540
    // suppose at least one trainer thread here, and
    // filelist is static so that we only add filelist once
W
wangguibao 已提交
541
    workers_[0]->AddTrainFile(filelist_[i]);
W
wangguibao 已提交
542 543
  }
  // mpi_wrapper::ModelParam model_param(true);
W
wangguibao 已提交
544
  // workers_[0]->register_parallel_training_param(model_param);
W
wangguibao 已提交
545

W
wangguibao 已提交
546
  for (unsigned i = 0; i < thread_num_; ++i) {
W
wangguibao 已提交
547
    // new a datafeed here
W
wangguibao 已提交
548
    std::shared_ptr<DataFeed> local_feed = CreateDataFeed(feed_name_.c_str());
W
wangguibao 已提交
549
    local_feed->Init();
W
wangguibao 已提交
550 551 552 553
    local_feed->SetBatchSize(batch_size_);
    workers_[i]->SetDataFeed(local_feed);
    workers_[i]->BindingDataFeedMemory();
    workers_[i]->SetThreadId(i);
W
wangguibao 已提交
554 555 556
  }
}

W
wangguibao 已提交
557
void AsyncExecutor::RunAsyncExecutor(const ProgramDesc& host_program) {
W
wangguibao 已提交
558
  // thread binding here?
W
wangguibao 已提交
559 560 561 562
  PrepareThreads(host_program);
  for (unsigned i = 0; i < thread_num_; ++i) {
    threads_.push_back(std::thread(&ExecutorThreadWorker::Train,
                      workers_[i].get()));
W
wangguibao 已提交
563 564
  }

W
wangguibao 已提交
565
  for (auto& th : threads_) {
W
wangguibao 已提交
566 567 568 569
    th.join();
  }
}

W
wangguibao 已提交
570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589
void AsyncExecutor::LoadInitModel() {
  auto place = paddle::platform::CPUPlace();
  auto* executor = new paddle::framework::Executor(place);

  std::string init_prog_file = model_path_ + "/" + init_prog_file_;
  std::string init_model_file = model_path_ + "/" + init_model_file_;

  struct stat stat_buf;

  if (stat(init_prog_file.c_str(), &stat_buf) == 0 &&
      S_ISREG(stat_buf.st_mode) &&
      stat(init_model_file.c_str(), &stat_buf) == 0 &&
      S_ISREG(stat_buf.st_mode)) {
    paddle::inference::Load(executor,
                          GetRootScope(),
                          model_path_ + "/" + init_prog_file_,
                          model_path_ + "/" + init_model_file_);
  }
}
}   // end namespace framework
W
wangguibao 已提交
590 591 592
}   // end namespace paddle

/* vim: set expandtab ts=2 sw=2 sts=2 tw=100: */