async_executor.cc 18.3 KB
Newer Older
W
wangguibao 已提交
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */

#include "paddle/fluid/framework/async_executor.h"
#include <fcntl.h>
#include <stdio.h>
#include <string.h>
#include <sys/stat.h>
#include <sys/types.h>
#include <unistd.h>
#include <algorithm>
#include <fstream>
#include <iostream>
#include <map>
#include <memory>
#include <mutex>
#include <thread>
#include "google/protobuf/io/zero_copy_stream_impl.h"
#include "google/protobuf/message.h"
#include "google/protobuf/text_format.h"

#include "gflags/gflags.h"
#include "paddle/fluid/framework/feed_fetch_method.h"
#include "paddle/fluid/framework/feed_fetch_type.h"
#include "paddle/fluid/framework/lod_rank_table.h"
#include "paddle/fluid/framework/lod_tensor_array.h"
#include "paddle/fluid/framework/op_registry.h"
#include "paddle/fluid/framework/reader.h"
#include "paddle/fluid/inference/io.h"
#include "paddle/fluid/platform/place.h"
#include "paddle/fluid/pybind/pybind.h"

namespace paddle {
namespace framework {
W
wangguibao 已提交
43 44 45 46 47 48 49
// Process-global state shared by every ExecutorThreadWorker: the training
// file list and the epoch/file progress counters. All worker threads
// coordinate through these; access is intended to be serialized by
// s_locker_for_pick_file_.
std::mutex ExecutorThreadWorker::s_locker_for_pick_file_;
unsigned int ExecutorThreadWorker::s_current_file_idx_ = 0;
size_t ExecutorThreadWorker::s_current_finished_file_cnt_ = 0;
unsigned int ExecutorThreadWorker::s_current_epoch_ = 0;
int ExecutorThreadWorker::s_current_save_epoch_ = 0;
bool ExecutorThreadWorker::s_is_first_worker_ = false;
std::vector<std::string> ExecutorThreadWorker::s_thread_filelist_;
W
wangguibao 已提交
50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80

// Allocate the concrete payload inside `var` matching `var_type`, so later
// Get<T>() calls on the variable observe the expected type. RAW variables
// are left untouched; unknown types throw.
void CreateTensor(Variable* var, proto::VarType::Type var_type) {
  switch (var_type) {
    case proto::VarType::LOD_TENSOR:
      var->GetMutable<LoDTensor>();
      break;
    case proto::VarType::SELECTED_ROWS:
      var->GetMutable<SelectedRows>();
      break;
    case proto::VarType::FEED_MINIBATCH:
    case proto::VarType::FETCH_LIST:
      var->GetMutable<FeedFetchList>();
      break;
    case proto::VarType::STEP_SCOPES:
      var->GetMutable<std::vector<Scope>>();
      break;
    case proto::VarType::LOD_RANK_TABLE:
      var->GetMutable<LoDRankTable>();
      break;
    case proto::VarType::LOD_TENSOR_ARRAY:
      var->GetMutable<LoDTensorArray>();
      break;
    case proto::VarType::PLACE_LIST:
      var->GetMutable<platform::PlaceList>();
      break;
    case proto::VarType::READER:
      var->GetMutable<ReaderHolder>();
      break;
    case proto::VarType::RAW:
      // GetMutable will be called in operator
      break;
    default:
      PADDLE_THROW(
          "Variable type %d is not in "
          "[LOD_TENSOR, SELECTED_ROWS, FEED_MINIBATCH, FETCH_LIST, "
          "LOD_RANK_TABLE, PLACE_LIST, READER, CHANNEL, RAW]",
          var_type);
  }
}

W
wangguibao 已提交
81
// Read the entire contents of `filename` into *content as raw bytes.
// On open failure the error is logged and *content is left unchanged.
static void ReadBinaryFile(const std::string& filename,
                             std::string* content) {
  std::string &contents = *content;
  std::ifstream fin(filename, std::ios::in | std::ios::binary);
  if (!fin.good()) {
    LOG(ERROR) << "Cannot open file " << filename.c_str();
    // FIX: previously execution fell through here, so tellg() returned -1
    // on the failed stream and contents.resize(-1) was attempted.
    return;
  }
  fin.seekg(0, std::ios::end);
  contents.clear();
  contents.resize(fin.tellg());
  fin.seekg(0, std::ios::beg);
  fin.read(&contents[0], contents.size());
  fin.close();
}

W
wangguibao 已提交
96
static void SaveModel(
W
wangguibao 已提交
97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144
    const std::unique_ptr<ProgramDesc> & main_program,
    Scope* scope,
    const std::vector<std::string> & param_names,
    const std::string & model_name,
    bool save_combine) {
  auto place = platform::CPUPlace();
  const BlockDesc& global_block = main_program->Block(0);
  std::vector<std::string> paralist;

  for (auto* var : global_block.AllVars()) {
    bool is_model_param = false;
    for (auto param_name : param_names) {
      if (var->Name() == param_name) {
        is_model_param = true;
        break;
      }
    }

    if (!is_model_param)  continue;

    if (!save_combine) {
      LOG(ERROR) << "model var name: " << var->Name().c_str();

      paddle::framework::AttributeMap attrs;
      attrs.insert({"file_path", model_name + "/" + var->Name()});
      auto save_op = paddle::framework::OpRegistry::CreateOp(
                                                      "save",
                                                      {{"X", {var->Name()}}},
                                                      {},
                                                      attrs);

      save_op->Run(*scope, place);
    } else {
      paralist.push_back(var->Name());
    }
  }

  if (save_combine) {
    std::sort(paralist.begin(), paralist.end());
    paddle::framework::AttributeMap attrs;
    attrs.insert({"file_path", model_name});
    auto save_op = paddle::framework::OpRegistry::CreateOp(
                                                      "save_combine",
                                                      {{"X", paralist}},
                                                      {},
                                                      attrs);
    save_op->Run(*scope, place);
  }
W
wangguibao 已提交
145
}   // end SaveModel
W
wangguibao 已提交
146 147


W
wangguibao 已提交
148 149
// Append `file` to the process-global training file list shared by all
// worker threads (s_thread_filelist_ is static, so one registration
// serves every worker).
void ExecutorThreadWorker::AddTrainFile(const std::string& file) {
  s_thread_filelist_.push_back(file);
}

W
wangguibao 已提交
152
void ExecutorThreadWorker::CreateThreadOperators(const ProgramDesc& program) {
W
wangguibao 已提交
153
  auto& block = program.Block(0);
W
wangguibao 已提交
154
  op_names_.clear();
W
wangguibao 已提交
155 156
  for (auto& op_desc : block.AllOps()) {
    std::unique_ptr<OperatorBase> local_op = OpRegistry::CreateOp(*op_desc);
W
wangguibao 已提交
157
    op_names_.push_back(op_desc->Type());
W
wangguibao 已提交
158
    OperatorBase* local_op_ptr = local_op.release();
W
wangguibao 已提交
159
    ops_.push_back(local_op_ptr);
W
wangguibao 已提交
160 161 162 163
    continue;
  }
}

W
wangguibao 已提交
164
void ExecutorThreadWorker::CreateThreadScope(const ProgramDesc& program) {
W
wangguibao 已提交
165
  auto& block = program.Block(0);
W
wangguibao 已提交
166
  thread_scope_ = &root_scope_->NewScope();
W
wangguibao 已提交
167 168
  for (auto& var : block.AllVars()) {
    if (var->Persistable()) {
W
wangguibao 已提交
169
      auto* ptr = root_scope_->Var(var->Name());
W
wangguibao 已提交
170 171
      CreateTensor(ptr, var->GetType());
    } else {
W
wangguibao 已提交
172
      auto* ptr = thread_scope_->Var(var->Name());
W
wangguibao 已提交
173 174 175 176 177
      CreateTensor(ptr, var->GetType());
    }
  }
}

W
wangguibao 已提交
178 179
// Attach the data feed this worker will read mini-batches from.
void ExecutorThreadWorker::SetDataFeed(const std::shared_ptr<DataFeed>& datafeed) {
  local_reader_ = datafeed;
}

W
wangguibao 已提交
182 183
void ExecutorThreadWorker::BindingDataFeedMemory() {
  const std::vector<std::string>& input_feed = local_reader_->GetUseSlotAlias();
W
wangguibao 已提交
184
  for (auto name : input_feed) {
W
wangguibao 已提交
185
    local_reader_->AddFeedVar(thread_scope_->Var(name), name);
W
wangguibao 已提交
186 187 188
  }
}

W
wangguibao 已提交
189
// Record the name of the variable (e.g. a loss/metric tensor) whose value
// Train() reports after each batch and epoch.
void ExecutorThreadWorker::SetInspectVarName(
    const std::string& inspect_var_name) {
  inspect_var_name_ = inspect_var_name;
}

W
wangguibao 已提交
194
// Record the parameter names that SaveModel persists at the end of an epoch.
void ExecutorThreadWorker::SetModelParamNames(
    const std::vector<std::string>& param_names) {
  model_param_names_ = param_names;
}

W
wangguibao 已提交
199
// Store the sparse-communication table (name -> integer setting).
// NOTE(review): the parameter is named `param_names` but actually carries
// the comm-data map — presumably name -> slot/partition id; confirm
// against the caller before relying on the meaning of the values.
void ExecutorThreadWorker::SetSparseCommData(
    const std::map<std::string, int>& param_names) {
  sparse_comm_data_ = param_names;
}

W
wangguibao 已提交
204
// Pin the calling worker thread to one CPU core, chosen by thread id via
// the `priority` table (currently the identity mapping over cores 0-47).
// Threads whose id exceeds the table size are left unpinned.
void ExecutorThreadWorker::SetDevice() {
  static unsigned priority[] = {
    0, 1, 2, 3, 4, 5,
    6, 7, 8, 9, 10, 11,
    12, 13, 14, 15, 16, 17,
    18, 19, 20, 21, 22, 23,
    24, 25, 26, 27, 28, 29,
    30, 31, 32, 33, 34, 35,
    36, 37, 38, 39, 40, 41,
    42, 43, 44, 45, 46, 47
  };

  unsigned int i = this->thread_id_;

  if (i < sizeof(priority) / sizeof(unsigned)) {
    unsigned proc = priority[i];

    cpu_set_t mask;
    CPU_ZERO(&mask);
    CPU_SET(proc, &mask);

    // sched_setaffinity(0, ...) applies to the calling thread itself.
    if (-1 == sched_setaffinity(0, sizeof(mask), &mask)) {
      LOG(ERROR) << "WARNING: Failed to set thread affinity for thread " << i;
    } else {
      // Read the mask back to confirm the binding actually took effect.
      CPU_ZERO(&mask);
      if ((0 == sched_getaffinity(0, sizeof(mask), &mask))
          && CPU_ISSET(proc, &mask)) {
        LOG(ERROR) << "TRACE: Thread " << i << " is running on processor " << proc << "...";
      }
    }
  }
}

W
wangguibao 已提交
237 238
void ExecutorThreadWorker::UpdateEpochNum() {
  s_current_finished_file_cnt_++;
W
wangguibao 已提交
239

W
wangguibao 已提交
240 241 242
  if (s_current_finished_file_cnt_ >= s_thread_filelist_.size()) {
    s_current_finished_file_cnt_ = 0;
    s_current_epoch_++;
W
wangguibao 已提交
243 244 245
  }
}

W
wangguibao 已提交
246
const char* ExecutorThreadWorker::PickOneFile() {
W
wangguibao 已提交
247
  std::string file_to_be_preocessed;
W
wangguibao 已提交
248 249 250 251 252 253 254 255 256
  std::lock_guard<std::mutex> lock(s_locker_for_pick_file_);

  if (s_current_file_idx_ >= s_thread_filelist_.size()) {
    std::random_shuffle(s_thread_filelist_.begin(),
    s_thread_filelist_.end());
    s_current_file_idx_ = 0;
    // s_current_epoch_++; //example: when one file, one thread, it's bug
    LOG(ERROR) << "thread " << thread_id_
               << ": finish traing for epoch " << s_current_epoch_ + 1;
W
wangguibao 已提交
257
  }
W
wangguibao 已提交
258
  file_to_be_preocessed = s_thread_filelist_[s_current_file_idx_];
W
wangguibao 已提交
259

W
wangguibao 已提交
260
  s_current_file_idx_++;
W
wangguibao 已提交
261 262 263
  return file_to_be_preocessed.c_str();
}

W
wangguibao 已提交
264
// Main training loop for one worker thread. For each epoch: repeatedly pick
// a file from the shared list, read batches from it, run every op on each
// batch, accumulate the inspect metric, and drop per-batch child scopes.
// Thread 0 additionally saves a combined model snapshot after every epoch.
// LOCAL_PROF builds also collect per-op wall-clock timings.
void ExecutorThreadWorker::Train() {
  LOG(ERROR) << "begin to train";
  SetDevice();
#ifdef LOCAL_PROF
  std::vector<double> op_total_time;
  std::vector<std::string> op_name;
  // int total_batch = 0;
  for (auto& op : ops_) {
    op_name.push_back(op->Type());
  }
  op_total_time.resize(ops_.size());
  for (int i = 0; i < op_total_time.size(); ++i) {
    op_total_time[i] = 0.0;
  }
#endif
  // Metric label for logging: the prefix of inspect_var_name_ before the
  // first '_' (falls back to "inspect" when no name is configured).
  std::string inspect_key = "inspect";
  if (!inspect_var_name_.empty()) {
    inspect_key = inspect_var_name_.substr(0,
                                          inspect_var_name_.find_first_of('_'));
  }

  for (unsigned i = 0; i < max_epoch_; ++i) {
    LOG(ERROR) << "epoch: " << i;
#ifdef LOCAL_PROF
    Timer timeline;
    double total_time = 0.0;
    double read_time = 0.0;
#endif
    float total_inspect = 0;
    // NOTE(review): batch_num starts at 1, so the reported mean metric
    // divides by (actual batches + 1) — confirm whether this is intended.
    int batch_num = 1;
    // Keep consuming files until UpdateEpochNum() advances the global epoch.
    while (i == s_current_epoch_) {
      const char* filename = PickOneFile();
      local_reader_->SetFile(filename);
      while (true) {
#ifdef LOCAL_PROF
        timeline.start();
#endif
        bool flag = local_reader_->ReadBatch();
        if (!flag) {
          break;
        }
#ifdef LOCAL_PROF
        timeline.pause();
        read_time += timeline.elapsed_sec();
        total_time += timeline.elapsed_sec();
#endif
        // NOTE(review): duplicated check — `flag` was already tested above,
        // so this second break is unreachable/redundant.
        if (!flag) {
          break;
        }

        // NOTE(review): this inner `i` shadows the epoch loop variable.
        for (unsigned int i = 0; i < ops_.size(); ++i) {
#ifdef LOCAL_PROF
          timeline.start();
#endif
          ops_[i]->Run(*thread_scope_, place_);
#ifdef LOCAL_PROF
          timeline.pause();
          op_total_time[i] += timeline.elapsed_sec();
          total_time += timeline.elapsed_sec();
#endif
        }
        batch_num++;
        float avg_inspect = 0.0;
        if (!inspect_var_name_.empty()) {
          avg_inspect = thread_scope_->FindVar(inspect_var_name_)
                                     ->GetMutable<LoDTensor>()
                                     ->data<float>()[0];
        }
        total_inspect += avg_inspect;
        // Free per-batch child scopes so memory does not grow across batches.
        thread_scope_->DropKids();
      }
      UpdateEpochNum();
      LOG(ERROR) << "memory used after epoch " << i + 1
                 << " called: " << memory::memory_usage(place_);

#ifdef LOCAL_PROF
      for (int i = 0; i < op_total_time.size(); ++i) {
        std::cerr << "op_name:[" << i << "][" << op_name[i] << "]"
                  << " op_mean_time:[" << op_total_time[i] << "s]"
                  << std::endl;
      }
      std::cerr << "read time: " << read_time << "s" << std::endl;
#endif
    }
#ifdef LOCAL_PROF
    LOG(ERROR) << "mean " << inspect_key.c_str()
               << " of epoch " << i + 1 << ": " << total_inspect / batch_num
               << ", total_time: " << total_time;
#else
    LOG(ERROR) << "mean " << inspect_key.c_str()
               << " of epoch " << i + 1 << ": " << total_inspect / batch_num;
#endif
    // Only the leader thread (id 0) writes the per-epoch model snapshot.
    if (thread_id_ == 0) {
      char modelfile[1024];
      snprintf(&modelfile[0],
              sizeof(modelfile),
              "%s_epoch%d.model",
              model_prefix_.c_str(),
              i);
      std::string model_filename = std::string(modelfile);
      // this save_inference_model can only save imdbtask, should make this
      // general
      //
      // currently comment it
      LOG(ERROR) << "Going to save model " << modelfile;
      SaveModel(main_program_,
                thread_scope_,
                model_param_names_,
                model_filename,
                true);
    }
  }
}

W
wangguibao 已提交
378 379
// Assign this worker's thread index; it selects the CPU core in SetDevice()
// and marks the model-saving leader (id 0) in Train().
void ExecutorThreadWorker::SetThreadId(int tid) {
  thread_id_ = tid;
}

W
wangguibao 已提交
382 383
// Set the device place this worker's operators run on.
void ExecutorThreadWorker::SetPlace(const platform::Place& place) {
  place_ = place;
}

W
wangguibao 已提交
386
// Keep a private deep copy of the main program desc so each worker owns
// its own instance independent of the caller's.
void ExecutorThreadWorker::SetMainProgram(
    const ProgramDesc& main_program_desc) {
  main_program_.reset(new ProgramDesc(main_program_desc));
}

W
wangguibao 已提交
391 392
// Set the shared root scope (owned by the caller, not this worker).
void ExecutorThreadWorker::SetRootScope(Scope* g_scope) {
  root_scope_ = g_scope;
}

W
wangguibao 已提交
395 396
// Set how many epochs Train() will run on this worker.
void ExecutorThreadWorker::SetMaxTrainingEpoch(int max_epoch) {
  max_epoch_ = max_epoch;
}

W
wangguibao 已提交
399
// Bind the executor to the device place where startup and training ops run.
AsyncExecutor::AsyncExecutor(const platform::Place& place) : place_(place) {}
W
wangguibao 已提交
400

W
wangguibao 已提交
401
// Set the root scope shared by all worker threads (owned by the caller).
void AsyncExecutor::InitRootScope(Scope* scope) {
  root_scope_ = scope;
}

W
wangguibao 已提交
405
// Set the epoch count propagated to every worker in PrepareThreads().
void AsyncExecutor::SetMaxTrainingEpoch(int max_epoch) {
  max_epoch_ = max_epoch;
}

W
wangguibao 已提交
409
// Record the DataFeed type name used by PrepareThreads() to create one
// feed instance per worker via CreateDataFeed().
void AsyncExecutor::SetDataFeedName(const char* feedname) {
  feed_name_ = std::string(feedname);
}

W
wangguibao 已提交
413
// Set the path prefix workers use when naming per-epoch model snapshots.
void AsyncExecutor::SetModelPrefix(const std::string& model_prefix) {
  model_prefix_ = model_prefix;
}

W
wangguibao 已提交
417
void AsyncExecutor::RunStartupProgram(const ProgramDesc& program,
W
wangguibao 已提交
418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448
                                        Scope* scope) {
  auto& block = program.Block(0);
  for (auto& var : block.AllVars()) {
    if (var->Persistable()) {
      auto* ptr = scope->Var(var->Name());
      CreateTensor(ptr, var->GetType());
      // LOGERR("Persistable Var Name:%s", var->Name().c_str());
    }
  }

  std::map<std::string, int> param_dict;
  std::vector<OperatorBase *> ops;
  for (auto& op_desc : block.AllOps()) {
    std::vector<std::string> param_name_vec = op_desc->OutputArgumentNames();
    bool need_to_run = false;
    for (auto& name : param_name_vec) {
      if (param_dict.find(name) == param_dict.end()) {
        param_dict[name] = 1;
        need_to_run = true;
      }
    }
    if (need_to_run) {
      std::unique_ptr<OperatorBase> local_op = OpRegistry::CreateOp(*op_desc);
      OperatorBase* local_op_ptr = local_op.release();
      ops.push_back(local_op_ptr);
    }
  }
  // LOGERR("There are %d parameters in startup program, %d op needs to run",
  //        param_dict.size(), ops.size());

  for (auto& op : ops) {
W
wangguibao 已提交
449
    op->Run(*scope, place_);
W
wangguibao 已提交
450 451 452 453 454 455 456 457
  }
  // LOGERR("total time for startup program: %fs", timeline.elapsed_sec());
  for (auto& op : ops) {
    delete op;
  }
  // LOGERR("run startup program done.");
}

W
wangguibao 已提交
458
std::unique_ptr<ProgramDesc> AsyncExecutor::LoadDescFromFile(
W
wangguibao 已提交
459 460
    const std::string& f) {
  std::string program_desc_str;
W
wangguibao 已提交
461
  ReadBinaryFile(f, &program_desc_str);
W
wangguibao 已提交
462 463 464 465
  std::unique_ptr<ProgramDesc> program(new ProgramDesc(program_desc_str));
  return program;
}

W
wangguibao 已提交
466
void AsyncExecutor::SetDenseCommTensor(
W
wangguibao 已提交
467
    const std::vector<std::string>& dense_comm_tensor) {
W
wangguibao 已提交
468
  dense_comm_tensor_.resize(dense_comm_tensor.size());
W
wangguibao 已提交
469
  for (unsigned int i = 0; i < dense_comm_tensor.size(); ++i) {
W
wangguibao 已提交
470
    dense_comm_tensor_[i] = dense_comm_tensor[i];
W
wangguibao 已提交
471 472 473
  }
}

W
wangguibao 已提交
474
void AsyncExecutor::SetSparseCommTensor(
W
wangguibao 已提交
475
    const std::vector<std::string>& sparse_comm_tensor) {
W
wangguibao 已提交
476
  sparse_comm_tensor_.resize(sparse_comm_tensor.size());
W
wangguibao 已提交
477
  for (unsigned int i = 0; i < sparse_comm_tensor.size(); ++i) {
W
wangguibao 已提交
478
    sparse_comm_tensor_[i] = sparse_comm_tensor[i];
W
wangguibao 已提交
479 480 481
  }
}

W
wangguibao 已提交
482
// Record the sparse-communication table (name -> setting) and log its size.
void AsyncExecutor::SetSparseCommData(
    const std::map<std::string, int>& sparse_comm_data) {
  sparse_comm_data_ = sparse_comm_data;
  LOG(INFO) << "Sparse comm data: " << sparse_comm_data_.size();
}

W
wangguibao 已提交
488
void AsyncExecutor::SetFileList(const char* filelist) {
W
wangguibao 已提交
489
  filelist_.clear();
W
wangguibao 已提交
490 491 492 493
  std::ifstream fin(filelist);
  std::string filename;
  while (fin >> filename) {
    LOG(ERROR) << "add " << filename.c_str() << " to filelist";
W
wangguibao 已提交
494
    filelist_.push_back(filename);
W
wangguibao 已提交
495 496 497 498
  }
  fin.close();
}

W
wangguibao 已提交
499
// Replace the training file list with `tfiles`.
// FIX: clear() followed by insert() collapsed into a single copy
// assignment; the redundant trailing `return;` is removed.
void AsyncExecutor::SetFileList(std::vector<std::string> tfiles) {
  filelist_ = tfiles;
}

W
wangguibao 已提交
505
// Set the variable name whose value workers report during training.
void AsyncExecutor::SetInspectVarName(const std::string& inspect_var_name) {
  inspect_var_name_ = inspect_var_name;
}

W
wangguibao 已提交
509
// Record the model parameter names propagated to every worker.
void AsyncExecutor::SetParamNames(const std::vector<std::string>& param_names) {
  model_param_names_ = param_names;
}

W
wangguibao 已提交
513
// Set the number of worker threads created in PrepareThreads().
void AsyncExecutor::SetThreadNum(const int thread_num) {
  thread_num_ = thread_num;
}

W
wangguibao 已提交
517
void AsyncExecutor::PrepareThreads(const ProgramDesc& host_program) {
W
wangguibao 已提交
518 519 520 521 522 523 524 525 526 527 528
  workers_.resize(thread_num_);
  for (unsigned i = 0; i < thread_num_; ++i) {
    workers_[i].reset(new ExecutorThreadWorker);
    workers_[i]->SetThreadId(i);
    workers_[i]->CreateThreadOperators(host_program);
    workers_[i]->SetRootScope(root_scope_);
    workers_[i]->SetPlace(place_);
    workers_[i]->SetMaxTrainingEpoch(max_epoch_);
    workers_[i]->CreateThreadScope(host_program);
    workers_[i]->SetInspectVarName(inspect_var_name_);
    workers_[i]->SetModelParamNames(model_param_names_);
D
dongdaxiang 已提交
529
    workers_[i]->SetSparseCommData(sparse_comm_data_); 
W
wangguibao 已提交
530 531
    workers_[i]->SetMainProgram(host_program);
    workers_[i]->SetModelPrefix(model_prefix_);
W
wangguibao 已提交
532 533
  }

W
wangguibao 已提交
534
  for (unsigned i = 0; i < filelist_.size(); ++i) {
W
wangguibao 已提交
535 536
    // suppose at least one trainer thread here, and
    // filelist is static so that we only add filelist once
W
wangguibao 已提交
537
    workers_[0]->AddTrainFile(filelist_[i]);
W
wangguibao 已提交
538
  }
D
dongdaxiang 已提交
539
  
W
wangguibao 已提交
540
  for (unsigned i = 0; i < thread_num_; ++i) {
W
wangguibao 已提交
541
    // new a datafeed here
W
wangguibao 已提交
542
    std::shared_ptr<DataFeed> local_feed = CreateDataFeed(feed_name_.c_str());
W
wangguibao 已提交
543
    local_feed->Init();
W
wangguibao 已提交
544 545 546 547
    local_feed->SetBatchSize(batch_size_);
    workers_[i]->SetDataFeed(local_feed);
    workers_[i]->BindingDataFeedMemory();
    workers_[i]->SetThreadId(i);
W
wangguibao 已提交
548 549 550
  }
}

W
wangguibao 已提交
551
void AsyncExecutor::RunAsyncExecutor(const ProgramDesc& host_program) {
W
wangguibao 已提交
552
  // thread binding here?
W
wangguibao 已提交
553 554 555 556
  PrepareThreads(host_program);
  for (unsigned i = 0; i < thread_num_; ++i) {
    threads_.push_back(std::thread(&ExecutorThreadWorker::Train,
                      workers_[i].get()));
W
wangguibao 已提交
557 558
  }

W
wangguibao 已提交
559
  for (auto& th : threads_) {
W
wangguibao 已提交
560 561 562 563
    th.join();
  }
}

W
wangguibao 已提交
564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583
void AsyncExecutor::LoadInitModel() {
  auto place = paddle::platform::CPUPlace();
  auto* executor = new paddle::framework::Executor(place);

  std::string init_prog_file = model_path_ + "/" + init_prog_file_;
  std::string init_model_file = model_path_ + "/" + init_model_file_;

  struct stat stat_buf;

  if (stat(init_prog_file.c_str(), &stat_buf) == 0 &&
      S_ISREG(stat_buf.st_mode) &&
      stat(init_model_file.c_str(), &stat_buf) == 0 &&
      S_ISREG(stat_buf.st_mode)) {
    paddle::inference::Load(executor,
                          GetRootScope(),
                          model_path_ + "/" + init_prog_file_,
                          model_path_ + "/" + init_model_file_);
  }
}
}   // end namespace framework
W
wangguibao 已提交
584 585 586
}   // end namespace paddle

/* vim: set expandtab ts=2 sw=2 sts=2 tw=100: */