async_executor.cc 15.5 KB
Newer Older
W
wangguibao 已提交
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */

#include "paddle/fluid/framework/async_executor.h"
#include <stdio.h>
#include <string.h>
#include <fcntl.h>
W
wangguibao 已提交
19 20
#include <sys/types.h>
#include <sys/stat.h>
W
wangguibao 已提交
21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37
#include <unistd.h>
#include <fstream>
#include <iostream>
#include <map>
#include <algorithm>
#include "google/protobuf/message.h"
#include "google/protobuf/text_format.h"
#include "google/protobuf/io/zero_copy_stream_impl.h"

#include "gflags/gflags.h"
#include "paddle/fluid/framework/feed_fetch_method.h"
#include "paddle/fluid/framework/feed_fetch_type.h"
#include "paddle/fluid/framework/lod_rank_table.h"
#include "paddle/fluid/framework/lod_tensor_array.h"
#include "paddle/fluid/framework/op_registry.h"
#include "paddle/fluid/framework/reader.h"
#include "paddle/fluid/platform/place.h"
W
wangguibao 已提交
38
#include "paddle/fluid/inference/io.h"
W
wangguibao 已提交
39 40 41 42
#include "paddle/fluid/pybind/pybind.h"

namespace paddle {
namespace framework {
43 44

bool AsyncExecutor::workers_initialized_ = false;
W
wangguibao 已提交
45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75

// Initialize the payload of `var` to match its declared proto type so that
// later GetMutable<T>() calls in operators find the expected concrete type.
// RAW is deliberately left untouched; an unknown type is a hard error.
void CreateTensor(Variable* var, proto::VarType::Type var_type) {
  if (var_type == proto::VarType::LOD_TENSOR) {
    var->GetMutable<LoDTensor>();
  } else if (var_type == proto::VarType::SELECTED_ROWS) {
    var->GetMutable<SelectedRows>();
  } else if (var_type == proto::VarType::FEED_MINIBATCH) {
    var->GetMutable<FeedFetchList>();
  } else if (var_type == proto::VarType::FETCH_LIST) {
    var->GetMutable<FeedFetchList>();
  } else if (var_type == proto::VarType::STEP_SCOPES) {
    var->GetMutable<std::vector<Scope>>();
  } else if (var_type == proto::VarType::LOD_RANK_TABLE) {
    var->GetMutable<LoDRankTable>();
  } else if (var_type == proto::VarType::LOD_TENSOR_ARRAY) {
    var->GetMutable<LoDTensorArray>();
  } else if (var_type == proto::VarType::PLACE_LIST) {
    var->GetMutable<platform::PlaceList>();
  } else if (var_type == proto::VarType::READER) {
    var->GetMutable<ReaderHolder>();
  } else if (var_type == proto::VarType::RAW) {
    // GetMutable will be called in operator
  } else {
    // Message now matches the branches above: CHANNEL was listed but never
    // handled, while STEP_SCOPES and LOD_TENSOR_ARRAY were missing.
    PADDLE_THROW(
        "Variable type %d is not in "
        "[LOD_TENSOR, SELECTED_ROWS, FEED_MINIBATCH, FETCH_LIST, "
        "STEP_SCOPES, LOD_RANK_TABLE, LOD_TENSOR_ARRAY, PLACE_LIST, "
        "READER, RAW]",
        var_type);
  }
}

W
wangguibao 已提交
76
// Read the entire contents of `filename` into *content as raw bytes.
// On open failure the error is logged and *content is left empty; the
// original fell through and called tellg() on a bad stream, resizing the
// string with (size_t)-1.
static void ReadBinaryFile(const std::string& filename,
                           std::string* content) {
  std::string& contents = *content;
  contents.clear();
  std::ifstream fin(filename, std::ios::in | std::ios::binary);
  if (!fin.good()) {
    LOG(ERROR) << "Cannot open file " << filename.c_str();
    return;  // do not attempt to read from a stream that failed to open
  }
  fin.seekg(0, std::ios::end);
  contents.resize(fin.tellg());
  fin.seekg(0, std::ios::beg);
  fin.read(&contents[0], contents.size());
  fin.close();
}

W
wangguibao 已提交
91
static void SaveModel(
W
wangguibao 已提交
92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136
    const std::unique_ptr<ProgramDesc> & main_program,
    Scope* scope,
    const std::vector<std::string> & param_names,
    const std::string & model_name,
    bool save_combine) {
  auto place = platform::CPUPlace();
  const BlockDesc& global_block = main_program->Block(0);
  std::vector<std::string> paralist;

  for (auto* var : global_block.AllVars()) {
    bool is_model_param = false;
    for (auto param_name : param_names) {
      if (var->Name() == param_name) {
        is_model_param = true;
        break;
      }
    }

    if (!is_model_param)  continue;

    if (!save_combine) {
      LOG(ERROR) << "model var name: " << var->Name().c_str();

      paddle::framework::AttributeMap attrs;
      attrs.insert({"file_path", model_name + "/" + var->Name()});
      auto save_op = paddle::framework::OpRegistry::CreateOp(
                                                      "save",
                                                      {{"X", {var->Name()}}},
                                                      {},
                                                      attrs);
      save_op->Run(*scope, place);
    } else {
      paralist.push_back(var->Name());
    }
  }

  if (save_combine) {
    std::sort(paralist.begin(), paralist.end());
    paddle::framework::AttributeMap attrs;
    attrs.insert({"file_path", model_name});
    auto save_op = paddle::framework::OpRegistry::CreateOp(
                                                      "save_combine",
                                                      {{"X", paralist}},
                                                      {},
                                                      attrs);
137

W
wangguibao 已提交
138 139
    save_op->Run(*scope, place);
  }
W
wangguibao 已提交
140
}   // end SaveModel
W
wangguibao 已提交
141

142 143
// Drop the metric values accumulated by a previous Train() pass so this
// worker starts the next run from a clean state.
void ExecutorThreadWorker::Reset() { inspect_values_.clear(); }
W
wangguibao 已提交
145
void ExecutorThreadWorker::CreateThreadOperators(const ProgramDesc& program) {
W
wangguibao 已提交
146
  auto& block = program.Block(0);
W
wangguibao 已提交
147
  op_names_.clear();
W
wangguibao 已提交
148 149
  for (auto& op_desc : block.AllOps()) {
    std::unique_ptr<OperatorBase> local_op = OpRegistry::CreateOp(*op_desc);
W
wangguibao 已提交
150
    op_names_.push_back(op_desc->Type());
W
wangguibao 已提交
151
    OperatorBase* local_op_ptr = local_op.release();
W
wangguibao 已提交
152
    ops_.push_back(local_op_ptr);
W
wangguibao 已提交
153 154 155 156
    continue;
  }
}

W
wangguibao 已提交
157
void ExecutorThreadWorker::CreateThreadScope(const ProgramDesc& program) {
W
wangguibao 已提交
158
  auto& block = program.Block(0);
W
wangguibao 已提交
159
  thread_scope_ = &root_scope_->NewScope();
W
wangguibao 已提交
160 161
  for (auto& var : block.AllVars()) {
    if (var->Persistable()) {
W
wangguibao 已提交
162
      auto* ptr = root_scope_->Var(var->Name());
W
wangguibao 已提交
163 164
      CreateTensor(ptr, var->GetType());
    } else {
W
wangguibao 已提交
165
      auto* ptr = thread_scope_->Var(var->Name());
W
wangguibao 已提交
166 167 168 169 170
      CreateTensor(ptr, var->GetType());
    }
  }
}

171 172 173 174 175 176
// Give this worker its own copy of the data feed. Only TextClassDataFeed
// is recognized; the clone is tagged with this worker's thread id so file
// assignment stays per-thread.
void ExecutorThreadWorker::SetDataFeed(DataFeed& datafeed) {
  if (typeid(datafeed) != typeid(TextClassDataFeed)) {
    return;
  }
  local_reader_.reset(
      new TextClassDataFeed(dynamic_cast<TextClassDataFeed &>(datafeed)));
  local_reader_->SetThreadId(thread_id_);
}

W
wangguibao 已提交
179 180
void ExecutorThreadWorker::BindingDataFeedMemory() {
  const std::vector<std::string>& input_feed = local_reader_->GetUseSlotAlias();
W
wangguibao 已提交
181
  for (auto name : input_feed) {
W
wangguibao 已提交
182
    local_reader_->AddFeedVar(thread_scope_->Var(name), name);
W
wangguibao 已提交
183 184 185
  }
}

186 187 188 189 190
// Record which variables' values this worker should sample during Train().
void ExecutorThreadWorker::SetInspectVarNames(
    const std::vector<std::string>& inspect_var_names) {
  inspect_var_names_.assign(inspect_var_names.begin(),
                            inspect_var_names.end());
}

W
wangguibao 已提交
193
void ExecutorThreadWorker::SetModelParamNames(
W
wangguibao 已提交
194
    const std::vector<std::string>& param_names) {
W
wangguibao 已提交
195
  model_param_names_ = param_names;
W
wangguibao 已提交
196 197
}

W
wangguibao 已提交
198
void ExecutorThreadWorker::SetDevice() {
W
wangguibao 已提交
199 200 201 202 203 204 205 206 207 208 209
  static unsigned priority[] = {
    0, 1, 2, 3, 4, 5,
    6, 7, 8, 9, 10, 11,
    12, 13, 14, 15, 16, 17,
    18, 19, 20, 21, 22, 23,
    24, 25, 26, 27, 28, 29,
    30, 31, 32, 33, 34, 35,
    36, 37, 38, 39, 40, 41,
    42, 43, 44, 45, 46, 47
  };

W
wangguibao 已提交
210
  unsigned int i = this->thread_id_;
W
wangguibao 已提交
211 212 213 214 215 216 217 218 219 220 221 222 223 224

  if (i < sizeof(priority) / sizeof(unsigned)) {
    unsigned proc = priority[i];

    cpu_set_t mask;
    CPU_ZERO(&mask);
    CPU_SET(proc, &mask);

    if (-1 == sched_setaffinity(0, sizeof(mask), &mask)) {
      LOG(ERROR) << "WARNING: Failed to set thread affinity for thread " << i;
    } else {
      CPU_ZERO(&mask);
      if ((0 == sched_getaffinity(0, sizeof(mask), &mask))
          && CPU_ISSET(proc, &mask)) {
225 226 227
        LOG(ERROR) << "TRACE: Thread " << i
                   << " is running on processor " << proc
                   << "...";
W
wangguibao 已提交
228 229 230 231 232 233
      }
    }
  }
}


W
wangguibao 已提交
234
void ExecutorThreadWorker::Train() {
W
wangguibao 已提交
235
  LOG(ERROR) << "begin to train";
W
wangguibao 已提交
236
  SetDevice();
W
wangguibao 已提交
237

238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262
  int inspect_var_num = inspect_var_names_.size();
  inspect_values_.clear();
  inspect_values_.resize(inspect_var_num, 0);

  local_reader_->WaitNextEpoch();
  int epoch = local_reader_->GetCurrentEpoch();

  LOG(ERROR) << "epoch: " << epoch;

  int batch_num = 1;

  while (true) {
    const char *file = local_reader_->PickOneFile();
    if (file == NULL) {
      break;
    }

    if (!local_reader_->SetFile(file)) {
      break;
    }

    while (true) {
      bool flag = local_reader_->ReadBatch();
      if (!flag) {
        break;
W
wangguibao 已提交
263
      }
264 265 266

      for (unsigned int i = 0; i < ops_.size(); ++i) {
        ops_[i]->Run(*thread_scope_, place_);
W
wangguibao 已提交
267
      }
268 269 270 271 272 273 274 275 276 277
      batch_num++;

      float avg_inspect = 0.0;
      for (int i = 0; i < inspect_var_num; ++i) {
        avg_inspect = thread_scope_->FindVar(inspect_var_names_[i])
                                   ->GetMutable<LoDTensor>()
                                   ->data<float>()[0];
        inspect_values_[i] += avg_inspect;
      }
      thread_scope_->DropKids();
W
wangguibao 已提交
278
    }
279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308

    local_reader_->UpdateEpochNum();
    LOG(ERROR) << "memory used after epoch " << epoch + 1
               << " called: " << memory::memory_usage(place_);
  }

  for (int i = 0; i < inspect_var_num; ++i) {
    inspect_values_[i] /= batch_num;
    std::string var = inspect_var_names_[i].substr(
                          0,
                          inspect_var_names_[i].find_first_of("_"));
    LOG(ERROR) << "mean " << var.c_str()
               << " of epoch " << i + 1 << ": " << inspect_values_[i];
  }

  if (thread_id_ == 0) {
    char modelfile[1024];
    snprintf(&modelfile[0], sizeof(modelfile), "%s_epoch%d.model",
             model_prefix_.c_str(), epoch);
    std::string model_filename = std::string(modelfile);
    // this save_inference_model can only save imdbtask, should make this
    // general
    //
    // currently comment it
    LOG(ERROR) << "Going to save model " << modelfile;
    SaveModel(main_program_,
              thread_scope_,
              model_param_names_,
              model_filename,
              true);
W
wangguibao 已提交
309 310 311
  }
}

W
wangguibao 已提交
312 313
// Assign this worker's index within the executor's thread pool.
void ExecutorThreadWorker::SetThreadId(int tid) { thread_id_ = tid; }

W
wangguibao 已提交
316 317
// Select the device on which this worker runs its operators.
void ExecutorThreadWorker::SetPlace(const platform::Place& place) {
  place_ = place;
}

W
wangguibao 已提交
320
void ExecutorThreadWorker::SetMainProgram(
W
wangguibao 已提交
321
    const ProgramDesc& main_program_desc) {
W
wangguibao 已提交
322
  main_program_.reset(new ProgramDesc(main_program_desc));
W
wangguibao 已提交
323 324
}

W
wangguibao 已提交
325 326
// Share the executor-wide root scope with this worker (not owned).
void ExecutorThreadWorker::SetRootScope(Scope* g_scope) {
  root_scope_ = g_scope;
}

W
wangguibao 已提交
329 330
// Upper bound on training epochs for this worker.
void ExecutorThreadWorker::SetMaxTrainingEpoch(int max_epoch) {
  max_epoch_ = max_epoch;
}

333 334 335 336 337 338 339 340 341 342 343 344 345 346
// Construct the executor: remember the program, shared data feed, device
// and thread count, and take a copy of the parameter names to persist.
AsyncExecutor::AsyncExecutor(ProgramDesc& main_program,
                       const std::vector<std::string>& param_names,
                       TextClassDataFeed& data_feed,
                       unsigned int thread_num,
                       const platform::Place& place)
    : thread_num_(thread_num),
      place_(place),
      main_program_(main_program),
      data_feed_(data_feed) {
  // Direct assignment replaces the former clear()+insert() dance.
  model_param_names_ = param_names;
}
W
wangguibao 已提交
347

W
wangguibao 已提交
348
// Record the externally owned root scope; the executor does not take
// ownership of it.
void AsyncExecutor::InitRootScope(Scope* scope) { root_scope_ = scope; }

W
wangguibao 已提交
352
void AsyncExecutor::SetMaxTrainingEpoch(int max_epoch) {
W
wangguibao 已提交
353
  max_epoch_ = max_epoch;
W
wangguibao 已提交
354 355
}

W
wangguibao 已提交
356
void AsyncExecutor::SetModelPrefix(const std::string& model_prefix) {
W
wangguibao 已提交
357
  model_prefix_ = model_prefix;
W
wangguibao 已提交
358 359
}

W
wangguibao 已提交
360
void AsyncExecutor::RunStartupProgram(const ProgramDesc& program,
W
wangguibao 已提交
361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391
                                        Scope* scope) {
  auto& block = program.Block(0);
  for (auto& var : block.AllVars()) {
    if (var->Persistable()) {
      auto* ptr = scope->Var(var->Name());
      CreateTensor(ptr, var->GetType());
      // LOGERR("Persistable Var Name:%s", var->Name().c_str());
    }
  }

  std::map<std::string, int> param_dict;
  std::vector<OperatorBase *> ops;
  for (auto& op_desc : block.AllOps()) {
    std::vector<std::string> param_name_vec = op_desc->OutputArgumentNames();
    bool need_to_run = false;
    for (auto& name : param_name_vec) {
      if (param_dict.find(name) == param_dict.end()) {
        param_dict[name] = 1;
        need_to_run = true;
      }
    }
    if (need_to_run) {
      std::unique_ptr<OperatorBase> local_op = OpRegistry::CreateOp(*op_desc);
      OperatorBase* local_op_ptr = local_op.release();
      ops.push_back(local_op_ptr);
    }
  }
  // LOGERR("There are %d parameters in startup program, %d op needs to run",
  //        param_dict.size(), ops.size());

  for (auto& op : ops) {
W
wangguibao 已提交
392
    op->Run(*scope, place_);
W
wangguibao 已提交
393 394 395 396 397 398 399 400
  }
  // LOGERR("total time for startup program: %fs", timeline.elapsed_sec());
  for (auto& op : ops) {
    delete op;
  }
  // LOGERR("run startup program done.");
}

W
wangguibao 已提交
401
std::unique_ptr<ProgramDesc> AsyncExecutor::LoadDescFromFile(
W
wangguibao 已提交
402 403
    const std::string& f) {
  std::string program_desc_str;
W
wangguibao 已提交
404
  ReadBinaryFile(f, &program_desc_str);
W
wangguibao 已提交
405 406 407 408
  std::unique_ptr<ProgramDesc> program(new ProgramDesc(program_desc_str));
  return program;
}

409 410 411 412 413
// Replace the set of variables whose averaged values Run() reports back.
void AsyncExecutor::SetInspectVarNames(
    const std::vector<std::string>& inspect_var_names) {
  inspect_var_names_.assign(inspect_var_names.begin(),
                            inspect_var_names.end());
}

W
wangguibao 已提交
416
void AsyncExecutor::PrepareThreads(const ProgramDesc& host_program) {
W
wangguibao 已提交
417
  workers_.resize(thread_num_);
418
  for (int i = 0; i < thread_num_; ++i) {
W
wangguibao 已提交
419 420 421 422 423 424 425
    workers_[i].reset(new ExecutorThreadWorker);
    workers_[i]->SetThreadId(i);
    workers_[i]->CreateThreadOperators(host_program);
    workers_[i]->SetRootScope(root_scope_);
    workers_[i]->SetPlace(place_);
    workers_[i]->SetMaxTrainingEpoch(max_epoch_);
    workers_[i]->CreateThreadScope(host_program);
426
    workers_[i]->SetInspectVarNames(inspect_var_names_);
W
wangguibao 已提交
427 428 429
    workers_[i]->SetModelParamNames(model_param_names_);
    workers_[i]->SetMainProgram(host_program);
    workers_[i]->SetModelPrefix(model_prefix_);
430
    //
W
wangguibao 已提交
431
    // new a datafeed here
432
    workers_[i]->SetDataFeed(data_feed_);
W
wangguibao 已提交
433
    workers_[i]->BindingDataFeedMemory();
W
wangguibao 已提交
434 435 436
  }
}

437 438 439 440 441
std::vector<float>& AsyncExecutor::Run(
    const std::vector<std::string>& inspect_var_names) {
  SetInspectVarNames(inspect_var_names);
  threads_.clear();

W
wangguibao 已提交
442
  // thread binding here?
443 444 445 446 447 448 449 450
  if (workers_initialized_ == false) {
    PrepareThreads(main_program_);
    workers_initialized_ = true;
  }

  for (int i = 0; i < thread_num_; ++i) {
    workers_[i]->Reset();
    workers_[i]->SetInspectVarNames(inspect_var_names);
W
wangguibao 已提交
451 452
    threads_.push_back(std::thread(&ExecutorThreadWorker::Train,
                      workers_[i].get()));
W
wangguibao 已提交
453 454
  }

W
wangguibao 已提交
455
  for (auto& th : threads_) {
W
wangguibao 已提交
456 457
    th.join();
  }
458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478

  inspect_values_.clear();
  inspect_values_.resize(inspect_var_names_.size(), 0);


  std::vector<std::vector<float>*> inspect_value_vectors;
  inspect_value_vectors.resize(thread_num_);
  for (int i = 0; i < thread_num_; ++i) {
    inspect_value_vectors[i] = &workers_[i]->GetInspectValues();
  }

  for (unsigned int i = 0; i < inspect_var_names_.size(); ++i) {
    float value = 0.0;
    for (int j = 0; j < thread_num_; ++j) {
      value += inspect_value_vectors[j]->at(i);
    }
    value /= thread_num_;
    inspect_values_[i] = value;
  }

  return inspect_values_;
W
wangguibao 已提交
479 480
}

W
wangguibao 已提交
481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500
void AsyncExecutor::LoadInitModel() {
  auto place = paddle::platform::CPUPlace();
  auto* executor = new paddle::framework::Executor(place);

  std::string init_prog_file = model_path_ + "/" + init_prog_file_;
  std::string init_model_file = model_path_ + "/" + init_model_file_;

  struct stat stat_buf;

  if (stat(init_prog_file.c_str(), &stat_buf) == 0 &&
      S_ISREG(stat_buf.st_mode) &&
      stat(init_model_file.c_str(), &stat_buf) == 0 &&
      S_ISREG(stat_buf.st_mode)) {
    paddle::inference::Load(executor,
                          GetRootScope(),
                          model_path_ + "/" + init_prog_file_,
                          model_path_ + "/" + init_model_file_);
  }
}
}   // end namespace framework
W
wangguibao 已提交
501 502 503
}   // end namespace paddle

/* vim: set expandtab ts=2 sw=2 sts=2 tw=100: */