From 0075a7b00075dbc25c7a8a879c0d05d604f960c2 Mon Sep 17 00:00:00 2001 From: seiriosPlus Date: Tue, 22 Sep 2020 18:08:30 +0800 Subject: [PATCH] add save delta for large scale kv --- .../operators/distributed/large_scale_kv.h | 67 +++++++++++-------- 1 file changed, 38 insertions(+), 29 deletions(-) diff --git a/paddle/fluid/operators/distributed/large_scale_kv.h b/paddle/fluid/operators/distributed/large_scale_kv.h index 9e39e68cba7..cb59fbc66e1 100644 --- a/paddle/fluid/operators/distributed/large_scale_kv.h +++ b/paddle/fluid/operators/distributed/large_scale_kv.h @@ -246,6 +246,7 @@ struct VALUE { std::vector names_; int count_; + bool seen_after_save_; int unseen_days_; bool is_entry_; std::vector> values_; @@ -322,6 +323,7 @@ class ValueBlock { auto value = new VALUE(value_names_); value->set(values); + value->seen_after_save_ = true; value->count_ = count; values_[id] = value; } @@ -590,9 +592,9 @@ class SparseVariable { } } - void Save(const std::string &dirname) { + void Save(const std::string &dirname, const int mode = 0) { rwlock_->WRLock(); - VLOG(1) << "save " << meta_.name << " in dir: " << dirname << " begin"; + VLOG(3) << "save " << meta_.name << " in dir: " << dirname << " begin"; MkDirRecursively(dirname.c_str()); @@ -601,22 +603,15 @@ class SparseVariable { auto filename = string::Sprintf("%s/%s", dirname, value_name); filenames.push_back(filename); } - SaveToSelectedRows(filenames, meta_.value_names); - // // save sparse to text - // std::vector txt_filenames; - // for (auto &value_name : meta_.value_names) { - // auto filename = string::Sprintf("%s/%s.txt", dirname, value_name); - // txt_filenames.push_back(filename); - // } - // SaveToText(txt_filenames, meta_.value_names); - - VLOG(1) << "save " << meta_.name << " in dir: " << dirname << " done"; + SaveToSelectedRows(filenames, meta_.value_names, mode); + VLOG(3) << "save " << meta_.name << " in dir: " << dirname << " done"; rwlock_->UNLock(); } void SaveToSelectedRows(const std::vector &filenames, - const std::vector &valuenames) { + const std::vector &valuenames, + const int mode) { for (auto &value_name : valuenames) { auto it = std::find(meta_.value_names.begin(), meta_.value_names.end(), value_name); @@ -630,14 +625,30 @@ class SparseVariable { platform::DeviceContextPool &pool = platform::DeviceContextPool::Instance(); auto &dev_ctx = *pool.Get(place); - int64_t ids_num = 0; + std::vector ids; + for (auto &block : shard_blocks_) { - ids_num += block->values_.size(); + for (auto value : block->values_) { + bool id_need_save = false; + // save all params + if (mode == 0) { + id_need_save = true; + } else { + id_need_save = value.second.seen_after_save_; + } + + if (id_need_save) { + ids.push_back(value.first); + } + value.second.seen_after_save_ = false; + } } + VLOG(3) << "save " << ids.size() << " feasigns for " << meta_.name + << " with mode: " << mode; + std::vector> variables; std::vector tensors; - std::vector ids; std::vector dims; for (int i = 0; i < static_cast(filenames.size()); i++) { @@ -646,7 +657,7 @@ class SparseVariable { auto *slr = var->GetMutable(); auto *src_t = slr->mutable_value(); - src_t->Resize({ids_num, dim}); + src_t->Resize({ids.size(), dim}); auto *value = src_t->mutable_data(place); dims.push_back(dim); @@ -654,20 +665,18 @@ class SparseVariable { tensors.push_back(value); } - int64_t offset = 0; - for (auto &block : shard_blocks_) { - for (auto value : block->values_) { - ids.push_back(value.first); - std::vector *> vss = value.second->get(valuenames); - - for (int i = 0; i < static_cast(vss.size()); i++) { - auto &vs = vss[i]; - std::memcpy(tensors[i] + offset * dims[i], vs->data(), - sizeof(float) * dims[i]); - } + std::vector *>> *values; + Get(ids, variables, values); - offset += 1; + int64_t offset = 0; + for (auto *value : values) { + auto vss = value; + for (int i = 0; i < static_cast(vss.size()); i++) { + auto &vs = vss[i]; + std::memcpy(tensors[i] + offset * dims[i], vs->data(), + sizeof(float) * dims[i]); } + offset += 1; } for (auto &var : variables) { -- GitLab