From 7d4bdff07d2ea3d07ce69629d31235c9e1b8ff3b Mon Sep 17 00:00:00 2001 From: tangwei12 Date: Mon, 4 Jan 2021 17:23:45 +0800 Subject: [PATCH] fix large scale memory (#30035) * memory holder optimize Change-Id: Ic91af8ac6f2853336d28a9fbbc5e8d0c57b5d05e * memory holder optimize Change-Id: I2fd1c14ecc17f5d5ce88b87890381ea801e6367f * fix large scale memory holder Change-Id: Ief0992b02b00220e16c72cc637a56e7b5788140f * fix large scale memory holder Change-Id: I910142a3952ead643a5604f8f80955f3e6efe655 --- .../distributed/table/common_sparse_table.cc | 145 ++++-------- .../distributed/table/common_sparse_table.h | 11 +- .../table/depends/large_scale_kv.h | 217 +++++++----------- .../fluid/distributed/table/depends/sparse.h | 157 ++++++------- 4 files changed, 219 insertions(+), 311 deletions(-) diff --git a/paddle/fluid/distributed/table/common_sparse_table.cc b/paddle/fluid/distributed/table/common_sparse_table.cc index ad7baa2524f..4f8afd3d256 100644 --- a/paddle/fluid/distributed/table/common_sparse_table.cc +++ b/paddle/fluid/distributed/table/common_sparse_table.cc @@ -114,18 +114,18 @@ void ProcessALine(const std::vector& columns, const Meta& meta, } int64_t SaveToText(std::ostream* os, std::shared_ptr block, - const std::vector& saved_names, const int mode) { for (auto value : block->values_) { - std::vector*> vss = value.second->get(saved_names); + auto* vs = value.second->data_.data(); std::stringstream ss; auto id = value.first; ss << id << "\t"; - for (int i = 0; i < static_cast(vss.size()); i++) { - auto& vs = vss[i]; - ss << paddle::string::join_strings((*vs), ','); - ss << "\t"; + + for (int i = 0; i < block->value_length_; i++) { + ss << vs[i]; + ss << ","; } + ss << "\n"; os->write(ss.str().c_str(), sizeof(char) * ss.str().size()); @@ -159,62 +159,13 @@ int64_t LoadFromText(const std::string& valuepath, const std::string& metapath, std::vector> kvalues; ProcessALine(values, meta, &kvalues); - block->Init(id, &kvalues, 1); + // warning: need fix + block->Init(id); } return 0; } -void SaveShard(std::shared_ptr block, const std::string& dirname, - const CommonAccessorParameter& common, const int mode, - const int pserver_id, const int shard_id) { - auto varname = common.table_name(); - std::string var_store = string::Sprintf("%s/%s", dirname, varname); - VLOG(3) << "save " << varname << " in dir: " << var_store << " begin"; - MkDirRecursively(var_store.c_str()); - - std::string shard_var_pre = - string::Sprintf("%s.block%d.%d", varname, pserver_id, shard_id); - std::string meta_ = string::Sprintf("%s/%s.meta", var_store, shard_var_pre); - std::string value_ = string::Sprintf("%s/%s.txt", var_store, shard_var_pre); - - // save values - std::vector params(common.params().begin(), - common.params().end()); - std::unique_ptr value_out(new std::ofstream(value_)); - SaveToText(value_out.get(), block, params, mode); - // save meta - std::stringstream stream; - stream << "param=" << common.table_name() << "\n"; - stream << "server_id=" << pserver_id << "\n"; - stream << "shard_id=" << shard_id << "\n"; - stream << "row_names=" << paddle::string::join_strings(common.params(), ',') - << "\n"; - stream << "row_dims=" << paddle::string::join_strings(common.dims(), ',') - << "\n"; - stream << "count=" << block->values_.size() << "\n"; - std::unique_ptr meta_out(new std::ofstream(meta_)); - meta_out->write(stream.str().c_str(), sizeof(char) * stream.str().size()); - meta_out->close(); - VLOG(3) << "save " << varname << " in dir: " << var_store << " done"; -} - -void CommonSparseTable::create_initializer(const std::string& attr, - const std::string& name) { - auto slices = string::split_string(attr, "&"); - - if (slices[0] == "gaussian_random") { - initializers_[name] = new GaussianInitializer(slices); - } else if (slices[0] == "fill_constant") { - initializers_[name] = new FillConstantInitializer(slices); - } else if (slices[0] == "uniform_random") { - initializers_[name] = new UniformInitializer(slices); - } else { - PADDLE_THROW( - platform::errors::InvalidArgument("%s can not be supported", name)); - } -} - int32_t CommonSparseTable::initialize() { _shards_task_pool.resize(task_pool_size_); for (int i = 0; i < _shards_task_pool.size(); ++i) { @@ -224,31 +175,44 @@ int32_t CommonSparseTable::initialize() { sync = _config.common().sync(); VLOG(1) << "table " << _config.common().table_name() << " is sync: " << sync; - initialize_value(); - initialize_optimizer(); - initialize_recorder(); - return 0; -} - -int32_t CommonSparseTable::initialize_recorder() { return 0; } - -int32_t CommonSparseTable::initialize_value() { auto common = _config.common(); int size = static_cast(common.params().size()); + size_t offset = 0; for (int x = 0; x < size; ++x) { auto& varname = common.params()[x]; auto& dim = common.dims()[x]; + + value_idx_[varname] = x; + value_names_.push_back(varname); + value_dims_.push_back(dim); + value_offsets_.push_back(offset); + initializer_attrs_.push_back(common.initializers()[x]); + if (varname == "Param") { param_dim_ = dim; + param_offset_ = offset; } - auto& initializer = common.initializers()[x]; - create_initializer(initializer, varname); + + offset += dim; } + initialize_value(); + initialize_optimizer(); + initialize_recorder(); + return 0; +} + +int32_t CommonSparseTable::initialize_recorder() { return 0; } + +int32_t CommonSparseTable::initialize_value() { shard_values_.reserve(task_pool_size_); + for (int x = 0; x < task_pool_size_; ++x) { - auto shard = std::make_shared(common, &initializers_); + auto shard = + std::make_shared(value_names_, value_dims_, value_offsets_, + value_idx_, initializer_attrs_, "none"); + shard_values_.emplace_back(shard); } @@ -281,14 +245,16 @@ int32_t CommonSparseTable::initialize_value() { int32_t CommonSparseTable::initialize_optimizer() { auto common = _config.common(); auto name = common.name(); - auto attrs = common.attributes(); if (name == "sgd") { - optimizer_ = std::make_shared(common); + optimizer_ = std::make_shared(value_names_, value_dims_, + value_offsets_, value_idx_); } else if (name == "adam") { - optimizer_ = std::make_shared(common); + optimizer_ = std::make_shared(value_names_, value_dims_, + value_offsets_, value_idx_); } else if (name == "sum") { - optimizer_ = std::make_shared(common); + optimizer_ = std::make_shared(value_names_, value_dims_, + value_offsets_, value_idx_); } else { VLOG(0) << "init optimizer failed"; } @@ -330,8 +296,7 @@ int32_t CommonSparseTable::save(const std::string& dirname, int64_t total_ins = 0; for (int shard_id = 0; shard_id < task_pool_size_; ++shard_id) { // save values - total_ins += - SaveToText(value_out.get(), shard_values_[shard_id], params, mode); + total_ins += SaveToText(value_out.get(), shard_values_[shard_id], mode); } value_out->close(); @@ -391,10 +356,6 @@ int32_t CommonSparseTable::pour() { int32_t CommonSparseTable::pull_sparse(float* pull_values, const uint64_t* keys, size_t num) { rwlock_->RDLock(); - std::vector value_names; - for (auto name : _config.common().params()) { - value_names.push_back(name); - } std::vector> offset_bucket; offset_bucket.resize(task_pool_size_); @@ -408,20 +369,18 @@ int32_t CommonSparseTable::pull_sparse(float* pull_values, const uint64_t* keys, for (int shard_id = 0; shard_id < task_pool_size_; ++shard_id) { tasks[shard_id] = _shards_task_pool[shard_id]->enqueue( - [this, shard_id, &keys, &offset_bucket, &value_names, - &pull_values]() -> int { + [this, shard_id, &keys, &offset_bucket, &pull_values]() -> int { auto& block = shard_values_[shard_id]; auto& offsets = offset_bucket[shard_id]; for (int i = 0; i < offsets.size(); ++i) { auto offset = offsets[i]; auto id = keys[offset]; - block->InitFromInitializer(id, value_names); - auto values = block->Get(id, {"Param"}); - auto dim = values[0]->size(); - std::copy(values[0]->begin(), values[0]->end(), - pull_values + dim * offset); + auto* value = block->InitFromInitializer(id); + std::copy_n(value + param_offset_, param_dim_, + pull_values + param_dim_ * offset); } + return 0; }); } @@ -492,10 +451,6 @@ int32_t CommonSparseTable::push_sparse(const uint64_t* keys, int32_t CommonSparseTable::push_sparse_param(const uint64_t* keys, const float* values, size_t num) { rwlock_->RDLock(); - std::vector value_names; - for (auto name : _config.common().params()) { - value_names.push_back(name); - } std::vector> offset_bucket; offset_bucket.resize(task_pool_size_); @@ -509,18 +464,16 @@ int32_t CommonSparseTable::push_sparse_param(const uint64_t* keys, for (int shard_id = 0; shard_id < task_pool_size_; ++shard_id) { tasks[shard_id] = _shards_task_pool[shard_id]->enqueue( - [this, shard_id, &keys, &offset_bucket, &value_names, - &values]() -> int { + [this, shard_id, &keys, &offset_bucket, &values]() -> int { auto& block = shard_values_[shard_id]; auto& offsets = offset_bucket[shard_id]; for (int i = 0; i < offsets.size(); ++i) { auto offset = offsets[i]; auto id = keys[offset]; - block->InitFromInitializer(id, value_names); - auto values_ = block->Get(id, {"Param"}); - auto dim = values_[0]->size(); - std::copy_n(values + dim * offset, dim, values_[0]->data()); + auto* value = block->InitFromInitializer(id); + std::copy_n(values + param_dim_ * offset, param_dim_, + value + param_offset_); } return 0; }); diff --git a/paddle/fluid/distributed/table/common_sparse_table.h b/paddle/fluid/distributed/table/common_sparse_table.h index 6baf60a44c1..19199b682ac 100644 --- a/paddle/fluid/distributed/table/common_sparse_table.h +++ b/paddle/fluid/distributed/table/common_sparse_table.h @@ -50,8 +50,6 @@ class CommonSparseTable : public SparseTable { virtual int32_t initialize(); virtual int32_t initialize_shard() { return 0; } - virtual void create_initializer(const std::string& attr, - const std::string& name); virtual int32_t initialize_value(); virtual int32_t initialize_optimizer(); virtual int32_t initialize_recorder(); @@ -86,8 +84,15 @@ class CommonSparseTable : public SparseTable { bool sync = false; int param_dim_ = 0; + int param_offset_ = 0; + + std::unordered_map value_idx_; + std::vector value_names_; + std::vector value_dims_; + std::vector value_offsets_; + std::vector initializer_attrs_; + std::shared_ptr optimizer_; - std::unordered_map initializers_; std::vector> shard_values_; std::unordered_map> pull_reservoir_; std::unique_ptr rwlock_{nullptr}; diff --git a/paddle/fluid/distributed/table/depends/large_scale_kv.h b/paddle/fluid/distributed/table/depends/large_scale_kv.h index 8119cd03458..79a4c4700a9 100644 --- a/paddle/fluid/distributed/table/depends/large_scale_kv.h +++ b/paddle/fluid/distributed/table/depends/large_scale_kv.h @@ -67,100 +67,47 @@ inline bool entry(const int count, const float threshold) { } struct VALUE { - explicit VALUE(const std::vector &names) - : names_(names), count_(1), unseen_days_(0), seen_after_last_save_(true) { - values_.resize(names.size()); - for (int i = 0; i < static_cast(names.size()); i++) { - places[names[i]] = i; - } - } - - void set(std::vector> *values) { - values_ = std::move(*values); - } - - void set(const std::vector &inits, std::vector numels) { - for (int x = 0; x < numels.size(); ++x) { - auto &value = values_[x]; - value.resize(numels[x]); - inits[x]->GetValue(value.data(), numels[x]); - } - } - - void set(const std::vector &names, - const std::vector> &values) { - for (int i = 0; i < static_cast(names.size()); i++) { - auto idx = places[names[i]]; - auto value = values[i]; - values_[idx].assign(value.begin(), value.end()); - } - } - - std::vector *> get() { - auto pts = std::vector *>(); - pts.reserve(values_.size()); - - for (auto &value : values_) { - pts.push_back(&value); - } - return pts; + explicit VALUE(size_t length) + : length_(length), + count_(1), + unseen_days_(0), + seen_after_last_save_(true), + is_entry_(true) { + data_.resize(length); } - int fetch_count() { return ++count_; } - void reset_unseen_days() { unseen_days_ = 0; } - - void set_entry(bool is_entry) { is_entry_ = is_entry; } - - bool get_entry() { return is_entry_; } - - std::vector *> get(const std::vector names) { - auto pts = std::vector *>(); - pts.reserve(values_.size()); - - for (int i = 0; i < static_cast(names.size()); i++) { - pts.push_back(&(values_[places[names[i]]])); - } - return pts; - } - - std::vector names_; + size_t length_; + std::vector data_; int count_; int unseen_days_; bool seen_after_last_save_; bool is_entry_; - std::vector> values_; - std::unordered_map places; }; class ValueBlock { public: - explicit ValueBlock( - const CommonAccessorParameter &common, - std::unordered_map *initializers) { - initializers_ = initializers; - int size = static_cast(common.params().size()); - - for (int x = 0; x < size; ++x) { - auto varname = common.params()[x]; - auto dim = common.dims()[x]; - value_names_.push_back(varname); - value_dims_.push_back(dim); - } - - for (auto &name : value_names_) { - initializer_list_.emplace_back(initializers_->at(name)); + explicit ValueBlock(const std::vector &value_names, + const std::vector &value_dims, + const std::vector &value_offsets, + const std::unordered_map &value_idx, + const std::vector &init_attrs, + const std::string &entry_attr) + : value_names_(value_names), + value_dims_(value_dims), + value_offsets_(value_offsets), + value_idx_(value_idx) { + for (int x = 0; x < value_dims.size(); ++x) { + value_length_ += value_dims[x]; } // for Entry { - // entry will add later - std::string entry_attr = "none"; if (entry_attr == "none") { - has_entry = false; + has_entry_ = false; entry_func_ = std::bind(entry, std::placeholders::_1, "none"); } else { - has_entry = true; + has_entry_ = true; auto slices = string::split_string(entry_attr, "&"); if (slices[0] == "count_filter") { int threshold = std::stoi(slices[1]); @@ -172,85 +119,82 @@ class ValueBlock { } } } + + // for Initializer + { + for (auto &attr : init_attrs) { + auto slices = string::split_string(attr, "&"); + + if (slices[0] == "gaussian_random") { + initializers_.emplace_back( + std::make_shared(slices)); + } else if (slices[0] == "fill_constant") { + initializers_.emplace_back( + std::make_shared(slices)); + } else if (slices[0] == "uniform_random") { + initializers_.emplace_back( + std::make_shared(slices)); + } else { + PADDLE_THROW(platform::errors::InvalidArgument( + "%s can not be supported", attr)); + } + } + } } ~ValueBlock() {} - void Init(const uint64_t &id, std::vector> *values, - int count) { - if (Has(id)) { - PADDLE_THROW(platform::errors::AlreadyExists("id already exist, error")); + float *Init(const uint64_t &id) { + auto value = std::make_shared(value_length_); + for (int x = 0; x < value_names_.size(); ++x) { + initializers_[x]->GetValue(value->data_.data() + value_offsets_[x], + value_dims_[x]); } - - if (values->size() != value_names_.size()) { - PADDLE_THROW( - platform::errors::AlreadyExists("values can not match, error")); - } - - auto value = new VALUE(value_names_); - value->set(values); - value->seen_after_last_save_ = true; - value->count_ = count; values_[id] = value; + return value->data_.data(); } - void Init(const uint64_t &id, const std::vector &inits, - int count) { - if (Has(id)) { - PADDLE_THROW(platform::errors::AlreadyExists("id already exist, error")); - } - - if (inits.size() != value_names_.size()) { - PADDLE_THROW( - platform::errors::AlreadyExists("values can not match, error")); + std::vector Get(const uint64_t &id, + const std::vector &value_names) { + auto pts = std::vector(); + pts.reserve(value_names.size()); + auto &values = values_.at(id); + for (int i = 0; i < static_cast(value_names.size()); i++) { + pts.push_back(values->data_.data() + + value_offsets_.at(value_idx_.at(value_names[i]))); } - - auto value = new VALUE(value_names_); - value->set(inits, value_dims_); - values_[id] = value; + return pts; } - std::vector *> Get( - const uint64_t &id, const std::vector &value_names) { - auto ret_values = values_.at(id)->get(value_names); - return ret_values; - } + float *Get(const uint64_t &id) { + auto pts = std::vector *>(); + auto &values = values_.at(id); - std::vector *> Get(const uint64_t &id) { - auto ret_values = values_.at(id)->get(value_names_); - return ret_values; + return values->data_.data(); } - void InitFromInitializer(const uint64_t &id, - const std::vector &value_names) { + float *InitFromInitializer(const uint64_t &id) { if (Has(id)) { - if (has_entry) { + if (has_entry_) { Update(id); } - return; + return Get(id); } - Init(id, initializer_list_, 1); + return Init(id); } bool GetEntry(const uint64_t &id) { auto value = values_.at(id); - auto entry = value->get_entry(); - return entry; - } - - void Set(const uint64_t &id, const std::vector &value_names, - const std::vector> &values) { - auto value = values_.at(id); - value->set(value_names, values); + return value->is_entry_; } void Update(const uint64_t id) { - auto *value = values_.at(id); - value->reset_unseen_days(); - auto count = value->fetch_count(); + auto value = values_.at(id); + value->unseen_days_ = 0; + auto count = ++value->count_; - if (!value->get_entry()) { - value->set_entry(entry_func_(count)); + if (!value->is_entry_) { + value->is_entry_ = entry_func_(count); } } @@ -265,15 +209,18 @@ class ValueBlock { } public: - std::unordered_map values_; + std::unordered_map> values_; + size_t value_length_ = 0; private: - bool has_entry = false; - std::vector value_names_; - std::vector value_dims_; + const std::vector &value_names_; + const std::vector &value_dims_; + const std::vector &value_offsets_; + const std::unordered_map &value_idx_; + + bool has_entry_ = false; std::function entry_func_; - std::unordered_map *initializers_; - std::vector initializer_list_; + std::vector> initializers_; }; } // namespace distributed diff --git a/paddle/fluid/distributed/table/depends/sparse.h b/paddle/fluid/distributed/table/depends/sparse.h index 5d992a4c4f0..f98057f9867 100644 --- a/paddle/fluid/distributed/table/depends/sparse.h +++ b/paddle/fluid/distributed/table/depends/sparse.h @@ -19,6 +19,7 @@ #include #include #include +#include #include #include @@ -30,25 +31,38 @@ namespace distributed { class SparseOptimizer { public: - SparseOptimizer() {} - explicit SparseOptimizer(const CommonAccessorParameter& common) {} + explicit SparseOptimizer( + const std::vector& value_names, + const std::vector& value_dims, const std::vector& value_offsets, + const std::unordered_map& value_idx) + : value_names_(value_names), + value_dims_(value_dims), + value_offsets_(value_offsets), + value_idx_(value_idx) {} + virtual void update(const uint64_t* keys, const float* update_values, size_t num, const std::vector& offsets, ValueBlock* block) = 0; + + const std::vector& value_names_; + const std::vector& value_dims_; + const std::vector& value_offsets_; + const std::unordered_map& value_idx_; + int param_offset = 0; + int update_numel = 0; }; // sum calc for sparse tensor class SSUM : public SparseOptimizer { public: - SSUM(){}; - explicit SSUM(const CommonAccessorParameter& common) { - auto& names = common.params(); - for (int x = 0; x < static_cast(names.size()); ++x) { - if (names[x] == "Param") { - param_idx = x; - update_numel = common.dims()[x]; - } - } + explicit SSUM(const std::vector& value_names, + const std::vector& value_dims, + const std::vector& value_offsets, + const std::unordered_map& value_idx) + : SparseOptimizer(value_names, value_dims, value_offsets, value_idx) { + auto idx = value_idx.at("Param"); + param_offset = value_offsets.at(idx); + update_numel = value_dims.at(idx); } void update(const uint64_t* keys, const float* update_values, size_t num, @@ -57,35 +71,27 @@ class SSUM : public SparseOptimizer { auto blas = GetBlas(); for (auto x : offsets) { auto id = keys[x]; - auto values = block->Get(id); - float* param = values[param_idx]->data(); - - std::vector delta; - delta.resize(update_numel); - blas.VCOPY(update_numel, update_values + x * update_numel, delta.data()); - blas.VADD(update_numel, delta.data(), param, param); + auto* value = block->Get(id); + float* param = value + param_offset; + blas.VADD(update_numel, update_values + x * update_numel, param, param); } } - - int param_idx; - int update_numel; }; // sgd optimzer for sparse tensor class SSGD : public SparseOptimizer { public: - SSGD(){}; - explicit SSGD(const CommonAccessorParameter& common) { - auto& names = common.params(); - for (int x = 0; x < static_cast(names.size()); ++x) { - if (names[x] == "LearningRate") { - learning_rate_idx = x; - } - if (names[x] == "Param") { - param_idx = x; - update_numel = common.dims()[x]; - } - } + explicit SSGD(const std::vector& value_names, + const std::vector& value_dims, + const std::vector& value_offsets, + const std::unordered_map& value_idx) + : SparseOptimizer(value_names, value_dims, value_offsets, value_idx) { + auto idx = value_idx.at("Param"); + param_offset = value_offsets.at(idx); + update_numel = value_dims.at(idx); + + idx = value_idx.at("LearningRate"); + lr_offset = value_offsets.at(idx); } void update(const uint64_t* keys, const float* update_values, size_t num, @@ -94,9 +100,10 @@ class SSGD : public SparseOptimizer { auto blas = GetBlas(); for (auto x : offsets) { auto id = keys[x]; - auto values = block->Get(id); - float* learning_rate = values[learning_rate_idx]->data(); - float* param = values[param_idx]->data(); + auto* value = block->Get(id); + + float* learning_rate = value + lr_offset; + float* param = value + param_offset; std::vector grads; grads.resize(update_numel); @@ -106,38 +113,35 @@ class SSGD : public SparseOptimizer { } } - int learning_rate_idx; - int param_idx; - int update_numel; + int lr_offset; }; // adam optimzer for sparse tensor class SAdam : public SparseOptimizer { public: - SAdam() {} - explicit SAdam(const CommonAccessorParameter& common) { - auto& names = common.params(); - for (int x = 0; x < static_cast(names.size()); ++x) { - if (names[x] == "LearningRate") { - learning_rate_idx = x; - } - if (names[x] == "Param") { - param_idx = x; - update_numel = common.dims()[x]; - } - if (names[x] == "Moment1") { - moment1_idx = x; - } - if (names[x] == "Moment2") { - moment2_idx = x; - } - if (names[x] == "Beta1Pow") { - beta1_pow_idx = x; - } - if (names[x] == "Beta2Pow") { - beta2_pow_idx = x; - } - } + explicit SAdam(const std::vector& value_names, + const std::vector& value_dims, + const std::vector& value_offsets, + const std::unordered_map& value_idx) + : SparseOptimizer(value_names, value_dims, value_offsets, value_idx) { + auto idx = value_idx.at("Param"); + param_offset = value_offsets.at(idx); + update_numel = value_dims.at(idx); + + idx = value_idx.at("LearningRate"); + lr_offset = value_offsets.at(idx); + + idx = value_idx.at("Moment1"); + m1_offset = value_offsets.at(idx); + + idx = value_idx.at("Moment2"); + m2_offset = value_offsets.at(idx); + + idx = value_idx.at("Beta1Pow"); + beta1_pow_offset = value_offsets.at(idx); + + idx = value_idx.at("Beta2Pow"); + beta2_pow_offset = value_offsets.at(idx); // add attr later beta1 = 0.9; @@ -151,13 +155,13 @@ class SAdam : public SparseOptimizer { auto blas = GetBlas(); for (auto x : offsets) { auto id = keys[x]; - auto values = block->Get(id); - float* learning_rate = values[learning_rate_idx]->data(); - float* param = values[param_idx]->data(); - float* moment1 = values[moment1_idx]->data(); - float* moment2 = values[moment2_idx]->data(); - float* beta1_pow = values[beta1_pow_idx]->data(); - float* beta2_pow = values[beta2_pow_idx]->data(); + auto* values = block->Get(id); + float* learning_rate = values + lr_offset; + float* param = values + param_offset; + float* moment1 = values + m1_offset; + float* moment2 = values + m2_offset; + float* beta1_pow = values + beta1_pow_offset; + float* beta2_pow = values + beta2_pow_offset; beta1_pow[0] = beta1_pow[0] * beta1; beta2_pow[0] = beta2_pow[0] * beta2; @@ -194,16 +198,15 @@ class SAdam : public SparseOptimizer { } } - int learning_rate_idx; - int param_idx; - int moment1_idx; - int moment2_idx; - int beta1_pow_idx; - int beta2_pow_idx; + int lr_offset; + int m1_offset; + int m2_offset; + int beta1_pow_offset; + int beta2_pow_offset; + float beta1; float beta2; float epsilon; - int update_numel; }; } // namespace distributed -- GitLab