Unverified commit 703487c6, authored by zhaocaibei123, committed by GitHub

memory sparse table (#36909)

Parent: 41a09113
@@ -37,7 +37,9 @@ set_source_files_properties(table.cc PROPERTIES COMPILE_FLAGS ${DISTRIBUTE_COMPI
set_source_files_properties(sparse_sgd_rule.cc PROPERTIES COMPILE_FLAGS ${DISTRIBUTE_COMPILE_FLAGS})
set_source_files_properties(ctr_accessor.cc PROPERTIES COMPILE_FLAGS ${DISTRIBUTE_COMPILE_FLAGS})
+set_source_files_properties(memory_sparse_table.cc PROPERTIES COMPILE_FLAGS ${DISTRIBUTE_COMPILE_FLAGS})
cc_library(sparse_sgd_rule SRCS sparse_sgd_rule.cc DEPS ${TABLE_DEPS} ps_framework_proto)
cc_library(ctr_accessor SRCS ctr_accessor.cc DEPS ${TABLE_DEPS} ps_framework_proto sparse_sgd_rule)
+cc_library(memory_sparse_table SRCS memory_sparse_table.cc DEPS ps_framework_proto ${TABLE_DEPS} fs afs_wrapper ctr_accessor common_table)
-cc_library(table SRCS table.cc DEPS common_table tensor_accessor tensor_table ps_framework_proto string_helper device_context gflags glog boost ctr_accessor)
+cc_library(table SRCS table.cc DEPS memory_sparse_table common_table tensor_accessor tensor_table ps_framework_proto string_helper device_context gflags glog boost)
@@ -221,15 +221,6 @@ class DAdamD2Sum : public DenseOptimizer {
void update(const float* update_values, size_t num, int begin,
int end) override {
auto update_numel = end - begin;
-/*
-// for debug
-std::cout << "before update:\n";
-for (int i = 0; i < 3; ++ i) {
-std::cout << "param: " << i << " " << *(param+begin+i) <<
-"grad: " << *(update_values+begin+i) << "\n";
-}*/
std::vector<float> grad, grad2, scale;
grad.resize(update_numel);
grad2.resize(update_numel);
@@ -240,57 +231,21 @@ class DAdamD2Sum : public DenseOptimizer {
blas.VCOPY(update_numel, update_values + begin, grad.data());
blas.VCOPY(update_numel, update_values + begin, grad2.data());
-/*
-for (int i = 0; i < end-begin; ++ i) {
-std::cout << "copy grad: " << i << " " << *(grad.data()+begin+i) <<
-"copy grad2: " << *(grad2.data()+begin+i) << "\n";
-}
-for (int i = 0; i < 3; ++ i) {
-std::cout << "d2sum before: " << i << " " << *(ada_d2sum+begin+i) << "\n";
-}*/
// d2sum
blas.SCAL(update_numel, ada_decay_rate[0], ada_d2sum + begin);
ADD<float>(update_numel, ada_d2sum + begin, 1, ada_d2sum + begin);
-/*
-for (int i = 0; i < end-begin; ++ i) {
-std::cout << "d2sum update: " << i << " " << *(ada_d2sum+begin+i) << "\n";
-}
-for (int i = 0; i < 3; ++ i) {
-std::cout << "g2sum before: " << i << " " << *(ada_g2sum+begin+i) << "\n";
-}*/
// g2sum
blas.SCAL(update_numel, ada_decay_rate[0], ada_g2sum + begin);
blas.VSQUARE(update_numel, grad2.data(), grad2.data());
blas.VADD(update_numel, ada_g2sum + begin, grad2.data(), ada_g2sum + begin);
-/*
-for (int i = 0; i < end-begin; ++ i) {
-std::cout << "g2sum update: " << i << " " << *(ada_g2sum+begin+i) << "\n";
-}
-for (int i = 0; i < 3; ++ i) {
-std::cout << "mom before: " << i << " " << *(mom_velocity+begin+i) <<
-"\n";
-}*/
// mom
blas.SCAL(update_numel, mom_decay_rate[0], mom_velocity + begin);
blas.SCAL(update_numel, 1 - mom_decay_rate[0], grad.data());
blas.VADD(update_numel, mom_velocity + begin, grad.data(),
mom_velocity + begin);
-/*
-for (int i = 0; i < end-begin; ++ i) {
-std::cout << "mom update: " << i << " " << *(mom_velocity+begin+i) <<
-"\n";
-}
-for (int i = 0; i < 3; ++ i) {
-std::cout << "scale before: " << i << " " << *(scale.data()+begin+i) <<
-"\n";
-}*/
// scale
float* scale_ = scale.data();
blas.VDIV(update_numel, ada_g2sum + begin, ada_d2sum + begin, scale_);
@@ -298,30 +253,13 @@ class DAdamD2Sum : public DenseOptimizer {
DIV<float>(update_numel, 1 + ada_epsilon[0], scale_, scale_);
SQRT<float>(update_numel, scale_, scale_);
-/*
-for (int i = 0; i < 3; ++ i) {
-std::cout << "scale update: " << i << " " << *(scale.data()+begin+i) <<
-"\n";
-}*/
blas.SCAL(update_numel, learning_rate[0], scale_);
// TODO(zhaocaibei123): check if there exists elementwise_multiply in blas
// TODO(zhaocaibei123): blas.VMUL
ELE_MUL<float>(update_numel, scale_, mom_velocity + begin, scale_);
-/*
-for (int i = 0; i < 3; ++ i) {
-std::cout << "scale update2: " << i << " " << *(scale.data()+begin+i) <<
-"\n";
-}*/
blas.VSUB(update_numel, param + begin, scale_, param + begin);
-/*
-for (int i = 0; i < end-begin; ++ i) {
-std::cout << "param update " << i << " " << *(param+begin+i) << "\n";
-}*/
}
float* learning_rate;
......
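With the debug printouts removed, the surviving BLAS calls in DAdamD2Sum::update amount to the per-element rule below (a reading of the code above; the epsilon added to the n/d ratio sits in lines this hunk does not show, so its placement inside the square root is an inference):

\begin{aligned}
d_t &= \lambda_{\text{ada}}\, d_{t-1} + 1 && \text{(ada\_d2sum)} \\
n_t &= \lambda_{\text{ada}}\, n_{t-1} + g_t^2 && \text{(ada\_g2sum)} \\
m_t &= \lambda_{\text{mom}}\, m_{t-1} + (1-\lambda_{\text{mom}})\, g_t && \text{(mom\_velocity)} \\
s_t &= \sqrt{\dfrac{1+\epsilon}{\,n_t/d_t+\epsilon\,}} && \text{(scale)} \\
w_t &= w_{t-1} - \eta\, s_t\, m_t && \text{(param)}
\end{aligned}

Here g_t is the pushed gradient, lambda_ada = ada_decay_rate[0], lambda_mom = mom_decay_rate[0], eta = learning_rate[0] and epsilon = ada_epsilon[0]: d_t counts decayed update steps, n_t accumulates decayed squared gradients, and the step is a momentum term rescaled by the AdaGrad-style ratio.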
This diff has been collapsed.
// Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include <ThreadPool.h>
#include <assert.h>
#include <pthread.h>
#include <memory>
#include <mutex> // NOLINT
#include <string>
#include <unordered_map>
#include <utility>
#include <vector>
#include "Eigen/Dense"
#include "paddle/fluid/distributed/table/accessor.h"
#include "paddle/fluid/distributed/table/common_table.h"
#include "paddle/fluid/distributed/table/depends/feature_value.h"
#include "paddle/fluid/string/string_helper.h"
#define PSERVER_SAVE_SUFFIX ".shard"
namespace paddle {
namespace distributed {
class MemorySparseTable : public SparseTable {
public:
MemorySparseTable() {}
virtual ~MemorySparseTable() {}
// unused method begin
virtual int32_t pull_dense(float* pull_values, size_t num) { return 0; }
virtual int32_t push_dense_param(const float* values, size_t num) {
return 0;
}
virtual int32_t push_dense(const float* values, size_t num) { return 0; }
// unused method end
virtual int32_t initialize();
virtual int32_t initialize_shard() { return 0; }
virtual int32_t initialize_value();
virtual int32_t load(const std::string& path, const std::string& param);
virtual int32_t save(const std::string& path, const std::string& param);
int32_t load_local_fs(const std::string& path, const std::string& param);
int32_t save_local_fs(const std::string& path, const std::string& param,
const std::string& prefix);
virtual std::pair<int64_t, int64_t> print_table_stat();
virtual int32_t pull_sparse(float* values, const PullSparseValue& pull_value);
virtual int32_t pull_sparse_ptr(char** pull_values, const uint64_t* keys,
size_t num);
virtual int32_t push_sparse(const uint64_t* keys, const float* values,
size_t num);
virtual int32_t push_sparse(const uint64_t* keys, const float** values,
size_t num);
virtual int32_t flush();
virtual int32_t shrink(const std::string& param);
virtual void clear();
protected:
virtual int32_t _push_sparse(const uint64_t* keys, const float** values,
size_t num);
protected:
const int task_pool_size_ = 24;
size_t avg_local_shard_num_;
size_t real_local_shard_num_;
size_t sparse_table_shard_num_;
std::vector<std::shared_ptr<::ThreadPool>> shards_task_pool_;
std::vector<std::shared_ptr<SparseTableShard>> shard_values_;
};
} // namespace distributed
} // namespace paddle
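The header above only declares the interface; the implementation file is not expanded in this view. As orientation for the members it declares (a vector of SparseTableShard values, a vector of ::ThreadPool workers sized by task_pool_size_, and the avg/real local shard counters), here is a minimal standalone sketch of that layout. It is an illustration under assumptions, not the MemorySparseTable code: it collapses the three shard counters into a single shard count, routes keys by key % shard_count, uses one single-threaded pool per shard so all accesses to a shard serialize on its own queue, and zero-fills missing keys.

#include <ThreadPool.h>
#include <cstdint>
#include <cstring>
#include <future>
#include <memory>
#include <unordered_map>
#include <vector>

// Toy stand-in for the shard / thread-pool layout declared above.
class ToyShardedTable {
 public:
  ToyShardedTable(size_t shard_num, size_t dim) : dim_(dim), shards_(shard_num) {
    for (size_t i = 0; i < shard_num; ++i) {
      // One single-threaded pool per shard, so each shard is only touched by one thread.
      pools_.emplace_back(std::make_shared<::ThreadPool>(1));
    }
  }

  // Pull dim_ floats per key into `out`; missing keys are zero-initialized here.
  void Pull(const std::vector<uint64_t>& keys, float* out) {
    std::vector<std::future<void>> tasks;
    for (size_t i = 0; i < keys.size(); ++i) {
      const uint64_t key = keys[i];
      const size_t shard = key % shards_.size();  // assumed routing rule
      float* dst = out + i * dim_;
      tasks.emplace_back(pools_[shard]->enqueue([this, shard, key, dst] {
        auto& value = shards_[shard][key];  // per-shard map, owned by one worker
        if (value.empty()) value.assign(dim_, 0.0f);
        std::memcpy(dst, value.data(), dim_ * sizeof(float));
      }));
    }
    for (auto& t : tasks) t.wait();  // mirrors the futures used in the test below
  }

 private:
  size_t dim_;
  std::vector<std::unordered_map<uint64_t, std::vector<float>>> shards_;
  std::vector<std::shared_ptr<::ThreadPool>> pools_;
};

int main() {
  ToyShardedTable table(/*shard_num=*/4, /*dim=*/9);
  std::vector<uint64_t> keys = {0, 1, 2, 3, 4};
  std::vector<float> values(keys.size() * 9, 0.0f);
  table.Pull(keys, values.data());
  return 0;
}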
@@ -24,6 +24,8 @@
#ifdef PADDLE_WITH_HETERPS
#include "paddle/fluid/distributed/table/ssd_sparse_table.h"
#endif
+#include "paddle/fluid/distributed/table/ctr_accessor.h"
+#include "paddle/fluid/distributed/table/memory_sparse_table.h"
#include "paddle/fluid/distributed/table/tensor_accessor.h"
#include "paddle/fluid/distributed/table/tensor_table.h"
@@ -40,7 +42,13 @@ REGISTER_PSCORE_CLASS(Table, BarrierTable);
REGISTER_PSCORE_CLASS(Table, TensorTable);
REGISTER_PSCORE_CLASS(Table, DenseTensorTable);
REGISTER_PSCORE_CLASS(Table, GlobalStepTable);
+REGISTER_PSCORE_CLASS(Table, MemorySparseTable);
REGISTER_PSCORE_CLASS(ValueAccessor, CommMergeAccessor);
+REGISTER_PSCORE_CLASS(ValueAccessor, CtrCommonAccessor);
+REGISTER_PSCORE_CLASS(SparseValueSGDRule, StdAdaGradSGDRule);
+REGISTER_PSCORE_CLASS(SparseValueSGDRule, SparseAdamSGDRule);
+REGISTER_PSCORE_CLASS(SparseValueSGDRule, SparseNaiveSGDRule);
+REGISTER_PSCORE_CLASS(SparseValueSGDRule, SparseAdaGradSGDRule);
int32_t TableManager::initialize() {
static bool initialized = false;
@@ -58,6 +66,11 @@ int32_t Table::initialize(const TableParameter &config,
LOG(WARNING) << "Table accessor initialize failed";
return -1;
}
+if (_afs_client.initialize(fs_config) != 0) {
+LOG(WARNING) << "Table fs_client initialize failed";
+// return -1;
+}
return initialize();
}
@@ -67,6 +80,9 @@ int32_t Table::initialize_accessor() {
<< _config.table_id();
return -1;
}
+LOG(INFO) << "accessor initializing: table_id: " << _config.table_id()
+<< ", accessor_name: " << _config.accessor().accessor_class();
auto *accessor = CREATE_PSCORE_CLASS(
ValueAccessor,
_config.accessor().accessor_class()) if (accessor == NULL) {
......
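The block above wires the new classes into the PSCORE registry: the table, the CTR accessor and the sparse SGD rules are registered by name, and Table::initialize_accessor then instantiates whichever class the proto config names via CREATE_PSCORE_CLASS. The macros themselves are not shown in this commit; the snippet below is only a generic sketch of how such a string-keyed factory is commonly built, so every name in it (TableRegistry, REGISTER_TABLE, CreateTable) is hypothetical and not the PSCORE implementation.

#include <functional>
#include <map>
#include <memory>
#include <string>

struct Table { virtual ~Table() = default; };
struct MemorySparseTable : Table {};

// String-keyed factory: REGISTER_* fills it at static-init time, Create* looks up by name.
std::map<std::string, std::function<std::unique_ptr<Table>()>>& TableRegistry() {
  static std::map<std::string, std::function<std::unique_ptr<Table>()>> registry;
  return registry;
}

#define REGISTER_TABLE(cls)                                                      \
  static const bool registered_##cls = [] {                                      \
    TableRegistry()[#cls] = [] { return std::unique_ptr<Table>(new cls()); };    \
    return true;                                                                 \
  }()

REGISTER_TABLE(MemorySparseTable);

// Counterpart of CREATE_PSCORE_CLASS: resolve the class named in the table config.
std::unique_ptr<Table> CreateTable(const std::string& table_class) {
  auto it = TableRegistry().find(table_class);
  return it == TableRegistry().end() ? nullptr : it->second();
}

int main() {
  auto table = CreateTable("MemorySparseTable");
  return table ? 0 : 1;
}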
@@ -20,6 +20,7 @@
#include <memory>
#include <string>
#include <utility>
+#include "paddle/fluid/distributed/common/afs_warpper.h"
#include "paddle/fluid/distributed/table/accessor.h"
#include "paddle/fluid/distributed/table/depends/sparse_utils.h"
#include "paddle/fluid/distributed/table/graph/graph_node.h"
@@ -103,10 +104,10 @@ class Table {
virtual int32_t flush() = 0;
virtual int32_t shrink(const std::string &param) = 0;
-//指定加载路径
+// 指定加载路径
virtual int32_t load(const std::string &path,
const std::string &converter) = 0;
-//指定保存路径
+// 指定保存路径
virtual int32_t save(const std::string &path,
const std::string &converter) = 0;
@@ -137,6 +138,7 @@ class Table {
TableParameter _config;
float *_global_lr = nullptr;
std::shared_ptr<ValueAccessor> _value_accesor;
+AfsClient _afs_client;
};
REGISTER_PSCORE_REGISTERER(Table);
......
@@ -29,3 +29,6 @@ cc_test(sparse_sgd_rule_test SRCS sparse_sgd_rule_test.cc DEPS ${COMMON_DEPS} bo
set_source_files_properties(ctr_accessor_test.cc PROPERTIES COMPILE_FLAGS ${DISTRIBUTE_COMPILE_FLAGS})
cc_test(ctr_accessor_test SRCS ctr_accessor_test.cc DEPS ${COMMON_DEPS} boost table)
+set_source_files_properties(memory_sparse_table_test.cc PROPERTIES COMPILE_FLAGS ${DISTRIBUTE_COMPILE_FLAGS})
+cc_test(memory_sparse_table_test SRCS memory_sparse_table_test.cc DEPS ${COMMON_DEPS} boost table)
/* Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include <ThreadPool.h>
#include <unistd.h>
#include <string>
#include <thread> // NOLINT
#include "google/protobuf/text_format.h"
#include "gtest/gtest.h"
#include "paddle/fluid/distributed/ps.pb.h"
#include "paddle/fluid/distributed/table/memory_sparse_table.h"
#include "paddle/fluid/distributed/table/table.h"
namespace paddle {
namespace distributed {
TEST(MemorySparseTable, SGD) {
int emb_dim = 8;
int trainers = 2;
TableParameter table_config;
table_config.set_table_class("MemorySparseTable");
table_config.set_shard_num(10);
FsClientParameter fs_config;
Table *table = new MemorySparseTable();
table->set_shard(0, 1);
TableAccessorParameter *accessor_config = table_config.mutable_accessor();
accessor_config->set_accessor_class("CtrCommonAccessor");
accessor_config->set_fea_dim(11);
accessor_config->set_embedx_dim(8);
accessor_config->set_embedx_threshold(5);
accessor_config->mutable_ctr_accessor_param()->set_nonclk_coeff(0.2);
accessor_config->mutable_ctr_accessor_param()->set_click_coeff(1);
accessor_config->mutable_ctr_accessor_param()->set_base_threshold(0.5);
accessor_config->mutable_ctr_accessor_param()->set_delta_threshold(0.2);
accessor_config->mutable_ctr_accessor_param()->set_delta_keep_days(16);
accessor_config->mutable_ctr_accessor_param()->set_show_click_decay_rate(
0.99);
accessor_config->mutable_embed_sgd_param()->set_name("SparseNaiveSGDRule");
auto *naive_param =
accessor_config->mutable_embed_sgd_param()->mutable_naive();
naive_param->set_learning_rate(0.1);
naive_param->set_initial_range(0.3);
naive_param->add_weight_bounds(-10.0);
naive_param->add_weight_bounds(10.0);
accessor_config->mutable_embedx_sgd_param()->set_name("SparseNaiveSGDRule");
naive_param = accessor_config->mutable_embedx_sgd_param()->mutable_naive();
naive_param->set_learning_rate(0.1);
naive_param->set_initial_range(0.3);
naive_param->add_weight_bounds(-10.0);
naive_param->add_weight_bounds(10.0);
auto ret = table->initialize(table_config, fs_config);
ASSERT_EQ(ret, 0);
// pull parameters for create and check
std::vector<uint64_t> init_keys = {0, 1, 2, 3, 4};
std::vector<uint32_t> init_fres = {1, 1, 1, 1, 1};
std::vector<float> init_values;
init_values.resize(init_keys.size() * (emb_dim + 1));
auto value = PullSparseValue(init_keys, init_fres, emb_dim);
table->pull_sparse(init_values.data(), value);
// for check
std::vector<float> total_gradients;
total_gradients.resize(init_keys.size() * (4 + emb_dim));
memset(total_gradients.data(), 0, sizeof(float) * total_gradients.size());
// push gradient
std::vector<std::vector<uint64_t>> trainer_keys;
std::vector<std::vector<float>> trainer_gradient_values;
trainer_keys.resize(trainers);
trainer_gradient_values.resize(trainers);
float start = 0.0;
for (int i = 0; i < trainers; i++) {
start = 0.0;
trainer_keys[i] = init_keys;
for (size_t j = 0; j < trainer_keys[i].size(); j++) {
auto id = trainer_keys[i][j];
for (int k = 0; k < emb_dim + 4; k++) {
trainer_gradient_values[i].push_back(start);
total_gradients[id * (emb_dim + 4) + k] += start;
start += 0.1;
}
}
}
std::shared_ptr<::ThreadPool> pool_ =
std::make_shared<::ThreadPool>(trainers);
std::vector<std::future<void>> task_status;
for (int i = 0; i < trainers; i++) {
auto &push_keys = trainer_keys[i];
auto &push_values = trainer_gradient_values[i];
auto task = [table, &push_keys, &push_values] {
table->push_sparse(push_keys.data(), push_values.data(),
push_keys.size());
};
task_status.push_back(pool_->enqueue(std::move(task)));
}
for (auto &status : task_status) {
status.wait();
}
std::vector<float> pull_values;
pull_values.resize(init_keys.size() * (emb_dim + 1));
table->pull_sparse(pull_values.data(), value);
for (size_t i = 0; i < init_keys.size(); ++i) {
for (size_t j = 0; j < emb_dim + 1; ++j) {
auto update_val = init_values[i * (emb_dim + 1) + j] -
0.1 * total_gradients[3 + i * (emb_dim + 4) + j];
VLOG(3) << total_gradients[i * (emb_dim + 4) + j + 3] << ":"
<< init_values[i * (emb_dim + 1) + j];
VLOG(3) << update_val << ": " << pull_values[i * (emb_dim + 1) + j];
}
}
MemorySparseTable *ctr_table = dynamic_cast<MemorySparseTable *>(table);
ctr_table->save_local_fs("./work/table.save", "0", "test");
}
} // namespace distributed
} // namespace paddle
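For reference, the check loop above computes the value both naive SGD rules should produce once the two trainers' pushes land: with the configured learning rate of 0.1, every pulled weight is expected to drop by the learning rate times the summed gradient, and the offset of 3 into the (emb_dim + 4)-float push block skips the leading per-key statistics (show/click-style counters; the exact field order is an assumption here):

w_{i,j}^{\text{pulled}} \approx w_{i,j}^{\text{init}} - \eta \sum_{t=1}^{T} g_{t,i,\,3+j}, \qquad \eta = 0.1,\; T = 2,

where g_{t,i,k} is the k-th float of trainer t's gradient block for key i. Note the test only VLOGs the expected and pulled values rather than asserting on them, so verifying the tolerance is left to whoever reads the logs.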
-cc_library(fs SRCS fs.cc DEPS string_helper glog boost enforce)
cc_library(shell SRCS shell.cc DEPS string_helper glog timer enforce)
+cc_library(fs SRCS fs.cc DEPS string_helper glog boost enforce shell)
cc_test(test_fs SRCS test_fs.cc DEPS fs shell)
if (WITH_CRYPTO)
......