未验证 提交 8113c913 编写于 作者: W wangguanqun 提交者: GitHub

double accessor and show_scale (#41943)

* double accessor and show_scale

* double accessor and show_scale

* rename

* fix bug in pslib config

* add unittest
上级 d3f95e5a
...@@ -10,7 +10,7 @@ Table: for param storage and update ...@@ -10,7 +10,7 @@ Table: for param storage and update
ValueAccessor: for pull param and push gradient ValueAccessor: for pull param and push gradient
-----CtrCommonAccessor: pull/push value with show/click, float type -----CtrCommonAccessor: pull/push value with show/click, float type
-----DownpourCtrDoubleAccessor: same as CtrCommonAccessor, except that show/click are stored as double -----CtrDoubleAccessor: same as CtrCommonAccessor, except that show/click are stored as double
-----SparseAccessor: used for common embedding, pull value without show/click, push value with show/click -----SparseAccessor: used for common embedding, pull value without show/click, push value with show/click
-----CommMergeAccessor: used for dense table only, for get param dim -----CommMergeAccessor: used for dense table only, for get param dim
......
...@@ -42,8 +42,7 @@ set_source_files_properties(ctr_accessor.cc PROPERTIES COMPILE_FLAGS ${DISTRIBUT ...@@ -42,8 +42,7 @@ set_source_files_properties(ctr_accessor.cc PROPERTIES COMPILE_FLAGS ${DISTRIBUT
set_source_files_properties(sparse_accessor.cc PROPERTIES COMPILE_FLAGS ${DISTRIBUTE_COMPILE_FLAGS}) set_source_files_properties(sparse_accessor.cc PROPERTIES COMPILE_FLAGS ${DISTRIBUTE_COMPILE_FLAGS})
set_source_files_properties(memory_sparse_table.cc PROPERTIES COMPILE_FLAGS ${DISTRIBUTE_COMPILE_FLAGS}) set_source_files_properties(memory_sparse_table.cc PROPERTIES COMPILE_FLAGS ${DISTRIBUTE_COMPILE_FLAGS})
cc_library(sparse_sgd_rule SRCS sparse_sgd_rule.cc DEPS ${TABLE_DEPS} ps_framework_proto) cc_library(sparse_sgd_rule SRCS sparse_sgd_rule.cc DEPS ${TABLE_DEPS} ps_framework_proto)
cc_library(ctr_double_accessor SRCS ctr_double_accessor.cc DEPS ${TABLE_DEPS} ps_framework_proto sparse_sgd_rule) cc_library(ctr_accessor SRCS ctr_accessor.cc ctr_double_accessor.cc sparse_accessor.cc DEPS ${TABLE_DEPS} ps_framework_proto sparse_sgd_rule)
cc_library(ctr_accessor SRCS ctr_accessor.cc sparse_accessor.cc DEPS ${TABLE_DEPS} ps_framework_proto sparse_sgd_rule)
cc_library(memory_sparse_table SRCS memory_sparse_table.cc DEPS ps_framework_proto ${TABLE_DEPS} fs afs_wrapper ctr_accessor common_table) cc_library(memory_sparse_table SRCS memory_sparse_table.cc DEPS ps_framework_proto ${TABLE_DEPS} fs afs_wrapper ctr_accessor common_table)
set_source_files_properties(memory_sparse_geo_table.cc PROPERTIES COMPILE_FLAGS ${DISTRIBUTE_COMPILE_FLAGS}) set_source_files_properties(memory_sparse_geo_table.cc PROPERTIES COMPILE_FLAGS ${DISTRIBUTE_COMPILE_FLAGS})
......
...@@ -35,6 +35,10 @@ int CtrCommonAccessor::Initialize() { ...@@ -35,6 +35,10 @@ int CtrCommonAccessor::Initialize() {
common_feature_value.embedx_sgd_dim = _embedx_sgd_rule->Dim(); common_feature_value.embedx_sgd_dim = _embedx_sgd_rule->Dim();
_show_click_decay_rate = _config.ctr_accessor_param().show_click_decay_rate(); _show_click_decay_rate = _config.ctr_accessor_param().show_click_decay_rate();
if (_config.ctr_accessor_param().show_scale()) {
_show_scale = true;
}
InitAccessorInfo(); InitAccessorInfo();
return 0; return 0;
} }
...@@ -233,6 +237,11 @@ int32_t CtrCommonAccessor::Update(float** update_values, ...@@ -233,6 +237,11 @@ int32_t CtrCommonAccessor::Update(float** update_values,
push_click * _config.ctr_accessor_param().click_coeff(); push_click * _config.ctr_accessor_param().click_coeff();
update_value[common_feature_value.UnseenDaysIndex()] = 0; update_value[common_feature_value.UnseenDaysIndex()] = 0;
// TODO(zhaocaibei123): add configure show_scale // TODO(zhaocaibei123): add configure show_scale
if (!_show_scale) {
push_show = 1;
}
VLOG(3) << "accessor show scale:" << _show_scale
<< ", push_show:" << push_show;
_embed_sgd_rule->UpdateValue( _embed_sgd_rule->UpdateValue(
update_value + common_feature_value.EmbedWIndex(), update_value + common_feature_value.EmbedWIndex(),
update_value + common_feature_value.EmbedG2SumIndex(), update_value + common_feature_value.EmbedG2SumIndex(),
......
...@@ -24,9 +24,9 @@ ...@@ -24,9 +24,9 @@
namespace paddle { namespace paddle {
namespace distributed { namespace distributed {
class DownpourCtrDoubleAccessor : public ValueAccessor { class CtrDoubleAccessor : public ValueAccessor {
public: public:
struct DownpourCtrDoubleFeatureValue { struct CtrDoubleFeatureValue {
/* /*
float unseen_days; float unseen_days;
float delta_score; float delta_score;
...@@ -45,60 +45,56 @@ class DownpourCtrDoubleAccessor : public ValueAccessor { ...@@ -45,60 +45,56 @@ class DownpourCtrDoubleAccessor : public ValueAccessor {
} }
static int UnseenDaysIndex() { return 0; } static int UnseenDaysIndex() { return 0; }
static int DeltaScoreIndex() { static int DeltaScoreIndex() {
return DownpourCtrDoubleFeatureValue::UnseenDaysIndex() + 1; return CtrDoubleFeatureValue::UnseenDaysIndex() + 1;
} }
static int ShowIndex() { static int ShowIndex() {
return DownpourCtrDoubleFeatureValue::DeltaScoreIndex() + 1; return CtrDoubleFeatureValue::DeltaScoreIndex() + 1;
} }
// show is double // show is double
static int ClickIndex() { static int ClickIndex() { return CtrDoubleFeatureValue::ShowIndex() + 2; }
return DownpourCtrDoubleFeatureValue::ShowIndex() + 2;
}
// click is double // click is double
static int EmbedWIndex() { static int EmbedWIndex() { return CtrDoubleFeatureValue::ClickIndex() + 2; }
return DownpourCtrDoubleFeatureValue::ClickIndex() + 2;
}
static int EmbedG2SumIndex() { static int EmbedG2SumIndex() {
return DownpourCtrDoubleFeatureValue::EmbedWIndex() + 1; return CtrDoubleFeatureValue::EmbedWIndex() + 1;
} }
static int SlotIndex() { static int SlotIndex() {
return DownpourCtrDoubleFeatureValue::EmbedG2SumIndex() + 1; return CtrDoubleFeatureValue::EmbedG2SumIndex() + 1;
} }
static int EmbedxG2SumIndex() { static int EmbedxG2SumIndex() {
return DownpourCtrDoubleFeatureValue::SlotIndex() + 1; return CtrDoubleFeatureValue::SlotIndex() + 1;
} }
static int EmbedxWIndex() { static int EmbedxWIndex() {
return DownpourCtrDoubleFeatureValue::EmbedxG2SumIndex() + 1; return CtrDoubleFeatureValue::EmbedxG2SumIndex() + 1;
} }
static float& UnseenDays(float* val) { static float& UnseenDays(float* val) {
return val[DownpourCtrDoubleFeatureValue::UnseenDaysIndex()]; return val[CtrDoubleFeatureValue::UnseenDaysIndex()];
} }
static float& DeltaScore(float* val) { static float& DeltaScore(float* val) {
return val[DownpourCtrDoubleFeatureValue::DeltaScoreIndex()]; return val[CtrDoubleFeatureValue::DeltaScoreIndex()];
} }
static double& Show(float* val) { static double& Show(float* val) {
return ((double*)(val + DownpourCtrDoubleFeatureValue::ShowIndex()))[0]; return ((double*)(val + CtrDoubleFeatureValue::ShowIndex()))[0];
} }
static double& Click(float* val) { static double& Click(float* val) {
return ((double*)(val + DownpourCtrDoubleFeatureValue::ClickIndex()))[0]; return ((double*)(val + CtrDoubleFeatureValue::ClickIndex()))[0];
} }
static float& Slot(float* val) { static float& Slot(float* val) {
return val[DownpourCtrDoubleFeatureValue::SlotIndex()]; return val[CtrDoubleFeatureValue::SlotIndex()];
} }
static float& EmbedW(float* val) { static float& EmbedW(float* val) {
return val[DownpourCtrDoubleFeatureValue::EmbedWIndex()]; return val[CtrDoubleFeatureValue::EmbedWIndex()];
} }
static float& EmbedG2Sum(float* val) { static float& EmbedG2Sum(float* val) {
return val[DownpourCtrDoubleFeatureValue::EmbedG2SumIndex()]; return val[CtrDoubleFeatureValue::EmbedG2SumIndex()];
} }
static float& EmbedxG2Sum(float* val) { static float& EmbedxG2Sum(float* val) {
return val[DownpourCtrDoubleFeatureValue::EmbedxG2SumIndex()]; return val[CtrDoubleFeatureValue::EmbedxG2SumIndex()];
} }
static float* EmbedxW(float* val) { static float* EmbedxW(float* val) {
return (val + DownpourCtrDoubleFeatureValue::EmbedxWIndex()); return (val + CtrDoubleFeatureValue::EmbedxWIndex());
} }
}; };
struct DownpourCtrDoublePushValue { struct CtrDoublePushValue {
/* /*
float slot; float slot;
float show; float show;
...@@ -110,35 +106,27 @@ class DownpourCtrDoubleAccessor : public ValueAccessor { ...@@ -110,35 +106,27 @@ class DownpourCtrDoubleAccessor : public ValueAccessor {
static int DimSize(int dim, int embedx_dim) { return sizeof(float); } static int DimSize(int dim, int embedx_dim) { return sizeof(float); }
static int Size(int embedx_dim) { return Dim(embedx_dim) * sizeof(float); } static int Size(int embedx_dim) { return Dim(embedx_dim) * sizeof(float); }
static int SlotIndex() { return 0; } static int SlotIndex() { return 0; }
static int ShowIndex() { static int ShowIndex() { return CtrDoublePushValue::SlotIndex() + 1; }
return DownpourCtrDoublePushValue::SlotIndex() + 1; static int ClickIndex() { return CtrDoublePushValue::ShowIndex() + 1; }
} static int EmbedGIndex() { return CtrDoublePushValue::ClickIndex() + 1; }
static int ClickIndex() { static int EmbedxGIndex() { return CtrDoublePushValue::EmbedGIndex() + 1; }
return DownpourCtrDoublePushValue::ShowIndex() + 1;
}
static int EmbedGIndex() {
return DownpourCtrDoublePushValue::ClickIndex() + 1;
}
static int EmbedxGIndex() {
return DownpourCtrDoublePushValue::EmbedGIndex() + 1;
}
static float& Slot(float* val) { static float& Slot(float* val) {
return val[DownpourCtrDoublePushValue::SlotIndex()]; return val[CtrDoublePushValue::SlotIndex()];
} }
static float& Show(float* val) { static float& Show(float* val) {
return val[DownpourCtrDoublePushValue::ShowIndex()]; return val[CtrDoublePushValue::ShowIndex()];
} }
static float& Click(float* val) { static float& Click(float* val) {
return val[DownpourCtrDoublePushValue::ClickIndex()]; return val[CtrDoublePushValue::ClickIndex()];
} }
static float& EmbedG(float* val) { static float& EmbedG(float* val) {
return val[DownpourCtrDoublePushValue::EmbedGIndex()]; return val[CtrDoublePushValue::EmbedGIndex()];
} }
static float* EmbedxG(float* val) { static float* EmbedxG(float* val) {
return val + DownpourCtrDoublePushValue::EmbedxGIndex(); return val + CtrDoublePushValue::EmbedxGIndex();
} }
}; };
struct DownpourCtrDoublePullValue { struct CtrDoublePullValue {
/* /*
float show; float show;
float click; float click;
...@@ -153,20 +141,20 @@ class DownpourCtrDoubleAccessor : public ValueAccessor { ...@@ -153,20 +141,20 @@ class DownpourCtrDoubleAccessor : public ValueAccessor {
static int EmbedWIndex() { return 2; } static int EmbedWIndex() { return 2; }
static int EmbedxWIndex() { return 3; } static int EmbedxWIndex() { return 3; }
static float& Show(float* val) { static float& Show(float* val) {
return val[DownpourCtrDoublePullValue::ShowIndex()]; return val[CtrDoublePullValue::ShowIndex()];
} }
static float& Click(float* val) { static float& Click(float* val) {
return val[DownpourCtrDoublePullValue::ClickIndex()]; return val[CtrDoublePullValue::ClickIndex()];
} }
static float& EmbedW(float* val) { static float& EmbedW(float* val) {
return val[DownpourCtrDoublePullValue::EmbedWIndex()]; return val[CtrDoublePullValue::EmbedWIndex()];
} }
static float* EmbedxW(float* val) { static float* EmbedxW(float* val) {
return val + DownpourCtrDoublePullValue::EmbedxWIndex(); return val + CtrDoublePullValue::EmbedxWIndex();
} }
}; };
DownpourCtrDoubleAccessor() {} CtrDoubleAccessor() {}
virtual ~DownpourCtrDoubleAccessor() {} virtual ~CtrDoubleAccessor() {}
virtual int Initialize(); virtual int Initialize();
// 初始化AccessorInfo // 初始化AccessorInfo
virtual void InitAccessorInfo(); virtual void InitAccessorInfo();
...@@ -182,7 +170,7 @@ class DownpourCtrDoubleAccessor : public ValueAccessor { ...@@ -182,7 +170,7 @@ class DownpourCtrDoubleAccessor : public ValueAccessor {
// update delta_score and unseen_days after save // update delta_score and unseen_days after save
virtual void UpdateStatAfterSave(float* value, int param) override; virtual void UpdateStatAfterSave(float* value, int param) override;
// 判断该value是否保存到ssd // 判断该value是否保存到ssd
virtual bool save_ssd(float* value); virtual bool SaveSSD(float* value);
// virtual bool save_cache(float* value, int param, double // virtual bool save_cache(float* value, int param, double
// global_cache_threshold) override; // global_cache_threshold) override;
// keys不存在时,为values生成随机值 // keys不存在时,为values生成随机值
...@@ -206,14 +194,14 @@ class DownpourCtrDoubleAccessor : public ValueAccessor { ...@@ -206,14 +194,14 @@ class DownpourCtrDoubleAccessor : public ValueAccessor {
virtual float GetField(float* value, const std::string& name) override { virtual float GetField(float* value, const std::string& name) override {
CHECK(name == "show"); CHECK(name == "show");
if (name == "show") { if (name == "show") {
return (float)DownpourCtrDoubleFeatureValue::Show(value); return (float)CtrDoubleFeatureValue::Show(value);
} }
return 0.0; return 0.0;
} }
// DEFINE_GET_INDEX(DownpourCtrDoubleFeatureValue, show) // DEFINE_GET_INDEX(CtrDoubleFeatureValue, show)
// DEFINE_GET_INDEX(DownpourCtrDoubleFeatureValue, click) // DEFINE_GET_INDEX(CtrDoubleFeatureValue, click)
// DEFINE_GET_INDEX(DownpourCtrDoubleFeatureValue, embed_w) // DEFINE_GET_INDEX(CtrDoubleFeatureValue, embed_w)
// DEFINE_GET_INDEX(DownpourCtrDoubleFeatureValue, embedx_w) // DEFINE_GET_INDEX(CtrDoubleFeatureValue, embedx_w)
private: private:
double ShowClickScore(double show, double click); double ShowClickScore(double show, double click);
...@@ -222,6 +210,7 @@ class DownpourCtrDoubleAccessor : public ValueAccessor { ...@@ -222,6 +210,7 @@ class DownpourCtrDoubleAccessor : public ValueAccessor {
SparseValueSGDRule* _embedx_sgd_rule; SparseValueSGDRule* _embedx_sgd_rule;
float _show_click_decay_rate; float _show_click_decay_rate;
int32_t _ssd_unseenday_threshold; int32_t _ssd_unseenday_threshold;
bool _show_scale = false;
}; };
} // namespace distributed } // namespace distributed
} // namespace paddle } // namespace paddle
...@@ -21,6 +21,7 @@ ...@@ -21,6 +21,7 @@
#include "paddle/fluid/distributed/ps/table/memory_dense_table.h" #include "paddle/fluid/distributed/ps/table/memory_dense_table.h"
#include "paddle/fluid/distributed/ps/table/ctr_accessor.h" #include "paddle/fluid/distributed/ps/table/ctr_accessor.h"
#include "paddle/fluid/distributed/ps/table/ctr_double_accessor.h"
#include "paddle/fluid/distributed/ps/table/memory_sparse_geo_table.h" #include "paddle/fluid/distributed/ps/table/memory_sparse_geo_table.h"
#include "paddle/fluid/distributed/ps/table/memory_sparse_table.h" #include "paddle/fluid/distributed/ps/table/memory_sparse_table.h"
#include "paddle/fluid/distributed/ps/table/sparse_accessor.h" #include "paddle/fluid/distributed/ps/table/sparse_accessor.h"
...@@ -39,6 +40,7 @@ REGISTER_PSCORE_CLASS(Table, MemorySparseTable); ...@@ -39,6 +40,7 @@ REGISTER_PSCORE_CLASS(Table, MemorySparseTable);
REGISTER_PSCORE_CLASS(Table, MemorySparseGeoTable); REGISTER_PSCORE_CLASS(Table, MemorySparseGeoTable);
REGISTER_PSCORE_CLASS(ValueAccessor, CommMergeAccessor); REGISTER_PSCORE_CLASS(ValueAccessor, CommMergeAccessor);
REGISTER_PSCORE_CLASS(ValueAccessor, CtrCommonAccessor); REGISTER_PSCORE_CLASS(ValueAccessor, CtrCommonAccessor);
REGISTER_PSCORE_CLASS(ValueAccessor, CtrDoubleAccessor);
REGISTER_PSCORE_CLASS(ValueAccessor, SparseAccessor); REGISTER_PSCORE_CLASS(ValueAccessor, SparseAccessor);
REGISTER_PSCORE_CLASS(SparseValueSGDRule, StdAdaGradSGDRule); REGISTER_PSCORE_CLASS(SparseValueSGDRule, StdAdaGradSGDRule);
REGISTER_PSCORE_CLASS(SparseValueSGDRule, SparseAdamSGDRule); REGISTER_PSCORE_CLASS(SparseValueSGDRule, SparseAdamSGDRule);
......
...@@ -153,6 +153,7 @@ message CtrAccessorParameter { ...@@ -153,6 +153,7 @@ message CtrAccessorParameter {
// will be deleted in shrink_model // will be deleted in shrink_model
optional int32 ssd_unseenday_threshold = 9 optional int32 ssd_unseenday_threshold = 9
[ default = 1 ]; // threshold to save ssd [ default = 1 ]; // threshold to save ssd
optional bool show_scale = 10 [ default = true ];
} }
message TensorAccessorParameter { message TensorAccessorParameter {
......
...@@ -258,6 +258,7 @@ message CtrAccessorParameter { ...@@ -258,6 +258,7 @@ message CtrAccessorParameter {
[ default = 0.8 ]; // threshold to shrink a feasign [ default = 0.8 ]; // threshold to shrink a feasign
optional float delete_after_unseen_days = 8 [ default = 30 ]; optional float delete_after_unseen_days = 8 [ default = 30 ];
optional int32 ssd_unseenday_threshold = 9 [ default = 1 ]; optional int32 ssd_unseenday_threshold = 9 [ default = 1 ];
optional bool show_scale = 10 [ default = true ];
} }
message TableAccessorSaveParameter { message TableAccessorSaveParameter {
......
...@@ -611,12 +611,15 @@ class DistributedStrategy(object): ...@@ -611,12 +611,15 @@ class DistributedStrategy(object):
"DownpourCtrAccessor") "DownpourCtrAccessor")
if accessor_class not in support_sparse_accessor_class: if accessor_class not in support_sparse_accessor_class:
raise ValueError( raise ValueError(
"support sparse_accessor_class: [''DownpourSparseValueAccessor', 'DownpourCtrAccessor', 'DownpourCtrDoubleAccessor', 'DownpourUnitAccessor', 'DownpourDoubleUnitAccessor'], but actual %s" "support sparse_accessor_class: ['DownpourSparseValueAccessor', 'DownpourCtrAccessor', 'DownpourCtrDoubleAccessor', 'DownpourUnitAccessor', 'DownpourDoubleUnitAccessor'], but actual %s"
% (accessor_class)) % (accessor_class))
if configs.get("use_cvm", True): if accessor_class.find("Double") >= 0:
table_data.accessor.accessor_class = 'CtrCommonAccessor' table_data.accessor.accessor_class = 'CtrDoubleAccessor'
else: else:
table_data.accessor.accessor_class = 'CtrCommonAccessor'
if not configs.get("use_cvm", True):
table_data.accessor.accessor_class = 'SparseAccessor' table_data.accessor.accessor_class = 'SparseAccessor'
table_data.accessor.embedx_dim = config.get('sparse_embedx_dim', 8) table_data.accessor.embedx_dim = config.get('sparse_embedx_dim', 8)
...@@ -624,6 +627,11 @@ class DistributedStrategy(object): ...@@ -624,6 +627,11 @@ class DistributedStrategy(object):
table_data.accessor.embedx_threshold = config.get( table_data.accessor.embedx_threshold = config.get(
'sparse_embedx_threshold', 10) 'sparse_embedx_threshold', 10)
if accessor_class == 'DownpourUnitAccessor':
table_data.accessor.ctr_accessor_param.show_scale = False
else:
table_data.accessor.ctr_accessor_param.show_scale = True
table_data.accessor.ctr_accessor_param.nonclk_coeff = config.get( table_data.accessor.ctr_accessor_param.nonclk_coeff = config.get(
'sparse_nonclk_coeff', 0.1) 'sparse_nonclk_coeff', 0.1)
table_data.accessor.ctr_accessor_param.click_coeff = config.get( table_data.accessor.ctr_accessor_param.click_coeff = config.get(
......
...@@ -310,9 +310,22 @@ class TestStrategyConfig(unittest.TestCase): ...@@ -310,9 +310,22 @@ class TestStrategyConfig(unittest.TestCase):
"embed_sparse_optimizer": "std_adagrad" "embed_sparse_optimizer": "std_adagrad"
} }
strategy.fleet_desc_configs = configs strategy.fleet_desc_configs = configs
self.assertEqual(strategy.sparse_table_configs[0]
.accessor.ctr_accessor_param.show_scale, False)
self.assertEqual(strategy.sparse_table_configs[0] self.assertEqual(strategy.sparse_table_configs[0]
.accessor.embed_sgd_param.adagrad.initial_range, 0) .accessor.embed_sgd_param.adagrad.initial_range, 0)
strategy = paddle.distributed.fleet.DistributedStrategy()
configs = {}
configs['emb'] = {
"sparse_accessor_class": "DownpourCtrDoubleAccessor",
"embed_sparse_optimizer": "std_adagrad"
}
strategy.fleet_desc_configs = configs
self.assertEqual(strategy.sparse_table_configs[0]
.accessor.embed_sgd_param.adagrad.initial_range,
0.0001)
def test_trainer_desc_configs(self): def test_trainer_desc_configs(self):
strategy = paddle.distributed.fleet.DistributedStrategy() strategy = paddle.distributed.fleet.DistributedStrategy()
configs = { configs = {
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册