From efaef31a60500c8c618342bb77714dc973ff4939 Mon Sep 17 00:00:00 2001 From: wangguanqun Date: Thu, 21 Apr 2022 16:08:54 +0800 Subject: [PATCH] double accessor and show_scale (#41943) (#42014) * double accessor and show_scale * double accessor and show_scale * rename * fix bug in pslib config * add unittest --- paddle/fluid/distributed/ps/README.md | 2 +- .../fluid/distributed/ps/table/CMakeLists.txt | 3 +- .../distributed/ps/table/ctr_accessor.cc | 9 + .../ps/table/ctr_double_accessor.cc | 217 +++++++++--------- .../ps/table/ctr_double_accessor.h | 97 ++++---- paddle/fluid/distributed/ps/table/table.cc | 2 + paddle/fluid/distributed/the_one_ps.proto | 1 + .../framework/distributed_strategy.proto | 1 + .../fleet/base/distributed_strategy.py | 14 +- .../test_fleet_distributed_strategy.py | 13 ++ 10 files changed, 192 insertions(+), 167 deletions(-) diff --git a/paddle/fluid/distributed/ps/README.md b/paddle/fluid/distributed/ps/README.md index afa6d60a4e0..728ab0ddae2 100755 --- a/paddle/fluid/distributed/ps/README.md +++ b/paddle/fluid/distributed/ps/README.md @@ -10,7 +10,7 @@ Table: for param storage and update ValueAccessor: for pull param and push gradient -----CtrCommonAccessor: pull/push value with show/click, float type ------DownpourCtrDoubleAccessor: same as CtrCommonAccessor, other than show/click with double type +-----CtrDoubleAccessor: same as CtrCommonAccessor, other than show/click with double type -----SparseAccessor: used for common embedding, pull value without show/click, push value with show/click -----CommMergeAccessor: used for dense table only, for get param dim diff --git a/paddle/fluid/distributed/ps/table/CMakeLists.txt b/paddle/fluid/distributed/ps/table/CMakeLists.txt index aebe36b5e04..bb6725b0842 100644 --- a/paddle/fluid/distributed/ps/table/CMakeLists.txt +++ b/paddle/fluid/distributed/ps/table/CMakeLists.txt @@ -42,8 +42,7 @@ set_source_files_properties(ctr_accessor.cc PROPERTIES COMPILE_FLAGS ${DISTRIBUT set_source_files_properties(sparse_accessor.cc PROPERTIES COMPILE_FLAGS ${DISTRIBUTE_COMPILE_FLAGS}) set_source_files_properties(memory_sparse_table.cc PROPERTIES COMPILE_FLAGS ${DISTRIBUTE_COMPILE_FLAGS}) cc_library(sparse_sgd_rule SRCS sparse_sgd_rule.cc DEPS ${TABLE_DEPS} ps_framework_proto) -cc_library(ctr_double_accessor SRCS ctr_double_accessor.cc DEPS ${TABLE_DEPS} ps_framework_proto sparse_sgd_rule) -cc_library(ctr_accessor SRCS ctr_accessor.cc sparse_accessor.cc DEPS ${TABLE_DEPS} ps_framework_proto sparse_sgd_rule) +cc_library(ctr_accessor SRCS ctr_accessor.cc ctr_double_accessor.cc sparse_accessor.cc DEPS ${TABLE_DEPS} ps_framework_proto sparse_sgd_rule) cc_library(memory_sparse_table SRCS memory_sparse_table.cc DEPS ps_framework_proto ${TABLE_DEPS} fs afs_wrapper ctr_accessor common_table) set_source_files_properties(memory_sparse_geo_table.cc PROPERTIES COMPILE_FLAGS ${DISTRIBUTE_COMPILE_FLAGS}) diff --git a/paddle/fluid/distributed/ps/table/ctr_accessor.cc b/paddle/fluid/distributed/ps/table/ctr_accessor.cc index 4446c8297c5..715abe270e5 100644 --- a/paddle/fluid/distributed/ps/table/ctr_accessor.cc +++ b/paddle/fluid/distributed/ps/table/ctr_accessor.cc @@ -35,6 +35,10 @@ int CtrCommonAccessor::Initialize() { common_feature_value.embedx_sgd_dim = _embedx_sgd_rule->Dim(); _show_click_decay_rate = _config.ctr_accessor_param().show_click_decay_rate(); + if (_config.ctr_accessor_param().show_scale()) { + _show_scale = true; + } + InitAccessorInfo(); return 0; } @@ -233,6 +237,11 @@ int32_t CtrCommonAccessor::Update(float** update_values, push_click * _config.ctr_accessor_param().click_coeff(); update_value[common_feature_value.UnseenDaysIndex()] = 0; // TODO(zhaocaibei123): add configure show_scale + if (!_show_scale) { + push_show = 1; + } + VLOG(3) << "accessor show scale:" << _show_scale + << ", push_show:" << push_show; _embed_sgd_rule->UpdateValue( update_value + common_feature_value.EmbedWIndex(), update_value + common_feature_value.EmbedG2SumIndex(), diff --git a/paddle/fluid/distributed/ps/table/ctr_double_accessor.cc b/paddle/fluid/distributed/ps/table/ctr_double_accessor.cc index 740b03a84e4..f0d9426343d 100644 --- a/paddle/fluid/distributed/ps/table/ctr_double_accessor.cc +++ b/paddle/fluid/distributed/ps/table/ctr_double_accessor.cc @@ -20,7 +20,7 @@ namespace paddle { namespace distributed { -int DownpourCtrDoubleAccessor::Initialize() { +int CtrDoubleAccessor::Initialize() { auto name = _config.embed_sgd_param().name(); _embed_sgd_rule = CREATE_PSCORE_CLASS(SparseValueSGDRule, name); _embed_sgd_rule->LoadConfig(_config.embed_sgd_param(), 1); @@ -34,14 +34,18 @@ int DownpourCtrDoubleAccessor::Initialize() { _ssd_unseenday_threshold = _config.ctr_accessor_param().ssd_unseenday_threshold(); + if (_config.ctr_accessor_param().show_scale()) { + _show_scale = true; + } + InitAccessorInfo(); return 0; } -void DownpourCtrDoubleAccessor::InitAccessorInfo() { +void CtrDoubleAccessor::InitAccessorInfo() { auto embedx_dim = _config.embedx_dim(); - _accessor_info.dim = DownpourCtrDoubleFeatureValue::Dim(embedx_dim); - _accessor_info.size = DownpourCtrDoubleFeatureValue::Size(embedx_dim); + _accessor_info.dim = CtrDoubleFeatureValue::Dim(embedx_dim); + _accessor_info.size = CtrDoubleFeatureValue::Size(embedx_dim); _accessor_info.select_dim = 3 + embedx_dim; _accessor_info.select_size = _accessor_info.select_dim * sizeof(float); _accessor_info.update_dim = 4 + embedx_dim; @@ -49,7 +53,7 @@ void DownpourCtrDoubleAccessor::InitAccessorInfo() { _accessor_info.mf_size = (embedx_dim + 1) * sizeof(float); } -bool DownpourCtrDoubleAccessor::Shrink(float* value) { +bool CtrDoubleAccessor::Shrink(float* value) { // auto base_threshold = _config.ctr_accessor_param().base_threshold(); // auto delta_threshold = _config.ctr_accessor_param().delta_threshold(); // auto delete_threshold = _config.ctr_accessor_param().delete_threshold(); @@ -59,38 +63,37 @@ bool DownpourCtrDoubleAccessor::Shrink(float* value) { _config.ctr_accessor_param().delete_after_unseen_days(); auto delete_threshold = _config.ctr_accessor_param().delete_threshold(); // time_decay first - DownpourCtrDoubleFeatureValue::Show(value) *= _show_click_decay_rate; - DownpourCtrDoubleFeatureValue::Click(value) *= _show_click_decay_rate; + CtrDoubleFeatureValue::Show(value) *= _show_click_decay_rate; + CtrDoubleFeatureValue::Click(value) *= _show_click_decay_rate; // shrink after - auto score = ShowClickScore(DownpourCtrDoubleFeatureValue::Show(value), - DownpourCtrDoubleFeatureValue::Click(value)); - auto unseen_days = DownpourCtrDoubleFeatureValue::UnseenDays(value); + auto score = ShowClickScore(CtrDoubleFeatureValue::Show(value), + CtrDoubleFeatureValue::Click(value)); + auto unseen_days = CtrDoubleFeatureValue::UnseenDays(value); if (score < delete_threshold || unseen_days > delete_after_unseen_days) { return true; } return false; } -bool DownpourCtrDoubleAccessor::save_ssd(float* value) { - if (DownpourCtrDoubleFeatureValue::UnseenDays(value) > - _ssd_unseenday_threshold) { +bool CtrDoubleAccessor::SaveSSD(float* value) { + if (CtrDoubleFeatureValue::UnseenDays(value) > _ssd_unseenday_threshold) { return true; } return false; } -// bool DownpourCtrDoubleAccessor::save_cache( +// bool CtrDoubleAccessor::save_cache( // float* value, int param, double global_cache_threshold) { // auto base_threshold = _config.ctr_accessor_param().base_threshold(); // auto delta_keep_days = _config.ctr_accessor_param().delta_keep_days(); -// if (ShowClickScore(DownpourCtrDoubleFeatureValue::Show(value), -// DownpourCtrDoubleFeatureValue::Click(value)) >= base_threshold -// && DownpourCtrDoubleFeatureValue::UnseenDays(value) <= +// if (ShowClickScore(CtrDoubleFeatureValue::Show(value), +// CtrDoubleFeatureValue::Click(value)) >= base_threshold +// && CtrDoubleFeatureValue::UnseenDays(value) <= // delta_keep_days) { -// return DownpourCtrDoubleFeatureValue::Show(value) > +// return CtrDoubleFeatureValue::Show(value) > // global_cache_threshold; // } // return false; // } -bool DownpourCtrDoubleAccessor::Save(float* value, int param) { +bool CtrDoubleAccessor::Save(float* value, int param) { // auto base_threshold = _config.ctr_accessor_param().base_threshold(); // auto delta_threshold = _config.ctr_accessor_param().delta_threshold(); // auto delta_keep_days = _config.ctr_accessor_param().delta_keep_days(); @@ -109,14 +112,14 @@ bool DownpourCtrDoubleAccessor::Save(float* value, int param) { case 1: // save xbox base case 2: { - if (ShowClickScore(DownpourCtrDoubleFeatureValue::Show(value), - DownpourCtrDoubleFeatureValue::Click(value)) >= + if (ShowClickScore(CtrDoubleFeatureValue::Show(value), + CtrDoubleFeatureValue::Click(value)) >= base_threshold && - DownpourCtrDoubleFeatureValue::DeltaScore(value) >= delta_threshold && - DownpourCtrDoubleFeatureValue::UnseenDays(value) <= delta_keep_days) { + CtrDoubleFeatureValue::DeltaScore(value) >= delta_threshold && + CtrDoubleFeatureValue::UnseenDays(value) <= delta_keep_days) { // do this after save, because it must not be modified when retry if (param == 2) { - DownpourCtrDoubleFeatureValue::DeltaScore(value) = 0; + CtrDoubleFeatureValue::DeltaScore(value) = 0; } return true; } else { @@ -125,10 +128,10 @@ bool DownpourCtrDoubleAccessor::Save(float* value, int param) { } // already decayed in shrink case 3: { - // DownpourCtrFeatureValue::Show(value) *= _show_click_decay_rate; - // DownpourCtrFeatureValue::Click(value) *= _show_click_decay_rate; + // CtrDoubleFeatureValue::Show(value) *= _show_click_decay_rate; + // CtrDoubleFeatureValue::Click(value) *= _show_click_decay_rate; // do this after save, because it must not be modified when retry - // DownpourCtrDoubleFeatureValue::UnseenDays(value)++; + // CtrDoubleFeatureValue::UnseenDays(value)++; return true; } default: @@ -136,7 +139,7 @@ bool DownpourCtrDoubleAccessor::Save(float* value, int param) { }; } -void DownpourCtrDoubleAccessor::UpdateStatAfterSave(float* value, int param) { +void CtrDoubleAccessor::UpdateStatAfterSave(float* value, int param) { auto base_threshold = _config.ctr_accessor_param().base_threshold(); auto delta_threshold = _config.ctr_accessor_param().delta_threshold(); auto delta_keep_days = _config.ctr_accessor_param().delta_keep_days(); @@ -145,17 +148,17 @@ void DownpourCtrDoubleAccessor::UpdateStatAfterSave(float* value, int param) { } switch (param) { case 1: { - if (ShowClickScore(DownpourCtrDoubleFeatureValue::Show(value), - DownpourCtrDoubleFeatureValue::Click(value)) >= + if (ShowClickScore(CtrDoubleFeatureValue::Show(value), + CtrDoubleFeatureValue::Click(value)) >= base_threshold && - DownpourCtrDoubleFeatureValue::DeltaScore(value) >= delta_threshold && - DownpourCtrDoubleFeatureValue::UnseenDays(value) <= delta_keep_days) { - DownpourCtrDoubleFeatureValue::DeltaScore(value) = 0; + CtrDoubleFeatureValue::DeltaScore(value) >= delta_threshold && + CtrDoubleFeatureValue::UnseenDays(value) <= delta_keep_days) { + CtrDoubleFeatureValue::DeltaScore(value) = 0; } } return; case 3: { - DownpourCtrDoubleFeatureValue::UnseenDays(value)++; + CtrDoubleFeatureValue::UnseenDays(value)++; } return; default: @@ -163,123 +166,125 @@ void DownpourCtrDoubleAccessor::UpdateStatAfterSave(float* value, int param) { }; } -int32_t DownpourCtrDoubleAccessor::Create(float** values, size_t num) { +int32_t CtrDoubleAccessor::Create(float** values, size_t num) { auto embedx_dim = _config.embedx_dim(); for (size_t value_item = 0; value_item < num; ++value_item) { float* value = values[value_item]; - value[DownpourCtrDoubleFeatureValue::UnseenDaysIndex()] = 0; - value[DownpourCtrDoubleFeatureValue::DeltaScoreIndex()] = 0; - *(double*)(value + DownpourCtrDoubleFeatureValue::ShowIndex()) = 0; - *(double*)(value + DownpourCtrDoubleFeatureValue::ClickIndex()) = 0; - value[DownpourCtrDoubleFeatureValue::SlotIndex()] = -1; + value[CtrDoubleFeatureValue::UnseenDaysIndex()] = 0; + value[CtrDoubleFeatureValue::DeltaScoreIndex()] = 0; + *(double*)(value + CtrDoubleFeatureValue::ShowIndex()) = 0; + *(double*)(value + CtrDoubleFeatureValue::ClickIndex()) = 0; + value[CtrDoubleFeatureValue::SlotIndex()] = -1; _embed_sgd_rule->InitValue( - value + DownpourCtrDoubleFeatureValue::EmbedWIndex(), - value + DownpourCtrDoubleFeatureValue::EmbedG2SumIndex()); + value + CtrDoubleFeatureValue::EmbedWIndex(), + value + CtrDoubleFeatureValue::EmbedG2SumIndex()); _embedx_sgd_rule->InitValue( - value + DownpourCtrDoubleFeatureValue::EmbedxWIndex(), - value + DownpourCtrDoubleFeatureValue::EmbedxG2SumIndex(), false); + value + CtrDoubleFeatureValue::EmbedxWIndex(), + value + CtrDoubleFeatureValue::EmbedxG2SumIndex(), false); } return 0; } -bool DownpourCtrDoubleAccessor::NeedExtendMF(float* value) { - auto show = - ((double*)(value + DownpourCtrDoubleFeatureValue::ShowIndex()))[0]; - auto click = - ((double*)(value + DownpourCtrDoubleFeatureValue::ClickIndex()))[0]; +bool CtrDoubleAccessor::NeedExtendMF(float* value) { + auto show = ((double*)(value + CtrDoubleFeatureValue::ShowIndex()))[0]; + auto click = ((double*)(value + CtrDoubleFeatureValue::ClickIndex()))[0]; // float score = (show - click) * _config.ctr_accessor_param().nonclk_coeff() auto score = (show - click) * _config.ctr_accessor_param().nonclk_coeff() + click * _config.ctr_accessor_param().click_coeff(); //+ click * _config.ctr_accessor_param().click_coeff(); return score >= _config.embedx_threshold(); } -// from DownpourCtrFeatureValue to DownpourCtrPullValue -int32_t DownpourCtrDoubleAccessor::Select(float** select_values, - const float** values, size_t num) { +// from CtrDoubleFeatureValue to CtrDoublePullValue +int32_t CtrDoubleAccessor::Select(float** select_values, const float** values, + size_t num) { auto embedx_dim = _config.embedx_dim(); for (size_t value_item = 0; value_item < num; ++value_item) { float* select_value = select_values[value_item]; float* value = const_cast(values[value_item]); - select_value[DownpourCtrDoublePullValue::ShowIndex()] = - (float)*(double*)(value + DownpourCtrDoubleFeatureValue::ShowIndex()); - select_value[DownpourCtrDoublePullValue::ClickIndex()] = - (float)*(double*)(value + DownpourCtrDoubleFeatureValue::ClickIndex()); - select_value[DownpourCtrDoublePullValue::EmbedWIndex()] = - value[DownpourCtrDoubleFeatureValue::EmbedWIndex()]; - memcpy(select_value + DownpourCtrDoublePullValue::EmbedxWIndex(), - value + DownpourCtrDoubleFeatureValue::EmbedxWIndex(), + select_value[CtrDoublePullValue::ShowIndex()] = + (float)*(double*)(value + CtrDoubleFeatureValue::ShowIndex()); + select_value[CtrDoublePullValue::ClickIndex()] = + (float)*(double*)(value + CtrDoubleFeatureValue::ClickIndex()); + select_value[CtrDoublePullValue::EmbedWIndex()] = + value[CtrDoubleFeatureValue::EmbedWIndex()]; + memcpy(select_value + CtrDoublePullValue::EmbedxWIndex(), + value + CtrDoubleFeatureValue::EmbedxWIndex(), embedx_dim * sizeof(float)); } return 0; } -// from DownpourCtrPushValue to DownpourCtrPushValue +// from CtrDoublePushValue to CtrDoublePushValue // first dim: item // second dim: field num -int32_t DownpourCtrDoubleAccessor::Merge(float** update_values, - const float** other_update_values, - size_t num) { +int32_t CtrDoubleAccessor::Merge(float** update_values, + const float** other_update_values, + size_t num) { auto embedx_dim = _config.embedx_dim(); - size_t total_dim = DownpourCtrDoublePushValue::Dim(embedx_dim); + size_t total_dim = CtrDoublePushValue::Dim(embedx_dim); for (size_t value_item = 0; value_item < num; ++value_item) { float* update_value = update_values[value_item]; const float* other_update_value = other_update_values[value_item]; - /**(double*)(update_value + DownpourCtrDoublePushValue::ShowIndex()) += - *(double*)(other_update_value + DownpourCtrDoublePushValue::ShowIndex()); - *(double*)(update_value + DownpourCtrDoublePushValue::ClickIndex()) += - *(double*)(other_update_value + DownpourCtrDoublePushValue::ClickIndex()); + /**(double*)(update_value + CtrDoublePushValue::ShowIndex()) += + *(double*)(other_update_value + CtrDoublePushValue::ShowIndex()); + *(double*)(update_value + CtrDoublePushValue::ClickIndex()) += + *(double*)(other_update_value + CtrDoublePushValue::ClickIndex()); for (auto i = 3u; i < total_dim; ++i) { update_value[i] += other_update_value[i]; }*/ for (auto i = 0u; i < total_dim; ++i) { - if (i != DownpourCtrDoublePushValue::SlotIndex()) { + if (i != CtrDoublePushValue::SlotIndex()) { update_value[i] += other_update_value[i]; } } } return 0; } -// from DownpourCtrPushValue to DownpourCtrFeatureValue +// from CtrDoublePushValue to CtrDoubleFeatureValue // first dim: item // second dim: field num -int32_t DownpourCtrDoubleAccessor::Update(float** update_values, - const float** push_values, - size_t num) { +int32_t CtrDoubleAccessor::Update(float** update_values, + const float** push_values, size_t num) { auto embedx_dim = _config.embedx_dim(); for (size_t value_item = 0; value_item < num; ++value_item) { float* update_value = update_values[value_item]; const float* push_value = push_values[value_item]; - float push_show = push_value[DownpourCtrDoublePushValue::ShowIndex()]; - float push_click = push_value[DownpourCtrDoublePushValue::ClickIndex()]; - float slot = push_value[DownpourCtrDoublePushValue::SlotIndex()]; - *(double*)(update_value + DownpourCtrDoubleFeatureValue::ShowIndex()) += + float push_show = push_value[CtrDoublePushValue::ShowIndex()]; + float push_click = push_value[CtrDoublePushValue::ClickIndex()]; + float slot = push_value[CtrDoublePushValue::SlotIndex()]; + *(double*)(update_value + CtrDoubleFeatureValue::ShowIndex()) += (double)push_show; - *(double*)(update_value + DownpourCtrDoubleFeatureValue::ClickIndex()) += + *(double*)(update_value + CtrDoubleFeatureValue::ClickIndex()) += (double)push_click; - update_value[DownpourCtrDoubleFeatureValue::SlotIndex()] = slot; - update_value[DownpourCtrDoubleFeatureValue::DeltaScoreIndex()] += + update_value[CtrDoubleFeatureValue::SlotIndex()] = slot; + update_value[CtrDoubleFeatureValue::DeltaScoreIndex()] += (push_show - push_click) * _config.ctr_accessor_param().nonclk_coeff() + push_click * _config.ctr_accessor_param().click_coeff(); //(push_show - push_click) * _config.ctr_accessor_param().nonclk_coeff() + // push_click * _config.ctr_accessor_param().click_coeff(); - update_value[DownpourCtrDoubleFeatureValue::UnseenDaysIndex()] = 0; + update_value[CtrDoubleFeatureValue::UnseenDaysIndex()] = 0; + if (!_show_scale) { + push_show = 1; + } + VLOG(3) << "accessor show scale:" << _show_scale + << ", push_show:" << push_show; _embed_sgd_rule->UpdateValue( - update_value + DownpourCtrDoubleFeatureValue::EmbedWIndex(), - update_value + DownpourCtrDoubleFeatureValue::EmbedG2SumIndex(), - push_value + DownpourCtrDoublePushValue::EmbedGIndex(), push_show); + update_value + CtrDoubleFeatureValue::EmbedWIndex(), + update_value + CtrDoubleFeatureValue::EmbedG2SumIndex(), + push_value + CtrDoublePushValue::EmbedGIndex(), push_show); _embedx_sgd_rule->UpdateValue( - update_value + DownpourCtrDoubleFeatureValue::EmbedxWIndex(), - update_value + DownpourCtrDoubleFeatureValue::EmbedxG2SumIndex(), - push_value + DownpourCtrDoublePushValue::EmbedxGIndex(), push_show); + update_value + CtrDoubleFeatureValue::EmbedxWIndex(), + update_value + CtrDoubleFeatureValue::EmbedxG2SumIndex(), + push_value + CtrDoublePushValue::EmbedxGIndex(), push_show); } return 0; } -bool DownpourCtrDoubleAccessor::CreateValue(int stage, const float* value) { +bool CtrDoubleAccessor::CreateValue(int stage, const float* value) { // stage == 0, pull // stage == 1, push if (stage == 0) { return true; } else if (stage == 1) { - auto show = DownpourCtrDoublePushValue::Show(const_cast(value)); - auto click = DownpourCtrDoublePushValue::Click(const_cast(value)); + auto show = CtrDoublePushValue::Show(const_cast(value)); + auto click = CtrDoublePushValue::Click(const_cast(value)); auto score = ShowClickScore(show, click); if (score <= 0) { return false; @@ -293,23 +298,22 @@ bool DownpourCtrDoubleAccessor::CreateValue(int stage, const float* value) { return true; } } -double DownpourCtrDoubleAccessor::ShowClickScore(double show, double click) { +double CtrDoubleAccessor::ShowClickScore(double show, double click) { // auto nonclk_coeff = _config.ctr_accessor_param().nonclk_coeff(); // auto click_coeff = _config.ctr_accessor_param().click_coeff(); auto nonclk_coeff = _config.ctr_accessor_param().nonclk_coeff(); auto click_coeff = _config.ctr_accessor_param().click_coeff(); return (show - click) * nonclk_coeff + click * click_coeff; } -std::string DownpourCtrDoubleAccessor::ParseToString(const float* v, - int param_size) { +std::string CtrDoubleAccessor::ParseToString(const float* v, int param_size) { thread_local std::ostringstream os; os.clear(); os.str(""); os << v[0] << " " << v[1] << " " << (float)((double*)(v + 2))[0] << " " << (float)((double*)(v + 4))[0] << " " << v[6] << " " << v[7] << " " << v[8]; - auto show = DownpourCtrDoubleFeatureValue::Show(const_cast(v)); - auto click = DownpourCtrDoubleFeatureValue::Click(const_cast(v)); + auto show = CtrDoubleFeatureValue::Show(const_cast(v)); + auto click = CtrDoubleFeatureValue::Click(const_cast(v)); auto score = ShowClickScore(show, click); if (score >= _config.embedx_threshold() && param_size > 9) { os << " " << v[9]; @@ -319,23 +323,22 @@ std::string DownpourCtrDoubleAccessor::ParseToString(const float* v, } return os.str(); } -int DownpourCtrDoubleAccessor::ParseFromString(const std::string& str, - float* value) { +int CtrDoubleAccessor::ParseFromString(const std::string& str, float* value) { int embedx_dim = _config.embedx_dim(); float data_buff[_accessor_info.dim + 2]; float* data_buff_ptr = data_buff; _embedx_sgd_rule->InitValue( - data_buff_ptr + DownpourCtrDoubleFeatureValue::EmbedxWIndex(), - data_buff_ptr + DownpourCtrDoubleFeatureValue::EmbedxG2SumIndex()); + data_buff_ptr + CtrDoubleFeatureValue::EmbedxWIndex(), + data_buff_ptr + CtrDoubleFeatureValue::EmbedxG2SumIndex()); auto str_len = paddle::string::str_to_float(str.data(), data_buff_ptr); CHECK(str_len >= 6) << "expect more than 6 real:" << str_len; - int show_index = DownpourCtrDoubleFeatureValue::ShowIndex(); - int click_index = DownpourCtrDoubleFeatureValue::ClickIndex(); - int embed_w_index = DownpourCtrDoubleFeatureValue::EmbedWIndex(); + int show_index = CtrDoubleFeatureValue::ShowIndex(); + int click_index = CtrDoubleFeatureValue::ClickIndex(); + int embed_w_index = CtrDoubleFeatureValue::EmbedWIndex(); // no slot, embedx int value_dim = _accessor_info.dim; - int embedx_g2sum_index = DownpourCtrDoubleFeatureValue::EmbedxG2SumIndex(); - value[DownpourCtrDoubleFeatureValue::SlotIndex()] = -1; + int embedx_g2sum_index = CtrDoubleFeatureValue::EmbedxG2SumIndex(); + value[CtrDoubleFeatureValue::SlotIndex()] = -1; // other case if (str_len == (value_dim - 1)) { // copy unseen_days..delta_score @@ -344,8 +347,8 @@ int DownpourCtrDoubleAccessor::ParseFromString(const std::string& str, *(double*)(value + show_index) = (double)data_buff_ptr[2]; *(double*)(value + click_index) = (double)data_buff_ptr[3]; // copy others - value[DownpourCtrDoubleFeatureValue::EmbedWIndex()] = data_buff_ptr[4]; - value[DownpourCtrDoubleFeatureValue::EmbedG2SumIndex()] = data_buff_ptr[5]; + value[CtrDoubleFeatureValue::EmbedWIndex()] = data_buff_ptr[4]; + value[CtrDoubleFeatureValue::EmbedG2SumIndex()] = data_buff_ptr[5]; memcpy(value + embedx_g2sum_index, data_buff_ptr + 6, (embedx_dim + 1) * sizeof(float)); } else { diff --git a/paddle/fluid/distributed/ps/table/ctr_double_accessor.h b/paddle/fluid/distributed/ps/table/ctr_double_accessor.h index 39959034636..c5860206503 100644 --- a/paddle/fluid/distributed/ps/table/ctr_double_accessor.h +++ b/paddle/fluid/distributed/ps/table/ctr_double_accessor.h @@ -24,9 +24,9 @@ namespace paddle { namespace distributed { -class DownpourCtrDoubleAccessor : public ValueAccessor { +class CtrDoubleAccessor : public ValueAccessor { public: - struct DownpourCtrDoubleFeatureValue { + struct CtrDoubleFeatureValue { /* float unseen_days; float delta_score; @@ -45,60 +45,56 @@ class DownpourCtrDoubleAccessor : public ValueAccessor { } static int UnseenDaysIndex() { return 0; } static int DeltaScoreIndex() { - return DownpourCtrDoubleFeatureValue::UnseenDaysIndex() + 1; + return CtrDoubleFeatureValue::UnseenDaysIndex() + 1; } static int ShowIndex() { - return DownpourCtrDoubleFeatureValue::DeltaScoreIndex() + 1; + return CtrDoubleFeatureValue::DeltaScoreIndex() + 1; } // show is double - static int ClickIndex() { - return DownpourCtrDoubleFeatureValue::ShowIndex() + 2; - } + static int ClickIndex() { return CtrDoubleFeatureValue::ShowIndex() + 2; } // click is double - static int EmbedWIndex() { - return DownpourCtrDoubleFeatureValue::ClickIndex() + 2; - } + static int EmbedWIndex() { return CtrDoubleFeatureValue::ClickIndex() + 2; } static int EmbedG2SumIndex() { - return DownpourCtrDoubleFeatureValue::EmbedWIndex() + 1; + return CtrDoubleFeatureValue::EmbedWIndex() + 1; } static int SlotIndex() { - return DownpourCtrDoubleFeatureValue::EmbedG2SumIndex() + 1; + return CtrDoubleFeatureValue::EmbedG2SumIndex() + 1; } static int EmbedxG2SumIndex() { - return DownpourCtrDoubleFeatureValue::SlotIndex() + 1; + return CtrDoubleFeatureValue::SlotIndex() + 1; } static int EmbedxWIndex() { - return DownpourCtrDoubleFeatureValue::EmbedxG2SumIndex() + 1; + return CtrDoubleFeatureValue::EmbedxG2SumIndex() + 1; } static float& UnseenDays(float* val) { - return val[DownpourCtrDoubleFeatureValue::UnseenDaysIndex()]; + return val[CtrDoubleFeatureValue::UnseenDaysIndex()]; } static float& DeltaScore(float* val) { - return val[DownpourCtrDoubleFeatureValue::DeltaScoreIndex()]; + return val[CtrDoubleFeatureValue::DeltaScoreIndex()]; } static double& Show(float* val) { - return ((double*)(val + DownpourCtrDoubleFeatureValue::ShowIndex()))[0]; + return ((double*)(val + CtrDoubleFeatureValue::ShowIndex()))[0]; } static double& Click(float* val) { - return ((double*)(val + DownpourCtrDoubleFeatureValue::ClickIndex()))[0]; + return ((double*)(val + CtrDoubleFeatureValue::ClickIndex()))[0]; } static float& Slot(float* val) { - return val[DownpourCtrDoubleFeatureValue::SlotIndex()]; + return val[CtrDoubleFeatureValue::SlotIndex()]; } static float& EmbedW(float* val) { - return val[DownpourCtrDoubleFeatureValue::EmbedWIndex()]; + return val[CtrDoubleFeatureValue::EmbedWIndex()]; } static float& EmbedG2Sum(float* val) { - return val[DownpourCtrDoubleFeatureValue::EmbedG2SumIndex()]; + return val[CtrDoubleFeatureValue::EmbedG2SumIndex()]; } static float& EmbedxG2Sum(float* val) { - return val[DownpourCtrDoubleFeatureValue::EmbedxG2SumIndex()]; + return val[CtrDoubleFeatureValue::EmbedxG2SumIndex()]; } static float* EmbedxW(float* val) { - return (val + DownpourCtrDoubleFeatureValue::EmbedxWIndex()); + return (val + CtrDoubleFeatureValue::EmbedxWIndex()); } }; - struct DownpourCtrDoublePushValue { + struct CtrDoublePushValue { /* float slot; float show; @@ -110,35 +106,27 @@ class DownpourCtrDoubleAccessor : public ValueAccessor { static int DimSize(int dim, int embedx_dim) { return sizeof(float); } static int Size(int embedx_dim) { return Dim(embedx_dim) * sizeof(float); } static int SlotIndex() { return 0; } - static int ShowIndex() { - return DownpourCtrDoublePushValue::SlotIndex() + 1; - } - static int ClickIndex() { - return DownpourCtrDoublePushValue::ShowIndex() + 1; - } - static int EmbedGIndex() { - return DownpourCtrDoublePushValue::ClickIndex() + 1; - } - static int EmbedxGIndex() { - return DownpourCtrDoublePushValue::EmbedGIndex() + 1; - } + static int ShowIndex() { return CtrDoublePushValue::SlotIndex() + 1; } + static int ClickIndex() { return CtrDoublePushValue::ShowIndex() + 1; } + static int EmbedGIndex() { return CtrDoublePushValue::ClickIndex() + 1; } + static int EmbedxGIndex() { return CtrDoublePushValue::EmbedGIndex() + 1; } static float& Slot(float* val) { - return val[DownpourCtrDoublePushValue::SlotIndex()]; + return val[CtrDoublePushValue::SlotIndex()]; } static float& Show(float* val) { - return val[DownpourCtrDoublePushValue::ShowIndex()]; + return val[CtrDoublePushValue::ShowIndex()]; } static float& Click(float* val) { - return val[DownpourCtrDoublePushValue::ClickIndex()]; + return val[CtrDoublePushValue::ClickIndex()]; } static float& EmbedG(float* val) { - return val[DownpourCtrDoublePushValue::EmbedGIndex()]; + return val[CtrDoublePushValue::EmbedGIndex()]; } static float* EmbedxG(float* val) { - return val + DownpourCtrDoublePushValue::EmbedxGIndex(); + return val + CtrDoublePushValue::EmbedxGIndex(); } }; - struct DownpourCtrDoublePullValue { + struct CtrDoublePullValue { /* float show; float click; @@ -153,20 +141,20 @@ class DownpourCtrDoubleAccessor : public ValueAccessor { static int EmbedWIndex() { return 2; } static int EmbedxWIndex() { return 3; } static float& Show(float* val) { - return val[DownpourCtrDoublePullValue::ShowIndex()]; + return val[CtrDoublePullValue::ShowIndex()]; } static float& Click(float* val) { - return val[DownpourCtrDoublePullValue::ClickIndex()]; + return val[CtrDoublePullValue::ClickIndex()]; } static float& EmbedW(float* val) { - return val[DownpourCtrDoublePullValue::EmbedWIndex()]; + return val[CtrDoublePullValue::EmbedWIndex()]; } static float* EmbedxW(float* val) { - return val + DownpourCtrDoublePullValue::EmbedxWIndex(); + return val + CtrDoublePullValue::EmbedxWIndex(); } }; - DownpourCtrDoubleAccessor() {} - virtual ~DownpourCtrDoubleAccessor() {} + CtrDoubleAccessor() {} + virtual ~CtrDoubleAccessor() {} virtual int Initialize(); // 初始化AccessorInfo virtual void InitAccessorInfo(); @@ -182,7 +170,7 @@ class DownpourCtrDoubleAccessor : public ValueAccessor { // update delta_score and unseen_days after save virtual void UpdateStatAfterSave(float* value, int param) override; // 判断该value是否保存到ssd - virtual bool save_ssd(float* value); + virtual bool SaveSSD(float* value); // virtual bool save_cache(float* value, int param, double // global_cache_threshold) override; // keys不存在时,为values生成随机值 @@ -206,14 +194,14 @@ class DownpourCtrDoubleAccessor : public ValueAccessor { virtual float GetField(float* value, const std::string& name) override { CHECK(name == "show"); if (name == "show") { - return (float)DownpourCtrDoubleFeatureValue::Show(value); + return (float)CtrDoubleFeatureValue::Show(value); } return 0.0; } - // DEFINE_GET_INDEX(DownpourCtrDoubleFeatureValue, show) - // DEFINE_GET_INDEX(DownpourCtrDoubleFeatureValue, click) - // DEFINE_GET_INDEX(DownpourCtrDoubleFeatureValue, embed_w) - // DEFINE_GET_INDEX(DownpourCtrDoubleFeatureValue, embedx_w) + // DEFINE_GET_INDEX(CtrDoubleFeatureValue, show) + // DEFINE_GET_INDEX(CtrDoubleFeatureValue, click) + // DEFINE_GET_INDEX(CtrDoubleFeatureValue, embed_w) + // DEFINE_GET_INDEX(CtrDoubleFeatureValue, embedx_w) private: double ShowClickScore(double show, double click); @@ -222,6 +210,7 @@ class DownpourCtrDoubleAccessor : public ValueAccessor { SparseValueSGDRule* _embedx_sgd_rule; float _show_click_decay_rate; int32_t _ssd_unseenday_threshold; + bool _show_scale = false; }; } // namespace distributed } // namespace paddle diff --git a/paddle/fluid/distributed/ps/table/table.cc b/paddle/fluid/distributed/ps/table/table.cc index 307abbdf51e..333008482f1 100644 --- a/paddle/fluid/distributed/ps/table/table.cc +++ b/paddle/fluid/distributed/ps/table/table.cc @@ -21,6 +21,7 @@ #include "paddle/fluid/distributed/ps/table/memory_dense_table.h" #include "paddle/fluid/distributed/ps/table/ctr_accessor.h" +#include "paddle/fluid/distributed/ps/table/ctr_double_accessor.h" #include "paddle/fluid/distributed/ps/table/memory_sparse_geo_table.h" #include "paddle/fluid/distributed/ps/table/memory_sparse_table.h" #include "paddle/fluid/distributed/ps/table/sparse_accessor.h" @@ -39,6 +40,7 @@ REGISTER_PSCORE_CLASS(Table, MemorySparseTable); REGISTER_PSCORE_CLASS(Table, MemorySparseGeoTable); REGISTER_PSCORE_CLASS(ValueAccessor, CommMergeAccessor); REGISTER_PSCORE_CLASS(ValueAccessor, CtrCommonAccessor); +REGISTER_PSCORE_CLASS(ValueAccessor, CtrDoubleAccessor); REGISTER_PSCORE_CLASS(ValueAccessor, SparseAccessor); REGISTER_PSCORE_CLASS(SparseValueSGDRule, StdAdaGradSGDRule); REGISTER_PSCORE_CLASS(SparseValueSGDRule, SparseAdamSGDRule); diff --git a/paddle/fluid/distributed/the_one_ps.proto b/paddle/fluid/distributed/the_one_ps.proto index 197acc18242..32bf9eaa5aa 100644 --- a/paddle/fluid/distributed/the_one_ps.proto +++ b/paddle/fluid/distributed/the_one_ps.proto @@ -153,6 +153,7 @@ message CtrAccessorParameter { // will be delete in shrink_model optional int32 ssd_unseenday_threshold = 9 [ default = 1 ]; // threshold to save ssd + optional bool show_scale = 10 [ default = true ]; } message TensorAccessorParameter { diff --git a/paddle/fluid/framework/distributed_strategy.proto b/paddle/fluid/framework/distributed_strategy.proto index 9b0a033856d..fff78dd872c 100644 --- a/paddle/fluid/framework/distributed_strategy.proto +++ b/paddle/fluid/framework/distributed_strategy.proto @@ -258,6 +258,7 @@ message CtrAccessorParameter { [ default = 0.8 ]; // threshold to shrink a feasign optional float delete_after_unseen_days = 8 [ default = 30 ]; optional int32 ssd_unseenday_threshold = 9 [ default = 1 ]; + optional bool show_scale = 10 [ default = true ]; } message TableAccessorSaveParameter { diff --git a/python/paddle/distributed/fleet/base/distributed_strategy.py b/python/paddle/distributed/fleet/base/distributed_strategy.py index c46b6eeb048..9d20e432d89 100644 --- a/python/paddle/distributed/fleet/base/distributed_strategy.py +++ b/python/paddle/distributed/fleet/base/distributed_strategy.py @@ -611,12 +611,15 @@ class DistributedStrategy(object): "DownpourCtrAccessor") if accessor_class not in support_sparse_accessor_class: raise ValueError( - "support sparse_accessor_class: [''DownpourSparseValueAccessor', 'DownpourCtrAccessor', 'DownpourCtrDoubleAccessor', 'DownpourUnitAccessor', 'DownpourDoubleUnitAccessor'], but actual %s" + "support sparse_accessor_class: ['DownpourSparseValueAccessor', 'DownpourCtrAccessor', 'DownpourCtrDoubleAccessor', 'DownpourUnitAccessor', 'DownpourDoubleUnitAccessor'], but actual %s" % (accessor_class)) - if configs.get("use_cvm", True): - table_data.accessor.accessor_class = 'CtrCommonAccessor' + if accessor_class.find("Double") >= 0: + table_data.accessor.accessor_class = 'CtrDoubleAccessor' else: + table_data.accessor.accessor_class = 'CtrCommonAccessor' + + if not configs.get("use_cvm", True): table_data.accessor.accessor_class = 'SparseAccessor' table_data.accessor.embedx_dim = config.get('sparse_embedx_dim', 8) @@ -624,6 +627,11 @@ class DistributedStrategy(object): table_data.accessor.embedx_threshold = config.get( 'sparse_embedx_threshold', 10) + if accessor_class == 'DownpourUnitAccessor': + table_data.accessor.ctr_accessor_param.show_scale = False + else: + table_data.accessor.ctr_accessor_param.show_scale = True + table_data.accessor.ctr_accessor_param.nonclk_coeff = config.get( 'sparse_nonclk_coeff', 0.1) table_data.accessor.ctr_accessor_param.click_coeff = config.get( diff --git a/python/paddle/fluid/tests/unittests/test_fleet_distributed_strategy.py b/python/paddle/fluid/tests/unittests/test_fleet_distributed_strategy.py index 31f77f17b19..ffc3f2b21a4 100644 --- a/python/paddle/fluid/tests/unittests/test_fleet_distributed_strategy.py +++ b/python/paddle/fluid/tests/unittests/test_fleet_distributed_strategy.py @@ -310,9 +310,22 @@ class TestStrategyConfig(unittest.TestCase): "embed_sparse_optimizer": "std_adagrad" } strategy.fleet_desc_configs = configs + self.assertEqual(strategy.sparse_table_configs[0] + .accessor.ctr_accessor_param.show_scale, False) self.assertEqual(strategy.sparse_table_configs[0] .accessor.embed_sgd_param.adagrad.initial_range, 0) + strategy = paddle.distributed.fleet.DistributedStrategy() + configs = {} + configs['emb'] = { + "sparse_accessor_class": "DownpourCtrDoubleAccessor", + "embed_sparse_optimizer": "std_adagrad" + } + strategy.fleet_desc_configs = configs + self.assertEqual(strategy.sparse_table_configs[0] + .accessor.embed_sgd_param.adagrad.initial_range, + 0.0001) + def test_trainer_desc_configs(self): strategy = paddle.distributed.fleet.DistributedStrategy() configs = { -- GitLab