From 6097aefb2b25734cf9cb9bf5d97e645d201d1ba6 Mon Sep 17 00:00:00 2001 From: yaoxuefeng Date: Tue, 8 Feb 2022 15:06:13 +0800 Subject: [PATCH] add ctr_double_accessor (#39377) --- .../fluid/distributed/ps/table/CMakeLists.txt | 2 + .../ps/table/ctr_double_accessor.cc | 399 ++++++++++++++++++ .../ps/table/ctr_double_accessor.h | 245 +++++++++++ 3 files changed, 646 insertions(+) create mode 100644 paddle/fluid/distributed/ps/table/ctr_double_accessor.cc create mode 100644 paddle/fluid/distributed/ps/table/ctr_double_accessor.h diff --git a/paddle/fluid/distributed/ps/table/CMakeLists.txt b/paddle/fluid/distributed/ps/table/CMakeLists.txt index 9aa9ecc2afd..c0a7e33a6b2 100644 --- a/paddle/fluid/distributed/ps/table/CMakeLists.txt +++ b/paddle/fluid/distributed/ps/table/CMakeLists.txt @@ -41,9 +41,11 @@ cc_library(tensor_table SRCS tensor_table.cc DEPS eigen3 ps_framework_proto exec set_source_files_properties(table.cc PROPERTIES COMPILE_FLAGS ${DISTRIBUTE_COMPILE_FLAGS}) set_source_files_properties(sparse_sgd_rule.cc PROPERTIES COMPILE_FLAGS ${DISTRIBUTE_COMPILE_FLAGS}) +set_source_files_properties(ctr_double_accessor.cc PROPERTIES COMPILE_FLAGS ${DISTRIBUTE_COMPILE_FLAGS}) set_source_files_properties(ctr_accessor.cc PROPERTIES COMPILE_FLAGS ${DISTRIBUTE_COMPILE_FLAGS}) set_source_files_properties(memory_sparse_table.cc PROPERTIES COMPILE_FLAGS ${DISTRIBUTE_COMPILE_FLAGS}) cc_library(sparse_sgd_rule SRCS sparse_sgd_rule.cc DEPS ${TABLE_DEPS} ps_framework_proto) +cc_library(ctr_double_accessor SRCS ctr_double_accessor.cc DEPS ${TABLE_DEPS} ps_framework_proto sparse_sgd_rule) cc_library(ctr_accessor SRCS ctr_accessor.cc DEPS ${TABLE_DEPS} ps_framework_proto sparse_sgd_rule) cc_library(memory_sparse_table SRCS memory_sparse_table.cc DEPS ps_framework_proto ${TABLE_DEPS} fs afs_wrapper ctr_accessor common_table) diff --git a/paddle/fluid/distributed/ps/table/ctr_double_accessor.cc b/paddle/fluid/distributed/ps/table/ctr_double_accessor.cc new file mode 100644 index 
00000000000..b07bcf70ad7
--- /dev/null
+++ b/paddle/fluid/distributed/ps/table/ctr_double_accessor.cc
@@ -0,0 +1,441 @@
+// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "paddle/fluid/distributed/ps/table/ctr_double_accessor.h"
+// NOTE(review): the name of the angle-bracket include that stood here was
+// lost; these are the standard headers this file visibly uses.
+#include <cstring>
+#include <sstream>
+#include <string>
+#include <vector>
+#include "glog/logging.h"
+#include "paddle/fluid/string/string_helper.h"
+
+namespace paddle {
+namespace distributed {
+
+// Creates the embed / embedx SGD rules named in the table config and caches
+// the decay rate and SSD threshold read later by shrink() and save_ssd().
+// Always returns 0.
+int DownpourCtrDoubleAccessor::initialize() {
+  auto name = _config.embed_sgd_param().name();
+  _embed_sgd_rule = CREATE_PSCORE_CLASS(SparseValueSGDRule, name);
+  _embed_sgd_rule->load_config(_config.embed_sgd_param(), 1);
+
+  name = _config.embedx_sgd_param().name();
+  _embedx_sgd_rule = CREATE_PSCORE_CLASS(SparseValueSGDRule, name);
+  _embedx_sgd_rule->load_config(_config.embedx_sgd_param(),
+                                _config.embedx_dim());
+
+  _show_click_decay_rate = _config.ctr_accessor_param().show_click_decay_rate();
+  _ssd_unseenday_threshold =
+      _config.ctr_accessor_param().ssd_unseenday_threshold();
+
+  return 0;
+}
+
+// Number of logical fields of a stored feature value.
+size_t DownpourCtrDoubleAccessor::dim() {
+  return DownpourCtrDoubleFeatureValue::dim(_config.embedx_dim());
+}
+
+// Size of one logical field as reported by the feature value layout.
+size_t DownpourCtrDoubleAccessor::dim_size(size_t dim) {
+  return DownpourCtrDoubleFeatureValue::dim_size(dim, _config.embedx_dim());
+}
+
+// Total byte size of a stored feature value.
+size_t DownpourCtrDoubleAccessor::size() {
+  return DownpourCtrDoubleFeatureValue::size(_config.embedx_dim());
+}
+
+// Byte size of the embedx part: embedx_w plus embedx_g2sum.
+size_t DownpourCtrDoubleAccessor::mf_size() {
+  return (_config.embedx_dim() + 1) * sizeof(float);
+}
+
+// Pull value: show, click, embed_w, embedx_w[embedx_dim].
+size_t DownpourCtrDoubleAccessor::select_dim() {
+  return DownpourCtrDoublePullValue::dim(_config.embedx_dim());
+}
+
+size_t DownpourCtrDoubleAccessor::select_dim_size(size_t dim) {
+  return sizeof(float);
+}
+
+size_t DownpourCtrDoubleAccessor::select_size() {
+  return select_dim() * sizeof(float);
+}
+
+// Push value: slot, show, click, embed_g, embedx_g[embedx_dim].
+size_t DownpourCtrDoubleAccessor::update_dim() {
+  return DownpourCtrDoublePushValue::dim(_config.embedx_dim());
+}
+
+size_t DownpourCtrDoubleAccessor::update_dim_size(size_t dim) {
+  return sizeof(float);
+}
+
+size_t DownpourCtrDoubleAccessor::update_size() {
+  return update_dim() * sizeof(float);
+}
+
+// Applies the show/click time decay to `value` in place, then returns true
+// when the feature should be dropped: its decayed score is below
+// delete_threshold or it has been unseen for more than
+// delete_after_unseen_days.
+bool DownpourCtrDoubleAccessor::shrink(float* value) {
+  const auto& accessor_param = _config.ctr_accessor_param();
+  const auto delete_after_unseen_days =
+      accessor_param.delete_after_unseen_days();
+  const auto delete_threshold = accessor_param.delete_threshold();
+
+  // time decay first
+  DownpourCtrDoubleFeatureValue::show(value) *= _show_click_decay_rate;
+  DownpourCtrDoubleFeatureValue::click(value) *= _show_click_decay_rate;
+
+  // shrink after
+  const auto score =
+      show_click_score(DownpourCtrDoubleFeatureValue::show(value),
+                       DownpourCtrDoubleFeatureValue::click(value));
+  const auto unseen_days = DownpourCtrDoubleFeatureValue::unseen_days(value);
+  return score < delete_threshold || unseen_days > delete_after_unseen_days;
+}
+
+// Returns true when the feature has been unseen for more than the configured
+// ssd_unseenday_threshold.
+bool DownpourCtrDoubleAccessor::save_ssd(float* value) {
+  return DownpourCtrDoubleFeatureValue::unseen_days(value) >
+         _ssd_unseenday_threshold;
+}
+
+// bool DownpourCtrDoubleAccessor::save_cache(
+//     float* value, int param, double global_cache_threshold) {
+//   auto base_threshold = _config.ctr_accessor_param().base_threshold();
+//   auto delta_keep_days = _config.ctr_accessor_param().delta_keep_days();
+//   if (show_click_score(DownpourCtrDoubleFeatureValue::show(value),
+//                        DownpourCtrDoubleFeatureValue::click(value)) >=
+//           base_threshold &&
+//       DownpourCtrDoubleFeatureValue::unseen_days(value) <= delta_keep_days) {
+//     return DownpourCtrDoubleFeatureValue::show(value) >
+//            global_cache_threshold;
+//   }
+//   return false;
+// }
+
+// Decides whether `value` is written out in the save stage selected by
+// `param`:
+//   0: save all; 1: save xbox delta; 2: save xbox base;
+//   3 and anything else: always saved.
+// For param == 2 a saved feature has its delta_score reset here; for
+// param == 1 the reset is left to update_stat_after_save().
+bool DownpourCtrDoubleAccessor::save(float* value, int param) {
+  const auto& accessor_param = _config.ctr_accessor_param();
+  auto base_threshold = accessor_param.base_threshold();
+  auto delta_threshold = accessor_param.delta_threshold();
+  auto delta_keep_days = accessor_param.delta_keep_days();
+  if (param == 2) {
+    // the base save only requires a non-negative delta_score
+    delta_threshold = 0;
+  }
+  switch (param) {
+    // save all
+    case 0:
+      return true;
+    // save xbox delta
+    case 1:
+    // save xbox base
+    case 2: {
+      const bool keep =
+          show_click_score(DownpourCtrDoubleFeatureValue::show(value),
+                           DownpourCtrDoubleFeatureValue::click(value)) >=
+              base_threshold &&
+          DownpourCtrDoubleFeatureValue::delta_score(value) >=
+              delta_threshold &&
+          DownpourCtrDoubleFeatureValue::unseen_days(value) <= delta_keep_days;
+      if (keep && param == 2) {
+        DownpourCtrDoubleFeatureValue::delta_score(value) = 0;
+      }
+      return keep;
+    }
+    // already decayed in shrink; unseen_days is bumped in
+    // update_stat_after_save()
+    case 3:
+      return true;
+    default:
+      return true;
+  }
+}
+
+// Updates the statistics of `value` once a save pass has finished:
+//   param == 1: reset delta_score of features that passed the delta filter;
+//   param == 3: increase unseen_days by one;
+//   otherwise: nothing.
+void DownpourCtrDoubleAccessor::update_stat_after_save(float* value,
+                                                       int param) {
+  switch (param) {
+    case 1: {
+      const auto& accessor_param = _config.ctr_accessor_param();
+      const bool saved =
+          show_click_score(DownpourCtrDoubleFeatureValue::show(value),
+                           DownpourCtrDoubleFeatureValue::click(value)) >=
+              accessor_param.base_threshold() &&
+          DownpourCtrDoubleFeatureValue::delta_score(value) >=
+              accessor_param.delta_threshold() &&
+          DownpourCtrDoubleFeatureValue::unseen_days(value) <=
+              accessor_param.delta_keep_days();
+      if (saved) {
+        DownpourCtrDoubleFeatureValue::delta_score(value) = 0;
+      }
+      return;
+    }
+    case 3:
+      DownpourCtrDoubleFeatureValue::unseen_days(value)++;
+      return;
+    default:
+      return;
+  }
+}
+
+// Initializes `num` freshly allocated feature values: statistics are zeroed,
+// slot is set to -1 and the weights are initialized by the SGD rules.
+// The memory behind every values[i] must already be allocated by the caller.
+int32_t DownpourCtrDoubleAccessor::create(float** values, size_t num) {
+  for (size_t value_item = 0; value_item < num; ++value_item) {
+    float* value = values[value_item];
+    DownpourCtrDoubleFeatureValue::unseen_days(value) = 0;
+    DownpourCtrDoubleFeatureValue::delta_score(value) = 0;
+    DownpourCtrDoubleFeatureValue::show(value) = 0;
+    DownpourCtrDoubleFeatureValue::click(value) = 0;
+    DownpourCtrDoubleFeatureValue::slot(value) = -1;
+    _embed_sgd_rule->init_value(
+        value + DownpourCtrDoubleFeatureValue::embed_w_index(),
+        value + DownpourCtrDoubleFeatureValue::embed_g2sum_index());
+    _embedx_sgd_rule->init_value(
+        value + DownpourCtrDoubleFeatureValue::embedx_w_index(),
+        value + DownpourCtrDoubleFeatureValue::embedx_g2sum_index(), false);
+  }
+  return 0;
+}
+
+// Returns true once the show/click score of `value` reaches the configured
+// embedx_threshold.
+bool DownpourCtrDoubleAccessor::need_extend_mf(float* value) {
+  const auto score =
+      show_click_score(DownpourCtrDoubleFeatureValue::show(value),
+                       DownpourCtrDoubleFeatureValue::click(value));
+  return score >= _config.embedx_threshold();
+}
+
+// from DownpourCtrDoubleFeatureValue to DownpourCtrDoublePullValue;
+// show and click are narrowed from double to float on the way out.
+int32_t DownpourCtrDoubleAccessor::select(float** select_values,
+                                          const float** values, size_t num) {
+  const auto embedx_dim = _config.embedx_dim();
+  for (size_t value_item = 0; value_item < num; ++value_item) {
+    float* select_value = select_values[value_item];
+    const float* value = values[value_item];
+    const double show = *reinterpret_cast<const double*>(
+        value + DownpourCtrDoubleFeatureValue::show_index());
+    const double click = *reinterpret_cast<const double*>(
+        value + DownpourCtrDoubleFeatureValue::click_index());
+    select_value[DownpourCtrDoublePullValue::show_index()] =
+        static_cast<float>(show);
+    select_value[DownpourCtrDoublePullValue::click_index()] =
+        static_cast<float>(click);
+    select_value[DownpourCtrDoublePullValue::embed_w_index()] =
+        value[DownpourCtrDoubleFeatureValue::embed_w_index()];
+    memcpy(select_value + DownpourCtrDoublePullValue::embedx_w_index(),
+           value + DownpourCtrDoubleFeatureValue::embedx_w_index(),
+           embedx_dim * sizeof(float));
+  }
+  return 0;
+}
+
+// from DownpourCtrDoublePushValue to DownpourCtrDoublePushValue
+// first dim: item
+// second dim: field num
+// Every field except slot is summed element-wise into update_values.
+int32_t DownpourCtrDoubleAccessor::merge(float** update_values,
+                                         const float** other_update_values,
+                                         size_t num) {
+  const size_t total_dim =
+      DownpourCtrDoublePushValue::dim(_config.embedx_dim());
+  const size_t slot_index = DownpourCtrDoublePushValue::slot_index();
+  for (size_t value_item = 0; value_item < num; ++value_item) {
+    float* update_value = update_values[value_item];
+    const float* other_update_value = other_update_values[value_item];
+    for (size_t i = 0; i < total_dim; ++i) {
+      if (i != slot_index) {
+        update_value[i] += other_update_value[i];
+      }
+    }
+  }
+  return 0;
+}
+
+// from DownpourCtrDoublePushValue to DownpourCtrDoubleFeatureValue
+// first dim: item
+// second dim: field num
+int32_t DownpourCtrDoubleAccessor::update(float** update_values,
+                                          const float** push_values,
+                                          size_t num) {
+  const auto& accessor_param = _config.ctr_accessor_param();
+  for (size_t value_item = 0; value_item < num; ++value_item) {
+    float* update_value = update_values[value_item];
+    const float* push_value = push_values[value_item];
+    const float push_show =
+        push_value[DownpourCtrDoublePushValue::show_index()];
+    const float push_click =
+        push_value[DownpourCtrDoublePushValue::click_index()];
+    const float slot = push_value[DownpourCtrDoublePushValue::slot_index()];
+    DownpourCtrDoubleFeatureValue::show(update_value) +=
+        static_cast<double>(push_show);
+    DownpourCtrDoubleFeatureValue::click(update_value) +=
+        static_cast<double>(push_click);
+    DownpourCtrDoubleFeatureValue::slot(update_value) = slot;
+    DownpourCtrDoubleFeatureValue::delta_score(update_value) +=
+        (push_show - push_click) * accessor_param.nonclk_coeff() +
+        push_click * accessor_param.click_coeff();
+    // the feature was just seen
+    DownpourCtrDoubleFeatureValue::unseen_days(update_value) = 0;
+    _embed_sgd_rule->update_value(
+        update_value + DownpourCtrDoubleFeatureValue::embed_w_index(),
+        update_value + DownpourCtrDoubleFeatureValue::embed_g2sum_index(),
+        push_value + DownpourCtrDoublePushValue::embed_g_index(), push_show);
+    _embedx_sgd_rule->update_value(
+        update_value + DownpourCtrDoubleFeatureValue::embedx_w_index(),
+        update_value + DownpourCtrDoubleFeatureValue::embedx_g2sum_index(),
+        push_value + DownpourCtrDoublePushValue::embedx_g_index(), push_show);
+  }
+  return 0;
+}
+
+// Decides whether a value is created for a key that does not exist yet.
+//   stage == 0 (pull): always;
+//   stage == 1 (push): never for score <= 0, always for score >= 1, otherwise
+//                      only when a locally drawn random number is below score;
+//   any other stage: always.
+bool DownpourCtrDoubleAccessor::create_value(int stage, const float* value) {
+  if (stage != 1) {
+    return true;
+  }
+  const float show = value[DownpourCtrDoublePushValue::show_index()];
+  const float click = value[DownpourCtrDoublePushValue::click_index()];
+  const auto score = show_click_score(show, click);
+  if (score <= 0) {
+    return false;
+  }
+  if (score >= 1) {
+    return true;
+  }
+  // NOTE(review): the template argument was lost in extraction; <double>
+  // matches the type of `score` - confirm against the helper's declaration.
+  return local_uniform_real_distribution<double>()(local_random_engine()) <
+         score;
+}
+
+// score = (show - click) * nonclk_coeff + click * click_coeff
+double DownpourCtrDoubleAccessor::show_click_score(double show, double click) {
+  const auto& accessor_param = _config.ctr_accessor_param();
+  return (show - click) * accessor_param.nonclk_coeff() +
+         click * accessor_param.click_coeff();
+}
+
+// Serializes a feature value as space separated numbers:
+//   unseen_days delta_score show click embed_w embed_g2sum slot
+// followed by embedx_g2sum and embedx_w[embedx_dim] when the score reaches
+// embedx_threshold and param_size > 9. show and click are written as float.
+std::string DownpourCtrDoubleAccessor::parse_to_string(const float* v,
+                                                       int param_size) {
+  thread_local std::ostringstream os;
+  os.clear();
+  os.str("");
+  const double show = *reinterpret_cast<const double*>(
+      v + DownpourCtrDoubleFeatureValue::show_index());
+  const double click = *reinterpret_cast<const double*>(
+      v + DownpourCtrDoubleFeatureValue::click_index());
+  os << v[DownpourCtrDoubleFeatureValue::unseen_days_index()] << " "
+     << v[DownpourCtrDoubleFeatureValue::delta_score_index()] << " "
+     << static_cast<float>(show) << " " << static_cast<float>(click) << " "
+     << v[DownpourCtrDoubleFeatureValue::embed_w_index()] << " "
+     << v[DownpourCtrDoubleFeatureValue::embed_g2sum_index()] << " "
+     << v[DownpourCtrDoubleFeatureValue::slot_index()];
+  const auto score = show_click_score(show, click);
+  if (score >= _config.embedx_threshold() && param_size > 9) {
+    os << " " << v[DownpourCtrDoubleFeatureValue::embedx_g2sum_index()];
+    const float* embedx_w = v + DownpourCtrDoubleFeatureValue::embedx_w_index();
+    const int embedx_dim = _config.embedx_dim();
+    for (int i = 0; i < embedx_dim; ++i) {
+      os << " " << embedx_w[i];
+    }
+  }
+  return os.str();
+}
+
+// Parses a text line (see parse_to_string()) into `value`.
+// Two layouts are told apart by the number of parsed reals (str_len):
+//   dim() - 1 reals: no slot column, embedx part present;
+//   otherwise: the columns from embed_w on are copied through as they are.
+// Returns str_len + 2, or str_len + 3 when str_len is dim() - 1 or 6.
+int DownpourCtrDoubleAccessor::parse_from_string(const std::string& str,
+                                                 float* value) {
+  const int embedx_dim = _config.embedx_dim();
+  const int value_dim = dim();
+  const int show_index = DownpourCtrDoubleFeatureValue::show_index();
+  const int click_index = DownpourCtrDoubleFeatureValue::click_index();
+  const int embed_w_index = DownpourCtrDoubleFeatureValue::embed_w_index();
+  const int embedx_g2sum_index =
+      DownpourCtrDoubleFeatureValue::embedx_g2sum_index();
+
+  // std::vector instead of the former variable-length array, which is not
+  // standard C++. A feature value spans dim() + 2 floats.
+  std::vector<float> data_buff(value_dim + 2);
+  float* data_buff_ptr = data_buff.data();
+  _embedx_sgd_rule->init_value(
+      data_buff_ptr + DownpourCtrDoubleFeatureValue::embedx_w_index(),
+      data_buff_ptr + embedx_g2sum_index);
+  // NOTE(review): str_to_float() is given no capacity, so a line with more
+  // than dim() + 2 numbers would presumably overrun data_buff - confirm.
+  auto str_len = paddle::string::str_to_float(str.data(), data_buff_ptr);
+  CHECK(str_len >= 6) << "expect more than 6 real:" << str_len;
+
+  value[DownpourCtrDoubleFeatureValue::slot_index()] = -1;
+  // copy unseen_days..delta_score
+  memcpy(value, data_buff_ptr, show_index * sizeof(float));
+  // copy show & click: one float each in the text, doubles in memory
+  *reinterpret_cast<double*>(value + show_index) =
+      static_cast<double>(data_buff_ptr[2]);
+  *reinterpret_cast<double*>(value + click_index) =
+      static_cast<double>(data_buff_ptr[3]);
+  if (str_len == (value_dim - 1)) {
+    // no slot, embedx: copy embed_w, embed_g2sum, then embedx_g2sum..embedx_w
+    value[embed_w_index] = data_buff_ptr[4];
+    value[DownpourCtrDoubleFeatureValue::embed_g2sum_index()] =
+        data_buff_ptr[5];
+    memcpy(value + embedx_g2sum_index, data_buff_ptr + 6,
+           (embedx_dim + 1) * sizeof(float));
+  } else {
+    // copy embed_w..embedx_w
+    memcpy(value + embed_w_index, data_buff_ptr + 4,
+           (str_len - 4) * sizeof(float));
+  }
+  if (str_len == (value_dim - 1) || str_len == 6) {
+    str_len += 1;
+  }
+  return str_len + 2;
+}
+
+}  // namespace distributed
+}  // 
namespace paddle
diff --git a/paddle/fluid/distributed/ps/table/ctr_double_accessor.h b/paddle/fluid/distributed/ps/table/ctr_double_accessor.h
new file mode 100644
index 00000000000..d7c717ace09
--- /dev/null
+++ b/paddle/fluid/distributed/ps/table/ctr_double_accessor.h
@@ -0,0 +1,202 @@
+// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#pragma once
+#include <stdint.h>
+#include <string>
+#include <vector>
+#include "paddle/fluid/distributed/common/registerer.h"
+#include "paddle/fluid/distributed/ps.pb.h"
+#include "paddle/fluid/distributed/ps/table/accessor.h"
+#include "paddle/fluid/distributed/ps/table/sparse_sgd_rule.h"
+
+namespace paddle {
+namespace distributed {
+
+// Value accessor for CTR features whose show/click counters are stored as
+// doubles inside an otherwise float-typed value buffer.
+class DownpourCtrDoubleAccessor : public ValueAccessor {
+ public:
+  // Layout of a stored feature value; all *_index() offsets are in floats.
+  struct DownpourCtrDoubleFeatureValue {
+    /*
+       float unseen_days;
+       float delta_score;
+       double show;
+       double click;
+       float embed_w;
+       float embed_g2sum;
+       float slot;
+       float embedx_g2sum;
+       std::vector<float> embedx_w;
+       */
+    // dim() counts fields; show and click take two float slots each, hence
+    // the + 2 in size().
+    static int dim(int embedx_dim) { return 8 + embedx_dim; }
+    static int dim_size(size_t dim, int embedx_dim) { return sizeof(float); }
+    static int size(int embedx_dim) {
+      return (dim(embedx_dim) + 2) * sizeof(float);
+    }
+    static int unseen_days_index() { return 0; }
+    static int delta_score_index() { return unseen_days_index() + 1; }
+    static int show_index() { return delta_score_index() + 1; }
+    // show is double
+    static int click_index() { return show_index() + 2; }
+    // click is double
+    static int embed_w_index() { return click_index() + 2; }
+    static int embed_g2sum_index() { return embed_w_index() + 1; }
+    static int slot_index() { return embed_g2sum_index() + 1; }
+    static int embedx_g2sum_index() { return slot_index() + 1; }
+    static int embedx_w_index() { return embedx_g2sum_index() + 1; }
+    static float& unseen_days(float* val) { return val[unseen_days_index()]; }
+    static float& delta_score(float* val) { return val[delta_score_index()]; }
+    // NOTE(review): show/click reinterpret two float slots as one double; this
+    // presumably relies on `val` being suitably aligned - confirm.
+    static double& show(float* val) {
+      return *reinterpret_cast<double*>(val + show_index());
+    }
+    static double& click(float* val) {
+      return *reinterpret_cast<double*>(val + click_index());
+    }
+    static float& slot(float* val) { return val[slot_index()]; }
+    static float& embed_w(float* val) { return val[embed_w_index()]; }
+    static float& embed_g2sum(float* val) { return val[embed_g2sum_index()]; }
+    static float& embedx_g2sum(float* val) { return val[embedx_g2sum_index()]; }
+    static float* embedx_w(float* val) { return val + embedx_w_index(); }
+  };
+  // Layout of a push value; every field is a float.
+  struct DownpourCtrDoublePushValue {
+    /*
+       float slot;
+       float show;
+       float click;
+       float embed_g;
+       std::vector<float> embedx_g;
+       */
+    static int dim(int embedx_dim) { return 4 + embedx_dim; }
+    static int dim_size(int dim, int embedx_dim) { return sizeof(float); }
+    static int size(int embedx_dim) { return dim(embedx_dim) * sizeof(float); }
+    static int slot_index() { return 0; }
+    static int show_index() { return slot_index() + 1; }
+    static int click_index() { return show_index() + 1; }
+    static int embed_g_index() { return click_index() + 1; }
+    static int embedx_g_index() { return embed_g_index() + 1; }
+    static float& slot(float* val) { return val[slot_index()]; }
+    static float& show(float* val) { return val[show_index()]; }
+    static float& click(float* val) { return val[click_index()]; }
+    static float& embed_g(float* val) { return val[embed_g_index()]; }
+    static float* embedx_g(float* val) { return val + embedx_g_index(); }
+  };
+  // Layout of a pull value; every field is a float.
+  struct DownpourCtrDoublePullValue {
+    /*
+       float show;
+       float click;
+       float embed_w;
+       std::vector<float> embedx_w;
+       */
+    static int dim(int embedx_dim) { return 3 + embedx_dim; }
+    static int dim_size(size_t dim) { return sizeof(float); }
+    static int size(int embedx_dim) { return dim(embedx_dim) * sizeof(float); }
+    static int show_index() { return 0; }
+    static int click_index() { return 1; }
+    static int embed_w_index() { return 2; }
+    static int embedx_w_index() { return 3; }
+    static float& show(float* val) { return val[show_index()]; }
+    static float& click(float* val) { return val[click_index()]; }
+    static float& embed_w(float* val) { return val[embed_w_index()]; }
+    static float* embedx_w(float* val) { return val + embedx_w_index(); }
+  };
+  DownpourCtrDoubleAccessor() {}
+  virtual ~DownpourCtrDoubleAccessor() {}
+  virtual int initialize();
+  // number of fields of a value
+  virtual size_t dim();
+  // size of each field of a value
+  virtual size_t dim_size(size_t dim);
+  // total size of a value (sum over all fields)
+  virtual size_t size();
+  // total size of the dynamic-length mf part of a value,
+  // only effective for sparse tables
+  virtual size_t mf_size();
+  // number of fields of a pull value
+  virtual size_t select_dim();
+  // size of each field of a pull value
+  virtual size_t select_dim_size(size_t dim);
+  // total size of a pull value (sum over all fields)
+  virtual size_t select_size();
+  // number of fields of a push value
+  virtual size_t update_dim();
+  // size of each field of a push value
+  virtual size_t update_dim_size(size_t dim);
+  // total size of a push value (sum over all fields)
+  virtual size_t update_size();
+  // whether this value should be shrunk
+  virtual bool shrink(float* value);
+  virtual bool need_extend_mf(float* value);
+  // whether this value is dumped in the save stage;
+  // param identifies the save stage, e.g. downpour's xbox and batch_model
+  // param = 0, save all feature
+  // param = 1, save delta feature
+  // param = 2, save xbox base feature
+  // param = 3, save all feature with time decay
+  virtual bool save(float* value, int param) override;
+  // update delta_score and unseen_days after save
+  virtual void update_stat_after_save(float* value, int param) override;
+  // whether this value is saved to ssd
+  virtual bool save_ssd(float* value);
+  // virtual bool save_cache(float* value, int param, double
+  // global_cache_threshold) override;
+  // generate initial values for values when keys do not exist;
+  // the memory of each value must already be allocated by the caller
+  virtual int32_t create(float** value, size_t num);
+  // select from values into select_values
+  virtual int32_t select(float** select_values, const float** values,
+                         size_t num);
+  // merge update_values together
+  virtual int32_t merge(float** update_values,
+                        const float** other_update_values, size_t num);
+  // merge update_values together, it.next decides whether to move to next key
+  // virtual int32_t merge(float** update_values, iterator it);
+  // apply update_values to values
+  virtual int32_t update(float** values, const float** update_values,
+                         size_t num);
+  virtual std::string parse_to_string(const float* value, int param) override;
+  virtual int32_t parse_from_string(const std::string& str, float* v) override;
+  virtual bool create_value(int type, const float* value);
+  // this interface is currently only used to fetch show
+  virtual float get_field(float* value, const std::string& name) override {
+    CHECK(name == "show");
+    if (name == "show") {
+      return static_cast<float>(DownpourCtrDoubleFeatureValue::show(value));
+    }
+    return 0.0;
+  }
+  // DEFINE_GET_INDEX(DownpourCtrDoubleFeatureValue, show)
+  // DEFINE_GET_INDEX(DownpourCtrDoubleFeatureValue, click)
+  // DEFINE_GET_INDEX(DownpourCtrDoubleFeatureValue, embed_w)
+  // DEFINE_GET_INDEX(DownpourCtrDoubleFeatureValue, embedx_w)
+ private:
+  double show_click_score(double show, double click);
+
+ private:
+  // NOTE(review): set in initialize() and never released in this class;
+  // ownership of the rule objects is not visible here - confirm.
+  SparseValueSGDRule* _embed_sgd_rule = nullptr;
+  SparseValueSGDRule* _embedx_sgd_rule = nullptr;
+  float _show_click_decay_rate = 0.0f;
+  int32_t _ssd_unseenday_threshold = 0;
+};
+}  // namespace distributed
+}  // namespace paddle
-- 
GitLab