ctr_double_accessor.cc 15.1 KB
Newer Older
Y
yaoxuefeng 已提交
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15
// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include "paddle/fluid/distributed/ps/table/ctr_double_accessor.h"
16

Y
yaoxuefeng 已提交
17
#include <gflags/gflags.h>
18

Y
yaoxuefeng 已提交
19 20 21 22 23 24
#include "glog/logging.h"
#include "paddle/fluid/string/string_helper.h"

namespace paddle {
namespace distributed {

25
int CtrDoubleAccessor::Initialize() {
Y
yaoxuefeng 已提交
26 27
  auto name = _config.embed_sgd_param().name();
  _embed_sgd_rule = CREATE_PSCORE_CLASS(SparseValueSGDRule, name);
28
  _embed_sgd_rule->LoadConfig(_config.embed_sgd_param(), 1);
Y
yaoxuefeng 已提交
29 30 31

  name = _config.embedx_sgd_param().name();
  _embedx_sgd_rule = CREATE_PSCORE_CLASS(SparseValueSGDRule, name);
32 33
  _embedx_sgd_rule->LoadConfig(_config.embedx_sgd_param(),
                               _config.embedx_dim());
Y
yaoxuefeng 已提交
34 35 36 37 38

  _show_click_decay_rate = _config.ctr_accessor_param().show_click_decay_rate();
  _ssd_unseenday_threshold =
      _config.ctr_accessor_param().ssd_unseenday_threshold();

39 40 41 42
  if (_config.ctr_accessor_param().show_scale()) {
    _show_scale = true;
  }

43
  InitAccessorInfo();
Y
yaoxuefeng 已提交
44 45 46
  return 0;
}

47
void CtrDoubleAccessor::InitAccessorInfo() {
Y
yaoxuefeng 已提交
48
  auto embedx_dim = _config.embedx_dim();
49 50
  _accessor_info.dim = CtrDoubleFeatureValue::Dim(embedx_dim);
  _accessor_info.size = CtrDoubleFeatureValue::Size(embedx_dim);
51 52 53 54 55
  _accessor_info.select_dim = 3 + embedx_dim;
  _accessor_info.select_size = _accessor_info.select_dim * sizeof(float);
  _accessor_info.update_dim = 4 + embedx_dim;
  _accessor_info.update_size = _accessor_info.update_dim * sizeof(float);
  _accessor_info.mf_size = (embedx_dim + 1) * sizeof(float);
Y
yaoxuefeng 已提交
56
}
57

58
bool CtrDoubleAccessor::Shrink(float* value) {
Y
yaoxuefeng 已提交
59 60 61 62 63 64 65
  // auto base_threshold = _config.ctr_accessor_param().base_threshold();
  // auto delta_threshold = _config.ctr_accessor_param().delta_threshold();
  // auto delete_threshold = _config.ctr_accessor_param().delete_threshold();
  auto delete_after_unseen_days =
      _config.ctr_accessor_param().delete_after_unseen_days();
  auto delete_threshold = _config.ctr_accessor_param().delete_threshold();
  // time_decay first
66 67
  CtrDoubleFeatureValue::Show(value) *= _show_click_decay_rate;
  CtrDoubleFeatureValue::Click(value) *= _show_click_decay_rate;
Y
yaoxuefeng 已提交
68
  // shrink after
69 70 71
  auto score = ShowClickScore(CtrDoubleFeatureValue::Show(value),
                              CtrDoubleFeatureValue::Click(value));
  auto unseen_days = CtrDoubleFeatureValue::UnseenDays(value);
Y
yaoxuefeng 已提交
72 73 74 75 76
  if (score < delete_threshold || unseen_days > delete_after_unseen_days) {
    return true;
  }
  return false;
}
Z
zhaocaibei123 已提交
77

78 79
// Returns true when the feature's unseen-days counter is strictly greater
// than the configured _ssd_unseenday_threshold.
bool CtrDoubleAccessor::SaveSSD(float* value) {
  return CtrDoubleFeatureValue::UnseenDays(value) > _ssd_unseenday_threshold;
}
Z
zhaocaibei123 已提交
84

85 86
// Decides whether a feature goes into the cache.
// A feature qualifies when its show/click score reaches base_threshold, it
// was seen within delta_keep_days, and its show count is strictly above
// `global_cache_threshold`. `param` is not used.
bool CtrDoubleAccessor::SaveCache(float* value,
                                  int param,
                                  double global_cache_threshold) {
  const auto& accessor_param = _config.ctr_accessor_param();
  const auto min_score = accessor_param.base_threshold();
  const auto max_unseen_days = accessor_param.delta_keep_days();

  const bool eligible =
      ShowClickScore(CtrDoubleFeatureValue::Show(value),
                     CtrDoubleFeatureValue::Click(value)) >= min_score &&
      CtrDoubleFeatureValue::UnseenDays(value) <= max_unseen_days;
  if (!eligible) {
    return false;
  }
  return CtrDoubleFeatureValue::Show(value) > global_cache_threshold;
}

98
bool CtrDoubleAccessor::Save(float* value, int param) {
Y
yaoxuefeng 已提交
99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116
  // auto base_threshold = _config.ctr_accessor_param().base_threshold();
  // auto delta_threshold = _config.ctr_accessor_param().delta_threshold();
  // auto delta_keep_days = _config.ctr_accessor_param().delta_keep_days();
  auto base_threshold = _config.ctr_accessor_param().base_threshold();
  auto delta_threshold = _config.ctr_accessor_param().delta_threshold();
  auto delta_keep_days = _config.ctr_accessor_param().delta_keep_days();
  if (param == 2) {
    delta_threshold = 0;
  }
  switch (param) {
    // save all
    case 0: {
      return true;
    }
    // save xbox delta
    case 1:
    // save xbox base
    case 2: {
117 118
      if (ShowClickScore(CtrDoubleFeatureValue::Show(value),
                         CtrDoubleFeatureValue::Click(value)) >=
Y
yaoxuefeng 已提交
119
              base_threshold &&
120 121
          CtrDoubleFeatureValue::DeltaScore(value) >= delta_threshold &&
          CtrDoubleFeatureValue::UnseenDays(value) <= delta_keep_days) {
Y
yaoxuefeng 已提交
122 123
        // do this after save, because it must not be modified when retry
        if (param == 2) {
124
          CtrDoubleFeatureValue::DeltaScore(value) = 0;
Y
yaoxuefeng 已提交
125 126 127 128 129 130 131 132
        }
        return true;
      } else {
        return false;
      }
    }
    // already decayed in shrink
    case 3: {
133 134
      // CtrDoubleFeatureValue::Show(value) *= _show_click_decay_rate;
      // CtrDoubleFeatureValue::Click(value) *= _show_click_decay_rate;
Y
yaoxuefeng 已提交
135
      // do this after save, because it must not be modified when retry
136
      // CtrDoubleFeatureValue::UnseenDays(value)++;
Y
yaoxuefeng 已提交
137 138 139 140
      return true;
    }
    default:
      return true;
141
  }
Y
yaoxuefeng 已提交
142 143
}

144
void CtrDoubleAccessor::UpdateStatAfterSave(float* value, int param) {
Y
yaoxuefeng 已提交
145 146 147 148 149 150 151 152
  auto base_threshold = _config.ctr_accessor_param().base_threshold();
  auto delta_threshold = _config.ctr_accessor_param().delta_threshold();
  auto delta_keep_days = _config.ctr_accessor_param().delta_keep_days();
  if (param == 2) {
    delta_threshold = 0;
  }
  switch (param) {
    case 1: {
153 154
      if (ShowClickScore(CtrDoubleFeatureValue::Show(value),
                         CtrDoubleFeatureValue::Click(value)) >=
Y
yaoxuefeng 已提交
155
              base_threshold &&
156 157 158
          CtrDoubleFeatureValue::DeltaScore(value) >= delta_threshold &&
          CtrDoubleFeatureValue::UnseenDays(value) <= delta_keep_days) {
        CtrDoubleFeatureValue::DeltaScore(value) = 0;
Y
yaoxuefeng 已提交
159 160 161 162
      }
    }
      return;
    case 3: {
163
      CtrDoubleFeatureValue::UnseenDays(value)++;
Y
yaoxuefeng 已提交
164 165 166 167
    }
      return;
    default:
      return;
168
  }
Y
yaoxuefeng 已提交
169 170
}

171
int32_t CtrDoubleAccessor::Create(float** values, size_t num) {
Y
yaoxuefeng 已提交
172 173
  for (size_t value_item = 0; value_item < num; ++value_item) {
    float* value = values[value_item];
174 175
    value[CtrDoubleFeatureValue::UnseenDaysIndex()] = 0;
    value[CtrDoubleFeatureValue::DeltaScoreIndex()] = 0;
176
    *reinterpret_cast<double*>(value + CtrDoubleFeatureValue::ShowIndex()) = 0;
177 178
    *(double*)(value + CtrDoubleFeatureValue::ClickIndex()) = 0;
    value[CtrDoubleFeatureValue::SlotIndex()] = -1;
179 180 181 182
    bool zero_init = _config.ctr_accessor_param().zero_init();
    _embed_sgd_rule->InitValue(value + CtrDoubleFeatureValue::EmbedWIndex(),
                               value + CtrDoubleFeatureValue::EmbedG2SumIndex(),
                               zero_init);
183
    _embedx_sgd_rule->InitValue(
184
        value + CtrDoubleFeatureValue::EmbedxWIndex(),
185 186
        value + CtrDoubleFeatureValue::EmbedxG2SumIndex(),
        false);
Y
yaoxuefeng 已提交
187 188 189
  }
  return 0;
}
190 191 192
bool CtrDoubleAccessor::NeedExtendMF(float* value) {
  auto show = ((double*)(value + CtrDoubleFeatureValue::ShowIndex()))[0];
  auto click = ((double*)(value + CtrDoubleFeatureValue::ClickIndex()))[0];
Y
yaoxuefeng 已提交
193 194 195 196 197 198
  // float score = (show - click) * _config.ctr_accessor_param().nonclk_coeff()
  auto score = (show - click) * _config.ctr_accessor_param().nonclk_coeff() +
               click * _config.ctr_accessor_param().click_coeff();
  //+ click * _config.ctr_accessor_param().click_coeff();
  return score >= _config.embedx_threshold();
}
199
// from CtrDoubleFeatureValue to CtrDoublePullValue
200 201
int32_t CtrDoubleAccessor::Select(float** select_values,
                                  const float** values,
202
                                  size_t num) {
Y
yaoxuefeng 已提交
203 204 205 206
  auto embedx_dim = _config.embedx_dim();
  for (size_t value_item = 0; value_item < num; ++value_item) {
    float* select_value = select_values[value_item];
    float* value = const_cast<float*>(values[value_item]);
207 208 209 210 211 212 213 214
    select_value[CtrDoublePullValue::ShowIndex()] =
        (float)*(double*)(value + CtrDoubleFeatureValue::ShowIndex());
    select_value[CtrDoublePullValue::ClickIndex()] =
        (float)*(double*)(value + CtrDoubleFeatureValue::ClickIndex());
    select_value[CtrDoublePullValue::EmbedWIndex()] =
        value[CtrDoubleFeatureValue::EmbedWIndex()];
    memcpy(select_value + CtrDoublePullValue::EmbedxWIndex(),
           value + CtrDoubleFeatureValue::EmbedxWIndex(),
Y
yaoxuefeng 已提交
215 216 217 218
           embedx_dim * sizeof(float));
  }
  return 0;
}
219
// from CtrDoublePushValue to CtrDoublePushValue
Y
yaoxuefeng 已提交
220 221
// first dim: item
// second dim: field num
222 223 224
int32_t CtrDoubleAccessor::Merge(float** update_values,
                                 const float** other_update_values,
                                 size_t num) {
Y
yaoxuefeng 已提交
225
  auto embedx_dim = _config.embedx_dim();
226
  size_t total_dim = CtrDoublePushValue::Dim(embedx_dim);
Y
yaoxuefeng 已提交
227 228 229
  for (size_t value_item = 0; value_item < num; ++value_item) {
    float* update_value = update_values[value_item];
    const float* other_update_value = other_update_values[value_item];
230 231 232 233
    /**(double*)(update_value + CtrDoublePushValue::ShowIndex()) +=
    *(double*)(other_update_value + CtrDoublePushValue::ShowIndex());
    *(double*)(update_value + CtrDoublePushValue::ClickIndex()) +=
    *(double*)(other_update_value + CtrDoublePushValue::ClickIndex());
Y
yaoxuefeng 已提交
234 235 236
    for (auto i = 3u; i < total_dim; ++i) {
        update_value[i] += other_update_value[i];
    }*/
237
    for (size_t i = 0; i < total_dim; ++i) {
Z
zhangchunle 已提交
238
      if (static_cast<int>(i) != CtrDoublePushValue::SlotIndex()) {
Y
yaoxuefeng 已提交
239 240 241 242 243 244
        update_value[i] += other_update_value[i];
      }
    }
  }
  return 0;
}
245
// from CtrDoublePushValue to CtrDoubleFeatureValue
Y
yaoxuefeng 已提交
246 247
// first dim: item
// second dim: field num
248
int32_t CtrDoubleAccessor::Update(float** update_values,
249 250
                                  const float** push_values,
                                  size_t num) {
Y
yaoxuefeng 已提交
251 252 253
  for (size_t value_item = 0; value_item < num; ++value_item) {
    float* update_value = update_values[value_item];
    const float* push_value = push_values[value_item];
254 255 256 257
    float push_show = push_value[CtrDoublePushValue::ShowIndex()];
    float push_click = push_value[CtrDoublePushValue::ClickIndex()];
    float slot = push_value[CtrDoublePushValue::SlotIndex()];
    *(double*)(update_value + CtrDoubleFeatureValue::ShowIndex()) +=
Y
yaoxuefeng 已提交
258
        (double)push_show;
259
    *(double*)(update_value + CtrDoubleFeatureValue::ClickIndex()) +=
Y
yaoxuefeng 已提交
260
        (double)push_click;
261 262
    update_value[CtrDoubleFeatureValue::SlotIndex()] = slot;
    update_value[CtrDoubleFeatureValue::DeltaScoreIndex()] +=
Y
yaoxuefeng 已提交
263 264 265 266
        (push_show - push_click) * _config.ctr_accessor_param().nonclk_coeff() +
        push_click * _config.ctr_accessor_param().click_coeff();
    //(push_show - push_click) * _config.ctr_accessor_param().nonclk_coeff() +
    // push_click * _config.ctr_accessor_param().click_coeff();
267 268 269 270 271 272
    update_value[CtrDoubleFeatureValue::UnseenDaysIndex()] = 0;
    if (!_show_scale) {
      push_show = 1;
    }
    VLOG(3) << "accessor show scale:" << _show_scale
            << ", push_show:" << push_show;
273
    _embed_sgd_rule->UpdateValue(
274 275
        update_value + CtrDoubleFeatureValue::EmbedWIndex(),
        update_value + CtrDoubleFeatureValue::EmbedG2SumIndex(),
276 277
        push_value + CtrDoublePushValue::EmbedGIndex(),
        push_show);
278
    _embedx_sgd_rule->UpdateValue(
279 280
        update_value + CtrDoubleFeatureValue::EmbedxWIndex(),
        update_value + CtrDoubleFeatureValue::EmbedxG2SumIndex(),
281 282
        push_value + CtrDoublePushValue::EmbedxGIndex(),
        push_show);
Y
yaoxuefeng 已提交
283 284 285
  }
  return 0;
}
286
bool CtrDoubleAccessor::CreateValue(int stage, const float* value) {
Y
yaoxuefeng 已提交
287 288 289 290 291
  // stage == 0, pull
  // stage == 1, push
  if (stage == 0) {
    return true;
  } else if (stage == 1) {
292 293
    auto show = CtrDoublePushValue::Show(const_cast<float*>(value));
    auto click = CtrDoublePushValue::Click(const_cast<float*>(value));
294
    auto score = ShowClickScore(show, click);
Y
yaoxuefeng 已提交
295 296 297 298 299 300 301 302 303 304 305 306
    if (score <= 0) {
      return false;
    }
    if (score >= 1) {
      return true;
    }
    return local_uniform_real_distribution<float>()(local_random_engine()) <
           score;
  } else {
    return true;
  }
}
307
// Weighted show/click score:
//   (show - click) * nonclk_coeff + click * click_coeff
// with both coefficients taken from the ctr_accessor_param config.
double CtrDoubleAccessor::ShowClickScore(double show, double click) {
  const auto& accessor_param = _config.ctr_accessor_param();
  const double non_click = show - click;
  return non_click * accessor_param.nonclk_coeff() +
         click * accessor_param.click_coeff();
}
314
std::string CtrDoubleAccessor::ParseToString(const float* v, int param_size) {
Y
yaoxuefeng 已提交
315 316 317 318 319 320
  thread_local std::ostringstream os;
  os.clear();
  os.str("");
  os << v[0] << " " << v[1] << " " << (float)((double*)(v + 2))[0] << " "
     << (float)((double*)(v + 4))[0] << " " << v[6] << " " << v[7] << " "
     << v[8];
321 322
  auto show = CtrDoubleFeatureValue::Show(const_cast<float*>(v));
  auto click = CtrDoubleFeatureValue::Click(const_cast<float*>(v));
323
  auto score = ShowClickScore(show, click);
Y
yaoxuefeng 已提交
324 325
  if (score >= _config.embedx_threshold() && param_size > 9) {
    os << " " << v[9];
326
    for (size_t i = 0; i < _config.embedx_dim(); ++i) {
Y
yaoxuefeng 已提交
327 328 329 330 331
      os << " " << v[10 + i];
    }
  }
  return os.str();
}
332
int CtrDoubleAccessor::ParseFromString(const std::string& str, float* value) {
Y
yaoxuefeng 已提交
333
  int embedx_dim = _config.embedx_dim();
334
  float data_buff[_accessor_info.dim + 2];
Y
yaoxuefeng 已提交
335
  float* data_buff_ptr = data_buff;
336
  _embedx_sgd_rule->InitValue(
337 338
      data_buff_ptr + CtrDoubleFeatureValue::EmbedxWIndex(),
      data_buff_ptr + CtrDoubleFeatureValue::EmbedxG2SumIndex());
Y
yaoxuefeng 已提交
339 340
  auto str_len = paddle::string::str_to_float(str.data(), data_buff_ptr);
  CHECK(str_len >= 6) << "expect more than 6 real:" << str_len;
341 342 343
  int show_index = CtrDoubleFeatureValue::ShowIndex();
  int click_index = CtrDoubleFeatureValue::ClickIndex();
  int embed_w_index = CtrDoubleFeatureValue::EmbedWIndex();
Y
yaoxuefeng 已提交
344
  // no slot, embedx
345
  int value_dim = _accessor_info.dim;
346 347
  int embedx_g2sum_index = CtrDoubleFeatureValue::EmbedxG2SumIndex();
  value[CtrDoubleFeatureValue::SlotIndex()] = -1;
Y
yaoxuefeng 已提交
348 349 350 351 352 353 354 355
  // other case
  if (str_len == (value_dim - 1)) {
    // copy unseen_days..delta_score
    memcpy(value, data_buff_ptr, show_index * sizeof(float));
    // copy show & click
    *(double*)(value + show_index) = (double)data_buff_ptr[2];
    *(double*)(value + click_index) = (double)data_buff_ptr[3];
    // copy others
356 357
    value[CtrDoubleFeatureValue::EmbedWIndex()] = data_buff_ptr[4];
    value[CtrDoubleFeatureValue::EmbedG2SumIndex()] = data_buff_ptr[5];
358 359
    memcpy(value + embedx_g2sum_index,
           data_buff_ptr + 6,
Y
yaoxuefeng 已提交
360 361 362 363 364 365 366 367
           (embedx_dim + 1) * sizeof(float));
  } else {
    // copy unseen_days..delta_score
    memcpy(value, data_buff_ptr, show_index * sizeof(float));
    // copy show & click
    *(double*)(value + show_index) = (double)data_buff_ptr[2];
    *(double*)(value + click_index) = (double)data_buff_ptr[3];
    // copy embed_w..embedx_w
368 369
    memcpy(value + embed_w_index,
           data_buff_ptr + 4,
Y
yaoxuefeng 已提交
370 371 372 373 374 375 376 377 378 379
           (str_len - 4) * sizeof(float));
  }
  if (str_len == (value_dim - 1) || str_len == 6) {
    str_len += 1;
  }
  return str_len + 2;
}

}  // namespace distributed
}  // namespace paddle