ctr_double_accessor.cc 17.1 KB
Newer Older
Y
yaoxuefeng 已提交
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22
// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include "paddle/fluid/distributed/ps/table/ctr_double_accessor.h"
#include <gflags/gflags.h>
#include "glog/logging.h"
#include "paddle/fluid/string/string_helper.h"

namespace paddle {
namespace distributed {

23
// Instantiates the SGD rules for the embed and embedx weights by the names
// given in the accessor config, loads their configuration (dimension 1 for
// embed, embedx_dim for embedx), and caches the show/click decay rate and the
// SSD unseen-day threshold. Always returns 0.
int DownpourCtrDoubleAccessor::Initialize() {
  const auto& embed_param = _config.embed_sgd_param();
  _embed_sgd_rule = CREATE_PSCORE_CLASS(SparseValueSGDRule, embed_param.name());
  _embed_sgd_rule->load_config(embed_param, 1);

  const auto& embedx_param = _config.embedx_sgd_param();
  _embedx_sgd_rule =
      CREATE_PSCORE_CLASS(SparseValueSGDRule, embedx_param.name());
  _embedx_sgd_rule->load_config(embedx_param, _config.embedx_dim());

  const auto& ctr_param = _config.ctr_accessor_param();
  _show_click_decay_rate = ctr_param.show_click_decay_rate();
  _ssd_unseenday_threshold = ctr_param.ssd_unseenday_threshold();

  return 0;
}

40
// Fills `info` with the dimensions and byte sizes this accessor reports for
// stored values, pull (select) values, push (update) values and the MF part.
void DownpourCtrDoubleAccessor::SetTableInfo(AccessorInfo& info) {
  // Stored feature value.
  info.dim = Dim();
  info.size = Size();

  // Pull value.
  info.select_dim = SelectDim();
  info.select_size = SelectSize();

  // Push value.
  info.update_dim = UpdateDim();
  info.update_size = UpdateSize();

  // embedx weights plus their g2sum.
  info.mf_size = MFSize();
}

50 51 52
// Returns the single table-info quantity selected by `key`, or 0 when the key
// is not one of the values handled here.
size_t DownpourCtrDoubleAccessor::GetTableInfo(InfoKey key) {
  if (key == DIM) {
    return Dim();
  }
  if (key == SIZE) {
    return Size();
  }
  if (key == SELECT_DIM) {
    return SelectDim();
  }
  if (key == SELECT_SIZE) {
    return SelectSize();
  }
  if (key == UPDATE_DIM) {
    return UpdateDim();
  }
  if (key == UPDATE_SIZE) {
    return UpdateSize();
  }
  if (key == MF_SIZE) {
    return MFSize();
  }
  return 0;
}

72
size_t DownpourCtrDoubleAccessor::Dim() {
Y
yaoxuefeng 已提交
73
  auto embedx_dim = _config.embedx_dim();
74
  return DownpourCtrDoubleFeatureValue::Dim(embedx_dim);
Y
yaoxuefeng 已提交
75
}
76
size_t DownpourCtrDoubleAccessor::DimSize(size_t dim) {
Y
yaoxuefeng 已提交
77
  auto embedx_dim = _config.embedx_dim();
78
  return DownpourCtrDoubleFeatureValue::DimSize(dim, embedx_dim);
Y
yaoxuefeng 已提交
79
}
80
size_t DownpourCtrDoubleAccessor::Size() {
Y
yaoxuefeng 已提交
81
  auto embedx_dim = _config.embedx_dim();
82
  return DownpourCtrDoubleFeatureValue::Size(embedx_dim);
Y
yaoxuefeng 已提交
83
}
84
size_t DownpourCtrDoubleAccessor::MFSize() {
Y
yaoxuefeng 已提交
85 86 87
  return (_config.embedx_dim() + 1) * sizeof(float);  // embedx embedx_g2sum
}
// pull value
88
size_t DownpourCtrDoubleAccessor::SelectDim() {
Y
yaoxuefeng 已提交
89 90 91
  auto embedx_dim = _config.embedx_dim();
  return 3 + embedx_dim;
}
92
// Byte width of one pull-value field. Every field is a float, so the field
// index does not influence the result.
size_t DownpourCtrDoubleAccessor::SelectDimSize(size_t dim) {
  static_cast<void>(dim);
  return sizeof(float);
}
95 96
// Byte size of a whole pull value: SelectDim() float fields.
size_t DownpourCtrDoubleAccessor::SelectSize() {
  const size_t field_count = SelectDim();
  return field_count * sizeof(float);
}
// push value
99
size_t DownpourCtrDoubleAccessor::UpdateDim() {
Y
yaoxuefeng 已提交
100 101 102
  auto embedx_dim = _config.embedx_dim();
  return 4 + embedx_dim;
}
103
// Byte width of one push-value field. Every field is a float, so the field
// index does not influence the result.
size_t DownpourCtrDoubleAccessor::UpdateDimSize(size_t dim) {
  static_cast<void>(dim);
  return sizeof(float);
}
106 107
// Byte size of a whole push value: UpdateDim() float fields.
size_t DownpourCtrDoubleAccessor::UpdateSize() {
  const size_t field_count = UpdateDim();
  return field_count * sizeof(float);
}
109
bool DownpourCtrDoubleAccessor::Shrink(float* value) {
Y
yaoxuefeng 已提交
110 111 112 113 114 115 116 117 118
  // auto base_threshold = _config.ctr_accessor_param().base_threshold();
  // auto delta_threshold = _config.ctr_accessor_param().delta_threshold();
  // auto delete_threshold = _config.ctr_accessor_param().delete_threshold();
  auto base_threshold = _config.ctr_accessor_param().base_threshold();
  auto delta_threshold = _config.ctr_accessor_param().delta_threshold();
  auto delete_after_unseen_days =
      _config.ctr_accessor_param().delete_after_unseen_days();
  auto delete_threshold = _config.ctr_accessor_param().delete_threshold();
  // time_decay first
119 120
  DownpourCtrDoubleFeatureValue::Show(value) *= _show_click_decay_rate;
  DownpourCtrDoubleFeatureValue::Click(value) *= _show_click_decay_rate;
Y
yaoxuefeng 已提交
121
  // shrink after
122 123
  auto score = show_click_score(DownpourCtrDoubleFeatureValue::Show(value),
                                DownpourCtrDoubleFeatureValue::Click(value));
Y
yaoxuefeng 已提交
124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140
  auto unseen_days = DownpourCtrDoubleFeatureValue::unseen_days(value);
  if (score < delete_threshold || unseen_days > delete_after_unseen_days) {
    return true;
  }
  return false;
}
// Returns true when the feature's unseen-day counter is strictly greater than
// the configured SSD unseen-day threshold.
bool DownpourCtrDoubleAccessor::save_ssd(float* value) {
  return DownpourCtrDoubleFeatureValue::unseen_days(value) >
         _ssd_unseenday_threshold;
}
// bool DownpourCtrDoubleAccessor::save_cache(
//         float* value, int param, double global_cache_threshold) {
//     auto base_threshold = _config.ctr_accessor_param().base_threshold();
//     auto delta_keep_days = _config.ctr_accessor_param().delta_keep_days();
141 142
//     if (show_click_score(DownpourCtrDoubleFeatureValue::Show(value),
//     DownpourCtrDoubleFeatureValue::Click(value)) >= base_threshold
Y
yaoxuefeng 已提交
143 144
//         && DownpourCtrDoubleFeatureValue::unseen_days(value) <=
//         delta_keep_days) {
145
//         return DownpourCtrDoubleFeatureValue::Show(value) >
Y
yaoxuefeng 已提交
146 147 148 149
//         global_cache_threshold;
//     }
//     return false;
// }
150
bool DownpourCtrDoubleAccessor::Save(float* value, int param) {
Y
yaoxuefeng 已提交
151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168
  // auto base_threshold = _config.ctr_accessor_param().base_threshold();
  // auto delta_threshold = _config.ctr_accessor_param().delta_threshold();
  // auto delta_keep_days = _config.ctr_accessor_param().delta_keep_days();
  auto base_threshold = _config.ctr_accessor_param().base_threshold();
  auto delta_threshold = _config.ctr_accessor_param().delta_threshold();
  auto delta_keep_days = _config.ctr_accessor_param().delta_keep_days();
  if (param == 2) {
    delta_threshold = 0;
  }
  switch (param) {
    // save all
    case 0: {
      return true;
    }
    // save xbox delta
    case 1:
    // save xbox base
    case 2: {
169 170
      if (show_click_score(DownpourCtrDoubleFeatureValue::Show(value),
                           DownpourCtrDoubleFeatureValue::Click(value)) >=
Y
yaoxuefeng 已提交
171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186
              base_threshold &&
          DownpourCtrDoubleFeatureValue::delta_score(value) >=
              delta_threshold &&
          DownpourCtrDoubleFeatureValue::unseen_days(value) <=
              delta_keep_days) {
        // do this after save, because it must not be modified when retry
        if (param == 2) {
          DownpourCtrDoubleFeatureValue::delta_score(value) = 0;
        }
        return true;
      } else {
        return false;
      }
    }
    // already decayed in shrink
    case 3: {
187 188
      // DownpourCtrFeatureValue::Show(value) *= _show_click_decay_rate;
      // DownpourCtrFeatureValue::Click(value) *= _show_click_decay_rate;
Y
yaoxuefeng 已提交
189 190 191 192 193 194 195 196 197
      // do this after save, because it must not be modified when retry
      // DownpourCtrDoubleFeatureValue::unseen_days(value)++;
      return true;
    }
    default:
      return true;
  };
}

198
void DownpourCtrDoubleAccessor::UpdateStatAfterSave(float* value, int param) {
Y
yaoxuefeng 已提交
199 200 201 202 203 204 205 206
  auto base_threshold = _config.ctr_accessor_param().base_threshold();
  auto delta_threshold = _config.ctr_accessor_param().delta_threshold();
  auto delta_keep_days = _config.ctr_accessor_param().delta_keep_days();
  if (param == 2) {
    delta_threshold = 0;
  }
  switch (param) {
    case 1: {
207 208
      if (show_click_score(DownpourCtrDoubleFeatureValue::Show(value),
                           DownpourCtrDoubleFeatureValue::Click(value)) >=
Y
yaoxuefeng 已提交
209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226
              base_threshold &&
          DownpourCtrDoubleFeatureValue::delta_score(value) >=
              delta_threshold &&
          DownpourCtrDoubleFeatureValue::unseen_days(value) <=
              delta_keep_days) {
        DownpourCtrDoubleFeatureValue::delta_score(value) = 0;
      }
    }
      return;
    case 3: {
      DownpourCtrDoubleFeatureValue::unseen_days(value)++;
    }
      return;
    default:
      return;
  };
}

227
int32_t DownpourCtrDoubleAccessor::Create(float** values, size_t num) {
Y
yaoxuefeng 已提交
228 229 230 231 232
  auto embedx_dim = _config.embedx_dim();
  for (size_t value_item = 0; value_item < num; ++value_item) {
    float* value = values[value_item];
    value[DownpourCtrDoubleFeatureValue::unseen_days_index()] = 0;
    value[DownpourCtrDoubleFeatureValue::delta_score_index()] = 0;
233 234 235
    *(double*)(value + DownpourCtrDoubleFeatureValue::ShowIndex()) = 0;
    *(double*)(value + DownpourCtrDoubleFeatureValue::ClickIndex()) = 0;
    value[DownpourCtrDoubleFeatureValue::SlotIndex()] = -1;
Y
yaoxuefeng 已提交
236
    _embed_sgd_rule->init_value(
237
        value + DownpourCtrDoubleFeatureValue::Embed_W_Index(),
Y
yaoxuefeng 已提交
238 239
        value + DownpourCtrDoubleFeatureValue::embed_g2sum_index());
    _embedx_sgd_rule->init_value(
240
        value + DownpourCtrDoubleFeatureValue::Embedx_W_Index(),
Y
yaoxuefeng 已提交
241 242 243 244
        value + DownpourCtrDoubleFeatureValue::embedx_g2sum_index(), false);
  }
  return 0;
}
245
bool DownpourCtrDoubleAccessor::NeedExtendMF(float* value) {
Y
yaoxuefeng 已提交
246
  auto show =
247
      ((double*)(value + DownpourCtrDoubleFeatureValue::ShowIndex()))[0];
Y
yaoxuefeng 已提交
248
  auto click =
249
      ((double*)(value + DownpourCtrDoubleFeatureValue::ClickIndex()))[0];
Y
yaoxuefeng 已提交
250 251 252 253 254 255 256
  // float score = (show - click) * _config.ctr_accessor_param().nonclk_coeff()
  auto score = (show - click) * _config.ctr_accessor_param().nonclk_coeff() +
               click * _config.ctr_accessor_param().click_coeff();
  //+ click * _config.ctr_accessor_param().click_coeff();
  return score >= _config.embedx_threshold();
}
// from DownpourCtrFeatureValue to DownpourCtrPullValue
257
// Converts `num` stored feature values into pull values.
//
// For each item, show and click are narrowed from double to float, embed_w is
// copied as is, and embedx_dim embedx weights are copied in bulk. Always
// returns 0.
int32_t DownpourCtrDoubleAccessor::Select(float** select_values,
                                          const float** values, size_t num) {
  const auto embedx_dim = _config.embedx_dim();
  for (size_t item = 0; item < num; ++item) {
    float* pull = select_values[item];
    const float* feature = values[item];

    const double show = *reinterpret_cast<const double*>(
        feature + DownpourCtrDoubleFeatureValue::ShowIndex());
    const double click = *reinterpret_cast<const double*>(
        feature + DownpourCtrDoubleFeatureValue::ClickIndex());

    pull[DownpourCtrDoublePullValue::ShowIndex()] = static_cast<float>(show);
    pull[DownpourCtrDoublePullValue::ClickIndex()] = static_cast<float>(click);
    pull[DownpourCtrDoublePullValue::Embed_W_Index()] =
        feature[DownpourCtrDoubleFeatureValue::Embed_W_Index()];
    memcpy(pull + DownpourCtrDoublePullValue::Embedx_W_Index(),
           feature + DownpourCtrDoubleFeatureValue::Embedx_W_Index(),
           embedx_dim * sizeof(float));
  }
  return 0;
}
// from DownpourCtrPushValue to DownpourCtrPushValue
// first dim: item
// second dim: field num
278
int32_t DownpourCtrDoubleAccessor::Merge(float** update_values,
Y
yaoxuefeng 已提交
279 280 281
                                         const float** other_update_values,
                                         size_t num) {
  auto embedx_dim = _config.embedx_dim();
282
  size_t total_dim = DownpourCtrDoublePushValue::Dim(embedx_dim);
Y
yaoxuefeng 已提交
283 284 285
  for (size_t value_item = 0; value_item < num; ++value_item) {
    float* update_value = update_values[value_item];
    const float* other_update_value = other_update_values[value_item];
286 287 288 289
    /**(double*)(update_value + DownpourCtrDoublePushValue::ShowIndex()) +=
    *(double*)(other_update_value + DownpourCtrDoublePushValue::ShowIndex());
    *(double*)(update_value + DownpourCtrDoublePushValue::ClickIndex()) +=
    *(double*)(other_update_value + DownpourCtrDoublePushValue::ClickIndex());
Y
yaoxuefeng 已提交
290 291 292 293
    for (auto i = 3u; i < total_dim; ++i) {
        update_value[i] += other_update_value[i];
    }*/
    for (auto i = 0u; i < total_dim; ++i) {
294
      if (i != DownpourCtrDoublePushValue::SlotIndex()) {
Y
yaoxuefeng 已提交
295 296 297 298 299 300 301 302 303
        update_value[i] += other_update_value[i];
      }
    }
  }
  return 0;
}
// from DownpourCtrPushValue to DownpourCtrFeatureValue
// first dim: item
// second dim: field num
304
int32_t DownpourCtrDoubleAccessor::Update(float** update_values,
Y
yaoxuefeng 已提交
305 306 307 308 309 310
                                          const float** push_values,
                                          size_t num) {
  auto embedx_dim = _config.embedx_dim();
  for (size_t value_item = 0; value_item < num; ++value_item) {
    float* update_value = update_values[value_item];
    const float* push_value = push_values[value_item];
311 312 313 314
    float push_show = push_value[DownpourCtrDoublePushValue::ShowIndex()];
    float push_click = push_value[DownpourCtrDoublePushValue::ClickIndex()];
    float slot = push_value[DownpourCtrDoublePushValue::SlotIndex()];
    *(double*)(update_value + DownpourCtrDoubleFeatureValue::ShowIndex()) +=
Y
yaoxuefeng 已提交
315
        (double)push_show;
316
    *(double*)(update_value + DownpourCtrDoubleFeatureValue::ClickIndex()) +=
Y
yaoxuefeng 已提交
317
        (double)push_click;
318
    update_value[DownpourCtrDoubleFeatureValue::SlotIndex()] = slot;
Y
yaoxuefeng 已提交
319 320 321 322 323 324 325
    update_value[DownpourCtrDoubleFeatureValue::delta_score_index()] +=
        (push_show - push_click) * _config.ctr_accessor_param().nonclk_coeff() +
        push_click * _config.ctr_accessor_param().click_coeff();
    //(push_show - push_click) * _config.ctr_accessor_param().nonclk_coeff() +
    // push_click * _config.ctr_accessor_param().click_coeff();
    update_value[DownpourCtrDoubleFeatureValue::unseen_days_index()] = 0;
    _embed_sgd_rule->update_value(
326
        update_value + DownpourCtrDoubleFeatureValue::Embed_W_Index(),
Y
yaoxuefeng 已提交
327
        update_value + DownpourCtrDoubleFeatureValue::embed_g2sum_index(),
328
        push_value + DownpourCtrDoublePushValue::Embed_G_Index(), push_show);
Y
yaoxuefeng 已提交
329
    _embedx_sgd_rule->update_value(
330
        update_value + DownpourCtrDoubleFeatureValue::Embedx_W_Index(),
Y
yaoxuefeng 已提交
331
        update_value + DownpourCtrDoubleFeatureValue::embedx_g2sum_index(),
332
        push_value + DownpourCtrDoublePushValue::Embedx_G_Index(), push_show);
Y
yaoxuefeng 已提交
333 334 335
  }
  return 0;
}
336
bool DownpourCtrDoubleAccessor::CreateValue(int stage, const float* value) {
Y
yaoxuefeng 已提交
337 338 339 340 341
  // stage == 0, pull
  // stage == 1, push
  if (stage == 0) {
    return true;
  } else if (stage == 1) {
342 343
    auto show = DownpourCtrDoublePushValue::Show(const_cast<float*>(value));
    auto click = DownpourCtrDoublePushValue::Click(const_cast<float*>(value));
Y
yaoxuefeng 已提交
344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363
    auto score = show_click_score(show, click);
    if (score <= 0) {
      return false;
    }
    if (score >= 1) {
      return true;
    }
    return local_uniform_real_distribution<float>()(local_random_engine()) <
           score;
  } else {
    return true;
  }
}
// Weighted show/click score:
//   (show - click) * nonclk_coeff + click * click_coeff
// with both coefficients taken from the CTR accessor config.
double DownpourCtrDoubleAccessor::show_click_score(double show, double click) {
  const auto& ctr_param = _config.ctr_accessor_param();
  const double non_click = show - click;
  return non_click * ctr_param.nonclk_coeff() +
         click * ctr_param.click_coeff();
}
364 365
std::string DownpourCtrDoubleAccessor::ParseToString(const float* v,
                                                     int param_size) {
Y
yaoxuefeng 已提交
366 367 368 369 370 371
  thread_local std::ostringstream os;
  os.clear();
  os.str("");
  os << v[0] << " " << v[1] << " " << (float)((double*)(v + 2))[0] << " "
     << (float)((double*)(v + 4))[0] << " " << v[6] << " " << v[7] << " "
     << v[8];
372 373
  auto show = DownpourCtrDoubleFeatureValue::Show(const_cast<float*>(v));
  auto click = DownpourCtrDoubleFeatureValue::Click(const_cast<float*>(v));
Y
yaoxuefeng 已提交
374 375 376 377 378 379 380 381 382
  auto score = show_click_score(show, click);
  if (score >= _config.embedx_threshold() && param_size > 9) {
    os << " " << v[9];
    for (auto i = 0; i < _config.embedx_dim(); ++i) {
      os << " " << v[10 + i];
    }
  }
  return os.str();
}
383 384
int DownpourCtrDoubleAccessor::ParseFromString(const std::string& str,
                                               float* value) {
Y
yaoxuefeng 已提交
385
  int embedx_dim = _config.embedx_dim();
386
  float data_buff[Dim() + 2];
Y
yaoxuefeng 已提交
387 388
  float* data_buff_ptr = data_buff;
  _embedx_sgd_rule->init_value(
389
      data_buff_ptr + DownpourCtrDoubleFeatureValue::Embedx_W_Index(),
Y
yaoxuefeng 已提交
390 391 392
      data_buff_ptr + DownpourCtrDoubleFeatureValue::embedx_g2sum_index());
  auto str_len = paddle::string::str_to_float(str.data(), data_buff_ptr);
  CHECK(str_len >= 6) << "expect more than 6 real:" << str_len;
393 394 395
  int show_index = DownpourCtrDoubleFeatureValue::ShowIndex();
  int click_index = DownpourCtrDoubleFeatureValue::ClickIndex();
  int embed_w_index = DownpourCtrDoubleFeatureValue::Embed_W_Index();
Y
yaoxuefeng 已提交
396
  // no slot, embedx
397
  int value_dim = Dim();
Y
yaoxuefeng 已提交
398
  int embedx_g2sum_index = DownpourCtrDoubleFeatureValue::embedx_g2sum_index();
399
  value[DownpourCtrDoubleFeatureValue::SlotIndex()] = -1;
Y
yaoxuefeng 已提交
400 401 402 403 404 405 406 407
  // other case
  if (str_len == (value_dim - 1)) {
    // copy unseen_days..delta_score
    memcpy(value, data_buff_ptr, show_index * sizeof(float));
    // copy show & click
    *(double*)(value + show_index) = (double)data_buff_ptr[2];
    *(double*)(value + click_index) = (double)data_buff_ptr[3];
    // copy others
408
    value[DownpourCtrDoubleFeatureValue::Embed_W_Index()] = data_buff_ptr[4];
Y
yaoxuefeng 已提交
409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430
    value[DownpourCtrDoubleFeatureValue::embed_g2sum_index()] =
        data_buff_ptr[5];
    memcpy(value + embedx_g2sum_index, data_buff_ptr + 6,
           (embedx_dim + 1) * sizeof(float));
  } else {
    // copy unseen_days..delta_score
    memcpy(value, data_buff_ptr, show_index * sizeof(float));
    // copy show & click
    *(double*)(value + show_index) = (double)data_buff_ptr[2];
    *(double*)(value + click_index) = (double)data_buff_ptr[3];
    // copy embed_w..embedx_w
    memcpy(value + embed_w_index, data_buff_ptr + 4,
           (str_len - 4) * sizeof(float));
  }
  if (str_len == (value_dim - 1) || str_len == 6) {
    str_len += 1;
  }
  return str_len + 2;
}

}  // namespace distributed
}  // namespace paddle