hashtable.h 5.7 KB
Newer Older
T
Thunderbrook 已提交
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15
/* Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

  http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */

#pragma once
16
#ifdef PADDLE_WITH_HETERPS
T
Thunderbrook 已提交
17
#include <glog/logging.h>
T
Thunderbrook 已提交
18 19 20
#include <limits>
#include <memory>
#include <vector>
21

T
Thunderbrook 已提交
22
#ifdef PADDLE_WITH_PSLIB
T
Thunderbrook 已提交
23
#include "common_value.h"  // NOLINT
T
Thunderbrook 已提交
24
#endif
25 26

#if defined(PADDLE_WITH_PSCORE)
27
#include "paddle/fluid/distributed/ps/table/depends/feature_value.h"
T
Thunderbrook 已提交
28
#endif
29
#include "paddle/fluid/framework/fleet/heter_ps/feature_value.h"
30
#include "paddle/phi/core/utils/rw_lock.h"
31 32

#if defined(PADDLE_WITH_CUDA)
T
Thunderbrook 已提交
33
#include "paddle/fluid/framework/fleet/heter_ps/cudf/concurrent_unordered_map.cuh.h"
34
#include "paddle/fluid/framework/fleet/heter_ps/mem_pool.h"
35
#include "paddle/fluid/platform/device/gpu/gpu_types.h"
36 37 38 39 40 41 42
#include "thrust/pair.h"
#elif defined(__xpu__)
#include <xpu/runtime.h>
#include "xpu/kernel/cluster_header.h"
#include "xpu/kernel/math.h"
#include "xpu/kernel/simd.h"
#endif
T
Thunderbrook 已提交
43

44 45 46 47
#if defined(PADDLE_WITH_XPU_KP)
#include "paddle/fluid/framework/fleet/heter_ps/optimizer_conf.h"
#endif

T
Thunderbrook 已提交
48 49 50
namespace paddle {
namespace framework {

51
#if defined(PADDLE_WITH_CUDA)
T
Thunderbrook 已提交
52 53 54 55 56 57 58 59 60 61
/**
 * Key/value container used by HashTable on CUDA builds.
 *
 * Thin wrapper over concurrent_unordered_map that pins the third template
 * argument to std::numeric_limits<KeyType>::max() (presumably the map's
 * "unused key" sentinel -- confirm against concurrent_unordered_map) and
 * forwards a value-initialized ValType as the map's second constructor
 * argument.
 *
 * @tparam KeyType  key type of the underlying map.
 * @tparam ValType  mapped value type of the underlying map.
 */
template <typename KeyType, typename ValType>
class TableContainer
    : public concurrent_unordered_map<KeyType, ValType,
                                      std::numeric_limits<KeyType>::max()> {
 public:
  /**
   * @param capacity  forwarded unchanged as the first constructor argument of
   *                  the underlying concurrent_unordered_map.
   *
   * Marked explicit so that a bare size_t never converts implicitly into a
   * container, matching the other single-argument constructors in this file.
   */
  explicit TableContainer(size_t capacity)
      : concurrent_unordered_map<KeyType, ValType,
                                 std::numeric_limits<KeyType>::max()>(
            capacity, ValType()) {}
};
62 63 64 65
#elif defined(PADDLE_WITH_XPU_KP)
template <typename KeyType, typename ValType>
class XPUCacheArray {
 public:
66
  explicit XPUCacheArray(long long capacity) : capacity_(capacity), size_(0) {
67 68 69 70 71 72 73 74 75 76
    xpu_malloc(reinterpret_cast<void**>(&keys), capacity_ * sizeof(KeyType));
    xpu_malloc(reinterpret_cast<void**>(&vals), capacity_ * sizeof(ValType));
  }

  virtual ~XPUCacheArray() {
    xpu_free(keys);
    xpu_free(vals);
  }

  void print() {}
77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97

#if defined(__xpu__)
  __device__ ValType* find(const KeyType& key) {
    for (int i = 0; i < size_; i++) {
      if (keys[i] == key) return &vals[i];
    }
    return NULL;
  }
  __device__ bool insert(const KeyType& key, const ValType& val) {
    // # NOTE(zhangminxu): we set the capacity larger than the feasign number of
    // one batch
    if (size_ == capacity_) {
      return false;
    } else {
      keys[size_] = key;
      vals[size_] = val;
      size_++;
      return true;
    }
  }
#endif
98 99 100 101 102 103 104 105 106 107 108

  int prefetch(const int dev_id, XPUStream stream = NULL) { return 0; }
  size_t size() { return size_; }

 private:
  long long capacity_;
  long long size_;
  KeyType* keys;
  ValType* vals;
};
#endif
T
Thunderbrook 已提交
109 110 111 112

template <typename KeyType, typename ValType>
class HashTable {
 public:
113
  explicit HashTable(size_t capacity);
T
Thunderbrook 已提交
114 115 116
  virtual ~HashTable();
  HashTable(const HashTable&) = delete;
  HashTable& operator=(const HashTable&) = delete;
117 118

  template <typename StreamType>
T
Thunderbrook 已提交
119
  void insert(const KeyType* d_keys, const ValType* d_vals, size_t len,
120 121 122
              StreamType stream);

  template <typename StreamType>
123
  void insert(const KeyType* d_keys, size_t len, char* pool, size_t start_index,
124 125 126
              StreamType stream);

  template <typename StreamType>
T
Thunderbrook 已提交
127
  void get(const KeyType* d_keys, ValType* d_vals, size_t len,
128 129 130 131 132
           StreamType stream);

  template <typename StreamType>
  void get(const KeyType* d_keys, char* d_vals, size_t len, StreamType stream);

T
Thunderbrook 已提交
133 134
  void show();

135 136 137 138 139
#if defined(PADDLE_WITH_XPU_KP)
  void set_sparse_sgd(const OptimizerConfig& optimizer_config);
  void set_embedx_sgd(const OptimizerConfig& optimizer_config);
#endif

140 141 142 143 144 145
  template <typename StreamType>
  void dump_to_cpu(int devid, StreamType stream);

#if defined(PADDLE_WITH_CUDA)

  template <typename GradType, typename Sgd, typename StreamType>
T
Thunderbrook 已提交
146
  void update(const KeyType* d_keys, const GradType* d_grads, size_t len,
147
              Sgd sgd, StreamType stream);
T
Thunderbrook 已提交
148

149
  template <typename Sgd, typename StreamType>
150
  void update(const KeyType* d_keys, const char* d_grads, size_t len, Sgd sgd,
151 152 153 154 155 156 157 158 159 160 161 162
              StreamType stream);

#elif defined(PADDLE_WITH_XPU_KP)
  template <typename GradType, typename StreamType>
  void update(const KeyType* d_keys, const GradType* d_grads, size_t len,
              StreamType stream);

  template <typename StreamType>
  void update(const KeyType* d_keys, const char* d_grads, size_t len,
              StreamType stream);

#endif
163

164 165
  int size() { return container_->size(); }

166 167 168 169 170 171 172 173
  void set_feature_value_size(size_t pull_feature_value_size,
                              size_t push_grad_value_size) {
    pull_feature_value_size_ = pull_feature_value_size;
    push_grad_value_size_ = push_grad_value_size;
    VLOG(3) << "hashtable set pull value size: " << pull_feature_value_size_
            << " push value size: " << push_grad_value_size_;
  }

174
  std::unique_ptr<phi::RWLock> rwlock_{nullptr};
175

T
Thunderbrook 已提交
176
 private:
177
#if defined(PADDLE_WITH_CUDA)
T
Thunderbrook 已提交
178
  TableContainer<KeyType, ValType>* container_;
179 180
#elif defined(PADDLE_WITH_XPU_KP)
  XPUCacheArray<KeyType, ValType>* container_;
181 182
  OptimizerConfig* xpu_optimizer_config_;
  OptimizerConfig cpu_optimizer_config_;
183
#endif
T
Thunderbrook 已提交
184 185 186
  int BLOCK_SIZE_{256};
  float LOAD_FACTOR{0.75f};
  size_t capacity_;
187 188 189
  size_t max_mf_dim_ = 8;
  size_t pull_feature_value_size_;
  size_t push_grad_value_size_;
T
Thunderbrook 已提交
190 191 192 193
};
}  // end namespace framework
}  // end namespace paddle
#endif