/* Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

  http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */

#pragma once
#ifdef PADDLE_WITH_HETERPS
#include <glog/logging.h>

#include <limits>
#include <memory>
#include <vector>

#ifdef PADDLE_WITH_PSLIB
#include "common_value.h"  // NOLINT
#endif

#if defined(PADDLE_WITH_PSCORE)
#include "paddle/fluid/distributed/ps/table/depends/feature_value.h"
#endif
#include "paddle/fluid/framework/fleet/heter_ps/feature_value.h"
#include "paddle/phi/core/utils/rw_lock.h"

#if defined(PADDLE_WITH_CUDA)
#include "paddle/fluid/framework/fleet/heter_ps/cudf/concurrent_unordered_map.cuh.h"
#include "paddle/fluid/framework/fleet/heter_ps/mem_pool.h"
#include "paddle/fluid/platform/device/gpu/gpu_types.h"
#include "thrust/pair.h"
#elif defined(__xpu__)
#include <xpu/runtime.h>

#include "xpu/kernel/cluster_header.h"
#include "xpu/kernel/math.h"
#include "xpu/kernel/simd.h"
#endif

#include "paddle/fluid/framework/fleet/heter_ps/optimizer_conf.h"

namespace paddle {
namespace framework {

#if defined(PADDLE_WITH_CUDA)
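// TableContainer: a thin wrapper over the cuDF-derived
// concurrent_unordered_map that fixes the "unused key" sentinel to
// std::numeric_limits<KeyType>::max(), so that key value must never be
// inserted. Capacity is fixed at construction time.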
template <typename KeyType, typename ValType>
class TableContainer
    : public concurrent_unordered_map<KeyType,
                                      ValType,
                                      std::numeric_limits<KeyType>::max()> {
 public:
  explicit TableContainer(size_t capacity)
      : concurrent_unordered_map<KeyType,
                                 ValType,
                                 std::numeric_limits<KeyType>::max()>(
            capacity, ValType()) {}
};
#elif defined(PADDLE_WITH_XPU_KP)
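// XPUCacheArray: a minimal stand-in for the GPU hash table on XPU devices.
// Keys and values are stored in two flat arrays in device memory; insert()
// appends and find() does a linear scan, so it is only intended for small,
// per-batch key sets (see the NOTE in insert()).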
template <typename KeyType, typename ValType>
class XPUCacheArray {
 public:
  explicit XPUCacheArray(long long capacity) : capacity_(capacity), size_(0) {
    xpu_malloc(reinterpret_cast<void**>(&keys), capacity_ * sizeof(KeyType));
    xpu_malloc(reinterpret_cast<void**>(&vals), capacity_ * sizeof(ValType));
  }

  virtual ~XPUCacheArray() {
    xpu_free(keys);
    xpu_free(vals);
  }

  void print() {}
  void print_collision(int i) {}

#if defined(__xpu__)
  // Linear scan over all stored keys; O(size_) per lookup.
  __device__ ValType* find(const KeyType& key) {
    for (long long i = 0; i < size_; i++) {
      if (keys[i] == key) return &vals[i];
    }
    return nullptr;
  }
  __device__ bool insert(const KeyType& key, const ValType& val) {
    // NOTE(zhangminxu): we set the capacity larger than the feasign number
    // of one batch
    if (size_ == capacity_) {
      return false;
    } else {
      keys[size_] = key;
      vals[size_] = val;
      size_++;
      return true;
    }
  }
#endif

  int prefetch(const int dev_id, XPUStream stream = nullptr) { return 0; }
  size_t size() { return size_; }

 private:
  long long capacity_;
  long long size_;
  KeyType* keys;
  ValType* vals;
};
#endif

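// HashTable: device key -> feature-value table used by HeterPS. Backed by
// TableContainer (a concurrent hash map) on CUDA and by XPUCacheArray on
// XPU. Bulk operations take device pointers (d_*) and launch their kernels
// on the caller-provided stream.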
template <typename KeyType, typename ValType>
class HashTable {
 public:
  explicit HashTable(size_t capacity);
  virtual ~HashTable();
  HashTable(const HashTable&) = delete;
  HashTable& operator=(const HashTable&) = delete;

  // Inserts `len` key/value pairs (device pointers); kernels are launched
  // on `stream`.
  template <typename StreamType>
  void insert(const KeyType* d_keys,
              const ValType* d_vals,
              size_t len,
              StreamType stream);

  // Inserts `len` keys whose values live in the external memory pool `pool`:
  // each value occupies `feature_value_size` bytes, starting at slot
  // `start_index`.
  template <typename StreamType>
  void insert(const KeyType* d_keys,
              size_t len,
              char* pool,
              size_t feature_value_size,
              size_t start_index,
              StreamType stream);

  // Looks up `len` keys and copies the matching values into `d_vals`.
  template <typename StreamType>
  void get(const KeyType* d_keys,
           ValType* d_vals,
           size_t len,
           StreamType stream);

  // Looks up `len` keys and writes the raw feature-value bytes into
  // `d_vals`, decoded through `fv_accessor`.
  template <typename StreamType, typename GPUAccessor>
  void get(const KeyType* d_keys,
           char* d_vals,
           size_t len,
           StreamType stream,
           GPUAccessor& fv_accessor);

  void show();

  // Copies optimizer hyper-parameters into the device-side config.
  void set_sparse_sgd(const OptimizerConfig& optimizer_config);
  void set_embedx_sgd(const OptimizerConfig& optimizer_config);

  // Dumps the table contents on device `devid` back to the host-side table.
  template <typename StreamType>
  void dump_to_cpu(int devid, StreamType stream);

#if defined(PADDLE_WITH_CUDA)

  // Applies `len` gradients to the matching values with the device-side
  // optimizer functor `sgd`.
  template <typename Sgd, typename StreamType>
  void update(const KeyType* d_keys,
              const float* d_grads,
              size_t len,
              Sgd sgd,
              StreamType stream);

  // Same as above, but with type-erased gradients (raw bytes in `d_grads`).
  template <typename Sgd, typename StreamType>
  void update(const KeyType* d_keys,
              const char* d_grads,
              size_t len,
              Sgd sgd,
              StreamType stream);

#elif defined(PADDLE_WITH_XPU_KP)
  template <typename GradType, typename StreamType>
  void update(const KeyType* d_keys,
              const GradType* d_grads,
              size_t len,
              StreamType stream);

  template <typename StreamType>
  void update(const KeyType* d_keys,
              const char* d_grads,
              size_t len,
              StreamType stream);

#endif

  int size() { return container_->size(); }

  void set_feature_value_size(size_t pull_feature_value_size,
                              size_t push_grad_value_size) {
    pull_feature_value_size_ = pull_feature_value_size;
    push_grad_value_size_ = push_grad_value_size;
    VLOG(3) << "hashtable set pull value size: " << pull_feature_value_size_
            << " push value size: " << push_grad_value_size_;
  }

  void show_collision(int id) { container_->print_collision(id); }

  std::unique_ptr<phi::RWLock> rwlock_{nullptr};

 private:
#if defined(PADDLE_WITH_CUDA)
  TableContainer<KeyType, ValType>* container_;
#elif defined(PADDLE_WITH_XPU_KP)
  XPUCacheArray<KeyType, ValType>* container_;
#endif
  OptimizerConfig* device_optimizer_config_;
  OptimizerConfig host_optimizer_config_;

  int BLOCK_SIZE_{256};
  float LOAD_FACTOR{0.75f};
  size_t capacity_;
  size_t max_mf_dim_ = 8;
  size_t pull_feature_value_size_;
  size_t push_grad_value_size_;
};
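
// A minimal host-side usage sketch (illustrative only: the concrete key and
// value types, capacity, buffer names, and stream type depend on the build
// and the caller):
//
//   HashTable<uint64_t, float> table(1 << 20);
//   table.insert(d_keys, d_vals, len, stream);  // d_* are device pointers
//   table.get(d_keys, d_out, len, stream);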
}  // end namespace framework
}  // end namespace paddle
#endif  // PADDLE_WITH_HETERPS