/* Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

  http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */

#pragma once
#ifdef PADDLE_WITH_HETERPS
#include <glog/logging.h>

#include <limits>
#include <memory>
#include <vector>

#ifdef PADDLE_WITH_PSLIB
#include "common_value.h"  // NOLINT
#endif

#if defined(PADDLE_WITH_PSCORE)
#include "paddle/fluid/distributed/ps/table/depends/feature_value.h"
#endif
#include "paddle/fluid/framework/fleet/heter_ps/feature_value.h"
#include "paddle/phi/core/utils/rw_lock.h"

#if defined(PADDLE_WITH_CUDA)
#include "paddle/fluid/framework/fleet/heter_ps/cudf/concurrent_unordered_map.cuh.h"
#include "paddle/fluid/framework/fleet/heter_ps/mem_pool.h"
#include "paddle/fluid/platform/device/gpu/gpu_types.h"
#include "thrust/pair.h"
#elif defined(__xpu__)
#include <xpu/runtime.h>

#include "xpu/kernel/cluster_header.h"
#include "xpu/kernel/math.h"
#include "xpu/kernel/simd.h"
#endif

#include "paddle/fluid/framework/fleet/heter_ps/optimizer_conf.h"

namespace paddle {
namespace framework {

#if defined(PADDLE_WITH_CUDA)
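// GPU backend: a thin wrapper over the cuDF-style concurrent_unordered_map,
// fixing the unused-key sentinel to the maximum KeyType value and using a
// default-constructed ValType as the initial fill value.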
template <typename KeyType, typename ValType>
class TableContainer
    : public concurrent_unordered_map<KeyType,
                                      ValType,
                                      std::numeric_limits<KeyType>::max()> {
 public:
  explicit TableContainer(size_t capacity)
      : concurrent_unordered_map<KeyType,
                                 ValType,
                                 std::numeric_limits<KeyType>::max()>(
            capacity, ValType()) {}
};
#elif defined(PADDLE_WITH_XPU_KP)
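// XPU backend: a flat key/value array in device memory searched by linear
// scan, standing in for the concurrent map used on the GPU side.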
template <typename KeyType, typename ValType>
class XPUCacheArray {
 public:
  explicit XPUCacheArray(long long capacity) : capacity_(capacity), size_(0) {
    xpu_malloc(reinterpret_cast<void**>(&keys), capacity_ * sizeof(KeyType));
    xpu_malloc(reinterpret_cast<void**>(&vals), capacity_ * sizeof(ValType));
  }

  virtual ~XPUCacheArray() {
    xpu_free(keys);
    xpu_free(vals);
  }

  void print() {}  // debug hook; a no-op on this backend

#if defined(__xpu__)
  __device__ ValType* find(const KeyType& key) {
    // Linear scan; the array holds at most one batch worth of feasigns.
    for (long long i = 0; i < size_; i++) {
      if (keys[i] == key) return &vals[i];
    }
    return nullptr;
  }
  __device__ bool insert(const KeyType& key, const ValType& val) {
    // NOTE(zhangminxu): we set the capacity larger than the feasign number
    // of one batch
    if (size_ == capacity_) {
      return false;
    } else {
      keys[size_] = key;
      vals[size_] = val;
      size_++;
      return true;
    }
  }
#endif

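  // No-op on this backend; the GPU container provides a real prefetch.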
  int prefetch(const int dev_id, XPUStream stream = NULL) { return 0; }
  size_t size() { return size_; }

 private:
  long long capacity_;
  long long size_;
  KeyType* keys;
  ValType* vals;
};
#endif
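
// Device hash table used by HeterPS for batched insert/get/update of feature
// values keyed by feasign. Backed by TableContainer on CUDA builds and
// XPUCacheArray on XPU builds; the char*-based overloads work on values
// packed in an external memory pool.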
template <typename KeyType, typename ValType>
class HashTable {
 public:
  explicit HashTable(size_t capacity);
  virtual ~HashTable();
  HashTable(const HashTable&) = delete;
  HashTable& operator=(const HashTable&) = delete;

  // Copies len (key, value) pairs from device memory into the table.
  template <typename StreamType>
  void insert(const KeyType* d_keys,
              const ValType* d_vals,
              size_t len,
              StreamType stream);

  // Insert variant backed by an external memory pool: the value for each of
  // the len keys is read from "pool" (feature_value_size bytes per slot,
  // beginning at slot start_index) instead of a d_vals array.
  template <typename StreamType>
  void insert(const KeyType* d_keys,
              size_t len,
              char* pool,
              size_t feature_value_size,
              size_t start_index,
              StreamType stream);

  // Looks up len keys and writes the matching values to d_vals.
  template <typename StreamType>
  void get(const KeyType* d_keys,
           ValType* d_vals,
           size_t len,
           StreamType stream);

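  // Lookup variant that writes packed feature values into the raw buffer
  // d_vals, converting each entry through fv_accessor.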
  template <typename StreamType, typename FVAccessor>
  void get(const KeyType* d_keys,
           char* d_vals,
           size_t len,
           StreamType stream,
           FVAccessor& fv_accessor);

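  // Prints the table contents for debugging.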
  void show();

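  // Updates the optimizer configuration used by update(); sparse and embedx
  // parameters are configured separately.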
  void set_sparse_sgd(const OptimizerConfig& optimizer_config);
  void set_embedx_sgd(const OptimizerConfig& optimizer_config);

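  // Copies the table entries on device devid back to the host-side table.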
  template <typename StreamType>
  void dump_to_cpu(int devid, StreamType stream);

#if defined(PADDLE_WITH_CUDA)

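  // Applies len gradients to the matching entries in place, using the sgd
  // functor, launched on the given stream.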
  template <typename Sgd, typename StreamType>
  void update(const KeyType* d_keys,
              const float* d_grads,
              size_t len,
              Sgd sgd,
              StreamType stream);

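  // update variant that reads packed gradients from a raw char buffer.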
  template <typename Sgd, typename StreamType>
  void update(const KeyType* d_keys,
              const char* d_grads,
              size_t len,
              Sgd sgd,
              StreamType stream);

#elif defined(PADDLE_WITH_XPU_KP)
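  // XPU variants: the optimizer is driven by the OptimizerConfig set via
  // set_sparse_sgd / set_embedx_sgd rather than by an Sgd functor.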
  template <typename GradType, typename StreamType>
  void update(const KeyType* d_keys,
              const GradType* d_grads,
              size_t len,
              StreamType stream);

  template <typename StreamType>
  void update(const KeyType* d_keys,
              const char* d_grads,
              size_t len,
              StreamType stream);

#endif

  int size() { return container_->size(); }

  void set_feature_value_size(size_t pull_feature_value_size,
                              size_t push_grad_value_size) {
    pull_feature_value_size_ = pull_feature_value_size;
    push_grad_value_size_ = push_grad_value_size;
    VLOG(3) << "hashtable set pull value size: " << pull_feature_value_size_
            << " push value size: " << push_grad_value_size_;
  }

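  // Reader/writer lock guarding concurrent access to the table.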
  std::unique_ptr<phi::RWLock> rwlock_{nullptr};

 private:
#if defined(PADDLE_WITH_CUDA)
  TableContainer<KeyType, ValType>* container_;
#elif defined(PADDLE_WITH_XPU_KP)
  XPUCacheArray<KeyType, ValType>* container_;
#endif
  OptimizerConfig* device_optimizer_config_;
  OptimizerConfig host_optimizer_config_;

  int BLOCK_SIZE_{256};
  float LOAD_FACTOR{0.75f};
  size_t capacity_;
  size_t max_mf_dim_ = 8;
  size_t pull_feature_value_size_;
  size_t push_grad_value_size_;
};
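
// Usage sketch (illustrative only, not part of the API; assumes a CUDA
// build, a caller-created cudaStream_t "stream", and caller-owned device
// buffers d_keys / d_vals / d_out of length "len" -- these names are
// hypothetical):
//
//   HashTable<uint64_t, float> table(capacity);
//   table.insert(d_keys, d_vals, len, stream);  // build the table
//   table.get(d_keys, d_out, len, stream);      // batched lookup
//   table.show();                               // debug print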
}  // end namespace framework
}  // end namespace paddle
#endif  // PADDLE_WITH_HETERPS