/* Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

  http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */

#pragma once
#ifdef PADDLE_WITH_HETERPS
#include <glog/logging.h>
#include <limits>
#include <memory>
#include <vector>

#ifdef PADDLE_WITH_PSLIB
#include "common_value.h"  // NOLINT
#endif

#if defined(PADDLE_WITH_PSCORE)
#include "paddle/fluid/distributed/ps/table/depends/feature_value.h"
#endif
#include "paddle/fluid/framework/fleet/heter_ps/feature_value.h"
#include "paddle/phi/core/utils/rw_lock.h"

#if defined(PADDLE_WITH_CUDA)
#include "paddle/fluid/framework/fleet/heter_ps/cudf/concurrent_unordered_map.cuh.h"
#include "paddle/fluid/framework/fleet/heter_ps/mem_pool.h"
#include "paddle/fluid/platform/device/gpu/gpu_types.h"
#include "thrust/pair.h"
#elif defined(__xpu__)
#include <xpu/runtime.h>
#include "xpu/kernel/cluster_header.h"
#include "xpu/kernel/math.h"
#include "xpu/kernel/simd.h"
#endif

#include "paddle/fluid/framework/fleet/heter_ps/optimizer_conf.h"

namespace paddle {
namespace framework {

#if defined(PADDLE_WITH_CUDA)
// Thin wrapper over the GPU concurrent hash map that pins the "unused key"
// sentinel to the maximum representable KeyType value and uses a
// default-constructed ValType for empty slots.
template <typename KeyType, typename ValType>
class TableContainer
    : public concurrent_unordered_map<KeyType, ValType,
                                      std::numeric_limits<KeyType>::max()> {
 public:
  // explicit: a bare size_t must not silently convert into a table.
  explicit TableContainer(size_t capacity)
      : concurrent_unordered_map<KeyType, ValType,
                                 std::numeric_limits<KeyType>::max()>(
            capacity, ValType()) {}
};
#elif defined(PADDLE_WITH_XPU_KP)
template <typename KeyType, typename ValType>
class XPUCacheArray {
 public:
64
  explicit XPUCacheArray(long long capacity) : capacity_(capacity), size_(0) {
65 66 67 68 69 70 71 72 73 74
    xpu_malloc(reinterpret_cast<void**>(&keys), capacity_ * sizeof(KeyType));
    xpu_malloc(reinterpret_cast<void**>(&vals), capacity_ * sizeof(ValType));
  }

  virtual ~XPUCacheArray() {
    xpu_free(keys);
    xpu_free(vals);
  }

  void print() {}
75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95

#if defined(__xpu__)
  __device__ ValType* find(const KeyType& key) {
    for (int i = 0; i < size_; i++) {
      if (keys[i] == key) return &vals[i];
    }
    return NULL;
  }
  __device__ bool insert(const KeyType& key, const ValType& val) {
    // # NOTE(zhangminxu): we set the capacity larger than the feasign number of
    // one batch
    if (size_ == capacity_) {
      return false;
    } else {
      keys[size_] = key;
      vals[size_] = val;
      size_++;
      return true;
    }
  }
#endif
96

97
  int prefetch(const int dev_id, XPUStream stream = NULL) { return 0; }
98 99 100 101 102 103 104 105 106
  size_t size() { return size_; }

 private:
  long long capacity_;
  long long size_;
  KeyType* keys;
  ValType* vals;
};
#endif

template <typename KeyType, typename ValType>
class HashTable {
 public:
111
  explicit HashTable(size_t capacity);
T
Thunderbrook 已提交
112 113 114
  virtual ~HashTable();
  HashTable(const HashTable&) = delete;
  HashTable& operator=(const HashTable&) = delete;
115 116

  template <typename StreamType>
T
Thunderbrook 已提交
117
  void insert(const KeyType* d_keys, const ValType* d_vals, size_t len,
118 119 120
              StreamType stream);

  template <typename StreamType>
Y
yaoxuefeng 已提交
121 122
  void insert(const KeyType* d_keys, size_t len, char* pool,
              size_t feature_value_size, size_t start_index, StreamType stream);
123 124

  template <typename StreamType>
T
Thunderbrook 已提交
125
  void get(const KeyType* d_keys, ValType* d_vals, size_t len,
126 127 128 129 130
           StreamType stream);

  template <typename StreamType>
  void get(const KeyType* d_keys, char* d_vals, size_t len, StreamType stream);

T
Thunderbrook 已提交
131 132
  void show();

133 134 135
  void set_sparse_sgd(const OptimizerConfig& optimizer_config);
  void set_embedx_sgd(const OptimizerConfig& optimizer_config);

136 137 138 139 140 141
  template <typename StreamType>
  void dump_to_cpu(int devid, StreamType stream);

#if defined(PADDLE_WITH_CUDA)

  template <typename GradType, typename Sgd, typename StreamType>
T
Thunderbrook 已提交
142
  void update(const KeyType* d_keys, const GradType* d_grads, size_t len,
143
              Sgd sgd, StreamType stream);
T
Thunderbrook 已提交
144

145
  template <typename Sgd, typename StreamType>
146
  void update(const KeyType* d_keys, const char* d_grads, size_t len, Sgd sgd,
147 148 149 150 151 152 153 154 155 156 157 158
              StreamType stream);

#elif defined(PADDLE_WITH_XPU_KP)
  template <typename GradType, typename StreamType>
  void update(const KeyType* d_keys, const GradType* d_grads, size_t len,
              StreamType stream);

  template <typename StreamType>
  void update(const KeyType* d_keys, const char* d_grads, size_t len,
              StreamType stream);

#endif
159

160 161
  int size() { return container_->size(); }

162 163 164 165 166 167 168 169
  void set_feature_value_size(size_t pull_feature_value_size,
                              size_t push_grad_value_size) {
    pull_feature_value_size_ = pull_feature_value_size;
    push_grad_value_size_ = push_grad_value_size;
    VLOG(3) << "hashtable set pull value size: " << pull_feature_value_size_
            << " push value size: " << push_grad_value_size_;
  }

170
  std::unique_ptr<phi::RWLock> rwlock_{nullptr};
171

T
Thunderbrook 已提交
172
 private:
173
#if defined(PADDLE_WITH_CUDA)
T
Thunderbrook 已提交
174
  TableContainer<KeyType, ValType>* container_;
175 176 177
#elif defined(PADDLE_WITH_XPU_KP)
  XPUCacheArray<KeyType, ValType>* container_;
#endif
Z
zmxdream 已提交
178 179 180
  OptimizerConfig* device_optimizer_config_;
  OptimizerConfig host_optimizer_config_;

T
Thunderbrook 已提交
181 182 183
  int BLOCK_SIZE_{256};
  float LOAD_FACTOR{0.75f};
  size_t capacity_;
184 185 186
  size_t max_mf_dim_ = 8;
  size_t pull_feature_value_size_;
  size_t push_grad_value_size_;
T
Thunderbrook 已提交
187 188 189 190
};
}  // end namespace framework
}  // end namespace paddle
#endif