tsimplehash.c 8.4 KB
Newer Older
H
Haojun Liao 已提交
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16
/*
 * Copyright (c) 2019 TAOS Data, Inc. <jhtao@taosdata.com>
 *
 * This program is free software: you can use, redistribute, and/or modify
 * it under the terms of the GNU Affero General Public License, version 3
 * or later ("AGPL"), as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.
 *
 * You should have received a copy of the GNU Affero General Public License
 * along with this program. If not, see <http://www.gnu.org/licenses/>.
 */

#include "tsimplehash.h"
C
Cary Xu 已提交
17
#include "os.h"
H
Haojun Liao 已提交
18 19 20
#include "taoserror.h"

/* Fill ratio (size / capacity) at which the table is considered due for a resize. */
#define SHASH_DEFAULT_LOAD_FACTOR 0.75

/* Upper bound on the number of slots: 16M. */
#define HASH_MAX_CAPACITY (1024 * 1024 * 16)

/* Non-zero once the element count has reached capacity * load factor. */
#define SHASH_NEED_RESIZE(_h) ((_h)->size >= (_h)->capacity * SHASH_DEFAULT_LOAD_FACTOR)

/*
 * Payload accessors. The bytes that follow the SHNode header hold the data
 * first and the key second, so the key starts _dl (data length) bytes after
 * the data.
 */
#define GET_SHASH_NODE_KEY(_n, _dl) ((char *)(_n) + sizeof(SHNode) + (_dl))
#define GET_SHASH_NODE_DATA(_n)     ((char *)(_n) + sizeof(SHNode))

/* Maps hash value v to a slot index; only meaningful when c is a power of two. */
#define HASH_INDEX(v, c) ((v) & ((c) - 1))
H
Haojun Liao 已提交
28

C
Cary Xu 已提交
29 30 31
/*
 * Releases a hash node through taosMemoryFreeClear.
 *
 * The do { } while (0) wrapper deliberately has no trailing semicolon: the
 * caller's own `;` completes the statement, so `FREE_HASH_NODE(p);` is exactly
 * one statement and stays valid as an unbraced if/else branch.
 */
#define FREE_HASH_NODE(_n)   \
  do {                       \
    taosMemoryFreeClear(_n); \
  } while (0)

/*
 * One entry of a slot's singly linked collision chain.
 * The flexible array holds the entry payload: data bytes followed by key bytes
 * (see GET_SHASH_NODE_DATA / GET_SHASH_NODE_KEY).
 */
typedef struct SHNode {
  struct SHNode *next;    // following node in the same slot, NULL at the tail
  char           data[];  // payload: [data | key]
} SHNode;

39
struct SSHashObj {
C
Cary Xu 已提交
40 41 42 43 44 45 46
  SHNode    **hashList;
  size_t      capacity;  // number of slots
  int64_t     size;      // number of elements in hash table
  _hash_fn_t  hashFp;    // hash function
  _equal_fn_t equalFp;   // equal function
  int32_t     keyLen;
  int32_t     dataLen;
47
};
H
Haojun Liao 已提交
48 49

/*
 * Picks the slot count for a requested length: the smallest power of two that
 * is >= min(length, HASH_MAX_CAPACITY), and never less than 4.
 */
static FORCE_INLINE int32_t taosHashCapacity(int32_t length) {
  int32_t limit = length;
  if (limit >= HASH_MAX_CAPACITY) {
    limit = HASH_MAX_CAPACITY;
  }

  int32_t slots = 4;
  for (; slots < limit; slots = (slots << 1u)) {
    // keep doubling until the limit is covered
  }
  return slots;
}

/*
 * Creates an empty hash table.
 *
 * capacity  requested number of slots; 0 is treated as 4, and the value is
 *           then adjusted by taosHashCapacity()
 * fn        hash function, must not be NULL
 * keyLen    length in bytes of every key
 * dataLen   length in bytes of every value
 *
 * Returns the new table, or NULL with terrno set to TSDB_CODE_OUT_OF_MEMORY
 * when an allocation fails. Keys are compared with memcmp.
 */
SSHashObj *tSimpleHashInit(size_t capacity, _hash_fn_t fn, size_t keyLen, size_t dataLen) {
  ASSERT(fn != NULL);

  size_t requested = (capacity == 0) ? 4 : capacity;

  SSHashObj *pTable = (SSHashObj *)taosMemoryCalloc(1, sizeof(SSHashObj));
  if (pTable == NULL) {
    terrno = TSDB_CODE_OUT_OF_MEMORY;
    return NULL;
  }

  // the slot count is derived from the request, not taken verbatim
  pTable->capacity = taosHashCapacity((int32_t)requested);
  ASSERT((pTable->capacity & (pTable->capacity - 1)) == 0);

  pTable->hashFp = fn;
  pTable->equalFp = memcmp;
  pTable->keyLen = keyLen;
  pTable->dataLen = dataLen;

  pTable->hashList = (SHNode **)taosMemoryCalloc(pTable->capacity, sizeof(void *));
  if (pTable->hashList != NULL) {
    return pTable;
  }

  // slot array allocation failed: release the table header again
  taosMemoryFree(pTable);
  terrno = TSDB_CODE_OUT_OF_MEMORY;
  return NULL;
}

int32_t tSimpleHashGetSize(const SSHashObj *pHashObj) {
  if (pHashObj == NULL) {
    return 0;
  }
C
Cary Xu 已提交
93
  return (int32_t)atomic_load_64((int64_t *)&pHashObj->size);
H
Haojun Liao 已提交
94 95 96
}

static SHNode *doCreateHashNode(const void *key, size_t keyLen, const void *pData, size_t dsize, uint32_t hashVal) {
wafwerar's avatar
wafwerar 已提交
97
  SHNode *pNewNode = taosMemoryMalloc(sizeof(SHNode) + keyLen + dsize);
H
Haojun Liao 已提交
98 99 100 101 102 103 104 105 106 107 108
  if (pNewNode == NULL) {
    terrno = TSDB_CODE_OUT_OF_MEMORY;
    return NULL;
  }

  pNewNode->next = NULL;
  memcpy(GET_SHASH_NODE_DATA(pNewNode), pData, dsize);
  memcpy(GET_SHASH_NODE_KEY(pNewNode, dsize), key, keyLen);
  return pNewNode;
}

109
/*
 * Doubles the number of slots and moves every node to the slot that matches
 * its hash under the new capacity.
 *
 * Nothing happens when the load factor has not been reached, when doubling
 * would exceed HASH_MAX_CAPACITY, or when the slot array cannot be
 * reallocated; in those cases the table keeps its current slot array.
 */
static void taosHashTableResize(SSHashObj *pHashObj) {
  if (!SHASH_NEED_RESIZE(pHashObj)) {
    return;
  }

  int32_t newCapacity = (int32_t)(pHashObj->capacity << 1u);
  if (newCapacity > HASH_MAX_CAPACITY) {
    // growth limit reached, no resize applied
    return;
  }

  void *pNewEntryList = taosMemoryRealloc(pHashObj->hashList, sizeof(void *) * newCapacity);
  if (pNewEntryList == NULL) {
    // out of memory, capacity remains unchanged
    return;
  }

  // Zero all newly added slots. memset takes a byte count, so the number of
  // new slots has to be scaled by the slot (pointer) size; otherwise most of
  // the new slots would hold garbage that the loop below would dereference.
  size_t inc = newCapacity - pHashObj->capacity;
  memset((char *)pNewEntryList + pHashObj->capacity * sizeof(void *), 0, inc * sizeof(void *));

  pHashObj->hashList = pNewEntryList;
  pHashObj->capacity = newCapacity;

  for (int32_t idx = 0; idx < pHashObj->capacity; ++idx) {
    SHNode *pNode = pHashObj->hashList[idx];
    SHNode *pPrev = NULL;  // last node that stayed in slot idx

    while (pNode != NULL) {
      void    *key = GET_SHASH_NODE_KEY(pNode, pHashObj->dataLen);
      uint32_t hashVal = (*pHashObj->hashFp)(key, (uint32_t)pHashObj->keyLen);

      int32_t newIdx = HASH_INDEX(hashVal, pHashObj->capacity);
      SHNode *pNext = pNode->next;
      if (newIdx != idx) {
        // unlink from the current chain ...
        if (pPrev == NULL) {
          pHashObj->hashList[idx] = pNext;
        } else {
          pPrev->next = pNext;
        }

        // ... and push onto the head of the destination chain
        pNode->next = pHashObj->hashList[newIdx];
        pHashObj->hashList[newIdx] = pNode;
      } else {
        pPrev = pNode;
      }

      pNode = pNext;
    }
  }
}

/*
 * Inserts a key/value pair, or overwrites the value when the key is already
 * present. Key and value are copied (keyLen / dataLen bytes respectively).
 *
 * Returns 0 on success, -1 when pHashObj or key is NULL or when a new node
 * cannot be allocated.
 */
int32_t tSimpleHashPut(SSHashObj *pHashObj, const void *key, const void *data) {
  if (!pHashObj || !key) {
    return -1;
  }

  uint32_t hashVal = (*pHashObj->hashFp)(key, (uint32_t)pHashObj->keyLen);

  // grow first so that the slot below is computed against the final capacity
  if (SHASH_NEED_RESIZE(pHashObj)) {
    taosHashTableResize(pHashObj);
  }

  int32_t  slot = HASH_INDEX(hashVal, pHashObj->capacity);
  SHNode **ppHead = &pHashObj->hashList[slot];

  // look for an existing entry with the same key
  SHNode *pHit = *ppHead;
  for (; pHit != NULL; pHit = pHit->next) {
    if ((*(pHashObj->equalFp))(GET_SHASH_NODE_KEY(pHit, pHashObj->dataLen), key, pHashObj->keyLen) == 0) {
      break;
    }
  }

  if (pHit != NULL) {  // update data
    memcpy(GET_SHASH_NODE_DATA(pHit), data, pHashObj->dataLen);
    return 0;
  }

  SHNode *pFresh = doCreateHashNode(key, pHashObj->keyLen, data, pHashObj->dataLen, hashVal);
  if (pFresh == NULL) {
    return -1;
  }

  // new entries go to the head of the chain (the head is NULL for an empty slot)
  pFresh->next = *ppHead;
  *ppHead = pFresh;
  atomic_add_fetch_64(&pHashObj->size, 1);
  return 0;
}

/*
 * Walks the collision chain of slot `index` and returns the first node whose
 * key compares equal to `key`, or NULL when the chain holds no such node.
 */
static FORCE_INLINE SHNode *doSearchInEntryList(SSHashObj *pHashObj, const void *key, int32_t index) {
  for (SHNode *pCur = pHashObj->hashList[index]; pCur != NULL; pCur = pCur->next) {
    if ((*(pHashObj->equalFp))(GET_SHASH_NODE_KEY(pCur, pHashObj->dataLen), key, pHashObj->keyLen) == 0) {
      return pCur;
    }
  }

  return NULL;
}

C
Cary Xu 已提交
235
/* True when the table holds no elements (a NULL table counts as empty). */
static FORCE_INLINE bool taosHashTableEmpty(const SSHashObj *pHashObj) {
  int32_t numOfElems = tSimpleHashGetSize(pHashObj);
  return numOfElems == 0;
}
H
Haojun Liao 已提交
236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260

/*
 * Looks up `key` and returns a pointer to the value stored inside the table,
 * or NULL when the table is NULL or empty, the key is NULL, or the key is not
 * present.
 */
void *tSimpleHashGet(SSHashObj *pHashObj, const void *key) {
  if (pHashObj == NULL || taosHashTableEmpty(pHashObj) || key == NULL) {
    return NULL;
  }

  uint32_t hashVal = (*pHashObj->hashFp)(key, (uint32_t)pHashObj->keyLen);
  int32_t  slot = HASH_INDEX(hashVal, pHashObj->capacity);

  // empty slot: nothing to search
  if (pHashObj->hashList[slot] == NULL) {
    return NULL;
  }

  SHNode *pFound = doSearchInEntryList(pHashObj, key, slot);
  return (pFound != NULL) ? GET_SHASH_NODE_DATA(pFound) : NULL;
}

/*
 * Removes the entry stored under `key`, if any, and releases its node.
 *
 * Always returns TSDB_CODE_SUCCESS: a NULL table, a NULL key, or a key that is
 * not present is treated as "nothing to remove".
 *
 * Pointers previously obtained from tSimpleHashGet / tSimpleHashGetKey for the
 * removed entry point into the released node and must not be used afterwards.
 */
int32_t tSimpleHashRemove(SSHashObj *pHashObj, const void *key) {
  if (pHashObj == NULL || key == NULL) {
    return TSDB_CODE_SUCCESS;
  }

  uint32_t hashVal = (*pHashObj->hashFp)(key, (uint32_t)pHashObj->keyLen);
  int32_t  slot = HASH_INDEX(hashVal, pHashObj->capacity);

  // ppLink always addresses the pointer that refers to the node under
  // inspection (slot head or the previous node's next), so unlinking needs no
  // special case for the first node of the chain.
  SHNode **ppLink = &pHashObj->hashList[slot];
  while (*ppLink != NULL) {
    SHNode *pNode = *ppLink;
    if ((*(pHashObj->equalFp))(GET_SHASH_NODE_KEY(pNode, pHashObj->dataLen), key, pHashObj->keyLen) == 0) {
      *ppLink = pNode->next;
      FREE_HASH_NODE(pNode);
      atomic_add_fetch_64(&pHashObj->size, -1);
      break;
    }
    ppLink = &pNode->next;
  }

  return TSDB_CODE_SUCCESS;
}

/*
 * Removes and releases every entry while keeping the slot array, so the table
 * can be reused afterwards. Does nothing when pHashObj is NULL.
 */
void tSimpleHashClear(SSHashObj *pHashObj) {
  if (pHashObj == NULL) {
    return;
  }

  for (size_t i = 0; i < pHashObj->capacity; ++i) {
    SHNode *pNode = pHashObj->hashList[i];
    while (pNode != NULL) {
      SHNode *pNext = pNode->next;
      FREE_HASH_NODE(pNode);
      pNode = pNext;
    }

    // Reset the slot head: without this the slot keeps pointing at released
    // nodes and any later put/get/clear would touch freed memory.
    pHashObj->hashList[i] = NULL;
  }

  pHashObj->size = 0;
}

/*
 * Destroys a table created by tSimpleHashInit: releases all entries, the slot
 * array, and the table object itself. Does nothing when pHashObj is NULL.
 *
 * The handle is invalid after this call and must not be used or freed again.
 */
void tSimpleHashCleanup(SSHashObj *pHashObj) {
  if (pHashObj == NULL) {
    return;
  }

  tSimpleHashClear(pHashObj);
  taosMemoryFreeClear(pHashObj->hashList);

  // the table header was allocated by tSimpleHashInit and is owned by the table
  taosMemoryFree(pHashObj);
}

/*
 * Estimates the memory used by the table: the slot array, one SHNode header
 * per stored element, and the table object itself. The key/value payload
 * bytes of the nodes are not included. Returns 0 when pHashObj is NULL.
 */
size_t tSimpleHashGetMemSize(const SSHashObj *pHashObj) {
  if (!pHashObj) {
    return 0;
  }

  size_t slotBytes = pHashObj->capacity * sizeof(void *);
  size_t nodeBytes = sizeof(SHNode) * tSimpleHashGetSize(pHashObj);
  return slotBytes + nodeBytes + sizeof(SSHashObj);
}

C
Cary Xu 已提交
302
void *tSimpleHashGetKey(const SSHashObj *pHashObj, void *data, size_t *keyLen) {
H
Haojun Liao 已提交
303
  int32_t offset = offsetof(SHNode, data);
C
Cary Xu 已提交
304
  SHNode *node = ((SHNode *)(char *)data - offset);
H
Haojun Liao 已提交
305 306 307 308 309 310
  if (keyLen != NULL) {
    *keyLen = pHashObj->keyLen;
  }

  return GET_SHASH_NODE_KEY(node, pHashObj->dataLen);
}