tsimplehash.c 11.3 KB
Newer Older
H
Haojun Liao 已提交
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17
/*
 * Copyright (c) 2019 TAOS Data, Inc. <jhtao@taosdata.com>
 *
 * This program is free software: you can use, redistribute, and/or modify
 * it under the terms of the GNU Affero General Public License, version 3
 * or later ("AGPL"), as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.
 *
 * You should have received a copy of the GNU Affero General Public License
 * along with this program. If not, see <http://www.gnu.org/licenses/>.
 */

#include "tsimplehash.h"
#include "taoserror.h"
C
Cary Xu 已提交
18
#include "tlog.h"
H
Haojun Liao 已提交
19
#include "tdef.h"
H
Haojun Liao 已提交
20

H
Haojun Liao 已提交
21
/* Size in bytes of one node-allocation page; a larger node gets a page of its own size. */
#define DEFAULT_BUF_PAGE_SIZE     1024
/* Fraction of the slot count at which the table is considered due for a resize. */
#define SHASH_DEFAULT_LOAD_FACTOR 0.75
/* Upper bound on the number of slots (16M). */
#define HASH_MAX_CAPACITY         (1024 * 1024 * 16L)
/* True once the element count has reached capacity * load factor. */
#define SHASH_NEED_RESIZE(_h)     ((_h)->size >= (_h)->capacity * SHASH_DEFAULT_LOAD_FACTOR)

/* Node payload layout: the data bytes come first, the key bytes follow at offset dataLen. */
#define GET_SHASH_NODE_DATA(_n)     (((SHNode *)(_n))->data)
#define GET_SHASH_NODE_KEY(_n, _dl) ((char *)GET_SHASH_NODE_DATA(_n) + (_dl))

/* Maps a hash value to a slot index; only meaningful when c is a power of two. */
#define HASH_INDEX(v, c) ((v) & ((c)-1))

/*
 * Releases a node through taosMemoryFreeClear().
 * The expansion deliberately has no trailing ';' so that `FREE_HASH_NODE(p);`
 * is exactly one statement and can be used inside an unbraced if/else.
 */
#define FREE_HASH_NODE(_n)   \
  do {                       \
    taosMemoryFreeClear(_n); \
  } while (0)

36
struct SSHashObj {
C
Cary Xu 已提交
37 38
  SHNode    **hashList;
  size_t      capacity;  // number of slots
39 40 41
  int64_t     size;      // number of elements in hash table
  _hash_fn_t  hashFp;    // hash function
  _equal_fn_t equalFp;   // equal function
H
Haojun Liao 已提交
42 43
  SArray*     pHashNodeBuf;// hash node allocation buffer, 1k size of each page by default
  int32_t     offset;      // allocation offset in current page
44
};
H
Haojun Liao 已提交
45 46

/*
 * Rounds the requested slot count up to a power of two.
 * The request is clamped to HASH_MAX_CAPACITY first and the result is never below 4.
 */
static FORCE_INLINE int32_t taosHashCapacity(int32_t length) {
  int32_t limit = HASH_MAX_CAPACITY;
  if (length < HASH_MAX_CAPACITY) {
    limit = length;
  }

  int32_t slots = 4;
  for (; slots < limit; slots <<= 1u) {
    // keep doubling until the limit is covered
  }
  return slots;
}

C
Cary Xu 已提交
54
/*
 * Creates an empty hash table.
 *
 * @param capacity  requested number of slots; 0 selects the minimum (4). The value is
 *                  clamped to HASH_MAX_CAPACITY and rounded up to a power of two.
 * @param fn        hash function, must not be NULL.
 * @return the new table, or NULL when fn is NULL or an allocation fails
 *         (terrno is set to TSDB_CODE_OUT_OF_MEMORY on allocation failure).
 */
SSHashObj *tSimpleHashInit(size_t capacity, _hash_fn_t fn) {
  if (fn == NULL) {
    return NULL;
  }

  if (capacity == 0) {
    capacity = 4;
  }

  // clamp before narrowing to int32_t so that a huge request cannot wrap to a small or negative value
  if (capacity > HASH_MAX_CAPACITY) {
    capacity = HASH_MAX_CAPACITY;
  }

  SSHashObj *pHashObj = (SSHashObj *)taosMemoryMalloc(sizeof(SSHashObj));
  if (!pHashObj) {
    terrno = TSDB_CODE_OUT_OF_MEMORY;
    return NULL;
  }

  // the max slots is not defined by user
  pHashObj->hashFp = fn;
  pHashObj->equalFp = memcmp;
  pHashObj->capacity = taosHashCapacity((int32_t)capacity);
  pHashObj->offset = 0;
  pHashObj->size = 0;

  pHashObj->pHashNodeBuf = taosArrayInit(10, sizeof(void *));
  if (!pHashObj->pHashNodeBuf) {
    taosMemoryFree(pHashObj);
    terrno = TSDB_CODE_OUT_OF_MEMORY;
    return NULL;
  }

  pHashObj->hashList = (SHNode **)taosMemoryCalloc(pHashObj->capacity, sizeof(void *));
  if (!pHashObj->hashList) {
    // release the page array as well, otherwise it would leak on this path
    taosArrayDestroy(pHashObj->pHashNodeBuf);
    taosMemoryFree(pHashObj);
    terrno = TSDB_CODE_OUT_OF_MEMORY;
    return NULL;
  }

  return pHashObj;
}

int32_t tSimpleHashGetSize(const SSHashObj *pHashObj) {
C
Cary Xu 已提交
88
  if (!pHashObj) {
H
Haojun Liao 已提交
89 90
    return 0;
  }
91
  return (int32_t) pHashObj->size;
H
Haojun Liao 已提交
92 93
}

H
Haojun Liao 已提交
94 95 96 97
/*
 * Carves `size` bytes for one node out of the table's page buffer.
 *
 * The request is rounded up to a multiple of sizeof(void*) so that every node placed
 * behind another one in the same page stays pointer-aligned. A new page is allocated
 * when there is no page yet or the current one cannot hold the request.
 *
 * @return pointer to the reserved bytes, or NULL on invalid size / allocation failure.
 */
static void* doInternalAlloc(SSHashObj* pHashObj, int32_t size) {
  const int32_t align = (int32_t)sizeof(void*);

  if (size <= 0 || size > INT32_MAX - (align - 1) || pHashObj->pHashNodeBuf == NULL) {
    return NULL;
  }
  size = (size + align - 1) & ~(align - 1);

  void** p = taosArrayGetLast(pHashObj->pHashNodeBuf);

  // written as a subtraction so that a large offset cannot overflow the comparison
  if (p == NULL || size > DEFAULT_BUF_PAGE_SIZE - pHashObj->offset) {
    // let's allocate one new page
    int32_t allocSize = TMAX(size, DEFAULT_BUF_PAGE_SIZE);
    void*   pNewPage = taosMemoryMalloc(allocSize);
    if (pNewPage == NULL) {
      return NULL;
    }

    // the page must be registered, otherwise nobody would ever free it
    if (taosArrayPush(pHashObj->pHashNodeBuf, &pNewPage) == NULL) {
      taosMemoryFree(pNewPage);
      return NULL;
    }

    // If the allocated page is larger than DEFAULT_BUF_PAGE_SIZE, offset ends up beyond
    // DEFAULT_BUF_PAGE_SIZE as well, which marks the page as full: the next request
    // will allocate a fresh page.
    pHashObj->offset = size;
    return pNewPage;
  }

  // standard C has no arithmetic on void*, so step through a char*
  void* pPos = (char*)(*p) + pHashObj->offset;
  pHashObj->offset += size;
  return pPos;
}

117 118
/*
 * Allocates a node from the table's page buffer and fills it in.
 * The payload is laid out as dataLen bytes of data followed by keyLen bytes of key.
 * When data is NULL the data region is reserved but left uninitialized.
 *
 * @return the new, unlinked node, or NULL (terrno = TSDB_CODE_OUT_OF_MEMORY) when the
 *         node cannot be allocated or its total size does not fit the int32_t allocator argument.
 */
static SHNode *doCreateHashNode(SSHashObj *pHashObj, const void *key, size_t keyLen, const void *data, size_t dataLen,
                                uint32_t hashVal) {
  // the allocator takes an int32_t; refuse sizes that would be silently truncated
  if (keyLen > INT32_MAX || dataLen > INT32_MAX || keyLen + dataLen > INT32_MAX - sizeof(SHNode)) {
    terrno = TSDB_CODE_OUT_OF_MEMORY;
    return NULL;
  }

  SHNode *pNewNode = doInternalAlloc(pHashObj, (int32_t)(sizeof(SHNode) + keyLen + dataLen));
  if (!pNewNode) {
    terrno = TSDB_CODE_OUT_OF_MEMORY;
    return NULL;
  }

  pNewNode->keyLen = keyLen;
  pNewNode->dataLen = dataLen;
  pNewNode->next = NULL;
  pNewNode->hashVal = hashVal;

  if (data) {
    memcpy(GET_SHASH_NODE_DATA(pNewNode), data, dataLen);
  }

  memcpy(GET_SHASH_NODE_KEY(pNewNode, dataLen), key, keyLen);
  return pNewNode;
}

C
Cary Xu 已提交
138
/*
 * Doubles the slot array when the load factor has been reached and redistributes
 * the existing nodes. Nothing happens when no resize is needed, when doubling
 * would exceed HASH_MAX_CAPACITY, or when the slot array cannot be reallocated.
 */
static void tSimpleHashTableResize(SSHashObj *pHashObj) {
  if (!SHASH_NEED_RESIZE(pHashObj)) {
    return;
  }

  int32_t doubled = (int32_t)(pHashObj->capacity << 1u);
  if (doubled > HASH_MAX_CAPACITY) {
    uDebug("current capacity:%" PRIzu ", maximum capacity:%" PRId32 ", no resize applied due to limitation is reached",
           pHashObj->capacity, (int32_t)HASH_MAX_CAPACITY);
    return;
  }

  void *pGrown = taosMemoryRealloc(pHashObj->hashList, POINTER_BYTES * doubled);
  if (!pGrown) {
    uWarn("hash resize failed due to out of memory, capacity remain:%zu", pHashObj->capacity);
    return;
  }

  // only the freshly appended slots need to be zeroed
  size_t added = doubled - pHashObj->capacity;
  memset((char *)pGrown + pHashObj->capacity * POINTER_BYTES, 0, added * sizeof(void *));

  pHashObj->hashList = pGrown;
  pHashObj->capacity = doubled;

  for (int32_t idx = 0; idx < pHashObj->capacity; ++idx) {
    // ppLink always addresses the pointer that refers to the node under inspection
    SHNode **ppLink = &pHashObj->hashList[idx];

    while (*ppLink != NULL) {
      SHNode *pCur = *ppLink;
      int32_t target = HASH_INDEX(pCur->hashVal, pHashObj->capacity);

      if (target == idx) {  // node stays in this chain
        ppLink = &pCur->next;
        continue;
      }

      // unlink from the current chain and push onto the head of the target chain
      *ppLink = pCur->next;
      pCur->next = pHashObj->hashList[target];
      pHashObj->hashList[target] = pCur;
    }
  }
}

C
Cary Xu 已提交
198
/*
 * Inserts a key/data pair, or overwrites the data of an existing key.
 *
 * @param key      key bytes, must not be NULL.
 * @param data     data bytes; may be NULL, in which case a new node's data region is
 *                 left uninitialized and an existing node is left untouched.
 * @param dataLen  number of data bytes. When the key already exists, dataLen must not
 *                 exceed the length the node was created with, because the node's key
 *                 is stored directly behind its data region.
 * @return 0 on success; -1 on NULL arguments, allocation failure, or an update whose
 *         dataLen is larger than the existing node's data region.
 */
int32_t tSimpleHashPut(SSHashObj *pHashObj, const void *key, size_t keyLen, const void *data, size_t dataLen) {
  if (!pHashObj || !key) {
    return -1;
  }

  uint32_t hashVal = (*pHashObj->hashFp)(key, (uint32_t)keyLen);

  // grow the slot array first so that the slot computed below is final
  if (SHASH_NEED_RESIZE(pHashObj)) {
    tSimpleHashTableResize(pHashObj);
  }

  int32_t slot = HASH_INDEX(hashVal, pHashObj->capacity);

  // look for an existing node carrying the same key
  SHNode *pNode = pHashObj->hashList[slot];
  while (pNode) {
    if ((keyLen == pNode->keyLen) &&
        (*(pHashObj->equalFp))(GET_SHASH_NODE_KEY(pNode, pNode->dataLen), key, keyLen) == 0) {
      break;
    }
    pNode = pNode->next;
  }

  if (!pNode) {
    // not found (this also covers an empty slot): create a node and link it at the chain head
    SHNode *pNewNode = doCreateHashNode(pHashObj, key, keyLen, data, dataLen, hashVal);
    if (!pNewNode) {
      return -1;
    }

    pNewNode->next = pHashObj->hashList[slot];
    pHashObj->hashList[slot] = pNewNode;
    pHashObj->size += 1;
    return 0;
  }

  if (data) {  // update data
    if (dataLen > pNode->dataLen) {
      // copying more bytes than the node reserved would overwrite the key stored behind the data
      return -1;
    }
    memcpy(GET_SHASH_NODE_DATA(pNode), data, dataLen);
  }

  return 0;
}

C
Cary Xu 已提交
246
/*
 * Walks the chain of slot `index` and returns the node whose key has the same
 * length and compares equal to `key`, or NULL when there is none.
 */
static FORCE_INLINE SHNode *doSearchInEntryList(SSHashObj *pHashObj, const void *key, size_t keyLen, int32_t index) {
  SHNode *pCur = pHashObj->hashList[index];

  for (; pCur != NULL; pCur = pCur->next) {
    const char *pStoredKey = GET_SHASH_NODE_KEY(pCur, pCur->dataLen);
    ASSERT(keyLen > 0);

    if (pCur->keyLen != keyLen) {
      continue;
    }

    if ((*(pHashObj->equalFp))(pStoredKey, key, keyLen) == 0) {
      return pCur;
    }
  }

  return NULL;
}

C
Cary Xu 已提交
261
/* True when the table holds no elements (a NULL table counts as empty). */
static FORCE_INLINE bool taosHashTableEmpty(const SSHashObj *pHashObj) {
  const int32_t numOfElems = tSimpleHashGetSize(pHashObj);
  return numOfElems == 0;
}
H
Haojun Liao 已提交
262

C
Cary Xu 已提交
263
/*
 * Looks up `key` and returns a pointer to the data stored with it.
 * Returns NULL for a NULL/empty table, a NULL key, or when the key is absent.
 */
void *tSimpleHashGet(SSHashObj *pHashObj, const void *key, size_t keyLen) {
  if (pHashObj == NULL || key == NULL || taosHashTableEmpty(pHashObj)) {
    return NULL;
  }

  uint32_t hashVal = (*pHashObj->hashFp)(key, (uint32_t)keyLen);
  int32_t  slot = HASH_INDEX(hashVal, pHashObj->capacity);

  if (pHashObj->hashList[slot] == NULL) {  // nothing hashed into this slot
    return NULL;
  }

  SHNode *pFound = doSearchInEntryList(pHashObj, key, keyLen, slot);
  if (pFound == NULL) {
    return NULL;
  }

  return GET_SHASH_NODE_DATA(pFound);
}

C
Cary Xu 已提交
285
/*
 * Removes the element stored under `key`.
 *
 * @return TSDB_CODE_SUCCESS when an element was removed, TSDB_CODE_FAILED when the
 *         table or key is NULL or the key is not present.
 */
int32_t tSimpleHashRemove(SSHashObj *pHashObj, const void *key, size_t keyLen) {
  int32_t code = TSDB_CODE_FAILED;
  if (!pHashObj || !key) {
    return code;
  }

  uint32_t hashVal = (*pHashObj->hashFp)(key, (uint32_t)keyLen);

  int32_t slot = HASH_INDEX(hashVal, pHashObj->capacity);

  SHNode *pNode = pHashObj->hashList[slot];
  SHNode *pPrev = NULL;
  while (pNode) {
    // compare the lengths first: without it a shorter key would match any stored key it is a
    // prefix of, and a longer key would make the comparator read past the stored key
    if ((pNode->keyLen == keyLen) &&
        (*(pHashObj->equalFp))(GET_SHASH_NODE_KEY(pNode, pNode->dataLen), key, keyLen) == 0) {
      if (!pPrev) {
        pHashObj->hashList[slot] = pNode->next;
      } else {
        pPrev->next = pNode->next;
      }

      // The node is only unlinked. Nodes are carved out of shared buffer pages by
      // doInternalAlloc(), so a single node is not a pointer the allocator handed out and
      // must not be freed; its bytes are reclaimed when tSimpleHashClear() releases the pages.
      pHashObj->size -= 1;
      code = TSDB_CODE_SUCCESS;
      break;
    }
    pPrev = pNode;
    pNode = pNode->next;
  }

  return code;
}

316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334
/*
 * Removes the element stored under `key` while an iteration is in progress.
 *
 * When *pIter currently points at the removed element's data, it is moved back to the data
 * of the previous node in the same chain, or set to NULL when the removed node was the
 * chain head, so that the next tSimpleHashIterate() call continues correctly.
 *
 * @param pIter  address of the caller's iteration cursor; may be NULL if no cursor is tracked.
 * @param iter   slot cursor of the iteration; not modified here.
 * @return TSDB_CODE_FAILED when the table or key is NULL, TSDB_CODE_SUCCESS otherwise
 *         (also when the key is not present).
 */
int32_t tSimpleHashIterateRemove(SSHashObj *pHashObj, const void *key, size_t keyLen, void **pIter, int32_t *iter) {
  (void)iter;

  if (!pHashObj || !key) {
    return TSDB_CODE_FAILED;
  }

  uint32_t hashVal = (*pHashObj->hashFp)(key, (uint32_t)keyLen);

  int32_t slot = HASH_INDEX(hashVal, pHashObj->capacity);

  SHNode *pNode = pHashObj->hashList[slot];
  SHNode *pPrev = NULL;
  while (pNode) {
    // compare the lengths first: without it a shorter key would match any stored key it is a
    // prefix of, and a longer key would make the comparator read past the stored key
    if ((pNode->keyLen == keyLen) &&
        (*(pHashObj->equalFp))(GET_SHASH_NODE_KEY(pNode, pNode->dataLen), key, keyLen) == 0) {
      if (!pPrev) {
        pHashObj->hashList[slot] = pNode->next;
      } else {
        pPrev->next = pNode->next;
      }

      if (pIter != NULL && *pIter == (void *)GET_SHASH_NODE_DATA(pNode)) {
        *pIter = pPrev ? GET_SHASH_NODE_DATA(pPrev) : NULL;
      }

      // The node is only unlinked. Nodes are carved out of shared buffer pages by
      // doInternalAlloc(), so a single node is not a pointer the allocator handed out and
      // must not be freed; its bytes are reclaimed when tSimpleHashClear() releases the pages.
      pHashObj->size -= 1;
      break;
    }
    pPrev = pNode;
    pNode = pNode->next;
  }

  return TSDB_CODE_SUCCESS;
}

350 351 352 353
/* Array-element destructor: pItem addresses an array entry that holds one page pointer. */
static void destroyItems(void* pItem) {
  void** ppPage = (void**)pItem;
  taosMemoryFree(*ppPage);
}

H
Haojun Liao 已提交
354
/*
 * Drops every element and releases all node pages; the slot array itself is kept
 * so the table can be reused. A NULL table is ignored.
 */
void tSimpleHashClear(SSHashObj *pHashObj) {
  if (!pHashObj) {
    return;
  }

  // No shortcut for size == 0: node storage lives in the pages registered in pHashNodeBuf,
  // and pages can still be registered when the element count has dropped back to zero.
  // Skipping the release here would leave them unreachable once the array is destroyed.
  memset(pHashObj->hashList, 0, pHashObj->capacity * sizeof(void *));

  if (pHashObj->pHashNodeBuf != NULL) {
    taosArrayClearEx(pHashObj->pHashNodeBuf, destroyItems);
  }

  pHashObj->offset = 0;
  pHashObj->size = 0;
}

H
Haojun Liao 已提交
365
/* Destroys the table: clears its content, then frees the page array, the slot array and the object. */
void tSimpleHashCleanup(SSHashObj *pHashObj) {
  if (pHashObj == NULL) {
    return;
  }

  // content first, containers afterwards
  tSimpleHashClear(pHashObj);

  taosArrayDestroy(pHashObj->pHashNodeBuf);
  taosMemoryFreeClear(pHashObj->hashList);

  taosMemoryFree(pHashObj);
}

/*
 * Returns the byte count of the slot array, one SHNode header per element and the
 * table object itself; key and data payload bytes are not included. 0 for a NULL table.
 */
size_t tSimpleHashGetMemSize(const SSHashObj *pHashObj) {
  if (pHashObj == NULL) {
    return 0;
  }

  size_t slotBytes = pHashObj->capacity * sizeof(void *);
  size_t nodeBytes = sizeof(SHNode) * tSimpleHashGetSize(pHashObj);

  return slotBytes + nodeBytes + sizeof(SSHashObj);
}

C
Cary Xu 已提交
384 385 386
void *tSimpleHashIterate(const SSHashObj *pHashObj, void *data, int32_t *iter) {
  if (!pHashObj) {
    return NULL;
C
Cary Xu 已提交
387 388
  }

C
Cary Xu 已提交
389 390 391
  SHNode *pNode = NULL;

  if (!data) {
392
    for (int32_t i = *iter; i < pHashObj->capacity; ++i) {
C
Cary Xu 已提交
393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418
      pNode = pHashObj->hashList[i];
      if (!pNode) {
        continue;
      }
      *iter = i;
      return GET_SHASH_NODE_DATA(pNode);
    }
    return NULL;
  }

  pNode = (SHNode *)((char *)data - offsetof(SHNode, data));

  if (pNode->next) {
    return GET_SHASH_NODE_DATA(pNode->next);
  }

  ++(*iter);
  for (int32_t i = *iter; i < pHashObj->capacity; ++i) {
    pNode = pHashObj->hashList[i];
    if (!pNode) {
      continue;
    }
    *iter = i;
    return GET_SHASH_NODE_DATA(pNode);
  }

C
Cary Xu 已提交
419
  return NULL;
420
}