thashutil.c 5.9 KB
Newer Older
S
TD-4088  
Shengliang Guan 已提交
1 2
/*
 * Copyright (c) 2019 TAOS Data, Inc. <jhtao@taosdata.com>
H
hzcheng 已提交
3
 *
S
TD-4088  
Shengliang Guan 已提交
4 5 6
 * This program is free software: you can use, redistribute, and/or modify
 * it under the terms of the GNU Affero General Public License, version 3
 * or later ("AGPL"), as published by the Free Software Foundation.
H
hzcheng 已提交
7
 *
S
TD-4088  
Shengliang Guan 已提交
8 9 10 11 12 13
 * This program is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.
 *
 * You should have received a copy of the GNU Affero General Public License
 * along with this program. If not, see <http://www.gnu.org/licenses/>.
H
hzcheng 已提交
14
 */
S
TD-4088  
Shengliang Guan 已提交
15

S
hash  
Shengliang Guan 已提交
16
#define _DEFAULT_SOURCE
S
compare  
Shengliang Guan 已提交
17
#include "tcompare.h"
H
Hongze Cheng 已提交
18
#include "thash.h"
H
Hongze Cheng 已提交
19
#include "types.h"
20
#include "xxhash.h"
H
hzcheng 已提交
21

H
Haojun Liao 已提交
22
#define ROTL32(x, r) ((x) << (r) | (x) >> (32u - (r)))
H
hzcheng 已提交
23

H
Hongze Cheng 已提交
24
#define DLT  (FLT_COMPAR_TOL_FACTOR * FLT_EPSILON)
25 26
#define BASE 1000

H
hzcheng 已提交
27 28 29 30 31 32
#define FMIX32(h)      \
  do {                 \
    (h) ^= (h) >> 16;  \
    (h) *= 0x85ebca6b; \
    (h) ^= (h) >> 13;  \
    (h) *= 0xc2b2ae35; \
H
Hongze Cheng 已提交
33 34
    (h) ^= (h) >> 16;  \
  } while (0)
G
Ganlin Zhao 已提交
35

L
Liu Jicong 已提交
36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52
uint32_t taosFastHash(const char *key, uint32_t len) {
  uint32_t result = 0x55555555;
  for (uint32_t i = 0; i < len; i++) {
    result ^= (uint8_t)key[i];
    result = ROTL32(result, 5);
  }
  return result;
}

uint32_t taosDJB2Hash(const char *key, uint32_t len) {
  uint32_t hash = 5381;
  for (uint32_t i = 0; i < len; i++) {
    hash = ((hash << 5) + hash) + (uint8_t)key[i]; /* hash * 33 + c */
  }
  return hash;
}

53 54 55 56 57
uint32_t xxHash(const char *key, uint32_t len) {
  int32_t seed = 0xcc9e2d51;
  return XXH32(key, len, seed);
}

H
Haojun Liao 已提交
58
uint32_t MurmurHash3_32(const char *key, uint32_t len) {
H
hzcheng 已提交
59
  const uint8_t *data = (const uint8_t *)key;
H
Hongze Cheng 已提交
60
  const int32_t  nblocks = len >> 2u;
H
hzcheng 已提交
61

H
Haojun Liao 已提交
62
  uint32_t h1 = 0x12345678;
H
hzcheng 已提交
63 64 65 66 67 68

  const uint32_t c1 = 0xcc9e2d51;
  const uint32_t c2 = 0x1b873593;

  const uint32_t *blocks = (const uint32_t *)(data + nblocks * 4);

S
hash  
Shengliang Guan 已提交
69
  for (int32_t i = -nblocks; i; i++) {
H
hzcheng 已提交
70 71 72
    uint32_t k1 = blocks[i];

    k1 *= c1;
H
Haojun Liao 已提交
73
    k1 = ROTL32(k1, 15u);
H
hzcheng 已提交
74 75 76
    k1 *= c2;

    h1 ^= k1;
H
Haojun Liao 已提交
77
    h1 = ROTL32(h1, 13u);
H
hzcheng 已提交
78 79 80 81 82 83 84
    h1 = h1 * 5 + 0xe6546b64;
  }

  const uint8_t *tail = (data + nblocks * 4);

  uint32_t k1 = 0;

H
Haojun Liao 已提交
85
  switch (len & 3u) {
H
hzcheng 已提交
86 87 88 89 90 91 92
    case 3:
      k1 ^= tail[2] << 16;
    case 2:
      k1 ^= tail[1] << 8;
    case 1:
      k1 ^= tail[0];
      k1 *= c1;
H
Haojun Liao 已提交
93
      k1 = ROTL32(k1, 15u);
H
hzcheng 已提交
94 95 96 97 98 99 100 101
      k1 *= c2;
      h1 ^= k1;
  };

  h1 ^= len;

  FMIX32(h1);

H
Haojun Liao 已提交
102
  return h1;
H
hzcheng 已提交
103
}
104

G
Ganlin Zhao 已提交
105 106
uint64_t MurmurHash3_64(const char *key, uint32_t len) {
  const uint64_t m = 0x87c37b91114253d5;
H
Hongze Cheng 已提交
107 108 109
  const int      r = 47;
  uint32_t       seed = 0x12345678;
  uint64_t       h = seed ^ (len * m);
G
Ganlin Zhao 已提交
110
  const uint8_t *data = (const uint8_t *)key;
H
Hongze Cheng 已提交
111
  const uint8_t *end = data + (len - (len & 7));
G
Ganlin Zhao 已提交
112

H
Hongze Cheng 已提交
113 114
  while (data != end) {
    uint64_t k = *((uint64_t *)data);
G
Ganlin Zhao 已提交
115 116 117 118 119 120 121 122 123

    k *= m;
    k ^= k >> r;
    k *= m;
    h ^= k;
    h *= m;
    data += 8;
  }

H
Hongze Cheng 已提交
124 125 126 127 128 129 130 131 132 133 134 135 136 137 138
  switch (len & 7) {
    case 7:
      h ^= (uint64_t)data[6] << 48; /* fall-thru */
    case 6:
      h ^= (uint64_t)data[5] << 40; /* fall-thru */
    case 5:
      h ^= (uint64_t)data[4] << 32; /* fall-thru */
    case 4:
      h ^= (uint64_t)data[3] << 24; /* fall-thru */
    case 3:
      h ^= (uint64_t)data[2] << 16; /* fall-thru */
    case 2:
      h ^= (uint64_t)data[1] << 8; /* fall-thru */
    case 1:
      h ^= (uint64_t)data[0];
G
Ganlin Zhao 已提交
139 140 141 142 143 144 145 146 147
      h *= m; /* fall-thru */
  };

  h ^= h >> r;
  h *= m;
  h ^= h >> r;
  return h;
}

148
uint32_t taosIntHash_32(const char *key, uint32_t UNUSED_PARAM(len)) { return *(uint32_t *)key; }
H
hjxilinx 已提交
149 150
uint32_t taosIntHash_16(const char *key, uint32_t UNUSED_PARAM(len)) { return *(uint16_t *)key; }
uint32_t taosIntHash_8(const char *key, uint32_t UNUSED_PARAM(len)) { return *(uint8_t *)key; }
151
uint32_t taosFloatHash(const char *key, uint32_t UNUSED_PARAM(len)) {
G
Ganlin Zhao 已提交
152
  float f = GET_FLOAT_VAL(key);
153 154 155
  if (isnan(f)) {
    return 0x7fc00000;
  }
G
Ganlin Zhao 已提交
156

157
  if (FLT_EQUAL(f, 0.0)) {
158
    return 0;
G
Ganlin Zhao 已提交
159
  }
H
Hongze Cheng 已提交
160 161 162
  if (fabs(f) < FLT_MAX / BASE - DLT) {
    int32_t t = (int32_t)(round(BASE * (f + DLT)));
    return (uint32_t)t;
Y
yihaoDeng 已提交
163
  } else {
H
Hongze Cheng 已提交
164
    return 0x7fc00000;
165
  }
166 167
}
uint32_t taosDoubleHash(const char *key, uint32_t UNUSED_PARAM(len)) {
G
Ganlin Zhao 已提交
168
  double f = GET_DOUBLE_VAL(key);
169 170 171
  if (isnan(f)) {
    return 0x7fc00000;
  }
172 173

  if (FLT_EQUAL(f, 0.0)) {
174
    return 0;
G
Ganlin Zhao 已提交
175
  }
H
Hongze Cheng 已提交
176 177 178
  if (fabs(f) < DBL_MAX / BASE - DLT) {
    int32_t t = (int32_t)(round(BASE * (f + DLT)));
    return (uint32_t)t;
Y
yihaoDeng 已提交
179
  } else {
H
Hongze Cheng 已提交
180
    return 0x7fc00000;
G
Ganlin Zhao 已提交
181
  }
182
}
183 184 185 186 187
uint32_t taosIntHash_64(const char *key, uint32_t UNUSED_PARAM(len)) {
  uint64_t val = *(uint64_t *)key;

  uint64_t hash = val >> 16U;
  hash += (val & 0xFFFFU);
G
Ganlin Zhao 已提交
188

S
TD-1037  
Shengliang Guan 已提交
189
  return (uint32_t)hash;
190 191 192 193
}

_hash_fn_t taosGetDefaultHashFunction(int32_t type) {
  _hash_fn_t fn = NULL;
H
Hongze Cheng 已提交
194
  switch (type) {
H
hjxilinx 已提交
195
    case TSDB_DATA_TYPE_TIMESTAMP:
W
wpan 已提交
196
    case TSDB_DATA_TYPE_UBIGINT:
G
Ganlin Zhao 已提交
197
    case TSDB_DATA_TYPE_BIGINT:
W
wpan 已提交
198 199
      fn = taosIntHash_64;
      break;
G
Ganlin Zhao 已提交
200 201
    case TSDB_DATA_TYPE_BINARY:
    case TSDB_DATA_TYPE_NCHAR:
202
      fn = xxHash;
W
wpan 已提交
203 204
      break;
    case TSDB_DATA_TYPE_UINT:
G
Ganlin Zhao 已提交
205 206
    case TSDB_DATA_TYPE_INT:
      fn = taosIntHash_32;
W
wpan 已提交
207
      break;
G
Ganlin Zhao 已提交
208 209 210
    case TSDB_DATA_TYPE_SMALLINT:
    case TSDB_DATA_TYPE_USMALLINT:
      fn = taosIntHash_16;
W
wpan 已提交
211 212 213
      break;
    case TSDB_DATA_TYPE_BOOL:
    case TSDB_DATA_TYPE_UTINYINT:
G
Ganlin Zhao 已提交
214 215 216 217 218
    case TSDB_DATA_TYPE_TINYINT:
      fn = taosIntHash_8;
      break;
    case TSDB_DATA_TYPE_FLOAT:
      fn = taosFloatHash;
W
wpan 已提交
219
      break;
G
Ganlin Zhao 已提交
220 221 222 223
    case TSDB_DATA_TYPE_DOUBLE:
      fn = taosDoubleHash;
      break;
    default:
W
wpan 已提交
224 225
      fn = taosIntHash_32;
      break;
226
  }
G
Ganlin Zhao 已提交
227

228
  return fn;
229 230
}

D
dapan1121 已提交
231 232 233 234 235 236 237
int32_t taosFloatEqual(const void *a, const void *b, size_t UNUSED_PARAM(sz)) {
  return getComparFunc(TSDB_DATA_TYPE_FLOAT, -1)(a, b);
}

int32_t taosDoubleEqual(const void *a, const void *b, size_t UNUSED_PARAM(sz)) {
  return getComparFunc(TSDB_DATA_TYPE_DOUBLE, -1)(a, b);
}
238 239 240

_equal_fn_t taosGetDefaultEqualFunction(int32_t type) {
  _equal_fn_t fn = NULL;
D
dapan1121 已提交
241
  switch (type) {
H
Hongze Cheng 已提交
242 243 244 245 246 247 248 249 250
    case TSDB_DATA_TYPE_FLOAT:
      fn = taosFloatEqual;
      break;
    case TSDB_DATA_TYPE_DOUBLE:
      fn = taosDoubleEqual;
      break;
    default:
      fn = memcmp;
      break;
D
dapan1121 已提交
251
  }
252 253
  return fn;
}