thashutil.c 5.8 KB
Newer Older
S
TD-4088  
Shengliang Guan 已提交
1 2
/*
 * Copyright (c) 2019 TAOS Data, Inc. <jhtao@taosdata.com>
H
hzcheng 已提交
3
 *
S
TD-4088  
Shengliang Guan 已提交
4 5 6
 * This program is free software: you can use, redistribute, and/or modify
 * it under the terms of the GNU Affero General Public License, version 3
 * or later ("AGPL"), as published by the Free Software Foundation.
H
hzcheng 已提交
7
 *
S
TD-4088  
Shengliang Guan 已提交
8 9 10 11 12 13
 * This program is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.
 *
 * You should have received a copy of the GNU Affero General Public License
 * along with this program. If not, see <http://www.gnu.org/licenses/>.
H
hzcheng 已提交
14
 */
S
TD-4088  
Shengliang Guan 已提交
15

S
hash  
Shengliang Guan 已提交
16
#define _DEFAULT_SOURCE
S
compare  
Shengliang Guan 已提交
17
#include "tcompare.h"
H
Hongze Cheng 已提交
18
#include "thash.h"
H
Hongze Cheng 已提交
19
#include "types.h"
20
#include "xxhash.h"
H
hzcheng 已提交
21

H
Haojun Liao 已提交
22
#define ROTL32(x, r) ((x) << (r) | (x) >> (32u - (r)))
H
hzcheng 已提交
23

H
Hongze Cheng 已提交
24
#define DLT  (FLT_COMPAR_TOL_FACTOR * FLT_EPSILON)
25 26
#define BASE 1000

H
hzcheng 已提交
27 28 29 30 31 32
#define FMIX32(h)      \
  do {                 \
    (h) ^= (h) >> 16;  \
    (h) *= 0x85ebca6b; \
    (h) ^= (h) >> 13;  \
    (h) *= 0xc2b2ae35; \
H
Hongze Cheng 已提交
33 34
    (h) ^= (h) >> 16;  \
  } while (0)
G
Ganlin Zhao 已提交
35

L
Liu Jicong 已提交
36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52
uint32_t taosFastHash(const char *key, uint32_t len) {
  uint32_t result = 0x55555555;
  for (uint32_t i = 0; i < len; i++) {
    result ^= (uint8_t)key[i];
    result = ROTL32(result, 5);
  }
  return result;
}

uint32_t taosDJB2Hash(const char *key, uint32_t len) {
  uint32_t hash = 5381;
  for (uint32_t i = 0; i < len; i++) {
    hash = ((hash << 5) + hash) + (uint8_t)key[i]; /* hash * 33 + c */
  }
  return hash;
}

H
Haojun Liao 已提交
53
uint32_t MurmurHash3_32(const char *key, uint32_t len) {
H
hzcheng 已提交
54
  const uint8_t *data = (const uint8_t *)key;
H
Hongze Cheng 已提交
55
  const int32_t  nblocks = len >> 2u;
H
hzcheng 已提交
56

H
Haojun Liao 已提交
57
  uint32_t h1 = 0x12345678;
H
hzcheng 已提交
58 59 60 61 62 63

  const uint32_t c1 = 0xcc9e2d51;
  const uint32_t c2 = 0x1b873593;

  const uint32_t *blocks = (const uint32_t *)(data + nblocks * 4);

S
hash  
Shengliang Guan 已提交
64
  for (int32_t i = -nblocks; i; i++) {
H
hzcheng 已提交
65 66 67
    uint32_t k1 = blocks[i];

    k1 *= c1;
H
Haojun Liao 已提交
68
    k1 = ROTL32(k1, 15u);
H
hzcheng 已提交
69 70 71
    k1 *= c2;

    h1 ^= k1;
H
Haojun Liao 已提交
72
    h1 = ROTL32(h1, 13u);
H
hzcheng 已提交
73 74 75 76 77 78 79
    h1 = h1 * 5 + 0xe6546b64;
  }

  const uint8_t *tail = (data + nblocks * 4);

  uint32_t k1 = 0;

H
Haojun Liao 已提交
80
  switch (len & 3u) {
H
hzcheng 已提交
81 82 83 84 85 86 87
    case 3:
      k1 ^= tail[2] << 16;
    case 2:
      k1 ^= tail[1] << 8;
    case 1:
      k1 ^= tail[0];
      k1 *= c1;
H
Haojun Liao 已提交
88
      k1 = ROTL32(k1, 15u);
H
hzcheng 已提交
89 90 91 92 93 94 95 96
      k1 *= c2;
      h1 ^= k1;
  };

  h1 ^= len;

  FMIX32(h1);

H
Haojun Liao 已提交
97
  return h1;
H
hzcheng 已提交
98
}
99

G
Ganlin Zhao 已提交
100 101
uint64_t MurmurHash3_64(const char *key, uint32_t len) {
  const uint64_t m = 0x87c37b91114253d5;
H
Hongze Cheng 已提交
102 103 104
  const int      r = 47;
  uint32_t       seed = 0x12345678;
  uint64_t       h = seed ^ (len * m);
G
Ganlin Zhao 已提交
105
  const uint8_t *data = (const uint8_t *)key;
H
Hongze Cheng 已提交
106
  const uint8_t *end = data + (len - (len & 7));
G
Ganlin Zhao 已提交
107

H
Hongze Cheng 已提交
108 109
  while (data != end) {
    uint64_t k = *((uint64_t *)data);
G
Ganlin Zhao 已提交
110 111 112 113 114 115 116 117 118

    k *= m;
    k ^= k >> r;
    k *= m;
    h ^= k;
    h *= m;
    data += 8;
  }

H
Hongze Cheng 已提交
119 120 121 122 123 124 125 126 127 128 129 130 131 132 133
  switch (len & 7) {
    case 7:
      h ^= (uint64_t)data[6] << 48; /* fall-thru */
    case 6:
      h ^= (uint64_t)data[5] << 40; /* fall-thru */
    case 5:
      h ^= (uint64_t)data[4] << 32; /* fall-thru */
    case 4:
      h ^= (uint64_t)data[3] << 24; /* fall-thru */
    case 3:
      h ^= (uint64_t)data[2] << 16; /* fall-thru */
    case 2:
      h ^= (uint64_t)data[1] << 8; /* fall-thru */
    case 1:
      h ^= (uint64_t)data[0];
G
Ganlin Zhao 已提交
134 135 136 137 138 139 140 141 142
      h *= m; /* fall-thru */
  };

  h ^= h >> r;
  h *= m;
  h ^= h >> r;
  return h;
}

143
uint32_t taosIntHash_32(const char *key, uint32_t UNUSED_PARAM(len)) { return *(uint32_t *)key; }
H
hjxilinx 已提交
144 145
uint32_t taosIntHash_16(const char *key, uint32_t UNUSED_PARAM(len)) { return *(uint16_t *)key; }
uint32_t taosIntHash_8(const char *key, uint32_t UNUSED_PARAM(len)) { return *(uint8_t *)key; }
146
uint32_t taosFloatHash(const char *key, uint32_t UNUSED_PARAM(len)) {
G
Ganlin Zhao 已提交
147
  float f = GET_FLOAT_VAL(key);
148 149 150
  if (isnan(f)) {
    return 0x7fc00000;
  }
G
Ganlin Zhao 已提交
151

152
  if (FLT_EQUAL(f, 0.0)) {
153
    return 0;
G
Ganlin Zhao 已提交
154
  }
H
Hongze Cheng 已提交
155 156 157
  if (fabs(f) < FLT_MAX / BASE - DLT) {
    int32_t t = (int32_t)(round(BASE * (f + DLT)));
    return (uint32_t)t;
Y
yihaoDeng 已提交
158
  } else {
H
Hongze Cheng 已提交
159
    return 0x7fc00000;
160
  }
161 162
}
uint32_t taosDoubleHash(const char *key, uint32_t UNUSED_PARAM(len)) {
G
Ganlin Zhao 已提交
163
  double f = GET_DOUBLE_VAL(key);
164 165 166
  if (isnan(f)) {
    return 0x7fc00000;
  }
167 168

  if (FLT_EQUAL(f, 0.0)) {
169
    return 0;
G
Ganlin Zhao 已提交
170
  }
H
Hongze Cheng 已提交
171 172 173
  if (fabs(f) < DBL_MAX / BASE - DLT) {
    int32_t t = (int32_t)(round(BASE * (f + DLT)));
    return (uint32_t)t;
Y
yihaoDeng 已提交
174
  } else {
H
Hongze Cheng 已提交
175
    return 0x7fc00000;
G
Ganlin Zhao 已提交
176
  }
177
}
178 179 180 181 182
uint32_t taosIntHash_64(const char *key, uint32_t UNUSED_PARAM(len)) {
  uint64_t val = *(uint64_t *)key;

  uint64_t hash = val >> 16U;
  hash += (val & 0xFFFFU);
G
Ganlin Zhao 已提交
183

S
TD-1037  
Shengliang Guan 已提交
184
  return (uint32_t)hash;
185 186 187 188
}

_hash_fn_t taosGetDefaultHashFunction(int32_t type) {
  _hash_fn_t fn = NULL;
H
Hongze Cheng 已提交
189
  switch (type) {
H
hjxilinx 已提交
190
    case TSDB_DATA_TYPE_TIMESTAMP:
W
wpan 已提交
191
    case TSDB_DATA_TYPE_UBIGINT:
G
Ganlin Zhao 已提交
192
    case TSDB_DATA_TYPE_BIGINT:
W
wpan 已提交
193 194
      fn = taosIntHash_64;
      break;
G
Ganlin Zhao 已提交
195 196
    case TSDB_DATA_TYPE_BINARY:
    case TSDB_DATA_TYPE_NCHAR:
197
      fn = MurmurHash3_32;
W
wpan 已提交
198 199
      break;
    case TSDB_DATA_TYPE_UINT:
G
Ganlin Zhao 已提交
200 201
    case TSDB_DATA_TYPE_INT:
      fn = taosIntHash_32;
W
wpan 已提交
202
      break;
G
Ganlin Zhao 已提交
203 204 205
    case TSDB_DATA_TYPE_SMALLINT:
    case TSDB_DATA_TYPE_USMALLINT:
      fn = taosIntHash_16;
W
wpan 已提交
206 207 208
      break;
    case TSDB_DATA_TYPE_BOOL:
    case TSDB_DATA_TYPE_UTINYINT:
G
Ganlin Zhao 已提交
209 210 211 212 213
    case TSDB_DATA_TYPE_TINYINT:
      fn = taosIntHash_8;
      break;
    case TSDB_DATA_TYPE_FLOAT:
      fn = taosFloatHash;
W
wpan 已提交
214
      break;
G
Ganlin Zhao 已提交
215 216 217 218
    case TSDB_DATA_TYPE_DOUBLE:
      fn = taosDoubleHash;
      break;
    default:
W
wpan 已提交
219 220
      fn = taosIntHash_32;
      break;
221
  }
G
Ganlin Zhao 已提交
222

223
  return fn;
224 225
}

D
dapan1121 已提交
226 227 228 229 230 231 232
int32_t taosFloatEqual(const void *a, const void *b, size_t UNUSED_PARAM(sz)) {
  return getComparFunc(TSDB_DATA_TYPE_FLOAT, -1)(a, b);
}

int32_t taosDoubleEqual(const void *a, const void *b, size_t UNUSED_PARAM(sz)) {
  return getComparFunc(TSDB_DATA_TYPE_DOUBLE, -1)(a, b);
}
233 234 235

_equal_fn_t taosGetDefaultEqualFunction(int32_t type) {
  _equal_fn_t fn = NULL;
D
dapan1121 已提交
236
  switch (type) {
H
Hongze Cheng 已提交
237 238 239 240 241 242 243 244 245
    case TSDB_DATA_TYPE_FLOAT:
      fn = taosFloatEqual;
      break;
    case TSDB_DATA_TYPE_DOUBLE:
      fn = taosDoubleEqual;
      break;
    default:
      fn = memcmp;
      break;
D
dapan1121 已提交
246
  }
247 248
  return fn;
}