thashutil.c 5.8 KB
Newer Older
S
TD-4088  
Shengliang Guan 已提交
1 2
/*
 * Copyright (c) 2019 TAOS Data, Inc. <jhtao@taosdata.com>
H
hzcheng 已提交
3
 *
S
TD-4088  
Shengliang Guan 已提交
4 5 6
 * This program is free software: you can use, redistribute, and/or modify
 * it under the terms of the GNU Affero General Public License, version 3
 * or later ("AGPL"), as published by the Free Software Foundation.
H
hzcheng 已提交
7
 *
S
TD-4088  
Shengliang Guan 已提交
8 9 10 11 12 13
 * This program is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.
 *
 * You should have received a copy of the GNU Affero General Public License
 * along with this program. If not, see <http://www.gnu.org/licenses/>.
H
hzcheng 已提交
14
 */
S
TD-4088  
Shengliang Guan 已提交
15

S
hash  
Shengliang Guan 已提交
16
#define _DEFAULT_SOURCE
S
compare  
Shengliang Guan 已提交
17
#include "tcompare.h"
H
Hongze Cheng 已提交
18
#include "thash.h"
H
Hongze Cheng 已提交
19
#include "types.h"
H
hzcheng 已提交
20

H
Haojun Liao 已提交
21
#define ROTL32(x, r) ((x) << (r) | (x) >> (32u - (r)))
H
hzcheng 已提交
22

H
Hongze Cheng 已提交
23
#define DLT  (FLT_COMPAR_TOL_FACTOR * FLT_EPSILON)
24 25
#define BASE 1000

H
hzcheng 已提交
26 27 28 29 30 31
#define FMIX32(h)      \
  do {                 \
    (h) ^= (h) >> 16;  \
    (h) *= 0x85ebca6b; \
    (h) ^= (h) >> 13;  \
    (h) *= 0xc2b2ae35; \
H
Hongze Cheng 已提交
32 33
    (h) ^= (h) >> 16;  \
  } while (0)
G
Ganlin Zhao 已提交
34

L
Liu Jicong 已提交
35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51
uint32_t taosFastHash(const char *key, uint32_t len) {
  uint32_t result = 0x55555555;
  for (uint32_t i = 0; i < len; i++) {
    result ^= (uint8_t)key[i];
    result = ROTL32(result, 5);
  }
  return result;
}

uint32_t taosDJB2Hash(const char *key, uint32_t len) {
  uint32_t hash = 5381;
  for (uint32_t i = 0; i < len; i++) {
    hash = ((hash << 5) + hash) + (uint8_t)key[i]; /* hash * 33 + c */
  }
  return hash;
}

H
Haojun Liao 已提交
52
uint32_t MurmurHash3_32(const char *key, uint32_t len) {
H
hzcheng 已提交
53
  const uint8_t *data = (const uint8_t *)key;
H
Hongze Cheng 已提交
54
  const int32_t  nblocks = len >> 2u;
H
hzcheng 已提交
55

H
Haojun Liao 已提交
56
  uint32_t h1 = 0x12345678;
H
hzcheng 已提交
57 58 59 60 61 62

  const uint32_t c1 = 0xcc9e2d51;
  const uint32_t c2 = 0x1b873593;

  const uint32_t *blocks = (const uint32_t *)(data + nblocks * 4);

S
hash  
Shengliang Guan 已提交
63
  for (int32_t i = -nblocks; i; i++) {
H
hzcheng 已提交
64 65 66
    uint32_t k1 = blocks[i];

    k1 *= c1;
H
Haojun Liao 已提交
67
    k1 = ROTL32(k1, 15u);
H
hzcheng 已提交
68 69 70
    k1 *= c2;

    h1 ^= k1;
H
Haojun Liao 已提交
71
    h1 = ROTL32(h1, 13u);
H
hzcheng 已提交
72 73 74 75 76 77 78
    h1 = h1 * 5 + 0xe6546b64;
  }

  const uint8_t *tail = (data + nblocks * 4);

  uint32_t k1 = 0;

H
Haojun Liao 已提交
79
  switch (len & 3u) {
H
hzcheng 已提交
80 81 82 83 84 85 86
    case 3:
      k1 ^= tail[2] << 16;
    case 2:
      k1 ^= tail[1] << 8;
    case 1:
      k1 ^= tail[0];
      k1 *= c1;
H
Haojun Liao 已提交
87
      k1 = ROTL32(k1, 15u);
H
hzcheng 已提交
88 89 90 91 92 93 94 95
      k1 *= c2;
      h1 ^= k1;
  };

  h1 ^= len;

  FMIX32(h1);

H
Haojun Liao 已提交
96
  return h1;
H
hzcheng 已提交
97
}
98

G
Ganlin Zhao 已提交
99 100
uint64_t MurmurHash3_64(const char *key, uint32_t len) {
  const uint64_t m = 0x87c37b91114253d5;
H
Hongze Cheng 已提交
101 102 103
  const int      r = 47;
  uint32_t       seed = 0x12345678;
  uint64_t       h = seed ^ (len * m);
G
Ganlin Zhao 已提交
104
  const uint8_t *data = (const uint8_t *)key;
H
Hongze Cheng 已提交
105
  const uint8_t *end = data + (len - (len & 7));
G
Ganlin Zhao 已提交
106

H
Hongze Cheng 已提交
107 108
  while (data != end) {
    uint64_t k = *((uint64_t *)data);
G
Ganlin Zhao 已提交
109 110 111 112 113 114 115 116 117

    k *= m;
    k ^= k >> r;
    k *= m;
    h ^= k;
    h *= m;
    data += 8;
  }

H
Hongze Cheng 已提交
118 119 120 121 122 123 124 125 126 127 128 129 130 131 132
  switch (len & 7) {
    case 7:
      h ^= (uint64_t)data[6] << 48; /* fall-thru */
    case 6:
      h ^= (uint64_t)data[5] << 40; /* fall-thru */
    case 5:
      h ^= (uint64_t)data[4] << 32; /* fall-thru */
    case 4:
      h ^= (uint64_t)data[3] << 24; /* fall-thru */
    case 3:
      h ^= (uint64_t)data[2] << 16; /* fall-thru */
    case 2:
      h ^= (uint64_t)data[1] << 8; /* fall-thru */
    case 1:
      h ^= (uint64_t)data[0];
G
Ganlin Zhao 已提交
133 134 135 136 137 138 139 140 141
      h *= m; /* fall-thru */
  };

  h ^= h >> r;
  h *= m;
  h ^= h >> r;
  return h;
}

142
uint32_t taosIntHash_32(const char *key, uint32_t UNUSED_PARAM(len)) { return *(uint32_t *)key; }
H
hjxilinx 已提交
143 144
uint32_t taosIntHash_16(const char *key, uint32_t UNUSED_PARAM(len)) { return *(uint16_t *)key; }
uint32_t taosIntHash_8(const char *key, uint32_t UNUSED_PARAM(len)) { return *(uint8_t *)key; }
145
uint32_t taosFloatHash(const char *key, uint32_t UNUSED_PARAM(len)) {
G
Ganlin Zhao 已提交
146
  float f = GET_FLOAT_VAL(key);
147 148 149
  if (isnan(f)) {
    return 0x7fc00000;
  }
G
Ganlin Zhao 已提交
150

151
  if (FLT_EQUAL(f, 0.0)) {
152
    return 0;
G
Ganlin Zhao 已提交
153
  }
H
Hongze Cheng 已提交
154 155 156
  if (fabs(f) < FLT_MAX / BASE - DLT) {
    int32_t t = (int32_t)(round(BASE * (f + DLT)));
    return (uint32_t)t;
Y
yihaoDeng 已提交
157
  } else {
H
Hongze Cheng 已提交
158
    return 0x7fc00000;
159
  }
160 161
}
uint32_t taosDoubleHash(const char *key, uint32_t UNUSED_PARAM(len)) {
G
Ganlin Zhao 已提交
162
  double f = GET_DOUBLE_VAL(key);
163 164 165
  if (isnan(f)) {
    return 0x7fc00000;
  }
166 167

  if (FLT_EQUAL(f, 0.0)) {
168
    return 0;
G
Ganlin Zhao 已提交
169
  }
H
Hongze Cheng 已提交
170 171 172
  if (fabs(f) < DBL_MAX / BASE - DLT) {
    int32_t t = (int32_t)(round(BASE * (f + DLT)));
    return (uint32_t)t;
Y
yihaoDeng 已提交
173
  } else {
H
Hongze Cheng 已提交
174
    return 0x7fc00000;
G
Ganlin Zhao 已提交
175
  }
176
}
177 178 179 180 181
uint32_t taosIntHash_64(const char *key, uint32_t UNUSED_PARAM(len)) {
  uint64_t val = *(uint64_t *)key;

  uint64_t hash = val >> 16U;
  hash += (val & 0xFFFFU);
G
Ganlin Zhao 已提交
182

S
TD-1037  
Shengliang Guan 已提交
183
  return (uint32_t)hash;
184 185 186 187
}

_hash_fn_t taosGetDefaultHashFunction(int32_t type) {
  _hash_fn_t fn = NULL;
H
Hongze Cheng 已提交
188
  switch (type) {
H
hjxilinx 已提交
189
    case TSDB_DATA_TYPE_TIMESTAMP:
W
wpan 已提交
190
    case TSDB_DATA_TYPE_UBIGINT:
G
Ganlin Zhao 已提交
191
    case TSDB_DATA_TYPE_BIGINT:
W
wpan 已提交
192 193
      fn = taosIntHash_64;
      break;
G
Ganlin Zhao 已提交
194
    case TSDB_DATA_TYPE_BINARY:
W
wpan 已提交
195 196
      fn = MurmurHash3_32;
      break;
G
Ganlin Zhao 已提交
197
    case TSDB_DATA_TYPE_NCHAR:
W
wpan 已提交
198 199 200
      fn = MurmurHash3_32;
      break;
    case TSDB_DATA_TYPE_UINT:
G
Ganlin Zhao 已提交
201 202
    case TSDB_DATA_TYPE_INT:
      fn = taosIntHash_32;
W
wpan 已提交
203
      break;
G
Ganlin Zhao 已提交
204 205 206
    case TSDB_DATA_TYPE_SMALLINT:
    case TSDB_DATA_TYPE_USMALLINT:
      fn = taosIntHash_16;
W
wpan 已提交
207 208 209
      break;
    case TSDB_DATA_TYPE_BOOL:
    case TSDB_DATA_TYPE_UTINYINT:
G
Ganlin Zhao 已提交
210 211 212 213 214
    case TSDB_DATA_TYPE_TINYINT:
      fn = taosIntHash_8;
      break;
    case TSDB_DATA_TYPE_FLOAT:
      fn = taosFloatHash;
W
wpan 已提交
215
      break;
G
Ganlin Zhao 已提交
216 217 218 219
    case TSDB_DATA_TYPE_DOUBLE:
      fn = taosDoubleHash;
      break;
    default:
W
wpan 已提交
220 221
      fn = taosIntHash_32;
      break;
222
  }
G
Ganlin Zhao 已提交
223

224
  return fn;
225 226
}

D
dapan1121 已提交
227 228 229 230 231 232 233
int32_t taosFloatEqual(const void *a, const void *b, size_t UNUSED_PARAM(sz)) {
  return getComparFunc(TSDB_DATA_TYPE_FLOAT, -1)(a, b);
}

int32_t taosDoubleEqual(const void *a, const void *b, size_t UNUSED_PARAM(sz)) {
  return getComparFunc(TSDB_DATA_TYPE_DOUBLE, -1)(a, b);
}
234 235 236

_equal_fn_t taosGetDefaultEqualFunction(int32_t type) {
  _equal_fn_t fn = NULL;
D
dapan1121 已提交
237
  switch (type) {
H
Hongze Cheng 已提交
238 239 240 241 242 243 244 245 246
    case TSDB_DATA_TYPE_FLOAT:
      fn = taosFloatEqual;
      break;
    case TSDB_DATA_TYPE_DOUBLE:
      fn = taosDoubleEqual;
      break;
    default:
      fn = memcmp;
      break;
D
dapan1121 已提交
247
  }
248 249
  return fn;
}