indexFstUtil.c 4.3 KB
Newer Older
dengyihao's avatar
dengyihao 已提交
1 2 3 4 5 6 7 8 9 10 11 12 13 14
/*
 * Copyright (c) 2019 TAOS Data, Inc. <jhtao@taosdata.com>
 *
 * This program is free software: you can use, redistribute, and/or modify
 * it under the terms of the GNU Affero General Public License, version 3
 * or later ("AGPL"), as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.
 *
 * You should have received a copy of the GNU Affero General Public License
 * along with this program. If not, see <http://www.gnu.org/licenses/>.
 */
dengyihao's avatar
dengyihao 已提交
15 16
#include "indexFstUtil.h"
#include "indexFstCommon.h"
dengyihao's avatar
dengyihao 已提交
17

18 19
// A sentinel value used to indicate an empty final state.
// packDeltaSize() and unpackDelta() in this file special-case it: a transition
// address / decoded delta equal to EMPTY_ADDRESS is passed through as
// EMPTY_ADDRESS instead of being combined with the node address.
const CompiledAddr EMPTY_ADDRESS = 0;
dengyihao's avatar
dengyihao 已提交
20
// A sentinel value used to indicate an invalid state.
21
const CompiledAddr NONE_ADDRESS = 1;
dengyihao's avatar
dengyihao 已提交
22 23

// This version number is written to every finite state transducer created by
dengyihao's avatar
dengyihao 已提交
24
// this version. When a finite state transducer is read, its version number is
dengyihao's avatar
dengyihao 已提交
25
// checked against this value.
26
const uint64_t VERSION = 3;
dengyihao's avatar
dengyihao 已提交
27

28 29
// The threshold (in number of transitions) at which an index is created for
// a node's transitions. This speeds up lookup time at the expense of FST size.
dengyihao's avatar
dengyihao 已提交
30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51
const uint64_t TRANS_INDEX_THRESHOLD = 32;

// Returns the smallest number of bytes (1..8) able to represent n.
// Zero still needs one byte.
uint8_t packSize(uint64_t n) {
  uint8_t nbytes = 1;
  // Strip the lowest byte repeatedly; every non-zero remainder means one
  // more byte is required.
  while ((n >>= 8) != 0) {
    nbytes++;
  }
  return nbytes;
}

dengyihao's avatar
dengyihao 已提交
52
// Decodes an unsigned integer stored least-significant byte first in the
// first sz bytes of ch.
//
// ch: buffer holding at least sz readable bytes.
// sz: number of bytes to decode; values above 8 are treated as 8 because a
//     uint64_t cannot hold more and shifting by 64 bits or more is undefined.
// Returns the decoded value (0 when sz is 0).
uint64_t unpackUint64(uint8_t* ch, uint8_t sz) {
  if (sz > sizeof(uint64_t)) {
    sz = sizeof(uint64_t);
  }

  uint64_t n = 0;
  for (uint8_t i = 0; i < sz; i++) {
    // Widen to 64 bits BEFORE shifting: a bare uint8_t is promoted to int, so
    // shifts of 32+ bits would be undefined and a set top bit in byte 3 would
    // sign-extend into the upper half of the result.
    n |= (uint64_t)ch[i] << (8 * i);
  }
  return n;
}
// Number of bytes needed to store a transition target relative to the node
// that references it. A target of EMPTY_ADDRESS is sized as EMPTY_ADDRESS
// itself; any other target is sized as the distance nodeAddr - transAddr.
uint8_t packDeltaSize(CompiledAddr nodeAddr, CompiledAddr transAddr) {
  if (transAddr != EMPTY_ADDRESS) {
    return packSize(nodeAddr - transAddr);
  }
  return packSize(EMPTY_ADDRESS);
}
dengyihao's avatar
dengyihao 已提交
66
CompiledAddr unpackDelta(char* data, uint64_t len, uint64_t nodeAddr) {
67
  uint64_t delta = unpackUint64(data, len);
dengyihao's avatar
dengyihao 已提交
68 69 70 71 72 73 74 75 76
  // delta_add = u64_to_usize
  if (delta == EMPTY_ADDRESS) {
    return EMPTY_ADDRESS;
  } else {
    return nodeAddr - delta;
  }
}

// fst slice func
dengyihao's avatar
dengyihao 已提交
77

dengyihao's avatar
dengyihao 已提交
78
FstSlice fstSliceCreate(uint8_t* data, uint64_t len) {
wafwerar's avatar
wafwerar 已提交
79
  FstString* str = (FstString*)taosMemoryMalloc(sizeof(FstString));
80 81
  str->ref = 1;
  str->len = len;
wafwerar's avatar
wafwerar 已提交
82
  str->data = taosMemoryMalloc(len * sizeof(uint8_t));
dengyihao's avatar
dengyihao 已提交
83 84 85 86

  if (data != NULL) {
    memcpy(str->data, data, len);
  }
87

dengyihao's avatar
dengyihao 已提交
88 89
  FstSlice s = {.str = str, .start = 0, .end = len - 1};
  return s;
90
}
dengyihao's avatar
dengyihao 已提交
91
// just shallow copy
dengyihao's avatar
dengyihao 已提交
92 93
// Shallow copy: the result shares s's underlying FstString (whose reference
// count is atomically incremented) and covers [start, end] expressed relative
// to the beginning of s.
FstSlice fstSliceCopy(FstSlice* s, int32_t start, int32_t end) {
  atomic_add_fetch_32(&s->str->ref, 1);

  // Translate the caller's offsets into offsets within the shared buffer.
  return (FstSlice){.str = s->str, .start = s->start + start, .end = s->start + end};
}
dengyihao's avatar
dengyihao 已提交
99
// Deep copy: duplicates bytes [start, end] of s (offsets relative to the
// beginning of s) into a newly allocated FstString with a reference count
// of 1.
//
// Returns a slice covering the whole new buffer. If an allocation fails,
// nothing is leaked and the returned slice has str == NULL
// (fstSliceIsEmpty() reports it as empty).
// NOTE(review): assumes taosMemoryMalloc signals failure by returning NULL,
// like malloc - confirm against its definition.
FstSlice fstSliceDeepCopy(FstSlice* s, int32_t start, int32_t end) {
  FstSlice ans = {.str = NULL, .start = 0, .end = -1};

  int32_t tlen = end - start + 1;
  int32_t slen;

  uint8_t* data = fstSliceData(s, &slen);
  assert(tlen <= slen);

  uint8_t* buf = taosMemoryMalloc(sizeof(uint8_t) * tlen);
  // A zero-sized request may legitimately yield NULL; anything else is a
  // genuine allocation failure.
  if (buf == NULL && tlen != 0) {
    return ans;
  }
  if (tlen > 0) {
    memcpy(buf, data + start, tlen);
  }

  FstString* str = taosMemoryMalloc(sizeof(FstString));
  if (str == NULL) {
    taosMemoryFree(buf);
    return ans;
  }
  str->data = buf;
  str->len = tlen;
  str->ref = 1;

  ans.str = str;
  ans.start = 0;
  ans.end = tlen - 1;
  return ans;
}
dengyihao's avatar
dengyihao 已提交
120
// A slice is empty when it has no backing string, when the backing string has
// zero length, or when either of its bounds is negative.
bool fstSliceIsEmpty(FstSlice* s) {
  if (s->str == NULL) {
    return true;
  }
  if (s->str->len == 0) {
    return true;
  }
  return s->start < 0 || s->end < 0;
}
dengyihao's avatar
dengyihao 已提交
121

dengyihao's avatar
dengyihao 已提交
122 123
// Returns a pointer to the first byte covered by the slice. When size is
// non-NULL it receives the number of bytes covered (end - start + 1).
// The pointer refers into the shared buffer; nothing is copied.
uint8_t* fstSliceData(FstSlice* s, int32_t* size) {
  if (size) {
    *size = s->end - s->start + 1;
  }
  return s->str->data + s->start;
}
dengyihao's avatar
dengyihao 已提交
129 130
// Drops this slice's reference to its backing FstString. When the last
// reference goes away the buffer and the FstString are freed and s->str is
// reset to NULL.
//
// A slice whose str is already NULL (for example one that was destroyed
// before as the last owner) is ignored, so a repeated destroy is harmless.
void fstSliceDestroy(FstSlice* s) {
  FstString* str = s->str;
  if (str == NULL) {
    return;
  }

  int32_t ref = atomic_sub_fetch_32(&str->ref, 1);
  if (ref == 0) {
    taosMemoryFree(str->data);
    taosMemoryFree(str);
    s->str = NULL;
  }
}

dengyihao's avatar
dengyihao 已提交
140
// Lexicographic, byte-wise comparison of two slices.
//
// Returns -1 if a sorts before b, 1 if a sorts after b, and 0 if they are
// identical. When one slice is a prefix of the other, the shorter one sorts
// first.
int fstSliceCompare(FstSlice* a, FstSlice* b) {
  int32_t  alen, blen;
  uint8_t* aBuf = fstSliceData(a, &alen);
  uint8_t* bBuf = fstSliceData(b, &blen);

  // fstSliceData computes end - start + 1, which can be negative for a
  // malformed slice. Treat that as "no bytes" and keep every index signed, so
  // a negative length is never reinterpreted as a huge unsigned bound.
  if (alen < 0) {
    alen = 0;
  }
  if (blen < 0) {
    blen = 0;
  }

  int32_t common = (alen < blen) ? alen : blen;
  for (int32_t i = 0; i < common; i++) {
    if (aBuf[i] != bBuf[i]) {
      return (aBuf[i] < bBuf[i]) ? -1 : 1;
    }
  }

  // Shared prefix is identical: the longer slice sorts last.
  if (alen > common) {
    return 1;
  }
  if (blen > common) {
    return -1;
  }
  return 0;
}