indexFstUtil.c 4.3 KB
Newer Older
dengyihao's avatar
dengyihao 已提交
1 2 3 4 5 6 7 8 9 10 11 12 13 14
/*
 * Copyright (c) 2019 TAOS Data, Inc. <jhtao@taosdata.com>
 *
 * This program is free software: you can use, redistribute, and/or modify
 * it under the terms of the GNU Affero General Public License, version 3
 * or later ("AGPL"), as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.
 *
 * You should have received a copy of the GNU Affero General Public License
 * along with this program. If not, see <http://www.gnu.org/licenses/>.
 */
dengyihao's avatar
dengyihao 已提交
15 16
#include "indexFstUtil.h"
#include "indexFstCommon.h"
dengyihao's avatar
dengyihao 已提交
17

18 19
// A sentinel value used to indicate an empty final state
const CompiledAddr EMPTY_ADDRESS = 0;
dengyihao's avatar
dengyihao 已提交
20
/// A sentinel value used to indicate an invalid state.
21
const CompiledAddr NONE_ADDRESS = 1;
dengyihao's avatar
dengyihao 已提交
22 23 24 25

// This version number is written to every finite state transducer created by
// this library. When a finite state transducer is read, its version number is
// checked against this value.
26 27 28
const uint64_t VERSION = 3;
// The threshold (in number of transitions) at which an index is created for
// a node's transitions. This speeds up lookup time at the expense of FST size.
dengyihao's avatar
dengyihao 已提交
29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51

const uint64_t TRANS_INDEX_THRESHOLD = 32;

// Return the minimum number of bytes (1..8) needed to store `n`.
// Zero still occupies one byte.
uint8_t packSize(uint64_t n) {
  uint8_t nbytes = 1;
  // Grow the width while bits remain above the bytes counted so far. The
  // shift amount never exceeds 56, so it is always well defined.
  while (nbytes < 8 && (n >> (8 * nbytes)) != 0) {
    nbytes++;
  }
  return nbytes;
}

dengyihao's avatar
dengyihao 已提交
52
// Decode a little-endian unsigned integer from the first `sz` bytes of `ch`.
//
// ch: buffer with at least `sz` readable bytes, least-significant byte first.
// sz: number of bytes to decode. Values above 8 are clamped to 8, since a
//     uint64_t cannot hold more and a larger shift would be undefined.
//
// Returns the decoded value (0 when `sz` is 0).
uint64_t unpackUint64(uint8_t* ch, uint8_t sz) {
  uint64_t n = 0;
  if (sz > sizeof(uint64_t)) {
    sz = (uint8_t)sizeof(uint64_t);
  }
  for (uint8_t i = 0; i < sz; i++) {
    // Widen before shifting: a uint8_t promotes to int, so shifting it by 24
    // bits can sign-extend into the upper half and shifting by 32 or more is
    // undefined behavior.
    n |= (uint64_t)ch[i] << (8 * i);
  }
  return n;
}
// Number of bytes needed to encode a transition target as a delta from the
// node address. An EMPTY_ADDRESS target is encoded as EMPTY_ADDRESS itself
// rather than as a delta.
uint8_t packDeltaSize(CompiledAddr nodeAddr, CompiledAddr transAddr) {
  if (transAddr != EMPTY_ADDRESS) {
    return packSize(nodeAddr - transAddr);
  }
  return packSize(EMPTY_ADDRESS);
}
dengyihao's avatar
dengyihao 已提交
67
CompiledAddr unpackDelta(char* data, uint64_t len, uint64_t nodeAddr) {
68
  uint64_t delta = unpackUint64(data, len);
dengyihao's avatar
dengyihao 已提交
69 70 71 72 73 74 75 76 77
  // delta_add = u64_to_usize
  if (delta == EMPTY_ADDRESS) {
    return EMPTY_ADDRESS;
  } else {
    return nodeAddr - delta;
  }
}

// fst slice func
dengyihao's avatar
dengyihao 已提交
78

dengyihao's avatar
dengyihao 已提交
79
FstSlice fstSliceCreate(uint8_t* data, uint64_t len) {
wafwerar's avatar
wafwerar 已提交
80
  FstString* str = (FstString*)taosMemoryMalloc(sizeof(FstString));
81 82
  str->ref = 1;
  str->len = len;
wafwerar's avatar
wafwerar 已提交
83
  str->data = taosMemoryMalloc(len * sizeof(uint8_t));
dengyihao's avatar
dengyihao 已提交
84 85 86 87

  if (data != NULL) {
    memcpy(str->data, data, len);
  }
88

dengyihao's avatar
dengyihao 已提交
89 90
  FstSlice s = {.str = str, .start = 0, .end = len - 1};
  return s;
91
}
dengyihao's avatar
dengyihao 已提交
92
// just shallow copy
dengyihao's avatar
dengyihao 已提交
93 94
// Shallow copy: build a new slice that shares `s`'s buffer and bump the
// buffer's reference count. `start` and `end` are offsets relative to the
// beginning of `s`; no bytes are copied.
FstSlice fstSliceCopy(FstSlice* s, int32_t start, int32_t end) {
  FstString* shared = s->str;
  FstSlice   view = {0};

  // One more slice now refers to the same buffer.
  atomic_add_fetch_32(&shared->ref, 1);

  view.str = shared;
  view.start = start + s->start;
  view.end = end + s->start;
  return view;
}
dengyihao's avatar
dengyihao 已提交
100
// Deep copy: build a new slice that owns a private copy of bytes
// [start, end] (offsets relative to the beginning of `s`).
//
// Returns a slice covering [0, tlen - 1] with its own buffer and a reference
// count of 1, where tlen = end - start + 1 (a negative range is treated as
// empty). If an allocation fails, returns a slice whose `str` is NULL;
// nothing is leaked in that case.
FstSlice fstSliceDeepCopy(FstSlice* s, int32_t start, int32_t end) {
  FstSlice failed = {.str = NULL, .start = 0, .end = -1};

  int32_t tlen = end - start + 1;
  int32_t slen;

  uint8_t* data = fstSliceData(s, &slen);
  // NOTE(review): this only checks the requested length, not that
  // start + tlen stays within the slice -- confirm whether callers rely on it.
  assert(tlen <= slen);

  // A negative length would otherwise turn into a huge allocation and memcpy.
  if (tlen < 0) {
    tlen = 0;
  }

  FstString* str = taosMemoryMalloc(sizeof(FstString));
  if (str == NULL) {
    return failed;
  }

  uint8_t* buf = taosMemoryMalloc(sizeof(uint8_t) * tlen);
  // A zero-byte allocation may legitimately return NULL; only a failed
  // non-empty request is an error.
  if (buf == NULL && tlen > 0) {
    taosMemoryFree(str);
    return failed;
  }
  if (tlen > 0) {
    memcpy(buf, data + start, tlen);
  }

  str->data = buf;
  str->len = tlen;
  str->ref = 1;

  FstSlice ans;
  ans.str = str;
  ans.start = 0;
  ans.end = tlen - 1;
  return ans;
}
dengyihao's avatar
dengyihao 已提交
121
// A slice is empty when it has no backing string, the backing string has
// zero length, or either of its bounds is negative.
bool fstSliceIsEmpty(FstSlice* s) {
  if (s->str == NULL) {
    return true;
  }
  if (s->str->len == 0) {
    return true;
  }
  return s->start < 0 || s->end < 0;
}
dengyihao's avatar
dengyihao 已提交
122

dengyihao's avatar
dengyihao 已提交
123 124
// Return a pointer to the first byte of the slice.
//
// s:    slice to read.
// size: optional out-parameter; when non-NULL it receives the slice length
//       (end - start + 1), or 0 if the slice has no backing string.
//
// Returns NULL when the slice has no backing string (s->str == NULL), which
// is the state left behind after the last reference is destroyed.
uint8_t* fstSliceData(FstSlice* s, int32_t* size) {
  FstString* str = s->str;
  if (str == NULL) {
    if (size != NULL) {
      *size = 0;
    }
    return NULL;
  }

  if (size != NULL) {
    *size = s->end - s->start + 1;
  }
  return str->data + s->start;
}
dengyihao's avatar
dengyihao 已提交
130 131
// Drop this slice's reference to its backing string. When the reference
// count reaches zero, the buffer and the string header are freed and
// s->str is set to NULL.
//
// Calling this on a NULL slice, or on a slice whose str is already NULL
// (for example after a previous destroy released the last reference), is a
// no-op.
void fstSliceDestroy(FstSlice* s) {
  if (s == NULL || s->str == NULL) {
    return;
  }
  FstString* str = s->str;

  int32_t ref = atomic_sub_fetch_32(&str->ref, 1);
  if (ref == 0) {
    taosMemoryFree(str->data);
    taosMemoryFree(str);
    s->str = NULL;
  }
}

dengyihao's avatar
dengyihao 已提交
141
// Lexicographically compare the bytes of two slices (bytes compared as
// unsigned values).
//
// Returns -1 if `a` sorts before `b`, 1 if it sorts after, and 0 if both
// hold the same bytes. When one slice is a prefix of the other, the shorter
// one sorts first.
int fstSliceCompare(FstSlice* a, FstSlice* b) {
  int32_t  alen, blen;
  uint8_t* aBuf = fstSliceData(a, &alen);
  uint8_t* bBuf = fstSliceData(b, &blen);

  // Treat a negative length as empty. Comparing it against an unsigned index
  // would turn it into a huge bound and read out of range.
  if (alen < 0) {
    alen = 0;
  }
  if (blen < 0) {
    blen = 0;
  }

  int32_t common = (alen < blen) ? alen : blen;
  if (common > 0) {
    int cmp = memcmp(aBuf, bBuf, (size_t)common);
    if (cmp != 0) {
      // Normalize to the -1 / 1 values callers have always received.
      return (cmp < 0) ? -1 : 1;
    }
  }

  // Common prefix is identical: the longer slice sorts last.
  if (alen == blen) {
    return 0;
  }
  return (alen > blen) ? 1 : -1;
}