indexFstUtil.c 4.6 KB
Newer Older
dengyihao's avatar
dengyihao 已提交
1 2 3 4 5 6 7 8 9 10 11 12 13 14
/*
 * Copyright (c) 2019 TAOS Data, Inc. <jhtao@taosdata.com>
 *
 * This program is free software: you can use, redistribute, and/or modify
 * it under the terms of the GNU Affero General Public License, version 3
 * or later ("AGPL"), as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.
 *
 * You should have received a copy of the GNU Affero General Public License
 * along with this program. If not, see <http://www.gnu.org/licenses/>.
 */
dengyihao's avatar
dengyihao 已提交
15 16
#include "indexFstUtil.h"
#include "indexFstCommon.h"
dengyihao's avatar
dengyihao 已提交
17

18 19
// A sentinel value used to indicate an empty final state.
const CompiledAddr EMPTY_ADDRESS = 0;
// A sentinel value used to indicate an invalid state.
const CompiledAddr NONE_ADDRESS = 1;

// This version number is written to every finite state transducer created by
// this library. When a finite state transducer is read, its version number is
// checked against this value.
const uint64_t VERSION = 3;

// The threshold (in number of transitions) at which an index is created for
// a node's transitions. This speeds up lookup time at the expense of FST size.
const uint64_t TRANS_INDEX_THRESHOLD = 32;

// Returns the minimum number of bytes (1..8) needed to store `n`.
// Zero still takes one byte.
uint8_t packSize(uint64_t n) {
  uint8_t bytes = 1;
  // Grow the width while bits remain above the current byte count. The shift
  // amount never exceeds 56, so it always stays below the width of uint64_t.
  while (bytes < 8 && (n >> (8 * bytes)) != 0) {
    bytes++;
  }
  return bytes;
}

dengyihao's avatar
dengyihao 已提交
52
// Decodes `sz` bytes starting at `ch` as a little-endian unsigned integer
// (ch[0] is the least significant byte).
//
// `sz` is expected to be at most 8: each byte is shifted left by 8 * index
// bits, so a larger count would shift by 64 bits or more.
uint64_t unpackUint64(uint8_t* ch, uint8_t sz) {
  uint64_t n = 0;
  for (uint8_t i = 0; i < sz; i++) {
    // Widen before shifting. A bare uint8_t is promoted to int, so shifting it
    // by 24 can set the sign bit (which then sign-extends into the upper
    // half), and shifting it by 32 or more is undefined.
    n |= (uint64_t)ch[i] << (8 * i);
  }
  return n;
}
// Returns the number of bytes needed to store a transition address as a delta
// from the address of the node that owns it.
//
// When `transAddr` is EMPTY_ADDRESS, the size of EMPTY_ADDRESS itself is
// returned; otherwise the size of `nodeAddr - transAddr`.
uint8_t packDeltaSize(CompiledAddr nodeAddr, CompiledAddr transAddr) {
  if (transAddr != EMPTY_ADDRESS) {
    return packSize(nodeAddr - transAddr);
  }
  return packSize(EMPTY_ADDRESS);
}
dengyihao's avatar
dengyihao 已提交
67
CompiledAddr unpackDelta(char* data, uint64_t len, uint64_t nodeAddr) {
68
  uint64_t delta = unpackUint64(data, len);
dengyihao's avatar
dengyihao 已提交
69 70 71 72 73 74 75 76 77
  // delta_add = u64_to_usize
  if (delta == EMPTY_ADDRESS) {
    return EMPTY_ADDRESS;
  } else {
    return nodeAddr - delta;
  }
}

// fst slice func
dengyihao's avatar
dengyihao 已提交
78 79
//

dengyihao's avatar
dengyihao 已提交
80
FstSlice fstSliceCreate(uint8_t* data, uint64_t len) {
wafwerar's avatar
wafwerar 已提交
81
  FstString* str = (FstString*)taosMemoryMalloc(sizeof(FstString));
82 83
  str->ref = 1;
  str->len = len;
wafwerar's avatar
wafwerar 已提交
84
  str->data = taosMemoryMalloc(len * sizeof(uint8_t));
dengyihao's avatar
dengyihao 已提交
85 86 87 88

  if (data != NULL) {
    memcpy(str->data, data, len);
  }
89

dengyihao's avatar
dengyihao 已提交
90 91
  FstSlice s = {.str = str, .start = 0, .end = len - 1};
  return s;
92
}
dengyihao's avatar
dengyihao 已提交
93
// just shallow copy
dengyihao's avatar
dengyihao 已提交
94 95
// Shallow copy: returns a new slice that shares the backing FstString of `s`
// and bumps its reference count. No bytes are copied.
//
// `start` and `end` are offsets relative to the start of `s`; they are
// rebased onto the backing string by adding s->start to each.
FstSlice fstSliceCopy(FstSlice* s, int32_t start, int32_t end) {
  FstSlice view = {.str = s->str, .start = s->start + start, .end = s->start + end};
  view.str->ref++;
  return view;
}
dengyihao's avatar
dengyihao 已提交
101
// Deep copy: duplicates bytes [start, end] (offsets relative to `s`) into a
// newly allocated FstString with a reference count of 1, and returns a slice
// spanning that new buffer (start 0, end tlen - 1).
//
// If an allocation fails, everything allocated so far is released and the
// returned slice has str == NULL, which fstSliceIsEmpty() reports as empty.
FstSlice fstSliceDeepCopy(FstSlice* s, int32_t start, int32_t end) {
  int32_t  tlen = end - start + 1;
  int32_t  slen;
  uint8_t* data = fstSliceData(s, &slen);
  // NOTE(review): this only checks the requested length against the slice
  // length; it does not check that start + tlen stays inside the slice.
  assert(tlen <= slen);

  FstSlice ans;
  ans.str = NULL;
  ans.start = 0;
  ans.end = -1;

  uint8_t* buf = taosMemoryMalloc(sizeof(uint8_t) * tlen);
  // A NULL result is only treated as a failure for a non-empty request.
  if (buf == NULL && tlen > 0) {
    return ans;
  }
  if (tlen > 0) {
    memcpy(buf, data + start, tlen);
  }

  FstString* str = taosMemoryMalloc(sizeof(FstString));
  if (str == NULL) {
    // Do not leak the byte buffer when the header allocation fails.
    taosMemoryFree(buf);
    return ans;
  }
  str->data = buf;
  str->len = tlen;
  str->ref = 1;

  ans.str = str;
  ans.start = 0;
  ans.end = tlen - 1;
  return ans;
}
dengyihao's avatar
dengyihao 已提交
121
// Returns true when the slice has no backing string, when the backing string
// has length 0, or when either bound of the slice is negative.
bool fstSliceIsEmpty(FstSlice* s) {
  if (s->str == NULL) {
    return true;
  }
  if (s->str->len == 0) {
    return true;
  }
  return s->start < 0 || s->end < 0;
}
dengyihao's avatar
dengyihao 已提交
122

dengyihao's avatar
dengyihao 已提交
123 124
// Returns a pointer to the first byte of the slice inside its backing string.
//
// When `size` is non-NULL it receives the slice length, end - start + 1.
// The returned pointer aliases the backing buffer; nothing is copied.
uint8_t* fstSliceData(FstSlice* s, int32_t* size) {
  FstString* backing = s->str;
  if (size) {
    *size = (s->end - s->start) + 1;
  }
  return backing->data + s->start;
}
dengyihao's avatar
dengyihao 已提交
130 131
// Drops this slice's reference to its backing FstString.
//
// When the reference count reaches 0, the byte buffer and the FstString are
// freed and s->str is set to NULL. While other references remain, s->str is
// left as is.
//
// Calling this on a NULL slice, or on a slice whose str is already NULL
// (for example after the last reference was released through it), is a no-op.
void fstSliceDestroy(FstSlice* s) {
  if (s == NULL || s->str == NULL) {
    return;
  }

  FstString* str = s->str;
  str->ref--;
  if (str->ref == 0) {
    taosMemoryFree(str->data);
    taosMemoryFree(str);
    s->str = NULL;
  }
}

dengyihao's avatar
dengyihao 已提交
140
// Compares two slices byte by byte as unsigned values (lexicographic order).
//
// Returns -1 when `a` sorts before `b`, 1 when it sorts after, and 0 when
// both have the same bytes and the same length. When one slice is a prefix
// of the other, the shorter one sorts first.
int fstSliceCompare(FstSlice* a, FstSlice* b) {
  int32_t  alen, blen;
  uint8_t* aBuf = fstSliceData(a, &alen);
  uint8_t* bBuf = fstSliceData(b, &blen);

  // Signed index on purpose: the lengths are signed, and comparing them with
  // an unsigned index would turn a negative length into a huge loop bound.
  int32_t common = (alen < blen) ? alen : blen;
  for (int32_t i = 0; i < common; i++) {
    uint8_t x = aBuf[i];
    uint8_t y = bBuf[i];
    if (x != y) {
      return (x < y) ? -1 : 1;
    }
  }

  // The shared prefix is identical, so the lengths decide.
  if (alen == blen) {
    return 0;
  }
  return (alen > blen) ? 1 : -1;
}
dengyihao's avatar
dengyihao 已提交
165

166
// FstStack* fstStackCreate(size_t elemSize, StackFreeElem freeFn) {
wafwerar's avatar
wafwerar 已提交
167
//  FstStack *s = taosMemoryCalloc(1, sizeof(FstStack));
dengyihao's avatar
dengyihao 已提交
168
//  if (s == NULL) { return NULL; }
169 170 171
//  s->
//  s->freeFn
//
dengyihao's avatar
dengyihao 已提交
172
//}
173 174 175 176 177
// void  *fstStackPush(FstStack *s, void *elem);
// void  *fstStackTop(FstStack *s);
// size_t fstStackLen(FstStack *s);
// void  *fstStackGetAt(FstStack *s, size_t i);
// void   fstStackDestory(FstStack *);