indexFstUtil.c 4.6 KB
Newer Older
dengyihao's avatar
dengyihao 已提交
1 2 3 4 5 6 7 8 9 10 11 12 13 14
/*
 * Copyright (c) 2019 TAOS Data, Inc. <jhtao@taosdata.com>
 *
 * This program is free software: you can use, redistribute, and/or modify
 * it under the terms of the GNU Affero General Public License, version 3
 * or later ("AGPL"), as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.
 *
 * You should have received a copy of the GNU Affero General Public License
 * along with this program. If not, see <http://www.gnu.org/licenses/>.
 */
dengyihao's avatar
dengyihao 已提交
15 16
#include "indexFstUtil.h"
#include "indexFstCommon.h"
dengyihao's avatar
dengyihao 已提交
17

18 19
// A sentinel value used to indicate an empty final state.
// packDeltaSize() and unpackDelta() pass this value through unchanged instead
// of delta-encoding/decoding it against the node address.
const CompiledAddr EMPTY_ADDRESS = 0;
dengyihao's avatar
dengyihao 已提交
20
// A sentinel value used to indicate an invalid state.
21
const CompiledAddr NONE_ADDRESS = 1;
dengyihao's avatar
dengyihao 已提交
22 23 24 25

// This version number is written to every finite state transducer created by
// this library. When a finite state transducer is read, its version number is
// checked against this value.
26 27 28
const uint64_t VERSION = 3;
// The threshold (in number of transitions) at which an index is created for
// a node's transitions. This speeds up lookup time at the expense of FST size.
dengyihao's avatar
dengyihao 已提交
29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51

const uint64_t TRANS_INDEX_THRESHOLD = 32;

// Returns the minimum number of bytes (1..8) needed to store `n`.
// Zero still occupies one byte.
uint8_t packSize(uint64_t n) {
  uint8_t width = 1;
  // Every non-zero group of bits above the lowest byte costs one more byte.
  for (uint64_t rest = n >> 8; rest != 0; rest >>= 8) {
    width++;
  }
  return width;
}

dengyihao's avatar
dengyihao 已提交
52
// Decodes a little-endian unsigned integer from the first `sz` bytes of `ch`.
//
// ch: buffer holding at least `sz` readable bytes; ch[0] is the least
//     significant byte.
// sz: number of bytes to decode. Must not exceed 8, otherwise the shift
//     amount would reach the width of uint64_t.
// Returns the decoded value; 0 when `sz` is 0.
uint64_t unpackUint64(uint8_t* ch, uint8_t sz) {
  uint64_t n = 0;
  for (uint8_t i = 0; i < sz; i++) {
    // Widen before shifting: a bare ch[i] is promoted to int, so shifting it
    // by 24 with the top bit set, or by 32 and more, is not a valid 64-bit
    // shift and corrupts the result.
    n |= (uint64_t)ch[i] << (8 * i);
  }
  return n;
}
// Returns the number of bytes needed to store the transition address
// `transAddr` as a delta relative to `nodeAddr`. EMPTY_ADDRESS is not
// delta-encoded: it is sized as the sentinel value itself.
uint8_t packDeltaSize(CompiledAddr nodeAddr, CompiledAddr transAddr) {
  CompiledAddr encoded = (transAddr == EMPTY_ADDRESS) ? EMPTY_ADDRESS : (nodeAddr - transAddr);
  return packSize(encoded);
}
dengyihao's avatar
dengyihao 已提交
67
CompiledAddr unpackDelta(char* data, uint64_t len, uint64_t nodeAddr) {
68
  uint64_t delta = unpackUint64(data, len);
dengyihao's avatar
dengyihao 已提交
69 70 71 72 73 74 75 76 77
  // delta_add = u64_to_usize
  if (delta == EMPTY_ADDRESS) {
    return EMPTY_ADDRESS;
  } else {
    return nodeAddr - delta;
  }
}

// fst slice func
dengyihao's avatar
dengyihao 已提交
78 79
//

dengyihao's avatar
dengyihao 已提交
80
FstSlice fstSliceCreate(uint8_t* data, uint64_t len) {
wafwerar's avatar
wafwerar 已提交
81
  FstString* str = (FstString*)taosMemoryMalloc(sizeof(FstString));
82 83
  str->ref = 1;
  str->len = len;
wafwerar's avatar
wafwerar 已提交
84
  str->data = taosMemoryMalloc(len * sizeof(uint8_t));
dengyihao's avatar
dengyihao 已提交
85
  memcpy(str->data, data, len);
86

dengyihao's avatar
dengyihao 已提交
87 88
  FstSlice s = {.str = str, .start = 0, .end = len - 1};
  return s;
89
}
dengyihao's avatar
dengyihao 已提交
90
// just shallow copy
dengyihao's avatar
dengyihao 已提交
91 92
// Returns a new view onto the same underlying string as `s`; no bytes are
// copied. `start` and `end` are offsets relative to the beginning of `s`.
// The shared string's reference count is incremented, so the result needs its
// own fstSliceDestroy() call.
FstSlice fstSliceCopy(FstSlice* s, int32_t start, int32_t end) {
  FstSlice view = {0};

  view.str = s->str;
  view.str->ref++;
  view.start = start + s->start;
  view.end = end + s->start;
  return view;
}
dengyihao's avatar
dengyihao 已提交
98
// Creates an independent slice holding a copy of bytes [start, end]
// (inclusive, relative to the beginning of `s`).
//
// The result owns its storage (ref = 1) and covers it entirely
// (start = 0, end = length - 1). If the requested range has negative length
// or an allocation fails, a slice with str == NULL is returned, which
// fstSliceIsEmpty() reports as empty.
FstSlice fstSliceDeepCopy(FstSlice* s, int32_t start, int32_t end) {
  FstSlice ans = {.str = NULL, .start = 0, .end = -1};

  int32_t  tlen = end - start + 1;
  int32_t  slen = 0;
  uint8_t* data = fstSliceData(s, &slen);
  assert(tlen <= slen);

  // A negative length would be converted to a huge size_t by the allocation
  // and the copy below.
  if (tlen < 0) {
    return ans;
  }

  uint8_t* buf = taosMemoryMalloc(sizeof(uint8_t) * tlen);
  // A zero-byte request may legitimately yield NULL; only a NULL result for a
  // non-empty request counts as a failure.
  if (buf == NULL && tlen > 0) {
    return ans;
  }
  if (tlen > 0) {
    memcpy(buf, data + start, tlen);
  }

  FstString* str = taosMemoryMalloc(sizeof(FstString));
  if (str == NULL) {
    taosMemoryFree(buf);
    return ans;
  }
  str->data = buf;
  str->len = tlen;
  str->ref = 1;

  ans.str = str;
  ans.start = 0;
  ans.end = tlen - 1;
  return ans;
}
dengyihao's avatar
dengyihao 已提交
118
// Reports whether `s` refers to no data: it has no backing string, the backing
// string has zero length, or either bound is negative.
bool fstSliceIsEmpty(FstSlice* s) {
  if (s->str == NULL) {
    return true;
  }
  if (s->str->len == 0) {
    return true;
  }
  return s->start < 0 || s->end < 0;
}
dengyihao's avatar
dengyihao 已提交
119

dengyihao's avatar
dengyihao 已提交
120 121
// Returns a pointer to the first byte covered by `s`.
// When `size` is not NULL, the number of covered bytes (end - start + 1) is
// stored through it. The slice must have a backing string.
uint8_t* fstSliceData(FstSlice* s, int32_t* size) {
  if (size != NULL) {
    int32_t covered = s->end - s->start + 1;
    *size = covered;
  }
  return &s->str->data[s->start];
}
dengyihao's avatar
dengyihao 已提交
127 128
// Drops this slice's reference to its backing string.
//
// When the last reference is released, the string's bytes and the string
// itself are freed and s->str is set to NULL. A NULL `s` or a slice without a
// backing string (str == NULL, e.g. one that was already fully destroyed) is
// ignored.
void fstSliceDestroy(FstSlice* s) {
  if (s == NULL || s->str == NULL) {
    return;
  }

  FstString* str = s->str;
  str->ref--;
  if (str->ref == 0) {
    taosMemoryFree(str->data);
    taosMemoryFree(str);
    s->str = NULL;
  }
}

dengyihao's avatar
dengyihao 已提交
137
// Compares the bytes covered by two slices lexicographically, byte by byte as
// unsigned values.
//
// Returns -1 if `a` sorts before `b`, 1 if it sorts after, and 0 if both cover
// identical byte sequences. When one slice is a prefix of the other, the
// shorter one sorts first.
int fstSliceCompare(FstSlice* a, FstSlice* b) {
  int32_t  alen = 0, blen = 0;
  uint8_t* aBuf = fstSliceData(a, &alen);
  uint8_t* bBuf = fstSliceData(b, &blen);

  // Lengths are signed, so compare them as signed values; only the common
  // prefix is handed to memcmp.
  int32_t common = (alen < blen) ? alen : blen;
  if (common > 0) {
    int diff = memcmp(aBuf, bBuf, (size_t)common);
    if (diff != 0) {
      // memcmp only guarantees the sign of its result; normalise to -1 / 1.
      return (diff < 0) ? -1 : 1;
    }
  }

  if (alen > blen) {
    return 1;
  } else if (alen < blen) {
    return -1;
  } else {
    return 0;
  }
}
dengyihao's avatar
dengyihao 已提交
162

163
// FstStack* fstStackCreate(size_t elemSize, StackFreeElem freeFn) {
wafwerar's avatar
wafwerar 已提交
164
//  FstStack *s = taosMemoryCalloc(1, sizeof(FstStack));
dengyihao's avatar
dengyihao 已提交
165
//  if (s == NULL) { return NULL; }
166 167 168
//  s->
//  s->freeFn
//
dengyihao's avatar
dengyihao 已提交
169
//}
170 171 172 173 174
// void  *fstStackPush(FstStack *s, void *elem);
// void  *fstStackTop(FstStack *s);
// size_t fstStackLen(FstStack *s);
// void  *fstStackGetAt(FstStack *s, size_t i);
// void   fstStackDestory(FstStack *);