indexFstDfa.c 6.6 KB
Newer Older
dengyihao's avatar
dengyihao 已提交
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18
/*
 * Copyright (c) 2019 TAOS Data, Inc. <jhtao@taosdata.com>
 *
 * This program is free software: you can use, redistribute, and/or modify
 * it under the terms of the GNU Affero General Public License, version 3
 * or later ("AGPL"), as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.
 *
 * You should have received a copy of the GNU Affero General Public License
 * along with this program. If not, see <http://www.gnu.org/licenses/>.
 */

#include "indexFstDfa.h"
#include "thash.h"

dengyihao's avatar
dengyihao 已提交
19 20
// Threshold that dfaBuilder compares the number of built DFA states against.
static const uint32_t STATE_LIMIT = 1000;

dengyihao's avatar
dengyihao 已提交
21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43
static int dfaInstsEqual(const void *a, const void *b, size_t size) {
  SArray *ar = (SArray *)a;
  SArray *br = (SArray *)b;
  size_t  al = ar != NULL ? taosArrayGetSize(ar) : 0;
  size_t  bl = br != NULL ? taosArrayGetSize(br) : 0;
  if (al != bl) {
    return -1;
  }
  for (int i = 0; i < al; i++) {
    uint32_t v1 = *(uint32_t *)taosArrayGet(ar, i);
    uint32_t v2 = *(uint32_t *)taosArrayGet(br, i);
    if (v1 != v2) {
      return -1;
    }
  }
  return 0;
}
/*
 * Allocates a DFA builder for the instruction list `insts`.
 *
 * A new, empty DfaState array is created and wrapped together with `insts`
 * in a fresh FstDfa (see dfaCreate). The builder also gets a cache hash whose
 * key comparison is dfaInstsEqual. `insts` is only stored; it is neither
 * copied nor freed here.
 *
 * Returns the builder, or NULL when any allocation fails. On failure every
 * object allocated by this function is released again, so nothing leaks and
 * the caller never sees a half-initialised builder.
 */
FstDfaBuilder *dfaBuilderCreate(SArray *insts) {
  FstDfaBuilder *builder = taosMemoryCalloc(1, sizeof(FstDfaBuilder));
  if (builder == NULL) {
    return NULL;
  }

  SArray *states = taosArrayInit(4, sizeof(DfaState));
  if (states == NULL) {
    taosMemoryFree(builder);
    return NULL;
  }

  builder->dfa = dfaCreate(insts, states);
  if (builder->dfa == NULL) {
    taosArrayDestroy(states);
    taosMemoryFree(builder);
    return NULL;
  }

  // The hash function is picked by pointer width because the cache keys are pointer-sized.
  builder->cache = taosHashInit(
      4, taosGetDefaultHashFunction(POINTER_BYTES == sizeof(int64_t) ? TSDB_DATA_TYPE_BIGINT : TSDB_DATA_TYPE_INT),
      false, HASH_NO_LOCK);
  if (builder->cache == NULL) {
    // dfaCreate allocates the FstDfa with taosMemoryCalloc, so a plain free matches it.
    taosMemoryFree(builder->dfa);
    taosArrayDestroy(states);
    taosMemoryFree(builder);
    return NULL;
  }

  taosHashSetEqualFp(builder->cache, dfaInstsEqual);
  return builder;
}
dengyihao's avatar
dengyihao 已提交
53 54 55 56 57 58 59 60 61 62 63
/*
 * Releases a builder obtained from dfaBuilderCreate. NULL is accepted and
 * ignored.
 *
 * Every entry yielded by iterating the cache is read as an SArray* and
 * destroyed; afterwards the cache itself and the builder struct are freed.
 * builder->dfa is not freed by this function.
 */
void dfaBuilderDestroy(FstDfaBuilder *builder) {
  if (builder == NULL) {
    return;
  }

  if (builder->cache != NULL) {
    for (void *it = taosHashIterate(builder->cache, NULL); it != NULL; it = taosHashIterate(builder->cache, it)) {
      SArray **ppInsts = it;
      taosArrayDestroy(*ppInsts);
    }
  }

  taosHashCleanup(builder->cache);
  taosMemoryFree(builder);
}
dengyihao's avatar
dengyihao 已提交
66

dengyihao's avatar
dengyihao 已提交
67
/*
 * Expands the builder's DFA with a work list.
 *
 * The start set is seeded from instruction 0. Each state taken from the work
 * list is then run against every byte value 0..255 via dfaBuilderRunState,
 * and each resulting state that has not been recorded in `visited` yet is
 * queued for exploration. The loop ends when the work list is empty.
 *
 * Returns builder->dfa.
 *
 * NOTE(review): `cur` and `nxt` are never released inside this function, and
 * the STATE_LIMIT comparison below has an empty body -- confirm whether both
 * are intentional.
 */
FstDfa *dfaBuilder(FstDfaBuilder *builder) {
  FstDfa  *dfa = builder->dfa;
  uint32_t instCnt = taosArrayGetSize(dfa->insts);

  FstSparseSet *cur = sparSetCreate(instCnt);
  FstSparseSet *nxt = sparSetCreate(instCnt);

  // Seed the start set; dfaAdd follows JUMP and SPLIT instructions itself.
  dfaAdd(dfa, cur, 0);

  SArray  *pending = taosArrayInit(0, sizeof(uint32_t));
  uint32_t stateId;
  if (dfaBuilderCachedState(builder, cur, &stateId)) {
    taosArrayPush(pending, &stateId);
  }

  SHashObj *visited = taosHashInit(12, taosGetDefaultHashFunction(TSDB_DATA_TYPE_INT), false, HASH_NO_LOCK);
  while (taosArrayGetSize(pending) != 0) {
    stateId = *(uint32_t *)taosArrayPop(pending);

    for (int byte = 0; byte < 256; byte++) {
      uint32_t target;
      uint32_t placeholder = 0;  // the hash is used as a set; the stored value is irrelevant

      if (dfaBuilderRunState(builder, cur, nxt, stateId, byte, &target) &&
          taosHashGet(visited, &target, sizeof(target)) == NULL) {
        taosHashPut(visited, &target, sizeof(target), &placeholder, sizeof(placeholder));
        taosArrayPush(pending, &target);
      }

      if (taosArrayGetSize(dfa->states) > STATE_LIMIT) {
        // Too many states; no action is taken here.
      }
    }
  }

  taosArrayDestroy(pending);
  taosHashCleanup(visited);
  return builder->dfa;
}

dengyihao's avatar
dengyihao 已提交
101 102
/* Placeholder entry point: performs no work and always returns NULL. */
FstDfa *dfaBuilderBuild(FstDfaBuilder *builer) {
  (void)builer;
  return NULL;
}

dengyihao's avatar
dengyihao 已提交
103
/*
 * Computes the transition of DFA state `state` on input `byte`.
 *
 * `cur` is cleared and refilled with the instruction indexes stored in the
 * state, dfaRun advances it by `byte` into `next`, and the resulting set is
 * resolved to a state index through dfaBuilderCachedState.
 *
 * @param builder  builder owning the DFA being expanded
 * @param cur      scratch set, overwritten
 * @param next     scratch set, overwritten
 * @param state    index of the source state in builder->dfa->states
 * @param byte     input byte
 * @param result   receives the target state index when true is returned
 * @return true when a target state exists (its index is also stored in the
 *         source state's next[byte]); false when dfaBuilderCachedState
 *         reports no state for the resulting set.
 */
bool dfaBuilderRunState(FstDfaBuilder *builder, FstSparseSet *cur, FstSparseSet *next, uint32_t state, uint8_t byte,
                        uint32_t *result) {
  sparSetClear(cur);

  DfaState *t = taosArrayGet(builder->dfa->states, state);
  for (int i = 0; i < taosArrayGetSize(t->insts); i++) {
    int32_t ip = *(int32_t *)taosArrayGet(t->insts, i);
    bool    succ = sparSetAdd(cur, ip, NULL);
    assert(succ == true);
    (void)succ;  // keeps builds with NDEBUG free of an unused-variable warning
  }
  dfaRun(builder->dfa, cur, next, byte);

  uint32_t nxtState;
  if (!dfaBuilderCachedState(builder, next, &nxtState)) {
    return false;
  }

  // dfaBuilderCachedState can append a new entry to builder->dfa->states. If
  // that push relocates the array's storage, a DfaState pointer taken before
  // the call would be stale, so the source state is looked up only afterwards.
  t = taosArrayGet(builder->dfa->states, state);
  t->next[byte] = nxtState;
  *result = nxtState;
  return true;
}

/*
 * Maps a set of instruction indexes to a DFA state index, creating the state
 * when the cache has no entry for it.
 *
 * Only RANGE and MATCH instructions from `set` are kept; JUMP and SPLIT
 * entries are skipped. A MATCH instruction marks the state as matching.
 *
 * @param builder  builder whose dfa->states and cache are consulted/extended
 * @param set      instruction set to resolve
 * @param result   receives the state index when true is returned
 * @return false when no RANGE/MATCH instruction is present (or the temporary
 *         array cannot be allocated); true otherwise.
 *
 * Ownership: the temporary instruction array is destroyed on every path
 * except the one that stores it in a newly created DfaState.
 */
bool dfaBuilderCachedState(FstDfaBuilder *builder, FstSparseSet *set, uint32_t *result) {
  SArray *tinsts = taosArrayInit(4, sizeof(uint32_t));
  if (tinsts == NULL) {
    return false;
  }
  bool isMatch = false;

  for (int i = 0; i < sparSetLen(set); i++) {
    int32_t ip;

    if (false == sparSetGet(set, i, &ip)) continue;

    Inst *inst = taosArrayGet(builder->dfa->insts, ip);
    if (inst->ty == JUMP || inst->ty == SPLIT) {
      continue;
    } else if (inst->ty == RANGE) {
      taosArrayPush(tinsts, &ip);
    } else if (inst->ty == MATCH) {
      isMatch = true;
      taosArrayPush(tinsts, &ip);
    }
  }

  if (taosArrayGetSize(tinsts) == 0) {
    // Nothing to build a state from; release the scratch array instead of leaking it.
    taosArrayDestroy(tinsts);
    return false;
  }

  // NOTE(review): the key length is sizeof(POINTER_BYTES), not POINTER_BYTES
  // itself -- confirm this equals the pointer width on all targets.
  uint32_t *v = taosHashGet(builder->cache, &tinsts, sizeof(POINTER_BYTES));
  if (v != NULL) {
    *result = *v;
    taosArrayDestroy(tinsts);
    return true;
  }

  // Zero-initialise so that next[] entries which are never assigned hold a
  // deterministic value rather than leftover stack contents.
  DfaState st = {0};
  st.insts = tinsts;
  st.isMatch = isMatch;
  taosArrayPush(builder->dfa->states, &st);

  int32_t sz = taosArrayGetSize(builder->dfa->states) - 1;
  taosHashPut(builder->cache, &tinsts, sizeof(POINTER_BYTES), &sz, sizeof(sz));
  *result = sz;
  return true;
}

/*
 * Allocates a zeroed FstDfa and stores the given instruction and state arrays
 * in it. The arrays are not copied. Returns NULL when the allocation fails.
 */
FstDfa *dfaCreate(SArray *insts, SArray *states) {
  FstDfa *dfa = taosMemoryCalloc(1, sizeof(FstDfa));
  if (dfa != NULL) {
    dfa->insts = insts;
    dfa->states = states;
  }
  return dfa;
}
/*
 * Reports whether state `si` of `dfa` is a matching state.
 *
 * Returns false when the DFA has no state array, when `si` is not a valid
 * index into it, or when the state cannot be fetched; otherwise returns the
 * state's isMatch flag.
 */
bool dfaIsMatch(FstDfa *dfa, uint32_t si) {
  // Reject out-of-range indexes. The earlier `si < size` test was inverted:
  // it rejected every valid index and let invalid ones through.
  if (dfa->states == NULL || si >= taosArrayGetSize(dfa->states)) {
    return false;
  }
  DfaState *st = taosArrayGet(dfa->states, si);
  return st != NULL ? st->isMatch : false;
}
/*
 * Looks up the transition of state `si` on input `byte`.
 *
 * @param dfa     automaton to query
 * @param si      index of the source state
 * @param byte    input byte
 * @param result  receives the state's next[byte] entry when true is returned
 * @return false when the DFA has no state array, `si` is not a valid index,
 *         or the state cannot be fetched; true otherwise.
 */
bool dfaAccept(FstDfa *dfa, uint32_t si, uint8_t byte, uint32_t *result) {
  // Reject out-of-range indexes. The earlier `si < size` test was inverted:
  // it rejected every valid index and let invalid ones through.
  if (dfa->states == NULL || si >= taosArrayGetSize(dfa->states)) {
    return false;
  }
  DfaState *st = taosArrayGet(dfa->states, si);
  if (st == NULL) {
    return false;
  }
  *result = st->next[byte];
  return true;
}
/*
 * Adds instruction `ip` to `set` and recursively adds the targets of JUMP
 * (jv.step) and SPLIT (sv.len1, sv.len2) instructions. MATCH and RANGE
 * instructions are added without further expansion. An instruction that is
 * already in the set is left alone, which also ends the recursion.
 */
void dfaAdd(FstDfa *dfa, FstSparseSet *set, uint32_t ip) {
  if (sparSetContains(set, ip)) {
    return;
  }

  bool added = sparSetAdd(set, ip, NULL);
  assert(added == true);

  Inst *inst = taosArrayGet(dfa->insts, ip);
  if (inst->ty == MATCH || inst->ty == RANGE) {
    // Nothing to follow from these instructions.
    return;
  }

  if (inst->ty == JUMP) {
    dfaAdd(dfa, set, inst->jv.step);
    return;
  }

  if (inst->ty == SPLIT) {
    dfaAdd(dfa, set, inst->sv.len1);
    dfaAdd(dfa, set, inst->sv.len2);
  }
}
/*
 * Advances the instruction set `from` by one input `byte`, writing the
 * resulting set into `to` (which is cleared first).
 *
 * For every RANGE instruction in `from` whose [rv.start, rv.end] interval
 * contains `byte`, the following instruction (ip + 1) is added to `to` via
 * dfaAdd. JUMP and SPLIT entries are skipped.
 *
 * Returns true when `from` contained at least one MATCH instruction.
 */
bool dfaRun(FstDfa *dfa, FstSparseSet *from, FstSparseSet *to, uint8_t byte) {
  bool sawMatch = false;

  sparSetClear(to);
  for (int pos = 0; pos < sparSetLen(from); pos++) {
    int32_t ip;
    if (!sparSetGet(from, pos, &ip)) {
      continue;
    }

    Inst *inst = taosArrayGet(dfa->insts, ip);
    if (inst->ty == JUMP || inst->ty == SPLIT) {
      continue;
    }

    if (inst->ty == MATCH) {
      sawMatch = true;
    } else if (inst->ty == RANGE && inst->rv.start <= byte && byte <= inst->rv.end) {
      dfaAdd(dfa, to, ip + 1);
    }
  }

  return sawMatch;
}