index_fst.h 11.3 KB
Newer Older
dengyihao's avatar
dengyihao 已提交
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15
/*
 * Copyright (c) 2019 TAOS Data, Inc. <jhtao@taosdata.com>
 *
 * This program is free software: you can use, redistribute, and/or modify
 * it under the terms of the GNU Affero General Public License, version 3
 * or later ("AGPL"), as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.
 *
 * You should have received a copy of the GNU Affero General Public License
 * along with this program. If not, see <http://www.gnu.org/licenses/>.
 */

dengyihao's avatar
dengyihao 已提交
16 17
#ifndef __INDEX_FST_H__
#define __INDEX_FST_H__
dengyihao's avatar
dengyihao 已提交
18

dengyihao's avatar
dengyihao 已提交
19 20 21
#ifdef __cplusplus
extern "C" {
#endif
dengyihao's avatar
dengyihao 已提交
22

dengyihao's avatar
dengyihao 已提交
23
#include "index_fst_automation.h"
24 25 26 27
#include "index_fst_counting_writer.h"
#include "index_fst_registry.h"
#include "index_fst_util.h"
#include "tarray.h"
dengyihao's avatar
dengyihao 已提交
28

29
#define OUTPUT_PREFIX(a, b) ((a) > (b) ? (b) : (a)
dengyihao's avatar
dengyihao 已提交
30

31 32
typedef struct Fst             Fst;
typedef struct FstNode         FstNode;
dengyihao's avatar
dengyihao 已提交
33
typedef struct StreamWithState StreamWithState;
dengyihao's avatar
dengyihao 已提交
34

35
typedef enum { Included, Excluded, Unbounded } FstBound;
dengyihao's avatar
dengyihao 已提交
36 37

typedef struct FstBoundWithData {
38
  FstSlice data;
dengyihao's avatar
dengyihao 已提交
39 40 41 42
  FstBound type;
} FstBoundWithData;

typedef struct FstStreamBuilder {
43 44 45
  Fst *             fst;
  AutomationCtx *   aut;
  FstBoundWithData *min;
dengyihao's avatar
dengyihao 已提交
46 47
  FstBoundWithData *max;
} FstStreamBuilder, FstStreamWithStateBuilder;
dengyihao's avatar
dengyihao 已提交
48 49 50 51 52 53

typedef struct FstRange {
  uint64_t start;
  uint64_t end;
} FstRange;

54 55 56
typedef enum { GE, GT, LE, LT } RangeType;
typedef enum { OneTransNext, OneTrans, AnyTrans, EmptyFinal } State;
typedef enum { Ordered, OutOfOrdered, DuplicateKey } OrderType;
dengyihao's avatar
dengyihao 已提交
57

58 59 60 61
FstBoundWithData *fstBoundStateCreate(FstBound type, FstSlice *data);
bool              fstBoundWithDataExceededBy(FstBoundWithData *bound, FstSlice *slice);
bool              fstBoundWithDataIsEmpty(FstBoundWithData *bound);
bool              fstBoundWithDataIsIncluded(FstBoundWithData *bound);
dengyihao's avatar
dengyihao 已提交
62 63 64 65 66 67

typedef struct FstOutput {
  bool   null;
  Output out;
} FstOutput;

dengyihao's avatar
dengyihao 已提交
68
/*
69
 *
dengyihao's avatar
dengyihao 已提交
70
 * UnFinished node and helper function
71
 * TODO: simple function name
dengyihao's avatar
dengyihao 已提交
72 73
 */
typedef struct FstUnFinishedNodes {
74
  SArray *stack;  // <FstBuilderNodeUnfinished> } FstUnFinishedNodes;
dengyihao's avatar
dengyihao 已提交
75 76
} FstUnFinishedNodes;

77
#define FST_UNFINISHED_NODES_LEN(nodes) taosArrayGetSize(nodes->stack)
dengyihao's avatar
dengyihao 已提交
78

79 80 81 82 83 84 85 86 87 88 89 90
FstUnFinishedNodes *fstUnFinishedNodesCreate();
void                fstUnFinishedNodesDestroy(FstUnFinishedNodes *node);
void                fstUnFinishedNodesPushEmpty(FstUnFinishedNodes *nodes, bool isFinal);
void                fstUnFinishedNodesSetRootOutput(FstUnFinishedNodes *node, Output out);
void                fstUnFinishedNodesTopLastFreeze(FstUnFinishedNodes *node, CompiledAddr addr);
void                fstUnFinishedNodesAddSuffix(FstUnFinishedNodes *node, FstSlice bs, Output out);
uint64_t            fstUnFinishedNodesFindCommPrefix(FstUnFinishedNodes *node, FstSlice bs);
FstBuilderNode *    fstUnFinishedNodesPopRoot(FstUnFinishedNodes *nodes);
FstBuilderNode *    fstUnFinishedNodesPopFreeze(FstUnFinishedNodes *nodes, CompiledAddr addr);
FstBuilderNode *    fstUnFinishedNodesPopEmpty(FstUnFinishedNodes *nodes);

uint64_t fstUnFinishedNodesFindCommPrefixAndSetOutput(FstUnFinishedNodes *node, FstSlice bs, Output in, Output *out);
dengyihao's avatar
dengyihao 已提交
91 92

typedef struct FstBuilder {
93 94 95 96 97 98
  FstCountingWriter * wrt;         // The FST raw data is written directly to `wtr`.
  FstUnFinishedNodes *unfinished;  // The stack of unfinished nodes
  FstRegistry *       registry;    // A map of finished nodes.
  FstSlice            last;        // The last word added
  CompiledAddr        lastAddr;    // The address of the last compiled node
  uint64_t            len;         // num of keys added
dengyihao's avatar
dengyihao 已提交
99
} FstBuilder;
dengyihao's avatar
dengyihao 已提交
100

dengyihao's avatar
dengyihao 已提交
101
FstBuilder *fstBuilderCreate(void *w, FstType ty);
dengyihao's avatar
dengyihao 已提交
102

103 104 105 106 107 108 109
void         fstBuilderDestroy(FstBuilder *b);
void         fstBuilderInsertOutput(FstBuilder *b, FstSlice bs, Output in);
bool         fstBuilderInsert(FstBuilder *b, FstSlice bs, Output in);
void         fstBuilderCompileFrom(FstBuilder *b, uint64_t istate);
void *       fstBuilerIntoInner(FstBuilder *b);
void         fstBuilderFinish(FstBuilder *b);
OrderType    fstBuilderCheckLastKey(FstBuilder *b, FstSlice bs, bool ckDup);
dengyihao's avatar
dengyihao 已提交
110
CompiledAddr fstBuilderCompile(FstBuilder *b, FstBuilderNode *bn);
dengyihao's avatar
dengyihao 已提交
111 112

typedef struct FstTransitions {
113 114
  FstNode *node;
  FstRange range;
dengyihao's avatar
dengyihao 已提交
115 116
} FstTransitions;

117
// FstState and relation function
dengyihao's avatar
dengyihao 已提交
118 119

typedef struct FstState {
120
  State   state;
dengyihao's avatar
dengyihao 已提交
121 122 123
  uint8_t val;
} FstState;

124
FstState fstStateCreateFrom(FstSlice *data, CompiledAddr addr);
dengyihao's avatar
dengyihao 已提交
125 126
FstState fstStateCreate(State state);

127
// compile
dengyihao's avatar
dengyihao 已提交
128
void fstStateCompileForOneTransNext(FstCountingWriter *w, CompiledAddr addr, uint8_t inp);
dengyihao's avatar
dengyihao 已提交
129
void fstStateCompileForOneTrans(FstCountingWriter *w, CompiledAddr addr, FstTransition *trn);
dengyihao's avatar
dengyihao 已提交
130
void fstStateCompileForAnyTrans(FstCountingWriter *w, CompiledAddr addr, FstBuilderNode *node);
dengyihao's avatar
dengyihao 已提交
131 132

// set_comm_input
133
void fstStateSetCommInput(FstState *state, uint8_t inp);
dengyihao's avatar
dengyihao 已提交
134 135

// comm_input
136
uint8_t fstStateCommInput(FstState *state, bool *null);
dengyihao's avatar
dengyihao 已提交
137 138 139

// input_len

140
uint64_t fstStateInputLen(FstState *state);
dengyihao's avatar
dengyihao 已提交
141

142 143
// end_addr
uint64_t fstStateEndAddrForOneTransNext(FstState *state, FstSlice *data);
dengyihao's avatar
dengyihao 已提交
144
uint64_t fstStateEndAddrForOneTrans(FstState *state, FstSlice *data, PackSizes sizes);
145 146 147 148 149
uint64_t fstStateEndAddrForAnyTrans(
    FstState *state, uint64_t version, FstSlice *date, PackSizes sizes, uint64_t nTrans);
// input
uint8_t fstStateInput(FstState *state, FstNode *node);
uint8_t fstStateInputForAnyTrans(FstState *state, FstNode *node, uint64_t i);
dengyihao's avatar
dengyihao 已提交
150 151 152 153 154

// trans_addr
CompiledAddr fstStateTransAddr(FstState *state, FstNode *node);
CompiledAddr fstStateTransAddrForAnyTrans(FstState *state, FstNode *node, uint64_t i);

155
// sizes
dengyihao's avatar
dengyihao 已提交
156
PackSizes fstStateSizes(FstState *state, FstSlice *data);
157
// Output
dengyihao's avatar
dengyihao 已提交
158 159 160 161 162 163
Output fstStateOutput(FstState *state, FstNode *node);
Output fstStateOutputForAnyTrans(FstState *state, FstNode *node, uint64_t i);

// anyTrans specify function

void fstStateSetFinalState(FstState *state, bool yes);
164
bool fstStateIsFinalState(FstState *state);
dengyihao's avatar
dengyihao 已提交
165 166
void fstStateSetStateNtrans(FstState *state, uint8_t n);
// state_ntrans
167
uint8_t  fstStateStateNtrans(FstState *state, bool *null);
dengyihao's avatar
dengyihao 已提交
168 169 170
uint64_t fstStateTotalTransSize(FstState *state, uint64_t version, PackSizes size, uint64_t nTrans);
uint64_t fstStateTransIndexSize(FstState *state, uint64_t version, uint64_t nTrans);
uint64_t fstStateNtransLen(FstState *state);
dengyihao's avatar
dengyihao 已提交
171
uint64_t fstStateNtrans(FstState *state, FstSlice *slice);
dengyihao's avatar
dengyihao 已提交
172
Output   fstStateFinalOutput(FstState *state, uint64_t version, FstSlice *date, PackSizes sizes, uint64_t nTrans);
dengyihao's avatar
dengyihao 已提交
173
uint64_t fstStateFindInput(FstState *state, FstNode *node, uint8_t b, bool *null);
dengyihao's avatar
dengyihao 已提交
174

175
#define FST_STATE_ONE_TRNAS_NEXT(node) (node->state.state == OneTransNext)
dengyihao's avatar
dengyihao 已提交
176 177
#define FST_STATE_ONE_TRNAS(node) (node->state.state == OneTrans)
#define FST_STATE_ANY_TRANS(node) (node->state.state == AnyTrans)
178
#define FST_STATE_EMPTY_FINAL(node) (node->state.state == EmptyFinal)
dengyihao's avatar
dengyihao 已提交
179 180 181 182 183 184

typedef struct FstLastTransition {
  uint8_t inp;
  Output  out;
} FstLastTransition;

185
/*
dengyihao's avatar
dengyihao 已提交
186
 * FstBuilderNodeUnfinished and helper function
187
 * TODO: simple function name
dengyihao's avatar
dengyihao 已提交
188
 */
dengyihao's avatar
dengyihao 已提交
189
typedef struct FstBuilderNodeUnfinished {
190 191
  FstBuilderNode *   node;
  FstLastTransition *last;
dengyihao's avatar
dengyihao 已提交
192 193
} FstBuilderNodeUnfinished;

dengyihao's avatar
dengyihao 已提交
194
void fstBuilderNodeUnfinishedLastCompiled(FstBuilderNodeUnfinished *node, CompiledAddr addr);
195

dengyihao's avatar
dengyihao 已提交
196
void fstBuilderNodeUnfinishedAddOutputPrefix(FstBuilderNodeUnfinished *node, Output out);
dengyihao's avatar
dengyihao 已提交
197 198

/*
199
 * FstNode and helper function
dengyihao's avatar
dengyihao 已提交
200
 */
dengyihao's avatar
dengyihao 已提交
201
typedef struct FstNode {
dengyihao's avatar
dengyihao 已提交
202
  FstSlice     data;
203
  uint64_t     version;
dengyihao's avatar
dengyihao 已提交
204
  FstState     state;
205 206
  CompiledAddr start;
  CompiledAddr end;
dengyihao's avatar
dengyihao 已提交
207 208 209
  bool         isFinal;
  uint64_t     nTrans;
  PackSizes    sizes;
210
  Output       finalOutput;
dengyihao's avatar
dengyihao 已提交
211 212
} FstNode;

213 214
// If this node is final and has a terminal output value, then it is,  returned.
// Otherwise, a zero output is returned
dengyihao's avatar
dengyihao 已提交
215
#define FST_NODE_FINAL_OUTPUT(node) node->finalOutput
216 217
// Returns true if and only if this node corresponds to a final or "match",
// state in the finite state transducer.
dengyihao's avatar
dengyihao 已提交
218
#define FST_NODE_IS_FINAL(node) node->isFinal
219 220
// Returns the number of transitions in this node, The maximum number of
// transitions is 256.
dengyihao's avatar
dengyihao 已提交
221 222 223 224
#define FST_NODE_LEN(node) node->nTrans
// Returns true if and only if this node has zero transitions.
#define FST_NODE_IS_EMPTYE(node) (node->nTrans == 0)
// Return the address of this node.
225
#define FST_NODE_ADDR(node) node->start
dengyihao's avatar
dengyihao 已提交
226

dengyihao's avatar
dengyihao 已提交
227
FstNode *fstNodeCreate(int64_t version, CompiledAddr addr, FstSlice *data);
228
void     fstNodeDestroy(FstNode *fstNode);
dengyihao's avatar
dengyihao 已提交
229

230 231 232 233 234
FstTransitions  fstNodeTransitionIter(FstNode *node);
FstTransitions *fstNodeTransitions(FstNode *node);
bool            fstNodeGetTransitionAt(FstNode *node, uint64_t i, FstTransition *res);
bool            fstNodeGetTransitionAddrAt(FstNode *node, uint64_t i, CompiledAddr *res);
bool            fstNodeFindInput(FstNode *node, uint8_t b, uint64_t *res);
dengyihao's avatar
dengyihao 已提交
235

236 237 238 239 240
bool fstNodeCompile(FstNode *node, void *w, CompiledAddr lastAddr, CompiledAddr addr, FstBuilderNode *builderNode);

FstSlice fstNodeAsSlice(FstNode *node);

// ops
dengyihao's avatar
dengyihao 已提交
241 242 243 244 245 246 247

typedef struct FstIndexedValue {
  uint64_t index;
  uint64_t value;
} FstIndexedValue;

FstLastTransition *fstLastTransitionCreate(uint8_t inp, Output out);
248
void               fstLastTransitionDestroy(FstLastTransition *trn);
dengyihao's avatar
dengyihao 已提交
249

dengyihao's avatar
dengyihao 已提交
250 251
typedef struct FstMeta {
  uint64_t     version;
252
  CompiledAddr rootAddr;
dengyihao's avatar
dengyihao 已提交
253 254 255 256 257 258
  FstType      ty;
  uint64_t     len;
  uint32_t     checkSum;
} FstMeta;

typedef struct Fst {
259 260 261
  FstMeta * meta;
  FstSlice *data;  //
  FstNode * root;  //
dengyihao's avatar
dengyihao 已提交
262
} Fst;
dengyihao's avatar
dengyihao 已提交
263

264
// refactor simple function
dengyihao's avatar
dengyihao 已提交
265

266 267
Fst *fstCreate(FstSlice *data);
void fstDestroy(Fst *fst);
dengyihao's avatar
dengyihao 已提交
268

269 270 271 272 273 274
bool              fstGet(Fst *fst, FstSlice *b, Output *out);
FstNode *         fstGetNode(Fst *fst, CompiledAddr);
FstNode *         fstGetRoot(Fst *fst);
FstType           fstGetType(Fst *fst);
CompiledAddr      fstGetRootAddr(Fst *fst);
Output            fstEmptyFinalOutput(Fst *fst, bool *null);
dengyihao's avatar
dengyihao 已提交
275
FstStreamBuilder *fstSearch(Fst *fst, AutomationCtx *ctx);
dengyihao's avatar
dengyihao 已提交
276

277 278 279
FstStreamWithStateBuilder *fstSearchWithState(Fst *fst, AutomationCtx *ctx);
// into stream to expand later
StreamWithState *streamBuilderIntoStream(FstStreamBuilder *sb);
dengyihao's avatar
dengyihao 已提交
280

281
bool fstVerify(Fst *fst);
dengyihao's avatar
dengyihao 已提交
282

283 284
// refactor this function
bool fstBuilderNodeCompileTo(FstBuilderNode *b, FstCountingWriter *wrt, CompiledAddr lastAddr, CompiledAddr startAddr);
dengyihao's avatar
dengyihao 已提交
285 286

typedef struct StreamState {
287
  FstNode * node;
dengyihao's avatar
dengyihao 已提交
288
  uint64_t  trans;
289 290 291
  FstOutput out;
  void *    autState;
} StreamState;
dengyihao's avatar
dengyihao 已提交
292

dengyihao's avatar
dengyihao 已提交
293 294
void streamStateDestroy(void *s);

dengyihao's avatar
dengyihao 已提交
295
typedef struct StreamWithState {
296 297 298 299 300
  Fst *             fst;
  AutomationCtx *   aut;
  SArray *          inp;
  FstOutput         emptyOutput;
  SArray *          stack;  // <StreamState>
dengyihao's avatar
dengyihao 已提交
301
  FstBoundWithData *endAt;
dengyihao's avatar
dengyihao 已提交
302
} StreamWithState;
dengyihao's avatar
dengyihao 已提交
303

dengyihao's avatar
dengyihao 已提交
304
typedef struct StreamWithStateResult {
305
  FstSlice  data;
dengyihao's avatar
dengyihao 已提交
306
  FstOutput out;
307
  void *    state;
dengyihao's avatar
dengyihao 已提交
308 309 310
} StreamWithStateResult;

StreamWithStateResult *swsResultCreate(FstSlice *data, FstOutput fOut, void *state);
311 312 313 314 315
void                   swsResultDestroy(StreamWithStateResult *result);

typedef void *(*StreamCallback)(void *);
StreamWithState *streamWithStateCreate(
    Fst *fst, AutomationCtx *automation, FstBoundWithData *min, FstBoundWithData *max);
dengyihao's avatar
dengyihao 已提交
316

dengyihao's avatar
dengyihao 已提交
317
void streamWithStateDestroy(StreamWithState *sws);
dengyihao's avatar
dengyihao 已提交
318

319
bool streamWithStateSeekMin(StreamWithState *sws, FstBoundWithData *min);
dengyihao's avatar
dengyihao 已提交
320

321 322 323
StreamWithStateResult *streamWithStateNextWith(StreamWithState *sws, StreamCallback callback);

FstStreamBuilder *fstStreamBuilderCreate(Fst *fst, AutomationCtx *aut);
dengyihao's avatar
dengyihao 已提交
324
// set up bound range
325
// refator, simple code by marco
dengyihao's avatar
dengyihao 已提交
326 327 328

FstStreamBuilder *fstStreamBuilderRange(FstStreamBuilder *b, FstSlice *val, RangeType type);

dengyihao's avatar
dengyihao 已提交
329 330 331 332
#ifdef __cplusplus
}
#endif

dengyihao's avatar
dengyihao 已提交
333
#endif