indexFst.h 11.0 KB
Newer Older
dengyihao's avatar
dengyihao 已提交
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15
/*
 * Copyright (c) 2019 TAOS Data, Inc. <jhtao@taosdata.com>
 *
 * This program is free software: you can use, redistribute, and/or modify
 * it under the terms of the GNU Affero General Public License, version 3
 * or later ("AGPL"), as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.
 *
 * You should have received a copy of the GNU Affero General Public License
 * along with this program. If not, see <http://www.gnu.org/licenses/>.
 */

dengyihao's avatar
dengyihao 已提交
16 17
#ifndef __INDEX_FST_H__
#define __INDEX_FST_H__
dengyihao's avatar
dengyihao 已提交
18

dengyihao's avatar
dengyihao 已提交
19 20 21
#ifdef __cplusplus
extern "C" {
#endif
dengyihao's avatar
dengyihao 已提交
22

dengyihao's avatar
dengyihao 已提交
23
#include "indexFstAutomation.h"
dengyihao's avatar
dengyihao 已提交
24
#include "indexFstFile.h"
dengyihao's avatar
dengyihao 已提交
25 26 27
#include "indexFstNode.h"
#include "indexFstRegistry.h"
#include "indexFstUtil.h"
S
Shengliang Guan 已提交
28
#include "indexInt.h"
dengyihao's avatar
dengyihao 已提交
29

30
#define OUTPUT_PREFIX(a, b) ((a) > (b) ? (b) : (a)
dengyihao's avatar
dengyihao 已提交
31

dengyihao's avatar
dengyihao 已提交
32 33 34
typedef struct Fst     Fst;
typedef struct FstNode FstNode;
typedef struct FStmSt  FStmSt;
dengyihao's avatar
dengyihao 已提交
35

36
typedef enum { Included, Excluded, Unbounded } FstBound;
dengyihao's avatar
dengyihao 已提交
37 38

typedef struct FstBoundWithData {
39
  FstSlice data;
dengyihao's avatar
dengyihao 已提交
40 41 42
  FstBound type;
} FstBoundWithData;

dengyihao's avatar
dengyihao 已提交
43
typedef struct FStmBuilder {
dengyihao's avatar
dengyihao 已提交
44
  Fst*              fst;
dengyihao's avatar
dengyihao 已提交
45
  FAutoCtx*         aut;
dengyihao's avatar
dengyihao 已提交
46 47
  FstBoundWithData* min;
  FstBoundWithData* max;
dengyihao's avatar
dengyihao 已提交
48
} FStmBuilder, FStmStBuilder;
dengyihao's avatar
dengyihao 已提交
49 50 51 52 53 54

typedef struct FstRange {
  uint64_t start;
  uint64_t end;
} FstRange;

55
typedef enum { OneTransNext, OneTrans, AnyTrans, EmptyFinal } State;
dengyihao's avatar
dengyihao 已提交
56
typedef enum { Ordered, OutOfOrdered, DuplicateKey } FstOrderType;
dengyihao's avatar
dengyihao 已提交
57

dengyihao's avatar
dengyihao 已提交
58 59 60 61
FstBoundWithData* fstBoundStateCreate(FstBound type, FstSlice* data);
bool              fstBoundWithDataExceededBy(FstBoundWithData* bound, FstSlice* slice);
bool              fstBoundWithDataIsEmpty(FstBoundWithData* bound);
bool              fstBoundWithDataIsIncluded(FstBoundWithData* bound);
dengyihao's avatar
dengyihao 已提交
62 63 64 65 66 67

typedef struct FstOutput {
  bool   null;
  Output out;
} FstOutput;

dengyihao's avatar
dengyihao 已提交
68
/*
69
 *
dengyihao's avatar
dengyihao 已提交
70
 * UnFinished node and helper function
71
 * TODO: simple function name
dengyihao's avatar
dengyihao 已提交
72 73
 */
typedef struct FstUnFinishedNodes {
dengyihao's avatar
dengyihao 已提交
74
  SArray* stack;  // <FstBuilderNodeUnfinished> } FstUnFinishedNodes;
dengyihao's avatar
dengyihao 已提交
75 76
} FstUnFinishedNodes;

77
#define FST_UNFINISHED_NODES_LEN(nodes) taosArrayGetSize(nodes->stack)
dengyihao's avatar
dengyihao 已提交
78

dengyihao's avatar
dengyihao 已提交
79 80 81 82 83 84 85 86 87 88
FstUnFinishedNodes* fstUnFinishedNodesCreate();
void                fstUnFinishedNodesDestroy(FstUnFinishedNodes* node);
void                fstUnFinishedNodesPushEmpty(FstUnFinishedNodes* nodes, bool isFinal);
void                fstUnFinishedNodesSetRootOutput(FstUnFinishedNodes* node, Output out);
void                fstUnFinishedNodesTopLastFreeze(FstUnFinishedNodes* node, CompiledAddr addr);
void                fstUnFinishedNodesAddSuffix(FstUnFinishedNodes* node, FstSlice bs, Output out);
uint64_t            fstUnFinishedNodesFindCommPrefix(FstUnFinishedNodes* node, FstSlice bs);
FstBuilderNode*     fstUnFinishedNodesPopRoot(FstUnFinishedNodes* nodes);
FstBuilderNode*     fstUnFinishedNodesPopFreeze(FstUnFinishedNodes* nodes, CompiledAddr addr);
FstBuilderNode*     fstUnFinishedNodesPopEmpty(FstUnFinishedNodes* nodes);
89

dengyihao's avatar
dengyihao 已提交
90
uint64_t fstUnFinishedNodesFindCommPrefixAndSetOutput(FstUnFinishedNodes* node, FstSlice bs, Output in, Output* out);
dengyihao's avatar
dengyihao 已提交
91 92

typedef struct FstBuilder {
dengyihao's avatar
dengyihao 已提交
93 94
  IdxFstFile*         wrt;         // The FST raw data is written directly to `wtr`.
  FstUnFinishedNodes* unfinished;  // The stack of unfinished  nodes
dengyihao's avatar
dengyihao 已提交
95
  FstRegistry*        registry;    // A map of finished nodes.
96 97 98
  FstSlice            last;        // The last word added
  CompiledAddr        lastAddr;    // The address of the last compiled node
  uint64_t            len;         // num of keys added
dengyihao's avatar
dengyihao 已提交
99
} FstBuilder;
dengyihao's avatar
dengyihao 已提交
100

dengyihao's avatar
dengyihao 已提交
101
FstBuilder* fstBuilderCreate(void* w, FstType ty);
dengyihao's avatar
dengyihao 已提交
102

dengyihao's avatar
dengyihao 已提交
103 104 105 106 107 108
void         fstBuilderDestroy(FstBuilder* b);
void         fstBuilderInsertOutput(FstBuilder* b, FstSlice bs, Output in);
bool         fstBuilderInsert(FstBuilder* b, FstSlice bs, Output in);
void         fstBuilderCompileFrom(FstBuilder* b, uint64_t istate);
void*        fstBuilerIntoInner(FstBuilder* b);
void         fstBuilderFinish(FstBuilder* b);
dengyihao's avatar
dengyihao 已提交
109
FstOrderType fstBuilderCheckLastKey(FstBuilder* b, FstSlice bs, bool ckDup);
dengyihao's avatar
dengyihao 已提交
110
CompiledAddr fstBuilderCompile(FstBuilder* b, FstBuilderNode* bn);
dengyihao's avatar
dengyihao 已提交
111 112

typedef struct FstTransitions {
dengyihao's avatar
dengyihao 已提交
113
  FstNode* node;
114
  FstRange range;
dengyihao's avatar
dengyihao 已提交
115 116
} FstTransitions;

117
// FstState and relation function
dengyihao's avatar
dengyihao 已提交
118 119

typedef struct FstState {
120
  State   state;
dengyihao's avatar
dengyihao 已提交
121 122 123
  uint8_t val;
} FstState;

dengyihao's avatar
dengyihao 已提交
124
FstState fstStateCreateFrom(FstSlice* data, CompiledAddr addr);
dengyihao's avatar
dengyihao 已提交
125 126
FstState fstStateCreate(State state);

127
// compile
dengyihao's avatar
dengyihao 已提交
128 129 130
void fstStateCompileForOneTransNext(IdxFstFile* w, CompiledAddr addr, uint8_t inp);
void fstStateCompileForOneTrans(IdxFstFile* w, CompiledAddr addr, FstTransition* trn);
void fstStateCompileForAnyTrans(IdxFstFile* w, CompiledAddr addr, FstBuilderNode* node);
dengyihao's avatar
dengyihao 已提交
131 132

// set_comm_input
dengyihao's avatar
dengyihao 已提交
133
void fstStateSetCommInput(FstState* state, uint8_t inp);
dengyihao's avatar
dengyihao 已提交
134 135

// comm_input
dengyihao's avatar
dengyihao 已提交
136
uint8_t fstStateCommInput(FstState* state, bool* null);
dengyihao's avatar
dengyihao 已提交
137 138 139

// input_len

dengyihao's avatar
dengyihao 已提交
140
uint64_t fstStateInputLen(FstState* state);
dengyihao's avatar
dengyihao 已提交
141

142
// end_addr
dengyihao's avatar
dengyihao 已提交
143 144
uint64_t fstStateEndAddrForOneTransNext(FstState* state, FstSlice* data);
uint64_t fstStateEndAddrForOneTrans(FstState* state, FstSlice* data, PackSizes sizes);
dengyihao's avatar
dengyihao 已提交
145 146
uint64_t fstStateEndAddrForAnyTrans(FstState* state, uint64_t version, FstSlice* date, PackSizes sizes,
                                    uint64_t nTrans);
147
// input
dengyihao's avatar
dengyihao 已提交
148 149
uint8_t fstStateInput(FstState* state, FstNode* node);
uint8_t fstStateInputForAnyTrans(FstState* state, FstNode* node, uint64_t i);
dengyihao's avatar
dengyihao 已提交
150 151

// trans_addr
dengyihao's avatar
dengyihao 已提交
152 153
CompiledAddr fstStateTransAddr(FstState* state, FstNode* node);
CompiledAddr fstStateTransAddrForAnyTrans(FstState* state, FstNode* node, uint64_t i);
dengyihao's avatar
dengyihao 已提交
154

155
// sizes
dengyihao's avatar
dengyihao 已提交
156
PackSizes fstStateSizes(FstState* state, FstSlice* data);
157
// Output
dengyihao's avatar
dengyihao 已提交
158 159
Output fstStateOutput(FstState* state, FstNode* node);
Output fstStateOutputForAnyTrans(FstState* state, FstNode* node, uint64_t i);
dengyihao's avatar
dengyihao 已提交
160 161 162

// anyTrans specify function

dengyihao's avatar
dengyihao 已提交
163 164 165
void fstStateSetFinalState(FstState* state, bool yes);
bool fstStateIsFinalState(FstState* state);
void fstStateSetStateNtrans(FstState* state, uint8_t n);
dengyihao's avatar
dengyihao 已提交
166
// state_ntrans
dengyihao's avatar
dengyihao 已提交
167 168 169 170 171 172 173
uint8_t  fstStateStateNtrans(FstState* state, bool* null);
uint64_t fstStateTotalTransSize(FstState* state, uint64_t version, PackSizes size, uint64_t nTrans);
uint64_t fstStateTransIndexSize(FstState* state, uint64_t version, uint64_t nTrans);
uint64_t fstStateNtransLen(FstState* state);
uint64_t fstStateNtrans(FstState* state, FstSlice* slice);
Output   fstStateFinalOutput(FstState* state, uint64_t version, FstSlice* date, PackSizes sizes, uint64_t nTrans);
uint64_t fstStateFindInput(FstState* state, FstNode* node, uint8_t b, bool* null);
dengyihao's avatar
dengyihao 已提交
174

175
#define FST_STATE_ONE_TRNAS_NEXT(node) (node->state.state == OneTransNext)
dengyihao's avatar
dengyihao 已提交
176 177 178
#define FST_STATE_ONE_TRNAS(node)      (node->state.state == OneTrans)
#define FST_STATE_ANY_TRANS(node)      (node->state.state == AnyTrans)
#define FST_STATE_EMPTY_FINAL(node)    (node->state.state == EmptyFinal)
dengyihao's avatar
dengyihao 已提交
179 180 181 182 183 184

typedef struct FstLastTransition {
  uint8_t inp;
  Output  out;
} FstLastTransition;

185
/*
dengyihao's avatar
dengyihao 已提交
186
 * FstBuilderNodeUnfinished and helper function
187
 * TODO: simple function name
dengyihao's avatar
dengyihao 已提交
188
 */
dengyihao's avatar
dengyihao 已提交
189
typedef struct FstBuilderNodeUnfinished {
dengyihao's avatar
dengyihao 已提交
190 191
  FstBuilderNode*    node;
  FstLastTransition* last;
dengyihao's avatar
dengyihao 已提交
192 193
} FstBuilderNodeUnfinished;

dengyihao's avatar
dengyihao 已提交
194
void fstBuilderNodeUnfinishedLastCompiled(FstBuilderNodeUnfinished* node, CompiledAddr addr);
195

dengyihao's avatar
dengyihao 已提交
196
void fstBuilderNodeUnfinishedAddOutputPrefix(FstBuilderNodeUnfinished* node, Output out);
dengyihao's avatar
dengyihao 已提交
197 198

/*
199
 * FstNode and helper function
dengyihao's avatar
dengyihao 已提交
200
 */
dengyihao's avatar
dengyihao 已提交
201
typedef struct FstNode {
dengyihao's avatar
dengyihao 已提交
202
  FstSlice     data;
203
  uint64_t     version;
dengyihao's avatar
dengyihao 已提交
204
  FstState     state;
205 206
  CompiledAddr start;
  CompiledAddr end;
dengyihao's avatar
dengyihao 已提交
207 208 209
  bool         isFinal;
  uint64_t     nTrans;
  PackSizes    sizes;
210
  Output       finalOutput;
dengyihao's avatar
dengyihao 已提交
211 212
} FstNode;

213 214
// If this node is final and has a terminal output value, then it is,  returned.
// Otherwise, a zero output is returned
dengyihao's avatar
dengyihao 已提交
215
#define FST_NODE_FINAL_OUTPUT(node) node->finalOutput
dengyihao's avatar
dengyihao 已提交
216

217 218
// Returns true if and only if this node corresponds to a final or "match",
// state in the finite state transducer.
dengyihao's avatar
dengyihao 已提交
219
#define FST_NODE_IS_FINAL(node) node->isFinal
dengyihao's avatar
dengyihao 已提交
220

221 222
// Returns the number of transitions in this node, The maximum number of
// transitions is 256.
dengyihao's avatar
dengyihao 已提交
223
#define FST_NODE_LEN(node) node->nTrans
dengyihao's avatar
dengyihao 已提交
224

dengyihao's avatar
dengyihao 已提交
225 226
// Returns true if and only if this node has zero transitions.
#define FST_NODE_IS_EMPTYE(node) (node->nTrans == 0)
dengyihao's avatar
dengyihao 已提交
227

dengyihao's avatar
dengyihao 已提交
228
// Return the address of this node.
229
#define FST_NODE_ADDR(node) node->start
dengyihao's avatar
dengyihao 已提交
230

dengyihao's avatar
dengyihao 已提交
231 232
FstNode* fstNodeCreate(int64_t version, CompiledAddr addr, FstSlice* data);
void     fstNodeDestroy(FstNode* fstNode);
dengyihao's avatar
dengyihao 已提交
233

dengyihao's avatar
dengyihao 已提交
234 235 236 237 238
FstTransitions  fstNodeTransitionIter(FstNode* node);
FstTransitions* fstNodeTransitions(FstNode* node);
bool            fstNodeGetTransitionAt(FstNode* node, uint64_t i, FstTransition* res);
bool            fstNodeGetTransitionAddrAt(FstNode* node, uint64_t i, CompiledAddr* res);
bool            fstNodeFindInput(FstNode* node, uint8_t b, uint64_t* res);
dengyihao's avatar
dengyihao 已提交
239

dengyihao's avatar
dengyihao 已提交
240
bool fstNodeCompile(FstNode* node, void* w, CompiledAddr lastAddr, CompiledAddr addr, FstBuilderNode* builderNode);
241

dengyihao's avatar
dengyihao 已提交
242
FstSlice fstNodeAsSlice(FstNode* node);
243 244

// ops
dengyihao's avatar
dengyihao 已提交
245 246 247 248 249 250

typedef struct FstIndexedValue {
  uint64_t index;
  uint64_t value;
} FstIndexedValue;

dengyihao's avatar
dengyihao 已提交
251 252
FstLastTransition* fstLastTransitionCreate(uint8_t inp, Output out);
void               fstLastTransitionDestroy(FstLastTransition* trn);
dengyihao's avatar
dengyihao 已提交
253

dengyihao's avatar
dengyihao 已提交
254 255
typedef struct FstMeta {
  uint64_t     version;
256
  CompiledAddr rootAddr;
dengyihao's avatar
dengyihao 已提交
257 258 259 260 261 262
  FstType      ty;
  uint64_t     len;
  uint32_t     checkSum;
} FstMeta;

typedef struct Fst {
dengyihao's avatar
dengyihao 已提交
263 264 265
  FstMeta*      meta;
  FstSlice*     data;  //
  FstNode*      root;  //
wafwerar's avatar
wafwerar 已提交
266
  TdThreadMutex mtx;
dengyihao's avatar
dengyihao 已提交
267
} Fst;
dengyihao's avatar
dengyihao 已提交
268

269
// refactor simple function
dengyihao's avatar
dengyihao 已提交
270

dengyihao's avatar
dengyihao 已提交
271 272
Fst* fstCreate(FstSlice* data);
void fstDestroy(Fst* fst);
dengyihao's avatar
dengyihao 已提交
273

dengyihao's avatar
dengyihao 已提交
274 275 276 277 278 279 280
bool         fstGet(Fst* fst, FstSlice* b, Output* out);
FstNode*     fstGetNode(Fst* fst, CompiledAddr);
FstNode*     fstGetRoot(Fst* fst);
FstType      fstGetType(Fst* fst);
CompiledAddr fstGetRootAddr(Fst* fst);
Output       fstEmptyFinalOutput(Fst* fst, bool* null);
FStmBuilder* fstSearch(Fst* fst, FAutoCtx* ctx);
dengyihao's avatar
dengyihao 已提交
281

dengyihao's avatar
dengyihao 已提交
282
FStmStBuilder* fstSearchWithState(Fst* fst, FAutoCtx* ctx);
283
// into stream to expand later
dengyihao's avatar
dengyihao 已提交
284 285
//

dengyihao's avatar
dengyihao 已提交
286
FStmSt* stmBuilderIntoStm(FStmBuilder* sb);
dengyihao's avatar
dengyihao 已提交
287

dengyihao's avatar
dengyihao 已提交
288
bool fstVerify(Fst* fst);
dengyihao's avatar
dengyihao 已提交
289

290
// refactor this function
dengyihao's avatar
dengyihao 已提交
291
bool fstBuilderNodeCompileTo(FstBuilderNode* b, IdxFstFile* wrt, CompiledAddr lastAddr, CompiledAddr startAddr);
dengyihao's avatar
dengyihao 已提交
292 293

typedef struct StreamState {
dengyihao's avatar
dengyihao 已提交
294
  FstNode*  node;
dengyihao's avatar
dengyihao 已提交
295
  uint64_t  trans;
296
  FstOutput out;
dengyihao's avatar
dengyihao 已提交
297
  void*     autState;
298
} StreamState;
dengyihao's avatar
dengyihao 已提交
299

dengyihao's avatar
dengyihao 已提交
300
void streamStateDestroy(void* s);
dengyihao's avatar
dengyihao 已提交
301

dengyihao's avatar
dengyihao 已提交
302
typedef struct FStmSt {
dengyihao's avatar
dengyihao 已提交
303
  Fst*              fst;
dengyihao's avatar
dengyihao 已提交
304
  FAutoCtx*         aut;
dengyihao's avatar
dengyihao 已提交
305
  SArray*           inp;
306
  FstOutput         emptyOutput;
dengyihao's avatar
dengyihao 已提交
307 308
  SArray*           stack;  // <StreamState>
  FstBoundWithData* endAt;
dengyihao's avatar
dengyihao 已提交
309
} FStmSt;
dengyihao's avatar
dengyihao 已提交
310

dengyihao's avatar
dengyihao 已提交
311
typedef struct FStmStRslt {
312
  FstSlice  data;
dengyihao's avatar
dengyihao 已提交
313
  FstOutput out;
dengyihao's avatar
dengyihao 已提交
314
  void*     state;
dengyihao's avatar
dengyihao 已提交
315
} FStmStRslt;
dengyihao's avatar
dengyihao 已提交
316

dengyihao's avatar
dengyihao 已提交
317 318
FStmStRslt* swsResultCreate(FstSlice* data, FstOutput fOut, void* state);
void        swsResultDestroy(FStmStRslt* result);
319

dengyihao's avatar
dengyihao 已提交
320
typedef void* (*StreamCallback)(void*);
dengyihao's avatar
dengyihao 已提交
321
FStmSt* stmStCreate(Fst* fst, FAutoCtx* automation, FstBoundWithData* min, FstBoundWithData* max);
dengyihao's avatar
dengyihao 已提交
322

dengyihao's avatar
dengyihao 已提交
323
void stmStDestroy(FStmSt* sws);
dengyihao's avatar
dengyihao 已提交
324

dengyihao's avatar
dengyihao 已提交
325
bool stmStSeekMin(FStmSt* sws, FstBoundWithData* min);
dengyihao's avatar
dengyihao 已提交
326

dengyihao's avatar
dengyihao 已提交
327
FStmStRslt* stmStNextWith(FStmSt* sws, StreamCallback callback);
328

dengyihao's avatar
dengyihao 已提交
329
FStmBuilder* stmBuilderCreate(Fst* fst, FAutoCtx* aut);
dengyihao's avatar
dengyihao 已提交
330

dengyihao's avatar
dengyihao 已提交
331
void stmBuilderDestroy(FStmBuilder* b);
dengyihao's avatar
dengyihao 已提交
332

dengyihao's avatar
dengyihao 已提交
333
// set up bound range
dengyihao's avatar
dengyihao 已提交
334 335
// refator later
// simple code by marco
dengyihao's avatar
dengyihao 已提交
336
void stmBuilderSetRange(FStmBuilder* b, FstSlice* val, RangeType type);
dengyihao's avatar
dengyihao 已提交
337

dengyihao's avatar
dengyihao 已提交
338 339 340 341
#ifdef __cplusplus
}
#endif

dengyihao's avatar
dengyihao 已提交
342
#endif