index_fst.h 6.0 KB
Newer Older
dengyihao's avatar
dengyihao 已提交
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15
/*
 * Copyright (c) 2019 TAOS Data, Inc. <jhtao@taosdata.com>
 *
 * This program is free software: you can use, redistribute, and/or modify
 * it under the terms of the GNU Affero General Public License, version 3
 * or later ("AGPL"), as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.
 *
 * You should have received a copy of the GNU Affero General Public License
 * along with this program. If not, see <http://www.gnu.org/licenses/>.
 */

dengyihao's avatar
dengyihao 已提交
16 17
#ifndef __INDEX_FST_H__
#define __INDEX_FST_H__
dengyihao's avatar
dengyihao 已提交
18 19


dengyihao's avatar
dengyihao 已提交
20 21 22
#include "tarray.h"
#include "index_fst_util.h"
#include "index_fst_registry.h"
dengyihao's avatar
dengyihao 已提交
23
#include "index_fst_counting_writer.h"
dengyihao's avatar
dengyihao 已提交
24 25


dengyihao's avatar
dengyihao 已提交
26 27
typedef struct FstNode FstNode;
#define OUTPUT_PREFIX(a, b) ((a) > (b) ? (b) : (a) 
dengyihao's avatar
dengyihao 已提交
28 29 30 31 32 33 34 35


typedef struct FstRange {
  uint64_t start;
  uint64_t end;
} FstRange;


dengyihao's avatar
dengyihao 已提交
36 37
typedef enum { OneTransNext, OneTrans, AnyTrans, EmptyFinal} State;
typedef enum { Included, Excluded, Unbounded} FstBound; 
dengyihao's avatar
dengyihao 已提交
38

dengyihao's avatar
dengyihao 已提交
39 40
typedef enum {Ordered, OutOfOrdered, DuplicateKey} OrderType;

dengyihao's avatar
dengyihao 已提交
41

dengyihao's avatar
dengyihao 已提交
42 43 44 45 46 47 48 49 50 51 52 53

/*
 * 
 * UnFinished node and helper function
 * TODO: simple function name 
 */
typedef struct FstUnFinishedNodes {
  SArray *stack; // <FstBuilderNodeUnfinished> } FstUnFinishedNodes; 
} FstUnFinishedNodes;

#define FST_UNFINISHED_NODES_LEN(nodes) taosArrayGetSize(nodes->stack) 

dengyihao's avatar
dengyihao 已提交
54
FstUnFinishedNodes *fstUnFinishedNodesCreate();  
dengyihao's avatar
dengyihao 已提交
55
void fstUnFinishedNodesDestroy(FstUnFinishedNodes *node);
dengyihao's avatar
dengyihao 已提交
56 57 58 59 60 61 62 63
void fstUnFinishedNodesPushEmpty(FstUnFinishedNodes *nodes, bool isFinal);
FstBuilderNode *fstUnFinishedNodesPopRoot(FstUnFinishedNodes *nodes);
FstBuilderNode *fstUnFinishedNodesPopFreeze(FstUnFinishedNodes *nodes, CompiledAddr addr);
FstBuilderNode *fstUnFinishedNodesPopEmpty(FstUnFinishedNodes *nodes);
void fstUnFinishedNodesSetRootOutput(FstUnFinishedNodes *node, Output out); 
void fstUnFinishedNodesTopLastFreeze(FstUnFinishedNodes *node, CompiledAddr addr);
void fstUnFinishedNodesAddSuffix(FstUnFinishedNodes *node, FstSlice bs, Output out);
uint64_t fstUnFinishedNodesFindCommPrefix(FstUnFinishedNodes *node, FstSlice bs);
dengyihao's avatar
dengyihao 已提交
64
uint64_t fstUnFinishedNodesFindCommPrefixAndSetOutput(FstUnFinishedNodes *node, FstSlice bs, Output in, Output *out);
dengyihao's avatar
dengyihao 已提交
65

dengyihao's avatar
dengyihao 已提交
66 67

typedef struct FstBuilder {
dengyihao's avatar
dengyihao 已提交
68
  FstCountingWriter  *wrt;         // The FST raw data is written directly to `wtr`.  
dengyihao's avatar
dengyihao 已提交
69
  FstUnFinishedNodes *unfinished; // The stack of unfinished nodes   
dengyihao's avatar
dengyihao 已提交
70
  FstRegistry*        registry;    // A map of finished nodes.        
dengyihao's avatar
dengyihao 已提交
71
  FstSlice            last;        // The last word added 
dengyihao's avatar
dengyihao 已提交
72 73 74
  CompiledAddr       lastAddr;    // The address of the last compiled node  
  uint64_t           len;         // num of keys added
} FstBuilder;
dengyihao's avatar
dengyihao 已提交
75

dengyihao's avatar
dengyihao 已提交
76

dengyihao's avatar
dengyihao 已提交
77
FstBuilder *fstBuilderCreate(void *w, FstType ty);
dengyihao's avatar
dengyihao 已提交
78 79
void fstBuilderDestroy(FstBuilder *b);
void fstBuilderInsertOutput(FstBuilder *b, FstSlice bs, Output in);
dengyihao's avatar
dengyihao 已提交
80
OrderType fstBuilderCheckLastKey(FstBuilder *b, FstSlice bs, bool ckDup);
dengyihao's avatar
dengyihao 已提交
81
void fstBuilderCompileFrom(FstBuilder *b, uint64_t istate);
dengyihao's avatar
dengyihao 已提交
82 83 84
CompiledAddr fstBuilderCompile(FstBuilder *b, FstBuilderNode *bn);


dengyihao's avatar
dengyihao 已提交
85 86 87 88 89 90

typedef struct FstTransitions {
  FstNode    *node; 
  FstRange   range;   
} FstTransitions;

dengyihao's avatar
dengyihao 已提交
91 92 93 94 95 96 97 98 99 100 101 102 103
//FstState and relation function

typedef struct FstState {
  State state;
  uint8_t val;
} FstState;

FstState fstStateCreate(FstSlice* data, CompiledAddr addr);

#define FST_STATE_ONE_TRNAS_NEXT(node) (node->state.state == OneTransNext) 
#define FST_STATE_ONE_TRNAS(node) (node->state.state == OneTrans)
#define FST_STATE_ANY_TRANS(node) (node->state.state == AnyTrans)
#define FST_STATE_EMPTY_FINAL(node) (node->state.state == EmptyFinal) 
dengyihao's avatar
dengyihao 已提交
104 105 106 107 108 109 110


typedef struct FstLastTransition {
  uint8_t inp;
  Output  out;
} FstLastTransition;

dengyihao's avatar
dengyihao 已提交
111 112 113 114
/* 
 * FstBuilderNodeUnfinished and helper function
 * TODO: simple function name 
 */
dengyihao's avatar
dengyihao 已提交
115
typedef struct FstBuilderNodeUnfinished {
dengyihao's avatar
dengyihao 已提交
116 117
  FstBuilderNode *node; 
  FstLastTransition* last; 
dengyihao's avatar
dengyihao 已提交
118 119
} FstBuilderNodeUnfinished;

dengyihao's avatar
dengyihao 已提交
120 121


dengyihao's avatar
dengyihao 已提交
122
void fstBuilderNodeUnfinishedLastCompiled(FstBuilderNodeUnfinished *node, CompiledAddr addr);
dengyihao's avatar
dengyihao 已提交
123
void fstBuilderNodeUnfinishedAddOutputPrefix(FstBuilderNodeUnfinished *node, Output out);
dengyihao's avatar
dengyihao 已提交
124 125 126 127

/*
 * FstNode and helper function  
 */
dengyihao's avatar
dengyihao 已提交
128
typedef struct FstNode {
dengyihao's avatar
dengyihao 已提交
129
  FstSlice     data;
dengyihao's avatar
dengyihao 已提交
130
  uint64_t     version; 
dengyihao's avatar
dengyihao 已提交
131
  FstState     state;
dengyihao's avatar
dengyihao 已提交
132 133 134 135 136 137 138 139
  CompiledAddr start; 
  CompiledAddr end;  
  bool         isFinal;
  uint64_t     nTrans;
  PackSizes    sizes;
  Output      finalOutput;
} FstNode;

dengyihao's avatar
dengyihao 已提交
140 141 142 143 144 145 146 147 148 149 150 151
// If this node is final and has a terminal output value, then it is,  returned. Otherwise, a zero output is returned 
#define FST_NODE_FINAL_OUTPUT(node) node->finalOutput
// Returns true if and only if this node corresponds to a final or "match", state in the finite state transducer.
#define FST_NODE_IS_FINAL(node) node->isFinal
// Returns the number of transitions in this node, The maximum number of transitions is 256.
#define FST_NODE_LEN(node) node->nTrans
// Returns true if and only if this node has zero transitions.
#define FST_NODE_IS_EMPTYE(node) (node->nTrans == 0)
// Return the address of this node.
#define FST_NODE_ADDR(node) node->start 

FstNode *fstNodeCreate(int64_t version, CompiledAddr addr, FstSlice *data);
dengyihao's avatar
dengyihao 已提交
152 153
void fstNodeDestroy(FstNode *fstNode);

dengyihao's avatar
dengyihao 已提交
154 155 156 157 158 159 160 161 162 163
FstTransitions fstNodeTransitionIter(FstNode *node);
FstTransitions* fstNodeTransitions(FstNode *node);
bool fstNodeGetTransitionAt(FstNode *node, uint64_t i, FstTransition *res);
bool fstNodeGetTransitionAddrAt(FstNode *node, uint64_t i, CompiledAddr *res);
bool fstNodeFindInput(FstNode *node, uint8_t b, uint64_t *res);
bool fstNodeCompile(FstNode *node, void *w, CompiledAddr lastAddr, CompiledAddr addr, FstBuilderNode *builderNode); 
FstSlice  fstNodeAsSlice(FstNode *node); 



dengyihao's avatar
dengyihao 已提交
164 165 166 167 168 169 170 171 172 173 174
typedef struct FstMeta {
  uint64_t     version;
  CompiledAddr rootAddr;  
  FstType      ty;
  uint64_t     len;
  uint32_t     checkSum;
} FstMeta;

typedef struct Fst {
  FstMeta meta; 
  void    *data;  //  
dengyihao's avatar
dengyihao 已提交
175
} Fst;
dengyihao's avatar
dengyihao 已提交
176

dengyihao's avatar
dengyihao 已提交
177
// ops  
dengyihao's avatar
dengyihao 已提交
178 179 180 181

typedef struct FstIndexedValue {
  uint64_t index;
  uint64_t value;
dengyihao's avatar
dengyihao 已提交
182
} FstIndexedValue;
dengyihao's avatar
dengyihao 已提交
183

dengyihao's avatar
dengyihao 已提交
184 185 186
FstLastTransition *fstLastTransitionCreate(uint8_t inp, Output out);
void fstLastTransitionDestroy(FstLastTransition *trn);

dengyihao's avatar
dengyihao 已提交
187 188 189


#endif