index_fst.h 4.9 KB
Newer Older
dengyihao's avatar
dengyihao 已提交
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15
/*
 * Copyright (c) 2019 TAOS Data, Inc. <jhtao@taosdata.com>
 *
 * This program is free software: you can use, redistribute, and/or modify
 * it under the terms of the GNU Affero General Public License, version 3
 * or later ("AGPL"), as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.
 *
 * You should have received a copy of the GNU Affero General Public License
 * along with this program. If not, see <http://www.gnu.org/licenses/>.
 */

dengyihao's avatar
dengyihao 已提交
16 17
#ifndef __INDEX_FST_H__
#define __INDEX_FST_H__
dengyihao's avatar
dengyihao 已提交
18 19


dengyihao's avatar
dengyihao 已提交
20 21 22
#include "tarray.h"
#include "index_fst_util.h"
#include "index_fst_registry.h"
dengyihao's avatar
dengyihao 已提交
23
#include "index_fst_counting_writer.h"
dengyihao's avatar
dengyihao 已提交
24 25


dengyihao's avatar
dengyihao 已提交
26 27
typedef struct FstNode FstNode;
#define OUTPUT_PREFIX(a, b) ((a) > (b) ? (b) : (a) 
dengyihao's avatar
dengyihao 已提交
28 29 30 31 32 33 34 35


typedef struct FstRange {
  uint64_t start;
  uint64_t end;
} FstRange;


dengyihao's avatar
dengyihao 已提交
36 37
typedef enum { OneTransNext, OneTrans, AnyTrans, EmptyFinal} State;
typedef enum { Included, Excluded, Unbounded} FstBound; 
dengyihao's avatar
dengyihao 已提交
38 39


dengyihao's avatar
dengyihao 已提交
40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61

/*
 * 
 * UnFinished node and helper function
 * TODO: simple function name 
 */
typedef struct FstUnFinishedNodes {
  SArray *stack; // <FstBuilderNodeUnfinished> } FstUnFinishedNodes; 
} FstUnFinishedNodes;

#define FST_UNFINISHED_NODES_LEN(nodes) taosArrayGetSize(nodes->stack) 

FstUnFinishedNodes *FstUnFinishedNodesCreate();  
void fstUnFinishedNodesPushEmpty(FstUnFinishedNodes *nodes, bool isFinal);
FstBuilderNode *fstUnFinishedNodesPopRoot(FstUnFinishedNodes *nodes);
FstBuilderNode *fstUnFinishedNodesPopFreeze(FstUnFinishedNodes *nodes, CompiledAddr addr);
FstBuilderNode *fstUnFinishedNodesPopEmpty(FstUnFinishedNodes *nodes);
void fstUnFinishedNodesSetRootOutput(FstUnFinishedNodes *node, Output out); 
void fstUnFinishedNodesTopLastFreeze(FstUnFinishedNodes *node, CompiledAddr addr);
void fstUnFinishedNodesAddSuffix(FstUnFinishedNodes *node, FstSlice bs, Output out);
uint64_t fstUnFinishedNodesFindCommPrefix(FstUnFinishedNodes *node, FstSlice bs);
uint64_t FstUnFinishedNodesFindCommPreifxAndSetOutput(FstUnFinishedNodes *node, FstSlice bs, Output in, Output *out);
dengyihao's avatar
dengyihao 已提交
62

dengyihao's avatar
dengyihao 已提交
63 64 65 66 67 68 69 70 71

typedef struct FstBuilder {
  FstCountingWriter  wtr;         // The FST raw data is written directly to `wtr`.  
  FstUnFinishedNodes *unfinished; // The stack of unfinished nodes   
  FstRegistry        registry;    // A map of finished nodes.        
  SArray*            last;        // The last word added 
  CompiledAddr       lastAddr;    // The address of the last compiled node  
  uint64_t           len;         // num of keys added
} FstBuilder;
dengyihao's avatar
dengyihao 已提交
72 73 74 75 76 77 78 79 80 81 82 83 84 85


typedef struct FstTransitions {
  FstNode    *node; 
  FstRange   range;   
} FstTransitions;



typedef struct FstLastTransition {
  uint8_t inp;
  Output  out;
} FstLastTransition;

dengyihao's avatar
dengyihao 已提交
86 87 88 89
/* 
 * FstBuilderNodeUnfinished and helper function
 * TODO: simple function name 
 */
dengyihao's avatar
dengyihao 已提交
90
typedef struct FstBuilderNodeUnfinished {
dengyihao's avatar
dengyihao 已提交
91 92
  FstBuilderNode *node; 
  FstLastTransition* last; 
dengyihao's avatar
dengyihao 已提交
93 94
} FstBuilderNodeUnfinished;

dengyihao's avatar
dengyihao 已提交
95 96 97 98 99 100
void fstBuilderNodeUnfinishedLastCompiled(FstBuilderNodeUnfinished *node, CompiledAddr addr);
void fstBuilderNodeUnfinishedAddOutputPrefix(FstBuilderNodeUnfinished *node, CompiledAddr addr);

/*
 * FstNode and helper function  
 */
dengyihao's avatar
dengyihao 已提交
101
typedef struct FstNode {
dengyihao's avatar
dengyihao 已提交
102
  FstSlice     data;
dengyihao's avatar
dengyihao 已提交
103 104 105 106 107 108 109 110 111 112
  uint64_t     version; 
  State        state;
  CompiledAddr start; 
  CompiledAddr end;  
  bool         isFinal;
  uint64_t     nTrans;
  PackSizes    sizes;
  Output      finalOutput;
} FstNode;

dengyihao's avatar
dengyihao 已提交
113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134
// If this node is final and has a terminal output value, then it is,  returned. Otherwise, a zero output is returned 
#define FST_NODE_FINAL_OUTPUT(node) node->finalOutput
// Returns true if and only if this node corresponds to a final or "match", state in the finite state transducer.
#define FST_NODE_IS_FINAL(node) node->isFinal
// Returns the number of transitions in this node, The maximum number of transitions is 256.
#define FST_NODE_LEN(node) node->nTrans
// Returns true if and only if this node has zero transitions.
#define FST_NODE_IS_EMPTYE(node) (node->nTrans == 0)
// Return the address of this node.
#define FST_NODE_ADDR(node) node->start 

FstNode *fstNodeCreate(int64_t version, CompiledAddr addr, FstSlice *data);
FstTransitions fstNodeTransitionIter(FstNode *node);
FstTransitions* fstNodeTransitions(FstNode *node);
bool fstNodeGetTransitionAt(FstNode *node, uint64_t i, FstTransition *res);
bool fstNodeGetTransitionAddrAt(FstNode *node, uint64_t i, CompiledAddr *res);
bool fstNodeFindInput(FstNode *node, uint8_t b, uint64_t *res);
bool fstNodeCompile(FstNode *node, void *w, CompiledAddr lastAddr, CompiledAddr addr, FstBuilderNode *builderNode); 
FstSlice  fstNodeAsSlice(FstNode *node); 



dengyihao's avatar
dengyihao 已提交
135 136 137 138 139 140 141 142 143 144 145
typedef struct FstMeta {
  uint64_t     version;
  CompiledAddr rootAddr;  
  FstType      ty;
  uint64_t     len;
  uint32_t     checkSum;
} FstMeta;

typedef struct Fst {
  FstMeta meta; 
  void    *data;  //  
dengyihao's avatar
dengyihao 已提交
146
} Fst;
dengyihao's avatar
dengyihao 已提交
147

dengyihao's avatar
dengyihao 已提交
148
// ops  
dengyihao's avatar
dengyihao 已提交
149 150 151 152

typedef struct FstIndexedValue {
  uint64_t index;
  uint64_t value;
dengyihao's avatar
dengyihao 已提交
153
} FstIndexedValue;
dengyihao's avatar
dengyihao 已提交
154 155 156 157 158 159





#endif