/* * Copyright (c) 2019 TAOS Data, Inc. * * This program is free software: you can use, redistribute, and/or modify * it under the terms of the GNU Affero General Public License, version 3 * or later ("AGPL"), as published by the Free Software Foundation. * * This program is distributed in the hope that it will be useful, but WITHOUT * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or * FITNESS FOR A PARTICULAR PURPOSE. * * You should have received a copy of the GNU Affero General Public License * along with this program. If not, see . */ #ifndef __INDEX_FST_H__ #define __INDEX_FST_H__ #ifdef __cplusplus extern "C" { #endif #include "tarray.h" #include "index_fst_util.h" #include "index_fst_registry.h" #include "index_fst_counting_writer.h" #include "index_fst_automation.h" typedef struct FstNode FstNode; #define OUTPUT_PREFIX(a, b) ((a) > (b) ? (b) : (a) typedef struct FstRange { uint64_t start; uint64_t end; } FstRange; typedef enum {GE, GT, LE, LT} RangeType; typedef enum { OneTransNext, OneTrans, AnyTrans, EmptyFinal} State; typedef enum {Ordered, OutOfOrdered, DuplicateKey} OrderType; typedef enum { Included, Excluded, Unbounded} FstBound; typedef struct FstBoundWithData { FstSlice data; FstBound type; } FstBoundWithData; FstBoundWithData* fstBoundStateCreate(FstBound type, FstSlice *data); bool fstBoundWithDataExceededBy(FstBoundWithData *bound, FstSlice *slice); bool fstBoundWithDataIsEmpty(FstBoundWithData *bound); bool fstBoundWithDataIsIncluded(FstBoundWithData *bound); typedef struct FstOutput { bool null; Output out; } FstOutput; /* * * UnFinished node and helper function * TODO: simple function name */ typedef struct FstUnFinishedNodes { SArray *stack; // } FstUnFinishedNodes; } FstUnFinishedNodes; #define FST_UNFINISHED_NODES_LEN(nodes) taosArrayGetSize(nodes->stack) FstUnFinishedNodes *fstUnFinishedNodesCreate(); void fstUnFinishedNodesDestroy(FstUnFinishedNodes *node); void fstUnFinishedNodesPushEmpty(FstUnFinishedNodes *nodes, bool isFinal); FstBuilderNode *fstUnFinishedNodesPopRoot(FstUnFinishedNodes *nodes); FstBuilderNode *fstUnFinishedNodesPopFreeze(FstUnFinishedNodes *nodes, CompiledAddr addr); FstBuilderNode *fstUnFinishedNodesPopEmpty(FstUnFinishedNodes *nodes); void fstUnFinishedNodesSetRootOutput(FstUnFinishedNodes *node, Output out); void fstUnFinishedNodesTopLastFreeze(FstUnFinishedNodes *node, CompiledAddr addr); void fstUnFinishedNodesAddSuffix(FstUnFinishedNodes *node, FstSlice bs, Output out); uint64_t fstUnFinishedNodesFindCommPrefix(FstUnFinishedNodes *node, FstSlice bs); uint64_t fstUnFinishedNodesFindCommPrefixAndSetOutput(FstUnFinishedNodes *node, FstSlice bs, Output in, Output *out); typedef struct FstBuilder { FstCountingWriter *wrt; // The FST raw data is written directly to `wtr`. FstUnFinishedNodes *unfinished; // The stack of unfinished nodes FstRegistry* registry; // A map of finished nodes. FstSlice last; // The last word added CompiledAddr lastAddr; // The address of the last compiled node uint64_t len; // num of keys added } FstBuilder; FstBuilder *fstBuilderCreate(void *w, FstType ty); void fstBuilderDestroy(FstBuilder *b); void fstBuilderInsertOutput(FstBuilder *b, FstSlice bs, Output in); bool fstBuilderInsert(FstBuilder *b, FstSlice bs, Output in); OrderType fstBuilderCheckLastKey(FstBuilder *b, FstSlice bs, bool ckDup); void fstBuilderCompileFrom(FstBuilder *b, uint64_t istate); CompiledAddr fstBuilderCompile(FstBuilder *b, FstBuilderNode *bn); void* fstBuilerIntoInner(FstBuilder *b); void fstBuilderFinish(FstBuilder *b); typedef struct FstTransitions { FstNode *node; FstRange range; } FstTransitions; //FstState and relation function typedef struct FstState { State state; uint8_t val; } FstState; FstState fstStateCreateFrom(FstSlice* data, CompiledAddr addr); FstState fstStateCreate(State state); //compile void fstStateCompileForOneTransNext(FstCountingWriter *w, CompiledAddr addr, uint8_t inp); void fstStateCompileForOneTrans(FstCountingWriter *w, CompiledAddr addr, FstTransition *trn); void fstStateCompileForAnyTrans(FstCountingWriter *w, CompiledAddr addr, FstBuilderNode *node); // set_comm_input void fstStateSetCommInput(FstState* state, uint8_t inp); // comm_input uint8_t fstStateCommInput(FstState* state, bool *null); // input_len uint64_t fstStateInputLen(FstState* state); // end_addr uint64_t fstStateEndAddrForOneTransNext(FstState* state, FstSlice *data); uint64_t fstStateEndAddrForOneTrans(FstState *state, FstSlice *data, PackSizes sizes); uint64_t fstStateEndAddrForAnyTrans(FstState *state, uint64_t version, FstSlice *date, PackSizes sizes, uint64_t nTrans); // input uint8_t fstStateInput(FstState *state, FstNode *node); uint8_t fstStateInputForAnyTrans(FstState *state, FstNode *node, uint64_t i); // trans_addr CompiledAddr fstStateTransAddr(FstState *state, FstNode *node); CompiledAddr fstStateTransAddrForAnyTrans(FstState *state, FstNode *node, uint64_t i); // sizes PackSizes fstStateSizes(FstState *state, FstSlice *data); // Output Output fstStateOutput(FstState *state, FstNode *node); Output fstStateOutputForAnyTrans(FstState *state, FstNode *node, uint64_t i); // anyTrans specify function void fstStateSetFinalState(FstState *state, bool yes); bool fstStateIsFinalState(FstState *state); void fstStateSetStateNtrans(FstState *state, uint8_t n); // state_ntrans uint8_t fstStateStateNtrans(FstState *state, bool *null); uint64_t fstStateTotalTransSize(FstState *state, uint64_t version, PackSizes size, uint64_t nTrans); uint64_t fstStateTransIndexSize(FstState *state, uint64_t version, uint64_t nTrans); uint64_t fstStateNtransLen(FstState *state); uint64_t fstStateNtrans(FstState *state, FstSlice *slice); Output fstStateFinalOutput(FstState *state, uint64_t version, FstSlice *date, PackSizes sizes, uint64_t nTrans); uint64_t fstStateFindInput(FstState *state, FstNode *node, uint8_t b, bool *null); #define FST_STATE_ONE_TRNAS_NEXT(node) (node->state.state == OneTransNext) #define FST_STATE_ONE_TRNAS(node) (node->state.state == OneTrans) #define FST_STATE_ANY_TRANS(node) (node->state.state == AnyTrans) #define FST_STATE_EMPTY_FINAL(node) (node->state.state == EmptyFinal) typedef struct FstLastTransition { uint8_t inp; Output out; } FstLastTransition; /* * FstBuilderNodeUnfinished and helper function * TODO: simple function name */ typedef struct FstBuilderNodeUnfinished { FstBuilderNode *node; FstLastTransition* last; } FstBuilderNodeUnfinished; void fstBuilderNodeUnfinishedLastCompiled(FstBuilderNodeUnfinished *node, CompiledAddr addr); void fstBuilderNodeUnfinishedAddOutputPrefix(FstBuilderNodeUnfinished *node, Output out); /* * FstNode and helper function */ typedef struct FstNode { FstSlice data; uint64_t version; FstState state; CompiledAddr start; CompiledAddr end; bool isFinal; uint64_t nTrans; PackSizes sizes; Output finalOutput; } FstNode; // If this node is final and has a terminal output value, then it is, returned. Otherwise, a zero output is returned #define FST_NODE_FINAL_OUTPUT(node) node->finalOutput // Returns true if and only if this node corresponds to a final or "match", state in the finite state transducer. #define FST_NODE_IS_FINAL(node) node->isFinal // Returns the number of transitions in this node, The maximum number of transitions is 256. #define FST_NODE_LEN(node) node->nTrans // Returns true if and only if this node has zero transitions. #define FST_NODE_IS_EMPTYE(node) (node->nTrans == 0) // Return the address of this node. #define FST_NODE_ADDR(node) node->start FstNode *fstNodeCreate(int64_t version, CompiledAddr addr, FstSlice *data); void fstNodeDestroy(FstNode *fstNode); FstTransitions fstNodeTransitionIter(FstNode *node); FstTransitions* fstNodeTransitions(FstNode *node); bool fstNodeGetTransitionAt(FstNode *node, uint64_t i, FstTransition *res); bool fstNodeGetTransitionAddrAt(FstNode *node, uint64_t i, CompiledAddr *res); bool fstNodeFindInput(FstNode *node, uint8_t b, uint64_t *res); bool fstNodeCompile(FstNode *node, void *w, CompiledAddr lastAddr, CompiledAddr addr, FstBuilderNode *builderNode); FstSlice fstNodeAsSlice(FstNode *node); // ops typedef struct FstIndexedValue { uint64_t index; uint64_t value; } FstIndexedValue; FstLastTransition *fstLastTransitionCreate(uint8_t inp, Output out); void fstLastTransitionDestroy(FstLastTransition *trn); typedef struct FstMeta { uint64_t version; CompiledAddr rootAddr; FstType ty; uint64_t len; uint32_t checkSum; } FstMeta; typedef struct Fst { FstMeta *meta; FstSlice *data; // FstNode *root; // } Fst; // refactor simple function Fst* fstCreate(FstSlice *data); void fstDestroy(Fst *fst); bool fstGet(Fst *fst, FstSlice *b, Output *out); FstNode* fstGetNode(Fst *fst, CompiledAddr); FstNode* fstGetRoot(Fst *fst); FstType fstGetType(Fst *fst); CompiledAddr fstGetRootAddr(Fst *fst); Output fstEmptyFinalOutput(Fst *fst, bool *null); bool fstVerify(Fst *fst); //refactor this function bool fstBuilderNodeCompileTo(FstBuilderNode *b, FstCountingWriter *wrt, CompiledAddr lastAddr, CompiledAddr startAddr); typedef struct StreamState { FstNode *node; uint64_t trans; FstOutput out; void *autState; } StreamState; void streamStateDestroy(void *s); typedef struct StreamWithState { Fst *fst; Automation *aut; SArray *inp; FstOutput emptyOutput; SArray *stack; // FstBoundWithData *endAt; } StreamWithState ; typedef struct StreamWithStateResult { FstSlice data; FstOutput out; void *state; } StreamWithStateResult; StreamWithStateResult *swsResultCreate(FstSlice *data, FstOutput fOut, void *state); void swsResultDestroy(StreamWithStateResult *result); typedef void* (*StreamCallback)(void *); StreamWithState *streamWithStateCreate(Fst *fst, Automation *automation, FstBoundWithData *min, FstBoundWithData *max) ; void streamWithStateDestroy(StreamWithState *sws); bool streamWithStateSeekMin(StreamWithState *sws, FstBoundWithData *min); StreamWithStateResult* streamWithStateNextWith(StreamWithState *sws, StreamCallback callback); typedef struct FstStreamBuilder { Fst *fst; Automation *aut; FstBoundWithData *min; FstBoundWithData *max; } FstStreamBuilder; FstStreamBuilder *fstStreamBuilderCreate(Fst *fst, Automation *aut); // set up bound range // refator, simple code by marco FstStreamBuilder *fstStreamBuilderRange(FstStreamBuilder *b, FstSlice *val, RangeType type); #ifdef __cplusplus } #endif #endif