From 5d4d7b47a0f7874cc271cf6cb34d796828ece527 Mon Sep 17 00:00:00 2001 From: yihaoDeng Date: Tue, 21 Dec 2021 16:36:31 +0800 Subject: [PATCH] update index TFile manage and format code --- include/libs/index/index.h | 63 +- source/libs/index/inc/indexInt.h | 121 +- source/libs/index/inc/index_cache.h | 20 +- source/libs/index/inc/index_fst.h | 286 ++-- source/libs/index/inc/index_fst_automation.h | 48 +- source/libs/index/inc/index_fst_common.h | 4 +- .../index/inc/index_fst_counting_writer.h | 35 +- source/libs/index/inc/index_fst_node.h | 23 +- source/libs/index/inc/index_fst_registry.h | 47 +- source/libs/index/inc/index_fst_util.h | 102 +- source/libs/index/inc/index_tfile.h | 89 +- source/libs/index/inc/index_util.h | 33 +- source/libs/index/src/index.c | 286 ++-- source/libs/index/src/index_cache.c | 126 +- source/libs/index/src/index_fst.c | 1318 ++++++++--------- source/libs/index/src/index_fst_automation.c | 132 +- source/libs/index/src/index_fst_common.c | 800 ++++++---- .../index/src/index_fst_counting_writer.c | 106 +- source/libs/index/src/index_fst_node.c | 107 +- source/libs/index/src/index_fst_registry.c | 158 +- source/libs/index/src/index_fst_util.c | 137 +- source/libs/index/src/index_tfile.c | 201 +-- 22 files changed, 2212 insertions(+), 2030 deletions(-) diff --git a/include/libs/index/index.h b/include/libs/index/index.h index 2535ec8a5b..f4d45477c1 100644 --- a/include/libs/index/index.h +++ b/include/libs/index/index.h @@ -23,64 +23,63 @@ extern "C" { #endif -typedef struct SIndex SIndex; -typedef struct SIndexTerm SIndexTerm; -typedef struct SIndexOpts SIndexOpts; +typedef struct SIndex SIndex; +typedef struct SIndexTerm SIndexTerm; +typedef struct SIndexOpts SIndexOpts; typedef struct SIndexMultiTermQuery SIndexMultiTermQuery; typedef struct SArray SIndexMultiTerm; -typedef enum { - ADD_VALUE, // add index colume value - DEL_VALUE, // delete index column value - UPDATE_VALUE, // update index column value - ADD_INDEX, // add index on specify column - DROP_INDEX, // drop existed index - DROP_SATBLE // drop stable +typedef enum { + ADD_VALUE, // add index colume value + DEL_VALUE, // delete index column value + UPDATE_VALUE, // update index column value + ADD_INDEX, // add index on specify column + DROP_INDEX, // drop existed index + DROP_SATBLE // drop stable } SIndexOperOnColumn; -typedef enum { MUST = 0, SHOULD = 1, NOT = 2 } EIndexOperatorType; -typedef enum { QUERY_TERM = 0, QUERY_PREFIX = 1, QUERY_SUFFIX = 2,QUERY_REGEX = 3} EIndexQueryType; +typedef enum { MUST = 0, SHOULD = 1, NOT = 2 } EIndexOperatorType; +typedef enum { QUERY_TERM = 0, QUERY_PREFIX = 1, QUERY_SUFFIX = 2, QUERY_REGEX = 3 } EIndexQueryType; /* - * @param: oper + * @param: oper * -*/ + */ SIndexMultiTermQuery *indexMultiTermQueryCreate(EIndexOperatorType oper); -void indexMultiTermQueryDestroy(SIndexMultiTermQuery *pQuery); -int indexMultiTermQueryAdd(SIndexMultiTermQuery *pQuery, SIndexTerm *term, EIndexQueryType type); -/* - * @param: +void indexMultiTermQueryDestroy(SIndexMultiTermQuery *pQuery); +int indexMultiTermQueryAdd(SIndexMultiTermQuery *pQuery, SIndexTerm *term, EIndexQueryType type); +/* + * @param: * @param: */ -int indexOpen(SIndexOpts *opt, const char *path, SIndex **index); -void indexClose(SIndex *index); -int indexPut(SIndex *index, SIndexMultiTerm *terms, uint64_t uid); -int indexDelete(SIndex *index, SIndexMultiTermQuery *query); -int indexSearch(SIndex *index, SIndexMultiTermQuery *query, SArray *result); -int indexRebuild(SIndex *index, SIndexOpts *opt); +int indexOpen(SIndexOpts *opt, const char *path, SIndex **index); +void indexClose(SIndex *index); +int indexPut(SIndex *index, SIndexMultiTerm *terms, uint64_t uid); +int indexDelete(SIndex *index, SIndexMultiTermQuery *query); +int indexSearch(SIndex *index, SIndexMultiTermQuery *query, SArray *result); +int indexRebuild(SIndex *index, SIndexOpts *opt); /* * @param * @param */ -SIndexMultiTerm *indexMultiTermCreate(); -int indexMultiTermAdd(SIndexMultiTerm *terms, SIndexTerm *term); -void indexMultiTermDestroy(SIndexMultiTerm *terms); +SIndexMultiTerm *indexMultiTermCreate(); +int indexMultiTermAdd(SIndexMultiTerm *terms, SIndexTerm *term); +void indexMultiTermDestroy(SIndexMultiTerm *terms); /* - * @param: + * @param: * @param: */ SIndexOpts *indexOptsCreate(); -void indexOptsDestroy(SIndexOpts *opts); +void indexOptsDestroy(SIndexOpts *opts); /* * @param: * @param: */ -SIndexTerm *indexTermCreate(int64_t suid, SIndexOperOnColumn operType, uint8_t colType, - const char *colName, int32_t nColName, const char *colVal, int32_t nColVal); +SIndexTerm *indexTermCreate(int64_t suid, SIndexOperOnColumn operType, uint8_t colType, const char *colName, + int32_t nColName, const char *colVal, int32_t nColVal); void indexTermDestroy(SIndexTerm *p); - #ifdef __cplusplus } #endif diff --git a/source/libs/index/inc/indexInt.h b/source/libs/index/inc/indexInt.h index 22bb9d1d0f..72415a9716 100644 --- a/source/libs/index/inc/indexInt.h +++ b/source/libs/index/inc/indexInt.h @@ -18,87 +18,112 @@ #include "index.h" #include "index_fst.h" -#include "tlog.h" -#include "thash.h" #include "taos.h" +#include "thash.h" +#include "tlog.h" #ifdef USE_LUCENE #include #endif - #ifdef __cplusplus extern "C" { #endif -typedef enum {kTypeValue, kTypeDeletion} STermValueType ; +typedef enum { kTypeValue, kTypeDeletion } STermValueType; typedef struct SIndexStat { - int32_t totalAdded; // - int32_t totalDeled; // - int32_t totalUpdated; // - int32_t totalTerms; // - int32_t distinctCol; // distinct column -} SIndexStat; + int32_t totalAdded; // + int32_t totalDeled; // + int32_t totalUpdated; // + int32_t totalTerms; // + int32_t distinctCol; // distinct column +} SIndexStat; struct SIndex { -#ifdef USE_LUCENE - index_t *index; -#endif - void *cache; - void *tindex; - SHashObj *colObj;// < field name, field id> - - int64_t suid; // current super table id, -1 is normal table - int colId; // field id allocated to cache - int32_t cVersion; // current version allocated to cache - - SIndexStat stat; - pthread_mutex_t mtx; -}; +#ifdef USE_LUCENE + index_t *index; +#endif + void * cache; + void * tindex; + SHashObj *colObj; // < field name, field id> + + int64_t suid; // current super table id, -1 is normal table + int colId; // field id allocated to cache + int32_t cVersion; // current version allocated to cache + + SIndexStat stat; + pthread_mutex_t mtx; +}; struct SIndexOpts { -#ifdef USE_LUCENE - void *opts; -#endif +#ifdef USE_LUCENE + void *opts; +#endif #ifdef USE_INVERTED_INDEX - int32_t cacheSize; // MB + int32_t cacheSize; // MB // add cache module later #endif - }; struct SIndexMultiTermQuery { - EIndexOperatorType opera; - SArray *query; + EIndexOperatorType opera; + SArray * query; }; // field and key; typedef struct SIndexTerm { - int64_t suid; - SIndexOperOnColumn operType; // oper type, add/del/update - uint8_t colType; // term data type, str/interger/json - char *colName; - int32_t nColName; - char *colVal; - int32_t nColVal; + int64_t suid; + SIndexOperOnColumn operType; // oper type, add/del/update + uint8_t colType; // term data type, str/interger/json + char * colName; + int32_t nColName; + char * colVal; + int32_t nColVal; } SIndexTerm; typedef struct SIndexTermQuery { - SIndexTerm* term; + SIndexTerm * term; EIndexQueryType qType; } SIndexTermQuery; - - -#define indexFatal(...) do { if (sDebugFlag & DEBUG_FATAL) { taosPrintLog("index FATAL ", 255, __VA_ARGS__); }} while(0) -#define indexError(...) do { if (sDebugFlag & DEBUG_ERROR) { taosPrintLog("index ERROR ", 255, __VA_ARGS__); }} while(0) -#define indexWarn(...) do { if (sDebugFlag & DEBUG_WARN) { taosPrintLog("index WARN ", 255, __VA_ARGS__); }} while(0) -#define indexInfo(...) do { if (sDebugFlag & DEBUG_INFO) { taosPrintLog("index ", 255, __VA_ARGS__); }} while(0) -#define indexDebug(...) do { if (sDebugFlag & DEBUG_DEBUG) { taosPrintLog("index ", sDebugFlag, __VA_ARGS__); }} while(0) -#define indexTrace(...) do { if (sDebugFlag & DEBUG_TRACE) { taosPrintLog("index ", sDebugFlag, __VA_ARGS__); }} while(0) - +#define indexFatal(...) \ + do { \ + if (sDebugFlag & DEBUG_FATAL) { \ + taosPrintLog("index FATAL ", 255, __VA_ARGS__); \ + } \ + } while (0) +#define indexError(...) \ + do { \ + if (sDebugFlag & DEBUG_ERROR) { \ + taosPrintLog("index ERROR ", 255, __VA_ARGS__); \ + } \ + } while (0) +#define indexWarn(...) \ + do { \ + if (sDebugFlag & DEBUG_WARN) { \ + taosPrintLog("index WARN ", 255, __VA_ARGS__); \ + } \ + } while (0) +#define indexInfo(...) \ + do { \ + if (sDebugFlag & DEBUG_INFO) { \ + taosPrintLog("index ", 255, __VA_ARGS__); \ + } \ + } while (0) +#define indexDebug(...) \ + do { \ + if (sDebugFlag & DEBUG_DEBUG) { \ + taosPrintLog("index ", sDebugFlag, __VA_ARGS__); \ + } \ + } while (0) +#define indexTrace(...) \ + do { \ + if (sDebugFlag & DEBUG_TRACE) { \ + taosPrintLog("index ", sDebugFlag, __VA_ARGS__); \ + } \ + } while (0) #ifdef __cplusplus } diff --git a/source/libs/index/inc/index_cache.h b/source/libs/index/inc/index_cache.h index 97a7b835f6..9d5083d23d 100644 --- a/source/libs/index/inc/index_cache.h +++ b/source/libs/index/inc/index_cache.h @@ -22,8 +22,10 @@ // ----------------- key structure in skiplist --------------------- /* A data row, the format is like below: - * content: |<--totalLen-->|<-- fieldid-->|<--field type -->|<-- value len--->|<-- value -->|<-- uid -->|<--version--->|<-- itermType -->| - * len : |<--int32_t -->|<-- int16_t-->|<-- int8_t --->|<--- int32_t --->|<--valuelen->|<--uint64_t->|<-- int32_t-->|<-- int8_t --->| + * content: |<--totalLen-->|<-- fieldid-->|<--field type-->|<-- value len--->| + * |<-- value -->|<--uid -->|<--version--->|<-- itermType -->| + * len : |<--int32_t -->|<-- int16_t-->|<-- int8_t --->|<--- int32_t --->| + * <--valuelen->|<--uint64_t->| * <-- int32_t-->|<-- int8_t --->| */ #ifdef __cplusplus @@ -31,25 +33,23 @@ extern "C" { #endif typedef struct IndexCache { - T_REF_DECLARE() + T_REF_DECLARE() SSkipList *skiplist; } IndexCache; - -// +// IndexCache *indexCacheCreate(); void indexCacheDestroy(void *cache); -int indexCachePut(void *cache, SIndexTerm *term, int16_t colId, int32_t version, uint64_t uid); +int indexCachePut(void *cache, SIndexTerm *term, int16_t colId, int32_t version, uint64_t uid); -//int indexCacheGet(void *cache, uint64_t *rst); -int indexCacheSearch(void *cache, SIndexTermQuery *query, int16_t colId, int32_t version, SArray *result, STermValueType *s); +// int indexCacheGet(void *cache, uint64_t *rst); +int indexCacheSearch( + void *cache, SIndexTermQuery *query, int16_t colId, int32_t version, SArray *result, STermValueType *s); #ifdef __cplusplus } #endif - - #endif diff --git a/source/libs/index/inc/index_fst.h b/source/libs/index/inc/index_fst.h index 0dcc25831c..67438c092b 100644 --- a/source/libs/index/inc/index_fst.h +++ b/source/libs/index/inc/index_fst.h @@ -20,29 +20,29 @@ extern "C" { #endif -#include "tarray.h" -#include "index_fst_util.h" -#include "index_fst_registry.h" -#include "index_fst_counting_writer.h" #include "index_fst_automation.h" +#include "index_fst_counting_writer.h" +#include "index_fst_registry.h" +#include "index_fst_util.h" +#include "tarray.h" -#define OUTPUT_PREFIX(a, b) ((a) > (b) ? (b) : (a) +#define OUTPUT_PREFIX(a, b) ((a) > (b) ? (b) : (a) -typedef struct Fst Fst; -typedef struct FstNode FstNode; +typedef struct Fst Fst; +typedef struct FstNode FstNode; typedef struct StreamWithState StreamWithState; -typedef enum { Included, Excluded, Unbounded} FstBound; +typedef enum { Included, Excluded, Unbounded } FstBound; typedef struct FstBoundWithData { - FstSlice data; + FstSlice data; FstBound type; } FstBoundWithData; typedef struct FstStreamBuilder { - Fst *fst; - AutomationCtx *aut; - FstBoundWithData *min; + Fst * fst; + AutomationCtx * aut; + FstBoundWithData *min; FstBoundWithData *max; } FstStreamBuilder, FstStreamWithStateBuilder; @@ -51,17 +51,14 @@ typedef struct FstRange { uint64_t end; } FstRange; +typedef enum { GE, GT, LE, LT } RangeType; +typedef enum { OneTransNext, OneTrans, AnyTrans, EmptyFinal } State; +typedef enum { Ordered, OutOfOrdered, DuplicateKey } OrderType; -typedef enum {GE, GT, LE, LT} RangeType; -typedef enum { OneTransNext, OneTrans, AnyTrans, EmptyFinal} State; -typedef enum {Ordered, OutOfOrdered, DuplicateKey} OrderType; - - -FstBoundWithData* fstBoundStateCreate(FstBound type, FstSlice *data); -bool fstBoundWithDataExceededBy(FstBoundWithData *bound, FstSlice *slice); -bool fstBoundWithDataIsEmpty(FstBoundWithData *bound); -bool fstBoundWithDataIsIncluded(FstBoundWithData *bound); - +FstBoundWithData *fstBoundStateCreate(FstBound type, FstSlice *data); +bool fstBoundWithDataExceededBy(FstBoundWithData *bound, FstSlice *slice); +bool fstBoundWithDataIsEmpty(FstBoundWithData *bound); +bool fstBoundWithDataIsIncluded(FstBoundWithData *bound); typedef struct FstOutput { bool null; @@ -69,110 +66,105 @@ typedef struct FstOutput { } FstOutput; /* - * + * * UnFinished node and helper function - * TODO: simple function name + * TODO: simple function name */ typedef struct FstUnFinishedNodes { - SArray *stack; // } FstUnFinishedNodes; + SArray *stack; // } FstUnFinishedNodes; } FstUnFinishedNodes; -#define FST_UNFINISHED_NODES_LEN(nodes) taosArrayGetSize(nodes->stack) - -FstUnFinishedNodes *fstUnFinishedNodesCreate(); -void fstUnFinishedNodesDestroy(FstUnFinishedNodes *node); -void fstUnFinishedNodesPushEmpty(FstUnFinishedNodes *nodes, bool isFinal); -FstBuilderNode *fstUnFinishedNodesPopRoot(FstUnFinishedNodes *nodes); -FstBuilderNode *fstUnFinishedNodesPopFreeze(FstUnFinishedNodes *nodes, CompiledAddr addr); -FstBuilderNode *fstUnFinishedNodesPopEmpty(FstUnFinishedNodes *nodes); -void fstUnFinishedNodesSetRootOutput(FstUnFinishedNodes *node, Output out); -void fstUnFinishedNodesTopLastFreeze(FstUnFinishedNodes *node, CompiledAddr addr); -void fstUnFinishedNodesAddSuffix(FstUnFinishedNodes *node, FstSlice bs, Output out); -uint64_t fstUnFinishedNodesFindCommPrefix(FstUnFinishedNodes *node, FstSlice bs); -uint64_t fstUnFinishedNodesFindCommPrefixAndSetOutput(FstUnFinishedNodes *node, FstSlice bs, Output in, Output *out); +#define FST_UNFINISHED_NODES_LEN(nodes) taosArrayGetSize(nodes->stack) +FstUnFinishedNodes *fstUnFinishedNodesCreate(); +void fstUnFinishedNodesDestroy(FstUnFinishedNodes *node); +void fstUnFinishedNodesPushEmpty(FstUnFinishedNodes *nodes, bool isFinal); +void fstUnFinishedNodesSetRootOutput(FstUnFinishedNodes *node, Output out); +void fstUnFinishedNodesTopLastFreeze(FstUnFinishedNodes *node, CompiledAddr addr); +void fstUnFinishedNodesAddSuffix(FstUnFinishedNodes *node, FstSlice bs, Output out); +uint64_t fstUnFinishedNodesFindCommPrefix(FstUnFinishedNodes *node, FstSlice bs); +FstBuilderNode * fstUnFinishedNodesPopRoot(FstUnFinishedNodes *nodes); +FstBuilderNode * fstUnFinishedNodesPopFreeze(FstUnFinishedNodes *nodes, CompiledAddr addr); +FstBuilderNode * fstUnFinishedNodesPopEmpty(FstUnFinishedNodes *nodes); + +uint64_t fstUnFinishedNodesFindCommPrefixAndSetOutput(FstUnFinishedNodes *node, FstSlice bs, Output in, Output *out); typedef struct FstBuilder { - FstCountingWriter *wrt; // The FST raw data is written directly to `wtr`. - FstUnFinishedNodes *unfinished; // The stack of unfinished nodes - FstRegistry* registry; // A map of finished nodes. - FstSlice last; // The last word added - CompiledAddr lastAddr; // The address of the last compiled node - uint64_t len; // num of keys added + FstCountingWriter * wrt; // The FST raw data is written directly to `wtr`. + FstUnFinishedNodes *unfinished; // The stack of unfinished nodes + FstRegistry * registry; // A map of finished nodes. + FstSlice last; // The last word added + CompiledAddr lastAddr; // The address of the last compiled node + uint64_t len; // num of keys added } FstBuilder; - FstBuilder *fstBuilderCreate(void *w, FstType ty); - -void fstBuilderDestroy(FstBuilder *b); -void fstBuilderInsertOutput(FstBuilder *b, FstSlice bs, Output in); -bool fstBuilderInsert(FstBuilder *b, FstSlice bs, Output in); -OrderType fstBuilderCheckLastKey(FstBuilder *b, FstSlice bs, bool ckDup); -void fstBuilderCompileFrom(FstBuilder *b, uint64_t istate); +void fstBuilderDestroy(FstBuilder *b); +void fstBuilderInsertOutput(FstBuilder *b, FstSlice bs, Output in); +bool fstBuilderInsert(FstBuilder *b, FstSlice bs, Output in); +void fstBuilderCompileFrom(FstBuilder *b, uint64_t istate); +void * fstBuilerIntoInner(FstBuilder *b); +void fstBuilderFinish(FstBuilder *b); +OrderType fstBuilderCheckLastKey(FstBuilder *b, FstSlice bs, bool ckDup); CompiledAddr fstBuilderCompile(FstBuilder *b, FstBuilderNode *bn); -void* fstBuilerIntoInner(FstBuilder *b); -void fstBuilderFinish(FstBuilder *b); - - - typedef struct FstTransitions { - FstNode *node; - FstRange range; + FstNode *node; + FstRange range; } FstTransitions; -//FstState and relation function +// FstState and relation function typedef struct FstState { - State state; + State state; uint8_t val; } FstState; -FstState fstStateCreateFrom(FstSlice* data, CompiledAddr addr); +FstState fstStateCreateFrom(FstSlice *data, CompiledAddr addr); FstState fstStateCreate(State state); -//compile +// compile void fstStateCompileForOneTransNext(FstCountingWriter *w, CompiledAddr addr, uint8_t inp); void fstStateCompileForOneTrans(FstCountingWriter *w, CompiledAddr addr, FstTransition *trn); void fstStateCompileForAnyTrans(FstCountingWriter *w, CompiledAddr addr, FstBuilderNode *node); // set_comm_input -void fstStateSetCommInput(FstState* state, uint8_t inp); +void fstStateSetCommInput(FstState *state, uint8_t inp); // comm_input -uint8_t fstStateCommInput(FstState* state, bool *null); +uint8_t fstStateCommInput(FstState *state, bool *null); // input_len -uint64_t fstStateInputLen(FstState* state); +uint64_t fstStateInputLen(FstState *state); - -// end_addr -uint64_t fstStateEndAddrForOneTransNext(FstState* state, FstSlice *data); +// end_addr +uint64_t fstStateEndAddrForOneTransNext(FstState *state, FstSlice *data); uint64_t fstStateEndAddrForOneTrans(FstState *state, FstSlice *data, PackSizes sizes); -uint64_t fstStateEndAddrForAnyTrans(FstState *state, uint64_t version, FstSlice *date, PackSizes sizes, uint64_t nTrans); -// input -uint8_t fstStateInput(FstState *state, FstNode *node); -uint8_t fstStateInputForAnyTrans(FstState *state, FstNode *node, uint64_t i); +uint64_t fstStateEndAddrForAnyTrans( + FstState *state, uint64_t version, FstSlice *date, PackSizes sizes, uint64_t nTrans); +// input +uint8_t fstStateInput(FstState *state, FstNode *node); +uint8_t fstStateInputForAnyTrans(FstState *state, FstNode *node, uint64_t i); // trans_addr CompiledAddr fstStateTransAddr(FstState *state, FstNode *node); CompiledAddr fstStateTransAddrForAnyTrans(FstState *state, FstNode *node, uint64_t i); -// sizes +// sizes PackSizes fstStateSizes(FstState *state, FstSlice *data); -// Output +// Output Output fstStateOutput(FstState *state, FstNode *node); Output fstStateOutputForAnyTrans(FstState *state, FstNode *node, uint64_t i); // anyTrans specify function void fstStateSetFinalState(FstState *state, bool yes); -bool fstStateIsFinalState(FstState *state); +bool fstStateIsFinalState(FstState *state); void fstStateSetStateNtrans(FstState *state, uint8_t n); // state_ntrans -uint8_t fstStateStateNtrans(FstState *state, bool *null); +uint8_t fstStateStateNtrans(FstState *state, bool *null); uint64_t fstStateTotalTransSize(FstState *state, uint64_t version, PackSizes size, uint64_t nTrans); uint64_t fstStateTransIndexSize(FstState *state, uint64_t version, uint64_t nTrans); uint64_t fstStateNtransLen(FstState *state); @@ -180,72 +172,72 @@ uint64_t fstStateNtrans(FstState *state, FstSlice *slice); Output fstStateFinalOutput(FstState *state, uint64_t version, FstSlice *date, PackSizes sizes, uint64_t nTrans); uint64_t fstStateFindInput(FstState *state, FstNode *node, uint8_t b, bool *null); - - -#define FST_STATE_ONE_TRNAS_NEXT(node) (node->state.state == OneTransNext) +#define FST_STATE_ONE_TRNAS_NEXT(node) (node->state.state == OneTransNext) #define FST_STATE_ONE_TRNAS(node) (node->state.state == OneTrans) #define FST_STATE_ANY_TRANS(node) (node->state.state == AnyTrans) -#define FST_STATE_EMPTY_FINAL(node) (node->state.state == EmptyFinal) - +#define FST_STATE_EMPTY_FINAL(node) (node->state.state == EmptyFinal) typedef struct FstLastTransition { uint8_t inp; Output out; } FstLastTransition; -/* +/* * FstBuilderNodeUnfinished and helper function - * TODO: simple function name + * TODO: simple function name */ typedef struct FstBuilderNodeUnfinished { - FstBuilderNode *node; - FstLastTransition* last; + FstBuilderNode * node; + FstLastTransition *last; } FstBuilderNodeUnfinished; - - void fstBuilderNodeUnfinishedLastCompiled(FstBuilderNodeUnfinished *node, CompiledAddr addr); + void fstBuilderNodeUnfinishedAddOutputPrefix(FstBuilderNodeUnfinished *node, Output out); /* - * FstNode and helper function + * FstNode and helper function */ typedef struct FstNode { FstSlice data; - uint64_t version; + uint64_t version; FstState state; - CompiledAddr start; - CompiledAddr end; + CompiledAddr start; + CompiledAddr end; bool isFinal; uint64_t nTrans; PackSizes sizes; - Output finalOutput; + Output finalOutput; } FstNode; -// If this node is final and has a terminal output value, then it is, returned. Otherwise, a zero output is returned +// If this node is final and has a terminal output value, then it is, returned. +// Otherwise, a zero output is returned #define FST_NODE_FINAL_OUTPUT(node) node->finalOutput -// Returns true if and only if this node corresponds to a final or "match", state in the finite state transducer. +// Returns true if and only if this node corresponds to a final or "match", +// state in the finite state transducer. #define FST_NODE_IS_FINAL(node) node->isFinal -// Returns the number of transitions in this node, The maximum number of transitions is 256. +// Returns the number of transitions in this node, The maximum number of +// transitions is 256. #define FST_NODE_LEN(node) node->nTrans // Returns true if and only if this node has zero transitions. #define FST_NODE_IS_EMPTYE(node) (node->nTrans == 0) // Return the address of this node. -#define FST_NODE_ADDR(node) node->start - +#define FST_NODE_ADDR(node) node->start FstNode *fstNodeCreate(int64_t version, CompiledAddr addr, FstSlice *data); -void fstNodeDestroy(FstNode *fstNode); +void fstNodeDestroy(FstNode *fstNode); -FstTransitions fstNodeTransitionIter(FstNode *node); -FstTransitions* fstNodeTransitions(FstNode *node); -bool fstNodeGetTransitionAt(FstNode *node, uint64_t i, FstTransition *res); -bool fstNodeGetTransitionAddrAt(FstNode *node, uint64_t i, CompiledAddr *res); -bool fstNodeFindInput(FstNode *node, uint8_t b, uint64_t *res); -bool fstNodeCompile(FstNode *node, void *w, CompiledAddr lastAddr, CompiledAddr addr, FstBuilderNode *builderNode); -FstSlice fstNodeAsSlice(FstNode *node); +FstTransitions fstNodeTransitionIter(FstNode *node); +FstTransitions *fstNodeTransitions(FstNode *node); +bool fstNodeGetTransitionAt(FstNode *node, uint64_t i, FstTransition *res); +bool fstNodeGetTransitionAddrAt(FstNode *node, uint64_t i, CompiledAddr *res); +bool fstNodeFindInput(FstNode *node, uint8_t b, uint64_t *res); -// ops +bool fstNodeCompile(FstNode *node, void *w, CompiledAddr lastAddr, CompiledAddr addr, FstBuilderNode *builderNode); + +FstSlice fstNodeAsSlice(FstNode *node); + +// ops typedef struct FstIndexedValue { uint64_t index; @@ -253,87 +245,87 @@ typedef struct FstIndexedValue { } FstIndexedValue; FstLastTransition *fstLastTransitionCreate(uint8_t inp, Output out); -void fstLastTransitionDestroy(FstLastTransition *trn); - +void fstLastTransitionDestroy(FstLastTransition *trn); typedef struct FstMeta { uint64_t version; - CompiledAddr rootAddr; + CompiledAddr rootAddr; FstType ty; uint64_t len; uint32_t checkSum; } FstMeta; typedef struct Fst { - FstMeta *meta; - FstSlice *data; // - FstNode *root; // + FstMeta * meta; + FstSlice *data; // + FstNode * root; // } Fst; -// refactor simple function +// refactor simple function -Fst* fstCreate(FstSlice *data); -void fstDestroy(Fst *fst); +Fst *fstCreate(FstSlice *data); +void fstDestroy(Fst *fst); -bool fstGet(Fst *fst, FstSlice *b, Output *out); -FstNode* fstGetNode(Fst *fst, CompiledAddr); -FstNode* fstGetRoot(Fst *fst); -FstType fstGetType(Fst *fst); -CompiledAddr fstGetRootAddr(Fst *fst); -Output fstEmptyFinalOutput(Fst *fst, bool *null); +bool fstGet(Fst *fst, FstSlice *b, Output *out); +FstNode * fstGetNode(Fst *fst, CompiledAddr); +FstNode * fstGetRoot(Fst *fst); +FstType fstGetType(Fst *fst); +CompiledAddr fstGetRootAddr(Fst *fst); +Output fstEmptyFinalOutput(Fst *fst, bool *null); FstStreamBuilder *fstSearch(Fst *fst, AutomationCtx *ctx); -FstStreamWithStateBuilder *fstSearchWithState(Fst *fst, AutomationCtx *ctx); - -// into stream to expand later -StreamWithState* streamBuilderIntoStream(FstStreamBuilder *sb); -bool fstVerify(Fst *fst); +FstStreamWithStateBuilder *fstSearchWithState(Fst *fst, AutomationCtx *ctx); +// into stream to expand later +StreamWithState *streamBuilderIntoStream(FstStreamBuilder *sb); +bool fstVerify(Fst *fst); -//refactor this function -bool fstBuilderNodeCompileTo(FstBuilderNode *b, FstCountingWriter *wrt, CompiledAddr lastAddr, CompiledAddr startAddr); +// refactor this function +bool fstBuilderNodeCompileTo(FstBuilderNode *b, FstCountingWriter *wrt, CompiledAddr lastAddr, CompiledAddr startAddr); typedef struct StreamState { - FstNode *node; + FstNode * node; uint64_t trans; - FstOutput out; - void *autState; -} StreamState; + FstOutput out; + void * autState; +} StreamState; void streamStateDestroy(void *s); typedef struct StreamWithState { - Fst *fst; - AutomationCtx *aut; - SArray *inp; - FstOutput emptyOutput; - SArray *stack; // + Fst * fst; + AutomationCtx * aut; + SArray * inp; + FstOutput emptyOutput; + SArray * stack; // FstBoundWithData *endAt; } StreamWithState; typedef struct StreamWithStateResult { - FstSlice data; + FstSlice data; FstOutput out; - void *state; + void * state; } StreamWithStateResult; StreamWithStateResult *swsResultCreate(FstSlice *data, FstOutput fOut, void *state); -void swsResultDestroy(StreamWithStateResult *result); +void swsResultDestroy(StreamWithStateResult *result); + +typedef void *(*StreamCallback)(void *); +StreamWithState *streamWithStateCreate( + Fst *fst, AutomationCtx *automation, FstBoundWithData *min, FstBoundWithData *max); -typedef void* (*StreamCallback)(void *); -StreamWithState *streamWithStateCreate(Fst *fst, AutomationCtx *automation, FstBoundWithData *min, FstBoundWithData *max) ; void streamWithStateDestroy(StreamWithState *sws); -bool streamWithStateSeekMin(StreamWithState *sws, FstBoundWithData *min); -StreamWithStateResult* streamWithStateNextWith(StreamWithState *sws, StreamCallback callback); +bool streamWithStateSeekMin(StreamWithState *sws, FstBoundWithData *min); -FstStreamBuilder *fstStreamBuilderCreate(Fst *fst, AutomationCtx *aut); +StreamWithStateResult *streamWithStateNextWith(StreamWithState *sws, StreamCallback callback); + +FstStreamBuilder *fstStreamBuilderCreate(Fst *fst, AutomationCtx *aut); // set up bound range -// refator, simple code by marco +// refator, simple code by marco FstStreamBuilder *fstStreamBuilderRange(FstStreamBuilder *b, FstSlice *val, RangeType type); - #ifdef __cplusplus } #endif diff --git a/source/libs/index/inc/index_fst_automation.h b/source/libs/index/inc/index_fst_automation.h index 8050b85b08..4e5309bf07 100644 --- a/source/libs/index/inc/index_fst_automation.h +++ b/source/libs/index/inc/index_fst_automation.h @@ -21,62 +21,56 @@ extern "C" { #include "index_fst_util.h" - typedef struct AutomationCtx AutomationCtx; -typedef enum AutomationType { - AUTOMATION_PREFIX, - AUTMMATION_MATCH -} AutomationType; +typedef enum AutomationType { AUTOMATION_PREFIX, AUTMMATION_MATCH } AutomationType; typedef struct StartWith { - AutomationCtx *autoSelf; + AutomationCtx *autoSelf; } StartWith; typedef struct Complement { AutomationCtx *autoSelf; } Complement; -// automation +// automation typedef struct AutomationCtx { - AutomationType type; - void *stdata; - char *data; + AutomationType type; + void * stdata; + char * data; } AutomationCtx; - -typedef enum ValueType { FST_INT, FST_CHAR, FST_ARRAY} ValueType; -typedef enum StartWithStateKind { Done, Running } StartWithStateKind; +typedef enum ValueType { FST_INT, FST_CHAR, FST_ARRAY } ValueType; +typedef enum StartWithStateKind { Done, Running } StartWithStateKind; typedef struct StartWithStateValue { StartWithStateKind kind; - ValueType type; + ValueType type; union { - int val; - char *ptr; + int val; + char * ptr; SArray *arr; // add more type - } ; + }; } StartWithStateValue; StartWithStateValue *startWithStateValueCreate(StartWithStateKind kind, ValueType ty, void *val); -StartWithStateValue *startWithStateValueDump(StartWithStateValue *sv); -void startWithStateValueDestroy(void *sv); - +StartWithStateValue *startWithStateValueDump(StartWithStateValue *sv); +void startWithStateValueDestroy(void *sv); typedef struct AutomationFunc { - void* (*start)(AutomationCtx *ctx) ; + void *(*start)(AutomationCtx *ctx); bool (*isMatch)(AutomationCtx *ctx, void *); bool (*canMatch)(AutomationCtx *ctx, void *data); - bool (*willAlwaysMatch)(AutomationCtx *ctx, void *state); - void* (*accept)(AutomationCtx *ctx, void *state, uint8_t byte); - void* (*acceptEof)(AutomationCtx *ct, void *state); -} AutomationFunc; + bool (*willAlwaysMatch)(AutomationCtx *ctx, void *state); + void *(*accept)(AutomationCtx *ctx, void *state, uint8_t byte); + void *(*acceptEof)(AutomationCtx *ct, void *state); +} AutomationFunc; AutomationCtx *automCtxCreate(void *data, AutomationType atype); -void automCtxDestroy(AutomationCtx *ctx); +void automCtxDestroy(AutomationCtx *ctx); -extern AutomationFunc automFuncs[]; +extern AutomationFunc automFuncs[]; #ifdef __cplusplus } #endif diff --git a/source/libs/index/inc/index_fst_common.h b/source/libs/index/inc/index_fst_common.h index 9c802faa33..8335e437fb 100644 --- a/source/libs/index/inc/index_fst_common.h +++ b/source/libs/index/inc/index_fst_common.h @@ -1,9 +1,9 @@ #ifndef __INDEX_FST_COMM_H__ #define __INDEX_FST_COMM_H__ - +#include "tutil.h" extern const uint8_t COMMON_INPUTS[]; -extern char const COMMON_INPUTS_INV[]; +extern const char COMMON_INPUTS_INV[]; #ifdef __cplusplus extern "C" { diff --git a/source/libs/index/inc/index_fst_counting_writer.h b/source/libs/index/inc/index_fst_counting_writer.h index ac9a59fa04..b8a0384fb8 100644 --- a/source/libs/index/inc/index_fst_counting_writer.h +++ b/source/libs/index/inc/index_fst_counting_writer.h @@ -22,25 +22,24 @@ extern "C" { #include "tfile.h" - -#define DefaultMem 1024*1024 +#define DefaultMem 1024 * 1024 static char tmpFile[] = "./index"; -typedef enum WriterType {TMemory, TFile} WriterType; +typedef enum WriterType { TMemory, TFile } WriterType; typedef struct WriterCtx { int (*write)(struct WriterCtx *ctx, uint8_t *buf, int len); int (*read)(struct WriterCtx *ctx, uint8_t *buf, int len); int (*flush)(struct WriterCtx *ctx); - WriterType type; + WriterType type; union { struct { - int fd; + int fd; bool readOnly; - } file; + } file; struct { int32_t capa; - char *buf; + char * buf; } mem; }; int32_t offset; @@ -51,35 +50,31 @@ static int writeCtxDoWrite(WriterCtx *ctx, uint8_t *buf, int len); static int writeCtxDoRead(WriterCtx *ctx, uint8_t *buf, int len); static int writeCtxDoFlush(WriterCtx *ctx); -WriterCtx* writerCtxCreate(WriterType type, const char *path, bool readOnly, int32_t capacity); -void writerCtxDestroy(WriterCtx *w); +WriterCtx *writerCtxCreate(WriterType type, const char *path, bool readOnly, int32_t capacity); +void writerCtxDestroy(WriterCtx *w); typedef uint32_t CheckSummer; - typedef struct FstCountingWriter { - void* wrt; // wrap any writer that counts and checksum bytes written - uint64_t count; - CheckSummer summer; + void * wrt; // wrap any writer that counts and checksum bytes written + uint64_t count; + CheckSummer summer; } FstCountingWriter; -int fstCountingWriterWrite(FstCountingWriter *write, uint8_t *buf, uint32_t len); +int fstCountingWriterWrite(FstCountingWriter *write, uint8_t *buf, uint32_t len); int fstCountingWriterRead(FstCountingWriter *write, uint8_t *buf, uint32_t len); int fstCountingWriterFlush(FstCountingWriter *write); - uint32_t fstCountingWriterMaskedCheckSum(FstCountingWriter *write); FstCountingWriter *fstCountingWriterCreate(void *wtr); -void fstCountingWriterDestroy(FstCountingWriter *w); - +void fstCountingWriterDestroy(FstCountingWriter *w); -void fstCountingWriterPackUintIn(FstCountingWriter *writer, uint64_t n, uint8_t nBytes); +void fstCountingWriterPackUintIn(FstCountingWriter *writer, uint64_t n, uint8_t nBytes); uint8_t fstCountingWriterPackUint(FstCountingWriter *writer, uint64_t n); - #define FST_WRITER_COUNT(writer) (writer->count) #define FST_WRITER_INTER_WRITER(writer) (writer->wtr) #define FST_WRITE_CHECK_SUMMER(writer) (writer->summer) @@ -89,5 +84,3 @@ uint8_t fstCountingWriterPackUint(FstCountingWriter *writer, uint64_t n); #endif #endif - - diff --git a/source/libs/index/inc/index_fst_node.h b/source/libs/index/inc/index_fst_node.h index 87eb7cb746..a2041fad40 100644 --- a/source/libs/index/inc/index_fst_node.h +++ b/source/libs/index/inc/index_fst_node.h @@ -20,24 +20,24 @@ extern "C" { #endif -#include "index_fst_util.h" #include "index_fst_counting_writer.h" +#include "index_fst_util.h" -#define FST_BUILDER_NODE_IS_FINAL(bn) (bn->isFinal) -#define FST_BUILDER_NODE_TRANS_ISEMPTY(bn) (taosArrayGetSize(bn->trans) == 0) +#define FST_BUILDER_NODE_IS_FINAL(bn) (bn->isFinal) +#define FST_BUILDER_NODE_TRANS_ISEMPTY(bn) (taosArrayGetSize(bn->trans) == 0) #define FST_BUILDER_NODE_FINALOUTPUT_ISZERO(bn) (bn->finalOutput == 0) typedef struct FstTransition { - uint8_t inp; //The byte input associated with this transition. - Output out; //The output associated with this transition - CompiledAddr addr; //The address of the node that this transition points to + uint8_t inp; // The byte input associated with this transition. + Output out; // The output associated with this transition + CompiledAddr addr; // The address of the node that this transition points to } FstTransition; typedef struct FstBuilderNode { - bool isFinal; - Output finalOutput; + bool isFinal; + Output finalOutput; SArray *trans; // -} FstBuilderNode; +} FstBuilderNode; FstBuilderNode *fstBuilderNodeDefault(); @@ -45,8 +45,9 @@ FstBuilderNode *fstBuilderNodeClone(FstBuilderNode *src); void fstBuilderNodeCloneFrom(FstBuilderNode *dst, FstBuilderNode *src); -//bool fstBuilderNodeCompileTo(FstBuilderNode *b, FstCountingWriter *wrt, CompiledAddr lastAddr, CompiledAddr startAddr); -bool fstBuilderNodeEqual(FstBuilderNode *n1, FstBuilderNode *n2); +// bool fstBuilderNodeCompileTo(FstBuilderNode *b, FstCountingWriter *wrt, +// CompiledAddr lastAddr, CompiledAddr startAddr); +bool fstBuilderNodeEqual(FstBuilderNode *n1, FstBuilderNode *n2); void fstBuilderNodeDestroy(FstBuilderNode *node); diff --git a/source/libs/index/inc/index_fst_registry.h b/source/libs/index/inc/index_fst_registry.h index 1d89e57e52..1b0922c724 100644 --- a/source/libs/index/inc/index_fst_registry.h +++ b/source/libs/index/inc/index_fst_registry.h @@ -19,49 +19,48 @@ extern "C" { #endif +#include "index_fst_node.h" #include "index_fst_util.h" #include "tarray.h" -#include "index_fst_node.h" typedef struct FstRegistryCell { - CompiledAddr addr; - FstBuilderNode *node; + CompiledAddr addr; + FstBuilderNode *node; } FstRegistryCell; #define FST_REGISTRY_CELL_IS_EMPTY(cell) (cell->addr == NONE_ADDRESS) -#define FST_REGISTRY_CELL_INSERT(cell, tAddr) do {cell->addr = tAddr;} while(0) - +#define FST_REGISTRY_CELL_INSERT(cell, tAddr) \ + do { \ + cell->addr = tAddr; \ + } while (0) -//typedef struct FstRegistryCache { -// SArray *cells; +// typedef struct FstRegistryCache { +// SArray *cells; // uint32_t start; // uint32_t end; //} FstRegistryCache; -typedef enum {FOUND, NOTFOUND, REJECTED} FstRegistryEntryState; +typedef enum { FOUND, NOTFOUND, REJECTED } FstRegistryEntryState; typedef struct FstRegistryEntry { FstRegistryEntryState state; - CompiledAddr addr; - FstRegistryCell *cell; -} FstRegistryEntry; + CompiledAddr addr; + FstRegistryCell * cell; +} FstRegistryEntry; - - -// Registry relation function +// Registry relation function typedef struct FstRegistry { - SArray *table; // - uint64_t tableSize; // num of rows - uint64_t mruSize; // num of columns -} FstRegistry; - -// -FstRegistry* fstRegistryCreate(uint64_t tableSize, uint64_t mruSize); -void fstRegistryDestroy(FstRegistry *registry); + SArray * table; // + uint64_t tableSize; // num of rows + uint64_t mruSize; // num of columns +} FstRegistry; +// +FstRegistry *fstRegistryCreate(uint64_t tableSize, uint64_t mruSize); +void fstRegistryDestroy(FstRegistry *registry); -FstRegistryEntry* fstRegistryGetEntry(FstRegistry *registry, FstBuilderNode *bNode); -void fstRegistryEntryDestroy(FstRegistryEntry *entry); +FstRegistryEntry *fstRegistryGetEntry(FstRegistry *registry, FstBuilderNode *bNode); +void fstRegistryEntryDestroy(FstRegistryEntry *entry); #ifdef __cplusplus } diff --git a/source/libs/index/inc/index_fst_util.h b/source/libs/index/inc/index_fst_util.h index 24b2508678..c41977e637 100644 --- a/source/libs/index/inc/index_fst_util.h +++ b/source/libs/index/inc/index_fst_util.h @@ -13,7 +13,6 @@ * along with this program. If not, see . */ - #ifndef __INDEX_FST_UTIL_H__ #define __INDEX_FST_UTIL_H__ @@ -21,16 +20,15 @@ extern "C" { #endif -#include "tarray.h" #include "index_fst_common.h" +#include "tarray.h" typedef uint64_t FstType; -typedef uint64_t CompiledAddr; -typedef uint64_t Output; -typedef uint8_t PackSizes; +typedef uint64_t CompiledAddr; +typedef uint64_t Output; +typedef uint8_t PackSizes; - -//A sentinel value used to indicate an empty final state +// A sentinel value used to indicate an empty final state extern const CompiledAddr EMPTY_ADDRESS; /// A sentinel value used to indicate an invalid state. extern const CompiledAddr NONE_ADDRESS; @@ -38,9 +36,9 @@ extern const CompiledAddr NONE_ADDRESS; // This version number is written to every finite state transducer created by // this version When a finite state transducer is read, its version number is // checked against this value. -extern const uint64_t VERSION; -// The threshold (in number of transitions) at which an index is created for -// a node's transitions. This speeds up lookup time at the expense of FST size +extern const uint64_t VERSION; +// The threshold (in number of transitions) at which an index is created for +// a node's transitions. This speeds up lookup time at the expense of FST size extern const uint64_t TRANS_INDEX_THRESHOLD; // high 4 bits is transition address packed size. @@ -48,73 +46,75 @@ extern const uint64_t TRANS_INDEX_THRESHOLD; // // `0` is a legal value which means there are no transitions/outputs - -#define FST_SET_TRANSITION_PACK_SIZE(v, sz) do {v = (v & 0b00001111) | (sz << 4); } while(0) -#define FST_GET_TRANSITION_PACK_SIZE(v) (((v) & 0b11110000) >> 4) -#define FST_SET_OUTPUT_PACK_SIZE(v, sz) do { v = (v & 0b11110000) | sz; } while(0) -#define FST_GET_OUTPUT_PACK_SIZE(v) ((v) & 0b00001111) - -#define COMMON_INPUT(idx) COMMON_INPUTS_INV[(idx) - 1] - -#define COMMON_INDEX(v, max, val) do { \ - val = ((uint16_t)COMMON_INPUTS[v] + 1)%256; \ - val = val > max ? 0: val; \ -} while(0) - - -//uint8_t commonInput(uint8_t idx); -//uint8_t commonIdx(uint8_t v, uint8_t max); - -uint8_t packSize(uint64_t n); +#define FST_SET_TRANSITION_PACK_SIZE(v, sz) \ + do { \ + v = (v & 0b00001111) | (sz << 4); \ + } while (0) +#define FST_GET_TRANSITION_PACK_SIZE(v) (((v)&0b11110000) >> 4) +#define FST_SET_OUTPUT_PACK_SIZE(v, sz) \ + do { \ + v = (v & 0b11110000) | sz; \ + } while (0) +#define FST_GET_OUTPUT_PACK_SIZE(v) ((v)&0b00001111) + +#define COMMON_INPUT(idx) COMMON_INPUTS_INV[(idx)-1] + +#define COMMON_INDEX(v, max, val) \ + do { \ + val = ((uint16_t)COMMON_INPUTS[v] + 1) % 256; \ + val = val > max ? 0 : val; \ + } while (0) + +// uint8_t commonInput(uint8_t idx); +// uint8_t commonIdx(uint8_t v, uint8_t max); + +uint8_t packSize(uint64_t n); uint64_t unpackUint64(uint8_t *ch, uint8_t sz); uint8_t packDeltaSize(CompiledAddr nodeAddr, CompiledAddr transAddr); CompiledAddr unpackDelta(char *data, uint64_t len, uint64_t nodeAddr); - typedef struct FstString { - uint8_t *data; + uint8_t *data; uint32_t len; - int32_t ref; + int32_t ref; } FstString; typedef struct FstSlice { - FstString *str; - int32_t start; - int32_t end; + FstString *str; + int32_t start; + int32_t end; } FstSlice; FstSlice fstSliceCreate(uint8_t *data, uint64_t len); FstSlice fstSliceCopy(FstSlice *s, int32_t start, int32_t end); FstSlice fstSliceDeepCopy(FstSlice *s, int32_t start, int32_t end); -bool fstSliceIsEmpty(FstSlice *s); -int fstSliceCompare(FstSlice *s1, FstSlice *s2); -void fstSliceDestroy(FstSlice *s); -uint8_t *fstSliceData(FstSlice *s, int32_t *sz); +bool fstSliceIsEmpty(FstSlice *s); +int fstSliceCompare(FstSlice *s1, FstSlice *s2); +void fstSliceDestroy(FstSlice *s); +uint8_t *fstSliceData(FstSlice *s, int32_t *sz); #define FST_SLICE_LEN(s) (s->end - s->start + 1) -//// stack +//// stack // -//typedef (*StackFreeElemFn)(void *elem); +// typedef (*StackFreeElemFn)(void *elem); // -//typedef struct FstStack { -// void *first; -// void *end; -// size_t elemSize; +// typedef struct FstStack { +// void *first; +// void *end; +// size_t elemSize; // size_t nElem; // StackFreeElemFn fn; //} FstStack; // // -//FstStack* fstStackCreate(size_t elemSize, stackFreeElem); -//void *fstStackPush(FstStack *s, void *elem); -//void *fstStackTop(FstStack *s); -//size_t fstStackLen(FstStack *s); -//void fstStackDestory(FstStack *); +// FstStack* fstStackCreate(size_t elemSize, stackFreeElem); +// void *fstStackPush(FstStack *s, void *elem); +// void *fstStackTop(FstStack *s); +// size_t fstStackLen(FstStack *s); +// void fstStackDestory(FstStack *); // - - #ifdef __cplusplus } #endif diff --git a/source/libs/index/inc/index_tfile.h b/source/libs/index/inc/index_tfile.h index ce974cc49f..7c6261aacf 100644 --- a/source/libs/index/inc/index_tfile.h +++ b/source/libs/index/inc/index_tfile.h @@ -17,10 +17,10 @@ #include "index.h" #include "indexInt.h" -#include "tlockfree.h" -#include "index_tfile.h" -#include "index_fst_counting_writer.h" #include "index_fst.h" +#include "index_fst_counting_writer.h" +#include "index_tfile.h" +#include "tlockfree.h" #ifdef __cplusplus extern "C" { @@ -29,92 +29,85 @@ extern "C" { // tfile header // |<---suid--->|<---version--->|<--colLen-->|<-colName->|<---type-->| // |<-uint64_t->|<---int32_t--->|<--int32_t->|<-colLen-->|<-uint8_t->| - + typedef struct TFileReadHeader { uint64_t suid; int32_t version; - char colName[128]; // - uint8_t colType; + char colName[128]; // + uint8_t colType; } TFileReadHeader; -#define TFILE_HEADER_SIZE (sizeof(TFILE_HEADER_SIZE) + sizeof(uint32_t)); +#define TFILE_HEADER_SIZE (sizeof(TFILE_HEADER_SIZE) + sizeof(uint32_t)); #define TFILE_HADER_PRE_SIZE (sizeof(uint64_t) + sizeof(int32_t) + sizeof(int32_t)) typedef struct TFileCacheKey { - uint64_t suid; - uint8_t colType; - int32_t version; + uint64_t suid; + uint8_t colType; + int32_t version; const char *colName; - int32_t nColName; -} TFileCacheKey; - + int32_t nColName; +} TFileCacheKey; // table cache // refactor to LRU cache later typedef struct TFileCache { - SHashObj *tableCache; - int16_t capacity; - // add more param + SHashObj *tableCache; + int16_t capacity; + // add more param } TFileCache; - typedef struct TFileWriter { FstBuilder *fb; - WriterCtx *ctx; + WriterCtx * ctx; } TFileWriter; typedef struct TFileReader { - T_REF_DECLARE() - Fst *fst; - WriterCtx *ctx; -} TFileReader; + T_REF_DECLARE() + Fst * fst; + WriterCtx * ctx; + TFileReadHeader header; +} TFileReader; typedef struct IndexTFile { - char *path; - TFileCache *cache; - TFileWriter *tw; + char * path; + TFileCache * cache; + TFileWriter *tw; } IndexTFile; typedef struct TFileWriterOpt { uint64_t suid; int8_t colType; - char *colName; - int32_t nColName; + char * colName; + int32_t nColName; int32_t version; -} TFileWriterOpt; +} TFileWriterOpt; typedef struct TFileReaderOpt { - uint64_t suid; - char *colName; - int32_t nColName; - + uint64_t suid; + char * colName; + int32_t nColName; } TFileReaderOpt; -// tfile cache, manage tindex reader -TFileCache *tfileCacheCreate(const char *path); -void tfileCacheDestroy(TFileCache *tcache); -TFileReader* tfileCacheGet(TFileCache *tcache, TFileCacheKey *key); -void tfileCachePut(TFileCache *tcache, TFileCacheKey *key, TFileReader *reader); - -TFileReader* tfileReaderCreate(); -void TFileReaderDestroy(TFileReader *reader); +// tfile cache, manage tindex reader +TFileCache * tfileCacheCreate(const char *path); +void tfileCacheDestroy(TFileCache *tcache); +TFileReader *tfileCacheGet(TFileCache *tcache, TFileCacheKey *key); +void tfileCachePut(TFileCache *tcache, TFileCacheKey *key, TFileReader *reader); +TFileReader *tfileReaderCreate(); +void TFileReaderDestroy(TFileReader *reader); TFileWriter *tfileWriterCreate(const char *suid, const char *colName); -void tfileWriterDestroy(TFileWriter *tw); +void tfileWriterDestroy(TFileWriter *tw); -// +// IndexTFile *indexTFileCreate(const char *path); -int indexTFilePut(void *tfile, SIndexTerm *term, uint64_t uid); -int indexTFileSearch(void *tfile, SIndexTermQuery *query, SArray *result); - +int indexTFilePut(void *tfile, SIndexTerm *term, uint64_t uid); +int indexTFileSearch(void *tfile, SIndexTermQuery *query, SArray *result); #ifdef __cplusplus } - #endif - - #endif diff --git a/source/libs/index/inc/index_util.h b/source/libs/index/inc/index_util.h index 4ab517ecfa..f708e71f57 100644 --- a/source/libs/index/inc/index_util.h +++ b/source/libs/index/inc/index_util.h @@ -19,33 +19,32 @@ extern "C" { #endif -#define SERIALIZE_MEM_TO_BUF(buf, key, mem) \ - do { \ - memcpy((void *)buf, (void *)(&key->mem), sizeof(key->mem)); \ - buf += sizeof(key->mem); \ +#define SERIALIZE_MEM_TO_BUF(buf, key, mem) \ + do { \ + memcpy((void *)buf, (void *)(&key->mem), sizeof(key->mem)); \ + buf += sizeof(key->mem); \ } while (0) #define SERIALIZE_STR_MEM_TO_BUF(buf, key, mem, len) \ - do { \ - memcpy((void *)buf, (void *)key->mem, len); \ - buf += len; \ + do { \ + memcpy((void *)buf, (void *)key->mem, len); \ + buf += len; \ } while (0) -#define SERIALIZE_VAR_TO_BUF(buf, var, type) \ - do { \ - type c = var; \ - assert(sizeof(var) == sizeof(type));\ - memcpy((void *)buf, (void *)&c, sizeof(c)); \ - buf += sizeof(c); \ +#define SERIALIZE_VAR_TO_BUF(buf, var, type) \ + do { \ + type c = var; \ + assert(sizeof(var) == sizeof(type)); \ + memcpy((void *)buf, (void *)&c, sizeof(c)); \ + buf += sizeof(c); \ } while (0) #define SERIALIZE_STR_VAR_TO_BUF(buf, var, len) \ - do { \ - memcpy((void *)buf, (void *)var, len); \ - buf += len;\ + do { \ + memcpy((void *)buf, (void *)var, len); \ + buf += len; \ } while (0) - #ifdef __cplusplus } #endif diff --git a/source/libs/index/src/index.c b/source/libs/index/src/index.c index 84b49493a2..cc68324017 100644 --- a/source/libs/index/src/index.c +++ b/source/libs/index/src/index.c @@ -26,105 +26,108 @@ static int uidCompare(const void *a, const void *b) { uint64_t u1 = *(uint64_t *)a; uint64_t u2 = *(uint64_t *)b; - if (u1 == u2) { return 0; } - else { return u1 < u2 ? -1 : 1; } + if (u1 == u2) { + return 0; + } else { + return u1 < u2 ? -1 : 1; + } } typedef struct SIdxColInfo { - int colId; // generated by index internal + int colId; // generated by index internal int cVersion; -} SIdxColInfo; +} SIdxColInfo; static pthread_once_t isInit = PTHREAD_ONCE_INIT; -static void indexInit(); - +static void indexInit(); static int indexTermSearch(SIndex *sIdx, SIndexTermQuery *term, SArray **result); static int indexMergeCacheIntoTindex(SIndex *sIdx); static void indexInterResultsDestroy(SArray *results); -static int indexMergeFinalResults(SArray *interResults, EIndexOperatorType oType, SArray *finalResult); +static int indexMergeFinalResults(SArray *interResults, EIndexOperatorType oType, SArray *finalResult); int indexOpen(SIndexOpts *opts, const char *path, SIndex **index) { pthread_once(&isInit, indexInit); SIndex *sIdx = calloc(1, sizeof(SIndex)); - if (sIdx == NULL) { return -1; } + if (sIdx == NULL) { + return -1; + } -#ifdef USE_LUCENE - index_t *index = index_open(path); +#ifdef USE_LUCENE + index_t *index = index_open(path); sIdx->index = index; #endif - sIdx->cache = (void*)indexCacheCreate(); - sIdx->tindex = NULL; - sIdx->colObj = taosHashInit(8, taosGetDefaultHashFunction(TSDB_DATA_TYPE_BINARY), true, HASH_ENTRY_LOCK); - sIdx->colId = 1; - sIdx->cVersion = 1; + sIdx->cache = (void *)indexCacheCreate(); + sIdx->tindex = NULL; + sIdx->colObj = taosHashInit(8, taosGetDefaultHashFunction(TSDB_DATA_TYPE_BINARY), true, HASH_ENTRY_LOCK); + sIdx->colId = 1; + sIdx->cVersion = 1; pthread_mutex_init(&sIdx->mtx, NULL); - *index = sIdx; - return 0; + *index = sIdx; + return 0; } void indexClose(SIndex *sIdx) { -#ifdef USE_LUCENE - index_close(sIdex->index); +#ifdef USE_LUCENE + index_close(sIdex->index); sIdx->index = NULL; #endif #ifdef USE_INVERTED_INDEX indexCacheDestroy(sIdx->cache); - taosHashCleanup(sIdx->colObj); + taosHashCleanup(sIdx->colObj); pthread_mutex_destroy(&sIdx->mtx); #endif - free(sIdx); + free(sIdx); return; } -int indexPut(SIndex *index, SIndexMultiTerm * fVals, uint64_t uid) { - -#ifdef USE_LUCENE - index_document_t *doc = index_document_create(); - - char buf[16] = {0}; - sprintf(buf, "%d", uid); - - for (int i = 0; i < taosArrayGetSize(fVals); i++) { - SIndexTerm *p = taosArrayGetP(fVals, i); - index_document_add(doc, (const char *)(p->key), p->nKey, (const char *)(p->val), p->nVal, 1); - } - index_document_add(doc, NULL, 0, buf, strlen(buf), 0); +int indexPut(SIndex *index, SIndexMultiTerm *fVals, uint64_t uid) { +#ifdef USE_LUCENE + index_document_t *doc = index_document_create(); + + char buf[16] = {0}; + sprintf(buf, "%d", uid); - index_put(index->index, doc); - index_document_destroy(doc); + for (int i = 0; i < taosArrayGetSize(fVals); i++) { + SIndexTerm *p = taosArrayGetP(fVals, i); + index_document_add(doc, (const char *)(p->key), p->nKey, (const char *)(p->val), p->nVal, 1); + } + index_document_add(doc, NULL, 0, buf, strlen(buf), 0); + + index_put(index->index, doc); + index_document_destroy(doc); #endif #ifdef USE_INVERTED_INDEX - - //TODO(yihao): reduce the lock range - pthread_mutex_lock(&index->mtx); + + // TODO(yihao): reduce the lock range + pthread_mutex_lock(&index->mtx); for (int i = 0; i < taosArrayGetSize(fVals); i++) { - SIndexTerm *p = taosArrayGetP(fVals, i); + SIndexTerm * p = taosArrayGetP(fVals, i); SIdxColInfo *fi = taosHashGet(index->colObj, p->colName, p->nColName); if (fi == NULL) { SIdxColInfo tfi = {.colId = index->colId}; - index->cVersion++; + index->cVersion++; index->colId++; - taosHashPut(index->colObj, p->colName, p->nColName, &tfi, sizeof(tfi)); + taosHashPut(index->colObj, p->colName, p->nColName, &tfi, sizeof(tfi)); } else { - //TODO, del + // TODO, del } - } + } pthread_mutex_unlock(&index->mtx); for (int i = 0; i < taosArrayGetSize(fVals); i++) { - SIndexTerm *p = taosArrayGetP(fVals, i); + SIndexTerm * p = taosArrayGetP(fVals, i); SIdxColInfo *fi = taosHashGet(index->colObj, p->colName, p->nColName); - assert(fi != NULL); - int32_t colId = fi->colId; + assert(fi != NULL); + int32_t colId = fi->colId; int32_t version = index->cVersion; - int ret = indexCachePut(index->cache, p, colId, version, uid); + int ret = indexCachePut(index->cache, p, colId, version, uid); if (ret != 0) { - return ret; + return ret; } } #endif @@ -132,29 +135,29 @@ int indexPut(SIndex *index, SIndexMultiTerm * fVals, uint64_t uid) { return 0; } int indexSearch(SIndex *index, SIndexMultiTermQuery *multiQuerys, SArray *result) { -#ifdef USE_LUCENE - EIndexOperatorType opera = multiQuerys->opera; +#ifdef USE_LUCENE + EIndexOperatorType opera = multiQuerys->opera; - int nQuery = taosArrayGetSize(multiQuerys->query); + int nQuery = taosArrayGetSize(multiQuerys->query); char **fields = malloc(sizeof(char *) * nQuery); - char **keys = malloc(sizeof(char *) * nQuery); - int *types = malloc(sizeof(int) * nQuery); + char **keys = malloc(sizeof(char *) * nQuery); + int * types = malloc(sizeof(int) * nQuery); for (int i = 0; i < nQuery; i++) { - SIndexTermQuery *p = taosArrayGet(multiQuerys->query, i); - SIndexTerm *term = p->field_value; - - fields[i] = calloc(1, term->nKey + 1); - keys[i] = calloc(1, term->nVal + 1); - - memcpy(fields[i], term->key, term->nKey); - memcpy(keys[i], term->val, term->nVal); - types[i] = (int)(p->type); - } - int *tResult = NULL; - int tsz= 0; + SIndexTermQuery *p = taosArrayGet(multiQuerys->query, i); + SIndexTerm * term = p->field_value; + + fields[i] = calloc(1, term->nKey + 1); + keys[i] = calloc(1, term->nVal + 1); + + memcpy(fields[i], term->key, term->nKey); + memcpy(keys[i], term->val, term->nVal); + types[i] = (int)(p->type); + } + int *tResult = NULL; + int tsz = 0; index_multi_search(index->index, (const char **)fields, (const char **)keys, types, nQuery, opera, &tResult, &tsz); - + for (int i = 0; i < tsz; i++) { taosArrayPush(result, &tResult[i]); } @@ -169,57 +172,55 @@ int indexSearch(SIndex *index, SIndexMultiTermQuery *multiQuerys, SArray *result #endif #ifdef USE_INVERTED_INDEX - EIndexOperatorType opera = multiQuerys->opera; // relation of querys - + EIndexOperatorType opera = multiQuerys->opera; // relation of querys + SArray *interResults = taosArrayInit(4, POINTER_BYTES); - int nQuery = taosArrayGetSize(multiQuerys->query); + int nQuery = taosArrayGetSize(multiQuerys->query); for (size_t i = 0; i < nQuery; i++) { - SIndexTermQuery *qTerm = taosArrayGet(multiQuerys->query, i); - SArray *tResult = NULL; - indexTermSearch(index, qTerm, &tResult); - taosArrayPush(interResults, (void *)&tResult); - } + SIndexTermQuery *qTerm = taosArrayGet(multiQuerys->query, i); + SArray * tResult = NULL; + indexTermSearch(index, qTerm, &tResult); + taosArrayPush(interResults, (void *)&tResult); + } indexMergeFinalResults(interResults, opera, result); indexInterResultsDestroy(interResults); - + #endif return 1; } - - int indexDelete(SIndex *index, SIndexMultiTermQuery *query) { #ifdef USE_INVERTED_INDEX #endif - + return 1; } -int indexRebuild(SIndex *index, SIndexOpts *opts) { +int indexRebuild(SIndex *index, SIndexOpts *opts){ #ifdef USE_INVERTED_INDEX #endif } - SIndexOpts *indexOptsCreate() { -#ifdef USE_LUCENE +#ifdef USE_LUCENE #endif -return NULL; + return NULL; } -void indexOptsDestroy(SIndexOpts *opts) { -#ifdef USE_LUCENE +void indexOptsDestroy(SIndexOpts *opts){ +#ifdef USE_LUCENE #endif -} -/* - * @param: oper - * -*/ +} /* + * @param: oper + * + */ SIndexMultiTermQuery *indexMultiTermQueryCreate(EIndexOperatorType opera) { SIndexMultiTermQuery *p = (SIndexMultiTermQuery *)malloc(sizeof(SIndexMultiTermQuery)); - if (p == NULL) { return NULL; } - p->opera = opera; - p->query = taosArrayInit(4, sizeof(SIndexTermQuery)); + if (p == NULL) { + return NULL; + } + p->opera = opera; + p->query = taosArrayInit(4, sizeof(SIndexTermQuery)); return p; } void indexMultiTermQueryDestroy(SIndexMultiTermQuery *pQuery) { @@ -227,25 +228,27 @@ void indexMultiTermQueryDestroy(SIndexMultiTermQuery *pQuery) { SIndexTermQuery *p = (SIndexTermQuery *)taosArrayGet(pQuery->query, i); indexTermDestroy(p->term); } - taosArrayDestroy(pQuery->query); + taosArrayDestroy(pQuery->query); free(pQuery); }; -int indexMultiTermQueryAdd(SIndexMultiTermQuery *pQuery, SIndexTerm *term, EIndexQueryType qType){ - SIndexTermQuery q = {.qType = qType, .term = term}; +int indexMultiTermQueryAdd(SIndexMultiTermQuery *pQuery, SIndexTerm *term, EIndexQueryType qType) { + SIndexTermQuery q = {.qType = qType, .term = term}; taosArrayPush(pQuery->query, &q); return 0; } +SIndexTerm *indexTermCreate(int64_t suid, SIndexOperOnColumn oper, uint8_t colType, const char *colName, + int32_t nColName, const char *colVal, int32_t nColVal) { + SIndexTerm *t = (SIndexTerm *)calloc(1, (sizeof(SIndexTerm))); + if (t == NULL) { + return NULL; + } -SIndexTerm *indexTermCreate(int64_t suid, SIndexOperOnColumn oper, uint8_t colType, const char *colName, int32_t nColName, const char *colVal, int32_t nColVal) { - SIndexTerm *t = (SIndexTerm *)calloc(1, (sizeof(SIndexTerm))); - if (t == NULL) { return NULL; } - - t->suid = suid; - t->operType= oper; + t->suid = suid; + t->operType = oper; t->colType = colType; - t->colName = (char *)calloc(1, nColName + 1); + t->colName = (char *)calloc(1, nColName + 1); memcpy(t->colName, colName, nColName); t->nColName = nColName; @@ -258,15 +261,13 @@ void indexTermDestroy(SIndexTerm *p) { free(p->colName); free(p->colVal); free(p); -} - -SIndexMultiTerm *indexMultiTermCreate() { - return taosArrayInit(4, sizeof(SIndexTerm *)); } +SIndexMultiTerm *indexMultiTermCreate() { return taosArrayInit(4, sizeof(SIndexTerm *)); } + int indexMultiTermAdd(SIndexMultiTerm *terms, SIndexTerm *term) { - taosArrayPush(terms, &term); - return 0; + taosArrayPush(terms, &term); + return 0; } void indexMultiTermDestroy(SIndexMultiTerm *terms) { for (int32_t i = 0; i < taosArrayGetSize(terms); i++) { @@ -277,40 +278,40 @@ void indexMultiTermDestroy(SIndexMultiTerm *terms) { } void indexInit() { - //do nothing + // do nothing } static int indexTermSearch(SIndex *sIdx, SIndexTermQuery *query, SArray **result) { - int32_t version = -1; - int16_t colId = -1; + int32_t version = -1; + int16_t colId = -1; SIdxColInfo *colInfo = NULL; SIndexTerm *term = query->term; const char *colName = term->colName; - int32_t nColName = term->nColName; + int32_t nColName = term->nColName; - pthread_mutex_lock(&sIdx->mtx); - colInfo = taosHashGet(sIdx->colObj, colName, nColName); + pthread_mutex_lock(&sIdx->mtx); + colInfo = taosHashGet(sIdx->colObj, colName, nColName); if (colInfo == NULL) { - pthread_mutex_unlock(&sIdx->mtx); - return -1; + pthread_mutex_unlock(&sIdx->mtx); + return -1; } - colId = colInfo->colId; + colId = colInfo->colId; version = colInfo->cVersion; - pthread_mutex_unlock(&sIdx->mtx); - + pthread_mutex_unlock(&sIdx->mtx); + *result = taosArrayInit(4, sizeof(uint64_t)); - //TODO: iterator mem and tidex - STermValueType s; + // TODO: iterator mem and tidex + STermValueType s; if (0 == indexCacheSearch(sIdx->cache, query, colId, version, *result, &s)) { if (s == kTypeDeletion) { indexInfo("col: %s already drop by other opera", term->colName); - // coloum already drop by other oper, no need to query tindex + // coloum already drop by other oper, no need to query tindex return 0; } else { if (0 != indexTFileSearch(sIdx->tindex, query, *result)) { - indexError("corrupt at index(TFile) col:%s val: %s", term->colName, term->colVal); - return -1; - } + indexError("corrupt at index(TFile) col:%s val: %s", term->colName, term->colVal); + return -1; + } } } else { indexError("corrupt at index(cache) col:%s val: %s", term->colName, term->colVal); @@ -319,39 +320,40 @@ static int indexTermSearch(SIndex *sIdx, SIndexTermQuery *query, SArray **result return 0; } static void indexInterResultsDestroy(SArray *results) { - if (results == NULL) { return; } + if (results == NULL) { + return; + } size_t sz = taosArrayGetSize(results); for (size_t i = 0; i < sz; i++) { SArray *p = taosArrayGetP(results, i); - taosArrayDestroy(p); - } + taosArrayDestroy(p); + } taosArrayDestroy(results); - } static int indexMergeFinalResults(SArray *interResults, EIndexOperatorType oType, SArray *fResults) { - //refactor, merge interResults into fResults by oType - SArray *first = taosArrayGetP(interResults, 0); - taosArraySort(first, uidCompare); + // refactor, merge interResults into fResults by oType + SArray *first = taosArrayGetP(interResults, 0); + taosArraySort(first, uidCompare); taosArrayRemoveDuplicate(first, uidCompare, NULL); if (oType == MUST) { - // just one column index, enhance later - taosArrayAddAll(fResults, first); + // just one column index, enhance later + taosArrayAddAll(fResults, first); } else if (oType == SHOULD) { - // just one column index, enhance later - taosArrayAddAll(fResults, first); + // just one column index, enhance later + taosArrayAddAll(fResults, first); // tag1 condistion || tag2 condition } else if (oType == NOT) { - // just one column index, enhance later - taosArrayAddAll(fResults, first); - // not use currently + // just one column index, enhance later + taosArrayAddAll(fResults, first); + // not use currently } return 0; } static int indexMergeCacheIntoTindex(SIndex *sIdx) { if (sIdx == NULL) { - return -1; + return -1; } - indexWarn("suid %" PRIu64 " merge cache into tindex", sIdx->suid); + indexWarn("suid %" PRIu64 " merge cache into tindex", sIdx->suid); return 0; } diff --git a/source/libs/index/src/index_cache.c b/source/libs/index/src/index_cache.c index ea185fefe5..dd8a8bcbb6 100644 --- a/source/libs/index/src/index_cache.c +++ b/source/libs/index/src/index_cache.c @@ -14,148 +14,154 @@ */ #include "index_cache.h" -#include "tcompare.h" #include "index_util.h" +#include "tcompare.h" -#define MAX_INDEX_KEY_LEN 256// test only, change later +#define MAX_INDEX_KEY_LEN 256 // test only, change later -// ref index_cache.h:22 -#define CACHE_KEY_LEN(p) (sizeof(int32_t) + sizeof(uint16_t) + sizeof(p->colType) + sizeof(p->nColVal) + p->nColVal + sizeof(uint64_t) + sizeof(p->operType)) +// ref index_cache.h:22 +#define CACHE_KEY_LEN(p) \ + (sizeof(int32_t) + sizeof(uint16_t) + sizeof(p->colType) + sizeof(p->nColVal) + p->nColVal + sizeof(uint64_t) + \ + sizeof(p->operType)) -static char* getIndexKey(const void *pData) { - return NULL; -} +static char * getIndexKey(const void *pData) { return NULL; } static int32_t compareKey(const void *l, const void *r) { char *lp = (char *)l; char *rp = (char *)r; // skip total len, not compare - int32_t ll, rl; // len + int32_t ll, rl; // len memcpy(&ll, lp, sizeof(int32_t)); memcpy(&rl, rp, sizeof(int32_t)); - lp += sizeof(int32_t); + lp += sizeof(int32_t); rp += sizeof(int32_t); - + // compare field id - int16_t lf, rf; // field id + int16_t lf, rf; // field id memcpy(&lf, lp, sizeof(lf)); memcpy(&rf, rp, sizeof(rf)); if (lf != rf) { - return lf < rf ? -1: 1; + return lf < rf ? -1 : 1; } lp += sizeof(lf); rp += sizeof(rf); // compare field type - int8_t lft, rft; + int8_t lft, rft; memcpy(&lft, lp, sizeof(lft)); memcpy(&rft, rp, sizeof(rft)); lp += sizeof(lft); rp += sizeof(rft); assert(rft == rft); - - // skip value len + + // skip value len int32_t lfl, rfl; - memcpy(&lfl, lp, sizeof(lfl)); - memcpy(&rfl, rp, sizeof(rfl)); + memcpy(&lfl, lp, sizeof(lfl)); + memcpy(&rfl, rp, sizeof(rfl)); lp += sizeof(lfl); rp += sizeof(rfl); - - // compare value + + // compare value int32_t i, j; for (i = 0, j = 0; i < lfl && j < rfl; i++, j++) { - if (lp[i] == rp[j]) { continue; } - else { return lp[i] < rp[j] ? -1 : 1;} + if (lp[i] == rp[j]) { + continue; + } else { + return lp[i] < rp[j] ? -1 : 1; + } + } + if (i < lfl) { + return 1; + } else if (j < rfl) { + return -1; } - if (i < lfl) { return 1;} - else if (j < rfl) { return -1; } lp += lfl; - rp += rfl; + rp += rfl; - // skip uid + // skip uid uint64_t lu, ru; - memcpy(&lu, lp, sizeof(lu)); + memcpy(&lu, lp, sizeof(lu)); memcpy(&ru, rp, sizeof(ru)); lp += sizeof(lu); rp += sizeof(ru); - + // compare version, desc order int32_t lv, rv; memcpy(&lv, lp, sizeof(lv)); memcpy(&rv, rp, sizeof(rv)); if (lv != rv) { - return lv > rv ? -1 : 1; - } + return lv > rv ? -1 : 1; + } lp += sizeof(lv); rp += sizeof(rv); // not care item type - return 0; - -} + return 0; +} IndexCache *indexCacheCreate() { IndexCache *cache = calloc(1, sizeof(IndexCache)); - cache->skiplist = tSkipListCreate(MAX_SKIP_LIST_LEVEL, TSDB_DATA_TYPE_BINARY, MAX_INDEX_KEY_LEN, compareKey, SL_ALLOW_DUP_KEY, getIndexKey); + cache->skiplist = tSkipListCreate( + MAX_SKIP_LIST_LEVEL, TSDB_DATA_TYPE_BINARY, MAX_INDEX_KEY_LEN, compareKey, SL_ALLOW_DUP_KEY, getIndexKey); return cache; - } void indexCacheDestroy(void *cache) { - IndexCache *pCache = cache; - if (pCache == NULL) { return; } + IndexCache *pCache = cache; + if (pCache == NULL) { + return; + } tSkipListDestroy(pCache->skiplist); free(pCache); } int indexCachePut(void *cache, SIndexTerm *term, int16_t colId, int32_t version, uint64_t uid) { - if (cache == NULL) { return -1;} + if (cache == NULL) { + return -1; + } IndexCache *pCache = cache; // encode data - int32_t total = CACHE_KEY_LEN(term); - char *buf = calloc(1, total); - char *p = buf; + int32_t total = CACHE_KEY_LEN(term); + char * buf = calloc(1, total); + char * p = buf; - SERIALIZE_VAR_TO_BUF(p, total,int32_t); + SERIALIZE_VAR_TO_BUF(p, total, int32_t); SERIALIZE_VAR_TO_BUF(p, colId, int16_t); SERIALIZE_MEM_TO_BUF(p, term, colType); - SERIALIZE_MEM_TO_BUF(p, term, nColVal); - SERIALIZE_STR_MEM_TO_BUF(p, term, colVal, term->nColVal); - + SERIALIZE_MEM_TO_BUF(p, term, nColVal); + SERIALIZE_STR_MEM_TO_BUF(p, term, colVal, term->nColVal); + SERIALIZE_VAR_TO_BUF(p, version, int32_t); - SERIALIZE_VAR_TO_BUF(p, uid, uint64_t); + SERIALIZE_VAR_TO_BUF(p, uid, uint64_t); SERIALIZE_MEM_TO_BUF(p, term, operType); - tSkipListPut(pCache->skiplist, (void *)buf); + tSkipListPut(pCache->skiplist, (void *)buf); return 0; // encode end - } int indexCacheDel(void *cache, int32_t fieldId, const char *fieldValue, int32_t fvlen, uint64_t uid, int8_t operType) { IndexCache *pCache = cache; return 0; } -int indexCacheSearch(void *cache, SIndexTermQuery *query, int16_t colId, int32_t version, SArray *result, STermValueType *s) { - if (cache == NULL) { return -1; } - IndexCache *pCache = cache; - SIndexTerm *term = query->term; - EIndexQueryType qtype = query->qType; - - int32_t keyLen = CACHE_KEY_LEN(term); +int indexCacheSearch( + void *cache, SIndexTermQuery *query, int16_t colId, int32_t version, SArray *result, STermValueType *s) { + if (cache == NULL) { + return -1; + } + IndexCache * pCache = cache; + SIndexTerm * term = query->term; + EIndexQueryType qtype = query->qType; + + int32_t keyLen = CACHE_KEY_LEN(term); char *buf = calloc(1, keyLen); if (qtype == QUERY_TERM) { - } else if (qtype == QUERY_PREFIX) { - } else if (qtype == QUERY_SUFFIX) { - } else if (qtype == QUERY_REGEX) { - } - + return 0; } - diff --git a/source/libs/index/src/index_fst.c b/source/libs/index/src/index_fst.c index 0f00aacf3b..adb002c2b7 100644 --- a/source/libs/index/src/index_fst.c +++ b/source/libs/index/src/index_fst.c @@ -14,83 +14,84 @@ */ #include "index_fst.h" -#include "tcoding.h" -#include "tchecksum.h" -#include "indexInt.h" +#include "indexInt.h" #include "index_fst_automation.h" - +#include "tchecksum.h" +#include "tcoding.h" static void fstPackDeltaIn(FstCountingWriter *wrt, CompiledAddr nodeAddr, CompiledAddr transAddr, uint8_t nBytes) { CompiledAddr deltaAddr = (transAddr == EMPTY_ADDRESS) ? EMPTY_ADDRESS : nodeAddr - transAddr; - fstCountingWriterPackUintIn(wrt, deltaAddr, nBytes); + fstCountingWriterPackUintIn(wrt, deltaAddr, nBytes); } static uint8_t fstPackDetla(FstCountingWriter *wrt, CompiledAddr nodeAddr, CompiledAddr transAddr) { uint8_t nBytes = packDeltaSize(nodeAddr, transAddr); - fstPackDeltaIn(wrt, nodeAddr, transAddr, nBytes); + fstPackDeltaIn(wrt, nodeAddr, transAddr, nBytes); return nBytes; } FstUnFinishedNodes *fstUnFinishedNodesCreate() { FstUnFinishedNodes *nodes = malloc(sizeof(FstUnFinishedNodes)); - if (nodes == NULL) { return NULL; } + if (nodes == NULL) { + return NULL; + } nodes->stack = (SArray *)taosArrayInit(64, sizeof(FstBuilderNodeUnfinished)); fstUnFinishedNodesPushEmpty(nodes, false); return nodes; } -void unFinishedNodeDestroyElem(void* elem) { - FstBuilderNodeUnfinished *b = (FstBuilderNodeUnfinished*)elem; - fstBuilderNodeDestroy(b->node); - free(b->last); +void unFinishedNodeDestroyElem(void *elem) { + FstBuilderNodeUnfinished *b = (FstBuilderNodeUnfinished *)elem; + fstBuilderNodeDestroy(b->node); + free(b->last); b->last = NULL; -} +} void fstUnFinishedNodesDestroy(FstUnFinishedNodes *nodes) { - if (nodes == NULL) { return; } + if (nodes == NULL) { + return; + } - taosArrayDestroyEx(nodes->stack, unFinishedNodeDestroyElem); + taosArrayDestroyEx(nodes->stack, unFinishedNodeDestroyElem); free(nodes); } void fstUnFinishedNodesPushEmpty(FstUnFinishedNodes *nodes, bool isFinal) { FstBuilderNode *node = malloc(sizeof(FstBuilderNode)); - node->isFinal = isFinal; + node->isFinal = isFinal; node->finalOutput = 0; - node->trans = taosArrayInit(16, sizeof(FstTransition)); + node->trans = taosArrayInit(16, sizeof(FstTransition)); - FstBuilderNodeUnfinished un = {.node = node, .last = NULL}; + FstBuilderNodeUnfinished un = {.node = node, .last = NULL}; taosArrayPush(nodes->stack, &un); - } FstBuilderNode *fstUnFinishedNodesPopRoot(FstUnFinishedNodes *nodes) { assert(taosArrayGetSize(nodes->stack) == 1); FstBuilderNodeUnfinished *un = taosArrayPop(nodes->stack); - assert(un->last == NULL); - return un->node; + assert(un->last == NULL); + return un->node; } FstBuilderNode *fstUnFinishedNodesPopFreeze(FstUnFinishedNodes *nodes, CompiledAddr addr) { FstBuilderNodeUnfinished *un = taosArrayPop(nodes->stack); fstBuilderNodeUnfinishedLastCompiled(un, addr); - //free(un->last); // TODO add func FstLastTransitionFree() - //un->last = NULL; - return un->node; + // free(un->last); // TODO add func FstLastTransitionFree() + // un->last = NULL; + return un->node; } FstBuilderNode *fstUnFinishedNodesPopEmpty(FstUnFinishedNodes *nodes) { FstBuilderNodeUnfinished *un = taosArrayPop(nodes->stack); - assert(un->last == NULL); - return un->node; - + assert(un->last == NULL); + return un->node; } void fstUnFinishedNodesSetRootOutput(FstUnFinishedNodes *nodes, Output out) { FstBuilderNodeUnfinished *un = taosArrayGet(nodes->stack, 0); - un->node->isFinal = true; + un->node->isFinal = true; un->node->finalOutput = out; - //un->node->trans = NULL; -} + // un->node->trans = NULL; +} void fstUnFinishedNodesTopLastFreeze(FstUnFinishedNodes *nodes, CompiledAddr addr) { - size_t sz = taosArrayGetSize(nodes->stack) - 1; + size_t sz = taosArrayGetSize(nodes->stack) - 1; FstBuilderNodeUnfinished *un = taosArrayGet(nodes->stack, sz); fstBuilderNodeUnfinishedLastCompiled(un, addr); } @@ -99,181 +100,177 @@ void fstUnFinishedNodesAddSuffix(FstUnFinishedNodes *nodes, FstSlice bs, Output if (fstSliceIsEmpty(s)) { return; } - size_t sz = taosArrayGetSize(nodes->stack) - 1; + size_t sz = taosArrayGetSize(nodes->stack) - 1; FstBuilderNodeUnfinished *un = taosArrayGet(nodes->stack, sz); assert(un->last == NULL); - //FstLastTransition *trn = malloc(sizeof(FstLastTransition)); - //trn->inp = s->data[s->start]; - //trn->out = out; - int32_t len = 0; + // FstLastTransition *trn = malloc(sizeof(FstLastTransition)); + // trn->inp = s->data[s->start]; + // trn->out = out; + int32_t len = 0; uint8_t *data = fstSliceData(s, &len); - un->last = fstLastTransitionCreate(data[0], out); + un->last = fstLastTransitionCreate(data[0], out); for (uint64_t i = 1; i < len; i++) { FstBuilderNode *n = malloc(sizeof(FstBuilderNode)); - n->isFinal = false; + n->isFinal = false; n->finalOutput = 0; - n->trans = taosArrayInit(16, sizeof(FstTransition)); - - //FstLastTransition *trn = malloc(sizeof(FstLastTransition)); - //trn->inp = s->data[i]; - //trn->out = out; + n->trans = taosArrayInit(16, sizeof(FstTransition)); + + // FstLastTransition *trn = malloc(sizeof(FstLastTransition)); + // trn->inp = s->data[i]; + // trn->out = out; FstLastTransition *trn = fstLastTransitionCreate(data[i], 0); - FstBuilderNodeUnfinished un = {.node = n, .last = trn}; - taosArrayPush(nodes->stack, &un); + FstBuilderNodeUnfinished un = {.node = n, .last = trn}; + taosArrayPush(nodes->stack, &un); } - fstUnFinishedNodesPushEmpty(nodes, true); + fstUnFinishedNodesPushEmpty(nodes, true); } - uint64_t fstUnFinishedNodesFindCommPrefix(FstUnFinishedNodes *node, FstSlice bs) { FstSlice *s = &bs; - size_t ssz = taosArrayGetSize(node->stack); // stack size + size_t ssz = taosArrayGetSize(node->stack); // stack size uint64_t count = 0; - int32_t lsz; // data len - uint8_t *data = fstSliceData(s, &lsz); + int32_t lsz; // data len + uint8_t *data = fstSliceData(s, &lsz); for (size_t i = 0; i < ssz && i < lsz; i++) { - FstBuilderNodeUnfinished *un = taosArrayGet(node->stack, i); + FstBuilderNodeUnfinished *un = taosArrayGet(node->stack, i); if (un->last->inp == data[i]) { count++; } else { break; - } + } } return count; } uint64_t fstUnFinishedNodesFindCommPrefixAndSetOutput(FstUnFinishedNodes *node, FstSlice bs, Output in, Output *out) { FstSlice *s = &bs; - size_t lsz = (size_t)(s->end - s->start + 1); // data len - size_t ssz = taosArrayGetSize(node->stack); // stack size + size_t lsz = (size_t)(s->end - s->start + 1); // data len + size_t ssz = taosArrayGetSize(node->stack); // stack size *out = in; uint64_t i = 0; for (i = 0; i < lsz && i < ssz; i++) { FstBuilderNodeUnfinished *un = taosArrayGet(node->stack, i); - FstLastTransition *t = un->last; - uint64_t addPrefix = 0; - uint8_t *data = fstSliceData(s, NULL); + FstLastTransition *t = un->last; + uint64_t addPrefix = 0; + uint8_t * data = fstSliceData(s, NULL); if (t && t->inp == data[i]) { - uint64_t commPrefix = MIN(t->out, *out); - uint64_t tAddPrefix = t->out - commPrefix; - (*out) = (*out) - commPrefix; + uint64_t commPrefix = MIN(t->out, *out); + uint64_t tAddPrefix = t->out - commPrefix; + (*out) = (*out) - commPrefix; t->out = commPrefix; - addPrefix = tAddPrefix; + addPrefix = tAddPrefix; } else { - break; + break; } if (addPrefix != 0) { if (i + 1 < ssz) { FstBuilderNodeUnfinished *unf = taosArrayGet(node->stack, i + 1); - fstBuilderNodeUnfinishedAddOutputPrefix(unf, addPrefix); + fstBuilderNodeUnfinishedAddOutputPrefix(unf, addPrefix); } } - } + } return i; -} - +} -FstState fstStateCreateFrom(FstSlice* slice, CompiledAddr addr) { +FstState fstStateCreateFrom(FstSlice *slice, CompiledAddr addr) { FstState fs = {.state = EmptyFinal, .val = 0}; if (addr == EMPTY_ADDRESS) { - return fs; + return fs; } - + uint8_t *data = fstSliceData(slice, NULL); - uint8_t v = data[addr]; - uint8_t t = (v & 0b11000000) >> 6; + uint8_t v = data[addr]; + uint8_t t = (v & 0b11000000) >> 6; if (t == 0b11) { fs.state = OneTransNext; } else if (t == 0b10) { - fs.state = OneTrans; + fs.state = OneTrans; } else { - fs.state = AnyTrans; + fs.state = AnyTrans; } fs.val = v; return fs; } -static FstState fstStateDict[] = { - {.state = OneTransNext, .val = 0b11000000}, - {.state = OneTrans, .val = 0b10000000}, - {.state = AnyTrans, .val = 0b00000000}, - {.state = EmptyFinal, .val = 0b00000000} -}; -// debug -static const char *fstStateStr[] = {"ONE_TRANS_NEXT", "ONE_TRANS", "ANY_TRANS", "EMPTY_FINAL"}; +static FstState fstStateDict[] = {{.state = OneTransNext, .val = 0b11000000}, {.state = OneTrans, .val = 0b10000000}, + {.state = AnyTrans, .val = 0b00000000}, {.state = EmptyFinal, .val = 0b00000000}}; +// debug +static const char *fstStateStr[] = {"ONE_TRANS_NEXT", "ONE_TRANS", "ANY_TRANS", "EMPTY_FINAL"}; -FstState fstStateCreate(State state){ +FstState fstStateCreate(State state) { uint8_t idx = (uint8_t)state; return fstStateDict[idx]; } -//compile +// compile void fstStateCompileForOneTransNext(FstCountingWriter *w, CompiledAddr addr, uint8_t inp) { - FstState s = fstStateCreate(OneTransNext); + FstState s = fstStateCreate(OneTransNext); fstStateSetCommInput(&s, inp); - bool null = false; + bool null = false; uint8_t v = fstStateCommInput(&s, &null); if (null) { // w->write_all(&[inp]) fstCountingWriterWrite(w, &inp, 1); - } + } fstCountingWriterWrite(w, &(s.val), 1); // w->write_all(&[s.val]) return; } -void fstStateCompileForOneTrans(FstCountingWriter *w, CompiledAddr addr, FstTransition* trn) { - Output out = trn->out; - uint8_t outPackSize = (out == 0 ? 0 : fstCountingWriterPackUint(w, out)); - uint8_t transPackSize = fstPackDetla(w, addr, trn->addr); +void fstStateCompileForOneTrans(FstCountingWriter *w, CompiledAddr addr, FstTransition *trn) { + Output out = trn->out; + uint8_t outPackSize = (out == 0 ? 0 : fstCountingWriterPackUint(w, out)); + uint8_t transPackSize = fstPackDetla(w, addr, trn->addr); PackSizes packSizes = 0; FST_SET_OUTPUT_PACK_SIZE(packSizes, outPackSize); FST_SET_TRANSITION_PACK_SIZE(packSizes, transPackSize); - fstCountingWriterWrite(w, (char *)&packSizes, sizeof(packSizes)); + fstCountingWriterWrite(w, (char *)&packSizes, sizeof(packSizes)); + + FstState st = fstStateCreate(OneTrans); - FstState st = fstStateCreate(OneTrans); - fstStateSetCommInput(&st, trn->inp); - bool null = false; - uint8_t inp = fstStateCommInput(&st, &null); + bool null = false; + uint8_t inp = fstStateCommInput(&st, &null); if (null == true) { fstCountingWriterWrite(w, (char *)&trn->inp, sizeof(trn->inp)); } fstCountingWriterWrite(w, (char *)(&(st.val)), sizeof(st.val)); - return ; - + return; } void fstStateCompileForAnyTrans(FstCountingWriter *w, CompiledAddr addr, FstBuilderNode *node) { - size_t sz = taosArrayGetSize(node->trans); + size_t sz = taosArrayGetSize(node->trans); assert(sz <= 256); uint8_t tSize = 0; - uint8_t oSize = packSize(node->finalOutput) ; - + uint8_t oSize = packSize(node->finalOutput); + // finalOutput.is_zero() - bool anyOuts = (node->finalOutput != 0) ; + bool anyOuts = (node->finalOutput != 0); for (size_t i = 0; i < sz; i++) { - FstTransition *t = taosArrayGet(node->trans, i); - tSize = MAX(tSize, packDeltaSize(addr, t->addr)); + FstTransition *t = taosArrayGet(node->trans, i); + tSize = MAX(tSize, packDeltaSize(addr, t->addr)); oSize = MAX(oSize, packSize(t->out)); - anyOuts = anyOuts || (t->out != 0); + anyOuts = anyOuts || (t->out != 0); } - PackSizes packSizes = 0; - if (anyOuts) { FST_SET_OUTPUT_PACK_SIZE(packSizes, oSize); } - else { FST_SET_OUTPUT_PACK_SIZE(packSizes, 0); } + PackSizes packSizes = 0; + if (anyOuts) { + FST_SET_OUTPUT_PACK_SIZE(packSizes, oSize); + } else { + FST_SET_OUTPUT_PACK_SIZE(packSizes, 0); + } FST_SET_TRANSITION_PACK_SIZE(packSizes, tSize); - + FstState st = fstStateCreate(AnyTrans); - fstStateSetFinalState(&st, node->isFinal); + fstStateSetFinalState(&st, node->isFinal); fstStateSetStateNtrans(&st, (uint8_t)sz); - + if (anyOuts) { if (FST_BUILDER_NODE_IS_FINAL(node)) { fstCountingWriterPackUintIn(w, node->finalOutput, oSize); @@ -282,123 +279,115 @@ void fstStateCompileForAnyTrans(FstCountingWriter *w, CompiledAddr addr, FstBuil FstTransition *t = taosArrayGet(node->trans, i); fstCountingWriterPackUintIn(w, t->out, oSize); } - } + } for (int32_t i = sz - 1; i >= 0; i--) { - FstTransition *t = taosArrayGet(node->trans, i); - fstPackDeltaIn(w, addr, t->addr, tSize); + FstTransition *t = taosArrayGet(node->trans, i); + fstPackDeltaIn(w, addr, t->addr, tSize); } for (int32_t i = sz - 1; i >= 0; i--) { - FstTransition *t = taosArrayGet(node->trans, i); - fstCountingWriterWrite(w, (char *)&t->inp, 1); - //fstPackDeltaIn(w, addr, t->addr, tSize); + FstTransition *t = taosArrayGet(node->trans, i); + fstCountingWriterWrite(w, (char *)&t->inp, 1); + // fstPackDeltaIn(w, addr, t->addr, tSize); } if (sz > TRANS_INDEX_THRESHOLD) { // A value of 255 indicates that no transition exists for the byte // at that index. (Except when there are 256 transitions.) Namely, // any value greater than or equal to the number of transitions in // this node indicates an absent transition. - uint8_t *index = (uint8_t *)malloc(sizeof(uint8_t) * 256); + uint8_t *index = (uint8_t *)malloc(sizeof(uint8_t) * 256); memset(index, 255, sizeof(uint8_t) * 256); - ///for (uint8_t i = 0; i < 256; i++) { + /// for (uint8_t i = 0; i < 256; i++) { // index[i] = 255; ///} for (size_t i = 0; i < sz; i++) { FstTransition *t = taosArrayGet(node->trans, i); index[t->inp] = i; - //fstPackDeltaIn(w, addr, t->addr, tSize); + // fstPackDeltaIn(w, addr, t->addr, tSize); } - fstCountingWriterWrite(w, (char *)index, 256); + fstCountingWriterWrite(w, (char *)index, 256); free(index); } fstCountingWriterWrite(w, (char *)&packSizes, 1); bool null = false; fstStateStateNtrans(&st, &null); if (null == true) { - // 256 can't be represented in a u8, so we abuse the fact that - // the # of transitions can never be 1 here, since 1 is always - // encoded in the state byte. + // 256 can't be represented in a u8, so we abuse the fact that + // the # of transitions can never be 1 here, since 1 is always + // encoded in the state byte. uint8_t v = 1; - if (sz == 256) { fstCountingWriterWrite(w, (char *)&v, 1); } - else { fstCountingWriterWrite(w, (char *)&sz, 1); } + if (sz == 256) { + fstCountingWriterWrite(w, (char *)&v, 1); + } else { + fstCountingWriterWrite(w, (char *)&sz, 1); + } } fstCountingWriterWrite(w, (char *)(&(st.val)), 1); return; } // set_comm_input -void fstStateSetCommInput(FstState* s, uint8_t inp) { +void fstStateSetCommInput(FstState *s, uint8_t inp) { assert(s->state == OneTransNext || s->state == OneTrans); uint8_t val; - COMMON_INDEX(inp, 0x111111, val); - s->val = (s->val & fstStateDict[s->state].val) | val; + COMMON_INDEX(inp, 0x111111, val); + s->val = (s->val & fstStateDict[s->state].val) | val; } // comm_input -uint8_t fstStateCommInput(FstState* s, bool *null) { +uint8_t fstStateCommInput(FstState *s, bool *null) { assert(s->state == OneTransNext || s->state == OneTrans); uint8_t v = s->val & 0b00111111; - if (v == 0) { - *null = true; + if (v == 0) { + *null = true; return v; - } - //v = 0 indicate that common_input is None - return v == 0 ? 0 : COMMON_INPUT(v); + } + // v = 0 indicate that common_input is None + return v == 0 ? 0 : COMMON_INPUT(v); } // input_len -uint64_t fstStateInputLen(FstState* s) { +uint64_t fstStateInputLen(FstState *s) { assert(s->state == OneTransNext || s->state == OneTrans); bool null = false; - fstStateCommInput(s, &null); - return null ? 1 : 0 ; -} - -// end_addr -uint64_t fstStateEndAddrForOneTransNext(FstState* s, FstSlice *data) { + fstStateCommInput(s, &null); + return null ? 1 : 0; +} + +// end_addr +uint64_t fstStateEndAddrForOneTransNext(FstState *s, FstSlice *data) { assert(s->state == OneTransNext); return FST_SLICE_LEN(data) - 1 - fstStateInputLen(s); } uint64_t fstStateEndAddrForOneTrans(FstState *s, FstSlice *data, PackSizes sizes) { assert(s->state == OneTrans); - return FST_SLICE_LEN(data) - - 1 - - fstStateInputLen(s) - - 1 // pack size - - FST_GET_TRANSITION_PACK_SIZE(sizes) - - FST_GET_OUTPUT_PACK_SIZE(sizes); -} -uint64_t fstStateEndAddrForAnyTrans(FstState *state, uint64_t version, FstSlice *date, PackSizes sizes, uint64_t nTrans) { - uint8_t oSizes = FST_GET_OUTPUT_PACK_SIZE(sizes); - uint8_t finalOsize = !fstStateIsFinalState(state) ? 0 : oSizes; - return FST_SLICE_LEN(date) - - 1 - - fstStateNtransLen(state) - - 1 //pack size - - fstStateTotalTransSize(state, version, sizes, nTrans) - - nTrans * oSizes // output values - - finalOsize; // final output -} -// input -uint8_t fstStateInput(FstState *s, FstNode *node) { + return FST_SLICE_LEN(data) - 1 - fstStateInputLen(s) - 1 // pack size + - FST_GET_TRANSITION_PACK_SIZE(sizes) - FST_GET_OUTPUT_PACK_SIZE(sizes); +} +uint64_t fstStateEndAddrForAnyTrans( + FstState *state, uint64_t version, FstSlice *date, PackSizes sizes, uint64_t nTrans) { + uint8_t oSizes = FST_GET_OUTPUT_PACK_SIZE(sizes); + uint8_t finalOsize = !fstStateIsFinalState(state) ? 0 : oSizes; + return FST_SLICE_LEN(date) - 1 - fstStateNtransLen(state) - 1 // pack size + - fstStateTotalTransSize(state, version, sizes, nTrans) - nTrans * oSizes // output values + - finalOsize; // final output +} +// input +uint8_t fstStateInput(FstState *s, FstNode *node) { assert(s->state == OneTransNext || s->state == OneTrans); FstSlice *slice = &node->data; - bool null = false; - uint8_t inp = fstStateCommInput(s, &null); - uint8_t *data = fstSliceData(slice, NULL); + bool null = false; + uint8_t inp = fstStateCommInput(s, &null); + uint8_t * data = fstSliceData(slice, NULL); return null == false ? inp : data[-1]; } -uint8_t fstStateInputForAnyTrans(FstState *s, FstNode *node, uint64_t i) { +uint8_t fstStateInputForAnyTrans(FstState *s, FstNode *node, uint64_t i) { assert(s->state == AnyTrans); - FstSlice *slice = &node->data; + FstSlice *slice = &node->data; - uint64_t at = node->start - - fstStateNtransLen(s) - - 1 // pack size - - fstStateTransIndexSize(s, node->version, node->nTrans) - - i - - 1; // the output size + uint64_t at = node->start - fstStateNtransLen(s) - 1 // pack size + - fstStateTransIndexSize(s, node->version, node->nTrans) - i - 1; // the output size uint8_t *data = fstSliceData(slice, NULL); return data[at]; @@ -409,84 +398,68 @@ CompiledAddr fstStateTransAddr(FstState *s, FstNode *node) { assert(s->state == OneTransNext || s->state == OneTrans); FstSlice *slice = &node->data; if (s->state == OneTransNext) { - return (CompiledAddr)(node->end) - 1; + return (CompiledAddr)(node->end) - 1; } else { - PackSizes sizes = node->sizes; - uint8_t tSizes = FST_GET_TRANSITION_PACK_SIZE(sizes); - uint64_t i = node->start - - fstStateInputLen(s) - - 1 // PackSizes - - tSizes; - - // refactor error logic + PackSizes sizes = node->sizes; + uint8_t tSizes = FST_GET_TRANSITION_PACK_SIZE(sizes); + uint64_t i = node->start - fstStateInputLen(s) - 1 // PackSizes + - tSizes; + + // refactor error logic uint8_t *data = fstSliceData(slice, NULL); - return unpackDelta(data +i, tSizes, node->end); - } + return unpackDelta(data + i, tSizes, node->end); + } } CompiledAddr fstStateTransAddrForAnyTrans(FstState *s, FstNode *node, uint64_t i) { assert(s->state == AnyTrans); FstSlice *slice = &node->data; - uint8_t tSizes = FST_GET_TRANSITION_PACK_SIZE(node->sizes); - uint64_t at = node->start - - fstStateNtransLen(s) - - 1 - - fstStateTransIndexSize(s, node->version, node->nTrans) - - node->nTrans - - (i * tSizes) - - tSizes; + uint8_t tSizes = FST_GET_TRANSITION_PACK_SIZE(node->sizes); + uint64_t at = node->start - fstStateNtransLen(s) - 1 - fstStateTransIndexSize(s, node->version, node->nTrans) - + node->nTrans - (i * tSizes) - tSizes; uint8_t *data = fstSliceData(slice, NULL); - return unpackDelta(data + at, tSizes, node->end); + return unpackDelta(data + at, tSizes, node->end); } -// sizes +// sizes PackSizes fstStateSizes(FstState *s, FstSlice *slice) { - assert(s->state == OneTrans || s->state == AnyTrans) ; - uint64_t i; + assert(s->state == OneTrans || s->state == AnyTrans); + uint64_t i; if (s->state == OneTrans) { - i = FST_SLICE_LEN(slice) - 1 - fstStateInputLen(s) - 1; + i = FST_SLICE_LEN(slice) - 1 - fstStateInputLen(s) - 1; } else { i = FST_SLICE_LEN(slice) - 1 - fstStateNtransLen(s) - 1; } uint8_t *data = fstSliceData(slice, NULL); - return (PackSizes)(*(data +i)); + return (PackSizes)(*(data + i)); } -// Output +// Output Output fstStateOutput(FstState *s, FstNode *node) { - assert(s->state == OneTrans); - + assert(s->state == OneTrans); + uint8_t oSizes = FST_GET_OUTPUT_PACK_SIZE(node->sizes); if (oSizes == 0) { return 0; } FstSlice *slice = &node->data; - uint8_t tSizes = FST_GET_TRANSITION_PACK_SIZE(node->sizes); - - uint64_t i = node->start - - fstStateInputLen(s) - - 1 - - tSizes - - oSizes; - uint8_t *data = fstSliceData(slice, NULL); + uint8_t tSizes = FST_GET_TRANSITION_PACK_SIZE(node->sizes); + + uint64_t i = node->start - fstStateInputLen(s) - 1 - tSizes - oSizes; + uint8_t *data = fstSliceData(slice, NULL); return unpackUint64(data + i, oSizes); - } Output fstStateOutputForAnyTrans(FstState *s, FstNode *node, uint64_t i) { assert(s->state == AnyTrans); - uint8_t oSizes = FST_GET_OUTPUT_PACK_SIZE(node->sizes); + uint8_t oSizes = FST_GET_OUTPUT_PACK_SIZE(node->sizes); if (oSizes == 0) { - return 0; - } + return 0; + } FstSlice *slice = &node->data; - uint8_t *data = fstSliceData(slice, NULL); - uint64_t at = node->start - - fstStateNtransLen(s) - - 1 // pack size - - fstStateTotalTransSize(s, node->version, node->sizes, node->nTrans) - - (i * oSizes) - - oSizes; + uint8_t * data = fstSliceData(slice, NULL); + uint64_t at = node->start - fstStateNtransLen(s) - 1 // pack size + - fstStateTotalTransSize(s, node->version, node->sizes, node->nTrans) - (i * oSizes) - oSizes; return unpackUint64(data + at, oSizes); } @@ -494,230 +467,226 @@ Output fstStateOutputForAnyTrans(FstState *s, FstNode *node, uint64_t i) { // anyTrans specify function void fstStateSetFinalState(FstState *s, bool yes) { - assert(s->state == AnyTrans); - if (yes) { s->val |= 0b01000000; } + assert(s->state == AnyTrans); + if (yes) { + s->val |= 0b01000000; + } return; } bool fstStateIsFinalState(FstState *s) { - assert(s->state == AnyTrans); - return (s->val & 0b01000000) == 0b01000000; -} + assert(s->state == AnyTrans); + return (s->val & 0b01000000) == 0b01000000; +} void fstStateSetStateNtrans(FstState *s, uint8_t n) { - assert(s->state == AnyTrans); + assert(s->state == AnyTrans); if (n <= 0b00111111) { - s->val = (s->val & 0b11000000) | n; - } + s->val = (s->val & 0b11000000) | n; + } return; } // state_ntrans uint8_t fstStateStateNtrans(FstState *s, bool *null) { - assert(s->state == AnyTrans); + assert(s->state == AnyTrans); *null = false; - uint8_t n = s->val & 0b00111111; + uint8_t n = s->val & 0b00111111; if (n == 0) { - *null = true; // None - } + *null = true; // None + } return n; } uint64_t fstStateTotalTransSize(FstState *s, uint64_t version, PackSizes sizes, uint64_t nTrans) { - assert(s->state == AnyTrans); - uint64_t idxSize = fstStateTransIndexSize(s, version, nTrans); + assert(s->state == AnyTrans); + uint64_t idxSize = fstStateTransIndexSize(s, version, nTrans); return nTrans + (nTrans * FST_GET_TRANSITION_PACK_SIZE(sizes)) + idxSize; } uint64_t fstStateTransIndexSize(FstState *s, uint64_t version, uint64_t nTrans) { - assert(s->state == AnyTrans); - return (version >= 2 &&nTrans > TRANS_INDEX_THRESHOLD) ? 256 : 0; + assert(s->state == AnyTrans); + return (version >= 2 && nTrans > TRANS_INDEX_THRESHOLD) ? 256 : 0; } uint64_t fstStateNtransLen(FstState *s) { assert(s->state == AnyTrans); bool null = false; fstStateStateNtrans(s, &null); - return null == true ? 1 : 0; + return null == true ? 1 : 0; } uint64_t fstStateNtrans(FstState *s, FstSlice *slice) { - bool null = false; + bool null = false; uint8_t n = fstStateStateNtrans(s, &null); if (null != true) { - return n; - } - int32_t len; + return n; + } + int32_t len; uint8_t *data = fstSliceData(slice, &len); n = data[len - 2]; - //n = data[slice->end - 1]; // data[data.len() - 2] - return n == 1 ? 256: n; // // "1" is never a normal legal value here, because if there, // is only 1 transition, then it is encoded in the state byte -} -Output fstStateFinalOutput(FstState *s, uint64_t version, FstSlice *slice, PackSizes sizes, uint64_t nTrans) { - uint8_t oSizes = FST_GET_OUTPUT_PACK_SIZE(sizes); - if (oSizes == 0 || !fstStateIsFinalState(s)) { - return 0; - } - - uint64_t at = FST_SLICE_LEN(slice) - - 1 - - fstStateNtransLen(s) - - 1 // pack size - - fstStateTotalTransSize(s, version, sizes, nTrans) - - (nTrans * oSizes) - - oSizes; - uint8_t *data = fstSliceData(slice, NULL); - return unpackUint64(data + at, (uint8_t)oSizes); + // n = data[slice->end - 1]; // data[data.len() - 2] + return n == 1 ? 256 : n; // // "1" is never a normal legal value here, because if there, // is only 1 transition, + // then it is encoded in the state byte +} +Output fstStateFinalOutput(FstState *s, uint64_t version, FstSlice *slice, PackSizes sizes, uint64_t nTrans) { + uint8_t oSizes = FST_GET_OUTPUT_PACK_SIZE(sizes); + if (oSizes == 0 || !fstStateIsFinalState(s)) { + return 0; + } + uint64_t at = FST_SLICE_LEN(slice) - 1 - fstStateNtransLen(s) - 1 // pack size + - fstStateTotalTransSize(s, version, sizes, nTrans) - (nTrans * oSizes) - oSizes; + uint8_t *data = fstSliceData(slice, NULL); + return unpackUint64(data + at, (uint8_t)oSizes); } uint64_t fstStateFindInput(FstState *s, FstNode *node, uint8_t b, bool *null) { assert(s->state == AnyTrans); FstSlice *slice = &node->data; if (node->version >= 2 && node->nTrans > TRANS_INDEX_THRESHOLD) { - uint64_t at = node->start - - fstStateNtransLen(s) - - 1 // pack size + uint64_t at = node->start - fstStateNtransLen(s) - 1 // pack size - fstStateTransIndexSize(s, node->version, node->nTrans); - int32_t dlen = 0; + int32_t dlen = 0; uint8_t *data = fstSliceData(slice, &dlen); uint64_t i = data[at + b]; - //uint64_t i = slice->data[slice->start + at + b]; + // uint64_t i = slice->data[slice->start + at + b]; if (i >= node->nTrans) { *null = true; - } + } return i; } else { - uint64_t start = node->start - - fstStateNtransLen(s) - - 1 // pack size - - node->nTrans; - uint64_t end = start + node->nTrans; + uint64_t start = node->start - fstStateNtransLen(s) - 1 // pack size + - node->nTrans; + uint64_t end = start + node->nTrans; FstSlice t = fstSliceCopy(slice, start, end - 1); - int32_t len = 0; + int32_t len = 0; uint8_t *data = fstSliceData(&t, &len); - int i = 0; - for(; i < len; i++) { - uint8_t v = data[i]; + int i = 0; + for (; i < len; i++) { + uint8_t v = data[i]; if (v == b) { fstSliceDestroy(&t); - return node->nTrans - i - 1; // bug + return node->nTrans - i - 1; // bug } - } - if (i == len) { *null = true; } + } + if (i == len) { + *null = true; + } fstSliceDestroy(&t); - } + } } - -// fst node function +// fst node function FstNode *fstNodeCreate(int64_t version, CompiledAddr addr, FstSlice *slice) { - FstNode *n = (FstNode *)malloc(sizeof(FstNode)); - if (n == NULL) { return NULL; } + FstNode *n = (FstNode *)malloc(sizeof(FstNode)); + if (n == NULL) { + return NULL; + } - FstState st = fstStateCreateFrom(slice, addr); + FstState st = fstStateCreateFrom(slice, addr); if (st.state == EmptyFinal) { - n->data = fstSliceCreate(NULL, 0); - n->version = version; - n->state = st; - n->start = EMPTY_ADDRESS; - n->end = EMPTY_ADDRESS; - n->isFinal = true; - n->nTrans = 0; - n->sizes = 0; - n->finalOutput = 0; + n->data = fstSliceCreate(NULL, 0); + n->version = version; + n->state = st; + n->start = EMPTY_ADDRESS; + n->end = EMPTY_ADDRESS; + n->isFinal = true; + n->nTrans = 0; + n->sizes = 0; + n->finalOutput = 0; } else if (st.state == OneTransNext) { - n->data = fstSliceCopy(slice, 0, addr); - n->version = version; - n->state = st; - n->start = addr; - n->end = fstStateEndAddrForOneTransNext(&st, &n->data); //? s.end_addr(data); - n->isFinal = false; - n->sizes = 0; - n->nTrans = 1; - n->finalOutput = 0; + n->data = fstSliceCopy(slice, 0, addr); + n->version = version; + n->state = st; + n->start = addr; + n->end = fstStateEndAddrForOneTransNext(&st, &n->data); //? s.end_addr(data); + n->isFinal = false; + n->sizes = 0; + n->nTrans = 1; + n->finalOutput = 0; } else if (st.state == OneTrans) { - FstSlice data = fstSliceCopy(slice, 0, addr); - PackSizes sz = fstStateSizes(&st, &data); - n->data = data; - n->version = version; - n->state = st; - n->start = addr; - n->end = fstStateEndAddrForOneTrans(&st, &data, sz); // s.end_addr(data, sz); - n->isFinal = false; - n->nTrans = 1; - n->sizes = sz; - n->finalOutput = 0; + FstSlice data = fstSliceCopy(slice, 0, addr); + PackSizes sz = fstStateSizes(&st, &data); + n->data = data; + n->version = version; + n->state = st; + n->start = addr; + n->end = fstStateEndAddrForOneTrans(&st, &data, sz); // s.end_addr(data, sz); + n->isFinal = false; + n->nTrans = 1; + n->sizes = sz; + n->finalOutput = 0; } else { - FstSlice data = fstSliceCopy(slice, 0, addr); - uint64_t sz = fstStateSizes(&st, &data); // s.sizes(data) - uint32_t nTrans = fstStateNtrans(&st, &data); // s.ntrans(data) - n->data = data; - n->version = version; - n->state = st; - n->start = addr; - n->end = fstStateEndAddrForAnyTrans(&st, version, &data, sz, nTrans); // s.end_addr(version, data, sz, ntrans); - n->isFinal = fstStateIsFinalState(&st); // s.is_final_state(); - n->nTrans = nTrans; - n->sizes = sz; - n->finalOutput = fstStateFinalOutput(&st, version, &data, sz, nTrans); // s.final_output(version, data, sz, ntrans); - } - return n; + FstSlice data = fstSliceCopy(slice, 0, addr); + uint64_t sz = fstStateSizes(&st, &data); // s.sizes(data) + uint32_t nTrans = fstStateNtrans(&st, &data); // s.ntrans(data) + n->data = data; + n->version = version; + n->state = st; + n->start = addr; + n->end = fstStateEndAddrForAnyTrans(&st, version, &data, sz, nTrans); // s.end_addr(version, data, sz, ntrans); + n->isFinal = fstStateIsFinalState(&st); // s.is_final_state(); + n->nTrans = nTrans; + n->sizes = sz; + n->finalOutput = + fstStateFinalOutput(&st, version, &data, sz, nTrans); // s.final_output(version, data, sz, ntrans); + } + return n; } // debug state transition static const char *fstNodeState(FstNode *node) { - FstState *st = &node->state; - return fstStateStr[st->state]; + FstState *st = &node->state; + return fstStateStr[st->state]; } - void fstNodeDestroy(FstNode *node) { - fstSliceDestroy(&node->data); + fstSliceDestroy(&node->data); free(node); } -FstTransitions* fstNodeTransitions(FstNode *node) { +FstTransitions *fstNodeTransitions(FstNode *node) { FstTransitions *t = malloc(sizeof(FstTransitions)); if (NULL == t) { - return NULL; + return NULL; } FstRange range = {.start = 0, .end = FST_NODE_LEN(node)}; t->range = range; - t->node = node; - return t; -} + t->node = node; + return t; +} -// Returns the transition at index `i`. +// Returns the transition at index `i`. bool fstNodeGetTransitionAt(FstNode *node, uint64_t i, FstTransition *trn) { - bool s = true; + bool s = true; FstState *st = &node->state; if (st->state == OneTransNext) { - trn->inp = fstStateInput(st, node); - trn->out = 0; - trn->addr = fstStateTransAddr(st, node); + trn->inp = fstStateInput(st, node); + trn->out = 0; + trn->addr = fstStateTransAddr(st, node); } else if (st->state == OneTrans) { - trn->inp = fstStateInput(st, node); - trn->out = fstStateOutput(st, node); - trn->addr = fstStateTransAddr(st, node); + trn->inp = fstStateInput(st, node); + trn->out = fstStateOutput(st, node); + trn->addr = fstStateTransAddr(st, node); } else if (st->state == AnyTrans) { - trn->inp = fstStateInputForAnyTrans(st, node, i); - trn->out = fstStateOutputForAnyTrans(st, node, i); - trn->addr = fstStateTransAddrForAnyTrans(st, node, i); + trn->inp = fstStateInputForAnyTrans(st, node, i); + trn->out = fstStateOutputForAnyTrans(st, node, i); + trn->addr = fstStateTransAddrForAnyTrans(st, node, i); } else { s = false; } return s; -} +} // Returns the transition address of the `i`th transition bool fstNodeGetTransitionAddrAt(FstNode *node, uint64_t i, CompiledAddr *res) { - bool s = true; + bool s = true; FstState *st = &node->state; if (st->state == OneTransNext) { assert(i == 0); fstStateTransAddr(st, node); } else if (st->state == OneTrans) { - assert(i == 0); + assert(i == 0); fstStateTransAddr(st, node); } else if (st->state == AnyTrans) { fstStateTransAddrForAnyTrans(st, node, i); - } else if (FST_STATE_EMPTY_FINAL(node)){ + } else if (FST_STATE_EMPTY_FINAL(node)) { s = false; } else { assert(0); @@ -726,129 +695,138 @@ bool fstNodeGetTransitionAddrAt(FstNode *node, uint64_t i, CompiledAddr *res) { } // Finds the `i`th transition corresponding to the given input byte. -// If no transition for this byte exists, then `false` is returned. +// If no transition for this byte exists, then `false` is returned. bool fstNodeFindInput(FstNode *node, uint8_t b, uint64_t *res) { - bool s = true; + bool s = true; FstState *st = &node->state; if (st->state == OneTransNext) { - if (fstStateInput(st,node) == b) { *res = 0; } - else { s = false; } } - else if (st->state == OneTrans) { - if (fstStateInput(st, node) == b) { *res = 0 ;} - else { s = false; } + if (fstStateInput(st, node) == b) { + *res = 0; + } else { + s = false; + } + } else if (st->state == OneTrans) { + if (fstStateInput(st, node) == b) { + *res = 0; + } else { + s = false; + } } else if (st->state == AnyTrans) { - bool null = false; - uint64_t out = fstStateFindInput(st, node, b, &null); - if (null == false) { *res = out; } - else { s = false;} + bool null = false; + uint64_t out = fstStateFindInput(st, node, b, &null); + if (null == false) { + *res = out; + } else { + s = false; + } } return s; -} +} bool fstNodeCompile(FstNode *node, void *w, CompiledAddr lastAddr, CompiledAddr addr, FstBuilderNode *builderNode) { - size_t sz = taosArrayGetSize(builderNode->trans); + size_t sz = taosArrayGetSize(builderNode->trans); assert(sz < 256); if (sz == 0 && builderNode->isFinal && builderNode->finalOutput == 0) { - return true; + return true; } else if (sz != 1 || builderNode->isFinal) { - fstStateCompileForAnyTrans(w, addr, builderNode); + fstStateCompileForAnyTrans(w, addr, builderNode); // AnyTrans->Compile(w, addr, node); } else { - FstTransition *tran = taosArrayGet(builderNode->trans, 0); + FstTransition *tran = taosArrayGet(builderNode->trans, 0); if (tran->addr == lastAddr && tran->out == 0) { - fstStateCompileForOneTransNext(w, addr, tran->inp); - //OneTransNext::compile(w, lastAddr, tran->inp); - return true; + fstStateCompileForOneTransNext(w, addr, tran->inp); + // OneTransNext::compile(w, lastAddr, tran->inp); + return true; } else { - fstStateCompileForOneTrans(w, addr, tran); - //OneTrans::Compile(w, lastAddr, *tran); - return true; - } - } - return true; -} + fstStateCompileForOneTrans(w, addr, tran); + // OneTrans::Compile(w, lastAddr, *tran); + return true; + } + } + return true; +} bool fstBuilderNodeCompileTo(FstBuilderNode *b, FstCountingWriter *wrt, CompiledAddr lastAddr, CompiledAddr startAddr) { return fstNodeCompile(NULL, wrt, lastAddr, startAddr, b); } +FstBuilder *fstBuilderCreate(void *w, FstType ty) { + FstBuilder *b = malloc(sizeof(FstBuilder)); + if (NULL == b) { + return b; + } + b->wrt = fstCountingWriterCreate(w); + b->unfinished = fstUnFinishedNodesCreate(); + b->registry = fstRegistryCreate(10000, 2); + b->last = fstSliceCreate(NULL, 0); + b->lastAddr = NONE_ADDRESS; + b->len = 0; -FstBuilder *fstBuilderCreate(void *w, FstType ty) { - FstBuilder *b = malloc(sizeof(FstBuilder)); - if (NULL == b) { return b; } - - - b->wrt = fstCountingWriterCreate(w); - b->unfinished = fstUnFinishedNodesCreate(); - b->registry = fstRegistryCreate(10000, 2) ; - b->last = fstSliceCreate(NULL, 0); - b->lastAddr = NONE_ADDRESS; - b->len = 0; - - char buf64[8] = {0}; + char buf64[8] = {0}; void *pBuf64 = buf64; - taosEncodeFixedU64(&pBuf64, VERSION); + taosEncodeFixedU64(&pBuf64, VERSION); fstCountingWriterWrite(b->wrt, buf64, sizeof(buf64)); - - memset(buf64, 0, sizeof(buf64)); + + memset(buf64, 0, sizeof(buf64)); pBuf64 = buf64; - taosEncodeFixedU64(&pBuf64, ty); + taosEncodeFixedU64(&pBuf64, ty); fstCountingWriterWrite(b->wrt, buf64, sizeof(buf64)); return b; } void fstBuilderDestroy(FstBuilder *b) { - if (b == NULL) { return; } + if (b == NULL) { + return; + } - fstCountingWriterDestroy(b->wrt); - fstUnFinishedNodesDestroy(b->unfinished); + fstCountingWriterDestroy(b->wrt); + fstUnFinishedNodesDestroy(b->unfinished); fstRegistryDestroy(b->registry); fstSliceDestroy(&b->last); free(b); } - bool fstBuilderInsert(FstBuilder *b, FstSlice bs, Output in) { - OrderType t = fstBuilderCheckLastKey(b, bs, true); + OrderType t = fstBuilderCheckLastKey(b, bs, true); if (t == Ordered) { // add log info - fstBuilderInsertOutput(b, bs, in); - return true; - } + fstBuilderInsertOutput(b, bs, in); + return true; + } indexInfo("key must be ordered"); return false; } void fstBuilderInsertOutput(FstBuilder *b, FstSlice bs, Output in) { - FstSlice *s = &bs; - if (fstSliceIsEmpty(s)) { - b->len = 1; - fstUnFinishedNodesSetRootOutput(b->unfinished, in); - return; - } - //if (in != 0) { //if let Some(in) = in - // prefixLen = fstUnFinishedNodesFindCommPrefixAndSetOutput(b->unfinished, bs, in, &out); - //} else { - // prefixLen = fstUnFinishedNodesFindCommPrefix(b->unfinished, bs); - // out = 0; - //} - Output out; - uint64_t prefixLen = fstUnFinishedNodesFindCommPrefixAndSetOutput(b->unfinished, bs, in, &out); - - if (prefixLen == FST_SLICE_LEN(s)) { - assert(out == 0); - return; - } - - b->len += 1; - fstBuilderCompileFrom(b, prefixLen); - - FstSlice sub = fstSliceCopy(s, prefixLen, s->end); - fstUnFinishedNodesAddSuffix(b->unfinished, sub, out); - fstSliceDestroy(&sub); - return; - } + FstSlice *s = &bs; + if (fstSliceIsEmpty(s)) { + b->len = 1; + fstUnFinishedNodesSetRootOutput(b->unfinished, in); + return; + } + // if (in != 0) { //if let Some(in) = in + // prefixLen = fstUnFinishedNodesFindCommPrefixAndSetOutput(b->unfinished, bs, in, &out); + //} else { + // prefixLen = fstUnFinishedNodesFindCommPrefix(b->unfinished, bs); + // out = 0; + //} + Output out; + uint64_t prefixLen = fstUnFinishedNodesFindCommPrefixAndSetOutput(b->unfinished, bs, in, &out); + + if (prefixLen == FST_SLICE_LEN(s)) { + assert(out == 0); + return; + } + + b->len += 1; + fstBuilderCompileFrom(b, prefixLen); + + FstSlice sub = fstSliceCopy(s, prefixLen, s->end); + fstUnFinishedNodesAddSuffix(b->unfinished, sub, out); + fstSliceDestroy(&sub); + return; +} OrderType fstBuilderCheckLastKey(FstBuilder *b, FstSlice bs, bool ckDup) { FstSlice *input = &bs; @@ -859,16 +837,16 @@ OrderType fstBuilderCheckLastKey(FstBuilder *b, FstSlice bs, bool ckDup) { } else { int comp = fstSliceCompare(&b->last, &bs); if (comp == 0 && ckDup) { - return DuplicateKey; + return DuplicateKey; } else if (comp == 1) { return OutOfOrdered; } // deep copy or not fstSliceDestroy(&b->last); - b->last = fstSliceDeepCopy(&bs, input->start, input->end); - } + b->last = fstSliceDeepCopy(&bs, input->start, input->end); + } return Ordered; -} +} void fstBuilderCompileFrom(FstBuilder *b, uint64_t istate) { CompiledAddr addr = NONE_ADDRESS; while (istate + 1 < FST_UNFINISHED_NODES_LEN(b->unfinished)) { @@ -881,252 +859,240 @@ void fstBuilderCompileFrom(FstBuilder *b, uint64_t istate) { addr = fstBuilderCompile(b, bn); fstBuilderNodeDestroy(bn); - assert(addr != NONE_ADDRESS); - //fstBuilderNodeDestroy(n); + assert(addr != NONE_ADDRESS); + // fstBuilderNodeDestroy(n); } fstUnFinishedNodesTopLastFreeze(b->unfinished, addr); - return; + return; } CompiledAddr fstBuilderCompile(FstBuilder *b, FstBuilderNode *bn) { - if (FST_BUILDER_NODE_IS_FINAL(bn) - && FST_BUILDER_NODE_TRANS_ISEMPTY(bn) - && FST_BUILDER_NODE_FINALOUTPUT_ISZERO(bn)) { - return EMPTY_ADDRESS; + if (FST_BUILDER_NODE_IS_FINAL(bn) && FST_BUILDER_NODE_TRANS_ISEMPTY(bn) && FST_BUILDER_NODE_FINALOUTPUT_ISZERO(bn)) { + return EMPTY_ADDRESS; } - FstRegistryEntry *entry = fstRegistryGetEntry(b->registry, bn); - if (entry->state == FOUND) { + FstRegistryEntry *entry = fstRegistryGetEntry(b->registry, bn); + if (entry->state == FOUND) { CompiledAddr ret = entry->addr; fstRegistryEntryDestroy(entry); return ret; - } + } CompiledAddr startAddr = (CompiledAddr)(FST_WRITER_COUNT(b->wrt)); - fstBuilderNodeCompileTo(bn, b->wrt, b->lastAddr, startAddr); - b->lastAddr = (CompiledAddr)(FST_WRITER_COUNT(b->wrt) - 1); + fstBuilderNodeCompileTo(bn, b->wrt, b->lastAddr, startAddr); + b->lastAddr = (CompiledAddr)(FST_WRITER_COUNT(b->wrt) - 1); if (entry->state == NOTFOUND) { - FST_REGISTRY_CELL_INSERT(entry->cell, b->lastAddr); + FST_REGISTRY_CELL_INSERT(entry->cell, b->lastAddr); } fstRegistryEntryDestroy(entry); - - return b->lastAddr; + + return b->lastAddr; } -void* fstBuilderInsertInner(FstBuilder *b) { - fstBuilderCompileFrom(b, 0); - FstBuilderNode *rootNode = fstUnFinishedNodesPopRoot(b->unfinished); - CompiledAddr rootAddr = fstBuilderCompile(b, rootNode); +void *fstBuilderInsertInner(FstBuilder *b) { + fstBuilderCompileFrom(b, 0); + FstBuilderNode *rootNode = fstUnFinishedNodesPopRoot(b->unfinished); + CompiledAddr rootAddr = fstBuilderCompile(b, rootNode); fstBuilderNodeDestroy(rootNode); - char buf64[8] = {0}; + char buf64[8] = {0}; + + void *pBuf64 = buf64; + taosEncodeFixedU64(&pBuf64, b->len); + fstCountingWriterWrite(b->wrt, buf64, sizeof(buf64)); - void *pBuf64 = buf64; - taosEncodeFixedU64(&pBuf64, b->len); - fstCountingWriterWrite(b->wrt, buf64, sizeof(buf64)); - pBuf64 = buf64; - taosEncodeFixedU64(&pBuf64, rootAddr); - fstCountingWriterWrite(b->wrt, buf64, sizeof(buf64)); + taosEncodeFixedU64(&pBuf64, rootAddr); + fstCountingWriterWrite(b->wrt, buf64, sizeof(buf64)); - char buf32[4] = {0}; - void *pBuf32 = buf32; + char buf32[4] = {0}; + void * pBuf32 = buf32; uint32_t sum = fstCountingWriterMaskedCheckSum(b->wrt); - taosEncodeFixedU32(&pBuf32, sum); - fstCountingWriterWrite(b->wrt, buf32, sizeof(buf32)); - + taosEncodeFixedU32(&pBuf32, sum); + fstCountingWriterWrite(b->wrt, buf32, sizeof(buf32)); + fstCountingWriterFlush(b->wrt); - //fstCountingWriterDestroy(b->wrt); - //b->wrt = NULL; + // fstCountingWriterDestroy(b->wrt); + // b->wrt = NULL; return b->wrt; } -void fstBuilderFinish(FstBuilder *b) { - fstBuilderInsertInner(b); -} - - +void fstBuilderFinish(FstBuilder *b) { fstBuilderInsertInner(b); } FstSlice fstNodeAsSlice(FstNode *node) { - FstSlice *slice = &node->data; - FstSlice s = fstSliceCopy(slice, slice->end, FST_SLICE_LEN(slice) - 1); - return s; + FstSlice *slice = &node->data; + FstSlice s = fstSliceCopy(slice, slice->end, FST_SLICE_LEN(slice) - 1); + return s; } FstLastTransition *fstLastTransitionCreate(uint8_t inp, Output out) { FstLastTransition *trn = malloc(sizeof(FstLastTransition)); - if (trn == NULL) { return NULL; } + if (trn == NULL) { + return NULL; + } trn->inp = inp; trn->out = out; return trn; } -void fstLastTransitionDestroy(FstLastTransition *trn) { - free(trn); -} +void fstLastTransitionDestroy(FstLastTransition *trn) { free(trn); } void fstBuilderNodeUnfinishedLastCompiled(FstBuilderNodeUnfinished *unNode, CompiledAddr addr) { - FstLastTransition *trn = unNode->last; - if (trn == NULL) { return; } - FstTransition t = {.inp = trn->inp, .out = trn->out, .addr = addr}; - taosArrayPush(unNode->node->trans, &t); - fstLastTransitionDestroy(trn); + FstLastTransition *trn = unNode->last; + if (trn == NULL) { + return; + } + FstTransition t = {.inp = trn->inp, .out = trn->out, .addr = addr}; + taosArrayPush(unNode->node->trans, &t); + fstLastTransitionDestroy(trn); unNode->last = NULL; return; } void fstBuilderNodeUnfinishedAddOutputPrefix(FstBuilderNodeUnfinished *unNode, Output out) { if (FST_BUILDER_NODE_IS_FINAL(unNode->node)) { - unNode->node->finalOutput += out; + unNode->node->finalOutput += out; } size_t sz = taosArrayGetSize(unNode->node->trans); for (size_t i = 0; i < sz; i++) { - FstTransition *trn = taosArrayGet(unNode->node->trans, i); + FstTransition *trn = taosArrayGet(unNode->node->trans, i); trn->out += out; } if (unNode->last) { - unNode->last->out += out; + unNode->last->out += out; } return; } -Fst* fstCreate(FstSlice *slice) { +Fst *fstCreate(FstSlice *slice) { int32_t slen; - char *buf = fstSliceData(slice, &slen); - if (slen < 36) { - return NULL; + char * buf = fstSliceData(slice, &slen); + if (slen < 36) { + return NULL; } uint64_t len = slen; - uint64_t skip = 0; + uint64_t skip = 0; - uint64_t version; - taosDecodeFixedU64(buf, &version); - skip += sizeof(version); + uint64_t version; + taosDecodeFixedU64(buf, &version); + skip += sizeof(version); if (version == 0 || version > VERSION) { - return NULL; - } + return NULL; + } uint64_t type; taosDecodeFixedU64(buf + skip, &type); - skip += sizeof(type); + skip += sizeof(type); uint32_t checkSum = 0; len -= sizeof(checkSum); - taosDecodeFixedU32(buf + len, &checkSum); + taosDecodeFixedU32(buf + len, &checkSum); CompiledAddr rootAddr; - len -= sizeof(rootAddr); - taosDecodeFixedU64(buf + len, &rootAddr); + len -= sizeof(rootAddr); + taosDecodeFixedU64(buf + len, &rootAddr); - uint64_t fstLen; - len -= sizeof(fstLen); + uint64_t fstLen; + len -= sizeof(fstLen); taosDecodeFixedU64(buf + len, &fstLen); - //TODO(validate root addr) - Fst *fst= (Fst *)calloc(1, sizeof(Fst)); - if (fst == NULL) { return NULL; } - + // TODO(validate root addr) + Fst *fst = (Fst *)calloc(1, sizeof(Fst)); + if (fst == NULL) { + return NULL; + } + fst->meta = (FstMeta *)malloc(sizeof(FstMeta)); - if (NULL == fst->meta) { - goto FST_CREAT_FAILED; + if (NULL == fst->meta) { + goto FST_CREAT_FAILED; } - fst->meta->version = version; - fst->meta->rootAddr = rootAddr; - fst->meta->ty = type; - fst->meta->len = fstLen; + fst->meta->version = version; + fst->meta->rootAddr = rootAddr; + fst->meta->ty = type; + fst->meta->len = fstLen; fst->meta->checkSum = checkSum; FstSlice *s = calloc(1, sizeof(FstSlice)); - *s = fstSliceCopy(slice, 0, FST_SLICE_LEN(slice)); - fst->data = s; - + *s = fstSliceCopy(slice, 0, FST_SLICE_LEN(slice)); + fst->data = s; + return fst; -FST_CREAT_FAILED: - free(fst->meta); +FST_CREAT_FAILED: + free(fst->meta); free(fst); - } void fstDestroy(Fst *fst) { - if (fst) { - free(fst->meta); + if (fst) { + free(fst->meta); fstSliceDestroy(fst->data); free(fst->data); - } - free(fst); + } + free(fst); } bool fstGet(Fst *fst, FstSlice *b, Output *out) { - FstNode *root = fstGetRoot(fst); - Output tOut = 0; - int32_t len; + FstNode *root = fstGetRoot(fst); + Output tOut = 0; + int32_t len; uint8_t *data = fstSliceData(b, &len); - SArray *nodes = (SArray *)taosArrayInit(len, sizeof(FstNode *)); + SArray *nodes = (SArray *)taosArrayInit(len, sizeof(FstNode *)); taosArrayPush(nodes, &root); for (uint32_t i = 0; i < len; i++) { uint8_t inp = data[i]; Output res = 0; if (false == fstNodeFindInput(root, inp, &res)) { - return false; - } + return false; + } - FstTransition trn; + FstTransition trn; fstNodeGetTransitionAt(root, res, &trn); - tOut += trn.out; + tOut += trn.out; root = fstGetNode(fst, trn.addr); taosArrayPush(nodes, &root); } if (!FST_NODE_IS_FINAL(root)) { return false; } else { - tOut = tOut + FST_NODE_FINAL_OUTPUT(root); + tOut = tOut + FST_NODE_FINAL_OUTPUT(root); } for (size_t i = 0; i < taosArrayGetSize(nodes); i++) { - FstNode **node = (FstNode **)taosArrayGet(nodes, i); - fstNodeDestroy(*node); + FstNode **node = (FstNode **)taosArrayGet(nodes, i); + fstNodeDestroy(*node); } taosArrayDestroy(nodes); fst->root = NULL; *out = tOut; - - return true; -} -FstStreamBuilder *fstSearch(Fst *fst, AutomationCtx *ctx) { - return fstStreamBuilderCreate(fst, ctx); + + return true; } -StreamWithState* streamBuilderIntoStream(FstStreamBuilder *sb) { - if (sb == NULL) { return NULL; } +FstStreamBuilder *fstSearch(Fst *fst, AutomationCtx *ctx) { return fstStreamBuilderCreate(fst, ctx); } +StreamWithState * streamBuilderIntoStream(FstStreamBuilder *sb) { + if (sb == NULL) { + return NULL; + } return streamWithStateCreate(sb->fst, sb->aut, sb->min, sb->max); } -FstStreamWithStateBuilder *fstSearchWithState(Fst *fst, AutomationCtx *ctx) { - return fstStreamBuilderCreate(fst, ctx); -} +FstStreamWithStateBuilder *fstSearchWithState(Fst *fst, AutomationCtx *ctx) { return fstStreamBuilderCreate(fst, ctx); } FstNode *fstGetRoot(Fst *fst) { if (fst->root != NULL) { return fst->root; } - CompiledAddr rAddr = fstGetRootAddr(fst); - fst->root = fstGetNode(fst, rAddr); + CompiledAddr rAddr = fstGetRootAddr(fst); + fst->root = fstGetNode(fst, rAddr); return fst->root; } -FstNode* fstGetNode(Fst *fst, CompiledAddr addr) { - return fstNodeCreate(fst->meta->version, addr, fst->data); - -} -FstType fstGetType(Fst *fst) { - return fst->meta->ty; -} -CompiledAddr fstGetRootAddr(Fst *fst) { - return fst->meta->rootAddr; -} +FstNode * fstGetNode(Fst *fst, CompiledAddr addr) { return fstNodeCreate(fst->meta->version, addr, fst->data); } +FstType fstGetType(Fst *fst) { return fst->meta->ty; } +CompiledAddr fstGetRootAddr(Fst *fst) { return fst->meta->rootAddr; } Output fstEmptyFinalOutput(Fst *fst, bool *null) { - Output res = 0; + Output res = 0; FstNode *node = fstGetRoot(fst); if (FST_NODE_IS_FINAL(node)) { *null = false; - res = FST_NODE_FINAL_OUTPUT(node); + res = FST_NODE_FINAL_OUTPUT(node); } else { *null = true; } @@ -1135,9 +1101,9 @@ Output fstEmptyFinalOutput(Fst *fst, bool *null) { bool fstVerify(Fst *fst) { uint32_t checkSum = fst->meta->checkSum; - int32_t len; + int32_t len; uint8_t *data = fstSliceData(fst->data, &len); - TSCKSUM initSum = 0; + TSCKSUM initSum = 0; if (!taosCheckChecksumWhole(data, len)) { return false; } @@ -1145,9 +1111,11 @@ bool fstVerify(Fst *fst) { } // data bound function -FstBoundWithData* fstBoundStateCreate(FstBound type, FstSlice *data) { +FstBoundWithData *fstBoundStateCreate(FstBound type, FstSlice *data) { FstBoundWithData *b = calloc(1, sizeof(FstBoundWithData)); - if (b == NULL) { return NULL; } + if (b == NULL) { + return NULL; + } if (data != NULL) { b->data = fstSliceCopy(data, data->start, data->end); @@ -1156,10 +1124,9 @@ FstBoundWithData* fstBoundStateCreate(FstBound type, FstSlice *data) { } b->type = type; - return b; + return b; } - bool fstBoundWithDataExceededBy(FstBoundWithData *bound, FstSlice *slice) { int comp = fstSliceCompare(slice, &bound->data); if (bound->type == Included) { @@ -1173,62 +1140,62 @@ bool fstBoundWithDataExceededBy(FstBoundWithData *bound, FstSlice *slice) { bool fstBoundWithDataIsEmpty(FstBoundWithData *bound) { if (bound->type == Unbounded) { return true; - } else { - return fstSliceIsEmpty(&bound->data); - } + } else { + return fstSliceIsEmpty(&bound->data); + } } +bool fstBoundWithDataIsIncluded(FstBoundWithData *bound) { return bound->type == Excluded ? false : true; } -bool fstBoundWithDataIsIncluded(FstBoundWithData *bound) { - return bound->type == Excluded? false : true; -} - -void fstBoundDestroy(FstBoundWithData *bound) { - free(bound); -} +void fstBoundDestroy(FstBoundWithData *bound) { free(bound); } -StreamWithState *streamWithStateCreate(Fst *fst, AutomationCtx *automation, FstBoundWithData *min, FstBoundWithData *max) { +StreamWithState *streamWithStateCreate( + Fst *fst, AutomationCtx *automation, FstBoundWithData *min, FstBoundWithData *max) { StreamWithState *sws = calloc(1, sizeof(StreamWithState)); - if (sws == NULL) { return NULL; } + if (sws == NULL) { + return NULL; + } + + sws->fst = fst; + sws->aut = automation; + sws->inp = (SArray *)taosArrayInit(256, sizeof(uint8_t)); - sws->fst = fst; - sws->aut = automation; - sws->inp = (SArray *)taosArrayInit(256, sizeof(uint8_t)); - sws->emptyOutput.null = false; - sws->emptyOutput.out = 0; + sws->emptyOutput.out = 0; - sws->stack = (SArray *)taosArrayInit(256, sizeof(StreamState)); - sws->endAt = max; + sws->stack = (SArray *)taosArrayInit(256, sizeof(StreamState)); + sws->endAt = max; streamWithStateSeekMin(sws, min); return sws; } void streamWithStateDestroy(StreamWithState *sws) { - if (sws == NULL) { return; } + if (sws == NULL) { + return; + } taosArrayDestroy(sws->inp); taosArrayDestroyEx(sws->stack, streamStateDestroy); - free(sws); + free(sws); } bool streamWithStateSeekMin(StreamWithState *sws, FstBoundWithData *min) { - AutomationCtx *aut = sws->aut; if (fstBoundWithDataIsEmpty(min)) { if (fstBoundWithDataIsIncluded(min)) { - sws->emptyOutput.out = fstEmptyFinalOutput(sws->fst, &(sws->emptyOutput.null)); - } - StreamState s = {.node = fstGetRoot(sws->fst), - .trans = 0, - .out = {.null = false, .out = 0}, - .autState = automFuncs[aut->type].start(aut)}; // auto.start callback + sws->emptyOutput.out = fstEmptyFinalOutput(sws->fst, &(sws->emptyOutput.null)); + } + StreamState s = {.node = fstGetRoot(sws->fst), + .trans = 0, + .out = {.null = false, .out = 0}, + .autState = automFuncs[aut->type].start(aut)}; // auto.start callback taosArrayPush(sws->stack, &s); return true; - } + } FstSlice *key = NULL; - bool inclusize = false;; + bool inclusize = false; + ; if (min->type == Included) { key = &min->data; @@ -1239,86 +1206,77 @@ bool streamWithStateSeekMin(StreamWithState *sws, FstBoundWithData *min) { return false; } - FstNode *node = fstGetRoot(sws->fst); - Output out = 0; - //void* autState = sws->aut->start(); - void* autState = automFuncs[aut->type].start(aut); + FstNode *node = fstGetRoot(sws->fst); + Output out = 0; + // void* autState = sws->aut->start(); + void *autState = automFuncs[aut->type].start(aut); - int32_t len; - uint8_t *data = fstSliceData(key, &len); + int32_t len; + uint8_t *data = fstSliceData(key, &len); for (uint32_t i = 0; i < len; i++) { - uint8_t b = data[i]; + uint8_t b = data[i]; uint64_t res = 0; - bool null = fstNodeFindInput(node, b, &res); + bool null = fstNodeFindInput(node, b, &res); if (null == false) { FstTransition trn; - fstNodeGetTransitionAt(node, res, &trn); + fstNodeGetTransitionAt(node, res, &trn); void *preState = autState; // autState = sws->aut->accept(preState, b); autState = automFuncs[aut->type].accept(aut, preState, b); taosArrayPush(sws->inp, &b); - StreamState s = {.node = node, - .trans = res + 1, - .out = {.null = false, .out = out}, - .autState = preState}; + StreamState s = {.node = node, .trans = res + 1, .out = {.null = false, .out = out}, .autState = preState}; taosArrayPush(sws->stack, &s); out += trn.out; - node = fstGetNode(sws->fst, trn.addr); + node = fstGetNode(sws->fst, trn.addr); fstNodeDestroy(node); } else { - // This is a little tricky. We're in this case if the // given bound is not a prefix of any key in the FST. // Since this is a minimum bound, we need to find the // first transition in this node that proceeds the current - // input byte. + // input byte. FstTransitions *trans = fstNodeTransitions(node); - uint64_t i = 0; + uint64_t i = 0; for (i = trans->range.start; i < trans->range.end; i++) { FstTransition trn; if (fstNodeGetTransitionAt(node, i, &trn) && trn.inp > b) { - break; - } + break; + } } - - StreamState s = {.node = node, - .trans = i, - .out = {.null = false, .out = out}, - .autState = autState}; - taosArrayPush(sws->stack, &s); - return true; + + StreamState s = {.node = node, .trans = i, .out = {.null = false, .out = out}, .autState = autState}; + taosArrayPush(sws->stack, &s); + return true; } } - uint32_t sz = taosArrayGetSize(sws->stack); + uint32_t sz = taosArrayGetSize(sws->stack); if (sz != 0) { - StreamState *s = taosArrayGet(sws->stack, sz - 1); + StreamState *s = taosArrayGet(sws->stack, sz - 1); if (inclusize) { s->trans -= 1; taosArrayPop(sws->inp); } else { - FstNode *n = s->node; - uint64_t trans = s->trans; - FstTransition trn; + FstNode * n = s->node; + uint64_t trans = s->trans; + FstTransition trn; fstNodeGetTransitionAt(n, trans - 1, &trn); - StreamState s = {.node = fstGetNode(sws->fst, trn.addr), - .trans= 0, - .out = {.null = false, .out = out}, - .autState = autState}; + StreamState s = { + .node = fstGetNode(sws->fst, trn.addr), .trans = 0, .out = {.null = false, .out = out}, .autState = autState}; taosArrayPush(sws->stack, &s); - return true; - } - return false; - } -} + return true; + } + return false; + } +} StreamWithStateResult *streamWithStateNextWith(StreamWithState *sws, StreamCallback callback) { AutomationCtx *aut = sws->aut; - FstOutput output = sws->emptyOutput; + FstOutput output = sws->emptyOutput; if (output.null == false) { - FstSlice emptySlice = fstSliceCreate(NULL, 0); + FstSlice emptySlice = fstSliceCreate(NULL, 0); if (fstBoundWithDataExceededBy(sws->endAt, &emptySlice)) { taosArrayDestroyEx(sws->stack, streamStateDestroy); - sws->stack = (SArray *)taosArrayInit(256, sizeof(StreamState)); + sws->stack = (SArray *)taosArrayInit(256, sizeof(StreamState)); return NULL; } void *start = automFuncs[aut->type].start(aut); @@ -1327,117 +1285,125 @@ StreamWithStateResult *streamWithStateNextWith(StreamWithState *sws, StreamCallb return swsResultCreate(&s, output, callback(start)); } } - SArray *nodes = taosArrayInit(8, sizeof(FstNode *)); + SArray *nodes = taosArrayInit(8, sizeof(FstNode *)); while (taosArrayGetSize(sws->stack) > 0) { - StreamState *p = (StreamState *)taosArrayPop(sws->stack); + StreamState *p = (StreamState *)taosArrayPop(sws->stack); if (p->trans >= FST_NODE_LEN(p->node) || !automFuncs[aut->type].canMatch(aut, p->autState)) { if (FST_NODE_ADDR(p->node) != fstGetRootAddr(sws->fst)) { taosArrayPop(sws->inp); } - streamStateDestroy(p); + streamStateDestroy(p); continue; } - FstTransition trn; + FstTransition trn; fstNodeGetTransitionAt(p->node, p->trans, &trn); - Output out = p->out.out + trn.out; - void* nextState = automFuncs[aut->type].accept(aut, p->autState, trn.inp); - void* tState = callback(nextState); - bool isMatch = automFuncs[aut->type].isMatch(aut, nextState); - FstNode *nextNode = fstGetNode(sws->fst, trn.addr); - taosArrayPush(nodes, &nextNode); - taosArrayPush(sws->inp, &(trn.inp)); + Output out = p->out.out + trn.out; + void * nextState = automFuncs[aut->type].accept(aut, p->autState, trn.inp); + void * tState = callback(nextState); + bool isMatch = automFuncs[aut->type].isMatch(aut, nextState); + FstNode *nextNode = fstGetNode(sws->fst, trn.addr); + taosArrayPush(nodes, &nextNode); + taosArrayPush(sws->inp, &(trn.inp)); if (FST_NODE_IS_FINAL(nextNode)) { - //void *eofState = sws->aut->acceptEof(nextState); + // void *eofState = sws->aut->acceptEof(nextState); void *eofState = automFuncs[aut->type].acceptEof(aut, nextState); if (eofState != NULL) { isMatch = automFuncs[aut->type].isMatch(aut, eofState); } - } - StreamState s1 = { .node = p->node, .trans = p->trans + 1, .out = p->out, .autState = p->autState}; + } + StreamState s1 = {.node = p->node, .trans = p->trans + 1, .out = p->out, .autState = p->autState}; taosArrayPush(sws->stack, &s1); StreamState s2 = {.node = nextNode, .trans = 0, .out = {.null = false, .out = out}, .autState = nextState}; taosArrayPush(sws->stack, &s2); - - size_t isz = taosArrayGetSize(sws->inp); - uint8_t *buf = (uint8_t *)malloc(isz * sizeof(uint8_t)); + size_t isz = taosArrayGetSize(sws->inp); + uint8_t *buf = (uint8_t *)malloc(isz * sizeof(uint8_t)); for (uint32_t i = 0; i < isz; i++) { buf[i] = *(uint8_t *)taosArrayGet(sws->inp, i); } FstSlice slice = fstSliceCreate(buf, taosArrayGetSize(sws->inp)); if (fstBoundWithDataExceededBy(sws->endAt, &slice)) { taosArrayDestroyEx(sws->stack, streamStateDestroy); - sws->stack = (SArray *)taosArrayInit(256, sizeof(StreamState)); + sws->stack = (SArray *)taosArrayInit(256, sizeof(StreamState)); free(buf); fstSliceDestroy(&slice); return NULL; } if (FST_NODE_IS_FINAL(nextNode) && isMatch) { - FstOutput fOutput = {.null = false, .out = out + FST_NODE_FINAL_OUTPUT(nextNode)}; - StreamWithStateResult *result = swsResultCreate(&slice, fOutput, tState); + FstOutput fOutput = {.null = false, .out = out + FST_NODE_FINAL_OUTPUT(nextNode)}; + StreamWithStateResult *result = swsResultCreate(&slice, fOutput, tState); free(buf); fstSliceDestroy(&slice); - return result; + return result; } free(buf); fstSliceDestroy(&slice); } for (size_t i = 0; i < taosArrayGetSize(nodes); i++) { - FstNode** node = (FstNode **)taosArrayGet(nodes, i); + FstNode **node = (FstNode **)taosArrayGet(nodes, i); fstNodeDestroy(*node); } taosArrayDestroy(nodes); - return NULL; - + return NULL; } StreamWithStateResult *swsResultCreate(FstSlice *data, FstOutput fOut, void *state) { - StreamWithStateResult *result = calloc(1, sizeof(StreamWithStateResult)); - if (result == NULL) { return NULL; } - - result->data = fstSliceCopy(data, 0, FST_SLICE_LEN(data) - 1); - result->out = fOut; - result->state = state; + StreamWithStateResult *result = calloc(1, sizeof(StreamWithStateResult)); + if (result == NULL) { + return NULL; + } + + result->data = fstSliceCopy(data, 0, FST_SLICE_LEN(data) - 1); + result->out = fOut; + result->state = state; return result; } void swsResultDestroy(StreamWithStateResult *result) { - if (NULL == result) { return; } - + if (NULL == result) { + return; + } + fstSliceDestroy(&result->data); - startWithStateValueDestroy(result->state); + startWithStateValueDestroy(result->state); free(result); } void streamStateDestroy(void *s) { - if (NULL == s) { return; } + if (NULL == s) { + return; + } StreamState *ss = (StreamState *)s; fstNodeDestroy(ss->node); - //free(s->autoState); + // free(s->autoState); } FstStreamBuilder *fstStreamBuilderCreate(Fst *fst, AutomationCtx *aut) { FstStreamBuilder *b = calloc(1, sizeof(FstStreamBuilder)); - if (NULL == b) { return NULL; } + if (NULL == b) { + return NULL; + } b->fst = fst; b->aut = aut; b->min = fstBoundStateCreate(Unbounded, NULL); - b->max = fstBoundStateCreate(Unbounded, NULL); + b->max = fstBoundStateCreate(Unbounded, NULL); return b; } void fstStreamBuilderDestroy(FstStreamBuilder *b) { fstSliceDestroy(&b->min->data); - tfree(b->min); + tfree(b->min); fstSliceDestroy(&b->max->data); tfree(b->max); free(b); } FstStreamBuilder *fstStreamBuilderRange(FstStreamBuilder *b, FstSlice *val, RangeType type) { - if (b == NULL) { return NULL; } + if (b == NULL) { + return NULL; + } if (type == GE) { b->min->type = Included; @@ -1458,9 +1424,3 @@ FstStreamBuilder *fstStreamBuilderRange(FstStreamBuilder *b, FstSlice *val, Rang } return b; } - - - - - - diff --git a/source/libs/index/src/index_fst_automation.c b/source/libs/index/src/index_fst_automation.c index 07ad45079b..cf3165709c 100644 --- a/source/libs/index/src/index_fst_automation.c +++ b/source/libs/index/src/index_fst_automation.c @@ -15,44 +15,49 @@ #include "index_fst_automation.h" - StartWithStateValue *startWithStateValueCreate(StartWithStateKind kind, ValueType ty, void *val) { StartWithStateValue *nsv = calloc(1, sizeof(StartWithStateValue)); - if (nsv == NULL) { return NULL; } + if (nsv == NULL) { + return NULL; + } nsv->kind = kind; nsv->type = ty; if (ty == FST_INT) { nsv->val = *(int *)val; } else if (ty == FST_CHAR) { - size_t len = strlen((char *)val); - nsv->ptr = (char *)calloc(1, len + 1); + size_t len = strlen((char *)val); + nsv->ptr = (char *)calloc(1, len + 1); memcpy(nsv->ptr, val, len); } else if (ty == FST_ARRAY) { - //TODO, - //nsv->arr = taosArrayFromList() + // TODO, + // nsv->arr = taosArrayFromList() } return nsv; } void startWithStateValueDestroy(void *val) { StartWithStateValue *sv = (StartWithStateValue *)val; - if (sv == NULL) { return; } + if (sv == NULL) { + return; + } if (sv->type == FST_INT) { - // + // } else if (sv->type == FST_CHAR) { free(sv->ptr); } else if (sv->type == FST_ARRAY) { taosArrayDestroy(sv->arr); } - free(sv); + free(sv); } StartWithStateValue *startWithStateValueDump(StartWithStateValue *sv) { StartWithStateValue *nsv = calloc(1, sizeof(StartWithStateValue)); - if (nsv == NULL) { return NULL; } + if (nsv == NULL) { + return NULL; + } nsv->kind = sv->kind; - nsv->type= sv->type; + nsv->type = sv->type; if (nsv->type == FST_INT) { nsv->val = sv->val; } else if (nsv->type == FST_CHAR) { @@ -64,93 +69,67 @@ StartWithStateValue *startWithStateValueDump(StartWithStateValue *sv) { return nsv; } - // prefix query, impl later -static void* prefixStart(AutomationCtx *ctx) { +static void *prefixStart(AutomationCtx *ctx) { StartWithStateValue *data = (StartWithStateValue *)(ctx->stdata); - return startWithStateValueDump(data); + return startWithStateValueDump(data); }; static bool prefixIsMatch(AutomationCtx *ctx, void *sv) { - StartWithStateValue* ssv = (StartWithStateValue *)sv; - return ssv->val == strlen(ctx->data); -} -static bool prefixCanMatch(AutomationCtx *ctx, void *sv) { - StartWithStateValue* ssv = (StartWithStateValue *)sv; - return ssv->val >= 0; + StartWithStateValue *ssv = (StartWithStateValue *)sv; + return ssv->val == strlen(ctx->data); } -static bool prefixWillAlwaysMatch(AutomationCtx *ctx, void *state) { - return true; +static bool prefixCanMatch(AutomationCtx *ctx, void *sv) { + StartWithStateValue *ssv = (StartWithStateValue *)sv; + return ssv->val >= 0; } -static void* prefixAccept(AutomationCtx *ctx, void *state, uint8_t byte) { - StartWithStateValue* ssv = (StartWithStateValue *)state; - if (ssv == NULL || ctx == NULL) {return NULL;} +static bool prefixWillAlwaysMatch(AutomationCtx *ctx, void *state) { return true; } +static void *prefixAccept(AutomationCtx *ctx, void *state, uint8_t byte) { + StartWithStateValue *ssv = (StartWithStateValue *)state; + if (ssv == NULL || ctx == NULL) { + return NULL; + } char *data = ctx->data; if (ssv->kind == Done) { return startWithStateValueCreate(Done, FST_INT, &ssv->val); } if ((strlen(data) > ssv->val) && data[ssv->val] == byte) { - int val = ssv->val + 1; + int val = ssv->val + 1; StartWithStateValue *nsv = startWithStateValueCreate(Running, FST_INT, &val); if (prefixIsMatch(ctx, nsv)) { nsv->kind = Done; } else { nsv->kind = Running; - } + } return nsv; - } - return NULL; -} -static void* prefixAcceptEof(AutomationCtx *ctx, void *state) { + } return NULL; } +static void *prefixAcceptEof(AutomationCtx *ctx, void *state) { return NULL; } // pattern query, impl later -static void* patternStart(AutomationCtx *ctx) { - return NULL; -} -static bool patternIsMatch(AutomationCtx *ctx, void *data) { - return true; -} -static bool patternCanMatch(AutomationCtx *ctx, void *data) { - return true; -} -static bool patternWillAlwaysMatch(AutomationCtx *ctx, void *state) { - return true; -} +static void *patternStart(AutomationCtx *ctx) { return NULL; } +static bool patternIsMatch(AutomationCtx *ctx, void *data) { return true; } +static bool patternCanMatch(AutomationCtx *ctx, void *data) { return true; } +static bool patternWillAlwaysMatch(AutomationCtx *ctx, void *state) { return true; } -static void* patternAccept(AutomationCtx *ctx, void *state, uint8_t byte) { - return NULL; -} +static void *patternAccept(AutomationCtx *ctx, void *state, uint8_t byte) { return NULL; } -static void* patternAcceptEof(AutomationCtx *ctx, void *state) { - return NULL; -} +static void *patternAcceptEof(AutomationCtx *ctx, void *state) { return NULL; } -AutomationFunc automFuncs[] = {{ - prefixStart, - prefixIsMatch, - prefixCanMatch, - prefixWillAlwaysMatch, - prefixAccept, - prefixAcceptEof - }, - { - patternStart, - patternIsMatch, - patternCanMatch, - patternWillAlwaysMatch, - patternAccept, - patternAcceptEof - } - // add more search type +AutomationFunc automFuncs[] = { + {prefixStart, prefixIsMatch, prefixCanMatch, prefixWillAlwaysMatch, prefixAccept, prefixAcceptEof}, + {patternStart, patternIsMatch, patternCanMatch, patternWillAlwaysMatch, patternAccept, patternAcceptEof} + // add more search type }; -AutomationCtx* automCtxCreate(void *data,AutomationType atype) { +AutomationCtx *automCtxCreate(void *data, AutomationType atype) { AutomationCtx *ctx = calloc(1, sizeof(AutomationCtx)); - if (ctx == NULL) { return NULL; } + if (ctx == NULL) { + return NULL; + } StartWithStateValue *sv = NULL; if (atype == AUTOMATION_PREFIX) { @@ -158,22 +137,21 @@ AutomationCtx* automCtxCreate(void *data,AutomationType atype) { sv = startWithStateValueCreate(Running, FST_INT, &val); ctx->stdata = (void *)sv; } else if (atype == AUTMMATION_MATCH) { - } else { // add more search type } - char* src = (char *)data; + char * src = (char *)data; size_t len = strlen(src); - char* dst = (char *)malloc(len * sizeof(char) + 1); + char * dst = (char *)malloc(len * sizeof(char) + 1); memcpy(dst, src, len); dst[len] = 0; - - ctx->data = dst; - ctx->type = atype; - ctx->stdata = (void *)sv; - return ctx; -} + + ctx->data = dst; + ctx->type = atype; + ctx->stdata = (void *)sv; + return ctx; +} void automCtxDestroy(AutomationCtx *ctx) { startWithStateValueDestroy(ctx->stdata); free(ctx->data); diff --git a/source/libs/index/src/index_fst_common.c b/source/libs/index/src/index_fst_common.c index 97fb88d60e..ee3b07713d 100644 --- a/source/libs/index/src/index_fst_common.c +++ b/source/libs/index/src/index_fst_common.c @@ -12,296 +12,522 @@ * You should have received a copy of the GNU Affero General Public License * along with this program. If not, see . */ - -#include "tutil.h" +#include "index_fst_common.h" const uint8_t COMMON_INPUTS[] = { - 84, // '\x00' - 85, // '\x01' - 86, // '\x02' - 87, // '\x03' - 88, // '\x04' - 89, // '\x05' - 90, // '\x06' - 91, // '\x07' - 92, // '\x08' - 93, // '\t' - 94, // '\n' - 95, // '\x0b' - 96, // '\x0c' - 97, // '\r' - 98, // '\x0e' - 99, // '\x0f' - 100, // '\x10' - 101, // '\x11' - 102, // '\x12' - 103, // '\x13' - 104, // '\x14' - 105, // '\x15' - 106, // '\x16' - 107, // '\x17' - 108, // '\x18' - 109, // '\x19' - 110, // '\x1a' - 111, // '\x1b' - 112, // '\x1c' - 113, // '\x1d' - 114, // '\x1e' - 115, // '\x1f' - 116, // ' ' - 80, // '!' - 117, // '"' - 118, // '#' - 79, // '$' - 39, // '%' - 30, // '&' - 81, // "'" - 75, // '(' - 74, // ')' - 82, // '*' - 57, // '+' - 66, // ',' - 16, // '-' - 12, // '.' - 2, // '/' - 19, // '0' - 20, // '1' - 21, // '2' - 27, // '3' - 32, // '4' - 29, // '5' - 35, // '6' - 36, // '7' - 37, // '8' - 34, // '9' - 24, // ':' - 73, // ';' - 119, // '<' - 23, // '=' - 120, // '>' - 40, // '?' - 83, // '@' - 44, // 'A' - 48, // 'B' - 42, // 'C' - 43, // 'D' - 49, // 'E' - 46, // 'F' - 62, // 'G' - 61, // 'H' - 47, // 'I' - 69, // 'J' - 68, // 'K' - 58, // 'L' - 56, // 'M' - 55, // 'N' - 59, // 'O' - 51, // 'P' - 72, // 'Q' - 54, // 'R' - 45, // 'S' - 52, // 'T' - 64, // 'U' - 65, // 'V' - 63, // 'W' - 71, // 'X' - 67, // 'Y' - 70, // 'Z' - 77, // '[' - 121, // '\\' - 78, // ']' - 122, // '^' - 31, // '_' - 123, // '`' - 4, // 'a' - 25, // 'b' - 9, // 'c' - 17, // 'd' - 1, // 'e' - 26, // 'f' - 22, // 'g' - 13, // 'h' - 7, // 'i' - 50, // 'j' - 38, // 'k' - 14, // 'l' - 15, // 'm' - 10, // 'n' - 3, // 'o' - 8, // 'p' - 60, // 'q' - 6, // 'r' - 5, // 's' - 0, // 't' - 18, // 'u' - 33, // 'v' - 11, // 'w' - 41, // 'x' - 28, // 'y' - 53, // 'z' - 124, // '{' - 125, // '|' - 126, // '}' - 76, // '~' - 127, // '\x7f' - 128, // '\x80' - 129, // '\x81' - 130, // '\x82' - 131, // '\x83' - 132, // '\x84' - 133, // '\x85' - 134, // '\x86' - 135, // '\x87' - 136, // '\x88' - 137, // '\x89' - 138, // '\x8a' - 139, // '\x8b' - 140, // '\x8c' - 141, // '\x8d' - 142, // '\x8e' - 143, // '\x8f' - 144, // '\x90' - 145, // '\x91' - 146, // '\x92' - 147, // '\x93' - 148, // '\x94' - 149, // '\x95' - 150, // '\x96' - 151, // '\x97' - 152, // '\x98' - 153, // '\x99' - 154, // '\x9a' - 155, // '\x9b' - 156, // '\x9c' - 157, // '\x9d' - 158, // '\x9e' - 159, // '\x9f' - 160, // '\xa0' - 161, // '¡' - 162, // '¢' - 163, // '£' - 164, // '¤' - 165, // '¥' - 166, // '¦' - 167, // '§' - 168, // '¨' - 169, // '©' - 170, // 'ª' - 171, // '«' - 172, // '¬' - 173, // '\xad' - 174, // '®' - 175, // '¯' - 176, // '°' - 177, // '±' - 178, // '²' - 179, // '³' - 180, // '´' - 181, // 'µ' - 182, // '¶' - 183, // '·' - 184, // '¸' - 185, // '¹' - 186, // 'º' - 187, // '»' - 188, // '¼' - 189, // '½' - 190, // '¾' - 191, // '¿' - 192, // 'À' - 193, // 'Á' - 194, // 'Â' - 195, // 'Ã' - 196, // 'Ä' - 197, // 'Å' - 198, // 'Æ' - 199, // 'Ç' - 200, // 'È' - 201, // 'É' - 202, // 'Ê' - 203, // 'Ë' - 204, // 'Ì' - 205, // 'Í' - 206, // 'Î' - 207, // 'Ï' - 208, // 'Ð' - 209, // 'Ñ' - 210, // 'Ò' - 211, // 'Ó' - 212, // 'Ô' - 213, // 'Õ' - 214, // 'Ö' - 215, // '×' - 216, // 'Ø' - 217, // 'Ù' - 218, // 'Ú' - 219, // 'Û' - 220, // 'Ü' - 221, // 'Ý' - 222, // 'Þ' - 223, // 'ß' - 224, // 'à' - 225, // 'á' - 226, // 'â' - 227, // 'ã' - 228, // 'ä' - 229, // 'å' - 230, // 'æ' - 231, // 'ç' - 232, // 'è' - 233, // 'é' - 234, // 'ê' - 235, // 'ë' - 236, // 'ì' - 237, // 'í' - 238, // 'î' - 239, // 'ï' - 240, // 'ð' - 241, // 'ñ' - 242, // 'ò' - 243, // 'ó' - 244, // 'ô' - 245, // 'õ' - 246, // 'ö' - 247, // '÷' - 248, // 'ø' - 249, // 'ù' - 250, // 'ú' - 251, // 'û' - 252, // 'ü' - 253, // 'ý' - 254, // 'þ' - 255, // 'ÿ' + 84, // '\x00' + 85, // '\x01' + 86, // '\x02' + 87, // '\x03' + 88, // '\x04' + 89, // '\x05' + 90, // '\x06' + 91, // '\x07' + 92, // '\x08' + 93, // '\t' + 94, // '\n' + 95, // '\x0b' + 96, // '\x0c' + 97, // '\r' + 98, // '\x0e' + 99, // '\x0f' + 100, // '\x10' + 101, // '\x11' + 102, // '\x12' + 103, // '\x13' + 104, // '\x14' + 105, // '\x15' + 106, // '\x16' + 107, // '\x17' + 108, // '\x18' + 109, // '\x19' + 110, // '\x1a' + 111, // '\x1b' + 112, // '\x1c' + 113, // '\x1d' + 114, // '\x1e' + 115, // '\x1f' + 116, // ' ' + 80, // '!' + 117, // '"' + 118, // '#' + 79, // '$' + 39, // '%' + 30, // '&' + 81, // "'" + 75, // '(' + 74, // ')' + 82, // '*' + 57, // '+' + 66, // ',' + 16, // '-' + 12, // '.' + 2, // '/' + 19, // '0' + 20, // '1' + 21, // '2' + 27, // '3' + 32, // '4' + 29, // '5' + 35, // '6' + 36, // '7' + 37, // '8' + 34, // '9' + 24, // ':' + 73, // ';' + 119, // '<' + 23, // '=' + 120, // '>' + 40, // '?' + 83, // '@' + 44, // 'A' + 48, // 'B' + 42, // 'C' + 43, // 'D' + 49, // 'E' + 46, // 'F' + 62, // 'G' + 61, // 'H' + 47, // 'I' + 69, // 'J' + 68, // 'K' + 58, // 'L' + 56, // 'M' + 55, // 'N' + 59, // 'O' + 51, // 'P' + 72, // 'Q' + 54, // 'R' + 45, // 'S' + 52, // 'T' + 64, // 'U' + 65, // 'V' + 63, // 'W' + 71, // 'X' + 67, // 'Y' + 70, // 'Z' + 77, // '[' + 121, // '\\' + 78, // ']' + 122, // '^' + 31, // '_' + 123, // '`' + 4, // 'a' + 25, // 'b' + 9, // 'c' + 17, // 'd' + 1, // 'e' + 26, // 'f' + 22, // 'g' + 13, // 'h' + 7, // 'i' + 50, // 'j' + 38, // 'k' + 14, // 'l' + 15, // 'm' + 10, // 'n' + 3, // 'o' + 8, // 'p' + 60, // 'q' + 6, // 'r' + 5, // 's' + 0, // 't' + 18, // 'u' + 33, // 'v' + 11, // 'w' + 41, // 'x' + 28, // 'y' + 53, // 'z' + 124, // '{' + 125, // '|' + 126, // '}' + 76, // '~' + 127, // '\x7f' + 128, // '\x80' + 129, // '\x81' + 130, // '\x82' + 131, // '\x83' + 132, // '\x84' + 133, // '\x85' + 134, // '\x86' + 135, // '\x87' + 136, // '\x88' + 137, // '\x89' + 138, // '\x8a' + 139, // '\x8b' + 140, // '\x8c' + 141, // '\x8d' + 142, // '\x8e' + 143, // '\x8f' + 144, // '\x90' + 145, // '\x91' + 146, // '\x92' + 147, // '\x93' + 148, // '\x94' + 149, // '\x95' + 150, // '\x96' + 151, // '\x97' + 152, // '\x98' + 153, // '\x99' + 154, // '\x9a' + 155, // '\x9b' + 156, // '\x9c' + 157, // '\x9d' + 158, // '\x9e' + 159, // '\x9f' + 160, // '\xa0' + 161, // '¡' + 162, // '¢' + 163, // '£' + 164, // '¤' + 165, // '¥' + 166, // '¦' + 167, // '§' + 168, // '¨' + 169, // '©' + 170, // 'ª' + 171, // '«' + 172, // '¬' + 173, // '\xad' + 174, // '®' + 175, // '¯' + 176, // '°' + 177, // '±' + 178, // '²' + 179, // '³' + 180, // '´' + 181, // 'µ' + 182, // '¶' + 183, // '·' + 184, // '¸' + 185, // '¹' + 186, // 'º' + 187, // '»' + 188, // '¼' + 189, // '½' + 190, // '¾' + 191, // '¿' + 192, // 'À' + 193, // 'Á' + 194, // 'Â' + 195, // 'Ã' + 196, // 'Ä' + 197, // 'Å' + 198, // 'Æ' + 199, // 'Ç' + 200, // 'È' + 201, // 'É' + 202, // 'Ê' + 203, // 'Ë' + 204, // 'Ì' + 205, // 'Í' + 206, // 'Î' + 207, // 'Ï' + 208, // 'Ð' + 209, // 'Ñ' + 210, // 'Ò' + 211, // 'Ó' + 212, // 'Ô' + 213, // 'Õ' + 214, // 'Ö' + 215, // '×' + 216, // 'Ø' + 217, // 'Ù' + 218, // 'Ú' + 219, // 'Û' + 220, // 'Ü' + 221, // 'Ý' + 222, // 'Þ' + 223, // 'ß' + 224, // 'à' + 225, // 'á' + 226, // 'â' + 227, // 'ã' + 228, // 'ä' + 229, // 'å' + 230, // 'æ' + 231, // 'ç' + 232, // 'è' + 233, // 'é' + 234, // 'ê' + 235, // 'ë' + 236, // 'ì' + 237, // 'í' + 238, // 'î' + 239, // 'ï' + 240, // 'ð' + 241, // 'ñ' + 242, // 'ò' + 243, // 'ó' + 244, // 'ô' + 245, // 'õ' + 246, // 'ö' + 247, // '÷' + 248, // 'ø' + 249, // 'ù' + 250, // 'ú' + 251, // 'û' + 252, // 'ü' + 253, // 'ý' + 254, // 'þ' + 255, // 'ÿ' }; const char COMMON_INPUTS_INV[] = { - 't', 'e', '/', 'o', 'a', 's', 'r', 'i', 'p', 'c', 'n', 'w', - '.', 'h', 'l', 'm', '-', 'd', 'u', '0', '1', '2', 'g', '=', - ':', 'b', 'f', '3', 'y', '5', '&', '_', '4', 'v', '9', '6', - '7', '8', 'k', '%', '?', 'x', 'C', 'D', 'A', 'S', 'F', 'I', - 'B', 'E', 'j', 'P', 'T', 'z', 'R', 'N', 'M', '+', 'L', 'O', - 'q', 'H', 'G', 'W', 'U', 'V', ',', 'Y', 'K', 'J', 'Z', 'X', - 'Q', ';', ')', '(', '~', '[', ']', '$', '!', '\'', '*', '@', - '\x00', '\x01', '\x02', '\x03', '\x04', '\x05', '\x06', '\x07', - '\x08', '\t', '\n', '\x0b', '\x0c', '\r', '\x0e', '\x0f', '\x10', - '\x11', '\x12', '\x13', '\x14', '\x15', '\x16', '\x17', '\x18', - '\x19', '\x1a', '\x1b', '\x1c', '\x1d', '\x1e', '\x1f', ' ', '"', - '#', '<', '>', '\\', '^', '`', '{', '|', '}','\x7f','\x80', - '\x81', '\x82', '\x83', '\x84', '\x85', '\x86', '\x87', '\x88', - '\x89', '\x8a', '\x8b', '\x8c', '\x8d', '\x8e', '\x8f', '\x90', - '\x91', '\x92', '\x93', '\x94', '\x95', '\x96', '\x97', '\x98', - '\x99', '\x9a', '\x9b', '\x9c', '\x9d', '\x9e', '\x9f', '\xa0', - '\xa1', '\xa2', '\xa3', '\xa4', '\xa5', '\xa6', '\xa7', '\xa8', - '\xa9', '\xaa', '\xab', '\xac', '\xad', '\xae', '\xaf', '\xb0', - '\xb1', '\xb2', '\xb3', '\xb4', '\xb5', '\xb6', '\xb7', '\xb8', - '\xb9', '\xba', '\xbb', '\xbc', '\xbd', '\xbe', '\xbf', '\xc0', - '\xc1', '\xc2', '\xc3', '\xc4', '\xc5', '\xc6', '\xc7', '\xc8', - '\xc9', '\xca', '\xcb', '\xcc', '\xcd', '\xce', '\xcf', '\xd0', - '\xd1', '\xd2', '\xd3', '\xd4', '\xd5', '\xd6', '\xd7', '\xd8', - '\xd9', '\xda', '\xdb', '\xdc', '\xdd', '\xde', '\xdf', '\xe0', - '\xe1', '\xe2', '\xe3', '\xe4', '\xe5', '\xe6', '\xe7', '\xe8', - '\xe9', '\xea', '\xeb', '\xec', '\xed', '\xee', '\xef', '\xf0', - '\xf1', '\xf2', '\xf3', '\xf4', '\xf5', '\xf6', '\xf7', '\xf8', - '\xf9', '\xfa', '\xfb', '\xfc', '\xfd', '\xfe', '\xff', + 't', + 'e', + '/', + 'o', + 'a', + 's', + 'r', + 'i', + 'p', + 'c', + 'n', + 'w', + '.', + 'h', + 'l', + 'm', + '-', + 'd', + 'u', + '0', + '1', + '2', + 'g', + '=', + ':', + 'b', + 'f', + '3', + 'y', + '5', + '&', + '_', + '4', + 'v', + '9', + '6', + '7', + '8', + 'k', + '%', + '?', + 'x', + 'C', + 'D', + 'A', + 'S', + 'F', + 'I', + 'B', + 'E', + 'j', + 'P', + 'T', + 'z', + 'R', + 'N', + 'M', + '+', + 'L', + 'O', + 'q', + 'H', + 'G', + 'W', + 'U', + 'V', + ',', + 'Y', + 'K', + 'J', + 'Z', + 'X', + 'Q', + ';', + ')', + '(', + '~', + '[', + ']', + '$', + '!', + '\'', + '*', + '@', + '\x00', + '\x01', + '\x02', + '\x03', + '\x04', + '\x05', + '\x06', + '\x07', + '\x08', + '\t', + '\n', + '\x0b', + '\x0c', + '\r', + '\x0e', + '\x0f', + '\x10', + '\x11', + '\x12', + '\x13', + '\x14', + '\x15', + '\x16', + '\x17', + '\x18', + '\x19', + '\x1a', + '\x1b', + '\x1c', + '\x1d', + '\x1e', + '\x1f', + ' ', + '"', + '#', + '<', + '>', + '\\', + '^', + '`', + '{', + '|', + '}', + '\x7f', + '\x80', + '\x81', + '\x82', + '\x83', + '\x84', + '\x85', + '\x86', + '\x87', + '\x88', + '\x89', + '\x8a', + '\x8b', + '\x8c', + '\x8d', + '\x8e', + '\x8f', + '\x90', + '\x91', + '\x92', + '\x93', + '\x94', + '\x95', + '\x96', + '\x97', + '\x98', + '\x99', + '\x9a', + '\x9b', + '\x9c', + '\x9d', + '\x9e', + '\x9f', + '\xa0', + '\xa1', + '\xa2', + '\xa3', + '\xa4', + '\xa5', + '\xa6', + '\xa7', + '\xa8', + '\xa9', + '\xaa', + '\xab', + '\xac', + '\xad', + '\xae', + '\xaf', + '\xb0', + '\xb1', + '\xb2', + '\xb3', + '\xb4', + '\xb5', + '\xb6', + '\xb7', + '\xb8', + '\xb9', + '\xba', + '\xbb', + '\xbc', + '\xbd', + '\xbe', + '\xbf', + '\xc0', + '\xc1', + '\xc2', + '\xc3', + '\xc4', + '\xc5', + '\xc6', + '\xc7', + '\xc8', + '\xc9', + '\xca', + '\xcb', + '\xcc', + '\xcd', + '\xce', + '\xcf', + '\xd0', + '\xd1', + '\xd2', + '\xd3', + '\xd4', + '\xd5', + '\xd6', + '\xd7', + '\xd8', + '\xd9', + '\xda', + '\xdb', + '\xdc', + '\xdd', + '\xde', + '\xdf', + '\xe0', + '\xe1', + '\xe2', + '\xe3', + '\xe4', + '\xe5', + '\xe6', + '\xe7', + '\xe8', + '\xe9', + '\xea', + '\xeb', + '\xec', + '\xed', + '\xee', + '\xef', + '\xf0', + '\xf1', + '\xf2', + '\xf3', + '\xf4', + '\xf5', + '\xf6', + '\xf7', + '\xf8', + '\xf9', + '\xfa', + '\xfb', + '\xfc', + '\xfd', + '\xfe', + '\xff', }; - diff --git a/source/libs/index/src/index_fst_counting_writer.c b/source/libs/index/src/index_fst_counting_writer.c index f0a7b04407..824021c9e3 100644 --- a/source/libs/index/src/index_fst_counting_writer.c +++ b/source/libs/index/src/index_fst_counting_writer.c @@ -12,10 +12,10 @@ * You should have received a copy of the GNU Affero General Public License * along with this program. If not, see . */ -#include "tutil.h" +#include "index_fst_counting_writer.h" #include "indexInt.h" #include "index_fst_util.h" -#include "index_fst_counting_writer.h" +#include "tutil.h" static int writeCtxDoWrite(WriterCtx *ctx, uint8_t *buf, int len) { if (ctx->offset + len > ctx->limit) { @@ -25,13 +25,13 @@ static int writeCtxDoWrite(WriterCtx *ctx, uint8_t *buf, int len) { if (ctx->type == TFile) { assert(len == tfWrite(ctx->file.fd, buf, len)); } else { - memcpy(ctx->mem.buf+ ctx->offset, buf, len); - } + memcpy(ctx->mem.buf + ctx->offset, buf, len); + } ctx->offset += len; return len; } static int writeCtxDoRead(WriterCtx *ctx, uint8_t *buf, int len) { - int nRead = 0; + int nRead = 0; if (ctx->type == TFile) { nRead = tfRead(ctx->file.fd, buf, len); } else { @@ -40,110 +40,116 @@ static int writeCtxDoRead(WriterCtx *ctx, uint8_t *buf, int len) { ctx->offset += nRead; return nRead; -} +} static int writeCtxDoFlush(WriterCtx *ctx) { if (ctx->type == TFile) { - //tfFsync(ctx->fd); - //tfFlush(ctx->file.fd); + // tfFsync(ctx->fd); + // tfFlush(ctx->file.fd); } else { // do nothing } return 1; } -WriterCtx* writerCtxCreate(WriterType type, const char *path, bool readOnly, int32_t capacity) { +WriterCtx *writerCtxCreate(WriterType type, const char *path, bool readOnly, int32_t capacity) { WriterCtx *ctx = calloc(1, sizeof(WriterCtx)); - if (ctx == NULL) { return NULL; } + if (ctx == NULL) { + return NULL; + } ctx->type = type; if (ctx->type == TFile) { // ugly code, refactor later ctx->file.readOnly = readOnly; if (readOnly == false) { - ctx->file.fd = tfOpenCreateWriteAppend(tmpFile); + ctx->file.fd = tfOpenCreateWriteAppend(tmpFile); } else { ctx->file.fd = tfOpenReadWrite(tmpFile); - } + } if (ctx->file.fd < 0) { goto END; - indexError("open file error %d", errno); + indexError("open file error %d", errno); } } else if (ctx->type == TMemory) { - ctx->mem.buf = calloc(1, sizeof(char) * capacity); - ctx->mem.capa = capacity; - } + ctx->mem.buf = calloc(1, sizeof(char) * capacity); + ctx->mem.capa = capacity; + } ctx->write = writeCtxDoWrite; - ctx->read = writeCtxDoRead; + ctx->read = writeCtxDoRead; ctx->flush = writeCtxDoFlush; ctx->offset = 0; - ctx->limit = capacity; + ctx->limit = capacity; return ctx; END: - if (ctx->type == TMemory) { free(ctx->mem.buf); } + if (ctx->type == TMemory) { + free(ctx->mem.buf); + } free(ctx); } void writerCtxDestroy(WriterCtx *ctx) { if (ctx->type == TMemory) { free(ctx->mem.buf); } else { - tfClose(ctx->file.fd); + tfClose(ctx->file.fd); } free(ctx); } - FstCountingWriter *fstCountingWriterCreate(void *wrt) { - FstCountingWriter *cw = calloc(1, sizeof(FstCountingWriter)); - if (cw == NULL) { return NULL; } - + FstCountingWriter *cw = calloc(1, sizeof(FstCountingWriter)); + if (cw == NULL) { + return NULL; + } + cw->wrt = wrt; - //(void *)(writerCtxCreate(TFile, readOnly)); - return cw; + //(void *)(writerCtxCreate(TFile, readOnly)); + return cw; } void fstCountingWriterDestroy(FstCountingWriter *cw) { - // free wrt object: close fd or free mem + // free wrt object: close fd or free mem fstCountingWriterFlush(cw); - //writerCtxDestroy((WriterCtx *)(cw->wrt)); + // writerCtxDestroy((WriterCtx *)(cw->wrt)); free(cw); } int fstCountingWriterWrite(FstCountingWriter *write, uint8_t *buf, uint32_t len) { - if (write == NULL) { return 0; } - // update checksum + if (write == NULL) { + return 0; + } + // update checksum // write data to file/socket or mem WriterCtx *ctx = write->wrt; - int nWrite = ctx->write(ctx, buf, len); + int nWrite = ctx->write(ctx, buf, len); assert(nWrite == len); write->count += len; - return len; -} + return len; +} int fstCountingWriterRead(FstCountingWriter *write, uint8_t *buf, uint32_t len) { - if (write == NULL) { return 0; } + if (write == NULL) { + return 0; + } WriterCtx *ctx = write->wrt; - int nRead = ctx->read(ctx, buf, len); - //assert(nRead == len); - return nRead; -} - -uint32_t fstCountingWriterMaskedCheckSum(FstCountingWriter *write) { - - return 0; + int nRead = ctx->read(ctx, buf, len); + // assert(nRead == len); + return nRead; } -int fstCountingWriterFlush(FstCountingWriter *write) { + +uint32_t fstCountingWriterMaskedCheckSum(FstCountingWriter *write) { return 0; } +int fstCountingWriterFlush(FstCountingWriter *write) { WriterCtx *ctx = write->wrt; ctx->flush(ctx); - //write->wtr->flush + // write->wtr->flush return 1; } -void fstCountingWriterPackUintIn(FstCountingWriter *writer, uint64_t n, uint8_t nBytes) { +void fstCountingWriterPackUintIn(FstCountingWriter *writer, uint64_t n, uint8_t nBytes) { assert(1 <= nBytes && nBytes <= 8); - uint8_t *buf = calloc(8, sizeof(uint8_t)); + uint8_t *buf = calloc(8, sizeof(uint8_t)); for (uint8_t i = 0; i < nBytes; i++) { - buf[i] = (uint8_t)n; + buf[i] = (uint8_t)n; n = n >> 8; } fstCountingWriterWrite(writer, buf, nBytes); @@ -154,7 +160,5 @@ void fstCountingWriterPackUintIn(FstCountingWriter *writer, uint64_t n, uint8_t uint8_t fstCountingWriterPackUint(FstCountingWriter *writer, uint64_t n) { uint8_t nBytes = packSize(n); fstCountingWriterPackUintIn(writer, n, nBytes); - return nBytes; -} - - + return nBytes; +} diff --git a/source/libs/index/src/index_fst_node.c b/source/libs/index/src/index_fst_node.c index 5abe8ad5a0..084f280bc3 100644 --- a/source/libs/index/src/index_fst_node.c +++ b/source/libs/index/src/index_fst_node.c @@ -16,30 +16,34 @@ FstBuilderNode *fstBuilderNodeDefault() { FstBuilderNode *bn = malloc(sizeof(FstBuilderNode)); - bn->isFinal = false; - bn->finalOutput = 0; - bn->trans = taosArrayInit(16, sizeof(FstTransition)); + bn->isFinal = false; + bn->finalOutput = 0; + bn->trans = taosArrayInit(16, sizeof(FstTransition)); return bn; } void fstBuilderNodeDestroy(FstBuilderNode *node) { - if (node == NULL) { return; } + if (node == NULL) { + return; + } taosArrayDestroy(node->trans); free(node); -} +} bool fstBuilderNodeEqual(FstBuilderNode *n1, FstBuilderNode *n2) { - if (n1 == n2) { return true; } - if (n1 == NULL || n2 == NULL ) { + if (n1 == n2) { + return true; + } + if (n1 == NULL || n2 == NULL) { return false; } if (n1->isFinal != n2->isFinal || n1->finalOutput != n2->finalOutput) { return false; } - size_t s1 = n1->trans? taosArrayGetSize(n1->trans): 0; - size_t s2 = n2->trans? taosArrayGetSize(n2->trans): 0; - if (s1 != s2) { + size_t s1 = n1->trans ? taosArrayGetSize(n1->trans) : 0; + size_t s2 = n2->trans ? taosArrayGetSize(n2->trans) : 0; + if (s1 != s2) { return false; } for (size_t i = 0; i < s1; i++) { @@ -47,69 +51,70 @@ bool fstBuilderNodeEqual(FstBuilderNode *n1, FstBuilderNode *n2) { FstTransition *t2 = taosArrayGet(n2->trans, i); if (t1->inp != t2->inp || t1->out != t2->out || t1->addr != t2->addr) { return false; - } + } } - + return true; } FstBuilderNode *fstBuilderNodeClone(FstBuilderNode *src) { - FstBuilderNode *node = malloc(sizeof(FstBuilderNode)); - if (node == NULL) { return NULL; } + FstBuilderNode *node = malloc(sizeof(FstBuilderNode)); + if (node == NULL) { + return NULL; + } - // - size_t sz = taosArrayGetSize(src->trans); + // + size_t sz = taosArrayGetSize(src->trans); SArray *trans = taosArrayInit(sz, sizeof(FstTransition)); for (size_t i = 0; i < sz; i++) { FstTransition *tran = taosArrayGet(src->trans, i); - taosArrayPush(trans, tran); + taosArrayPush(trans, tran); } - node->trans = trans; + node->trans = trans; node->isFinal = src->isFinal; node->finalOutput = src->finalOutput; return node; - } -// not destroy src, User's bussiness +// not destroy src, User's bussiness void fstBuilderNodeCloneFrom(FstBuilderNode *dst, FstBuilderNode *src) { - if (dst == NULL || src == NULL) { return; } + if (dst == NULL || src == NULL) { + return; + } - dst->isFinal = src->isFinal; + dst->isFinal = src->isFinal; dst->finalOutput = src->finalOutput; - //release free avoid mem leak - taosArrayDestroy(dst->trans); + // release free avoid mem leak + taosArrayDestroy(dst->trans); size_t sz = taosArrayGetSize(src->trans); - dst->trans = taosArrayInit(sz, sizeof(FstTransition)); + dst->trans = taosArrayInit(sz, sizeof(FstTransition)); for (size_t i = 0; i < sz; i++) { - FstTransition *trn = taosArrayGet(src->trans, i); + FstTransition *trn = taosArrayGet(src->trans, i); taosArrayPush(dst->trans, trn); - } + } } +// bool fstBuilderNodeCompileTo(FstBuilderNode *b, FstCountingWriter *wrt, CompiledAddr lastAddr, CompiledAddr +// startAddr) { -//bool fstBuilderNodeCompileTo(FstBuilderNode *b, FstCountingWriter *wrt, CompiledAddr lastAddr, CompiledAddr startAddr) { - - //size_t sz = taosArrayGetSize(b->trans); - //assert(sz < 256); - //if (FST_BUILDER_NODE_IS_FINAL(b) - // && FST_BUILDER_NODE_TRANS_ISEMPTY(b) - // && FST_BUILDER_NODE_FINALOUTPUT_ISZERO(b)) { - // return true; - //} else if (sz != 1 || b->isFinal) { - // // AnyTrans->Compile(w, addr, node); - //} else { - // FstTransition *tran = taosArrayGet(b->trans, 0); - // if (tran->addr == lastAddr && tran->out == 0) { - // //OneTransNext::compile(w, lastAddr, tran->inp); - // return true; - // } else { - // //OneTrans::Compile(w, lastAddr, *tran); - // return true; - // } - //} - //return true; -//} - - +// size_t sz = taosArrayGetSize(b->trans); +// assert(sz < 256); +// if (FST_BUILDER_NODE_IS_FINAL(b) +// && FST_BUILDER_NODE_TRANS_ISEMPTY(b) +// && FST_BUILDER_NODE_FINALOUTPUT_ISZERO(b)) { +// return true; +//} else if (sz != 1 || b->isFinal) { +// // AnyTrans->Compile(w, addr, node); +//} else { +// FstTransition *tran = taosArrayGet(b->trans, 0); +// if (tran->addr == lastAddr && tran->out == 0) { +// //OneTransNext::compile(w, lastAddr, tran->inp); +// return true; +// } else { +// //OneTrans::Compile(w, lastAddr, *tran); +// return true; +// } +//} +// return true; +//} diff --git a/source/libs/index/src/index_fst_registry.c b/source/libs/index/src/index_fst_registry.c index 8fb0dbfcaa..7bb2e72230 100644 --- a/source/libs/index/src/index_fst_registry.c +++ b/source/libs/index/src/index_fst_registry.c @@ -15,33 +15,33 @@ #include "index_fst_registry.h" - uint64_t fstRegistryHash(FstRegistry *registry, FstBuilderNode *bNode) { - //TODO(yihaoDeng): refactor later + // TODO(yihaoDeng): refactor later const uint64_t FNV_PRIME = 1099511628211; - uint64_t h = 14695981039346656037u; + uint64_t h = 14695981039346656037u; - h = (h ^ (uint64_t)bNode->isFinal) * FNV_PRIME; + h = (h ^ (uint64_t)bNode->isFinal) * FNV_PRIME; h = (h ^ (bNode)->finalOutput) * FNV_PRIME; - uint32_t sz = (uint32_t)taosArrayGetSize(bNode->trans); + uint32_t sz = (uint32_t)taosArrayGetSize(bNode->trans); for (uint32_t i = 0; i < sz; i++) { FstTransition *trn = taosArrayGet(bNode->trans, i); - h = (h ^ (uint64_t)(trn->inp)) * FNV_PRIME; - h = (h ^ (uint64_t)(trn->out)) * FNV_PRIME; - h = (h ^ (uint64_t)(trn->addr))* FNV_PRIME; - } - return h %(registry->tableSize); - + h = (h ^ (uint64_t)(trn->inp)) * FNV_PRIME; + h = (h ^ (uint64_t)(trn->out)) * FNV_PRIME; + h = (h ^ (uint64_t)(trn->addr)) * FNV_PRIME; + } + return h % (registry->tableSize); } static void fstRegistryCellSwap(SArray *arr, uint32_t a, uint32_t b) { size_t sz = taosArrayGetSize(arr); - if (a >= sz || b >= sz) { return; } + if (a >= sz || b >= sz) { + return; + } - FstRegistryCell *cell1 = (FstRegistryCell *)taosArrayGet(arr, a); + FstRegistryCell *cell1 = (FstRegistryCell *)taosArrayGet(arr, a); FstRegistryCell *cell2 = (FstRegistryCell *)taosArrayGet(arr, b); - FstRegistryCell t = {.addr = cell1->addr, .node = cell1->node}; + FstRegistryCell t = {.addr = cell1->addr, .node = cell1->node}; cell1->addr = cell2->addr; cell1->node = cell2->node; @@ -52,49 +52,55 @@ static void fstRegistryCellSwap(SArray *arr, uint32_t a, uint32_t b) { } static void fstRegistryCellPromote(SArray *arr, uint32_t start, uint32_t end) { - size_t sz = taosArrayGetSize(arr); - if (start >= sz && end >= sz) {return; } - + size_t sz = taosArrayGetSize(arr); + if (start >= sz && end >= sz) { + return; + } + assert(start >= end); int32_t s = (int32_t)start; int32_t e = (int32_t)end; - while(s > e) { + while (s > e) { fstRegistryCellSwap(arr, s - 1, s); s -= 1; } } -FstRegistry* fstRegistryCreate(uint64_t tableSize, uint64_t mruSize) { - FstRegistry *registry = malloc(sizeof(FstRegistry)); - if (registry == NULL) { return NULL ;} +FstRegistry *fstRegistryCreate(uint64_t tableSize, uint64_t mruSize) { + FstRegistry *registry = malloc(sizeof(FstRegistry)); + if (registry == NULL) { + return NULL; + } - uint64_t nCells = tableSize * mruSize; - SArray* tb = (SArray *)taosArrayInit(nCells, sizeof(FstRegistryCell)); - if (NULL == tb) { - free(registry); - return NULL; + uint64_t nCells = tableSize * mruSize; + SArray * tb = (SArray *)taosArrayInit(nCells, sizeof(FstRegistryCell)); + if (NULL == tb) { + free(registry); + return NULL; } for (uint64_t i = 0; i < nCells; i++) { - FstRegistryCell cell = {.addr = NONE_ADDRESS, .node = fstBuilderNodeDefault()}; - taosArrayPush(tb, &cell); + FstRegistryCell cell = {.addr = NONE_ADDRESS, .node = fstBuilderNodeDefault()}; + taosArrayPush(tb, &cell); } - - registry->table = tb; - registry->tableSize = tableSize; - registry->mruSize = mruSize; - return registry; + + registry->table = tb; + registry->tableSize = tableSize; + registry->mruSize = mruSize; + return registry; } void fstRegistryDestroy(FstRegistry *registry) { - if (registry == NULL) { return; } + if (registry == NULL) { + return; + } SArray *tb = registry->table; - size_t sz = taosArrayGetSize(tb); + size_t sz = taosArrayGetSize(tb); for (size_t i = 0; i < sz; i++) { - FstRegistryCell *cell = taosArrayGet(tb, i); - fstBuilderNodeDestroy(cell->node); + FstRegistryCell *cell = taosArrayGet(tb, i); + fstBuilderNodeDestroy(cell->node); } taosArrayDestroy(tb); free(registry); @@ -102,74 +108,70 @@ void fstRegistryDestroy(FstRegistry *registry) { FstRegistryEntry *fstRegistryGetEntry(FstRegistry *registry, FstBuilderNode *bNode) { if (taosArrayGetSize(registry->table) <= 0) { - return NULL; - } + return NULL; + } uint64_t bucket = fstRegistryHash(registry, bNode); - uint64_t start = registry->mruSize * bucket; - uint64_t end = start + registry->mruSize; - + uint64_t start = registry->mruSize * bucket; + uint64_t end = start + registry->mruSize; + FstRegistryEntry *entry = malloc(sizeof(FstRegistryEntry)); if (end - start == 1) { - FstRegistryCell *cell = taosArrayGet(registry->table, start); - //cell->isNode && + FstRegistryCell *cell = taosArrayGet(registry->table, start); + // cell->isNode && if (cell->addr != NONE_ADDRESS && fstBuilderNodeEqual(cell->node, bNode)) { - entry->state = FOUND; - entry->addr = cell->addr ; - return entry; + entry->state = FOUND; + entry->addr = cell->addr; + return entry; } else { - fstBuilderNodeCloneFrom(cell->node, bNode); - entry->state = NOTFOUND; - entry->cell = cell; // copy or not + fstBuilderNodeCloneFrom(cell->node, bNode); + entry->state = NOTFOUND; + entry->cell = cell; // copy or not } } else if (end - start == 2) { - FstRegistryCell *cell1 = taosArrayGet(registry->table, start); + FstRegistryCell *cell1 = taosArrayGet(registry->table, start); if (cell1->addr != NONE_ADDRESS && fstBuilderNodeEqual(cell1->node, bNode)) { - entry->state = FOUND; - entry->addr = cell1->addr; + entry->state = FOUND; + entry->addr = cell1->addr; return entry; - } - FstRegistryCell *cell2 = taosArrayGet(registry->table, start + 1); + } + FstRegistryCell *cell2 = taosArrayGet(registry->table, start + 1); if (cell2->addr != NONE_ADDRESS && fstBuilderNodeEqual(cell2->node, bNode)) { - entry->state = FOUND; - entry->addr = cell2->addr; + entry->state = FOUND; + entry->addr = cell2->addr; // must swap here - fstRegistryCellSwap(registry->table, start, start + 1); - return entry; + fstRegistryCellSwap(registry->table, start, start + 1); + return entry; } - //clone from bNode, refactor later + // clone from bNode, refactor later fstBuilderNodeCloneFrom(cell2->node, bNode); fstRegistryCellSwap(registry->table, start, start + 1); FstRegistryCell *cCell = taosArrayGet(registry->table, start); - entry->state = NOTFOUND; - entry->cell = cCell; + entry->state = NOTFOUND; + entry->cell = cCell; } else { - uint32_t i = start; + uint32_t i = start; for (; i < end; i++) { FstRegistryCell *cell = (FstRegistryCell *)taosArrayGet(registry->table, i); if (cell->addr != NONE_ADDRESS && fstBuilderNodeEqual(cell->node, bNode)) { - entry->state = FOUND; - entry->addr = cell->addr; + entry->state = FOUND; + entry->addr = cell->addr; fstRegistryCellPromote(registry->table, i, start); break; } - } + } if (i >= end) { - uint64_t last = end - 1; - FstRegistryCell *cell = (FstRegistryCell *)taosArrayGet(registry->table, last); - //clone from bNode, refactor later - fstBuilderNodeCloneFrom(cell->node, bNode); + uint64_t last = end - 1; + FstRegistryCell *cell = (FstRegistryCell *)taosArrayGet(registry->table, last); + // clone from bNode, refactor later + fstBuilderNodeCloneFrom(cell->node, bNode); fstRegistryCellPromote(registry->table, last, start); FstRegistryCell *cCell = taosArrayGet(registry->table, start); - entry->state = NOTFOUND; - entry->cell = cCell; + entry->state = NOTFOUND; + entry->cell = cCell; } - } + } return entry; } -void fstRegistryEntryDestroy(FstRegistryEntry *entry) { - free(entry); -} - - +void fstRegistryEntryDestroy(FstRegistryEntry *entry) { free(entry); } diff --git a/source/libs/index/src/index_fst_util.c b/source/libs/index/src/index_fst_util.c index c933c6d23b..597f7cc61a 100644 --- a/source/libs/index/src/index_fst_util.c +++ b/source/libs/index/src/index_fst_util.c @@ -15,37 +15,32 @@ #include "index_fst_util.h" #include "index_fst_common.h" - - -//A sentinel value used to indicate an empty final state -const CompiledAddr EMPTY_ADDRESS = 0; +// A sentinel value used to indicate an empty final state +const CompiledAddr EMPTY_ADDRESS = 0; /// A sentinel value used to indicate an invalid state. -const CompiledAddr NONE_ADDRESS = 1; +const CompiledAddr NONE_ADDRESS = 1; // This version number is written to every finite state transducer created by // this crate. When a finite state transducer is read, its version number is // checked against this value. -const uint64_t VERSION = 3; -// The threshold (in number of transitions) at which an index is created for -// a node's transitions. This speeds up lookup time at the expense of FST size +const uint64_t VERSION = 3; +// The threshold (in number of transitions) at which an index is created for +// a node's transitions. This speeds up lookup time at the expense of FST size const uint64_t TRANS_INDEX_THRESHOLD = 32; - -//uint8_t commonInput(uint8_t idx) { +// uint8_t commonInput(uint8_t idx) { // if (idx == 0) { return -1; } // else { -// return COMMON_INPUTS_INV[idx - 1]; +// return COMMON_INPUTS_INV[idx - 1]; // } -//} +//} // -//uint8_t commonIdx(uint8_t v, uint8_t max) { +// uint8_t commonIdx(uint8_t v, uint8_t max) { // uint8_t v = ((uint16_t)tCOMMON_INPUTS[v] + 1)%256; // return v > max ? 0: v; //} - - uint8_t packSize(uint64_t n) { if (n < (1u << 8)) { return 1; @@ -71,17 +66,17 @@ uint64_t unpackUint64(uint8_t *ch, uint8_t sz) { for (uint8_t i = 0; i < sz; i++) { n = n | (ch[i] << (8 * i)); } - return n; + return n; } uint8_t packDeltaSize(CompiledAddr nodeAddr, CompiledAddr transAddr) { if (transAddr == EMPTY_ADDRESS) { - return packSize(EMPTY_ADDRESS); + return packSize(EMPTY_ADDRESS); } else { return packSize(nodeAddr - transAddr); - } -} + } +} CompiledAddr unpackDelta(char *data, uint64_t len, uint64_t nodeAddr) { - uint64_t delta = unpackUint64(data, len); + uint64_t delta = unpackUint64(data, len); // delta_add = u64_to_usize if (delta == EMPTY_ADDRESS) { return EMPTY_ADDRESS; @@ -95,56 +90,53 @@ CompiledAddr unpackDelta(char *data, uint64_t len, uint64_t nodeAddr) { FstSlice fstSliceCreate(uint8_t *data, uint64_t len) { FstString *str = (FstString *)malloc(sizeof(FstString)); - str->ref = 1; - str->len = len; + str->ref = 1; + str->len = len; str->data = malloc(len * sizeof(uint8_t)); memcpy(str->data, data, len); - + FstSlice s = {.str = str, .start = 0, .end = len - 1}; return s; -} +} // just shallow copy FstSlice fstSliceCopy(FstSlice *s, int32_t start, int32_t end) { FstString *str = s->str; str->ref++; - //uint8_t *buf = fstSliceData(s, &alen); - //start = buf + start - (buf - s->start); - //end = buf + end - (buf - s->start); + // uint8_t *buf = fstSliceData(s, &alen); + // start = buf + start - (buf - s->start); + // end = buf + end - (buf - s->start); FstSlice t = {.str = str, .start = start + s->start, .end = end + s->start}; return t; } FstSlice fstSliceDeepCopy(FstSlice *s, int32_t start, int32_t end) { - - int32_t tlen = end - start + 1; - int32_t slen; - uint8_t *data = fstSliceData(s, &slen); + int32_t tlen = end - start + 1; + int32_t slen; + uint8_t *data = fstSliceData(s, &slen); assert(tlen <= slen); - uint8_t *buf = malloc(sizeof(uint8_t) * tlen); + uint8_t *buf = malloc(sizeof(uint8_t) * tlen); memcpy(buf, data + start, tlen); - - FstString *str = malloc(sizeof(FstString)); + + FstString *str = malloc(sizeof(FstString)); str->data = buf; - str->len = tlen; - str->ref = 1; + str->len = tlen; + str->ref = 1; FstSlice ans; - ans.str = str; - ans.start = 0; - ans.end = tlen - 1; - return ans; -} -bool fstSliceIsEmpty(FstSlice *s) { - return s->str == NULL || s->str->len == 0 || s->start < 0 || s->end < 0; + ans.str = str; + ans.start = 0; + ans.end = tlen - 1; + return ans; } +bool fstSliceIsEmpty(FstSlice *s) { return s->str == NULL || s->str->len == 0 || s->start < 0 || s->end < 0; } uint8_t *fstSliceData(FstSlice *s, int32_t *size) { - FstString *str = s->str; - if (size != NULL) { + FstString *str = s->str; + if (size != NULL) { *size = s->end - s->start + 1; - } - return str->data + s->start; + } + return str->data + s->start; } void fstSliceDestroy(FstSlice *s) { FstString *str = s->str; @@ -152,40 +144,45 @@ void fstSliceDestroy(FstSlice *s) { if (str->ref <= 0) { free(str->data); free(str); - s->str = NULL; + s->str = NULL; } } int fstSliceCompare(FstSlice *a, FstSlice *b) { - int32_t alen, blen; - uint8_t *aBuf = fstSliceData(a, &alen); - uint8_t *bBuf = fstSliceData(b, &blen); + int32_t alen, blen; + uint8_t *aBuf = fstSliceData(a, &alen); + uint8_t *bBuf = fstSliceData(b, &blen); uint32_t i, j; for (i = 0, j = 0; i < alen && j < blen; i++, j++) { uint8_t x = aBuf[i]; uint8_t y = bBuf[j]; - if (x == y) { continue;} - else if (x < y) { return -1; } - else { return 1; }; + if (x == y) { + continue; + } else if (x < y) { + return -1; + } else { + return 1; + }; + } + if (i < alen) { + return 1; + } else if (j < blen) { + return -1; + } else { + return 0; } - if (i < alen) { return 1; } - else if (j < blen) { return -1; } - else { return 0; } -} +} -//FstStack* fstStackCreate(size_t elemSize, StackFreeElem freeFn) { +// FstStack* fstStackCreate(size_t elemSize, StackFreeElem freeFn) { // FstStack *s = calloc(1, sizeof(FstStack)); // if (s == NULL) { return NULL; } -// s-> -// s->freeFn -// +// s-> +// s->freeFn +// //} -//void *fstStackPush(FstStack *s, void *elem); -//void *fstStackTop(FstStack *s); -//size_t fstStackLen(FstStack *s); -//void *fstStackGetAt(FstStack *s, size_t i); -//void fstStackDestory(FstStack *); - - - +// void *fstStackPush(FstStack *s, void *elem); +// void *fstStackTop(FstStack *s); +// size_t fstStackLen(FstStack *s); +// void *fstStackGetAt(FstStack *s, size_t i); +// void fstStackDestory(FstStack *); diff --git a/source/libs/index/src/index_tfile.c b/source/libs/index/src/index_tfile.c index bacadba716..19e8ff6750 100644 --- a/source/libs/index/src/index_tfile.c +++ b/source/libs/index/src/index_tfile.c @@ -16,58 +16,60 @@ //#include //#include #include "index_tfile.h" +#include "index.h" #include "index_fst.h" +#include "index_fst_counting_writer.h" #include "index_util.h" #include "taosdef.h" -#include "index.h" -#include "index_fst_counting_writer.h" - -static FORCE_INLINE int tfileLoadHeader(WriterCtx *ctx, TFileReadHeader *header) { - //TODO simple tfile header later - char buf[TFILE_HADER_PRE_SIZE]; - char *p = buf; - int64_t nread = ctx->read(ctx, buf, TFILE_HADER_PRE_SIZE); +static FORCE_INLINE int tfileReadLoadHeader(TFileReader *reader) { + // TODO simple tfile header later + char buf[TFILE_HADER_PRE_SIZE]; + char * p = buf; + TFileReadHeader *header = &reader->header; + int64_t nread = reader->ctx->read(reader->ctx, buf, TFILE_HADER_PRE_SIZE); assert(nread == TFILE_HADER_PRE_SIZE); - + memcpy(&header->suid, p, sizeof(header->suid)); p += sizeof(header->suid); memcpy(&header->version, p, sizeof(header->version)); p += sizeof(header->version); - int32_t colLen = 0; + int32_t colLen = 0; memcpy(&colLen, p, sizeof(colLen)); - assert(colLen < sizeof(header->colName)); - nread = ctx->read(ctx, header->colName, colLen); + assert(colLen < sizeof(header->colName)); + nread = reader->ctx->read(reader->ctx, header->colName, colLen); assert(nread == colLen); - nread = ctx->read(ctx, &header->colType, sizeof(header->colType)); - return 0; + nread = reader->ctx->read(reader->ctx, &header->colType, sizeof(header->colType)); + return 0; }; static int tfileGetFileList(const char *path, SArray *result) { - DIR *dir = opendir(path); - if (NULL == dir) { return -1; } + DIR *dir = opendir(path); + if (NULL == dir) { + return -1; + } struct dirent *entry; while ((entry = readdir(dir)) != NULL) { size_t len = strlen(entry->d_name); - char *buf = calloc(1, len + 1); - memcpy(buf, entry->d_name, len); - taosArrayPush(result, &buf); + char * buf = calloc(1, len + 1); + memcpy(buf, entry->d_name, len); + taosArrayPush(result, &buf); } closedir(dir); return 0; -} +} static void tfileDestroyFileName(void *elem) { char *p = *(char **)elem; free(p); -} +} static int tfileCompare(const void *a, const void *b) { const char *aName = *(char **)a; const char *bName = *(char **)b; - size_t aLen = strlen(aName); - size_t bLen = strlen(bName); + size_t aLen = strlen(aName); + size_t bLen = strlen(bName); return strncmp(aName, bName, aLen > bLen ? aLen : bLen); } // tfile name suid-colId-version.tindex @@ -75,126 +77,131 @@ static int tfileParseFileName(const char *filename, uint64_t *suid, int *colId, if (3 == sscanf(filename, "%" PRIu64 "-%d-%d.tindex", suid, colId, version)) { // read suid & colid & version success return 0; - } - return -1; -} + } + return -1; +} static void tfileSerialCacheKey(TFileCacheKey *key, char *buf) { SERIALIZE_MEM_TO_BUF(buf, key, suid); - SERIALIZE_VAR_TO_BUF(buf, '_', char); + SERIALIZE_VAR_TO_BUF(buf, '_', char); SERIALIZE_MEM_TO_BUF(buf, key, colType); - SERIALIZE_VAR_TO_BUF(buf, '_', char); + SERIALIZE_VAR_TO_BUF(buf, '_', char); SERIALIZE_MEM_TO_BUF(buf, key, version); - SERIALIZE_VAR_TO_BUF(buf, '_', char); + SERIALIZE_VAR_TO_BUF(buf, '_', char); SERIALIZE_STR_MEM_TO_BUF(buf, key, colName, key->nColName); } TFileCache *tfileCacheCreate(const char *path) { - TFileCache *tcache = calloc(1, sizeof(TFileCache)); - if (tcache == NULL) { return NULL; } - - tcache->tableCache = taosHashInit(8, taosGetDefaultHashFunction(TSDB_DATA_TYPE_BINARY), true, HASH_ENTRY_LOCK); - tcache->capacity = 64; - - SArray *files = taosArrayInit(4, sizeof(void *)); - tfileGetFileList(path, files); + TFileCache *tcache = calloc(1, sizeof(TFileCache)); + if (tcache == NULL) { + return NULL; + } + + tcache->tableCache = taosHashInit(8, taosGetDefaultHashFunction(TSDB_DATA_TYPE_BINARY), true, HASH_ENTRY_LOCK); + tcache->capacity = 64; + + SArray *files = taosArrayInit(4, sizeof(void *)); + tfileGetFileList(path, files); taosArraySort(files, tfileCompare); for (size_t i = 0; i < taosArrayGetSize(files); i++) { - char *file = taosArrayGetP(files, i); - uint64_t suid; - int colId, version; + char * file = taosArrayGetP(files, i); + uint64_t suid; + int colId, version; if (0 != tfileParseFileName(file, &suid, &colId, &version)) { goto End; - continue; - } + continue; + } - TFileReader *reader = calloc(1, sizeof(TFileReader)); - reader->ctx = writerCtxCreate(TFile, file, true, 1024 * 64); - if (reader->ctx == NULL) { - TFileReaderDestroy(reader); + WriterCtx *wc = writerCtxCreate(TFile, file, true, 1024 * 64); + if (wc == NULL) { indexError("failed to open index: %s", file); - goto End; + goto End; } - TFileReadHeader header = {0}; - if (0 != tfileLoadHeader(reader->ctx, &header)) { - TFileReaderDestroy(reader); - indexError("failed to load index header, index Id: %s", file); + TFileReader *reader = tfileReaderCreate(wc); + if (0 != tfileReadLoadHeader(reader)) { + TFileReaderDestroy(reader); + indexError("failed to load index header, index Id: %s", file); + goto End; } } taosArrayDestroyEx(files, tfileDestroyFileName); return tcache; End: + tfileCacheDestroy(tcache); taosArrayDestroyEx(files, tfileDestroyFileName); - return NULL; + return NULL; } void tfileCacheDestroy(TFileCache *tcache) { - - free(tcache); - + if (tcache == NULL) { + return; + } + + // free table cache + TFileReader **reader = taosHashIterate(tcache->tableCache, NULL); + while (reader) { + TFileReader *p = *reader; + indexInfo("drop table cache suid: %" PRIu64 ", colName: %s, colType: %d", p->header.suid, p->header.colName, + p->header.colType); + TFileReaderDestroy(p); + reader = taosHashIterate(tcache->tableCache, reader); + } + taosHashCleanup(tcache->tableCache); + free(tcache); } TFileReader *tfileCacheGet(TFileCache *tcache, TFileCacheKey *key) { - char buf[128] = {0}; + char buf[128] = {0}; tfileSerialCacheKey(key, buf); - TFileReader *reader = taosHashGet(tcache->tableCache, buf, strlen(buf)); - return reader; + TFileReader *reader = taosHashGet(tcache->tableCache, buf, strlen(buf)); + return reader; } void tfileCachePut(TFileCache *tcache, TFileCacheKey *key, TFileReader *reader) { char buf[128] = {0}; tfileSerialCacheKey(key, buf); - taosHashPut(tcache->tableCache, buf, strlen(buf), &reader, sizeof(void *)); + taosHashPut(tcache->tableCache, buf, strlen(buf), &reader, sizeof(void *)); return; -} - +} -TFileReader* tfileReaderCreate() { - +TFileReader *tfileReaderCreate(WriterCtx *ctx) { + TFileReader *reader = calloc(1, sizeof(TFileReader)); + if (reader == NULL) { + return NULL; + } + reader->ctx = ctx; + // T_REF_INC(reader); + return reader; } void TFileReaderDestroy(TFileReader *reader) { - if (reader == NULL) { return; } - - writerCtxDestroy(reader->ctx); + if (reader == NULL) { + return; + } + // T_REF_INC(reader); + writerCtxDestroy(reader->ctx); free(reader); } - TFileWriter *tfileWriterCreate(const char *suid, const char *colName); -void tfileWriterDestroy(TFileWriter *tw); - +void tfileWriterDestroy(TFileWriter *tw); IndexTFile *indexTFileCreate(const char *path) { - IndexTFile *tfile = calloc(1, sizeof(IndexTFile)); - tfile->cache = tfileCacheCreate(path); - + IndexTFile *tfile = calloc(1, sizeof(IndexTFile)); + tfile->cache = tfileCacheCreate(path); + return tfile; } -void IndexTFileDestroy(IndexTFile *tfile) { - free(tfile); -} - +void IndexTFileDestroy(IndexTFile *tfile) { free(tfile); } int indexTFileSearch(void *tfile, SIndexTermQuery *query, SArray *result) { IndexTFile *pTfile = (IndexTFile *)tfile; - - SIndexTerm *term = query->term; - TFileCacheKey key = {.suid = term->suid, - .colType = term->colType, - .version = 0, - .colName = term->colName, - .nColName= term->nColName}; - TFileReader *reader = tfileCacheGet(pTfile->cache, &key); + + SIndexTerm * term = query->term; + TFileCacheKey key = { + .suid = term->suid, .colType = term->colType, .version = 0, .colName = term->colName, .nColName = term->nColName}; + TFileReader *reader = tfileCacheGet(pTfile->cache, &key); return 0; } -int indexTFilePut(void *tfile, SIndexTerm *term, uint64_t uid) { - TFileWriterOpt wOpt = {.suid = term->suid, - .colType = term->colType, - .colName = term->colName, - .nColName= term->nColName, - .version = 1}; - - - - return 0; -} - - +int indexTFilePut(void *tfile, SIndexTerm *term, uint64_t uid) { + TFileWriterOpt wOpt = { + .suid = term->suid, .colType = term->colType, .colName = term->colName, .nColName = term->nColName, .version = 1}; + return 0; +} -- GitLab