diff --git a/source/libs/index/inc/index_fst.h b/source/libs/index/inc/index_fst.h
index 7ab9358cd15a33b051f67b0b17eb599491fab42d..37feb79ac809529b853b65e641a01fb65e1bd830 100644
--- a/source/libs/index/inc/index_fst.h
+++ b/source/libs/index/inc/index_fst.h
@@ -80,6 +80,9 @@ void fstBuilderInsertOutput(FstBuilder *b, FstSlice bs, Output in);
 OrderType fstBuilderCheckLastKey(FstBuilder *b, FstSlice bs, bool ckDup);
 void fstBuilderCompileFrom(FstBuilder *b, uint64_t istate);
 CompiledAddr fstBuilderCompile(FstBuilder *b, FstBuilderNode *bn);
+void* fstBuilderInsertInner(FstBuilder *b);  // writes the footer; returns the builder's writer
+void fstBuilderFinish(FstBuilder *b);        // calls fstBuilderInsertInner, discards its result
+



@@ -216,6 +219,15 @@ bool fstNodeFindInput(FstNode *node, uint8_t b, uint64_t *res);
 bool fstNodeCompile(FstNode *node, void *w, CompiledAddr lastAddr, CompiledAddr addr, FstBuilderNode *builderNode);
 FstSlice fstNodeAsSlice(FstNode *node);

+// ops
+
+typedef struct FstIndexedValue {
+  uint64_t index;
+  uint64_t value;
+} FstIndexedValue;
+
+FstLastTransition *fstLastTransitionCreate(uint8_t inp, Output out);
+void fstLastTransitionDestroy(FstLastTransition *trn);


 typedef struct FstMeta {
@@ -227,20 +239,20 @@ typedef struct FstMeta {
 } FstMeta;

 typedef struct Fst {
-  FstMeta meta;
-  void *data; //
+  FstMeta *meta;   // allocated by fstCreate, freed by fstDestroy
+  FstSlice *data;  // slice passed to fstCreate; stored as-is, not freed by fstDestroy
+  FstNode *root;   // NULL until fstGetNode creates it; destroyed by fstDestroy
 } Fst;

-// ops
-
-typedef struct FstIndexedValue {
-  uint64_t index;
-  uint64_t value;
-} FstIndexedValue;
-
-FstLastTransition *fstLastTransitionCreate(uint8_t inp, Output out);
-void fstLastTransitionDestroy(FstLastTransition *trn);
-
+// Read-side API over an encoded FST
+Fst* fstCreate(FstSlice *data);
+void fstDestroy(Fst *fst);
+bool fstGet(Fst *fst, FstSlice *b, Output *out);
+FstNode* fstGetNode(Fst *fst, CompiledAddr addr);
+FstType fstGetType(Fst *fst);
+CompiledAddr fstGetRootAddr(Fst *fst);
+Output fstEmptyFinalOutput(Fst *fst, bool *null);
+bool fstVerify(Fst *fst);


 #endif
diff --git a/source/libs/index/inc/index_fst_counting_writer.h b/source/libs/index/inc/index_fst_counting_writer.h
index 
fbb2f1cff7b643c710e0d2f551e7d2a2c05b543b..465080403482c6022900eed2e3a4fe98aecb30de 100644 --- a/source/libs/index/inc/index_fst_counting_writer.h +++ b/source/libs/index/inc/index_fst_counting_writer.h @@ -27,9 +27,11 @@ typedef struct FstCountingWriter { uint64_t fstCountingWriterWrite(FstCountingWriter *write, uint8_t *buf, uint32_t bufLen); -int FstCountingWriterFlush(FstCountingWriter *write); +int fstCountingWriterFlush(FstCountingWriter *write); +uint32_t fstCountingWriterMaskedCheckSum(FstCountingWriter *write); + FstCountingWriter *fstCountingWriterCreate(void *wtr); void fstCountingWriterDestroy(FstCountingWriter *w); diff --git a/source/libs/index/inc/index_fst_util.h b/source/libs/index/inc/index_fst_util.h index 5b84632418b6f11e70ef6bc1233b89d944e8ce2c..ff0946063d011bb3df9ed290dffff8c120a48055 100644 --- a/source/libs/index/inc/index_fst_util.h +++ b/source/libs/index/inc/index_fst_util.h @@ -32,9 +32,9 @@ extern const CompiledAddr EMPTY_ADDRESS; extern const CompiledAddr NONE_ADDRESS; // This version number is written to every finite state transducer created by -// this crate. When a finite state transducer is read, its version number is +// this library. When a finite state transducer is read, its version number is // checked against this value. -extern const uint64_t version; +extern const uint64_t VERSION; // The threshold (in number of transitions) at which an index is created for // a node's transitions. 
This speeds up lookup time at the expense of FST size diff --git a/source/libs/index/src/index_fst.c b/source/libs/index/src/index_fst.c index 8b9aa22fc689adb48887cdc493c69ffb266b769b..6c1ea8cfebcccdd2a7013a4c161caaee96ff8b27 100644 --- a/source/libs/index/src/index_fst.c +++ b/source/libs/index/src/index_fst.c @@ -14,7 +14,8 @@ */ #include "index_fst.h" - +#include "tcoding.h" +#include "tchecksum.h" static void fstPackDeltaIn(FstCountingWriter *wrt, CompiledAddr nodeAddr, CompiledAddr transAddr, uint8_t nBytes) { @@ -98,7 +99,7 @@ void fstUnFinishedNodesAddSuffix(FstUnFinishedNodes *nodes, FstSlice bs, Output FstBuilderNodeUnfinished *un = taosArrayGet(nodes->stack, sz); assert(un->last == NULL); - + //FstLastTransition *trn = malloc(sizeof(FstLastTransition)); //trn->inp = s->data[s->start]; @@ -146,24 +147,27 @@ uint64_t fstUnFinishedNodesFindCommPrefixAndSetOutput(FstUnFinishedNodes *node, size_t lsz = (size_t)(s->end - s->start + 1); // data len size_t ssz = taosArrayGetSize(node->stack); // stack size - uint64_t res = 0; - for (size_t i = 0; i < lsz && i < ssz; i++) { + uint64_t i = 0; + for (i = 0; i < lsz && i < ssz; i++) { FstBuilderNodeUnfinished *un = taosArrayGet(node->stack, i); - FstLastTransition *last = un->last; - if (last->inp == s->data[s->start + i]) { - uint64_t commPrefix = last->out; - uint64_t addPrefix = last->out - commPrefix; - out = out - commPrefix; - last->out = commPrefix; - if (addPrefix != 0) { - fstBuilderNodeUnfinishedAddOutputPrefix(un, addPrefix); - } + FstLastTransition *t = un->last; + uint64_t addPrefix = 0; + if (t && t->inp == s->data[s->start + i]) { + uint64_t commPrefix = MIN(t->out, *out); + uint64_t tAddPrefix = t->out - commPrefix; + (*out) = (*out) - commPrefix; + t->out = commPrefix; + addPrefix = tAddPrefix; } else { - break; + break; + } + if (addPrefix != 0) { + fstBuilderNodeUnfinishedAddOutputPrefix(un, addPrefix); + } } - return res; + return i; } @@ -771,16 +775,16 @@ void fstBuilderInsertOutput(FstBuilder 
*b, FstSlice bs, Output in) {
     return;
   }
   Output out;
-  uint64_t prefixLen;
-  if (in != 0) { //if let Some(in) = in
-     prefixLen = fstUnFinishedNodesFindCommPrefixAndSetOutput(b->unfinished, bs, in, &out);
-  } else {
-     prefixLen = fstUnFinishedNodesFindCommPrefix(b->unfinished, bs);
-     out = 0;
-  }
-
+  //if (in != 0) { //if let Some(in) = in
+  //   prefixLen = fstUnFinishedNodesFindCommPrefixAndSetOutput(b->unfinished, bs, in, &out);
+  //} else {
+  //   prefixLen = fstUnFinishedNodesFindCommPrefix(b->unfinished, bs);
+  //   out = 0;
+  //}
+  uint64_t prefixLen = fstUnFinishedNodesFindCommPrefixAndSetOutput(b->unfinished, bs, in, &out);
+
   if (prefixLen == FST_SLICE_LEN(s)) {
-    assert(out != 0);
+    assert(out == 0);
     return;
   }

@@ -849,6 +853,40 @@ CompiledAddr fstBuilderCompile(FstBuilder *b, FstBuilderNode *bn) {
   return b->lastAddr;
 }

+// Finalize the FST: run fstBuilderCompileFrom(b, 0), pop and compile the root
+// node, then write b->len, the root address and the masked checksum, and flush.
+// Returns the builder's counting writer (b->wrt).
+void* fstBuilderInsertInner(FstBuilder *b) {
+  fstBuilderCompileFrom(b, 0);
+  FstBuilderNode *rootNode = fstUnFinishedNodesPopRoot(b->unfinished);
+  CompiledAddr rootAddr = fstBuilderCompile(b, rootNode);
+
+  // taosEncodeFixed* stores through *buf and then advances *buf, so it must be
+  // given the address of a cursor pointer, not the address of the array itself.
+  uint8_t buf64[8] = {0};
+  void   *pBuf64 = buf64;
+  taosEncodeFixedU64(&pBuf64, b->len);
+  fstCountingWriterWrite(b->wrt, buf64, sizeof(buf64));
+
+  pBuf64 = buf64;  // rewind the cursor to reuse the scratch buffer
+  taosEncodeFixedU64(&pBuf64, rootAddr);
+  fstCountingWriterWrite(b->wrt, buf64, sizeof(buf64));
+
+  uint8_t  buf32[4] = {0};
+  void    *pBuf32 = buf32;
+  uint32_t sum = fstCountingWriterMaskedCheckSum(b->wrt);
+  taosEncodeFixedU32(&pBuf32, sum);
+  fstCountingWriterWrite(b->wrt, buf32, sizeof(buf32));
+
+  fstCountingWriterFlush(b->wrt);
+  return b->wrt;
+}
+
+// Finalize the builder; the writer returned by fstBuilderInsertInner is
+// discarded.
+void fstBuilderFinish(FstBuilder *b) {
+  fstBuilderInsertInner(b);
+}



@@ -894,4 +932,122 @@ void fstBuilderNodeUnfinishedAddOutputPrefix(FstBuilderNodeUnfinished *unNode, O
   return;
 }

+// Build an Fst handle over the encoded FST held in `slice`.
+// Layout read here: version (8 bytes) then type (8 bytes) at the head; at the
+// tail, walking backwards: checksum (4 bytes), root address, FST length.
+// The slice pointer is stored, not copied, and fstDestroy does not free it.
+// Returns NULL when the buffer is shorter than 36 bytes, the version is 0 or
+// greater than VERSION, or an allocation fails.
+Fst* fstCreate(FstSlice *slice) {
+  char    *buf = slice->data;
+  uint64_t skip = 0;
+  uint64_t len = slice->dLen;
+  if (len < 36) {  // version + type + length + root address + checksum
+    return NULL;
+  }
+
+  uint64_t version;
+  taosDecodeFixedU64(buf, &version);
+  skip += sizeof(version);
+  if (version == 0 || version > VERSION) {
+    return NULL;
+  }
+
+  uint64_t type;
+  taosDecodeFixedU64(buf + skip, &type);
+  skip += sizeof(type);
+
+  uint32_t checkSum = 0;
+  len -= sizeof(checkSum);
+  taosDecodeFixedU32(buf + len, &checkSum);
+
+  CompiledAddr rootAddr;
+  len -= sizeof(rootAddr);
+  taosDecodeFixedU64(buf + len, &rootAddr);
+
+  uint64_t fstLen;
+  len -= sizeof(fstLen);
+  taosDecodeFixedU64(buf + len, &fstLen);
+  // TODO: validate rootAddr
+
+  Fst *fst = (Fst *)calloc(1, sizeof(Fst));
+  if (fst == NULL) {
+    return NULL;
+  }
+
+  fst->meta = (FstMeta *)malloc(sizeof(FstMeta));
+  if (fst->meta == NULL) {
+    free(fst);
+    return NULL;
+  }
+
+  fst->meta->version = version;
+  fst->meta->rootAddr = rootAddr;
+  fst->meta->ty = type;
+  fst->meta->len = fstLen;
+  fst->meta->checkSum = checkSum;
+  fst->data = slice;
+  return fst;
+}
+
+// Release an Fst: frees its meta, destroys fst->root, then frees the handle.
+// The data slice is left untouched. A NULL fst is accepted.
+void fstDestroy(Fst *fst) {
+  if (fst) {
+    free(fst->meta);
+    fstNodeDestroy(fst->root);
+  }
+  free(fst);
+}
+
+// Lookup is not implemented yet: always returns false and leaves *out untouched.
+bool fstGet(Fst *fst, FstSlice *b, Output *out) {
+  return false;
+}
+
+// Return the node cached in fst->root, creating it from `addr` on first use.
+// NOTE(review): once a node is cached, `addr` is ignored and the cached node
+// is returned for every address -- confirm callers only pass the root address.
+FstNode* fstGetNode(Fst *fst, CompiledAddr addr) {
+  if (fst->root != NULL) {
+    return fst->root;
+  }
+  fst->root = fstNodeCreate(fst->meta->version, addr, fst->data);
+  return fst->root;
+}
+
+// Type value decoded from the FST header by fstCreate.
+FstType fstGetType(Fst *fst) {
+  return fst->meta->ty;
+}
+
+// Root address decoded from the FST trailer by fstCreate.
+CompiledAddr fstGetRootAddr(Fst *fst) {
+  return fst->meta->rootAddr;
+}
+
+// Final output of the root node.
+// Sets *null to false and returns the output when the root node is final;
+// otherwise (including when no root node is available) sets *null to true and returns 0.
+Output fstEmptyFinalOutput(Fst *fst, bool *null) {
+  // fst->root stays NULL until the first fstGetNode call, so load it through
+  // fstGetNode rather than reading the raw field.
+  FstNode *node = fstGetNode(fst, fst->meta->rootAddr);
+  if (node != NULL && FST_NODE_IS_FINAL(node)) {
+    *null = false;
+    return FST_NODE_FINAL_OUTPUT(node);
+  }
+  *null = true;
+  return 0;
+}
+
+// Validate the checksum stored at the tail of the FST data.
+// Returns true when taosCheckChecksumWhole accepts the buffer, false otherwise.
+bool fstVerify(Fst *fst) {
+  FstSlice *data = fst->data;
+  // taosCheckChecksumWhole returns non-zero when the trailing checksum
+  // matches, so a zero result is the failure case.
+  return taosCheckChecksumWhole(data->data, data->dLen) != 0;
+}
+

diff --git a/source/libs/index/src/index_fst_counting_writer.c b/source/libs/index/src/index_fst_counting_writer.c
index 
b253db986a1612fc1322e9d049bd9690a401a5d1..a0a2c380f170724984dae7bc9e12864d4ce16f24 100644 --- a/source/libs/index/src/index_fst_counting_writer.c +++ b/source/libs/index/src/index_fst_counting_writer.c @@ -37,6 +37,9 @@ uint64_t fstCountingWriterWrite(FstCountingWriter *write, uint8_t *buf, uint32_t return bufLen; } +uint32_t fstCountingWriterMaskedCheckSum(FstCountingWriter *write) { + return 0; +} int fstCountingWriterFlush(FstCountingWriter *write) { //write->wtr->flush return 1; diff --git a/source/libs/index/src/index_fst_util.c b/source/libs/index/src/index_fst_util.c index c4499f8e0d86d04097024dbbabf0edc6f0aca617..94bf650acd8037e63a5486ce83d8562b09e1f7fd 100644 --- a/source/libs/index/src/index_fst_util.c +++ b/source/libs/index/src/index_fst_util.c @@ -25,7 +25,7 @@ const CompiledAddr NONE_ADDRESS = 1; // This version number is written to every finite state transducer created by // this crate. When a finite state transducer is read, its version number is // checked against this value. -const uint64_t version = 3; +const uint64_t VERSION = 3; // The threshold (in number of transitions) at which an index is created for // a node's transitions. This speeds up lookup time at the expense of FST size