diff --git a/source/libs/index/inc/indexFst.h b/source/libs/index/inc/indexFst.h index c600ca4780e3f762a274c1b3dc0e71c5b3a447a3..4c5bca864a0be6b4926965fc1695a8e61d88feaa 100644 --- a/source/libs/index/inc/indexFst.h +++ b/source/libs/index/inc/indexFst.h @@ -53,7 +53,7 @@ typedef struct FstRange { } FstRange; typedef enum { OneTransNext, OneTrans, AnyTrans, EmptyFinal } State; -typedef enum { Ordered, OutOfOrdered, DuplicateKey } OrderType; +typedef enum { Ordered, OutOfOrdered, DuplicateKey } FstOrderType; FstBoundWithData* fstBoundStateCreate(FstBound type, FstSlice* data); bool fstBoundWithDataExceededBy(FstBoundWithData* bound, FstSlice* slice); @@ -106,7 +106,7 @@ bool fstBuilderInsert(FstBuilder* b, FstSlice bs, Output in); void fstBuilderCompileFrom(FstBuilder* b, uint64_t istate); void* fstBuilerIntoInner(FstBuilder* b); void fstBuilderFinish(FstBuilder* b); -OrderType fstBuilderCheckLastKey(FstBuilder* b, FstSlice bs, bool ckDup); +FstOrderType fstBuilderCheckLastKey(FstBuilder* b, FstSlice bs, bool ckDup); CompiledAddr fstBuilderCompile(FstBuilder* b, FstBuilderNode* bn); typedef struct FstTransitions { @@ -213,14 +213,18 @@ typedef struct FstNode { // If this node is final and has a terminal output value, then it is, returned. // Otherwise, a zero output is returned #define FST_NODE_FINAL_OUTPUT(node) node->finalOutput + // Returns true if and only if this node corresponds to a final or "match", // state in the finite state transducer. #define FST_NODE_IS_FINAL(node) node->isFinal + // Returns the number of transitions in this node, The maximum number of // transitions is 256. #define FST_NODE_LEN(node) node->nTrans + // Returns true if and only if this node has zero transitions. #define FST_NODE_IS_EMPTYE(node) (node->nTrans == 0) + // Return the address of this node. 
#define FST_NODE_ADDR(node) node->start @@ -277,6 +281,8 @@ FStmBuilder* fstSearch(Fst* fst, FAutoCtx* ctx); FStmStBuilder* fstSearchWithState(Fst* fst, FAutoCtx* ctx); // into stream to expand later +// + FStmSt* stmBuilderIntoStm(FStmBuilder* sb); bool fstVerify(Fst* fst); @@ -325,7 +331,8 @@ FStmBuilder* stmBuilderCreate(Fst* fst, FAutoCtx* aut); void stmBuilderDestroy(FStmBuilder* b); // set up bound range -// refator later: to simple code by marco +// refactor later: +// simplify this code with a macro void stmBuilderSetRange(FStmBuilder* b, FstSlice* val, RangeType type); #ifdef __cplusplus diff --git a/source/libs/index/src/indexFst.c b/source/libs/index/src/indexFst.c index 40de167a036c3b342e13dccec8a4093a53e37eb7..81ac4c9d40bb13cf06446e03375f91dd0c495af7 100644 --- a/source/libs/index/src/indexFst.c +++ b/source/libs/index/src/indexFst.c @@ -289,22 +289,14 @@ void fstStateCompileForAnyTrans(IdxFstFile* w, CompiledAddr addr, FstBuilderNode for (int32_t i = sz - 1; i >= 0; i--) { FstTransition* t = taosArrayGet(node->trans, i); idxFileWrite(w, (char*)&t->inp, 1); - // fstPackDeltaIn(w, addr, t->addr, tSize); } if (sz > TRANS_INDEX_THRESHOLD) { - // A value of 255 indicates that no transition exists for the byte - // at that index. (Except when there are 256 transitions.) Namely, - // any value greater than or equal to the number of transitions in - // this node indicates an absent transition. 
+ // A value of 255 indicates that no transition exists for the byte at that idx uint8_t* index = (uint8_t*)taosMemoryMalloc(sizeof(uint8_t) * 256); memset(index, 255, sizeof(uint8_t) * 256); - /// for (uint8_t i = 0; i < 256; i++) { - // index[i] = 255; - ///} for (int32_t i = 0; i < sz; i++) { FstTransition* t = taosArrayGet(node->trans, i); index[t->inp] = i; - // fstPackDeltaIn(w, addr, t->addr, tSize); } idxFileWrite(w, (char*)index, 256); taosMemoryFree(index); @@ -344,7 +336,7 @@ uint8_t fstStateCommInput(FstState* s, bool* null) { *null = true; return v; } - // v = 0 indicate that common_input is None + // 0 indicates that common_input is None return v == 0 ? 0 : COMMON_INPUT(v); } @@ -522,7 +514,6 @@ uint64_t fstStateNtrans(FstState* s, FstSlice* slice) { int32_t len; uint8_t* data = fstSliceData(slice, &len); n = data[len - 2]; - // n = data[slice->end - 1]; // data[data.len() - 2] return n == 1 ? 256 : n; // // "1" is never a normal legal value here, because if there, // is only 1 transition, // then it is encoded in the state byte } @@ -546,7 +537,6 @@ uint64_t fstStateFindInput(FstState* s, FstNode* node, uint8_t b, bool* null) { int32_t dlen = 0; uint8_t* data = fstSliceData(slice, &dlen); uint64_t i = data[at + b]; - // uint64_t i = slice->data[slice->start + at + b]; if (i >= node->nTrans) { *null = true; } @@ -558,16 +548,15 @@ uint64_t fstStateFindInput(FstState* s, FstNode* node, uint8_t b, bool* null) { FstSlice t = fstSliceCopy(slice, start, end - 1); int32_t len = 0; uint8_t* data = fstSliceData(&t, &len); - int i = 0; - for (; i < len; i++) { + for (int i = 0; i < len; i++) { uint8_t v = data[i]; if (v == b) { fstSliceDestroy(&t); return node->nTrans - i - 1; // bug } } - if (i == len) { - *null = true; - } + // A match returns from inside the loop, so reaching this point means no + // transition matched; this also covers an empty slice (len == 0). + *null = true; fstSliceDestroy(&t); } @@ -737,16 +726,13 @@ bool fstNodeCompile(FstNode* node, void* w, CompiledAddr lastAddr, CompiledAddr return true; } else if (sz != 1 || builderNode->isFinal) { 
fstStateCompileForAnyTrans(w, addr, builderNode); - // AnyTrans->Compile(w, addr, node); } else { FstTransition* tran = taosArrayGet(builderNode->trans, 0); if (tran->addr == lastAddr && tran->out == 0) { fstStateCompileForOneTransNext(w, addr, tran->inp); - // OneTransNext::compile(w, lastAddr, tran->inp); return true; } else { fstStateCompileForOneTrans(w, addr, tran); - // OneTrans::Compile(w, lastAddr, *tran); return true; } } @@ -795,7 +781,7 @@ void fstBuilderDestroy(FstBuilder* b) { } bool fstBuilderInsert(FstBuilder* b, FstSlice bs, Output in) { - OrderType t = fstBuilderCheckLastKey(b, bs, true); + FstOrderType t = fstBuilderCheckLastKey(b, bs, true); if (t == Ordered) { // add log info fstBuilderInsertOutput(b, bs, in); @@ -812,12 +798,6 @@ void fstBuilderInsertOutput(FstBuilder* b, FstSlice bs, Output in) { fstUnFinishedNodesSetRootOutput(b->unfinished, in); return; } - // if (in != 0) { //if let Some(in) = in - // prefixLen = fstUnFinishedNodesFindCommPrefixAndSetOutput(b->unfinished, bs, in, &out); - //} else { - // prefixLen = fstUnFinishedNodesFindCommPrefix(b->unfinished, bs); - // out = 0; - //} Output out; uint64_t prefixLen = fstUnFinishedNodesFindCommPrefixAndSetOutput(b->unfinished, bs, in, &out); @@ -835,7 +815,7 @@ void fstBuilderInsertOutput(FstBuilder* b, FstSlice bs, Output in) { return; } -OrderType fstBuilderCheckLastKey(FstBuilder* b, FstSlice bs, bool ckDup) { +FstOrderType fstBuilderCheckLastKey(FstBuilder* b, FstSlice bs, bool ckDup) { FstSlice* input = &bs; if (fstSliceIsEmpty(&b->last)) { fstSliceDestroy(&b->last); @@ -867,7 +847,6 @@ void fstBuilderCompileFrom(FstBuilder* b, uint64_t istate) { fstBuilderNodeDestroy(bn); assert(addr != NONE_ADDRESS); - // fstBuilderNodeDestroy(n); } fstUnFinishedNodesTopLastFreeze(b->unfinished, addr); return; @@ -1044,8 +1023,6 @@ void fstDestroy(Fst* fst) { } bool fstGet(Fst* fst, FstSlice* b, Output* out) { - // dec lock range - // taosThreadMutexLock(&fst->mtx); FstNode* root = fstGetRoot(fst); 
Output tOut = 0; int32_t len; @@ -1058,7 +1035,6 @@ bool fstGet(Fst* fst, FstSlice* b, Output* out) { uint8_t inp = data[i]; Output res = 0; if (false == fstNodeFindInput(root, inp, &res)) { - // taosThreadMutexUnlock(&fst->mtx); return false; } @@ -1069,7 +1045,6 @@ bool fstGet(Fst* fst, FstSlice* b, Output* out) { taosArrayPush(nodes, &root); } if (!FST_NODE_IS_FINAL(root)) { - // taosThreadMutexUnlock(&fst->mtx); return false; } else { tOut = tOut + FST_NODE_FINAL_OUTPUT(root); @@ -1080,8 +1055,6 @@ bool fstGet(Fst* fst, FstSlice* b, Output* out) { fstNodeDestroy(*node); } taosArrayDestroy(nodes); - // fst->root = NULL; - // taosThreadMutexUnlock(&fst->mtx); *out = tOut; return true; } @@ -1231,20 +1204,17 @@ bool stmStSeekMin(FStmSt* sws, FstBoundWithData* min) { FstNode* node = fstGetRoot(sws->fst); Output out = 0; - // void* autState = sws->aut->start(); - void* autState = automFuncs[aut->type].start(aut); + void* autState = automFuncs[aut->type].start(aut); int32_t len; uint8_t* data = fstSliceData(key, &len); for (uint32_t i = 0; i < len; i++) { uint8_t b = data[i]; uint64_t res = 0; - bool find = fstNodeFindInput(node, b, &res); - if (find == true) { + if (fstNodeFindInput(node, b, &res)) { FstTransition trn; fstNodeGetTransitionAt(node, res, &trn); void* preState = autState; - // autState = sws->aut->accept(preState, b); autState = automFuncs[aut->type].accept(aut, preState, b); taosArrayPush(sws->inp, &b); @@ -1379,14 +1349,14 @@ FStmStRslt* stmStNextWith(FStmSt* sws, StreamCallback callback) { return NULL; } -FStmStRslt* swsResultCreate(FstSlice* data, FstOutput fOut, void* state) { +FStmStRslt* swsResultCreate(FstSlice* data, FstOutput out, void* state) { FStmStRslt* result = taosMemoryCalloc(1, sizeof(FStmStRslt)); if (result == NULL) { return NULL; } result->data = fstSliceCopy(data, 0, FST_SLICE_LEN(data) - 1); - result->out = fOut; + result->out = out; result->state = state; return result; } diff --git a/source/libs/index/src/indexFstUtil.c 
b/source/libs/index/src/indexFstUtil.c index 5bda703b1f0d3e825342d9c967523b632b175984..b1a919b365742791fc7daaae1f6ea47f9b012477 100644 --- a/source/libs/index/src/indexFstUtil.c +++ b/source/libs/index/src/indexFstUtil.c @@ -21,12 +21,12 @@ const CompiledAddr EMPTY_ADDRESS = 0; const CompiledAddr NONE_ADDRESS = 1; // This version number is written to every finite state transducer created by -// this crate. When a finite state transducer is read, its version number is +// this version. When a finite state transducer is read, its version number is // checked against this value. const uint64_t VERSION = 3; + // The threshold (in number of transitions) at which an index is created for // a node's transitions. This speeds up lookup time at the expense of FST size - const uint64_t TRANS_INDEX_THRESHOLD = 32; uint8_t packSize(uint64_t n) { @@ -52,7 +52,6 @@ uint8_t packSize(uint64_t n) { uint64_t unpackUint64(uint8_t* ch, uint8_t sz) { uint64_t n = 0; for (uint8_t i = 0; i < sz; i++) { - // n = n | (ch[i] << (8 * i)); } return n; diff --git a/source/libs/index/src/indexTfile.c b/source/libs/index/src/indexTfile.c index 70cdedd91782d0ae39240ac29f47cb98224c401a..b91c5a785be1d99fd7c0d5dd66668702e49a01a8 100644 --- a/source/libs/index/src/indexTfile.c +++ b/source/libs/index/src/indexTfile.c @@ -23,7 +23,7 @@ #include "tcoding.h" #include "tcompare.h" -const static uint64_t tfileMagicNumber = 0xdb4775248b80fb57ull; +const static uint64_t FILE_MAGIC_NUMBER = 0xdb4775248b80fb57ull; typedef struct TFileFstIter { FStmBuilder* fb; @@ -548,9 +548,6 @@ int tfileWriterPut(TFileWriter* tw, void* data, bool order) { taosArraySortPWithExt((SArray*)(data), tfileValueCompare, &fn); } - int32_t bufLimit = 64 * 4096, offset = 0; - // char* buf = taosMemoryCalloc(1, sizeof(char) * bufLimit); - // char* p = buf; int32_t sz = taosArrayGetSize((SArray*)data); int32_t fstOffset = tw->offset; @@ -564,6 +561,9 @@ int tfileWriterPut(TFileWriter* tw, void* data, bool order) { } 
tfileWriteFstOffset(tw, fstOffset); + int32_t bufCap = 8 * 1024; + char* buf = taosMemoryCalloc(1, bufCap); + for (size_t i = 0; i < sz; i++) { TFileValue* v = taosArrayGetP((SArray*)data, i); @@ -571,14 +571,18 @@ int tfileWriterPut(TFileWriter* tw, void* data, bool order) { // check buf has enough space or not int32_t ttsz = TF_TABLE_TATOAL_SIZE(tbsz); - char* buf = taosMemoryCalloc(1, ttsz * sizeof(char)); + if (bufCap < ttsz) { + bufCap = ttsz; + buf = taosMemoryRealloc(buf, bufCap); + } char* p = buf; tfileSerialTableIdsToBuf(p, v->tableId); tw->ctx->write(tw->ctx, buf, ttsz); v->offset = tw->offset; tw->offset += ttsz; - taosMemoryFree(buf); + memset(buf, 0, bufCap); /* zero the whole reused buffer; sizeof(buf) is only the size of the pointer */ } + taosMemoryFree(buf); tw->fb = fstBuilderCreate(tw->ctx, 0); if (tw->fb == NULL) { @@ -869,13 +873,13 @@ static int tfileWriteData(TFileWriter* write, TFileValue* tval) { //} } static int tfileWriteFooter(TFileWriter* write) { - char buf[sizeof(tfileMagicNumber) + 1] = {0}; + char buf[sizeof(FILE_MAGIC_NUMBER) + 1] = {0}; void* pBuf = (void*)buf; - taosEncodeFixedU64((void**)(void*)&pBuf, tfileMagicNumber); + taosEncodeFixedU64((void**)(void*)&pBuf, FILE_MAGIC_NUMBER); int nwrite = write->ctx->write(write->ctx, buf, (int32_t)strlen(buf)); indexInfo("tfile write footer size: %d", write->ctx->size(write->ctx)); - assert(nwrite == sizeof(tfileMagicNumber)); + assert(nwrite == sizeof(FILE_MAGIC_NUMBER)); return nwrite; } static int tfileReaderLoadHeader(TFileReader* reader) { @@ -899,7 +903,7 @@ static int tfileReaderLoadFst(TFileReader* reader) { int size = ctx->size(ctx); // current load fst into memory, refactor it later - int fstSize = size - reader->header.fstOffset - sizeof(tfileMagicNumber); + int fstSize = size - reader->header.fstOffset - sizeof(FILE_MAGIC_NUMBER); char* buf = taosMemoryCalloc(1, fstSize); if (buf == NULL) { return -1; @@ -959,9 +963,8 @@ static int tfileReaderVerify(TFileReader* reader) { IFileCtx* ctx = reader->ctx; uint64_t tMagicNumber = 0; - - char 
buf[sizeof(tMagicNumber) + 1] = {0}; - int size = ctx->size(ctx); + char buf[sizeof(tMagicNumber) + 1] = {0}; + int size = ctx->size(ctx); if (size < sizeof(tMagicNumber) || size <= sizeof(reader->header)) { return -1; @@ -970,25 +973,25 @@ static int tfileReaderVerify(TFileReader* reader) { } taosDecodeFixedU64(buf, &tMagicNumber); - return tMagicNumber == tfileMagicNumber ? 0 : -1; + return tMagicNumber == FILE_MAGIC_NUMBER ? 0 : -1; } -void tfileReaderRef(TFileReader* reader) { - if (reader == NULL) { +void tfileReaderRef(TFileReader* rd) { + if (rd == NULL) { return; } - int ref = T_REF_INC(reader); + int ref = T_REF_INC(rd); UNUSED(ref); } -void tfileReaderUnRef(TFileReader* reader) { - if (reader == NULL) { +void tfileReaderUnRef(TFileReader* rd) { + if (rd == NULL) { return; } - int ref = T_REF_DEC(reader); + int ref = T_REF_DEC(rd); if (ref == 0) { // do nothing - tfileReaderDestroy(reader); + tfileReaderDestroy(rd); } }