提交 d00158ff 编写于 作者: dengyihao's avatar dengyihao

refactor idx code

上级 856990c6
......@@ -53,7 +53,7 @@ typedef struct FstRange {
} FstRange;
typedef enum { OneTransNext, OneTrans, AnyTrans, EmptyFinal } State;
typedef enum { Ordered, OutOfOrdered, DuplicateKey } OrderType;
typedef enum { Ordered, OutOfOrdered, DuplicateKey } FstOrderType;
FstBoundWithData* fstBoundStateCreate(FstBound type, FstSlice* data);
bool fstBoundWithDataExceededBy(FstBoundWithData* bound, FstSlice* slice);
......@@ -106,7 +106,7 @@ bool fstBuilderInsert(FstBuilder* b, FstSlice bs, Output in);
void fstBuilderCompileFrom(FstBuilder* b, uint64_t istate);
void* fstBuilerIntoInner(FstBuilder* b);
void fstBuilderFinish(FstBuilder* b);
OrderType fstBuilderCheckLastKey(FstBuilder* b, FstSlice bs, bool ckDup);
FstOrderType fstBuilderCheckLastKey(FstBuilder* b, FstSlice bs, bool ckDup);
CompiledAddr fstBuilderCompile(FstBuilder* b, FstBuilderNode* bn);
typedef struct FstTransitions {
......@@ -213,14 +213,18 @@ typedef struct FstNode {
// Accessors for FstNode fields. Each macro parenthesizes its argument and its
// whole expansion so that it stays correct when given a compound expression
// (e.g. `nodes + 1`) or when used inside a larger expression.

// If this node is final and has a terminal output value, then it is returned.
// Otherwise, a zero output is returned.
#define FST_NODE_FINAL_OUTPUT(node) ((node)->finalOutput)
// Returns true if and only if this node corresponds to a final or "match"
// state in the finite state transducer.
#define FST_NODE_IS_FINAL(node) ((node)->isFinal)
// Returns the number of transitions in this node. The maximum number of
// transitions is 256.
#define FST_NODE_LEN(node) ((node)->nTrans)
// Returns true if and only if this node has zero transitions.
#define FST_NODE_IS_EMPTYE(node) ((node)->nTrans == 0)
// Returns the address of this node.
#define FST_NODE_ADDR(node) ((node)->start)
......@@ -277,6 +281,8 @@ FStmBuilder* fstSearch(Fst* fst, FAutoCtx* ctx);
FStmStBuilder* fstSearchWithState(Fst* fst, FAutoCtx* ctx);
// into stream to expand later
//
FStmSt* stmBuilderIntoStm(FStmBuilder* sb);
bool fstVerify(Fst* fst);
......@@ -325,7 +331,8 @@ FStmBuilder* stmBuilderCreate(Fst* fst, FAutoCtx* aut);
void stmBuilderDestroy(FStmBuilder* b);
// set up bound range
// refactor later: simplify the code with a macro
// refactor later
// simplify the code with a macro
void stmBuilderSetRange(FStmBuilder* b, FstSlice* val, RangeType type);
#ifdef __cplusplus
......
......@@ -289,22 +289,14 @@ void fstStateCompileForAnyTrans(IdxFstFile* w, CompiledAddr addr, FstBuilderNode
for (int32_t i = sz - 1; i >= 0; i--) {
FstTransition* t = taosArrayGet(node->trans, i);
idxFileWrite(w, (char*)&t->inp, 1);
// fstPackDeltaIn(w, addr, t->addr, tSize);
}
if (sz > TRANS_INDEX_THRESHOLD) {
// A value of 255 indicates that no transition exists for the byte
// at that index. (Except when there are 256 transitions.) Namely,
// any value greater than or equal to the number of transitions in
// this node indicates an absent transition.
// A value of 255 indicates that no transition exists for the byte at that idx
uint8_t* index = (uint8_t*)taosMemoryMalloc(sizeof(uint8_t) * 256);
memset(index, 255, sizeof(uint8_t) * 256);
/// for (uint8_t i = 0; i < 256; i++) {
// index[i] = 255;
///}
for (int32_t i = 0; i < sz; i++) {
FstTransition* t = taosArrayGet(node->trans, i);
index[t->inp] = i;
// fstPackDeltaIn(w, addr, t->addr, tSize);
}
idxFileWrite(w, (char*)index, 256);
taosMemoryFree(index);
......@@ -344,7 +336,7 @@ uint8_t fstStateCommInput(FstState* s, bool* null) {
*null = true;
return v;
}
// v = 0 indicates that common_input is None
// 0 indicates that common_input is None
return v == 0 ? 0 : COMMON_INPUT(v);
}
......@@ -522,7 +514,6 @@ uint64_t fstStateNtrans(FstState* s, FstSlice* slice) {
int32_t len;
uint8_t* data = fstSliceData(slice, &len);
n = data[len - 2];
// n = data[slice->end - 1]; // data[data.len() - 2]
return n == 1 ? 256 : n;  // "1" is never a normal legal value here, because if there is only 1 transition,
// then it is encoded in the state byte
}
......@@ -546,7 +537,6 @@ uint64_t fstStateFindInput(FstState* s, FstNode* node, uint8_t b, bool* null) {
int32_t dlen = 0;
uint8_t* data = fstSliceData(slice, &dlen);
uint64_t i = data[at + b];
// uint64_t i = slice->data[slice->start + at + b];
if (i >= node->nTrans) {
*null = true;
}
......@@ -558,16 +548,15 @@ uint64_t fstStateFindInput(FstState* s, FstNode* node, uint8_t b, bool* null) {
FstSlice t = fstSliceCopy(slice, start, end - 1);
int32_t len = 0;
uint8_t* data = fstSliceData(&t, &len);
int i = 0;
for (; i < len; i++) {
for (int i = 0; i < len; i++) {
uint8_t v = data[i];
if (v == b) {
fstSliceDestroy(&t);
return node->nTrans - i - 1; // bug
}
}
if (i == len) {
*null = true;
if (i + 1 == len) {
*null = true;
}
}
fstSliceDestroy(&t);
}
......@@ -737,16 +726,13 @@ bool fstNodeCompile(FstNode* node, void* w, CompiledAddr lastAddr, CompiledAddr
return true;
} else if (sz != 1 || builderNode->isFinal) {
fstStateCompileForAnyTrans(w, addr, builderNode);
// AnyTrans->Compile(w, addr, node);
} else {
FstTransition* tran = taosArrayGet(builderNode->trans, 0);
if (tran->addr == lastAddr && tran->out == 0) {
fstStateCompileForOneTransNext(w, addr, tran->inp);
// OneTransNext::compile(w, lastAddr, tran->inp);
return true;
} else {
fstStateCompileForOneTrans(w, addr, tran);
// OneTrans::Compile(w, lastAddr, *tran);
return true;
}
}
......@@ -795,7 +781,7 @@ void fstBuilderDestroy(FstBuilder* b) {
}
bool fstBuilderInsert(FstBuilder* b, FstSlice bs, Output in) {
OrderType t = fstBuilderCheckLastKey(b, bs, true);
FstOrderType t = fstBuilderCheckLastKey(b, bs, true);
if (t == Ordered) {
// add log info
fstBuilderInsertOutput(b, bs, in);
......@@ -812,12 +798,6 @@ void fstBuilderInsertOutput(FstBuilder* b, FstSlice bs, Output in) {
fstUnFinishedNodesSetRootOutput(b->unfinished, in);
return;
}
// if (in != 0) { //if let Some(in) = in
// prefixLen = fstUnFinishedNodesFindCommPrefixAndSetOutput(b->unfinished, bs, in, &out);
//} else {
// prefixLen = fstUnFinishedNodesFindCommPrefix(b->unfinished, bs);
// out = 0;
//}
Output out;
uint64_t prefixLen = fstUnFinishedNodesFindCommPrefixAndSetOutput(b->unfinished, bs, in, &out);
......@@ -835,7 +815,7 @@ void fstBuilderInsertOutput(FstBuilder* b, FstSlice bs, Output in) {
return;
}
OrderType fstBuilderCheckLastKey(FstBuilder* b, FstSlice bs, bool ckDup) {
FstOrderType fstBuilderCheckLastKey(FstBuilder* b, FstSlice bs, bool ckDup) {
FstSlice* input = &bs;
if (fstSliceIsEmpty(&b->last)) {
fstSliceDestroy(&b->last);
......@@ -867,7 +847,6 @@ void fstBuilderCompileFrom(FstBuilder* b, uint64_t istate) {
fstBuilderNodeDestroy(bn);
assert(addr != NONE_ADDRESS);
// fstBuilderNodeDestroy(n);
}
fstUnFinishedNodesTopLastFreeze(b->unfinished, addr);
return;
......@@ -1044,8 +1023,6 @@ void fstDestroy(Fst* fst) {
}
bool fstGet(Fst* fst, FstSlice* b, Output* out) {
// dec lock range
// taosThreadMutexLock(&fst->mtx);
FstNode* root = fstGetRoot(fst);
Output tOut = 0;
int32_t len;
......@@ -1058,7 +1035,6 @@ bool fstGet(Fst* fst, FstSlice* b, Output* out) {
uint8_t inp = data[i];
Output res = 0;
if (false == fstNodeFindInput(root, inp, &res)) {
// taosThreadMutexUnlock(&fst->mtx);
return false;
}
......@@ -1069,7 +1045,6 @@ bool fstGet(Fst* fst, FstSlice* b, Output* out) {
taosArrayPush(nodes, &root);
}
if (!FST_NODE_IS_FINAL(root)) {
// taosThreadMutexUnlock(&fst->mtx);
return false;
} else {
tOut = tOut + FST_NODE_FINAL_OUTPUT(root);
......@@ -1080,8 +1055,6 @@ bool fstGet(Fst* fst, FstSlice* b, Output* out) {
fstNodeDestroy(*node);
}
taosArrayDestroy(nodes);
// fst->root = NULL;
// taosThreadMutexUnlock(&fst->mtx);
*out = tOut;
return true;
}
......@@ -1231,20 +1204,17 @@ bool stmStSeekMin(FStmSt* sws, FstBoundWithData* min) {
FstNode* node = fstGetRoot(sws->fst);
Output out = 0;
// void* autState = sws->aut->start();
void* autState = automFuncs[aut->type].start(aut);
void* autState = automFuncs[aut->type].start(aut);
int32_t len;
uint8_t* data = fstSliceData(key, &len);
for (uint32_t i = 0; i < len; i++) {
uint8_t b = data[i];
uint64_t res = 0;
bool find = fstNodeFindInput(node, b, &res);
if (find == true) {
if (fstNodeFindInput(node, b, &res)) {
FstTransition trn;
fstNodeGetTransitionAt(node, res, &trn);
void* preState = autState;
// autState = sws->aut->accept(preState, b);
autState = automFuncs[aut->type].accept(aut, preState, b);
taosArrayPush(sws->inp, &b);
......@@ -1379,14 +1349,14 @@ FStmStRslt* stmStNextWith(FStmSt* sws, StreamCallback callback) {
return NULL;
}
// Builds a stream result record.
//
// Allocates one FStmStRslt with taosMemoryCalloc and fills it with:
//   - data:  the result of fstSliceCopy(data, 0, FST_SLICE_LEN(data) - 1)
//   - out:   the output value passed by the caller
//   - state: the caller-supplied state pointer, stored as-is
//
// Returns the new record, or NULL when the allocation fails.
// NOTE(review): this hunk shows both the old parameter name (`fOut`) and the
// renamed one (`out`); only one signature/assignment pair is live in the file.
FStmStRslt* swsResultCreate(FstSlice* data, FstOutput fOut, void* state) {
FStmStRslt* swsResultCreate(FstSlice* data, FstOutput out, void* state) {
FStmStRslt* result = taosMemoryCalloc(1, sizeof(FStmStRslt));
if (result == NULL) {
return NULL;
}
// the copy range end is passed as len - 1
result->data = fstSliceCopy(data, 0, FST_SLICE_LEN(data) - 1);
result->out = fOut;
result->out = out;
result->state = state;
return result;
}
......
......@@ -21,12 +21,12 @@ const CompiledAddr EMPTY_ADDRESS = 0;
const CompiledAddr NONE_ADDRESS = 1;
// This version number is written to every finite state transducer created by
// this crate. When a finite state transducer is read, its version number is
// this version. When a finite state transducer is read, its version number is
// checked against this value.
const uint64_t VERSION = 3;
// The threshold (in number of transitions) at which an index is created for
// a node's transitions. This speeds up lookup time at the expense of FST size
const uint64_t TRANS_INDEX_THRESHOLD = 32;
uint8_t packSize(uint64_t n) {
......@@ -52,7 +52,6 @@ uint8_t packSize(uint64_t n) {
uint64_t unpackUint64(uint8_t* ch, uint8_t sz) {
uint64_t n = 0;
for (uint8_t i = 0; i < sz; i++) {
//
n = n | (ch[i] << (8 * i));
}
return n;
......
......@@ -23,7 +23,7 @@
#include "tcoding.h"
#include "tcompare.h"
const static uint64_t tfileMagicNumber = 0xdb4775248b80fb57ull;
const static uint64_t FILE_MAGIC_NUMBER = 0xdb4775248b80fb57ull;
typedef struct TFileFstIter {
FStmBuilder* fb;
......@@ -548,9 +548,6 @@ int tfileWriterPut(TFileWriter* tw, void* data, bool order) {
taosArraySortPWithExt((SArray*)(data), tfileValueCompare, &fn);
}
int32_t bufLimit = 64 * 4096, offset = 0;
// char* buf = taosMemoryCalloc(1, sizeof(char) * bufLimit);
// char* p = buf;
int32_t sz = taosArrayGetSize((SArray*)data);
int32_t fstOffset = tw->offset;
......@@ -564,6 +561,9 @@ int tfileWriterPut(TFileWriter* tw, void* data, bool order) {
}
tfileWriteFstOffset(tw, fstOffset);
int32_t bufCap = 8 * 1024;
char* buf = taosMemoryCalloc(1, bufCap);
for (size_t i = 0; i < sz; i++) {
TFileValue* v = taosArrayGetP((SArray*)data, i);
......@@ -571,14 +571,18 @@ int tfileWriterPut(TFileWriter* tw, void* data, bool order) {
// check buf has enough space or not
int32_t ttsz = TF_TABLE_TATOAL_SIZE(tbsz);
char* buf = taosMemoryCalloc(1, ttsz * sizeof(char));
if (bufCap < ttsz) {
bufCap = ttsz;
buf = taosMemoryRealloc(buf, bufCap);
}
char* p = buf;
tfileSerialTableIdsToBuf(p, v->tableId);
tw->ctx->write(tw->ctx, buf, ttsz);
v->offset = tw->offset;
tw->offset += ttsz;
taosMemoryFree(buf);
memset(buf, 0, sizeof(buf));
}
taosMemoryFree(buf);
tw->fb = fstBuilderCreate(tw->ctx, 0);
if (tw->fb == NULL) {
......@@ -869,13 +873,13 @@ static int tfileWriteData(TFileWriter* write, TFileValue* tval) {
//}
}
// Writes the file footer, i.e. the 64-bit magic number.
//
// The magic number is encoded with taosEncodeFixedU64 into a zero-initialized
// local buffer (one byte larger than the number) and handed to the context's
// write callback. The footer size reported by ctx->size is logged.
//
// Returns the value returned by the write callback, which is asserted to be
// equal to the size of the magic number.
// NOTE(review): this hunk shows both the old constant name (tfileMagicNumber)
// and the renamed one (FILE_MAGIC_NUMBER).
static int tfileWriteFooter(TFileWriter* write) {
char buf[sizeof(tfileMagicNumber) + 1] = {0};
char buf[sizeof(FILE_MAGIC_NUMBER) + 1] = {0};
void* pBuf = (void*)buf;
taosEncodeFixedU64((void**)(void*)&pBuf, tfileMagicNumber);
taosEncodeFixedU64((void**)(void*)&pBuf, FILE_MAGIC_NUMBER);
// NOTE(review): the length is computed with strlen(), so this relies on the
// encoded magic number containing no zero byte; the extra zero-initialized
// trailing byte of buf is what terminates the scan. Confirm if the constant
// ever changes.
int nwrite = write->ctx->write(write->ctx, buf, (int32_t)strlen(buf));
indexInfo("tfile write footer size: %d", write->ctx->size(write->ctx));
assert(nwrite == sizeof(tfileMagicNumber));
assert(nwrite == sizeof(FILE_MAGIC_NUMBER));
return nwrite;
}
static int tfileReaderLoadHeader(TFileReader* reader) {
......@@ -899,7 +903,7 @@ static int tfileReaderLoadFst(TFileReader* reader) {
int size = ctx->size(ctx);
// currently loads the fst into memory; refactor it later
int fstSize = size - reader->header.fstOffset - sizeof(tfileMagicNumber);
int fstSize = size - reader->header.fstOffset - sizeof(FILE_MAGIC_NUMBER);
char* buf = taosMemoryCalloc(1, fstSize);
if (buf == NULL) {
return -1;
......@@ -959,9 +963,8 @@ static int tfileReaderVerify(TFileReader* reader) {
IFileCtx* ctx = reader->ctx;
uint64_t tMagicNumber = 0;
char buf[sizeof(tMagicNumber) + 1] = {0};
int size = ctx->size(ctx);
char buf[sizeof(tMagicNumber) + 1] = {0};
int size = ctx->size(ctx);
if (size < sizeof(tMagicNumber) || size <= sizeof(reader->header)) {
return -1;
......@@ -970,25 +973,25 @@ static int tfileReaderVerify(TFileReader* reader) {
}
taosDecodeFixedU64(buf, &tMagicNumber);
return tMagicNumber == tfileMagicNumber ? 0 : -1;
return tMagicNumber == FILE_MAGIC_NUMBER ? 0 : -1;
}
// Takes a reference on the reader by applying T_REF_INC to it.
// A NULL reader is ignored. The value returned by T_REF_INC is not used.
// NOTE(review): this hunk shows both the old parameter name (`reader`) and the
// renamed one (`rd`).
void tfileReaderRef(TFileReader* reader) {
if (reader == NULL) {
void tfileReaderRef(TFileReader* rd) {
if (rd == NULL) {
return;
}
int ref = T_REF_INC(reader);
int ref = T_REF_INC(rd);
// silence the unused-variable warning
UNUSED(ref);
}
// Drops a reference on the reader by applying T_REF_DEC to it and destroys the
// reader with tfileReaderDestroy when T_REF_DEC returns 0.
// A NULL reader is ignored.
// NOTE(review): this hunk shows both the old parameter name (`reader`) and the
// renamed one (`rd`).
void tfileReaderUnRef(TFileReader* reader) {
if (reader == NULL) {
void tfileReaderUnRef(TFileReader* rd) {
if (rd == NULL) {
return;
}
int ref = T_REF_DEC(reader);
int ref = T_REF_DEC(rd);
if (ref == 0) {
// T_REF_DEC returned 0: destroy the reader
tfileReaderDestroy(reader);
tfileReaderDestroy(rd);
}
}
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册