diff --git a/source/libs/index/inc/indexFstSparse.h b/source/libs/index/inc/indexFstSparse.h
index 665fb2ba5cb9c0c426e9f3bf345351deed1d4abb..bd704fb427002424e6d4635fa3fef1366bdf9fb9 100644
--- a/source/libs/index/inc/indexFstSparse.h
+++ b/source/libs/index/inc/indexFstSparse.h
@@ -23,17 +23,18 @@ extern "C" {
 #endif
 
 typedef struct FstSparseSet {
-  uint32_t *dense;
-  uint32_t *sparse;
-  int32_t size;
+  int32_t *dense;
+  int32_t *sparse;
+  int32_t size;
+  int32_t cap;
 } FstSparseSet;
 
 FstSparseSet *sparSetCreate(int32_t sz);
 void sparSetDestroy(FstSparseSet *s);
 uint32_t sparSetLen(FstSparseSet *ss);
-uint32_t sparSetAdd(FstSparseSet *ss, uint32_t ip);
-uint32_t sparSetGet(FstSparseSet *ss, uint32_t i);
-bool sparSetContains(FstSparseSet *ss, uint32_t ip);
+bool sparSetAdd(FstSparseSet *ss, int32_t ip, int32_t *val);
+bool sparSetGet(FstSparseSet *ss, int32_t i, int32_t *val);
+bool sparSetContains(FstSparseSet *ss, int32_t ip);
 void sparSetClear(FstSparseSet *ss);
 
 #ifdef __cplusplus
diff --git a/source/libs/index/src/indexFstDfa.c b/source/libs/index/src/indexFstDfa.c
index b820f16a2a00f3ca13453bcc61034e840b79ccb2..275580ebdc239c97bff13c90778134e64f8ad25b 100644
--- a/source/libs/index/src/indexFstDfa.c
+++ b/source/libs/index/src/indexFstDfa.c
@@ -105,8 +105,9 @@ bool dfaBuilderRunState(FstDfaBuilder *builder, FstSparseSet *cur, FstSparseSet
   sparSetClear(cur);
   DfaState *t = taosArrayGet(builder->dfa->states, state);
   for (int i = 0; i < taosArrayGetSize(t->insts); i++) {
-    uint32_t ip = *(int32_t *)taosArrayGet(t->insts, i);
-    sparSetAdd(cur, ip);
+    int32_t ip = *(int32_t *)taosArrayGet(t->insts, i);
+    bool succ = sparSetAdd(cur, ip, NULL);
+    assert(succ == true);
   }
   dfaRun(builder->dfa, cur, next, byte);
 
@@ -126,7 +127,9 @@ bool dfaBuilderCachedState(FstDfaBuilder *builder, FstSparseSet *set, uint32_t *
 
   bool isMatch = false;
   for (int i = 0; i < sparSetLen(set); i++) {
-    uint32_t ip = sparSetGet(set, i);
+    int32_t ip;
+
+    if (false == sparSetGet(set, i, &ip)) continue;
 
     Inst *inst = taosArrayGet(builder->dfa->insts, ip);
     if (inst->ty == JUMP || inst->ty == SPLIT) {
@@ -186,7 +189,8 @@ void dfaAdd(FstDfa *dfa, FstSparseSet *set, uint32_t ip) {
   if (sparSetContains(set, ip)) {
     return;
   }
-  sparSetAdd(set, ip);
+  bool succ = sparSetAdd(set, ip, NULL);
+  assert(succ == true);
   Inst *inst = taosArrayGet(dfa->insts, ip);
   if (inst->ty == MATCH || inst->ty == RANGE) {
     // do nothing
@@ -203,7 +207,8 @@ bool dfaRun(FstDfa *dfa, FstSparseSet *from, FstSparseSet *to, uint8_t byte) {
   bool isMatch = false;
   sparSetClear(to);
   for (int i = 0; i < sparSetLen(from); i++) {
-    uint32_t ip = sparSetGet(from, i);
+    int32_t ip;
+    if (false == sparSetGet(from, i, &ip)) continue;
 
     Inst *inst = taosArrayGet(dfa->insts, ip);
     if (inst->ty == JUMP || inst->ty == SPLIT) {
diff --git a/source/libs/index/src/indexFstSparse.c b/source/libs/index/src/indexFstSparse.c
index 71d8854dcc2a916240ea4f589c311e107c2b8c09..99ed5d6429c46e2d5b7fddd02c4be771345c9278 100644
--- a/source/libs/index/src/indexFstSparse.c
+++ b/source/libs/index/src/indexFstSparse.c
@@ -21,8 +21,12 @@ FstSparseSet *sparSetCreate(int32_t sz) {
     return NULL;
   }
 
-  ss->dense = (uint32_t *)taosMemoryCalloc(sz, sizeof(uint32_t));
-  ss->sparse = (uint32_t *)taosMemoryCalloc(sz, sizeof(uint32_t));
+  ss->dense = (int32_t *)taosMemoryMalloc(sz * sizeof(int32_t));
+  memset(ss->dense, -1, sz * sizeof(int32_t));
+  ss->sparse = (int32_t *)taosMemoryMalloc(sz * sizeof(int32_t));
+  memset(ss->sparse, -1, sz * sizeof(int32_t));
+  ss->cap = sz;
+  ss->size = 0;
   return ss;
 }
 
@@ -38,23 +42,45 @@ uint32_t sparSetLen(FstSparseSet *ss) {
   // Get occupied size
   return ss == NULL ? 0 : ss->size;
 }
-uint32_t sparSetAdd(FstSparseSet *ss, uint32_t ip) {
+// Append ip to the set and, when idx is non-NULL, report its dense slot.
+// Fails when ss is NULL, ip is outside [0, cap) or every dense slot is used.
+bool sparSetAdd(FstSparseSet *ss, int32_t ip, int32_t *idx) {
   if (ss == NULL) {
-    return 0;
+    return false;
+  }
+  if (ip < 0 || ip >= ss->cap || ss->size >= ss->cap) {
+    return false;
   }
   uint32_t i = ss->size;
   ss->dense[i] = ip;
   ss->sparse[ip] = i;
   ss->size += 1;
-  return i;
+
+  if (idx != NULL) *idx = i;
+
+  return true;
 }
-uint32_t sparSetGet(FstSparseSet *ss, uint32_t i) {
-  // check later
-  return ss->dense[i];
+// Copy the member held in dense slot idx into *ip (when ip is non-NULL).
+// Fails when ss is NULL, idx is outside [0, size) or the slot holds -1.
+bool sparSetGet(FstSparseSet *ss, int32_t idx, int32_t *ip) {
+  if (ss == NULL || idx < 0 || idx >= ss->cap || idx >= ss->size) {
+    return false;
+  }
+  int32_t val = ss->dense[idx];
+  if (ip != NULL) {
+    *ip = val;
+  }
+  return val == -1 ? false : true;
 }
-bool sparSetContains(FstSparseSet *ss, uint32_t ip) {
-  uint32_t i = ss->sparse[ip];
-  if (i < ss->size && ss->dense[i] == ip) {
+// Report whether ip is a member. sparse[] is filled with -1 for values that
+// were never added, so the slot must be non-negative before dense[] is read.
+bool sparSetContains(FstSparseSet *ss, int32_t ip) {
+  if (ss == NULL || ip < 0 || ip >= ss->cap) {
+    return false;
+  }
+  int32_t i = ss->sparse[ip];
+
+  if (i >= 0 && i < ss->cap && i < ss->size && ss->dense[i] == ip) {
     return true;
   } else {
     return false;
@@ -64,5 +90,7 @@ void sparSetClear(FstSparseSet *ss) {
   if (ss == NULL) {
     return;
   }
+  memset(ss->dense, -1, ss->cap * sizeof(int32_t));
+  memset(ss->sparse, -1, ss->cap * sizeof(int32_t));
   ss->size = 0;
 }
diff --git a/source/libs/index/test/fstUtilUT.cc b/source/libs/index/test/fstUtilUT.cc
index 2c29758756704bafc0e650b04771af17bf57199e..22fe1a91503ab8799f7322c15b002e8a8ffdceed 100644
--- a/source/libs/index/test/fstUtilUT.cc
+++ b/source/libs/index/test/fstUtilUT.cc
@@ -51,10 +51,18 @@ class FstSparseSetEnv : public ::testing::Test {
 };
 
 // test FstDfaBuilder
-TEST_F(FstUtilEnv, test1) {}
-TEST_F(FstUtilEnv, test2) {}
-TEST_F(FstUtilEnv, test3) {}
-TEST_F(FstUtilEnv, test4) {}
+TEST_F(FstUtilEnv, test1) {
+  // test
+}
+TEST_F(FstUtilEnv, test2) {
+  // test
+}
+TEST_F(FstUtilEnv, test3) {
+  // test
+}
+TEST_F(FstUtilEnv, test4) {
+  // test
+}
 
 // test FstRegex
 
@@ -64,7 +72,46 @@ TEST_F(FstRegexEnv, test3) {}
 TEST_F(FstRegexEnv, test4) {}
 
 // test FstSparseSet
-TEST_F(FstSparseSetEnv, test1) {}
-TEST_F(FstSparseSetEnv, test2) {}
-TEST_F(FstSparseSetEnv, test3) {}
-TEST_F(FstSparseSetEnv, test4) {}
+TEST_F(FstSparseSetEnv, test1) {
+  for (int8_t i = 0; i < 20; i++) {
+    int32_t val = -1;
+    bool succ = sparSetAdd(set, 'a' + i, &val);
+    EXPECT_EQ(succ, true);
+    EXPECT_EQ(val, i);
+  }
+  EXPECT_EQ(sparSetLen(set), 20);
+  for (int8_t i = 0; i < 20; i++) {
+    int val = -1;
+    bool find = sparSetGet(set, i, &val);
+    EXPECT_EQ(find, true);
+    EXPECT_EQ(val, i + 'a');
+  }
+  for (int8_t i = 'a'; i < 'a' + 20; i++) {
+    EXPECT_EQ(sparSetContains(set, i), true);
+  }
+
+  // upper-case values were never added
+  for (int8_t i = 'A'; i < 'A' + 20; i++) {
+    EXPECT_EQ(sparSetContains(set, i), false);
+  }
+
+  for (int i = 512; i < 1000; i++) {
+    EXPECT_EQ(sparSetAdd(set, i, NULL), false);
+
+    EXPECT_EQ(sparSetGet(set, i, NULL), false);
+    EXPECT_EQ(sparSetContains(set, i), false);
+  }
+
+  // negative values are rejected instead of indexing out of bounds
+  EXPECT_EQ(sparSetAdd(set, -1, NULL), false);
+  EXPECT_EQ(sparSetGet(set, -1, NULL), false);
+  EXPECT_EQ(sparSetContains(set, -1), false);
+  sparSetClear(set);
+
+  for (int i = 'a'; i < 'a' + 20; i++) {
+    EXPECT_EQ(sparSetGet(set, i, NULL), false);
+  }
+  for (int i = 1000; i < 2000; i++) {
+    EXPECT_EQ(sparSetGet(set, i, NULL), false);
+  }
+}