From d56dbb15ff326f62ffde6ae54210397daced9dbe Mon Sep 17 00:00:00 2001
From: yihaoDeng
Date: Sun, 21 Nov 2021 18:39:35 +0800
Subject: [PATCH] add fst_registry

---
 source/libs/index/inc/index_fst.h          |  18 ---
 source/libs/index/inc/index_fst_node.h     |  14 ++
 source/libs/index/inc/index_fst_registry.h |  36 ++++-
 source/libs/index/src/index_fst_node.c     |  13 ++
 source/libs/index/src/index_fst_registry.c | 171 +++++++++++++++++++++
 5 files changed, 233 insertions(+), 19 deletions(-)

diff --git a/source/libs/index/inc/index_fst.h b/source/libs/index/inc/index_fst.h
index 61c857ed74..1230fe17ff 100644
--- a/source/libs/index/inc/index_fst.h
+++ b/source/libs/index/inc/index_fst.h
@@ -32,12 +32,6 @@ typedef struct FstRange {
 } FstRange;
 
 
-typedef struct FstBuilderNode {
-  bool isFinal;
-  Output finalOutput;
-  SArray *trans;  // <FstTransition>
-} FstBuilderNode;
-
 typedef enum { OneTransNext, OneTrans, AnyTrans, EmptyFinal} State;
 typedef enum { Included, Excluded, Unbounded} FstBound;
 
@@ -82,14 +76,6 @@ typedef struct FstBuilder {
 } FstBuilder;
 
 
-
-
-typedef struct FstTransition {
-  uint8_t inp;        //The byte input associated with this transition.
-  Output out;         //The output associated with this transition
-  CompiledAddr addr;  //The address of the node that this transition points to
-} FstTransition;
-
 typedef struct FstTransitions {
   FstNode *node;
   FstRange range;
@@ -172,10 +158,6 @@ typedef struct FstIndexedValue {
 } FstIndexedValue;
 
 
-typedef struct FstRegistryCell {
-  CompiledAddr addr;
-  FstBuilderNode *node;
-} FstRegistryCell;
 
 
 
diff --git a/source/libs/index/inc/index_fst_node.h b/source/libs/index/inc/index_fst_node.h
index ba2d2ccd02..3eec97e3d8 100644
--- a/source/libs/index/inc/index_fst_node.h
+++ b/source/libs/index/inc/index_fst_node.h
@@ -16,7 +16,21 @@
 #ifndef __INDEX_FST_NODE_H__
 #define __INDEX_FST_NODE_H__
 
+#include "index_fst_util.h"
 
+typedef struct FstTransition {
+  uint8_t inp;        //The byte input associated with this transition.
+  Output out;         //The output associated with this transition
+  CompiledAddr addr;  //The address of the node that this transition points to
+} FstTransition;
+
+typedef struct FstBuilderNode {
+  bool isFinal;
+  Output finalOutput;
+  SArray *trans;  // <FstTransition>
+} FstBuilderNode;
+
+FstBuilderNode *fstBuilderNodeDefault();
 
 
 #endif
diff --git a/source/libs/index/inc/index_fst_registry.h b/source/libs/index/inc/index_fst_registry.h
index 6dcb236f29..80c0194f00 100644
--- a/source/libs/index/inc/index_fst_registry.h
+++ b/source/libs/index/inc/index_fst_registry.h
@@ -16,9 +16,43 @@
 #define __FST_REGISTRY_H__
 
 #include "index_fst_util.h"
+#include "tarray.h"
+#include "index_fst_node.h"
 
+typedef struct FstRegistryCell {
+  CompiledAddr addr;     // NONE_ADDRESS while the cell is unused
+  FstBuilderNode *node;  // node allocated by fstRegistryCreate for this cell
+} FstRegistryCell;
+
+
+typedef struct FstRegistryCache {
+  SArray *cells;
+  uint32_t start;
+  uint32_t end;
+} FstRegistryCache;
+
+typedef enum {FOUND, NOTFOUND, REJECTED} FstRegistryEntryState;
+
+typedef struct FstRegistryEntry {
+  FstRegistryEntryState state;
+  CompiledAddr addr;      // cached address, meaningful when state == FOUND
+  FstRegistryCell *cell;  // cell to fill in, meaningful when state == NOTFOUND
+} FstRegistryEntry;
+
+
+
+// Registry relation function
 typedef struct FstRegistry {
-
+  SArray *table;
+  uint64_t tableSize;  // num of rows
+  uint64_t mruSize;    // num of columns
 } FstRegistry;
+
+// Registry API; behaviour is described at the definitions in index_fst_registry.c
+FstRegistry* fstRegistryCreate(uint64_t tableSize, uint64_t mruSize);
+
+FstRegistryEntry* fstRegistryGetEntry(FstRegistry *registry, FstBuilderNode *bNode);
+
+uint64_t fstRegistryHash(FstRegistry *registry, FstBuilderNode *node);
 
 #endif
diff --git a/source/libs/index/src/index_fst_node.c b/source/libs/index/src/index_fst_node.c
index 3d5efd30f3..3e8e7c12a2 100644
--- a/source/libs/index/src/index_fst_node.c
+++ b/source/libs/index/src/index_fst_node.c
@@ -12,4 +12,17 @@
  * You should have received a copy of the GNU Affero General Public License
  * along with this program. If not, see <http://www.gnu.org/licenses/>.
  */
+#include "index_fst_node.h"
+
+// Allocate an empty builder node: not final, zero final output, no transitions.
+// Returns NULL if the allocation fails; the caller owns the returned node.
+FstBuilderNode *fstBuilderNodeDefault() {
+  FstBuilderNode *bn = malloc(sizeof(FstBuilderNode));
+  if (bn == NULL) { return NULL; }
+
+  bn->isFinal = false;
+  bn->finalOutput = 0;
+  bn->trans = NULL;
+  return bn;
+}
 
diff --git a/source/libs/index/src/index_fst_registry.c b/source/libs/index/src/index_fst_registry.c
index 940c5863f4..7b4ef9da19 100644
--- a/source/libs/index/src/index_fst_registry.c
+++ b/source/libs/index/src/index_fst_registry.c
@@ -15,3 +15,174 @@
 
 #include "index_fst_registry.h"
 
+
+// Exchange the contents (address and node pointer) of cells a and b.
+// Does nothing if either index lies outside the array.
+static void fstRegistryCellSwap(SArray *arr, uint32_t a, uint32_t b) {
+  size_t sz = taosArrayGetSize(arr);
+  if (a >= sz || b >= sz) { return; }
+
+  FstRegistryCell *cell1 = (FstRegistryCell *)taosArrayGet(arr, a);
+  FstRegistryCell *cell2 = (FstRegistryCell *)taosArrayGet(arr, b);
+
+  FstRegistryCell t = *cell1;
+  *cell1 = *cell2;
+  *cell2 = t;
+}
+
+// Move the cell at index start down to index end (end <= start) by swapping it
+// with each predecessor in turn; the cells in between shift up by one slot.
+static void fstRegistryCellPromote(SArray *arr, uint32_t start, uint32_t end) {
+  size_t sz = taosArrayGetSize(arr);
+  // either bound outside the array makes the request invalid
+  if (start >= sz || end >= sz) { return; }
+
+  assert(start >= end);
+
+  for (uint32_t s = start; s > end; s--) {
+    fstRegistryCellSwap(arr, s - 1, s);
+  }
+}
+
+// Structural equality of two builder nodes: same final flag, same final output
+// and the same transitions in the same order. A NULL transition array is
+// treated as empty.
+static bool fstRegistryNodeEqual(FstBuilderNode *n1, FstBuilderNode *n2) {
+  if (n1 == n2) { return true; }
+  if (n1 == NULL || n2 == NULL) { return false; }
+  if (n1->isFinal != n2->isFinal || n1->finalOutput != n2->finalOutput) { return false; }
+
+  size_t s1 = (n1->trans == NULL) ? 0 : taosArrayGetSize(n1->trans);
+  size_t s2 = (n2->trans == NULL) ? 0 : taosArrayGetSize(n2->trans);
+  if (s1 != s2) { return false; }
+
+  for (size_t i = 0; i < s1; i++) {
+    FstTransition *t1 = (FstTransition *)taosArrayGet(n1->trans, i);
+    FstTransition *t2 = (FstTransition *)taosArrayGet(n2->trans, i);
+    if (t1->inp != t2->inp || t1->out != t2->out || t1->addr != t2->addr) { return false; }
+  }
+  return true;
+}
+
+// Store bNode's content in the cell's own node: scalar fields are copied and
+// the transition array is moved, leaving bNode->trans NULL. Transitions the
+// cell held before are destroyed first so an evicted entry does not leak them.
+static void fstRegistryCellAssign(FstRegistryCell *cell, FstBuilderNode *bNode) {
+  FstBuilderNode *dst = cell->node;
+  if (dst == bNode) { return; }
+
+  if (dst->trans != NULL) { taosArrayDestroy(dst->trans); }
+  dst->isFinal = bNode->isFinal;
+  dst->finalOutput = bNode->finalOutput;
+  dst->trans = bNode->trans;
+  bNode->trans = NULL;
+}
+
+// Create a registry with tableSize buckets of mruSize cells each. Every cell
+// starts with NONE_ADDRESS and its own empty builder node.
+// Returns NULL if any allocation fails.
+FstRegistry* fstRegistryCreate(uint64_t tableSize, uint64_t mruSize) {
+  FstRegistry *registry = malloc(sizeof(FstRegistry));
+  if (registry == NULL) { return NULL; }
+
+  uint64_t nCells = tableSize * mruSize;
+  SArray *tb = (SArray *)taosArrayInit(nCells, sizeof(FstRegistryCell));
+  if (tb == NULL) {
+    free(registry);
+    return NULL;
+  }
+
+  // taosArrayInit only reserves capacity, so each cell has to be pushed
+  // before it can be addressed with taosArrayGet
+  for (uint64_t i = 0; i < nCells; i++) {
+    FstRegistryCell cell = {.addr = NONE_ADDRESS, .node = fstBuilderNodeDefault()};
+    if (cell.node == NULL || taosArrayPush(tb, &cell) == NULL) {
+      // undo everything built so far
+      free(cell.node);
+      for (uint64_t j = 0; j < i; j++) {
+        free(((FstRegistryCell *)taosArrayGet(tb, j))->node);
+      }
+      taosArrayDestroy(tb);
+      free(registry);
+      return NULL;
+    }
+  }
+
+  registry->table = tb;
+  registry->tableSize = tableSize;
+  registry->mruSize = mruSize;
+  return registry;
+}
+
+// Look bNode up in its hash bucket, treating the bucket as a most-recently-used
+// list (front = most recent).
+//   hit : state = FOUND, addr = cached address, cell = NULL; the matching
+//         cell is promoted to the front of the bucket.
+//   miss: state = NOTFOUND, addr = NONE_ADDRESS, cell = the front cell, which
+//         now holds bNode's content (bNode->trans is moved into it and set to
+//         NULL); the least recently used cell of the bucket is evicted.
+// Returns NULL if the table is empty or the entry cannot be allocated.
+// The entry is heap-allocated and must be released by the caller with free().
+FstRegistryEntry *fstRegistryGetEntry(FstRegistry *registry, FstBuilderNode *bNode) {
+  if (taosArrayGetSize(registry->table) <= 0 || registry->mruSize == 0) {
+    return NULL;
+  }
+  uint64_t bucket = fstRegistryHash(registry, bNode);
+  uint64_t start = registry->mruSize * bucket;
+  uint64_t end = start + registry->mruSize;
+
+  FstRegistryEntry *entry = malloc(sizeof(FstRegistryEntry));
+  if (entry == NULL) { return NULL; }
+  entry->state = NOTFOUND;
+  entry->addr = NONE_ADDRESS;
+  entry->cell = NULL;
+
+  // Nodes are compared by content: the registry owns its nodes, so the
+  // caller's pointer is never identical to a cached one.
+  for (uint64_t i = start; i < end; i++) {
+    FstRegistryCell *cell = (FstRegistryCell *)taosArrayGet(registry->table, i);
+    if (cell->addr != NONE_ADDRESS && fstRegistryNodeEqual(cell->node, bNode)) {
+      entry->state = FOUND;
+      entry->addr = cell->addr;  // read before the promotion moves the cell
+      fstRegistryCellPromote(registry->table, (uint32_t)i, (uint32_t)start);
+      return entry;
+    }
+  }
+
+  // Miss: recycle the least recently used (last) cell and move it to the front.
+  // For buckets of one or two cells this is the same single-cell overwrite or
+  // overwrite-and-swap as the general case.
+  uint64_t last = end - 1;
+  FstRegistryCell *lru = (FstRegistryCell *)taosArrayGet(registry->table, last);
+  fstRegistryCellAssign(lru, bNode);
+  fstRegistryCellPromote(registry->table, (uint32_t)last, (uint32_t)start);
+
+  entry->cell = (FstRegistryCell *)taosArrayGet(registry->table, start);
+  return entry;
+}
+
+// Map a builder node to a bucket index in [0, tableSize). The final flag, the
+// final output and every transition (inp, out, addr) are folded in with
+// xor-then-multiply steps using the 64-bit FNV offset basis and prime.
+// Returns 0 when the registry has no buckets.
+uint64_t fstRegistryHash(FstRegistry *registry, FstBuilderNode *bNode) {
+  //TODO(yihaoDeng): refactor later
+  const uint64_t FNV_PRIME = 1099511628211ULL;
+  uint64_t h = 14695981039346656037ULL;
+
+  if (registry->tableSize == 0) { return 0; }
+
+  h = (h ^ (uint64_t)bNode->isFinal) * FNV_PRIME;
+  h = (h ^ (bNode)->finalOutput) * FNV_PRIME;
+
+  // a node without a transition array hashes like one with no transitions
+  uint32_t sz = (bNode->trans == NULL) ? 0 : (uint32_t)taosArrayGetSize(bNode->trans);
+  for (uint32_t i = 0; i < sz; i++) {
+    FstTransition *trn = taosArrayGet(bNode->trans, i);
+    h = (h ^ (uint64_t)(trn->inp)) * FNV_PRIME;
+    h = (h ^ (uint64_t)(trn->out)) * FNV_PRIME;
+    h = (h ^ (uint64_t)(trn->addr))* FNV_PRIME;
+  }
+  return h %(registry->tableSize);
+}
+
-- 
GitLab