提交 d56dbb15 编写于 作者: dengyihao's avatar dengyihao

add fst_registry

上级 09f3e8e1
......@@ -32,12 +32,6 @@ typedef struct FstRange {
} FstRange;
// In-memory description of a single FST node: a final flag, a final output
// value and the array of its outgoing transitions.
typedef struct FstBuilderNode {
// true when the node is marked as final
bool isFinal;
// output value stored on the node itself (presumably only meaningful when
// isFinal is set -- confirm against the builder)
Output finalOutput;
// array whose elements are FstTransition
SArray *trans; // <FstTransition>
} FstBuilderNode;
// Node state tag. NOTE(review): the names suggest the encoding kind of a
// compiled node (single transition to the next node, single transition,
// arbitrary transitions, empty final node) -- confirm against the node
// compiler, which is not part of this view.
typedef enum { OneTransNext, OneTrans, AnyTrans, EmptyFinal} State;
// Kind of a range boundary. NOTE(review): presumably inclusive, exclusive or
// no bound at all, for use with FstRange -- confirm at the use sites.
typedef enum { Included, Excluded, Unbounded} FstBound;
......@@ -82,14 +76,6 @@ typedef struct FstBuilder {
} FstBuilder;
// One outgoing edge of a node: the input byte that selects it, the output
// attached to it and the address of the node it leads to.
typedef struct FstTransition {
uint8_t inp; // The byte input associated with this transition.
Output out; // The output associated with this transition.
CompiledAddr addr; // The address of the node that this transition points to.
} FstTransition;
typedef struct FstTransitions {
FstNode *node;
FstRange range;
......@@ -172,10 +158,6 @@ typedef struct FstIndexedValue {
} FstIndexedValue;
// One slot of the registry table: a builder node paired with an address.
typedef struct FstRegistryCell {
// address associated with `node` (presumably the compiled address of the
// node -- confirm against the code that fills the cell)
CompiledAddr addr;
// the builder node held by this slot
FstBuilderNode *node;
} FstRegistryCell;
......
......@@ -16,7 +16,21 @@
#ifndef __INDEX_FST_NODE_H__
#define __INDEX_FST_NODE_H__
#include "index_fst_util.h"
// One outgoing edge of a node: the input byte that selects it, the output
// attached to it and the address of the node it leads to.
typedef struct FstTransition {
uint8_t inp; // The byte input associated with this transition.
Output out; // The output associated with this transition.
CompiledAddr addr; // The address of the node that this transition points to.
} FstTransition;
// In-memory description of a single FST node: a final flag, a final output
// value and the array of its outgoing transitions.
typedef struct FstBuilderNode {
// true when the node is marked as final
bool isFinal;
// output value stored on the node itself (presumably only meaningful when
// isFinal is set -- confirm against the builder)
Output finalOutput;
// array whose elements are FstTransition; fstBuilderNodeDefault() leaves it NULL
SArray *trans; // <FstTransition>
} FstBuilderNode;
// Allocates a FstBuilderNode with malloc and returns it initialized as
// non-final, with a final output of 0 and no transition array (trans == NULL).
FstBuilderNode *fstBuilderNodeDefault();
#endif
......@@ -16,9 +16,43 @@
#define __FST_REGISTRY_H__
#include "index_fst_util.h"
#include "tarray.h"
#include "index_fst_node.h"
// One slot of the registry table: a builder node paired with an address.
// A slot whose addr is NONE_ADDRESS is never reported as a match by
// fstRegistryGetEntry().
typedef struct FstRegistryCell {
// address associated with `node`; NONE_ADDRESS right after fstRegistryCreate()
CompiledAddr addr;
// the builder node held by this slot
FstBuilderNode *node;
} FstRegistryCell;
// NOTE(review): not referenced by the registry functions visible in this
// change. Presumably a view over the cells in the index range start..end of
// `cells` -- confirm whether `end` is inclusive before relying on it.
typedef struct FstRegistryCache {
SArray *cells;
uint32_t start;
uint32_t end;
} FstRegistryCache;
// Result kind of a registry lookup. fstRegistryGetEntry() reports FOUND when a
// populated cell matches the node and NOTFOUND when the node was stored into a
// cell instead; REJECTED is not produced by the lookup code visible here.
typedef enum {FOUND, NOTFOUND, REJECTED} FstRegistryEntryState;
// Result of fstRegistryGetEntry(). Which of `addr` / `cell` is filled in
// depends on `state`.
typedef struct FstRegistryEntry {
// outcome of the lookup
FstRegistryEntryState state;
// set by the lookup when state == FOUND: address stored in the matching cell
CompiledAddr addr;
// set by the lookup when state == NOTFOUND: the cell now holding the node
FstRegistryCell *cell;
} FstRegistryEntry;
// Registry of builder nodes. `table` is a flat array of FstRegistryCell that
// is addressed as tableSize buckets of mruSize consecutive cells each: bucket
// b covers the indices [b * mruSize, (b + 1) * mruSize).
typedef struct FstRegistry {
SArray *table;
uint64_t tableSize; // num of rows (buckets); the hash is reduced modulo this value
uint64_t mruSize; // num of columns (cells per bucket)
} FstRegistry;
// Registry API.
// Allocates a registry whose table is sized for tableSize * mruSize cells.
FstRegistry* fstRegistryCreate(uint64_t tableSize, uint64_t mruSize);
// Looks bNode up in its bucket. Returns NULL when the table is empty,
// otherwise a malloc'ed entry whose state is FOUND or NOTFOUND.
FstRegistryEntry* fstRegistryGetEntry(FstRegistry *registry, FstBuilderNode *bNode);
// Hashes the node's final flag, final output and transitions and reduces the
// result modulo registry->tableSize, yielding the bucket index of the node.
uint64_t fstRegistryHash(FstRegistry *registry, FstBuilderNode *node);
#endif
......@@ -12,4 +12,13 @@
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#include "index_fst_node.h"
// Allocates a builder node in its default state: not final, final output 0
// and no transition array (trans == NULL).
//
// Returns the new heap-allocated node, or NULL when the allocation fails.
FstBuilderNode *fstBuilderNodeDefault() {
  FstBuilderNode *bn = malloc(sizeof(*bn));
  if (bn == NULL) {
    // Out of memory: report it instead of writing through a NULL pointer.
    return NULL;
  }
  bn->isFinal = false;
  bn->finalOutput = 0;
  bn->trans = NULL;
  return bn;
}
......@@ -15,3 +15,150 @@
#include "index_fst_registry.h"
// Exchanges the contents (addr and node) of the cells at indices a and b.
// Does nothing when either index lies outside the array.
static void fstRegistryCellSwap(SArray *arr, uint32_t a, uint32_t b) {
  const size_t count = taosArrayGetSize(arr);
  if (a < count && b < count) {
    FstRegistryCell *lhs = (FstRegistryCell *)taosArrayGet(arr, a);
    FstRegistryCell *rhs = (FstRegistryCell *)taosArrayGet(arr, b);

    // A cell consists only of addr and node, so a whole-struct copy is the
    // same as swapping both fields individually.
    FstRegistryCell saved = *lhs;
    *lhs = *rhs;
    *rhs = saved;
  }
}
// Moves the cell at index `start` down to index `end` (end <= start) by
// swapping it with its predecessor one step at a time, so every cell in
// [end, start) shifts up by one position and keeps its relative order.
//
// Does nothing when either index is outside the array.
static void fstRegistryCellPromote(SArray *arr, uint32_t start, uint32_t end) {
  size_t sz = taosArrayGetSize(arr);
  // Both indices must be valid; rejecting only when *both* are out of range
  // would let a half-valid request rotate the wrong cells.
  if (start >= sz || end >= sz) { return; }
  assert(start >= end);

  // pos > end >= 0 inside the loop, so pos - 1 cannot wrap around.
  for (uint32_t pos = start; pos > end; pos--) {
    fstRegistryCellSwap(arr, pos - 1, pos);
  }
}
FstRegistry* fstRegistryCreate(uint64_t tableSize, uint64_t mruSize) {
FstRegistry *registry = malloc(sizeof(FstRegistry));
if (registry == NULL) { return NULL ;}
uint64_t nCells = tableSize * mruSize;
SArray* tb = (SArray *)taosArrayInit(nCells, sizeof(FstRegistryCell));
for (uint64_t i = 0; i < nCells; i++) {
FstRegistryCell *cell = taosArrayGet(tb, i);
cell->addr = NONE_ADDRESS;
cell->node = fstBuilderNodeDefault();
}
registry->table = tb;
registry->tableSize = tableSize;
registry->mruSize = mruSize;
return registry;
}
FstRegistryEntry *fstRegistryGetEntry(FstRegistry *registry, FstBuilderNode *bNode) {
if (taosArrayGetSize(registry->table) <= 0) {
return NULL;
}
uint64_t bucket = fstRegistryHash(registry, bNode);
uint64_t start = registry->mruSize * bucket;
uint64_t end = start + registry->mruSize;
FstRegistryEntry *entry = malloc(sizeof(FstRegistryEntry));
if (end - start == 1) {
FstRegistryCell *cell = taosArrayGet(registry->table, start);
//cell->isNode &&
if (cell->addr != NONE_ADDRESS && cell->node == bNode) {
entry->state = FOUND;
entry->addr = cell->addr ;
return entry;
} else {
// clone from bNode, refactor later
cell->node->isFinal = bNode->isFinal;
cell->node->finalOutput = bNode->finalOutput;
cell->node->trans = bNode->trans;
bNode->trans = NULL;
entry->state = NOTFOUND;
entry->cell = cell; // copy or not
}
} else if (end - start == 2) {
FstRegistryCell *cell1 = taosArrayGet(registry->table, start);
if (cell1->addr != NONE_ADDRESS && cell1->node == bNode) {
entry->state = FOUND;
entry->addr = cell1->addr;
return entry;
}
FstRegistryCell *cell2 = taosArrayGet(registry->table, start + 1);
if (cell2->addr != NONE_ADDRESS && cell2->node == bNode) {
entry->state = FOUND;
entry->addr = cell2->addr;
// must swap here
fstRegistryCellSwap(registry->table, start, start + 1);
return entry;
}
//clone from bNode, refactor later
cell1->node->isFinal = bNode->isFinal;
cell1->node->finalOutput = bNode->finalOutput;
cell1->node->trans = bNode->trans;
bNode->trans = NULL;
fstRegistryCellSwap(registry->table, start, start + 1);
FstRegistryCell *cCell = taosArrayGet(registry->table, start);
entry->state = NOTFOUND;
entry->cell = cCell;
} else {
uint32_t i = start;
for (; i < end; i++) {
FstRegistryCell *cell = (FstRegistryCell *)taosArrayGet(registry->table, i);
if (cell->addr != NONE_ADDRESS && cell->node == bNode) {
entry->state = FOUND;
entry->addr = cell->addr;
fstRegistryCellPromote(registry->table, i, start);
break;
}
}
if (i >= end) {
uint64_t last = end - 1;
FstRegistryCell *cell = (FstRegistryCell *)taosArrayGet(registry->table, last);
//clone from bNode, refactor later
cell->node->isFinal = bNode->isFinal;
cell->node->finalOutput = bNode->finalOutput;
cell->node->trans = bNode->trans;
bNode->trans = NULL;
fstRegistryCellPromote(registry->table, last, start);
FstRegistryCell *cCell = taosArrayGet(registry->table, start);
entry->state = NOTFOUND;
entry->cell = cCell;
}
}
return entry;
}
// Computes the bucket index of bNode: an FNV-style hash (xor, then multiply
// by FNV_PRIME) over the final flag, the final output and the inp/out/addr of
// every transition, reduced modulo registry->tableSize.
//
// A node without a transition array is hashed as having no transitions.
// Returns 0 when the registry has no buckets (tableSize == 0).
uint64_t fstRegistryHash(FstRegistry *registry, FstBuilderNode *bNode) {
  //TODO(yihaoDeng): refactor later
  const uint64_t FNV_PRIME = 1099511628211;
  uint64_t h = 14695981039346656037u;

  h = (h ^ (uint64_t)bNode->isFinal) * FNV_PRIME;
  h = (h ^ (uint64_t)(bNode->finalOutput)) * FNV_PRIME;

  // fstBuilderNodeDefault() leaves trans NULL, so do not assume an array.
  size_t sz = (bNode->trans == NULL) ? 0 : taosArrayGetSize(bNode->trans);
  for (size_t i = 0; i < sz; i++) {
    FstTransition *trn = (FstTransition *)taosArrayGet(bNode->trans, i);
    h = (h ^ (uint64_t)(trn->inp)) * FNV_PRIME;
    h = (h ^ (uint64_t)(trn->out)) * FNV_PRIME;
    h = (h ^ (uint64_t)(trn->addr)) * FNV_PRIME;
  }

  // A modulo by zero is undefined behavior.
  if (registry->tableSize == 0) { return 0; }
  return h % (registry->tableSize);
}
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册