提交 60e339b3 编写于 作者: dengyihao's avatar dengyihao

fst core struct

上级 980ace09
......@@ -13,58 +13,73 @@
* along with this program. If not, see <http://www.gnu.org/licenses/>.
#ifndef _INDEX_FST_H_
#define _INDEX_FST_H_
#include "index_fst.h"
#include "tarray.h"
#ifndef __INDEX_FST_H__
#define __INDEX_FST_H__
// Scalar aliases used by the FST code. In a typedef the existing type comes
// first and the new alias second; the reversed form does not compile.
typedef uint64_t FstType;
typedef uint64_t CompiledAddr;
typedef uint64_t Output;
typedef uint8_t  PackSizes;
#include "tarray.h"
#include "index_fst_util.h"
#include "index_fst_registry.h"
//A sentinel value used to indicate an empty final state
const CompileAddr EMPTY_ADDRESS = 0;
/// A sentinel value used to indicate an invalid state.
const CompileAddr NONE_ADDRESS = 1;
// This version number is written to every finite state transducer created by
// this library. When a finite state transducer is read, its version number is
// checked against this value.
const uint64_t version = 3;
// The threshold (in number of transitions) at which an index is created for
// a node's transitions. This speeds up lookup time at the expense of FST size
typedef struct FstNode FstNode;
// Evaluates to the smaller of `a` and `b`. Each argument may be evaluated
// twice, so do not pass expressions with side effects.
#define OUTPUT_PREFIX(a, b) ((a) > (b) ? (b) : (a))
const uint64_t TRANS_INDEX_THRESHOLD = 32;
typedef struct FstRange {
uint64_t start;
uint64_t end;
} FstRange;
enum State { OneTransNext, OneTrans, AnyTrans, EmptyFinal};
enum FstBound { Included, Excluded, Unbounded};
typedef struct CheckSummer {
uint32_t sum;
typedef struct FstBuilderNode {
bool isFinal;
Output finalOutput;
SArray *trans; // <FstTransition>
} FstBuilderNode;
typedef enum { OneTransNext, OneTrans, AnyTrans, EmptyFinal} State;
typedef enum { Included, Excluded, Unbounded} FstBound;
typedef uint32_t CheckSummer;
typedef struct FstBuilder {
FstCountingWriter wtr; // The FST raw data is written directly to `wtr`.
FstUnFinishedNodes unfinished // The stack of unfinished nodes
Registry registry // A map of finished nodes.
SArray* last // The last word added
CompiledAddr lastAddr // The address of the last compiled node
uint64_t len // num of keys added
} FstBuilder;
* Unfinished nodes and their helper functions
* TODO: simplify the function names
typedef struct FstUnFinishedNodes {
SArray *stack; // <FstBuilderNodeUnfinished> } FstUnFinishedNodes;
} FstUnFinishedNodes;
// Number of entries currently held on the unfinished-node stack.
// The argument is parenthesized so that expressions such as `&obj` expand correctly.
#define FST_UNFINISHED_NODES_LEN(nodes) taosArrayGetSize((nodes)->stack)
// Unfinished-node stack API. The declared names must match the definitions
// exactly, otherwise callers compile against a symbol that is never defined.

// Allocates the stack and pushes one empty, non-final node. Returns NULL if
// the container itself cannot be allocated.
FstUnFinishedNodes *fstUnFinishedNodesCreate(void);
// Pushes a new node with no transitions, zero final output and the given isFinal flag.
void fstUnFinishedNodesPushEmpty(FstUnFinishedNodes *nodes, bool isFinal);
// Pops the last remaining entry (asserts the stack holds exactly one) and returns its node.
FstBuilderNode *fstUnFinishedNodesPopRoot(FstUnFinishedNodes *nodes);
// Pops the top entry, finalizes its pending last transition against `addr`, and returns its node.
FstBuilderNode *fstUnFinishedNodesPopFreeze(FstUnFinishedNodes *nodes, CompiledAddr addr);
// Pops the top entry, which must have no pending last transition, and returns its node.
FstBuilderNode *fstUnFinishedNodesPopEmpty(FstUnFinishedNodes *nodes);
// Marks the node at the bottom of the stack as final with output `out`.
void fstUnFinishedNodesSetRootOutput(FstUnFinishedNodes *node, Output out);
// Finalizes the pending last transition of the top entry against `addr`.
void fstUnFinishedNodesTopLastFreeze(FstUnFinishedNodes *node, CompiledAddr addr);
// Extends the stack with the bytes of `bs`; `out` is attached to the new transition(s).
void fstUnFinishedNodesAddSuffix(FstUnFinishedNodes *node, FstSlice bs, Output out);
// Compares `bs` byte by byte with the pending last transitions on the stack.
// NOTE(review): presumably returns the length of the shared prefix - confirm against the definition.
uint64_t fstUnFinishedNodesFindCommPrefix(FstUnFinishedNodes *node, FstSlice bs);
// Same walk as above, additionally adjusting outputs along the shared prefix.
// NOTE(review): the leading capital 'F' is kept because the definition is spelled that way.
uint64_t FstUnFinishedNodesFindCommPrefixAndSetOutput(FstUnFinishedNodes *node, FstSlice bs, Output in, Output *out);
typedef struct FstCountingWriter {
void* wtr; // wrap any writer that counts and checksum bytes written
uint64_t count;
CheckSummer summer;
} FstCountingWriter;
typedef struct FstBuilder {
FstCountingWriter wtr; // The FST raw data is written directly to `wtr`.
FstUnFinishedNodes *unfinished; // The stack of unfinished nodes
FstRegistry registry; // A map of finished nodes.
SArray* last; // The last word added
CompiledAddr lastAddr; // The address of the last compiled node
uint64_t len; // num of keys added
} FstBuilder;
......@@ -80,16 +95,6 @@ typedef struct FstTransitions {
FstRange range;
} FstTransitions;
typedef struct FstUnFinishedNodes {
SArray *stack; // <FstBuilderNodeUnfinished>
} FstUnFinishedNodes;
typedef struct FstBuilderNode {
bool isFinal;
Output finalOutput;
SArray *trans; // <FstTransition>
} FstBuilderNode;
typedef struct FstLastTransition {
......@@ -97,13 +102,23 @@ typedef struct FstLastTransition {
Output out;
} FstLastTransition;
* FstBuilderNodeUnfinished and its helper functions
* TODO: simplify the function names
typedef struct FstBuilderNodeUnfinished {
FstBuilderNode node;
FstLastTransition last;
FstBuilderNode *node;
FstLastTransition* last;
} FstBuilderNodeUnfinished;
void fstBuilderNodeUnfinishedLastCompiled(FstBuilderNodeUnfinished *node, CompiledAddr addr);
void fstBuilderNodeUnfinishedAddOutputPrefix(FstBuilderNodeUnfinished *node, CompiledAddr addr);
* FstNode and helper function
typedef struct FstNode {
uint8_t* data;
FstSlice data;
uint64_t version;
State state;
CompiledAddr start;
......@@ -114,6 +129,28 @@ typedef struct FstNode {
Output finalOutput;
} FstNode;
// Accessor macros for FstNode. Each takes a pointer to the node. The argument
// and the whole expansion are parenthesized so the macros compose safely with
// surrounding operators and with arguments such as `&node`.

// If this node is final and has a terminal output value, then it is returned. Otherwise, a zero output is returned.
#define FST_NODE_FINAL_OUTPUT(node) ((node)->finalOutput)
// Returns true if and only if this node corresponds to a final or "match" state in the finite state transducer.
#define FST_NODE_IS_FINAL(node) ((node)->isFinal)
// Returns the number of transitions in this node. The maximum number of transitions is 256.
#define FST_NODE_LEN(node) ((node)->nTrans)
// Returns true if and only if this node has zero transitions.
// (The trailing 'E' in the name is kept as-is for existing callers.)
#define FST_NODE_IS_EMPTYE(node) ((node)->nTrans == 0)
// Returns the address of this node.
#define FST_NODE_ADDR(node) ((node)->start)
FstNode *fstNodeCreate(int64_t version, CompiledAddr addr, FstSlice *data);
FstTransitions fstNodeTransitionIter(FstNode *node);
FstTransitions* fstNodeTransitions(FstNode *node);
bool fstNodeGetTransitionAt(FstNode *node, uint64_t i, FstTransition *res);
bool fstNodeGetTransitionAddrAt(FstNode *node, uint64_t i, CompiledAddr *res);
bool fstNodeFindInput(FstNode *node, uint8_t b, uint64_t *res);
bool fstNodeCompile(FstNode *node, void *w, CompiledAddr lastAddr, CompiledAddr addr, FstBuilderNode *builderNode);
FstSlice fstNodeAsSlice(FstNode *node);
typedef struct FstMeta {
uint64_t version;
CompiledAddr rootAddr;
......@@ -125,42 +162,21 @@ typedef struct FstMeta {
typedef struct Fst {
FstMeta meta;
void *data; //
} Fst;
// ops
// ops
typedef struct FstIndexedValue {
uint64_t index;
uint64_t value;
// relate to Regist
typedef struct FstRegistry {
SArray *table; // <Registtry cell>
uint64_t tableSize; // num of rows
uint64_t mruSize; // num of columns
} FstRegistry;
} FstIndexedValue;
typedef struct FstRegistryCache {
SArray *cells; // <RegistryCell>
} FstRegistryCache;
typedef struct FstRegistryCell {
CompiledAddr addr;
FstBuilderNode *node;
} FstRegistryCell;
enum FstRegistryEntry {Found, NotFound, Rejected};
FstNode *fstNodeCreate(int64_t version, CompiledAddr addr, uint8_t *data);
FstTransitions fstNodeTransitionIter(FstNode *node);
FstTransition fstNodeGetTransitionAt(FstNode *node, uint64_t i);
CompiledAddr fstNodeGetTransitionAddr(FstNode *node, uint64_t i);
int64_t fstNodeFindInput(FstNode *node, int8_t b);
Output fstNodeGetFinalOutput(FstNode *node);
void* fstNodeCompile(FstNode *node, void *w, CompiledAddr lastAddr, CompiledArr addr, FstBuilderNode *builderNode);
* Copyright (c) 2019 TAOS Data, Inc. <jhtao@taosdata.com>
* This program is free software: you can use, redistribute, and/or modify
* it under the terms of the GNU Affero General Public License, version 3
* or later ("AGPL"), as published by the Free Software Foundation.
* This program is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
// Forward typedef: a bare `struct AutomationCtx;` does not introduce the
// unqualified name `AutomationCtx`, which the wrapper structs below use.
typedef struct AutomationCtx AutomationCtx;

// Wrapper holding a pointer to an automaton context.
typedef struct StartWith {
  AutomationCtx *autoSelf;
} StartWith;

// Wrapper holding a pointer to an automaton context.
typedef struct Complement {
  AutomationCtx *autoSelf;
} Complement;

// automaton context: carries one opaque data pointer
struct AutomationCtx {
  void *data;
};

// automaton interface
// NOTE(review): these are file-scope function-pointer objects; if this header
// is included by several .c files they probably belong inside a struct or
// behind `extern` - confirm the intended design.
void  (*start)(AutomationCtx *ctx);
bool  (*isMatch)(AutomationCtx *ctx);
bool  (*canMatch)(AutomationCtx *ctx, void *data);
bool  (*willAlwaysMatch)(AutomationCtx *ctx, void *state);
void* (*accpet)(AutomationCtx *ctx, void *state, uint8_t byte);
// The second parameter previously had no type; it is `void *state` like its siblings.
void* (*accpetEof)(AutomationCtx *ctx, void *state);
* Copyright (c) 2019 TAOS Data, Inc. <jhtao@taosdata.com>
* This program is free software: you can use, redistribute, and/or modify
* it under the terms of the GNU Affero General Public License, version 3
* or later ("AGPL"), as published by the Free Software Foundation.
* This program is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
#ifndef __INDEX_FST_NODE_H__
#define __INDEX_FST_NODE_H__
* Copyright (c) 2019 TAOS Data, Inc. <jhtao@taosdata.com>
* This program is free software: you can use, redistribute, and/or modify
* it under the terms of the GNU Affero General Public License, version 3
* or later ("AGPL"), as published by the Free Software Foundation.
* This program is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
#ifndef __FST_REGISTRY_H__
#define __FST_REGISTRY_H__
#include "index_fst_util.h"
typedef struct FstRegistry {
} FstRegistry;
* Copyright (c) 2019 TAOS Data, Inc. <jhtao@taosdata.com>
* This program is free software: you can use, redistribute, and/or modify
* it under the terms of the GNU Affero General Public License, version 3
* or later ("AGPL"), as published by the Free Software Foundation.
* This program is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
#ifndef __INDEX_FST_UTIL_H__
#define __INDEX_FST_UTIL_H__
#include "tarray.h"
typedef uint64_t FstType;
typedef uint64_t CompiledAddr;
typedef uint64_t Output;
typedef uint8_t PackSizes;
//A sentinel value used to indicate an empty final state
extern const CompiledAddr EMPTY_ADDRESS;
/// A sentinel value used to indicate an invalid state.
extern const CompiledAddr NONE_ADDRESS;
// This version number is written to every finite state transducer created by
// this library. When a finite state transducer is read, its version number is
// checked against this value.
extern const uint64_t version;
// The threshold (in number of transitions) at which an index is created for
// a node's transitions. This speeds up lookup time at the expense of FST size
extern const uint64_t TRANS_INDEX_THRESHOLD;
// high 4 bits is transition address packed size.
// low 4 bits is output value packed size.
// `0` is a legal value which means there are no transitions/outputs
// Pack-size accessors for a single byte `v`:
//   bits 7..4 hold the transition-address packed size,
//   bits 3..0 hold the output-value packed size.
// The SET macros are complete statements (usable in an unbraced if/else) and
// mask `sz` to 4 bits so a stray large value cannot clobber the other nibble.
// Hex masks replace the 0b literals, which are a compiler extension before C23.
#define FST_SET_TRANSITION_PACK_SIZE(v, sz) do { (v) = ((v) & 0x0F) | (((sz) & 0x0F) << 4); } while (0)
#define FST_GET_TRANSITION_PACK_SIZE(v) (((v) & 0xF0) >> 4)
#define FST_SET_OUTPUT_PACK_SIZE(v, sz) do { (v) = ((v) & 0xF0) | ((sz) & 0x0F); } while (0)
#define FST_GET_OUTPUT_PACK_SIZE(v) ((v) & 0x0F)
// Maps a 1-based common-input index back to its byte via COMMON_INPUTS_INV.
#define COMMON_INPUT(idx) COMMON_INPUTS_INV[(idx) - 1]
// Stores in `val` the 1-based common-input index of byte `v` (COMMON_INPUTS[v] + 1,
// wrapped modulo 256), or 0 when that index is greater than `max`.
// All parameters are parenthesized so expression arguments keep their meaning;
// `val` must be an lvalue and is evaluated more than once.
#define COMMON_INDEX(v, max, val) do {                   \
  (val) = ((uint16_t)COMMON_INPUTS[(v)] + 1) % 256;      \
  (val) = (val) > (max) ? 0 : (val);                     \
} while (0)
//uint8_t commonInput(uint8_t idx);
//uint8_t commonIdx(uint8_t v, uint8_t max);
uint8_t packSize(uint64_t n);
uint64_t unpackUint64(uint8_t *ch, uint8_t sz);
uint8_t packDeltaSize(CompiledAddr nodeAddr, CompiledAddr transAddr);
CompiledAddr unpackDelta(char *data, uint64_t len, uint64_t nodeAddr);
// A window onto a byte buffer, passed around by value.
// NOTE(review): start/end look like inclusive indices into `data`
// (fstSliceCreate sets end = dLen - 1) - confirm with the callers.
typedef struct FstSlice {
uint8_t *data;   // underlying bytes; fstSliceCopy shares this pointer instead of copying the bytes
uint64_t dLen;   // length of the underlying buffer, in bytes
uint32_t start;  // first index of the window
uint32_t end;    // last index of the window
} FstSlice;
FstSlice fstSliceCopy(FstSlice *slice, uint32_t start, uint32_t end);
FstSlice fstSliceCreate(uint8_t *data, uint64_t dLen);
bool fstSliceEmpty(FstSlice *slice);
......@@ -15,13 +15,143 @@
#include "index_fst.h"
FstUnFinishedNodes *fstUnFinishedNodesCreate() {
FstUnFinishedNodes *nodes = malloc(sizeof(FstUnFinishedNodes));
if (nodes == NULL) { return NULL; }
nodes->stack = (SArray *)taosArrayInit(64, sizeof(FstBuilderNodeUnfinished));
fstUnFinishedNodesPushEmpty(nodes, false);
return nodes;
void fstUnFinishedNodesPushEmpty(FstUnFinishedNodes *nodes, bool isFinal) {
FstBuilderNode *node = malloc(sizeof(FstBuilderNode));
node->isFinal = isFinal;
node->finalOutput = 0;
node->trans = NULL;
FstBuilderNodeUnfinished un = {.node = node, .last = NULL};
taosArrayPush(nodes->stack, &un);
FstBuilderNode *fstUnFinishedNodesPopRoot(FstUnFinishedNodes *nodes) {
assert(taosArrayGetSize(nodes->stack) == 1);
FstBuilderNodeUnfinished *un = taosArrayPop(nodes->stack);
assert(un->last == NULL);
return un->node;
FstBuilderNode *fstUnFinishedNodesPopFreeze(FstUnFinishedNodes *nodes, CompiledAddr addr) {
FstBuilderNodeUnfinished *un = taosArrayPop(nodes->stack);
fstBuilderNodeUnfinishedLastCompiled(un, addr);
free(un->last); // TODO add func FstLastTransitionFree()
return un->node;
FstBuilderNode *fstUnFinishedNodesPopEmpty(FstUnFinishedNodes *nodes) {
FstBuilderNodeUnfinished *un = taosArrayPop(nodes->stack);
assert(un->last == NULL);
return un->node;
void fstUnFinishedNodesSetRootOutput(FstUnFinishedNodes *nodes, Output out) {
FstBuilderNodeUnfinished *un = taosArrayGet(nodes->stack, 0);
un->node->isFinal = true;
un->node->finalOutput = out;
//un->node->trans = NULL;
void fstUnFinishedNodesTopLastFreeze(FstUnFinishedNodes *nodes, CompiledAddr addr) {
size_t sz = taosArrayGetSize(nodes->stack) - 1;
FstBuilderNodeUnfinished *un = taosArrayGet(nodes->stack, sz);
fstBuilderNodeUnfinishedLastCompiled(un, addr);
void fstUnFinishedNodesAddSuffix(FstUnFinishedNodes *nodes, FstSlice bs, Output out) {
FstSlice *s = &bs;
if (s->data == NULL || s->dLen == 0 || s->start > s->end) {
size_t sz = taosArrayGetSize(nodes->stack) - 1;
FstBuilderNodeUnfinished *un = taosArrayGet(nodes->stack, sz);
assert(un->last == NULL);
FstLastTransition *trn = malloc(sizeof(FstLastTransition));
trn->inp = s->data[s->start];
trn->out = out;
un->last = trn;
for (uint64_t i = s->start; i <= s->end; i++) {
FstBuilderNode *n = malloc(sizeof(FstBuilderNode));
n->isFinal = false;
n->finalOutput = 0;
n->trans = NULL;
FstLastTransition *trn = malloc(sizeof(FstLastTransition));
trn->inp = s->data[i];
trn->out = out;
FstBuilderNodeUnfinished un = {.node = n, .last = trn};
taosArrayPush(nodes->stack, &un);
fstUnFinishedNodesPushEmpty(nodes, true);
uint64_t fstUnFinishedNodesFindCommPrefix(FstUnFinishedNodes *node, FstSlice bs) {
FstSlice *s = &bs;
size_t lsz = (size_t)(s->end - s->start + 1); // data len
size_t ssz = taosArrayGetSize(node->stack); // stack size
uint64_t count = 0;
for (size_t i = 0; i < ssz && i < lsz; i++) {
FstBuilderNodeUnfinished *un = taosArrayGet(node->stack, i);
if (un->last->inp == s->data[s->start + i]) {
} else {
return count;
uint64_t FstUnFinishedNodesFindCommPrefixAndSetOutput(FstUnFinishedNodes *node, FstSlice bs, Output in, Output *out) {
FstSlice *s = &bs;
size_t lsz = (size_t)(s->end - s->start + 1); // data len
size_t ssz = taosArrayGetSize(node->stack); // stack size
uint64_t res = 0;
for (size_t i = 0; i < lsz && i < ssz; i++) {
FstBuilderNodeUnfinished *un = taosArrayGet(node->stack, i);
FstLastTransition *last = un->last;
if (last->inp == s->data[s->start + i]) {
uint64_t commPrefix = last->out;
uint64_t addPrefix = last->out - commPrefix;
out = out - commPrefix;
last->out = commPrefix;
if (addPrefix != 0) {
fstBuilderNodeUnfinishedAddOutputPrefix(un, addPrefix);
} else {
return res;
// fst node function
FstNode *fstNodeCreate(int64_t version, ComiledAddr addr, uint8_t *data) {
FstNode *fstNodeCreate(int64_t version, CompiledAddr addr, FstSlice *slice) {
FstNode *n = (FstNode *)malloc(sizeof(FstNode));
if (n == NULL) { return NULL; }
if (addr == EMPTY_ADDRESS) {
n->date = NULL;
n->data = fstSliceCreate(NULL, 0);
n->version = version;
n->state = EmptyFinal;
n->start = EMPTY_ADDRESS;
......@@ -29,20 +159,138 @@ FstNode *fstNodeCreate(int64_t version, ComiledAddr addr, uint8_t *data) {
n->isFinal = true;
n->nTrans = 0;
n->sizes = 0;
n->finalOutpu = 0;
return n;
n->finalOutput = 0;
uint8_t v = slice->data[addr];
uint8_t s = (v & 0b11000000) >> 6;
if (s == 0b11) { // oneTransNext
n->data = fstSliceCopy(slice, 0, addr);
n->version = version;
n->state = OneTransNext;
n->start = addr;
n->end = addr; //? s.end_addr(data);
n->isFinal = false;
n->sizes = 0;
n->nTrans = 0;
n->finalOutput = 0;
} else if (v == 0b10) { // oneTrans
uint64_t sz; // fetch sz from addr
n->data = fstSliceCopy(slice, 0, addr);
n->version = version;
n->state = OneTrans;
n->start = addr;
n->end = addr; // s.end_addr(data, sz);
n->isFinal = false;
n->nTrans = 1;
n->sizes = sz;
n->finalOutput = 0;
} else { // anyTrans
uint64_t sz; // s.sizes(data)
uint32_t nTrans; // s.ntrans(data)
n->data = *slice;
n->version = version;
n->state = AnyTrans;
n->start = addr;
n->end = addr; // s.end_addr(version, data, sz, ntrans);
n->isFinal = false; // s.is_final_state();
n->nTrans = nTrans;
n->sizes = sz;
n->finalOutput = 0; // s.final_output(version, data, sz, ntrans);
uint8_t v = (data[addr] & 0b1100000) >> 6;
if (v == 0b11) {
} else if (v == 0b10) {
return n;
FstTransitions* fstNodeTransitions(FstNode *node) {
FstTransitions *t = malloc(sizeof(FstTransitions));
if (NULL == t) {
return NULL;
FstRange range = {.start = 0, .end = FST_NODE_LEN(node)};
t->node = node;
t->range = range;
return t;
bool fstNodeGetTransitionAt(FstNode *node, uint64_t i, FstTransition *res) {
bool s = true;
if (node->state == OneTransNext) {
} else if (node->state == OneTrans) {
} else {
} else if (node->state == AnyTrans) {
} else {
s = false;
return s;
bool fstNodeGetTransitionAddrAt(FstNode *node, uint64_t i, CompiledAddr *res) {
bool s = true;
if (node->state == OneTransNext) {
} else if (node->state == OneTrans) {
} else if (node->state == AnyTrans) {
} else if (node->state == EmptyFinal){
s = false;
return s;
bool fstNodeFindInput(FstNode *node, uint8_t b, uint64_t *res) {
bool s = true;
uint8_t input; // s.input
if (node->state == OneTransNext) {
if (b == input) { *res = 0; }
else { return s ; }
} else if (node->state == OneTrans) {
if (b == input) { *res = 0; }
else {return s;}
} else if (node->state == AnyTrans) {
} else if (node->state == EmptyFinal) {
s = false;
return s;
bool fstNodeCompile(FstNode *node, void *w, CompiledAddr lastAddr, CompiledAddr addr, FstBuilderNode *builderNode) {
size_t sz = taosArrayGetSize(builderNode->trans);
assert(sz < 256);
if (sz == 0 && builderNode->isFinal && builderNode->finalOutput == 0) {
return true;
} else if (sz != 1 || builderNode->isFinal) {
// AnyTrans->Compile(w, addr, node);
} else {
FstTransition *tran = taosArrayGet(builderNode->trans, 0);
if (tran->addr == lastAddr && tran->out == 0) {
//OneTransNext::compile(w, lastAddr, tran->inp);
return true;
} else {
//OneTrans::Compile(w, lastAddr, *tran);
return true;
return true;
FstBuilder *fstBuilderCreate(void *w, FstType ty) {
FstBuilder *b = malloc(sizeof(FstBuilder));
if (NULL == b) { return b; }
FstCountingWriter wtr = {.wtr = w, .count = 0, .summer = 0};
b->wtr = wtr;
b->unfinished = malloc(sizeof(FstUnFinishedNodes));
return b;
FstSlice fstNodeAsSlice(FstNode *node) {
FstSlice *slice = &node->data;
FstSlice s = fstSliceCopy(slice, slice->end, slice->dLen - 1);
return s;
* Copyright (c) 2019 TAOS Data, Inc. <jhtao@taosdata.com>
* This program is free software: you can use, redistribute, and/or modify
* it under the terms of the GNU Affero General Public License, version 3
* or later ("AGPL"), as published by the Free Software Foundation.
* This program is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
......@@ -12,6 +12,8 @@
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
#include "tutil.h"
const uint8_t COMMON_INPUTS[] = {
84, // '\x00'
85, // '\x01'
......@@ -271,7 +273,7 @@ const uint8_t COMMON_INPUTS[] = {
255, // 'ÿ'
char const COMMON_INPUTS_INV[] = [
char const COMMON_INPUTS_INV[] = {
't', 'e', '/', 'o', 'a', 's', 'r', 'i', 'p', 'c', 'n', 'w',
'.', 'h', 'l', 'm', '-', 'd', 'u', '0', '1', '2', 'g', '=',
':', 'b', 'f', '3', 'y', '5', '&', '_', '4', 'v', '9', '6',
......@@ -300,5 +302,5 @@ char const COMMON_INPUTS_INV[] = [
'\xe9', '\xea', '\xeb', '\xec', '\xed', '\xee', '\xef', '\xf0',
'\xf1', '\xf2', '\xf3', '\xf4', '\xf5', '\xf6', '\xf7', '\xf8',
'\xf9', '\xfa', '\xfb', '\xfc', '\xfd', '\xfe', '\xff',
* Copyright (c) 2019 TAOS Data, Inc. <jhtao@taosdata.com>
* This program is free software: you can use, redistribute, and/or modify
* it under the terms of the GNU Affero General Public License, version 3
* or later ("AGPL"), as published by the Free Software Foundation.
* This program is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
* Copyright (c) 2019 TAOS Data, Inc. <jhtao@taosdata.com>
* This program is free software: you can use, redistribute, and/or modify
* it under the terms of the GNU Affero General Public License, version 3
* or later ("AGPL"), as published by the Free Software Foundation.
* This program is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
#include "index_fst_registry.h"
* Copyright (c) 2019 TAOS Data, Inc. <jhtao@taosdata.com>
* This program is free software: you can use, redistribute, and/or modify
* it under the terms of the GNU Affero General Public License, version 3
* or later ("AGPL"), as published by the Free Software Foundation.
* This program is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
#include "index_fst_util.h"
//A sentinel value used to indicate an empty final state
const CompiledAddr EMPTY_ADDRESS = 0;
/// A sentinel value used to indicate an invalid state.
const CompiledAddr NONE_ADDRESS = 1;
// This version number is written to every finite state transducer created by
// this library. When a finite state transducer is read, its version number is
// checked against this value.
const uint64_t version = 3;
// The threshold (in number of transitions) at which an index is created for
// a node's transitions. This speeds up lookup time at the expense of FST size
const uint64_t TRANS_INDEX_THRESHOLD = 32;
//uint8_t commonInput(uint8_t idx) {
// if (idx == 0) { return -1; }
// else {
// return COMMON_INPUTS_INV[idx - 1];
// }
//uint8_t commonIdx(uint8_t v, uint8_t max) {
// uint8_t v = ((uint16_t)tCOMMON_INPUTS[v] + 1)%256;
// return v > max ? 0: v;
// Returns the minimum number of bytes (1..8) needed to store `n`.
// Zero still needs one byte. Results match the original threshold chain:
// n < 2^8 -> 1, n < 2^16 -> 2, ..., n >= 2^56 -> 8.
uint8_t packSize(uint64_t n) {
  uint8_t bytes = 1;
  // Grow while bits remain above the current width. The shift never exceeds
  // 56, so it stays well defined for a 64-bit operand.
  while (bytes < 8 && (n >> (8 * bytes)) != 0) {
    bytes++;
  }
  return bytes;
}
// Decodes `sz` little-endian bytes starting at `ch` into a 64-bit value.
//   ch: source bytes, least significant first; NULL yields 0.
//   sz: number of bytes to read; values above 8 are clamped to 8.
// Returns the decoded value (0 when sz is 0).
uint64_t unpackUint64(uint8_t *ch, uint8_t sz) {
  uint64_t n = 0;  // must start at zero: bytes are OR-ed into it
  if (ch == NULL) {
    return n;
  }
  if (sz > sizeof(uint64_t)) {
    sz = (uint8_t)sizeof(uint64_t);  // a shift of 64 or more would be undefined
  }
  for (uint8_t i = 0; i < sz; i++) {
    // Widen before shifting: ch[i] promotes to int, and shifting an int by
    // 32 or more bits (or into its sign bit) is undefined behavior.
    n |= (uint64_t)ch[i] << (8 * i);
  }
  return n;
}
// Returns the number of bytes needed to store the distance between a node and
// the target of one of its transitions.
//   nodeAddr:  address of the node that owns the transition
//   transAddr: address the transition points to
// A transition to EMPTY_ADDRESS is sized as packSize(EMPTY_ADDRESS) rather
// than as a delta.
uint8_t packDeltaSize(CompiledAddr nodeAddr, CompiledAddr transAddr) {
if (transAddr == EMPTY_ADDRESS) {
return packSize(EMPTY_ADDRESS);
} else {
// Unsigned subtraction: wraps around if transAddr is greater than nodeAddr.
return packSize(nodeAddr - transAddr);
// Decodes a packed transition delta and converts it back to an address.
//   data:     `len` little-endian bytes holding the delta
//   len:      number of bytes to decode; at most 8 are meaningful
//   nodeAddr: address of the node that owns the transition
// Returns EMPTY_ADDRESS when the stored delta equals EMPTY_ADDRESS, otherwise
// nodeAddr - delta.
CompiledAddr unpackDelta(char *data, uint64_t len, uint64_t nodeAddr) {
  // Clamp explicitly instead of letting the uint64_t -> uint8_t conversion
  // silently wrap large lengths.
  uint8_t  sz    = (len > sizeof(uint64_t)) ? (uint8_t)sizeof(uint64_t) : (uint8_t)len;
  // unpackUint64 takes raw bytes; make the char* -> uint8_t* conversion explicit.
  uint64_t delta = unpackUint64((uint8_t *)data, sz);
  if (delta == EMPTY_ADDRESS) {
    // This path previously fell off the end of a non-void function.
    return EMPTY_ADDRESS;
  }
  return nodeAddr - delta;
}
// fst slice func
// Builds a slice that spans the whole buffer: start = 0, end = dLen - 1.
// The pointer is stored as given; nothing is copied here.
// NOTE: when dLen is 0, `dLen - 1` wraps around before being narrowed to the
// 32-bit `end` field, so `end` is not meaningful for an empty slice.
FstSlice fstSliceCreate(uint8_t *data, uint64_t dLen) {
FstSlice slice = {.data = data, .dLen = dLen, .start = 0, .end = dLen - 1};
return slice;
// Returns a new view [start, end] onto the same bytes as `slice`. The buffer
// is shared, not duplicated.
//   slice: source slice; must not be NULL
//   start: first index of the new window
//   end:   last index of the new window
// If either index is outside the buffer, or start > end, the result is an
// empty slice with every field zeroed (data == NULL, dLen == 0).
FstSlice fstSliceCopy(FstSlice *slice, uint32_t start, uint32_t end) {
  // Fully initialize the result so the rejection path never returns
  // indeterminate dLen/start/end values to the caller.
  FstSlice t = {.data = NULL, .dLen = 0, .start = 0, .end = 0};
  if (start >= slice->dLen || end >= slice->dLen || start > end) {
    return t;
  }
  t.data  = slice->data;
  t.dLen  = slice->dLen;
  t.start = start;
  t.end   = end;
  return t;
}
// Returns true when the slice has no data pointer or a zero buffer length.
// dLen is unsigned, so `<= 0` can only be true for exactly 0.
bool fstSliceEmpty(FstSlice *slice) {
return slice->data == NULL || slice->dLen <= 0;
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
想要评论请 注册