From 169f6b3ad8c148aa576585d79c3428da6da822dc Mon Sep 17 00:00:00 2001
From: yihaoDeng
Date: Tue, 23 Nov 2021 23:53:45 +0800
Subject: [PATCH] update fst build struct

---
Review notes (everything between "---" and the first "diff --git" is
ignored by git-am):

Changed relative to the original commit, on added ('+') lines only, with
all hunk line counts left as they were:
  * index_fst.c, fstStateSetCommInput(): the limit handed to COMMON_INDEX
    was 0x111111 (1118481), a value a uint8_t can never exceed.  It is now
    0b111111, the same 6-bit field that fstStateCommInput() reads back
    with "s->val & 0b00111111".  COMMON_INDEX is defined outside this
    patch; confirm that its second argument is an upper bound.
  * index_fst_util.h: the added line that held a bare "extern" with no
    declarator is now an empty line.
  * index_fst.h / index_fst.c: parameter "date" renamed to "data" in
    fstStateEndAddrForAnyTrans() and fstStateFinalOutput().

Left for a follow-up, because they sit on context lines or would change a
signature:
  * FST_SET_TRANSITION_PACK_SIZE contains an unbalanced "(sz << 4}" and
    FST_SET_OUTPUT_PACK_SIZE has no ';' before '}'; neither can compile
    when expanded.
  * fstStateStateNtrans() returns void; presumably it should return the
    transition count -- confirm against the intended design.
  * Apart from fstStateSetCommInput, fstStateCommInput, fstStateInputLen,
    fstStateEndAddrForOneTransNext and fstStateEndAddrForOneTrans, the
    functions added to index_fst.c are placeholders.

Caveats: the blob ids on the "index" lines belong to the original commit
and do not describe the edited content.  Blank context lines and
indentation were rebuilt from the hunk headers and must be checked against
the tree before applying.

 source/libs/index/inc/index_fst.h        |  50 ++++++++--
 source/libs/index/inc/index_fst_common.h |   7 ++
 source/libs/index/inc/index_fst_util.h   |   6 +-
 source/libs/index/src/index_fst.c        | 111 +++++++++++++++++++++++
 source/libs/index/src/index_fst_common.c |   3 +-
 source/libs/index/src/index_fst_util.c   |   1 +
 6 files changed, 166 insertions(+), 12 deletions(-)
 create mode 100644 source/libs/index/inc/index_fst_common.h

diff --git a/source/libs/index/inc/index_fst.h b/source/libs/index/inc/index_fst.h
index 40e79b716e..97f3232867 100644
--- a/source/libs/index/inc/index_fst.h
+++ b/source/libs/index/inc/index_fst.h
@@ -99,22 +99,54 @@ FstState fstStateCreateFrom(FstSlice* data, CompiledAddr addr);
 FstState fstStateCreate(State state);
 
 //compile
-void fstStateCompileForOneTransNext(FstState state, FstCountingWriter *w, CompiledAddr addr, uint8_t inp);
-void fstStateCompileForOneTrans(FstState state, FstCountingWriter *w, CompiledAddr addr, FstTransition trn);
-void fstStateCompileForAnyTrans(FstState state, FstCountingWriter *w, CompiledAddr addr, FstBuilderNode *node);
+void fstStateCompileForOneTransNext(FstState* state, FstCountingWriter *w, CompiledAddr addr, uint8_t inp);
+void fstStateCompileForOneTrans(FstState* state, FstCountingWriter *w, CompiledAddr addr, FstTransition trn);
+void fstStateCompileForAnyTrans(FstState* state, FstCountingWriter *w, CompiledAddr addr, FstBuilderNode *node);
 
 // set_comm_input
-void fstStateSetCommInputForOneTransNext(FstState state, uint8_t inp);
-void fstStateSetCommInputForOneTrans(FstState state, uint8_t inp);
+void fstStateSetCommInput(FstState* state, uint8_t inp);
 
 // comm_input
-uint8_t fstStateCommInputForOneTransNext(FstState state);
-uint8_t fstStateCommInputForOneTrans(FstState state);
+uint8_t fstStateCommInput(FstState* state);
 
 // input_len
 
-uint64_t fstStateInputLenForOneTransNext(FstState state);
-uint64_t fstStateInputLenForOneTrans(FstState state);
+uint64_t fstStateInputLen(FstState* state);
+
+
+// end_addr
+uint64_t fstStateEndAddrForOneTransNext(FstState* state, FstSlice *data);
+uint64_t fstStateEndAddrForOneTrans(FstState *state, FstSlice *data, PackSizes sizes);
+uint64_t fstStateEndAddrForAnyTrans(FstState *state, uint64_t version, FstSlice *data, PackSizes sizes, uint64_t nTrans);
+// input
+uint8_t fstStateInput(FstState *state, FstNode *node);
+uint8_t fstStateInputForAnyTrans(FstState *state, FstNode *node, uint64_t i);
+
+// trans_addr
+CompiledAddr fstStateTransAddr(FstState *state, FstNode *node);
+CompiledAddr fstStateTransAddrForAnyTrans(FstState *state, FstNode *node, uint64_t i);
+
+// sizes
+PackSizes fstStateSizes(FstState *state, FstSlice *data);
+// Output
+Output fstStateOutput(FstState *state, FstNode *node);
+Output fstStateOutputForAnyTrans(FstState *state, FstNode *node, uint64_t i);
+
+// anyTrans specify function
+
+void fstStateSetFinalState(FstState *state, bool yes);
+bool fstStateIsFinalState(FstState *state);
+void fstStateSetStateNtrans(FstState *state, uint8_t n);
+// state_ntrans
+void fstStateStateNtrans(FstState *state);
+uint64_t fstStateTotalTransSize(FstState *state, uint64_t version, PackSizes size, uint64_t nTrans);
+uint64_t fstStateTransIndexSize(FstState *state, uint64_t version, uint64_t nTrans);
+uint64_t fstStateNtransLen(FstState *state);
+uint64_t fstStateNtrans(FstState *state);
+Output fstStateFinalOutput(FstState *state, uint64_t version, FstSlice *data, PackSizes sizes, uint64_t nTrans);
+uint64_t fstStateFindInput(FstState *state, FstNode *node, uint8_t b);
+
+
 
 
 
diff --git a/source/libs/index/inc/index_fst_common.h b/source/libs/index/inc/index_fst_common.h
new file mode 100644
index 0000000000..b261f4090c
--- /dev/null
+++ b/source/libs/index/inc/index_fst_common.h
@@ -0,0 +1,7 @@
+#ifndef __INDEX_FST_COMM_H__
+#define __INDEX_FST_COMM_H__
+
+extern const uint8_t COMMON_INPUTS[];
+extern char const COMMON_INPUTS_INV[];
+
+#endif
diff --git a/source/libs/index/inc/index_fst_util.h b/source/libs/index/inc/index_fst_util.h
index 6490054b91..ad2e5510d7 100644
--- a/source/libs/index/inc/index_fst_util.h
+++ b/source/libs/index/inc/index_fst_util.h
@@ -18,7 +18,7 @@
 #define __INDEX_FST_UTIL_H__
 
 #include "tarray.h"
-
+#include "index_fst_common.h"
 
 typedef uint64_t FstType;
 typedef uint64_t CompiledAddr;
@@ -44,6 +44,8 @@ extern const uint64_t TRANS_INDEX_THRESHOLD;
 //
 // `0` is a legal value which means there are no transitions/outputs
 
+
+
 #define FST_SET_TRANSITION_PACK_SIZE(v, sz) do {v = (v & 0b00001111) | (sz << 4} while(0)
 #define FST_GET_TRANSITION_PACK_SIZE(v) (((v) & 0b11110000) >> 4)
 #define FST_SET_OUTPUT_PACK_SIZE(v, sz) do { v = (v & 0b11110000) | sz } while(0)
@@ -79,7 +81,7 @@ FstSlice fstSliceCreate(uint8_t *data, uint64_t dLen);
 bool fstSliceEmpty(FstSlice *slice);
 int fstSliceCompare(FstSlice *a, FstSlice *b);
 
-#define FST_SLICE_LEN(s) (s->end - s->start + 1)
+#define FST_SLICE_LEN(s) ((s)->end - (s)->start + 1)
 
 
 #endif
diff --git a/source/libs/index/src/index_fst.c b/source/libs/index/src/index_fst.c
index 5031c071fa..9198fadbe4 100644
--- a/source/libs/index/src/index_fst.c
+++ b/source/libs/index/src/index_fst.c
@@ -185,8 +185,119 @@ static FstState stateDict[] = {
 FstState fstStateCreate(State state){
   uint8_t idx = (uint8_t)state;
   return stateDict[idx];
+}
+//compile
+void fstStateCompileForOneTransNext(FstState* state, FstCountingWriter *w, CompiledAddr addr, uint8_t inp) {
+  return ;
+}
+void fstStateCompileForOneTrans(FstState* state, FstCountingWriter *w, CompiledAddr addr, FstTransition trn) {
+  return ;
+
+}
+void fstStateCompileForAnyTrans(FstState* state, FstCountingWriter *w, CompiledAddr addr, FstBuilderNode *node) {
+  return;
+}
+
+// set_comm_input
+void fstStateSetCommInput(FstState* s, uint8_t inp) {
+  assert(s->state == OneTransNext || s->state == OneTrans);
+
+  uint8_t val;
+  COMMON_INDEX(inp, 0b111111, val);
+  s->val = (s->val & stateDict[s->state].val) | val;
+}
+
+// comm_input
+uint8_t fstStateCommInput(FstState* s) {
+  assert(s->state == OneTransNext || s->state == OneTrans);
+  uint8_t v = s->val & 0b00111111;
+  //v = 0 indicate that common_input is None
+  return v == 0 ? 0 : COMMON_INPUT(v);
+}
+
+// input_len
+
+uint64_t fstStateInputLen(FstState* s) {
+  assert(s->state == OneTransNext || s->state == OneTrans);
+  return fstStateCommInput(s) == 0 ? 1 : 0;
+}
+
+// end_addr
+uint64_t fstStateEndAddrForOneTransNext(FstState* s, FstSlice *data) {
+  return FST_SLICE_LEN(data) - 1 - fstStateInputLen(s);
+}
+uint64_t fstStateEndAddrForOneTrans(FstState *s, FstSlice *data, PackSizes sizes) {
+  return FST_SLICE_LEN(data) - 1 - fstStateInputLen(s) - 1 - FST_GET_TRANSITION_PACK_SIZE(sizes) - FST_GET_OUTPUT_PACK_SIZE(sizes);
+}
+uint64_t fstStateEndAddrForAnyTrans(FstState *state, uint64_t version, FstSlice *data, PackSizes sizes, uint64_t nTrans) {
+  return 1;
+}
+// input
+uint8_t fstStateInput(FstState *state, FstNode *node) {
+  return 1;
 }
 
+uint8_t fstStateInputForAnyTrans(FstState *state, FstNode *node, uint64_t i) {
+  return 1;
+}
+
+// trans_addr
+CompiledAddr fstStateTransAddr(FstState *state, FstNode *node) {
+  return 1;
+}
+CompiledAddr fstStateTransAddrForAnyTrans(FstState *state, FstNode *node, uint64_t i) {
+  return 1;
+}
+
+// sizes
+PackSizes fstStateSizes(FstState *state, FstSlice *data) {
+  return 1;
+}
+// Output
+Output fstStateOutput(FstState *state, FstNode *node) {
+  return 1;
+
+}
+Output fstStateOutputForAnyTrans(FstState *state, FstNode *node, uint64_t i) {
+  return 1;
+}
+
+// anyTrans specify function
+
+void fstStateSetFinalState(FstState *state, bool yes) {
+  return;
+}
+bool fstStateIsFinalState(FstState *state) {
+  return false;
+}
+void fstStateSetStateNtrans(FstState *state, uint8_t n) {
+  return;
+}
+// state_ntrans
+void fstStateStateNtrans(FstState *state) {
+  return ;
+}
+uint64_t fstStateTotalTransSize(FstState *state, uint64_t version, PackSizes size, uint64_t nTrans) {
+  return 1;
+}
+uint64_t fstStateTransIndexSize(FstState *state, uint64_t version, uint64_t nTrans) {
+  return 1;
+}
+uint64_t fstStateNtransLen(FstState *state) {
+  return 1;
+}
+uint64_t fstStateNtrans(FstState *state) {
+  return 1;
+}
+Output fstStateFinalOutput(FstState *state, uint64_t version, FstSlice *data, PackSizes sizes, uint64_t nTrans) {
+  return 1;
+
+}
+uint64_t fstStateFindInput(FstState *state, FstNode *node, uint8_t b) {
+  return 1;
+
+}
+
 
 // fst node function
 
diff --git a/source/libs/index/src/index_fst_common.c b/source/libs/index/src/index_fst_common.c
index 4ab78cddc5..97fb88d60e 100644
--- a/source/libs/index/src/index_fst_common.c
+++ b/source/libs/index/src/index_fst_common.c
@@ -14,6 +14,7 @@
  */
 
 #include "tutil.h"
+
 const uint8_t COMMON_INPUTS[] = {
     84, // '\x00'
     85, // '\x01'
@@ -273,7 +274,7 @@ const uint8_t COMMON_INPUTS[] = {
     255, // 'ÿ'
 };
 
-char const COMMON_INPUTS_INV[] = {
+const char COMMON_INPUTS_INV[] = {
     't', 'e', '/', 'o', 'a', 's', 'r', 'i', 'p', 'c', 'n', 'w',
     '.', 'h', 'l', 'm', '-', 'd', 'u', '0', '1', '2', 'g', '=',
     ':', 'b', 'f', '3', 'y', '5', '&', '_', '4', 'v', '9', '6',
diff --git a/source/libs/index/src/index_fst_util.c b/source/libs/index/src/index_fst_util.c
index 8e0a104b5f..c4499f8e0d 100644
--- a/source/libs/index/src/index_fst_util.c
+++ b/source/libs/index/src/index_fst_util.c
@@ -13,6 +13,7 @@
  * along with this program. If not, see <http://www.gnu.org/licenses/>.
  */
 #include "index_fst_util.h"
+#include "index_fst_common.h"
 
 
 
-- 
GitLab