未验证 提交 437985c4 编写于 作者: dengyihao's avatar dengyihao 提交者: GitHub

Merge pull request #8802 from taosdata/origin/3.0/fst

Origin/3.0/fst
......@@ -36,6 +36,8 @@ typedef struct FstRange {
// Node state kinds; the FST_STATE_* macros compare node->state.state against these.
typedef enum { OneTransNext, OneTrans, AnyTrans, EmptyFinal} State;
// Kinds of range bound. NOTE(review): presumably consumed by FstRange -- confirm against its users.
typedef enum { Included, Excluded, Unbounded} FstBound;
// Key-ordering verdict; this is the return type of fstBuilderCheckLastKey().
typedef enum {Ordered, OutOfOrdered, DuplicateKey} OrderType;
/*
......@@ -66,17 +68,95 @@ typedef struct FstBuilder {
FstCountingWriter *wrt; // The FST raw data is written directly to `wtr`.
FstUnFinishedNodes *unfinished; // The stack of unfinished nodes
FstRegistry* registry; // A map of finished nodes.
SArray* last; // The last word added
FstSlice last; // The last word added
CompiledAddr lastAddr; // The address of the last compiled node
uint64_t len; // num of keys added
} FstBuilder;
FstBuilder *fstBuilderCreate(void *w, FstType ty);
void fstBuilderDestroy(FstBuilder *b);
void fstBuilderInsertOutput(FstBuilder *b, FstSlice bs, Output in);
OrderType fstBuilderCheckLastKey(FstBuilder *b, FstSlice bs, bool ckDup);
void fstBuilderCompileFrom(FstBuilder *b, uint64_t istate);
CompiledAddr fstBuilderCompile(FstBuilder *b, FstBuilderNode *bn);
// Pairs a node pointer with an FstRange.
// NOTE(review): presumably a cursor over a subset of the node's transitions -- confirm with the users of this type.
typedef struct FstTransitions {
FstNode *node;
FstRange range;
} FstTransitions;
// FstState and its related functions.
// `state` holds one of the State enum kinds; `val` is one byte of per-state data.
// NOTE(review): `val` is presumably the encoded state byte read from the FST data -- confirm.
typedef struct FstState {
State state;
uint8_t val;
} FstState;
FstState fstStateCreateFrom(FstSlice* data, CompiledAddr addr);
FstState fstStateCreate(State state);
//compile
void fstStateCompileForOneTransNext(FstCountingWriter *w, CompiledAddr addr, uint8_t inp);
void fstStateCompileForOneTrans(FstCountingWriter *w, CompiledAddr addr, FstTransition *trn);
void fstStateCompileForAnyTrans(FstCountingWriter *w, CompiledAddr addr, FstBuilderNode *node);
// set_comm_input
void fstStateSetCommInput(FstState* state, uint8_t inp);
// comm_input
uint8_t fstStateCommInput(FstState* state, bool *null);
// input_len
uint64_t fstStateInputLen(FstState* state);
// end_addr: one prototype per state kind (OneTransNext, OneTrans, AnyTrans).
// The slice parameter is named `data` in all three so the declarations read consistently.
uint64_t fstStateEndAddrForOneTransNext(FstState* state, FstSlice *data);
uint64_t fstStateEndAddrForOneTrans(FstState *state, FstSlice *data, PackSizes sizes);
uint64_t fstStateEndAddrForAnyTrans(FstState *state, uint64_t version, FstSlice *data, PackSizes sizes, uint64_t nTrans);
// input
uint8_t fstStateInput(FstState *state, FstNode *node);
uint8_t fstStateInputForAnyTrans(FstState *state, FstNode *node, uint64_t i);
// trans_addr
CompiledAddr fstStateTransAddr(FstState *state, FstNode *node);
CompiledAddr fstStateTransAddrForAnyTrans(FstState *state, FstNode *node, uint64_t i);
// sizes
PackSizes fstStateSizes(FstState *state, FstSlice *data);
// Output
Output fstStateOutput(FstState *state, FstNode *node);
Output fstStateOutputForAnyTrans(FstState *state, FstNode *node, uint64_t i);
// anyTrans specify function
void fstStateSetFinalState(FstState *state, bool yes);
bool fstStateIsFinalState(FstState *state);
void fstStateSetStateNtrans(FstState *state, uint8_t n);
// state_ntrans
uint8_t fstStateStateNtrans(FstState *state, bool *null);
uint64_t fstStateTotalTransSize(FstState *state, uint64_t version, PackSizes size, uint64_t nTrans);
uint64_t fstStateTransIndexSize(FstState *state, uint64_t version, uint64_t nTrans);
uint64_t fstStateNtransLen(FstState *state);
uint64_t fstStateNtrans(FstState *state, FstSlice *slice);
// final_output (slice parameter spelled `data`, matching the other prototypes that take an FstSlice)
Output fstStateFinalOutput(FstState *state, uint64_t version, FstSlice *data, PackSizes sizes, uint64_t nTrans);
uint64_t fstStateFindInput(FstState *state, FstNode *node, uint8_t b, bool *null);
// State-kind predicates: true when node->state.state equals the named State value.
// `node` is parenthesized so any pointer expression (e.g. `base + i`) expands correctly.
// The "TRNAS" spelling is kept as-is because it is the existing macro name.
#define FST_STATE_ONE_TRNAS_NEXT(node) ((node)->state.state == OneTransNext)
#define FST_STATE_ONE_TRNAS(node) ((node)->state.state == OneTrans)
#define FST_STATE_ANY_TRANS(node) ((node)->state.state == AnyTrans)
#define FST_STATE_EMPTY_FINAL(node) ((node)->state.state == EmptyFinal)
typedef struct FstLastTransition {
......@@ -93,8 +173,10 @@ typedef struct FstBuilderNodeUnfinished {
FstLastTransition* last;
} FstBuilderNodeUnfinished;
void fstBuilderNodeUnfinishedLastCompiled(FstBuilderNodeUnfinished *node, CompiledAddr addr);
void fstBuilderNodeUnfinishedAddOutputPrefix(FstBuilderNodeUnfinished *node, CompiledAddr addr);
void fstBuilderNodeUnfinishedAddOutputPrefix(FstBuilderNodeUnfinished *node, Output out);
/*
* FstNode and helper function
......@@ -102,7 +184,7 @@ void fstBuilderNodeUnfinishedAddOutputPrefix(FstBuilderNodeUnfinished *node, Com
typedef struct FstNode {
FstSlice data;
uint64_t version;
State state;
FstState state;
CompiledAddr start;
CompiledAddr end;
bool isFinal;
......@@ -122,6 +204,7 @@ typedef struct FstNode {
// Return the address of this node, i.e. its `start` field.
// Both the argument and the whole expansion are parenthesized so the macro
// composes safely with pointer arithmetic and surrounding operators.
#define FST_NODE_ADDR(node) ((node)->start)
FstNode *fstNodeCreate(int64_t version, CompiledAddr addr, FstSlice *data);
void fstNodeDestroy(FstNode *fstNode);
......@@ -160,6 +243,4 @@ void fstLastTransitionDestroy(FstLastTransition *trn);
#endif
#ifndef __INDEX_FST_COMM_H__
#define __INDEX_FST_COMM_H__
extern const uint8_t COMMON_INPUTS[];
extern char const COMMON_INPUTS_INV[];
#endif
......@@ -34,6 +34,10 @@ FstCountingWriter *fstCountingWriterCreate(void *wtr);
void fstCountingWriterDestroy(FstCountingWriter *w);
void fstCountingWriterPackUintIn(FstCountingWriter *writer, uint64_t n, uint8_t nBytes);
uint8_t fstCountingWriterPackUint(FstCountingWriter *writer, uint64_t n);
// Field accessors for a counting-writer pointer: `count`, `wtr` and `summer`.
// The argument is parenthesized so pointer expressions expand correctly; each
// expansion is still an lvalue, so the accessors remain assignable.
#define FST_WRITER_COUNT(writer) ((writer)->count)
#define FST_WRITER_INTER_WRITER(writer) ((writer)->wtr)
#define FST_WRITE_CHECK_SUMMER(writer) ((writer)->summer)
......
......@@ -18,7 +18,7 @@
#define __INDEX_FST_UTIL_H__
#include "tarray.h"
#include "index_fst_common.h"
typedef uint64_t FstType;
typedef uint64_t CompiledAddr;
......@@ -44,9 +44,10 @@ extern const uint64_t TRANS_INDEX_THRESHOLD;
//
// `0` is a legal value which means there are no transitions/outputs
#define FST_SET_TRANSITION_PACK_SIZE(v, sz) do {v = (v & 0b00001111) | (sz << 4} while(0)
// Store `sz` in the high nibble of `v`, keeping the low nibble unchanged.
// `v` and `sz` are parenthesized so compound arguments (e.g. `a | b`) bind as a
// unit; hex masks replace the binary literals, which are a compiler extension before C23.
// `v` must be a modifiable lvalue and is evaluated twice.
#define FST_SET_TRANSITION_PACK_SIZE(v, sz) do { (v) = ((v) & 0x0F) | ((sz) << 4); } while (0)
// Read the high nibble of `v`.
#define FST_GET_TRANSITION_PACK_SIZE(v) (((v) & 0xF0) >> 4)
#define FST_SET_OUTPUT_PACK_SIZE(v, sz) do { v = (v & 0b11110000) | sz } while(0)
// Store `sz` in the low nibble of `v`, keeping the high nibble unchanged.
// `sz` is parenthesized so low-precedence arguments (e.g. a ternary) are OR-ed
// in as a unit; hex masks replace the binary literals, which are a compiler
// extension before C23. `v` must be a modifiable lvalue and is evaluated twice.
#define FST_SET_OUTPUT_PACK_SIZE(v, sz) do { (v) = ((v) & 0xF0) | (sz); } while (0)
// Read the low nibble of `v`.
#define FST_GET_OUTPUT_PACK_SIZE(v) ((v) & 0x0F)
// Look up COMMON_INPUTS_INV with a 1-based index: `idx` maps to element idx - 1.
#define COMMON_INPUT(idx) (COMMON_INPUTS_INV[(idx) - 1])
......@@ -70,13 +71,16 @@ CompiledAddr unpackDelta(char *data, uint64_t len, uint64_t nodeAddr);
typedef struct FstSlice {
uint8_t *data;
uint64_t dLen;
uint32_t start;
uint32_t end;
int32_t start;
int32_t end;
} FstSlice;
FstSlice fstSliceCopy(FstSlice *slice, uint32_t start, uint32_t end);
FstSlice fstSliceCopy(FstSlice *slice, int32_t start, int32_t end);
FstSlice fstSliceCreate(uint8_t *data, uint64_t dLen);
bool fstSliceEmpty(FstSlice *slice);
int fstSliceCompare(FstSlice *a, FstSlice *b);
// Length of the window described by `s`, with `start` and `end` both inclusive
// (hence the +1). `s` is a pointer to a struct with `start`/`end` members
// (e.g. FstSlice) and is evaluated twice, so avoid arguments with side effects.
#define FST_SLICE_LEN(s) ((s)->end - (s)->start + 1)
#endif
此差异已折叠。
......@@ -14,6 +14,7 @@
*/
#include "tutil.h"
const uint8_t COMMON_INPUTS[] = {
84, // '\x00'
85, // '\x01'
......@@ -273,7 +274,7 @@ const uint8_t COMMON_INPUTS[] = {
255, // 'ÿ'
};
char const COMMON_INPUTS_INV[] = {
const char COMMON_INPUTS_INV[] = {
't', 'e', '/', 'o', 'a', 's', 'r', 'i', 'p', 'c', 'n', 'w',
'.', 'h', 'l', 'm', '-', 'd', 'u', '0', '1', '2', 'g', '=',
':', 'b', 'f', '3', 'y', '5', '&', '_', '4', 'v', '9', '6',
......
......@@ -13,6 +13,7 @@
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#include "tutil.h"
#include "index_fst_util.h"
#include "index_fst_counting_writer.h"
FstCountingWriter *fstCountingWriterCreate(void *wrt) {
......@@ -36,10 +37,27 @@ uint64_t fstCountingWriterWrite(FstCountingWriter *write, uint8_t *buf, uint32_t
return bufLen;
}
int FstCountingWriterFlush(FstCountingWriter *write) {
// Flush hook for the counting writer.
// Currently a stub: the call into the underlying writer is commented out,
// `write` is unused, and the function always returns 1.
int fstCountingWriterFlush(FstCountingWriter *write) {
//write->wtr->flush
return 1;
}
// Write the low `nBytes` bytes of `n` to `writer`, least-significant byte first.
//
// writer: destination, passed straight through to fstCountingWriterWrite().
// n:      value to encode.
// nBytes: number of bytes to emit; must be in [1, 8] (checked with assert).
void fstCountingWriterPackUintIn(FstCountingWriter *writer, uint64_t n, uint8_t nBytes) {
  assert(1 <= nBytes && nBytes <= 8);

  // A fixed 8-byte scratch buffer on the stack: the previous calloc() result was
  // used without a NULL check, and a heap round-trip is unnecessary for 8 bytes.
  uint8_t buf[8] = {0};
  for (uint8_t i = 0; i < nBytes; i++) {
    buf[i] = (uint8_t)n;  // keep the current low byte
    n >>= 8;              // expose the next byte
  }
  fstCountingWriterWrite(writer, buf, nBytes);
}
// Encode `n` using the byte width reported by packSize(n), and return that
// width so the caller knows how many bytes were handed to the writer.
uint8_t fstCountingWriterPackUint(FstCountingWriter *writer, uint64_t n) {
  const uint8_t width = packSize(n);

  fstCountingWriterPackUintIn(writer, n, width);
  return width;
}
......@@ -13,6 +13,7 @@
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#include "index_fst_util.h"
#include "index_fst_common.h"
......@@ -94,7 +95,7 @@ FstSlice fstSliceCreate(uint8_t *data, uint64_t dLen) {
FstSlice slice = {.data = data, .dLen = dLen, .start = 0, .end = dLen - 1};
return slice;
}
FstSlice fstSliceCopy(FstSlice *slice, uint32_t start, uint32_t end) {
FstSlice fstSliceCopy(FstSlice *slice, int32_t start, int32_t end) {
FstSlice t;
if (start >= slice->dLen || end >= slice->dLen || start > end) {
t.data = NULL;
......@@ -111,5 +112,21 @@ bool fstSliceEmpty(FstSlice *slice) {
return slice->data == NULL || slice->dLen <= 0;
}
// Byte-wise lexicographic comparison of the [start, end] windows of two slices.
// Returns -1 if `a` orders before `b`, 1 if after, and 0 if the windows are
// equal. When one window is a prefix of the other, the shorter one orders first.
int fstSliceCompare(FstSlice *a, FstSlice *b) {
  const int32_t lenA = (a->end - a->start + 1);
  const int32_t lenB = (b->end - b->start + 1);
  const int32_t common = (lenA < lenB ? lenA : lenB);

  // Walk the shared prefix; the first differing byte decides the order.
  for (int k = 0; k < common; k++) {
    const uint8_t lhs = a->data[k + a->start];
    const uint8_t rhs = b->data[k + b->start];
    if (lhs != rhs) {
      return (lhs < rhs) ? -1 : 1;
    }
  }

  // Shared prefix is identical: fall back to comparing the lengths.
  if (lenA != lenB) {
    return (lenA < lenB) ? -1 : 1;
  }
  return 0;
}
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册