提交 735bd1c7 编写于 作者: H Hongze Cheng

Merge branch '3.0' into feature/vnode

...@@ -36,6 +36,8 @@ typedef struct FstRange { ...@@ -36,6 +36,8 @@ typedef struct FstRange {
typedef enum { OneTransNext, OneTrans, AnyTrans, EmptyFinal} State; typedef enum { OneTransNext, OneTrans, AnyTrans, EmptyFinal} State;
typedef enum { Included, Excluded, Unbounded} FstBound; typedef enum { Included, Excluded, Unbounded} FstBound;
// Ordering verdict for a key; this is the return type of fstBuilderCheckLastKey().
// NOTE(review): presumably the verdict is relative to the builder's last inserted key — confirm against the implementation.
typedef enum {Ordered, OutOfOrdered, DuplicateKey} OrderType;
/* /*
...@@ -66,17 +68,95 @@ typedef struct FstBuilder { ...@@ -66,17 +68,95 @@ typedef struct FstBuilder {
FstCountingWriter *wrt; // The FST raw data is written directly to `wtr`. FstCountingWriter *wrt; // The FST raw data is written directly to `wtr`.
FstUnFinishedNodes *unfinished; // The stack of unfinished nodes FstUnFinishedNodes *unfinished; // The stack of unfinished nodes
FstRegistry* registry; // A map of finished nodes. FstRegistry* registry; // A map of finished nodes.
SArray* last; // The last word added FstSlice last; // The last word added
CompiledAddr lastAddr; // The address of the last compiled node CompiledAddr lastAddr; // The address of the last compiled node
uint64_t len; // num of keys added uint64_t len; // num of keys added
} FstBuilder; } FstBuilder;
// FstBuilder API.
// NOTE(review): the implementations are not visible from this header; the notes below are
// inferred from names and signatures only and should be confirmed against the implementation.

// Create / destroy a builder. `w` is an opaque writer handle; `ty` is the FstType.
FstBuilder *fstBuilderCreate(void *w, FstType ty);
void fstBuilderDestroy(FstBuilder *b);
// Presumably inserts key `bs` with associated output `in` — TODO confirm.
void fstBuilderInsertOutput(FstBuilder *b, FstSlice bs, Output in);
// Returns an OrderType verdict for key `bs`; `ckDup` presumably enables duplicate detection — TODO confirm.
OrderType fstBuilderCheckLastKey(FstBuilder *b, FstSlice bs, bool ckDup);
// Presumably compiles unfinished nodes starting from index `istate` — TODO confirm.
void fstBuilderCompileFrom(FstBuilder *b, uint64_t istate);
// Compiles a single builder node and returns a CompiledAddr.
CompiledAddr fstBuilderCompile(FstBuilder *b, FstBuilderNode *bn);
typedef struct FstTransitions { typedef struct FstTransitions {
FstNode *node; FstNode *node;
FstRange range; FstRange range;
} FstTransitions; } FstTransitions;
// FstState and related functions.
// Pairs a State tag with one byte of extra data.
typedef struct FstState {
State state;  // which State variant this is (OneTransNext, OneTrans, AnyTrans or EmptyFinal)
uint8_t val;  // NOTE(review): looks like the packed state byte the fstState* accessors read and modify — confirm
} FstState;
// Constructors: build an FstState from node data at `addr`, or from a State tag.
FstState fstStateCreateFrom(FstSlice* data, CompiledAddr addr);
FstState fstStateCreate(State state);

// compile: one variant per State that carries transitions
void fstStateCompileForOneTransNext(FstCountingWriter *w, CompiledAddr addr, uint8_t inp);
void fstStateCompileForOneTrans(FstCountingWriter *w, CompiledAddr addr, FstTransition *trn);
void fstStateCompileForAnyTrans(FstCountingWriter *w, CompiledAddr addr, FstBuilderNode *node);

// set_comm_input
void fstStateSetCommInput(FstState* state, uint8_t inp);

// comm_input
// NOTE(review): `null` is an out-parameter; presumably set when there is no common input — confirm.
uint8_t fstStateCommInput(FstState* state, bool *null);

// input_len
uint64_t fstStateInputLen(FstState* state);

// end_addr
uint64_t fstStateEndAddrForOneTransNext(FstState* state, FstSlice *data);
uint64_t fstStateEndAddrForOneTrans(FstState *state, FstSlice *data, PackSizes sizes);
uint64_t fstStateEndAddrForAnyTrans(FstState *state, uint64_t version, FstSlice *data, PackSizes sizes, uint64_t nTrans);

// input
uint8_t fstStateInput(FstState *state, FstNode *node);
uint8_t fstStateInputForAnyTrans(FstState *state, FstNode *node, uint64_t i);

// trans_addr
CompiledAddr fstStateTransAddr(FstState *state, FstNode *node);
CompiledAddr fstStateTransAddrForAnyTrans(FstState *state, FstNode *node, uint64_t i);

// sizes
PackSizes fstStateSizes(FstState *state, FstSlice *data);

// output
Output fstStateOutput(FstState *state, FstNode *node);
Output fstStateOutputForAnyTrans(FstState *state, FstNode *node, uint64_t i);

// AnyTrans-specific functions
void fstStateSetFinalState(FstState *state, bool yes);
bool fstStateIsFinalState(FstState *state);
void fstStateSetStateNtrans(FstState *state, uint8_t n);

// state_ntrans
// NOTE(review): `null` is an out-parameter; presumably set when no transition count is stored in the state — confirm.
uint8_t fstStateStateNtrans(FstState *state, bool *null);
uint64_t fstStateTotalTransSize(FstState *state, uint64_t version, PackSizes size, uint64_t nTrans);
uint64_t fstStateTransIndexSize(FstState *state, uint64_t version, uint64_t nTrans);
uint64_t fstStateNtransLen(FstState *state);
uint64_t fstStateNtrans(FstState *state, FstSlice *slice);
Output fstStateFinalOutput(FstState *state, uint64_t version, FstSlice *data, PackSizes sizes, uint64_t nTrans);
uint64_t fstStateFindInput(FstState *state, FstNode *node, uint8_t b, bool *null);
// State predicates: each takes a pointer to a node and is true when node->state.state
// equals the corresponding State tag.
// The argument is parenthesized so pointer expressions such as `base + 1` expand correctly.
// The "TRNAS" spelling is kept because the macro names are the interface callers use.
#define FST_STATE_ONE_TRNAS_NEXT(node) ((node)->state.state == OneTransNext)
#define FST_STATE_ONE_TRNAS(node) ((node)->state.state == OneTrans)
#define FST_STATE_ANY_TRANS(node) ((node)->state.state == AnyTrans)
#define FST_STATE_EMPTY_FINAL(node) ((node)->state.state == EmptyFinal)
typedef struct FstLastTransition { typedef struct FstLastTransition {
...@@ -93,8 +173,10 @@ typedef struct FstBuilderNodeUnfinished { ...@@ -93,8 +173,10 @@ typedef struct FstBuilderNodeUnfinished {
FstLastTransition* last; FstLastTransition* last;
} FstBuilderNodeUnfinished; } FstBuilderNodeUnfinished;
void fstBuilderNodeUnfinishedLastCompiled(FstBuilderNodeUnfinished *node, CompiledAddr addr); void fstBuilderNodeUnfinishedLastCompiled(FstBuilderNodeUnfinished *node, CompiledAddr addr);
void fstBuilderNodeUnfinishedAddOutputPrefix(FstBuilderNodeUnfinished *node, CompiledAddr addr); void fstBuilderNodeUnfinishedAddOutputPrefix(FstBuilderNodeUnfinished *node, Output out);
/* /*
* FstNode and helper function * FstNode and helper function
...@@ -102,7 +184,7 @@ void fstBuilderNodeUnfinishedAddOutputPrefix(FstBuilderNodeUnfinished *node, Com ...@@ -102,7 +184,7 @@ void fstBuilderNodeUnfinishedAddOutputPrefix(FstBuilderNodeUnfinished *node, Com
typedef struct FstNode { typedef struct FstNode {
FstSlice data; FstSlice data;
uint64_t version; uint64_t version;
State state; FstState state;
CompiledAddr start; CompiledAddr start;
CompiledAddr end; CompiledAddr end;
bool isFinal; bool isFinal;
...@@ -122,6 +204,7 @@ typedef struct FstNode { ...@@ -122,6 +204,7 @@ typedef struct FstNode {
// Return the address of this node. // Return the address of this node.
#define FST_NODE_ADDR(node) node->start #define FST_NODE_ADDR(node) node->start
FstNode *fstNodeCreate(int64_t version, CompiledAddr addr, FstSlice *data); FstNode *fstNodeCreate(int64_t version, CompiledAddr addr, FstSlice *data);
void fstNodeDestroy(FstNode *fstNode); void fstNodeDestroy(FstNode *fstNode);
...@@ -160,6 +243,4 @@ void fstLastTransitionDestroy(FstLastTransition *trn); ...@@ -160,6 +243,4 @@ void fstLastTransitionDestroy(FstLastTransition *trn);
#endif #endif
#ifndef __INDEX_FST_COMM_H__
#define __INDEX_FST_COMM_H__

#include <stdint.h>  // uint8_t; keeps this header usable without relying on the includer

// Lookup table with one uint8_t entry per input byte; defined in the matching .c file.
extern const uint8_t COMMON_INPUTS[];
// Table of input characters; the COMMON_INPUT(idx) macro reads COMMON_INPUTS_INV[(idx) - 1].
extern char const COMMON_INPUTS_INV[];

#endif
...@@ -34,6 +34,10 @@ FstCountingWriter *fstCountingWriterCreate(void *wtr); ...@@ -34,6 +34,10 @@ FstCountingWriter *fstCountingWriterCreate(void *wtr);
void fstCountingWriterDestroy(FstCountingWriter *w); void fstCountingWriterDestroy(FstCountingWriter *w);
// Write the low `nBytes` bytes of `n` to the writer, least-significant byte first.
// `nBytes` must be in [1, 8] (checked with assert in the definition).
void fstCountingWriterPackUintIn(FstCountingWriter *writer, uint64_t n, uint8_t nBytes);
// Write `n` using packSize(n) bytes and return that byte count.
uint8_t fstCountingWriterPackUint(FstCountingWriter *writer, uint64_t n);
#define FST_WRITER_COUNT(writer) (writer->count) #define FST_WRITER_COUNT(writer) (writer->count)
#define FST_WRITER_INTER_WRITER(writer) (writer->wtr) #define FST_WRITER_INTER_WRITER(writer) (writer->wtr)
#define FST_WRITE_CHECK_SUMMER(writer) (writer->summer) #define FST_WRITE_CHECK_SUMMER(writer) (writer->summer)
......
...@@ -18,7 +18,7 @@ ...@@ -18,7 +18,7 @@
#define __INDEX_FST_UTIL_H__ #define __INDEX_FST_UTIL_H__
#include "tarray.h" #include "tarray.h"
#include "index_fst_common.h"
typedef uint64_t FstType; typedef uint64_t FstType;
typedef uint64_t CompiledAddr; typedef uint64_t CompiledAddr;
...@@ -44,9 +44,10 @@ extern const uint64_t TRANS_INDEX_THRESHOLD; ...@@ -44,9 +44,10 @@ extern const uint64_t TRANS_INDEX_THRESHOLD;
// //
// `0` is a legal value which means there are no transitions/outputs // `0` is a legal value which means there are no transitions/outputs
#define FST_SET_TRANSITION_PACK_SIZE(v, sz) do {v = (v & 0b00001111) | (sz << 4} while(0)
// Store `sz` in the high 4 bits of `v`, leaving the low 4 bits unchanged.
// Both arguments are parenthesized so compound expressions (e.g. `a | b`) are shifted as a whole.
// `v` is evaluated twice, so it must be a side-effect-free lvalue.
#define FST_SET_TRANSITION_PACK_SIZE(v, sz) do { (v) = ((v) & 0b00001111) | ((sz) << 4); } while(0)
#define FST_GET_TRANSITION_PACK_SIZE(v) (((v) & 0b11110000) >> 4) #define FST_GET_TRANSITION_PACK_SIZE(v) (((v) & 0b11110000) >> 4)
#define FST_SET_OUTPUT_PACK_SIZE(v, sz) do { v = (v & 0b11110000) | sz } while(0) #define FST_SET_OUTPUT_PACK_SIZE(v, sz) do { v = (v & 0b11110000) | sz; } while(0)
#define FST_GET_OUTPUT_PACK_SIZE(v) ((v) & 0b00001111) #define FST_GET_OUTPUT_PACK_SIZE(v) ((v) & 0b00001111)
#define COMMON_INPUT(idx) COMMON_INPUTS_INV[(idx) - 1] #define COMMON_INPUT(idx) COMMON_INPUTS_INV[(idx) - 1]
...@@ -70,13 +71,16 @@ CompiledAddr unpackDelta(char *data, uint64_t len, uint64_t nodeAddr); ...@@ -70,13 +71,16 @@ CompiledAddr unpackDelta(char *data, uint64_t len, uint64_t nodeAddr);
typedef struct FstSlice { typedef struct FstSlice {
uint8_t *data; uint8_t *data;
uint64_t dLen; uint64_t dLen;
uint32_t start; int32_t start;
uint32_t end; int32_t end;
} FstSlice; } FstSlice;
FstSlice fstSliceCopy(FstSlice *slice, uint32_t start, uint32_t end); FstSlice fstSliceCopy(FstSlice *slice, int32_t start, int32_t end);
FstSlice fstSliceCreate(uint8_t *data, uint64_t dLen); FstSlice fstSliceCreate(uint8_t *data, uint64_t dLen);
bool fstSliceEmpty(FstSlice *slice); bool fstSliceEmpty(FstSlice *slice);
// Byte-wise lexicographic comparison of the [start, end] ranges of two slices.
// Returns -1, 0 or 1; when one range is a prefix of the other, the shorter one orders first.
int fstSliceCompare(FstSlice *a, FstSlice *b);
// Number of bytes in slice `s`; `start` and `end` are both inclusive, hence the +1.
#define FST_SLICE_LEN(s) ((s)->end - (s)->start + 1)
#endif #endif
此差异已折叠。
...@@ -14,6 +14,7 @@ ...@@ -14,6 +14,7 @@
*/ */
#include "tutil.h" #include "tutil.h"
const uint8_t COMMON_INPUTS[] = { const uint8_t COMMON_INPUTS[] = {
84, // '\x00' 84, // '\x00'
85, // '\x01' 85, // '\x01'
...@@ -273,7 +274,7 @@ const uint8_t COMMON_INPUTS[] = { ...@@ -273,7 +274,7 @@ const uint8_t COMMON_INPUTS[] = {
255, // 'ÿ' 255, // 'ÿ'
}; };
char const COMMON_INPUTS_INV[] = { const char COMMON_INPUTS_INV[] = {
't', 'e', '/', 'o', 'a', 's', 'r', 'i', 'p', 'c', 'n', 'w', 't', 'e', '/', 'o', 'a', 's', 'r', 'i', 'p', 'c', 'n', 'w',
'.', 'h', 'l', 'm', '-', 'd', 'u', '0', '1', '2', 'g', '=', '.', 'h', 'l', 'm', '-', 'd', 'u', '0', '1', '2', 'g', '=',
':', 'b', 'f', '3', 'y', '5', '&', '_', '4', 'v', '9', '6', ':', 'b', 'f', '3', 'y', '5', '&', '_', '4', 'v', '9', '6',
......
...@@ -13,6 +13,7 @@ ...@@ -13,6 +13,7 @@
* along with this program. If not, see <http://www.gnu.org/licenses/>. * along with this program. If not, see <http://www.gnu.org/licenses/>.
*/ */
#include "tutil.h" #include "tutil.h"
#include "index_fst_util.h"
#include "index_fst_counting_writer.h" #include "index_fst_counting_writer.h"
FstCountingWriter *fstCountingWriterCreate(void *wrt) { FstCountingWriter *fstCountingWriterCreate(void *wrt) {
...@@ -36,10 +37,27 @@ uint64_t fstCountingWriterWrite(FstCountingWriter *write, uint8_t *buf, uint32_t ...@@ -36,10 +37,27 @@ uint64_t fstCountingWriterWrite(FstCountingWriter *write, uint8_t *buf, uint32_t
return bufLen; return bufLen;
} }
int FstCountingWriterFlush(FstCountingWriter *write) { int fstCountingWriterFlush(FstCountingWriter *write) {
//write->wtr->flush //write->wtr->flush
return 1; return 1;
} }
/*
 * Write the low `nBytes` bytes of `n` to `writer`, least-significant byte first.
 *
 * writer: destination counting writer, passed through to fstCountingWriterWrite().
 * n:      value to serialize.
 * nBytes: number of bytes to emit; must be in [1, 8].
 *
 * The scratch buffer lives on the stack, so there is no allocation that can fail
 * and nothing to free.
 */
void fstCountingWriterPackUintIn(FstCountingWriter *writer, uint64_t n, uint8_t nBytes) {
  uint8_t buf[8] = {0};

  assert(1 <= nBytes && nBytes <= 8);
  if (nBytes > sizeof(buf)) {
    // assert() is compiled out under NDEBUG; never index past the scratch buffer.
    return;
  }

  for (uint8_t i = 0; i < nBytes; i++) {
    buf[i] = (uint8_t)n;  // take the current lowest byte
    n = n >> 8;           // then expose the next one
  }
  fstCountingWriterWrite(writer, buf, nBytes);
}
/*
 * Serialize `n` using the byte width returned by packSize(n).
 * Returns that width so the caller knows how many bytes were emitted.
 */
uint8_t fstCountingWriterPackUint(FstCountingWriter *writer, uint64_t n) {
  const uint8_t width = packSize(n);

  fstCountingWriterPackUintIn(writer, n, width);
  return width;
}
...@@ -13,6 +13,7 @@ ...@@ -13,6 +13,7 @@
* along with this program. If not, see <http://www.gnu.org/licenses/>. * along with this program. If not, see <http://www.gnu.org/licenses/>.
*/ */
#include "index_fst_util.h" #include "index_fst_util.h"
#include "index_fst_common.h"
...@@ -94,7 +95,7 @@ FstSlice fstSliceCreate(uint8_t *data, uint64_t dLen) { ...@@ -94,7 +95,7 @@ FstSlice fstSliceCreate(uint8_t *data, uint64_t dLen) {
FstSlice slice = {.data = data, .dLen = dLen, .start = 0, .end = dLen - 1}; FstSlice slice = {.data = data, .dLen = dLen, .start = 0, .end = dLen - 1};
return slice; return slice;
} }
FstSlice fstSliceCopy(FstSlice *slice, uint32_t start, uint32_t end) { FstSlice fstSliceCopy(FstSlice *slice, int32_t start, int32_t end) {
FstSlice t; FstSlice t;
if (start >= slice->dLen || end >= slice->dLen || start > end) { if (start >= slice->dLen || end >= slice->dLen || start > end) {
t.data = NULL; t.data = NULL;
...@@ -111,5 +112,21 @@ bool fstSliceEmpty(FstSlice *slice) { ...@@ -111,5 +112,21 @@ bool fstSliceEmpty(FstSlice *slice) {
return slice->data == NULL || slice->dLen <= 0; return slice->data == NULL || slice->dLen <= 0;
} }
/*
 * Lexicographically compare the bytes in [start, end] of slice `a` with those of slice `b`.
 *
 * Returns -1 if `a` orders before `b`, 1 if it orders after, and 0 if both ranges
 * hold the same bytes. When one range is a prefix of the other, the shorter one
 * orders first.
 */
int fstSliceCompare(FstSlice *a, FstSlice *b) {
  const int32_t lenA = a->end - a->start + 1;
  const int32_t lenB = b->end - b->start + 1;
  const int32_t common = (lenB > lenA) ? lenA : lenB;

  int pos = 0;
  while (pos < common) {
    const uint8_t lhs = a->data[pos + a->start];
    const uint8_t rhs = b->data[pos + b->start];
    if (lhs != rhs) {
      // The first differing byte decides the order.
      return (lhs < rhs) ? -1 : 1;
    }
    pos++;
  }

  // All shared bytes matched: the lengths break the tie.
  if (lenA != lenB) {
    return (lenA < lenB) ? -1 : 1;
  }
  return 0;
}
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册