Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
taosdata
TDengine
提交
60e339b3
TDengine
项目概览
taosdata
/
TDengine
1 年多 前同步成功
通知
1187
Star
22018
Fork
4786
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
1
列表
看板
标记
里程碑
合并请求
0
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
TDengine
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
1
Issue
1
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
提交
60e339b3
编写于
11月 20, 2021
作者:
dengyihao
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
fst core struct
上级
980ace09
变更
11
隐藏空白更改
内联
并排
Showing
11 changed file
with
681 addition
and
84 deletion
+681
-84
source/libs/index/inc/index_fst.h
source/libs/index/inc/index_fst.h
+86
-70
source/libs/index/inc/index_fst_automation.h
source/libs/index/inc/index_fst_automation.h
+42
-0
source/libs/index/inc/index_fst_node.h
source/libs/index/inc/index_fst_node.h
+22
-0
source/libs/index/inc/index_fst_registry.h
source/libs/index/inc/index_fst_registry.h
+24
-0
source/libs/index/inc/index_fst_util.h
source/libs/index/inc/index_fst_util.h
+82
-0
source/libs/index/src/index_fst.c
source/libs/index/src/index_fst.c
+260
-12
source/libs/index/src/index_fst_automation.c
source/libs/index/src/index_fst_automation.c
+14
-0
source/libs/index/src/index_fst_common.c
source/libs/index/src/index_fst_common.c
+4
-2
source/libs/index/src/index_fst_node.c
source/libs/index/src/index_fst_node.c
+15
-0
source/libs/index/src/index_fst_registry.c
source/libs/index/src/index_fst_registry.c
+17
-0
source/libs/index/src/index_fst_util.c
source/libs/index/src/index_fst_util.c
+115
-0
未找到文件。
source/libs/index/inc/index_fst.h
浏览文件 @
60e339b3
...
...
@@ -13,58 +13,73 @@
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#ifndef _INDEX_FST_H_
#define _INDEX_FST_H_
#include "index_fst.h"
#include "tarray.h"
#ifndef __INDEX_FST_H__
#define __INDEX_FST_H__
typedef
FstType
uint64_t
;
typedef
CompiledAddr
uint64_t
;
typedef
Output
uint64_t
;
typedef
PackSizes
uint8_t
;
#include "tarray.h"
#include "index_fst_util.h"
#include "index_fst_registry.h"
//A sentinel value used to indicate an empty final state
const
CompileAddr
EMPTY_ADDRESS
=
0
;
/// A sentinel value used to indicate an invalid state.
const
CompileAddr
NONE_ADDRESS
=
1
;
// This version number is written to every finite state transducer created by
// this crate. When a finite state transducer is read, its version number is
// checked against this value.
const
uint64_t
version
=
3
;
// The threshold (in number of transitions) at which an index is created for
// a node's transitions. This speeds up lookup time at the expense of FST size
typedef
struct
FstNode
FstNode
;
// Evaluates to the smaller of the two values. Each argument may be evaluated
// twice, so do not pass expressions with side effects.
#define OUTPUT_PREFIX(a, b) ((a) > (b) ? (b) : (a))
const
uint64_t
TRANS_INDEX_THRESHOLD
=
32
;
typedef
struct
FstRange
{
uint64_t
start
;
uint64_t
end
;
}
FstRange
;
enum
State
{
OneTransNext
,
OneTrans
,
AnyTrans
,
EmptyFinal
};
enum
FstBound
{
Included
,
Excluded
,
Unbounded
};
typedef
struct
CheckSummer
{
uint32_t
sum
;
};
typedef
struct
FstBuilderNode
{
bool
isFinal
;
Output
finalOutput
;
SArray
*
trans
;
// <FstTransition>
}
FstBuilderNode
;
typedef
enum
{
OneTransNext
,
OneTrans
,
AnyTrans
,
EmptyFinal
}
State
;
typedef
enum
{
Included
,
Excluded
,
Unbounded
}
FstBound
;
typedef
uint32_t
CheckSummer
;
typedef
struct
FstBuilder
{
FstCountingWriter
wtr
;
// The FST raw data is written directly to `wtr`.
FstUnFinishedNodes
unfinished
// The stack of unfinished nodes
Registry
registry
// A map of finished nodes.
SArray
*
last
// The last word added
CompiledAddr
lastAddr
// The address of the last compiled node
uint64_t
len
// num of keys added
}
FstBuilder
;
/*
*
* UnFinished node and helper function
* TODO: simple function name
*/
typedef
struct
FstUnFinishedNodes
{
SArray
*
stack
;
// <FstBuilderNodeUnfinished> } FstUnFinishedNodes;
}
FstUnFinishedNodes
;
// Size of the stack held by an FstUnFinishedNodes pointer, as reported by
// taosArrayGetSize. The argument is parenthesized so that expressions such as
// `&obj` expand correctly.
#define FST_UNFINISHED_NODES_LEN(nodes) taosArrayGetSize((nodes)->stack)
FstUnFinishedNodes
*
FstUnFinishedNodesCreate
();
void
fstUnFinishedNodesPushEmpty
(
FstUnFinishedNodes
*
nodes
,
bool
isFinal
);
FstBuilderNode
*
fstUnFinishedNodesPopRoot
(
FstUnFinishedNodes
*
nodes
);
FstBuilderNode
*
fstUnFinishedNodesPopFreeze
(
FstUnFinishedNodes
*
nodes
,
CompiledAddr
addr
);
FstBuilderNode
*
fstUnFinishedNodesPopEmpty
(
FstUnFinishedNodes
*
nodes
);
void
fstUnFinishedNodesSetRootOutput
(
FstUnFinishedNodes
*
node
,
Output
out
);
void
fstUnFinishedNodesTopLastFreeze
(
FstUnFinishedNodes
*
node
,
CompiledAddr
addr
);
void
fstUnFinishedNodesAddSuffix
(
FstUnFinishedNodes
*
node
,
FstSlice
bs
,
Output
out
);
uint64_t
fstUnFinishedNodesFindCommPrefix
(
FstUnFinishedNodes
*
node
,
FstSlice
bs
);
uint64_t
FstUnFinishedNodesFindCommPreifxAndSetOutput
(
FstUnFinishedNodes
*
node
,
FstSlice
bs
,
Output
in
,
Output
*
out
);
typedef
struct
FstCountingWriter
{
void
*
wtr
;
// wrap any writer that counts and checksum bytes written
uint64_t
count
;
CheckSummer
summer
;
};
}
FstCountingWriter
;
typedef
struct
FstBuilder
{
FstCountingWriter
wtr
;
// The FST raw data is written directly to `wtr`.
FstUnFinishedNodes
*
unfinished
;
// The stack of unfinished nodes
FstRegistry
registry
;
// A map of finished nodes.
SArray
*
last
;
// The last word added
CompiledAddr
lastAddr
;
// The address of the last compiled node
uint64_t
len
;
// num of keys added
}
FstBuilder
;
...
...
@@ -80,16 +95,6 @@ typedef struct FstTransitions {
FstRange
range
;
}
FstTransitions
;
typedef
struct
FstUnFinishedNodes
{
SArray
*
stack
;
// <FstBuilderNodeUnfinished>
}
FstUnFinishedNodes
;
typedef
struct
FstBuilderNode
{
bool
isFinal
;
Output
finalOutput
;
SArray
*
trans
;
// <FstTransition>
}
FstBuilderNode
;
typedef
struct
FstLastTransition
{
...
...
@@ -97,13 +102,23 @@ typedef struct FstLastTransition {
Output
out
;
}
FstLastTransition
;
/*
* FstBuilderNodeUnfinished and helper function
* TODO: simple function name
*/
typedef
struct
FstBuilderNodeUnfinished
{
FstBuilderNode
node
;
FstLastTransition
last
;
FstBuilderNode
*
node
;
FstLastTransition
*
last
;
}
FstBuilderNodeUnfinished
;
void
fstBuilderNodeUnfinishedLastCompiled
(
FstBuilderNodeUnfinished
*
node
,
CompiledAddr
addr
);
void
fstBuilderNodeUnfinishedAddOutputPrefix
(
FstBuilderNodeUnfinished
*
node
,
CompiledAddr
addr
);
/*
* FstNode and helper function
*/
typedef
struct
FstNode
{
uint8_t
*
data
;
FstSlice
data
;
uint64_t
version
;
State
state
;
CompiledAddr
start
;
...
...
@@ -114,6 +129,28 @@ typedef struct FstNode {
Output
finalOutput
;
}
FstNode
;
// Accessor macros for a pointer to a node. Every argument and every expansion
// is parenthesized so that arguments like `&n` or `p + 1`, and uses inside
// larger expressions, behave as expected.

// If this node is final and has a terminal output value, then it is returned. Otherwise, a zero output is returned.
#define FST_NODE_FINAL_OUTPUT(node) ((node)->finalOutput)
// Returns true if and only if this node corresponds to a final or "match" state in the finite state transducer.
#define FST_NODE_IS_FINAL(node) ((node)->isFinal)
// Returns the number of transitions in this node. The maximum number of transitions is 256.
#define FST_NODE_LEN(node) ((node)->nTrans)
// Returns true if and only if this node has zero transitions.
// NOTE(review): the name carries a trailing 'E' (typo for EMPTY); kept so existing users still compile.
#define FST_NODE_IS_EMPTYE(node) ((node)->nTrans == 0)
// Returns the address of this node.
#define FST_NODE_ADDR(node) ((node)->start)
FstNode
*
fstNodeCreate
(
int64_t
version
,
CompiledAddr
addr
,
FstSlice
*
data
);
FstTransitions
fstNodeTransitionIter
(
FstNode
*
node
);
FstTransitions
*
fstNodeTransitions
(
FstNode
*
node
);
bool
fstNodeGetTransitionAt
(
FstNode
*
node
,
uint64_t
i
,
FstTransition
*
res
);
bool
fstNodeGetTransitionAddrAt
(
FstNode
*
node
,
uint64_t
i
,
CompiledAddr
*
res
);
bool
fstNodeFindInput
(
FstNode
*
node
,
uint8_t
b
,
uint64_t
*
res
);
bool
fstNodeCompile
(
FstNode
*
node
,
void
*
w
,
CompiledAddr
lastAddr
,
CompiledAddr
addr
,
FstBuilderNode
*
builderNode
);
FstSlice
fstNodeAsSlice
(
FstNode
*
node
);
typedef
struct
FstMeta
{
uint64_t
version
;
CompiledAddr
rootAddr
;
...
...
@@ -125,42 +162,21 @@ typedef struct FstMeta {
typedef
struct
Fst
{
FstMeta
meta
;
void
*
data
;
//
};
}
Fst
;
// ops
// ops
typedef
struct
FstIndexedValue
{
uint64_t
index
;
uint64_t
value
;
};
// relate to Regist
typedef
struct
FstRegistry
{
SArray
*
table
;
// <Registtry cell>
uint64_t
tableSize
;
// num of rows
uint64_t
mruSize
;
// num of columns
}
FstRegistry
;
}
FstIndexedValue
;
typedef
struct
FstRegistryCache
{
SArray
*
cells
;
// <RegistryCell>
}
FstRegistryCache
;
typedef
struct
FstRegistryCell
{
CompiledAddr
addr
;
FstBuilderNode
*
node
;
}
FstRegistryCell
;
enum
FstRegistryEntry
{
Found
,
NotFound
,
Rejected
};
FstNode
*
fstNodeCreate
(
int64_t
version
,
CompiledAddr
addr
,
uint8_t
*
data
);
FstTransitions
fstNodeTransitionIter
(
FstNode
*
node
);
FstTransition
fstNodeGetTransitionAt
(
FstNode
*
node
,
uint64_t
i
);
CompiledAddr
fstNodeGetTransitionAddr
(
FstNode
*
node
,
uint64_t
i
);
int64_t
fstNodeFindInput
(
FstNode
*
node
,
int8_t
b
);
Output
fstNodeGetFinalOutput
(
FstNode
*
node
);
void
*
fstNodeCompile
(
FstNode
*
node
,
void
*
w
,
CompiledAddr
lastAddr
,
CompiledArr
addr
,
FstBuilderNode
*
builderNode
);
#endif
source/libs/index/inc/index_fst_automation.h
0 → 100644
浏览文件 @
60e339b3
/*
* Copyright (c) 2019 TAOS Data, Inc. <jhtao@taosdata.com>
*
* This program is free software: you can use, redistribute, and/or modify
* it under the terms of the GNU Affero General Public License, version 3
* or later ("AGPL"), as published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#ifndef __INDEX_FST_AUTAOMATION_H__
#define __INDEX_FST_AUTAOMATION_H__
// Forward typedef: C does not turn a struct tag into a type name by itself,
// so the typedef must exist before StartWith/Complement reference it.
typedef struct AutomationCtx AutomationCtx;

// Wrapper holding a pointer to an automation context.
typedef struct StartWith {
  AutomationCtx *autoSelf;
} StartWith;

// Wrapper holding a pointer to an automation context.
typedef struct Complement {
  AutomationCtx *autoSelf;
} Complement;

// automation
struct AutomationCtx {
  void *data;  // opaque payload; its meaning is not defined in this header
};

// automation interface
// NOTE(review): these are file-scope function-pointer objects declared in a
// header, so every including translation unit gets its own tentative
// definition. Consider moving them into a struct or marking them `extern`.
void (*start)(AutomationCtx *ctx);
bool (*isMatch)(AutomationCtx *ctx);
bool (*canMatch)(AutomationCtx *ctx, void *data);
bool (*willAlwaysMatch)(AutomationCtx *ctx, void *state);
void *(*accpet)(AutomationCtx *ctx, void *state, uint8_t byte);
// The `state` parameter had no type; `void *` matches the other callbacks.
void *(*accpetEof)(AutomationCtx *ctx, void *state);
#endif
source/libs/index/inc/index_fst_node.h
0 → 100644
浏览文件 @
60e339b3
/*
* Copyright (c) 2019 TAOS Data, Inc. <jhtao@taosdata.com>
*
* This program is free software: you can use, redistribute, and/or modify
* it under the terms of the GNU Affero General Public License, version 3
* or later ("AGPL"), as published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#ifndef __INDEX_FST_NODE_H__
#define __INDEX_FST_NODE_H__
#endif
source/libs/index/inc/index_fst_registry.h
0 → 100644
浏览文件 @
60e339b3
/*
* Copyright (c) 2019 TAOS Data, Inc. <jhtao@taosdata.com>
*
* This program is free software: you can use, redistribute, and/or modify
* it under the terms of the GNU Affero General Public License, version 3
* or later ("AGPL"), as published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#ifndef __FST_REGISTRY_H__
#define __FST_REGISTRY_H__
#include "index_fst_util.h"
typedef
struct
FstRegistry
{
}
FstRegistry
;
#endif
source/libs/index/inc/index_fst_util.h
0 → 100644
浏览文件 @
60e339b3
/*
* Copyright (c) 2019 TAOS Data, Inc. <jhtao@taosdata.com>
*
* This program is free software: you can use, redistribute, and/or modify
* it under the terms of the GNU Affero General Public License, version 3
* or later ("AGPL"), as published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#ifndef __INDEX_FST_UTIL_H__
#define __INDEX_FST_UTIL_H__
#include "tarray.h"
typedef
uint64_t
FstType
;
typedef
uint64_t
CompiledAddr
;
typedef
uint64_t
Output
;
typedef
uint8_t
PackSizes
;
//A sentinel value used to indicate an empty final state
extern
const
CompiledAddr
EMPTY_ADDRESS
;
/// A sentinel value used to indicate an invalid state.
extern
const
CompiledAddr
NONE_ADDRESS
;
// This version number is written to every finite state transducer created by
// this crate. When a finite state transducer is read, its version number is
// checked against this value.
extern
const
uint64_t
version
;
// The threshold (in number of transitions) at which an index is created for
// a node's transitions. This speeds up lookup time at the expense of FST size
extern
const
uint64_t
TRANS_INDEX_THRESHOLD
;
// A pack-sizes byte stores two 4-bit fields:
//   high 4 bits: transition address packed size
//   low  4 bits: output value packed size
//
// `0` is a legal value which means there are no transitions/outputs.
// The setters mask `sz` to 4 bits so an oversized value cannot spill into the
// neighbouring field. `v` must be an lvalue and is evaluated more than once.
#define FST_SET_TRANSITION_PACK_SIZE(v, sz) do { (v) = ((v) & 0x0F) | (((sz) << 4) & 0xF0); } while (0)
#define FST_GET_TRANSITION_PACK_SIZE(v) (((v) & 0xF0) >> 4)
#define FST_SET_OUTPUT_PACK_SIZE(v, sz) do { (v) = ((v) & 0xF0) | ((sz) & 0x0F); } while (0)
#define FST_GET_OUTPUT_PACK_SIZE(v) ((v) & 0x0F)
// Looks up COMMON_INPUTS_INV at the 1-based position `idx`.
#define COMMON_INPUT(idx) (COMMON_INPUTS_INV[(idx) - 1])
// Stores (COMMON_INPUTS[v] + 1) % 256 into `val`, or 0 when that value exceeds `max`.
// Arguments are parenthesized so compound expressions expand correctly;
// `val` must be an lvalue and is evaluated more than once.
#define COMMON_INDEX(v, max, val) do { \
  (val) = ((uint16_t)COMMON_INPUTS[(v)] + 1) % 256; \
  (val) = (val) > (max) ? 0 : (val); \
} while (0)
//uint8_t commonInput(uint8_t idx);
//uint8_t commonIdx(uint8_t v, uint8_t max);
uint8_t
packSize
(
uint64_t
n
);
uint64_t
unpackUint64
(
uint8_t
*
ch
,
uint8_t
sz
);
uint8_t
packDeltaSize
(
CompiledAddr
nodeAddr
,
CompiledAddr
transAddr
);
CompiledAddr
unpackDelta
(
char
*
data
,
uint64_t
len
,
uint64_t
nodeAddr
);
// A window onto a byte buffer. fstSliceCreate() initialises the window to
// [0, dLen - 1], and fstSliceCopy() narrows it while copying only the `data`
// pointer (the bytes themselves are not duplicated).
typedef struct FstSlice {
  uint8_t *data;   // underlying buffer; NULL marks an empty/invalid slice
  uint64_t dLen;   // length of the buffer `data` points to
  uint32_t start;  // index of the first byte of the window
  uint32_t end;    // index of the last byte of the window (inclusive)
} FstSlice;
FstSlice
fstSliceCopy
(
FstSlice
*
slice
,
uint32_t
start
,
uint32_t
end
);
FstSlice
fstSliceCreate
(
uint8_t
*
data
,
uint64_t
dLen
);
bool
fstSliceEmpty
(
FstSlice
*
slice
);
#endif
source/libs/index/src/index_fst.c
浏览文件 @
60e339b3
...
...
@@ -15,13 +15,143 @@
#include "index_fst.h"
// Allocates an unfinished-node stack and seeds it with one empty, non-final
// node (pushed via fstUnFinishedNodesPushEmpty).
//
// Returns NULL if either the container or its backing array cannot be
// allocated; nothing is leaked in that case.
FstUnFinishedNodes *fstUnFinishedNodesCreate() {
  FstUnFinishedNodes *nodes = malloc(sizeof(*nodes));
  if (nodes == NULL) {
    return NULL;
  }

  nodes->stack = (SArray *)taosArrayInit(64, sizeof(FstBuilderNodeUnfinished));
  if (nodes->stack == NULL) {
    // Without a backing array the push below would dereference NULL.
    free(nodes);
    return NULL;
  }

  fstUnFinishedNodesPushEmpty(nodes, false);
  return nodes;
}
// Pushes a new node with no transitions, zero final output and the given
// final flag onto the stack. The pushed entry has no pending last transition.
void fstUnFinishedNodesPushEmpty(FstUnFinishedNodes *nodes, bool isFinal) {
  FstBuilderNode *fresh = malloc(sizeof(FstBuilderNode));
  fresh->trans = NULL;
  fresh->finalOutput = 0;
  fresh->isFinal = isFinal;

  FstBuilderNodeUnfinished entry;
  entry.node = fresh;
  entry.last = NULL;
  taosArrayPush(nodes->stack, &entry);
}
// Pops the only remaining entry of the stack and returns its builder node.
// Asserts that exactly one entry is left and that it has no pending last
// transition.
FstBuilderNode *fstUnFinishedNodesPopRoot(FstUnFinishedNodes *nodes) {
  SArray *stack = nodes->stack;
  assert(taosArrayGetSize(stack) == 1);

  FstBuilderNodeUnfinished *root = taosArrayPop(stack);
  assert(root->last == NULL);

  FstBuilderNode *result = root->node;
  return result;
}
// Pops the top entry, passes it together with `addr` to
// fstBuilderNodeUnfinishedLastCompiled, releases its last-transition record
// and returns the builder node.
FstBuilderNode *fstUnFinishedNodesPopFreeze(FstUnFinishedNodes *nodes, CompiledAddr addr) {
  FstBuilderNodeUnfinished *top = taosArrayPop(nodes->stack);
  fstBuilderNodeUnfinishedLastCompiled(top, addr);

  // TODO add func FstLastTransitionFree()
  free(top->last);

  FstBuilderNode *frozen = top->node;
  return frozen;
}
// Pops the top entry, asserting that it has no pending last transition, and
// returns its builder node.
FstBuilderNode *fstUnFinishedNodesPopEmpty(FstUnFinishedNodes *nodes) {
  FstBuilderNodeUnfinished *top = taosArrayPop(nodes->stack);
  assert(top->last == NULL);

  FstBuilderNode *emptyNode = top->node;
  return emptyNode;
}
// Marks the bottom entry of the stack (index 0) as final and records `out`
// as its final output.
void fstUnFinishedNodesSetRootOutput(FstUnFinishedNodes *nodes, Output out) {
  FstBuilderNodeUnfinished *rootEntry = taosArrayGet(nodes->stack, 0);
  FstBuilderNode *root = rootEntry->node;

  root->finalOutput = out;
  root->isFinal = true;
  //root->trans = NULL;
}
// Passes the top entry of the stack and `addr` to
// fstBuilderNodeUnfinishedLastCompiled, leaving the entry on the stack.
void fstUnFinishedNodesTopLastFreeze(FstUnFinishedNodes *nodes, CompiledAddr addr) {
  size_t topIdx = taosArrayGetSize(nodes->stack) - 1;
  FstBuilderNodeUnfinished *top = taosArrayGet(nodes->stack, topIdx);
  fstBuilderNodeUnfinishedLastCompiled(top, addr);
}
// Appends the bytes of `bs` (window [start, end], inclusive) as a chain of
// unfinished nodes on top of the stack.
//
//  - The current top entry receives a last transition on the first byte,
//    carrying the whole output `out`.
//  - Every following byte gets its own new node whose last transition carries
//    a zero output, so `out` is emitted exactly once along the path.
//  - Finally an empty final node is pushed to terminate the key.
//
// Does nothing when the slice is empty. Asserts that the current top entry
// has no pending last transition.
void fstUnFinishedNodesAddSuffix(FstUnFinishedNodes *nodes, FstSlice bs, Output out) {
  FstSlice *s = &bs;
  if (s->data == NULL || s->dLen == 0 || s->start > s->end) {
    return;
  }

  size_t                    top = taosArrayGetSize(nodes->stack) - 1;
  FstBuilderNodeUnfinished *parent = taosArrayGet(nodes->stack, top);
  assert(parent->last == NULL);

  // Attach the first byte to the existing top entry before any push, since a
  // push may grow the array and invalidate `parent`.
  FstLastTransition *first = malloc(sizeof(FstLastTransition));
  first->inp = s->data[s->start];
  first->out = out;
  parent->last = first;

  // Remaining bytes start one past `start`; the first byte was consumed
  // above. A 64-bit index avoids wrap-around when end == UINT32_MAX.
  for (uint64_t i = (uint64_t)s->start + 1; i <= s->end; i++) {
    FstBuilderNode *n = malloc(sizeof(FstBuilderNode));
    n->isFinal = false;
    n->finalOutput = 0;
    n->trans = NULL;

    FstLastTransition *trn = malloc(sizeof(FstLastTransition));
    trn->inp = s->data[i];
    trn->out = 0;  // output already carried by the first transition

    FstBuilderNodeUnfinished child = {.node = n, .last = trn};
    taosArrayPush(nodes->stack, &child);
  }

  fstUnFinishedNodesPushEmpty(nodes, true);
}
// Counts how many leading bytes of `bs` match the last-transition inputs of
// the stack entries, comparing byte i of the slice window with entry i.
//
// Stops at the first mismatch, at the end of the slice or stack, or at an
// entry that has no pending last transition (such an entry cannot match).
uint64_t fstUnFinishedNodesFindCommPrefix(FstUnFinishedNodes *node, FstSlice bs) {
  FstSlice *s = &bs;

  size_t lsz = (size_t)(s->end - s->start + 1);  // data len
  size_t ssz = taosArrayGetSize(node->stack);    // stack size

  uint64_t count = 0;
  for (size_t i = 0; i < ssz && i < lsz; i++) {
    FstBuilderNodeUnfinished *un = taosArrayGet(node->stack, i);
    // The top-of-stack entry is pushed with last == NULL; guard before use.
    if (un->last == NULL || un->last->inp != s->data[s->start + i]) {
      break;
    }
    count++;
  }
  return count;
}
// Walks the stack alongside the bytes of `bs` and, for each matching entry,
// splits the transition output into the part shared with the incoming output
// and the remainder.
//
//  node : stack of unfinished nodes
//  bs   : key bytes (window [start, end], inclusive)
//  in   : output value associated with the key being inserted
//  out  : receives what is left of `in` after the shared prefixes have been
//         subtracted (may be NULL if the caller does not need it)
//
// For every matched entry the shared part min(last->out, remaining) stays on
// the transition; any excess the transition previously carried is pushed down
// to the next stack entry via fstBuilderNodeUnfinishedAddOutputPrefix.
//
// Returns the number of matched leading bytes.
uint64_t FstUnFinishedNodesFindCommPrefixAndSetOutput(FstUnFinishedNodes *node, FstSlice bs, Output in, Output *out) {
  FstSlice *s = &bs;

  size_t lsz = (size_t)(s->end - s->start + 1);  // data len
  size_t ssz = taosArrayGetSize(node->stack);    // stack size

  Output   remaining = in;
  uint64_t matched = 0;
  for (size_t i = 0; i < lsz && i < ssz; i++) {
    FstBuilderNodeUnfinished *un = taosArrayGet(node->stack, i);
    FstLastTransition        *last = un->last;
    if (last == NULL || last->inp != s->data[s->start + i]) {
      break;
    }

    Output commPrefix = last->out < remaining ? last->out : remaining;
    Output addPrefix = last->out - commPrefix;
    remaining -= commPrefix;
    last->out = commPrefix;
    matched++;

    // The excess belongs to everything below this transition, i.e. to the
    // node reached through it, which is the next entry on the stack.
    if (addPrefix != 0 && i + 1 < ssz) {
      FstBuilderNodeUnfinished *next = taosArrayGet(node->stack, i + 1);
      fstBuilderNodeUnfinishedAddOutputPrefix(next, addPrefix);
    }
  }

  if (out != NULL) {
    *out = remaining;
  }
  return matched;
}
// fst node function
FstNode
*
fstNodeCreate
(
int64_t
version
,
ComiledAddr
addr
,
uint8_t
*
data
)
{
FstNode
*
fstNodeCreate
(
int64_t
version
,
CompiledAddr
addr
,
FstSlice
*
slice
)
{
FstNode
*
n
=
(
FstNode
*
)
malloc
(
sizeof
(
FstNode
));
if
(
n
==
NULL
)
{
return
NULL
;
}
if
(
addr
==
EMPTY_ADDRESS
)
{
n
->
dat
e
=
NULL
;
n
->
dat
a
=
fstSliceCreate
(
NULL
,
0
);
n
->
version
=
version
;
n
->
state
=
EmptyFinal
;
n
->
start
=
EMPTY_ADDRESS
;
...
...
@@ -29,20 +159,138 @@ FstNode *fstNodeCreate(int64_t version, ComiledAddr addr, uint8_t *data) {
n
->
isFinal
=
true
;
n
->
nTrans
=
0
;
n
->
sizes
=
0
;
n
->
finalOutpu
=
0
;
return
n
;
n
->
finalOutput
=
0
;
}
uint8_t
v
=
slice
->
data
[
addr
];
uint8_t
s
=
(
v
&
0
b11000000
)
>>
6
;
if
(
s
==
0
b11
)
{
// oneTransNext
n
->
data
=
fstSliceCopy
(
slice
,
0
,
addr
);
n
->
version
=
version
;
n
->
state
=
OneTransNext
;
n
->
start
=
addr
;
n
->
end
=
addr
;
//? s.end_addr(data);
n
->
isFinal
=
false
;
n
->
sizes
=
0
;
n
->
nTrans
=
0
;
n
->
finalOutput
=
0
;
}
else
if
(
v
==
0
b10
)
{
// oneTrans
uint64_t
sz
;
// fetch sz from addr
n
->
data
=
fstSliceCopy
(
slice
,
0
,
addr
);
n
->
version
=
version
;
n
->
state
=
OneTrans
;
n
->
start
=
addr
;
n
->
end
=
addr
;
// s.end_addr(data, sz);
n
->
isFinal
=
false
;
n
->
nTrans
=
1
;
n
->
sizes
=
sz
;
n
->
finalOutput
=
0
;
}
else
{
// anyTrans
uint64_t
sz
;
// s.sizes(data)
uint32_t
nTrans
;
// s.ntrans(data)
n
->
data
=
*
slice
;
n
->
version
=
version
;
n
->
state
=
AnyTrans
;
n
->
start
=
addr
;
n
->
end
=
addr
;
// s.end_addr(version, data, sz, ntrans);
n
->
isFinal
=
false
;
// s.is_final_state();
n
->
nTrans
=
nTrans
;
n
->
sizes
=
sz
;
n
->
finalOutput
=
0
;
// s.final_output(version, data, sz, ntrans);
}
uint8_t
v
=
(
data
[
addr
]
&
0
b1100000
)
>>
6
;
if
(
v
==
0
b11
)
{
}
else
if
(
v
==
0
b10
)
{
return
n
;
}
// Allocates a transition iterator for `node` whose range spans
// [0, FST_NODE_LEN(node)]. Returns NULL when the allocation fails.
FstTransitions *fstNodeTransitions(FstNode *node) {
  FstTransitions *iter = malloc(sizeof(FstTransitions));
  if (iter != NULL) {
    iter->node = node;
    iter->range.start = 0;
    iter->range.end = FST_NODE_LEN(node);
  }
  return iter;
}
bool
fstNodeGetTransitionAt
(
FstNode
*
node
,
uint64_t
i
,
FstTransition
*
res
)
{
bool
s
=
true
;
if
(
node
->
state
==
OneTransNext
)
{
}
else
if
(
node
->
state
==
OneTrans
)
{
}
else
{
}
else
if
(
node
->
state
==
AnyTrans
)
{
}
}
else
{
s
=
false
;
}
return
s
;
}
// Reports whether a transition address can exist for this node's state.
// Only the EmptyFinal state yields false; the per-state lookups are not
// implemented yet, so `i` is unused and `res` is never written.
bool fstNodeGetTransitionAddrAt(FstNode *node, uint64_t i, CompiledAddr *res) {
  switch (node->state) {
    case EmptyFinal:
      return false;
    case OneTransNext:
    case OneTrans:
    case AnyTrans:
    default:
      return true;
  }
}
// Looks for a transition of `node` labelled with byte `b`.
//
// On success returns true and stores the transition index in `*res`.
// Returns false when the node has no matching transition (or is EmptyFinal);
// `*res` is left untouched in that case.
//
// TODO: the real input byte must be decoded from the node's data ("s.input"
// in the reference implementation). Until that exists `input` is a zero
// placeholder so the function no longer reads an indeterminate value.
// TODO: the AnyTrans lookup is not implemented; it still reports true
// without writing `*res`, as before.
bool fstNodeFindInput(FstNode *node, uint8_t b, uint64_t *res) {
  uint8_t input = 0;  // placeholder, see TODO above

  if (node->state == OneTransNext || node->state == OneTrans) {
    if (b != input) {
      return false;  // previously returned true with *res unset
    }
    *res = 0;
    return true;
  }
  if (node->state == EmptyFinal) {
    return false;
  }
  return true;
}
// Selects the encoding for `builderNode` according to its transition count
// and final flag. The encoders themselves are still commented-out
// placeholders, so every path currently returns true once the
// "fewer than 256 transitions" assertion has passed.
bool fstNodeCompile(FstNode *node, void *w, CompiledAddr lastAddr, CompiledAddr addr, FstBuilderNode *builderNode) {
  size_t nTrans = taosArrayGetSize(builderNode->trans);
  assert(nTrans < 256);

  // Empty final node with zero output: nothing to write.
  if (nTrans == 0 && builderNode->isFinal && builderNode->finalOutput == 0) {
    return true;
  }

  if (nTrans != 1 || builderNode->isFinal) {
    // AnyTrans->Compile(w, addr, node);
    return true;
  }

  FstTransition *only = taosArrayGet(builderNode->trans, 0);
  if (only->addr == lastAddr && only->out == 0) {
    //OneTransNext::compile(w, lastAddr, tran->inp);
    return true;
  }

  //OneTrans::Compile(w, lastAddr, *tran);
  return true;
}
// Creates a builder that writes to `w`.
//
// All fields start from a zeroed allocation; the counting writer wraps `w`
// with zero count/checksum, and the unfinished-node stack is created through
// fstUnFinishedNodesCreate() so that it already contains its root entry.
// `ty` is currently unused.
//
// Returns NULL if any allocation fails.
FstBuilder *fstBuilderCreate(void *w, FstType ty) {
  (void)ty;

  FstBuilder *b = calloc(1, sizeof(*b));
  if (b == NULL) {
    return NULL;
  }

  FstCountingWriter wtr = {.wtr = w, .count = 0, .summer = 0};
  b->wtr = wtr;

  // A raw malloc here would leave `unfinished->stack` indeterminate.
  b->unfinished = fstUnFinishedNodesCreate();
  if (b->unfinished == NULL) {
    free(b);
    return NULL;
  }

  b->last = NULL;
  b->lastAddr = NONE_ADDRESS;
  b->len = 0;
  return b;
}
// Returns a slice over the node's data whose window runs from the current
// window end to the last byte of the buffer (dLen - 1).
FstSlice fstNodeAsSlice(FstNode *node) {
  FstSlice *src = &node->data;
  return fstSliceCopy(src, src->end, src->dLen - 1);
}
source/libs/index/src/index_fst_automation.c
0 → 100644
浏览文件 @
60e339b3
/*
* Copyright (c) 2019 TAOS Data, Inc. <jhtao@taosdata.com>
*
* This program is free software: you can use, redistribute, and/or modify
* it under the terms of the GNU Affero General Public License, version 3
* or later ("AGPL"), as published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
source/libs/index/src/index_fst_common.c
浏览文件 @
60e339b3
...
...
@@ -12,6 +12,8 @@
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#include "tutil.h"
const
uint8_t
COMMON_INPUTS
[]
=
{
84
,
// '\x00'
85
,
// '\x01'
...
...
@@ -271,7 +273,7 @@ const uint8_t COMMON_INPUTS[] = {
255
,
// 'ÿ'
};
char
const
COMMON_INPUTS_INV
[]
=
[
char
const
COMMON_INPUTS_INV
[]
=
{
't'
,
'e'
,
'/'
,
'o'
,
'a'
,
's'
,
'r'
,
'i'
,
'p'
,
'c'
,
'n'
,
'w'
,
'.'
,
'h'
,
'l'
,
'm'
,
'-'
,
'd'
,
'u'
,
'0'
,
'1'
,
'2'
,
'g'
,
'='
,
':'
,
'b'
,
'f'
,
'3'
,
'y'
,
'5'
,
'&'
,
'_'
,
'4'
,
'v'
,
'9'
,
'6'
,
...
...
@@ -300,5 +302,5 @@ char const COMMON_INPUTS_INV[] = [
'\xe9'
,
'\xea'
,
'\xeb'
,
'\xec'
,
'\xed'
,
'\xee'
,
'\xef'
,
'\xf0'
,
'\xf1'
,
'\xf2'
,
'\xf3'
,
'\xf4'
,
'\xf5'
,
'\xf6'
,
'\xf7'
,
'\xf8'
,
'\xf9'
,
'\xfa'
,
'\xfb'
,
'\xfc'
,
'\xfd'
,
'\xfe'
,
'\xff'
,
]
;
}
;
source/libs/index/src/index_fst_node.c
0 → 100644
浏览文件 @
60e339b3
/*
* Copyright (c) 2019 TAOS Data, Inc. <jhtao@taosdata.com>
*
* This program is free software: you can use, redistribute, and/or modify
* it under the terms of the GNU Affero General Public License, version 3
* or later ("AGPL"), as published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
source/libs/index/src/index_fst_registry.c
0 → 100644
浏览文件 @
60e339b3
/*
* Copyright (c) 2019 TAOS Data, Inc. <jhtao@taosdata.com>
*
* This program is free software: you can use, redistribute, and/or modify
* it under the terms of the GNU Affero General Public License, version 3
* or later ("AGPL"), as published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#include "index_fst_registry.h"
source/libs/index/src/index_fst_util.c
0 → 100644
浏览文件 @
60e339b3
/*
* Copyright (c) 2019 TAOS Data, Inc. <jhtao@taosdata.com>
*
* This program is free software: you can use, redistribute, and/or modify
* it under the terms of the GNU Affero General Public License, version 3
* or later ("AGPL"), as published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#include "index_fst_util.h"
//A sentinel value used to indicate an empty final state
const
CompiledAddr
EMPTY_ADDRESS
=
0
;
/// A sentinel value used to indicate an invalid state.
const
CompiledAddr
NONE_ADDRESS
=
1
;
// This version number is written to every finite state transducer created by
// this crate. When a finite state transducer is read, its version number is
// checked against this value.
const
uint64_t
version
=
3
;
// The threshold (in number of transitions) at which an index is created for
// a node's transitions. This speeds up lookup time at the expense of FST size
const
uint64_t
TRANS_INDEX_THRESHOLD
=
32
;
//uint8_t commonInput(uint8_t idx) {
// if (idx == 0) { return -1; }
// else {
// return COMMON_INPUTS_INV[idx - 1];
// }
//}
//
//uint8_t commonIdx(uint8_t v, uint8_t max) {
// uint8_t v = ((uint16_t)tCOMMON_INPUTS[v] + 1)%256;
// return v > max ? 0: v;
//}
// Returns the minimum number of bytes (1..8) needed to represent `n`.
// Zero still occupies one byte.
uint8_t packSize(uint64_t n) {
  uint8_t bytes = 1;
  // Grow while there are set bits above the low `bytes` bytes. The shift
  // amount never exceeds 56, so it stays well-defined.
  while (bytes < 8 && (n >> (8 * bytes)) != 0) {
    bytes++;
  }
  return bytes;
}
// Decodes a little-endian unsigned integer from the first `sz` bytes of `ch`.
//
//  ch : source bytes, least significant first
//  sz : number of bytes to read; values above 8 are clamped to 8 because a
//       uint64_t cannot hold more
//
// Returns the decoded value (0 when sz is 0).
uint64_t unpackUint64(uint8_t *ch, uint8_t sz) {
  uint64_t n = 0;
  if (sz > sizeof(uint64_t)) {
    sz = (uint8_t)sizeof(uint64_t);
  }
  for (uint8_t i = 0; i < sz; i++) {
    // Widen before shifting: a uint8_t promotes to int, which cannot be
    // shifted by 32 or more bits.
    n |= (uint64_t)ch[i] << (8 * i);
  }
  return n;
}
// Returns the packed byte width of the distance from `transAddr` back to
// `nodeAddr`. When `transAddr` is EMPTY_ADDRESS, the width of EMPTY_ADDRESS
// itself is returned instead.
uint8_t packDeltaSize(CompiledAddr nodeAddr, CompiledAddr transAddr) {
  CompiledAddr delta = (transAddr == EMPTY_ADDRESS) ? EMPTY_ADDRESS : (nodeAddr - transAddr);
  return packSize(delta);
}
// Decodes a packed delta of `len` bytes from `data` and converts it back to
// an absolute address relative to `nodeAddr`.
//
// A decoded delta equal to EMPTY_ADDRESS is returned unchanged as
// EMPTY_ADDRESS; otherwise the result is nodeAddr - delta.
CompiledAddr unpackDelta(char *data, uint64_t len, uint64_t nodeAddr) {
  // unpackUint64 takes unsigned bytes and an 8-bit length; convert
  // explicitly instead of relying on an incompatible implicit pointer
  // conversion.
  uint64_t delta = unpackUint64((uint8_t *)data, (uint8_t)len);
  // delta_add = u64_to_usize
  if (delta == EMPTY_ADDRESS) {
    return EMPTY_ADDRESS;
  }
  return nodeAddr - delta;
}
// fst slice func
FstSlice
fstSliceCreate
(
uint8_t
*
data
,
uint64_t
dLen
)
{
FstSlice
slice
=
{.
data
=
data
,
.
dLen
=
dLen
,
.
start
=
0
,
.
end
=
dLen
-
1
};
return
slice
;
}
// Returns a slice sharing `slice`'s buffer but restricted to the window
// [start, end] (both inclusive). The bytes are not copied.
//
// If `slice` is NULL or the requested window is out of range or inverted, a
// fully zeroed slice (data == NULL, dLen == 0) is returned, so callers can
// test the result with fstSliceEmpty() without reading indeterminate fields.
FstSlice fstSliceCopy(FstSlice *slice, uint32_t start, uint32_t end) {
  FstSlice t = {.data = NULL, .dLen = 0, .start = 0, .end = 0};
  if (slice == NULL || start >= slice->dLen || end >= slice->dLen || start > end) {
    return t;
  }

  t.data = slice->data;
  t.dLen = slice->dLen;
  t.start = start;
  t.end = end;
  return t;
}
// A slice is empty when it has no buffer or its buffer length is zero.
bool fstSliceEmpty(FstSlice *slice) {
  bool hasBytes = (slice->data != NULL) && (slice->dLen > 0);
  return !hasBytes;
}
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录