Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
taosdata
TDengine
提交
980ace09
T
TDengine
项目概览
taosdata
/
TDengine
接近 2 年 前同步成功
通知
1191
Star
22018
Fork
4786
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
1
列表
看板
标记
里程碑
合并请求
0
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
T
TDengine
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
1
Issue
1
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
提交
980ace09
编写于
11月 15, 2021
作者:
dengyihao
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
add interface
上级
d2485c4c
变更
3
隐藏空白更改
内联
并排
Showing
3 changed file
with
518 addition
and
0 deletion
+518
-0
source/libs/index/inc/index_fst.h
source/libs/index/inc/index_fst.h
+166
-0
source/libs/index/src/index_fst.c
source/libs/index/src/index_fst.c
+48
-0
source/libs/index/src/index_fst_common.c
source/libs/index/src/index_fst_common.c
+304
-0
未找到文件。
source/libs/index/inc/index_fst.h
0 → 100644
浏览文件 @
980ace09
/*
* Copyright (c) 2019 TAOS Data, Inc. <jhtao@taosdata.com>
*
* This program is free software: you can use, redistribute, and/or modify
* it under the terms of the GNU Affero General Public License, version 3
* or later ("AGPL"), as published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#ifndef _INDEX_FST_H_
#define _INDEX_FST_H_
#include "index_fst.h"
#include "tarray.h"
// Scalar aliases shared by the FST structures declared in this header.
typedef uint64_t FstType;       // type tag of an FST (see FstMeta.ty)
typedef uint64_t CompiledAddr;  // address of a compiled node
typedef uint64_t Output;        // output value carried by a transition or a final state
typedef uint8_t  PackSizes;     // packed size byte kept on a node (see FstNode.sizes)

// The constants below are `static` so that every translation unit including
// this header gets its own private copy; a plain `const` object defined in a
// header has external linkage in C and is rejected as a duplicate definition
// at link time.

// A sentinel value used to indicate an empty final state.
static const CompiledAddr EMPTY_ADDRESS = 0;

// A sentinel value used to indicate an invalid state.
static const CompiledAddr NONE_ADDRESS = 1;

// This version number is written to every finite state transducer created by
// this library. When a finite state transducer is read, its version number is
// checked against this value.
static const uint64_t version = 3;

// The threshold (in number of transitions) at which an index is created for
// a node's transitions. This speeds up lookup time at the expense of FST size.
static const uint64_t TRANS_INDEX_THRESHOLD = 32;
// A pair of 64-bit positions delimiting a range.
// NOTE(review): whether `end` is inclusive or exclusive is not visible here.
typedef struct FstRange {
  uint64_t start;  // first position of the range
  uint64_t end;    // last position of the range
} FstRange;
// Kind of a compiled node. The typedef lets the rest of the header use the
// bare name `State` (as FstNode does); `enum State` remains valid as well.
typedef enum State {
  OneTransNext,
  OneTrans,
  AnyTrans,
  EmptyFinal
} State;
// Kind of a range bound. The numeric values are spelled out; they are the
// ones the compiler assigns implicitly.
enum FstBound {
  Included  = 0,
  Excluded  = 1,
  Unbounded = 2
};
// Running checksum state. The typedef name was missing, which left
// `CheckSummer` unusable as a type name although other structs rely on it.
typedef struct CheckSummer {
  uint32_t sum;  // current checksum value
} CheckSummer;
typedef
struct
FstBuilder
{
FstCountingWriter
wtr
;
// The FST raw data is written directly to `wtr`.
FstUnFinishedNodes
unfinished
// The stack of unfinished nodes
Registry
registry
// A map of finished nodes.
SArray
*
last
// The last word added
CompiledAddr
lastAddr
// The address of the last compiled node
uint64_t
len
// num of keys added
}
FstBuilder
;
// Wraps any writer and keeps a count and a checksum of the bytes written.
// The typedef name was missing, so `FstCountingWriter` could not be used as
// a type name.
typedef struct FstCountingWriter {
  void       *wtr;     // the wrapped writer (opaque to this header)
  uint64_t    count;   // byte counter
  CheckSummer summer;  // checksum state
} FstCountingWriter;
// One outgoing transition of a node.
typedef struct FstTransition {
  // The byte input associated with this transition.
  uint8_t inp;
  // The output associated with this transition.
  Output out;
  // The address of the node that this transition points to.
  CompiledAddr addr;
} FstTransition;
// Pairs a node with a range.
// NOTE(review): presumably the range of transition indexes still to visit
// on `node`; confirm against the iterator implementation.
typedef struct FstTransitions {
  // Spelled with the struct tag because the `FstNode` typedef is declared
  // further down in this header; both names denote the same type.
  struct FstNode *node;
  FstRange        range;
} FstTransitions;
// Stack of nodes that are still being built.
typedef struct FstUnFinishedNodes {
  // Elements are FstBuilderNodeUnfinished.
  SArray *stack;
} FstUnFinishedNodes;
// A node as it exists during construction.
typedef struct FstBuilderNode {
  bool    isFinal;      // final-state flag
  Output  finalOutput;  // output attached to the final state
  SArray *trans;        // elements are FstTransition
} FstBuilderNode;
// An input byte together with its output, without a target address.
typedef struct FstLastTransition {
  uint8_t inp;  // input byte
  Output  out;  // output value
} FstLastTransition;
// Entry type of the unfinished-node stack: a builder node plus one
// FstLastTransition.
typedef struct FstBuilderNodeUnfinished {
  FstBuilderNode    node;
  FstLastTransition last;
} FstBuilderNodeUnfinished;
typedef
struct
FstNode
{
uint8_t
*
data
;
uint64_t
version
;
State
state
;
CompiledAddr
start
;
CompiledAddr
end
;
bool
isFinal
;
uint64_t
nTrans
;
PackSizes
sizes
;
Output
finalOutput
;
}
FstNode
;
// Descriptive fields kept alongside an FST.
typedef struct FstMeta {
  uint64_t     version;   // format version
  CompiledAddr rootAddr;  // address of the root node
  FstType      ty;        // FST type tag
  uint64_t     len;       // NOTE(review): presumably the number of keys; confirm
  uint32_t     checkSum;  // checksum value
} FstMeta;
// An FST: its metadata plus an opaque data pointer. The typedef name was
// missing, so `Fst` could only be referred to as `struct Fst`.
typedef struct Fst {
  FstMeta meta;
  void   *data;  // NOTE(review): presumably the raw FST bytes; confirm
} Fst;
// ops

// An (index, value) pair. The typedef name was missing, so the type could
// only be referred to as `struct FstIndexedValue`.
typedef struct FstIndexedValue {
  uint64_t index;
  uint64_t value;
} FstIndexedValue;
// Registry-related types.

// Table of registry cells, described by a row count and a column count.
typedef struct FstRegistry {
  SArray  *table;      // elements are registry cells
  uint64_t tableSize;  // num of rows
  uint64_t mruSize;    // num of columns
} FstRegistry;
// A group of registry cells.
typedef struct FstRegistryCache {
  // Elements are registry cells.
  SArray *cells;
} FstRegistryCache;
// One registry slot: a compiled address and a pointer to a builder node.
typedef struct FstRegistryCell {
  CompiledAddr    addr;
  FstBuilderNode *node;
} FstRegistryCell;
// Registry entry status. The numeric values are spelled out; they are the
// ones the compiler assigns implicitly.
enum FstRegistryEntry {
  Found    = 0,
  NotFound = 1,
  Rejected = 2
};
// ---- FstNode functions ----
// NOTE(review): only fstNodeCreate has a definition in the accompanying .c
// file so far; the notes on the other prototypes describe their signatures
// only.

// Creates an FstNode for the node at `addr` within `data`, tagged with
// `version`. Returns NULL when the node cannot be allocated.
FstNode *fstNodeCreate(int64_t version, CompiledAddr addr, uint8_t *data);

// Returns an FstTransitions value (node + range) for `node`.
FstTransitions fstNodeTransitionIter(FstNode *node);

// Returns transition number `i` of `node`.
FstTransition fstNodeGetTransitionAt(FstNode *node, uint64_t i);

// Returns the target address of transition number `i` of `node`.
CompiledAddr fstNodeGetTransitionAddr(FstNode *node, uint64_t i);

// Looks up input byte `b` on `node`.
// NOTE(review): `b` is int8_t here while FstTransition.inp is uint8_t;
// the meaning of the int64_t result is not visible here.
int64_t fstNodeFindInput(FstNode *node, int8_t b);

// Returns the final output of `node`.
Output fstNodeGetFinalOutput(FstNode *node);

// Compiles `builderNode` through writer `w`.
// NOTE(review): the roles of `node`, `lastAddr`, `addr` and of the returned
// pointer are not visible here; confirm against the implementation.
void *fstNodeCompile(FstNode *node, void *w, CompiledAddr lastAddr, CompiledAddr addr,
                     FstBuilderNode *builderNode);
#endif
source/libs/index/src/index_fst.c
0 → 100644
浏览文件 @
980ace09
/*
* Copyright (c) 2019 TAOS Data, Inc. <jhtao@taosdata.com>
*
* This program is free software: you can use, redistribute, and/or modify
* it under the terms of the GNU Affero General Public License, version 3
* or later ("AGPL"), as published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#include "index_fst.h"
// fst node function
FstNode
*
fstNodeCreate
(
int64_t
version
,
ComiledAddr
addr
,
uint8_t
*
data
)
{
FstNode
*
n
=
(
FstNode
*
)
malloc
(
sizeof
(
FstNode
));
if
(
n
==
NULL
)
{
return
NULL
;
}
if
(
addr
==
EMPTY_ADDRESS
)
{
n
->
date
=
NULL
;
n
->
version
=
version
;
n
->
state
=
EmptyFinal
;
n
->
start
=
EMPTY_ADDRESS
;
n
->
end
=
EMPTY_ADDRESS
;
n
->
isFinal
=
true
;
n
->
nTrans
=
0
;
n
->
sizes
=
0
;
n
->
finalOutpu
=
0
;
return
n
;
}
uint8_t
v
=
(
data
[
addr
]
&
0
b1100000
)
>>
6
;
if
(
v
==
0
b11
)
{
}
else
if
(
v
==
0
b10
)
{
}
else
{
}
}
source/libs/index/src/index_fst_common.c
0 → 100644
浏览文件 @
980ace09
/*
* Copyright (c) 2019 TAOS Data, Inc. <jhtao@taosdata.com>
*
* This program is free software: you can use, redistribute, and/or modify
* it under the terms of the GNU Affero General Public License, version 3
* or later ("AGPL"), as published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
// Lookup table indexed by byte value: COMMON_INPUTS[b] is the code assigned
// to byte b. Each row below covers 16 consecutive byte values; the leading
// comment gives the first byte value of the row.
// Examples: 't' (0x74) -> 0, 'e' (0x65) -> 1, '/' (0x2f) -> 2.
// NOTE(review): the codes look like a frequency ranking (smaller = more
// common); bytes 0x7f..0xff map to themselves.
const uint8_t COMMON_INPUTS[] = {
    /* 0x00 */ 84,  85,  86,  87,  88,  89,  90,  91,  92,  93,  94,  95,  96,  97,  98,  99,
    /* 0x10 */ 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115,
    /* 0x20 */ 116, 80,  117, 118, 79,  39,  30,  81,  75,  74,  82,  57,  66,  16,  12,  2,
    /* 0x30 */ 19,  20,  21,  27,  32,  29,  35,  36,  37,  34,  24,  73,  119, 23,  120, 40,
    /* 0x40 */ 83,  44,  48,  42,  43,  49,  46,  62,  61,  47,  69,  68,  58,  56,  55,  59,
    /* 0x50 */ 51,  72,  54,  45,  52,  64,  65,  63,  71,  67,  70,  77,  121, 78,  122, 31,
    /* 0x60 */ 123, 4,   25,  9,   17,  1,   26,  22,  13,  7,   50,  38,  14,  15,  10,  3,
    /* 0x70 */ 8,   60,  6,   5,   0,   18,  33,  11,  41,  28,  53,  124, 125, 126, 76,  127,
    /* 0x80 */ 128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142, 143,
    /* 0x90 */ 144, 145, 146, 147, 148, 149, 150, 151, 152, 153, 154, 155, 156, 157, 158, 159,
    /* 0xa0 */ 160, 161, 162, 163, 164, 165, 166, 167, 168, 169, 170, 171, 172, 173, 174, 175,
    /* 0xb0 */ 176, 177, 178, 179, 180, 181, 182, 183, 184, 185, 186, 187, 188, 189, 190, 191,
    /* 0xc0 */ 192, 193, 194, 195, 196, 197, 198, 199, 200, 201, 202, 203, 204, 205, 206, 207,
    /* 0xd0 */ 208, 209, 210, 211, 212, 213, 214, 215, 216, 217, 218, 219, 220, 221, 222, 223,
    /* 0xe0 */ 224, 225, 226, 227, 228, 229, 230, 231, 232, 233, 234, 235, 236, 237, 238, 239,
    /* 0xf0 */ 240, 241, 242, 243, 244, 245, 246, 247, 248, 249, 250, 251, 252, 253, 254, 255,
};
// Inverse of the COMMON_INPUTS table: COMMON_INPUTS_INV[code] is the byte
// that COMMON_INPUTS maps to `code` (e.g. code 0 -> 't', code 1 -> 'e').
// Each row holds 8 entries; the leading comment is the code of the first one.
//
// The initializer is enclosed in braces; the square brackets used before are
// not valid C.
// NOTE(review): the element type stays `char`, whose signedness is
// implementation-defined; cast entries to unsigned char / uint8_t before
// using them as byte values or indexes.
char const COMMON_INPUTS_INV[] = {
    /*   0 */ 't', 'e', '/', 'o', 'a', 's', 'r', 'i',
    /*   8 */ 'p', 'c', 'n', 'w', '.', 'h', 'l', 'm',
    /*  16 */ '-', 'd', 'u', '0', '1', '2', 'g', '=',
    /*  24 */ ':', 'b', 'f', '3', 'y', '5', '&', '_',
    /*  32 */ '4', 'v', '9', '6', '7', '8', 'k', '%',
    /*  40 */ '?', 'x', 'C', 'D', 'A', 'S', 'F', 'I',
    /*  48 */ 'B', 'E', 'j', 'P', 'T', 'z', 'R', 'N',
    /*  56 */ 'M', '+', 'L', 'O', 'q', 'H', 'G', 'W',
    /*  64 */ 'U', 'V', ',', 'Y', 'K', 'J', 'Z', 'X',
    /*  72 */ 'Q', ';', ')', '(', '~', '[', ']', '$',
    /*  80 */ '!', '\'', '*', '@', '\x00', '\x01', '\x02', '\x03',
    /*  88 */ '\x04', '\x05', '\x06', '\x07', '\x08', '\t', '\n', '\x0b',
    /*  96 */ '\x0c', '\r', '\x0e', '\x0f', '\x10', '\x11', '\x12', '\x13',
    /* 104 */ '\x14', '\x15', '\x16', '\x17', '\x18', '\x19', '\x1a', '\x1b',
    /* 112 */ '\x1c', '\x1d', '\x1e', '\x1f', ' ', '"', '#', '<',
    /* 120 */ '>', '\\', '^', '`', '{', '|', '}', '\x7f',
    /* 128 */ '\x80', '\x81', '\x82', '\x83', '\x84', '\x85', '\x86', '\x87',
    /* 136 */ '\x88', '\x89', '\x8a', '\x8b', '\x8c', '\x8d', '\x8e', '\x8f',
    /* 144 */ '\x90', '\x91', '\x92', '\x93', '\x94', '\x95', '\x96', '\x97',
    /* 152 */ '\x98', '\x99', '\x9a', '\x9b', '\x9c', '\x9d', '\x9e', '\x9f',
    /* 160 */ '\xa0', '\xa1', '\xa2', '\xa3', '\xa4', '\xa5', '\xa6', '\xa7',
    /* 168 */ '\xa8', '\xa9', '\xaa', '\xab', '\xac', '\xad', '\xae', '\xaf',
    /* 176 */ '\xb0', '\xb1', '\xb2', '\xb3', '\xb4', '\xb5', '\xb6', '\xb7',
    /* 184 */ '\xb8', '\xb9', '\xba', '\xbb', '\xbc', '\xbd', '\xbe', '\xbf',
    /* 192 */ '\xc0', '\xc1', '\xc2', '\xc3', '\xc4', '\xc5', '\xc6', '\xc7',
    /* 200 */ '\xc8', '\xc9', '\xca', '\xcb', '\xcc', '\xcd', '\xce', '\xcf',
    /* 208 */ '\xd0', '\xd1', '\xd2', '\xd3', '\xd4', '\xd5', '\xd6', '\xd7',
    /* 216 */ '\xd8', '\xd9', '\xda', '\xdb', '\xdc', '\xdd', '\xde', '\xdf',
    /* 224 */ '\xe0', '\xe1', '\xe2', '\xe3', '\xe4', '\xe5', '\xe6', '\xe7',
    /* 232 */ '\xe8', '\xe9', '\xea', '\xeb', '\xec', '\xed', '\xee', '\xef',
    /* 240 */ '\xf0', '\xf1', '\xf2', '\xf3', '\xf4', '\xf5', '\xf6', '\xf7',
    /* 248 */ '\xf8', '\xf9', '\xfa', '\xfb', '\xfc', '\xfd', '\xfe', '\xff',
};
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录