Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
taosdata
TDengine
提交
6ce25563
T
TDengine
项目概览
taosdata
/
TDengine
1 年多 前同步成功
通知
1187
Star
22018
Fork
4786
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
1
列表
看板
标记
里程碑
合并请求
0
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
T
TDengine
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
1
Issue
1
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
未验证
提交
6ce25563
编写于
3月 31, 2022
作者:
dengyihao
提交者:
GitHub
3月 31, 2022
浏览文件
操作
浏览文件
下载
差异文件
Merge pull request #11137 from taosdata/feature/fst_update_query
add fuzzy search
上级
22305b01
f2c9f40d
变更
6
隐藏空白更改
内联
并排
Showing
6 changed file
with
403 addition
and
3 deletion
+403
-3
source/libs/index/inc/indexFstDfa.h
source/libs/index/inc/indexFstDfa.h
+74
-0
source/libs/index/inc/indexFstRegex.h
source/libs/index/inc/indexFstRegex.h
+74
-0
source/libs/index/inc/indexFstSparse.h
source/libs/index/inc/indexFstSparse.h
+2
-2
source/libs/index/src/indexFstDfa.c
source/libs/index/src/indexFstDfa.c
+218
-0
source/libs/index/src/indexFstRegex.c
source/libs/index/src/indexFstRegex.c
+34
-0
source/libs/index/src/indexSparse.c
source/libs/index/src/indexSparse.c
+1
-1
未找到文件。
source/libs/index/inc/indexFstDfa.h
0 → 100644
浏览文件 @
6ce25563
/*
* Copyright (c) 2019 TAOS Data, Inc. <jhtao@taosdata.com>
*
* This program is free software: you can use, redistribute, and/or modify
* it under the terms of the GNU Affero General Public License, version 3
* or later ("AGPL"), as published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#ifndef __INDEX_FST_DFA_H__
#define __INDEX_FST_DFA_H__
#include "indexFstRegex.h"
#include "indexFstSparse.h"
#include "tarray.h"
#include "thash.h"
#ifdef __cplusplus
extern
"C"
{
#endif
/* Forward declaration so that FstDfaBuilder can hold an FstDfa pointer
 * before the full structure is defined further down in this header. */
typedef struct FstDfa FstDfa;
/*
 * One state of the DFA.
 *
 *   insts   - array of uint32_t instruction indices that make up the state
 *   next    - next-state index for every possible input byte (0..255)
 *   isMatch - true when the state contains a MATCH instruction
 */
typedef struct {
  SArray  *insts;
  uint32_t next[256];
  bool     isMatch;
} State;
/*
 * DFA builder: types and functions used to construct an FstDfa.
 */
/*
 * Working state used while a DFA is being constructed.
 *
 *   dfa   - the DFA being built; dfaBuilderBuild() returns this pointer
 *   cache - hash table mapping an instruction-set key to the index of the
 *           DFA state already created for it
 */
typedef struct FstDfaBuilder {
  FstDfa   *dfa;
  SHashObj *cache;
} FstDfaBuilder;
/*
 * Allocate a builder whose DFA wraps `insts` and starts with an empty state
 * table. Returns NULL when the builder itself cannot be allocated.
 */
FstDfaBuilder *dfaBuilderCreate(SArray *insts);

/* Tear down the builder's state cache. Passing NULL is a no-op. */
void dfaBuilderDestroy(FstDfaBuilder *builder);

/*
 * Explore the states reachable from instruction 0 over all 256 byte values
 * and return the builder's DFA.
 */
FstDfa *dfaBuilderBuild(FstDfaBuilder *builder);

/*
 * Compute the transition of `state` on input `byte`, using `cur` and `next`
 * as scratch sets. Returns true and stores the target state index in
 * `*result` when a target state exists, false otherwise.
 */
bool dfaBuilderRunState(FstDfaBuilder *builder, FstSparseSet *cur, FstSparseSet *next, uint32_t state, uint8_t byte,
                        uint32_t *result);

/*
 * Look up, or register, the DFA state for the RANGE/MATCH instructions held
 * in `set`. Returns false when `set` contains no such instruction; otherwise
 * stores the state index in `*result` and returns true.
 */
bool dfaBuilderCachedState(FstDfaBuilder *builder, FstSparseSet *set, uint32_t *result);
/*
 * DFA: the automaton itself and the functions that query or advance it.
 */
/*
 * The automaton.
 *
 *   insts  - array of Inst elements (the compiled program)
 *   states - array of State elements, indexed by state number
 */
typedef struct FstDfa {
  SArray *insts;
  SArray *states;
} FstDfa;
/*
 * Allocate an FstDfa that refers to the given instruction and state arrays.
 * Returns NULL when the allocation fails.
 */
FstDfa *dfaCreate(SArray *insts, SArray *states);

/* Report whether state `si` is a matching state. */
bool dfaIsMatch(FstDfa *dfa, uint32_t si);

/*
 * Look up the transition of state `si` on input `byte`. On success the
 * target state index is stored in `*result` and true is returned.
 */
bool dfaAccept(FstDfa *dfa, uint32_t si, uint8_t byte, uint32_t *result);

/*
 * Add instruction `ip` to `set` and follow JUMP and SPLIT instructions
 * recursively so that every instruction they lead to is added as well.
 */
void dfaAdd(FstDfa *dfa, FstSparseSet *set, uint32_t ip);

/*
 * Advance every instruction in `from` over input `byte`, collecting the
 * resulting instructions in `to` (which is cleared first). Returns true when
 * `from` contains a MATCH instruction.
 */
bool dfaRun(FstDfa *dfa, FstSparseSet *from, FstSparseSet *to, uint8_t byte);
#ifdef __cplusplus
}
#endif
#endif
source/libs/index/inc/indexFstRegex.h
0 → 100644
浏览文件 @
6ce25563
/*
* Copyright (c) 2019 TAOS Data, Inc. <jhtao@taosdata.com>
*
* This program is free software: you can use, redistribute, and/or modify
* it under the terms of the GNU Affero General Public License, version 3
* or later ("AGPL"), as published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#ifndef _TD_INDEX_FST_REGEX_H_
#define _TD_INDEX_FST_REGEX_H_
//#include "indexFstDfa.h"
#include "taos.h"
#include "tarray.h"
#include "tchecksum.h"
#include "thash.h"
#include "tlog.h"
#include "tutil.h"
#ifdef __cplusplus
extern
"C"
{
#endif
/*
 * Kinds of instruction in the compiled program.
 *
 *   MATCH - accepting instruction
 *   JUMP  - continue at the instruction given by JumpValue.step
 *   SPLIT - continue at both SplitValue.len1 and SplitValue.len2
 *   RANGE - consume one byte inside [RangeValue.start, RangeValue.end] and
 *           continue at the following instruction
 */
typedef enum {
  MATCH = 0,
  JUMP = 1,
  SPLIT = 2,
  RANGE = 3
} InstType;
/*
 * Payload of a MATCH instruction. A MATCH carries no data, but ISO C does not
 * allow a structure without members (and compilers that accept it as an
 * extension disagree with C++ about its size), so a placeholder byte keeps
 * the type well-formed everywhere.
 */
typedef struct MatchValue {
  uint8_t reserved; /* unused placeholder */
} MatchValue;
/* Payload of a JUMP instruction. */
typedef struct JumpValue {
  uint32_t step; /* index of the instruction to continue at */
} JumpValue;
/* Payload of a SPLIT instruction: both branches are followed. */
typedef struct SplitValue {
  uint32_t len1; /* instruction index of the first branch */
  uint32_t len2; /* instruction index of the second branch */
} SplitValue;
/* Payload of a RANGE instruction: an inclusive range of byte values. */
typedef struct RangeValue {
  uint8_t start; /* lowest byte accepted */
  uint8_t end;   /* highest byte accepted */
} RangeValue;
/*
 * One instruction of the compiled program. `ty` tells which member of the
 * anonymous union is meaningful.
 */
typedef struct {
  InstType ty;
  union {
    MatchValue mv; /* ty == MATCH */
    JumpValue  jv; /* ty == JUMP  */
    SplitValue sv; /* ty == SPLIT */
    RangeValue rv; /* ty == RANGE */
  };
} Inst;
/*
 * A regular expression handle.
 *
 *   orig - copy of the pattern text the regex was created from
 *   dfa  - opaque pointer; presumably the DFA built for the pattern
 *          (NOTE(review): nothing visible here assigns it - confirm)
 */
typedef struct {
  char *orig;
  void *dfa;
} FstRegex;
/* Create a regex handle for the pattern text `str`. */
FstRegex *regexCreate(const char *str);

/* Placeholder for regex set-up; the current implementation does nothing. */
void regexSetup(FstRegex *regex, uint32_t size, const char *str);
// uint32_t regexStart()
#ifdef __cplusplus
}
#endif
#endif
source/libs/index/inc/indexSparse.h
→
source/libs/index/inc/index
Fst
Sparse.h
浏览文件 @
6ce25563
...
...
@@ -13,8 +13,8 @@
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#ifndef _TD_INDEX_SPARSE_H_
#define _TD_INDEX_SPARSE_H_
#ifndef _TD_INDEX_
FST_
SPARSE_H_
#define _TD_INDEX_
FST_
SPARSE_H_
#include "tarray.h"
...
...
source/libs/index/src/indexFstDfa.c
0 → 100644
浏览文件 @
6ce25563
/*
* Copyright (c) 2019 TAOS Data, Inc. <jhtao@taosdata.com>
*
* This program is free software: you can use, redistribute, and/or modify
* it under the terms of the GNU Affero General Public License, version 3
* or later ("AGPL"), as published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#include "indexFstDfa.h"
#include "thash.h"
/* Number of DFA states above which the builder considers the DFA too large. */
static const uint32_t STATE_LIMIT = 1000;
static
int
dfaInstsEqual
(
const
void
*
a
,
const
void
*
b
,
size_t
size
)
{
SArray
*
ar
=
(
SArray
*
)
a
;
SArray
*
br
=
(
SArray
*
)
b
;
size_t
al
=
ar
!=
NULL
?
taosArrayGetSize
(
ar
)
:
0
;
size_t
bl
=
br
!=
NULL
?
taosArrayGetSize
(
br
)
:
0
;
if
(
al
!=
bl
)
{
return
-
1
;
}
for
(
int
i
=
0
;
i
<
al
;
i
++
)
{
uint32_t
v1
=
*
(
uint32_t
*
)
taosArrayGet
(
ar
,
i
);
uint32_t
v2
=
*
(
uint32_t
*
)
taosArrayGet
(
br
,
i
);
if
(
v1
!=
v2
)
{
return
-
1
;
}
}
return
0
;
}
/*
 * Allocate a DFA builder for the instruction array `insts`.
 *
 * The builder gets a new DFA (with an empty State array) and a hash table
 * used as state cache, compared with dfaInstsEqual. The hash function is
 * picked according to the pointer width because cache keys are pointers.
 *
 * @param insts instruction array handed to dfaCreate
 * @return the new builder, or NULL if the builder could not be allocated
 *
 * NOTE(review): the results of taosArrayInit, dfaCreate and taosHashInit are
 * not checked, so a builder with a NULL dfa or cache can be returned.
 */
FstDfaBuilder *dfaBuilderCreate(SArray *insts) {
  FstDfaBuilder *pBuilder = taosMemoryCalloc(1, sizeof(FstDfaBuilder));
  if (pBuilder == NULL) {
    return NULL;
  }

  pBuilder->dfa = dfaCreate(insts, taosArrayInit(4, sizeof(State)));

  pBuilder->cache = taosHashInit(
      4, taosGetDefaultHashFunction(POINTER_BYTES == sizeof(int64_t) ? TSDB_DATA_TYPE_BIGINT : TSDB_DATA_TYPE_INT),
      false, HASH_NO_LOCK);
  taosHashSetEqualFp(pBuilder->cache, dfaInstsEqual);

  return pBuilder;
}
/*
 * Release the builder's state cache.
 *
 * Every entry produced by iterating the cache is read as an `SArray **` and
 * the array it refers to is destroyed; afterwards the hash table itself is
 * cleaned up. A NULL builder is ignored.
 *
 * NOTE(review): neither `builder` nor `builder->dfa` is released here.
 * NOTE(review): the entries stored by dfaBuilderCachedState have a 32-bit
 * state index as value - confirm that taosHashIterate really yields something
 * that may be read as `SArray **`, and that those arrays are not still owned
 * by the DFA's State entries.
 */
void dfaBuilderDestroy(FstDfaBuilder *builder) {
  if (builder == NULL) {
    return;
  }

  if (builder->cache != NULL) {
    for (void *it = taosHashIterate(builder->cache, NULL); it != NULL; it = taosHashIterate(builder->cache, it)) {
      SArray **ppInsts = it;
      taosArrayDestroy(*ppInsts);
    }
  }
  taosHashCleanup(builder->cache);
}
/*
 * Build the DFA held by `builder`.
 *
 * Starting from the instruction set reachable from instruction 0, every
 * pending state is expanded over all 256 byte values with
 * dfaBuilderRunState; target states that have not been seen before are
 * queued for expansion in turn.
 *
 * @param builder builder created by dfaBuilderCreate
 * @return builder->dfa
 *
 * NOTE(review): the STATE_LIMIT check below has an empty body, so the limit
 * is not enforced. The two sparse sets created here are not released in this
 * function.
 */
FstDfa *dfaBuilderBuild(FstDfaBuilder *builder) {
  uint32_t      nInsts = taosArrayGetSize(builder->dfa->insts);
  FstSparseSet *curSet = sparSetCreate(nInsts);
  FstSparseSet *nextSet = sparSetCreate(nInsts);

  // Seed the work list with the state reachable from instruction 0.
  dfaAdd(builder->dfa, curSet, 0);

  SArray  *pending = taosArrayInit(0, sizeof(uint32_t));
  uint32_t stateId;
  if (dfaBuilderCachedState(builder, curSet, &stateId)) {
    taosArrayPush(pending, &stateId);
  }

  SHashObj *visited = taosHashInit(12, taosGetDefaultHashFunction(TSDB_DATA_TYPE_INT), false, HASH_NO_LOCK);

  while (taosArrayGetSize(pending) != 0) {
    stateId = *(uint32_t *)taosArrayPop(pending);

    for (int b = 0; b < 256; b++) {
      uint32_t target;
      uint32_t marker = 0;

      bool hasTarget = dfaBuilderRunState(builder, curSet, nextSet, stateId, b, &target);
      if (hasTarget && taosHashGet(visited, &target, sizeof(target)) == NULL) {
        // First time this target is seen: remember it and expand it later.
        taosHashPut(visited, &target, sizeof(target), &marker, sizeof(marker));
        taosArrayPush(pending, &target);
      }

      if (taosArrayGetSize(builder->dfa->states) > STATE_LIMIT) {
        // Too many states; nothing is done about it here.
      }
    }
  }

  taosArrayDestroy(pending);
  taosHashCleanup(visited);
  return builder->dfa;
}
/*
 * Compute the transition of DFA state `state` on input `byte`.
 *
 * `cur` is reloaded with the instructions of `state`, dfaRun advances them
 * over `byte` into `next`, and the state for `next` is looked up or created.
 * When such a state exists it is recorded in the source state's transition
 * table and returned through `*result`.
 *
 * @param builder builder owning the DFA
 * @param cur     scratch set; overwritten
 * @param next    scratch set; overwritten
 * @param state   index of the source state in builder->dfa->states
 * @param byte    input byte
 * @param result  receives the target state index on success
 * @return true when a target state exists, false otherwise (the transition
 *         table entry and `*result` are then left untouched)
 */
bool dfaBuilderRunState(FstDfaBuilder *builder, FstSparseSet *cur, FstSparseSet *next, uint32_t state, uint8_t byte,
                        uint32_t *result) {
  sparSetClear(cur);

  State *t = taosArrayGet(builder->dfa->states, state);
  size_t nInsts = taosArrayGetSize(t->insts);
  for (size_t i = 0; i < nInsts; i++) {
    uint32_t ip = *(uint32_t *)taosArrayGet(t->insts, i);
    sparSetAdd(cur, ip);
  }
  dfaRun(builder->dfa, cur, next, byte);

  uint32_t nxtState;
  if (!dfaBuilderCachedState(builder, next, &nxtState)) {
    return false;
  }

  // dfaBuilderCachedState may append a new State to dfa->states, and a
  // growable array can relocate its storage on append, so the source state
  // is looked up only now instead of reusing the earlier pointer.
  t = taosArrayGet(builder->dfa->states, state);
  t->next[byte] = nxtState;
  *result = nxtState;
  return true;
}
/*
 * Find or create the DFA state that corresponds to the instruction set `set`.
 *
 * Only RANGE and MATCH instructions identify a state; JUMP and SPLIT entries
 * are skipped. The collected indices are looked up in the builder's cache:
 * on a hit the cached state index is returned, otherwise a new State is
 * appended to the DFA and registered in the cache.
 *
 * @param builder builder owning the DFA and the cache
 * @param set     instruction set to resolve
 * @param result  receives the state index on success
 * @return false when `set` holds no RANGE/MATCH instruction (or the scratch
 *         array cannot be allocated); true otherwise
 *
 * Ownership: the scratch array is destroyed on every path except when a new
 * State is created, in which case the State keeps it as `insts`.
 */
bool dfaBuilderCachedState(FstDfaBuilder *builder, FstSparseSet *set, uint32_t *result) {
  SArray *tinsts = taosArrayInit(4, sizeof(uint32_t));
  if (tinsts == NULL) {
    return false;
  }

  bool isMatch = false;
  for (int i = 0; i < sparSetLen(set); i++) {
    uint32_t ip = sparSetGet(set, i);
    Inst    *inst = taosArrayGet(builder->dfa->insts, ip);
    if (inst->ty == RANGE) {
      taosArrayPush(tinsts, &ip);
    } else if (inst->ty == MATCH) {
      isMatch = true;
      taosArrayPush(tinsts, &ip);
    }
    // JUMP and SPLIT do not contribute to the identity of a state.
  }

  if (taosArrayGetSize(tinsts) == 0) {
    // Nothing to resolve; release the scratch array instead of leaking it.
    taosArrayDestroy(tinsts);
    return false;
  }

  // The key is the pointer value itself, so its length is sizeof(tinsts).
  uint32_t *v = taosHashGet(builder->cache, &tinsts, sizeof(tinsts));
  if (v != NULL) {
    *result = *v;
    taosArrayDestroy(tinsts);
    return true;
  }

  // Zero-initialise the transition table so no indeterminate values are
  // stored; entries are filled in by dfaBuilderRunState.
  // NOTE(review): 0 is also a valid state index, so an unfilled entry cannot
  // be told apart from a transition to state 0.
  State st = {.insts = tinsts, .isMatch = isMatch};
  taosArrayPush(builder->dfa->states, &st);

  int32_t sz = taosArrayGetSize(builder->dfa->states) - 1;
  taosHashPut(builder->cache, &tinsts, sizeof(tinsts), &sz, sizeof(sz));
  *result = sz;
  return true;
}
/*
 * Allocate an FstDfa and attach the given arrays to it.
 *
 * @param insts  instruction array stored in the DFA (pointer is kept, not copied)
 * @param states state array stored in the DFA (pointer is kept, not copied)
 * @return the new DFA, or NULL if the allocation fails
 */
FstDfa *dfaCreate(SArray *insts, SArray *states) {
  FstDfa *pDfa = taosMemoryCalloc(1, sizeof(FstDfa));
  if (pDfa != NULL) {
    pDfa->insts = insts;
    pDfa->states = states;
  }
  return pDfa;
}
/*
 * Report whether state `si` of `dfa` is a matching state.
 *
 * @param dfa the automaton
 * @param si  state index
 * @return the state's isMatch flag; false when `dfa` or its state table is
 *         NULL or when `si` is not a valid state index
 */
bool dfaIsMatch(FstDfa *dfa, uint32_t si) {
  // Reject indices at or beyond the end of the state table.
  if (dfa == NULL || dfa->states == NULL || si >= taosArrayGetSize(dfa->states)) {
    return false;
  }
  State *st = taosArrayGet(dfa->states, si);
  return st != NULL ? st->isMatch : false;
}
/*
 * Look up the transition of state `si` on input `byte`.
 *
 * @param dfa    the automaton
 * @param si     index of the current state
 * @param byte   input byte
 * @param result receives the entry of the state's transition table for `byte`
 * @return true when `si` is a valid state index and `*result` was written;
 *         false when `dfa` or its state table is NULL or `si` is out of range
 */
bool dfaAccept(FstDfa *dfa, uint32_t si, uint8_t byte, uint32_t *result) {
  // Reject indices at or beyond the end of the state table.
  if (dfa == NULL || dfa->states == NULL || si >= taosArrayGetSize(dfa->states)) {
    return false;
  }
  State *st = taosArrayGet(dfa->states, si);
  *result = st->next[byte];
  return true;
}
/*
 * Add instruction `ip` to `set` together with everything reachable from it
 * through JUMP and SPLIT instructions.
 *
 * An instruction that is already in the set is not visited again, which also
 * ends the recursion on cycles. MATCH and RANGE instructions are added but
 * not followed.
 *
 * @param dfa automaton whose instruction array is consulted
 * @param set set receiving the instruction indices
 * @param ip  index of the instruction to add
 */
void dfaAdd(FstDfa *dfa, FstSparseSet *set, uint32_t ip) {
  if (sparSetContains(set, ip)) {
    return;
  }
  sparSetAdd(set, ip);

  Inst *inst = taosArrayGet(dfa->insts, ip);
  switch (inst->ty) {
    case JUMP:
      dfaAdd(dfa, set, inst->jv.step);
      break;
    case SPLIT:
      dfaAdd(dfa, set, inst->sv.len1);
      dfaAdd(dfa, set, inst->sv.len2);
      break;
    case MATCH:
    case RANGE:
    default:
      // Nothing to follow.
      break;
  }
}
/*
 * Advance every instruction in `from` over the input `byte`.
 *
 * `to` is cleared first. For each RANGE instruction whose inclusive range
 * contains `byte`, the following instruction (ip + 1) is added to `to` via
 * dfaAdd. JUMP and SPLIT entries are ignored.
 *
 * @param dfa  automaton whose instruction array is consulted
 * @param from instructions of the current state
 * @param to   receives the instructions of the next state
 * @param byte input byte
 * @return true when `from` contains a MATCH instruction
 */
bool dfaRun(FstDfa *dfa, FstSparseSet *from, FstSparseSet *to, uint8_t byte) {
  bool sawMatch = false;

  sparSetClear(to);
  for (int idx = 0; idx < sparSetLen(from); idx++) {
    uint32_t ip = sparSetGet(from, idx);
    Inst    *inst = taosArrayGet(dfa->insts, ip);

    switch (inst->ty) {
      case MATCH:
        sawMatch = true;
        break;
      case RANGE:
        if (byte >= inst->rv.start && byte <= inst->rv.end) {
          dfaAdd(dfa, to, ip + 1);
        }
        break;
      case JUMP:
      case SPLIT:
      default:
        break;
    }
  }
  return sawMatch;
}
source/libs/index/src/indexFstRegex.c
0 → 100644
浏览文件 @
6ce25563
/*
* Copyright (c) 2019 TAOS Data, Inc. <jhtao@taosdata.com>
*
* This program is free software: you can use, redistribute, and/or modify
* it under the terms of the GNU Affero General Public License, version 3
* or later ("AGPL"), as published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#include "indexFstRegex.h"
#include "indexFstSparse.h"
/*
 * Create a regex handle holding a private, NUL-terminated copy of `str`.
 *
 * @param str pattern text; may be empty
 * @return the new handle, or NULL when `str` is NULL or the handle cannot be
 *         allocated. If only the copy of the pattern cannot be allocated the
 *         handle is still returned with `orig` set to NULL, so callers should
 *         check `orig` before using it.
 *
 * NOTE(review): once the matching release routine for taosMemoryCalloc is
 * confirmed, consider freeing the handle and returning NULL in that case.
 */
FstRegex *regexCreate(const char *str) {
  if (str == NULL) {
    return NULL;
  }

  FstRegex *regex = taosMemoryCalloc(1, sizeof(FstRegex));
  if (regex == NULL) {
    return NULL;
  }

  // One extra zeroed byte keeps the copy NUL-terminated.
  size_t sz = strlen(str);
  char  *orig = taosMemoryCalloc(1, sz + 1);
  if (orig != NULL) {
    memcpy(orig, str, sz);
  }
  regex->orig = orig;

  return regex;
}
/*
 * Placeholder: not implemented yet, so the call has no effect.
 *
 * @param regex regex handle (unused)
 * @param size  unused
 * @param str   unused
 */
void regexSetup(FstRegex *regex, uint32_t size, const char *str) {
  (void)regex;
  (void)size;
  (void)str;
}
source/libs/index/src/indexSparse.c
浏览文件 @
6ce25563
...
...
@@ -13,7 +13,7 @@
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#include "indexSparse.h"
#include "index
Fst
Sparse.h"
FstSparseSet
*
sparSetCreate
(
int32_t
sz
)
{
FstSparseSet
*
ss
=
taosMemoryCalloc
(
1
,
sizeof
(
FstSparseSet
));
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录