Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
taosdata
TDengine
提交
09f3e8e1
T
TDengine
项目概览
taosdata
/
TDengine
1 年多 前同步成功
通知
1185
Star
22016
Fork
4786
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
1
列表
看板
标记
里程碑
合并请求
0
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
T
TDengine
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
1
Issue
1
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
提交
09f3e8e1
编写于
11月 20, 2021
作者:
dengyihao
浏览文件
操作
浏览文件
下载
差异文件
merge develop
上级
fed05bb6
60e339b3
变更
22
隐藏空白更改
内联
并排
Showing
22 changed file
with
1484 addition
and
15 deletion
+1484
-15
CMakeLists.txt
CMakeLists.txt
+1
-0
cmake/cmake.options
cmake/cmake.options
+2
-2
cmake/lucene_CMakeLists.txt.in
cmake/lucene_CMakeLists.txt.in
+2
-3
deps/CMakeLists.txt
deps/CMakeLists.txt
+5
-0
include/libs/index/index.h
include/libs/index/index.h
+42
-1
source/libs/index/CMakeLists.txt
source/libs/index/CMakeLists.txt
+24
-1
source/libs/index/inc/indexInt.h
source/libs/index/inc/indexInt.h
+41
-1
source/libs/index/inc/index_fst.h
source/libs/index/inc/index_fst.h
+182
-0
source/libs/index/inc/index_fst_automation.h
source/libs/index/inc/index_fst_automation.h
+42
-0
source/libs/index/inc/index_fst_node.h
source/libs/index/inc/index_fst_node.h
+22
-0
source/libs/index/inc/index_fst_registry.h
source/libs/index/inc/index_fst_registry.h
+24
-0
source/libs/index/inc/index_fst_util.h
source/libs/index/inc/index_fst_util.h
+82
-0
source/libs/index/src/index.c
source/libs/index/src/index.c
+167
-6
source/libs/index/src/index_fst.c
source/libs/index/src/index_fst.c
+296
-0
source/libs/index/src/index_fst_automation.c
source/libs/index/src/index_fst_automation.c
+14
-0
source/libs/index/src/index_fst_common.c
source/libs/index/src/index_fst_common.c
+306
-0
source/libs/index/src/index_fst_node.c
source/libs/index/src/index_fst_node.c
+15
-0
source/libs/index/src/index_fst_registry.c
source/libs/index/src/index_fst_registry.c
+17
-0
source/libs/index/src/index_fst_util.c
source/libs/index/src/index_fst_util.c
+115
-0
source/libs/index/test/CMakeLists.txt
source/libs/index/test/CMakeLists.txt
+23
-0
source/libs/index/test/indexTests.cpp
source/libs/index/test/indexTests.cpp
+59
-0
source/libs/tkv/src/tkv.c
source/libs/tkv/src/tkv.c
+3
-1
未找到文件。
CMakeLists.txt
浏览文件 @
09f3e8e1
...
...
@@ -48,6 +48,7 @@ endif(${BUILD_WITH_ROCKSDB})
## lucene
if
(
${
BUILD_WITH_LUCENE
}
)
cat
(
"
${
CMAKE_SUPPORT_DIR
}
/lucene_CMakeLists.txt.in"
${
DEPS_TMP_FILE
}
)
add_definitions
(
-DUSE_LUCENE
)
endif
(
${
BUILD_WITH_LUCENE
}
)
## NuRaft
...
...
cmake/cmake.options
浏览文件 @
09f3e8e1
...
...
@@ -22,7 +22,7 @@ option(
option(
BUILD_WITH_LUCENE
"If build with lucene"
OFF
off
)
option(
...
...
@@ -41,4 +41,4 @@ option(
BUILD_DOCS
"If use doxygen build documents"
ON
)
\ No newline at end of file
)
cmake/lucene_CMakeLists.txt.in
浏览文件 @
09f3e8e1
# lucene
ExternalProject_Add(lucene
GIT_REPOSITORY https://github.com/taosdata-contrib/LucenePlusPlus.git
GIT_TAG rel_3.0.8_td
GIT_REPOSITORY https://github.com/yihaoDeng/LucenePlusPlus.git
SOURCE_DIR "${CMAKE_SOURCE_DIR}/deps/lucene"
BINARY_DIR ""
#BUILD_IN_SOURCE TRUE
...
...
@@ -10,4 +9,4 @@ ExternalProject_Add(lucene
BUILD_COMMAND ""
INSTALL_COMMAND ""
TEST_COMMAND ""
)
\ No newline at end of file
)
deps/CMakeLists.txt
浏览文件 @
09f3e8e1
...
...
@@ -68,6 +68,11 @@ endif(${BUILD_WITH_ROCKSDB})
if
(
${
BUILD_WITH_LUCENE
}
)
option
(
ENABLE_TEST
"Enable the tests"
OFF
)
add_subdirectory
(
lucene
)
target_include_directories
(
lucene++
PUBLIC $<BUILD_INTERFACE:
${
CMAKE_CURRENT_SOURCE_DIR
}
/lucene/include>
)
endif
(
${
BUILD_WITH_LUCENE
}
)
# NuRaft
...
...
include/libs/index/index.h
浏览文件 @
09f3e8e1
...
...
@@ -16,12 +16,53 @@
#ifndef _TD_INDEX_H_
#define _TD_INDEX_H_
#include "os.h"
#include "tarray.h"
#ifdef __cplusplus
extern
"C"
{
#endif
/* Opaque handles; their layouts are private to the index library. */
typedef struct SIndex               SIndex;
typedef struct SIndexOpts           SIndexOpts;
typedef struct SIndexMultiTermQuery SIndexMultiTermQuery;
/* A set of terms to index is carried directly in an SArray. */
typedef struct SArray               SIndexMultiTerm;

/* How the sub-queries of a multi-term query are combined. */
typedef enum { MUST = 0, SHOULD = 1, NOT = 2 } EIndexOperatorType;
/* Matching mode of a single term query. */
typedef enum { QUERY_TERM = 0, QUERY_PREFIX = 1, QUERY_SUFFIX = 2, QUERY_REGEX = 3 } EIndexQueryType;

/*
 * Multi-term query construction.
 * @param oper  operator stored in the new query
 */
SIndexMultiTermQuery *indexMultiTermQueryCreate(EIndexOperatorType oper);
void                  indexMultiTermQueryDestroy(SIndexMultiTermQuery *pQuery);
int indexMultiTermQueryAdd(SIndexMultiTermQuery *pQuery, const char *field, int32_t nFields, const char *value,
                           int32_t nValue, EIndexQueryType type);

/*
 * Index lifecycle and operations.
 * @param opt   index options
 * @param path  location of the index
 */
SIndex *indexOpen(SIndexOpts *opt, const char *path);
void    indexClose(SIndex *index);
int     indexPut(SIndex *index, SIndexMultiTerm *terms, int uid);
int     indexDelete(SIndex *index, SIndexMultiTermQuery *query);
int     indexSearch(SIndex *index, SIndexMultiTermQuery *query, SArray *result);
int     indexRebuild(SIndex *index, SIndexOpts *opt);

/*
 * Term-set helpers: field/value pairs are given as (pointer, length).
 */
SIndexMultiTerm *indexMultiTermCreate();
int  indexMultiTermAdd(SIndexMultiTerm *terms, const char *field, int32_t nFields, const char *value, int32_t nValue);
void indexMultiTermDestroy(SIndexMultiTerm *terms);

/*
 * Index options.
 */
SIndexOpts *indexOptsCreate();
void        indexOptsDestroy(SIndexOpts *opts);
#ifdef __cplusplus
}
#endif
#endif
/*_TD_INDEX_H_*/
\ No newline at end of file
#endif
/*_TD_INDEX_H_*/
source/libs/index/CMakeLists.txt
浏览文件 @
09f3e8e1
...
...
@@ -4,4 +4,27 @@ target_include_directories(
index
PUBLIC
"
${
CMAKE_SOURCE_DIR
}
/include/libs/index"
PRIVATE
"
${
CMAKE_CURRENT_SOURCE_DIR
}
/inc"
)
\ No newline at end of file
)
target_link_libraries
(
index
PUBLIC os
PUBLIC util
)
if
(
${
BUILD_WITH_LUCENE
}
)
target_include_directories
(
index
PUBLIC
"
${
CMAKE_SOURCE_DIR
}
/deps/lucene/include"
)
LINK_DIRECTORIES
(
"
${
CMAKE_SOURCE_DIR
}
/deps/lucene/debug/src/core"
)
target_link_libraries
(
index
PUBLIC lucene++
)
endif
(
${
BUILD_WITH_LUCENE
}
)
if
(
${
BUILD_TEST
}
)
add_subdirectory
(
test
)
endif
(
${
BUILD_TEST
}
)
source/libs/index/inc/indexInt.h
浏览文件 @
09f3e8e1
...
...
@@ -16,12 +16,52 @@
#ifndef _TD_INDEX_INT_H_
#define _TD_INDEX_INT_H_
#include "index.h"
#ifdef USE_LUCENE
#include <lucene++/Lucene_c.h>
#endif
#ifdef __cplusplus
extern
"C"
{
#endif
/* Index handle; it only carries the Lucene index when built with USE_LUCENE. */
struct SIndex {
#ifdef USE_LUCENE
  index_t *index;
#endif
};

/* Index options; the payload is only present when built with USE_LUCENE. */
struct SIndexOpts {
#ifdef USE_LUCENE
  void *opts;
#endif
};

/* A list of term queries combined with one operator. */
struct SIndexMultiTermQuery {
  EIndexOperatorType opera;
  SArray            *query;
};

// field and key;
typedef struct SIndexTerm {
  char   *key;
  int32_t nKey;
  char   *val;
  int32_t nVal;
} SIndexTerm;

/* One term plus the way it should be matched. */
typedef struct SIndexTermQuery {
  SIndexTerm     *field_value;
  EIndexQueryType type;
} SIndexTermQuery;

SIndexTerm *indexTermCreate(const char *key, int32_t nKey, const char *val, int32_t nVal);
void        indexTermDestroy(SIndexTerm *p);
#ifdef __cplusplus
}
#endif
#endif
/*_TD_INDEX_INT_H_*/
\ No newline at end of file
#endif
/*_TD_INDEX_INT_H_*/
source/libs/index/inc/index_fst.h
0 → 100644
浏览文件 @
09f3e8e1
/*
* Copyright (c) 2019 TAOS Data, Inc. <jhtao@taosdata.com>
*
* This program is free software: you can use, redistribute, and/or modify
* it under the terms of the GNU Affero General Public License, version 3
* or later ("AGPL"), as published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#ifndef __INDEX_FST_H__
#define __INDEX_FST_H__
#include "tarray.h"
#include "index_fst_util.h"
#include "index_fst_registry.h"
typedef struct FstNode FstNode;

/* Smaller of two values; the closing parenthesis was missing in the original definition. */
#define OUTPUT_PREFIX(a, b) ((a) > (b) ? (b) : (a))
/* Pair of 64-bit positions. */
typedef struct FstRange {
  uint64_t start;
  uint64_t end;
} FstRange;

/* Node that is still being built. */
typedef struct FstBuilderNode {
  bool    isFinal;
  Output  finalOutput;
  SArray *trans;  // <FstTransition>
} FstBuilderNode;

/* Encoded node kinds. */
typedef enum { OneTransNext, OneTrans, AnyTrans, EmptyFinal } State;

typedef enum { Included, Excluded, Unbounded } FstBound;

typedef uint32_t CheckSummer;
/*
 * Stack of unfinished nodes and its helper functions.
 * TODO: simplify function names
 *
 * The prototypes below are spelled exactly like their definitions in
 * index_fst.c (fstUnFinishedNodesCreate,
 * FstUnFinishedNodesFindCommPrefixAndSetOutput).
 */
typedef struct FstUnFinishedNodes {
  SArray *stack;  // <FstBuilderNodeUnfinished>
} FstUnFinishedNodes;

#define FST_UNFINISHED_NODES_LEN(nodes) taosArrayGetSize((nodes)->stack)

FstUnFinishedNodes *fstUnFinishedNodesCreate();
void                fstUnFinishedNodesPushEmpty(FstUnFinishedNodes *nodes, bool isFinal);
FstBuilderNode     *fstUnFinishedNodesPopRoot(FstUnFinishedNodes *nodes);
FstBuilderNode     *fstUnFinishedNodesPopFreeze(FstUnFinishedNodes *nodes, CompiledAddr addr);
FstBuilderNode     *fstUnFinishedNodesPopEmpty(FstUnFinishedNodes *nodes);
void                fstUnFinishedNodesSetRootOutput(FstUnFinishedNodes *node, Output out);
void                fstUnFinishedNodesTopLastFreeze(FstUnFinishedNodes *node, CompiledAddr addr);
void                fstUnFinishedNodesAddSuffix(FstUnFinishedNodes *node, FstSlice bs, Output out);
uint64_t            fstUnFinishedNodesFindCommPrefix(FstUnFinishedNodes *node, FstSlice bs);
uint64_t FstUnFinishedNodesFindCommPrefixAndSetOutput(FstUnFinishedNodes *node, FstSlice bs, Output in, Output *out);
typedef struct FstCountingWriter {
  void       *wtr;  // wrap any writer that counts and checksum bytes written
  uint64_t    count;
  CheckSummer summer;
} FstCountingWriter;

typedef struct FstBuilder {
  FstCountingWriter   wtr;         // The FST raw data is written directly to `wtr`.
  FstUnFinishedNodes *unfinished;  // The stack of unfinished nodes
  FstRegistry         registry;    // A map of finished nodes.
  SArray             *last;        // The last word added
  CompiledAddr        lastAddr;    // The address of the last compiled node
  uint64_t            len;         // num of keys added
} FstBuilder;

typedef struct FstTransition {
  uint8_t      inp;   // The byte input associated with this transition.
  Output       out;   // The output associated with this transition
  CompiledAddr addr;  // The address of the node that this transition points to
} FstTransition;

typedef struct FstTransitions {
  FstNode *node;
  FstRange range;
} FstTransitions;

typedef struct FstLastTransition {
  uint8_t inp;
  Output  out;
} FstLastTransition;

/*
 * FstBuilderNodeUnfinished and helper function
 * TODO: simplify function names
 */
typedef struct FstBuilderNodeUnfinished {
  FstBuilderNode    *node;
  FstLastTransition *last;
} FstBuilderNodeUnfinished;

void fstBuilderNodeUnfinishedLastCompiled(FstBuilderNodeUnfinished *node, CompiledAddr addr);
void fstBuilderNodeUnfinishedAddOutputPrefix(FstBuilderNodeUnfinished *node, CompiledAddr addr);
/*
 * FstNode and helper function
 */
typedef struct FstNode {
  FstSlice     data;
  uint64_t     version;
  State        state;
  CompiledAddr start;
  CompiledAddr end;
  bool         isFinal;
  uint64_t     nTrans;
  PackSizes    sizes;
  Output       finalOutput;
} FstNode;

/*
 * Accessor macros. The argument and the whole expansion are parenthesised so
 * that expressions such as FST_NODE_LEN(p + 1) or FST_NODE_LEN(n) * 2 expand
 * as intended.
 */
// If this node is final and has a terminal output value, then it is, returned. Otherwise, a zero output is returned
#define FST_NODE_FINAL_OUTPUT(node) ((node)->finalOutput)
// Returns true if and only if this node corresponds to a final or "match", state in the finite state transducer.
#define FST_NODE_IS_FINAL(node) ((node)->isFinal)
// Returns the number of transitions in this node, The maximum number of transitions is 256.
#define FST_NODE_LEN(node) ((node)->nTrans)
// Returns true if and only if this node has zero transitions.
#define FST_NODE_IS_EMPTYE(node) ((node)->nTrans == 0)
// Correctly spelled alias; the original (misspelled) name above is kept for existing users.
#define FST_NODE_IS_EMPTY(node) FST_NODE_IS_EMPTYE(node)
// Return the address of this node.
#define FST_NODE_ADDR(node) ((node)->start)

FstNode        *fstNodeCreate(int64_t version, CompiledAddr addr, FstSlice *data);
FstTransitions  fstNodeTransitionIter(FstNode *node);
FstTransitions *fstNodeTransitions(FstNode *node);
bool            fstNodeGetTransitionAt(FstNode *node, uint64_t i, FstTransition *res);
bool            fstNodeGetTransitionAddrAt(FstNode *node, uint64_t i, CompiledAddr *res);
bool            fstNodeFindInput(FstNode *node, uint8_t b, uint64_t *res);
bool fstNodeCompile(FstNode *node, void *w, CompiledAddr lastAddr, CompiledAddr addr, FstBuilderNode *builderNode);
FstSlice        fstNodeAsSlice(FstNode *node);
/* Descriptive header fields of an FST. */
typedef struct FstMeta {
  uint64_t     version;
  CompiledAddr rootAddr;
  FstType      ty;
  uint64_t     len;
  uint32_t     checkSum;
} FstMeta;

typedef struct Fst {
  FstMeta meta;
  void   *data;  //
} Fst;

// ops
typedef struct FstIndexedValue {
  uint64_t index;
  uint64_t value;
} FstIndexedValue;

/* Pairs a builder node with an address. */
typedef struct FstRegistryCell {
  CompiledAddr    addr;
  FstBuilderNode *node;
} FstRegistryCell;
#endif
source/libs/index/inc/index_fst_automation.h
0 → 100644
浏览文件 @
09f3e8e1
/*
* Copyright (c) 2019 TAOS Data, Inc. <jhtao@taosdata.com>
*
* This program is free software: you can use, redistribute, and/or modify
* it under the terms of the GNU Affero General Public License, version 3
* or later ("AGPL"), as published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#ifndef __INDEX_FST_AUTAOMATION_H__
#define __INDEX_FST_AUTAOMATION_H__
struct
AutomationCtx
;
typedef
struct
StartWith
{
AutomationCtx
*
autoSelf
;
}
StartWith
;
typedef
struct
Complement
{
AutomationCtx
*
autoSelf
;
}
Complement
;
// automation
typedef
struct
AutomationCtx
{
void
*
data
;
}
AutomationCtx
;
// automation interface
void
(
*
start
)(
AutomationCtx
*
ctx
);
bool
(
*
isMatch
)(
AutomationCtx
*
ctx
);
bool
(
*
canMatch
)(
AutomationCtx
*
ctx
,
void
*
data
);
bool
(
*
willAlwaysMatch
)(
AutomationCtx
*
ctx
,
void
*
state
);
void
*
(
*
accpet
)(
AutomationCtx
*
ctx
,
void
*
state
,
uint8_t
byte
);
void
*
(
*
accpetEof
)(
AutomationCtx
*
ctx
,
*
state
);
#endif
source/libs/index/inc/index_fst_node.h
0 → 100644
浏览文件 @
09f3e8e1
/*
* Copyright (c) 2019 TAOS Data, Inc. <jhtao@taosdata.com>
*
* This program is free software: you can use, redistribute, and/or modify
* it under the terms of the GNU Affero General Public License, version 3
* or later ("AGPL"), as published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#ifndef __INDEX_FST_NODE_H__
#define __INDEX_FST_NODE_H__
#endif
source/libs/index/inc/index_fst_registry.h
0 → 100644
浏览文件 @
09f3e8e1
/*
* Copyright (c) 2019 TAOS Data, Inc. <jhtao@taosdata.com>
*
* This program is free software: you can use, redistribute, and/or modify
* it under the terms of the GNU Affero General Public License, version 3
* or later ("AGPL"), as published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#ifndef __FST_REGISTRY_H__
#define __FST_REGISTRY_H__
#include "index_fst_util.h"
// Placeholder type: no members are defined yet. FstBuilder embeds one of these
// and describes it as "a map of finished nodes".
// NOTE(review): an empty struct is a compiler extension in C - confirm the
// supported toolchains accept it until real fields are added.
typedef
struct
FstRegistry
{
}
FstRegistry
;
#endif
source/libs/index/inc/index_fst_util.h
0 → 100644
浏览文件 @
09f3e8e1
/*
* Copyright (c) 2019 TAOS Data, Inc. <jhtao@taosdata.com>
*
* This program is free software: you can use, redistribute, and/or modify
* it under the terms of the GNU Affero General Public License, version 3
* or later ("AGPL"), as published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#ifndef __INDEX_FST_UTIL_H__
#define __INDEX_FST_UTIL_H__
#include "tarray.h"
/* Scalar aliases used throughout the FST code. */
typedef uint64_t FstType;
typedef uint64_t CompiledAddr;
typedef uint64_t Output;
typedef uint8_t  PackSizes;

// A sentinel value used to indicate an empty final state
extern const CompiledAddr EMPTY_ADDRESS;
// A sentinel value used to indicate an invalid state.
extern const CompiledAddr NONE_ADDRESS;

// This version number is written to every finite state transducer created by
// this crate. When a finite state transducer is read, its version number is
// checked against this value.
extern const uint64_t version;

// The threshold (in number of transitions) at which an index is created for
// a node's transitions. This speeds up lookup time at the expense of FST size
extern const uint64_t TRANS_INDEX_THRESHOLD;
// high 4 bits is transition address packed size.
// low 4 bits is output value packed size.
//
// `0` is a legal value which means there are no transitions/outputs
//
// The setters are complete statements wrapped in do { } while (0) so they can
// be used in an unbraced if/else; every macro argument is parenthesised.
#define FST_SET_TRANSITION_PACK_SIZE(v, sz) do { (v) = ((v) & 0x0F) | ((sz) << 4); } while (0)
#define FST_GET_TRANSITION_PACK_SIZE(v) (((v) & 0xF0) >> 4)
#define FST_SET_OUTPUT_PACK_SIZE(v, sz) do { (v) = ((v) & 0xF0) | (sz); } while (0)
#define FST_GET_OUTPUT_PACK_SIZE(v) ((v) & 0x0F)

#define COMMON_INPUT(idx) COMMON_INPUTS_INV[(idx) - 1]

// Stores (COMMON_INPUTS[v] + 1) % 256 into val, or 0 when that exceeds max.
#define COMMON_INDEX(v, max, val) do {                   \
  (val) = ((uint16_t)COMMON_INPUTS[(v)] + 1) % 256;      \
  (val) = (val) > (max) ? 0 : (val);                     \
} while (0)
// uint8_t commonInput(uint8_t idx);
// uint8_t commonIdx(uint8_t v, uint8_t max);

/* Packing helpers. */
uint8_t      packSize(uint64_t n);
uint64_t     unpackUint64(uint8_t *ch, uint8_t sz);
uint8_t      packDeltaSize(CompiledAddr nodeAddr, CompiledAddr transAddr);
CompiledAddr unpackDelta(char *data, uint64_t len, uint64_t nodeAddr);

/* Window [start, end] over a byte buffer of dLen bytes. */
typedef struct FstSlice {
  uint8_t *data;
  uint64_t dLen;
  uint32_t start;
  uint32_t end;
} FstSlice;

FstSlice fstSliceCopy(FstSlice *slice, uint32_t start, uint32_t end);
FstSlice fstSliceCreate(uint8_t *data, uint64_t dLen);
bool     fstSliceEmpty(FstSlice *slice);
#endif
source/libs/index/src/index.c
浏览文件 @
09f3e8e1
...
...
@@ -13,15 +13,176 @@
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#i
fndef _TD_INDEX_H_
#
define _TD_INDEX_H_
#i
nclude "index.h"
#
include "indexInt.h"
#ifdef
__cplusplus
extern
"C"
{
#ifdef
USE_LUCENE
#include "lucene++/Lucene_c.h"
#endif
#ifdef __cplusplus
// Once-control passed to pthread_once() in indexOpen() so that indexInit()
// runs a single time.
static
pthread_once_t
isInit
=
PTHREAD_ONCE_INIT
;
// Initialisation routine handed to pthread_once(); defined at the end of this file.
static
void
indexInit
();
/*
 * Open the index located at `path`.
 *
 * @param opts  index options (not used by the current implementation)
 * @param path  location passed to the Lucene backend
 * @return a heap-allocated handle to be released with indexClose(); NULL when
 *         the handle cannot be allocated or the library is built without
 *         USE_LUCENE
 */
SIndex *indexOpen(SIndexOpts *opts, const char *path) {
  pthread_once(&isInit, indexInit);
#ifdef USE_LUCENE
  // Allocate the wrapper first so that an allocation failure never leaves an
  // opened backend index behind.
  SIndex *p = malloc(sizeof(SIndex));
  if (p == NULL) {
    return NULL;
  }
  p->index = index_open(path);
  return p;
#else
  return NULL;
#endif
}
/*
 * Close an index handle returned by indexOpen() and free it.
 * Passing NULL is a no-op (indexOpen() returns NULL on failure).
 */
void indexClose(SIndex *index) {
  if (index == NULL) {
    return;
  }
#ifdef USE_LUCENE
  index_close(index->index);
  index->index = NULL;
#endif
  free(index);
}
#ifdef USE_LUCENE
#endif
/*
 * Add one document made of `field_vals` to the index.
 *
 * @param index       handle from indexOpen()
 * @param field_vals  array of SIndexTerm pointers
 * @param uid         document id, stored in the document as its decimal text
 * @return always 1
 */
int indexPut(SIndex *index, SArray *field_vals, int uid) {
#ifdef USE_LUCENE
  index_document_t *doc = index_document_create();

  // Bounded formatting: 16 bytes hold any 32-bit int, snprintf guards the rest.
  char buf[16] = {0};
  snprintf(buf, sizeof(buf), "%d", uid);

  size_t nTerms = taosArrayGetSize(field_vals);
  for (size_t i = 0; i < nTerms; i++) {
    SIndexTerm *p = taosArrayGetP(field_vals, i);
    index_document_add(doc, (const char *)(p->key), p->nKey, (const char *)(p->val), p->nVal, 1);
  }
  // The uid is added with no field name.
  index_document_add(doc, NULL, 0, buf, strlen(buf), 0);

  index_put(index->index, doc);
  index_document_destroy(doc);
#endif
  return 1;
}
/*
 * Run a multi-term query and append every hit to `result`.
 *
 * @param index        handle from indexOpen()
 * @param multiQuerys  query built with indexMultiTermQueryCreate()/Add()
 * @param result       array receiving one int per hit
 * @return 1 on completion (also when built without USE_LUCENE), -1 when a
 *         temporary buffer cannot be allocated
 */
int indexSearch(SIndex *index, SIndexMultiTermQuery *multiQuerys, SArray *result) {
#ifdef USE_LUCENE
  EIndexOperatorType opera = multiQuerys->opera;
  int                nQuery = taosArrayGetSize(multiQuerys->query);
  int                code = 1;
  int               *tResult = NULL;
  int                tsz = 0;

  // calloc keeps unused slots NULL so the cleanup below is safe after a
  // partial failure; at least one slot avoids a zero-sized allocation.
  size_t slots = nQuery > 0 ? (size_t)nQuery : 1;
  char **fields = calloc(slots, sizeof(char *));
  char **keys = calloc(slots, sizeof(char *));
  int   *types = calloc(slots, sizeof(int));
  if (fields == NULL || keys == NULL || types == NULL) {
    code = -1;
    goto _cleanup;
  }

  // Copy each term into NUL-terminated strings for the backend.
  for (int i = 0; i < nQuery; i++) {
    SIndexTermQuery *p = taosArrayGet(multiQuerys->query, i);
    SIndexTerm      *term = p->field_value;

    fields[i] = calloc(1, term->nKey + 1);
    keys[i] = calloc(1, term->nVal + 1);
    if (fields[i] == NULL || keys[i] == NULL) {
      code = -1;
      goto _cleanup;
    }
    memcpy(fields[i], term->key, term->nKey);
    memcpy(keys[i], term->val, term->nVal);
    types[i] = (int)(p->type);
  }

  index_multi_search(index->index, (const char **)fields, (const char **)keys, types, nQuery, opera, &tResult, &tsz);
  for (int i = 0; i < tsz; i++) {
    taosArrayPush(result, &tResult[i]);
  }
  // NOTE(review): tResult is not released here (same as before) - confirm who
  // owns the buffer returned by index_multi_search.

_cleanup:
  for (int i = 0; i < nQuery; i++) {
    if (fields != NULL) free(fields[i]);
    if (keys != NULL) free(keys[i]);
  }
  free(fields);
  free(keys);
  free(types);
  return code;
#else
  return 1;
#endif
}
/*
 * Delete the documents matching `query`.
 * Nothing is deleted yet: both arguments are ignored and 1 is returned.
 */
int indexDelete(SIndex *index, SIndexMultiTermQuery *query) {
  (void)index;
  (void)query;
  const int code = 1;
  return code;
}
int
indexRebuild
(
SIndex
*
index
,
SIndexOpts
*
opts
);
// Create an options object for indexOpen().
// Currently a stub: the USE_LUCENE section is empty and NULL is always returned.
SIndexOpts
*
indexOptsCreate
()
{
#ifdef USE_LUCENE
#endif
return
NULL
;
}
// Release an options object.
// Currently a stub: the USE_LUCENE section is empty, so `opts` is left untouched.
void
indexOptsDestroy
(
SIndexOpts
*
opts
)
{
#ifdef USE_LUCENE
#endif
}
/*
* @param: oper
*
*/
#endif
/*_TD_INDEX_H_*/
\ No newline at end of file
/*
 * Create an empty multi-term query.
 *
 * @param opera  operator used to combine the term queries added later
 * @return the new query, or NULL when either allocation fails; release with
 *         indexMultiTermQueryDestroy()
 */
SIndexMultiTermQuery *indexMultiTermQueryCreate(EIndexOperatorType opera) {
  SIndexMultiTermQuery *p = (SIndexMultiTermQuery *)malloc(sizeof(SIndexMultiTermQuery));
  if (p == NULL) {
    return NULL;
  }
  p->opera = opera;
  p->query = taosArrayInit(1, sizeof(SIndexTermQuery));
  if (p->query == NULL) {
    // Do not hand out a query whose term list could not be created.
    free(p);
    return NULL;
  }
  return p;
}
/*
 * Destroy a multi-term query together with every term it owns.
 * Passing NULL is a no-op.
 */
void indexMultiTermQueryDestroy(SIndexMultiTermQuery *pQuery) {
  if (pQuery == NULL) {
    return;
  }
  if (pQuery->query != NULL) {
    size_t nQuery = taosArrayGetSize(pQuery->query);
    for (size_t i = 0; i < nQuery; i++) {
      // Elements are stored by value; each one owns its SIndexTerm.
      SIndexTermQuery *p = (SIndexTermQuery *)taosArrayGet(pQuery->query, i);
      indexTermDestroy(p->field_value);
    }
    taosArrayDestroy(pQuery->query);
  }
  free(pQuery);
}
/*
 * Append one term query to `pQuery`.
 *
 * @param field/nFields  field name and its length in bytes
 * @param value/nValue   value and its length in bytes
 * @param type           matching mode for this term
 * @return 0 on success, -1 when the term cannot be created or stored
 */
int indexMultiTermQueryAdd(SIndexMultiTermQuery *pQuery, const char *field, int32_t nFields, const char *value,
                           int32_t nValue, EIndexQueryType type) {
  SIndexTerm *t = indexTermCreate(field, nFields, value, nValue);
  if (t == NULL) {
    return -1;
  }
  SIndexTermQuery q = {.type = type, .field_value = t};
  if (taosArrayPush(pQuery->query, &q) == NULL) {
    // The array did not take the element, so the term is still ours to free.
    indexTermDestroy(t);
    return -1;
  }
  return 0;
}
/*
 * Create a term holding private, NUL-terminated copies of `key` and `val`.
 *
 * @param key/nKey  key bytes and their count
 * @param val/nVal  value bytes and their count
 * @return the new term, or NULL when a length is negative or memory cannot be
 *         allocated; release with indexTermDestroy()
 */
SIndexTerm *indexTermCreate(const char *key, int32_t nKey, const char *val, int32_t nVal) {
  if (nKey < 0 || nVal < 0) {
    return NULL;
  }
  SIndexTerm *t = (SIndexTerm *)malloc(sizeof(SIndexTerm));
  if (t == NULL) {
    return NULL;
  }

  // One extra zeroed byte keeps both copies NUL-terminated.
  t->key = (char *)calloc((size_t)nKey + 1, 1);
  t->val = (char *)calloc((size_t)nVal + 1, 1);
  if (t->key == NULL || t->val == NULL) {
    free(t->key);
    free(t->val);
    free(t);
    return NULL;
  }

  if (nKey > 0) {
    memcpy(t->key, key, nKey);
  }
  t->nKey = nKey;
  if (nVal > 0) {
    memcpy(t->val, val, nVal);
  }
  t->nVal = nVal;
  return t;
}
/*
 * Free a term created by indexTermCreate(), including its key and value copies.
 * Passing NULL is a no-op.
 */
void indexTermDestroy(SIndexTerm *p) {
  if (p == NULL) {
    return;
  }
  free(p->key);
  free(p->val);
  free(p);
}
/*
 * Create an empty term list: an array of SIndexTerm pointers with an initial
 * capacity of 4.
 */
SArray *indexMultiTermCreate() {
  SArray *terms = taosArrayInit(4, sizeof(SIndexTerm *));
  return terms;
}
/*
 * Append a field/value term to a term list.
 *
 * @param array         list created by indexMultiTermCreate()
 * @param field/nField  field name and its length in bytes
 * @param val/nVal      value and its length in bytes
 * @return 0 on success, -1 when the term cannot be created or stored
 */
int indexMultiTermAdd(SArray *array, const char *field, int32_t nField, const char *val, int32_t nVal) {
  SIndexTerm *term = indexTermCreate(field, nField, val, nVal);
  if (term == NULL) {
    return -1;
  }
  if (taosArrayPush(array, &term) == NULL) {
    // The list did not take the pointer, so the term is still ours to free.
    indexTermDestroy(term);
    return -1;
  }
  return 0;
}
/*
 * Destroy a term list: every stored SIndexTerm is released first, then the
 * array itself.
 */
void indexMultiTermDestroy(SArray *array) {
  int32_t idx = 0;
  while (idx < taosArrayGetSize(array)) {
    SIndexTerm *term = taosArrayGetP(array, idx);
    indexTermDestroy(term);
    idx += 1;
  }
  taosArrayDestroy(array);
}
// Initialisation routine run through pthread_once() from indexOpen().
// It has no work to do at the moment.
void
indexInit
()
{
//do nothing
}
source/libs/index/src/index_fst.c
0 → 100644
浏览文件 @
09f3e8e1
/*
* Copyright (c) 2019 TAOS Data, Inc. <jhtao@taosdata.com>
*
* This program is free software: you can use, redistribute, and/or modify
* it under the terms of the GNU Affero General Public License, version 3
* or later ("AGPL"), as published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#include "index_fst.h"
/*
 * Create the stack of unfinished nodes and push its initial non-final node.
 *
 * @return the new stack object, or NULL when memory cannot be allocated
 */
FstUnFinishedNodes *fstUnFinishedNodesCreate() {
  FstUnFinishedNodes *nodes = malloc(sizeof(FstUnFinishedNodes));
  if (nodes == NULL) {
    return NULL;
  }

  nodes->stack = (SArray *)taosArrayInit(64, sizeof(FstBuilderNodeUnfinished));
  if (nodes->stack == NULL) {
    // Pushing into a missing stack would dereference NULL.
    free(nodes);
    return NULL;
  }
  fstUnFinishedNodesPushEmpty(nodes, false);
  return nodes;
}
/*
 * Push a fresh node with no transitions, zero final output and no pending
 * last transition.
 *
 * @param nodes    stack to push onto
 * @param isFinal  value stored in the new node's isFinal flag
 *
 * When memory cannot be obtained nothing is pushed; the function has no way
 * to report it through its void return.
 */
void fstUnFinishedNodesPushEmpty(FstUnFinishedNodes *nodes, bool isFinal) {
  FstBuilderNode *node = malloc(sizeof(FstBuilderNode));
  if (node == NULL) {
    return;
  }
  node->isFinal = isFinal;
  node->finalOutput = 0;
  node->trans = NULL;

  FstBuilderNodeUnfinished un = {.node = node, .last = NULL};
  if (taosArrayPush(nodes->stack, &un) == NULL) {
    // Not stored, so nobody else will free it.
    free(node);
  }
}
/*
 * Pop the only remaining entry of the stack and return its builder node.
 * Asserts that exactly one entry is left and that it has no pending last
 * transition.
 */
FstBuilderNode *fstUnFinishedNodesPopRoot(FstUnFinishedNodes *nodes) {
  SArray *stack = nodes->stack;
  assert(taosArrayGetSize(stack) == 1);

  FstBuilderNodeUnfinished *root = taosArrayPop(stack);
  assert(root->last == NULL);

  FstBuilderNode *rootNode = root->node;
  return rootNode;
}
/*
 * Pop the top entry, pass it with `addr` to
 * fstBuilderNodeUnfinishedLastCompiled(), free its pending last transition
 * and return the builder node.
 */
FstBuilderNode *fstUnFinishedNodesPopFreeze(FstUnFinishedNodes *nodes, CompiledAddr addr) {
  FstBuilderNodeUnfinished *top = taosArrayPop(nodes->stack);
  fstBuilderNodeUnfinishedLastCompiled(top, addr);

  // TODO add func FstLastTransitionFree()
  free(top->last);

  FstBuilderNode *frozen = top->node;
  return frozen;
}
/*
 * Pop the top entry, which must not have a pending last transition, and
 * return its builder node.
 */
FstBuilderNode *fstUnFinishedNodesPopEmpty(FstUnFinishedNodes *nodes) {
  FstBuilderNodeUnfinished *top = taosArrayPop(nodes->stack);
  assert(top->last == NULL);

  FstBuilderNode *emptyNode = top->node;
  return emptyNode;
}
/*
 * Mark the bottom entry of the stack (index 0) as final and store `out` as
 * its final output.
 */
void fstUnFinishedNodesSetRootOutput(FstUnFinishedNodes *nodes, Output out) {
  FstBuilderNodeUnfinished *root = taosArrayGet(nodes->stack, 0);
  FstBuilderNode           *rootNode = root->node;

  rootNode->isFinal = true;
  rootNode->finalOutput = out;
  // rootNode->trans = NULL;
}
/*
 * Pass the top entry of the stack, together with `addr`, to
 * fstBuilderNodeUnfinishedLastCompiled(). The entry stays on the stack.
 */
void fstUnFinishedNodesTopLastFreeze(FstUnFinishedNodes *nodes, CompiledAddr addr) {
  size_t topIdx = taosArrayGetSize(nodes->stack) - 1;

  FstBuilderNodeUnfinished *top = taosArrayGet(nodes->stack, topIdx);
  fstBuilderNodeUnfinishedLastCompiled(top, addr);
}
/*
 * Extend the stack with the bytes of `bs` (start..end, both inclusive).
 *
 * The first byte becomes the pending last transition of the current top node
 * and carries `out`. Every following byte gets a new non-final node whose
 * pending transition has output 0, so `out` is attached to the path once.
 * Finally an empty final node is pushed.
 *
 * An empty or inverted slice is ignored. When memory runs out the function
 * stops early; it has no return value to report that.
 */
void fstUnFinishedNodesAddSuffix(FstUnFinishedNodes *nodes, FstSlice bs, Output out) {
  FstSlice *s = &bs;
  if (s->data == NULL || s->dLen == 0 || s->start > s->end) {
    return;
  }

  size_t                    sz = taosArrayGetSize(nodes->stack) - 1;
  FstBuilderNodeUnfinished *top = taosArrayGet(nodes->stack, sz);
  assert(top->last == NULL);

  FstLastTransition *first = malloc(sizeof(FstLastTransition));
  if (first == NULL) {
    return;
  }
  first->inp = s->data[s->start];
  first->out = out;
  top->last = first;

  // Start after the first byte: it is already recorded on the top node above.
  for (uint64_t i = (uint64_t)s->start + 1; i <= s->end; i++) {
    FstBuilderNode    *n = malloc(sizeof(FstBuilderNode));
    FstLastTransition *trn = malloc(sizeof(FstLastTransition));
    if (n == NULL || trn == NULL) {
      free(n);
      free(trn);
      return;
    }
    n->isFinal = false;
    n->finalOutput = 0;
    n->trans = NULL;

    trn->inp = s->data[i];
    trn->out = 0;  // the output belongs to the first transition only

    FstBuilderNodeUnfinished un = {.node = n, .last = trn};
    taosArrayPush(nodes->stack, &un);
  }
  fstUnFinishedNodesPushEmpty(nodes, true);
}
/*
 * Count how many leading bytes of `bs` match the pending last transitions of
 * the stack entries, comparing byte i with entry i.
 *
 * Counting stops at the first mismatch, at the first entry that has no
 * pending transition, or when either the slice or the stack is exhausted.
 */
uint64_t fstUnFinishedNodesFindCommPrefix(FstUnFinishedNodes *node, FstSlice bs) {
  FstSlice *s = &bs;

  size_t   lsz = (size_t)(s->end - s->start + 1);  // data len
  size_t   ssz = taosArrayGetSize(node->stack);    // stack size
  uint64_t count = 0;

  for (size_t i = 0; i < ssz && i < lsz; i++) {
    FstBuilderNodeUnfinished *un = taosArrayGet(node->stack, i);
    // Entries pushed by fstUnFinishedNodesPushEmpty() have last == NULL.
    if (un->last == NULL || un->last->inp != s->data[s->start + i]) {
      break;
    }
    count++;
  }
  return count;
}
/*
 * Like fstUnFinishedNodesFindCommPrefix(), but also redistributes outputs
 * along the shared prefix.
 *
 * For every matching entry the common part (minimum) of the transition's
 * output and the remaining input output is kept on the transition. The rest
 * of the transition's old output is pushed down to the next entry, and the
 * common part is subtracted from the remaining input output.
 *
 * @param node  stack of unfinished nodes
 * @param bs    key bytes, start..end inclusive
 * @param in    output value of the key being inserted
 * @param out   receives what is left of `in` after the shared prefix; may be NULL
 * @return number of leading bytes shared with the stack
 */
uint64_t FstUnFinishedNodesFindCommPrefixAndSetOutput(FstUnFinishedNodes *node, FstSlice bs, Output in, Output *out) {
  FstSlice *s = &bs;

  size_t   lsz = (size_t)(s->end - s->start + 1);  // data len
  size_t   ssz = taosArrayGetSize(node->stack);    // stack size
  uint64_t matched = 0;
  Output   remaining = in;

  for (size_t i = 0; i < lsz && i < ssz; i++) {
    FstBuilderNodeUnfinished *un = taosArrayGet(node->stack, i);
    FstLastTransition        *last = un->last;
    if (last == NULL || last->inp != s->data[s->start + i]) {
      break;
    }
    matched++;

    uint64_t commPrefix = last->out < remaining ? last->out : remaining;
    uint64_t addPrefix = last->out - commPrefix;
    remaining -= commPrefix;
    last->out = commPrefix;

    // What no longer fits on this transition moves to the node it leads to.
    if (addPrefix != 0 && i + 1 < ssz) {
      FstBuilderNodeUnfinished *next = taosArrayGet(node->stack, i + 1);
      fstBuilderNodeUnfinishedAddOutputPrefix(next, addPrefix);
    }
  }

  if (out != NULL) {
    *out = remaining;
  }
  return matched;
}
// fst node function
/*
 * Build the in-memory description of the node stored at `addr`.
 *
 * @param version  format version recorded in the node
 * @param addr     node address; EMPTY_ADDRESS yields the empty final node
 *                 without touching `slice`
 * @param slice    encoded FST bytes
 * @return a heap-allocated node, or NULL when memory cannot be allocated
 *
 * The node kind is taken from the two highest bits of the byte at `addr`.
 * Size and transition-count decoding are not implemented yet and are
 * reported as 0.
 */
FstNode *fstNodeCreate(int64_t version, CompiledAddr addr, FstSlice *slice) {
  FstNode *n = (FstNode *)malloc(sizeof(FstNode));
  if (n == NULL) {
    return NULL;
  }

  // Identical for every node kind.
  n->version = version;
  n->finalOutput = 0;  // TODO: s.final_output(version, data, sz, ntrans) for AnyTrans

  if (addr == EMPTY_ADDRESS) {
    n->data = fstSliceCreate(NULL, 0);
    n->state = EmptyFinal;
    n->start = EMPTY_ADDRESS;
    n->end = EMPTY_ADDRESS;
    n->isFinal = true;
    n->nTrans = 0;
    n->sizes = 0;
    // Return here: the decoding below would read slice->data[addr].
    return n;
  }

  uint8_t v = slice->data[addr];
  uint8_t s = (v & 0xC0) >> 6;  // two highest bits select the node kind

  if (s == 0x3) {
    // oneTransNext
    n->data = fstSliceCopy(slice, 0, addr);
    n->state = OneTransNext;
    n->start = addr;
    n->end = addr;  // TODO: s.end_addr(data)
    n->isFinal = false;
    n->sizes = 0;
    n->nTrans = 0;  // NOTE(review): a one-transition node reporting 0 looks suspicious - confirm
  } else if (s == 0x2) {
    // oneTrans
    uint64_t sz = 0;  // TODO: fetch sz from addr
    n->data = fstSliceCopy(slice, 0, addr);
    n->state = OneTrans;
    n->start = addr;
    n->end = addr;  // TODO: s.end_addr(data, sz)
    n->isFinal = false;
    n->nTrans = 1;
    n->sizes = sz;
  } else {
    // anyTrans
    uint64_t sz = 0;      // TODO: s.sizes(data)
    uint32_t nTrans = 0;  // TODO: s.ntrans(data)
    n->data = *slice;
    n->state = AnyTrans;
    n->start = addr;
    n->end = addr;       // TODO: s.end_addr(version, data, sz, ntrans)
    n->isFinal = false;  // TODO: s.is_final_state()
    n->nTrans = nTrans;
    n->sizes = sz;
  }
  return n;
}
/*
 * Allocate a transition cursor for `node` covering [0, FST_NODE_LEN(node)].
 * Returns NULL when the cursor cannot be allocated; the caller frees it.
 */
FstTransitions *fstNodeTransitions(FstNode *node) {
  FstTransitions *cursor = malloc(sizeof(FstTransitions));
  if (cursor == NULL) {
    return NULL;
  }

  cursor->node = node;
  cursor->range.start = 0;
  cursor->range.end = FST_NODE_LEN(node);
  return cursor;
}
/*
 * Fetch transition `i` of `node` into `res`.
 * Decoding is not implemented: `res` is never written. The return value is
 * true for the three node kinds that have transitions and false otherwise.
 */
bool fstNodeGetTransitionAt(FstNode *node, uint64_t i, FstTransition *res) {
  switch (node->state) {
    case OneTransNext:
    case OneTrans:
    case AnyTrans:
      // TODO: decode the transition into *res
      return true;
    default:
      return false;
  }
}
/*
 * Fetch the target address of transition `i` of `node` into `res`.
 * Decoding is not implemented: `res` is never written. Only an EmptyFinal
 * node yields false; every other state yields true.
 */
bool fstNodeGetTransitionAddrAt(FstNode *node, uint64_t i, CompiledAddr *res) {
  // TODO: decode the address into *res for OneTransNext / OneTrans / AnyTrans
  const bool hasTransitions = (node->state != EmptyFinal);
  return hasTransitions;
}
/*
 * Look up the index of the transition of `node` whose input byte is `b`.
 *
 * @param res  receives the transition index when a match is found
 * @return true when *res was set; false when the byte does not match or the
 *         node is EmptyFinal
 *
 * Reading the node's input byte is not implemented yet, so `input` is a
 * placeholder with a defined value instead of an uninitialised read.
 * NOTE(review): the AnyTrans branch still returns true without writing *res,
 * exactly as before - confirm once the lookup is implemented.
 */
bool fstNodeFindInput(FstNode *node, uint8_t b, uint64_t *res) {
  uint8_t input = 0;  // TODO: s.input

  if (node->state == OneTransNext || node->state == OneTrans) {
    if (b != input) {
      return false;  // no match: *res is left untouched
    }
    *res = 0;
    return true;
  }
  if (node->state == EmptyFinal) {
    return false;
  }
  return true;
}
/*
 * Choose the encoding for `builderNode` and write it through `w`.
 *
 * The three encoders are not implemented yet, so nothing is written and the
 * function always returns true. The branch structure is kept so each encoder
 * can be dropped in at its TODO.
 */
bool fstNodeCompile(FstNode *node, void *w, CompiledAddr lastAddr, CompiledAddr addr, FstBuilderNode *builderNode) {
  size_t sz = taosArrayGetSize(builderNode->trans);
  // A node may have up to 256 transitions (one per byte value), so 256 is valid.
  assert(sz <= 256);

  if (sz == 0 && builderNode->isFinal && builderNode->finalOutput == 0) {
    return true;  // empty final node: nothing to encode
  }
  if (sz != 1 || builderNode->isFinal) {
    // TODO: AnyTrans->Compile(w, addr, node);
    return true;
  }

  FstTransition *tran = taosArrayGet(builderNode->trans, 0);
  if (tran->addr == lastAddr && tran->out == 0) {
    // TODO: OneTransNext::compile(w, lastAddr, tran->inp);
  } else {
    // TODO: OneTrans::Compile(w, lastAddr, *tran);
  }
  return true;
}
/*
 * Create a builder that writes through `w`.
 *
 * @param w   underlying writer, stored in the counting writer
 * @param ty  FST type (not used yet)
 * @return the new builder, or NULL when memory cannot be allocated
 *
 * Every field starts in a defined state: zeroed counters, no last key,
 * NONE_ADDRESS as last compiled address, and a stack of unfinished nodes
 * created by fstUnFinishedNodesCreate().
 */
FstBuilder *fstBuilderCreate(void *w, FstType ty) {
  FstBuilder *b = calloc(1, sizeof(FstBuilder));
  if (b == NULL) {
    return NULL;
  }

  b->wtr.wtr = w;
  b->wtr.count = 0;
  b->wtr.summer = 0;

  // A bare malloc would leave the stack pointer inside uninitialised.
  b->unfinished = fstUnFinishedNodesCreate();
  if (b->unfinished == NULL) {
    free(b);
    return NULL;
  }

  b->last = NULL;
  b->lastAddr = NONE_ADDRESS;
  b->len = 0;
  return b;
}
/*
 * Return a copy of the node's slice spanning from the slice's `end` position
 * to the last byte of its buffer (dLen - 1).
 */
FstSlice fstNodeAsSlice(FstNode *node) {
  FstSlice *data = &node->data;
  return fstSliceCopy(data, data->end, data->dLen - 1);
}
source/libs/index/src/index_fst_automation.c
0 → 100644
浏览文件 @
09f3e8e1
/*
* Copyright (c) 2019 TAOS Data, Inc. <jhtao@taosdata.com>
*
* This program is free software: you can use, redistribute, and/or modify
* it under the terms of the GNU Affero General Public License, version 3
* or later ("AGPL"), as published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
source/libs/index/src/index_fst_common.c
0 → 100644
浏览文件 @
09f3e8e1
/*
* Copyright (c) 2019 TAOS Data, Inc. <jhtao@taosdata.com>
*
* This program is free software: you can use, redistribute, and/or modify
* it under the terms of the GNU Affero General Public License, version 3
* or later ("AGPL"), as published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#include "tutil.h"
// Lookup table indexed by byte value (0..255). Entry i holds the rank
// assigned to byte i; the trailing comment on each entry shows the byte the
// index stands for. COMMON_INPUTS_INV is the reverse table (rank -> byte).
// NOTE(review): lower ranks presumably denote more frequently seen bytes
// ('t' -> 0, 'e' -> 1, '/' -> 2, ...) - confirm against the FST node encoder.
const uint8_t COMMON_INPUTS[] = {
    84,   // '\x00'
    85,   // '\x01'
    86,   // '\x02'
    87,   // '\x03'
    88,   // '\x04'
    89,   // '\x05'
    90,   // '\x06'
    91,   // '\x07'
    92,   // '\x08'
    93,   // '\t'
    94,   // '\n'
    95,   // '\x0b'
    96,   // '\x0c'
    97,   // '\r'
    98,   // '\x0e'
    99,   // '\x0f'
    100,  // '\x10'
    101,  // '\x11'
    102,  // '\x12'
    103,  // '\x13'
    104,  // '\x14'
    105,  // '\x15'
    106,  // '\x16'
    107,  // '\x17'
    108,  // '\x18'
    109,  // '\x19'
    110,  // '\x1a'
    111,  // '\x1b'
    112,  // '\x1c'
    113,  // '\x1d'
    114,  // '\x1e'
    115,  // '\x1f'
    116,  // ' '
    80,   // '!'
    117,  // '"'
    118,  // '#'
    79,   // '$'
    39,   // '%'
    30,   // '&'
    81,   // "'"
    75,   // '('
    74,   // ')'
    82,   // '*'
    57,   // '+'
    66,   // ','
    16,   // '-'
    12,   // '.'
    2,    // '/'
    19,   // '0'
    20,   // '1'
    21,   // '2'
    27,   // '3'
    32,   // '4'
    29,   // '5'
    35,   // '6'
    36,   // '7'
    37,   // '8'
    34,   // '9'
    24,   // ':'
    73,   // ';'
    119,  // '<'
    23,   // '='
    120,  // '>'
    40,   // '?'
    83,   // '@'
    44,   // 'A'
    48,   // 'B'
    42,   // 'C'
    43,   // 'D'
    49,   // 'E'
    46,   // 'F'
    62,   // 'G'
    61,   // 'H'
    47,   // 'I'
    69,   // 'J'
    68,   // 'K'
    58,   // 'L'
    56,   // 'M'
    55,   // 'N'
    59,   // 'O'
    51,   // 'P'
    72,   // 'Q'
    54,   // 'R'
    45,   // 'S'
    52,   // 'T'
    64,   // 'U'
    65,   // 'V'
    63,   // 'W'
    71,   // 'X'
    67,   // 'Y'
    70,   // 'Z'
    77,   // '['
    121,  // '\\'
    78,   // ']'
    122,  // '^'
    31,   // '_'
    123,  // '`'
    4,    // 'a'
    25,   // 'b'
    9,    // 'c'
    17,   // 'd'
    1,    // 'e'
    26,   // 'f'
    22,   // 'g'
    13,   // 'h'
    7,    // 'i'
    50,   // 'j'
    38,   // 'k'
    14,   // 'l'
    15,   // 'm'
    10,   // 'n'
    3,    // 'o'
    8,    // 'p'
    60,   // 'q'
    6,    // 'r'
    5,    // 's'
    0,    // 't'
    18,   // 'u'
    33,   // 'v'
    11,   // 'w'
    41,   // 'x'
    28,   // 'y'
    53,   // 'z'
    124,  // '{'
    125,  // '|'
    126,  // '}'
    76,   // '~'
    127,  // '\x7f'
    128,  // '\x80'
    129,  // '\x81'
    130,  // '\x82'
    131,  // '\x83'
    132,  // '\x84'
    133,  // '\x85'
    134,  // '\x86'
    135,  // '\x87'
    136,  // '\x88'
    137,  // '\x89'
    138,  // '\x8a'
    139,  // '\x8b'
    140,  // '\x8c'
    141,  // '\x8d'
    142,  // '\x8e'
    143,  // '\x8f'
    144,  // '\x90'
    145,  // '\x91'
    146,  // '\x92'
    147,  // '\x93'
    148,  // '\x94'
    149,  // '\x95'
    150,  // '\x96'
    151,  // '\x97'
    152,  // '\x98'
    153,  // '\x99'
    154,  // '\x9a'
    155,  // '\x9b'
    156,  // '\x9c'
    157,  // '\x9d'
    158,  // '\x9e'
    159,  // '\x9f'
    160,  // '\xa0'
    161,  // '¡'
    162,  // '¢'
    163,  // '£'
    164,  // '¤'
    165,  // '¥'
    166,  // '¦'
    167,  // '§'
    168,  // '¨'
    169,  // '©'
    170,  // 'ª'
    171,  // '«'
    172,  // '¬'
    173,  // '\xad'
    174,  // '®'
    175,  // '¯'
    176,  // '°'
    177,  // '±'
    178,  // '²'
    179,  // '³'
    180,  // '´'
    181,  // 'µ'
    182,  // '¶'
    183,  // '·'
    184,  // '¸'
    185,  // '¹'
    186,  // 'º'
    187,  // '»'
    188,  // '¼'
    189,  // '½'
    190,  // '¾'
    191,  // '¿'
    192,  // 'À'
    193,  // 'Á'
    194,  // 'Â'
    195,  // 'Ã'
    196,  // 'Ä'
    197,  // 'Å'
    198,  // 'Æ'
    199,  // 'Ç'
    200,  // 'È'
    201,  // 'É'
    202,  // 'Ê'
    203,  // 'Ë'
    204,  // 'Ì'
    205,  // 'Í'
    206,  // 'Î'
    207,  // 'Ï'
    208,  // 'Ð'
    209,  // 'Ñ'
    210,  // 'Ò'
    211,  // 'Ó'
    212,  // 'Ô'
    213,  // 'Õ'
    214,  // 'Ö'
    215,  // '×'
    216,  // 'Ø'
    217,  // 'Ù'
    218,  // 'Ú'
    219,  // 'Û'
    220,  // 'Ü'
    221,  // 'Ý'
    222,  // 'Þ'
    223,  // 'ß'
    224,  // 'à'
    225,  // 'á'
    226,  // 'â'
    227,  // 'ã'
    228,  // 'ä'
    229,  // 'å'
    230,  // 'æ'
    231,  // 'ç'
    232,  // 'è'
    233,  // 'é'
    234,  // 'ê'
    235,  // 'ë'
    236,  // 'ì'
    237,  // 'í'
    238,  // 'î'
    239,  // 'ï'
    240,  // 'ð'
    241,  // 'ñ'
    242,  // 'ò'
    243,  // 'ó'
    244,  // 'ô'
    245,  // 'õ'
    246,  // 'ö'
    247,  // '÷'
    248,  // 'ø'
    249,  // 'ù'
    250,  // 'ú'
    251,  // 'û'
    252,  // 'ü'
    253,  // 'ý'
    254,  // 'þ'
    255,  // 'ÿ'
};
char
const
COMMON_INPUTS_INV
[]
=
{
't'
,
'e'
,
'/'
,
'o'
,
'a'
,
's'
,
'r'
,
'i'
,
'p'
,
'c'
,
'n'
,
'w'
,
'.'
,
'h'
,
'l'
,
'm'
,
'-'
,
'd'
,
'u'
,
'0'
,
'1'
,
'2'
,
'g'
,
'='
,
':'
,
'b'
,
'f'
,
'3'
,
'y'
,
'5'
,
'&'
,
'_'
,
'4'
,
'v'
,
'9'
,
'6'
,
'7'
,
'8'
,
'k'
,
'%'
,
'?'
,
'x'
,
'C'
,
'D'
,
'A'
,
'S'
,
'F'
,
'I'
,
'B'
,
'E'
,
'j'
,
'P'
,
'T'
,
'z'
,
'R'
,
'N'
,
'M'
,
'+'
,
'L'
,
'O'
,
'q'
,
'H'
,
'G'
,
'W'
,
'U'
,
'V'
,
','
,
'Y'
,
'K'
,
'J'
,
'Z'
,
'X'
,
'Q'
,
';'
,
')'
,
'('
,
'~'
,
'['
,
']'
,
'$'
,
'!'
,
'\''
,
'*'
,
'@'
,
'\x00'
,
'\x01'
,
'\x02'
,
'\x03'
,
'\x04'
,
'\x05'
,
'\x06'
,
'\x07'
,
'\x08'
,
'\t'
,
'\n'
,
'\x0b'
,
'\x0c'
,
'\r'
,
'\x0e'
,
'\x0f'
,
'\x10'
,
'\x11'
,
'\x12'
,
'\x13'
,
'\x14'
,
'\x15'
,
'\x16'
,
'\x17'
,
'\x18'
,
'\x19'
,
'\x1a'
,
'\x1b'
,
'\x1c'
,
'\x1d'
,
'\x1e'
,
'\x1f'
,
' '
,
'"'
,
'#'
,
'<'
,
'>'
,
'\\'
,
'^'
,
'`'
,
'{'
,
'|'
,
'}'
,
'\x7f'
,
'\x80'
,
'\x81'
,
'\x82'
,
'\x83'
,
'\x84'
,
'\x85'
,
'\x86'
,
'\x87'
,
'\x88'
,
'\x89'
,
'\x8a'
,
'\x8b'
,
'\x8c'
,
'\x8d'
,
'\x8e'
,
'\x8f'
,
'\x90'
,
'\x91'
,
'\x92'
,
'\x93'
,
'\x94'
,
'\x95'
,
'\x96'
,
'\x97'
,
'\x98'
,
'\x99'
,
'\x9a'
,
'\x9b'
,
'\x9c'
,
'\x9d'
,
'\x9e'
,
'\x9f'
,
'\xa0'
,
'\xa1'
,
'\xa2'
,
'\xa3'
,
'\xa4'
,
'\xa5'
,
'\xa6'
,
'\xa7'
,
'\xa8'
,
'\xa9'
,
'\xaa'
,
'\xab'
,
'\xac'
,
'\xad'
,
'\xae'
,
'\xaf'
,
'\xb0'
,
'\xb1'
,
'\xb2'
,
'\xb3'
,
'\xb4'
,
'\xb5'
,
'\xb6'
,
'\xb7'
,
'\xb8'
,
'\xb9'
,
'\xba'
,
'\xbb'
,
'\xbc'
,
'\xbd'
,
'\xbe'
,
'\xbf'
,
'\xc0'
,
'\xc1'
,
'\xc2'
,
'\xc3'
,
'\xc4'
,
'\xc5'
,
'\xc6'
,
'\xc7'
,
'\xc8'
,
'\xc9'
,
'\xca'
,
'\xcb'
,
'\xcc'
,
'\xcd'
,
'\xce'
,
'\xcf'
,
'\xd0'
,
'\xd1'
,
'\xd2'
,
'\xd3'
,
'\xd4'
,
'\xd5'
,
'\xd6'
,
'\xd7'
,
'\xd8'
,
'\xd9'
,
'\xda'
,
'\xdb'
,
'\xdc'
,
'\xdd'
,
'\xde'
,
'\xdf'
,
'\xe0'
,
'\xe1'
,
'\xe2'
,
'\xe3'
,
'\xe4'
,
'\xe5'
,
'\xe6'
,
'\xe7'
,
'\xe8'
,
'\xe9'
,
'\xea'
,
'\xeb'
,
'\xec'
,
'\xed'
,
'\xee'
,
'\xef'
,
'\xf0'
,
'\xf1'
,
'\xf2'
,
'\xf3'
,
'\xf4'
,
'\xf5'
,
'\xf6'
,
'\xf7'
,
'\xf8'
,
'\xf9'
,
'\xfa'
,
'\xfb'
,
'\xfc'
,
'\xfd'
,
'\xfe'
,
'\xff'
,
};
source/libs/index/src/index_fst_node.c
0 → 100644
浏览文件 @
09f3e8e1
/*
* Copyright (c) 2019 TAOS Data, Inc. <jhtao@taosdata.com>
*
* This program is free software: you can use, redistribute, and/or modify
* it under the terms of the GNU Affero General Public License, version 3
* or later ("AGPL"), as published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
source/libs/index/src/index_fst_registry.c
0 → 100644
浏览文件 @
09f3e8e1
/*
* Copyright (c) 2019 TAOS Data, Inc. <jhtao@taosdata.com>
*
* This program is free software: you can use, redistribute, and/or modify
* it under the terms of the GNU Affero General Public License, version 3
* or later ("AGPL"), as published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#include "index_fst_registry.h"
source/libs/index/src/index_fst_util.c
0 → 100644
浏览文件 @
09f3e8e1
/*
* Copyright (c) 2019 TAOS Data, Inc. <jhtao@taosdata.com>
*
* This program is free software: you can use, redistribute, and/or modify
* it under the terms of the GNU Affero General Public License, version 3
* or later ("AGPL"), as published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#include "index_fst_util.h"
// Sentinel address used to indicate an empty final state.
const CompiledAddr EMPTY_ADDRESS = 0;
// Sentinel address used to indicate an invalid state.
const CompiledAddr NONE_ADDRESS = 1;
// On-disk format version. Per the original design notes, this number is
// written to every finite state transducer created by this library, and is
// checked against this value whenever a transducer is read back.
const uint64_t version = 3;
// The threshold (in number of transitions) at which an index is created for
// a node's transitions. This speeds up lookup time at the expense of FST size.
const uint64_t TRANS_INDEX_THRESHOLD = 32;
//uint8_t commonInput(uint8_t idx) {
// if (idx == 0) { return -1; }
// else {
// return COMMON_INPUTS_INV[idx - 1];
// }
//}
//
//uint8_t commonIdx(uint8_t v, uint8_t max) {
// uint8_t v = ((uint16_t)tCOMMON_INPUTS[v] + 1)%256;
// return v > max ? 0: v;
//}
/*
 * Return the minimum number of bytes (1..8) needed to store `n`.
 *
 * Zero still needs one byte. The loop adds a byte for as long as bits remain
 * above the bytes counted so far, and stops at 8 because a uint64_t never
 * needs more.
 */
uint8_t packSize(uint64_t n) {
  uint8_t nbytes = 1;
  while (nbytes < 8 && (n >> (8 * nbytes)) != 0) {
    nbytes++;
  }
  return nbytes;
}
/*
 * Decode an unsigned little-endian integer from the first `sz` bytes of `ch`.
 *
 * ch : buffer holding at least `sz` readable bytes; ch[0] is the least
 *      significant byte.
 * sz : number of bytes to decode. Values above 8 are clamped to 8, since a
 *      uint64_t cannot hold more and a shift of 64 bits or more is undefined.
 *
 * Returns the decoded value; 0 when `sz` is 0.
 */
uint64_t unpackUint64(uint8_t *ch, uint8_t sz) {
  if (sz > sizeof(uint64_t)) {
    sz = sizeof(uint64_t);
  }

  // Start from 0: the result is assembled by OR-ing bytes into place.
  uint64_t n = 0;
  for (uint8_t i = 0; i < sz; i++) {
    // Widen before shifting; otherwise the shift is done in `int`, which is
    // undefined for shifts of 32 or more and sign-extends when bit 31 is set.
    n |= ((uint64_t)ch[i]) << (8 * i);
  }
  return n;
}
/*
 * Return the number of bytes needed to store a transition target as a delta
 * from the address of the node that owns the transition.
 *
 * nodeAddr  : address of the owning node.
 * transAddr : address the transition points to.
 *
 * A transition to EMPTY_ADDRESS is sized as EMPTY_ADDRESS itself rather than
 * as a delta.
 */
uint8_t packDeltaSize(CompiledAddr nodeAddr, CompiledAddr transAddr) {
  if (transAddr != EMPTY_ADDRESS) {
    return packSize(nodeAddr - transAddr);
  }
  return packSize(EMPTY_ADDRESS);
}
/*
 * Decode a delta-encoded address and convert it back to an absolute address.
 *
 * data     : buffer holding the little-endian delta.
 * len      : number of bytes the delta occupies; passed to unpackUint64 as a
 *            uint8_t, so only values that fit in one byte are meaningful.
 * nodeAddr : address of the node the delta is relative to.
 *
 * Returns EMPTY_ADDRESS when the decoded delta equals EMPTY_ADDRESS,
 * otherwise `nodeAddr - delta`.
 */
CompiledAddr unpackDelta(char *data, uint64_t len, uint64_t nodeAddr) {
  // unpackUint64 takes (uint8_t *, uint8_t); convert explicitly instead of
  // relying on an implicit conversion between incompatible pointer types.
  uint64_t delta = unpackUint64((uint8_t *)data, (uint8_t)len);
  if (delta == EMPTY_ADDRESS) {
    return EMPTY_ADDRESS;
  } else {
    return nodeAddr - delta;
  }
}
// fst slice func
FstSlice
fstSliceCreate
(
uint8_t
*
data
,
uint64_t
dLen
)
{
FstSlice
slice
=
{.
data
=
data
,
.
dLen
=
dLen
,
.
start
=
0
,
.
end
=
dLen
-
1
};
return
slice
;
}
/*
 * Create a sub-slice of `slice` that shares the same underlying buffer.
 *
 * slice : source slice; its data pointer and dLen are reused, not copied.
 * start : first offset of the new slice.
 * end   : last offset of the new slice.
 *
 * Returns a slice with the requested start/end. If either offset is not
 * below `slice->dLen`, or `start > end`, the returned slice has data == NULL
 * and all other fields zeroed, so callers never see uninitialized values.
 */
FstSlice fstSliceCopy(FstSlice *slice, uint32_t start, uint32_t end) {
  // Fully initialized "invalid" result; also the starting point for the
  // valid case below.
  FstSlice t = {.data = NULL, .dLen = 0, .start = 0, .end = 0};

  if (start >= slice->dLen || end >= slice->dLen || start > end) {
    return t;
  }

  t.data = slice->data;
  t.dLen = slice->dLen;
  t.start = start;
  t.end = end;
  return t;
}
/*
 * Report whether a slice has nothing to read: true when its data pointer is
 * NULL or its recorded length is not positive.
 */
bool fstSliceEmpty(FstSlice *slice) {
  if (slice->data == NULL) {
    return true;
  }
  return slice->dLen <= 0;
}
source/libs/index/test/CMakeLists.txt
0 → 100644
浏览文件 @
09f3e8e1
# Test executable for the index library; sources are attached below.
add_executable(indexTest "")
# Sources: the gtest driver plus index.c compiled directly into the test.
# NOTE(review): index.c is compiled here and the `index` library is also
# linked below - confirm this does not define the same symbols twice.
target_sources(indexTest
    PRIVATE
    "../src/index.c"
    "indexTests.cpp"
)
# Header search paths: the public index headers and the library's private
# inc/ directory (the test includes indexInt.h).
target_include_directories(indexTest
    PUBLIC
    "${CMAKE_SOURCE_DIR}/include/libs/index"
    "${CMAKE_CURRENT_SOURCE_DIR}/../inc"
)
# Libraries the test links against; gtest_main supplies main().
target_link_libraries(indexTest
    os
    util
    common
    gtest_main
    index
)
# Register the executable with CTest under the name index_test.
add_test(
    NAME index_test
    COMMAND indexTest
)
source/libs/index/test/indexTests.cpp
浏览文件 @
09f3e8e1
#include <gtest/gtest.h>
#include <string>
#include <iostream>
#include "index.h"
#include "indexInt.h"
// Smoke test for the index API: opens an index under "./test", writes
// 100000 documents carrying four tag terms each, then runs a MUST query
// combining a prefix match on tag1 with an exact match on tag3 and prints
// the ids that come back.
TEST(IndexTest, index_create_test) {
  SIndexOpts *opts = indexOptsCreate();
  SIndex     *index = indexOpen(opts, "./test");
  if (index == NULL) {
    std::cout << "index open failed" << std::endl;
    // Nothing below can work without an index: release the options and fail
    // the test instead of passing a NULL handle to the index API.
    indexOptsDestroy(opts);
    FAIL() << "indexOpen returned NULL";
  }

  // write
  for (int i = 0; i < 100000; i++) {
    SIndexMultiTerm *terms = indexMultiTermCreate();
    std::string      val = "field";

    // tag1 = "field"
    indexMultiTermAdd(terms, "tag1", strlen("tag1"), val.c_str(), val.size());

    // tag2 = "field<i>"
    val.append(std::to_string(i));
    indexMultiTermAdd(terms, "tag2", strlen("tag2"), val.c_str(), val.size());

    // tag3 = "<i>field<i>"
    val.insert(0, std::to_string(i));
    indexMultiTermAdd(terms, "tag3", strlen("tag3"), val.c_str(), val.size());

    // tag4 = "<i>field<i>const"
    val.append("const");
    indexMultiTermAdd(terms, "tag4", strlen("tag4"), val.c_str(), val.size());

    indexPut(index, terms, i);
    indexMultiTermDestroy(terms);
  }

  // query
  SIndexMultiTermQuery *multiQuery = indexMultiTermQueryCreate(MUST);
  indexMultiTermQueryAdd(multiQuery, "tag1", strlen("tag1"), "field", strlen("field"), QUERY_PREFIX);
  indexMultiTermQueryAdd(multiQuery, "tag3", strlen("tag3"), "0field0", strlen("0field0"), QUERY_TERM);

  SArray *result = (SArray *)taosArrayInit(10, sizeof(int));
  indexSearch(index, multiQuery, result);

  std::cout << "taos'size : " << taosArrayGetSize(result) << std::endl;
  // size_t index avoids the signed/unsigned comparison against the array size.
  for (size_t i = 0; i < taosArrayGetSize(result); i++) {
    int *v = (int *)taosArrayGet(result, i);
    std::cout << "value --->" << *v << std::endl;
  }

  // Release the result array allocated above; it was previously leaked.
  taosArrayDestroy(result);
  indexMultiTermQueryDestroy(multiQuery);
  indexOptsDestroy(opts);
  indexClose(index);
}
source/libs/tkv/src/tkv.c
浏览文件 @
09f3e8e1
...
...
@@ -158,6 +158,8 @@ static void tkvInit() {
#ifdef USE_ROCKSDB
defaultReadOpts
.
ropts
=
rocksdb_readoptions_create
();
defaultWriteOpts
.
wopts
=
rocksdb_writeoptions_create
();
rocksdb_writeoptions_disable_WAL
(
defaultWriteOpts
.
wopts
,
true
);
#endif
}
...
...
@@ -166,4 +168,4 @@ static void tkvClear() {
rocksdb_readoptions_destroy
(
defaultReadOpts
.
ropts
);
rocksdb_writeoptions_destroy
(
defaultWriteOpts
.
wopts
);
#endif
}
\ No newline at end of file
}
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录