Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
taosdata
TDengine
提交
9188298e
TDengine
项目概览
taosdata
/
TDengine
大约 2 年 前同步成功
通知
1192
Star
22018
Fork
4786
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
1
列表
看板
标记
里程碑
合并请求
0
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
TDengine
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
1
Issue
1
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
提交
9188298e
编写于
11月 22, 2021
作者:
H
Hongze Cheng
浏览文件
操作
浏览文件
下载
差异文件
Merge branch '3.0' into feature/vnode
上级
9001468b
29eb9432
变更
61
展开全部
隐藏空白更改
内联
并排
Showing
61 changed file
with
3027 addition
and
912 deletion
+3027
-912
CMakeLists.txt
CMakeLists.txt
+1
-0
cmake/cmake.options
cmake/cmake.options
+2
-2
cmake/lucene_CMakeLists.txt.in
cmake/lucene_CMakeLists.txt.in
+2
-3
deps/CMakeLists.txt
deps/CMakeLists.txt
+5
-0
include/dnode/vnode/tq/tq.h
include/dnode/vnode/tq/tq.h
+98
-20
include/libs/index/index.h
include/libs/index/index.h
+42
-1
include/libs/wal/wal.h
include/libs/wal/wal.h
+4
-2
source/dnode/vnode/tq/inc/tqMetaStore.h
source/dnode/vnode/tq/inc/tqMetaStore.h
+6
-81
source/dnode/vnode/tq/src/tq.c
source/dnode/vnode/tq/src/tq.c
+78
-17
source/dnode/vnode/tq/src/tqMetaStore.c
source/dnode/vnode/tq/src/tqMetaStore.c
+24
-24
source/dnode/vnode/tq/test/tqSerializerTest.cpp
source/dnode/vnode/tq/test/tqSerializerTest.cpp
+13
-0
source/libs/index/CMakeLists.txt
source/libs/index/CMakeLists.txt
+24
-1
source/libs/index/inc/indexInt.h
source/libs/index/inc/indexInt.h
+41
-1
source/libs/index/inc/index_fst.h
source/libs/index/inc/index_fst.h
+164
-0
source/libs/index/inc/index_fst_automation.h
source/libs/index/inc/index_fst_automation.h
+42
-0
source/libs/index/inc/index_fst_node.h
source/libs/index/inc/index_fst_node.h
+40
-0
source/libs/index/inc/index_fst_registry.h
source/libs/index/inc/index_fst_registry.h
+57
-0
source/libs/index/inc/index_fst_util.h
source/libs/index/inc/index_fst_util.h
+82
-0
source/libs/index/src/index.c
source/libs/index/src/index.c
+167
-6
source/libs/index/src/index_fst.c
source/libs/index/src/index_fst.c
+296
-0
source/libs/index/src/index_fst_automation.c
source/libs/index/src/index_fst_automation.c
+1
-14
source/libs/index/src/index_fst_common.c
source/libs/index/src/index_fst_common.c
+306
-0
source/libs/index/src/index_fst_node.c
source/libs/index/src/index_fst_node.c
+55
-0
source/libs/index/src/index_fst_registry.c
source/libs/index/src/index_fst_registry.c
+158
-0
source/libs/index/src/index_fst_util.c
source/libs/index/src/index_fst_util.c
+115
-0
source/libs/index/test/CMakeLists.txt
source/libs/index/test/CMakeLists.txt
+23
-0
source/libs/index/test/indexTests.cpp
source/libs/index/test/indexTests.cpp
+59
-0
source/libs/sync/inc/raft.h
source/libs/sync/inc/raft.h
+3
-2
source/libs/sync/inc/raft_log.h
source/libs/sync/inc/raft_log.h
+0
-2
source/libs/sync/inc/raft_replication.h
source/libs/sync/inc/raft_replication.h
+2
-2
source/libs/sync/inc/sync_const.h
source/libs/sync/inc/sync_const.h
+8
-8
source/libs/sync/inc/sync_raft_config_change.h
source/libs/sync/inc/sync_raft_config_change.h
+5
-0
source/libs/sync/inc/sync_raft_impl.h
source/libs/sync/inc/sync_raft_impl.h
+4
-0
source/libs/sync/inc/sync_raft_inflights.h
source/libs/sync/inc/sync_raft_inflights.h
+17
-24
source/libs/sync/inc/sync_raft_node_map.h
source/libs/sync/inc/sync_raft_node_map.h
+49
-0
source/libs/sync/inc/sync_raft_progress.h
source/libs/sync/inc/sync_raft_progress.h
+92
-96
source/libs/sync/inc/sync_raft_progress_tracker.h
source/libs/sync/inc/sync_raft_progress_tracker.h
+30
-15
source/libs/sync/inc/sync_raft_proto.h
source/libs/sync/inc/sync_raft_proto.h
+16
-0
source/libs/sync/inc/sync_raft_quorum_joint.h
source/libs/sync/inc/sync_raft_quorum_joint.h
+31
-12
source/libs/sync/inc/sync_raft_quorum_majority.h
source/libs/sync/inc/sync_raft_quorum_majority.h
+6
-1
source/libs/sync/inc/sync_raft_restore.h
source/libs/sync/inc/sync_raft_restore.h
+2
-1
source/libs/sync/inc/sync_type.h
source/libs/sync/inc/sync_type.h
+6
-8
source/libs/sync/src/raft.c
source/libs/sync/src/raft.c
+33
-14
source/libs/sync/src/raft_handle_append_entries_message.c
source/libs/sync/src/raft_handle_append_entries_message.c
+4
-5
source/libs/sync/src/raft_handle_election_message.c
source/libs/sync/src/raft_handle_election_message.c
+0
-18
source/libs/sync/src/raft_handle_vote_message.c
source/libs/sync/src/raft_handle_vote_message.c
+14
-11
source/libs/sync/src/raft_handle_vote_resp_message.c
source/libs/sync/src/raft_handle_vote_resp_message.c
+7
-5
source/libs/sync/src/raft_replication.c
source/libs/sync/src/raft_replication.c
+10
-7
source/libs/sync/src/sync.c
source/libs/sync/src/sync.c
+3
-3
source/libs/sync/src/sync_raft_config_change.c
source/libs/sync/src/sync_raft_config_change.c
+210
-189
source/libs/sync/src/sync_raft_election.c
source/libs/sync/src/sync_raft_election.c
+48
-15
source/libs/sync/src/sync_raft_impl.c
source/libs/sync/src/sync_raft_impl.c
+59
-24
source/libs/sync/src/sync_raft_inflights.c
source/libs/sync/src/sync_raft_inflights.c
+9
-16
source/libs/sync/src/sync_raft_node_map.c
source/libs/sync/src/sync_raft_node_map.c
+82
-0
source/libs/sync/src/sync_raft_progress.c
source/libs/sync/src/sync_raft_progress.c
+109
-133
source/libs/sync/src/sync_raft_progress_tracker.c
source/libs/sync/src/sync_raft_progress_tracker.c
+95
-27
source/libs/sync/src/sync_raft_quorum_joint.c
source/libs/sync/src/sync_raft_quorum_joint.c
+24
-34
source/libs/sync/src/sync_raft_quorum_majority.c
source/libs/sync/src/sync_raft_quorum_majority.c
+82
-15
source/libs/sync/src/sync_raft_restore.c
source/libs/sync/src/sync_raft_restore.c
+49
-50
source/libs/tkv/src/tkv.c
source/libs/tkv/src/tkv.c
+3
-1
source/libs/wal/src/wal.c
source/libs/wal/src/wal.c
+10
-2
未找到文件。
CMakeLists.txt
浏览文件 @
9188298e
...
@@ -48,6 +48,7 @@ endif(${BUILD_WITH_ROCKSDB})
...
@@ -48,6 +48,7 @@ endif(${BUILD_WITH_ROCKSDB})
## lucene
## lucene
if
(
${
BUILD_WITH_LUCENE
}
)
if
(
${
BUILD_WITH_LUCENE
}
)
cat
(
"
${
CMAKE_SUPPORT_DIR
}
/lucene_CMakeLists.txt.in"
${
DEPS_TMP_FILE
}
)
cat
(
"
${
CMAKE_SUPPORT_DIR
}
/lucene_CMakeLists.txt.in"
${
DEPS_TMP_FILE
}
)
add_definitions
(
-DUSE_LUCENE
)
endif
(
${
BUILD_WITH_LUCENE
}
)
endif
(
${
BUILD_WITH_LUCENE
}
)
## NuRaft
## NuRaft
...
...
cmake/cmake.options
浏览文件 @
9188298e
...
@@ -22,7 +22,7 @@ option(
...
@@ -22,7 +22,7 @@ option(
option(
option(
BUILD_WITH_LUCENE
BUILD_WITH_LUCENE
"If build with lucene"
"If build with lucene"
OFF
off
)
)
option(
option(
...
@@ -41,4 +41,4 @@ option(
...
@@ -41,4 +41,4 @@ option(
BUILD_DOCS
BUILD_DOCS
"If use doxygen build documents"
"If use doxygen build documents"
ON
ON
)
)
\ No newline at end of file
cmake/lucene_CMakeLists.txt.in
浏览文件 @
9188298e
# lucene
# lucene
ExternalProject_Add(lucene
ExternalProject_Add(lucene
GIT_REPOSITORY https://github.com/taosdata-contrib/LucenePlusPlus.git
GIT_REPOSITORY https://github.com/yihaoDeng/LucenePlusPlus.git
GIT_TAG rel_3.0.8_td
SOURCE_DIR "${CMAKE_SOURCE_DIR}/deps/lucene"
SOURCE_DIR "${CMAKE_SOURCE_DIR}/deps/lucene"
BINARY_DIR ""
BINARY_DIR ""
#BUILD_IN_SOURCE TRUE
#BUILD_IN_SOURCE TRUE
...
@@ -10,4 +9,4 @@ ExternalProject_Add(lucene
...
@@ -10,4 +9,4 @@ ExternalProject_Add(lucene
BUILD_COMMAND ""
BUILD_COMMAND ""
INSTALL_COMMAND ""
INSTALL_COMMAND ""
TEST_COMMAND ""
TEST_COMMAND ""
)
)
\ No newline at end of file
deps/CMakeLists.txt
浏览文件 @
9188298e
...
@@ -68,6 +68,11 @@ endif(${BUILD_WITH_ROCKSDB})
...
@@ -68,6 +68,11 @@ endif(${BUILD_WITH_ROCKSDB})
if
(
${
BUILD_WITH_LUCENE
}
)
if
(
${
BUILD_WITH_LUCENE
}
)
option
(
ENABLE_TEST
"Enable the tests"
OFF
)
option
(
ENABLE_TEST
"Enable the tests"
OFF
)
add_subdirectory
(
lucene
)
add_subdirectory
(
lucene
)
target_include_directories
(
lucene++
PUBLIC $<BUILD_INTERFACE:
${
CMAKE_CURRENT_SOURCE_DIR
}
/lucene/include>
)
endif
(
${
BUILD_WITH_LUCENE
}
)
endif
(
${
BUILD_WITH_LUCENE
}
)
# NuRaft
# NuRaft
...
...
include/dnode/vnode/tq/tq.h
浏览文件 @
9188298e
...
@@ -109,11 +109,10 @@ typedef struct TqTopicVhandle {
...
@@ -109,11 +109,10 @@ typedef struct TqTopicVhandle {
#define TQ_BUFFER_SIZE 8
#define TQ_BUFFER_SIZE 8
// TODO: define a serializer and deserializer
typedef
struct
TqBufferItem
{
typedef
struct
TqBufferItem
{
int64_t
offset
;
int64_t
offset
;
// executors are identical but not concurrent
// executors are identical but not concurrent
// so
it
must be a copy in each item
// so
there
must be a copy in each item
void
*
executor
;
void
*
executor
;
int64_t
size
;
int64_t
size
;
void
*
content
;
void
*
content
;
...
@@ -156,23 +155,111 @@ typedef struct TqQueryMsg {
...
@@ -156,23 +155,111 @@ typedef struct TqQueryMsg {
typedef
struct
TqLogReader
{
typedef
struct
TqLogReader
{
void
*
logHandle
;
void
*
logHandle
;
int32_t
(
*
wal
Read
)(
void
*
logHandle
,
void
**
data
,
int64_t
ver
);
int32_t
(
*
log
Read
)(
void
*
logHandle
,
void
**
data
,
int64_t
ver
);
int64_t
(
*
wal
GetFirstVer
)(
void
*
logHandle
);
int64_t
(
*
log
GetFirstVer
)(
void
*
logHandle
);
int64_t
(
*
wal
GetSnapshotVer
)(
void
*
logHandle
);
int64_t
(
*
log
GetSnapshotVer
)(
void
*
logHandle
);
int64_t
(
*
wal
GetLastVer
)(
void
*
logHandle
);
int64_t
(
*
log
GetLastVer
)(
void
*
logHandle
);
}
TqLogReader
;
}
TqLogReader
;
typedef
struct
TqConfig
{
typedef
struct
TqConfig
{
// TODO
// TODO
}
TqConfig
;
}
TqConfig
;
typedef
struct
TqMemRef
{
SMemAllocatorFactory
*
pAlloctorFactory
;
SMemAllocator
*
pAllocator
;
}
TqMemRef
;
typedef
struct
TqSerializedHead
{
int16_t
ver
;
int16_t
action
;
int32_t
checksum
;
int64_t
ssize
;
char
content
[];
}
TqSerializedHead
;
typedef
int
(
*
TqSerializeFun
)(
const
void
*
pObj
,
TqSerializedHead
**
ppHead
);
typedef
const
void
*
(
*
TqDeserializeFun
)(
const
TqSerializedHead
*
pHead
,
void
**
ppObj
);
typedef
void
(
*
TqDeleteFun
)(
void
*
);
#define TQ_BUCKET_MASK 0xFF
#define TQ_BUCKET_SIZE 256
#define TQ_PAGE_SIZE 4096
//key + offset + size
#define TQ_IDX_SIZE 24
//4096 / 24
#define TQ_MAX_IDX_ONE_PAGE 170
//24 * 170
#define TQ_IDX_PAGE_BODY_SIZE 4080
//4096 - 4080
#define TQ_IDX_PAGE_HEAD_SIZE 16
#define TQ_ACTION_CONST 0
#define TQ_ACTION_INUSE 1
#define TQ_ACTION_INUSE_CONT 2
#define TQ_ACTION_INTXN 3
#define TQ_SVER 0
//TODO: inplace mode is not implemented
#define TQ_UPDATE_INPLACE 0
#define TQ_UPDATE_APPEND 1
#define TQ_DUP_INTXN_REWRITE 0
#define TQ_DUP_INTXN_REJECT 2
static
inline
bool
TqUpdateAppend
(
int32_t
tqConfigFlag
)
{
return
tqConfigFlag
&
TQ_UPDATE_APPEND
;
}
static
inline
bool
TqDupIntxnReject
(
int32_t
tqConfigFlag
)
{
return
tqConfigFlag
&
TQ_DUP_INTXN_REJECT
;
}
static
const
int8_t
TQ_CONST_DELETE
=
TQ_ACTION_CONST
;
#define TQ_DELETE_TOKEN (void*)&TQ_CONST_DELETE
typedef
struct
TqMetaHandle
{
int64_t
key
;
int64_t
offset
;
int64_t
serializedSize
;
void
*
valueInUse
;
void
*
valueInTxn
;
}
TqMetaHandle
;
typedef
struct
TqMetaList
{
TqMetaHandle
handle
;
struct
TqMetaList
*
next
;
//struct TqMetaList* inTxnPrev;
//struct TqMetaList* inTxnNext;
struct
TqMetaList
*
unpersistPrev
;
struct
TqMetaList
*
unpersistNext
;
}
TqMetaList
;
typedef
struct
TqMetaStore
{
TqMetaList
*
bucket
[
TQ_BUCKET_SIZE
];
//a table head
TqMetaList
*
unpersistHead
;
//TODO:temporaral use, to be replaced by unified tfile
int
fileFd
;
//TODO:temporaral use, to be replaced by unified tfile
int
idxFd
;
char
*
dirPath
;
int32_t
tqConfigFlag
;
TqSerializeFun
pSerializer
;
TqDeserializeFun
pDeserializer
;
TqDeleteFun
pDeleter
;
}
TqMetaStore
;
typedef
struct
STQ
{
typedef
struct
STQ
{
// the collection of group handle
// the collection of group handle
// the handle of kvstore
// the handle of kvstore
c
onst
c
har
*
path
;
char
*
path
;
TqConfig
*
tqConfig
;
TqConfig
*
tqConfig
;
TqLogReader
*
tqLogReader
;
TqLogReader
*
tqLogReader
;
SMemAllocatorFactory
*
allocFac
;
TqMemRef
tqMemRef
;
TqMetaStore
*
tqMeta
;
}
STQ
;
}
STQ
;
// open in each vnode
// open in each vnode
...
@@ -187,7 +274,7 @@ int tqConsume(STQ*, TmqConsumeReq*);
...
@@ -187,7 +274,7 @@ int tqConsume(STQ*, TmqConsumeReq*);
TqGroupHandle
*
tqGetGroupHandle
(
STQ
*
,
int64_t
cId
);
TqGroupHandle
*
tqGetGroupHandle
(
STQ
*
,
int64_t
cId
);
int
tqOpenTCGroup
(
STQ
*
,
int64_t
topicId
,
int64_t
cgId
,
int64_t
cId
);
TqGroupHandle
*
tqOpenTCGroup
(
STQ
*
,
int64_t
topicId
,
int64_t
cgId
,
int64_t
cId
);
int
tqCloseTCGroup
(
STQ
*
,
int64_t
topicId
,
int64_t
cgId
,
int64_t
cId
);
int
tqCloseTCGroup
(
STQ
*
,
int64_t
topicId
,
int64_t
cgId
,
int64_t
cId
);
int
tqMoveOffsetToNext
(
TqGroupHandle
*
);
int
tqMoveOffsetToNext
(
TqGroupHandle
*
);
int
tqResetOffset
(
STQ
*
,
int64_t
topicId
,
int64_t
cgId
,
int64_t
offset
);
int
tqResetOffset
(
STQ
*
,
int64_t
topicId
,
int64_t
cgId
,
int64_t
offset
);
...
@@ -195,18 +282,9 @@ int tqRegisterContext(TqGroupHandle*, void* ahandle);
...
@@ -195,18 +282,9 @@ int tqRegisterContext(TqGroupHandle*, void* ahandle);
int
tqLaunchQuery
(
TqGroupHandle
*
);
int
tqLaunchQuery
(
TqGroupHandle
*
);
int
tqSendLaunchQuery
(
TqGroupHandle
*
);
int
tqSendLaunchQuery
(
TqGroupHandle
*
);
int
tqSerializeGroupHandle
(
TqGroupHandle
*
gHandle
,
void
**
ppBytes
);
int
tqSerializeGroupHandle
(
const
TqGroupHandle
*
gHandle
,
TqSerializedHead
**
ppHead
);
void
*
tqSerializeListHandle
(
TqListHandle
*
listHandle
,
void
*
ptr
);
void
*
tqSerializeBufHandle
(
TqBufferHandle
*
bufHandle
,
void
*
ptr
);
void
*
tqSerializeBufItem
(
TqBufferItem
*
bufItem
,
void
*
ptr
);
const
void
*
tqDeserializeGroupHandle
(
const
void
*
pBytes
,
TqGroupHandle
*
ghandle
);
const
void
*
tqDeserializeBufHandle
(
const
void
*
pBytes
,
TqBufferHandle
*
bufHandle
);
const
void
*
tqDeserializeBufItem
(
const
void
*
pBytes
,
TqBufferItem
*
bufItem
);
int
tqGetGHandleSSize
(
const
TqGroupHandle
*
gHandle
);
const
void
*
tqDeserializeGroupHandle
(
const
TqSerializedHead
*
pHead
,
TqGroupHandle
**
gHandle
);
int
tqBufHandleSSize
();
int
tqBufItemSSize
();
#ifdef __cplusplus
#ifdef __cplusplus
}
}
...
...
include/libs/index/index.h
浏览文件 @
9188298e
...
@@ -16,12 +16,53 @@
...
@@ -16,12 +16,53 @@
#ifndef _TD_INDEX_H_
#ifndef _TD_INDEX_H_
#define _TD_INDEX_H_
#define _TD_INDEX_H_
#include "os.h"
#include "tarray.h"
#ifdef __cplusplus
#ifdef __cplusplus
extern
"C"
{
extern
"C"
{
#endif
#endif
typedef
struct
SIndex
SIndex
;
typedef
struct
SIndexOpts
SIndexOpts
;
typedef
struct
SIndexMultiTermQuery
SIndexMultiTermQuery
;
typedef
struct
SArray
SIndexMultiTerm
;
typedef
enum
{
MUST
=
0
,
SHOULD
=
1
,
NOT
=
2
}
EIndexOperatorType
;
typedef
enum
{
QUERY_TERM
=
0
,
QUERY_PREFIX
=
1
,
QUERY_SUFFIX
=
2
,
QUERY_REGEX
=
3
}
EIndexQueryType
;
/*
* @param: oper
*
*/
SIndexMultiTermQuery
*
indexMultiTermQueryCreate
(
EIndexOperatorType
oper
);
void
indexMultiTermQueryDestroy
(
SIndexMultiTermQuery
*
pQuery
);
int
indexMultiTermQueryAdd
(
SIndexMultiTermQuery
*
pQuery
,
const
char
*
field
,
int32_t
nFields
,
const
char
*
value
,
int32_t
nValue
,
EIndexQueryType
type
);
/*
* @param:
* @param:
*/
SIndex
*
indexOpen
(
SIndexOpts
*
opt
,
const
char
*
path
);
void
indexClose
(
SIndex
*
index
);
int
indexPut
(
SIndex
*
index
,
SIndexMultiTerm
*
terms
,
int
uid
);
int
indexDelete
(
SIndex
*
index
,
SIndexMultiTermQuery
*
query
);
int
indexSearch
(
SIndex
*
index
,
SIndexMultiTermQuery
*
query
,
SArray
*
result
);
int
indexRebuild
(
SIndex
*
index
,
SIndexOpts
*
opt
);
/*
* @param
* @param
*/
SIndexMultiTerm
*
indexMultiTermCreate
();
int
indexMultiTermAdd
(
SIndexMultiTerm
*
terms
,
const
char
*
field
,
int32_t
nFields
,
const
char
*
value
,
int32_t
nValue
);
void
indexMultiTermDestroy
(
SIndexMultiTerm
*
terms
);
/*
* @param:
* @param:
*/
SIndexOpts
*
indexOptsCreate
();
void
indexOptsDestroy
(
SIndexOpts
*
opts
);
#ifdef __cplusplus
#ifdef __cplusplus
}
}
#endif
#endif
#endif
/*_TD_INDEX_H_*/
#endif
/*_TD_INDEX_H_*/
\ No newline at end of file
include/libs/wal/wal.h
浏览文件 @
9188298e
...
@@ -44,8 +44,10 @@ typedef struct {
...
@@ -44,8 +44,10 @@ typedef struct {
EWalType
walLevel
;
// wal level
EWalType
walLevel
;
// wal level
}
SWalCfg
;
}
SWalCfg
;
struct
SWal
;
typedef
struct
SWal
{
typedef
struct
SWal
SWal
;
// WAL HANDLE
int8_t
unused
;
}
SWal
;
// WAL HANDLE
typedef
int32_t
(
*
FWalWrite
)(
void
*
ahandle
,
void
*
pHead
,
int32_t
qtype
,
void
*
pMsg
);
typedef
int32_t
(
*
FWalWrite
)(
void
*
ahandle
,
void
*
pHead
,
int32_t
qtype
,
void
*
pMsg
);
// module initialization
// module initialization
...
...
source/dnode/vnode/tq/inc/tqMetaStore.h
浏览文件 @
9188298e
...
@@ -17,97 +17,22 @@
...
@@ -17,97 +17,22 @@
#define _TQ_META_STORE_H_
#define _TQ_META_STORE_H_
#include "os.h"
#include "os.h"
#include "tq.h"
#ifdef __cplusplus
#ifdef __cplusplus
extern
"C"
{
extern
"C"
{
#endif
#endif
#define TQ_BUCKET_MASK 0xFF
#define TQ_BUCKET_SIZE 256
#define TQ_PAGE_SIZE 4096
//key + offset + size
#define TQ_IDX_SIZE 24
//4096 / 24
#define TQ_MAX_IDX_ONE_PAGE 170
//24 * 170
#define TQ_IDX_PAGE_BODY_SIZE 4080
//4096 - 4080
#define TQ_IDX_PAGE_HEAD_SIZE 16
#define TQ_ACTION_CONST 0
#define TQ_ACTION_INUSE 1
#define TQ_ACTION_INUSE_CONT 2
#define TQ_ACTION_INTXN 3
#define TQ_SVER 0
//TODO: inplace mode is not implemented
#define TQ_UPDATE_INPLACE 0
#define TQ_UPDATE_APPEND 1
#define TQ_DUP_INTXN_REWRITE 0
#define TQ_DUP_INTXN_REJECT 2
static
inline
bool
TqUpdateAppend
(
int32_t
tqConfigFlag
)
{
return
tqConfigFlag
&
TQ_UPDATE_APPEND
;
}
static
inline
bool
TqDupIntxnReject
(
int32_t
tqConfigFlag
)
{
return
tqConfigFlag
&
TQ_DUP_INTXN_REJECT
;
}
static
const
int8_t
TQ_CONST_DELETE
=
TQ_ACTION_CONST
;
#define TQ_DELETE_TOKEN (void*)&TQ_CONST_DELETE
typedef
struct
TqSerializedHead
{
int16_t
ver
;
int16_t
action
;
int32_t
checksum
;
int64_t
ssize
;
char
content
[];
}
TqSerializedHead
;
typedef
struct
TqMetaHandle
{
int64_t
key
;
int64_t
offset
;
int64_t
serializedSize
;
void
*
valueInUse
;
void
*
valueInTxn
;
}
TqMetaHandle
;
typedef
struct
TqMetaList
{
TqMetaHandle
handle
;
struct
TqMetaList
*
next
;
//struct TqMetaList* inTxnPrev;
//struct TqMetaList* inTxnNext;
struct
TqMetaList
*
unpersistPrev
;
struct
TqMetaList
*
unpersistNext
;
}
TqMetaList
;
typedef
struct
TqMetaStore
{
TqMetaList
*
bucket
[
TQ_BUCKET_SIZE
];
//a table head
TqMetaList
*
unpersistHead
;
int
fileFd
;
//TODO:temporaral use, to be replaced by unified tfile
int
idxFd
;
//TODO:temporaral use, to be replaced by unified tfile
char
*
dirPath
;
int32_t
tqConfigFlag
;
int
(
*
serializer
)(
const
void
*
pObj
,
TqSerializedHead
**
ppHead
);
const
void
*
(
*
deserializer
)(
const
TqSerializedHead
*
pHead
,
void
**
ppObj
);
void
(
*
deleter
)(
void
*
);
}
TqMetaStore
;
TqMetaStore
*
tqStoreOpen
(
const
char
*
path
,
TqMetaStore
*
tqStoreOpen
(
const
char
*
path
,
int
serializer
(
const
void
*
pObj
,
TqSerializedHead
**
ppHead
)
,
TqSerializeFun
pSerializer
,
const
void
*
deserializer
(
const
TqSerializedHead
*
pHead
,
void
**
ppObj
)
,
TqDeserializeFun
pDeserializer
,
void
deleter
(
void
*
pObj
)
,
TqDeleteFun
pDeleter
,
int32_t
tqConfigFlag
int32_t
tqConfigFlag
);
);
int32_t
tqStoreClose
(
TqMetaStore
*
);
int32_t
tqStoreClose
(
TqMetaStore
*
);
//int32_t tqStoreDelete(TqMetaStore*);
//int32_t tqStoreDelete(TqMetaStore*);
//int32_t
T
qStoreCommitAll(TqMetaStore*);
//int32_t
t
qStoreCommitAll(TqMetaStore*);
int32_t
tqStorePersist
(
TqMetaStore
*
);
int32_t
tqStorePersist
(
TqMetaStore
*
);
//clean deleted idx and data from persistent file
//clean deleted idx and data from persistent file
int32_t
tqStoreCompact
(
TqMetaStore
*
);
int32_t
tqStoreCompact
(
TqMetaStore
*
);
...
...
source/dnode/vnode/tq/src/tq.c
浏览文件 @
9188298e
...
@@ -14,6 +14,7 @@
...
@@ -14,6 +14,7 @@
*/
*/
#include "tqInt.h"
#include "tqInt.h"
#include "tqMetaStore.h"
//static
//static
//read next version data
//read next version data
...
@@ -24,6 +25,46 @@
...
@@ -24,6 +25,46 @@
//
//
int
tqGetgHandleSSize
(
const
TqGroupHandle
*
gHandle
);
int
tqGetgHandleSSize
(
const
TqGroupHandle
*
gHandle
);
int
tqBufHandleSSize
();
int
tqBufItemSSize
();
TqGroupHandle
*
tqFindHandle
(
STQ
*
pTq
,
int64_t
topicId
,
int64_t
cgId
,
int64_t
cId
)
{
TqGroupHandle
*
gHandle
;
return
NULL
;
}
void
*
tqSerializeListHandle
(
TqListHandle
*
listHandle
,
void
*
ptr
);
void
*
tqSerializeBufHandle
(
TqBufferHandle
*
bufHandle
,
void
*
ptr
);
void
*
tqSerializeBufItem
(
TqBufferItem
*
bufItem
,
void
*
ptr
);
const
void
*
tqDeserializeBufHandle
(
const
void
*
pBytes
,
TqBufferHandle
*
bufHandle
);
const
void
*
tqDeserializeBufItem
(
const
void
*
pBytes
,
TqBufferItem
*
bufItem
);
STQ
*
tqOpen
(
const
char
*
path
,
TqConfig
*
tqConfig
,
TqLogReader
*
tqLogReader
,
SMemAllocatorFactory
*
allocFac
)
{
STQ
*
pTq
=
malloc
(
sizeof
(
STQ
));
if
(
pTq
==
NULL
)
{
//TODO: memory error
return
NULL
;
}
strcpy
(
pTq
->
path
,
path
);
pTq
->
tqConfig
=
tqConfig
;
pTq
->
tqLogReader
=
tqLogReader
;
pTq
->
tqMemRef
.
pAlloctorFactory
=
allocFac
;
pTq
->
tqMemRef
.
pAllocator
=
allocFac
->
create
();
if
(
pTq
->
tqMemRef
.
pAllocator
==
NULL
)
{
//TODO
}
pTq
->
tqMeta
=
tqStoreOpen
(
path
,
(
TqSerializeFun
)
tqSerializeGroupHandle
,
(
TqDeserializeFun
)
tqDeserializeGroupHandle
,
free
,
0
);
if
(
pTq
->
tqMeta
==
NULL
)
{
//TODO: free STQ
return
NULL
;
}
return
pTq
;
}
static
int
tqProtoCheck
(
TmqMsgHead
*
pMsg
)
{
static
int
tqProtoCheck
(
TmqMsgHead
*
pMsg
)
{
return
pMsg
->
protoVer
==
0
;
return
pMsg
->
protoVer
==
0
;
...
@@ -83,14 +124,29 @@ static int tqCommitTCGroup(TqGroupHandle* handle) {
...
@@ -83,14 +124,29 @@ static int tqCommitTCGroup(TqGroupHandle* handle) {
int
tqCreateTCGroup
(
STQ
*
pTq
,
int64_t
topicId
,
int64_t
cgId
,
int64_t
cId
,
TqGroupHandle
**
handle
)
{
int
tqCreateTCGroup
(
STQ
*
pTq
,
int64_t
topicId
,
int64_t
cgId
,
int64_t
cId
,
TqGroupHandle
**
handle
)
{
//create in disk
//create in disk
TqGroupHandle
*
gHandle
=
(
TqGroupHandle
*
)
malloc
(
sizeof
(
TqGroupHandle
));
if
(
gHandle
==
NULL
)
{
//TODO
return
-
1
;
}
memset
(
gHandle
,
0
,
sizeof
(
TqGroupHandle
));
return
0
;
return
0
;
}
}
int
tqOpenTCGroup
(
STQ
*
pTq
,
int64_t
topicId
,
int64_t
cgId
,
int64_t
cId
)
{
TqGroupHandle
*
tqOpenTCGroup
(
STQ
*
pTq
,
int64_t
topicId
,
int64_t
cgId
,
int64_t
cId
)
{
//look up in disk
TqGroupHandle
*
gHandle
=
tqHandleGet
(
pTq
->
tqMeta
,
cId
);
if
(
gHandle
==
NULL
)
{
int
code
=
tqCreateTCGroup
(
pTq
,
topicId
,
cgId
,
cId
,
&
gHandle
);
if
(
code
!=
0
)
{
//TODO
return
NULL
;
}
}
//create
//create
//open
//open
return
0
;
return
gHandle
;
}
}
int
tqCloseTCGroup
(
STQ
*
pTq
,
int64_t
topicId
,
int64_t
cgId
,
int64_t
cId
)
{
int
tqCloseTCGroup
(
STQ
*
pTq
,
int64_t
topicId
,
int64_t
cgId
,
int64_t
cId
)
{
...
@@ -207,16 +263,20 @@ int tqConsume(STQ* pTq, TmqConsumeReq* pMsg) {
...
@@ -207,16 +263,20 @@ int tqConsume(STQ* pTq, TmqConsumeReq* pMsg) {
return
0
;
return
0
;
}
}
int
tqSerializeGroupHandle
(
TqGroupHandle
*
gHandle
,
void
**
ppBytes
)
{
int
tqSerializeGroupHandle
(
const
TqGroupHandle
*
gHandle
,
TqSerializedHead
**
ppHead
)
{
//calculate size
//calculate size
int
sz
=
tqGetgHandleSSize
(
gHandle
);
int
sz
=
tqGetgHandleSSize
(
gHandle
)
+
sizeof
(
TqSerializedHead
);
void
*
ptr
=
realloc
(
*
ppBytes
,
sz
);
if
(
sz
>
(
*
ppHead
)
->
ssize
)
{
if
(
ptr
==
NULL
)
{
void
*
tmpPtr
=
realloc
(
*
ppHead
,
sz
);
free
(
ppBytes
);
if
(
tmpPtr
==
NULL
)
{
//TODO: memory err
free
(
*
ppHead
);
return
-
1
;
//TODO: memory err
return
-
1
;
}
*
ppHead
=
tmpPtr
;
(
*
ppHead
)
->
ssize
=
sz
;
}
}
*
ppBytes
=
ptr
;
void
*
ptr
=
(
*
ppHead
)
->
content
;
//do serialization
//do serialization
*
(
int64_t
*
)
ptr
=
gHandle
->
cId
;
*
(
int64_t
*
)
ptr
=
gHandle
->
cId
;
ptr
=
POINTER_SHIFT
(
ptr
,
sizeof
(
int64_t
));
ptr
=
POINTER_SHIFT
(
ptr
,
sizeof
(
int64_t
));
...
@@ -261,8 +321,9 @@ void* tqSerializeBufItem(TqBufferItem *bufItem, void* ptr) {
...
@@ -261,8 +321,9 @@ void* tqSerializeBufItem(TqBufferItem *bufItem, void* ptr) {
return
ptr
;
return
ptr
;
}
}
const
void
*
tqDeserializeGroupHandle
(
const
void
*
pBytes
,
TqGroupHandle
*
gHandle
)
{
const
void
*
tqDeserializeGroupHandle
(
const
TqSerializedHead
*
pHead
,
TqGroupHandle
**
ppGHandle
)
{
const
void
*
ptr
=
pBytes
;
TqGroupHandle
*
gHandle
=
*
ppGHandle
;
const
void
*
ptr
=
pHead
->
content
;
gHandle
->
cId
=
*
(
int64_t
*
)
ptr
;
gHandle
->
cId
=
*
(
int64_t
*
)
ptr
;
ptr
=
POINTER_SHIFT
(
ptr
,
sizeof
(
int64_t
));
ptr
=
POINTER_SHIFT
(
ptr
,
sizeof
(
int64_t
));
gHandle
->
cgId
=
*
(
int64_t
*
)
ptr
;
gHandle
->
cgId
=
*
(
int64_t
*
)
ptr
;
...
@@ -317,15 +378,15 @@ const void* tqDeserializeBufItem(const void* pBytes, TqBufferItem *bufItem) {
...
@@ -317,15 +378,15 @@ const void* tqDeserializeBufItem(const void* pBytes, TqBufferItem *bufItem) {
//TODO: make this a macro
//TODO: make this a macro
int
tqGetgHandleSSize
(
const
TqGroupHandle
*
gHandle
)
{
int
tqGetgHandleSSize
(
const
TqGroupHandle
*
gHandle
)
{
return
sizeof
(
int64_t
)
*
2
return
sizeof
(
int64_t
)
*
2
//cId + cgId
+
sizeof
(
int32_t
)
+
sizeof
(
int32_t
)
//topicNum
+
gHandle
->
topicNum
*
tqBufHandleSSize
();
+
gHandle
->
topicNum
*
tqBufHandleSSize
();
}
}
//TODO: make this a macro
//TODO: make this a macro
int
tqBufHandleSSize
()
{
int
tqBufHandleSSize
()
{
return
sizeof
(
int64_t
)
*
2
return
sizeof
(
int64_t
)
*
2
// nextConsumeOffset + topicId
+
sizeof
(
int32_t
)
*
2
+
sizeof
(
int32_t
)
*
2
// head + tail
+
TQ_BUFFER_SIZE
*
tqBufItemSSize
();
+
TQ_BUFFER_SIZE
*
tqBufItemSSize
();
}
}
...
...
source/dnode/vnode/tq/src/tqMetaStore.c
浏览文件 @
9188298e
...
@@ -69,10 +69,10 @@ static inline int tqReadLastPage(int fd, TqIdxPageBuf* pBuf) {
...
@@ -69,10 +69,10 @@ static inline int tqReadLastPage(int fd, TqIdxPageBuf* pBuf) {
}
}
TqMetaStore
*
tqStoreOpen
(
const
char
*
path
,
TqMetaStore
*
tqStoreOpen
(
const
char
*
path
,
int
serializer
(
const
void
*
pObj
,
TqSerializedHead
**
ppHead
)
,
TqSerializeFun
serializer
,
const
void
*
deserializer
(
const
TqSerializedHead
*
pHead
,
void
**
ppObj
)
,
TqDeserializeFun
deserializer
,
void
deleter
(
void
*
pObj
)
,
TqDeleteFun
deleter
,
int32_t
tqConfigFlag
int32_t
tqConfigFlag
)
{
)
{
TqMetaStore
*
pMeta
=
malloc
(
sizeof
(
TqMetaStore
));
TqMetaStore
*
pMeta
=
malloc
(
sizeof
(
TqMetaStore
));
if
(
pMeta
==
NULL
)
{
if
(
pMeta
==
NULL
)
{
...
@@ -127,9 +127,9 @@ TqMetaStore* tqStoreOpen(const char* path,
...
@@ -127,9 +127,9 @@ TqMetaStore* tqStoreOpen(const char* path,
pMeta
->
fileFd
=
fileFd
;
pMeta
->
fileFd
=
fileFd
;
pMeta
->
s
erializer
=
serializer
;
pMeta
->
pS
erializer
=
serializer
;
pMeta
->
d
eserializer
=
deserializer
;
pMeta
->
pD
eserializer
=
deserializer
;
pMeta
->
d
eleter
=
deleter
;
pMeta
->
pD
eleter
=
deleter
;
pMeta
->
tqConfigFlag
=
tqConfigFlag
;
pMeta
->
tqConfigFlag
=
tqConfigFlag
;
//read idx file and load into memory
//read idx file and load into memory
...
@@ -171,25 +171,25 @@ TqMetaStore* tqStoreOpen(const char* path,
...
@@ -171,25 +171,25 @@ TqMetaStore* tqStoreOpen(const char* path,
}
}
if
(
serializedObj
->
action
==
TQ_ACTION_INUSE
)
{
if
(
serializedObj
->
action
==
TQ_ACTION_INUSE
)
{
if
(
serializedObj
->
ssize
!=
sizeof
(
TqSerializedHead
))
{
if
(
serializedObj
->
ssize
!=
sizeof
(
TqSerializedHead
))
{
pMeta
->
d
eserializer
(
serializedObj
,
&
pNode
->
handle
.
valueInUse
);
pMeta
->
pD
eserializer
(
serializedObj
,
&
pNode
->
handle
.
valueInUse
);
}
else
{
}
else
{
pNode
->
handle
.
valueInUse
=
TQ_DELETE_TOKEN
;
pNode
->
handle
.
valueInUse
=
TQ_DELETE_TOKEN
;
}
}
}
else
if
(
serializedObj
->
action
==
TQ_ACTION_INTXN
)
{
}
else
if
(
serializedObj
->
action
==
TQ_ACTION_INTXN
)
{
if
(
serializedObj
->
ssize
!=
sizeof
(
TqSerializedHead
))
{
if
(
serializedObj
->
ssize
!=
sizeof
(
TqSerializedHead
))
{
pMeta
->
d
eserializer
(
serializedObj
,
&
pNode
->
handle
.
valueInTxn
);
pMeta
->
pD
eserializer
(
serializedObj
,
&
pNode
->
handle
.
valueInTxn
);
}
else
{
}
else
{
pNode
->
handle
.
valueInTxn
=
TQ_DELETE_TOKEN
;
pNode
->
handle
.
valueInTxn
=
TQ_DELETE_TOKEN
;
}
}
}
else
if
(
serializedObj
->
action
==
TQ_ACTION_INUSE_CONT
)
{
}
else
if
(
serializedObj
->
action
==
TQ_ACTION_INUSE_CONT
)
{
if
(
serializedObj
->
ssize
!=
sizeof
(
TqSerializedHead
))
{
if
(
serializedObj
->
ssize
!=
sizeof
(
TqSerializedHead
))
{
pMeta
->
d
eserializer
(
serializedObj
,
&
pNode
->
handle
.
valueInUse
);
pMeta
->
pD
eserializer
(
serializedObj
,
&
pNode
->
handle
.
valueInUse
);
}
else
{
}
else
{
pNode
->
handle
.
valueInUse
=
TQ_DELETE_TOKEN
;
pNode
->
handle
.
valueInUse
=
TQ_DELETE_TOKEN
;
}
}
TqSerializedHead
*
ptr
=
POINTER_SHIFT
(
serializedObj
,
serializedObj
->
ssize
);
TqSerializedHead
*
ptr
=
POINTER_SHIFT
(
serializedObj
,
serializedObj
->
ssize
);
if
(
ptr
->
ssize
!=
sizeof
(
TqSerializedHead
))
{
if
(
ptr
->
ssize
!=
sizeof
(
TqSerializedHead
))
{
pMeta
->
d
eserializer
(
ptr
,
&
pNode
->
handle
.
valueInTxn
);
pMeta
->
pD
eserializer
(
ptr
,
&
pNode
->
handle
.
valueInTxn
);
}
else
{
}
else
{
pNode
->
handle
.
valueInTxn
=
TQ_DELETE_TOKEN
;
pNode
->
handle
.
valueInTxn
=
TQ_DELETE_TOKEN
;
}
}
...
@@ -225,11 +225,11 @@ TqMetaStore* tqStoreOpen(const char* path,
...
@@ -225,11 +225,11 @@ TqMetaStore* tqStoreOpen(const char* path,
if
(
pBucketNode
)
{
if
(
pBucketNode
)
{
if
(
pBucketNode
->
handle
.
valueInUse
if
(
pBucketNode
->
handle
.
valueInUse
&&
pBucketNode
->
handle
.
valueInUse
!=
TQ_DELETE_TOKEN
)
{
&&
pBucketNode
->
handle
.
valueInUse
!=
TQ_DELETE_TOKEN
)
{
pMeta
->
d
eleter
(
pBucketNode
->
handle
.
valueInUse
);
pMeta
->
pD
eleter
(
pBucketNode
->
handle
.
valueInUse
);
}
}
if
(
pBucketNode
->
handle
.
valueInTxn
if
(
pBucketNode
->
handle
.
valueInTxn
&&
pBucketNode
->
handle
.
valueInTxn
!=
TQ_DELETE_TOKEN
)
{
&&
pBucketNode
->
handle
.
valueInTxn
!=
TQ_DELETE_TOKEN
)
{
pMeta
->
d
eleter
(
pBucketNode
->
handle
.
valueInTxn
);
pMeta
->
pD
eleter
(
pBucketNode
->
handle
.
valueInTxn
);
}
}
free
(
pBucketNode
);
free
(
pBucketNode
);
}
}
...
@@ -253,11 +253,11 @@ int32_t tqStoreClose(TqMetaStore* pMeta) {
...
@@ -253,11 +253,11 @@ int32_t tqStoreClose(TqMetaStore* pMeta) {
ASSERT
(
pNode
->
unpersistPrev
==
NULL
);
ASSERT
(
pNode
->
unpersistPrev
==
NULL
);
if
(
pNode
->
handle
.
valueInTxn
if
(
pNode
->
handle
.
valueInTxn
&&
pNode
->
handle
.
valueInTxn
!=
TQ_DELETE_TOKEN
)
{
&&
pNode
->
handle
.
valueInTxn
!=
TQ_DELETE_TOKEN
)
{
pMeta
->
d
eleter
(
pNode
->
handle
.
valueInTxn
);
pMeta
->
pD
eleter
(
pNode
->
handle
.
valueInTxn
);
}
}
if
(
pNode
->
handle
.
valueInUse
if
(
pNode
->
handle
.
valueInUse
&&
pNode
->
handle
.
valueInUse
!=
TQ_DELETE_TOKEN
)
{
&&
pNode
->
handle
.
valueInUse
!=
TQ_DELETE_TOKEN
)
{
pMeta
->
d
eleter
(
pNode
->
handle
.
valueInUse
);
pMeta
->
pD
eleter
(
pNode
->
handle
.
valueInUse
);
}
}
TqMetaList
*
next
=
pNode
->
next
;
TqMetaList
*
next
=
pNode
->
next
;
free
(
pNode
);
free
(
pNode
);
...
@@ -280,11 +280,11 @@ int32_t tqStoreDelete(TqMetaStore* pMeta) {
...
@@ -280,11 +280,11 @@ int32_t tqStoreDelete(TqMetaStore* pMeta) {
while
(
pNode
)
{
while
(
pNode
)
{
if
(
pNode
->
handle
.
valueInTxn
if
(
pNode
->
handle
.
valueInTxn
&&
pNode
->
handle
.
valueInTxn
!=
TQ_DELETE_TOKEN
)
{
&&
pNode
->
handle
.
valueInTxn
!=
TQ_DELETE_TOKEN
)
{
pMeta
->
d
eleter
(
pNode
->
handle
.
valueInTxn
);
pMeta
->
pD
eleter
(
pNode
->
handle
.
valueInTxn
);
}
}
if
(
pNode
->
handle
.
valueInUse
if
(
pNode
->
handle
.
valueInUse
&&
pNode
->
handle
.
valueInUse
!=
TQ_DELETE_TOKEN
)
{
&&
pNode
->
handle
.
valueInUse
!=
TQ_DELETE_TOKEN
)
{
pMeta
->
d
eleter
(
pNode
->
handle
.
valueInUse
);
pMeta
->
pD
eleter
(
pNode
->
handle
.
valueInUse
);
}
}
TqMetaList
*
next
=
pNode
->
next
;
TqMetaList
*
next
=
pNode
->
next
;
free
(
pNode
);
free
(
pNode
);
...
@@ -338,7 +338,7 @@ int32_t tqStorePersist(TqMetaStore* pMeta) {
...
@@ -338,7 +338,7 @@ int32_t tqStorePersist(TqMetaStore* pMeta) {
if
(
pNode
->
handle
.
valueInUse
==
TQ_DELETE_TOKEN
)
{
if
(
pNode
->
handle
.
valueInUse
==
TQ_DELETE_TOKEN
)
{
pSHead
->
ssize
=
sizeof
(
TqSerializedHead
);
pSHead
->
ssize
=
sizeof
(
TqSerializedHead
);
}
else
{
}
else
{
pMeta
->
s
erializer
(
pNode
->
handle
.
valueInUse
,
&
pSHead
);
pMeta
->
pS
erializer
(
pNode
->
handle
.
valueInUse
,
&
pSHead
);
}
}
nBytes
=
write
(
pMeta
->
fileFd
,
pSHead
,
pSHead
->
ssize
);
nBytes
=
write
(
pMeta
->
fileFd
,
pSHead
,
pSHead
->
ssize
);
ASSERT
(
nBytes
==
pSHead
->
ssize
);
ASSERT
(
nBytes
==
pSHead
->
ssize
);
...
@@ -349,7 +349,7 @@ int32_t tqStorePersist(TqMetaStore* pMeta) {
...
@@ -349,7 +349,7 @@ int32_t tqStorePersist(TqMetaStore* pMeta) {
if
(
pNode
->
handle
.
valueInTxn
==
TQ_DELETE_TOKEN
)
{
if
(
pNode
->
handle
.
valueInTxn
==
TQ_DELETE_TOKEN
)
{
pSHead
->
ssize
=
sizeof
(
TqSerializedHead
);
pSHead
->
ssize
=
sizeof
(
TqSerializedHead
);
}
else
{
}
else
{
pMeta
->
s
erializer
(
pNode
->
handle
.
valueInTxn
,
&
pSHead
);
pMeta
->
pS
erializer
(
pNode
->
handle
.
valueInTxn
,
&
pSHead
);
}
}
int
nBytesTxn
=
write
(
pMeta
->
fileFd
,
pSHead
,
pSHead
->
ssize
);
int
nBytesTxn
=
write
(
pMeta
->
fileFd
,
pSHead
,
pSHead
->
ssize
);
ASSERT
(
nBytesTxn
==
pSHead
->
ssize
);
ASSERT
(
nBytesTxn
==
pSHead
->
ssize
);
...
@@ -423,7 +423,7 @@ static int32_t tqHandlePutCommitted(TqMetaStore* pMeta, int64_t key, void* value
...
@@ -423,7 +423,7 @@ static int32_t tqHandlePutCommitted(TqMetaStore* pMeta, int64_t key, void* value
//TODO: think about thread safety
//TODO: think about thread safety
if
(
pNode
->
handle
.
valueInUse
if
(
pNode
->
handle
.
valueInUse
&&
pNode
->
handle
.
valueInUse
!=
TQ_DELETE_TOKEN
)
{
&&
pNode
->
handle
.
valueInUse
!=
TQ_DELETE_TOKEN
)
{
pMeta
->
d
eleter
(
pNode
->
handle
.
valueInUse
);
pMeta
->
pD
eleter
(
pNode
->
handle
.
valueInUse
);
}
}
//change pointer ownership
//change pointer ownership
pNode
->
handle
.
valueInUse
=
value
;
pNode
->
handle
.
valueInUse
=
value
;
...
@@ -496,7 +496,7 @@ static inline int32_t tqHandlePutImpl(TqMetaStore* pMeta, int64_t key, void* val
...
@@ -496,7 +496,7 @@ static inline int32_t tqHandlePutImpl(TqMetaStore* pMeta, int64_t key, void* val
return
-
2
;
return
-
2
;
}
}
if
(
pNode
->
handle
.
valueInTxn
!=
TQ_DELETE_TOKEN
)
{
if
(
pNode
->
handle
.
valueInTxn
!=
TQ_DELETE_TOKEN
)
{
pMeta
->
d
eleter
(
pNode
->
handle
.
valueInTxn
);
pMeta
->
pD
eleter
(
pNode
->
handle
.
valueInTxn
);
}
}
}
}
pNode
->
handle
.
valueInTxn
=
value
;
pNode
->
handle
.
valueInTxn
=
value
;
...
@@ -562,7 +562,7 @@ int32_t tqHandleCommit(TqMetaStore* pMeta, int64_t key) {
...
@@ -562,7 +562,7 @@ int32_t tqHandleCommit(TqMetaStore* pMeta, int64_t key) {
}
}
if
(
pNode
->
handle
.
valueInUse
if
(
pNode
->
handle
.
valueInUse
&&
pNode
->
handle
.
valueInUse
!=
TQ_DELETE_TOKEN
)
{
&&
pNode
->
handle
.
valueInUse
!=
TQ_DELETE_TOKEN
)
{
pMeta
->
d
eleter
(
pNode
->
handle
.
valueInUse
);
pMeta
->
pD
eleter
(
pNode
->
handle
.
valueInUse
);
}
}
pNode
->
handle
.
valueInUse
=
pNode
->
handle
.
valueInTxn
;
pNode
->
handle
.
valueInUse
=
pNode
->
handle
.
valueInTxn
;
pNode
->
handle
.
valueInTxn
=
NULL
;
pNode
->
handle
.
valueInTxn
=
NULL
;
...
@@ -582,7 +582,7 @@ int32_t tqHandleAbort(TqMetaStore* pMeta, int64_t key) {
...
@@ -582,7 +582,7 @@ int32_t tqHandleAbort(TqMetaStore* pMeta, int64_t key) {
if
(
pNode
->
handle
.
key
==
key
)
{
if
(
pNode
->
handle
.
key
==
key
)
{
if
(
pNode
->
handle
.
valueInTxn
)
{
if
(
pNode
->
handle
.
valueInTxn
)
{
if
(
pNode
->
handle
.
valueInTxn
!=
TQ_DELETE_TOKEN
)
{
if
(
pNode
->
handle
.
valueInTxn
!=
TQ_DELETE_TOKEN
)
{
pMeta
->
d
eleter
(
pNode
->
handle
.
valueInTxn
);
pMeta
->
pD
eleter
(
pNode
->
handle
.
valueInTxn
);
}
}
pNode
->
handle
.
valueInTxn
=
NULL
;
pNode
->
handle
.
valueInTxn
=
NULL
;
tqLinkUnpersist
(
pMeta
,
pNode
);
tqLinkUnpersist
(
pMeta
,
pNode
);
...
@@ -602,7 +602,7 @@ int32_t tqHandleDel(TqMetaStore* pMeta, int64_t key) {
...
@@ -602,7 +602,7 @@ int32_t tqHandleDel(TqMetaStore* pMeta, int64_t key) {
while
(
pNode
)
{
while
(
pNode
)
{
if
(
pNode
->
handle
.
valueInTxn
!=
TQ_DELETE_TOKEN
)
{
if
(
pNode
->
handle
.
valueInTxn
!=
TQ_DELETE_TOKEN
)
{
if
(
pNode
->
handle
.
valueInTxn
)
{
if
(
pNode
->
handle
.
valueInTxn
)
{
pMeta
->
d
eleter
(
pNode
->
handle
.
valueInTxn
);
pMeta
->
pD
eleter
(
pNode
->
handle
.
valueInTxn
);
}
}
pNode
->
handle
.
valueInTxn
=
TQ_DELETE_TOKEN
;
pNode
->
handle
.
valueInTxn
=
TQ_DELETE_TOKEN
;
tqLinkUnpersist
(
pMeta
,
pNode
);
tqLinkUnpersist
(
pMeta
,
pNode
);
...
...
source/dnode/vnode/tq/test/tqSerializerTest.cpp
0 → 100644
浏览文件 @
9188298e
#include <gtest/gtest.h>
#include <cstring>
#include <iostream>
#include <queue>
#include "tq.h"
using
namespace
std
;
// Smoke test: a TqGroupHandle can be heap-allocated and released again.
TEST(TqSerializerTest, basicTest) {
  TqGroupHandle* gHandle = (TqGroupHandle*)malloc(sizeof(TqGroupHandle));
  ASSERT_NE(gHandle, nullptr);
  // The original test never released the block, which leak checkers report.
  free(gHandle);
}
source/libs/index/CMakeLists.txt
浏览文件 @
9188298e
...
@@ -4,4 +4,27 @@ target_include_directories(
...
@@ -4,4 +4,27 @@ target_include_directories(
index
index
PUBLIC
"
${
CMAKE_SOURCE_DIR
}
/include/libs/index"
PUBLIC
"
${
CMAKE_SOURCE_DIR
}
/include/libs/index"
PRIVATE
"
${
CMAKE_CURRENT_SOURCE_DIR
}
/inc"
PRIVATE
"
${
CMAKE_CURRENT_SOURCE_DIR
}
/inc"
)
)
\ No newline at end of file
target_link_libraries
(
index
PUBLIC os
PUBLIC util
)
if
(
${
BUILD_WITH_LUCENE
}
)
target_include_directories
(
index
PUBLIC
"
${
CMAKE_SOURCE_DIR
}
/deps/lucene/include"
)
LINK_DIRECTORIES
(
"
${
CMAKE_SOURCE_DIR
}
/deps/lucene/debug/src/core"
)
target_link_libraries
(
index
PUBLIC lucene++
)
endif
(
${
BUILD_WITH_LUCENE
}
)
if
(
${
BUILD_TEST
}
)
add_subdirectory
(
test
)
endif
(
${
BUILD_TEST
}
)
source/libs/index/inc/indexInt.h
浏览文件 @
9188298e
...
@@ -16,12 +16,52 @@
...
@@ -16,12 +16,52 @@
#ifndef _TD_INDEX_INT_H_
#ifndef _TD_INDEX_INT_H_
#define _TD_INDEX_INT_H_
#define _TD_INDEX_INT_H_
#include "index.h"
#ifdef USE_LUCENE
#include <lucene++/Lucene_c.h>
#endif
#ifdef __cplusplus
#ifdef __cplusplus
extern
"C"
{
extern
"C"
{
#endif
#endif
struct
SIndex
{
#ifdef USE_LUCENE
index_t
*
index
;
#endif
};
struct
SIndexOpts
{
#ifdef USE_LUCENE
void
*
opts
;
#endif
};
// A query made of several term queries combined by one operator.
struct SIndexMultiTermQuery {
  EIndexOperatorType opera;  // operator applied across the term queries
  SArray *query;             // array of term queries
};
// One (key, value) pair; each buffer is stored together with its length.
typedef struct SIndexTerm {
  char *key;     // key bytes
  int32_t nKey;  // length of key
  char *val;     // value bytes
  int32_t nVal;  // length of val
} SIndexTerm;
// A single term together with the kind of query to run against it.
typedef struct SIndexTermQuery {
  SIndexTerm *field_value;  // the term to look up
  EIndexQueryType type;     // query type for this term
} SIndexTermQuery;
// Builds a term from nKey bytes of key and nVal bytes of val.
SIndexTerm *indexTermCreate(const char *key, int32_t nKey, const char *val, int32_t nVal);
// Releases a term obtained from indexTermCreate.
void indexTermDestroy(SIndexTerm *p);
#ifdef __cplusplus
#ifdef __cplusplus
}
}
#endif
#endif
#endif
/*_TD_INDEX_INT_H_*/
#endif
/*_TD_INDEX_INT_H_*/
\ No newline at end of file
source/libs/index/inc/index_fst.h
0 → 100644
浏览文件 @
9188298e
/*
* Copyright (c) 2019 TAOS Data, Inc. <jhtao@taosdata.com>
*
* This program is free software: you can use, redistribute, and/or modify
* it under the terms of the GNU Affero General Public License, version 3
* or later ("AGPL"), as published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#ifndef __INDEX_FST_H__
#define __INDEX_FST_H__
#include "tarray.h"
#include "index_fst_util.h"
#include "index_fst_registry.h"
typedef
struct
FstNode
FstNode
;
// Smaller of two output values. Each argument may be evaluated twice, so do
// not pass expressions with side effects.
#define OUTPUT_PREFIX(a, b) ((a) > (b) ? (b) : (a))
// A pair of 64-bit bounds. fstNodeTransitions() fills it with 0 and the
// node's transition count.
typedef struct FstRange {
  uint64_t start;
  uint64_t end;
} FstRange;
typedef
enum
{
OneTransNext
,
OneTrans
,
AnyTrans
,
EmptyFinal
}
State
;
// Bound kinds. NOTE(review): presumably describes the ends of a key range;
// nothing in this header uses it yet, so confirm against the callers.
typedef enum {
  Included,
  Excluded,
  Unbounded
} FstBound;

// Checksum accumulator carried by FstCountingWriter.
typedef uint32_t CheckSummer;
/*
 * Unfinished nodes and their helper functions.
 * TODO: simplify the function names
 */
typedef struct FstUnFinishedNodes {
  SArray *stack;  // array of FstBuilderNodeUnfinished, used as a stack
} FstUnFinishedNodes;
// Number of entries on the unfinished-node stack. The argument is
// parenthesized so expressions such as `&obj` expand correctly.
#define FST_UNFINISHED_NODES_LEN(nodes) taosArrayGetSize((nodes)->stack)
// Helpers operating on the stack of unfinished nodes.
// The names below match the definitions in index_fst.c. Two declarations
// previously used spellings with no matching definition
// (FstUnFinishedNodesCreate and ...FindCommPreifxAndSetOutput).

// Allocates the stack and pushes one empty, non-final root node.
FstUnFinishedNodes *fstUnFinishedNodesCreate();
// Pushes a node with no transitions and no pending last transition.
void fstUnFinishedNodesPushEmpty(FstUnFinishedNodes *nodes, bool isFinal);
// Pops the only remaining entry; asserts that it has no pending transition.
FstBuilderNode *fstUnFinishedNodesPopRoot(FstUnFinishedNodes *nodes);
// Pops the top entry after compiling its pending transition to addr.
FstBuilderNode *fstUnFinishedNodesPopFreeze(FstUnFinishedNodes *nodes, CompiledAddr addr);
// Pops the top entry; asserts that it has no pending transition.
FstBuilderNode *fstUnFinishedNodesPopEmpty(FstUnFinishedNodes *nodes);
// Marks the bottom entry as final and stores out as its final output.
void fstUnFinishedNodesSetRootOutput(FstUnFinishedNodes *node, Output out);
// Compiles the pending transition of the top entry to addr, without popping.
void fstUnFinishedNodesTopLastFreeze(FstUnFinishedNodes *node, CompiledAddr addr);
// Appends the bytes of bs as new entries below a final empty node.
void fstUnFinishedNodesAddSuffix(FstUnFinishedNodes *node, FstSlice bs, Output out);
// Counts how many leading bytes of bs match the pending transitions.
uint64_t fstUnFinishedNodesFindCommPrefix(FstUnFinishedNodes *node, FstSlice bs);
// Like the above, but also updates the pending outputs and reports through out.
uint64_t FstUnFinishedNodesFindCommPrefixAndSetOutput(FstUnFinishedNodes *node, FstSlice bs, Output in, Output *out);
// Wraps any writer and carries a byte count and a checksum for what is written.
typedef struct FstCountingWriter {
  void *wtr;           // the wrapped writer (opaque here)
  uint64_t count;      // byte counter; fstBuilderCreate starts it at 0
  CheckSummer summer;  // checksum; fstBuilderCreate starts it at 0
} FstCountingWriter;
// State needed while an FST is being built.
typedef struct FstBuilder {
  FstCountingWriter wtr;           // The FST raw data is written directly to `wtr`.
  FstUnFinishedNodes *unfinished;  // The stack of unfinished nodes
  FstRegistry registry;            // A map of finished nodes.
  SArray *last;                    // The last word added
  CompiledAddr lastAddr;           // The address of the last compiled node
  uint64_t len;                    // num of keys added
} FstBuilder;
// A node paired with an index range over its transitions; built by
// fstNodeTransitions() as [0, number of transitions].
typedef struct FstTransitions {
  FstNode *node;
  FstRange range;
} FstTransitions;
// The pending (not yet compiled) transition of an unfinished node.
typedef struct FstLastTransition {
  uint8_t inp;  // input byte of the transition
  Output out;   // output attached to the transition
} FstLastTransition;
/*
 * FstBuilderNodeUnfinished and its helper functions.
 * TODO: simplify the function names
 */
typedef struct FstBuilderNodeUnfinished {
  FstBuilderNode *node;     // the node under construction
  FstLastTransition *last;  // pending transition; NULL when there is none
} FstBuilderNodeUnfinished;

// NOTE(review): neither helper is defined in the visible sources; the
// descriptions are inferred from their call sites.
// Called with the address the pending transition should point to.
void fstBuilderNodeUnfinishedLastCompiled(FstBuilderNodeUnfinished *node, CompiledAddr addr);
// NOTE(review): the caller passes an output prefix as `addr`; both types are
// uint64_t, but the parameter type and name look like a copy-paste slip.
void fstBuilderNodeUnfinishedAddOutputPrefix(FstBuilderNodeUnfinished *node, CompiledAddr addr);
/*
 * FstNode and its helper functions
 */
typedef struct FstNode {
  FstSlice data;       // slice of the FST bytes this node was decoded from
  uint64_t version;    // version passed to fstNodeCreate
  State state;         // node kind, chosen by fstNodeCreate
  CompiledAddr start;  // address of this node
  CompiledAddr end;    // currently set to the same address as start
  bool isFinal;        // true for a final ("match") state
  uint64_t nTrans;     // number of transitions
  PackSizes sizes;     // packed transition/output sizes
  Output finalOutput;  // output of a final node, otherwise 0
} FstNode;
// Accessors for FstNode. The argument and the whole expansion are
// parenthesized so the macros stay correct inside larger expressions and with
// arguments such as `&node`.

// If this node is final and has a terminal output value, then it is returned. Otherwise, a zero output is returned.
#define FST_NODE_FINAL_OUTPUT(node) ((node)->finalOutput)
// Returns true if and only if this node corresponds to a final or "match" state in the finite state transducer.
#define FST_NODE_IS_FINAL(node) ((node)->isFinal)
// Returns the number of transitions in this node. The maximum number of transitions is 256.
#define FST_NODE_LEN(node) ((node)->nTrans)
// Returns true if and only if this node has zero transitions.
#define FST_NODE_IS_EMPTYE(node) ((node)->nTrans == 0)
// Returns the address of this node.
#define FST_NODE_ADDR(node) ((node)->start)
// Decodes the node stored at addr inside data; returns NULL when allocation fails.
FstNode *fstNodeCreate(int64_t version, CompiledAddr addr, FstSlice *data);
// NOTE(review): no definition of fstNodeTransitionIter is visible in index_fst.c.
FstTransitions fstNodeTransitionIter(FstNode *node);
// Heap-allocates an iterator over all transitions of node; NULL on allocation failure.
FstTransitions *fstNodeTransitions(FstNode *node);
// Fetch the i-th transition / its target address into res; false when unavailable.
bool fstNodeGetTransitionAt(FstNode *node, uint64_t i, FstTransition *res);
bool fstNodeGetTransitionAddrAt(FstNode *node, uint64_t i, CompiledAddr *res);
// Looks for a transition on input byte b and stores its index in res.
bool fstNodeFindInput(FstNode *node, uint8_t b, uint64_t *res);
// Serializes builderNode through the writer w.
bool fstNodeCompile(FstNode *node, void *w, CompiledAddr lastAddr, CompiledAddr addr, FstBuilderNode *builderNode);
// Returns a slice derived from the node's data.
FstSlice fstNodeAsSlice(FstNode *node);
// Metadata describing a whole FST.
typedef struct FstMeta {
  uint64_t version;       // format version
  CompiledAddr rootAddr;  // address of the root node
  FstType ty;             // FST type tag
  uint64_t len;           // NOTE(review): presumably the number of keys - confirm
  uint32_t checkSum;      // checksum value
} FstMeta;
// An FST: its metadata plus an opaque pointer to the underlying data.
typedef struct Fst {
  FstMeta meta;
  void *data;  // NOTE(review): concrete type is not defined yet
} Fst;
// ops
// An (index, value) pair of 64-bit integers.
typedef struct FstIndexedValue {
  uint64_t index;
  uint64_t value;
} FstIndexedValue;
#endif
source/libs/index/inc/index_fst_automation.h
0 → 100644
浏览文件 @
9188298e
/*
* Copyright (c) 2019 TAOS Data, Inc. <jhtao@taosdata.com>
*
* This program is free software: you can use, redistribute, and/or modify
* it under the terms of the GNU Affero General Public License, version 3
* or later ("AGPL"), as published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#ifndef __INDEX_FST_AUTAOMATION_H__
#define __INDEX_FST_AUTAOMATION_H__
// Forward typedef so the wrapper structs below can name AutomationCtx.
// A bare `struct AutomationCtx;` does not introduce the unqualified type name in C.
typedef struct AutomationCtx AutomationCtx;

// Wrapper holding a pointer to an automation context.
typedef struct StartWith {
  AutomationCtx *autoSelf;
} StartWith;

// Wrapper holding a pointer to an automation context.
typedef struct Complement {
  AutomationCtx *autoSelf;
} Complement;

// automation
struct AutomationCtx {
  void *data;  // opaque per-automaton payload
};

// automation interface
// NOTE(review): these are function-pointer objects defined in a header; every
// translation unit that includes it gets its own tentative definition. Consider
// moving them into a struct or declaring them `extern`.
// This header uses bool and uint8_t, so <stdbool.h> and <stdint.h> must be
// included before it.
void (*start)(AutomationCtx *ctx);
bool (*isMatch)(AutomationCtx *ctx);
bool (*canMatch)(AutomationCtx *ctx, void *data);
bool (*willAlwaysMatch)(AutomationCtx *ctx, void *state);
void *(*accpet)(AutomationCtx *ctx, void *state, uint8_t byte);
// The `state` parameter previously had no type; `void *` matches accpet above.
void *(*accpetEof)(AutomationCtx *ctx, void *state);
#endif
source/libs/index/inc/index_fst_node.h
0 → 100644
浏览文件 @
9188298e
/*
* Copyright (c) 2019 TAOS Data, Inc. <jhtao@taosdata.com>
*
* This program is free software: you can use, redistribute, and/or modify
* it under the terms of the GNU Affero General Public License, version 3
* or later ("AGPL"), as published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#ifndef __INDEX_FST_NODE_H__
#define __INDEX_FST_NODE_H__
#include "index_fst_util.h"
// One outgoing edge of a builder node.
typedef struct FstTransition {
  uint8_t inp;        // The byte input associated with this transition.
  Output out;         // The output associated with this transition
  CompiledAddr addr;  // The address of the node that this transition points to
} FstTransition;
// A node while the FST is being built.
typedef struct FstBuilderNode {
  bool isFinal;        // whether this node is a final state
  Output finalOutput;  // output emitted when this node is final
  SArray *trans;       // array of FstTransition
} FstBuilderNode;
// Builder-node helpers; their definitions are not part of this header.
// NOTE(review): by their names these create a default node, clone src, and
// copy src into dst - confirm against the implementation.
FstBuilderNode *fstBuilderNodeDefault();
FstBuilderNode *fstBuilderNodeClone(FstBuilderNode *src);
void fstBuilderNodeCloneFrom(FstBuilderNode *dst, FstBuilderNode *src);
#endif
source/libs/index/inc/index_fst_registry.h
0 → 100644
浏览文件 @
9188298e
/*
* Copyright (c) 2019 TAOS Data, Inc. <jhtao@taosdata.com>
*
* This program is free software: you can use, redistribute, and/or modify
* it under the terms of the GNU Affero General Public License, version 3
* or later ("AGPL"), as published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#ifndef __FST_REGISTRY_H__
#define __FST_REGISTRY_H__
#include "index_fst_util.h"
#include "tarray.h"
#include "index_fst_node.h"
// One registry slot: a builder node paired with a compiled address.
typedef struct FstRegistryCell {
  CompiledAddr addr;
  FstBuilderNode *node;
} FstRegistryCell;
//typedef struct FstRegistryCache {
// SArray *cells;
// uint32_t start;
// uint32_t end;
//} FstRegistryCache;
typedef
enum
{
FOUND
,
NOTFOUND
,
REJECTED
}
FstRegistryEntryState
;
// Value returned by fstRegistryGetEntry().
typedef struct FstRegistryEntry {
  FstRegistryEntryState state;  // state tag of this entry
  CompiledAddr addr;            // a compiled address
  FstRegistryCell *cell;        // a registry cell
} FstRegistryEntry;
// Registry type and related functions
typedef struct FstRegistry {
  SArray *table;       // backing storage for the registry
  uint64_t tableSize;  // num of rows
  uint64_t mruSize;    // num of columns
} FstRegistry;
// Registry API; the definitions are not part of this header.
// Creates a registry from the given row and column counts.
FstRegistry *fstRegistryCreate(uint64_t tableSize, uint64_t mruSize);
// Returns the registry entry for bNode.
FstRegistryEntry *fstRegistryGetEntry(FstRegistry *registry, FstBuilderNode *bNode);
#endif
source/libs/index/inc/index_fst_util.h
0 → 100644
浏览文件 @
9188298e
/*
* Copyright (c) 2019 TAOS Data, Inc. <jhtao@taosdata.com>
*
* This program is free software: you can use, redistribute, and/or modify
* it under the terms of the GNU Affero General Public License, version 3
* or later ("AGPL"), as published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#ifndef __INDEX_FST_UTIL_H__
#define __INDEX_FST_UTIL_H__
#include "tarray.h"
// Scalar types shared by the FST code.
typedef uint64_t FstType;       // FST type tag
typedef uint64_t CompiledAddr;  // address of a compiled node
typedef uint64_t Output;        // output value attached to transitions and nodes
typedef uint8_t PackSizes;      // two 4-bit pack sizes stored in one byte
// A sentinel value used to indicate an empty final state.
extern const CompiledAddr EMPTY_ADDRESS;
// A sentinel value used to indicate an invalid state.
extern const CompiledAddr NONE_ADDRESS;

// This version number is written to every finite state transducer created by
// this library. When a finite state transducer is read, its version number is
// checked against this value.
extern const uint64_t version;
// The threshold (in number of transitions) at which an index is created for
// a node's transitions. This speeds up lookup time at the expense of FST size.
extern const uint64_t TRANS_INDEX_THRESHOLD;
// A PackSizes byte stores two 4-bit sizes:
//   high 4 bits: transition address packed size
//   low 4 bits:  output value packed size
//
// `0` is a legal value which means there are no transitions/outputs.
//
// The setters assign to `v`, so `v` must be a modifiable lvalue; `sz` is
// masked to 4 bits so it cannot spill into the other field. Hex masks replace
// the non-standard 0b literals.
#define FST_SET_TRANSITION_PACK_SIZE(v, sz) do { (v) = (((v) & 0x0F) | (((sz) & 0x0F) << 4)); } while (0)
#define FST_GET_TRANSITION_PACK_SIZE(v) (((v) & 0xF0) >> 4)
#define FST_SET_OUTPUT_PACK_SIZE(v, sz) do { (v) = (((v) & 0xF0) | ((sz) & 0x0F)); } while (0)
#define FST_GET_OUTPUT_PACK_SIZE(v) ((v) & 0x0F)
// Lookups into the COMMON_INPUTS / COMMON_INPUTS_INV tables, which must be
// visible at the point of use. Arguments are parenthesized so that compound
// expressions expand as intended.
// COMMON_INPUT: table entry for a 1-based index.
#define COMMON_INPUT(idx) COMMON_INPUTS_INV[(idx) - 1]
// COMMON_INDEX: stores (COMMON_INPUTS[v] + 1) % 256 in val, or 0 when that
// exceeds max.
#define COMMON_INDEX(v, max, val) do { \
  (val) = ((uint16_t)COMMON_INPUTS[(v)] + 1) % 256; \
  (val) = (val) > (max) ? 0 : (val); \
} while (0)
//uint8_t commonInput(uint8_t idx);
//uint8_t commonIdx(uint8_t v, uint8_t max);
// Packing helpers; their definitions are not part of this header.
// NOTE(review): the descriptions below are inferred from the names - confirm.
// Number of bytes needed to store n.
uint8_t packSize(uint64_t n);
// Reads an sz-byte packed integer from ch.
uint64_t unpackUint64(uint8_t *ch, uint8_t sz);
// Pack size of the delta between a node address and a transition address.
uint8_t packDeltaSize(CompiledAddr nodeAddr, CompiledAddr transAddr);
// Decodes a delta-encoded address relative to nodeAddr.
CompiledAddr unpackDelta(char *data, uint64_t len, uint64_t nodeAddr);
// A view over a byte buffer. The FST code iterates data[start..end] with both
// ends inclusive.
typedef struct FstSlice {
  uint8_t *data;   // underlying buffer
  uint64_t dLen;   // length of the underlying buffer
  uint32_t start;  // first index of the view
  uint32_t end;    // last index of the view (inclusive)
} FstSlice;
// Slice helpers; their definitions are not part of this header.
// Derives a new slice from `slice` using the given start and end.
FstSlice fstSliceCopy(FstSlice *slice, uint32_t start, uint32_t end);
// Creates a slice over dLen bytes of data.
FstSlice fstSliceCreate(uint8_t *data, uint64_t dLen);
// Reports whether the slice is empty.
bool fstSliceEmpty(FstSlice *slice);
#endif
source/libs/index/src/index.c
浏览文件 @
9188298e
...
@@ -13,15 +13,176 @@
...
@@ -13,15 +13,176 @@
* along with this program. If not, see <http://www.gnu.org/licenses/>.
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
*/
#i
fndef _TD_INDEX_H_
#i
nclude "index.h"
#
define _TD_INDEX_H_
#
include "indexInt.h"
#ifdef
__cplusplus
#ifdef
USE_LUCENE
extern
"C"
{
#include "lucene++/Lucene_c.h"
#endif
#endif
#ifdef __cplusplus
static
pthread_once_t
isInit
=
PTHREAD_ONCE_INIT
;
static
void
indexInit
();
/*
 * Opens the index stored at `path`.
 *
 * Runs the one-time module initialization, then (only when built with
 * USE_LUCENE) allocates a handle wrapping the Lucene index.
 *
 * @param opts  currently unused
 * @param path  location of the index
 * @return a handle to release with indexClose(), or NULL when allocation
 *         fails or the build has no Lucene support
 */
SIndex *indexOpen(SIndexOpts *opts, const char *path) {
  pthread_once(&isInit, indexInit);
#ifdef USE_LUCENE
  // Allocate the wrapper first so a failed malloc cannot leak an opened index.
  SIndex *p = malloc(sizeof(*p));
  if (p == NULL) {
    return NULL;
  }
  p->index = index_open(path);
  return p;
#endif
  return NULL;
}
/*
 * Closes an index handle and frees it. A NULL handle is ignored; the original
 * dereferenced it in USE_LUCENE builds.
 */
void indexClose(SIndex *index) {
  if (index == NULL) {
    return;
  }
#ifdef USE_LUCENE
  index_close(index->index);
  index->index = NULL;
#endif
  free(index);
}
#ifdef USE_LUCENE
#endif
/*
 * Adds one document to the index (USE_LUCENE builds only).
 *
 * Every term in field_vals becomes a document field, then the decimal text of
 * uid is added as one more field with a NULL key.
 *
 * @return always 1
 */
int indexPut(SIndex *index, SArray *field_vals, int uid) {
#ifdef USE_LUCENE
  index_document_t *doc = index_document_create();

  int pos = 0;
  while (pos < taosArrayGetSize(field_vals)) {
    SIndexTerm *term = taosArrayGetP(field_vals, pos);
    index_document_add(doc, (const char *)(term->key), term->nKey, (const char *)(term->val), term->nVal, 1);
    pos++;
  }

  // 16 bytes is enough for any decimal int plus the terminator.
  char uidText[16] = {0};
  sprintf(uidText, "%d", uid);
  index_document_add(doc, NULL, 0, uidText, strlen(uidText), 0);

  index_put(index->index, doc);
  index_document_destroy(doc);
#endif
  return 1;
}
/*
 * Runs a multi-term query and appends every hit to `result`
 * (USE_LUCENE builds only; otherwise this is a no-op).
 *
 * Each term's key and value are copied into NUL-terminated buffers before
 * being handed to index_multi_search().
 *
 * @param index        open index handle
 * @param multiQuerys  operator plus the array of SIndexTermQuery to evaluate
 * @param result       array that receives one int per hit
 * @return 1 on success (and always in builds without Lucene), -1 when a
 *         temporary buffer cannot be allocated
 */
int indexSearch(SIndex *index, SIndexMultiTermQuery *multiQuerys, SArray *result) {
#ifdef USE_LUCENE
  EIndexOperatorType opera = multiQuerys->opera;
  int nQuery = taosArrayGetSize(multiQuerys->query);
  int rc = -1;
  int *tResult = NULL;
  int tsz = 0;

  // calloc zero-fills the pointer tables so cleanup can free every slot
  // unconditionally; at least one element is requested so an empty query does
  // not look like an allocation failure.
  size_t nSlots = nQuery > 0 ? (size_t)nQuery : 1;
  char **fields = calloc(nSlots, sizeof(*fields));
  char **keys = calloc(nSlots, sizeof(*keys));
  int *types = calloc(nSlots, sizeof(*types));
  if (fields == NULL || keys == NULL || types == NULL) {
    goto _cleanup;
  }

  for (int i = 0; i < nQuery; i++) {
    SIndexTermQuery *p = taosArrayGet(multiQuerys->query, i);
    SIndexTerm *term = p->field_value;

    fields[i] = calloc(1, term->nKey + 1);
    keys[i] = calloc(1, term->nVal + 1);
    if (fields[i] == NULL || keys[i] == NULL) {
      goto _cleanup;
    }
    memcpy(fields[i], term->key, term->nKey);
    memcpy(keys[i], term->val, term->nVal);
    types[i] = (int)(p->type);
  }

  index_multi_search(index->index, (const char **)fields, (const char **)keys, types, nQuery, opera, &tResult, &tsz);
  for (int i = 0; i < tsz; i++) {
    taosArrayPush(result, &tResult[i]);
  }
  // NOTE(review): tResult is not released here (the original did not either);
  // confirm who owns the buffer returned by index_multi_search.
  rc = 1;

_cleanup:
  for (int i = 0; i < nQuery; i++) {
    if (fields != NULL) free(fields[i]);
    if (keys != NULL) free(keys[i]);
  }
  free(fields);
  free(keys);
  free(types);
  return rc;
#else
  return 1;
#endif
}
}
/*
 * Placeholder: deletion is not implemented. Both arguments are ignored.
 *
 * @return always 1
 */
int indexDelete(SIndex *index, SIndexMultiTermQuery *query) {
  (void)index;
  (void)query;
  return 1;
}
// Declaration only; no definition of indexRebuild appears in this file.
int indexRebuild(SIndex *index, SIndexOpts *opts);
/*
 * Placeholder: no options object is created yet.
 *
 * @return always NULL
 */
SIndexOpts *indexOptsCreate() {
  SIndexOpts *created = NULL;
#ifdef USE_LUCENE
#endif
  return created;
}
/*
 * Placeholder: there is nothing to release yet, so `opts` is ignored.
 * An unbalanced second `#endif` inside the body has been dropped.
 */
void indexOptsDestroy(SIndexOpts *opts) {
#ifdef USE_LUCENE
#endif
  (void)opts;
}
/*
* @param: oper
*
*/
#endif
/*_TD_INDEX_H_*/
/*
 * Creates an empty multi-term query.
 *
 * @param opera  operator used to combine the term queries added later
 * @return the new query (release with indexMultiTermQueryDestroy), or NULL
 *         when either allocation fails
 */
SIndexMultiTermQuery *indexMultiTermQueryCreate(EIndexOperatorType opera) {
  SIndexMultiTermQuery *p = (SIndexMultiTermQuery *)malloc(sizeof(SIndexMultiTermQuery));
  if (p == NULL) {
    return NULL;
  }
  p->opera = opera;
  p->query = taosArrayInit(1, sizeof(SIndexTermQuery));
  if (p->query == NULL) {
    // Do not hand out a half-built query whose array would be dereferenced later.
    free(p);
    return NULL;
  }
  return p;
}
/*
 * Destroys a multi-term query: every term it holds, its array, and the query
 * object itself. A NULL query is ignored.
 */
void indexMultiTermQueryDestroy(SIndexMultiTermQuery *pQuery) {
  if (pQuery == NULL) {
    return;
  }
  for (int i = 0; i < taosArrayGetSize(pQuery->query); i++) {
    SIndexTermQuery *p = (SIndexTermQuery *)taosArrayGet(pQuery->query, i);
    indexTermDestroy(p->field_value);
  }
  taosArrayDestroy(pQuery->query);
  free(pQuery);
}
/*
 * Appends one term query to pQuery.
 *
 * The field and value bytes are copied, so the caller keeps ownership of its
 * buffers.
 *
 * @return 0 on success, -1 when the term cannot be created or stored
 */
int indexMultiTermQueryAdd(SIndexMultiTermQuery *pQuery, const char *field, int32_t nFields, const char *value,
                           int32_t nValue, EIndexQueryType type) {
  SIndexTerm *t = indexTermCreate(field, nFields, value, nValue);
  if (t == NULL) {
    return -1;
  }
  SIndexTermQuery q = {.type = type, .field_value = t};
  // Relies on taosArrayPush returning NULL on failure; without this check the
  // freshly created term would leak.
  if (taosArrayPush(pQuery->query, &q) == NULL) {
    indexTermDestroy(t);
    return -1;
  }
  return 0;
}
/*
 * Creates a term holding private, NUL-terminated copies of key and val.
 *
 * @param key   key bytes (need not be NUL-terminated)
 * @param nKey  number of key bytes to copy
 * @param val   value bytes (need not be NUL-terminated)
 * @param nVal  number of value bytes to copy
 * @return the new term (release with indexTermDestroy), or NULL when a length
 *         is negative or an allocation fails
 */
SIndexTerm *indexTermCreate(const char *key, int32_t nKey, const char *val, int32_t nVal) {
  if (nKey < 0 || nVal < 0) {
    return NULL;
  }

  SIndexTerm *t = (SIndexTerm *)malloc(sizeof(SIndexTerm));
  if (t == NULL) {
    return NULL;
  }

  // One extra zeroed byte keeps each copy NUL-terminated.
  t->key = (char *)calloc((size_t)nKey + 1, 1);
  t->val = (char *)calloc((size_t)nVal + 1, 1);
  if (t->key == NULL || t->val == NULL) {
    // Callers already test for NULL; previously a failed allocation crashed in memcpy.
    free(t->key);
    free(t->val);
    free(t);
    return NULL;
  }

  if (nKey > 0) {
    memcpy(t->key, key, nKey);
  }
  t->nKey = nKey;

  if (nVal > 0) {
    memcpy(t->val, val, nVal);
  }
  t->nVal = nVal;
  return t;
}
/*
 * Frees a term and the key/value copies it owns. A NULL term is ignored.
 */
void indexTermDestroy(SIndexTerm *p) {
  if (p == NULL) {
    return;
  }
  free(p->key);
  free(p->val);
  free(p);
}
/*
 * Creates an array of SIndexTerm pointers with room for 4 entries.
 *
 * @return whatever taosArrayInit returns
 */
SArray *indexMultiTermCreate() {
  SArray *terms = taosArrayInit(4, sizeof(SIndexTerm *));
  return terms;
}
/*
 * Creates a term from (field, val) and appends its pointer to `array`.
 *
 * @return 0 on success, -1 when the term cannot be created or stored
 */
int indexMultiTermAdd(SArray *array, const char *field, int32_t nField, const char *val, int32_t nVal) {
  SIndexTerm *term = indexTermCreate(field, nField, val, nVal);
  if (term == NULL) {
    return -1;
  }
  // Relies on taosArrayPush returning NULL on failure; without this check the
  // term would leak.
  if (taosArrayPush(array, &term) == NULL) {
    indexTermDestroy(term);
    return -1;
  }
  return 0;
}
/*
 * Destroys every term stored in `array`, then the array itself.
 */
void indexMultiTermDestroy(SArray *array) {
  int32_t pos = 0;
  while (pos < taosArrayGetSize(array)) {
    SIndexTerm *term = taosArrayGetP(array, pos);
    indexTermDestroy(term);
    pos++;
  }
  taosArrayDestroy(array);
}
/* One-time module initialization hook passed to pthread_once; currently empty. */
void indexInit() {
  /* intentionally empty */
}
source/libs/index/src/index_fst.c
0 → 100644
浏览文件 @
9188298e
/*
* Copyright (c) 2019 TAOS Data, Inc. <jhtao@taosdata.com>
*
* This program is free software: you can use, redistribute, and/or modify
* it under the terms of the GNU Affero General Public License, version 3
* or later ("AGPL"), as published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#include "index_fst.h"
/*
 * Creates the unfinished-node stack and seeds it with one empty, non-final
 * root node.
 *
 * @return the new stack, or NULL when an allocation fails
 */
FstUnFinishedNodes *fstUnFinishedNodesCreate() {
  FstUnFinishedNodes *nodes = malloc(sizeof(FstUnFinishedNodes));
  if (nodes == NULL) {
    return NULL;
  }

  nodes->stack = (SArray *)taosArrayInit(64, sizeof(FstBuilderNodeUnfinished));
  if (nodes->stack == NULL) {
    // Pushing onto a missing array would dereference NULL.
    free(nodes);
    return NULL;
  }
  fstUnFinishedNodesPushEmpty(nodes, false);
  return nodes;
}
/*
 * Pushes a fresh node with no transitions and no pending last transition.
 *
 * @param nodes    stack to push onto
 * @param isFinal  whether the new node is a final state
 *
 * If the node cannot be allocated nothing is pushed; the function has no way
 * to report the failure.
 */
void fstUnFinishedNodesPushEmpty(FstUnFinishedNodes *nodes, bool isFinal) {
  FstBuilderNode *node = malloc(sizeof(FstBuilderNode));
  if (node == NULL) {
    return;
  }
  node->isFinal = isFinal;
  node->finalOutput = 0;
  node->trans = NULL;

  FstBuilderNodeUnfinished un = {.node = node, .last = NULL};
  taosArrayPush(nodes->stack, &un);
}
/*
 * Pops the root entry. Asserts that it is the only entry left and that it has
 * no pending transition.
 *
 * @return the builder node held by the root entry
 */
FstBuilderNode *fstUnFinishedNodesPopRoot(FstUnFinishedNodes *nodes) {
  SArray *stack = nodes->stack;
  assert(taosArrayGetSize(stack) == 1);

  FstBuilderNodeUnfinished *root = taosArrayPop(stack);
  assert(root->last == NULL);

  FstBuilderNode *rootNode = root->node;
  return rootNode;
}
/*
 * Pops the top entry, compiles its pending transition against `addr`, and
 * frees that pending transition.
 *
 * @return the builder node held by the popped entry
 */
FstBuilderNode *fstUnFinishedNodesPopFreeze(FstUnFinishedNodes *nodes, CompiledAddr addr) {
  FstBuilderNodeUnfinished *top = taosArrayPop(nodes->stack);
  fstBuilderNodeUnfinishedLastCompiled(top, addr);

  FstBuilderNode *frozen = top->node;
  // TODO add func FstLastTransitionFree()
  free(top->last);
  return frozen;
}
/*
 * Pops the top entry, which must not carry a pending transition.
 *
 * @return the builder node held by the popped entry
 */
FstBuilderNode *fstUnFinishedNodesPopEmpty(FstUnFinishedNodes *nodes) {
  FstBuilderNodeUnfinished *top = taosArrayPop(nodes->stack);
  assert(top->last == NULL);

  FstBuilderNode *popped = top->node;
  return popped;
}
/*
 * Marks the bottom entry of the stack (index 0) as final and records `out` as
 * its final output.
 */
void fstUnFinishedNodesSetRootOutput(FstUnFinishedNodes *nodes, Output out) {
  FstBuilderNodeUnfinished *root = taosArrayGet(nodes->stack, 0);
  FstBuilderNode *rootNode = root->node;

  rootNode->isFinal = true;
  rootNode->finalOutput = out;
}
/*
 * Compiles the pending transition of the top entry against `addr`, leaving
 * the entry on the stack.
 */
void fstUnFinishedNodesTopLastFreeze(FstUnFinishedNodes *nodes, CompiledAddr addr) {
  size_t topIdx = taosArrayGetSize(nodes->stack) - 1;
  FstBuilderNodeUnfinished *top = taosArrayGet(nodes->stack, topIdx);
  fstBuilderNodeUnfinishedLastCompiled(top, addr);
}
/*
 * Extends the unfinished-node stack with the bytes bs.data[bs.start..bs.end].
 *
 * The first byte becomes the pending transition of the current top node and
 * carries the whole output `out`. Every following byte gets a new node whose
 * pending transition has output 0. A final empty node is pushed last.
 *
 * The original loop started at bs.start, adding the first byte twice, and
 * attached `out` to every transition, so the output accumulated once per
 * byte. This follows the add_suffix algorithm of the Rust fst builder.
 *
 * An empty or invalid slice is ignored. If an allocation fails the function
 * stops early; it has no way to report the failure.
 */
void fstUnFinishedNodesAddSuffix(FstUnFinishedNodes *nodes, FstSlice bs, Output out) {
  FstSlice *s = &bs;
  if (s->data == NULL || s->dLen == 0 || s->start > s->end) {
    return;
  }

  size_t sz = taosArrayGetSize(nodes->stack) - 1;
  FstBuilderNodeUnfinished *top = taosArrayGet(nodes->stack, sz);
  assert(top->last == NULL);

  FstLastTransition *first = malloc(sizeof(FstLastTransition));
  if (first == NULL) {
    return;
  }
  first->inp = s->data[s->start];
  first->out = out;
  top->last = first;

  // The remaining bytes start one past the first, each with a zero output.
  for (uint64_t i = (uint64_t)s->start + 1; i <= s->end; i++) {
    FstBuilderNode *n = malloc(sizeof(FstBuilderNode));
    FstLastTransition *trn = malloc(sizeof(FstLastTransition));
    if (n == NULL || trn == NULL) {
      free(n);
      free(trn);
      return;
    }
    n->isFinal = false;
    n->finalOutput = 0;
    n->trans = NULL;

    trn->inp = s->data[i];
    trn->out = 0;

    FstBuilderNodeUnfinished entry = {.node = n, .last = trn};
    taosArrayPush(nodes->stack, &entry);
  }
  fstUnFinishedNodesPushEmpty(nodes, true);
}
/*
 * Counts how many leading bytes of bs match the pending transitions on the
 * stack, comparing byte i with stack entry i and stopping at the first
 * mismatch.
 *
 * An entry with no pending transition (the top entry is pushed that way) ends
 * the match; the original dereferenced its NULL `last`. An empty or invalid
 * slice yields 0.
 *
 * @return the length of the common prefix
 */
uint64_t fstUnFinishedNodesFindCommPrefix(FstUnFinishedNodes *node, FstSlice bs) {
  FstSlice *s = &bs;
  if (s->data == NULL || s->dLen == 0 || s->start > s->end) {
    return 0;
  }

  size_t lsz = (size_t)(s->end - s->start + 1);   // data len
  size_t ssz = taosArrayGetSize(node->stack);     // stack size

  uint64_t count = 0;
  for (size_t i = 0; i < ssz && i < lsz; i++) {
    FstBuilderNodeUnfinished *un = taosArrayGet(node->stack, i);
    if (un->last == NULL || un->last->inp != s->data[s->start + i]) {
      break;
    }
    count++;
  }
  return count;
}
/*
 * Finds the common prefix between bs and the pending transitions on the
 * stack, redistributing outputs along the way.
 *
 * For each matching entry, the part of its pending output shared with the
 * remaining input output stays on that transition. The non-shared part is
 * pushed down to the next entry, and the shared part is subtracted from the
 * remaining output.
 *
 * @param node  unfinished-node stack
 * @param bs    key bytes (bs.data[bs.start..bs.end])
 * @param in    output associated with the key
 * @param out   receives what is left of `in` after the shared prefixes are
 *              removed (may be NULL)
 * @return the number of matched bytes
 *
 * The original always returned 0, never wrote *out, did pointer arithmetic on
 * `out`, ignored `in`, and dereferenced a NULL pending transition.
 */
uint64_t FstUnFinishedNodesFindCommPrefixAndSetOutput(FstUnFinishedNodes *node, FstSlice bs, Output in, Output *out) {
  FstSlice *s = &bs;
  Output remaining = in;
  uint64_t matched = 0;

  if (s->data != NULL && s->dLen != 0 && s->start <= s->end) {
    size_t lsz = (size_t)(s->end - s->start + 1);   // data len
    size_t ssz = taosArrayGetSize(node->stack);     // stack size

    while (matched < lsz && matched < ssz) {
      FstBuilderNodeUnfinished *un = taosArrayGet(node->stack, matched);
      FstLastTransition *last = un->last;
      if (last == NULL || last->inp != s->data[s->start + matched]) {
        break;
      }
      matched++;

      // The shared part of two outputs is their minimum.
      uint64_t commPrefix = last->out < remaining ? last->out : remaining;
      uint64_t addPrefix = last->out - commPrefix;
      remaining -= commPrefix;
      last->out = commPrefix;

      // What this transition gives up moves to the node it leads to.
      if (addPrefix != 0 && matched < ssz) {
        FstBuilderNodeUnfinished *next = taosArrayGet(node->stack, matched);
        fstBuilderNodeUnfinishedAddOutputPrefix(next, addPrefix);
      }
    }
  }

  if (out != NULL) {
    *out = remaining;
  }
  return matched;
}
// fst node function
/*
 * Decodes the node stored at `addr` inside `slice`.
 *
 * EMPTY_ADDRESS yields an empty final node without touching the slice.
 * Otherwise the top two bits of the byte at `addr` select the node kind:
 * binary 11 is OneTransNext, binary 10 is OneTrans, anything else is AnyTrans.
 *
 * @param version  format version recorded in the node
 * @param addr     address of the node inside the slice
 * @param slice    bytes of the FST
 * @return the new node, or NULL when allocation fails
 *
 * Fixes: the empty-address case no longer falls through into
 * slice->data[addr]; the OneTrans test uses the extracted state bits instead
 * of the whole byte; and the not-yet-decoded size and transition count are 0
 * instead of uninitialized.
 */
FstNode *fstNodeCreate(int64_t version, CompiledAddr addr, FstSlice *slice) {
  FstNode *n = (FstNode *)malloc(sizeof(FstNode));
  if (n == NULL) {
    return NULL;
  }

  if (addr == EMPTY_ADDRESS) {
    n->data = fstSliceCreate(NULL, 0);
    n->version = version;
    n->state = EmptyFinal;
    n->start = EMPTY_ADDRESS;
    n->end = EMPTY_ADDRESS;
    n->isFinal = true;
    n->nTrans = 0;
    n->sizes = 0;
    n->finalOutput = 0;
    return n;
  }

  uint8_t v = slice->data[addr];
  uint8_t s = (v & 0xC0) >> 6;  // top two bits select the node kind

  if (s == 0x3) {
    // oneTransNext
    n->data = fstSliceCopy(slice, 0, addr);
    n->version = version;
    n->state = OneTransNext;
    n->start = addr;
    n->end = addr;  // TODO: s.end_addr(data)
    n->isFinal = false;
    n->sizes = 0;
    n->nTrans = 0;
    n->finalOutput = 0;
  } else if (s == 0x2) {
    // oneTrans
    PackSizes sz = 0;  // TODO: fetch sz from addr
    n->data = fstSliceCopy(slice, 0, addr);
    n->version = version;
    n->state = OneTrans;
    n->start = addr;
    n->end = addr;  // TODO: s.end_addr(data, sz)
    n->isFinal = false;
    n->nTrans = 1;
    n->sizes = sz;
    n->finalOutput = 0;
  } else {
    // anyTrans
    PackSizes sz = 0;     // TODO: s.sizes(data)
    uint32_t nTrans = 0;  // TODO: s.ntrans(data)
    n->data = *slice;
    n->version = version;
    n->state = AnyTrans;
    n->start = addr;
    n->end = addr;       // TODO: s.end_addr(version, data, sz, ntrans)
    n->isFinal = false;  // TODO: s.is_final_state()
    n->nTrans = nTrans;
    n->sizes = sz;
    n->finalOutput = 0;  // TODO: s.final_output(version, data, sz, ntrans)
  }
  return n;
}
/*
 * Allocates an iterator over all transitions of `node`, with the range set to
 * [0, number of transitions].
 *
 * @return the iterator, or NULL when allocation fails
 */
FstTransitions *fstNodeTransitions(FstNode *node) {
  FstTransitions *iter = malloc(sizeof(FstTransitions));
  if (iter != NULL) {
    iter->node = node;
    iter->range.start = 0;
    iter->range.end = FST_NODE_LEN(node);
  }
  return iter;
}
/*
 * Placeholder: reports whether `node` is of a kind that can have transitions.
 * Neither `i` nor `res` is used yet.
 *
 * @return true for OneTransNext, OneTrans and AnyTrans nodes, false otherwise
 */
bool fstNodeGetTransitionAt(FstNode *node, uint64_t i, FstTransition *res) {
  switch (node->state) {
    case OneTransNext:
    case OneTrans:
    case AnyTrans:
      return true;
    default:
      return false;
  }
}
/*
 * Placeholder: neither `i` nor `res` is used yet.
 *
 * @return false for an EmptyFinal node, true for every other state
 */
bool fstNodeGetTransitionAddrAt(FstNode *node, uint64_t i, CompiledAddr *res) {
  return node->state != EmptyFinal;
}
/*
 * Looks for a transition of `node` on input byte `b`.
 *
 * @param node  node to search
 * @param b     input byte
 * @param res   receives the index of the matching transition
 * @return true when a transition was found and *res was written
 *
 * Decoding of the node's input byte is not implemented yet. The original
 * compared against an uninitialized variable (undefined behavior) and
 * returned true on a mismatch. The placeholder is now a defined 0, and every
 * path that does not write *res returns false.
 */
bool fstNodeFindInput(FstNode *node, uint8_t b, uint64_t *res) {
  uint8_t input = 0;  // TODO: s.input - decode from node->data

  switch (node->state) {
    case OneTransNext:
    case OneTrans:
      if (b != input) {
        return false;
      }
      *res = 0;
      return true;
    case AnyTrans:
      // TODO: the lookup is not implemented, so report "not found" rather than
      // claim success with *res unset.
      return false;
    case EmptyFinal:
    default:
      return false;
  }
}
/*
 * Placeholder for node serialization. It only decides which encoding would be
 * used and writes nothing; `node`, `w` and `addr` are unused.
 *
 * Asserts that the builder node has fewer than 256 transitions.
 *
 * @return always true
 */
bool fstNodeCompile(FstNode *node, void *w, CompiledAddr lastAddr, CompiledAddr addr, FstBuilderNode *builderNode) {
  size_t nTrans = taosArrayGetSize(builderNode->trans);
  assert(nTrans < 256);

  // Exactly one transition on a non-final node takes a compact encoding;
  // every other shape is the empty-final node or AnyTrans->Compile(w, addr, node).
  bool singleTransition = (nTrans == 1) && !builderNode->isFinal;
  if (singleTransition) {
    FstTransition *tran = taosArrayGet(builderNode->trans, 0);
    if (tran->addr == lastAddr && tran->out == 0) {
      // OneTransNext::compile(w, lastAddr, tran->inp);
    } else {
      // OneTrans::Compile(w, lastAddr, *tran);
    }
  }
  return true;
}
/*
 * Creates a builder that writes the FST through `w`.
 *
 * @param w   writer wrapped by the builder's counting writer
 * @param ty  FST type; currently unused
 * @return the new builder, or NULL when an allocation fails
 *
 * The unfinished-node stack is built with fstUnFinishedNodesCreate() so it
 * has a valid array and root node; the original used a raw malloc that left
 * `stack` uninitialized. The remaining fields (registry, last, lastAddr, len)
 * start zeroed instead of indeterminate.
 */
FstBuilder *fstBuilderCreate(void *w, FstType ty) {
  (void)ty;

  FstBuilder *b = calloc(1, sizeof(FstBuilder));
  if (NULL == b) {
    return b;
  }

  FstCountingWriter wtr = {.wtr = w, .count = 0, .summer = 0};
  b->wtr = wtr;

  b->unfinished = fstUnFinishedNodesCreate();
  if (b->unfinished == NULL) {
    free(b);
    return NULL;
  }
  return b;
}
/*
 * Returns a slice derived from the node's data, running from the data's `end`
 * index to dLen - 1.
 */
FstSlice fstNodeAsSlice(FstNode *node) {
  FstSlice *src = &node->data;
  return fstSliceCopy(src, src->end, src->dLen - 1);
}
source/libs/
sync/inc/raft_configuration.h
→
source/libs/
index/src/index_fst_automation.c
浏览文件 @
9188298e
/*
/*
* Copyright (c) 2019 TAOS Data, Inc. <
cli
@taosdata.com>
* Copyright (c) 2019 TAOS Data, Inc. <
jhtao
@taosdata.com>
*
*
* This program is free software: you can use, redistribute, and/or modify
* This program is free software: you can use, redistribute, and/or modify
* it under the terms of the GNU Affero General Public License, version 3
* it under the terms of the GNU Affero General Public License, version 3
...
@@ -12,16 +12,3 @@
...
@@ -12,16 +12,3 @@
* You should have received a copy of the GNU Affero General Public License
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
*/
#ifndef _TD_LIBS_SYNC_RAFT_CONFIGURATION_H
#define _TD_LIBS_SYNC_RAFT_CONFIGURATION_H
#include "sync.h"
#include "sync_type.h"
// return -1 if cannot find this id
int
syncRaftConfigurationIndexOfNode
(
SSyncRaft
*
pRaft
,
SyncNodeId
id
);
int
syncRaftConfigurationVoterCount
(
SSyncRaft
*
pRaft
);
#endif
/* _TD_LIBS_SYNC_RAFT_CONFIGURATION_H */
\ No newline at end of file
source/libs/index/src/index_fst_common.c
0 → 100644
浏览文件 @
9188298e
/*
* Copyright (c) 2019 TAOS Data, Inc. <jhtao@taosdata.com>
*
* This program is free software: you can use, redistribute, and/or modify
* it under the terms of the GNU Affero General Public License, version 3
* or later ("AGPL"), as published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#include "tutil.h"
const
uint8_t
COMMON_INPUTS
[]
=
{
84
,
// '\x00'
85
,
// '\x01'
86
,
// '\x02'
87
,
// '\x03'
88
,
// '\x04'
89
,
// '\x05'
90
,
// '\x06'
91
,
// '\x07'
92
,
// '\x08'
93
,
// '\t'
94
,
// '\n'
95
,
// '\x0b'
96
,
// '\x0c'
97
,
// '\r'
98
,
// '\x0e'
99
,
// '\x0f'
100
,
// '\x10'
101
,
// '\x11'
102
,
// '\x12'
103
,
// '\x13'
104
,
// '\x14'
105
,
// '\x15'
106
,
// '\x16'
107
,
// '\x17'
108
,
// '\x18'
109
,
// '\x19'
110
,
// '\x1a'
111
,
// '\x1b'
112
,
// '\x1c'
113
,
// '\x1d'
114
,
// '\x1e'
115
,
// '\x1f'
116
,
// ' '
80
,
// '!'
117
,
// '"'
118
,
// '#'
79
,
// '$'
39
,
// '%'
30
,
// '&'
81
,
// "'"
75
,
// '('
74
,
// ')'
82
,
// '*'
57
,
// '+'
66
,
// ','
16
,
// '-'
12
,
// '.'
2
,
// '/'
19
,
// '0'
20
,
// '1'
21
,
// '2'
27
,
// '3'
32
,
// '4'
29
,
// '5'
35
,
// '6'
36
,
// '7'
37
,
// '8'
34
,
// '9'
24
,
// ':'
73
,
// ';'
119
,
// '<'
23
,
// '='
120
,
// '>'
40
,
// '?'
83
,
// '@'
44
,
// 'A'
48
,
// 'B'
42
,
// 'C'
43
,
// 'D'
49
,
// 'E'
46
,
// 'F'
62
,
// 'G'
61
,
// 'H'
47
,
// 'I'
69
,
// 'J'
68
,
// 'K'
58
,
// 'L'
56
,
// 'M'
55
,
// 'N'
59
,
// 'O'
51
,
// 'P'
72
,
// 'Q'
54
,
// 'R'
45
,
// 'S'
52
,
// 'T'
64
,
// 'U'
65
,
// 'V'
63
,
// 'W'
71
,
// 'X'
67
,
// 'Y'
70
,
// 'Z'
77
,
// '['
121
,
// '\\'
78
,
// ']'
122
,
// '^'
31
,
// '_'
123
,
// '`'
4
,
// 'a'
25
,
// 'b'
9
,
// 'c'
17
,
// 'd'
1
,
// 'e'
26
,
// 'f'
22
,
// 'g'
13
,
// 'h'
7
,
// 'i'
50
,
// 'j'
38
,
// 'k'
14
,
// 'l'
15
,
// 'm'
10
,
// 'n'
3
,
// 'o'
8
,
// 'p'
60
,
// 'q'
6
,
// 'r'
5
,
// 's'
0
,
// 't'
18
,
// 'u'
33
,
// 'v'
11
,
// 'w'
41
,
// 'x'
28
,
// 'y'
53
,
// 'z'
124
,
// '{'
125
,
// '|'
126
,
// '}'
76
,
// '~'
127
,
// '\x7f'
128
,
// '\x80'
129
,
// '\x81'
130
,
// '\x82'
131
,
// '\x83'
132
,
// '\x84'
133
,
// '\x85'
134
,
// '\x86'
135
,
// '\x87'
136
,
// '\x88'
137
,
// '\x89'
138
,
// '\x8a'
139
,
// '\x8b'
140
,
// '\x8c'
141
,
// '\x8d'
142
,
// '\x8e'
143
,
// '\x8f'
144
,
// '\x90'
145
,
// '\x91'
146
,
// '\x92'
147
,
// '\x93'
148
,
// '\x94'
149
,
// '\x95'
150
,
// '\x96'
151
,
// '\x97'
152
,
// '\x98'
153
,
// '\x99'
154
,
// '\x9a'
155
,
// '\x9b'
156
,
// '\x9c'
157
,
// '\x9d'
158
,
// '\x9e'
159
,
// '\x9f'
160
,
// '\xa0'
161
,
// '¡'
162
,
// '¢'
163
,
// '£'
164
,
// '¤'
165
,
// '¥'
166
,
// '¦'
167
,
// '§'
168
,
// '¨'
169
,
// '©'
170
,
// 'ª'
171
,
// '«'
172
,
// '¬'
173
,
// '\xad'
174
,
// '®'
175
,
// '¯'
176
,
// '°'
177
,
// '±'
178
,
// '²'
179
,
// '³'
180
,
// '´'
181
,
// 'µ'
182
,
// '¶'
183
,
// '·'
184
,
// '¸'
185
,
// '¹'
186
,
// 'º'
187
,
// '»'
188
,
// '¼'
189
,
// '½'
190
,
// '¾'
191
,
// '¿'
192
,
// 'À'
193
,
// 'Á'
194
,
// 'Â'
195
,
// 'Ã'
196
,
// 'Ä'
197
,
// 'Å'
198
,
// 'Æ'
199
,
// 'Ç'
200
,
// 'È'
201
,
// 'É'
202
,
// 'Ê'
203
,
// 'Ë'
204
,
// 'Ì'
205
,
// 'Í'
206
,
// 'Î'
207
,
// 'Ï'
208
,
// 'Ð'
209
,
// 'Ñ'
210
,
// 'Ò'
211
,
// 'Ó'
212
,
// 'Ô'
213
,
// 'Õ'
214
,
// 'Ö'
215
,
// '×'
216
,
// 'Ø'
217
,
// 'Ù'
218
,
// 'Ú'
219
,
// 'Û'
220
,
// 'Ü'
221
,
// 'Ý'
222
,
// 'Þ'
223
,
// 'ß'
224
,
// 'à'
225
,
// 'á'
226
,
// 'â'
227
,
// 'ã'
228
,
// 'ä'
229
,
// 'å'
230
,
// 'æ'
231
,
// 'ç'
232
,
// 'è'
233
,
// 'é'
234
,
// 'ê'
235
,
// 'ë'
236
,
// 'ì'
237
,
// 'í'
238
,
// 'î'
239
,
// 'ï'
240
,
// 'ð'
241
,
// 'ñ'
242
,
// 'ò'
243
,
// 'ó'
244
,
// 'ô'
245
,
// 'õ'
246
,
// 'ö'
247
,
// '÷'
248
,
// 'ø'
249
,
// 'ù'
250
,
// 'ú'
251
,
// 'û'
252
,
// 'ü'
253
,
// 'ý'
254
,
// 'þ'
255
,
// 'ÿ'
};
char
const
COMMON_INPUTS_INV
[]
=
{
't'
,
'e'
,
'/'
,
'o'
,
'a'
,
's'
,
'r'
,
'i'
,
'p'
,
'c'
,
'n'
,
'w'
,
'.'
,
'h'
,
'l'
,
'm'
,
'-'
,
'd'
,
'u'
,
'0'
,
'1'
,
'2'
,
'g'
,
'='
,
':'
,
'b'
,
'f'
,
'3'
,
'y'
,
'5'
,
'&'
,
'_'
,
'4'
,
'v'
,
'9'
,
'6'
,
'7'
,
'8'
,
'k'
,
'%'
,
'?'
,
'x'
,
'C'
,
'D'
,
'A'
,
'S'
,
'F'
,
'I'
,
'B'
,
'E'
,
'j'
,
'P'
,
'T'
,
'z'
,
'R'
,
'N'
,
'M'
,
'+'
,
'L'
,
'O'
,
'q'
,
'H'
,
'G'
,
'W'
,
'U'
,
'V'
,
','
,
'Y'
,
'K'
,
'J'
,
'Z'
,
'X'
,
'Q'
,
';'
,
')'
,
'('
,
'~'
,
'['
,
']'
,
'$'
,
'!'
,
'\''
,
'*'
,
'@'
,
'\x00'
,
'\x01'
,
'\x02'
,
'\x03'
,
'\x04'
,
'\x05'
,
'\x06'
,
'\x07'
,
'\x08'
,
'\t'
,
'\n'
,
'\x0b'
,
'\x0c'
,
'\r'
,
'\x0e'
,
'\x0f'
,
'\x10'
,
'\x11'
,
'\x12'
,
'\x13'
,
'\x14'
,
'\x15'
,
'\x16'
,
'\x17'
,
'\x18'
,
'\x19'
,
'\x1a'
,
'\x1b'
,
'\x1c'
,
'\x1d'
,
'\x1e'
,
'\x1f'
,
' '
,
'"'
,
'#'
,
'<'
,
'>'
,
'\\'
,
'^'
,
'`'
,
'{'
,
'|'
,
'}'
,
'\x7f'
,
'\x80'
,
'\x81'
,
'\x82'
,
'\x83'
,
'\x84'
,
'\x85'
,
'\x86'
,
'\x87'
,
'\x88'
,
'\x89'
,
'\x8a'
,
'\x8b'
,
'\x8c'
,
'\x8d'
,
'\x8e'
,
'\x8f'
,
'\x90'
,
'\x91'
,
'\x92'
,
'\x93'
,
'\x94'
,
'\x95'
,
'\x96'
,
'\x97'
,
'\x98'
,
'\x99'
,
'\x9a'
,
'\x9b'
,
'\x9c'
,
'\x9d'
,
'\x9e'
,
'\x9f'
,
'\xa0'
,
'\xa1'
,
'\xa2'
,
'\xa3'
,
'\xa4'
,
'\xa5'
,
'\xa6'
,
'\xa7'
,
'\xa8'
,
'\xa9'
,
'\xaa'
,
'\xab'
,
'\xac'
,
'\xad'
,
'\xae'
,
'\xaf'
,
'\xb0'
,
'\xb1'
,
'\xb2'
,
'\xb3'
,
'\xb4'
,
'\xb5'
,
'\xb6'
,
'\xb7'
,
'\xb8'
,
'\xb9'
,
'\xba'
,
'\xbb'
,
'\xbc'
,
'\xbd'
,
'\xbe'
,
'\xbf'
,
'\xc0'
,
'\xc1'
,
'\xc2'
,
'\xc3'
,
'\xc4'
,
'\xc5'
,
'\xc6'
,
'\xc7'
,
'\xc8'
,
'\xc9'
,
'\xca'
,
'\xcb'
,
'\xcc'
,
'\xcd'
,
'\xce'
,
'\xcf'
,
'\xd0'
,
'\xd1'
,
'\xd2'
,
'\xd3'
,
'\xd4'
,
'\xd5'
,
'\xd6'
,
'\xd7'
,
'\xd8'
,
'\xd9'
,
'\xda'
,
'\xdb'
,
'\xdc'
,
'\xdd'
,
'\xde'
,
'\xdf'
,
'\xe0'
,
'\xe1'
,
'\xe2'
,
'\xe3'
,
'\xe4'
,
'\xe5'
,
'\xe6'
,
'\xe7'
,
'\xe8'
,
'\xe9'
,
'\xea'
,
'\xeb'
,
'\xec'
,
'\xed'
,
'\xee'
,
'\xef'
,
'\xf0'
,
'\xf1'
,
'\xf2'
,
'\xf3'
,
'\xf4'
,
'\xf5'
,
'\xf6'
,
'\xf7'
,
'\xf8'
,
'\xf9'
,
'\xfa'
,
'\xfb'
,
'\xfc'
,
'\xfd'
,
'\xfe'
,
'\xff'
,
};
source/libs/index/src/index_fst_node.c
0 → 100644
浏览文件 @
9188298e
/*
* Copyright (c) 2019 TAOS Data, Inc. <jhtao@taosdata.com>
*
* This program is free software: you can use, redistribute, and/or modify
* it under the terms of the GNU Affero General Public License, version 3
* or later ("AGPL"), as published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#include "index_fst_node.h"
// Allocates an empty builder node: not final, final output 0 and no
// transition array (trans == NULL).
//
// Returns NULL when the allocation fails.
FstBuilderNode *fstBuilderNodeDefault() {
  FstBuilderNode *bn = malloc(sizeof(FstBuilderNode));
  if (bn == NULL) {
    return NULL;
  }
  bn->isFinal = false;
  bn->finalOutput = 0;
  bn->trans = NULL;
  return bn;
}
// Creates a deep copy of `src`: the flags are copied and every transition is
// copied into a freshly allocated array.
//
// Returns NULL when `src` is NULL or when an allocation fails; nothing is
// leaked on failure. A source without a transition array (trans == NULL, as
// produced by fstBuilderNodeDefault) is cloned into a node with an empty
// array.
FstBuilderNode *fstBuilderNodeClone(FstBuilderNode *src) {
  if (src == NULL) {
    return NULL;
  }

  FstBuilderNode *node = malloc(sizeof(FstBuilderNode));
  if (node == NULL) {
    return NULL;
  }

  size_t sz = (src->trans != NULL) ? taosArrayGetSize(src->trans) : 0;

  SArray *trans = taosArrayInit(sz, sizeof(FstTransition));
  if (trans == NULL) {
    free(node);
    return NULL;
  }

  for (size_t i = 0; i < sz; i++) {
    FstTransition *tran = taosArrayGet(src->trans, i);
    FstTransition t = *tran;
    taosArrayPush(trans, &t);
  }

  node->trans = trans;
  node->isFinal = src->isFinal;
  node->finalOutput = src->finalOutput;
  return node;
}
// Transfers the contents of `src` into `dst`. Despite the name this is a
// move, not a copy: `dst` takes over `src`'s transition array and
// `src->trans` is reset to NULL. `src` itself is not destroyed here; that
// remains the caller's business.
//
// Does nothing when either pointer is NULL or when both point to the same
// node (a self-move would otherwise set the node's own `trans` to NULL).
//
// NOTE(review): the array previously referenced by dst->trans is overwritten
// without being released — confirm who owns it.
void fstBuilderNodeCloneFrom(FstBuilderNode *dst, FstBuilderNode *src) {
  if (dst == NULL || src == NULL || dst == src) {
    return;
  }

  dst->isFinal = src->isFinal;
  dst->finalOutput = src->finalOutput;
  dst->trans = src->trans;

  src->trans = NULL;
}
source/libs/index/src/index_fst_registry.c
0 → 100644
浏览文件 @
9188298e
/*
* Copyright (c) 2019 TAOS Data, Inc. <jhtao@taosdata.com>
*
* This program is free software: you can use, redistribute, and/or modify
* it under the terms of the GNU Affero General Public License, version 3
* or later ("AGPL"), as published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#include "index_fst_registry.h"
// Maps a builder node to a bucket index in [0, registry->tableSize).
//
// The hash folds, in this order, the final flag, the final output and then
// the (inp, out, addr) triple of every transition: each value is XOR-ed into
// the accumulator, which is then multiplied by the prime.
uint64_t fstRegistryHash(FstRegistry *registry, FstBuilderNode *bNode) {
  //TODO(yihaoDeng): refactor later
  const uint64_t prime = 1099511628211;
  uint64_t acc = 14695981039346656037u;

  acc ^= (uint64_t)bNode->isFinal;
  acc *= prime;
  acc ^= (bNode)->finalOutput;
  acc *= prime;

  const uint32_t nTrans = (uint32_t)taosArrayGetSize(bNode->trans);
  for (uint32_t idx = 0; idx != nTrans; ++idx) {
    FstTransition *trn = taosArrayGet(bNode->trans, idx);

    acc ^= (uint64_t)(trn->inp);
    acc *= prime;
    acc ^= (uint64_t)(trn->out);
    acc *= prime;
    acc ^= (uint64_t)(trn->addr);
    acc *= prime;
  }

  return acc % (registry->tableSize);
}
// Exchanges the `addr` and `node` members of the cells at positions `a` and
// `b`. Silently does nothing when either position is outside the array.
static void fstRegistryCellSwap(SArray *arr, uint32_t a, uint32_t b) {
  const size_t nCells = taosArrayGetSize(arr);
  if (!(a < nCells && b < nCells)) {
    return;
  }

  FstRegistryCell *lhs = (FstRegistryCell *)taosArrayGet(arr, a);
  FstRegistryCell *rhs = (FstRegistryCell *)taosArrayGet(arr, b);

  // Only these two members are exchanged, field by field.
  FstRegistryCell saved = {.addr = rhs->addr, .node = rhs->node};

  rhs->addr = lhs->addr;
  rhs->node = lhs->node;

  lhs->addr = saved.addr;
  lhs->node = saved.node;
}
// Moves the cell at position `start` towards the front until it sits at
// position `end`, shifting the cells in between one slot back (a chain of
// adjacent swaps).
//
// arr:   registry cell array.
// start: current position of the cell to promote; must be >= end (asserted).
// end:   target position.
//
// Does nothing when either position is outside the array. The guard used to
// require BOTH positions to be out of range before bailing out, so a call
// with only one bad position slipped through and relied on the per-swap
// bounds check.
static void fstRegistryCellPromote(SArray *arr, uint32_t start, uint32_t end) {
  size_t sz = taosArrayGetSize(arr);
  if (start >= sz || end >= sz) {
    return;
  }

  assert(start >= end);

  // pos > end >= 0 guarantees pos - 1 never underflows; iterating in the
  // unsigned domain also avoids the former uint32_t -> int32_t conversions.
  for (uint32_t pos = start; pos > end; pos--) {
    fstRegistryCellSwap(arr, pos - 1, pos);
  }
}
// True when the cell holds no address yet, i.e. its addr equals NONE_ADDRESS.
#define FST_REGISTRY_CELL_IS_EMPTY(cell) ((cell)->addr == NONE_ADDRESS)
// Stores `address` in the cell. The parameter must not be called `addr`:
// the preprocessor would then also replace the member name in `->addr`.
#define FST_REGISTRY_CELL_INSERT(cell, address) do { (cell)->addr = (address); } while (0)
FstRegistry
*
fstRegistryCreate
(
uint64_t
tableSize
,
uint64_t
mruSize
)
{
FstRegistry
*
registry
=
malloc
(
sizeof
(
FstRegistry
));
if
(
registry
==
NULL
)
{
return
NULL
;}
uint64_t
nCells
=
tableSize
*
mruSize
;
SArray
*
tb
=
(
SArray
*
)
taosArrayInit
(
nCells
,
sizeof
(
FstRegistryCell
));
for
(
uint64_t
i
=
0
;
i
<
nCells
;
i
++
)
{
FstRegistryCell
*
cell
=
taosArrayGet
(
tb
,
i
);
cell
->
addr
=
NONE_ADDRESS
;
cell
->
node
=
fstBuilderNodeDefault
();
}
registry
->
table
=
tb
;
registry
->
tableSize
=
tableSize
;
registry
->
mruSize
=
mruSize
;
return
registry
;
}
// Looks `bNode` up in its hash bucket and returns a newly allocated entry
// describing the outcome.
//
//  - FOUND:    entry->addr holds the address stored in the matching cell;
//              in buckets with more than one cell the match is moved to the
//              front of the bucket.
//  - NOTFOUND: the last cell of the bucket takes over bNode's contents
//              (fstBuilderNodeCloneFrom moves them, so bNode->trans becomes
//              NULL), is moved to the front, and entry->cell points at it.
//
// Returns NULL when the table is empty or the entry cannot be allocated.
// The returned entry is allocated with malloc; nothing in this function
// releases it.
//
// NOTE(review): cells are matched by pointer identity (cell->node == bNode),
// not by node contents — confirm this is intended ("refactor later").
FstRegistryEntry *fstRegistryGetEntry(FstRegistry *registry, FstBuilderNode *bNode) {
  if (taosArrayGetSize(registry->table) <= 0) {
    return NULL;
  }

  uint64_t bucket = fstRegistryHash(registry, bNode);
  uint64_t start = registry->mruSize * bucket;
  uint64_t end = start + registry->mruSize;

  FstRegistryEntry *entry = malloc(sizeof(FstRegistryEntry));
  if (entry == NULL) {
    return NULL;
  }

  if (end - start == 1) {
    // Bucket with a single cell: hit or overwrite.
    FstRegistryCell *cell = taosArrayGet(registry->table, start);
    //cell->isNode &&
    if (cell->addr != NONE_ADDRESS && cell->node == bNode) {
      entry->state = FOUND;
      entry->addr = cell->addr;
      return entry;
    } else {
      // clone from bNode, refactor later
      fstBuilderNodeCloneFrom(cell->node, bNode);
      entry->state = NOTFOUND;
      entry->cell = cell;
      // copy or not
    }
  } else if (end - start == 2) {
    // Bucket with two cells: check the first, then the second.
    FstRegistryCell *cell1 = taosArrayGet(registry->table, start);
    if (cell1->addr != NONE_ADDRESS && cell1->node == bNode) {
      entry->state = FOUND;
      entry->addr = cell1->addr;
      return entry;
    }

    FstRegistryCell *cell2 = taosArrayGet(registry->table, start + 1);
    if (cell2->addr != NONE_ADDRESS && cell2->node == bNode) {
      entry->state = FOUND;
      entry->addr = cell2->addr;
      // must swap here
      fstRegistryCellSwap(registry->table, start, start + 1);
      return entry;
    }

    //clone from bNode, refactor later
    fstBuilderNodeCloneFrom(cell2->node, bNode);

    fstRegistryCellSwap(registry->table, start, start + 1);
    FstRegistryCell *cCell = taosArrayGet(registry->table, start);
    entry->state = NOTFOUND;
    entry->cell = cCell;
  } else {
    // General case: scan the bucket. The index is 64-bit so that it is
    // compared against `end` without truncation.
    uint64_t i = start;
    for (; i < end; i++) {
      FstRegistryCell *cell = (FstRegistryCell *)taosArrayGet(registry->table, i);
      if (cell->addr != NONE_ADDRESS && cell->node == bNode) {
        entry->state = FOUND;
        entry->addr = cell->addr;
        fstRegistryCellPromote(registry->table, i, start);
        break;
      }
    }
    if (i >= end) {
      // Miss: recycle the last cell of the bucket.
      uint64_t last = end - 1;
      FstRegistryCell *cell = (FstRegistryCell *)taosArrayGet(registry->table, last);
      //clone from bNode, refactor later
      fstBuilderNodeCloneFrom(cell->node, bNode);

      fstRegistryCellPromote(registry->table, last, start);
      FstRegistryCell *cCell = taosArrayGet(registry->table, start);
      entry->state = NOTFOUND;
      entry->cell = cCell;
    }
  }
  return entry;
}
source/libs/index/src/index_fst_util.c
0 → 100644
浏览文件 @
9188298e
/*
* Copyright (c) 2019 TAOS Data, Inc. <jhtao@taosdata.com>
*
* This program is free software: you can use, redistribute, and/or modify
* it under the terms of the GNU Affero General Public License, version 3
* or later ("AGPL"), as published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#include "index_fst_util.h"
// A sentinel address used to indicate an empty final state.
const CompiledAddr EMPTY_ADDRESS = 0;
// A sentinel address used to indicate an invalid state. Registry cells that
// hold no node yet carry this value in their addr field.
const CompiledAddr NONE_ADDRESS = 1;

// This version number is written to every finite state transducer created by
// this library. When a finite state transducer is read, its version number is
// checked against this value.
const uint64_t version = 3;
// The threshold (in number of transitions) at which an index is created for
// a node's transitions. This speeds up lookup time at the expense of FST size.
const uint64_t TRANS_INDEX_THRESHOLD = 32;
//uint8_t commonInput(uint8_t idx) {
// if (idx == 0) { return -1; }
// else {
// return COMMON_INPUTS_INV[idx - 1];
// }
//}
//
//uint8_t commonIdx(uint8_t v, uint8_t max) {
// uint8_t v = ((uint16_t)tCOMMON_INPUTS[v] + 1)%256;
// return v > max ? 0: v;
//}
// Returns the smallest number of bytes (1..8) able to hold `n`.
// Zero still needs one byte.
uint8_t packSize(uint64_t n) {
  uint8_t bytes = 1;
  // Add a byte for as long as bits remain above the bytes counted so far.
  while (bytes < 8 && (n >> (8 * bytes)) != 0) {
    bytes++;
  }
  return bytes;
}
// Decodes an unsigned integer stored least-significant byte first.
//
// ch: bytes to decode.
// sz: number of bytes to read; at most 8 bytes are consumed, any excess is
//     ignored because it cannot fit into the 64-bit result.
//
// Returns the decoded value (0 when sz is 0).
uint64_t unpackUint64(uint8_t *ch, uint8_t sz) {
  // Must start from 0: the bytes are OR-ed into this accumulator.
  uint64_t n = 0;
  for (uint8_t i = 0; i < sz && i < sizeof(n); i++) {
    // Widen before shifting; a uint8_t is promoted to int, and shifting an
    // int by 32 or more bits (or into its sign bit) is undefined.
    n |= (uint64_t)ch[i] << (8 * i);
  }
  return n;
}
// Number of bytes needed to encode the distance from a node to the target of
// one of its transitions. A transition to EMPTY_ADDRESS is encoded as
// EMPTY_ADDRESS itself rather than as a distance.
uint8_t packDeltaSize(CompiledAddr nodeAddr, CompiledAddr transAddr) {
  if (transAddr != EMPTY_ADDRESS) {
    return packSize(nodeAddr - transAddr);
  }
  return packSize(EMPTY_ADDRESS);
}
// Decodes a transition target that was stored as a distance from its node.
//
// data:     encoded distance, least-significant byte first.
// len:      number of encoded bytes; narrowed to uint8_t for unpackUint64.
// nodeAddr: address of the node the distance is relative to.
//
// Returns EMPTY_ADDRESS when the stored value equals EMPTY_ADDRESS, otherwise
// nodeAddr minus the stored distance.
CompiledAddr unpackDelta(char *data, uint64_t len, uint64_t nodeAddr) {
  // unpackUint64 takes (uint8_t *, uint8_t); convert explicitly instead of
  // passing an incompatible pointer type and relying on implicit narrowing.
  uint64_t delta = unpackUint64((uint8_t *)data, (uint8_t)len);
  // delta_add = u64_to_usize
  if (delta == EMPTY_ADDRESS) {
    return EMPTY_ADDRESS;
  } else {
    return nodeAddr - delta;
  }
}
// fst slice func
FstSlice
fstSliceCreate
(
uint8_t
*
data
,
uint64_t
dLen
)
{
FstSlice
slice
=
{.
data
=
data
,
.
dLen
=
dLen
,
.
start
=
0
,
.
end
=
dLen
-
1
};
return
slice
;
}
// Returns a sub-view [start, end] (both inclusive) of `slice`. The underlying
// buffer is shared, not duplicated, and dLen keeps the full buffer length.
//
// When the range is invalid (start or end outside the buffer, or start > end)
// the result is an all-zero slice: data == NULL and dLen == 0, which
// fstSliceEmpty reports as empty. Previously only `data` was set on that path
// and the remaining fields were left indeterminate.
FstSlice fstSliceCopy(FstSlice *slice, uint32_t start, uint32_t end) {
  FstSlice t = {0};
  if (start >= slice->dLen || end >= slice->dLen || start > end) {
    t.data = NULL;
    return t;
  }

  t.data = slice->data;
  t.dLen = slice->dLen;
  t.start = start;
  t.end = end;
  return t;
}
// A slice is empty when it has no buffer or its buffer length is not positive.
bool fstSliceEmpty(FstSlice *slice) {
  if (slice->data == NULL) {
    return true;
  }
  return slice->dLen <= 0;
}
source/libs/index/test/CMakeLists.txt
0 → 100644
浏览文件 @
9188298e
# Unit-test executable for the index library; its sources are attached below.
add_executable(indexTest "")
target_sources(indexTest
    PRIVATE
    "../src/index.c"
    "indexTests.cpp"
)
# Public index headers plus the library's internal headers (../inc).
target_include_directories(indexTest
    PUBLIC
    "${CMAKE_SOURCE_DIR}/include/libs/index"
    "${CMAKE_CURRENT_SOURCE_DIR}/../inc"
)
# NOTE(review): ../src/index.c is compiled into the test above and the `index`
# target is linked here as well — confirm this does not define symbols twice.
target_link_libraries(indexTest
    os
    util
    common
    gtest_main
    index
)

# Registers the executable with CTest under the name index_test.
add_test(
    NAME index_test
    COMMAND indexTest
)
source/libs/index/test/indexTests.cpp
浏览文件 @
9188298e
#include <gtest/gtest.h>
#include <string>
#include <iostream>
#include "index.h"
#include "indexInt.h"
// Smoke test: opens an index under ./test, writes 100000 documents with four
// tags each, runs a two-term MUST query and prints the matching ids.
// The test fails right away when the index cannot be opened instead of
// carrying on with a NULL handle, and the result array is released at the end.
TEST(IndexTest, index_create_test) {
  SIndexOpts *opts = indexOptsCreate();
  SIndex *index = indexOpen(opts, "./test");
  if (index == NULL) {
    std::cout << "index open failed" << std::endl;
    indexOptsDestroy(opts);
    FAIL() << "indexOpen returned NULL";
  }

  // write
  for (int i = 0; i < 100000; i++) {
    SIndexMultiTerm *terms = indexMultiTermCreate();
    std::string val = "field";

    indexMultiTermAdd(terms, "tag1", strlen("tag1"), val.c_str(), val.size());

    val.append(std::to_string(i));
    indexMultiTermAdd(terms, "tag2", strlen("tag2"), val.c_str(), val.size());

    val.insert(0, std::to_string(i));
    indexMultiTermAdd(terms, "tag3", strlen("tag3"), val.c_str(), val.size());

    val.append("const");
    indexMultiTermAdd(terms, "tag4", strlen("tag4"), val.c_str(), val.size());

    indexPut(index, terms, i);
    indexMultiTermDestroy(terms);
  }

  // query
  SIndexMultiTermQuery *multiQuery = indexMultiTermQueryCreate(MUST);

  indexMultiTermQueryAdd(multiQuery, "tag1", strlen("tag1"), "field", strlen("field"), QUERY_PREFIX);
  indexMultiTermQueryAdd(multiQuery, "tag3", strlen("tag3"), "0field0", strlen("0field0"), QUERY_TERM);

  SArray *result = (SArray *)taosArrayInit(10, sizeof(int));
  indexSearch(index, multiQuery, result);

  std::cout << "taos'size : " << taosArrayGetSize(result) << std::endl;
  for (size_t i = 0; i < taosArrayGetSize(result); i++) {
    int *v = (int *)taosArrayGet(result, i);
    std::cout << "value --->" << *v << std::endl;
  }

  // cleanup
  taosArrayDestroy(result);
  indexMultiTermQueryDestroy(multiQuery);
  indexOptsDestroy(opts);
  indexClose(index);
}
source/libs/sync/inc/raft.h
浏览文件 @
9188298e
...
@@ -18,6 +18,7 @@
...
@@ -18,6 +18,7 @@
#include "sync.h"
#include "sync.h"
#include "sync_type.h"
#include "sync_type.h"
#include "thash.h"
#include "raft_message.h"
#include "raft_message.h"
#include "sync_raft_impl.h"
#include "sync_raft_impl.h"
#include "sync_raft_quorum.h"
#include "sync_raft_quorum.h"
...
@@ -43,9 +44,9 @@ struct SSyncRaft {
...
@@ -43,9 +44,9 @@ struct SSyncRaft {
// owner sync node
// owner sync node
SSyncNode
*
pNode
;
SSyncNode
*
pNode
;
SSyncCluster
cluster
;
// hash map nodeId -> SNodeInfo*
SHashObj
*
nodeInfoMap
;
int
selfIndex
;
SyncNodeId
selfId
;
SyncNodeId
selfId
;
SyncGroupId
selfGroupId
;
SyncGroupId
selfGroupId
;
...
...
source/libs/sync/inc/raft_log.h
浏览文件 @
9188298e
...
@@ -39,8 +39,6 @@ struct SSyncRaftLog {
...
@@ -39,8 +39,6 @@ struct SSyncRaftLog {
SyncIndex
commitIndex
;
SyncIndex
commitIndex
;
SyncIndex
appliedIndex
;
SyncIndex
appliedIndex
;
};
};
SSyncRaftLog
*
syncRaftLogOpen
();
SSyncRaftLog
*
syncRaftLogOpen
();
...
...
source/libs/sync/inc/raft_replication.h
浏览文件 @
9188298e
...
@@ -20,11 +20,11 @@
...
@@ -20,11 +20,11 @@
#include "syncInt.h"
#include "syncInt.h"
#include "sync_type.h"
#include "sync_type.h"
// syncRaft
Replicate
sends an append RPC with new entries to the given peer,
// syncRaft
MaybeSendAppend
sends an append RPC with new entries to the given peer,
// if necessary. Returns true if a message was sent. The sendIfEmpty
// if necessary. Returns true if a message was sent. The sendIfEmpty
// argument controls whether messages with no entries will be sent
// argument controls whether messages with no entries will be sent
// ("empty" messages are useful to convey updated Commit indexes, but
// ("empty" messages are useful to convey updated Commit indexes, but
// are undesirable when we're sending multiple messages in a batch).
// are undesirable when we're sending multiple messages in a batch).
bool
syncRaft
Replicate
(
SSyncRaft
*
pRaft
,
SSyncRaftProgress
*
progress
,
bool
sendIfEmpty
);
bool
syncRaft
MaybeSendAppend
(
SSyncRaft
*
pRaft
,
SSyncRaftProgress
*
progress
,
bool
sendIfEmpty
);
#endif
/* TD_SYNC_RAFT_REPLICATION_H */
#endif
/* TD_SYNC_RAFT_REPLICATION_H */
source/libs/sync/
src/raft_configuration.c
→
source/libs/sync/
inc/sync_const.h
浏览文件 @
9188298e
...
@@ -13,13 +13,13 @@
...
@@ -13,13 +13,13 @@
* along with this program. If not, see <http://www.gnu.org/licenses/>.
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
*/
#i
nclude "raft_configuration.h"
#i
fndef _TD_LIBS_SYNC_CONST_H
#
include "raft.h"
#
define _TD_LIBS_SYNC_CONST_H
int
syncRaftConfigurationIndexOfNode
(
SSyncRaft
*
pRaft
,
SyncNodeId
id
)
{
#include "sync.h"
return
(
int
)(
id
);
}
int
syncRaftConfigurationVoterCount
(
SSyncRaft
*
pRaft
)
{
static
int
kSyncRaftMaxInflghtMsgs
=
20
;
return
pRaft
->
cluster
.
replica
;
}
static
SyncIndex
kMaxCommitIndex
=
UINT64_MAX
;
\ No newline at end of file
#endif
/* _TD_LIBS_SYNC_CONST_H */
source/libs/sync/inc/sync_raft_config_change.h
浏览文件 @
9188298e
...
@@ -33,6 +33,11 @@ struct SSyncRaftChanger {
...
@@ -33,6 +33,11 @@ struct SSyncRaftChanger {
typedef
int
(
*
configChangeFp
)(
SSyncRaftChanger
*
changer
,
const
SSyncConfChangeSingleArray
*
css
,
typedef
int
(
*
configChangeFp
)(
SSyncRaftChanger
*
changer
,
const
SSyncConfChangeSingleArray
*
css
,
SSyncRaftProgressTrackerConfig
*
config
,
SSyncRaftProgressMap
*
progressMap
);
SSyncRaftProgressTrackerConfig
*
config
,
SSyncRaftProgressMap
*
progressMap
);
// Simple carries out a series of configuration changes that (in aggregate)
// mutates the incoming majority config Voters[0] by at most one. This method
// will return an error if that is not the case, if the resulting quorum is
// zero, or if the configuration is in a joint state (i.e. if there is an
// outgoing configuration).
int
syncRaftChangerSimpleConfig
(
SSyncRaftChanger
*
changer
,
const
SSyncConfChangeSingleArray
*
css
,
int
syncRaftChangerSimpleConfig
(
SSyncRaftChanger
*
changer
,
const
SSyncConfChangeSingleArray
*
css
,
SSyncRaftProgressTrackerConfig
*
config
,
SSyncRaftProgressMap
*
progressMap
);
SSyncRaftProgressTrackerConfig
*
config
,
SSyncRaftProgressMap
*
progressMap
);
...
...
source/libs/sync/inc/sync_raft_impl.h
浏览文件 @
9188298e
...
@@ -28,6 +28,8 @@ void syncRaftBecomeLeader(SSyncRaft* pRaft);
...
@@ -28,6 +28,8 @@ void syncRaftBecomeLeader(SSyncRaft* pRaft);
void
syncRaftStartElection
(
SSyncRaft
*
pRaft
,
ESyncRaftElectionType
cType
);
void
syncRaftStartElection
(
SSyncRaft
*
pRaft
,
ESyncRaftElectionType
cType
);
void
syncRaftCampaign
(
SSyncRaft
*
pRaft
,
ESyncRaftElectionType
cType
);
void
syncRaftTriggerHeartbeat
(
SSyncRaft
*
pRaft
);
void
syncRaftTriggerHeartbeat
(
SSyncRaft
*
pRaft
);
void
syncRaftRandomizedElectionTimeout
(
SSyncRaft
*
pRaft
);
void
syncRaftRandomizedElectionTimeout
(
SSyncRaft
*
pRaft
);
...
@@ -51,4 +53,6 @@ void syncRaftLoadState(SSyncRaft* pRaft, const SSyncServerState* serverState);
...
@@ -51,4 +53,6 @@ void syncRaftLoadState(SSyncRaft* pRaft, const SSyncServerState* serverState);
void
syncRaftBroadcastAppend
(
SSyncRaft
*
pRaft
);
void
syncRaftBroadcastAppend
(
SSyncRaft
*
pRaft
);
SNodeInfo
*
syncRaftGetNodeById
(
SSyncRaft
*
pRaft
,
SyncNodeId
id
);
#endif
/* _TD_LIBS_SYNC_RAFT_IMPL_H */
#endif
/* _TD_LIBS_SYNC_RAFT_IMPL_H */
source/libs/sync/inc/sync_raft_inflights.h
浏览文件 @
9188298e
...
@@ -18,54 +18,47 @@
...
@@ -18,54 +18,47 @@
#include "sync.h"
#include "sync.h"
/**
// Inflights limits the number of MsgApp (represented by the largest index
* SSyncRaftInflights limits the number of MsgApp (represented by the largest index
// contained within) sent to followers but not yet acknowledged by them. Callers
* contained within) sent to followers but not yet acknowledged by them. Callers
// use Full() to check whether more messages can be sent, call Add() whenever
* use syncRaftInflightFull() to check whether more messages can be sent,
// they are sending a new append, and release "quota" via FreeLE() whenever an
* call syncRaftInflightAdd() whenever they are sending a new append,
// ack is received.
* and release "quota" via FreeLE() whenever an ack is received.
**/
typedef
struct
SSyncRaftInflights
{
typedef
struct
SSyncRaftInflights
{
/
* the starting index in the buffer */
/
/ the starting index in the buffer
int
start
;
int
start
;
/
* number of inflights in the buffer */
/
/ number of inflights in the buffer
int
count
;
int
count
;
/
* the size of the buffer */
/
/ the size of the buffer
int
size
;
int
size
;
/**
// buffer contains the index of the last entry
* buffer contains the index of the last entry
// inside one message.
* inside one message.
**/
SyncIndex
*
buffer
;
SyncIndex
*
buffer
;
}
SSyncRaftInflights
;
}
SSyncRaftInflights
;
SSyncRaftInflights
*
syncRaftOpenInflights
(
int
size
);
SSyncRaftInflights
*
syncRaftOpenInflights
(
int
size
);
void
syncRaftCloseInflights
(
SSyncRaftInflights
*
);
void
syncRaftCloseInflights
(
SSyncRaftInflights
*
);
// reset frees all inflights.
static
FORCE_INLINE
void
syncRaftInflightReset
(
SSyncRaftInflights
*
inflights
)
{
static
FORCE_INLINE
void
syncRaftInflightReset
(
SSyncRaftInflights
*
inflights
)
{
inflights
->
count
=
0
;
inflights
->
count
=
0
;
inflights
->
start
=
0
;
inflights
->
start
=
0
;
}
}
// Full returns true if no more messages can be sent at the moment.
static
FORCE_INLINE
bool
syncRaftInflightFull
(
SSyncRaftInflights
*
inflights
)
{
static
FORCE_INLINE
bool
syncRaftInflightFull
(
SSyncRaftInflights
*
inflights
)
{
return
inflights
->
count
==
inflights
->
size
;
return
inflights
->
count
==
inflights
->
size
;
}
}
/**
// Add notifies the Inflights that a new message with the given index is being
* syncRaftInflightAdd notifies the Inflights that a new message with the given index is being
// dispatched. Full() must be called prior to Add() to verify that there is room
* dispatched. syncRaftInflightFull() must be called prior to syncRaftInflightAdd()
// for one more message, and consecutive calls to add Add() must provide a
* to verify that there is room for one more message,
// monotonic sequence of indexes.
* and consecutive calls to add syncRaftInflightAdd() must provide a
* monotonic sequence of indexes.
**/
void
syncRaftInflightAdd
(
SSyncRaftInflights
*
inflights
,
SyncIndex
inflightIndex
);
void
syncRaftInflightAdd
(
SSyncRaftInflights
*
inflights
,
SyncIndex
inflightIndex
);
/**
// FreeLE frees the inflights smaller or equal to the given `to` flight.
* syncRaftInflightFreeLE frees the inflights smaller or equal to the given `to` flight.
**/
void
syncRaftInflightFreeLE
(
SSyncRaftInflights
*
inflights
,
SyncIndex
toIndex
);
void
syncRaftInflightFreeLE
(
SSyncRaftInflights
*
inflights
,
SyncIndex
toIndex
);
/**
/**
...
...
source/libs/sync/inc/sync_raft_node_map.h
0 → 100644
浏览文件 @
9188298e
/*
* Copyright (c) 2019 TAOS Data, Inc. <cli@taosdata.com>
*
* This program is free software: you can use, redistribute, and/or modify
* it under the terms of the GNU Affero General Public License, version 3
* or later ("AGPL"), as published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#ifndef _TD_LIBS_SYNC_RAFT_NODE_MAP_H
#define _TD_LIBS_SYNC_RAFT_NODE_MAP_H
#include "thash.h"
#include "sync.h"
#include "sync_type.h"
// Set of raft nodes backed by a hash object.
// NOTE(review): presumably keyed by SyncNodeId — confirm against the
// implementation file.
struct SSyncRaftNodeMap {
  SHashObj* nodeIdMap;
};

// Lifecycle helpers.
// NOTE(review): exact semantics (what Free releases, whether Clear keeps the
// hash object) are not visible in this header — confirm in the .c file.
void syncRaftInitNodeMap(SSyncRaftNodeMap* nodeMap);
void syncRaftFreeNodeMap(SSyncRaftNodeMap* nodeMap);

void syncRaftClearNodeMap(SSyncRaftNodeMap* nodeMap);

// Membership query; takes the map as const.
bool syncRaftIsInNodeMap(const SSyncRaftNodeMap* nodeMap, SyncNodeId nodeId);

// NOTE(review): parameter names suggest `from`/`nodeMap` is the source and
// `to` is the destination — confirm.
void syncRaftCopyNodeMap(SSyncRaftNodeMap* from, SSyncRaftNodeMap* to);

void syncRaftUnionNodeMap(SSyncRaftNodeMap* nodeMap, SSyncRaftNodeMap* to);

void syncRaftAddToNodeMap(SSyncRaftNodeMap* nodeMap, SyncNodeId nodeId);

void syncRaftRemoveFromNodeMap(SSyncRaftNodeMap* nodeMap, SyncNodeId nodeId);

int32_t syncRaftNodeMapSize(const SSyncRaftNodeMap* nodeMap);

// Returns true when the end of the map has been reached.
// NOTE(review): presumably *pId carries the iteration cursor in and the next
// node id out — confirm.
bool syncRaftIterateNodeMap(const SSyncRaftNodeMap* nodeMap, SyncNodeId *pId);

bool syncRaftIsAllNodeInProgressMap(SSyncRaftNodeMap* nodeMap, SSyncRaftProgressMap* progressMap);
#endif
/* _TD_LIBS_SYNC_RAFT_NODE_MAP_H */
\ No newline at end of file
source/libs/sync/inc/sync_raft_progress.h
浏览文件 @
9188298e
...
@@ -18,6 +18,7 @@
...
@@ -18,6 +18,7 @@
#include "sync_type.h"
#include "sync_type.h"
#include "sync_raft_inflights.h"
#include "sync_raft_inflights.h"
#include "thash.h"
/**
/**
* State defines how the leader should interact with the follower.
* State defines how the leader should interact with the follower.
...
@@ -64,141 +65,123 @@ static const char* kProgressStateString[] = {
...
@@ -64,141 +65,123 @@ static const char* kProgressStateString[] = {
"Snapshot"
,
"Snapshot"
,
};
};
/**
// Progress represents a follower’s progress in the view of the leader. Leader
* Progress represents a follower’s progress in the view of the leader. Leader maintains
// maintains progresses of all followers, and sends entries to the follower
* progresses of all followers, and sends entries to the follower based on its progress.
// based on its progress.
**/
//
// NB(tbg): Progress is basically a state machine whose transitions are mostly
// strewn around `*raft.raft`. Additionally, some fields are only used when in a
// certain State. All of this isn't ideal.
struct
SSyncRaftProgress
{
struct
SSyncRaftProgress
{
// index in raft cluster config
SyncGroupId
groupId
;
int
selfIndex
;
SyncNodeId
id
;
SyncNodeId
id
;
int16_t
refCount
;
SyncIndex
nextIndex
;
SyncIndex
nextIndex
;
SyncIndex
matchIndex
;
SyncIndex
matchIndex
;
/**
// State defines how the leader should interact with the follower.
* State defines how the leader should interact with the follower.
//
*
// When in StateProbe, leader sends at most one replication message
* When in StateProbe, leader sends at most one replication message
// per heartbeat interval. It also probes actual progress of the follower.
* per heartbeat interval. It also probes actual progress of the follower.
//
*
// When in StateReplicate, leader optimistically increases next
* When in StateReplicate, leader optimistically increases next
// to the latest entry sent after sending replication message. This is
* to the latest entry sent after sending replication message. This is
// an optimized state for fast replicating log entries to the follower.
* an optimized state for fast replicating log entries to the follower.
//
*
// When in StateSnapshot, leader should have sent out snapshot
* When in StateSnapshot, leader should have sent out snapshot
// before and stops sending any replication message.
* before and stops sending any replication message.
**/
ESyncRaftProgressState
state
;
ESyncRaftProgressState
state
;
/**
// PendingSnapshot is used in StateSnapshot.
* pendingSnapshotIndex is used in PROGRESS_STATE_SNAPSHOT.
// If there is a pending snapshot, the pendingSnapshot will be set to the
* If there is a pending snapshot, the pendingSnapshotIndex will be set to the
// index of the snapshot. If pendingSnapshot is set, the replication process of
* index of the snapshot. If pendingSnapshotIndex is set, the replication process of
// this Progress will be paused. raft will not resend snapshot until the pending one
* this Progress will be paused. raft will not resend snapshot until the pending one
// is reported to be failed.
* is reported to be failed.
**/
SyncIndex
pendingSnapshotIndex
;
SyncIndex
pendingSnapshotIndex
;
/
**
/
/ RecentActive is true if the progress is recently active. Receiving any messages
* recentActive is true if the progress is recently active. Receiving any messages
// from the corresponding follower indicates the progress is active.
* from the corresponding follower indicates the progress is active
.
// RecentActive can be reset to false after an election timeout
.
* RecentActive can be reset to false after an election timeout.
//
**/
// TODO(tbg): the leader should always have this set to true.
bool
recentActive
;
bool
recentActive
;
/**
// ProbeSent is used while this follower is in StateProbe. When ProbeSent is
* probeSent is used while this follower is in StateProbe. When probeSent is
// true, raft should pause sending replication message to this peer until
* true, raft should pause sending replication message to this peer until
// ProbeSent is reset. See ProbeAcked() and IsPaused().
* probeSent is reset. See ProbeAcked() and IsPaused().
**/
bool
probeSent
;
bool
probeSent
;
/**
// Inflights is a sliding window for the inflight messages.
* inflights is a sliding window for the inflight messages.
// Each inflight message contains one or more log entries.
* Each inflight message contains one or more log entries.
// The max number of entries per message is defined in raft config as MaxSizePerMsg.
* The max number of entries per message is defined in raft config as MaxSizePerMsg.
// Thus inflight effectively limits both the number of inflight messages
* Thus inflight effectively limits both the number of inflight messages
// and the bandwidth each Progress can use.
* and the bandwidth each Progress can use.
// When inflights is Full, no more message should be sent.
* When inflights is Full, no more message should be sent.
// When a leader sends out a message, the index of the last
* When a leader sends out a message, the index of the last
// entry should be added to inflights. The index MUST be added
* entry should be added to inflights. The index MUST be added
// into inflights in order.
* into inflights in order.
// When a leader receives a reply, the previous inflights should
* When a leader receives a reply, the previous inflights should
// be freed by calling inflights.FreeLE with the index of the last
* be freed by calling inflights.FreeLE with the index of the last
// received entry.
* received entry.
**/
SSyncRaftInflights
*
inflights
;
SSyncRaftInflights
*
inflights
;
/**
// IsLearner is true if this progress is tracked for a learner.
* IsLearner is true if this progress is tracked for a learner.
**/
bool
isLearner
;
bool
isLearner
;
};
};
struct
SSyncRaftProgressMap
{
struct
SSyncRaftProgressMap
{
SSyncRaftProgress
progress
[
TSDB_MAX_REPLICA
];
// map nodeId -> SSyncRaftProgress*
SHashObj
*
progressMap
;
};
};
static
FORCE_INLINE
const
char
*
syncRaftProgressStateString
(
const
SSyncRaftProgress
*
progress
)
{
static
FORCE_INLINE
const
char
*
syncRaftProgressStateString
(
const
SSyncRaftProgress
*
progress
)
{
return
kProgressStateString
[
progress
->
state
];
return
kProgressStateString
[
progress
->
state
];
}
}
void
syncRaft
InitProgress
(
int
i
,
SSyncRaft
*
pRaft
,
SSyncRaftProgress
*
progress
);
void
syncRaft
ResetProgress
(
SSyncRaft
*
pRaft
,
SSyncRaftProgress
*
progress
);
/**
// BecomeProbe transitions into StateProbe. Next is reset to Match+1 or,
* syncRaftProgressBecomeProbe transitions into StateProbe. Next is reset to Match+1 or,
// optionally and if larger, the index of the pending snapshot.
* optionally and if larger, the index of the pending snapshot.
**/
void
syncRaftProgressBecomeProbe
(
SSyncRaftProgress
*
progress
);
void
syncRaftProgressBecomeProbe
(
SSyncRaftProgress
*
progress
);
/**
// BecomeReplicate transitions into StateReplicate, resetting Next to Match+1.
* syncRaftProgressBecomeReplicate transitions into StateReplicate, resetting Next to Match+1.
**/
void
syncRaftProgressBecomeReplicate
(
SSyncRaftProgress
*
progress
);
void
syncRaftProgressBecomeReplicate
(
SSyncRaftProgress
*
progress
);
/**
// MaybeUpdate is called when an MsgAppResp arrives from the follower, with the
* syncRaftProgressMaybeUpdate is called when an MsgAppResp arrives from the follower, with the
// index acked by it. The method returns false if the given n index comes from
* index acked by it. The method returns false if the given n index comes from
// an outdated message. Otherwise it updates the progress and returns true.
* an outdated message. Otherwise it updates the progress and returns true.
**/
bool
syncRaftProgressMaybeUpdate
(
SSyncRaftProgress
*
progress
,
SyncIndex
lastIndex
);
bool
syncRaftProgressMaybeUpdate
(
SSyncRaftProgress
*
progress
,
SyncIndex
lastIndex
);
/**
// OptimisticUpdate signals that appends all the way up to and including index n
* syncRaftProgressOptimisticNextIndex signals that appends all the way up to and including index n
// are in-flight. As a result, Next is increased to n+1.
* are in-flight. As a result, Next is increased to n+1.
**/
static
FORCE_INLINE
void
syncRaftProgressOptimisticNextIndex
(
SSyncRaftProgress
*
progress
,
SyncIndex
nextIndex
)
{
static
FORCE_INLINE
void
syncRaftProgressOptimisticNextIndex
(
SSyncRaftProgress
*
progress
,
SyncIndex
nextIndex
)
{
progress
->
nextIndex
=
nextIndex
+
1
;
progress
->
nextIndex
=
nextIndex
+
1
;
}
}
/**
// MaybeDecrTo adjusts the Progress to the receipt of a MsgApp rejection. The
* syncRaftProgressMaybeDecrTo adjusts the Progress to the receipt of a MsgApp rejection. The
// arguments are the index of the append message rejected by the follower, and
* arguments are the index of the append message rejected by the follower, and
// the hint that we want to decrease to.
* the hint that we want to decrease to.
//
*
// Rejections can happen spuriously as messages are sent out of order or
* Rejections can happen spuriously as messages are sent out of order or
// duplicated. In such cases, the rejection pertains to an index that the
* duplicated. In such cases, the rejection pertains to an index that the
// Progress already knows were previously acknowledged, and false is returned
* Progress already knows were previously acknowledged, and false is returned
// without changing the Progress.
* without changing the Progress.
//
*
// If the rejection is genuine, Next is lowered sensibly, and the Progress is
* If the rejection is genuine, Next is lowered sensibly, and the Progress is
// cleared for sending log entries.
* cleared for sending log entries.
**/
bool
syncRaftProgressMaybeDecrTo
(
SSyncRaftProgress
*
progress
,
bool
syncRaftProgressMaybeDecrTo
(
SSyncRaftProgress
*
progress
,
SyncIndex
rejected
,
SyncIndex
matchHint
);
SyncIndex
rejected
,
SyncIndex
matchHint
);
/**
// IsPaused returns whether sending log entries to this node has been throttled.
* syncRaftProgressIsPaused returns whether sending log entries to this node has been throttled.
// This is done when a node has rejected recent MsgApps, is currently waiting
* This is done when a node has rejected recent MsgApps, is currently waiting
// for a snapshot, or has reached the MaxInflightMsgs limit. In normal
* for a snapshot, or has reached the MaxInflightMsgs limit. In normal
// operation, this is false. A throttled node will be contacted less frequently
* operation, this is false. A throttled node will be contacted less frequently
// until it has reached a state in which it's able to accept a steady stream of
* until it has reached a state in which it's able to accept a steady stream of
// log entries again.
* log entries again.
**/
bool
syncRaftProgressIsPaused
(
SSyncRaftProgress
*
progress
);
bool
syncRaftProgressIsPaused
(
SSyncRaftProgress
*
progress
);
static
FORCE_INLINE
SyncIndex
syncRaftProgressNextIndex
(
SSyncRaftProgress
*
progress
)
{
static
FORCE_INLINE
SyncIndex
syncRaftProgressNextIndex
(
SSyncRaftProgress
*
progress
)
{
...
@@ -221,22 +204,35 @@ static FORCE_INLINE bool syncRaftProgressRecentActive(SSyncRaftProgress* progres
...
@@ -221,22 +204,35 @@ static FORCE_INLINE bool syncRaftProgressRecentActive(SSyncRaftProgress* progres
return
progress
->
recentActive
;
return
progress
->
recentActive
;
}
}
int
syncRaftFindProgressIndexByNodeId
(
const
SSyncRaftProgressMap
*
progressMap
,
SyncNodeId
id
);
void
syncRaftInitProgressMap
(
SSyncRaftProgressMap
*
progressMap
);
void
syncRaftFreeProgressMap
(
SSyncRaftProgressMap
*
progressMap
);
void
syncRaftClearProgressMap
(
SSyncRaftProgressMap
*
progressMap
);
void
syncRaftCopyProgressMap
(
SSyncRaftProgressMap
*
from
,
SSyncRaftProgressMap
*
to
);
SSyncRaftProgress
*
syncRaftFindProgressByNodeId
(
const
SSyncRaftProgressMap
*
progressMap
,
SyncNodeId
id
);
int
syncRaftAddToProgressMap
(
SSyncRaftProgressMap
*
progressMap
,
S
yncNodeId
id
);
int
syncRaftAddToProgressMap
(
SSyncRaftProgressMap
*
progressMap
,
S
SyncRaftProgress
*
progress
);
void
syncRaftRemoveFromProgressMap
(
SSyncRaftProgressMap
*
progressMap
,
SyncNodeId
id
);
void
syncRaftRemoveFromProgressMap
(
SSyncRaftProgressMap
*
progressMap
,
SyncNodeId
id
);
bool
syncRaftIsInProgressMap
(
SSyncRaftProgressMap
*
progressMap
,
SyncNodeId
id
);
/**
/**
* return true if progress's log is up-todate
* return true if progress's log is up-todate
**/
**/
bool
syncRaftProgressIsUptodate
(
SSyncRaft
*
pRaft
,
SSyncRaftProgress
*
progress
);
bool
syncRaftProgressIsUptodate
(
SSyncRaft
*
pRaft
,
SSyncRaftProgress
*
progress
);
// BecomeSnapshot moves the Progress to StateSnapshot with the specified pending
// snapshot index.
void
syncRaftProgressBecomeSnapshot
(
SSyncRaftProgress
*
progress
,
SyncIndex
snapshotIndex
);
void
syncRaftProgressBecomeSnapshot
(
SSyncRaftProgress
*
progress
,
SyncIndex
snapshotIndex
);
void
syncRaftCopyProgress
(
const
SSyncRaftProgress
*
from
,
SSyncRaftProgress
*
to
);
void
syncRaftCopyProgress
(
const
SSyncRaftProgress
*
from
,
SSyncRaftProgress
*
to
);
void
syncRaftProgressMapCopy
(
const
SSyncRaftProgressMap
*
from
,
SSyncRaftProgressMap
*
to
);
// return true if reach the end
bool
syncRaftIterateProgressMap
(
const
SSyncRaftProgressMap
*
progressMap
,
SSyncRaftProgress
*
pProgress
);
bool
syncRaftVisitProgressMap
(
SSyncRaftProgressMap
*
progressMap
,
visitProgressFp
fp
,
void
*
arg
);
#if 0
#if 0
...
...
source/libs/sync/inc/sync_raft_progress_tracker.h
浏览文件 @
9188298e
...
@@ -21,7 +21,9 @@
...
@@ -21,7 +21,9 @@
#include "sync_raft_quorum_joint.h"
#include "sync_raft_quorum_joint.h"
#include "sync_raft_progress.h"
#include "sync_raft_progress.h"
#include "sync_raft_proto.h"
#include "sync_raft_proto.h"
#include "thash.h"
// Config reflects the configuration tracked in a ProgressTracker.
struct
SSyncRaftProgressTrackerConfig
{
struct
SSyncRaftProgressTrackerConfig
{
SSyncRaftQuorumJointConfig
voters
;
SSyncRaftQuorumJointConfig
voters
;
...
@@ -83,34 +85,47 @@ struct SSyncRaftProgressTracker {
...
@@ -83,34 +85,47 @@ struct SSyncRaftProgressTracker {
SSyncRaftProgressMap
progressMap
;
SSyncRaftProgressMap
progressMap
;
ESyncRaftVoteType
votes
[
TSDB_MAX_REPLICA
];
// nodeid -> ESyncRaftVoteType map
SHashObj
*
votesMap
;
int
maxInflightMsgs
;
int
maxInflightMsgs
;
SSyncRaft
*
pRaft
;
};
};
SSyncRaftProgressTracker
*
syncRaftOpenProgressTracker
();
SSyncRaftProgressTracker
*
syncRaftOpenProgressTracker
(
SSyncRaft
*
pRaft
);
void
syncRaftInitTrackConfig
(
SSyncRaftProgressTrackerConfig
*
config
);
void
syncRaftFreeTrackConfig
(
SSyncRaftProgressTrackerConfig
*
config
);
void
syncRaftFreeTrackConfig
(
SSyncRaftProgressTrackerConfig
*
config
);
// ResetVotes prepares for a new round of vote counting via recordVote.
void
syncRaftResetVotes
(
SSyncRaftProgressTracker
*
);
void
syncRaftResetVotes
(
SSyncRaftProgressTracker
*
);
typedef
void
(
*
visitProgressFp
)(
int
i
,
SSyncRaftProgress
*
progress
,
void
*
arg
);
void
syncRaftProgressVisit
(
SSyncRaftProgressTracker
*
,
visitProgressFp
visit
,
void
*
arg
);
void
syncRaftProgressVisit
(
SSyncRaftProgressTracker
*
,
visitProgressFp
visit
,
void
*
arg
);
/**
// RecordVote records that the node with the given id voted for this Raft
* syncRaftRecordVote records that the node with the given id voted for this Raft
// instance if v == true (and declined it otherwise).
* instance if v == true (and declined it otherwise).
void
syncRaftRecordVote
(
SSyncRaftProgressTracker
*
tracker
,
SyncNodeId
id
,
bool
grant
);
**/
void
syncRaftRecordVote
(
SSyncRaftProgressTracker
*
tracker
,
int
i
,
bool
grant
);
void
syncRaftC
loneTrackerConfig
(
const
SSyncRaftProgressTrackerConfig
*
config
,
SSyncRaftProgressTrackerConfig
*
result
);
void
syncRaftC
opyTrackerConfig
(
const
SSyncRaftProgressTrackerConfig
*
from
,
SSyncRaftProgressTrackerConfig
*
to
);
int
syncRaftCheck
Progress
(
const
SSyncRaftProgressTrackerConfig
*
config
,
SSyncRaftProgressMap
*
progressMap
);
int
syncRaftCheck
TrackerConfigInProgress
(
SSyncRaftProgressTrackerConfig
*
config
,
SSyncRaftProgressMap
*
progressMap
);
/**
// TallyVotes returns the number of granted and rejected Votes, and whether the
* syncRaftTallyVotes returns the number of granted and rejected Votes, and whether the
// election outcome is known.
* election outcome is known.
**/
ESyncRaftVoteResult
syncRaftTallyVotes
(
SSyncRaftProgressTracker
*
tracker
,
int
*
rejected
,
int
*
granted
);
ESyncRaftVoteResult
syncRaftTallyVotes
(
SSyncRaftProgressTracker
*
tracker
,
int
*
rejected
,
int
*
granted
);
void
syncRaftConfigState
(
const
SSyncRaftProgressTracker
*
tracker
,
SSyncConfigState
*
cs
);
void
syncRaftConfigState
(
SSyncRaftProgressTracker
*
tracker
,
SSyncConfigState
*
cs
);
// Committed returns the largest log index known to be committed based on what
// the voting members of the group have acknowledged.
SyncIndex
syncRaftCommittedIndex
(
SSyncRaftProgressTracker
*
tracker
);
// QuorumActive returns true if the quorum is active from the view of the local
// raft state machine. Otherwise, it returns false.
bool
syncRaftQuorumActive
(
SSyncRaftProgressTracker
*
tracker
);
bool
syncRaftIsInNodeMap
(
const
SSyncRaftNodeMap
*
nodeMap
,
SyncNodeId
nodeId
);
bool
syncRaftIsInNodeMap
(
const
SSyncRaftNodeMap
*
nodeMap
,
SyncNodeId
nodeId
);
...
...
source/libs/sync/inc/sync_raft_proto.h
浏览文件 @
9188298e
...
@@ -17,6 +17,7 @@
...
@@ -17,6 +17,7 @@
#define TD_SYNC_RAFT_PROTO_H
#define TD_SYNC_RAFT_PROTO_H
#include "sync_type.h"
#include "sync_type.h"
#include "sync_raft_node_map.h"
typedef
enum
ESyncRaftConfChangeType
{
typedef
enum
ESyncRaftConfChangeType
{
SYNC_RAFT_Conf_AddNode
=
0
,
SYNC_RAFT_Conf_AddNode
=
0
,
...
@@ -58,4 +59,19 @@ typedef struct SSyncConfigState {
...
@@ -58,4 +59,19 @@ typedef struct SSyncConfigState {
bool
autoLeave
;
bool
autoLeave
;
}
SSyncConfigState
;
}
SSyncConfigState
;
static
FORCE_INLINE
bool
syncRaftConfArrayIsEmpty
(
const
SSyncConfChangeSingleArray
*
ary
)
{
return
ary
->
n
==
0
;
}
static
FORCE_INLINE
void
syncRaftInitConfArray
(
SSyncConfChangeSingleArray
*
ary
)
{
*
ary
=
(
SSyncConfChangeSingleArray
)
{
.
changes
=
NULL
,
.
n
=
0
,
};
}
static
FORCE_INLINE
void
syncRaftFreeConfArray
(
SSyncConfChangeSingleArray
*
ary
)
{
if
(
ary
->
changes
!=
NULL
)
free
(
ary
->
changes
);
}
#endif
/* TD_SYNC_RAFT_PROTO_H */
#endif
/* TD_SYNC_RAFT_PROTO_H */
source/libs/sync/inc/sync_raft_quorum_joint.h
浏览文件 @
9188298e
...
@@ -19,24 +19,31 @@
...
@@ -19,24 +19,31 @@
#include "taosdef.h"
#include "taosdef.h"
#include "sync.h"
#include "sync.h"
#include "sync_type.h"
#include "sync_type.h"
#include "sync_raft_node_map.h"
#include "thash.h"
/**
// JointConfig is a configuration of two groups of (possibly overlapping)
* SSyncRaftQuorumJointConfig is a configuration of two groups of (possibly overlapping)
// majority configurations. Decisions require the support of both majorities.
* majority configurations. Decisions require the support of both majorities.
**/
typedef
struct
SSyncRaftQuorumJointConfig
{
typedef
struct
SSyncRaftQuorumJointConfig
{
SSyncRaftNodeMap
outgoing
;
SSyncRaftNodeMap
outgoing
;
SSyncRaftNodeMap
incoming
;
SSyncRaftNodeMap
incoming
;
}
SSyncRaftQuorumJointConfig
;
}
SSyncRaftQuorumJointConfig
;
/**
// IDs returns a newly initialized map representing the set of voters present
* syncRaftVoteResult takes a mapping of voters to yes/no (true/false) votes and returns
// in the joint configuration.
* a result indicating whether the vote is pending, lost, or won. A joint quorum
void
syncRaftJointConfigIDs
(
SSyncRaftQuorumJointConfig
*
config
,
SSyncRaftNodeMap
*
nodeMap
);
* requires both majority quorums to vote in favor.
**/
ESyncRaftVoteType
syncRaftVoteResult
(
SSyncRaftQuorumJointConfig
*
config
,
const
ESyncRaftVoteType
*
votes
);
bool
syncRaftIsInNodeMap
(
const
SSyncRaftNodeMap
*
nodeMap
,
SyncNodeId
nodeId
);
// CommittedIndex returns the largest committed index for the given joint
// quorum. An index is jointly committed if it is committed in both constituent
// majorities.
SyncIndex
syncRaftJointConfigCommittedIndex
(
const
SSyncRaftQuorumJointConfig
*
config
,
matchAckIndexerFp
indexer
,
void
*
arg
);
// VoteResult takes a mapping of voters to yes/no (true/false) votes and returns
// a result indicating whether the vote is pending, lost, or won. A joint quorum
// requires both majority quorums to vote in favor.
ESyncRaftVoteType
syncRaftVoteResult
(
SSyncRaftQuorumJointConfig
*
config
,
SHashObj
*
votesMap
);
void
syncRaftInitQuorumJointConfig
(
SSyncRaftQuorumJointConfig
*
config
);
static
FORCE_INLINE
bool
syncRaftJointConfigInOutgoing
(
const
SSyncRaftQuorumJointConfig
*
config
,
SyncNodeId
id
)
{
static
FORCE_INLINE
bool
syncRaftJointConfigInOutgoing
(
const
SSyncRaftQuorumJointConfig
*
config
,
SyncNodeId
id
)
{
return
syncRaftIsInNodeMap
(
&
config
->
outgoing
,
id
);
return
syncRaftIsInNodeMap
(
&
config
->
outgoing
,
id
);
...
@@ -59,7 +66,19 @@ static FORCE_INLINE const SSyncRaftNodeMap* syncRaftJointConfigOutgoing(const SS
...
@@ -59,7 +66,19 @@ static FORCE_INLINE const SSyncRaftNodeMap* syncRaftJointConfigOutgoing(const SS
}
}
static
FORCE_INLINE
void
syncRaftJointConfigClearOutgoing
(
SSyncRaftQuorumJointConfig
*
config
)
{
static
FORCE_INLINE
void
syncRaftJointConfigClearOutgoing
(
SSyncRaftQuorumJointConfig
*
config
)
{
memset
(
&
config
->
outgoing
,
0
,
sizeof
(
SSyncCluster
));
syncRaftClearNodeMap
(
&
config
->
outgoing
);
}
static
FORCE_INLINE
bool
syncRaftJointConfigIsIncomingEmpty
(
const
SSyncRaftQuorumJointConfig
*
config
)
{
return
syncRaftNodeMapSize
(
&
config
->
incoming
)
==
0
;
}
static
FORCE_INLINE
bool
syncRaftJointConfigIsOutgoingEmpty
(
const
SSyncRaftQuorumJointConfig
*
config
)
{
return
syncRaftNodeMapSize
(
&
config
->
outgoing
)
==
0
;
}
static
FORCE_INLINE
bool
syncRaftJointConfigIsInOutgoing
(
const
SSyncRaftQuorumJointConfig
*
config
,
SyncNodeId
id
)
{
return
syncRaftIsInNodeMap
(
&
config
->
outgoing
,
id
);
}
}
#endif
/* _TD_LIBS_SYNC_RAFT_QUORUM_JOINT_H */
#endif
/* _TD_LIBS_SYNC_RAFT_QUORUM_JOINT_H */
source/libs/sync/inc/sync_raft_quorum_majority.h
浏览文件 @
9188298e
...
@@ -19,6 +19,7 @@
...
@@ -19,6 +19,7 @@
#include "sync.h"
#include "sync.h"
#include "sync_type.h"
#include "sync_type.h"
#include "sync_raft_quorum.h"
#include "sync_raft_quorum.h"
#include "thash.h"
/**
/**
* syncRaftMajorityVoteResult takes a mapping of voters to yes/no (true/false) votes and returns
* syncRaftMajorityVoteResult takes a mapping of voters to yes/no (true/false) votes and returns
...
@@ -26,6 +27,10 @@
...
@@ -26,6 +27,10 @@
* yes/no has been reached), won (a quorum of yes has been reached), or lost (a
* yes/no has been reached), won (a quorum of yes has been reached), or lost (a
* quorum of no has been reached).
* quorum of no has been reached).
**/
**/
ESyncRaftVoteResult
syncRaftMajorityVoteResult
(
SSyncRaftNodeMap
*
config
,
const
ESyncRaftVoteType
*
votes
);
ESyncRaftVoteResult
syncRaftMajorityVoteResult
(
SSyncRaftNodeMap
*
config
,
SHashObj
*
votesMap
);
// CommittedIndex computes the committed index from those supplied via the
// provided AckedIndexer (for the active config).
SyncIndex
syncRaftMajorityConfigCommittedIndex
(
const
SSyncRaftNodeMap
*
config
,
matchAckIndexerFp
indexer
,
void
*
arg
);
#endif
/* _TD_LIBS_SYNC_RAFT_QUORUM_MAJORITY_H */
#endif
/* _TD_LIBS_SYNC_RAFT_QUORUM_MAJORITY_H */
source/libs/sync/inc/sync_raft_restore.h
浏览文件 @
9188298e
...
@@ -27,6 +27,7 @@
...
@@ -27,6 +27,7 @@
// the Changer only needs a ProgressMap (not a whole Tracker) at which point
// the Changer only needs a ProgressMap (not a whole Tracker) at which point
// this can just take LastIndex and MaxInflight directly instead and cook up
// this can just take LastIndex and MaxInflight directly instead and cook up
// the results from that alone.
// the results from that alone.
int
syncRaftRestoreConfig
(
SSyncRaftChanger
*
changer
,
const
SSyncConfigState
*
cs
);
int
syncRaftRestoreConfig
(
SSyncRaftChanger
*
changer
,
const
SSyncConfigState
*
cs
,
SSyncRaftProgressTrackerConfig
*
config
,
SSyncRaftProgressMap
*
progressMap
);
#endif
/* TD_SYNC_RAFT_RESTORE_H */
#endif
/* TD_SYNC_RAFT_RESTORE_H */
source/libs/sync/inc/sync_type.h
浏览文件 @
9188298e
...
@@ -32,6 +32,8 @@ typedef struct SSyncRaftProgress SSyncRaftProgress;
...
@@ -32,6 +32,8 @@ typedef struct SSyncRaftProgress SSyncRaftProgress;
typedef
struct
SSyncRaftProgressMap
SSyncRaftProgressMap
;
typedef
struct
SSyncRaftProgressMap
SSyncRaftProgressMap
;
typedef
struct
SSyncRaftProgressTrackerConfig
SSyncRaftProgressTrackerConfig
;
typedef
struct
SSyncRaftProgressTrackerConfig
SSyncRaftProgressTrackerConfig
;
typedef
struct
SSyncRaftNodeMap
SSyncRaftNodeMap
;
typedef
struct
SSyncRaftProgressTracker
SSyncRaftProgressTracker
;
typedef
struct
SSyncRaftProgressTracker
SSyncRaftProgressTracker
;
typedef
struct
SSyncRaftChanger
SSyncRaftChanger
;
typedef
struct
SSyncRaftChanger
SSyncRaftChanger
;
...
@@ -68,11 +70,6 @@ typedef struct SSyncClusterConfig {
...
@@ -68,11 +70,6 @@ typedef struct SSyncClusterConfig {
const
SSyncCluster
*
cluster
;
const
SSyncCluster
*
cluster
;
}
SSyncClusterConfig
;
}
SSyncClusterConfig
;
typedef
struct
{
int32_t
replica
;
SyncNodeId
nodeId
[
TSDB_MAX_REPLICA
];
}
SSyncRaftNodeMap
;
typedef
enum
{
typedef
enum
{
SYNC_RAFT_CAMPAIGN_PRE_ELECTION
=
0
,
SYNC_RAFT_CAMPAIGN_PRE_ELECTION
=
0
,
SYNC_RAFT_CAMPAIGN_ELECTION
=
1
,
SYNC_RAFT_CAMPAIGN_ELECTION
=
1
,
...
@@ -80,9 +77,6 @@ typedef enum {
...
@@ -80,9 +77,6 @@ typedef enum {
}
ESyncRaftElectionType
;
}
ESyncRaftElectionType
;
typedef
enum
{
typedef
enum
{
// the init vote resp status
SYNC_RAFT_VOTE_RESP_UNKNOWN
=
0
,
// grant the vote request
// grant the vote request
SYNC_RAFT_VOTE_RESP_GRANT
=
1
,
SYNC_RAFT_VOTE_RESP_GRANT
=
1
,
...
@@ -90,4 +84,8 @@ typedef enum {
...
@@ -90,4 +84,8 @@ typedef enum {
SYNC_RAFT_VOTE_RESP_REJECT
=
2
,
SYNC_RAFT_VOTE_RESP_REJECT
=
2
,
}
ESyncRaftVoteType
;
}
ESyncRaftVoteType
;
typedef
void
(
*
visitProgressFp
)(
SSyncRaftProgress
*
progress
,
void
*
arg
);
typedef
void
(
*
matchAckIndexerFp
)(
SyncNodeId
id
,
void
*
arg
,
SyncIndex
*
index
);
#endif
/* _TD_LIBS_SYNC_TYPE_H */
#endif
/* _TD_LIBS_SYNC_TYPE_H */
source/libs/sync/src/raft.c
浏览文件 @
9188298e
...
@@ -14,7 +14,7 @@
...
@@ -14,7 +14,7 @@
*/
*/
#include "raft.h"
#include "raft.h"
#include "
raft_configuration
.h"
#include "
sync_raft_impl
.h"
#include "raft_log.h"
#include "raft_log.h"
#include "sync_raft_restore.h"
#include "sync_raft_restore.h"
#include "raft_replication.h"
#include "raft_replication.h"
...
@@ -59,8 +59,13 @@ int32_t syncRaftStart(SSyncRaft* pRaft, const SSyncInfo* pInfo) {
...
@@ -59,8 +59,13 @@ int32_t syncRaftStart(SSyncRaft* pRaft, const SSyncInfo* pInfo) {
logStore
=
&
(
pRaft
->
logStore
);
logStore
=
&
(
pRaft
->
logStore
);
fsm
=
&
(
pRaft
->
fsm
);
fsm
=
&
(
pRaft
->
fsm
);
pRaft
->
nodeInfoMap
=
taosHashInit
(
TSDB_MAX_REPLICA
,
taosGetDefaultHashFunction
(
TSDB_DATA_TYPE_INT
),
true
,
HASH_ENTRY_LOCK
);
if
(
pRaft
->
nodeInfoMap
==
NULL
)
{
return
-
1
;
}
// init progress tracker
// init progress tracker
pRaft
->
tracker
=
syncRaftOpenProgressTracker
();
pRaft
->
tracker
=
syncRaftOpenProgressTracker
(
pRaft
);
if
(
pRaft
->
tracker
==
NULL
)
{
if
(
pRaft
->
tracker
==
NULL
)
{
return
-
1
;
return
-
1
;
}
}
...
@@ -96,11 +101,22 @@ int32_t syncRaftStart(SSyncRaft* pRaft, const SSyncInfo* pInfo) {
...
@@ -96,11 +101,22 @@ int32_t syncRaftStart(SSyncRaft* pRaft, const SSyncInfo* pInfo) {
.
tracker
=
pRaft
->
tracker
,
.
tracker
=
pRaft
->
tracker
,
.
lastIndex
=
syncRaftLogLastIndex
(
pRaft
->
log
),
.
lastIndex
=
syncRaftLogLastIndex
(
pRaft
->
log
),
};
};
if
(
syncRaftRestoreConfig
(
&
changer
,
&
confState
)
<
0
)
{
SSyncRaftProgressTrackerConfig
config
;
SSyncRaftProgressMap
progressMap
;
if
(
syncRaftRestoreConfig
(
&
changer
,
&
confState
,
&
config
,
&
progressMap
)
<
0
)
{
syncError
(
"syncRaftRestoreConfig for vgid %d fail"
,
pInfo
->
vgId
);
syncError
(
"syncRaftRestoreConfig for vgid %d fail"
,
pInfo
->
vgId
);
return
-
1
;
return
-
1
;
}
}
// save restored config and progress map to tracker
syncRaftCopyProgressMap
(
&
progressMap
,
&
pRaft
->
tracker
->
progressMap
);
syncRaftCopyTrackerConfig
(
&
config
,
&
pRaft
->
tracker
->
config
);
// free progress map and config
syncRaftFreeProgressMap
(
&
progressMap
);
syncRaftFreeTrackConfig
(
&
config
);
if
(
!
syncRaftIsEmptyServerState
(
&
serverState
))
{
if
(
!
syncRaftIsEmptyServerState
(
&
serverState
))
{
syncRaftLoadState
(
pRaft
,
&
serverState
);
syncRaftLoadState
(
pRaft
,
&
serverState
);
}
}
...
@@ -140,6 +156,7 @@ int32_t syncRaftStep(SSyncRaft* pRaft, const SSyncMessage* pMsg) {
...
@@ -140,6 +156,7 @@ int32_t syncRaftStep(SSyncRaft* pRaft, const SSyncMessage* pMsg) {
int32_t
syncRaftTick
(
SSyncRaft
*
pRaft
)
{
int32_t
syncRaftTick
(
SSyncRaft
*
pRaft
)
{
pRaft
->
currentTick
+=
1
;
pRaft
->
currentTick
+=
1
;
pRaft
->
tickFp
(
pRaft
);
return
0
;
return
0
;
}
}
...
@@ -151,8 +168,8 @@ static int deserializeClusterStateFromBuffer(SSyncConfigState* cluster, const ch
...
@@ -151,8 +168,8 @@ static int deserializeClusterStateFromBuffer(SSyncConfigState* cluster, const ch
return
0
;
return
0
;
}
}
static
void
visitProgressMaybeSendAppend
(
int
i
,
SSyncRaftProgress
*
progress
,
void
*
arg
)
{
static
void
visitProgressMaybeSendAppend
(
SSyncRaftProgress
*
progress
,
void
*
arg
)
{
syncRaft
Replicate
(
arg
,
progress
,
false
);
syncRaft
MaybeSendAppend
(
arg
,
progress
,
false
);
}
}
// switchToConfig reconfigures this node to use the provided configuration. It
// switchToConfig reconfigures this node to use the provided configuration. It
...
@@ -169,13 +186,12 @@ static void switchToConfig(SSyncRaft* pRaft, const SSyncRaftProgressTrackerConfi
...
@@ -169,13 +186,12 @@ static void switchToConfig(SSyncRaft* pRaft, const SSyncRaftProgressTrackerConfi
SSyncRaftProgress
*
progress
=
NULL
;
SSyncRaftProgress
*
progress
=
NULL
;
syncRaftConfigState
(
pRaft
->
tracker
,
cs
);
syncRaftConfigState
(
pRaft
->
tracker
,
cs
);
i
=
syncRaftFindProgressIndex
ByNodeId
(
&
pRaft
->
tracker
->
progressMap
,
selfId
);
progress
=
syncRaftFindProgress
ByNodeId
(
&
pRaft
->
tracker
->
progressMap
,
selfId
);
exist
=
(
i
!=
-
1
);
exist
=
(
progress
!=
NULL
);
// Update whether the node itself is a learner, resetting to false when the
// Update whether the node itself is a learner, resetting to false when the
// node is removed.
// node is removed.
if
(
exist
)
{
if
(
exist
)
{
progress
=
&
pRaft
->
tracker
->
progressMap
.
progress
[
i
];
pRaft
->
isLearner
=
progress
->
isLearner
;
pRaft
->
isLearner
=
progress
->
isLearner
;
}
else
{
}
else
{
pRaft
->
isLearner
=
false
;
pRaft
->
isLearner
=
false
;
...
@@ -196,7 +212,7 @@ static void switchToConfig(SSyncRaft* pRaft, const SSyncRaftProgressTrackerConfi
...
@@ -196,7 +212,7 @@ static void switchToConfig(SSyncRaft* pRaft, const SSyncRaftProgressTrackerConfi
// The remaining steps only make sense if this node is the leader and there
// The remaining steps only make sense if this node is the leader and there
// are other nodes.
// are other nodes.
if
(
pRaft
->
state
!=
TAOS_SYNC_STATE_LEADER
||
cs
->
voters
.
replica
==
0
)
{
if
(
pRaft
->
state
!=
TAOS_SYNC_STATE_LEADER
||
syncRaftNodeMapSize
(
&
cs
->
voters
)
==
0
)
{
return
;
return
;
}
}
...
@@ -212,8 +228,11 @@ static void switchToConfig(SSyncRaft* pRaft, const SSyncRaftProgressTrackerConfi
...
@@ -212,8 +228,11 @@ static void switchToConfig(SSyncRaft* pRaft, const SSyncRaftProgressTrackerConfi
// If the the leadTransferee was removed or demoted, abort the leadership transfer.
// If the the leadTransferee was removed or demoted, abort the leadership transfer.
SyncNodeId
leadTransferee
=
pRaft
->
leadTransferee
;
SyncNodeId
leadTransferee
=
pRaft
->
leadTransferee
;
if
(
leadTransferee
!=
SYNC_NON_NODE_ID
&&
!
syncRaftIsInNodeMap
(
&
pRaft
->
tracker
->
config
.
voters
,
leadTransferee
))
{
if
(
leadTransferee
!=
SYNC_NON_NODE_ID
)
{
abortLeaderTransfer
(
pRaft
);
if
(
!
syncRaftIsInNodeMap
(
&
pRaft
->
tracker
->
config
.
voters
.
incoming
,
leadTransferee
)
&&
!
syncRaftIsInNodeMap
(
&
pRaft
->
tracker
->
config
.
voters
.
outgoing
,
leadTransferee
))
{
abortLeaderTransfer
(
pRaft
);
}
}
}
}
}
}
}
...
@@ -286,8 +305,8 @@ static bool preHandleOldTermMessage(SSyncRaft* pRaft, const SSyncMessage* pMsg)
...
@@ -286,8 +305,8 @@ static bool preHandleOldTermMessage(SSyncRaft* pRaft, const SSyncMessage* pMsg)
* but it will not receive MsgApp or MsgHeartbeat, so it will not create
* but it will not receive MsgApp or MsgHeartbeat, so it will not create
* disruptive term increases
* disruptive term increases
**/
**/
int
peerIndex
=
syncRaftConfigurationIndexOfNode
(
pRaft
,
pMsg
->
from
);
SNodeInfo
*
pNode
=
syncRaftGetNodeById
(
pRaft
,
pMsg
->
from
);
if
(
p
eerIndex
<
0
)
{
if
(
p
Node
==
NULL
)
{
return
true
;
return
true
;
}
}
SSyncMessage
*
msg
=
syncNewEmptyAppendRespMsg
(
pRaft
->
selfGroupId
,
pRaft
->
selfId
,
pRaft
->
term
);
SSyncMessage
*
msg
=
syncNewEmptyAppendRespMsg
(
pRaft
->
selfGroupId
,
pRaft
->
selfId
,
pRaft
->
term
);
...
@@ -295,7 +314,7 @@ static bool preHandleOldTermMessage(SSyncRaft* pRaft, const SSyncMessage* pMsg)
...
@@ -295,7 +314,7 @@ static bool preHandleOldTermMessage(SSyncRaft* pRaft, const SSyncMessage* pMsg)
return
true
;
return
true
;
}
}
pRaft
->
io
.
send
(
msg
,
&
(
pRaft
->
cluster
.
nodeInfo
[
peerIndex
])
);
pRaft
->
io
.
send
(
msg
,
pNode
);
}
else
{
}
else
{
// ignore other cases
// ignore other cases
syncInfo
(
"[%d:%d] [term:%"
PRId64
"] ignored a %d message with lower term from %d [term:%"
PRId64
"]"
,
syncInfo
(
"[%d:%d] [term:%"
PRId64
"] ignored a %d message with lower term from %d [term:%"
PRId64
"]"
,
...
...
source/libs/sync/src/raft_handle_append_entries_message.c
浏览文件 @
9188298e
...
@@ -16,15 +16,14 @@
...
@@ -16,15 +16,14 @@
#include "syncInt.h"
#include "syncInt.h"
#include "raft.h"
#include "raft.h"
#include "raft_log.h"
#include "raft_log.h"
#include "
raft_configuration
.h"
#include "
sync_raft_impl
.h"
#include "raft_message.h"
#include "raft_message.h"
int
syncRaftHandleAppendEntriesMessage
(
SSyncRaft
*
pRaft
,
const
SSyncMessage
*
pMsg
)
{
int
syncRaftHandleAppendEntriesMessage
(
SSyncRaft
*
pRaft
,
const
SSyncMessage
*
pMsg
)
{
const
RaftMsg_Append_Entries
*
appendEntries
=
&
(
pMsg
->
appendEntries
);
const
RaftMsg_Append_Entries
*
appendEntries
=
&
(
pMsg
->
appendEntries
);
int
peerIndex
=
syncRaftConfigurationIndexOfNode
(
pRaft
,
pMsg
->
from
);
SNodeInfo
*
pNode
=
syncRaftGetNodeById
(
pRaft
,
pMsg
->
from
);
if
(
pNode
==
NULL
)
{
if
(
peerIndex
<
0
)
{
return
0
;
return
0
;
}
}
...
@@ -44,6 +43,6 @@ int syncRaftHandleAppendEntriesMessage(SSyncRaft* pRaft, const SSyncMessage* pMs
...
@@ -44,6 +43,6 @@ int syncRaftHandleAppendEntriesMessage(SSyncRaft* pRaft, const SSyncMessage* pMs
pRaft
->
selfGroupId
,
pRaft
->
selfId
,
pMsg
->
from
,
appendEntries
->
index
);
pRaft
->
selfGroupId
,
pRaft
->
selfId
,
pMsg
->
from
,
appendEntries
->
index
);
out:
out:
pRaft
->
io
.
send
(
pRespMsg
,
&
(
pRaft
->
cluster
.
nodeInfo
[
peerIndex
])
);
pRaft
->
io
.
send
(
pRespMsg
,
pNode
);
return
0
;
return
0
;
}
}
\ No newline at end of file
source/libs/sync/src/raft_handle_election_message.c
浏览文件 @
9188298e
...
@@ -19,24 +19,6 @@
...
@@ -19,24 +19,6 @@
#include "raft_message.h"
#include "raft_message.h"
int
syncRaftHandleElectionMessage
(
SSyncRaft
*
pRaft
,
const
SSyncMessage
*
pMsg
)
{
int
syncRaftHandleElectionMessage
(
SSyncRaft
*
pRaft
,
const
SSyncMessage
*
pMsg
)
{
if
(
pRaft
->
state
==
TAOS_SYNC_STATE_LEADER
)
{
syncDebug
(
"[%d:%d] ignoring RAFT_MSG_INTERNAL_ELECTION because already leader"
,
pRaft
->
selfGroupId
,
pRaft
->
selfId
);
return
0
;
}
if
(
!
syncRaftIsPromotable
(
pRaft
))
{
syncDebug
(
"[%d:%d] is unpromotable and can not campaign"
,
pRaft
->
selfGroupId
,
pRaft
->
selfId
);
return
0
;
}
// if there is pending uncommitted config,cannot start election
if
(
syncRaftLogNumOfPendingConf
(
pRaft
->
log
)
>
0
&&
syncRaftHasUnappliedLog
(
pRaft
->
log
))
{
syncWarn
(
"[%d:%d] cannot syncRaftStartElection at term %"
PRId64
" since there are still pending configuration changes to apply"
,
pRaft
->
selfGroupId
,
pRaft
->
selfId
,
pRaft
->
term
);
return
0
;
}
syncInfo
(
"[%d:%d] is starting a new election at term %"
PRId64
""
,
pRaft
->
selfGroupId
,
pRaft
->
selfId
,
pRaft
->
term
);
if
(
pRaft
->
preVote
)
{
if
(
pRaft
->
preVote
)
{
syncRaftStartElection
(
pRaft
,
SYNC_RAFT_CAMPAIGN_PRE_ELECTION
);
syncRaftStartElection
(
pRaft
,
SYNC_RAFT_CAMPAIGN_PRE_ELECTION
);
}
else
{
}
else
{
...
...
source/libs/sync/src/raft_handle_vote_message.c
浏览文件 @
9188298e
...
@@ -15,7 +15,7 @@
...
@@ -15,7 +15,7 @@
#include "syncInt.h"
#include "syncInt.h"
#include "raft.h"
#include "raft.h"
#include "
raft_configuration
.h"
#include "
sync_raft_impl
.h"
#include "raft_log.h"
#include "raft_log.h"
#include "raft_message.h"
#include "raft_message.h"
...
@@ -23,10 +23,11 @@ static bool canGrantVoteMessage(SSyncRaft* pRaft, const SSyncMessage* pMsg);
...
@@ -23,10 +23,11 @@ static bool canGrantVoteMessage(SSyncRaft* pRaft, const SSyncMessage* pMsg);
int
syncRaftHandleVoteMessage
(
SSyncRaft
*
pRaft
,
const
SSyncMessage
*
pMsg
)
{
int
syncRaftHandleVoteMessage
(
SSyncRaft
*
pRaft
,
const
SSyncMessage
*
pMsg
)
{
SSyncMessage
*
pRespMsg
;
SSyncMessage
*
pRespMsg
;
int
voteIndex
=
syncRaftConfigurationIndexOfNode
(
pRaft
,
pMsg
->
from
);
SNodeInfo
*
pNode
=
syncRaftGetNodeById
(
pRaft
,
pMsg
->
from
);
if
(
voteIndex
==
-
1
)
{
if
(
pNode
==
NULL
)
{
return
0
;
return
0
;
}
}
bool
grant
;
bool
grant
;
SyncIndex
lastIndex
=
syncRaftLogLastIndex
(
pRaft
->
log
);
SyncIndex
lastIndex
=
syncRaftLogLastIndex
(
pRaft
->
log
);
SyncTerm
lastTerm
=
syncRaftLogLastTerm
(
pRaft
->
log
);
SyncTerm
lastTerm
=
syncRaftLogLastTerm
(
pRaft
->
log
);
...
@@ -42,17 +43,19 @@ int syncRaftHandleVoteMessage(SSyncRaft* pRaft, const SSyncMessage* pMsg) {
...
@@ -42,17 +43,19 @@ int syncRaftHandleVoteMessage(SSyncRaft* pRaft, const SSyncMessage* pMsg) {
grant
?
"grant"
:
"reject"
,
grant
?
"grant"
:
"reject"
,
pMsg
->
from
,
pMsg
->
vote
.
lastTerm
,
pMsg
->
vote
.
lastIndex
,
pRaft
->
term
);
pMsg
->
from
,
pMsg
->
vote
.
lastTerm
,
pMsg
->
vote
.
lastIndex
,
pRaft
->
term
);
pRaft
->
io
.
send
(
pRespMsg
,
&
(
pRaft
->
cluster
.
nodeInfo
[
voteIndex
])
);
pRaft
->
io
.
send
(
pRespMsg
,
pNode
);
return
0
;
return
0
;
}
}
static
bool
canGrantVoteMessage
(
SSyncRaft
*
pRaft
,
const
SSyncMessage
*
pMsg
)
{
static
bool
canGrantVoteMessage
(
SSyncRaft
*
pRaft
,
const
SSyncMessage
*
pMsg
)
{
if
(
!
(
pRaft
->
voteFor
==
SYNC_NON_NODE_ID
||
pMsg
->
term
>
pRaft
->
term
||
pRaft
->
voteFor
==
pMsg
->
from
))
{
bool
canVote
=
return
false
;
// We can vote if this is a repeat of a vote we've already cast...
}
pRaft
->
voteFor
==
pMsg
->
from
||
if
(
!
syncRaftLogIsUptodate
(
pRaft
->
log
,
pMsg
->
vote
.
lastIndex
,
pMsg
->
vote
.
lastTerm
))
{
// ...we haven't voted and we don't think there's a leader yet in this term...
return
false
;
(
pRaft
->
voteFor
==
SYNC_NON_NODE_ID
&&
pRaft
->
leaderId
==
SYNC_NON_NODE_ID
)
||
}
// ...or this is a PreVote for a future term...
(
pMsg
->
vote
.
cType
==
SYNC_RAFT_CAMPAIGN_PRE_ELECTION
&&
pMsg
->
term
>
pRaft
->
term
);
return
true
;
// ...and we believe the candidate is up to date.
return
canVote
&&
syncRaftLogIsUptodate
(
pRaft
->
log
,
pMsg
->
vote
.
lastIndex
,
pMsg
->
vote
.
lastTerm
);
}
}
\ No newline at end of file
source/libs/sync/src/raft_handle_vote_resp_message.c
浏览文件 @
9188298e
...
@@ -15,7 +15,7 @@
...
@@ -15,7 +15,7 @@
#include "syncInt.h"
#include "syncInt.h"
#include "raft.h"
#include "raft.h"
#include "
raft_configuration
.h"
#include "
sync_raft_impl
.h"
#include "raft_message.h"
#include "raft_message.h"
int
syncRaftHandleVoteRespMessage
(
SSyncRaft
*
pRaft
,
const
SSyncMessage
*
pMsg
)
{
int
syncRaftHandleVoteRespMessage
(
SSyncRaft
*
pRaft
,
const
SSyncMessage
*
pMsg
)
{
...
@@ -25,8 +25,8 @@ int syncRaftHandleVoteRespMessage(SSyncRaft* pRaft, const SSyncMessage* pMsg) {
...
@@ -25,8 +25,8 @@ int syncRaftHandleVoteRespMessage(SSyncRaft* pRaft, const SSyncMessage* pMsg) {
assert
(
pRaft
->
state
==
TAOS_SYNC_STATE_CANDIDATE
);
assert
(
pRaft
->
state
==
TAOS_SYNC_STATE_CANDIDATE
);
voterIndex
=
syncRaftConfigurationIndexOfNode
(
pRaft
,
pMsg
->
from
);
SNodeInfo
*
pNode
=
syncRaftGetNodeById
(
pRaft
,
pMsg
->
from
);
if
(
voterIndex
==
-
1
)
{
if
(
pNode
==
NULL
)
{
syncError
(
"[%d:%d] recv vote resp from unknown server %d"
,
pRaft
->
selfGroupId
,
pRaft
->
selfId
,
pMsg
->
from
);
syncError
(
"[%d:%d] recv vote resp from unknown server %d"
,
pRaft
->
selfGroupId
,
pRaft
->
selfId
,
pMsg
->
from
);
return
0
;
return
0
;
}
}
...
@@ -45,12 +45,14 @@ int syncRaftHandleVoteRespMessage(SSyncRaft* pRaft, const SSyncMessage* pMsg) {
...
@@ -45,12 +45,14 @@ int syncRaftHandleVoteRespMessage(SSyncRaft* pRaft, const SSyncMessage* pMsg) {
if
(
result
==
SYNC_RAFT_VOTE_WON
)
{
if
(
result
==
SYNC_RAFT_VOTE_WON
)
{
if
(
pRaft
->
candidateState
.
inPreVote
)
{
if
(
pRaft
->
candidateState
.
inPreVote
)
{
syncRaft
StartElectio
n
(
pRaft
,
SYNC_RAFT_CAMPAIGN_ELECTION
);
syncRaft
Campaig
n
(
pRaft
,
SYNC_RAFT_CAMPAIGN_ELECTION
);
}
else
{
}
else
{
syncRaftBecomeLeader
(
pRaft
);
syncRaftBecomeLeader
(
pRaft
);
syncRaftBroadcastAppend
(
pRaft
);
}
}
}
else
if
(
result
==
SYNC_RAFT_VOTE_LOST
)
{
}
else
if
(
result
==
SYNC_RAFT_VOTE_LOST
)
{
// pb.MsgPreVoteResp contains future term of pre-candidate
// m.Term > r.Term; reuse r.Term
syncRaftBecomeFollower
(
pRaft
,
pRaft
->
term
,
SYNC_NON_NODE_ID
);
syncRaftBecomeFollower
(
pRaft
,
pRaft
->
term
,
SYNC_NON_NODE_ID
);
}
}
...
...
source/libs/sync/src/raft_replication.c
浏览文件 @
9188298e
...
@@ -22,14 +22,14 @@
...
@@ -22,14 +22,14 @@
static
bool
sendSnapshot
(
SSyncRaft
*
pRaft
,
SSyncRaftProgress
*
progress
);
static
bool
sendSnapshot
(
SSyncRaft
*
pRaft
,
SSyncRaftProgress
*
progress
);
static
bool
sendAppendEntries
(
SSyncRaft
*
pRaft
,
SSyncRaftProgress
*
progress
,
static
bool
sendAppendEntries
(
SSyncRaft
*
pRaft
,
SSyncRaftProgress
*
progress
,
SyncIndex
prevIndex
,
SyncTerm
prevTerm
,
SyncIndex
prevIndex
,
SyncTerm
prevTerm
,
const
SSyncRaftEntry
*
entries
,
int
nEntry
);
SSyncRaftEntry
*
entries
,
int
nEntry
);
//
syncRaftReplicate
sends an append RPC with new entries to the given peer,
//
maybeSendAppend
sends an append RPC with new entries to the given peer,
// if necessary. Returns true if a message was sent. The sendIfEmpty
// if necessary. Returns true if a message was sent. The sendIfEmpty
// argument controls whether messages with no entries will be sent
// argument controls whether messages with no entries will be sent
// ("empty" messages are useful to convey updated Commit indexes, but
// ("empty" messages are useful to convey updated Commit indexes, but
// are undesirable when we're sending multiple messages in a batch).
// are undesirable when we're sending multiple messages in a batch).
bool
syncRaft
Replicate
(
SSyncRaft
*
pRaft
,
SSyncRaftProgress
*
progress
,
bool
sendIfEmpty
)
{
bool
syncRaft
MaybeSendAppend
(
SSyncRaft
*
pRaft
,
SSyncRaftProgress
*
progress
,
bool
sendIfEmpty
)
{
assert
(
pRaft
->
state
==
TAOS_SYNC_STATE_LEADER
);
assert
(
pRaft
->
state
==
TAOS_SYNC_STATE_LEADER
);
SyncNodeId
nodeId
=
progress
->
id
;
SyncNodeId
nodeId
=
progress
->
id
;
...
@@ -68,10 +68,13 @@ static bool sendSnapshot(SSyncRaft* pRaft, SSyncRaftProgress* progress) {
...
@@ -68,10 +68,13 @@ static bool sendSnapshot(SSyncRaft* pRaft, SSyncRaftProgress* progress) {
static
bool
sendAppendEntries
(
SSyncRaft
*
pRaft
,
SSyncRaftProgress
*
progress
,
static
bool
sendAppendEntries
(
SSyncRaft
*
pRaft
,
SSyncRaftProgress
*
progress
,
SyncIndex
prevIndex
,
SyncTerm
prevTerm
,
SyncIndex
prevIndex
,
SyncTerm
prevTerm
,
const
SSyncRaftEntry
*
entries
,
int
nEntry
)
{
SSyncRaftEntry
*
entries
,
int
nEntry
)
{
SNodeInfo
*
pNode
=
syncRaftGetNodeById
(
pRaft
,
progress
->
id
);
if
(
pNode
==
NULL
)
{
return
false
;
}
SyncIndex
lastIndex
;
SyncIndex
lastIndex
;
SyncTerm
logTerm
=
prevTerm
;
SyncTerm
logTerm
=
prevTerm
;
SNodeInfo
*
pNode
=
&
(
pRaft
->
cluster
.
nodeInfo
[
progress
->
selfIndex
]);
SSyncMessage
*
msg
=
syncNewAppendMsg
(
pRaft
->
selfGroupId
,
pRaft
->
selfId
,
pRaft
->
term
,
SSyncMessage
*
msg
=
syncNewAppendMsg
(
pRaft
->
selfGroupId
,
pRaft
->
selfId
,
pRaft
->
term
,
prevIndex
,
prevTerm
,
pRaft
->
log
->
commitIndex
,
prevIndex
,
prevTerm
,
pRaft
->
log
->
commitIndex
,
...
@@ -87,7 +90,7 @@ static bool sendAppendEntries(SSyncRaft* pRaft, SSyncRaftProgress* progress,
...
@@ -87,7 +90,7 @@ static bool sendAppendEntries(SSyncRaft* pRaft, SSyncRaftProgress* progress,
case
PROGRESS_STATE_REPLICATE
:
case
PROGRESS_STATE_REPLICATE
:
lastIndex
=
entries
[
nEntry
-
1
].
index
;
lastIndex
=
entries
[
nEntry
-
1
].
index
;
syncRaftProgressOptimisticNextIndex
(
progress
,
lastIndex
);
syncRaftProgressOptimisticNextIndex
(
progress
,
lastIndex
);
syncRaftInflightAdd
(
&
progress
->
inflights
,
lastIndex
);
syncRaftInflightAdd
(
progress
->
inflights
,
lastIndex
);
break
;
break
;
case
PROGRESS_STATE_PROBE
:
case
PROGRESS_STATE_PROBE
:
progress
->
probeSent
=
true
;
progress
->
probeSent
=
true
;
...
...
source/libs/sync/src/sync.c
浏览文件 @
9188298e
...
@@ -99,7 +99,7 @@ void syncCleanUp() {
...
@@ -99,7 +99,7 @@ void syncCleanUp() {
SSyncNode
*
syncStart
(
const
SSyncInfo
*
pInfo
)
{
SSyncNode
*
syncStart
(
const
SSyncInfo
*
pInfo
)
{
pthread_mutex_lock
(
&
gSyncManager
->
mutex
);
pthread_mutex_lock
(
&
gSyncManager
->
mutex
);
SSyncNode
**
ppNode
=
taosHashGet
(
gSyncManager
->
vgroupTable
,
&
pInfo
->
vgId
,
sizeof
(
SyncGroupId
));
SSyncNode
**
ppNode
=
taosHashGet
(
gSyncManager
->
vgroupTable
,
&
pInfo
->
vgId
,
sizeof
(
SyncGroupId
*
));
if
(
ppNode
!=
NULL
)
{
if
(
ppNode
!=
NULL
)
{
syncInfo
(
"vgroup %d already exist"
,
pInfo
->
vgId
);
syncInfo
(
"vgroup %d already exist"
,
pInfo
->
vgId
);
pthread_mutex_unlock
(
&
gSyncManager
->
mutex
);
pthread_mutex_unlock
(
&
gSyncManager
->
mutex
);
...
@@ -140,7 +140,7 @@ SSyncNode* syncStart(const SSyncInfo* pInfo) {
...
@@ -140,7 +140,7 @@ SSyncNode* syncStart(const SSyncInfo* pInfo) {
void
syncStop
(
const
SSyncNode
*
pNode
)
{
void
syncStop
(
const
SSyncNode
*
pNode
)
{
pthread_mutex_lock
(
&
gSyncManager
->
mutex
);
pthread_mutex_lock
(
&
gSyncManager
->
mutex
);
SSyncNode
**
ppNode
=
taosHashGet
(
gSyncManager
->
vgroupTable
,
&
pNode
->
vgId
,
sizeof
(
SyncGroupId
));
SSyncNode
**
ppNode
=
taosHashGet
(
gSyncManager
->
vgroupTable
,
&
pNode
->
vgId
,
sizeof
(
SyncGroupId
*
));
if
(
ppNode
==
NULL
)
{
if
(
ppNode
==
NULL
)
{
syncInfo
(
"vgroup %d not exist"
,
pNode
->
vgId
);
syncInfo
(
"vgroup %d not exist"
,
pNode
->
vgId
);
pthread_mutex_unlock
(
&
gSyncManager
->
mutex
);
pthread_mutex_unlock
(
&
gSyncManager
->
mutex
);
...
@@ -288,7 +288,7 @@ static void *syncWorkerMain(void *argv) {
...
@@ -288,7 +288,7 @@ static void *syncWorkerMain(void *argv) {
static
void
syncNodeTick
(
void
*
param
,
void
*
tmrId
)
{
static
void
syncNodeTick
(
void
*
param
,
void
*
tmrId
)
{
SyncGroupId
vgId
=
(
SyncGroupId
)
param
;
SyncGroupId
vgId
=
(
SyncGroupId
)
param
;
SSyncNode
**
ppNode
=
taosHashGet
(
gSyncManager
->
vgroupTable
,
&
vgId
,
sizeof
(
SyncGroupId
));
SSyncNode
**
ppNode
=
taosHashGet
(
gSyncManager
->
vgroupTable
,
&
vgId
,
sizeof
(
SyncGroupId
*
));
if
(
ppNode
==
NULL
)
{
if
(
ppNode
==
NULL
)
{
return
;
return
;
}
}
...
...
source/libs/sync/src/sync_raft_config_change.c
浏览文件 @
9188298e
...
@@ -13,6 +13,7 @@
...
@@ -13,6 +13,7 @@
* along with this program. If not, see <http://www.gnu.org/licenses/>.
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
*/
#include "raft.h"
#include "syncInt.h"
#include "syncInt.h"
#include "sync_raft_config_change.h"
#include "sync_raft_config_change.h"
#include "sync_raft_progress.h"
#include "sync_raft_progress.h"
...
@@ -40,40 +41,7 @@ static void makeVoter(SSyncRaftChanger* changer, SSyncRaftProgressTrackerConfig*
...
@@ -40,40 +41,7 @@ static void makeVoter(SSyncRaftChanger* changer, SSyncRaftProgressTrackerConfig*
static
void
makeLearner
(
SSyncRaftChanger
*
changer
,
SSyncRaftProgressTrackerConfig
*
config
,
static
void
makeLearner
(
SSyncRaftChanger
*
changer
,
SSyncRaftProgressTrackerConfig
*
config
,
SSyncRaftProgressMap
*
progressMap
,
SyncNodeId
id
);
SSyncRaftProgressMap
*
progressMap
,
SyncNodeId
id
);
static
void
removeNodeId
(
SSyncRaftChanger
*
changer
,
SSyncRaftProgressTrackerConfig
*
config
,
static
void
removeNodeId
(
SSyncRaftChanger
*
changer
,
SSyncRaftProgressTrackerConfig
*
config
,
SSyncRaftProgressMap
*
progressMap
,
SyncNodeId
id
);
SSyncRaftProgressMap
*
progressMap
,
SyncNodeId
id
);
// syncRaftChangerSimpleConfig carries out a series of configuration changes that (in aggregate)
// mutates the incoming majority config Voters[0] by at most one. This method
// will return an error if that is not the case, if the resulting quorum is
// zero, or if the configuration is in a joint state (i.e. if there is an
// outgoing configuration).
int
syncRaftChangerSimpleConfig
(
SSyncRaftChanger
*
changer
,
const
SSyncConfChangeSingleArray
*
css
,
SSyncRaftProgressTrackerConfig
*
config
,
SSyncRaftProgressMap
*
progressMap
)
{
int
ret
;
ret
=
checkAndCopy
(
changer
,
config
,
progressMap
);
if
(
ret
!=
0
)
{
return
ret
;
}
if
(
hasJointConfig
(
config
))
{
syncError
(
"can't apply simple config change in joint config"
);
return
-
1
;
}
ret
=
applyConfig
(
changer
,
config
,
progressMap
,
css
);
if
(
ret
!=
0
)
{
return
ret
;
}
int
n
=
symDiff
(
syncRaftJointConfigIncoming
(
&
changer
->
tracker
->
config
.
voters
),
syncRaftJointConfigIncoming
(
&
config
->
voters
));
if
(
n
>
1
)
{
syncError
(
"more than one voter changed without entering joint config"
);
return
-
1
;
}
return
checkAndReturn
(
config
,
progressMap
);
}
// EnterJoint verifies that the outgoing (=right) majority config of the joint
// EnterJoint verifies that the outgoing (=right) majority config of the joint
// config is empty and initializes it with a copy of the incoming (=left)
// config is empty and initializes it with a copy of the incoming (=left)
...
@@ -96,12 +64,13 @@ int syncRaftChangerEnterJoint(SSyncRaftChanger* changer, bool autoLeave, const S
...
@@ -96,12 +64,13 @@ int syncRaftChangerEnterJoint(SSyncRaftChanger* changer, bool autoLeave, const S
if
(
ret
!=
0
)
{
if
(
ret
!=
0
)
{
return
ret
;
return
ret
;
}
}
if
(
hasJointConfig
(
config
))
{
if
(
hasJointConfig
(
config
))
{
syncError
(
"config is already joint"
);
syncError
(
"config is already joint"
);
return
-
1
;
return
-
1
;
}
}
if
(
config
->
voters
.
incoming
.
replica
==
0
)
{
if
(
syncRaftJointConfigIsIncomingEmpty
(
&
config
->
voters
)
==
0
)
{
// We allow adding nodes to an empty config for convenience (testing and
// We allow adding nodes to an empty config for convenience (testing and
// bootstrap), but you can't enter a joint state.
// bootstrap), but you can't enter a joint state.
syncError
(
"can't make a zero-voter config joint"
);
syncError
(
"can't make a zero-voter config joint"
);
...
@@ -112,7 +81,7 @@ int syncRaftChangerEnterJoint(SSyncRaftChanger* changer, bool autoLeave, const S
...
@@ -112,7 +81,7 @@ int syncRaftChangerEnterJoint(SSyncRaftChanger* changer, bool autoLeave, const S
syncRaftJointConfigClearOutgoing
(
&
config
->
voters
);
syncRaftJointConfigClearOutgoing
(
&
config
->
voters
);
// Copy incoming to outgoing.
// Copy incoming to outgoing.
memcpy
(
&
config
->
voters
.
outgoing
,
&
config
->
voters
.
incoming
,
sizeof
(
SSyncCluster
)
);
syncRaftCopyNodeMap
(
&
config
->
voters
.
incoming
,
&
config
->
voters
.
outgoing
);
ret
=
applyConfig
(
changer
,
config
,
progressMap
,
css
);
ret
=
applyConfig
(
changer
,
config
,
progressMap
,
css
);
if
(
ret
!=
0
)
{
if
(
ret
!=
0
)
{
...
@@ -123,84 +92,43 @@ int syncRaftChangerEnterJoint(SSyncRaftChanger* changer, bool autoLeave, const S
...
@@ -123,84 +92,43 @@ int syncRaftChangerEnterJoint(SSyncRaftChanger* changer, bool autoLeave, const S
return
checkAndReturn
(
config
,
progressMap
);
return
checkAndReturn
(
config
,
progressMap
);
}
}
//
checkAndCopy copies the tracker's config and progress map (deeply enough for
//
Simple carries out a series of configuration changes that (in aggregate)
//
the purposes of the Changer) and returns those copies. It returns an error
//
mutates the incoming majority config Voters[0] by at most one. This method
//
if checkInvariants does.
//
will return an error if that is not the case, if the resulting quorum is
static
int
checkAndCopy
(
SSyncRaftChanger
*
changer
,
SSyncRaftProgressTrackerConfig
*
config
,
SSyncRaftProgressMap
*
progressMap
)
{
// zero, or if the configuration is in a joint state (i.e. if there is an
syncRaftCloneTrackerConfig
(
&
changer
->
tracker
->
config
,
config
);
// outgoing configuration).
int
i
;
int
syncRaftChangerSimpleConfig
(
SSyncRaftChanger
*
changer
,
const
SSyncConfChangeSingleArray
*
css
,
for
(
i
=
0
;
i
<
TSDB_MAX_REPLICA
;
++
i
)
{
SSyncRaftProgressTrackerConfig
*
config
,
SSyncRaftProgressMap
*
progressMap
)
{
SSyncRaftProgress
*
progress
=
&
(
changer
->
tracker
->
progressMap
.
progress
[
i
])
;
int
ret
;
if
(
progress
->
id
==
SYNC_NON_NODE_ID
)
{
continue
;
ret
=
checkAndCopy
(
changer
,
config
,
progressMap
)
;
}
if
(
ret
!=
0
)
{
syncRaftCopyProgress
(
progress
,
&
(
progressMap
->
progress
[
i
]))
;
return
ret
;
}
}
return
checkAndReturn
(
config
,
progressMap
);
}
// checkAndReturn calls checkInvariants on the input and returns either the
if
(
hasJointConfig
(
config
))
{
// resulting error or the input.
syncError
(
"can't apply simple config change in joint config"
);
static
int
checkAndReturn
(
SSyncRaftProgressTrackerConfig
*
config
,
SSyncRaftProgressMap
*
progressMap
)
{
if
(
checkInvariants
(
config
,
progressMap
)
!=
0
)
{
return
-
1
;
return
-
1
;
}
}
return
0
;
ret
=
applyConfig
(
changer
,
config
,
progressMap
,
css
);
}
// checkInvariants makes sure that the config and progress are compatible with
// each other. This is used to check both what the Changer is initialized with,
// as well as what it returns.
static
int
checkInvariants
(
SSyncRaftProgressTrackerConfig
*
config
,
SSyncRaftProgressMap
*
progressMap
)
{
int
ret
=
syncRaftCheckProgress
(
config
,
progressMap
);
if
(
ret
!=
0
)
{
if
(
ret
!=
0
)
{
return
ret
;
return
ret
;
}
}
int
i
;
int
n
=
symDiff
(
syncRaftJointConfigIncoming
(
&
changer
->
tracker
->
config
.
voters
),
// Any staged learner was staged because it could not be directly added due
syncRaftJointConfigIncoming
(
&
config
->
voters
));
// to a conflicting voter in the outgoing config.
if
(
n
>
1
)
{
for
(
i
=
0
;
i
<
TSDB_MAX_REPLICA
;
++
i
)
{
syncError
(
"more than one voter changed without entering joint config"
);
if
(
!
syncRaftJointConfigInOutgoing
(
&
config
->
voters
,
config
->
learnersNext
.
nodeId
[
i
]))
{
return
-
1
;
return
-
1
;
}
if
(
progressMap
->
progress
[
i
].
id
!=
SYNC_NON_NODE_ID
&&
progressMap
->
progress
[
i
].
isLearner
)
{
syncError
(
"%d is in LearnersNext, but is already marked as learner"
,
progressMap
->
progress
[
i
].
id
);
return
-
1
;
}
}
// Conversely Learners and Voters doesn't intersect at all.
for
(
i
=
0
;
i
<
TSDB_MAX_REPLICA
;
++
i
)
{
if
(
syncRaftJointConfigInIncoming
(
&
config
->
voters
,
config
->
learners
.
nodeId
[
i
]))
{
syncError
(
"%d is in Learners and voter.incoming"
,
progressMap
->
progress
[
i
].
id
);
return
-
1
;
}
if
(
progressMap
->
progress
[
i
].
id
!=
SYNC_NON_NODE_ID
&&
!
progressMap
->
progress
[
i
].
isLearner
)
{
syncError
(
"%d is in Learners, but is not marked as learner"
,
progressMap
->
progress
[
i
].
id
);
return
-
1
;
}
}
if
(
!
hasJointConfig
(
config
))
{
// We enforce that empty maps are nil instead of zero.
if
(
config
->
learnersNext
.
replica
>
0
)
{
syncError
(
"cfg.LearnersNext must be nil when not joint"
);
return
-
1
;
}
if
(
config
->
autoLeave
)
{
syncError
(
"AutoLeave must be false when not joint"
);
return
-
1
;
}
}
}
return
0
;
return
checkAndReturn
(
config
,
progressMap
);
}
static
bool
hasJointConfig
(
const
SSyncRaftProgressTrackerConfig
*
config
)
{
return
config
->
voters
.
outgoing
.
replica
>
0
;
}
}
// apply a change to the configuration. By convention, changes to voters are
// always made to the incoming majority config Voters[0]. Voters[1] is either
// empty or preserves the outgoing majority configuration while in a joint state.
static
int
applyConfig
(
SSyncRaftChanger
*
changer
,
SSyncRaftProgressTrackerConfig
*
config
,
static
int
applyConfig
(
SSyncRaftChanger
*
changer
,
SSyncRaftProgressTrackerConfig
*
config
,
SSyncRaftProgressMap
*
progressMap
,
const
SSyncConfChangeSingleArray
*
css
)
{
SSyncRaftProgressMap
*
progressMap
,
const
SSyncConfChangeSingleArray
*
css
)
{
int
i
;
int
i
;
...
@@ -227,7 +155,7 @@ static int applyConfig(SSyncRaftChanger* changer, SSyncRaftProgressTrackerConfig
...
@@ -227,7 +155,7 @@ static int applyConfig(SSyncRaftChanger* changer, SSyncRaftProgressTrackerConfig
}
}
}
}
if
(
config
->
voters
.
incoming
.
replica
==
0
)
{
if
(
syncRaftJointConfigIsIncomingEmpty
(
&
config
->
voters
)
)
{
syncError
(
"removed all voters"
);
syncError
(
"removed all voters"
);
return
-
1
;
return
-
1
;
}
}
...
@@ -235,86 +163,16 @@ static int applyConfig(SSyncRaftChanger* changer, SSyncRaftProgressTrackerConfig
...
@@ -235,86 +163,16 @@ static int applyConfig(SSyncRaftChanger* changer, SSyncRaftProgressTrackerConfig
return
0
;
return
0
;
}
}
// symdiff returns the count of the symmetric difference between the sets of
// uint64s, i.e. len( (l - r) \union (r - l)).
static
int
symDiff
(
const
SSyncRaftNodeMap
*
l
,
const
SSyncRaftNodeMap
*
r
)
{
int
n
;
int
i
;
int
j0
,
j1
;
const
SSyncRaftNodeMap
*
pairs
[
2
][
2
]
=
{
{
l
,
r
},
// count elems in l but not in r
{
r
,
l
},
// count elems in r but not in l
};
for
(
n
=
0
,
i
=
0
;
i
<
2
;
++
i
)
{
const
SSyncRaftNodeMap
**
pp
=
pairs
[
i
];
const
SSyncRaftNodeMap
*
p0
=
pp
[
0
];
const
SSyncRaftNodeMap
*
p1
=
pp
[
1
];
for
(
j0
=
0
;
j0
<
TSDB_MAX_REPLICA
;
++
j0
)
{
SyncNodeId
id
=
p0
->
nodeId
[
j0
];
if
(
id
==
SYNC_NON_NODE_ID
)
{
continue
;
}
for
(
j1
=
0
;
j1
<
p1
->
replica
;
++
j1
)
{
if
(
p1
->
nodeId
[
j1
]
!=
SYNC_NON_NODE_ID
&&
p1
->
nodeId
[
j1
]
!=
id
)
{
n
+=
1
;
}
}
}
}
return
n
;
}
static
void
initProgress
(
SSyncRaftChanger
*
changer
,
SSyncRaftProgressTrackerConfig
*
config
,
SSyncRaftProgressMap
*
progressMap
,
SyncNodeId
id
,
bool
isLearner
)
{
}
// nilAwareDelete deletes from a map, nil'ing the map itself if it is empty after.
static
void
nilAwareDelete
(
SSyncRaftNodeMap
*
nodeMap
,
SyncNodeId
id
)
{
int
i
;
for
(
i
=
0
;
i
<
TSDB_MAX_REPLICA
;
++
i
)
{
if
(
nodeMap
->
nodeId
[
i
]
==
id
)
{
nodeMap
->
replica
-=
1
;
nodeMap
->
nodeId
[
i
]
=
SYNC_NON_NODE_ID
;
break
;
}
}
assert
(
nodeMap
->
replica
>=
0
);
}
// nilAwareAdd populates a map entry, creating the map if necessary.
static
void
nilAwareAdd
(
SSyncRaftNodeMap
*
nodeMap
,
SyncNodeId
id
)
{
int
i
,
j
;
for
(
i
=
0
,
j
=
-
1
;
i
<
TSDB_MAX_REPLICA
;
++
i
)
{
if
(
nodeMap
->
nodeId
[
i
]
==
id
)
{
return
;
}
if
(
j
==
-
1
&&
nodeMap
->
nodeId
[
i
]
==
SYNC_NON_NODE_ID
)
{
j
=
i
;
}
}
assert
(
j
!=
-
1
);
nodeMap
->
nodeId
[
j
]
=
id
;
nodeMap
->
replica
+=
1
;
}
// makeVoter adds or promotes the given ID to be a voter in the incoming
// makeVoter adds or promotes the given ID to be a voter in the incoming
// majority config.
// majority config.
static
void
makeVoter
(
SSyncRaftChanger
*
changer
,
SSyncRaftProgressTrackerConfig
*
config
,
static
void
makeVoter
(
SSyncRaftChanger
*
changer
,
SSyncRaftProgressTrackerConfig
*
config
,
SSyncRaftProgressMap
*
progressMap
,
SyncNodeId
id
)
{
SSyncRaftProgressMap
*
progressMap
,
SyncNodeId
id
)
{
int
i
=
syncRaftFindProgressIndex
ByNodeId
(
progressMap
,
id
);
SSyncRaftProgress
*
progress
=
syncRaftFindProgress
ByNodeId
(
progressMap
,
id
);
if
(
i
==
-
1
)
{
if
(
progress
==
NULL
)
{
initProgress
(
changer
,
config
,
progressMap
,
id
,
false
);
initProgress
(
changer
,
config
,
progressMap
,
id
,
false
);
i
=
syncRaftFindProgressIndexByNodeId
(
progressMap
,
id
)
;
return
;
}
}
assert
(
i
!=
-
1
);
SSyncRaftProgress
*
progress
=
&
(
progressMap
->
progress
[
i
]);
progress
->
isLearner
=
false
;
progress
->
isLearner
=
false
;
nilAwareDelete
(
&
config
->
learners
,
id
);
nilAwareDelete
(
&
config
->
learners
,
id
);
...
@@ -337,14 +195,12 @@ static void makeVoter(SSyncRaftChanger* changer, SSyncRaftProgressTrackerConfig*
...
@@ -337,14 +195,12 @@ static void makeVoter(SSyncRaftChanger* changer, SSyncRaftProgressTrackerConfig*
// LeaveJoint().
// LeaveJoint().
static
void
makeLearner
(
SSyncRaftChanger
*
changer
,
SSyncRaftProgressTrackerConfig
*
config
,
static
void
makeLearner
(
SSyncRaftChanger
*
changer
,
SSyncRaftProgressTrackerConfig
*
config
,
SSyncRaftProgressMap
*
progressMap
,
SyncNodeId
id
)
{
SSyncRaftProgressMap
*
progressMap
,
SyncNodeId
id
)
{
int
i
=
syncRaftFindProgressIndex
ByNodeId
(
progressMap
,
id
);
SSyncRaftProgress
*
progress
=
syncRaftFindProgress
ByNodeId
(
progressMap
,
id
);
if
(
i
==
-
1
)
{
if
(
progress
==
NULL
)
{
initProgress
(
changer
,
config
,
progressMap
,
id
,
fals
e
);
initProgress
(
changer
,
config
,
progressMap
,
id
,
tru
e
);
i
=
syncRaftFindProgressIndexByNodeId
(
progressMap
,
id
)
;
return
;
}
}
assert
(
i
!=
-
1
);
SSyncRaftProgress
*
progress
=
&
(
progressMap
->
progress
[
i
]);
if
(
progress
->
isLearner
)
{
if
(
progress
->
isLearner
)
{
return
;
return
;
}
}
...
@@ -352,15 +208,15 @@ static void makeLearner(SSyncRaftChanger* changer, SSyncRaftProgressTrackerConfi
...
@@ -352,15 +208,15 @@ static void makeLearner(SSyncRaftChanger* changer, SSyncRaftProgressTrackerConfi
removeNodeId
(
changer
,
config
,
progressMap
,
id
);
removeNodeId
(
changer
,
config
,
progressMap
,
id
);
// ... but save the Progress.
// ... but save the Progress.
syncRaftAddToProgressMap
(
progressMap
,
id
);
syncRaftAddToProgressMap
(
progressMap
,
progress
);
// Use LearnersNext if we can't add the learner to Learners directly, i.e.
// Use LearnersNext if we can't add the learner to Learners directly, i.e.
// if the peer is still tracked as a voter in the outgoing config. It will
// if the peer is still tracked as a voter in the outgoing config. It will
// be turned into a learner in LeaveJoint().
// be turned into a learner in LeaveJoint().
//
//
// Otherwise, add a regular learner right away.
// Otherwise, add a regular learner right away.
bool
in
Outgoing
=
syncRaftJointConfigInCluster
(
&
config
->
voters
.
outgoing
,
id
);
bool
in
InOutgoing
=
syncRaftJointConfigIsInOutgoing
(
&
config
->
voters
,
id
);
if
(
inOutgoing
)
{
if
(
in
In
Outgoing
)
{
nilAwareAdd
(
&
config
->
learnersNext
,
id
);
nilAwareAdd
(
&
config
->
learnersNext
,
id
);
}
else
{
}
else
{
nilAwareAdd
(
&
config
->
learners
,
id
);
nilAwareAdd
(
&
config
->
learners
,
id
);
...
@@ -371,8 +227,8 @@ static void makeLearner(SSyncRaftChanger* changer, SSyncRaftProgressTrackerConfi
...
@@ -371,8 +227,8 @@ static void makeLearner(SSyncRaftChanger* changer, SSyncRaftProgressTrackerConfi
// removeNodeId this peer as a voter or learner from the incoming config.
// removeNodeId this peer as a voter or learner from the incoming config.
static
void
removeNodeId
(
SSyncRaftChanger
*
changer
,
SSyncRaftProgressTrackerConfig
*
config
,
static
void
removeNodeId
(
SSyncRaftChanger
*
changer
,
SSyncRaftProgressTrackerConfig
*
config
,
SSyncRaftProgressMap
*
progressMap
,
SyncNodeId
id
)
{
SSyncRaftProgressMap
*
progressMap
,
SyncNodeId
id
)
{
int
i
=
syncRaftFindProgressIndex
ByNodeId
(
progressMap
,
id
);
SSyncRaftProgress
*
progress
=
syncRaftFindProgress
ByNodeId
(
progressMap
,
id
);
if
(
i
==
-
1
)
{
if
(
progress
==
NULL
)
{
return
;
return
;
}
}
...
@@ -381,8 +237,173 @@ static void removeNodeId(SSyncRaftChanger* changer, SSyncRaftProgressTrackerConf
...
@@ -381,8 +237,173 @@ static void removeNodeId(SSyncRaftChanger* changer, SSyncRaftProgressTrackerConf
nilAwareDelete
(
&
config
->
learnersNext
,
id
);
nilAwareDelete
(
&
config
->
learnersNext
,
id
);
// If the peer is still a voter in the outgoing config, keep the Progress.
// If the peer is still a voter in the outgoing config, keep the Progress.
bool
in
Outgoing
=
syncRaftJointConfigInCluster
(
&
config
->
voters
.
outgoing
,
id
);
bool
in
InOutgoing
=
syncRaftJointConfigIsInOutgoing
(
&
config
->
voters
,
id
);
if
(
!
inOutgoing
)
{
if
(
!
in
In
Outgoing
)
{
syncRaftRemoveFromProgressMap
(
progressMap
,
id
);
syncRaftRemoveFromProgressMap
(
progressMap
,
id
);
}
}
}
// initProgress registers `id` in the tracker config (as an incoming voter, or
// as a learner when isLearner is set) and adds a freshly allocated progress
// record for it to progressMap.
//
// NOTE(review): whether syncRaftAddToProgressMap keeps the pointer or copies
// the record is not visible here; if it copies, the allocation below leaks.
static void initProgress(SSyncRaftChanger* changer, SSyncRaftProgressTrackerConfig* config,
                         SSyncRaftProgressMap* progressMap, SyncNodeId id, bool isLearner) {
  if (isLearner) {
    nilAwareAdd(&config->learners, id);
  } else {
    syncRaftJointConfigAddToIncoming(&config->voters, id);
  }

  SSyncRaftProgress* fresh = (SSyncRaftProgress*)malloc(sizeof(SSyncRaftProgress));
  assert(fresh != NULL);

  SSyncRaftProgress initial = {
    .id = id,
    .groupId = changer->tracker->pRaft->selfGroupId,
    // Starting from the changer's last index means the follower can be
    // probed with that index.
    //
    // TODO(tbg): seems awfully optimistic. Using the first index would be
    // better. The general expectation here is that the follower has no log
    // at all (and will thus likely need a snapshot), though the app may
    // have applied a snapshot out of band before adding the replica (thus
    // making the first index the better choice).
    .nextIndex = changer->lastIndex,
    .matchIndex = 0,
    .state = PROGRESS_STATE_PROBE,
    .pendingSnapshotIndex = 0,
    .probeSent = false,
    .inflights = syncRaftOpenInflights(changer->tracker->maxInflightMsgs),
    .isLearner = isLearner,
    // A newly added node is marked recently active; otherwise CheckQuorum
    // may make us step down if it runs before the node has had a chance to
    // communicate with us.
    .recentActive = true,
    .refCount = 0,
  };
  *fresh = initial;

  syncRaftAddToProgressMap(progressMap, fresh);
}
// checkInvariants makes sure that the config and progress are compatible with
// each other. This is used to check both what the Changer is initialized with,
// as well as what it returns.
//
// Returns 0 when every check passes. A non-zero result of
// syncRaftCheckTrackerConfigInProgress is returned unchanged; every other
// violation is logged through syncError and reported as -1.
static int checkInvariants(SSyncRaftProgressTrackerConfig* config, SSyncRaftProgressMap* progressMap) {
  int ret = syncRaftCheckTrackerConfigInProgress(config, progressMap);
  if (ret != 0) {
    return ret;
  }

  // Any staged learner was staged because it could not be directly added due
  // to a conflicting voter in the outgoing config.
  //
  // NOTE(review): pNodeId is passed by value and is never reassigned in this
  // loop, so it is still NULL when `*pNodeId` is read below. Confirm the
  // intended contract of syncRaftIterateNodeMap.
  SyncNodeId* pNodeId = NULL;
  while (!syncRaftIterateNodeMap(&config->learnersNext, pNodeId)) {
    SyncNodeId nodeId = *pNodeId;
    if (!syncRaftJointConfigInOutgoing(&config->voters, nodeId)) {
      syncError("[%d] is in LearnersNext, but not outgoing", nodeId);
      return -1;
    }
    SSyncRaftProgress* progress = syncRaftFindProgressByNodeId(progressMap, nodeId);
    assert(progress);
    assert(progress->id == nodeId);
    // A staged learner must not already be flagged as a learner.
    if (progress->isLearner) {
      syncError("[%d:%d] is in LearnersNext, but is already marked as learner", progress->groupId, nodeId);
      return -1;
    }
  }

  // Conversely, Learners and Voters must not intersect at all.
  //
  // NOTE(review): only the outgoing voters are tested here; there is no
  // matching check against the incoming voters. Confirm whether that is
  // intentional.
  pNodeId = NULL;
  while (!syncRaftIterateNodeMap(&config->learners, pNodeId)) {
    SyncNodeId nodeId = *pNodeId;
    if (syncRaftJointConfigInOutgoing(&config->voters, nodeId)) {
      syncError("%d is in Learners and outgoing", nodeId);
      return -1;
    }
    SSyncRaftProgress* progress = syncRaftFindProgressByNodeId(progressMap, nodeId);
    assert(progress);
    assert(progress->id == nodeId);

    // Every listed learner must be flagged as a learner in its progress.
    if (!progress->isLearner) {
      syncError("[%d:%d] is in Learners, but is not marked as learner", progress->groupId, nodeId);
      return -1;
    }
  }

  if (!hasJointConfig(config)) {
    // Outside a joint config there must be no staged learners ...
    if (syncRaftNodeMapSize(&config->learnersNext) > 0) {
      syncError("cfg.LearnersNext must be nil when not joint");
      return -1;
    }
    // ... and autoLeave must be off.
    if (config->autoLeave) {
      syncError("AutoLeave must be false when not joint");
      return -1;
    }
  }

  return 0;
}
// checkAndCopy hands the changer's tracker config and progress entries to the
// caller-provided `config` / `progressMap` and then validates the result.
// Returns whatever checkAndReturn reports for the pair.
//
// NOTE(review): the (tracker config, config) argument order is presumed to be
// (from, to) -- confirm against syncRaftCopyTrackerConfig. The iteration
// cursor is passed by value and stays NULL, so what this loop actually feeds
// to syncRaftAddToProgressMap depends on syncRaftIterateProgressMap, which is
// not visible here.
static int checkAndCopy(SSyncRaftChanger* changer, SSyncRaftProgressTrackerConfig* config,
                        SSyncRaftProgressMap* progressMap) {
  syncRaftCopyTrackerConfig(&changer->tracker->config, config);

  // Start from an empty destination map before re-adding the entries.
  syncRaftClearProgressMap(progressMap);

  SSyncRaftProgress* cursor = NULL;
  for (;;) {
    if (syncRaftIterateProgressMap(&changer->tracker->progressMap, cursor)) {
      break;
    }
    syncRaftAddToProgressMap(progressMap, cursor);
  }

  return checkAndReturn(config, progressMap);
}
// checkAndReturn runs checkInvariants on the given config/progress pair.
// Any non-zero result from checkInvariants is collapsed to -1; success is 0.
static int checkAndReturn(SSyncRaftProgressTrackerConfig* config, SSyncRaftProgressMap* progressMap) {
  return (checkInvariants(config, progressMap) != 0) ? -1 : 0;
}
// hasJointConfig reports whether the voters' outgoing side is non-empty,
// i.e. whether the config is currently a joint one.
static bool hasJointConfig(const SSyncRaftProgressTrackerConfig* config) {
  bool outgoingEmpty = syncRaftJointConfigIsOutgoingEmpty(&config->voters);
  return !outgoingEmpty;
}
// symDiff returns the size of the symmetric difference between two node-id
// sets, i.e. len((l - r) \union (r - l)).
//
// l, r: the two node maps to compare; neither is modified.
// Returns the number of ids present in exactly one of the two maps.
//
// NOTE(review): like the other callers, this relies on syncRaftIterateNodeMap
// to advance a cursor that is passed by value; confirm that contract.
static int symDiff(const SSyncRaftNodeMap* l, const SSyncRaftNodeMap* r) {
  int n = 0;
  int i;
  const SSyncRaftNodeMap* pairs[2][2] = {
    {l, r},  // count elems in l but not in r
    {r, l},  // count elems in r but not in l
  };

  for (i = 0; i < 2; ++i) {
    const SSyncRaftNodeMap* p0 = pairs[i][0];
    const SSyncRaftNodeMap* p1 = pairs[i][1];

    // The cursor must start from NULL for every pass; it used to be read
    // uninitialized, which is undefined behaviour.
    SyncNodeId* pNodeId = NULL;
    while (!syncRaftIterateNodeMap(p0, pNodeId)) {
      if (!syncRaftIsInNodeMap(p1, *pNodeId)) {
        n += 1;
      }
    }
  }

  return n;
}
// nilAwareDelete removes `id` from nodeMap by delegating to
// syncRaftRemoveFromNodeMap. Despite the name, nothing here releases or
// resets the map itself when it becomes empty; only the entry is removed.
static void nilAwareDelete(SSyncRaftNodeMap* nodeMap, SyncNodeId id) {
  syncRaftRemoveFromNodeMap(nodeMap, id);
}
// nilAwareAdd inserts `id` into nodeMap by delegating to
// syncRaftAddToNodeMap. Despite the name, the map is not created here; the
// caller must pass a map that is already usable.
static void nilAwareAdd(SSyncRaftNodeMap* nodeMap, SyncNodeId id) {
  syncRaftAddToNodeMap(nodeMap, id);
}
}
\ No newline at end of file
source/libs/sync/src/raft_election.c
→
source/libs/sync/src/
sync_
raft_election.c
浏览文件 @
9188298e
...
@@ -17,15 +17,40 @@
...
@@ -17,15 +17,40 @@
#include "raft.h"
#include "raft.h"
#include "raft_log.h"
#include "raft_log.h"
#include "raft_message.h"
#include "raft_message.h"
#include "sync_raft_progress_tracker.h"
void
syncRaftStartElection
(
SSyncRaft
*
pRaft
,
ESyncRaftElectionType
cType
)
{
void
syncRaftStartElection
(
SSyncRaft
*
pRaft
,
ESyncRaftElectionType
cType
)
{
SyncTerm
term
;
if
(
pRaft
->
state
==
TAOS_SYNC_STATE_LEADER
)
{
syncDebug
(
"[%d:%d] ignoring RAFT_MSG_INTERNAL_ELECTION because already leader"
,
pRaft
->
selfGroupId
,
pRaft
->
selfId
);
return
;
}
if
(
!
syncRaftIsPromotable
(
pRaft
))
{
syncWarn
(
"[%d:%d] is unpromotable and can not syncRaftCampaign"
,
pRaft
->
selfGroupId
,
pRaft
->
selfId
);
return
;
}
// if there is pending uncommitted config,cannot start election
if
(
syncRaftLogNumOfPendingConf
(
pRaft
->
log
)
>
0
&&
syncRaftHasUnappliedLog
(
pRaft
->
log
))
{
syncWarn
(
"[%d:%d] cannot syncRaftStartElection at term %"
PRId64
" since there are still pending configuration changes to apply"
,
pRaft
->
selfGroupId
,
pRaft
->
selfId
,
pRaft
->
term
);
return
;
}
syncInfo
(
"[%d:%d] is starting a new election at term %"
PRId64
""
,
pRaft
->
selfGroupId
,
pRaft
->
selfId
,
pRaft
->
term
);
syncRaftCampaign
(
pRaft
,
cType
);
}
// syncRaftCampaign transitions the raft instance to candidate state. This must only be
// called after verifying that this is a legitimate transition.
void
syncRaftCampaign
(
SSyncRaft
*
pRaft
,
ESyncRaftElectionType
cType
)
{
bool
preVote
;
bool
preVote
;
ESyncRaftMessageType
voteMsgType
;
SyncTerm
term
;
if
(
syncRaftIsPromotable
(
pRaft
))
{
if
(
syncRaftIsPromotable
(
pRaft
))
{
syncDebug
(
"[%d:%d] is unpromotable;
c
ampaign() should have been called"
,
pRaft
->
selfGroupId
,
pRaft
->
selfId
);
syncDebug
(
"[%d:%d] is unpromotable;
syncRaftC
ampaign() should have been called"
,
pRaft
->
selfGroupId
,
pRaft
->
selfId
);
return
0
;
return
;
}
}
if
(
cType
==
SYNC_RAFT_CAMPAIGN_PRE_ELECTION
)
{
if
(
cType
==
SYNC_RAFT_CAMPAIGN_PRE_ELECTION
)
{
...
@@ -35,7 +60,6 @@ void syncRaftStartElection(SSyncRaft* pRaft, ESyncRaftElectionType cType) {
...
@@ -35,7 +60,6 @@ void syncRaftStartElection(SSyncRaft* pRaft, ESyncRaftElectionType cType) {
term
=
pRaft
->
term
+
1
;
term
=
pRaft
->
term
+
1
;
}
else
{
}
else
{
syncRaftBecomeCandidate
(
pRaft
);
syncRaftBecomeCandidate
(
pRaft
);
voteMsgType
=
RAFT_MSG_VOTE
;
term
=
pRaft
->
term
;
term
=
pRaft
->
term
;
preVote
=
false
;
preVote
=
false
;
}
}
...
@@ -43,10 +67,8 @@ void syncRaftStartElection(SSyncRaft* pRaft, ESyncRaftElectionType cType) {
...
@@ -43,10 +67,8 @@ void syncRaftStartElection(SSyncRaft* pRaft, ESyncRaftElectionType cType) {
int
quorum
=
syncRaftQuorum
(
pRaft
);
int
quorum
=
syncRaftQuorum
(
pRaft
);
ESyncRaftVoteResult
result
=
syncRaftPollVote
(
pRaft
,
pRaft
->
selfId
,
preVote
,
true
,
NULL
,
NULL
);
ESyncRaftVoteResult
result
=
syncRaftPollVote
(
pRaft
,
pRaft
->
selfId
,
preVote
,
true
,
NULL
,
NULL
);
if
(
result
==
SYNC_RAFT_VOTE_WON
)
{
if
(
result
==
SYNC_RAFT_VOTE_WON
)
{
/**
// We won the election after voting for ourselves (which must mean that
* We won the election after voting for ourselves (which must mean that
// this is a single-node cluster). Advance to the next state.
* this is a single-node cluster). Advance to the next state.
**/
if
(
cType
==
SYNC_RAFT_CAMPAIGN_PRE_ELECTION
)
{
if
(
cType
==
SYNC_RAFT_CAMPAIGN_PRE_ELECTION
)
{
syncRaftStartElection
(
pRaft
,
SYNC_RAFT_CAMPAIGN_ELECTION
);
syncRaftStartElection
(
pRaft
,
SYNC_RAFT_CAMPAIGN_ELECTION
);
}
else
{
}
else
{
...
@@ -59,12 +81,23 @@ void syncRaftStartElection(SSyncRaft* pRaft, ESyncRaftElectionType cType) {
...
@@ -59,12 +81,23 @@ void syncRaftStartElection(SSyncRaft* pRaft, ESyncRaftElectionType cType) {
int
i
;
int
i
;
SyncIndex
lastIndex
=
syncRaftLogLastIndex
(
pRaft
->
log
);
SyncIndex
lastIndex
=
syncRaftLogLastIndex
(
pRaft
->
log
);
SyncTerm
lastTerm
=
syncRaftLogLastTerm
(
pRaft
->
log
);
SyncTerm
lastTerm
=
syncRaftLogLastTerm
(
pRaft
->
log
);
for
(
i
=
0
;
i
<
pRaft
->
cluster
.
replica
;
++
i
)
{
SSyncRaftNodeMap
nodeMap
;
if
(
i
==
pRaft
->
cluster
.
selfIndex
)
{
syncRaftJointConfigIDs
(
&
pRaft
->
tracker
->
config
.
voters
,
&
nodeMap
);
SyncNodeId
*
pNodeId
=
NULL
;
while
(
!
syncRaftIterateNodeMap
(
&
nodeMap
,
pNodeId
))
{
SyncNodeId
nodeId
=
*
pNodeId
;
if
(
nodeId
==
SYNC_NON_NODE_ID
)
{
continue
;
continue
;
}
}
SyncNodeId
nodeId
=
pRaft
->
cluster
.
nodeInfo
[
i
].
nodeId
;
if
(
nodeId
==
pRaft
->
selfId
)
{
continue
;
}
SNodeInfo
*
pNode
=
syncRaftGetNodeById
(
pRaft
,
nodeId
);
if
(
pNode
==
NULL
)
{
continue
;
}
SSyncMessage
*
pMsg
=
syncNewVoteMsg
(
pRaft
->
selfGroupId
,
pRaft
->
selfId
,
SSyncMessage
*
pMsg
=
syncNewVoteMsg
(
pRaft
->
selfGroupId
,
pRaft
->
selfId
,
term
,
cType
,
lastIndex
,
lastTerm
);
term
,
cType
,
lastIndex
,
lastTerm
);
...
@@ -72,10 +105,10 @@ void syncRaftStartElection(SSyncRaft* pRaft, ESyncRaftElectionType cType) {
...
@@ -72,10 +105,10 @@ void syncRaftStartElection(SSyncRaft* pRaft, ESyncRaftElectionType cType) {
continue
;
continue
;
}
}
syncInfo
(
"[%d:%d] [logterm: %"
PRId64
", index: %"
PRId64
"] sent
%d
request to %d at term %"
PRId64
""
,
syncInfo
(
"[%d:%d] [logterm: %"
PRId64
", index: %"
PRId64
"] sent
vote
request to %d at term %"
PRId64
""
,
pRaft
->
selfGroupId
,
pRaft
->
selfId
,
lastTerm
,
pRaft
->
selfGroupId
,
pRaft
->
selfId
,
lastTerm
,
lastIndex
,
voteMsgType
,
nodeId
,
pRaft
->
term
);
lastIndex
,
nodeId
,
pRaft
->
term
);
pRaft
->
io
.
send
(
pMsg
,
&
(
pRaft
->
cluster
.
nodeInfo
[
i
])
);
pRaft
->
io
.
send
(
pMsg
,
pNode
);
}
}
}
}
\ No newline at end of file
source/libs/sync/src/sync_raft_impl.c
浏览文件 @
9188298e
...
@@ -14,7 +14,7 @@
...
@@ -14,7 +14,7 @@
*/
*/
#include "raft.h"
#include "raft.h"
#include "
raft_configuration
.h"
#include "
sync_raft_impl
.h"
#include "raft_log.h"
#include "raft_log.h"
#include "raft_replication.h"
#include "raft_replication.h"
#include "sync_raft_progress_tracker.h"
#include "sync_raft_progress_tracker.h"
...
@@ -25,6 +25,8 @@ static int stepFollower(SSyncRaft* pRaft, const SSyncMessage* pMsg);
...
@@ -25,6 +25,8 @@ static int stepFollower(SSyncRaft* pRaft, const SSyncMessage* pMsg);
static
int
stepCandidate
(
SSyncRaft
*
pRaft
,
const
SSyncMessage
*
pMsg
);
static
int
stepCandidate
(
SSyncRaft
*
pRaft
,
const
SSyncMessage
*
pMsg
);
static
int
stepLeader
(
SSyncRaft
*
pRaft
,
const
SSyncMessage
*
pMsg
);
static
int
stepLeader
(
SSyncRaft
*
pRaft
,
const
SSyncMessage
*
pMsg
);
static
bool
increaseUncommittedSize
(
SSyncRaft
*
pRaft
,
SSyncRaftEntry
*
entries
,
int
n
);
static
int
triggerAll
(
SSyncRaft
*
pRaft
);
static
int
triggerAll
(
SSyncRaft
*
pRaft
);
static
void
tickElection
(
SSyncRaft
*
pRaft
);
static
void
tickElection
(
SSyncRaft
*
pRaft
);
...
@@ -82,13 +84,22 @@ void syncRaftBecomeLeader(SSyncRaft* pRaft) {
...
@@ -82,13 +84,22 @@ void syncRaftBecomeLeader(SSyncRaft* pRaft) {
resetRaft
(
pRaft
,
pRaft
->
term
);
resetRaft
(
pRaft
,
pRaft
->
term
);
pRaft
->
leaderId
=
pRaft
->
leaderId
;
pRaft
->
leaderId
=
pRaft
->
leaderId
;
pRaft
->
state
=
TAOS_SYNC_STATE_LEADER
;
pRaft
->
state
=
TAOS_SYNC_STATE_LEADER
;
// TODO: check if there is pending config log
int
nPendingConf
=
syncRaftLogNumOfPendingConf
(
pRaft
->
log
);
if
(
nPendingConf
>
1
)
{
syncFatal
(
"unexpected multiple uncommitted config entry"
);
}
syncInfo
(
"[%d:%d] became leader at term %"
PRId64
""
,
pRaft
->
selfGroupId
,
pRaft
->
selfId
,
pRaft
->
term
);
SSyncRaftProgress
*
progress
=
syncRaftFindProgressByNodeId
(
&
pRaft
->
tracker
->
progressMap
,
pRaft
->
selfId
);
assert
(
progress
!=
NULL
);
// Followers enter replicate mode when they've been successfully probed
// (perhaps after having received a snapshot as a result). The leader is
// trivially in this state. Note that r.reset() has initialized this
// progress with the last index already.
syncRaftProgressBecomeReplicate
(
progress
);
// Conservatively set the pendingConfIndex to the last index in the
// log. There may or may not be a pending config change, but it's
// safe to delay any future proposals until we commit all our
// pending log entries, and scanning the entire tail of the log
// could be expensive.
SyncIndex
lastIndex
=
syncRaftLogLastIndex
(
pRaft
->
log
);
pRaft
->
pendingConfigIndex
=
lastIndex
;
// after become leader, send a no-op log
// after become leader, send a no-op log
SSyncRaftEntry
*
entry
=
(
SSyncRaftEntry
*
)
malloc
(
sizeof
(
SSyncRaftEntry
));
SSyncRaftEntry
*
entry
=
(
SSyncRaftEntry
*
)
malloc
(
sizeof
(
SSyncRaftEntry
));
...
@@ -103,6 +114,7 @@ void syncRaftBecomeLeader(SSyncRaft* pRaft) {
...
@@ -103,6 +114,7 @@ void syncRaftBecomeLeader(SSyncRaft* pRaft) {
};
};
appendEntries
(
pRaft
,
entry
,
1
);
appendEntries
(
pRaft
,
entry
,
1
);
//syncRaftTriggerHeartbeat(pRaft);
//syncRaftTriggerHeartbeat(pRaft);
syncInfo
(
"[%d:%d] became leader at term %"
PRId64
""
,
pRaft
->
selfGroupId
,
pRaft
->
selfId
,
pRaft
->
term
);
}
}
void
syncRaftTriggerHeartbeat
(
SSyncRaft
*
pRaft
)
{
void
syncRaftTriggerHeartbeat
(
SSyncRaft
*
pRaft
)
{
...
@@ -123,15 +135,16 @@ bool syncRaftIsPastElectionTimeout(SSyncRaft* pRaft) {
...
@@ -123,15 +135,16 @@ bool syncRaftIsPastElectionTimeout(SSyncRaft* pRaft) {
}
}
int
syncRaftQuorum
(
SSyncRaft
*
pRaft
)
{
int
syncRaftQuorum
(
SSyncRaft
*
pRaft
)
{
return
pRaft
->
cluster
.
replica
/
2
+
1
;
return
0
;
//return pRaft->cluster.replica / 2 + 1;
}
}
ESyncRaftVoteResult
syncRaftPollVote
(
SSyncRaft
*
pRaft
,
SyncNodeId
id
,
ESyncRaftVoteResult
syncRaftPollVote
(
SSyncRaft
*
pRaft
,
SyncNodeId
id
,
bool
preVote
,
bool
grant
,
bool
preVote
,
bool
grant
,
int
*
rejected
,
int
*
granted
)
{
int
*
rejected
,
int
*
granted
)
{
int
voterIndex
=
syncRaftConfigurationIndexOfNode
(
pRaft
,
id
);
SNodeInfo
*
pNode
=
syncRaftGetNodeById
(
pRaft
,
id
);
if
(
voterIndex
==
-
1
)
{
if
(
pNode
==
NULL
)
{
return
SYNC_RAFT_VOTE_PENDING
;
return
true
;
}
}
if
(
grant
)
{
if
(
grant
)
{
...
@@ -142,7 +155,7 @@ ESyncRaftVoteResult syncRaftPollVote(SSyncRaft* pRaft, SyncNodeId id,
...
@@ -142,7 +155,7 @@ ESyncRaftVoteResult syncRaftPollVote(SSyncRaft* pRaft, SyncNodeId id,
pRaft
->
selfGroupId
,
pRaft
->
selfId
,
preVote
,
id
,
pRaft
->
term
);
pRaft
->
selfGroupId
,
pRaft
->
selfId
,
preVote
,
id
,
pRaft
->
term
);
}
}
syncRaftRecordVote
(
pRaft
->
tracker
,
voterIndex
,
grant
);
syncRaftRecordVote
(
pRaft
->
tracker
,
pNode
->
nodeId
,
grant
);
return
syncRaftTallyVotes
(
pRaft
->
tracker
,
rejected
,
granted
);
return
syncRaftTallyVotes
(
pRaft
->
tracker
,
rejected
,
granted
);
}
}
/*
/*
...
@@ -154,7 +167,7 @@ ESyncRaftVoteResult syncRaftPollVote(SSyncRaft* pRaft, SyncNodeId id,
...
@@ -154,7 +167,7 @@ ESyncRaftVoteResult syncRaftPollVote(SSyncRaft* pRaft, SyncNodeId id,
pRaft->selfGroupId, pRaft->selfId, id, pRaft->term);
pRaft->selfGroupId, pRaft->selfId, id, pRaft->term);
}
}
int voteIndex = syncRaft
ConfigurationIndexOfNode
(pRaft, id);
int voteIndex = syncRaft
GetNodeById
(pRaft, id);
assert(voteIndex < pRaft->cluster.replica && voteIndex >= 0);
assert(voteIndex < pRaft->cluster.replica && voteIndex >= 0);
assert(pRaft->candidateState.votes[voteIndex] == SYNC_RAFT_VOTE_RESP_UNKNOWN);
assert(pRaft->candidateState.votes[voteIndex] == SYNC_RAFT_VOTE_RESP_UNKNOWN);
...
@@ -185,19 +198,30 @@ void syncRaftLoadState(SSyncRaft* pRaft, const SSyncServerState* serverState) {
...
@@ -185,19 +198,30 @@ void syncRaftLoadState(SSyncRaft* pRaft, const SSyncServerState* serverState) {
pRaft
->
voteFor
=
serverState
->
voteFor
;
pRaft
->
voteFor
=
serverState
->
voteFor
;
}
}
static
void
visitProgressSendAppend
(
int
i
,
SSyncRaftProgress
*
progress
,
void
*
arg
)
{
static
void
visitProgressSendAppend
(
SSyncRaftProgress
*
progress
,
void
*
arg
)
{
SSyncRaft
*
pRaft
=
(
SSyncRaft
*
)
arg
;
SSyncRaft
*
pRaft
=
(
SSyncRaft
*
)
arg
;
if
(
pRaft
->
selfId
==
progress
->
id
)
{
if
(
pRaft
->
selfId
==
progress
->
id
)
{
return
;
return
;
}
}
syncRaft
Replicate
(
arg
,
progress
,
true
);
syncRaft
MaybeSendAppend
(
arg
,
progress
,
true
);
}
}
// bcastAppend sends RPC, with entries to all peers that are not up-to-date
// according to the progress recorded in r.prs.
void
syncRaftBroadcastAppend
(
SSyncRaft
*
pRaft
)
{
void
syncRaftBroadcastAppend
(
SSyncRaft
*
pRaft
)
{
syncRaftProgressVisit
(
pRaft
->
tracker
,
visitProgressSendAppend
,
pRaft
);
syncRaftProgressVisit
(
pRaft
->
tracker
,
visitProgressSendAppend
,
pRaft
);
}
}
// syncRaftGetNodeById looks `id` up in pRaft->nodeInfoMap and returns the
// SNodeInfo pointer stored for it, or NULL when the id has no entry.
//
// NOTE(review): the key length passed is sizeof(SyncNodeId*), not
// sizeof(SyncNodeId). If SyncNodeId is narrower than a pointer this reads past
// `id`; it must match whatever length the insert side uses -- confirm.
SNodeInfo* syncRaftGetNodeById(SSyncRaft *pRaft, SyncNodeId id) {
  SNodeInfo** slot = taosHashGet(pRaft->nodeInfoMap, &id, sizeof(SyncNodeId*));
  return (slot == NULL) ? NULL : *slot;
}
static
int
convertClear
(
SSyncRaft
*
pRaft
)
{
static
int
convertClear
(
SSyncRaft
*
pRaft
)
{
}
}
...
@@ -223,7 +247,7 @@ static int stepCandidate(SSyncRaft* pRaft, const SSyncMessage* pMsg) {
...
@@ -223,7 +247,7 @@ static int stepCandidate(SSyncRaft* pRaft, const SSyncMessage* pMsg) {
syncRaftHandleVoteRespMessage
(
pRaft
,
pMsg
);
syncRaftHandleVoteRespMessage
(
pRaft
,
pMsg
);
return
0
;
return
0
;
}
else
if
(
msgType
==
RAFT_MSG_APPEND
)
{
}
else
if
(
msgType
==
RAFT_MSG_APPEND
)
{
syncRaftBecomeFollower
(
pRaft
,
p
Raft
->
term
,
pMsg
->
from
);
syncRaftBecomeFollower
(
pRaft
,
p
Msg
->
term
,
pMsg
->
from
);
syncRaftHandleAppendEntriesMessage
(
pRaft
,
pMsg
);
syncRaftHandleAppendEntriesMessage
(
pRaft
,
pMsg
);
}
}
return
0
;
return
0
;
...
@@ -234,9 +258,7 @@ static int stepLeader(SSyncRaft* pRaft, const SSyncMessage* pMsg) {
...
@@ -234,9 +258,7 @@ static int stepLeader(SSyncRaft* pRaft, const SSyncMessage* pMsg) {
return
0
;
return
0
;
}
}
/**
// tickElection is run by followers and candidates after r.electionTimeout.
* tickElection is run by followers and candidates per tick.
**/
static
void
tickElection
(
SSyncRaft
*
pRaft
)
{
static
void
tickElection
(
SSyncRaft
*
pRaft
)
{
pRaft
->
electionElapsed
+=
1
;
pRaft
->
electionElapsed
+=
1
;
...
@@ -254,10 +276,16 @@ static void tickElection(SSyncRaft* pRaft) {
...
@@ -254,10 +276,16 @@ static void tickElection(SSyncRaft* pRaft) {
syncRaftStep
(
pRaft
,
syncInitElectionMsg
(
&
msg
,
pRaft
->
selfId
));
syncRaftStep
(
pRaft
,
syncInitElectionMsg
(
&
msg
,
pRaft
->
selfId
));
}
}
// tickHeartbeat is run by leaders to send a MsgBeat after r.heartbeatTimeout.
static
void
tickHeartbeat
(
SSyncRaft
*
pRaft
)
{
static
void
tickHeartbeat
(
SSyncRaft
*
pRaft
)
{
}
}
// increaseUncommittedSize accounts for the size of a batch of uncommitted
// entries and reports whether the batch may be appended.
//
// pRaft:   raft instance the entries are proposed on (unused for now).
// entries: the entries about to be appended (unused for now).
// n:       number of entries (unused for now).
// Returns true when the entries are accepted, false when the proposal must be
// dropped.
//
// TODO: track pRaft->uncommittedSize against a configured limit. Until that
// exists every batch is accepted: appendEntries drops the proposal whenever
// this returns false, so the previous stub (always false) discarded every
// append.
static bool increaseUncommittedSize(SSyncRaft* pRaft, SSyncRaftEntry* entries, int n) {
  (void)pRaft;
  (void)entries;
  (void)n;
  return true;
}
static
void
appendEntries
(
SSyncRaft
*
pRaft
,
SSyncRaftEntry
*
entries
,
int
n
)
{
static
void
appendEntries
(
SSyncRaft
*
pRaft
,
SSyncRaftEntry
*
entries
,
int
n
)
{
SyncIndex
lastIndex
=
syncRaftLogLastIndex
(
pRaft
->
log
);
SyncIndex
lastIndex
=
syncRaftLogLastIndex
(
pRaft
->
log
);
SyncTerm
term
=
pRaft
->
term
;
SyncTerm
term
=
pRaft
->
term
;
...
@@ -268,9 +296,16 @@ static void appendEntries(SSyncRaft* pRaft, SSyncRaftEntry* entries, int n) {
...
@@ -268,9 +296,16 @@ static void appendEntries(SSyncRaft* pRaft, SSyncRaftEntry* entries, int n) {
entries
[
i
].
index
=
lastIndex
+
1
+
i
;
entries
[
i
].
index
=
lastIndex
+
1
+
i
;
}
}
// Track the size of this uncommitted proposal.
if
(
!
increaseUncommittedSize
(
pRaft
,
entries
,
n
))
{
// Drop the proposal.
return
;
}
syncRaftLogAppend
(
pRaft
->
log
,
entries
,
n
);
syncRaftLogAppend
(
pRaft
->
log
,
entries
,
n
);
SSyncRaftProgress
*
progress
=
&
(
pRaft
->
tracker
->
progressMap
.
progress
[
pRaft
->
cluster
.
selfIndex
]);
SSyncRaftProgress
*
progress
=
syncRaftFindProgressByNodeId
(
&
pRaft
->
tracker
->
progressMap
,
pRaft
->
selfId
);
assert
(
progress
!=
NULL
);
syncRaftProgressMaybeUpdate
(
progress
,
lastIndex
);
syncRaftProgressMaybeUpdate
(
progress
,
lastIndex
);
// Regardless of syncRaftMaybeCommit's return, our caller will call bcastAppend.
// Regardless of syncRaftMaybeCommit's return, our caller will call bcastAppend.
syncRaftMaybeCommit
(
pRaft
);
syncRaftMaybeCommit
(
pRaft
);
...
@@ -297,7 +332,7 @@ static int triggerAll(SSyncRaft* pRaft) {
...
@@ -297,7 +332,7 @@ static int triggerAll(SSyncRaft* pRaft) {
continue;
continue;
}
}
syncRaft
Replicate
(pRaft, pRaft->tracker->progressMap.progress[i], true);
syncRaft
MaybeSendAppend
(pRaft, pRaft->tracker->progressMap.progress[i], true);
}
}
#endif
#endif
return
0
;
return
0
;
...
@@ -307,8 +342,8 @@ static void abortLeaderTransfer(SSyncRaft* pRaft) {
...
@@ -307,8 +342,8 @@ static void abortLeaderTransfer(SSyncRaft* pRaft) {
pRaft
->
leadTransferee
=
SYNC_NON_NODE_ID
;
pRaft
->
leadTransferee
=
SYNC_NON_NODE_ID
;
}
}
static
void
initProgress
(
int
i
,
SSyncRaftProgress
*
progress
,
void
*
arg
)
{
static
void
resetProgress
(
SSyncRaftProgress
*
progress
,
void
*
arg
)
{
syncRaft
InitProgress
(
i
,
(
SSyncRaft
*
)
arg
,
progress
);
syncRaft
ResetProgress
(
(
SSyncRaft
*
)
arg
,
progress
);
}
}
static
void
resetRaft
(
SSyncRaft
*
pRaft
,
SyncTerm
term
)
{
static
void
resetRaft
(
SSyncRaft
*
pRaft
,
SyncTerm
term
)
{
...
@@ -327,7 +362,7 @@ static void resetRaft(SSyncRaft* pRaft, SyncTerm term) {
...
@@ -327,7 +362,7 @@ static void resetRaft(SSyncRaft* pRaft, SyncTerm term) {
abortLeaderTransfer
(
pRaft
);
abortLeaderTransfer
(
pRaft
);
syncRaftResetVotes
(
pRaft
->
tracker
);
syncRaftResetVotes
(
pRaft
->
tracker
);
syncRaftProgressVisit
(
pRaft
->
tracker
,
ini
tProgress
,
pRaft
);
syncRaftProgressVisit
(
pRaft
->
tracker
,
rese
tProgress
,
pRaft
);
pRaft
->
pendingConfigIndex
=
0
;
pRaft
->
pendingConfigIndex
=
0
;
pRaft
->
uncommittedSize
=
0
;
pRaft
->
uncommittedSize
=
0
;
...
...
source/libs/sync/src/sync_raft_inflights.c
浏览文件 @
9188298e
...
@@ -40,19 +40,16 @@ void syncRaftCloseInflights(SSyncRaftInflights* inflights) {
...
@@ -40,19 +40,16 @@ void syncRaftCloseInflights(SSyncRaftInflights* inflights) {
free
(
inflights
);
free
(
inflights
);
}
}
/**
// Add notifies the Inflights that a new message with the given index is being
* syncRaftInflightAdd notifies the Inflights that a new message with the given index is being
// dispatched. Full() must be called prior to Add() to verify that there is room
* dispatched. syncRaftInflightFull() must be called prior to syncRaftInflightAdd()
// for one more message, and consecutive calls to add Add() must provide a
* to verify that there is room for one more message,
// monotonic sequence of indexes.
* and consecutive calls to add syncRaftInflightAdd() must provide a
* monotonic sequence of indexes.
**/
void
syncRaftInflightAdd
(
SSyncRaftInflights
*
inflights
,
SyncIndex
inflightIndex
)
{
void
syncRaftInflightAdd
(
SSyncRaftInflights
*
inflights
,
SyncIndex
inflightIndex
)
{
assert
(
!
syncRaftInflightFull
(
inflights
));
assert
(
!
syncRaftInflightFull
(
inflights
));
int
next
=
inflights
->
start
+
inflights
->
count
;
int
next
=
inflights
->
start
+
inflights
->
count
;
int
size
=
inflights
->
size
;
int
size
=
inflights
->
size
;
/* is next wrapped around buffer? */
if
(
next
>=
size
)
{
if
(
next
>=
size
)
{
next
-=
size
;
next
-=
size
;
}
}
...
@@ -61,12 +58,10 @@ void syncRaftInflightAdd(SSyncRaftInflights* inflights, SyncIndex inflightIndex)
...
@@ -61,12 +58,10 @@ void syncRaftInflightAdd(SSyncRaftInflights* inflights, SyncIndex inflightIndex)
inflights
->
count
++
;
inflights
->
count
++
;
}
}
/**
// FreeLE frees the inflights smaller or equal to the given `to` flight.
* syncRaftInflightFreeLE frees the inflights smaller or equal to the given `to` flight.
**/
void
syncRaftInflightFreeLE
(
SSyncRaftInflights
*
inflights
,
SyncIndex
toIndex
)
{
void
syncRaftInflightFreeLE
(
SSyncRaftInflights
*
inflights
,
SyncIndex
toIndex
)
{
if
(
inflights
->
count
==
0
||
toIndex
<
inflights
->
buffer
[
inflights
->
start
])
{
if
(
inflights
->
count
==
0
||
toIndex
<
inflights
->
buffer
[
inflights
->
start
])
{
/
* out of the left side of the window */
/
/ out of the left side of the window
return
;
return
;
}
}
...
@@ -95,10 +90,8 @@ void syncRaftInflightFreeLE(SSyncRaftInflights* inflights, SyncIndex toIndex) {
...
@@ -95,10 +90,8 @@ void syncRaftInflightFreeLE(SSyncRaftInflights* inflights, SyncIndex toIndex) {
}
}
}
}
/**
// FreeFirstOne releases the first inflight. This is a no-op if nothing is
* syncRaftInflightFreeFirstOne releases the first inflight.
// inflight.
* This is a no-op if nothing is inflight.
**/
void
syncRaftInflightFreeFirstOne
(
SSyncRaftInflights
*
inflights
)
{
void
syncRaftInflightFreeFirstOne
(
SSyncRaftInflights
*
inflights
)
{
syncRaftInflightFreeLE
(
inflights
,
inflights
->
buffer
[
inflights
->
start
]);
syncRaftInflightFreeLE
(
inflights
,
inflights
->
buffer
[
inflights
->
start
]);
}
}
source/libs/sync/src/sync_raft_node_map.c
0 → 100644
浏览文件 @
9188298e
/*
* Copyright (c) 2019 TAOS Data, Inc. <cli@taosdata.com>
*
* This program is free software: you can use, redistribute, and/or modify
* it under the terms of the GNU Affero General Public License, version 3
* or later ("AGPL"), as published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#include "sync_raft_node_map.h"
#include "sync_type.h"
#include "sync_raft_progress.h"
// syncRaftInitNodeMap creates the hash table behind nodeMap and stores it in
// nodeMap->nodeIdMap. The table is sized for TSDB_MAX_REPLICA entries and uses
// the default hash function for TSDB_DATA_TYPE_INT keys.
//
// NOTE(review): the result of taosHashInit is not checked; presumably it can
// be NULL on allocation failure -- confirm and decide how to report it.
void syncRaftInitNodeMap(SSyncRaftNodeMap* nodeMap) {
  nodeMap->nodeIdMap = taosHashInit(TSDB_MAX_REPLICA, taosGetDefaultHashFunction(TSDB_DATA_TYPE_INT), true, HASH_ENTRY_LOCK);
}
// syncRaftFreeNodeMap releases the hash table held in nodeMap->nodeIdMap via
// taosHashCleanup. The nodeMap struct itself is not freed, and the field is
// left holding the old pointer.
void syncRaftFreeNodeMap(SSyncRaftNodeMap* nodeMap) {
  taosHashCleanup(nodeMap->nodeIdMap);
}
// syncRaftClearNodeMap empties the node map by calling taosHashClear on the
// underlying table; the table itself stays allocated for reuse.
void syncRaftClearNodeMap(SSyncRaftNodeMap* nodeMap) {
  taosHashClear(nodeMap->nodeIdMap);
}
// syncRaftIsInNodeMap reports whether nodeId has an entry in nodeMap.
//
// NOTE(review): the key length is sizeof(SyncNodeId*), matching the other
// accessors in this file, rather than sizeof(SyncNodeId); if the id type is
// narrower than a pointer the lookup compares bytes beyond `nodeId` -- confirm.
bool syncRaftIsInNodeMap(const SSyncRaftNodeMap* nodeMap, SyncNodeId nodeId) {
  return taosHashGet(nodeMap->nodeIdMap, &nodeId, sizeof(SyncNodeId*)) != NULL;
}
// syncRaftCopyNodeMap adds every node id stored in `from` to `to`.
// Entries already present in `to` are kept, so the call is additive.
//
// from: source map; it is only read.
// to:   destination map; must already be initialized.
//
// The previous version used the address of its local cursor pointer as the
// hash key and never advanced the cursor. This version walks the table
// directly and inserts through syncRaftAddToNodeMap, so copied entries use
// exactly the same key/value layout as directly added ones.
//
// NOTE(review): assumes taosHashIterate(table, NULL) starts a walk and returns
// a pointer to each stored value, whose leading bytes are the SyncNodeId as
// written by syncRaftAddToNodeMap -- confirm against the hash API.
void syncRaftCopyNodeMap(SSyncRaftNodeMap* from, SSyncRaftNodeMap* to) {
  void* pIter = taosHashIterate(from->nodeIdMap, NULL);
  while (pIter != NULL) {
    syncRaftAddToNodeMap(to, *(SyncNodeId*)pIter);
    pIter = taosHashIterate(from->nodeIdMap, pIter);
  }
}
// syncRaftIterateNodeMap asks taosHashIterate for the entry following `pId`.
// Returns true when the walk is finished (no further entry) and false when an
// entry was found, in which case a node id is written to *pId.
//
// NOTE(review): several things here look inconsistent and need confirming:
//  - callers in this code base pass pId == NULL to start a walk, but the
//    success path writes through pId;
//  - pId is received by value, so the caller's cursor is never advanced;
//  - the stored value is treated as a pointer to a SyncNodeId (`**ppId`),
//    whereas syncRaftAddToNodeMap stores the id bytes themselves.
bool syncRaftIterateNodeMap(const SSyncRaftNodeMap* nodeMap, SyncNodeId *pId) {
  SyncNodeId **ppId = taosHashIterate(nodeMap->nodeIdMap, pId);
  if (ppId == NULL) {
    // No further entry: tell the caller to stop.
    return true;
  }

  *pId = *(*ppId);
  return false;
}
// syncRaftIsAllNodeInProgressMap reports whether every node id stored in
// nodeMap also has an entry in progressMap. An empty nodeMap yields true.
//
// The previous version passed a NULL cursor to syncRaftIterateNodeMap and then
// dereferenced it, which cannot work for a non-empty map; the table is walked
// directly instead.
//
// NOTE(review): assumes taosHashIterate(table, NULL) starts a walk and returns
// a pointer to each stored value, whose leading bytes are the SyncNodeId as
// written by syncRaftAddToNodeMap -- confirm against the hash API.
bool syncRaftIsAllNodeInProgressMap(SSyncRaftNodeMap* nodeMap, SSyncRaftProgressMap* progressMap) {
  bool allPresent = true;

  // The walk always runs to the end rather than returning on the first miss,
  // so an iteration is never abandoned half-way; these maps are sized for
  // TSDB_MAX_REPLICA entries, so the extra steps are negligible.
  void* pIter = taosHashIterate(nodeMap->nodeIdMap, NULL);
  while (pIter != NULL) {
    if (allPresent && !syncRaftIsInProgressMap(progressMap, *(SyncNodeId*)pIter)) {
      allPresent = false;
    }
    pIter = taosHashIterate(nodeMap->nodeIdMap, pIter);
  }

  return allPresent;
}
// syncRaftUnionNodeMap merges nodeMap into `to` by delegating to
// syncRaftCopyNodeMap(nodeMap, to); nodeMap is the source, `to` the target.
void syncRaftUnionNodeMap(SSyncRaftNodeMap* nodeMap, SSyncRaftNodeMap* to) {
  syncRaftCopyNodeMap(nodeMap, to);
}
// syncRaftAddToNodeMap inserts nodeId into nodeMap, using the bytes at
// &nodeId as both the key and the stored value.
//
// NOTE(review): both lengths are sizeof(SyncNodeId*), not sizeof(SyncNodeId).
// If the id type is narrower than a pointer, this copies bytes beyond `nodeId`
// into the table and makes key comparison depend on them -- confirm, and
// change every accessor together if so. The return value of taosHashPut is
// ignored.
void syncRaftAddToNodeMap(SSyncRaftNodeMap* nodeMap, SyncNodeId nodeId) {
  taosHashPut(nodeMap->nodeIdMap, &nodeId, sizeof(SyncNodeId*), &nodeId, sizeof(SyncNodeId*));
}
// syncRaftRemoveFromNodeMap removes the entry keyed by nodeId from nodeMap.
//
// NOTE(review): the key length is sizeof(SyncNodeId*), matching
// syncRaftAddToNodeMap, rather than sizeof(SyncNodeId) -- confirm. The return
// value of taosHashRemove is ignored, so removing an absent id is silent.
void syncRaftRemoveFromNodeMap(SSyncRaftNodeMap* nodeMap, SyncNodeId nodeId) {
  taosHashRemove(nodeMap->nodeIdMap, &nodeId, sizeof(SyncNodeId*));
}
// syncRaftNodeMapSize returns the number of entries in nodeMap, as reported by
// taosHashGetSize on the underlying table.
int32_t syncRaftNodeMapSize(const SSyncRaftNodeMap* nodeMap) {
  return taosHashGetSize(nodeMap->nodeIdMap);
}
\ No newline at end of file
source/libs/sync/src/sync_raft_progress.c
浏览文件 @
9188298e
...
@@ -20,18 +20,26 @@
...
@@ -20,18 +20,26 @@
#include "sync.h"
#include "sync.h"
#include "syncInt.h"
#include "syncInt.h"
static
void
copyProgress
(
SSyncRaftProgress
*
progress
,
void
*
arg
);
static
void
refProgress
(
SSyncRaftProgress
*
progress
);
static
void
unrefProgress
(
SSyncRaftProgress
*
progress
,
void
*
);
static
void
resetProgressState
(
SSyncRaftProgress
*
progress
,
ESyncRaftProgressState
state
);
static
void
resetProgressState
(
SSyncRaftProgress
*
progress
,
ESyncRaftProgressState
state
);
static
void
probeAcked
(
SSyncRaftProgress
*
progress
);
static
void
probeAcked
(
SSyncRaftProgress
*
progress
);
static
void
resumeProgress
(
SSyncRaftProgress
*
progress
);
static
void
resumeProgress
(
SSyncRaftProgress
*
progress
);
void
syncRaftInitProgress
(
int
i
,
SSyncRaft
*
pRaft
,
SSyncRaftProgress
*
progress
)
{
void
syncRaftResetProgress
(
SSyncRaft
*
pRaft
,
SSyncRaftProgress
*
progress
)
{
if
(
progress
->
inflights
)
{
syncRaftCloseInflights
(
progress
->
inflights
);
}
SSyncRaftInflights
*
inflights
=
syncRaftOpenInflights
(
pRaft
->
tracker
->
maxInflightMsgs
);
SSyncRaftInflights
*
inflights
=
syncRaftOpenInflights
(
pRaft
->
tracker
->
maxInflightMsgs
);
if
(
inflights
==
NULL
)
{
if
(
inflights
==
NULL
)
{
return
;
return
;
}
}
*
progress
=
(
SSyncRaftProgress
)
{
*
progress
=
(
SSyncRaftProgress
)
{
.
matchIndex
=
i
==
pRaft
->
selfIndex
?
syncRaftLogLastIndex
(
pRaft
->
log
)
:
0
,
.
matchIndex
=
progress
->
id
==
pRaft
->
selfId
?
syncRaftLogLastIndex
(
pRaft
->
log
)
:
0
,
.
nextIndex
=
syncRaftLogLastIndex
(
pRaft
->
log
)
+
1
,
.
nextIndex
=
syncRaftLogLastIndex
(
pRaft
->
log
)
+
1
,
.
inflights
=
inflights
,
.
inflights
=
inflights
,
.
isLearner
=
false
,
.
isLearner
=
false
,
...
@@ -39,11 +47,9 @@ void syncRaftInitProgress(int i, SSyncRaft* pRaft, SSyncRaftProgress* progress)
...
@@ -39,11 +47,9 @@ void syncRaftInitProgress(int i, SSyncRaft* pRaft, SSyncRaftProgress* progress)
};
};
}
}
/**
// MaybeUpdate is called when an MsgAppResp arrives from the follower, with the
* syncRaftProgressMaybeUpdate is called when an MsgAppResp arrives from the follower, with the
// index acked by it. The method returns false if the given n index comes from
* index acked by it. The method returns false if the given n index comes from
// an outdated message. Otherwise it updates the progress and returns true.
* an outdated message. Otherwise it updates the progress and returns true.
**/
bool
syncRaftProgressMaybeUpdate
(
SSyncRaftProgress
*
progress
,
SyncIndex
lastIndex
)
{
bool
syncRaftProgressMaybeUpdate
(
SSyncRaftProgress
*
progress
,
SyncIndex
lastIndex
)
{
bool
updated
=
false
;
bool
updated
=
false
;
...
@@ -58,27 +64,36 @@ bool syncRaftProgressMaybeUpdate(SSyncRaftProgress* progress, SyncIndex lastInde
...
@@ -58,27 +64,36 @@ bool syncRaftProgressMaybeUpdate(SSyncRaftProgress* progress, SyncIndex lastInde
return
updated
;
return
updated
;
}
}
// MaybeDecrTo adjusts the Progress to the receipt of a MsgApp rejection. The
// arguments are the index of the append message rejected by the follower, and
// the hint that we want to decrease to.
//
// Rejections can happen spuriously as messages are sent out of order or
// duplicated. In such cases, the rejection pertains to an index that the
// Progress already knows were previously acknowledged, and false is returned
// without changing the Progress.
//
// If the rejection is genuine, Next is lowered sensibly, and the Progress is
// cleared for sending log entries.
bool
syncRaftProgressMaybeDecrTo
(
SSyncRaftProgress
*
progress
,
bool
syncRaftProgressMaybeDecrTo
(
SSyncRaftProgress
*
progress
,
SyncIndex
rejected
,
SyncIndex
matchHint
)
{
SyncIndex
rejected
,
SyncIndex
matchHint
)
{
if
(
progress
->
state
==
PROGRESS_STATE_REPLICATE
)
{
if
(
progress
->
state
==
PROGRESS_STATE_REPLICATE
)
{
/**
// The rejection must be stale if the progress has matched and "rejected"
* the rejection must be stale if the progress has matched and "rejected"
// is smaller than "match".
* is smaller than "match".
**/
if
(
rejected
<=
progress
->
matchIndex
)
{
if
(
rejected
<=
progress
->
matchIndex
)
{
syncDebug
(
"match index is up to date,ignore"
);
syncDebug
(
"match index is up to date,ignore"
);
return
false
;
return
false
;
}
}
/* directly decrease next to match + 1 */
// Directly decrease next to match + 1.
//
// TODO(tbg): why not use matchHint if it's larger?
progress
->
nextIndex
=
progress
->
matchIndex
+
1
;
progress
->
nextIndex
=
progress
->
matchIndex
+
1
;
return
true
;
return
true
;
}
}
/**
// The rejection must be stale if "rejected" does not match next - 1. This
* The rejection must be stale if "rejected" does not match next - 1. This
// is because non-replicating followers are probed one entry at a time.
* is because non-replicating followers are probed one entry at a time.
**/
if
(
rejected
!=
progress
->
nextIndex
-
1
)
{
if
(
rejected
!=
progress
->
nextIndex
-
1
)
{
syncDebug
(
"rejected index %"
PRId64
" different from next index %"
PRId64
" -> ignore"
syncDebug
(
"rejected index %"
PRId64
" different from next index %"
PRId64
" -> ignore"
,
rejected
,
progress
->
nextIndex
);
,
rejected
,
progress
->
nextIndex
);
...
@@ -91,14 +106,12 @@ bool syncRaftProgressMaybeDecrTo(SSyncRaftProgress* progress,
...
@@ -91,14 +106,12 @@ bool syncRaftProgressMaybeDecrTo(SSyncRaftProgress* progress,
return
true
;
return
true
;
}
}
/**
// IsPaused returns whether sending log entries to this node has been throttled.
* syncRaftProgressIsPaused returns whether sending log entries to this node has been throttled.
// This is done when a node has rejected recent MsgApps, is currently waiting
* This is done when a node has rejected recent MsgApps, is currently waiting
// for a snapshot, or has reached the MaxInflightMsgs limit. In normal
* for a snapshot, or has reached the MaxInflightMsgs limit. In normal
// operation, this is false. A throttled node will be contacted less frequently
* operation, this is false. A throttled node will be contacted less frequently
// until it has reached a state in which it's able to accept a steady stream of
* until it has reached a state in which it's able to accept a steady stream of
// log entries again.
* log entries again.
**/
bool
syncRaftProgressIsPaused
(
SSyncRaftProgress
*
progress
)
{
bool
syncRaftProgressIsPaused
(
SSyncRaftProgress
*
progress
)
{
switch
(
progress
->
state
)
{
switch
(
progress
->
state
)
{
case
PROGRESS_STATE_PROBE
:
case
PROGRESS_STATE_PROBE
:
...
@@ -112,58 +125,44 @@ bool syncRaftProgressIsPaused(SSyncRaftProgress* progress) {
...
@@ -112,58 +125,44 @@ bool syncRaftProgressIsPaused(SSyncRaftProgress* progress) {
}
}
}
}
int
syncRaftFindProgressIndexByNodeId
(
const
SSyncRaftProgressMap
*
progressMap
,
SyncNodeId
id
)
{
SSyncRaftProgress
*
syncRaftFindProgressByNodeId
(
const
SSyncRaftProgressMap
*
progressMap
,
SyncNodeId
id
)
{
int
i
;
SSyncRaftProgress
**
ppProgress
=
(
SSyncRaftProgress
**
)
taosHashGet
(
progressMap
->
progressMap
,
&
id
,
sizeof
(
SyncNodeId
*
));
for
(
i
=
0
;
i
<
TSDB_MAX_REPLICA
;
++
i
)
{
if
(
ppProgress
==
NULL
)
{
if
(
progressMap
->
progress
[
i
].
id
==
id
)
{
return
NULL
;
return
i
;
}
}
}
return
-
1
;
return
*
ppProgress
;
}
}
int
syncRaftAddToProgressMap
(
SSyncRaftProgressMap
*
progressMap
,
SyncNodeId
id
)
{
int
syncRaftAddToProgressMap
(
SSyncRaftProgressMap
*
progressMap
,
SSyncRaftProgress
*
progress
)
{
int
i
,
j
;
refProgress
(
progress
);
taosHashPut
(
progressMap
->
progressMap
,
&
progress
->
id
,
sizeof
(
SyncNodeId
*
),
&
progress
,
sizeof
(
SSyncRaftProgress
*
));
}
for
(
i
=
0
,
j
=
-
1
;
i
<
TSDB_MAX_REPLICA
;
++
i
)
{
void
syncRaftRemoveFromProgressMap
(
SSyncRaftProgressMap
*
progressMap
,
SyncNodeId
id
)
{
if
(
progressMap
->
progress
[
i
].
id
==
id
)
{
SSyncRaftProgress
**
ppProgress
=
(
SSyncRaftProgress
**
)
taosHashGet
(
progressMap
->
progressMap
,
&
id
,
sizeof
(
SyncNodeId
*
));
return
i
;
if
(
ppProgress
==
NULL
)
{
}
return
;
if
(
j
==
-
1
&&
progressMap
->
progress
[
i
].
id
==
SYNC_NON_NODE_ID
)
{
j
=
i
;
}
}
}
unrefProgress
(
*
ppProgress
,
NULL
);
assert
(
j
!=
-
1
);
taosHashRemove
(
progressMap
->
progressMap
,
&
id
,
sizeof
(
SyncNodeId
*
));
progressMap
->
progress
[
i
].
id
=
id
;
}
}
void
syncRaftRemoveFromProgressMap
(
SSyncRaftProgressMap
*
progressMap
,
SyncNodeId
id
)
{
bool
syncRaftIsInProgressMap
(
SSyncRaftProgressMap
*
progressMap
,
SyncNodeId
id
)
{
int
i
;
return
taosHashGet
(
progressMap
->
progressMap
,
&
id
,
sizeof
(
SyncNodeId
*
))
!=
NULL
;
for
(
i
=
0
;
i
<
TSDB_MAX_REPLICA
;
++
i
)
{
if
(
progressMap
->
progress
[
i
].
id
==
id
)
{
progressMap
->
progress
[
i
].
id
=
SYNC_NON_NODE_ID
;
break
;
}
}
}
}
bool
syncRaftProgressIsUptodate
(
SSyncRaft
*
pRaft
,
SSyncRaftProgress
*
progress
)
{
bool
syncRaftProgressIsUptodate
(
SSyncRaft
*
pRaft
,
SSyncRaftProgress
*
progress
)
{
return
syncRaftLogLastIndex
(
pRaft
->
log
)
+
1
==
progress
->
nextIndex
;
return
syncRaftLogLastIndex
(
pRaft
->
log
)
+
1
==
progress
->
nextIndex
;
}
}
/**
// BecomeProbe transitions into StateProbe. Next is reset to Match+1 or,
* syncRaftProgressBecomeProbe transitions into StateProbe. Next is reset to Match+1 or,
// optionally and if larger, the index of the pending snapshot.
* optionally and if larger, the index of the pending snapshot.
**/
void
syncRaftProgressBecomeProbe
(
SSyncRaftProgress
*
progress
)
{
void
syncRaftProgressBecomeProbe
(
SSyncRaftProgress
*
progress
)
{
/**
// If the original state is StateSnapshot, progress knows that
* If the original state is ProgressStateSnapshot, progress knows that
// the pending snapshot has been sent to this peer successfully, then
* the pending snapshot has been sent to this peer successfully, then
// probes from pendingSnapshot + 1.
* probes from pendingSnapshot + 1.
**/
if
(
progress
->
state
==
PROGRESS_STATE_SNAPSHOT
)
{
if
(
progress
->
state
==
PROGRESS_STATE_SNAPSHOT
)
{
SyncIndex
pendingSnapshotIndex
=
progress
->
pendingSnapshotIndex
;
SyncIndex
pendingSnapshotIndex
=
progress
->
pendingSnapshotIndex
;
resetProgressState
(
progress
,
PROGRESS_STATE_PROBE
);
resetProgressState
(
progress
,
PROGRESS_STATE_PROBE
);
...
@@ -174,111 +173,88 @@ void syncRaftProgressBecomeProbe(SSyncRaftProgress* progress) {
...
@@ -174,111 +173,88 @@ void syncRaftProgressBecomeProbe(SSyncRaftProgress* progress) {
}
}
}
}
/**
// BecomeReplicate transitions into StateReplicate, resetting Next to Match+1.
* syncRaftProgressBecomeReplicate transitions into StateReplicate, resetting Next to Match+1.
**/
void
syncRaftProgressBecomeReplicate
(
SSyncRaftProgress
*
progress
)
{
void
syncRaftProgressBecomeReplicate
(
SSyncRaftProgress
*
progress
)
{
resetProgressState
(
progress
,
PROGRESS_STATE_REPLICATE
);
resetProgressState
(
progress
,
PROGRESS_STATE_REPLICATE
);
progress
->
nextIndex
=
progress
->
matchIndex
+
1
;
progress
->
nextIndex
=
progress
->
matchIndex
+
1
;
}
}
// BecomeSnapshot moves the Progress to StateSnapshot with the specified pending
// snapshot index.
void
syncRaftProgressBecomeSnapshot
(
SSyncRaftProgress
*
progress
,
SyncIndex
snapshotIndex
)
{
void
syncRaftProgressBecomeSnapshot
(
SSyncRaftProgress
*
progress
,
SyncIndex
snapshotIndex
)
{
resetProgressState
(
progress
,
PROGRESS_STATE_SNAPSHOT
);
resetProgressState
(
progress
,
PROGRESS_STATE_SNAPSHOT
);
progress
->
pendingSnapshotIndex
=
snapshotIndex
;
progress
->
pendingSnapshotIndex
=
snapshotIndex
;
}
}
void
syncRaftCopyProgress
(
const
SSyncRaftProgress
*
progress
,
SSyncRaftProgress
*
out
)
{
void
syncRaftCopyProgress
(
const
SSyncRaftProgress
*
progress
,
SSyncRaftProgress
*
out
)
{
memcpy
(
out
,
progress
,
sizeof
(
SSyncRaftProgress
));
}
/**
* ResetState moves the Progress into the specified State, resetting ProbeSent,
* PendingSnapshot, and Inflights.
**/
static
void
resetProgressState
(
SSyncRaftProgress
*
progress
,
ESyncRaftProgressState
state
)
{
progress
->
probeSent
=
false
;
progress
->
pendingSnapshotIndex
=
0
;
progress
->
state
=
state
;
syncRaftInflightReset
(
progress
->
inflights
);
}
}
/**
void
syncRaftInitProgressMap
(
SSyncRaftProgressMap
*
progressMap
)
{
* probeAcked is called when this peer has accepted an append. It resets
progressMap
->
progressMap
=
taosHashInit
(
TSDB_MAX_REPLICA
,
taosGetDefaultHashFunction
(
TSDB_DATA_TYPE_INT
),
true
,
HASH_ENTRY_LOCK
);
* ProbeSent to signal that additional append messages should be sent without
* further delay.
**/
static
void
probeAcked
(
SSyncRaftProgress
*
progress
)
{
progress
->
probeSent
=
false
;
}
}
#if 0
void
syncRaftFreeProgressMap
(
SSyncRaftProgressMap
*
progressMap
)
{
syncRaftVisitProgressMap
(
progressMap
,
unrefProgress
,
NULL
);
SyncIndex syncRaftProgressNextIndex(SSyncRaft* pRaft, int i) {
taosHashCleanup
(
progressMap
->
progressMap
);
return pRaft->leaderState.progress[i].nextIndex;
}
}
SyncIndex syncRaftProgressMatchIndex(SSyncRaft* pRaft, int i
) {
void
syncRaftClearProgressMap
(
SSyncRaftProgressMap
*
progressMap
)
{
return pRaft->leaderState.progress[i].matchIndex
;
taosHashClear
(
progressMap
->
progressMap
)
;
}
}
void syncRaft
ProgressUpdateLastSend(SSyncRaft* pRaft, int i
) {
void
syncRaft
CopyProgressMap
(
SSyncRaftProgressMap
*
from
,
SSyncRaftProgressMap
*
to
)
{
pRaft->leaderState.progress[i].lastSend = pRaft->io.time(pRaft
);
syncRaftVisitProgressMap
(
from
,
copyProgress
,
to
);
}
}
void syncRaftProgressUpdateSnapshotLastSend(SSyncRaft* pRaft, int i) {
bool
syncRaftIterateProgressMap
(
const
SSyncRaftProgressMap
*
progressMap
,
SSyncRaftProgress
*
pProgress
)
{
pRaft->leaderState.progress[i].lastSendSnapshot = pRaft->io.time(pRaft);
SSyncRaftProgress
**
ppProgress
=
taosHashIterate
(
progressMap
->
progressMap
,
pProgress
);
}
if
(
ppProgress
==
NULL
)
{
return
true
;
}
bool syncRaftProgressResetRecentRecv(SSyncRaft* pRaft, int i) {
*
pProgress
=
*
(
*
ppProgress
);
SSyncRaftProgress* progress = &(pRaft->leaderState.progress[i]);
return
false
;
bool prev = progress->recentRecv;
progress->recentRecv = false;
return prev;
}
}
void syncRaftProgressMarkRecentRecv(SSyncRaft* pRaft, int i) {
bool
syncRaftVisitProgressMap
(
SSyncRaftProgressMap
*
progressMap
,
visitProgressFp
fp
,
void
*
arg
)
{
pRaft->leaderState.progress[i].recentRecv = true;
SSyncRaftProgress
*
pProgress
;
while
(
!
syncRaftIterateProgressMap
(
progressMap
,
pProgress
))
{
fp
(
pProgress
,
arg
);
}
}
}
bool syncRaftProgressGetRecentRecv(SSyncRaft* pRaft, int i) {
static
void
copyProgress
(
SSyncRaftProgress
*
progress
,
void
*
arg
)
{
return pRaft->leaderState.progress[i].recentRecv;
assert
(
progress
->
refCount
>
0
);
SSyncRaftProgressMap
*
to
=
(
SSyncRaftProgressMap
*
)
arg
;
syncRaftAddToProgressMap
(
to
,
progress
);
}
}
void syncRaftProgressBecomeSnapshot(SSyncRaft* pRaft, int i) {
static
void
refProgress
(
SSyncRaftProgress
*
progress
)
{
SSyncRaftProgress* progress = &(pRaft->leaderState.progress[i]);
progress
->
refCount
+=
1
;
resetProgressState(progress, PROGRESS_STATE_SNAPSHOT);
progress->pendingSnapshotIndex = raftLogSnapshotIndex(pRaft->log);
}
}
void syncRaftProgressBecomeProbe(SSyncRaft* pRaft, int i) {
static
void
unrefProgress
(
SSyncRaftProgress
*
progress
,
void
*
arg
)
{
SSyncRaftProgress* progress = &(pRaft->leaderState.progress[i]);
(
void
)
arg
;
progress
->
refCount
-=
1
;
if (progress->state == PROGRESS_STATE_SNAPSHOT) {
assert
(
progress
->
refCount
>=
0
);
assert(progress->pendingSnapshotIndex > 0);
if
(
progress
->
refCount
==
0
)
{
SyncIndex pendingSnapshotIndex = progress->pendingSnapshotIndex;
free
(
progress
);
resetProgressState(progress, PROGRESS_STATE_PROBE);
progress->nextIndex = max(progress->matchIndex + 1, pendingSnapshotIndex);
} else {
resetProgressState(progress, PROGRESS_STATE_PROBE);
progress->nextIndex = progress->matchIndex + 1;
}
}
}
}
void syncRaftProgressBecomeReplicate(SSyncRaft* pRaft, int i) {
// ResetState moves the Progress into the specified State, resetting ProbeSent,
resetProgressState(pRaft->leaderState.progress, PROGRESS_STATE_REPLICATE);
// PendingSnapshot, and Inflights.
pRaft->leaderState.progress->nextIndex = pRaft->leaderState.progress->matchIndex + 1;
static
void
resetProgressState
(
SSyncRaftProgress
*
progress
,
ESyncRaftProgressState
state
)
{
}
progress
->
probeSent
=
false
;
void syncRaftProgressAbortSnapshot(SSyncRaft* pRaft, int i) {
SSyncRaftProgress* progress = &(pRaft->leaderState.progress[i]);
progress
->
pendingSnapshotIndex
=
0
;
progress
->
pendingSnapshotIndex
=
0
;
progress->state = PROGRESS_STATE_PROBE;
progress
->
state
=
state
;
syncRaftInflightReset
(
progress
->
inflights
);
}
}
ESyncRaftProgressState syncRaftProgressState(SSyncRaft* pRaft, int i) {
// ProbeAcked is called when this peer has accepted an append. It resets
return pRaft->leaderState.progress[i].state;
// ProbeSent to signal that additional append messages should be sent without
// further delay.
static
void
probeAcked
(
SSyncRaftProgress
*
progress
)
{
progress
->
probeSent
=
false
;
}
}
#endif
\ No newline at end of file
source/libs/sync/src/sync_raft_progress_tracker.c
浏览文件 @
9188298e
...
@@ -13,62 +13,99 @@
...
@@ -13,62 +13,99 @@
* along with this program. If not, see <http://www.gnu.org/licenses/>.
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
*/
#include "raft.h"
#include "sync_const.h"
#include "sync_raft_progress_tracker.h"
#include "sync_raft_progress_tracker.h"
#include "sync_raft_proto.h"
#include "sync_raft_proto.h"
SSyncRaftProgressTracker
*
syncRaftOpenProgressTracker
()
{
SSyncRaftProgressTracker
*
syncRaftOpenProgressTracker
(
SSyncRaft
*
pRaft
)
{
SSyncRaftProgressTracker
*
tracker
=
(
SSyncRaftProgressTracker
*
)
malloc
(
sizeof
(
SSyncRaftProgressTracker
));
SSyncRaftProgressTracker
*
tracker
=
(
SSyncRaftProgressTracker
*
)
malloc
(
sizeof
(
SSyncRaftProgressTracker
));
if
(
tracker
==
NULL
)
{
if
(
tracker
==
NULL
)
{
return
NULL
;
return
NULL
;
}
}
tracker
->
votesMap
=
taosHashInit
(
TSDB_MAX_REPLICA
,
taosGetDefaultHashFunction
(
TSDB_DATA_TYPE_INT
),
true
,
HASH_ENTRY_LOCK
);
syncRaftInitTrackConfig
(
&
tracker
->
config
);
tracker
->
pRaft
=
pRaft
;
tracker
->
maxInflightMsgs
=
kSyncRaftMaxInflghtMsgs
;
return
tracker
;
return
tracker
;
}
}
// syncRaftInitTrackConfig puts a tracker configuration into its initial
// state: the two learner node maps and the joint voter configuration are
// initialised through their own init routines and autoLeave is cleared.
void syncRaftInitTrackConfig(SSyncRaftProgressTrackerConfig* config) {
  // learner sets (current and staged)
  syncRaftInitNodeMap(&(config->learners));
  syncRaftInitNodeMap(&(config->learnersNext));

  // incoming/outgoing voter sets are handled by the joint-config initialiser
  syncRaftInitQuorumJointConfig(&(config->voters));

  config->autoLeave = false;
}
// syncRaftFreeTrackConfig releases the four node maps held by a tracker
// configuration: both learner maps and both halves (incoming, outgoing) of
// the joint voter configuration. The config struct itself is not freed here.
void syncRaftFreeTrackConfig(SSyncRaftProgressTrackerConfig* config) {
  // learner sets
  syncRaftFreeNodeMap(&(config->learners));
  syncRaftFreeNodeMap(&(config->learnersNext));

  // voter sets of the joint configuration
  syncRaftFreeNodeMap(&(config->voters.incoming));
  syncRaftFreeNodeMap(&(config->voters.outgoing));
}
// ResetVotes prepares for a new round of vote counting via recordVote.
void
syncRaftResetVotes
(
SSyncRaftProgressTracker
*
tracker
)
{
void
syncRaftResetVotes
(
SSyncRaftProgressTracker
*
tracker
)
{
memset
(
tracker
->
votes
,
SYNC_RAFT_VOTE_RESP_UNKNOWN
,
sizeof
(
ESyncRaftVoteType
)
*
TSDB_MAX_REPLICA
);
taosHashClear
(
tracker
->
votesMap
);
}
}
void
syncRaftProgressVisit
(
SSyncRaftProgressTracker
*
tracker
,
visitProgressFp
visit
,
void
*
arg
)
{
void
syncRaftProgressVisit
(
SSyncRaftProgressTracker
*
tracker
,
visitProgressFp
visit
,
void
*
arg
)
{
int
i
;
syncRaftVisitProgressMap
(
&
tracker
->
progressMap
,
visit
,
arg
);
for
(
i
=
0
;
i
<
TSDB_MAX_REPLICA
;
++
i
)
{
SSyncRaftProgress
*
progress
=
&
(
tracker
->
progressMap
.
progress
[
i
]);
visit
(
i
,
progress
,
arg
);
}
}
}
void
syncRaftRecordVote
(
SSyncRaftProgressTracker
*
tracker
,
int
i
,
bool
grant
)
{
// RecordVote records that the node with the given id voted for this Raft
if
(
tracker
->
votes
[
i
]
!=
SYNC_RAFT_VOTE_RESP_UNKNOWN
)
{
// instance if v == true (and declined it otherwise).
void
syncRaftRecordVote
(
SSyncRaftProgressTracker
*
tracker
,
SyncNodeId
id
,
bool
grant
)
{
ESyncRaftVoteType
*
pType
=
taosHashGet
(
tracker
->
votesMap
,
&
id
,
sizeof
(
SyncNodeId
*
));
if
(
pType
!=
NULL
)
{
return
;
return
;
}
}
t
racker
->
votes
[
i
]
=
grant
?
SYNC_RAFT_VOTE_RESP_GRANT
:
SYNC_RAFT_VOTE_RESP_REJECT
;
t
aosHashPut
(
tracker
->
votesMap
,
&
id
,
sizeof
(
SyncNodeId
),
&
grant
,
sizeof
(
bool
*
))
;
}
}
void
syncRaftCloneTrackerConfig
(
const
SSyncRaftProgressTrackerConfig
*
from
,
SSyncRaftProgressTrackerConfig
*
to
)
{
void
syncRaftCopyTrackerConfig
(
const
SSyncRaftProgressTrackerConfig
*
from
,
SSyncRaftProgressTrackerConfig
*
to
)
{
memcpy
(
to
,
from
,
sizeof
(
SSyncRaftProgressTrackerConfig
));
}
// syncRaftCheckTrackerConfigInProgress verifies that every node referenced by
// the configuration (incoming voters, outgoing voters, learners and
// learnersNext) is present in progressMap.
//
// Returns 0 when all four node maps pass the check and -1 as soon as one of
// them fails; later maps are not examined after a failure.
//
// NB: the empty config is intentionally allowed. In production an empty
// config should never be seen (its creation is prevented), but an initial
// config must still be *creatable*, for example during bootstrap or in tests.
// Rather than hand-coding that case, a transition from an empty config into
// any other legal, non-empty config is permitted.
int syncRaftCheckTrackerConfigInProgress(SSyncRaftProgressTrackerConfig* config, SSyncRaftProgressMap* progressMap) {
  // && short-circuits, so the maps are checked in the same order as before
  // and checking stops at the first map that is not fully covered.
  bool allPresent = syncRaftIsAllNodeInProgressMap(&config->voters.incoming, progressMap) &&
                    syncRaftIsAllNodeInProgressMap(&config->voters.outgoing, progressMap) &&
                    syncRaftIsAllNodeInProgressMap(&config->learners, progressMap) &&
                    syncRaftIsAllNodeInProgressMap(&config->learnersNext, progressMap);

  return allPresent ? 0 : -1;
}
}
/**
// TallyVotes returns the number of granted and rejected Votes, and whether the
* syncRaftTallyVotes returns the number of granted and rejected Votes, and whether the
// election outcome is known.
* election outcome is known.
**/
ESyncRaftVoteResult
syncRaftTallyVotes
(
SSyncRaftProgressTracker
*
tracker
,
int
*
rejected
,
int
*
granted
)
{
ESyncRaftVoteResult
syncRaftTallyVotes
(
SSyncRaftProgressTracker
*
tracker
,
int
*
rejected
,
int
*
granted
)
{
int
i
;
SSyncRaftProgress
*
progress
=
NULL
;
SSyncRaftProgress
*
progress
;
int
r
,
g
;
int
r
,
g
;
for
(
i
=
0
,
r
=
0
,
g
=
0
;
i
<
TSDB_MAX_REPLICA
;
++
i
)
{
// Make sure to populate granted/rejected correctly even if the Votes slice
progress
=
&
(
tracker
->
progressMap
.
progress
[
i
]);
// contains members no longer part of the configuration. This doesn't really
// matter in the way the numbers are used (they're informational), but might
// as well get it right.
while
(
!
syncRaftIterateProgressMap
(
&
tracker
->
progressMap
,
progress
))
{
if
(
progress
->
id
==
SYNC_NON_NODE_ID
)
{
if
(
progress
->
id
==
SYNC_NON_NODE_ID
)
{
continue
;
continue
;
}
}
if
(
tracker
->
votes
[
i
]
==
SYNC_RAFT_VOTE_RESP_UNKNOWN
)
{
bool
*
v
=
taosHashGet
(
tracker
->
votesMap
,
&
progress
->
id
,
sizeof
(
SyncNodeId
*
));
if
(
v
==
NULL
)
{
continue
;
continue
;
}
}
if
(
tracker
->
votes
[
i
]
==
SYNC_RAFT_VOTE_RESP_GRANT
)
{
if
(
*
v
)
{
g
++
;
g
++
;
}
else
{
}
else
{
r
++
;
r
++
;
...
@@ -77,12 +114,43 @@ ESyncRaftVoteResult syncRaftTallyVotes(SSyncRaftProgressTracker* tracker, int* r
...
@@ -77,12 +114,43 @@ ESyncRaftVoteResult syncRaftTallyVotes(SSyncRaftProgressTracker* tracker, int* r
if
(
rejected
)
*
rejected
=
r
;
if
(
rejected
)
*
rejected
=
r
;
if
(
granted
)
*
granted
=
g
;
if
(
granted
)
*
granted
=
g
;
return
syncRaftVoteResult
(
&
(
tracker
->
config
.
voters
),
tracker
->
votes
);
return
syncRaftVoteResult
(
&
(
tracker
->
config
.
voters
),
tracker
->
votesMap
);
}
// syncRaftConfigState exports the tracker's current configuration into cs:
// each of the four node maps is copied with syncRaftCopyNodeMap and the
// autoLeave flag is carried over as-is.
void syncRaftConfigState(SSyncRaftProgressTracker* tracker, SSyncConfigState* cs) {
  // voters: incoming half -> cs->voters, outgoing half -> cs->votersOutgoing
  syncRaftCopyNodeMap(&(tracker->config.voters.incoming), &(cs->voters));
  syncRaftCopyNodeMap(&(tracker->config.voters.outgoing), &(cs->votersOutgoing));

  // learners, current and staged
  syncRaftCopyNodeMap(&(tracker->config.learners), &(cs->learners));
  syncRaftCopyNodeMap(&(tracker->config.learnersNext), &(cs->learnersNext));

  cs->autoLeave = tracker->config.autoLeave;
}
}
void
syncRaftConfigState
(
const
SSyncRaftProgressTracker
*
tracker
,
SSyncConfigState
*
cs
)
{
static
void
matchAckIndexer
(
SyncNodeId
id
,
void
*
arg
,
SyncIndex
*
index
)
{
memcpy
(
&
cs
->
voters
,
&
tracker
->
config
.
voters
.
incoming
,
sizeof
(
SSyncRaftNodeMap
));
SSyncRaftProgressTracker
*
tracker
=
(
SSyncRaftProgressTracker
*
)
arg
;
memcpy
(
&
cs
->
votersOutgoing
,
&
tracker
->
config
.
voters
.
outgoing
,
sizeof
(
SSyncRaftNodeMap
));
SSyncRaftProgress
*
progress
=
syncRaftFindProgressByNodeId
(
&
tracker
->
progressMap
,
id
);
memcpy
(
&
cs
->
learners
,
&
tracker
->
config
.
learners
,
sizeof
(
SSyncRaftNodeMap
));
if
(
progress
==
NULL
)
{
memcpy
(
&
cs
->
learnersNext
,
&
tracker
->
config
.
learnersNext
,
sizeof
(
SSyncRaftNodeMap
));
*
index
=
0
;
return
;
}
*
index
=
progress
->
matchIndex
;
}
// syncRaftCommittedIndex returns the largest log index known to be committed,
// judged by what the voting members of the group have acknowledged.
//
// The computation is delegated to the joint voter configuration;
// matchAckIndexer is handed over as the per-node lookup callback with the
// tracker itself as its argument.
SyncIndex syncRaftCommittedIndex(SSyncRaftProgressTracker* tracker) {
  SyncIndex committed = syncRaftJointConfigCommittedIndex(&(tracker->config.voters), matchAckIndexer, tracker);
  return committed;
}
// visitProgressActive is a progress-map visitor: it stores the visited
// progress entry's recentActive flag in the hash map passed through arg,
// keyed by the entry's node id.
static void visitProgressActive(SSyncRaftProgress* progress, void* arg) {
  SHashObj* activeByNode = (SHashObj*)arg;

  // key: node id, value: recentActive flag (copied by value, sizeof(bool) bytes)
  taosHashPut(activeByNode,
              &progress->id, sizeof(SyncNodeId),
              &progress->recentActive, sizeof(bool));
}
// syncRaftQuorumActive returns true if the quorum is active from the view of
// the local raft state machine. Otherwise, it returns false.
//
// A scratch hash map keyed by node id is filled with every progress entry's
// recentActive flag (through visitProgressActive) and then evaluated against
// the joint voter configuration; the quorum counts as active only when that
// evaluation yields SYNC_RAFT_VOTE_WON. False is also returned when the
// scratch map cannot be created.
bool syncRaftQuorumActive(SSyncRaftProgressTracker* tracker) {
  SHashObj* votesMap = taosHashInit(TSDB_MAX_REPLICA, taosGetDefaultHashFunction(TSDB_DATA_TYPE_INT), true, HASH_ENTRY_LOCK);
  if (votesMap == NULL) {
    // Without the map no activity can be recorded, so do not claim a quorum.
    return false;
  }

  syncRaftVisitProgressMap(&tracker->progressMap, visitProgressActive, votesMap);
  bool active = syncRaftVoteResult(&tracker->config.voters, votesMap) == SYNC_RAFT_VOTE_WON;

  // The map only lives for the duration of this call; release it so that
  // repeated quorum checks do not leak one hash table per invocation.
  taosHashCleanup(votesMap);
  return active;
}
}
\ No newline at end of file
source/libs/sync/src/sync_raft_quorum_joint.c
浏览文件 @
9188298e
...
@@ -13,6 +13,7 @@
...
@@ -13,6 +13,7 @@
* along with this program. If not, see <http://www.gnu.org/licenses/>.
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
*/
#include "sync_raft_node_map.h"
#include "sync_raft_quorum_majority.h"
#include "sync_raft_quorum_majority.h"
#include "sync_raft_quorum_joint.h"
#include "sync_raft_quorum_joint.h"
#include "sync_raft_quorum.h"
#include "sync_raft_quorum.h"
...
@@ -22,9 +23,9 @@
...
@@ -22,9 +23,9 @@
* a result indicating whether the vote is pending, lost, or won. A joint quorum
* a result indicating whether the vote is pending, lost, or won. A joint quorum
* requires both majority quorums to vote in favor.
* requires both majority quorums to vote in favor.
**/
**/
ESyncRaftVoteType
syncRaftVoteResult
(
SSyncRaftQuorumJointConfig
*
config
,
const
ESyncRaftVoteType
*
votes
)
{
ESyncRaftVoteType
syncRaftVoteResult
(
SSyncRaftQuorumJointConfig
*
config
,
SHashObj
*
votesMap
)
{
ESyncRaftVoteResult
r1
=
syncRaftMajorityVoteResult
(
&
(
config
->
incoming
),
votes
);
ESyncRaftVoteResult
r1
=
syncRaftMajorityVoteResult
(
&
(
config
->
incoming
),
votes
Map
);
ESyncRaftVoteResult
r2
=
syncRaftMajorityVoteResult
(
&
(
config
->
outgoing
),
votes
);
ESyncRaftVoteResult
r2
=
syncRaftMajorityVoteResult
(
&
(
config
->
outgoing
),
votes
Map
);
if
(
r1
==
r2
)
{
if
(
r1
==
r2
)
{
// If they agree, return the agreed state.
// If they agree, return the agreed state.
...
@@ -40,46 +41,35 @@ ESyncRaftVoteType syncRaftVoteResult(SSyncRaftQuorumJointConfig* config, const E
...
@@ -40,46 +41,35 @@ ESyncRaftVoteType syncRaftVoteResult(SSyncRaftQuorumJointConfig* config, const E
return
SYNC_RAFT_VOTE_PENDING
;
return
SYNC_RAFT_VOTE_PENDING
;
}
}
void
syncRaftJointConfigAddToIncoming
(
SSyncRaftQuorumJointConfig
*
config
,
SyncNodeId
id
)
{
void
syncRaftInitQuorumJointConfig
(
SSyncRaftQuorumJointConfig
*
config
)
{
int
i
,
min
;
syncRaftInitNodeMap
(
&
config
->
incoming
);
syncRaftInitNodeMap
(
&
config
->
outgoing
);
}
for
(
i
=
0
,
min
=
-
1
;
i
<
TSDB_MAX_REPLICA
;
++
i
)
{
void
syncRaftFreeQuorumJointConfig
(
SSyncRaftQuorumJointConfig
*
config
)
{
if
(
config
->
incoming
.
nodeId
[
i
]
==
id
)
{
syncRaftFreeNodeMap
(
&
config
->
incoming
);
return
;
syncRaftFreeNodeMap
(
&
config
->
outgoing
);
}
}
if
(
min
==
-
1
&&
config
->
incoming
.
nodeId
[
i
]
==
SYNC_NON_NODE_ID
)
{
min
=
i
;
}
}
assert
(
min
!=
-
1
);
void
syncRaftJointConfigAddToIncoming
(
SSyncRaftQuorumJointConfig
*
config
,
SyncNodeId
id
)
{
config
->
incoming
.
nodeId
[
min
]
=
id
;
syncRaftAddToNodeMap
(
&
config
->
incoming
,
id
);
config
->
incoming
.
replica
+=
1
;
}
}
void
syncRaftJointConfigRemoveFromIncoming
(
SSyncRaftQuorumJointConfig
*
config
,
SyncNodeId
id
)
{
void
syncRaftJointConfigRemoveFromIncoming
(
SSyncRaftQuorumJointConfig
*
config
,
SyncNodeId
id
)
{
int
i
;
syncRaftRemoveFromNodeMap
(
&
config
->
incoming
,
id
);
}
for
(
i
=
0
;
i
<
TSDB_MAX_REPLICA
;
++
i
)
{
void
syncRaftJointConfigIDs
(
SSyncRaftQuorumJointConfig
*
config
,
SSyncRaftNodeMap
*
nodeMap
)
{
if
(
config
->
incoming
.
nodeId
[
i
]
==
id
)
{
syncRaftCopyNodeMap
(
&
config
->
incoming
,
nodeMap
);
config
->
incoming
.
replica
-=
1
;
config
->
incoming
.
nodeId
[
i
]
=
SYNC_NON_NODE_ID
;
break
;
}
}
assert
(
config
->
incoming
.
replica
>=
0
);
syncRaftUnionNodeMap
(
&
config
->
outgoing
,
nodeMap
);
}
}
SyncIndex
syncRaftJointConfigCommittedIndex
(
const
SSyncRaftQuorumJointConfig
*
config
,
matchAckIndexerFp
indexer
,
void
*
arg
)
{
SyncIndex
index0
,
index1
;
bool
syncRaftIsInNodeMap
(
const
SSyncRaftNodeMap
*
nodeMap
,
SyncNodeId
nodeId
)
{
index0
=
syncRaftMajorityConfigCommittedIndex
(
&
config
->
incoming
,
indexer
,
arg
);
int
i
;
index1
=
syncRaftMajorityConfigCommittedIndex
(
&
config
->
outgoing
,
indexer
,
arg
);
for
(
i
=
0
;
i
<
TSDB_MAX_REPLICA
;
++
i
)
{
if
(
nodeId
==
nodeMap
->
nodeId
[
i
])
{
return
true
;
}
}
return
false
;
return
index0
<
index1
?
index0
:
index1
;
}
}
\ No newline at end of file
source/libs/sync/src/sync_raft_quorum_majority.c
浏览文件 @
9188298e
此差异已折叠。
点击以展开。
source/libs/sync/src/sync_raft_restore.c
浏览文件 @
9188298e
此差异已折叠。
点击以展开。
source/libs/tkv/src/tkv.c
浏览文件 @
9188298e
...
@@ -158,6 +158,8 @@ static void tkvInit() {
...
@@ -158,6 +158,8 @@ static void tkvInit() {
#ifdef USE_ROCKSDB
#ifdef USE_ROCKSDB
defaultReadOpts
.
ropts
=
rocksdb_readoptions_create
();
defaultReadOpts
.
ropts
=
rocksdb_readoptions_create
();
defaultWriteOpts
.
wopts
=
rocksdb_writeoptions_create
();
defaultWriteOpts
.
wopts
=
rocksdb_writeoptions_create
();
rocksdb_writeoptions_disable_WAL
(
defaultWriteOpts
.
wopts
,
true
);
#endif
#endif
}
}
...
@@ -166,4 +168,4 @@ static void tkvClear() {
...
@@ -166,4 +168,4 @@ static void tkvClear() {
rocksdb_readoptions_destroy
(
defaultReadOpts
.
ropts
);
rocksdb_readoptions_destroy
(
defaultReadOpts
.
ropts
);
rocksdb_writeoptions_destroy
(
defaultWriteOpts
.
wopts
);
rocksdb_writeoptions_destroy
(
defaultWriteOpts
.
wopts
);
#endif
#endif
}
}
\ No newline at end of file
source/libs/wal/src/wal.c
浏览文件 @
9188298e
...
@@ -19,11 +19,19 @@ int32_t walInit() { return 0; }
...
@@ -19,11 +19,19 @@ int32_t walInit() { return 0; }
void
walCleanUp
()
{}
void
walCleanUp
()
{}
SWal
*
walOpen
(
char
*
path
,
SWalCfg
*
pCfg
)
{
return
NULL
;
}
// walOpen allocates a new WAL handle and returns it, or NULL when the
// allocation fails. path and pCfg are accepted but not used by this
// implementation. The returned handle is released with walClose.
SWal* walOpen(char* path, SWalCfg* pCfg) {
  (void)path;
  (void)pCfg;

  // calloc instead of malloc: no field is assigned here, so zero-filling keeps
  // callers from ever reading indeterminate bytes out of the new handle.
  SWal* pWal = calloc(1, sizeof(*pWal));
  return pWal;  // NULL on allocation failure
}
int32_t
walAlter
(
SWal
*
pWal
,
SWalCfg
*
pCfg
)
{
return
0
;
}
int32_t
walAlter
(
SWal
*
pWal
,
SWalCfg
*
pCfg
)
{
return
0
;
}
void
walClose
(
SWal
*
pWal
)
{}
// walClose releases a WAL handle. Passing NULL is allowed and does nothing.
void walClose(SWal* pWal) {
  // free() is defined to be a no-op for NULL, so no explicit guard is needed.
  free(pWal);
}
void
walFsync
(
SWal
*
pWal
,
bool
force
)
{}
void
walFsync
(
SWal
*
pWal
,
bool
force
)
{}
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录