Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
taosdata
TDengine
提交
677a27a0
T
TDengine
项目概览
taosdata
/
TDengine
大约 1 年 前同步成功
通知
1184
Star
22015
Fork
4786
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
1
列表
看板
标记
里程碑
合并请求
0
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
T
TDengine
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
1
Issue
1
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
体验新版 GitCode,发现更多精彩内容 >>
未验证
提交
677a27a0
编写于
1月 31, 2023
作者:
X
Xiaoyu Wang
提交者:
GitHub
1月 31, 2023
浏览文件
操作
浏览文件
下载
差异文件
Merge pull request #19690 from taosdata/fix/3.0_merge_main
Fix/3.0_merge_main
上级
3d2b5ea6
2d03fb60
变更
25
隐藏空白更改
内联
并排
Showing
25 changed file
with
1545 addition
and
2008 deletion
+1545
-2008
include/common/tdataformat.h
include/common/tdataformat.h
+10
-4
include/libs/wal/wal.h
include/libs/wal/wal.h
+1
-0
source/common/src/tdataformat.c
source/common/src/tdataformat.c
+0
-4
source/dnode/vnode/src/inc/tsdb.h
source/dnode/vnode/src/inc/tsdb.h
+1
-0
source/dnode/vnode/src/inc/vnodeInt.h
source/dnode/vnode/src/inc/vnodeInt.h
+1
-1
source/dnode/vnode/src/meta/metaQuery.c
source/dnode/vnode/src/meta/metaQuery.c
+2
-3
source/dnode/vnode/src/sma/smaSnapshot.c
source/dnode/vnode/src/sma/smaSnapshot.c
+2
-2
source/dnode/vnode/src/tq/tq.c
source/dnode/vnode/src/tq/tq.c
+6
-1
source/dnode/vnode/src/tsdb/tsdbSnapshot.c
source/dnode/vnode/src/tsdb/tsdbSnapshot.c
+1399
-901
source/dnode/vnode/src/tsdb/tsdbUtil.c
source/dnode/vnode/src/tsdb/tsdbUtil.c
+2
-1
source/dnode/vnode/src/vnd/vnodeSnapshot.c
source/dnode/vnode/src/vnd/vnodeSnapshot.c
+1
-1
source/libs/sync/src/syncAppendEntries.c
source/libs/sync/src/syncAppendEntries.c
+0
-292
source/libs/sync/src/syncAppendEntriesReply.c
source/libs/sync/src/syncAppendEntriesReply.c
+0
-60
source/libs/sync/src/syncCommit.c
source/libs/sync/src/syncCommit.c
+1
-219
source/libs/sync/src/syncElection.c
source/libs/sync/src/syncElection.c
+13
-5
source/libs/sync/src/syncMain.c
source/libs/sync/src/syncMain.c
+31
-352
source/libs/sync/src/syncPipeline.c
source/libs/sync/src/syncPipeline.c
+4
-1
source/libs/sync/src/syncReplication.c
source/libs/sync/src/syncReplication.c
+0
-138
source/libs/wal/src/walMeta.c
source/libs/wal/src/walMeta.c
+38
-1
source/libs/wal/src/walRef.c
source/libs/wal/src/walRef.c
+27
-0
source/os/src/osMath.c
source/os/src/osMath.c
+1
-1
source/util/src/talgo.c
source/util/src/talgo.c
+0
-16
tests/parallel_test/container_build.sh
tests/parallel_test/container_build.sh
+2
-2
tests/system-test/7-tmq/tmqUpdate-1ctb.py
tests/system-test/7-tmq/tmqUpdate-1ctb.py
+1
-1
tests/system-test/7-tmq/tmqUpdate-multiCtb-snapshot0.py
tests/system-test/7-tmq/tmqUpdate-multiCtb-snapshot0.py
+2
-2
未找到文件。
include/common/tdataformat.h
浏览文件 @
677a27a0
...
...
@@ -146,9 +146,9 @@ extern void (*tColDataCalcSMA[])(SColData *pColData, int64_t *sum, int64_t *max,
int32_t
tColDataAddValueByBind
(
SColData
*
pColData
,
TAOS_MULTI_BIND
*
pBind
);
void
tColDataSortMerge
(
SArray
*
colDataArr
);
//for raw block
int32_t
tColDataAddValueByDataBlock
(
SColData
*
pColData
,
int8_t
type
,
int32_t
bytes
,
int32_t
nRows
,
char
*
lengthOrbitmap
,
char
*
data
);
//
for raw block
int32_t
tColDataAddValueByDataBlock
(
SColData
*
pColData
,
int8_t
type
,
int32_t
bytes
,
int32_t
nRows
,
char
*
lengthOrbitmap
,
char
*
data
);
// for encode/decode
int32_t
tPutColData
(
uint8_t
*
pBuf
,
SColData
*
pColData
);
int32_t
tGetColData
(
uint8_t
*
pBuf
,
SColData
*
pColData
);
...
...
@@ -261,7 +261,13 @@ struct STag {
// STSchema ================================
STSchema
*
tBuildTSchema
(
SSchema
*
aSchema
,
int32_t
numOfCols
,
int32_t
version
);
void
tDestroyTSchema
(
STSchema
*
pTSchema
);
#define tDestroyTSchema(pTSchema) \
do { \
if (pTSchema) { \
taosMemoryFree(pTSchema); \
pTSchema = NULL; \
} \
} while (0)
#endif
...
...
include/libs/wal/wal.h
浏览文件 @
677a27a0
...
...
@@ -201,6 +201,7 @@ int32_t walFetchHead(SWalReader *pRead, int64_t ver, SWalCkHead *pHead);
int32_t
walFetchBody
(
SWalReader
*
pRead
,
SWalCkHead
**
ppHead
);
int32_t
walSkipFetchBody
(
SWalReader
*
pRead
,
const
SWalCkHead
*
pHead
);
SWalRef
*
walRefFirstVer
(
SWal
*
,
SWalRef
*
);
SWalRef
*
walRefCommittedVer
(
SWal
*
);
SWalRef
*
walOpenRef
(
SWal
*
);
...
...
source/common/src/tdataformat.c
浏览文件 @
677a27a0
...
...
@@ -1532,10 +1532,6 @@ STSchema *tBuildTSchema(SSchema *aSchema, int32_t numOfCols, int32_t version) {
return
pTSchema
;
}
void
tDestroyTSchema
(
STSchema
*
pTSchema
)
{
if
(
pTSchema
)
taosMemoryFree
(
pTSchema
);
}
// SColData ========================================
void
tColDataDestroy
(
void
*
ph
)
{
SColData
*
pColData
=
(
SColData
*
)
ph
;
...
...
source/dnode/vnode/src/inc/tsdb.h
浏览文件 @
677a27a0
...
...
@@ -206,6 +206,7 @@ int32_t tsdbCmprColData(SColData *pColData, int8_t cmprAlg, SBlockCol *pBlockCol
uint8_t
**
ppBuf
);
int32_t
tsdbDecmprColData
(
uint8_t
*
pIn
,
SBlockCol
*
pBlockCol
,
int8_t
cmprAlg
,
int32_t
nVal
,
SColData
*
pColData
,
uint8_t
**
ppBuf
);
int32_t
tRowInfoCmprFn
(
const
void
*
p1
,
const
void
*
p2
);
// tsdbMemTable ==============================================================================================
// SMemTable
int32_t
tsdbMemTableCreate
(
STsdb
*
pTsdb
,
SMemTable
**
ppMemTable
);
...
...
source/dnode/vnode/src/inc/vnodeInt.h
浏览文件 @
677a27a0
...
...
@@ -252,7 +252,7 @@ int32_t tsdbSnapReaderClose(STsdbSnapReader** ppReader);
int32_t
tsdbSnapRead
(
STsdbSnapReader
*
pReader
,
uint8_t
**
ppData
);
// STsdbSnapWriter ========================================
int32_t
tsdbSnapWriterOpen
(
STsdb
*
pTsdb
,
int64_t
sver
,
int64_t
ever
,
STsdbSnapWriter
**
ppWriter
);
int32_t
tsdbSnapWrite
(
STsdbSnapWriter
*
pWriter
,
uint8_t
*
pData
,
uint32_t
nData
);
int32_t
tsdbSnapWrite
(
STsdbSnapWriter
*
pWriter
,
SSnapDataHdr
*
pHdr
);
int32_t
tsdbSnapWriterPrepareClose
(
STsdbSnapWriter
*
pWriter
);
int32_t
tsdbSnapWriterClose
(
STsdbSnapWriter
**
ppWriter
,
int8_t
rollback
);
// STqSnapshotReader ==
...
...
source/dnode/vnode/src/meta/metaQuery.c
浏览文件 @
677a27a0
...
...
@@ -706,9 +706,8 @@ int32_t metaGetTbTSchemaEx(SMeta *pMeta, tb_uid_t suid, tb_uid_t uid, int32_t sv
}
}
if
(
sver
<=
0
)
{
metaError
(
"meta/query: incorrect sver: %"
PRId32
"."
,
sver
);
code
=
TSDB_CODE_FAILED
;
if
(
ASSERTS
(
sver
>
0
,
__FILE__
,
__LINE__
,
"failed to get table schema version: %d"
,
sver
))
{
code
=
TSDB_CODE_NOT_FOUND
;
goto
_exit
;
}
...
...
source/dnode/vnode/src/sma/smaSnapshot.c
浏览文件 @
677a27a0
...
...
@@ -446,10 +446,10 @@ int32_t rsmaSnapWrite(SRSmaSnapWriter* pWriter, uint8_t* pData, uint32_t nData)
// rsma1/rsma2
if
(
pHdr
->
type
==
SNAP_DATA_RSMA1
)
{
pHdr
->
type
=
SNAP_DATA_TSDB
;
code
=
tsdbSnapWrite
(
pWriter
->
pDataWriter
[
0
],
p
Data
,
nData
);
code
=
tsdbSnapWrite
(
pWriter
->
pDataWriter
[
0
],
p
Hdr
);
}
else
if
(
pHdr
->
type
==
SNAP_DATA_RSMA2
)
{
pHdr
->
type
=
SNAP_DATA_TSDB
;
code
=
tsdbSnapWrite
(
pWriter
->
pDataWriter
[
1
],
p
Data
,
nData
);
code
=
tsdbSnapWrite
(
pWriter
->
pDataWriter
[
1
],
p
Hdr
);
}
else
if
(
pHdr
->
type
==
SNAP_DATA_QTASK
)
{
code
=
rsmaSnapWriteQTaskInfo
(
pWriter
,
pData
,
nData
);
}
else
{
...
...
source/dnode/vnode/src/tq/tq.c
浏览文件 @
677a27a0
...
...
@@ -520,7 +520,12 @@ int32_t tqProcessPollReq(STQ* pTq, SRpcMsg* pMsg) {
tqOffsetResetToData
(
&
fetchOffsetNew
,
0
,
0
);
}
}
else
{
tqOffsetResetToLog
(
&
fetchOffsetNew
,
walGetFirstVer
(
pTq
->
pVnode
->
pWal
));
pHandle
->
pRef
=
walRefFirstVer
(
pTq
->
pVnode
->
pWal
,
pHandle
->
pRef
);
if
(
pHandle
->
pRef
==
NULL
)
{
terrno
=
TSDB_CODE_OUT_OF_MEMORY
;
return
-
1
;
}
tqOffsetResetToLog
(
&
fetchOffsetNew
,
pHandle
->
pRef
->
refVer
-
1
);
}
}
else
if
(
reqOffset
.
type
==
TMQ_OFFSET__RESET_LATEST
)
{
if
(
pHandle
->
execHandle
.
subType
==
TOPIC_SUB_TYPE__COLUMN
)
{
...
...
source/dnode/vnode/src/tsdb/tsdbSnapshot.c
浏览文件 @
677a27a0
...
...
@@ -15,274 +15,628 @@
#include "tsdb.h"
// STsdbSnapReader ========================================
typedef
enum
{
SNAP_DATA_FILE_ITER
=
0
,
SNAP_STT_FILE_ITER
}
EFIterT
;
extern
int32_t
tsdbReadDataBlockEx
(
SDataFReader
*
pReader
,
SDataBlk
*
pDataBlk
,
SBlockData
*
pBlockData
);
extern
int32_t
tsdbUpdateTableSchema
(
SMeta
*
pMeta
,
int64_t
suid
,
int64_t
uid
,
SSkmInfo
*
pSkmInfo
);
extern
int32_t
tsdbWriteDataBlock
(
SDataFWriter
*
pWriter
,
SBlockData
*
pBlockData
,
SMapData
*
mDataBlk
,
int8_t
cmprAlg
);
extern
int32_t
tsdbWriteSttBlock
(
SDataFWriter
*
pWriter
,
SBlockData
*
pBlockData
,
SArray
*
aSttBlk
,
int8_t
cmprAlg
);
// STsdbDataIter2 ========================================
#define TSDB_MEM_TABLE_DATA_ITER 0
#define TSDB_DATA_FILE_DATA_ITER 1
#define TSDB_STT_FILE_DATA_ITER 2
#define TSDB_TOMB_FILE_DATA_ITER 3
typedef
struct
STsdbDataIter2
STsdbDataIter2
;
typedef
struct
STsdbFilterInfo
STsdbFilterInfo
;
typedef
struct
{
SRBTreeNode
n
;
SRowInfo
rInfo
;
EFIterT
type
;
int64_t
suid
;
int64_t
uid
;
SDelData
delData
;
}
SDelInfo
;
struct
STsdbDataIter2
{
STsdbDataIter2
*
next
;
SRBTreeNode
rbtn
;
int32_t
type
;
SRowInfo
rowInfo
;
SDelInfo
delInfo
;
union
{
// TSDB_MEM_TABLE_DATA_ITER
struct
{
SArray
*
aBlockIdx
;
int32_t
iBlockIdx
;
SBlockIdx
*
pBlockIdx
;
SMapData
mBlock
;
int32_t
iBlock
;
};
// .data file
SMemTable
*
pMemTable
;
}
mIter
;
// TSDB_DATA_FILE_DATA_ITER
struct
{
int32_t
iStt
;
SArray
*
aSttBlk
;
int32_t
iSttBlk
;
};
// .stt file
SDataFReader
*
pReader
;
SArray
*
aBlockIdx
;
// SArray<SBlockIdx>
SMapData
mDataBlk
;
SBlockData
bData
;
int32_t
iBlockIdx
;
int32_t
iDataBlk
;
int32_t
iRow
;
}
dIter
;
// TSDB_STT_FILE_DATA_ITER
struct
{
SDataFReader
*
pReader
;
int32_t
iStt
;
SArray
*
aSttBlk
;
SBlockData
bData
;
int32_t
iSttBlk
;
int32_t
iRow
;
}
sIter
;
// TSDB_TOMB_FILE_DATA_ITER
struct
{
SDelFReader
*
pReader
;
SArray
*
aDelIdx
;
SArray
*
aDelData
;
int32_t
iDelIdx
;
int32_t
iDelData
;
}
tIter
;
};
SBlockData
bData
;
int32_t
iRow
;
}
SFDataIter
;
};
struct
STsdbSnapReader
{
STsdb
*
pTsdb
;
#define TSDB_FILTER_FLAG_BY_VERSION 0x1
struct
STsdbFilterInfo
{
int32_t
flag
;
int64_t
sver
;
int64_t
ever
;
STsdbFS
fs
;
int8_t
type
;
// for data file
int8_t
dataDone
;
int32_t
fid
;
SDataFReader
*
pDataFReader
;
SFDataIter
*
pIter
;
SRBTree
rbt
;
SFDataIter
aFDataIter
[
TSDB_MAX_STT_TRIGGER
+
1
];
SBlockData
bData
;
SSkmInfo
skmTable
;
// for del file
int8_t
delDone
;
SDelFReader
*
pDelFReader
;
SArray
*
aDelIdx
;
// SArray<SDelIdx>
int32_t
iDelIdx
;
SArray
*
aDelData
;
// SArray<SDelData>
uint8_t
*
aBuf
[
5
];
};
extern
int32_t
tRowInfoCmprFn
(
const
void
*
p1
,
const
void
*
p2
);
extern
int32_t
tsdbReadDataBlockEx
(
SDataFReader
*
pReader
,
SDataBlk
*
pDataBlk
,
SBlockData
*
pBlockData
);
extern
int32_t
tsdbUpdateTableSchema
(
SMeta
*
pMeta
,
int64_t
suid
,
int64_t
uid
,
SSkmInfo
*
pSkmInfo
);
#define TSDB_RBTN_TO_DATA_ITER(pNode) ((STsdbDataIter2*)(((char*)pNode) - offsetof(STsdbDataIter2, rbtn)))
static
int32_t
tFDataIterCmprFn
(
const
SRBTreeNode
*
pNode1
,
const
SRBTreeNode
*
pNode2
)
{
SFDataIter
*
pIter1
=
(
SFDataIter
*
)(((
uint8_t
*
)
pNode1
)
-
offsetof
(
SFDataIter
,
n
));
SFDataIter
*
pIter2
=
(
SFDataIter
*
)(((
uint8_t
*
)
pNode2
)
-
offsetof
(
SFDataIter
,
n
));
/* open */
static
int32_t
tsdbOpenDataFileDataIter
(
SDataFReader
*
pReader
,
STsdbDataIter2
**
ppIter
)
{
int32_t
code
=
0
;
int32_t
lino
=
0
;
return
tRowInfoCmprFn
(
&
pIter1
->
rInfo
,
&
pIter2
->
rInfo
);
// create handle
STsdbDataIter2
*
pIter
=
(
STsdbDataIter2
*
)
taosMemoryCalloc
(
1
,
sizeof
(
*
pIter
));
if
(
pIter
==
NULL
)
{
code
=
TSDB_CODE_OUT_OF_MEMORY
;
TSDB_CHECK_CODE
(
code
,
lino
,
_exit
);
}
pIter
->
type
=
TSDB_DATA_FILE_DATA_ITER
;
pIter
->
dIter
.
pReader
=
pReader
;
if
((
pIter
->
dIter
.
aBlockIdx
=
taosArrayInit
(
0
,
sizeof
(
SBlockIdx
)))
==
NULL
)
{
code
=
TSDB_CODE_OUT_OF_MEMORY
;
TSDB_CHECK_CODE
(
code
,
lino
,
_exit
);
}
code
=
tBlockDataCreate
(
&
pIter
->
dIter
.
bData
);
TSDB_CHECK_CODE
(
code
,
lino
,
_exit
);
pIter
->
dIter
.
iBlockIdx
=
0
;
pIter
->
dIter
.
iDataBlk
=
0
;
pIter
->
dIter
.
iRow
=
0
;
// read data
code
=
tsdbReadBlockIdx
(
pReader
,
pIter
->
dIter
.
aBlockIdx
);
TSDB_CHECK_CODE
(
code
,
lino
,
_exit
);
if
(
taosArrayGetSize
(
pIter
->
dIter
.
aBlockIdx
)
==
0
)
goto
_clear
;
_exit:
if
(
code
)
{
if
(
pIter
)
{
_clear:
tBlockDataDestroy
(
&
pIter
->
dIter
.
bData
);
taosArrayDestroy
(
pIter
->
dIter
.
aBlockIdx
);
taosMemoryFree
(
pIter
);
pIter
=
NULL
;
}
}
*
ppIter
=
pIter
;
return
code
;
}
static
int32_t
tsdb
SnapReadOpenFile
(
STsdbSnapReader
*
pRead
er
)
{
static
int32_t
tsdb
OpenSttFileDataIter
(
SDataFReader
*
pReader
,
int32_t
iStt
,
STsdbDataIter2
**
ppIt
er
)
{
int32_t
code
=
0
;
int32_t
lino
=
0
;
SDFileSet
dFileSet
=
{.
fid
=
pReader
->
fid
};
SDFileSet
*
pSet
=
taosArraySearch
(
pReader
->
fs
.
aDFileSet
,
&
dFileSet
,
tDFileSetCmprFn
,
TD_GT
);
if
(
pSet
==
NULL
)
return
code
;
// create handle
STsdbDataIter2
*
pIter
=
(
STsdbDataIter2
*
)
taosMemoryCalloc
(
1
,
sizeof
(
*
pIter
));
if
(
pIter
==
NULL
)
{
code
=
TSDB_CODE_OUT_OF_MEMORY
;
TSDB_CHECK_CODE
(
code
,
lino
,
_exit
);
}
pReader
->
fid
=
pSet
->
fid
;
code
=
tsdbDataFReaderOpen
(
&
pReader
->
pDataFReader
,
pReader
->
pTsdb
,
pSet
);
TSDB_CHECK_CODE
(
code
,
lino
,
_exit
);
pIter
->
type
=
TSDB_STT_FILE_DATA_ITER
;
pIter
->
sIter
.
pReader
=
pReader
;
pIter
->
sIter
.
iStt
=
iStt
;
pIter
->
sIter
.
aSttBlk
=
taosArrayInit
(
0
,
sizeof
(
SSttBlk
));
if
(
pIter
->
sIter
.
aSttBlk
==
NULL
)
{
code
=
TSDB_CODE_OUT_OF_MEMORY
;
TSDB_CHECK_CODE
(
code
,
lino
,
_exit
);
}
pReader
->
pIter
=
NULL
;
tRBTreeCreate
(
&
pReader
->
rbt
,
tFDataIterCmprFn
);
code
=
tBlockDataCreate
(
&
pIter
->
sIter
.
bData
)
;
TSDB_CHECK_CODE
(
code
,
lino
,
_exit
);
// .data file
SFDataIter
*
pIter
=
&
pReader
->
aFDataIter
[
0
];
pIter
->
type
=
SNAP_DATA_FILE_ITER
;
pIter
->
sIter
.
iSttBlk
=
0
;
pIter
->
sIter
.
iRow
=
0
;
code
=
tsdbReadBlockIdx
(
pReader
->
pDataFReader
,
pIter
->
aBlockIdx
);
// read data
code
=
tsdbReadSttBlk
(
pReader
,
iStt
,
pIter
->
sIter
.
aSttBlk
);
TSDB_CHECK_CODE
(
code
,
lino
,
_exit
);
for
(
pIter
->
iBlockIdx
=
0
;
pIter
->
iBlockIdx
<
taosArrayGetSize
(
pIter
->
aBlockIdx
);
pIter
->
iBlockIdx
++
)
{
pIter
->
pBlockIdx
=
(
SBlockIdx
*
)
taosArrayGet
(
pIter
->
aBlockIdx
,
pIter
->
iBlockIdx
);
if
(
taosArrayGetSize
(
pIter
->
sIter
.
aSttBlk
)
==
0
)
goto
_clear
;
code
=
tsdbReadDataBlk
(
pReader
->
pDataFReader
,
pIter
->
pBlockIdx
,
&
pIter
->
mBlock
);
_exit:
if
(
code
)
{
if
(
pIter
)
{
_clear:
taosArrayDestroy
(
pIter
->
sIter
.
aSttBlk
);
tBlockDataDestroy
(
&
pIter
->
sIter
.
bData
);
taosMemoryFree
(
pIter
);
pIter
=
NULL
;
}
}
*
ppIter
=
pIter
;
return
code
;
}
static
int32_t
tsdbOpenTombFileDataIter
(
SDelFReader
*
pReader
,
STsdbDataIter2
**
ppIter
)
{
int32_t
code
=
0
;
int32_t
lino
=
0
;
STsdbDataIter2
*
pIter
=
(
STsdbDataIter2
*
)
taosMemoryCalloc
(
1
,
sizeof
(
*
pIter
));
if
(
pIter
==
NULL
)
{
code
=
TSDB_CODE_OUT_OF_MEMORY
;
TSDB_CHECK_CODE
(
code
,
lino
,
_exit
);
}
pIter
->
type
=
TSDB_TOMB_FILE_DATA_ITER
;
for
(
pIter
->
iBlock
=
0
;
pIter
->
iBlock
<
pIter
->
mBlock
.
nItem
;
pIter
->
iBlock
++
)
{
SDataBlk
dataBlk
;
tMapDataGetItemByIdx
(
&
pIter
->
mBlock
,
pIter
->
iBlock
,
&
dataBlk
,
tGetDataBlk
);
pIter
->
tIter
.
pReader
=
pReader
;
if
((
pIter
->
tIter
.
aDelIdx
=
taosArrayInit
(
0
,
sizeof
(
SDelIdx
)))
==
NULL
)
{
code
=
TSDB_CODE_OUT_OF_MEMORY
;
TSDB_CHECK_CODE
(
code
,
lino
,
_exit
);
}
if
((
pIter
->
tIter
.
aDelData
=
taosArrayInit
(
0
,
sizeof
(
SDelData
)))
==
NULL
)
{
code
=
TSDB_CODE_OUT_OF_MEMORY
;
TSDB_CHECK_CODE
(
code
,
lino
,
_exit
);
}
if
(
dataBlk
.
minVer
>
pReader
->
ever
||
dataBlk
.
maxVer
<
pReader
->
sver
)
continue
;
code
=
tsdbReadDelIdx
(
pReader
,
pIter
->
tIter
.
aDelIdx
);
TSDB_CHECK_CODE
(
code
,
lino
,
_exit
);
code
=
tsdbReadDataBlockEx
(
pReader
->
pDataFReader
,
&
dataBlk
,
&
pIter
->
bData
);
TSDB_CHECK_CODE
(
code
,
lino
,
_exit
);
if
(
taosArrayGetSize
(
pIter
->
tIter
.
aDelIdx
)
==
0
)
goto
_clear
;
ASSERT
(
pIter
->
pBlockIdx
->
suid
==
pIter
->
bData
.
suid
)
;
ASSERT
(
pIter
->
pBlockIdx
->
uid
==
pIter
->
bData
.
uid
)
;
pIter
->
tIter
.
iDelIdx
=
0
;
pIter
->
tIter
.
iDelData
=
0
;
for
(
pIter
->
iRow
=
0
;
pIter
->
iRow
<
pIter
->
bData
.
nRow
;
pIter
->
iRow
++
)
{
int64_t
rowVer
=
pIter
->
bData
.
aVersion
[
pIter
->
iRow
];
_exit:
if
(
code
)
{
if
(
pIter
)
{
_clear:
taosArrayDestroy
(
pIter
->
tIter
.
aDelIdx
);
taosArrayDestroy
(
pIter
->
tIter
.
aDelData
);
taosMemoryFree
(
pIter
);
pIter
=
NULL
;
}
}
*
ppIter
=
pIter
;
return
code
;
}
if
(
rowVer
>=
pReader
->
sver
&&
rowVer
<=
pReader
->
ever
)
{
pIter
->
rInfo
.
suid
=
pIter
->
pBlockIdx
->
suid
;
pIter
->
rInfo
.
uid
=
pIter
->
pBlockIdx
->
uid
;
pIter
->
rInfo
.
row
=
tsdbRowFromBlockData
(
&
pIter
->
bData
,
pIter
->
iRow
);
goto
_add_iter_and_break
;
/* close */
static
void
tsdbCloseDataFileDataIter
(
STsdbDataIter2
*
pIter
)
{
tBlockDataDestroy
(
&
pIter
->
dIter
.
bData
);
tMapDataClear
(
&
pIter
->
dIter
.
mDataBlk
);
taosArrayDestroy
(
pIter
->
dIter
.
aBlockIdx
);
taosMemoryFree
(
pIter
);
}
static
void
tsdbCloseSttFileDataIter
(
STsdbDataIter2
*
pIter
)
{
tBlockDataDestroy
(
&
pIter
->
sIter
.
bData
);
taosArrayDestroy
(
pIter
->
sIter
.
aSttBlk
);
taosMemoryFree
(
pIter
);
}
static
void
tsdbCloseTombFileDataIter
(
STsdbDataIter2
*
pIter
)
{
taosArrayDestroy
(
pIter
->
tIter
.
aDelData
);
taosArrayDestroy
(
pIter
->
tIter
.
aDelIdx
);
taosMemoryFree
(
pIter
);
}
static
void
tsdbCloseDataIter2
(
STsdbDataIter2
*
pIter
)
{
if
(
pIter
->
type
==
TSDB_MEM_TABLE_DATA_ITER
)
{
ASSERT
(
0
);
}
else
if
(
pIter
->
type
==
TSDB_DATA_FILE_DATA_ITER
)
{
tsdbCloseDataFileDataIter
(
pIter
);
}
else
if
(
pIter
->
type
==
TSDB_STT_FILE_DATA_ITER
)
{
tsdbCloseSttFileDataIter
(
pIter
);
}
else
if
(
pIter
->
type
==
TSDB_TOMB_FILE_DATA_ITER
)
{
tsdbCloseTombFileDataIter
(
pIter
);
}
else
{
ASSERT
(
0
);
}
}
/* cmpr */
static
int32_t
tsdbDataIterCmprFn
(
const
SRBTreeNode
*
pNode1
,
const
SRBTreeNode
*
pNode2
)
{
STsdbDataIter2
*
pIter1
=
TSDB_RBTN_TO_DATA_ITER
(
pNode1
);
STsdbDataIter2
*
pIter2
=
TSDB_RBTN_TO_DATA_ITER
(
pNode2
);
return
tRowInfoCmprFn
(
&
pIter1
->
rowInfo
,
&
pIter2
->
rowInfo
);
}
/* seek */
/* iter next */
static
int32_t
tsdbDataFileDataIterNext
(
STsdbDataIter2
*
pIter
,
STsdbFilterInfo
*
pFilterInfo
)
{
int32_t
code
=
0
;
int32_t
lino
=
0
;
for
(;;)
{
while
(
pIter
->
dIter
.
iRow
<
pIter
->
dIter
.
bData
.
nRow
)
{
if
(
pFilterInfo
)
{
if
(
pFilterInfo
->
flag
&
TSDB_FILTER_FLAG_BY_VERSION
)
{
if
(
pIter
->
dIter
.
bData
.
aVersion
[
pIter
->
dIter
.
iRow
]
<
pFilterInfo
->
sver
||
pIter
->
dIter
.
bData
.
aVersion
[
pIter
->
dIter
.
iRow
]
>
pFilterInfo
->
ever
)
{
pIter
->
dIter
.
iRow
++
;
continue
;
}
}
}
pIter
->
rowInfo
.
suid
=
pIter
->
dIter
.
bData
.
suid
;
pIter
->
rowInfo
.
uid
=
pIter
->
dIter
.
bData
.
uid
;
pIter
->
rowInfo
.
row
=
tsdbRowFromBlockData
(
&
pIter
->
dIter
.
bData
,
pIter
->
dIter
.
iRow
);
pIter
->
dIter
.
iRow
++
;
goto
_exit
;
}
continue
;
for
(;;)
{
while
(
pIter
->
dIter
.
iDataBlk
<
pIter
->
dIter
.
mDataBlk
.
nItem
)
{
SDataBlk
dataBlk
;
tMapDataGetItemByIdx
(
&
pIter
->
dIter
.
mDataBlk
,
pIter
->
dIter
.
iDataBlk
,
&
dataBlk
,
tGetDataBlk
);
// filter
if
(
pFilterInfo
)
{
if
(
pFilterInfo
->
flag
&
TSDB_FILTER_FLAG_BY_VERSION
)
{
if
(
pFilterInfo
->
sver
>
dataBlk
.
maxVer
||
pFilterInfo
->
ever
<
dataBlk
.
minVer
)
{
pIter
->
dIter
.
iDataBlk
++
;
continue
;
}
}
}
_add_iter_and_break:
tRBTreePut
(
&
pReader
->
rbt
,
(
SRBTreeNode
*
)
pIter
);
break
;
}
code
=
tsdbReadDataBlockEx
(
pIter
->
dIter
.
pReader
,
&
dataBlk
,
&
pIter
->
dIter
.
bData
);
TSDB_CHECK_CODE
(
code
,
lino
,
_exit
);
// .stt file
pIter
=
&
pReader
->
aFDataIter
[
1
];
for
(
int32_t
iStt
=
0
;
iStt
<
pSet
->
nSttF
;
iStt
++
)
{
pIter
->
type
=
SNAP_STT_FILE_ITER
;
pIter
->
iStt
=
iStt
;
pIter
->
dIter
.
iDataBlk
++
;
pIter
->
dIter
.
iRow
=
0
;
code
=
tsdbReadSttBlk
(
pReader
->
pDataFReader
,
iStt
,
pIter
->
aSttBlk
)
;
TSDB_CHECK_CODE
(
code
,
lino
,
_exit
);
break
;
}
for
(
pIter
->
iSttBlk
=
0
;
pIter
->
iSttBlk
<
taosArrayGetSize
(
pIter
->
aSttBlk
);
pIter
->
iSttBlk
++
)
{
SSttBlk
*
pSttBlk
=
(
SSttBlk
*
)
taosArrayGet
(
pIter
->
aSttBlk
,
pIter
->
iSttBlk
);
if
(
pIter
->
dIter
.
iRow
<
pIter
->
dIter
.
bData
.
nRow
)
break
;
if
(
pSttBlk
->
minVer
>
pReader
->
ever
)
continue
;
if
(
pSttBlk
->
maxVer
<
pReader
->
sver
)
continue
;
for
(;;)
{
if
(
pIter
->
dIter
.
iBlockIdx
<
taosArrayGetSize
(
pIter
->
dIter
.
aBlockIdx
))
{
SBlockIdx
*
pBlockIdx
=
taosArrayGet
(
pIter
->
dIter
.
aBlockIdx
,
pIter
->
dIter
.
iBlockIdx
);
code
=
tsdbReadSttBlockEx
(
pReader
->
pDataFReader
,
iStt
,
pSttBlk
,
&
pIter
->
bData
);
TSDB_CHECK_CODE
(
code
,
lino
,
_exit
);
code
=
tsdbReadDataBlk
(
pIter
->
dIter
.
pReader
,
pBlockIdx
,
&
pIter
->
dIter
.
mDataBlk
);
TSDB_CHECK_CODE
(
code
,
lino
,
_exit
);
for
(
pIter
->
iRow
=
0
;
pIter
->
iRow
<
pIter
->
bData
.
nRow
;
pIter
->
iRow
++
)
{
int64_t
rowVer
=
pIter
->
bData
.
aVersion
[
pIter
->
iRow
]
;
pIter
->
dIter
.
iBlockIdx
++
;
pIter
->
dIter
.
iDataBlk
=
0
;
if
(
rowVer
>=
pReader
->
sver
&&
rowVer
<=
pReader
->
ever
)
{
pIter
->
rInfo
.
suid
=
pIter
->
bData
.
suid
;
pIter
->
rInfo
.
uid
=
pIter
->
bData
.
uid
?
pIter
->
bData
.
uid
:
pIter
->
bData
.
aUid
[
pIter
->
iRow
];
pIter
->
rInfo
.
row
=
tsdbRowFromBlockData
(
&
pIter
->
bData
,
pIter
->
iRow
);
goto
_add_iter
;
break
;
}
else
{
pIter
->
rowInfo
=
(
SRowInfo
){
0
};
goto
_exit
;
}
}
}
continue
;
_add_iter:
tRBTreePut
(
&
pReader
->
rbt
,
(
SRBTreeNode
*
)
pIter
);
pIter
++
;
}
_exit:
if
(
code
)
{
tsdbError
(
"vgId:%d, %s failed since %s"
,
TD_VID
(
pReader
->
pTsdb
->
pVnode
),
__func__
,
tstrerror
(
code
));
}
else
{
tsdbInfo
(
"vgId:%d, %s done, path:%s, fid:%d"
,
TD_VID
(
pReader
->
pTsdb
->
pVnode
),
__func__
,
pReader
->
pTsdb
->
path
,
pReader
->
fid
);
tsdbError
(
"%s failed at line %d since %s"
,
__func__
,
lino
,
tstrerror
(
code
));
}
return
code
;
}
static
int32_t
tsdbS
napNextRow
(
STsdbSnapReader
*
pReader
)
{
static
int32_t
tsdbS
ttFileDataIterNext
(
STsdbDataIter2
*
pIter
,
STsdbFilterInfo
*
pFilterInfo
)
{
int32_t
code
=
0
;
int32_t
lino
=
0
;
if
(
pReader
->
pIter
)
{
SFDataIter
*
pIter
=
NULL
;
while
(
true
)
{
_find_row:
pIter
=
pReader
->
pIter
;
for
(
pIter
->
iRow
++
;
pIter
->
iRow
<
pIter
->
bData
.
nRow
;
pIter
->
iRow
++
)
{
int64_t
rowVer
=
pIter
->
bData
.
aVersion
[
pIter
->
iRow
];
if
(
rowVer
>=
pReader
->
sver
&&
rowVer
<=
pReader
->
ever
)
{
pIter
->
rInfo
.
suid
=
pIter
->
bData
.
suid
;
pIter
->
rInfo
.
uid
=
pIter
->
bData
.
uid
?
pIter
->
bData
.
uid
:
pIter
->
bData
.
aUid
[
pIter
->
iRow
];
pIter
->
rInfo
.
row
=
tsdbRowFromBlockData
(
&
pIter
->
bData
,
pIter
->
iRow
);
goto
_out
;
for
(;;)
{
while
(
pIter
->
sIter
.
iRow
<
pIter
->
sIter
.
bData
.
nRow
)
{
if
(
pFilterInfo
)
{
if
(
pFilterInfo
->
flag
&
TSDB_FILTER_FLAG_BY_VERSION
)
{
if
(
pFilterInfo
->
sver
>
pIter
->
sIter
.
bData
.
aVersion
[
pIter
->
sIter
.
iRow
]
||
pFilterInfo
->
ever
<
pIter
->
sIter
.
bData
.
aVersion
[
pIter
->
sIter
.
iRow
])
{
pIter
->
sIter
.
iRow
++
;
continue
;
}
}
}
if
(
pIter
->
type
==
SNAP_DATA_FILE_ITER
)
{
while
(
true
)
{
for
(
pIter
->
iBlock
++
;
pIter
->
iBlock
<
pIter
->
mBlock
.
nItem
;
pIter
->
iBlock
++
)
{
SDataBlk
dataBlk
;
tMapDataGetItemByIdx
(
&
pIter
->
mBlock
,
pIter
->
iBlock
,
&
dataBlk
,
tGetDataBlk
);
if
(
dataBlk
.
minVer
>
pReader
->
ever
||
dataBlk
.
maxVer
<
pReader
->
sver
)
continue
;
pIter
->
rowInfo
.
suid
=
pIter
->
sIter
.
bData
.
suid
;
pIter
->
rowInfo
.
uid
=
pIter
->
sIter
.
bData
.
uid
?
pIter
->
sIter
.
bData
.
uid
:
pIter
->
sIter
.
bData
.
aUid
[
pIter
->
sIter
.
iRow
];
pIter
->
rowInfo
.
row
=
tsdbRowFromBlockData
(
&
pIter
->
sIter
.
bData
,
pIter
->
sIter
.
iRow
);
pIter
->
sIter
.
iRow
++
;
goto
_exit
;
}
code
=
tsdbReadDataBlockEx
(
pReader
->
pDataFReader
,
&
dataBlk
,
&
pIter
->
bData
);
if
(
code
)
goto
_err
;
for
(;;)
{
if
(
pIter
->
sIter
.
iSttBlk
<
taosArrayGetSize
(
pIter
->
sIter
.
aSttBlk
))
{
SSttBlk
*
pSttBlk
=
taosArrayGet
(
pIter
->
sIter
.
aSttBlk
,
pIter
->
sIter
.
iSttBlk
);
pIter
->
iRow
=
-
1
;
goto
_find_row
;
if
(
pFilterInfo
)
{
if
(
pFilterInfo
->
flag
&
TSDB_FILTER_FLAG_BY_VERSION
)
{
if
(
pFilterInfo
->
sver
>
pSttBlk
->
maxVer
||
pFilterInfo
->
ever
<
pSttBlk
->
minVer
)
{
pIter
->
sIter
.
iSttBlk
++
;
continue
;
}
}
pIter
->
iBlockIdx
++
;
if
(
pIter
->
iBlockIdx
>=
taosArrayGetSize
(
pIter
->
aBlockIdx
))
break
;
pIter
->
pBlockIdx
=
(
SBlockIdx
*
)
taosArrayGet
(
pIter
->
aBlockIdx
,
pIter
->
iBlockIdx
);
code
=
tsdbReadDataBlk
(
pReader
->
pDataFReader
,
pIter
->
pBlockIdx
,
&
pIter
->
mBlock
);
if
(
code
)
goto
_err
;
pIter
->
iBlock
=
-
1
;
}
pReader
->
pIter
=
NULL
;
code
=
tsdbReadSttBlockEx
(
pIter
->
sIter
.
pReader
,
pIter
->
sIter
.
iStt
,
pSttBlk
,
&
pIter
->
sIter
.
bData
);
TSDB_CHECK_CODE
(
code
,
lino
,
_exit
);
pIter
->
sIter
.
iRow
=
0
;
pIter
->
sIter
.
iSttBlk
++
;
break
;
}
else
if
(
pIter
->
type
==
SNAP_STT_FILE_ITER
)
{
for
(
pIter
->
iSttBlk
++
;
pIter
->
iSttBlk
<
taosArrayGetSize
(
pIter
->
aSttBlk
);
pIter
->
iSttBlk
++
)
{
SSttBlk
*
pSttBlk
=
(
SSttBlk
*
)
taosArrayGet
(
pIter
->
aSttBlk
,
pIter
->
iSttBlk
);
}
else
{
pIter
->
rowInfo
=
(
SRowInfo
){
0
};
goto
_exit
;
}
}
}
_exit:
if
(
code
)
{
tsdbError
(
"%s failed at line %d since %s"
,
__func__
,
lino
,
tstrerror
(
code
));
}
return
code
;
}
if
(
pSttBlk
->
minVer
>
pReader
->
ever
||
pSttBlk
->
maxVer
<
pReader
->
sver
)
continue
;
static
int32_t
tsdbTombFileDataIterNext
(
STsdbDataIter2
*
pIter
,
STsdbFilterInfo
*
pFilterInfo
)
{
int32_t
code
=
0
;
int32_t
lino
=
0
;
code
=
tsdbReadSttBlockEx
(
pReader
->
pDataFReader
,
pIter
->
iStt
,
pSttBlk
,
&
pIter
->
bData
);
if
(
code
)
goto
_err
;
for
(;;)
{
while
(
pIter
->
tIter
.
iDelData
<
taosArrayGetSize
(
pIter
->
tIter
.
aDelData
))
{
SDelData
*
pDelData
=
taosArrayGet
(
pIter
->
tIter
.
aDelData
,
pIter
->
tIter
.
iDelData
);
pIter
->
iRow
=
-
1
;
goto
_find_row
;
if
(
pFilterInfo
)
{
if
(
pFilterInfo
->
flag
&
TSDB_FILTER_FLAG_BY_VERSION
)
{
if
(
pFilterInfo
->
sver
>
pDelData
->
version
||
pFilterInfo
->
ever
<
pDelData
->
version
)
{
pIter
->
tIter
.
iDelData
++
;
continue
;
}
}
}
pReader
->
pIter
=
NULL
;
pIter
->
delInfo
.
delData
=
*
pDelData
;
pIter
->
tIter
.
iDelData
++
;
goto
_exit
;
}
for
(;;)
{
if
(
pIter
->
tIter
.
iDelIdx
<
taosArrayGetSize
(
pIter
->
tIter
.
aDelIdx
))
{
SDelIdx
*
pDelIdx
=
taosArrayGet
(
pIter
->
tIter
.
aDelIdx
,
pIter
->
tIter
.
iDelIdx
);
code
=
tsdbReadDelData
(
pIter
->
tIter
.
pReader
,
pDelIdx
,
pIter
->
tIter
.
aDelData
);
TSDB_CHECK_CODE
(
code
,
lino
,
_exit
);
pIter
->
delInfo
.
suid
=
pDelIdx
->
suid
;
pIter
->
delInfo
.
uid
=
pDelIdx
->
uid
;
pIter
->
tIter
.
iDelData
=
0
;
pIter
->
tIter
.
iDelIdx
++
;
break
;
}
else
{
ASSERT
(
0
);
pIter
->
delInfo
=
(
SDelInfo
){
0
};
goto
_exit
;
}
}
}
_out:
pIter
=
(
SFDataIter
*
)
tRBTreeMin
(
&
pReader
->
rbt
);
if
(
pReader
->
pIter
&&
pIter
)
{
int32_t
c
=
tRowInfoCmprFn
(
&
pReader
->
pIter
->
rInfo
,
&
pIter
->
rInfo
);
if
(
c
>
0
)
{
tRBTreePut
(
&
pReader
->
rbt
,
(
SRBTreeNode
*
)
pReader
->
pIter
);
pReader
->
pIter
=
NULL
;
}
else
{
ASSERT
(
c
);
}
_exit:
if
(
code
)
{
tsdbError
(
"%s failed at line %d since %s"
,
__func__
,
lino
,
tstrerror
(
code
));
}
return
code
;
}
static
int32_t
tsdbDataIterNext2
(
STsdbDataIter2
*
pIter
,
STsdbFilterInfo
*
pFilterInfo
)
{
int32_t
code
=
0
;
if
(
pIter
->
type
==
TSDB_MEM_TABLE_DATA_ITER
)
{
ASSERT
(
0
);
return
code
;
}
else
if
(
pIter
->
type
==
TSDB_DATA_FILE_DATA_ITER
)
{
return
tsdbDataFileDataIterNext
(
pIter
,
pFilterInfo
);
}
else
if
(
pIter
->
type
==
TSDB_STT_FILE_DATA_ITER
)
{
return
tsdbSttFileDataIterNext
(
pIter
,
pFilterInfo
);
}
else
if
(
pIter
->
type
==
TSDB_TOMB_FILE_DATA_ITER
)
{
return
tsdbTombFileDataIterNext
(
pIter
,
pFilterInfo
);
}
else
{
ASSERT
(
0
);
return
code
;
}
}
/* get */
// STsdbSnapReader ========================================
struct
STsdbSnapReader
{
STsdb
*
pTsdb
;
int64_t
sver
;
int64_t
ever
;
int8_t
type
;
uint8_t
*
aBuf
[
5
];
STsdbFS
fs
;
TABLEID
tbid
;
SSkmInfo
skmTable
;
// timeseries data
int8_t
dataDone
;
int32_t
fid
;
SDataFReader
*
pDataFReader
;
STsdbDataIter2
*
iterList
;
STsdbDataIter2
*
pIter
;
SRBTree
rbt
;
SBlockData
bData
;
// tombstone data
int8_t
delDone
;
SDelFReader
*
pDelFReader
;
STsdbDataIter2
*
pTIter
;
SArray
*
aDelData
;
};
static
int32_t
tsdbSnapReadFileDataStart
(
STsdbSnapReader
*
pReader
)
{
int32_t
code
=
0
;
int32_t
lino
=
0
;
SDFileSet
*
pSet
=
taosArraySearch
(
pReader
->
fs
.
aDFileSet
,
&
(
SDFileSet
){.
fid
=
pReader
->
fid
},
tDFileSetCmprFn
,
TD_GT
);
if
(
pSet
==
NULL
)
{
pReader
->
fid
=
INT32_MAX
;
goto
_exit
;
}
pReader
->
fid
=
pSet
->
fid
;
tRBTreeCreate
(
&
pReader
->
rbt
,
tsdbDataIterCmprFn
);
code
=
tsdbDataFReaderOpen
(
&
pReader
->
pDataFReader
,
pReader
->
pTsdb
,
pSet
);
TSDB_CHECK_CODE
(
code
,
lino
,
_exit
);
code
=
tsdbOpenDataFileDataIter
(
pReader
->
pDataFReader
,
&
pReader
->
pIter
);
TSDB_CHECK_CODE
(
code
,
lino
,
_exit
);
if
(
pReader
->
pIter
)
{
// iter to next with filter info (sver, ever)
code
=
tsdbDataIterNext2
(
pReader
->
pIter
,
&
(
STsdbFilterInfo
){.
flag
=
TSDB_FILTER_FLAG_BY_VERSION
,
// flag
.
sver
=
pReader
->
sver
,
.
ever
=
pReader
->
ever
});
TSDB_CHECK_CODE
(
code
,
lino
,
_exit
);
if
(
pReader
->
pIter
->
rowInfo
.
suid
||
pReader
->
pIter
->
rowInfo
.
uid
)
{
// add to rbtree
tRBTreePut
(
&
pReader
->
rbt
,
&
pReader
->
pIter
->
rbtn
);
// add to iterList
pReader
->
pIter
->
next
=
pReader
->
iterList
;
pReader
->
iterList
=
pReader
->
pIter
;
}
else
{
tsdbCloseDataIter2
(
pReader
->
pIter
);
}
}
if
(
pReader
->
pIter
==
NULL
)
{
pReader
->
pIter
=
(
SFDataIter
*
)
tRBTreeMin
(
&
pReader
->
rbt
);
for
(
int32_t
iStt
=
0
;
iStt
<
pSet
->
nSttF
;
++
iStt
)
{
code
=
tsdbOpenSttFileDataIter
(
pReader
->
pDataFReader
,
iStt
,
&
pReader
->
pIter
);
TSDB_CHECK_CODE
(
code
,
lino
,
_exit
);
if
(
pReader
->
pIter
)
{
tRBTreeDrop
(
&
pReader
->
rbt
,
(
SRBTreeNode
*
)
pReader
->
pIter
);
// iter to valid row
code
=
tsdbDataIterNext2
(
pReader
->
pIter
,
&
(
STsdbFilterInfo
){.
flag
=
TSDB_FILTER_FLAG_BY_VERSION
,
// flag
.
sver
=
pReader
->
sver
,
.
ever
=
pReader
->
ever
});
TSDB_CHECK_CODE
(
code
,
lino
,
_exit
);
if
(
pReader
->
pIter
->
rowInfo
.
suid
||
pReader
->
pIter
->
rowInfo
.
uid
)
{
// add to rbtree
tRBTreePut
(
&
pReader
->
rbt
,
&
pReader
->
pIter
->
rbtn
);
// add to iterList
pReader
->
pIter
->
next
=
pReader
->
iterList
;
pReader
->
iterList
=
pReader
->
pIter
;
}
else
{
tsdbCloseDataIter2
(
pReader
->
pIter
);
}
}
}
return
code
;
pReader
->
pIter
=
NULL
;
_err:
_exit:
if
(
code
)
{
tsdbError
(
"vgId:%d %s failed at line %d since %s"
,
TD_VID
(
pReader
->
pTsdb
->
pVnode
),
__func__
,
lino
,
tstrerror
(
code
));
}
else
{
tsdbInfo
(
"vgId:%d %s done, fid:%d"
,
TD_VID
(
pReader
->
pTsdb
->
pVnode
),
__func__
,
pReader
->
fid
);
}
return
code
;
}
static
SRowInfo
*
tsdbSnapGetRow
(
STsdbSnapReader
*
pReader
)
{
static
void
tsdbSnapReadFileDataEnd
(
STsdbSnapReader
*
pReader
)
{
while
(
pReader
->
iterList
)
{
STsdbDataIter2
*
pIter
=
pReader
->
iterList
;
pReader
->
iterList
=
pIter
->
next
;
tsdbCloseDataIter2
(
pIter
);
}
tsdbDataFReaderClose
(
&
pReader
->
pDataFReader
);
}
static
int32_t
tsdbSnapReadNextRow
(
STsdbSnapReader
*
pReader
,
SRowInfo
**
ppRowInfo
)
{
int32_t
code
=
0
;
int32_t
lino
=
0
;
if
(
pReader
->
pIter
)
{
return
&
pReader
->
pIter
->
rInfo
;
}
else
{
tsdbSnapNextRow
(
pReader
);
code
=
tsdbDataIterNext2
(
pReader
->
pIter
,
&
(
STsdbFilterInfo
){.
flag
=
TSDB_FILTER_FLAG_BY_VERSION
,
// flag
.
sver
=
pReader
->
sver
,
.
ever
=
pReader
->
ever
});
TSDB_CHECK_CODE
(
code
,
lino
,
_exit
);
if
(
pReader
->
pIter
->
rowInfo
.
suid
==
0
&&
pReader
->
pIter
->
rowInfo
.
uid
==
0
)
{
pReader
->
pIter
=
NULL
;
}
else
{
SRBTreeNode
*
pNode
=
tRBTreeMin
(
&
pReader
->
rbt
);
if
(
pNode
)
{
int32_t
c
=
tsdbDataIterCmprFn
(
&
pReader
->
pIter
->
rbtn
,
pNode
);
if
(
c
>
0
)
{
tRBTreePut
(
&
pReader
->
rbt
,
&
pReader
->
pIter
->
rbtn
);
pReader
->
pIter
=
NULL
;
}
else
if
(
c
==
0
)
{
ASSERT
(
0
);
}
}
}
}
if
(
pReader
->
pIter
==
NULL
)
{
SRBTreeNode
*
pNode
=
tRBTreeMin
(
&
pReader
->
rbt
);
if
(
pNode
)
{
tRBTreeDrop
(
&
pReader
->
rbt
,
pNode
);
pReader
->
pIter
=
TSDB_RBTN_TO_DATA_ITER
(
pNode
);
}
}
if
(
ppRowInfo
)
{
if
(
pReader
->
pIter
)
{
return
&
pReader
->
pIter
->
r
Info
;
*
ppRowInfo
=
&
pReader
->
pIter
->
row
Info
;
}
else
{
return
NULL
;
*
ppRowInfo
=
NULL
;
}
}
_exit:
if
(
code
)
{
tsdbError
(
"vgId:%d %s failed at line %d since %s"
,
TD_VID
(
pReader
->
pTsdb
->
pVnode
),
__func__
,
lino
,
tstrerror
(
code
));
}
return
code
;
}
static
int32_t
tsdbSnapReadGetRow
(
STsdbSnapReader
*
pReader
,
SRowInfo
**
ppRowInfo
)
{
if
(
pReader
->
pIter
)
{
*
ppRowInfo
=
&
pReader
->
pIter
->
rowInfo
;
return
0
;
}
return
tsdbSnapReadNextRow
(
pReader
,
ppRowInfo
);
}
static
int32_t
tsdbSnapCmprData
(
STsdbSnapReader
*
pReader
,
uint8_t
**
ppData
)
{
...
...
@@ -318,155 +672,215 @@ _exit:
return
code
;
}
static
int32_t
tsdbSnapReadData
(
STsdbSnapReader
*
pReader
,
uint8_t
**
ppData
)
{
static
int32_t
tsdbSnapRead
TimeSeries
Data
(
STsdbSnapReader
*
pReader
,
uint8_t
**
ppData
)
{
int32_t
code
=
0
;
int32_t
lino
=
0
;
STsdb
*
pTsdb
=
pReader
->
pTsdb
;
while
(
true
)
{
tBlockDataReset
(
&
pReader
->
bData
);
for
(;;)
{
// start a new file read if need
if
(
pReader
->
pDataFReader
==
NULL
)
{
code
=
tsdbSnapRead
OpenFile
(
pReader
);
code
=
tsdbSnapRead
FileDataStart
(
pReader
);
TSDB_CHECK_CODE
(
code
,
lino
,
_exit
);
}
if
(
pReader
->
pDataFReader
==
NULL
)
break
;
SRowInfo
*
pRowInfo
=
tsdbSnapGetRow
(
pReader
);
SRowInfo
*
pRowInfo
;
code
=
tsdbSnapReadGetRow
(
pReader
,
&
pRowInfo
);
TSDB_CHECK_CODE
(
code
,
lino
,
_exit
);
if
(
pRowInfo
==
NULL
)
{
tsdb
DataFReaderClose
(
&
pReader
->
pDataF
Reader
);
tsdb
SnapReadFileDataEnd
(
p
Reader
);
continue
;
}
TABLEID
id
=
{.
suid
=
pRowInfo
->
suid
,
.
uid
=
pRowInfo
->
uid
};
SBlockData
*
pBlockData
=
&
pReader
->
bData
;
code
=
tsdbUpdateTableSchema
(
pTsdb
->
pVnode
->
pMeta
,
id
.
suid
,
id
.
uid
,
&
pReader
->
skmTable
);
code
=
tsdbUpdateTableSchema
(
pTsdb
->
pVnode
->
pMeta
,
pRowInfo
->
suid
,
pRowInfo
->
uid
,
&
pReader
->
skmTable
);
TSDB_CHECK_CODE
(
code
,
lino
,
_exit
);
code
=
tBlockDataInit
(
pBlockData
,
&
id
,
pReader
->
skmTable
.
pTSchema
,
NULL
,
0
);
code
=
tBlockDataInit
(
&
pReader
->
bData
,
(
TABLEID
*
)
pRowInfo
,
pReader
->
skmTable
.
pTSchema
,
NULL
,
0
);
TSDB_CHECK_CODE
(
code
,
lino
,
_exit
);
while
(
pRowInfo
->
suid
==
id
.
suid
&&
pRowInfo
->
uid
==
id
.
uid
)
{
code
=
tBlockDataAppendRow
(
pBlockData
,
&
pRowInfo
->
row
,
NULL
,
pRowInfo
->
uid
);
do
{
if
(
!
TABLE_SAME_SCHEMA
(
pReader
->
bData
.
suid
,
pReader
->
bData
.
uid
,
pRowInfo
->
suid
,
pRowInfo
->
uid
))
break
;
if
(
pReader
->
bData
.
uid
&&
pReader
->
bData
.
uid
!=
pRowInfo
->
uid
)
{
code
=
tRealloc
((
uint8_t
**
)
&
pReader
->
bData
.
aUid
,
sizeof
(
int64_t
)
*
(
pReader
->
bData
.
nRow
+
1
));
TSDB_CHECK_CODE
(
code
,
lino
,
_exit
);
for
(
int32_t
iRow
=
0
;
iRow
<
pReader
->
bData
.
nRow
;
++
iRow
)
{
pReader
->
bData
.
aUid
[
iRow
]
=
pReader
->
bData
.
uid
;
}
pReader
->
bData
.
uid
=
0
;
}
code
=
tBlockDataAppendRow
(
&
pReader
->
bData
,
&
pRowInfo
->
row
,
NULL
,
pRowInfo
->
uid
);
TSDB_CHECK_CODE
(
code
,
lino
,
_exit
);
code
=
tsdbSnap
NextRow
(
pReader
);
code
=
tsdbSnap
ReadNextRow
(
pReader
,
&
pRowInfo
);
TSDB_CHECK_CODE
(
code
,
lino
,
_exit
);
pRowInfo
=
tsdbSnapGetRow
(
pReader
);
if
(
pRowInfo
==
NULL
)
{
tsdbDataFReaderClose
(
&
pReader
->
pDataFReader
);
break
;
}
if
(
pReader
->
bData
.
nRow
>=
4096
)
break
;
}
while
(
pRowInfo
);
ASSERT
(
pReader
->
bData
.
nRow
>
0
);
break
;
}
if
(
pReader
->
bData
.
nRow
>
0
)
{
ASSERT
(
pReader
->
bData
.
suid
||
pReader
->
bData
.
uid
);
code
=
tsdbSnapCmprData
(
pReader
,
ppData
);
TSDB_CHECK_CODE
(
code
,
lino
,
_exit
);
}
_exit:
if
(
code
)
{
tsdbError
(
"vgId:%d %s failed at line %d since %s"
,
TD_VID
(
pTsdb
->
pVnode
),
__func__
,
lino
,
tstrerror
(
code
));
}
return
code
;
}
static
int32_t
tsdbSnapCmprTombData
(
STsdbSnapReader
*
pReader
,
uint8_t
**
ppData
)
{
int32_t
code
=
0
;
int32_t
lino
=
0
;
int64_t
size
=
sizeof
(
TABLEID
);
for
(
int32_t
iDelData
=
0
;
iDelData
<
taosArrayGetSize
(
pReader
->
aDelData
);
++
iDelData
)
{
size
+=
tPutDelData
(
NULL
,
taosArrayGet
(
pReader
->
aDelData
,
iDelData
));
}
uint8_t
*
pData
=
(
uint8_t
*
)
taosMemoryMalloc
(
sizeof
(
SSnapDataHdr
)
+
size
);
if
(
pData
==
NULL
)
{
code
=
TSDB_CODE_OUT_OF_MEMORY
;
TSDB_CHECK_CODE
(
code
,
lino
,
_exit
);
}
SSnapDataHdr
*
pHdr
=
(
SSnapDataHdr
*
)
pData
;
pHdr
->
type
=
SNAP_DATA_DEL
;
pHdr
->
size
=
size
;
TABLEID
*
pId
=
(
TABLEID
*
)(
pData
+
sizeof
(
SSnapDataHdr
));
*
pId
=
pReader
->
tbid
;
size
=
sizeof
(
SSnapDataHdr
)
+
sizeof
(
TABLEID
);
for
(
int32_t
iDelData
=
0
;
iDelData
<
taosArrayGetSize
(
pReader
->
aDelData
);
++
iDelData
)
{
size
+=
tPutDelData
(
pData
+
size
,
taosArrayGet
(
pReader
->
aDelData
,
iDelData
));
}
if
(
pBlockData
->
nRow
>=
4096
)
break
;
_exit:
if
(
code
)
{
tsdbError
(
"vgId:%d %s failed at line %d since %s"
,
TD_VID
(
pReader
->
pTsdb
->
pVnode
),
__func__
,
lino
,
tstrerror
(
code
));
if
(
pData
)
{
taosMemoryFree
(
pData
);
pData
=
NULL
;
}
}
*
ppData
=
pData
;
return
code
;
}
code
=
tsdbSnapCmprData
(
pReader
,
ppData
);
TSDB_CHECK_CODE
(
code
,
lino
,
_exit
);
static
void
tsdbSnapReadGetTombData
(
STsdbSnapReader
*
pReader
,
SDelInfo
**
ppDelInfo
)
{
if
(
pReader
->
pTIter
==
NULL
||
(
pReader
->
pTIter
->
delInfo
.
suid
==
0
&&
pReader
->
pTIter
->
delInfo
.
uid
==
0
))
{
*
ppDelInfo
=
NULL
;
}
else
{
*
ppDelInfo
=
&
pReader
->
pTIter
->
delInfo
;
}
}
break
;
static
int32_t
tsdbSnapReadNextTombData
(
STsdbSnapReader
*
pReader
,
SDelInfo
**
ppDelInfo
)
{
int32_t
code
=
0
;
int32_t
lino
=
0
;
code
=
tsdbDataIterNext2
(
pReader
->
pTIter
,
&
(
STsdbFilterInfo
){.
flag
=
TSDB_FILTER_FLAG_BY_VERSION
,
.
sver
=
pReader
->
sver
,
.
ever
=
pReader
->
ever
});
TSDB_CHECK_CODE
(
code
,
lino
,
_exit
);
if
(
ppDelInfo
)
{
tsdbSnapReadGetTombData
(
pReader
,
ppDelInfo
);
}
_exit:
if
(
code
)
{
tsdbError
(
"vgId:%d
, %s failed since %s, path:%s"
,
TD_VID
(
pTsdb
->
pVnode
),
__func__
,
tstrerror
(
code
),
pTsdb
->
path
);
tsdbError
(
"vgId:%d
%s failed at line %d since %s"
,
TD_VID
(
pReader
->
pTsdb
->
pVnode
),
__func__
,
lino
,
tstrerror
(
code
)
);
}
return
code
;
}
static
int32_t
tsdbSnapRead
Del
(
STsdbSnapReader
*
pReader
,
uint8_t
**
ppData
)
{
static
int32_t
tsdbSnapRead
TombData
(
STsdbSnapReader
*
pReader
,
uint8_t
**
ppData
)
{
int32_t
code
=
0
;
int32_t
lino
=
0
;
STsdb
*
pTsdb
=
pReader
->
pTsdb
;
SDelFile
*
pDelFile
=
pReader
->
fs
.
pDelFile
;
STsdb
*
pTsdb
=
pReader
->
pTsdb
;
// open tombstone data iter if need
if
(
pReader
->
pDelFReader
==
NULL
)
{
if
(
pDelFile
==
NULL
)
{
goto
_exit
;
}
if
(
pReader
->
fs
.
pDelFile
==
NULL
)
goto
_exit
;
// open
code
=
tsdbDelFReaderOpen
(
&
pReader
->
pDelFReader
,
pDelFile
,
pTsdb
);
code
=
tsdbDelFReaderOpen
(
&
pReader
->
pDelFReader
,
p
Reader
->
fs
.
p
DelFile
,
pTsdb
);
TSDB_CHECK_CODE
(
code
,
lino
,
_exit
);
// read index
code
=
tsdbReadDelIdx
(
pReader
->
pDelFReader
,
pReader
->
aDelIdx
);
code
=
tsdbOpenTombFileDataIter
(
pReader
->
pDelFReader
,
&
pReader
->
pTIter
);
TSDB_CHECK_CODE
(
code
,
lino
,
_exit
);
pReader
->
iDelIdx
=
0
;
if
(
pReader
->
pTIter
)
{
code
=
tsdbSnapReadNextTombData
(
pReader
,
NULL
);
TSDB_CHECK_CODE
(
code
,
lino
,
_exit
);
}
}
while
(
true
)
{
if
(
pReader
->
iDelIdx
>=
taosArrayGetSize
(
pReader
->
aDelIdx
))
{
tsdbDelFReaderClose
(
&
pReader
->
pDelFReader
);
break
;
}
// loop to get tombstone data
SDelInfo
*
pDelInfo
;
tsdbSnapReadGetTombData
(
pReader
,
&
pDelInfo
);
SDelIdx
*
pDelIdx
=
(
SDelIdx
*
)
taosArrayGet
(
pReader
->
aDelIdx
,
pReader
->
iDelIdx
)
;
if
(
pDelInfo
==
NULL
)
goto
_exit
;
pReader
->
iDelIdx
++
;
pReader
->
tbid
=
*
(
TABLEID
*
)
pDelInfo
;
code
=
tsdbReadDelData
(
pReader
->
pDelFReader
,
pDelIdx
,
pReader
->
aDelData
);
if
(
pReader
->
aDelData
)
{
taosArrayClear
(
pReader
->
aDelData
);
}
else
if
((
pReader
->
aDelData
=
taosArrayInit
(
16
,
sizeof
(
SDelData
)))
==
NULL
)
{
code
=
TSDB_CODE_OUT_OF_MEMORY
;
TSDB_CHECK_CODE
(
code
,
lino
,
_exit
);
}
int32_t
size
=
0
;
for
(
int32_t
iDelData
=
0
;
iDelData
<
taosArrayGetSize
(
pReader
->
aDelData
);
iDelData
++
)
{
SDelData
*
pDelData
=
(
SDelData
*
)
taosArrayGet
(
pReader
->
aDelData
,
iDelData
);
if
(
pDelData
->
version
>=
pReader
->
sver
&&
pDelData
->
version
<=
pReader
->
ever
)
{
size
+=
tPutDelData
(
NULL
,
pDelData
);
}
}
if
(
size
==
0
)
continue
;
// org data
size
=
sizeof
(
TABLEID
)
+
size
;
*
ppData
=
taosMemoryMalloc
(
sizeof
(
SSnapDataHdr
)
+
size
);
if
(
*
ppData
==
NULL
)
{
while
(
pDelInfo
&&
pDelInfo
->
suid
==
pReader
->
tbid
.
suid
&&
pDelInfo
->
uid
==
pReader
->
tbid
.
uid
)
{
if
(
taosArrayPush
(
pReader
->
aDelData
,
&
pDelInfo
->
delData
)
<
0
)
{
code
=
TSDB_CODE_OUT_OF_MEMORY
;
TSDB_CHECK_CODE
(
code
,
lino
,
_exit
);
}
SSnapDataHdr
*
pHdr
=
(
SSnapDataHdr
*
)(
*
ppData
);
pHdr
->
type
=
SNAP_DATA_DEL
;
pHdr
->
size
=
size
;
TABLEID
*
pId
=
(
TABLEID
*
)(
&
pHdr
[
1
]);
pId
->
suid
=
pDelIdx
->
suid
;
pId
->
uid
=
pDelIdx
->
uid
;
int32_t
n
=
sizeof
(
SSnapDataHdr
)
+
sizeof
(
TABLEID
);
for
(
int32_t
iDelData
=
0
;
iDelData
<
taosArrayGetSize
(
pReader
->
aDelData
);
iDelData
++
)
{
SDelData
*
pDelData
=
(
SDelData
*
)
taosArrayGet
(
pReader
->
aDelData
,
iDelData
);
if
(
pDelData
->
version
<
pReader
->
sver
)
continue
;
if
(
pDelData
->
version
>
pReader
->
ever
)
continue
;
n
+=
tPutDelData
((
*
ppData
)
+
n
,
pDelData
);
}
tsdbInfo
(
"vgId:%d, vnode snapshot tsdb read del data for %s, suid:%"
PRId64
" uid:%"
PRId64
" size:%d"
,
TD_VID
(
pTsdb
->
pVnode
),
pTsdb
->
path
,
pDelIdx
->
suid
,
pDelIdx
->
uid
,
size
);
code
=
tsdbSnapReadNextTombData
(
pReader
,
&
pDelInfo
);
TSDB_CHECK_CODE
(
code
,
lino
,
_exit
);
}
break
;
// encode tombstone data
if
(
taosArrayGetSize
(
pReader
->
aDelData
)
>
0
)
{
code
=
tsdbSnapCmprTombData
(
pReader
,
ppData
);
TSDB_CHECK_CODE
(
code
,
lino
,
_exit
);
}
_exit:
if
(
code
)
{
tsdbError
(
"vgId:%d, %s failed since %s, path:%s"
,
TD_VID
(
pTsdb
->
pVnode
),
__func__
,
tstrerror
(
code
),
pTsdb
->
path
);
tsdbError
(
"vgId:%d %s failed at line %d since %s"
,
TD_VID
(
pTsdb
->
pVnode
),
__func__
,
lino
,
tstrerror
(
code
));
}
else
{
tsdbDebug
(
"vgId:%d %s done"
,
TD_VID
(
pTsdb
->
pVnode
),
__func__
);
}
return
code
;
}
int32_t
tsdbSnapReaderOpen
(
STsdb
*
pTsdb
,
int64_t
sver
,
int64_t
ever
,
int8_t
type
,
STsdbSnapReader
**
ppReader
)
{
int32_t
code
=
0
;
int32_t
lino
=
0
;
STsdbSnapReader
*
pReader
=
NULL
;
int32_t
code
=
0
;
int32_t
lino
=
0
;
// alloc
pReader
=
(
STsdbSnapReader
*
)
taosMemoryCalloc
(
1
,
sizeof
(
*
pReader
));
STsdbSnapReader
*
pReader
=
(
STsdbSnapReader
*
)
taosMemoryCalloc
(
1
,
sizeof
(
*
pReader
));
if
(
pReader
==
NULL
)
{
code
=
TSDB_CODE_OUT_OF_MEMORY
;
TSDB_CHECK_CODE
(
code
,
lino
,
_exit
);
...
...
@@ -476,118 +890,80 @@ int32_t tsdbSnapReaderOpen(STsdb* pTsdb, int64_t sver, int64_t ever, int8_t type
pReader
->
ever
=
ever
;
pReader
->
type
=
type
;
code
=
taosThreadRwlockRdlock
(
&
pTsdb
->
rwLock
);
if
(
code
)
{
code
=
TAOS_SYSTEM_ERROR
(
code
);
TSDB_CHECK_CODE
(
code
,
lino
,
_exit
);
}
taosThreadRwlockRdlock
(
&
pTsdb
->
rwLock
);
code
=
tsdbFSRef
(
pTsdb
,
&
pReader
->
fs
);
if
(
code
)
{
taosThreadRwlockUnlock
(
&
pTsdb
->
rwLock
);
TSDB_CHECK_CODE
(
code
,
lino
,
_exit
);
}
taosThreadRwlockUnlock
(
&
pTsdb
->
rwLock
);
code
=
taosThreadRwlockUnlock
(
&
pTsdb
->
rwLock
);
if
(
code
)
{
code
=
TAOS_SYSTEM_ERROR
(
code
);
TSDB_CHECK_CODE
(
code
,
lino
,
_exit
);
}
// data
// init
pReader
->
fid
=
INT32_MIN
;
for
(
int32_t
iIter
=
0
;
iIter
<
sizeof
(
pReader
->
aFDataIter
)
/
sizeof
(
pReader
->
aFDataIter
[
0
]);
iIter
++
)
{
SFDataIter
*
pIter
=
&
pReader
->
aFDataIter
[
iIter
];
if
(
iIter
==
0
)
{
pIter
->
aBlockIdx
=
taosArrayInit
(
0
,
sizeof
(
SBlockIdx
));
if
(
pIter
->
aBlockIdx
==
NULL
)
{
code
=
TSDB_CODE_OUT_OF_MEMORY
;
TSDB_CHECK_CODE
(
code
,
lino
,
_exit
);
}
}
else
{
pIter
->
aSttBlk
=
taosArrayInit
(
0
,
sizeof
(
SSttBlk
));
if
(
pIter
->
aSttBlk
==
NULL
)
{
code
=
TSDB_CODE_OUT_OF_MEMORY
;
TSDB_CHECK_CODE
(
code
,
lino
,
_exit
);
}
}
code
=
tBlockDataCreate
(
&
pIter
->
bData
);
TSDB_CHECK_CODE
(
code
,
lino
,
_exit
);
}
code
=
tBlockDataCreate
(
&
pReader
->
bData
);
TSDB_CHECK_CODE
(
code
,
lino
,
_exit
);
// del
pReader
->
aDelIdx
=
taosArrayInit
(
0
,
sizeof
(
SDelIdx
));
if
(
pReader
->
aDelIdx
==
NULL
)
{
code
=
TSDB_CODE_OUT_OF_MEMORY
;
TSDB_CHECK_CODE
(
code
,
lino
,
_exit
);
}
pReader
->
aDelData
=
taosArrayInit
(
0
,
sizeof
(
SDelData
));
if
(
pReader
->
aDelData
==
NULL
)
{
code
=
TSDB_CODE_OUT_OF_MEMORY
;
TSDB_CHECK_CODE
(
code
,
lino
,
_exit
);
}
_exit:
if
(
code
)
{
tsdbError
(
"vgId:%d, %s failed at line %d since %s, TSDB path: %s"
,
TD_VID
(
pTsdb
->
pVnode
),
__func__
,
lino
,
tstrerror
(
code
),
pTsdb
->
path
);
*
ppReader
=
NULL
;
tsdbError
(
"vgId:%d %s failed at line %d since %s, sver:%"
PRId64
" ever:%"
PRId64
" type:%d"
,
TD_VID
(
pTsdb
->
pVnode
),
__func__
,
lino
,
tstrerror
(
code
),
sver
,
ever
,
type
);
if
(
pReader
)
{
taosArrayDestroy
(
pReader
->
aDelData
);
taosArrayDestroy
(
pReader
->
aDelIdx
);
tBlockDataDestroy
(
&
pReader
->
bData
);
tsdbFS
Destroy
(
&
pReader
->
fs
);
tsdbFS
Unref
(
pTsdb
,
&
pReader
->
fs
);
taosMemoryFree
(
pReader
);
pReader
=
NULL
;
}
}
else
{
*
ppReader
=
pReader
;
tsdbInfo
(
"vgId:%d, vnode snapshot tsdb reader opened for %s"
,
TD_VID
(
pTsdb
->
pVnode
),
pTsdb
->
path
);
tsdbInfo
(
"vgId:%d %s done, sver:%"
PRId64
" ever:%"
PRId64
" type:%d"
,
TD_VID
(
pTsdb
->
pVnode
),
__func__
,
sver
,
ever
,
type
);
}
*
ppReader
=
pReader
;
return
code
;
}
int32_t
tsdbSnapReaderClose
(
STsdbSnapReader
**
ppReader
)
{
int32_t
code
=
0
;
STsdbSnapReader
*
pReader
=
*
ppReader
;
// data
if
(
pReader
->
pDataFReader
)
tsdbDataFReaderClose
(
&
pReader
->
pDataFReader
);
for
(
int32_t
iIter
=
0
;
iIter
<
sizeof
(
pReader
->
aFDataIter
)
/
sizeof
(
pReader
->
aFDataIter
[
0
]);
iIter
++
)
{
SFDataIter
*
pIter
=
&
pReader
->
aFDataIter
[
iIter
];
int32_t
code
=
0
;
int32_t
lino
=
0
;
if
(
iIter
==
0
)
{
taosArrayDestroy
(
pIter
->
aBlockIdx
);
tMapDataClear
(
&
pIter
->
mBlock
);
}
else
{
taosArrayDestroy
(
pIter
->
aSttBlk
);
}
STsdbSnapReader
*
pReader
=
*
ppReader
;
STsdb
*
pTsdb
=
pReader
->
pTsdb
;
tBlockDataDestroy
(
&
pIter
->
bData
);
// tombstone
if
(
pReader
->
pTIter
)
{
tsdbCloseDataIter2
(
pReader
->
pTIter
);
pReader
->
pTIter
=
NULL
;
}
if
(
pReader
->
pDelFReader
)
{
tsdbDelFReaderClose
(
&
pReader
->
pDelFReader
);
}
taosArrayDestroy
(
pReader
->
aDelData
);
// timeseries
while
(
pReader
->
iterList
)
{
STsdbDataIter2
*
pIter
=
pReader
->
iterList
;
pReader
->
iterList
=
pIter
->
next
;
tsdbCloseDataIter2
(
pIter
);
}
if
(
pReader
->
pDataFReader
)
{
tsdbDataFReaderClose
(
&
pReader
->
pDataFReader
);
}
tBlockDataDestroy
(
&
pReader
->
bData
);
tDestroyTSchema
(
pReader
->
skmTable
.
pTSchema
);
// del
if
(
pReader
->
pDelFReader
)
tsdbDelFReaderClose
(
&
pReader
->
pDelFReader
);
taosArrayDestroy
(
pReader
->
aDelIdx
);
taosArrayDestroy
(
pReader
->
aDelData
);
// other
tDestroyTSchema
(
pReader
->
skmTable
.
pTSchema
);
tsdbFSUnref
(
pReader
->
pTsdb
,
&
pReader
->
fs
);
tsdbInfo
(
"vgId:%d, vnode snapshot tsdb reader closed for %s"
,
TD_VID
(
pReader
->
pTsdb
->
pVnode
),
pReader
->
pTsdb
->
path
);
for
(
int32_t
iBuf
=
0
;
iBuf
<
sizeof
(
pReader
->
aBuf
)
/
sizeof
(
pReader
->
aBuf
[
0
]);
iBuf
++
)
{
tFree
(
pReader
->
aBuf
[
iBuf
]);
}
taosMemoryFree
(
pReader
);
_exit:
if
(
code
)
{
tsdbError
(
"vgId:%d %s failed at line %d since %s"
,
TD_VID
(
pTsdb
->
pVnode
),
__func__
,
lino
,
tstrerror
(
code
));
}
else
{
tsdbDebug
(
"vgId:%d %s done"
,
TD_VID
(
pTsdb
->
pVnode
),
__func__
);
}
*
ppReader
=
NULL
;
return
code
;
}
...
...
@@ -600,7 +976,7 @@ int32_t tsdbSnapRead(STsdbSnapReader* pReader, uint8_t** ppData) {
// read data file
if
(
!
pReader
->
dataDone
)
{
code
=
tsdbSnapReadData
(
pReader
,
ppData
);
code
=
tsdbSnapRead
TimeSeries
Data
(
pReader
,
ppData
);
TSDB_CHECK_CODE
(
code
,
lino
,
_exit
);
if
(
*
ppData
)
{
goto
_exit
;
...
...
@@ -611,7 +987,7 @@ int32_t tsdbSnapRead(STsdbSnapReader* pReader, uint8_t** ppData) {
// read del file
if
(
!
pReader
->
delDone
)
{
code
=
tsdbSnapRead
Del
(
pReader
,
ppData
);
code
=
tsdbSnapRead
TombData
(
pReader
,
ppData
);
TSDB_CHECK_CODE
(
code
,
lino
,
_exit
);
if
(
*
ppData
)
{
goto
_exit
;
...
...
@@ -622,22 +998,18 @@ int32_t tsdbSnapRead(STsdbSnapReader* pReader, uint8_t** ppData) {
_exit:
if
(
code
)
{
tsdbError
(
"vgId:%d, %s failed since %s, path:%s"
,
TD_VID
(
pReader
->
pTsdb
->
pVnode
),
__func__
,
tstrerror
(
code
),
pReader
->
pTsdb
->
path
);
tsdbError
(
"vgId:%d %s failed at line %d since %s"
,
TD_VID
(
pReader
->
pTsdb
->
pVnode
),
__func__
,
lino
,
tstrerror
(
code
));
}
else
{
tsdbDebug
(
"vgId:%d
, %s done, path:%s"
,
TD_VID
(
pReader
->
pTsdb
->
pVnode
),
__func__
,
pReader
->
pTsdb
->
path
);
tsdbDebug
(
"vgId:%d
%s done"
,
TD_VID
(
pReader
->
pTsdb
->
pVnode
),
__func__
);
}
return
code
;
}
// STsdbSnapWriter ========================================
struct
STsdbSnapWriter
{
STsdb
*
pTsdb
;
int64_t
sver
;
int64_t
ever
;
STsdbFS
fs
;
// config
STsdb
*
pTsdb
;
int64_t
sver
;
int64_t
ever
;
int32_t
minutes
;
int8_t
precision
;
int32_t
minRow
;
...
...
@@ -646,641 +1018,816 @@ struct STsdbSnapWriter {
int64_t
commitID
;
uint8_t
*
aBuf
[
5
];
// for data file
SBlockData
bData
;
int32_t
fid
;
TABLEID
id
;
SSkmInfo
skmTable
;
struct
{
SDataFReader
*
pReader
;
SArray
*
aBlockIdx
;
int32_t
iBlockIdx
;
SBlockIdx
*
pBlockIdx
;
SMapData
mDataBlk
;
int32_t
iDataBlk
;
SBlockData
bData
;
int32_t
iRow
;
}
dReader
;
struct
{
SDataFWriter
*
pWriter
;
SArray
*
aBlockIdx
;
SMapData
mDataBlk
;
SArray
*
aSttBlk
;
SBlockData
bData
;
SBlockData
sData
;
}
dWriter
;
// for del file
SDelFReader
*
pDelFReader
;
STsdbFS
fs
;
TABLEID
tbid
;
// time-series data
SBlockData
inData
;
int32_t
fid
;
SSkmInfo
skmTable
;
/* reader */
SDataFReader
*
pDataFReader
;
STsdbDataIter2
*
iterList
;
STsdbDataIter2
*
pDIter
;
STsdbDataIter2
*
pSIter
;
SRBTree
rbt
;
// SRBTree<STsdbDataIter2>
/* writer */
SDataFWriter
*
pDataFWriter
;
SArray
*
aBlockIdx
;
SMapData
mDataBlk
;
// SMapData<SDataBlk>
SArray
*
aSttBlk
;
// SArray<SSttBlk>
SBlockData
bData
;
SBlockData
sData
;
// tombstone data
/* reader */
SDelFReader
*
pDelFReader
;
STsdbDataIter2
*
pTIter
;
/* writer */
SDelFWriter
*
pDelFWriter
;
int32_t
iDelIdx
;
SArray
*
aDelIdxR
;
SArray
*
aDelIdx
;
SArray
*
aDelData
;
SArray
*
aDelIdxW
;
};
// SNAP_DATA_TSDB
extern
int32_t
tsdbWriteDataBlock
(
SDataFWriter
*
pWriter
,
SBlockData
*
pBlockData
,
SMapData
*
mDataBlk
,
int8_t
cmprAlg
);
extern
int32_t
tsdbWriteSttBlock
(
SDataFWriter
*
pWriter
,
SBlockData
*
pBlockData
,
SArray
*
aSttBlk
,
int8_t
cmprAlg
);
static
int32_t
tsdbSnapNextTableData
(
STsdbSnapWriter
*
pWriter
)
{
static
int32_t
tsdbSnapWriteTableDataStart
(
STsdbSnapWriter
*
pWriter
,
TABLEID
*
pId
)
{
int32_t
code
=
0
;
int32_t
lino
=
0
;
if
(
pId
)
{
pWriter
->
tbid
=
*
pId
;
}
else
{
pWriter
->
tbid
=
(
TABLEID
){
INT64_MAX
,
INT64_MAX
};
}
if
(
pWriter
->
pDIter
)
{
STsdbDataIter2
*
pIter
=
pWriter
->
pDIter
;
// assert last table data end
ASSERT
(
pIter
->
dIter
.
iRow
>=
pIter
->
dIter
.
bData
.
nRow
);
ASSERT
(
pIter
->
dIter
.
iDataBlk
>=
pIter
->
dIter
.
mDataBlk
.
nItem
);
for
(;;)
{
if
(
pIter
->
dIter
.
iBlockIdx
>=
taosArrayGetSize
(
pIter
->
dIter
.
aBlockIdx
))
{
pWriter
->
pDIter
=
NULL
;
break
;
}
SBlockIdx
*
pBlockIdx
=
(
SBlockIdx
*
)
taosArrayGet
(
pIter
->
dIter
.
aBlockIdx
,
pIter
->
dIter
.
iBlockIdx
);
int32_t
c
=
tTABLEIDCmprFn
(
pBlockIdx
,
&
pWriter
->
tbid
);
if
(
c
<
0
)
{
code
=
tsdbReadDataBlk
(
pIter
->
dIter
.
pReader
,
pBlockIdx
,
&
pIter
->
dIter
.
mDataBlk
);
TSDB_CHECK_CODE
(
code
,
lino
,
_exit
);
SBlockIdx
*
pNewBlockIdx
=
taosArrayReserve
(
pWriter
->
aBlockIdx
,
1
);
if
(
pNewBlockIdx
==
NULL
)
{
code
=
TSDB_CODE_OUT_OF_MEMORY
;
TSDB_CHECK_CODE
(
code
,
lino
,
_exit
);
}
pNewBlockIdx
->
suid
=
pBlockIdx
->
suid
;
pNewBlockIdx
->
uid
=
pBlockIdx
->
uid
;
code
=
tsdbWriteDataBlk
(
pWriter
->
pDataFWriter
,
&
pIter
->
dIter
.
mDataBlk
,
pNewBlockIdx
);
TSDB_CHECK_CODE
(
code
,
lino
,
_exit
);
pIter
->
dIter
.
iBlockIdx
++
;
}
else
if
(
c
==
0
)
{
code
=
tsdbReadDataBlk
(
pIter
->
dIter
.
pReader
,
pBlockIdx
,
&
pIter
->
dIter
.
mDataBlk
);
TSDB_CHECK_CODE
(
code
,
lino
,
_exit
);
pIter
->
dIter
.
iDataBlk
=
0
;
pIter
->
dIter
.
iBlockIdx
++
;
break
;
}
else
{
pIter
->
dIter
.
iDataBlk
=
pIter
->
dIter
.
mDataBlk
.
nItem
;
break
;
}
}
}
if
(
pId
)
{
code
=
tsdbUpdateTableSchema
(
pWriter
->
pTsdb
->
pVnode
->
pMeta
,
pId
->
suid
,
pId
->
uid
,
&
pWriter
->
skmTable
);
TSDB_CHECK_CODE
(
code
,
lino
,
_exit
);
tMapDataReset
(
&
pWriter
->
mDataBlk
);
ASSERT
(
pWriter
->
dReader
.
iRow
>=
pWriter
->
dReader
.
bData
.
nRow
);
code
=
tBlockDataInit
(
&
pWriter
->
bData
,
pId
,
pWriter
->
skmTable
.
pTSchema
,
NULL
,
0
);
TSDB_CHECK_CODE
(
code
,
lino
,
_exit
);
}
if
(
pWriter
->
dReader
.
iBlockIdx
<
taosArrayGetSize
(
pWriter
->
dReader
.
aBlockIdx
))
{
pWriter
->
dReader
.
pBlockIdx
=
(
SBlockIdx
*
)
taosArrayGet
(
pWriter
->
dReader
.
aBlockIdx
,
pWriter
->
dReader
.
iBlockIdx
);
if
(
!
TABLE_SAME_SCHEMA
(
pWriter
->
tbid
.
suid
,
pWriter
->
tbid
.
uid
,
pWriter
->
sData
.
suid
,
pWriter
->
sData
.
uid
))
{
if
((
pWriter
->
sData
.
nRow
>
0
))
{
code
=
tsdbWriteSttBlock
(
pWriter
->
pDataFWriter
,
&
pWriter
->
sData
,
pWriter
->
aSttBlk
,
pWriter
->
cmprAlg
);
TSDB_CHECK_CODE
(
code
,
lino
,
_exit
);
}
code
=
tsdbReadDataBlk
(
pWriter
->
dReader
.
pReader
,
pWriter
->
dReader
.
pBlockIdx
,
&
pWriter
->
dReader
.
mDataBlk
);
if
(
code
)
goto
_exit
;
if
(
pId
)
{
TABLEID
id
=
{.
suid
=
pWriter
->
tbid
.
suid
,
.
uid
=
pWriter
->
tbid
.
suid
?
0
:
pWriter
->
tbid
.
uid
};
code
=
tBlockDataInit
(
&
pWriter
->
sData
,
&
id
,
pWriter
->
skmTable
.
pTSchema
,
NULL
,
0
);
TSDB_CHECK_CODE
(
code
,
lino
,
_exit
);
}
}
pWriter
->
dReader
.
iBlockIdx
++
;
_exit:
if
(
code
)
{
tsdbError
(
"vgId:%d %s failed at line %d since %s"
,
TD_VID
(
pWriter
->
pTsdb
->
pVnode
),
__func__
,
lino
,
tstrerror
(
code
));
}
else
{
pWriter
->
dReader
.
pBlockIdx
=
NULL
;
tMapDataReset
(
&
pWriter
->
dReader
.
mDataBlk
);
tsdbTrace
(
"vgId:%d %s done, suid:%"
PRId64
" uid:%"
PRId64
,
TD_VID
(
pWriter
->
pTsdb
->
pVnode
),
__func__
,
pWriter
->
tbid
.
suid
,
pWriter
->
tbid
.
uid
);
}
return
code
;
}
static
int32_t
tsdbSnapWriteTableRowImpl
(
STsdbSnapWriter
*
pWriter
,
TSDBROW
*
pRow
)
{
int32_t
code
=
0
;
int32_t
lino
=
0
;
code
=
tBlockDataAppendRow
(
&
pWriter
->
bData
,
pRow
,
pWriter
->
skmTable
.
pTSchema
,
pWriter
->
tbid
.
uid
);
TSDB_CHECK_CODE
(
code
,
lino
,
_exit
);
if
(
pWriter
->
bData
.
nRow
>=
pWriter
->
maxRow
)
{
code
=
tsdbWriteDataBlock
(
pWriter
->
pDataFWriter
,
&
pWriter
->
bData
,
&
pWriter
->
mDataBlk
,
pWriter
->
cmprAlg
);
TSDB_CHECK_CODE
(
code
,
lino
,
_exit
);
}
pWriter
->
dReader
.
iDataBlk
=
0
;
// point to the next one
tBlockDataReset
(
&
pWriter
->
dReader
.
bData
);
pWriter
->
dReader
.
iRow
=
0
;
_exit:
if
(
code
)
{
tsdbError
(
"vgId:%d %s failed at line %d since %s"
,
TD_VID
(
pWriter
->
pTsdb
->
pVnode
),
__func__
,
lino
,
tstrerror
(
code
));
}
return
code
;
}
static
int32_t
tsdbSnapWrite
CopyData
(
STsdbSnapWriter
*
pWriter
,
TABLEID
*
pId
)
{
static
int32_t
tsdbSnapWrite
TableRow
(
STsdbSnapWriter
*
pWriter
,
TSDBROW
*
pRow
)
{
int32_t
code
=
0
;
int32_t
lino
=
0
;
while
(
true
)
{
if
(
pWriter
->
dReader
.
pBlockIdx
==
NULL
)
break
;
if
(
tTABLEIDCmprFn
(
pWriter
->
dReader
.
pBlockIdx
,
pId
)
>=
0
)
break
;
TSDBKEY
inKey
=
pRow
?
TSDBROW_KEY
(
pRow
)
:
TSDBKEY_MAX
;
if
(
pWriter
->
pDIter
==
NULL
||
(
pWriter
->
pDIter
->
dIter
.
iRow
>=
pWriter
->
pDIter
->
dIter
.
bData
.
nRow
&&
pWriter
->
pDIter
->
dIter
.
iDataBlk
>=
pWriter
->
pDIter
->
dIter
.
mDataBlk
.
nItem
))
{
goto
_write_row
;
}
else
{
for
(;;)
{
while
(
pWriter
->
pDIter
->
dIter
.
iRow
<
pWriter
->
pDIter
->
dIter
.
bData
.
nRow
)
{
TSDBROW
row
=
tsdbRowFromBlockData
(
&
pWriter
->
pDIter
->
dIter
.
bData
,
pWriter
->
pDIter
->
dIter
.
iRow
);
int32_t
c
=
tsdbKeyCmprFn
(
&
inKey
,
&
TSDBROW_KEY
(
&
row
));
if
(
c
<
0
)
{
goto
_write_row
;
}
else
if
(
c
>
0
)
{
code
=
tsdbSnapWriteTableRowImpl
(
pWriter
,
&
row
);
TSDB_CHECK_CODE
(
code
,
lino
,
_exit
);
pWriter
->
pDIter
->
dIter
.
iRow
++
;
}
else
{
ASSERT
(
0
);
}
}
SBlockIdx
blkIdx
=
*
pWriter
->
dReader
.
pBlockIdx
;
code
=
tsdbWriteDataBlk
(
pWriter
->
dWriter
.
pWriter
,
&
pWriter
->
dReader
.
mDataBlk
,
&
blkIdx
);
if
(
code
)
goto
_exit
;
for
(;;)
{
if
(
pWriter
->
pDIter
->
dIter
.
iDataBlk
>=
pWriter
->
pDIter
->
dIter
.
mDataBlk
.
nItem
)
goto
_write_row
;
if
(
taosArrayPush
(
pWriter
->
dWriter
.
aBlockIdx
,
&
blkIdx
)
==
NULL
)
{
code
=
TSDB_CODE_OUT_OF_MEMORY
;
goto
_exit
;
// FIXME: Here can be slow, use array instead
SDataBlk
dataBlk
;
tMapDataGetItemByIdx
(
&
pWriter
->
pDIter
->
dIter
.
mDataBlk
,
pWriter
->
pDIter
->
dIter
.
iDataBlk
,
&
dataBlk
,
tGetDataBlk
);
int32_t
c
=
tDataBlkCmprFn
(
&
dataBlk
,
&
(
SDataBlk
){.
minKey
=
inKey
,
.
maxKey
=
inKey
});
if
(
c
>
0
)
{
goto
_write_row
;
}
else
if
(
c
<
0
)
{
if
(
pWriter
->
bData
.
nRow
>
0
)
{
code
=
tsdbWriteDataBlock
(
pWriter
->
pDataFWriter
,
&
pWriter
->
bData
,
&
pWriter
->
mDataBlk
,
pWriter
->
cmprAlg
);
TSDB_CHECK_CODE
(
code
,
lino
,
_exit
);
}
tMapDataPutItem
(
&
pWriter
->
pDIter
->
dIter
.
mDataBlk
,
&
dataBlk
,
tPutDataBlk
);
pWriter
->
pDIter
->
dIter
.
iDataBlk
++
;
}
else
{
code
=
tsdbReadDataBlockEx
(
pWriter
->
pDataFReader
,
&
dataBlk
,
&
pWriter
->
pDIter
->
dIter
.
bData
);
TSDB_CHECK_CODE
(
code
,
lino
,
_exit
);
pWriter
->
pDIter
->
dIter
.
iRow
=
0
;
pWriter
->
pDIter
->
dIter
.
iDataBlk
++
;
break
;
}
}
}
}
code
=
tsdbSnapNextTableData
(
pWriter
);
if
(
code
)
goto
_exit
;
_write_row:
if
(
pRow
)
{
code
=
tsdbSnapWriteTableRowImpl
(
pWriter
,
pRow
);
TSDB_CHECK_CODE
(
code
,
lino
,
_exit
);
}
_exit:
if
(
code
)
{
tsdbError
(
"vgId:%d %s failed at line %d since %s"
,
TD_VID
(
pWriter
->
pTsdb
->
pVnode
),
__func__
,
lino
,
tstrerror
(
code
));
}
return
code
;
}
static
int32_t
tsdbSnapWriteTableData
Start
(
STsdbSnapWriter
*
pWriter
,
TABLEID
*
pId
)
{
static
int32_t
tsdbSnapWriteTableData
End
(
STsdbSnapWriter
*
pWriter
)
{
int32_t
code
=
0
;
int32_t
lino
=
0
;
// write a NULL row to end current table data write
code
=
tsdbSnapWriteTableRow
(
pWriter
,
NULL
);
TSDB_CHECK_CODE
(
code
,
lino
,
_exit
);
if
(
pWriter
->
bData
.
nRow
>
0
)
{
if
(
pWriter
->
bData
.
nRow
<
pWriter
->
minRow
)
{
ASSERT
(
TABLE_SAME_SCHEMA
(
pWriter
->
sData
.
suid
,
pWriter
->
sData
.
uid
,
pWriter
->
tbid
.
suid
,
pWriter
->
tbid
.
uid
));
for
(
int32_t
iRow
=
0
;
iRow
<
pWriter
->
bData
.
nRow
;
iRow
++
)
{
code
=
tBlockDataAppendRow
(
&
pWriter
->
sData
,
&
tsdbRowFromBlockData
(
&
pWriter
->
bData
,
iRow
),
NULL
,
pWriter
->
tbid
.
uid
);
TSDB_CHECK_CODE
(
code
,
lino
,
_exit
);
code
=
tsdbSnapWriteCopyData
(
pWriter
,
pId
);
if
(
code
)
goto
_err
;
if
(
pWriter
->
sData
.
nRow
>=
pWriter
->
maxRow
)
{
code
=
tsdbWriteSttBlock
(
pWriter
->
pDataFWriter
,
&
pWriter
->
sData
,
pWriter
->
aSttBlk
,
pWriter
->
cmprAlg
);
TSDB_CHECK_CODE
(
code
,
lino
,
_exit
);
}
}
pWriter
->
id
.
suid
=
pId
->
suid
;
pWriter
->
id
.
uid
=
pId
->
uid
;
tBlockDataClear
(
&
pWriter
->
bData
);
}
else
{
code
=
tsdbWriteDataBlock
(
pWriter
->
pDataFWriter
,
&
pWriter
->
bData
,
&
pWriter
->
mDataBlk
,
pWriter
->
cmprAlg
);
TSDB_CHECK_CODE
(
code
,
lino
,
_exit
);
}
}
code
=
tsdbUpdateTableSchema
(
pWriter
->
pTsdb
->
pVnode
->
pMeta
,
pId
->
suid
,
pId
->
uid
,
&
pWriter
->
skmTable
);
if
(
code
)
goto
_err
;
if
(
pWriter
->
mDataBlk
.
nItem
)
{
SBlockIdx
*
pBlockIdx
=
taosArrayReserve
(
pWriter
->
aBlockIdx
,
1
);
if
(
pBlockIdx
==
NULL
)
{
code
=
TSDB_CODE_OUT_OF_MEMORY
;
TSDB_CHECK_CODE
(
code
,
lino
,
_exit
);
}
tMapDataReset
(
&
pWriter
->
dWriter
.
mDataBlk
);
code
=
tBlockDataInit
(
&
pWriter
->
dWriter
.
bData
,
pId
,
pWriter
->
skmTable
.
pTSchema
,
NULL
,
0
);
if
(
code
)
goto
_err
;
pBlockIdx
->
suid
=
pWriter
->
tbid
.
suid
;
pBlockIdx
->
uid
=
pWriter
->
tbid
.
uid
;
return
code
;
code
=
tsdbWriteDataBlk
(
pWriter
->
pDataFWriter
,
&
pWriter
->
mDataBlk
,
pBlockIdx
);
TSDB_CHECK_CODE
(
code
,
lino
,
_exit
);
}
_err:
tsdbError
(
"vgId:%d, %s failed since %s"
,
TD_VID
(
pWriter
->
pTsdb
->
pVnode
),
__func__
,
tstrerror
(
code
));
_exit:
if
(
code
)
{
tsdbError
(
"vgId:%d %s failed at line %d since %s"
,
TD_VID
(
pWriter
->
pTsdb
->
pVnode
),
__func__
,
lino
,
tstrerror
(
code
));
}
return
code
;
}
static
int32_t
tsdbSnapWrite
TableDataEnd
(
STsdbSnapWriter
*
pWriter
)
{
static
int32_t
tsdbSnapWrite
FileDataStart
(
STsdbSnapWriter
*
pWriter
,
int32_t
fid
)
{
int32_t
code
=
0
;
int32_t
lino
=
0
;
if
(
pWriter
->
id
.
suid
==
0
&&
pWriter
->
id
.
uid
==
0
)
return
code
;
ASSERT
(
pWriter
->
pDataFWriter
==
NULL
&&
pWriter
->
fid
<
fid
)
;
int32_t
c
=
1
;
if
(
pWriter
->
dReader
.
pBlockIdx
)
{
c
=
tTABLEIDCmprFn
(
pWriter
->
dReader
.
pBlockIdx
,
&
pWriter
->
id
);
ASSERT
(
c
>=
0
);
}
STsdb
*
pTsdb
=
pWriter
->
pTsdb
;
pWriter
->
fid
=
fid
;
pWriter
->
tbid
=
(
TABLEID
){
0
};
SDFileSet
*
pSet
=
taosArraySearch
(
pWriter
->
fs
.
aDFileSet
,
&
(
SDFileSet
){.
fid
=
fid
},
tDFileSetCmprFn
,
TD_EQ
);
// open reader
pWriter
->
pDataFReader
=
NULL
;
pWriter
->
iterList
=
NULL
;
pWriter
->
pDIter
=
NULL
;
pWriter
->
pSIter
=
NULL
;
tRBTreeCreate
(
&
pWriter
->
rbt
,
tsdbDataIterCmprFn
);
if
(
pSet
)
{
code
=
tsdbDataFReaderOpen
(
&
pWriter
->
pDataFReader
,
pTsdb
,
pSet
);
TSDB_CHECK_CODE
(
code
,
lino
,
_exit
);
code
=
tsdbOpenDataFileDataIter
(
pWriter
->
pDataFReader
,
&
pWriter
->
pDIter
);
TSDB_CHECK_CODE
(
code
,
lino
,
_exit
);
if
(
pWriter
->
pDIter
)
{
pWriter
->
pDIter
->
next
=
pWriter
->
iterList
;
pWriter
->
iterList
=
pWriter
->
pDIter
;
}
if
(
c
==
0
)
{
SBlockData
*
pBData
=
&
pWriter
->
dWriter
.
bData
;
for
(
int32_t
iStt
=
0
;
iStt
<
pSet
->
nSttF
;
iStt
++
)
{
code
=
tsdbOpenSttFileDataIter
(
pWriter
->
pDataFReader
,
iStt
,
&
pWriter
->
pSIter
);
TSDB_CHECK_CODE
(
code
,
lino
,
_exit
);
for
(;
pWriter
->
dReader
.
iRow
<
pWriter
->
dReader
.
bData
.
nRow
;
pWriter
->
dReader
.
iRow
++
)
{
TSDBROW
row
=
tsdbRowFromBlockData
(
&
pWriter
->
dReader
.
bData
,
pWriter
->
dReader
.
iRow
);
if
(
pWriter
->
pSIter
)
{
code
=
tsdbSttFileDataIterNext
(
pWriter
->
pSIter
,
NULL
);
TSDB_CHECK_CODE
(
code
,
lino
,
_exit
);
code
=
tBlockDataAppendRow
(
pBData
,
&
row
,
NULL
,
pWriter
->
id
.
uid
);
if
(
code
)
goto
_err
;
// add to tree
tRBTreePut
(
&
pWriter
->
rbt
,
&
pWriter
->
pSIter
->
rbtn
)
;
if
(
pBData
->
nRow
>=
pWriter
->
maxRow
)
{
code
=
tsdbWriteDataBlock
(
pWriter
->
dWriter
.
pWriter
,
pBData
,
&
pWriter
->
dWriter
.
mDataBlk
,
pWriter
->
cmprAlg
)
;
if
(
code
)
goto
_er
r
;
// add to list
pWriter
->
pSIter
->
next
=
pWriter
->
iterList
;
pWriter
->
iterList
=
pWriter
->
pSIte
r
;
}
}
code
=
tsdbWriteDataBlock
(
pWriter
->
dWriter
.
pWriter
,
pBData
,
&
pWriter
->
dWriter
.
mDataBlk
,
pWriter
->
cmprAlg
);
if
(
code
)
goto
_err
;
pWriter
->
pSIter
=
NULL
;
}
// open writer
SDiskID
diskId
;
if
(
pSet
)
{
diskId
=
pSet
->
diskId
;
}
else
{
tfsAllocDisk
(
pTsdb
->
pVnode
->
pTfs
,
0
/*TODO*/
,
&
diskId
);
tfsMkdirRecurAt
(
pTsdb
->
pVnode
->
pTfs
,
pTsdb
->
path
,
diskId
);
}
SDFileSet
wSet
=
{.
diskId
=
diskId
,
.
fid
=
fid
,
.
pHeadF
=
&
(
SHeadFile
){.
commitID
=
pWriter
->
commitID
},
.
pDataF
=
(
pSet
)
?
pSet
->
pDataF
:
&
(
SDataFile
){.
commitID
=
pWriter
->
commitID
},
.
pSmaF
=
(
pSet
)
?
pSet
->
pSmaF
:
&
(
SSmaFile
){.
commitID
=
pWriter
->
commitID
},
.
nSttF
=
1
,
.
aSttF
=
{
&
(
SSttFile
){.
commitID
=
pWriter
->
commitID
}}};
code
=
tsdbDataFWriterOpen
(
&
pWriter
->
pDataFWriter
,
pTsdb
,
&
wSet
);
TSDB_CHECK_CODE
(
code
,
lino
,
_exit
);
for
(;
pWriter
->
dReader
.
iDataBlk
<
pWriter
->
dReader
.
mDataBlk
.
nItem
;
pWriter
->
dReader
.
iDataBlk
++
)
{
SDataBlk
dataBlk
;
tMapDataGetItemByIdx
(
&
pWriter
->
dReader
.
mDataBlk
,
pWriter
->
dReader
.
iDataBlk
,
&
dataBlk
,
tGetDataBlk
);
if
(
pWriter
->
aBlockIdx
)
{
taosArrayClear
(
pWriter
->
aBlockIdx
);
}
else
if
((
pWriter
->
aBlockIdx
=
taosArrayInit
(
0
,
sizeof
(
SBlockIdx
)))
==
NULL
)
{
code
=
TSDB_CODE_OUT_OF_MEMORY
;
TSDB_CHECK_CODE
(
code
,
lino
,
_exit
);
}
code
=
tMapDataPutItem
(
&
pWriter
->
dWriter
.
mDataBlk
,
&
dataBlk
,
tPutDataBlk
);
if
(
code
)
goto
_err
;
}
tMapDataReset
(
&
pWriter
->
mDataBlk
);
code
=
tsdbSnapNextTableData
(
pWriter
);
if
(
code
)
goto
_err
;
if
(
pWriter
->
aSttBlk
)
{
taosArrayClear
(
pWriter
->
aSttBlk
);
}
else
if
((
pWriter
->
aSttBlk
=
taosArrayInit
(
0
,
sizeof
(
SSttBlk
)))
==
NULL
)
{
code
=
TSDB_CODE_OUT_OF_MEMORY
;
TSDB_CHECK_CODE
(
code
,
lino
,
_exit
);
}
if
(
pWriter
->
dWriter
.
mDataBlk
.
nItem
)
{
SBlockIdx
blockIdx
=
{.
suid
=
pWriter
->
id
.
suid
,
.
uid
=
pWriter
->
id
.
uid
};
code
=
tsdbWriteDataBlk
(
pWriter
->
dWriter
.
pWriter
,
&
pWriter
->
dWriter
.
mDataBlk
,
&
blockIdx
);
tBlockDataReset
(
&
pWriter
->
bData
);
tBlockDataReset
(
&
pWriter
->
sData
);
if
(
taosArrayPush
(
pWriter
->
dWriter
.
aBlockIdx
,
&
blockIdx
)
==
NULL
)
{
code
=
TSDB_CODE_OUT_OF_MEMORY
;
goto
_err
;
_exit:
if
(
code
)
{
tsdbError
(
"vgId:%d %s failed at line %d since %s, fid:%d"
,
TD_VID
(
pTsdb
->
pVnode
),
__func__
,
lino
,
tstrerror
(
code
),
fid
);
}
else
{
tsdbDebug
(
"vgId:%d %s done, fid:%d"
,
TD_VID
(
pTsdb
->
pVnode
),
__func__
,
fid
);
}
return
code
;
}
static
int32_t
tsdbSnapWriteTableData
(
STsdbSnapWriter
*
pWriter
,
SRowInfo
*
pRowInfo
)
{
int32_t
code
=
0
;
int32_t
lino
=
0
;
// switch to new table if need
if
(
pRowInfo
==
NULL
||
pRowInfo
->
uid
!=
pWriter
->
tbid
.
uid
)
{
if
(
pWriter
->
tbid
.
uid
)
{
code
=
tsdbSnapWriteTableDataEnd
(
pWriter
);
TSDB_CHECK_CODE
(
code
,
lino
,
_exit
);
}
code
=
tsdbSnapWriteTableDataStart
(
pWriter
,
(
TABLEID
*
)
pRowInfo
);
TSDB_CHECK_CODE
(
code
,
lino
,
_exit
);
}
pWriter
->
id
.
suid
=
0
;
pWriter
->
id
.
uid
=
0
;
if
(
pRowInfo
==
NULL
)
goto
_exit
;
return
code
;
code
=
tsdbSnapWriteTableRow
(
pWriter
,
&
pRowInfo
->
row
);
TSDB_CHECK_CODE
(
code
,
lino
,
_exit
);
_err:
_exit:
if
(
code
)
{
tsdbError
(
"vgId:%d %s failed at line %d since %s"
,
TD_VID
(
pWriter
->
pTsdb
->
pVnode
),
__func__
,
lino
,
tstrerror
(
code
));
}
return
code
;
}
static
int32_t
tsdbSnapWrite
OpenFile
(
STsdbSnapWriter
*
pWriter
,
int32_t
fid
)
{
static
int32_t
tsdbSnapWrite
NextRow
(
STsdbSnapWriter
*
pWriter
,
SRowInfo
**
ppRowInfo
)
{
int32_t
code
=
0
;
STsdb
*
pTsdb
=
pWriter
->
pTsdb
;
ASSERT
(
pWriter
->
dWriter
.
pWriter
==
NULL
);
int32_t
lino
=
0
;
pWriter
->
fid
=
fid
;
pWriter
->
id
=
(
TABLEID
){
0
}
;
SDFileSet
*
pSet
=
taosArraySearch
(
pWriter
->
fs
.
aDFileSet
,
&
(
SDFileSet
){.
fid
=
fid
},
tDFileSetCmprFn
,
TD_EQ
);
if
(
pWriter
->
pSIter
)
{
code
=
tsdbDataIterNext2
(
pWriter
->
pSIter
,
NULL
)
;
TSDB_CHECK_CODE
(
code
,
lino
,
_exit
);
// Reader
if
(
pSet
)
{
code
=
tsdbDataFReaderOpen
(
&
pWriter
->
dReader
.
pReader
,
pWriter
->
pTsdb
,
pSet
);
if
(
code
)
goto
_err
;
if
(
pWriter
->
pSIter
->
rowInfo
.
suid
==
0
&&
pWriter
->
pSIter
->
rowInfo
.
uid
==
0
)
{
pWriter
->
pSIter
=
NULL
;
}
else
{
SRBTreeNode
*
pNode
=
tRBTreeMin
(
&
pWriter
->
rbt
);
if
(
pNode
)
{
int32_t
c
=
tsdbDataIterCmprFn
(
&
pWriter
->
pSIter
->
rbtn
,
pNode
);
if
(
c
>
0
)
{
tRBTreePut
(
&
pWriter
->
rbt
,
&
pWriter
->
pSIter
->
rbtn
);
pWriter
->
pSIter
=
NULL
;
}
else
if
(
c
==
0
)
{
ASSERT
(
0
);
}
}
}
}
code
=
tsdbReadBlockIdx
(
pWriter
->
dReader
.
pReader
,
pWriter
->
dReader
.
aBlockIdx
);
if
(
code
)
goto
_err
;
}
else
{
ASSERT
(
pWriter
->
dReader
.
pReader
==
NULL
);
taosArrayClear
(
pWriter
->
dReader
.
aBlockIdx
);
}
pWriter
->
dReader
.
iBlockIdx
=
0
;
// point to the next one
code
=
tsdbSnapNextTableData
(
pWriter
);
if
(
code
)
goto
_err
;
// Writer
SHeadFile
fHead
=
{.
commitID
=
pWriter
->
commitID
};
SDataFile
fData
=
{.
commitID
=
pWriter
->
commitID
};
SSmaFile
fSma
=
{.
commitID
=
pWriter
->
commitID
};
SSttFile
fStt
=
{.
commitID
=
pWriter
->
commitID
};
SDFileSet
wSet
=
{.
fid
=
pWriter
->
fid
,
.
pHeadF
=
&
fHead
,
.
pDataF
=
&
fData
,
.
pSmaF
=
&
fSma
};
if
(
pSet
)
{
wSet
.
diskId
=
pSet
->
diskId
;
fData
=
*
pSet
->
pDataF
;
fSma
=
*
pSet
->
pSmaF
;
for
(
int32_t
iStt
=
0
;
iStt
<
pSet
->
nSttF
;
iStt
++
)
{
wSet
.
aSttF
[
iStt
]
=
pSet
->
aSttF
[
iStt
];
if
(
pWriter
->
pSIter
==
NULL
)
{
SRBTreeNode
*
pNode
=
tRBTreeMin
(
&
pWriter
->
rbt
);
if
(
pNode
)
{
tRBTreeDrop
(
&
pWriter
->
rbt
,
pNode
);
pWriter
->
pSIter
=
TSDB_RBTN_TO_DATA_ITER
(
pNode
);
}
wSet
.
nSttF
=
pSet
->
nSttF
+
1
;
// TODO: fix pSet->nSttF == pTsdb->maxFile
}
else
{
SDiskID
did
=
{
0
};
tfsAllocDisk
(
pTsdb
->
pVnode
->
pTfs
,
0
,
&
did
);
tfsMkdirRecurAt
(
pTsdb
->
pVnode
->
pTfs
,
pTsdb
->
path
,
did
);
wSet
.
diskId
=
did
;
wSet
.
nSttF
=
1
;
}
wSet
.
aSttF
[
wSet
.
nSttF
-
1
]
=
&
fStt
;
code
=
tsdbDataFWriterOpen
(
&
pWriter
->
dWriter
.
pWriter
,
pWriter
->
pTsdb
,
&
wSet
);
if
(
code
)
goto
_err
;
taosArrayClear
(
pWriter
->
dWriter
.
aBlockIdx
);
tMapDataReset
(
&
pWriter
->
dWriter
.
mDataBlk
);
taosArrayClear
(
pWriter
->
dWriter
.
aSttBlk
);
tBlockDataReset
(
&
pWriter
->
dWriter
.
bData
);
tBlockDataReset
(
&
pWriter
->
dWriter
.
sData
);
}
return
code
;
if
(
ppRowInfo
)
{
if
(
pWriter
->
pSIter
)
{
*
ppRowInfo
=
&
pWriter
->
pSIter
->
rowInfo
;
}
else
{
*
ppRowInfo
=
NULL
;
}
}
_err:
_exit:
if
(
code
)
{
tsdbError
(
"vgId:%d %s failed at line %d since %s"
,
TD_VID
(
pWriter
->
pTsdb
->
pVnode
),
__func__
,
lino
,
tstrerror
(
code
));
}
return
code
;
}
static
int32_t
tsdbSnapWrite
CloseFile
(
STsdbSnapWriter
*
pWriter
)
{
static
int32_t
tsdbSnapWrite
GetRow
(
STsdbSnapWriter
*
pWriter
,
SRowInfo
**
ppRowInfo
)
{
int32_t
code
=
0
;
int32_t
lino
=
0
;
ASSERT
(
pWriter
->
dWriter
.
pWriter
);
code
=
tsdbSnapWriteTableDataEnd
(
pWriter
);
if
(
code
)
goto
_err
;
// copy remain table data
TABLEID
id
=
{.
suid
=
INT64_MAX
,
.
uid
=
INT64_MAX
};
code
=
tsdbSnapWriteCopyData
(
pWriter
,
&
id
);
if
(
code
)
goto
_err
;
code
=
tsdbWriteSttBlock
(
pWriter
->
dWriter
.
pWriter
,
&
pWriter
->
dWriter
.
sData
,
pWriter
->
dWriter
.
aSttBlk
,
pWriter
->
cmprAlg
);
if
(
code
)
goto
_err
;
// Indices
code
=
tsdbWriteBlockIdx
(
pWriter
->
dWriter
.
pWriter
,
pWriter
->
dWriter
.
aBlockIdx
);
if
(
code
)
goto
_err
;
code
=
tsdbWriteSttBlk
(
pWriter
->
dWriter
.
pWriter
,
pWriter
->
dWriter
.
aSttBlk
);
if
(
code
)
goto
_err
;
code
=
tsdbUpdateDFileSetHeader
(
pWriter
->
dWriter
.
pWriter
);
if
(
code
)
goto
_err
;
code
=
tsdbFSUpsertFSet
(
&
pWriter
->
fs
,
&
pWriter
->
dWriter
.
pWriter
->
wSet
);
if
(
code
)
goto
_err
;
code
=
tsdbDataFWriterClose
(
&
pWriter
->
dWriter
.
pWriter
,
1
);
if
(
code
)
goto
_err
;
if
(
pWriter
->
dReader
.
pReader
)
{
code
=
tsdbDataFReaderClose
(
&
pWriter
->
dReader
.
pReader
);
if
(
code
)
goto
_err
;
if
(
pWriter
->
pSIter
)
{
*
ppRowInfo
=
&
pWriter
->
pSIter
->
rowInfo
;
goto
_exit
;
}
_exit:
return
code
;
code
=
tsdbSnapWriteNextRow
(
pWriter
,
ppRowInfo
);
TSDB_CHECK_CODE
(
code
,
lino
,
_exit
)
;
_err:
_exit:
if
(
code
)
{
tsdbError
(
"vgId:%d %s failed at line %d since %s"
,
TD_VID
(
pWriter
->
pTsdb
->
pVnode
),
__func__
,
lino
,
tstrerror
(
code
));
}
return
code
;
}
static
int32_t
tsdbSnapWrite
ToDataFile
(
STsdbSnapWriter
*
pWriter
,
int32_t
iRow
,
int8_t
*
done
)
{
static
int32_t
tsdbSnapWrite
FileDataEnd
(
STsdbSnapWriter
*
pWriter
)
{
int32_t
code
=
0
;
int32_t
lino
=
0
;
SBlockData
*
pBData
=
&
pWriter
->
bData
;
TABLEID
id
=
{.
suid
=
pBData
->
suid
,
.
uid
=
pBData
->
uid
?
pBData
->
uid
:
pBData
->
aUid
[
iRow
]};
TSDBROW
row
=
tsdbRowFromBlockData
(
pBData
,
iRow
);
TSDBKEY
key
=
TSDBROW_KEY
(
&
row
);
ASSERT
(
pWriter
->
pDataFWriter
);
*
done
=
0
;
while
(
pWriter
->
dReader
.
iRow
<
pWriter
->
dReader
.
bData
.
nRow
||
pWriter
->
dReader
.
iDataBlk
<
pWriter
->
dReader
.
mDataBlk
.
nItem
)
{
// Merge row by row
for
(;
pWriter
->
dReader
.
iRow
<
pWriter
->
dReader
.
bData
.
nRow
;
pWriter
->
dReader
.
iRow
++
)
{
TSDBROW
trow
=
tsdbRowFromBlockData
(
&
pWriter
->
dReader
.
bData
,
pWriter
->
dReader
.
iRow
);
TSDBKEY
tKey
=
TSDBROW_KEY
(
&
trow
);
// consume remain data and end with a NULL table row
SRowInfo
*
pRowInfo
;
code
=
tsdbSnapWriteGetRow
(
pWriter
,
&
pRowInfo
);
TSDB_CHECK_CODE
(
code
,
lino
,
_exit
);
for
(;;
)
{
code
=
tsdbSnapWriteTableData
(
pWriter
,
pRowInfo
);
TSDB_CHECK_CODE
(
code
,
lino
,
_exit
);
ASSERT
(
pWriter
->
dReader
.
bData
.
suid
==
id
.
suid
&&
pWriter
->
dReader
.
bData
.
uid
==
id
.
uid
)
;
if
(
pRowInfo
==
NULL
)
break
;
int32_t
c
=
tsdbKeyCmprFn
(
&
key
,
&
tKey
);
if
(
c
<
0
)
{
code
=
tBlockDataAppendRow
(
&
pWriter
->
dWriter
.
bData
,
&
row
,
NULL
,
id
.
uid
);
if
(
code
)
goto
_err
;
}
else
if
(
c
>
0
)
{
code
=
tBlockDataAppendRow
(
&
pWriter
->
dWriter
.
bData
,
&
trow
,
NULL
,
id
.
uid
);
if
(
code
)
goto
_err
;
}
else
{
ASSERT
(
0
);
}
code
=
tsdbSnapWriteNextRow
(
pWriter
,
&
pRowInfo
);
TSDB_CHECK_CODE
(
code
,
lino
,
_exit
);
}
if
(
pWriter
->
dWriter
.
bData
.
nRow
>=
pWriter
->
maxRow
)
{
code
=
tsdbWriteDataBlock
(
pWriter
->
dWriter
.
pWriter
,
&
pWriter
->
dWriter
.
bData
,
&
pWriter
->
dWriter
.
mDataBlk
,
pWriter
->
cmprAlg
);
if
(
code
)
goto
_err
;
}
// do file-level updates
code
=
tsdbWriteSttBlk
(
pWriter
->
pDataFWriter
,
pWriter
->
aSttBlk
);
TSDB_CHECK_CODE
(
code
,
lino
,
_exit
);
if
(
c
<
0
)
{
*
done
=
1
;
goto
_exit
;
}
}
code
=
tsdbWriteBlockIdx
(
pWriter
->
pDataFWriter
,
pWriter
->
aBlockIdx
);
TSDB_CHECK_CODE
(
code
,
lino
,
_exit
);
// Merge row by block
SDataBlk
tDataBlk
=
{.
minKey
=
key
,
.
maxKey
=
key
};
for
(;
pWriter
->
dReader
.
iDataBlk
<
pWriter
->
dReader
.
mDataBlk
.
nItem
;
pWriter
->
dReader
.
iDataBlk
++
)
{
SDataBlk
dataBlk
;
tMapDataGetItemByIdx
(
&
pWriter
->
dReader
.
mDataBlk
,
pWriter
->
dReader
.
iDataBlk
,
&
dataBlk
,
tGetDataBlk
);
code
=
tsdbUpdateDFileSetHeader
(
pWriter
->
pDataFWriter
);
TSDB_CHECK_CODE
(
code
,
lino
,
_exit
);
int32_t
c
=
tDataBlkCmprFn
(
&
dataBlk
,
&
tDataBlk
);
if
(
c
<
0
)
{
code
=
tsdbWriteDataBlock
(
pWriter
->
dWriter
.
pWriter
,
&
pWriter
->
dWriter
.
bData
,
&
pWriter
->
dWriter
.
mDataBlk
,
pWriter
->
cmprAlg
);
if
(
code
)
goto
_err
;
code
=
tMapDataPutItem
(
&
pWriter
->
dWriter
.
mDataBlk
,
&
dataBlk
,
tPutDataBlk
);
if
(
code
)
goto
_err
;
}
else
if
(
c
>
0
)
{
code
=
tBlockDataAppendRow
(
&
pWriter
->
dWriter
.
bData
,
&
row
,
NULL
,
id
.
uid
);
if
(
code
)
goto
_err
;
if
(
pWriter
->
dWriter
.
bData
.
nRow
>=
pWriter
->
maxRow
)
{
code
=
tsdbWriteDataBlock
(
pWriter
->
dWriter
.
pWriter
,
&
pWriter
->
dWriter
.
bData
,
&
pWriter
->
dWriter
.
mDataBlk
,
pWriter
->
cmprAlg
);
if
(
code
)
goto
_err
;
}
code
=
tsdbFSUpsertFSet
(
&
pWriter
->
fs
,
&
pWriter
->
pDataFWriter
->
wSet
);
TSDB_CHECK_CODE
(
code
,
lino
,
_exit
);
*
done
=
1
;
goto
_exit
;
}
else
{
code
=
tsdbReadDataBlockEx
(
pWriter
->
dReader
.
pReader
,
&
dataBlk
,
&
pWriter
->
dReader
.
bData
);
if
(
code
)
goto
_err
;
pWriter
->
dReader
.
iRow
=
0
;
code
=
tsdbDataFWriterClose
(
&
pWriter
->
pDataFWriter
,
1
);
TSDB_CHECK_CODE
(
code
,
lino
,
_exit
);
pWriter
->
dReader
.
iDataBlk
++
;
break
;
}
}
if
(
pWriter
->
pDataFReader
)
{
code
=
tsdbDataFReaderClose
(
&
pWriter
->
pDataFReader
);
TSDB_CHECK_CODE
(
code
,
lino
,
_exit
);
}
_exit:
return
code
;
// clear sources
while
(
pWriter
->
iterList
)
{
STsdbDataIter2
*
pIter
=
pWriter
->
iterList
;
pWriter
->
iterList
=
pIter
->
next
;
tsdbCloseDataIter2
(
pIter
);
}
_err:
tsdbError
(
"vgId:%d, %s failed since %s"
,
TD_VID
(
pWriter
->
pTsdb
->
pVnode
),
__func__
,
tstrerror
(
code
));
_exit:
if
(
code
)
{
tsdbError
(
"vgId:%d %s failed since %s"
,
TD_VID
(
pWriter
->
pTsdb
->
pVnode
),
__func__
,
tstrerror
(
code
));
}
else
{
tsdbDebug
(
"vgId:%d %s is done"
,
TD_VID
(
pWriter
->
pTsdb
->
pVnode
),
__func__
);
}
return
code
;
}
static
int32_t
tsdbSnapWriteT
oSttFile
(
STsdbSnapWriter
*
pWriter
,
int32_t
iRow
)
{
static
int32_t
tsdbSnapWriteT
imeSeriesData
(
STsdbSnapWriter
*
pWriter
,
SSnapDataHdr
*
pHdr
)
{
int32_t
code
=
0
;
int32_t
lino
=
0
;
TABLEID
id
=
{.
suid
=
pWriter
->
bData
.
suid
,
.
uid
=
pWriter
->
bData
.
uid
?
pWriter
->
bData
.
uid
:
pWriter
->
bData
.
aUid
[
iRow
]};
TSDBROW
row
=
tsdbRowFromBlockData
(
&
pWriter
->
bData
,
iRow
);
SBlockData
*
pBData
=
&
pWriter
->
dWriter
.
sData
;
code
=
tDecmprBlockData
(
pHdr
->
data
,
pHdr
->
size
,
&
pWriter
->
inData
,
pWriter
->
aBuf
);
TSDB_CHECK_CODE
(
code
,
lino
,
_exit
);
if
(
pBData
->
suid
||
pBData
->
uid
)
{
if
(
!
TABLE_SAME_SCHEMA
(
pBData
->
suid
,
pBData
->
uid
,
id
.
suid
,
id
.
uid
))
{
code
=
tsdbWriteSttBlock
(
pWriter
->
dWriter
.
pWriter
,
pBData
,
pWriter
->
dWriter
.
aSttBlk
,
pWriter
->
cmprAlg
);
if
(
code
)
goto
_err
;
ASSERT
(
pWriter
->
inData
.
nRow
>
0
);
pBData
->
suid
=
0
;
pBData
->
uid
=
0
;
// switch to new data file if need
int32_t
fid
=
tsdbKeyFid
(
pWriter
->
inData
.
aTSKEY
[
0
],
pWriter
->
minutes
,
pWriter
->
precision
);
if
(
pWriter
->
fid
!=
fid
)
{
if
(
pWriter
->
pDataFWriter
)
{
code
=
tsdbSnapWriteFileDataEnd
(
pWriter
);
TSDB_CHECK_CODE
(
code
,
lino
,
_exit
);
}
}
if
(
pBData
->
suid
==
0
&&
pBData
->
uid
==
0
)
{
code
=
tsdbUpdateTableSchema
(
pWriter
->
pTsdb
->
pVnode
->
pMeta
,
pWriter
->
id
.
suid
,
pWriter
->
id
.
uid
,
&
pWriter
->
skmTable
);
if
(
code
)
goto
_err
;
TABLEID
tid
=
{.
suid
=
pWriter
->
id
.
suid
,
.
uid
=
pWriter
->
id
.
suid
?
0
:
pWriter
->
id
.
uid
};
code
=
tBlockDataInit
(
pBData
,
&
tid
,
pWriter
->
skmTable
.
pTSchema
,
NULL
,
0
);
if
(
code
)
goto
_err
;
code
=
tsdbSnapWriteFileDataStart
(
pWriter
,
fid
);
TSDB_CHECK_CODE
(
code
,
lino
,
_exit
);
}
code
=
tBlockDataAppendRow
(
pBData
,
&
row
,
NULL
,
id
.
uid
);
if
(
code
)
goto
_err
;
// loop write each row
SRowInfo
*
pRowInfo
;
code
=
tsdbSnapWriteGetRow
(
pWriter
,
&
pRowInfo
);
TSDB_CHECK_CODE
(
code
,
lino
,
_exit
);
for
(
int32_t
iRow
=
0
;
iRow
<
pWriter
->
inData
.
nRow
;
++
iRow
)
{
SRowInfo
rInfo
=
{.
suid
=
pWriter
->
inData
.
suid
,
.
uid
=
pWriter
->
inData
.
uid
?
pWriter
->
inData
.
uid
:
pWriter
->
inData
.
aUid
[
iRow
],
.
row
=
tsdbRowFromBlockData
(
&
pWriter
->
inData
,
iRow
)};
if
(
pBData
->
nRow
>=
pWriter
->
maxRow
)
{
code
=
tsdbWriteSttBlock
(
pWriter
->
dWriter
.
pWriter
,
pBData
,
pWriter
->
dWriter
.
aSttBlk
,
pWriter
->
cmprAlg
);
if
(
code
)
goto
_err
;
for
(;;)
{
if
(
pRowInfo
==
NULL
)
{
code
=
tsdbSnapWriteTableData
(
pWriter
,
&
rInfo
);
TSDB_CHECK_CODE
(
code
,
lino
,
_exit
);
break
;
}
else
{
int32_t
c
=
tRowInfoCmprFn
(
&
rInfo
,
pRowInfo
);
if
(
c
<
0
)
{
code
=
tsdbSnapWriteTableData
(
pWriter
,
&
rInfo
);
TSDB_CHECK_CODE
(
code
,
lino
,
_exit
);
break
;
}
else
if
(
c
>
0
)
{
code
=
tsdbSnapWriteTableData
(
pWriter
,
pRowInfo
);
TSDB_CHECK_CODE
(
code
,
lino
,
_exit
);
code
=
tsdbSnapWriteNextRow
(
pWriter
,
&
pRowInfo
);
TSDB_CHECK_CODE
(
code
,
lino
,
_exit
);
}
else
{
ASSERT
(
0
);
}
}
}
}
_exit:
return
code
;
_err:
if
(
code
)
{
tsdbError
(
"vgId:%d %s failed at line %d since %s"
,
TD_VID
(
pWriter
->
pTsdb
->
pVnode
),
__func__
,
lino
,
tstrerror
(
code
));
}
else
{
tsdbDebug
(
"vgId:%d %s done, suid:%"
PRId64
" uid:%"
PRId64
" nRow:%d"
,
TD_VID
(
pWriter
->
pTsdb
->
pVnode
),
__func__
,
pWriter
->
inData
.
suid
,
pWriter
->
inData
.
uid
,
pWriter
->
inData
.
nRow
);
}
return
code
;
}
static
int32_t
tsdbSnapWriteRowData
(
STsdbSnapWriter
*
pWriter
,
int32_t
iRow
)
{
// SNAP_DATA_DEL
static
int32_t
tsdbSnapWriteDelTableDataStart
(
STsdbSnapWriter
*
pWriter
,
TABLEID
*
pId
)
{
int32_t
code
=
0
;
int32_t
lino
=
0
;
SBlockData
*
pBlockData
=
&
pWriter
->
bData
;
TABLEID
id
=
{.
suid
=
pBlockData
->
suid
,
.
uid
=
pBlockData
->
uid
?
pBlockData
->
uid
:
pBlockData
->
aUid
[
iRow
]};
// End last table data write if need
if
(
tTABLEIDCmprFn
(
&
pWriter
->
id
,
&
id
)
!=
0
)
{
code
=
tsdbSnapWriteTableDataEnd
(
pWriter
);
if
(
code
)
goto
_err
;
}
// Start new table data write if need
if
(
pWriter
->
id
.
suid
==
0
&&
pWriter
->
id
.
uid
==
0
)
{
code
=
tsdbSnapWriteTableDataStart
(
pWriter
,
&
id
);
if
(
code
)
goto
_err
;
}
// Merge with .data file data
int8_t
done
=
0
;
if
(
pWriter
->
dReader
.
pBlockIdx
&&
tTABLEIDCmprFn
(
pWriter
->
dReader
.
pBlockIdx
,
&
id
)
==
0
)
{
code
=
tsdbSnapWriteToDataFile
(
pWriter
,
iRow
,
&
done
);
if
(
code
)
goto
_err
;
}
// Append to the .stt data block (todo: check if need to set/reload sst block)
if
(
!
done
)
{
code
=
tsdbSnapWriteToSttFile
(
pWriter
,
iRow
);
if
(
code
)
goto
_err
;
if
(
pId
)
{
pWriter
->
tbid
=
*
pId
;
}
else
{
pWriter
->
tbid
=
(
TABLEID
){.
suid
=
INT64_MAX
,
.
uid
=
INT64_MAX
};
}
_exit:
return
code
;
taosArrayClear
(
pWriter
->
aDelData
);
_err:
tsdbError
(
"vgId:%d, %s failed since %s"
,
TD_VID
(
pWriter
->
pTsdb
->
pVnode
),
__func__
,
tstrerror
(
code
));
return
code
;
}
if
(
pWriter
->
pTIter
)
{
while
(
pWriter
->
pTIter
->
tIter
.
iDelIdx
<
taosArrayGetSize
(
pWriter
->
pTIter
->
tIter
.
aDelIdx
))
{
SDelIdx
*
pDelIdx
=
taosArrayGet
(
pWriter
->
pTIter
->
tIter
.
aDelIdx
,
pWriter
->
pTIter
->
tIter
.
iDelIdx
);
static
int32_t
tsdbSnapWriteData
(
STsdbSnapWriter
*
pWriter
,
uint8_t
*
pData
,
uint32_t
nData
)
{
int32_t
code
=
0
;
STsdb
*
pTsdb
=
pWriter
->
pTsdb
;
SBlockData
*
pBlockData
=
&
pWriter
->
bData
;
int32_t
c
=
tTABLEIDCmprFn
(
pDelIdx
,
&
pWriter
->
tbid
);
if
(
c
<
0
)
{
code
=
tsdbReadDelData
(
pWriter
->
pDelFReader
,
pDelIdx
,
pWriter
->
pTIter
->
tIter
.
aDelData
)
;
TSDB_CHECK_CODE
(
code
,
lino
,
_exit
)
;
// Decode data
SSnapDataHdr
*
pHdr
=
(
SSnapDataHdr
*
)
pData
;
code
=
tDecmprBlockData
(
pHdr
->
data
,
pHdr
->
size
,
pBlockData
,
pWriter
->
aBuf
);
if
(
code
)
goto
_err
;
SDelIdx
*
pDelIdxNew
=
taosArrayReserve
(
pWriter
->
aDelIdx
,
1
);
if
(
pDelIdxNew
==
NULL
)
{
code
=
TSDB_CODE_OUT_OF_MEMORY
;
TSDB_CHECK_CODE
(
code
,
lino
,
_exit
);
}
ASSERT
(
pBlockData
->
nRow
>
0
);
pDelIdxNew
->
suid
=
pDelIdx
->
suid
;
pDelIdxNew
->
uid
=
pDelIdx
->
uid
;
// Loop to handle each row
for
(
int32_t
iRow
=
0
;
iRow
<
pBlockData
->
nRow
;
iRow
++
)
{
TSKEY
ts
=
pBlockData
->
aTSKEY
[
iRow
];
int32_t
fid
=
tsdbKeyFid
(
ts
,
pWriter
->
minutes
,
pWriter
->
precision
);
code
=
tsdbWriteDelData
(
pWriter
->
pDelFWriter
,
pWriter
->
pTIter
->
tIter
.
aDelData
,
pDelIdxNew
);
TSDB_CHECK_CODE
(
code
,
lino
,
_exit
);
if
(
pWriter
->
dWriter
.
pWriter
==
NULL
||
pWriter
->
fid
!=
fid
)
{
if
(
pWriter
->
dWriter
.
pWriter
)
{
// ASSERT(fid > pWriter->fid);
pWriter
->
pTIter
->
tIter
.
iDelIdx
++
;
}
else
if
(
c
==
0
)
{
code
=
tsdbReadDelData
(
pWriter
->
pDelFReader
,
pDelIdx
,
pWriter
->
aDelData
);
TSDB_CHECK_CODE
(
code
,
lino
,
_exit
);
code
=
tsdbSnapWriteCloseFile
(
pWriter
);
if
(
code
)
goto
_err
;
pWriter
->
pTIter
->
tIter
.
iDelIdx
++
;
break
;
}
else
{
break
;
}
code
=
tsdbSnapWriteOpenFile
(
pWriter
,
fid
);
if
(
code
)
goto
_err
;
}
code
=
tsdbSnapWriteRowData
(
pWriter
,
iRow
);
if
(
code
)
goto
_err
;
}
return
code
;
_err:
tsdbError
(
"vgId:%d, vnode snapshot tsdb write data for %s failed since %s"
,
TD_VID
(
pTsdb
->
pVnode
),
pTsdb
->
path
,
tstrerror
(
code
));
_exit:
if
(
code
)
{
tsdbError
(
"vgId:%d %s failed at line %d since %s"
,
TD_VID
(
pWriter
->
pTsdb
->
pVnode
),
__func__
,
lino
,
tstrerror
(
code
));
}
else
{
tsdbTrace
(
"vgId:%d %s done, suid:%"
PRId64
" uid:%"
PRId64
,
TD_VID
(
pWriter
->
pTsdb
->
pVnode
),
__func__
,
pId
->
suid
,
pId
->
uid
);
}
return
code
;
}
// SNAP_DATA_DEL
static
int32_t
tsdbSnapMoveWriteDelData
(
STsdbSnapWriter
*
pWriter
,
TABLEID
*
pId
)
{
static
int32_t
tsdbSnapWriteDelTableDataEnd
(
STsdbSnapWriter
*
pWriter
)
{
int32_t
code
=
0
;
int32_t
lino
=
0
;
while
(
true
)
{
if
(
pWriter
->
iDelIdx
>=
taosArrayGetSize
(
pWriter
->
aDelIdxR
))
break
;
SDelIdx
*
pDelIdx
=
(
SDelIdx
*
)
taosArrayGet
(
pWriter
->
aDelIdxR
,
pWriter
->
iDelIdx
);
if
(
tTABLEIDCmprFn
(
pDelIdx
,
pId
)
>=
0
)
break
;
code
=
tsdbReadDelData
(
pWriter
->
pDelFReader
,
pDelIdx
,
pWriter
->
aDelData
);
if
(
code
)
goto
_exit
;
SDelIdx
delIdx
=
*
pDelIdx
;
code
=
tsdbWriteDelData
(
pWriter
->
pDelFWriter
,
pWriter
->
aDelData
,
&
delIdx
);
if
(
code
)
goto
_exit
;
if
(
taosArrayPush
(
pWriter
->
aDelIdxW
,
&
delIdx
)
==
NULL
)
{
if
(
taosArrayGetSize
(
pWriter
->
aDelData
)
>
0
)
{
SDelIdx
*
pDelIdx
=
taosArrayReserve
(
pWriter
->
aDelIdx
,
1
);
if
(
pDelIdx
==
NULL
)
{
code
=
TSDB_CODE_OUT_OF_MEMORY
;
goto
_exit
;
TSDB_CHECK_CODE
(
code
,
lino
,
_exit
)
;
}
pWriter
->
iDelIdx
++
;
pDelIdx
->
suid
=
pWriter
->
tbid
.
suid
;
pDelIdx
->
uid
=
pWriter
->
tbid
.
uid
;
code
=
tsdbWriteDelData
(
pWriter
->
pDelFWriter
,
pWriter
->
aDelData
,
pDelIdx
);
TSDB_CHECK_CODE
(
code
,
lino
,
_exit
);
}
_exit:
if
(
code
)
{
tsdbError
(
"vgId:%d %s failed at line %d since %s"
,
TD_VID
(
pWriter
->
pTsdb
->
pVnode
),
__func__
,
lino
,
tstrerror
(
code
));
}
else
{
tsdbTrace
(
"vgId:%d %s done"
,
TD_VID
(
pWriter
->
pTsdb
->
pVnode
),
__func__
);
}
return
code
;
}
static
int32_t
tsdbSnapWriteDel
(
STsdbSnapWriter
*
pWriter
,
uint8_t
*
pData
,
uint32_t
nData
)
{
static
int32_t
tsdbSnapWriteDel
TableData
(
STsdbSnapWriter
*
pWriter
,
TABLEID
*
pId
,
uint8_t
*
pData
,
int64_t
size
)
{
int32_t
code
=
0
;
STsdb
*
pTsdb
=
pWriter
->
pTsdb
;
// Open del file if not opened yet
if
(
pWriter
->
pDelFWriter
==
NULL
)
{
SDelFile
*
pDelFile
=
pWriter
->
fs
.
pDelFile
;
// reader
if
(
pDelFile
)
{
code
=
tsdbDelFReaderOpen
(
&
pWriter
->
pDelFReader
,
pDelFile
,
pTsdb
);
if
(
code
)
goto
_err
;
int32_t
lino
=
0
;
code
=
tsdbReadDelIdx
(
pWriter
->
pDelFReader
,
pWriter
->
aDelIdxR
);
if
(
code
)
goto
_err
;
}
else
{
taosArrayClear
(
pWriter
->
aDelIdxR
);
if
(
pId
==
NULL
||
pId
->
uid
!=
pWriter
->
tbid
.
uid
)
{
if
(
pWriter
->
tbid
.
uid
)
{
code
=
tsdbSnapWriteDelTableDataEnd
(
pWriter
);
TSDB_CHECK_CODE
(
code
,
lino
,
_exit
);
}
pWriter
->
iDelIdx
=
0
;
// writer
SDelFile
delFile
=
{.
commitID
=
pWriter
->
commitID
};
code
=
tsdbDelFWriterOpen
(
&
pWriter
->
pDelFWriter
,
&
delFile
,
pTsdb
);
if
(
code
)
goto
_err
;
taosArrayClear
(
pWriter
->
aDelIdxW
);
code
=
tsdbSnapWriteDelTableDataStart
(
pWriter
,
pId
);
TSDB_CHECK_CODE
(
code
,
lino
,
_exit
);
}
SSnapDataHdr
*
pHdr
=
(
SSnapDataHdr
*
)
pData
;
TABLEID
id
=
*
(
TABLEID
*
)
pHdr
->
data
;
if
(
pId
==
NULL
)
goto
_exit
;
ASSERT
(
pHdr
->
size
+
sizeof
(
SSnapDataHdr
)
==
nData
);
int64_t
n
=
0
;
while
(
n
<
size
)
{
SDelData
delData
;
n
+=
tGetDelData
(
pData
+
n
,
&
delData
);
// Move write data < id
code
=
tsdbSnapMoveWriteDelData
(
pWriter
,
&
id
);
if
(
code
)
goto
_err
;
if
(
taosArrayPush
(
pWriter
->
aDelData
,
&
delData
)
<
0
)
{
code
=
TSDB_CODE_OUT_OF_MEMORY
;
TSDB_CHECK_CODE
(
code
,
lino
,
_exit
);
}
}
ASSERT
(
n
==
size
);
// Merge incoming data with current
if
(
pWriter
->
iDelIdx
<
taosArrayGetSize
(
pWriter
->
aDelIdxR
)
&&
tTABLEIDCmprFn
(
taosArrayGet
(
pWriter
->
aDelIdxR
,
pWriter
->
iDelIdx
),
&
id
)
==
0
)
{
SDelIdx
*
pDelIdx
=
(
SDelIdx
*
)
taosArrayGet
(
pWriter
->
aDelIdxR
,
pWriter
->
iDelIdx
);
_exit:
if
(
code
)
{
tsdbError
(
"vgId:%d %s failed at line %d since %s"
,
TD_VID
(
pWriter
->
pTsdb
->
pVnode
),
__func__
,
lino
,
tstrerror
(
code
));
}
return
code
;
}
code
=
tsdbReadDelData
(
pWriter
->
pDelFReader
,
pDelIdx
,
pWriter
->
aDelData
);
if
(
code
)
goto
_err
;
static
int32_t
tsdbSnapWriteDelDataStart
(
STsdbSnapWriter
*
pWriter
)
{
int32_t
code
=
0
;
int32_t
lino
=
0
;
pWriter
->
iDelIdx
++
;
}
else
{
taosArrayClear
(
pWriter
->
aDelData
);
}
STsdb
*
pTsdb
=
pWriter
->
pTsdb
;
SDelFile
*
pDelFile
=
pWriter
->
fs
.
pDelFile
;
int64_t
n
=
sizeof
(
SSnapDataHdr
)
+
sizeof
(
TABLEID
);
while
(
n
<
nData
)
{
SDelData
delData
;
pWriter
->
tbid
=
(
TABLEID
){
0
};
n
+=
tGetDelData
(
pData
+
n
,
&
delData
);
// reader
if
(
pDelFile
)
{
code
=
tsdbDelFReaderOpen
(
&
pWriter
->
pDelFReader
,
pDelFile
,
pTsdb
);
TSDB_CHECK_CODE
(
code
,
lino
,
_exit
);
if
(
taosArrayPush
(
pWriter
->
aDelData
,
&
delData
)
==
NULL
)
{
code
=
TSDB_CODE_OUT_OF_MEMORY
;
goto
_err
;
}
code
=
tsdbOpenTombFileDataIter
(
pWriter
->
pDelFReader
,
&
pWriter
->
pTIter
);
TSDB_CHECK_CODE
(
code
,
lino
,
_exit
);
}
SDelIdx
delIdx
=
{.
suid
=
id
.
suid
,
.
uid
=
id
.
uid
};
code
=
tsdb
WriteDelData
(
pWriter
->
pDelFWriter
,
pWriter
->
aDelData
,
&
delIdx
);
if
(
code
)
goto
_err
;
// writer
code
=
tsdb
DelFWriterOpen
(
&
pWriter
->
pDelFWriter
,
&
(
SDelFile
){.
commitID
=
pWriter
->
commitID
},
pTsdb
);
TSDB_CHECK_CODE
(
code
,
lino
,
_exit
)
;
if
(
taosArrayPush
(
pWriter
->
aDelIdxW
,
&
delIdx
)
==
NULL
)
{
if
(
(
pWriter
->
aDelIdx
=
taosArrayInit
(
0
,
sizeof
(
SDelIdx
))
)
==
NULL
)
{
code
=
TSDB_CODE_OUT_OF_MEMORY
;
goto
_err
;
TSDB_CHECK_CODE
(
code
,
lino
,
_exit
);
}
if
((
pWriter
->
aDelData
=
taosArrayInit
(
0
,
sizeof
(
SDelData
)))
==
NULL
)
{
code
=
TSDB_CODE_OUT_OF_MEMORY
;
TSDB_CHECK_CODE
(
code
,
lino
,
_exit
);
}
return
code
;
_err:
tsdbError
(
"vgId:%d, vnode snapshot tsdb write del for %s failed since %s"
,
TD_VID
(
pTsdb
->
pVnode
),
pTsdb
->
path
,
tstrerror
(
code
));
_exit:
if
(
code
)
{
tsdbError
(
"vgId:%d %s failed at line %d since %s"
,
TD_VID
(
pTsdb
->
pVnode
),
__func__
,
lino
,
tstrerror
(
code
));
}
else
{
tsdbDebug
(
"vgId:%d %s done"
,
TD_VID
(
pTsdb
->
pVnode
),
__func__
);
}
return
code
;
}
static
int32_t
tsdbSnapWriteDelEnd
(
STsdbSnapWriter
*
pWriter
)
{
static
int32_t
tsdbSnapWriteDel
Data
End
(
STsdbSnapWriter
*
pWriter
)
{
int32_t
code
=
0
;
STsdb
*
pTsdb
=
pWriter
->
pTsdb
;
int32_t
lino
=
0
;
if
(
pWriter
->
pDelFWriter
==
NULL
)
return
code
;
STsdb
*
pTsdb
=
pWriter
->
pTsdb
;
TABLEID
id
=
{.
suid
=
INT64_MAX
,
.
uid
=
INT64_MAX
};
code
=
tsdbSnap
MoveWriteDelData
(
pWriter
,
&
id
);
if
(
code
)
goto
_err
;
// end remaining table with NULL data
code
=
tsdbSnap
WriteDelTableData
(
pWriter
,
NULL
,
NULL
,
0
);
TSDB_CHECK_CODE
(
code
,
lino
,
_exit
)
;
code
=
tsdbWriteDelIdx
(
pWriter
->
pDelFWriter
,
pWriter
->
aDelIdxW
);
if
(
code
)
goto
_err
;
// update file-level info
code
=
tsdbWriteDelIdx
(
pWriter
->
pDelFWriter
,
pWriter
->
aDelIdx
);
TSDB_CHECK_CODE
(
code
,
lino
,
_exit
);
code
=
tsdbUpdateDelFileHdr
(
pWriter
->
pDelFWriter
);
if
(
code
)
goto
_err
;
TSDB_CHECK_CODE
(
code
,
lino
,
_exit
)
;
code
=
tsdbFSUpsertDelFile
(
&
pWriter
->
fs
,
&
pWriter
->
pDelFWriter
->
fDel
);
if
(
code
)
goto
_err
;
TSDB_CHECK_CODE
(
code
,
lino
,
_exit
)
;
code
=
tsdbDelFWriterClose
(
&
pWriter
->
pDelFWriter
,
1
);
if
(
code
)
goto
_err
;
TSDB_CHECK_CODE
(
code
,
lino
,
_exit
)
;
if
(
pWriter
->
pDelFReader
)
{
code
=
tsdbDelFReaderClose
(
&
pWriter
->
pDelFReader
);
if
(
code
)
goto
_err
;
TSDB_CHECK_CODE
(
code
,
lino
,
_exit
);
}
if
(
pWriter
->
pTIter
)
{
tsdbCloseDataIter2
(
pWriter
->
pTIter
);
pWriter
->
pTIter
=
NULL
;
}
tsdbInfo
(
"vgId:%d, vnode snapshot tsdb write del for %s end"
,
TD_VID
(
pTsdb
->
pVnode
),
pTsdb
->
path
);
_exit:
if
(
code
)
{
tsdbError
(
"vgId:%d %s failed at line %d since %s"
,
TD_VID
(
pTsdb
->
pVnode
),
__func__
,
lino
,
tstrerror
(
code
));
}
else
{
tsdbInfo
(
"vgId:%d %s done"
,
TD_VID
(
pTsdb
->
pVnode
),
__func__
);
}
return
code
;
}
static
int32_t
tsdbSnapWriteDelData
(
STsdbSnapWriter
*
pWriter
,
SSnapDataHdr
*
pHdr
)
{
int32_t
code
=
0
;
int32_t
lino
=
0
;
STsdb
*
pTsdb
=
pWriter
->
pTsdb
;
_err:
tsdbError
(
"vgId:%d, vnode snapshot tsdb write del end for %s failed since %s"
,
TD_VID
(
pTsdb
->
pVnode
),
pTsdb
->
path
,
tstrerror
(
code
));
// start to write del data if need
if
(
pWriter
->
pDelFWriter
==
NULL
)
{
code
=
tsdbSnapWriteDelDataStart
(
pWriter
);
TSDB_CHECK_CODE
(
code
,
lino
,
_exit
);
}
// do write del data
code
=
tsdbSnapWriteDelTableData
(
pWriter
,
(
TABLEID
*
)
pHdr
->
data
,
pHdr
->
data
+
sizeof
(
TABLEID
),
pHdr
->
size
-
sizeof
(
TABLEID
));
TSDB_CHECK_CODE
(
code
,
lino
,
_exit
);
_exit:
if
(
code
)
{
tsdbError
(
"vgId:%d %s failed since %s"
,
TD_VID
(
pTsdb
->
pVnode
),
__func__
,
tstrerror
(
code
));
}
else
{
tsdbTrace
(
"vgId:%d %s done"
,
TD_VID
(
pTsdb
->
pVnode
),
__func__
);
}
return
code
;
}
// APIs
int32_t
tsdbSnapWriterOpen
(
STsdb
*
pTsdb
,
int64_t
sver
,
int64_t
ever
,
STsdbSnapWriter
**
ppWriter
)
{
int32_t
code
=
0
;
int32_t
lino
=
0
;
STsdbSnapWriter
*
pWriter
=
NULL
;
int32_t
code
=
0
;
int32_t
lino
=
0
;
// alloc
pWriter
=
(
STsdbSnapWriter
*
)
taosMemoryCalloc
(
1
,
sizeof
(
*
pWriter
));
STsdbSnapWriter
*
pWriter
=
(
STsdbSnapWriter
*
)
taosMemoryCalloc
(
1
,
sizeof
(
*
pWriter
));
if
(
pWriter
==
NULL
)
{
code
=
TSDB_CODE_OUT_OF_MEMORY
;
TSDB_CHECK_CODE
(
code
,
lino
,
_exit
);
...
...
@@ -1288,11 +1835,6 @@ int32_t tsdbSnapWriterOpen(STsdb* pTsdb, int64_t sver, int64_t ever, STsdbSnapWr
pWriter
->
pTsdb
=
pTsdb
;
pWriter
->
sver
=
sver
;
pWriter
->
ever
=
ever
;
code
=
tsdbFSCopy
(
pTsdb
,
&
pWriter
->
fs
);
TSDB_CHECK_CODE
(
code
,
lino
,
_exit
);
// config
pWriter
->
minutes
=
pTsdb
->
keepCfg
.
days
;
pWriter
->
precision
=
pTsdb
->
keepCfg
.
precision
;
pWriter
->
minRow
=
pTsdb
->
pVnode
->
config
.
tsdbCfg
.
minRows
;
...
...
@@ -1300,102 +1842,70 @@ int32_t tsdbSnapWriterOpen(STsdb* pTsdb, int64_t sver, int64_t ever, STsdbSnapWr
pWriter
->
cmprAlg
=
pTsdb
->
pVnode
->
config
.
tsdbCfg
.
compression
;
pWriter
->
commitID
=
pTsdb
->
pVnode
->
state
.
commitID
;
code
=
tsdbFSCopy
(
pTsdb
,
&
pWriter
->
fs
);
TSDB_CHECK_CODE
(
code
,
lino
,
_exit
);
// SNAP_DATA_TSDB
code
=
tBlockDataCreate
(
&
pWriter
->
b
Data
);
code
=
tBlockDataCreate
(
&
pWriter
->
in
Data
);
TSDB_CHECK_CODE
(
code
,
lino
,
_exit
);
pWriter
->
fid
=
INT32_MIN
;
pWriter
->
id
=
(
TABLEID
){
0
};
// Reader
pWriter
->
dReader
.
aBlockIdx
=
taosArrayInit
(
0
,
sizeof
(
SBlockIdx
));
if
(
pWriter
->
dReader
.
aBlockIdx
==
NULL
)
{
code
=
TSDB_CODE_OUT_OF_MEMORY
;
TSDB_CHECK_CODE
(
code
,
lino
,
_exit
);
}
code
=
tBlockDataCreate
(
&
pWriter
->
dReader
.
bData
);
TSDB_CHECK_CODE
(
code
,
lino
,
_exit
);
// Writer
pWriter
->
dWriter
.
aBlockIdx
=
taosArrayInit
(
0
,
sizeof
(
SBlockIdx
));
if
(
pWriter
->
dWriter
.
aBlockIdx
==
NULL
)
{
code
=
TSDB_CODE_OUT_OF_MEMORY
;
TSDB_CHECK_CODE
(
code
,
lino
,
_exit
);
}
pWriter
->
dWriter
.
aSttBlk
=
taosArrayInit
(
0
,
sizeof
(
SSttBlk
));
if
(
pWriter
->
dWriter
.
aSttBlk
==
NULL
)
{
code
=
TSDB_CODE_OUT_OF_MEMORY
;
TSDB_CHECK_CODE
(
code
,
lino
,
_exit
);
}
code
=
tBlockDataCreate
(
&
pWriter
->
dWriter
.
bData
);
code
=
tBlockDataCreate
(
&
pWriter
->
bData
);
TSDB_CHECK_CODE
(
code
,
lino
,
_exit
);
code
=
tBlockDataCreate
(
&
pWriter
->
dWriter
.
sData
);
code
=
tBlockDataCreate
(
&
pWriter
->
sData
);
TSDB_CHECK_CODE
(
code
,
lino
,
_exit
);
// SNAP_DATA_DEL
pWriter
->
aDelIdxR
=
taosArrayInit
(
0
,
sizeof
(
SDelIdx
));
if
(
pWriter
->
aDelIdxR
==
NULL
)
{
code
=
TSDB_CODE_OUT_OF_MEMORY
;
TSDB_CHECK_CODE
(
code
,
lino
,
_exit
);
}
pWriter
->
aDelData
=
taosArrayInit
(
0
,
sizeof
(
SDelData
));
if
(
pWriter
->
aDelData
==
NULL
)
{
code
=
TSDB_CODE_OUT_OF_MEMORY
;
TSDB_CHECK_CODE
(
code
,
lino
,
_exit
);
}
pWriter
->
aDelIdxW
=
taosArrayInit
(
0
,
sizeof
(
SDelIdx
));
if
(
pWriter
->
aDelIdxW
==
NULL
)
{
code
=
TSDB_CODE_OUT_OF_MEMORY
;
TSDB_CHECK_CODE
(
code
,
lino
,
_exit
);
}
_exit:
if
(
code
)
{
tsdbError
(
"vgId:%d, %s failed at line %d since %s"
,
TD_VID
(
pTsdb
->
pVnode
),
__func__
,
lino
,
tstrerror
(
code
));
*
ppWriter
=
NULL
;
tsdbError
(
"vgId:%d %s failed at line %d since %s"
,
TD_VID
(
pTsdb
->
pVnode
),
__func__
,
lino
,
tstrerror
(
code
));
if
(
pWriter
)
{
if
(
pWriter
->
aDelIdxW
)
taosArrayDestroy
(
pWriter
->
aDelIdxW
);
if
(
pWriter
->
aDelData
)
taosArrayDestroy
(
pWriter
->
aDelData
);
if
(
pWriter
->
aDelIdxR
)
taosArrayDestroy
(
pWriter
->
aDelIdxR
);
tBlockDataDestroy
(
&
pWriter
->
dWriter
.
sData
);
tBlockDataDestroy
(
&
pWriter
->
dWriter
.
bData
);
if
(
pWriter
->
dWriter
.
aSttBlk
)
taosArrayDestroy
(
pWriter
->
dWriter
.
aSttBlk
);
if
(
pWriter
->
dWriter
.
aBlockIdx
)
taosArrayDestroy
(
pWriter
->
dWriter
.
aBlockIdx
);
tBlockDataDestroy
(
&
pWriter
->
dReader
.
bData
);
if
(
pWriter
->
dReader
.
aBlockIdx
)
taosArrayDestroy
(
pWriter
->
dReader
.
aBlockIdx
);
tBlockDataDestroy
(
&
pWriter
->
sData
);
tBlockDataDestroy
(
&
pWriter
->
bData
);
tBlockDataDestroy
(
&
pWriter
->
inData
);
tsdbFSDestroy
(
&
pWriter
->
fs
);
taosMemoryFree
(
pWriter
)
;
pWriter
=
NULL
;
}
}
else
{
tsdbInfo
(
"vgId:%d, %s done"
,
TD_VID
(
pTsdb
->
pVnode
),
__func__
);
*
ppWriter
=
pWriter
;
tsdbInfo
(
"vgId:%d %s done, sver:%"
PRId64
" ever:%"
PRId64
,
TD_VID
(
pTsdb
->
pVnode
),
__func__
,
sver
,
ever
);
}
*
ppWriter
=
pWriter
;
return
code
;
}
int32_t
tsdbSnapWriterPrepareClose
(
STsdbSnapWriter
*
pWriter
)
{
int32_t
code
=
0
;
if
(
pWriter
->
dWriter
.
pWriter
)
{
code
=
tsdbSnapWriteCloseFile
(
pWriter
);
if
(
code
)
goto
_exit
;
int32_t
lino
=
0
;
if
(
pWriter
->
pDataFWriter
)
{
code
=
tsdbSnapWriteFileDataEnd
(
pWriter
);
TSDB_CHECK_CODE
(
code
,
lino
,
_exit
);
}
code
=
tsdbSnapWriteDelEnd
(
pWriter
);
if
(
code
)
goto
_exit
;
if
(
pWriter
->
pDelFWriter
)
{
code
=
tsdbSnapWriteDelDataEnd
(
pWriter
);
TSDB_CHECK_CODE
(
code
,
lino
,
_exit
);
}
code
=
tsdbFSPrepareCommit
(
pWriter
->
pTsdb
,
&
pWriter
->
fs
);
if
(
code
)
goto
_exit
;
TSDB_CHECK_CODE
(
code
,
lino
,
_exit
)
;
_exit:
if
(
code
)
{
tsdbError
(
"vgId:%d, %s failed since %s"
,
TD_VID
(
pWriter
->
pTsdb
->
pVnode
),
__func__
,
tstrerror
(
code
));
tsdbError
(
"vgId:%d %s failed at line %d since %s"
,
TD_VID
(
pWriter
->
pTsdb
->
pVnode
),
__func__
,
lino
,
tstrerror
(
code
));
}
else
{
tsdbDebug
(
"vgId:%d %s done"
,
TD_VID
(
pWriter
->
pTsdb
->
pVnode
),
__func__
);
}
return
code
;
}
int32_t
tsdbSnapWriterClose
(
STsdbSnapWriter
**
ppWriter
,
int8_t
rollback
)
{
int32_t
code
=
0
;
int32_t
code
=
0
;
int32_t
lino
=
0
;
STsdbSnapWriter
*
pWriter
=
*
ppWriter
;
STsdb
*
pTsdb
=
pWriter
->
pTsdb
;
...
...
@@ -1408,7 +1918,7 @@ int32_t tsdbSnapWriterClose(STsdbSnapWriter** ppWriter, int8_t rollback) {
code
=
tsdbFSCommit
(
pWriter
->
pTsdb
);
if
(
code
)
{
taosThreadRwlockUnlock
(
&
pTsdb
->
rwLock
);
goto
_err
;
TSDB_CHECK_CODE
(
code
,
lino
,
_exit
)
;
}
// unlock
...
...
@@ -1416,72 +1926,60 @@ int32_t tsdbSnapWriterClose(STsdbSnapWriter** ppWriter, int8_t rollback) {
}
// SNAP_DATA_DEL
taosArrayDestroy
(
pWriter
->
aDelIdxW
);
taosArrayDestroy
(
pWriter
->
aDelData
);
taosArrayDestroy
(
pWriter
->
aDelIdx
R
);
taosArrayDestroy
(
pWriter
->
aDelIdx
);
// SNAP_DATA_TSDB
// Writer
tBlockDataDestroy
(
&
pWriter
->
dWriter
.
sData
);
tBlockDataDestroy
(
&
pWriter
->
dWriter
.
bData
);
taosArrayDestroy
(
pWriter
->
dWriter
.
aSttBlk
);
tMapDataClear
(
&
pWriter
->
dWriter
.
mDataBlk
);
taosArrayDestroy
(
pWriter
->
dWriter
.
aBlockIdx
);
// Reader
tBlockDataDestroy
(
&
pWriter
->
dReader
.
bData
);
tMapDataClear
(
&
pWriter
->
dReader
.
mDataBlk
);
taosArrayDestroy
(
pWriter
->
dReader
.
aBlockIdx
);
tBlockDataDestroy
(
&
pWriter
->
sData
);
tBlockDataDestroy
(
&
pWriter
->
bData
);
taosArrayDestroy
(
pWriter
->
aSttBlk
);
tMapDataClear
(
&
pWriter
->
mDataBlk
);
taosArrayDestroy
(
pWriter
->
aBlockIdx
);
tDestroyTSchema
(
pWriter
->
skmTable
.
pTSchema
);
tBlockDataDestroy
(
&
pWriter
->
inData
);
for
(
int32_t
iBuf
=
0
;
iBuf
<
sizeof
(
pWriter
->
aBuf
)
/
sizeof
(
uint8_t
*
);
iBuf
++
)
{
tFree
(
pWriter
->
aBuf
[
iBuf
]);
}
tsdb
Info
(
"vgId:%d, %s done"
,
TD_VID
(
pWriter
->
pTsdb
->
pVnode
),
__func__
);
tsdb
FSDestroy
(
&
pWriter
->
fs
);
taosMemoryFree
(
pWriter
);
*
ppWriter
=
NULL
;
return
code
;
_err:
tsdbError
(
"vgId:%d, vnode snapshot tsdb writer close for %s failed since %s"
,
TD_VID
(
pWriter
->
pTsdb
->
pVnode
),
pWriter
->
pTsdb
->
path
,
tstrerror
(
code
));
taosMemoryFree
(
pWriter
);
*
ppWriter
=
NULL
;
_exit:
if
(
code
)
{
tsdbError
(
"vgId:%d %s failed at line %d since %s"
,
TD_VID
(
pTsdb
->
pVnode
),
__func__
,
lino
,
tstrerror
(
code
));
}
else
{
tsdbInfo
(
"vgId:%d %s done"
,
TD_VID
(
pTsdb
->
pVnode
),
__func__
);
}
return
code
;
}
int32_t
tsdbSnapWrite
(
STsdbSnapWriter
*
pWriter
,
uint8_t
*
pData
,
uint32_t
nData
)
{
int32_t
code
=
0
;
SSnapDataHdr
*
pHdr
=
(
SSnapDataHdr
*
)
pData
;
int32_t
tsdbSnapWrite
(
STsdbSnapWriter
*
pWriter
,
SSnapDataHdr
*
pHdr
)
{
int32_t
code
=
0
;
int32_t
lino
=
0
;
// ts data
if
(
pHdr
->
type
==
SNAP_DATA_TSDB
)
{
code
=
tsdbSnapWriteData
(
pWriter
,
pData
,
nData
);
if
(
code
)
goto
_err
;
code
=
tsdbSnapWriteTimeSeriesData
(
pWriter
,
pHdr
);
TSDB_CHECK_CODE
(
code
,
lino
,
_exit
);
goto
_exit
;
}
else
{
if
(
pWriter
->
dWriter
.
pWriter
)
{
code
=
tsdbSnapWriteCloseFile
(
pWriter
);
if
(
code
)
goto
_err
;
}
}
else
if
(
pWriter
->
pDataFWriter
)
{
code
=
tsdbSnapWriteFileDataEnd
(
pWriter
);
TSDB_CHECK_CODE
(
code
,
lino
,
_exit
);
}
// del data
if
(
pHdr
->
type
==
SNAP_DATA_DEL
)
{
code
=
tsdbSnapWriteDel
(
pWriter
,
pData
,
nData
);
if
(
code
)
goto
_err
;
code
=
tsdbSnapWriteDelData
(
pWriter
,
pHdr
);
TSDB_CHECK_CODE
(
code
,
lino
,
_exit
);
goto
_exit
;
}
_exit:
tsdbDebug
(
"vgId:%d, tsdb snapshot write for %s succeed"
,
TD_VID
(
pWriter
->
pTsdb
->
pVnode
),
pWriter
->
pTsdb
->
path
);
return
code
;
_err:
tsdbError
(
"vgId:%d, tsdb snapshot write for %s failed since %s"
,
TD_VID
(
pWriter
->
pTsdb
->
pVnode
),
pWriter
->
pTsdb
->
path
,
tstrerror
(
code
));
if
(
code
)
{
tsdbError
(
"vgId:%d %s failed at line %d since %s, type:%d index:%"
PRId64
" size:%"
PRId64
,
TD_VID
(
pWriter
->
pTsdb
->
pVnode
),
__func__
,
lino
,
tstrerror
(
code
),
pHdr
->
type
,
pHdr
->
index
,
pHdr
->
size
);
}
else
{
tsdbDebug
(
"vgId:%d %s done, type:%d index:%"
PRId64
" size:%"
PRId64
,
TD_VID
(
pWriter
->
pTsdb
->
pVnode
),
__func__
,
pHdr
->
type
,
pHdr
->
index
,
pHdr
->
size
);
}
return
code
;
}
source/dnode/vnode/src/tsdb/tsdbUtil.c
浏览文件 @
677a27a0
...
...
@@ -758,7 +758,7 @@ int32_t tsdbRowMergerAdd(SRowMerger *pMerger, TSDBROW *pRow, STSchema *pTSchema)
pTColVal
->
value
.
nData
=
pColVal
->
value
.
nData
;
if
(
pTColVal
->
value
.
nData
)
{
memcpy
(
pTColVal
->
value
.
pData
,
pColVal
->
value
.
pData
,
pTColVal
->
value
.
nData
);
memcpy
(
pTColVal
->
value
.
pData
,
pColVal
->
value
.
pData
,
pTColVal
->
value
.
nData
);
}
pTColVal
->
flag
=
0
;
}
else
{
...
...
@@ -1133,6 +1133,7 @@ _exit:
void
tBlockDataReset
(
SBlockData
*
pBlockData
)
{
pBlockData
->
suid
=
0
;
pBlockData
->
uid
=
0
;
pBlockData
->
nRow
=
0
;
}
void
tBlockDataClear
(
SBlockData
*
pBlockData
)
{
...
...
source/dnode/vnode/src/vnd/vnodeSnapshot.c
浏览文件 @
677a27a0
...
...
@@ -455,7 +455,7 @@ int32_t vnodeSnapWrite(SVSnapWriter *pWriter, uint8_t *pData, uint32_t nData) {
if
(
code
)
goto
_err
;
}
code
=
tsdbSnapWrite
(
pWriter
->
pTsdbSnapWriter
,
p
Data
,
nData
);
code
=
tsdbSnapWrite
(
pWriter
->
pTsdbSnapWriter
,
p
Hdr
);
if
(
code
)
goto
_err
;
}
break
;
case
SNAP_DATA_TQ_HANDLE
:
{
...
...
source/libs/sync/src/syncAppendEntries.c
浏览文件 @
677a27a0
...
...
@@ -89,45 +89,6 @@
// /\ UNCHANGED <<candidateVars, leaderVars>>
//
int32_t
syncNodeFollowerCommit
(
SSyncNode
*
ths
,
SyncIndex
newCommitIndex
)
{
ASSERT
(
false
&&
"deprecated"
);
if
(
ths
->
state
!=
TAOS_SYNC_STATE_FOLLOWER
)
{
sNTrace
(
ths
,
"can not do follower commit"
);
return
-
1
;
}
// maybe update commit index, leader notice me
if
(
newCommitIndex
>
ths
->
commitIndex
)
{
// has commit entry in local
if
(
newCommitIndex
<=
ths
->
pLogStore
->
syncLogLastIndex
(
ths
->
pLogStore
))
{
// advance commit index to sanpshot first
SSnapshot
snapshot
;
ths
->
pFsm
->
FpGetSnapshotInfo
(
ths
->
pFsm
,
&
snapshot
);
if
(
snapshot
.
lastApplyIndex
>=
0
&&
snapshot
.
lastApplyIndex
>
ths
->
commitIndex
)
{
SyncIndex
commitBegin
=
ths
->
commitIndex
;
SyncIndex
commitEnd
=
snapshot
.
lastApplyIndex
;
ths
->
commitIndex
=
snapshot
.
lastApplyIndex
;
sNTrace
(
ths
,
"commit by snapshot from index:%"
PRId64
" to index:%"
PRId64
,
commitBegin
,
commitEnd
);
}
SyncIndex
beginIndex
=
ths
->
commitIndex
+
1
;
SyncIndex
endIndex
=
newCommitIndex
;
// update commit index
ths
->
commitIndex
=
newCommitIndex
;
// call back Wal
int32_t
code
=
ths
->
pLogStore
->
syncLogUpdateCommitIndex
(
ths
->
pLogStore
,
ths
->
commitIndex
);
ASSERT
(
code
==
0
);
code
=
syncNodeDoCommit
(
ths
,
beginIndex
,
endIndex
,
ths
->
state
);
ASSERT
(
code
==
0
);
}
}
return
0
;
}
SSyncRaftEntry
*
syncBuildRaftEntryFromAppendEntries
(
const
SyncAppendEntries
*
pMsg
)
{
SSyncRaftEntry
*
pEntry
=
taosMemoryMalloc
(
pMsg
->
dataLen
);
if
(
pEntry
==
NULL
)
{
...
...
@@ -232,256 +193,3 @@ _IGNORE:
rpcFreeCont
(
rpcRsp
.
pCont
);
return
0
;
}
int32_t
syncNodeOnAppendEntriesOld
(
SSyncNode
*
ths
,
const
SRpcMsg
*
pRpcMsg
)
{
SyncAppendEntries
*
pMsg
=
pRpcMsg
->
pCont
;
SRpcMsg
rpcRsp
=
{
0
};
// if already drop replica, do not process
if
(
!
syncNodeInRaftGroup
(
ths
,
&
(
pMsg
->
srcId
)))
{
syncLogRecvAppendEntries
(
ths
,
pMsg
,
"not in my config"
);
goto
_IGNORE
;
}
// prepare response msg
int32_t
code
=
syncBuildAppendEntriesReply
(
&
rpcRsp
,
ths
->
vgId
);
if
(
code
!=
0
)
{
syncLogRecvAppendEntries
(
ths
,
pMsg
,
"build rsp error"
);
goto
_IGNORE
;
}
SyncAppendEntriesReply
*
pReply
=
rpcRsp
.
pCont
;
pReply
->
srcId
=
ths
->
myRaftId
;
pReply
->
destId
=
pMsg
->
srcId
;
pReply
->
term
=
ths
->
raftStore
.
currentTerm
;
pReply
->
success
=
false
;
// pReply->matchIndex = ths->pLogStore->syncLogLastIndex(ths->pLogStore);
pReply
->
matchIndex
=
SYNC_INDEX_INVALID
;
pReply
->
lastSendIndex
=
pMsg
->
prevLogIndex
+
1
;
pReply
->
startTime
=
ths
->
startTime
;
if
(
pMsg
->
term
<
ths
->
raftStore
.
currentTerm
)
{
syncLogRecvAppendEntries
(
ths
,
pMsg
,
"reject, small term"
);
goto
_SEND_RESPONSE
;
}
if
(
pMsg
->
term
>
ths
->
raftStore
.
currentTerm
)
{
pReply
->
term
=
pMsg
->
term
;
}
syncNodeStepDown
(
ths
,
pMsg
->
term
);
syncNodeResetElectTimer
(
ths
);
SyncIndex
startIndex
=
ths
->
pLogStore
->
syncLogBeginIndex
(
ths
->
pLogStore
);
SyncIndex
lastIndex
=
ths
->
pLogStore
->
syncLogLastIndex
(
ths
->
pLogStore
);
if
(
pMsg
->
prevLogIndex
>
lastIndex
)
{
syncLogRecvAppendEntries
(
ths
,
pMsg
,
"reject, index not match"
);
goto
_SEND_RESPONSE
;
}
if
(
pMsg
->
prevLogIndex
>=
startIndex
)
{
SyncTerm
myPreLogTerm
=
syncNodeGetPreTerm
(
ths
,
pMsg
->
prevLogIndex
+
1
);
// ASSERT(myPreLogTerm != SYNC_TERM_INVALID);
if
(
myPreLogTerm
==
SYNC_TERM_INVALID
)
{
syncLogRecvAppendEntries
(
ths
,
pMsg
,
"reject, pre-term invalid"
);
goto
_SEND_RESPONSE
;
}
if
(
myPreLogTerm
!=
pMsg
->
prevLogTerm
)
{
syncLogRecvAppendEntries
(
ths
,
pMsg
,
"reject, pre-term not match"
);
goto
_SEND_RESPONSE
;
}
}
// accept
pReply
->
success
=
true
;
bool
hasAppendEntries
=
pMsg
->
dataLen
>
0
;
if
(
hasAppendEntries
)
{
SSyncRaftEntry
*
pAppendEntry
=
syncEntryBuildFromAppendEntries
(
pMsg
);
ASSERT
(
pAppendEntry
!=
NULL
);
SyncIndex
appendIndex
=
pMsg
->
prevLogIndex
+
1
;
LRUHandle
*
hLocal
=
NULL
;
LRUHandle
*
hAppend
=
NULL
;
int32_t
code
=
0
;
SSyncRaftEntry
*
pLocalEntry
=
NULL
;
SLRUCache
*
pCache
=
ths
->
pLogStore
->
pCache
;
hLocal
=
taosLRUCacheLookup
(
pCache
,
&
appendIndex
,
sizeof
(
appendIndex
));
if
(
hLocal
)
{
pLocalEntry
=
(
SSyncRaftEntry
*
)
taosLRUCacheValue
(
pCache
,
hLocal
);
code
=
0
;
ths
->
pLogStore
->
cacheHit
++
;
sNTrace
(
ths
,
"hit cache index:%"
PRId64
", bytes:%u, %p"
,
appendIndex
,
pLocalEntry
->
bytes
,
pLocalEntry
);
}
else
{
ths
->
pLogStore
->
cacheMiss
++
;
sNTrace
(
ths
,
"miss cache index:%"
PRId64
,
appendIndex
);
code
=
ths
->
pLogStore
->
syncLogGetEntry
(
ths
->
pLogStore
,
appendIndex
,
&
pLocalEntry
);
}
if
(
code
==
0
)
{
// get local entry success
if
(
pLocalEntry
->
term
==
pAppendEntry
->
term
)
{
// do nothing
sNTrace
(
ths
,
"log match, do nothing, index:%"
PRId64
,
appendIndex
);
}
else
{
// truncate
code
=
ths
->
pLogStore
->
syncLogTruncate
(
ths
->
pLogStore
,
appendIndex
);
if
(
code
!=
0
)
{
char
logBuf
[
128
];
snprintf
(
logBuf
,
sizeof
(
logBuf
),
"ignore, truncate error, append-index:%"
PRId64
,
appendIndex
);
syncLogRecvAppendEntries
(
ths
,
pMsg
,
logBuf
);
if
(
hLocal
)
{
taosLRUCacheRelease
(
ths
->
pLogStore
->
pCache
,
hLocal
,
false
);
}
else
{
syncEntryDestroy
(
pLocalEntry
);
}
if
(
hAppend
)
{
taosLRUCacheRelease
(
ths
->
pLogStore
->
pCache
,
hAppend
,
false
);
}
else
{
syncEntryDestroy
(
pAppendEntry
);
}
goto
_IGNORE
;
}
ASSERT
(
pAppendEntry
->
index
==
appendIndex
);
// append
code
=
ths
->
pLogStore
->
syncLogAppendEntry
(
ths
->
pLogStore
,
pAppendEntry
,
false
);
if
(
code
!=
0
)
{
char
logBuf
[
128
];
snprintf
(
logBuf
,
sizeof
(
logBuf
),
"ignore, append error, append-index:%"
PRId64
,
appendIndex
);
syncLogRecvAppendEntries
(
ths
,
pMsg
,
logBuf
);
if
(
hLocal
)
{
taosLRUCacheRelease
(
ths
->
pLogStore
->
pCache
,
hLocal
,
false
);
}
else
{
syncEntryDestroy
(
pLocalEntry
);
}
if
(
hAppend
)
{
taosLRUCacheRelease
(
ths
->
pLogStore
->
pCache
,
hAppend
,
false
);
}
else
{
syncEntryDestroy
(
pAppendEntry
);
}
goto
_IGNORE
;
}
syncCacheEntry
(
ths
->
pLogStore
,
pAppendEntry
,
&
hAppend
);
}
}
else
{
if
(
terrno
==
TSDB_CODE_WAL_LOG_NOT_EXIST
)
{
// log not exist
// truncate
code
=
ths
->
pLogStore
->
syncLogTruncate
(
ths
->
pLogStore
,
appendIndex
);
if
(
code
!=
0
)
{
char
logBuf
[
128
];
snprintf
(
logBuf
,
sizeof
(
logBuf
),
"ignore, log not exist, truncate error, append-index:%"
PRId64
,
appendIndex
);
syncLogRecvAppendEntries
(
ths
,
pMsg
,
logBuf
);
syncEntryDestroy
(
pLocalEntry
);
syncEntryDestroy
(
pAppendEntry
);
goto
_IGNORE
;
}
// append
code
=
ths
->
pLogStore
->
syncLogAppendEntry
(
ths
->
pLogStore
,
pAppendEntry
,
false
);
if
(
code
!=
0
)
{
char
logBuf
[
128
];
snprintf
(
logBuf
,
sizeof
(
logBuf
),
"ignore, log not exist, append error, append-index:%"
PRId64
,
appendIndex
);
syncLogRecvAppendEntries
(
ths
,
pMsg
,
logBuf
);
if
(
hLocal
)
{
taosLRUCacheRelease
(
ths
->
pLogStore
->
pCache
,
hLocal
,
false
);
}
else
{
syncEntryDestroy
(
pLocalEntry
);
}
if
(
hAppend
)
{
taosLRUCacheRelease
(
ths
->
pLogStore
->
pCache
,
hAppend
,
false
);
}
else
{
syncEntryDestroy
(
pAppendEntry
);
}
goto
_IGNORE
;
}
syncCacheEntry
(
ths
->
pLogStore
,
pAppendEntry
,
&
hAppend
);
}
else
{
// get local entry success
char
logBuf
[
128
];
snprintf
(
logBuf
,
sizeof
(
logBuf
),
"ignore, get local entry error, append-index:%"
PRId64
" err:%d"
,
appendIndex
,
terrno
);
syncLogRecvAppendEntries
(
ths
,
pMsg
,
logBuf
);
if
(
hLocal
)
{
taosLRUCacheRelease
(
ths
->
pLogStore
->
pCache
,
hLocal
,
false
);
}
else
{
syncEntryDestroy
(
pLocalEntry
);
}
if
(
hAppend
)
{
taosLRUCacheRelease
(
ths
->
pLogStore
->
pCache
,
hAppend
,
false
);
}
else
{
syncEntryDestroy
(
pAppendEntry
);
}
goto
_IGNORE
;
}
}
// update match index
pReply
->
matchIndex
=
pAppendEntry
->
index
;
if
(
hLocal
)
{
taosLRUCacheRelease
(
ths
->
pLogStore
->
pCache
,
hLocal
,
false
);
}
else
{
syncEntryDestroy
(
pLocalEntry
);
}
if
(
hAppend
)
{
taosLRUCacheRelease
(
ths
->
pLogStore
->
pCache
,
hAppend
,
false
);
}
else
{
syncEntryDestroy
(
pAppendEntry
);
}
}
else
{
// no append entries, do nothing
// maybe has extra entries, no harm
// update match index
pReply
->
matchIndex
=
pMsg
->
prevLogIndex
;
}
// maybe update commit index, leader notice me
syncNodeFollowerCommit
(
ths
,
pMsg
->
commitIndex
);
syncLogRecvAppendEntries
(
ths
,
pMsg
,
"accept"
);
goto
_SEND_RESPONSE
;
_IGNORE:
rpcFreeCont
(
rpcRsp
.
pCont
);
return
0
;
_SEND_RESPONSE:
// msg event log
syncLogSendAppendEntriesReply
(
ths
,
pReply
,
""
);
// send response
syncNodeSendMsgById
(
&
pReply
->
destId
,
ths
,
&
rpcRsp
);
return
0
;
}
source/libs/sync/src/syncAppendEntriesReply.c
浏览文件 @
677a27a0
...
...
@@ -89,63 +89,3 @@ int32_t syncNodeOnAppendEntriesReply(SSyncNode* ths, const SRpcMsg* pRpcMsg) {
}
return
0
;
}
int32_t
syncNodeOnAppendEntriesReplyOld
(
SSyncNode
*
ths
,
SyncAppendEntriesReply
*
pMsg
)
{
int32_t
ret
=
0
;
// if already drop replica, do not process
if
(
!
syncNodeInRaftGroup
(
ths
,
&
(
pMsg
->
srcId
)))
{
syncLogRecvAppendEntriesReply
(
ths
,
pMsg
,
"not in my config"
);
return
0
;
}
// drop stale response
if
(
pMsg
->
term
<
ths
->
raftStore
.
currentTerm
)
{
syncLogRecvAppendEntriesReply
(
ths
,
pMsg
,
"drop stale response"
);
return
0
;
}
if
(
ths
->
state
==
TAOS_SYNC_STATE_LEADER
)
{
if
(
pMsg
->
term
>
ths
->
raftStore
.
currentTerm
)
{
syncLogRecvAppendEntriesReply
(
ths
,
pMsg
,
"error term"
);
syncNodeStepDown
(
ths
,
pMsg
->
term
);
return
-
1
;
}
ASSERT
(
pMsg
->
term
==
ths
->
raftStore
.
currentTerm
);
if
(
pMsg
->
success
)
{
SyncIndex
oldMatchIndex
=
syncIndexMgrGetIndex
(
ths
->
pMatchIndex
,
&
(
pMsg
->
srcId
));
if
(
pMsg
->
matchIndex
>
oldMatchIndex
)
{
syncIndexMgrSetIndex
(
ths
->
pMatchIndex
,
&
(
pMsg
->
srcId
),
pMsg
->
matchIndex
);
syncMaybeAdvanceCommitIndex
(
ths
);
// maybe update minMatchIndex
ths
->
minMatchIndex
=
syncMinMatchIndex
(
ths
);
}
syncIndexMgrSetIndex
(
ths
->
pNextIndex
,
&
(
pMsg
->
srcId
),
pMsg
->
matchIndex
+
1
);
}
else
{
SyncIndex
nextIndex
=
syncIndexMgrGetIndex
(
ths
->
pNextIndex
,
&
(
pMsg
->
srcId
));
if
(
nextIndex
>
SYNC_INDEX_BEGIN
)
{
--
nextIndex
;
}
syncIndexMgrSetIndex
(
ths
->
pNextIndex
,
&
(
pMsg
->
srcId
),
nextIndex
);
}
// send next append entries
SPeerState
*
pState
=
syncNodeGetPeerState
(
ths
,
&
(
pMsg
->
srcId
));
ASSERT
(
pState
!=
NULL
);
if
(
pMsg
->
lastSendIndex
==
pState
->
lastSendIndex
)
{
int64_t
timeNow
=
taosGetTimestampMs
();
int64_t
elapsed
=
timeNow
-
pState
->
lastSendTime
;
sNTrace
(
ths
,
"sync-append-entries rtt elapsed:%"
PRId64
", index:%"
PRId64
,
elapsed
,
pState
->
lastSendIndex
);
syncNodeReplicateOne
(
ths
,
&
(
pMsg
->
srcId
),
true
);
}
}
syncLogRecvAppendEntriesReply
(
ths
,
pMsg
,
"process"
);
return
0
;
}
source/libs/sync/src/syncCommit.c
浏览文件 @
677a27a0
...
...
@@ -43,148 +43,6 @@
// IN commitIndex' = [commitIndex EXCEPT ![i] = newCommitIndex]
// /\ UNCHANGED <<messages, serverVars, candidateVars, leaderVars, log>>
//
void
syncOneReplicaAdvance
(
SSyncNode
*
pSyncNode
)
{
ASSERT
(
false
&&
"deprecated"
);
if
(
pSyncNode
==
NULL
)
{
sError
(
"pSyncNode is NULL"
);
return
;
}
if
(
pSyncNode
->
state
!=
TAOS_SYNC_STATE_LEADER
)
{
sNError
(
pSyncNode
,
"not leader, can not advance commit index"
);
return
;
}
if
(
pSyncNode
->
replicaNum
!=
1
)
{
sNError
(
pSyncNode
,
"not one replica, can not advance commit index"
);
return
;
}
// advance commit index to snapshot first
SSnapshot
snapshot
;
pSyncNode
->
pFsm
->
FpGetSnapshotInfo
(
pSyncNode
->
pFsm
,
&
snapshot
);
if
(
snapshot
.
lastApplyIndex
>
0
&&
snapshot
.
lastApplyIndex
>
pSyncNode
->
commitIndex
)
{
SyncIndex
commitBegin
=
pSyncNode
->
commitIndex
;
SyncIndex
commitEnd
=
snapshot
.
lastApplyIndex
;
pSyncNode
->
commitIndex
=
snapshot
.
lastApplyIndex
;
sNTrace
(
pSyncNode
,
"commit by snapshot from index:%"
PRId64
" to index:%"
PRId64
,
commitBegin
,
commitEnd
);
}
// advance commit index as large as possible
SyncIndex
lastIndex
=
syncNodeGetLastIndex
(
pSyncNode
);
if
(
lastIndex
>
pSyncNode
->
commitIndex
)
{
sNTrace
(
pSyncNode
,
"commit by wal from index:%"
PRId64
" to index:%"
PRId64
,
pSyncNode
->
commitIndex
+
1
,
lastIndex
);
pSyncNode
->
commitIndex
=
lastIndex
;
}
// call back Wal
SyncIndex
walCommitVer
=
logStoreWalCommitVer
(
pSyncNode
->
pLogStore
);
if
(
pSyncNode
->
commitIndex
>
walCommitVer
)
{
pSyncNode
->
pLogStore
->
syncLogUpdateCommitIndex
(
pSyncNode
->
pLogStore
,
pSyncNode
->
commitIndex
);
}
}
void
syncMaybeAdvanceCommitIndex
(
SSyncNode
*
pSyncNode
)
{
ASSERTS
(
false
,
"deprecated"
);
if
(
pSyncNode
==
NULL
)
{
sError
(
"pSyncNode is NULL"
);
return
;
}
if
(
pSyncNode
->
state
!=
TAOS_SYNC_STATE_LEADER
)
{
sNError
(
pSyncNode
,
"not leader, can not advance commit index"
);
return
;
}
// advance commit index to sanpshot first
SSnapshot
snapshot
;
pSyncNode
->
pFsm
->
FpGetSnapshotInfo
(
pSyncNode
->
pFsm
,
&
snapshot
);
if
(
snapshot
.
lastApplyIndex
>
0
&&
snapshot
.
lastApplyIndex
>
pSyncNode
->
commitIndex
)
{
SyncIndex
commitBegin
=
pSyncNode
->
commitIndex
;
SyncIndex
commitEnd
=
snapshot
.
lastApplyIndex
;
pSyncNode
->
commitIndex
=
snapshot
.
lastApplyIndex
;
sNTrace
(
pSyncNode
,
"commit by snapshot from index:%"
PRId64
" to index:%"
PRId64
,
commitBegin
,
commitEnd
);
}
// update commit index
SyncIndex
newCommitIndex
=
pSyncNode
->
commitIndex
;
for
(
SyncIndex
index
=
syncNodeGetLastIndex
(
pSyncNode
);
index
>
pSyncNode
->
commitIndex
;
--
index
)
{
bool
agree
=
syncAgree
(
pSyncNode
,
index
);
if
(
agree
)
{
// term
SSyncRaftEntry
*
pEntry
=
NULL
;
SLRUCache
*
pCache
=
pSyncNode
->
pLogStore
->
pCache
;
LRUHandle
*
h
=
taosLRUCacheLookup
(
pCache
,
&
index
,
sizeof
(
index
));
if
(
h
)
{
pEntry
=
(
SSyncRaftEntry
*
)
taosLRUCacheValue
(
pCache
,
h
);
pSyncNode
->
pLogStore
->
cacheHit
++
;
sNTrace
(
pSyncNode
,
"hit cache index:%"
PRId64
", bytes:%u, %p"
,
index
,
pEntry
->
bytes
,
pEntry
);
}
else
{
pSyncNode
->
pLogStore
->
cacheMiss
++
;
sNTrace
(
pSyncNode
,
"miss cache index:%"
PRId64
,
index
);
int32_t
code
=
pSyncNode
->
pLogStore
->
syncLogGetEntry
(
pSyncNode
->
pLogStore
,
index
,
&
pEntry
);
if
(
code
!=
0
)
{
sNError
(
pSyncNode
,
"advance commit index error, read wal index:%"
PRId64
,
index
);
return
;
}
}
// cannot commit, even if quorum agree. need check term!
if
(
pEntry
->
term
<=
pSyncNode
->
raftStore
.
currentTerm
)
{
// update commit index
newCommitIndex
=
index
;
if
(
h
)
{
taosLRUCacheRelease
(
pCache
,
h
,
false
);
}
else
{
syncEntryDestroy
(
pEntry
);
}
break
;
}
else
{
sNTrace
(
pSyncNode
,
"can not commit due to term not equal, index:%"
PRId64
", term:%"
PRIu64
,
pEntry
->
index
,
pEntry
->
term
);
}
if
(
h
)
{
taosLRUCacheRelease
(
pCache
,
h
,
false
);
}
else
{
syncEntryDestroy
(
pEntry
);
}
}
}
// advance commit index as large as possible
SyncIndex
walCommitVer
=
logStoreWalCommitVer
(
pSyncNode
->
pLogStore
);
if
(
walCommitVer
>
newCommitIndex
)
{
newCommitIndex
=
walCommitVer
;
}
// maybe execute fsm
if
(
newCommitIndex
>
pSyncNode
->
commitIndex
)
{
SyncIndex
beginIndex
=
pSyncNode
->
commitIndex
+
1
;
SyncIndex
endIndex
=
newCommitIndex
;
// update commit index
pSyncNode
->
commitIndex
=
newCommitIndex
;
// call back Wal
pSyncNode
->
pLogStore
->
syncLogUpdateCommitIndex
(
pSyncNode
->
pLogStore
,
pSyncNode
->
commitIndex
);
// execute fsm
if
(
pSyncNode
!=
NULL
&&
pSyncNode
->
pFsm
!=
NULL
)
{
int32_t
code
=
syncNodeDoCommit
(
pSyncNode
,
beginIndex
,
endIndex
,
pSyncNode
->
state
);
if
(
code
!=
0
)
{
sNError
(
pSyncNode
,
"advance commit index error, do commit begin:%"
PRId64
", end:%"
PRId64
,
beginIndex
,
endIndex
);
return
;
}
}
}
}
bool
syncAgreeIndex
(
SSyncNode
*
pSyncNode
,
SRaftId
*
pRaftId
,
SyncIndex
index
)
{
// I am leader, I agree
...
...
@@ -210,83 +68,7 @@ static inline int64_t syncNodeAbs64(int64_t a, int64_t b) {
return
c
;
}
int32_t
syncNodeDynamicQuorum
(
const
SSyncNode
*
pSyncNode
)
{
return
pSyncNode
->
quorum
;
#if 0
int32_t quorum = 1; // self
int64_t timeNow = taosGetTimestampMs();
for (int i = 0; i < pSyncNode->peersNum; ++i) {
int64_t peerStartTime = syncIndexMgrGetStartTime(pSyncNode->pNextIndex, &(pSyncNode->peersId)[i]);
int64_t peerRecvTime = syncIndexMgrGetRecvTime(pSyncNode->pNextIndex, &(pSyncNode->peersId)[i]);
SyncIndex peerMatchIndex = syncIndexMgrGetIndex(pSyncNode->pMatchIndex, &(pSyncNode->peersId)[i]);
int64_t recvTimeDiff = TABS(peerRecvTime - timeNow);
int64_t startTimeDiff = TABS(peerStartTime - pSyncNode->startTime);
int64_t logDiff = TABS(peerMatchIndex - syncNodeGetLastIndex(pSyncNode));
/*
int64_t recvTimeDiff = syncNodeAbs64(peerRecvTime, timeNow);
int64_t startTimeDiff = syncNodeAbs64(peerStartTime, pSyncNode->startTime);
int64_t logDiff = syncNodeAbs64(peerMatchIndex, syncNodeGetLastIndex(pSyncNode));
*/
int32_t addQuorum = 0;
if (recvTimeDiff < SYNC_MAX_RECV_TIME_RANGE_MS) {
if (startTimeDiff < SYNC_MAX_START_TIME_RANGE_MS) {
addQuorum = 1;
} else {
if (logDiff < SYNC_ADD_QUORUM_COUNT) {
addQuorum = 1;
} else {
addQuorum = 0;
}
}
} else {
addQuorum = 0;
}
/*
if (recvTimeDiff < SYNC_MAX_RECV_TIME_RANGE_MS) {
addQuorum = 1;
} else {
addQuorum = 0;
}
if (startTimeDiff > SYNC_MAX_START_TIME_RANGE_MS) {
addQuorum = 0;
}
*/
quorum += addQuorum;
}
ASSERT(quorum <= pSyncNode->replicaNum);
if (quorum < pSyncNode->quorum) {
quorum = pSyncNode->quorum;
}
return quorum;
#endif
}
/*
bool syncAgree(SSyncNode* pSyncNode, SyncIndex index) {
int agreeCount = 0;
for (int i = 0; i < pSyncNode->replicaNum; ++i) {
if (syncAgreeIndex(pSyncNode, &(pSyncNode->replicasId[i]), index)) {
++agreeCount;
}
if (agreeCount >= syncNodeDynamicQuorum(pSyncNode)) {
return true;
}
}
return false;
}
*/
int32_t
syncNodeDynamicQuorum
(
const
SSyncNode
*
pSyncNode
)
{
return
pSyncNode
->
quorum
;
}
bool
syncNodeAgreedUpon
(
SSyncNode
*
pNode
,
SyncIndex
index
)
{
int
count
=
0
;
...
...
source/libs/sync/src/syncElection.c
浏览文件 @
677a27a0
...
...
@@ -43,7 +43,10 @@ static int32_t syncNodeRequestVotePeers(SSyncNode* pNode) {
for
(
int
i
=
0
;
i
<
pNode
->
peersNum
;
++
i
)
{
SRpcMsg
rpcMsg
=
{
0
};
ret
=
syncBuildRequestVote
(
&
rpcMsg
,
pNode
->
vgId
);
ASSERT
(
ret
==
0
);
if
(
ret
<
0
)
{
sError
(
"vgId:%d, failed to build request-vote msg since %s"
,
pNode
->
vgId
,
terrstr
());
continue
;
}
SyncRequestVote
*
pMsg
=
rpcMsg
.
pCont
;
pMsg
->
srcId
=
pNode
->
myRaftId
;
...
...
@@ -51,13 +54,18 @@ static int32_t syncNodeRequestVotePeers(SSyncNode* pNode) {
pMsg
->
term
=
pNode
->
raftStore
.
currentTerm
;
ret
=
syncNodeGetLastIndexTerm
(
pNode
,
&
pMsg
->
lastLogIndex
,
&
pMsg
->
lastLogTerm
);
ASSERT
(
ret
==
0
);
if
(
ret
<
0
)
{
sError
(
"vgId:%d, failed to get index and term of last log since %s"
,
pNode
->
vgId
,
terrstr
());
continue
;
}
ret
=
syncNodeSendMsgById
(
&
pNode
->
peersId
[
i
],
pNode
,
&
rpcMsg
);
ASSERT
(
ret
==
0
);
if
(
ret
<
0
)
{
sError
(
"vgId:%d, failed to send msg to peerId:%"
PRId64
,
pNode
->
vgId
,
pNode
->
peersId
[
i
].
addr
);
continue
;
}
}
return
ret
;
return
0
;
}
int32_t
syncNodeElect
(
SSyncNode
*
pSyncNode
)
{
...
...
source/libs/sync/src/syncMain.c
浏览文件 @
677a27a0
...
...
@@ -292,8 +292,6 @@ int32_t syncBeginSnapshot(int64_t rid, int64_t lastApplyIndex) {
goto
_DEL_WAL
;
}
else
{
lastApplyIndex
-=
SYNC_VNODE_LOG_RETENTION
;
SyncIndex
beginIndex
=
pSyncNode
->
pLogStore
->
syncLogBeginIndex
(
pSyncNode
->
pLogStore
);
SyncIndex
endIndex
=
pSyncNode
->
pLogStore
->
syncLogEndIndex
(
pSyncNode
->
pLogStore
);
bool
isEmpty
=
pSyncNode
->
pLogStore
->
syncLogIsEmpty
(
pSyncNode
->
pLogStore
);
...
...
@@ -308,6 +306,8 @@ int32_t syncBeginSnapshot(int64_t rid, int64_t lastApplyIndex) {
if
(
pSyncNode
->
replicaNum
>
1
)
{
// multi replicas
lastApplyIndex
=
TMAX
(
lastApplyIndex
-
SYNC_VNODE_LOG_RETENTION
,
beginIndex
-
1
);
if
(
pSyncNode
->
state
==
TAOS_SYNC_STATE_LEADER
)
{
pSyncNode
->
minMatchIndex
=
syncMinMatchIndex
(
pSyncNode
);
...
...
@@ -586,78 +586,6 @@ SSyncState syncGetState(int64_t rid) {
return
state
;
}
#if 0
int32_t syncGetSnapshotByIndex(int64_t rid, SyncIndex index, SSnapshot* pSnapshot) {
if (index < SYNC_INDEX_BEGIN) {
return -1;
}
SSyncNode* pSyncNode = syncNodeAcquire(rid);
if (pSyncNode == NULL) {
return -1;
}
ASSERT(rid == pSyncNode->rid);
SSyncRaftEntry* pEntry = NULL;
int32_t code = pSyncNode->pLogStore->syncLogGetEntry(pSyncNode->pLogStore, index, &pEntry);
if (code != 0) {
if (pEntry != NULL) {
syncEntryDestroy(pEntry);
}
syncNodeRelease(pSyncNode);
return -1;
}
ASSERT(pEntry != NULL);
pSnapshot->data = NULL;
pSnapshot->lastApplyIndex = index;
pSnapshot->lastApplyTerm = pEntry->term;
pSnapshot->lastConfigIndex = syncNodeGetSnapshotConfigIndex(pSyncNode, index);
syncEntryDestroy(pEntry);
syncNodeRelease(pSyncNode);
return 0;
}
int32_t syncGetSnapshotMeta(int64_t rid, struct SSnapshotMeta* sMeta) {
SSyncNode* pSyncNode = syncNodeAcquire(rid);
if (pSyncNode == NULL) {
return -1;
}
ASSERT(rid == pSyncNode->rid);
sMeta->lastConfigIndex = pSyncNode->raftCfg.lastConfigIndex;
sTrace("vgId:%d, get snapshot meta, lastConfigIndex:%" PRId64, pSyncNode->vgId, pSyncNode->raftCfg.lastConfigIndex);
syncNodeRelease(pSyncNode);
return 0;
}
int32_t syncGetSnapshotMetaByIndex(int64_t rid, SyncIndex snapshotIndex, struct SSnapshotMeta* sMeta) {
SSyncNode* pSyncNode = syncNodeAcquire(rid);
if (pSyncNode == NULL) {
return -1;
}
ASSERT(rid == pSyncNode->rid);
ASSERT(pSyncNode->raftCfg.configIndexCount >= 1);
SyncIndex lastIndex = (pSyncNode->raftCfg.configIndexArr)[0];
for (int32_t i = 0; i < pSyncNode->raftCfg.configIndexCount; ++i) {
if ((pSyncNode->raftCfg.configIndexArr)[i] > lastIndex &&
(pSyncNode->raftCfg.configIndexArr)[i] <= snapshotIndex) {
lastIndex = (pSyncNode->raftCfg.configIndexArr)[i];
}
}
sMeta->lastConfigIndex = lastIndex;
sTrace("vgId:%d, get snapshot meta by index:%" PRId64 " lcindex:%" PRId64, pSyncNode->vgId, snapshotIndex,
sMeta->lastConfigIndex);
syncNodeRelease(pSyncNode);
return 0;
}
#endif
SyncIndex
syncNodeGetSnapshotConfigIndex
(
SSyncNode
*
pSyncNode
,
SyncIndex
snapshotLastApplyIndex
)
{
ASSERT
(
pSyncNode
->
raftCfg
.
configIndexCount
>=
1
);
SyncIndex
lastIndex
=
(
pSyncNode
->
raftCfg
.
configIndexArr
)[
0
];
...
...
@@ -1042,9 +970,12 @@ SSyncNode* syncNodeOpen(SSyncInfo* pSyncInfo) {
pSyncNode
->
commitIndex
=
commitIndex
;
sInfo
(
"vgId:%d, sync node commitIndex initialized as %"
PRId64
,
pSyncNode
->
vgId
,
pSyncNode
->
commitIndex
);
// restore log store on need
if
(
syncNodeLogStoreRestoreOnNeed
(
pSyncNode
)
<
0
)
{
sError
(
"vgId:%d, failed to restore log store since %s."
,
pSyncNode
->
vgId
,
terrstr
());
goto
_error
;
}
// timer ms init
pSyncNode
->
pingBaseLine
=
PING_TIMER_MS
;
pSyncNode
->
electBaseLine
=
tsElectInterval
;
...
...
@@ -1107,10 +1038,16 @@ SSyncNode* syncNodeOpen(SSyncInfo* pSyncInfo) {
pSyncNode
->
changing
=
false
;
// replication mgr
syncNodeLogReplMgrInit
(
pSyncNode
);
if
(
syncNodeLogReplMgrInit
(
pSyncNode
)
<
0
)
{
sError
(
"vgId:%d, failed to init repl mgr since %s."
,
pSyncNode
->
vgId
,
terrstr
());
goto
_error
;
}
// peer state
syncNodePeerStateInit
(
pSyncNode
);
if
(
syncNodePeerStateInit
(
pSyncNode
)
<
0
)
{
sError
(
"vgId:%d, failed to init peer stat since %s."
,
pSyncNode
->
vgId
,
terrstr
());
goto
_error
;
}
//
// min match index
...
...
@@ -1205,27 +1142,10 @@ int32_t syncNodeStart(SSyncNode* pSyncNode) {
int32_t
ret
=
0
;
ret
=
syncNodeStartPingTimer
(
pSyncNode
);
ASSERT
(
ret
==
0
);
return
ret
;
}
void
syncNodeStartOld
(
SSyncNode
*
pSyncNode
)
{
// start raft
if
(
pSyncNode
->
replicaNum
==
1
)
{
raftStoreNextTerm
(
pSyncNode
);
syncNodeBecomeLeader
(
pSyncNode
,
"one replica start"
);
// Raft 3.6.2 Committing entries from previous terms
syncNodeAppendNoop
(
pSyncNode
);
syncMaybeAdvanceCommitIndex
(
pSyncNode
);
}
else
{
syncNodeBecomeFollower
(
pSyncNode
,
"first start"
);
if
(
ret
!=
0
)
{
sError
(
"vgId:%d, failed to start ping timer since %s"
,
pSyncNode
->
vgId
,
terrstr
());
}
int32_t
ret
=
0
;
ret
=
syncNodeStartPingTimer
(
pSyncNode
);
ASSERT
(
ret
==
0
);
return
ret
;
}
int32_t
syncNodeStartStandBy
(
SSyncNode
*
pSyncNode
)
{
...
...
@@ -1236,11 +1156,16 @@ int32_t syncNodeStartStandBy(SSyncNode* pSyncNode) {
// reset elect timer, long enough
int32_t
electMS
=
TIMER_MAX_MS
;
int32_t
ret
=
syncNodeRestartElectTimer
(
pSyncNode
,
electMS
);
ASSERT
(
ret
==
0
);
if
(
ret
<
0
)
{
sError
(
"vgId:%d, failed to restart elect timer since %s"
,
pSyncNode
->
vgId
,
terrstr
());
return
-
1
;
}
ret
=
0
;
ret
=
syncNodeStartPingTimer
(
pSyncNode
);
ASSERT
(
ret
==
0
);
if
(
ret
<
0
)
{
sError
(
"vgId:%d, failed to start ping timer since %s"
,
pSyncNode
->
vgId
,
terrstr
());
return
-
1
;
}
return
ret
;
}
...
...
@@ -1829,12 +1754,6 @@ void syncNodeBecomeLeader(SSyncNode* pSyncNode, const char* debugStr) {
pSyncNode
->
leaderCache
=
pSyncNode
->
myRaftId
;
for
(
int32_t
i
=
0
;
i
<
pSyncNode
->
pNextIndex
->
replicaNum
;
++
i
)
{
// maybe overwrite myself, no harm
// just do it!
// pSyncNode->pNextIndex->index[i] = pSyncNode->pLogStore->getLastIndex(pSyncNode->pLogStore) + 1;
// maybe wal is deleted
SyncIndex
lastIndex
;
SyncTerm
lastTerm
;
int32_t
code
=
syncNodeGetLastIndexTerm
(
pSyncNode
,
&
lastIndex
,
&
lastTerm
);
...
...
@@ -1896,7 +1815,11 @@ void syncNodeBecomeLeader(SSyncNode* pSyncNode, const char* debugStr) {
void
syncNodeCandidate2Leader
(
SSyncNode
*
pSyncNode
)
{
ASSERT
(
pSyncNode
->
state
==
TAOS_SYNC_STATE_CANDIDATE
);
ASSERT
(
voteGrantedMajority
(
pSyncNode
->
pVotesGranted
));
bool
granted
=
voteGrantedMajority
(
pSyncNode
->
pVotesGranted
);
if
(
!
granted
)
{
sError
(
"vgId:%d, not granted by majority."
,
pSyncNode
->
vgId
);
return
;
}
syncNodeBecomeLeader
(
pSyncNode
,
"candidate to leader"
);
sNTrace
(
pSyncNode
,
"state change syncNodeCandidate2Leader"
);
...
...
@@ -1912,20 +1835,6 @@ void syncNodeCandidate2Leader(SSyncNode* pSyncNode) {
pSyncNode
->
vgId
,
pSyncNode
->
raftStore
.
currentTerm
,
pSyncNode
->
commitIndex
,
lastIndex
);
}
// Legacy candidate -> leader transition.
//
// Asserts that the node is currently a candidate and that its granted votes
// form a majority, switches it to leader, appends a no-op entry, tries to
// advance the commit index and, when the group has more than one replica,
// starts replication.
//
// pSyncNode  the sync node being promoted
void syncNodeCandidate2LeaderOld(SSyncNode* pSyncNode) {
  ASSERT(pSyncNode->state == TAOS_SYNC_STATE_CANDIDATE);
  ASSERT(voteGrantedMajority(pSyncNode->pVotesGranted));

  syncNodeBecomeLeader(pSyncNode, "candidate to leader");

  // Raft 3.6.2: entries from previous terms are committed through a fresh
  // entry of the current term, hence the no-op append.
  syncNodeAppendNoop(pSyncNode);
  syncMaybeAdvanceCommitIndex(pSyncNode);

  // Nothing to replicate to when this node is the only replica.
  if (pSyncNode->replicaNum <= 1) {
    return;
  }
  syncNodeReplicate(pSyncNode);
}
// Tell whether this sync node is the mnode, which this function identifies
// by vgId == 1.
//
// pSyncNode  the sync node to inspect
// Returns true when pSyncNode->vgId is 1, false otherwise.
bool syncNodeIsMnode(SSyncNode* pSyncNode) {
  if (pSyncNode->vgId != 1) {
    return false;
  }
  return true;
}
int32_t
syncNodePeerStateInit
(
SSyncNode
*
pSyncNode
)
{
...
...
@@ -1971,7 +1880,8 @@ void syncNodeCandidate2Follower(SSyncNode* pSyncNode) {
// need assert
// Cast this node's vote for pRaftId in the given term.
//
// pSyncNode  the voting sync node
// term       term being voted in; asserted to equal raftStore.currentTerm
// pRaftId    raft id of the candidate receiving the vote
//
// The node must not have voted yet; this is asserted before the vote is
// stored through raftStoreVote().
void syncNodeVoteForTerm(SSyncNode* pSyncNode, SyncTerm term, SRaftId* pRaftId) {
  ASSERT(term == pSyncNode->raftStore.currentTerm);

  // Query the vote state once, outside the ASSERT expression, instead of
  // checking the same condition twice.
  bool voted = raftStoreHasVoted(pSyncNode);
  ASSERT(!voted);

  raftStoreVote(pSyncNode, pRaftId);
}
...
...
@@ -2649,24 +2559,6 @@ int32_t syncNodeOnLocalCmd(SSyncNode* ths, const SRpcMsg* pRpcMsg) {
return
0
;
}
// Deprecated handler for local sync commands; trips
// ASSERT(false && "deprecated") on entry.
//
// ths      local sync node
// pRpcMsg  message whose pCont is read as a SyncLocalCmd
//
// Logs the received command, then dispatches on its cmd field: a step-down
// request goes to syncNodeStepDown() with the message's currentTerm, a
// follower-commit request goes to syncNodeFollowerCommit() with the message's
// commitIndex, and anything else is only logged as an error.
//
// Always returns 0.
int32_t syncNodeOnLocalCmdOld(SSyncNode* ths, const SRpcMsg* pRpcMsg) {
  ASSERT(false && "deprecated");

  SyncLocalCmd* pCmd = pRpcMsg->pCont;
  syncLogRecvLocalCmd(ths, pCmd, "");

  switch (pCmd->cmd) {
    case SYNC_LOCAL_CMD_STEP_DOWN:
      syncNodeStepDown(ths, pCmd->currentTerm);
      break;
    case SYNC_LOCAL_CMD_FOLLOWER_CMT:
      syncNodeFollowerCommit(ths, pCmd->commitIndex);
      break;
    default:
      sError("error local cmd");
      break;
  }

  return 0;
}
// TLA+ Spec
// ClientRequest(i, v) ==
// /\ state[i] = Leader
...
...
@@ -2711,96 +2603,6 @@ int32_t syncNodeOnClientRequest(SSyncNode* ths, SRpcMsg* pMsg, SyncIndex* pRetIn
}
}
// Legacy client-request handler.
//
// Builds a raft entry for pMsg at the log store's current write index and the
// node's current term. When the node is leader, the entry is appended to the
// log store, cached, and then either replicated (more than one replica) or
// committed locally (single replica).
//
// ths        local sync node
// pMsg       incoming request; a TDMT_SYNC_CLIENT_REQUEST is unpacked from
//            pMsg->pCont, any other type is wrapped as-is
// pRetIndex  optional out-parameter; receives the entry index, or
//            SYNC_INDEX_INVALID when no entry was built
//
// Returns 0 normally and -1 when appending to the log store fails. On that
// failure with more than one replica, FpCommitCb is invoked with code -1
// before returning.
int32_t syncNodeOnClientRequestOld(SSyncNode* ths, SRpcMsg* pMsg, SyncIndex* pRetIndex) {
  sNTrace(ths, "on client request");

  int32_t    ret = 0;
  LRUHandle* hCache = NULL;

  SyncIndex writeIndex = ths->pLogStore->syncLogWriteIndex(ths->pLogStore);
  SyncTerm  curTerm = ths->raftStore.currentTerm;

  SSyncRaftEntry* pEntry = (pMsg->msgType == TDMT_SYNC_CLIENT_REQUEST)
                               ? syncEntryBuildFromClientRequest(pMsg->pCont, curTerm, writeIndex)
                               : syncEntryBuildFromRpcMsg(pMsg, curTerm, writeIndex);

  if (ths->state == TAOS_SYNC_STATE_LEADER) {
    // append entry
    int32_t code = ths->pLogStore->syncLogAppendEntry(ths->pLogStore, pEntry, false);
    if (code != 0) {
      if (ths->replicaNum != 1) {
        // del resp mgr, call FpCommitCb
        SFsmCbMeta cbMeta = {
            .index = pEntry->index,
            .lastConfigIndex = SYNC_INDEX_INVALID,
            .isWeak = pEntry->isWeak,
            .code = -1,
            .state = ths->state,
            .seqNum = pEntry->seqNum,
            .term = pEntry->term,
            .currentTerm = ths->raftStore.currentTerm,
            .flag = 0,
        };
        ths->pFsm->FpCommitCb(ths->pFsm, pMsg, &cbMeta);
      }

      // The entry has not been cached yet (the handle is still NULL at this
      // point), so it is destroyed directly.
      syncEntryDestroy(pEntry);
      return -1;
    }

    syncCacheEntry(ths->pLogStore, pEntry, &hCache);

    if (ths->replicaNum > 1) {
      // multiple replicas: start replicating right away
      syncNodeReplicate(ths);
    }

    if (ths->replicaNum == 1) {
      // single replica: the entry may be committed right away
      if (syncNodeIsMnode(ths)) {
        syncMaybeAdvanceCommitIndex(ths);
      } else {
        syncOneReplicaAdvance(ths);
      }
    }
  }

  if (pRetIndex != NULL) {
    *pRetIndex = (ret == 0 && pEntry != NULL) ? pEntry->index : SYNC_INDEX_INVALID;
  }

  // Drop the cache reference when one was taken; otherwise the entry is
  // destroyed here.
  if (hCache == NULL) {
    syncEntryDestroy(pEntry);
  } else {
    taosLRUCacheRelease(ths->pLogStore->pCache, hCache, false);
  }

  return ret;
}
const
char
*
syncStr
(
ESyncState
state
)
{
switch
(
state
)
{
case
TAOS_SYNC_STATE_FOLLOWER
:
...
...
@@ -2905,129 +2707,6 @@ bool syncNodeIsOptimizedOneReplica(SSyncNode* ths, SRpcMsg* pMsg) {
return
(
ths
->
replicaNum
==
1
&&
syncUtilUserCommit
(
pMsg
->
msgType
)
&&
ths
->
vgId
!=
1
);
}
// Deprecated commit routine; trips ASSERT(false) on entry.
//
// Walks the log indexes in [beginIndex, endIndex] and, for each entry, calls
// the state machine's FpCommitCb for user-commit message types. When the FSM
// reports a snapshot whose lastApplyIndex is at or beyond beginIndex, the walk
// starts right after that index instead.
//
// ths         local sync node (NULL yields -1)
// beginIndex  first index to commit
// endIndex    last index to commit (inclusive)
// flag        forwarded to the callback through SFsmCbMeta.flag
//
// Returns 0 when the range is empty or after the walk, and -1 when ths is
// NULL. Entries that cannot be loaded are logged and skipped.
int32_t syncNodeDoCommit(SSyncNode* ths, SyncIndex beginIndex, SyncIndex endIndex, uint64_t flag) {
  ASSERT(false);

  if (beginIndex > endIndex) {
    return 0;
  }
  if (ths == NULL) {
    return -1;
  }

  // advance commit index to snapshot first
  if (ths->pFsm != NULL && ths->pFsm->FpGetSnapshotInfo != NULL) {
    SSnapshot snap = {0};
    ths->pFsm->FpGetSnapshotInfo(ths->pFsm, &snap);

    if (snap.lastApplyIndex >= 0 && snap.lastApplyIndex >= beginIndex) {
      sNTrace(ths, "commit by snapshot from index:%" PRId64 " to index:%" PRId64, beginIndex, snap.lastApplyIndex);
      // everything up to the snapshot is already applied
      beginIndex = snap.lastApplyIndex + 1;
    }
  }

  int32_t    code = 0;
  ESyncState state = flag;
  sNTrace(ths, "commit by wal from index:%" PRId64 " to index:%" PRId64, beginIndex, endIndex);

  // execute fsm
  if (ths->pFsm == NULL) {
    return 0;
  }

  for (SyncIndex idx = beginIndex; idx <= endIndex; ++idx) {
    if (idx == SYNC_INDEX_INVALID) {
      continue;
    }

    SSyncRaftEntry* pEntry;
    SLRUCache*      pLru = ths->pLogStore->pCache;
    LRUHandle*      hEntry = taosLRUCacheLookup(pLru, &idx, sizeof(idx));

    if (hEntry == NULL) {
      ths->pLogStore->cacheMiss++;
      sNTrace(ths, "miss cache index:%" PRId64, idx);

      code = ths->pLogStore->syncLogGetEntry(ths->pLogStore, idx, &pEntry);
      if (code != 0 || pEntry == NULL) {
        sNError(ths, "get log entry error");
        sFatal("vgId:%d, get log entry %" PRId64 " error when commit since %s", ths->vgId, idx, terrstr());
        continue;
      }
    } else {
      pEntry = (SSyncRaftEntry*)taosLRUCacheValue(pLru, hEntry);
      ths->pLogStore->cacheHit++;
      sNTrace(ths, "hit cache index:%" PRId64 ", bytes:%u, %p", idx, pEntry->bytes, pEntry);
    }

    SRpcMsg rpcMsg = {0};
    syncEntry2OriginalRpc(pEntry, &rpcMsg);

    sTrace("do commit index:%" PRId64 ", type:%s", idx, TMSG_INFO(pEntry->msgType));

    // user commit
    if ((ths->pFsm->FpCommitCb != NULL) && syncUtilUserCommit(pEntry->originalRpcType)) {
      // The callback is skipped here only for a restored single-replica node
      // whose vgId is not 1.
      bool internalExecute = !((ths->replicaNum == 1) && ths->restoreFinish && ths->vgId != 1);

      sNTrace(ths, "user commit index:%" PRId64 ", internal:%d, type:%s", idx, internalExecute,
              TMSG_INFO(pEntry->msgType));

      // execute fsm in apply thread, or execute outside syncPropose
      if (internalExecute) {
        SFsmCbMeta cbMeta = {
            .index = pEntry->index,
            .lastConfigIndex = syncNodeGetSnapshotConfigIndex(ths, pEntry->index),
            .isWeak = pEntry->isWeak,
            .code = 0,
            .state = ths->state,
            .seqNum = pEntry->seqNum,
            .term = pEntry->term,
            .currentTerm = ths->raftStore.currentTerm,
            .flag = flag,
        };

        syncRespMgrGetAndDel(ths->pSyncRespMgr, cbMeta.seqNum, &rpcMsg.info);
        ths->pFsm->FpCommitCb(ths->pFsm, &rpcMsg, &cbMeta);
      }
    }

#if 0
    // execute in pre-commit
    // leader transfer
    if (pEntry->originalRpcType == TDMT_SYNC_LEADER_TRANSFER) {
      code = syncDoLeaderTransfer(ths, &rpcMsg, pEntry);
      ASSERT(code == 0);
    }
#endif

    // restore finish
    // if only snapshot, a noop entry will be append, so syncLogLastIndex is always ok
    SyncIndex lastLogIndex = ths->pLogStore->syncLogLastIndex(ths->pLogStore);
    if (pEntry->index == lastLogIndex && ths->restoreFinish == false) {
      if (ths->pFsm->FpRestoreFinishCb != NULL) {
        ths->pFsm->FpRestoreFinishCb(ths->pFsm);
      }
      ths->restoreFinish = true;

      int64_t restoreDelay = taosGetTimestampMs() - ths->leaderTime;
      sNTrace(ths, "restore finish, index:%" PRId64 ", elapsed:%" PRId64 " ms", pEntry->index, restoreDelay);
    }

    rpcFreeCont(rpcMsg.pCont);

    // Drop the cache reference for a cache hit; destroy the entry otherwise.
    if (hEntry == NULL) {
      syncEntryDestroy(pEntry);
    } else {
      taosLRUCacheRelease(pLru, hEntry, false);
    }
  }

  return 0;
}
bool
syncNodeInRaftGroup
(
SSyncNode
*
ths
,
SRaftId
*
pRaftId
)
{
for
(
int32_t
i
=
0
;
i
<
ths
->
replicaNum
;
++
i
)
{
if
(
syncUtilSameId
(
&
((
ths
->
replicasId
)[
i
]),
pRaftId
))
{
...
...
source/libs/sync/src/syncPipeline.c
浏览文件 @
677a27a0
...
...
@@ -945,8 +945,11 @@ int32_t syncNodeLogReplMgrInit(SSyncNode* pNode) {
for
(
int
i
=
0
;
i
<
TSDB_MAX_REPLICA
;
i
++
)
{
ASSERT
(
pNode
->
logReplMgrs
[
i
]
==
NULL
);
pNode
->
logReplMgrs
[
i
]
=
syncLogReplMgrCreate
();
if
(
pNode
->
logReplMgrs
[
i
]
==
NULL
)
{
terrno
=
TSDB_CODE_OUT_OF_MEMORY
;
return
-
1
;
}
pNode
->
logReplMgrs
[
i
]
->
peerId
=
i
;
ASSERTS
(
pNode
->
logReplMgrs
[
i
]
!=
NULL
,
"Out of memory."
);
}
return
0
;
}
...
...
source/libs/sync/src/syncReplication.c
浏览文件 @
677a27a0
...
...
@@ -48,92 +48,6 @@
int32_t
syncNodeMaybeSendAppendEntries
(
SSyncNode
*
pSyncNode
,
const
SRaftId
*
destRaftId
,
SRpcMsg
*
pRpcMsg
);
// Deprecated (trips ASSERT(false && "deprecated") on entry): builds one
// append-entries message for the peer pDestId, carrying the log entry at that
// peer's next index, and hands it to syncNodeMaybeSendAppendEntries().
//
// pSyncNode  local sync node
// pDestId    raft id of the target peer
// snapshot   when true, a snapshot is started if the peer's next index lies
//            outside the begin/end range reported by the log store
//
// Returns 0 after the message has been handed over, and -1 when the entry
// could not be loaded for a reason other than TSDB_CODE_WAL_LOG_NOT_EXIST.
// In the "log not exist" case an empty append-entries message is sent.
int32_t syncNodeReplicateOne(SSyncNode* pSyncNode, SRaftId* pDestId, bool snapshot) {
  ASSERT(false && "deprecated");

  // next index
  SyncIndex nextIndex = syncIndexMgrGetIndex(pSyncNode->pNextIndex, pDestId);

  if (snapshot) {
    // maybe start snapshot
    SyncIndex logBegin = pSyncNode->pLogStore->syncLogBeginIndex(pSyncNode->pLogStore);
    SyncIndex logEnd = pSyncNode->pLogStore->syncLogEndIndex(pSyncNode->pLogStore);
    bool      outOfRange = (nextIndex < logBegin) || (nextIndex - 1 > logEnd);

    if (outOfRange) {
      sNTrace(pSyncNode, "maybe start snapshot for next-index:%" PRId64 ", start:%" PRId64 ", end:%" PRId64, nextIndex,
              logBegin, logEnd);
      // the result of starting the snapshot is not inspected
      (void)syncNodeStartSnapshot(pSyncNode, pDestId);
    }
  }

  // pre index, pre term
  SyncIndex preLogIndex = syncNodeGetPreIndex(pSyncNode, nextIndex);
  SyncTerm  preLogTerm = syncNodeGetPreTerm(pSyncNode, nextIndex);

  // prepare entry: cache first, log store on a miss
  SRpcMsg         rpcMsg = {0};
  SSyncRaftEntry* pEntry = NULL;
  SLRUCache*      pLru = pSyncNode->pLogStore->pCache;
  LRUHandle*      hEntry = taosLRUCacheLookup(pLru, &nextIndex, sizeof(nextIndex));
  int32_t         code = 0;

  if (hEntry == NULL) {
    pSyncNode->pLogStore->cacheMiss++;
    sNTrace(pSyncNode, "miss cache index:%" PRId64, nextIndex);
    code = pSyncNode->pLogStore->syncLogGetEntry(pSyncNode->pLogStore, nextIndex, &pEntry);
  } else {
    pEntry = (SSyncRaftEntry*)taosLRUCacheValue(pLru, hEntry);
    pSyncNode->pLogStore->cacheHit++;
    sNTrace(pSyncNode, "hit cache index:%" PRId64 ", bytes:%u, %p", nextIndex, pEntry->bytes, pEntry);
  }

  // Any load failure other than "log not exist" aborts the replication.
  if (code != 0 && terrno != TSDB_CODE_WAL_LOG_NOT_EXIST) {
    sNError(pSyncNode, "replicate to dnode:%d error, next-index:%" PRId64, DID(pDestId), nextIndex);
    return -1;
  }

  SyncAppendEntries* pMsg = NULL;
  if (code == 0) {
    ASSERT(pEntry != NULL);
    code = syncBuildAppendEntries(&rpcMsg, (int32_t)(pEntry->bytes), pSyncNode->vgId);
    ASSERT(code == 0);

    pMsg = rpcMsg.pCont;
    memcpy(pMsg->data, pEntry, pEntry->bytes);
  } else {
    // no entry in log: send a message without payload
    code = syncBuildAppendEntries(&rpcMsg, 0, pSyncNode->vgId);
    ASSERT(code == 0);

    pMsg = rpcMsg.pCont;
  }

  // Drop the cache reference for a cache hit; destroy the entry otherwise.
  if (hEntry == NULL) {
    syncEntryDestroy(pEntry);
  } else {
    taosLRUCacheRelease(pLru, hEntry, false);
  }

  // prepare msg
  ASSERT(pMsg != NULL);
  pMsg->srcId = pSyncNode->myRaftId;
  pMsg->destId = *pDestId;
  pMsg->term = pSyncNode->raftStore.currentTerm;
  pMsg->prevLogIndex = preLogIndex;
  pMsg->prevLogTerm = preLogTerm;
  pMsg->commitIndex = pSyncNode->commitIndex;
  pMsg->privateTerm = 0;

  // send msg
  syncNodeMaybeSendAppendEntries(pSyncNode, pDestId, &rpcMsg);

  return 0;
}
int32_t
syncNodeReplicate
(
SSyncNode
*
pNode
)
{
SSyncLogBuffer
*
pBuf
=
pNode
->
pLogBuf
;
taosThreadMutexLock
(
&
pBuf
->
mutex
);
...
...
@@ -156,25 +70,6 @@ int32_t syncNodeReplicateWithoutLock(SSyncNode* pNode) {
return
0
;
}
// Legacy replication driver: when this node is leader, calls
// syncNodeReplicateOne() (with snapshot enabled) for every peer.
//
// pSyncNode  local sync node
//
// Returns -1 when the node is not leader, 0 otherwise. A failure for an
// individual peer is logged and does not change the return value.
int32_t syncNodeReplicateOld(SSyncNode* pSyncNode) {
  if (pSyncNode->state != TAOS_SYNC_STATE_LEADER) {
    return -1;
  }

  sNTrace(pSyncNode, "do replicate");

  int peer = 0;
  while (peer < pSyncNode->peersNum) {
    SRaftId* pPeerId = &(pSyncNode->peersId[peer]);

    int32_t rc = syncNodeReplicateOne(pSyncNode, pPeerId, true);
    if (rc != 0) {
      sError("vgId:%d, do append entries error for dnode:%d", pSyncNode->vgId, DID(pPeerId));
    }

    ++peer;
  }

  return 0;
}
int32_t
syncNodeSendAppendEntries
(
SSyncNode
*
pSyncNode
,
const
SRaftId
*
destRaftId
,
SRpcMsg
*
pRpcMsg
)
{
SyncAppendEntries
*
pMsg
=
pRpcMsg
->
pCont
;
pMsg
->
destId
=
*
destRaftId
;
...
...
@@ -182,39 +77,6 @@ int32_t syncNodeSendAppendEntries(SSyncNode* pSyncNode, const SRaftId* destRaftI
return
0
;
}
// Legacy sender for an append-entries message.
//
// pSyncNode   local sync node
// destRaftId  raft id of the receiving peer
// pRpcMsg     message whose pCont is read as a SyncAppendEntries
//
// Logs and sends the message. When it carries data (dataLen > 0), the peer
// state's lastSendIndex is set to prevLogIndex + 1 and lastSendTime to the
// current timestamp after the send.
//
// Always returns 0, including when the message content is NULL or no peer
// state exists for destRaftId (both cases are only logged).
int32_t syncNodeSendAppendEntriesOld(SSyncNode* pSyncNode, const SRaftId* destRaftId, SRpcMsg* pRpcMsg) {
  SyncAppendEntries* pAppend = pRpcMsg->pCont;
  if (pAppend == NULL) {
    sError("vgId:%d, sync-append-entries msg is NULL", pSyncNode->vgId);
    return 0;
  }

  SPeerState* pPeer = syncNodeGetPeerState(pSyncNode, destRaftId);
  if (pPeer == NULL) {
    sError("vgId:%d, replica maybe dropped", pSyncNode->vgId);
    return 0;
  }

  // Capture what is needed from the message before sending; the original
  // code notes that the message will be freed by rpc.
  bool      hasData = (pAppend->dataLen > 0);
  SyncIndex sentIndex = hasData ? (pAppend->prevLogIndex + 1) : pPeer->lastSendIndex;

  syncLogSendAppendEntries(pSyncNode, pAppend, "");
  syncNodeSendMsgById(destRaftId, pSyncNode, pRpcMsg);

  if (hasData) {
    pPeer->lastSendIndex = sentIndex;
    pPeer->lastSendTime = taosGetTimestampMs();
  }

  return 0;
}
int32_t
syncNodeMaybeSendAppendEntries
(
SSyncNode
*
pSyncNode
,
const
SRaftId
*
destRaftId
,
SRpcMsg
*
pRpcMsg
)
{
int32_t
ret
=
0
;
SyncAppendEntries
*
pMsg
=
pRpcMsg
->
pCont
;
...
...
source/libs/wal/src/walMeta.c
浏览文件 @
677a27a0
...
...
@@ -322,6 +322,35 @@ bool walLogEntriesComplete(const SWal* pWal) {
return
complete
;
}
// Truncate the idx file that belongs to the WAL file at position fileIdx so
// that it holds no more index entries than the file info accounts for.
//
// The expected size is (lastVer - firstVer + 1) * sizeof(SWalIdxEntry),
// clamped at zero records. A file that is not larger than that is left
// untouched.
//
// pWal     the WAL instance
// fileIdx  position of the file in pWal->fileInfoSet
//
// Returns 0 when nothing had to be trimmed or the truncate succeeded, and -1
// with terrno set when the idx file cannot be opened or truncated.
int walTrimIdxFile(SWal* pWal, int32_t fileIdx) {
  SWalFileInfo* pFileInfo = taosArrayGet(pWal->fileInfoSet, fileIdx);
  ASSERT(pFileInfo != NULL);

  char fnameStr[WAL_FILE_LEN];
  walBuildIdxName(pWal, pFileInfo->firstVer, fnameStr);

  // fileSize stays 0 when the stat call does not fill it in, in which case
  // the size check below treats the file as having nothing to trim.
  int64_t fileSize = 0;
  taosStatFile(fnameStr, &fileSize, NULL);

  int64_t records = TMAX(0, pFileInfo->lastVer - pFileInfo->firstVer + 1);
  int64_t lastEndOffset = records * sizeof(SWalIdxEntry);

  if (fileSize <= lastEndOffset) {
    return 0;
  }

  TdFilePtr pFile = taosOpenFile(fnameStr, TD_FILE_READ | TD_FILE_WRITE);
  if (pFile == NULL) {
    terrno = TAOS_SYSTEM_ERROR(errno);
    return -1;
  }

  wInfo("vgId:%d, trim idx file. file: %s, size: %" PRId64 ", offset: %" PRId64, pWal->cfg.vgId, fnameStr, fileSize,
        lastEndOffset);

  // A failed truncate must not be reported as success: record the error
  // before closing the file, since closing may overwrite errno.
  if (taosFtruncateFile(pFile, lastEndOffset) < 0) {
    terrno = TAOS_SYSTEM_ERROR(errno);
    wError("vgId:%d, failed to truncate idx file since %s. file:%s", pWal->cfg.vgId, terrstr(), fnameStr);
    taosCloseFile(&pFile);
    return -1;
  }

  taosCloseFile(&pFile);
  return 0;
}
int
walCheckAndRepairMeta
(
SWal
*
pWal
)
{
// load log files, get first/snapshot/last version info
const
char
*
logPattern
=
"^[0-9]+.log$"
;
...
...
@@ -396,6 +425,8 @@ int walCheckAndRepairMeta(SWal* pWal) {
}
updateMeta
=
true
;
(
void
)
walTrimIdxFile
(
pWal
,
fileIdx
);
int64_t
lastVer
=
walScanLogGetLastVer
(
pWal
,
fileIdx
);
if
(
lastVer
<
0
)
{
if
(
terrno
!=
TSDB_CODE_WAL_LOG_NOT_EXIST
)
{
...
...
@@ -558,6 +589,7 @@ int walCheckAndRepairIdxFile(SWal* pWal, int32_t fileIdx) {
goto
_err
;
}
int64_t
count
=
0
;
while
(
idxEntry
.
ver
<
pFileInfo
->
lastVer
)
{
/*A(idxEntry.ver == ckHead.head.version);*/
...
...
@@ -569,11 +601,11 @@ int walCheckAndRepairIdxFile(SWal* pWal, int32_t fileIdx) {
idxEntry
.
offset
,
fLogNameStr
);
goto
_err
;
}
wWarn
(
"vgId:%d, wal idx append new entry %"
PRId64
" %"
PRId64
,
pWal
->
cfg
.
vgId
,
idxEntry
.
ver
,
idxEntry
.
offset
);
if
(
taosWriteFile
(
pIdxFile
,
&
idxEntry
,
sizeof
(
SWalIdxEntry
))
<
0
)
{
wError
(
"vgId:%d, failed to append file since %s. file:%s"
,
pWal
->
cfg
.
vgId
,
terrstr
(),
fnameStr
);
goto
_err
;
}
count
++
;
}
if
(
taosFsyncFile
(
pIdxFile
)
<
0
)
{
...
...
@@ -581,6 +613,11 @@ int walCheckAndRepairIdxFile(SWal* pWal, int32_t fileIdx) {
goto
_err
;
}
if
(
count
>
0
)
{
wInfo
(
"vgId:%d, rebuilt %"
PRId64
" wal idx entries until lastVer: %"
PRId64
,
pWal
->
cfg
.
vgId
,
count
,
pFileInfo
->
lastVer
);
}
(
void
)
taosCloseFile
(
&
pLogFile
);
(
void
)
taosCloseFile
(
&
pIdxFile
);
return
0
;
...
...
source/libs/wal/src/walRef.c
浏览文件 @
677a27a0
...
...
@@ -77,6 +77,31 @@ void walUnrefVer(SWalRef *pRef) {
}
#endif
// Point a WAL reference at the first version of the WAL.
//
// pWal  the WAL instance
// pRef  reference to update; when NULL a new one is obtained from
//       walOpenRef()
//
// Under pWal->mutex, stores walGetFirstVer() in refVer and the firstVer of
// the file info found by a TD_LE search of fileInfoSet in refFile.
//
// Returns the updated reference, or NULL when pRef was NULL and walOpenRef()
// failed.
SWalRef *walRefFirstVer(SWal *pWal, SWalRef *pRef) {
  SWalRef *pTarget = pRef;
  if (pTarget == NULL) {
    pTarget = walOpenRef(pWal);
  }
  if (pTarget == NULL) {
    return NULL;
  }

  taosThreadMutexLock(&pWal->mutex);

  int64_t firstVer = walGetFirstVer(pWal);
  wDebug("vgId:%d, wal ref version %" PRId64 " for first", pWal->cfg.vgId, firstVer);
  pTarget->refVer = firstVer;

  // bsearch in fileSet; only firstVer is set on the search key
  SWalFileInfo key;
  key.firstVer = firstVer;
  SWalFileInfo *pFound = taosArraySearch(pWal->fileInfoSet, &key, compareWalFileInfo, TD_LE);
  ASSERT(pFound != NULL);
  pTarget->refFile = pFound->firstVer;

  taosThreadMutexUnlock(&pWal->mutex);

  return pTarget;
}
SWalRef
*
walRefCommittedVer
(
SWal
*
pWal
)
{
SWalRef
*
pRef
=
walOpenRef
(
pWal
);
if
(
pRef
==
NULL
)
{
...
...
@@ -87,6 +112,8 @@ SWalRef *walRefCommittedVer(SWal *pWal) {
int64_t
ver
=
walGetCommittedVer
(
pWal
);
wDebug
(
"vgId:%d, wal ref version %"
PRId64
" for committed"
,
pWal
->
cfg
.
vgId
,
ver
);
pRef
->
refVer
=
ver
;
// bsearch in fileSet
SWalFileInfo
tmpInfo
;
...
...
source/os/src/osMath.c
浏览文件 @
677a27a0
...
...
@@ -15,8 +15,8 @@
#define ALLOW_FORBID_FUNC
#define _DEFAULT_SOURCE
#include "os.h"
#include <stdlib.h>
#include "os.h"
#ifdef WINDOWS
void
swapStr
(
char
*
j
,
char
*
J
,
int
width
)
{
...
...
source/util/src/talgo.c
浏览文件 @
677a27a0
...
...
@@ -41,12 +41,6 @@ static void median(void *src, int64_t size, int64_t s, int64_t e, const void *pa
ASSERT
(
comparFn
(
elePtrAt
(
src
,
size
,
mid
),
elePtrAt
(
src
,
size
,
s
),
param
)
<=
0
&&
comparFn
(
elePtrAt
(
src
,
size
,
s
),
elePtrAt
(
src
,
size
,
e
),
param
)
<=
0
);
#ifdef _DEBUG_VIEW
// tTagsPrints(src[s], pOrderDesc->pColumnModel, &pOrderDesc->orderIdx);
// tTagsPrints(src[mid], pOrderDesc->pColumnModel, &pOrderDesc->orderIdx);
// tTagsPrints(src[e], pOrderDesc->pColumnModel, &pOrderDesc->orderIdx);
#endif
}
static
void
tInsertSort
(
void
*
src
,
int64_t
size
,
int32_t
s
,
int32_t
e
,
const
void
*
param
,
__ext_compar_fn_t
comparFn
,
...
...
@@ -278,14 +272,4 @@ void taosheapsort(void *base, int32_t size, int32_t len, const void *parcompar,
}
taosMemoryFree
(
buf
);
/*
char *buf = taosMemoryCalloc(1, size);
for (i = len - 1; i > 0; i--) {
doswap(elePtrAt(base, size, 0), elePtrAt(base, size, i));
taosheapadjust(base, size, 0, i - 1, parcompar, compar, parswap, swap, maxroot);
}
taosMemoryFreeClear(buf);
*/
}
tests/parallel_test/container_build.sh
浏览文件 @
677a27a0
...
...
@@ -55,7 +55,7 @@ fi
date
docker run
\
-v
$REP_MOUNT_PARAM
\
--rm
--ulimit
core
=
-1
taos_test:v1.0 sh
-c
"cd
$REP_DIR
;rm -rf debug;mkdir -p debug;cd debug;cmake .. -DBUILD_HTTP=false -DBUILD_TOOLS=true -DBUILD_TEST=true -DWEBSOCKET=true;make -j || exit 1"
--rm
--ulimit
core
=
-1
taos_test:v1.0 sh
-c
"cd
$REP_DIR
;rm -rf debug;mkdir -p debug;cd debug;cmake .. -DBUILD_HTTP=false -DBUILD_TOOLS=true -DBUILD_TEST=true -DWEBSOCKET=true
-DBUILD_TAOSX=true
;make -j || exit 1"
if
[[
-d
${
WORKDIR
}
/debugNoSan
]]
;
then
echo
"delete
${
WORKDIR
}
/debugNoSan"
...
...
@@ -70,7 +70,7 @@ mv ${REP_REAL_PATH}/debug ${WORKDIR}/debugNoSan
date
docker run
\
-v
$REP_MOUNT_PARAM
\
--rm
--ulimit
core
=
-1
taos_test:v1.0 sh
-c
"cd
$REP_DIR
;rm -rf debug;mkdir -p debug;cd debug;cmake .. -DBUILD_HTTP=false -DBUILD_TOOLS=true -DBUILD_TEST=true -DWEBSOCKET=true -DBUILD_SANITIZER=1 -DTOOLS_SANITIZE=true -DTOOLS_BUILD_TYPE=Debug;make -j || exit 1 "
--rm
--ulimit
core
=
-1
taos_test:v1.0 sh
-c
"cd
$REP_DIR
;rm -rf debug;mkdir -p debug;cd debug;cmake .. -DBUILD_HTTP=false -DBUILD_TOOLS=true -DBUILD_TEST=true -DWEBSOCKET=true -DBUILD_SANITIZER=1 -DTOOLS_SANITIZE=true -DTOOLS_BUILD_TYPE=Debug
-DBUILD_TAOSX=true
;make -j || exit 1 "
mv
${
REP_REAL_PATH
}
/debug
${
WORKDIR
}
/debugSan
...
...
tests/system-test/7-tmq/tmqUpdate-1ctb.py
浏览文件 @
677a27a0
...
...
@@ -206,7 +206,7 @@ class TDTestCase:
paraDict
[
'rowsPerTbl'
]
=
self
.
rowsPerTbl
consumerId
=
1
if
self
.
snapshot
==
0
:
expectrowcnt
=
int
(
paraDict
[
"rowsPerTbl"
]
*
paraDict
[
"ctbNum"
]
*
(
2
))
expectrowcnt
=
int
(
paraDict
[
"rowsPerTbl"
]
*
paraDict
[
"ctbNum"
]
*
(
1
/
2
))
elif
self
.
snapshot
==
1
:
expectrowcnt
=
int
(
paraDict
[
"rowsPerTbl"
]
*
paraDict
[
"ctbNum"
]
*
(
1
))
...
...
tests/system-test/7-tmq/tmqUpdate-multiCtb-snapshot0.py
浏览文件 @
677a27a0
...
...
@@ -213,9 +213,9 @@ class TDTestCase:
paraDict
[
'rowsPerTbl'
]
=
self
.
rowsPerTbl
consumerId
=
1
if
self
.
snapshot
==
0
:
expectrowcnt
=
int
(
paraDict
[
"rowsPerTbl"
]
*
paraDict
[
"ctbNum"
]
*
(
2
+
1
/
2
*
1
/
2
*
2
+
1
/
2
*
1
/
2
))
expectrowcnt
=
int
(
paraDict
[
"rowsPerTbl"
]
*
paraDict
[
"ctbNum"
]
*
(
1
/
2
)
*
(
1
/
2
*
3
))
elif
self
.
snapshot
==
1
:
expectrowcnt
=
int
(
paraDict
[
"rowsPerTbl"
]
*
paraDict
[
"ctbNum"
]
*
(
2
+
1
/
2
*
1
/
2
))
expectrowcnt
=
int
(
paraDict
[
"rowsPerTbl"
]
*
paraDict
[
"ctbNum"
]
*
(
1
+
1
/
2
))
topicList
=
topicFromStb1
ifcheckdata
=
1
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录