提交 3b379f7b 编写于 作者: S Shengliang Guan

Merge remote-tracking branch 'origin/3.0' into feature/dnode3

......@@ -13,15 +13,23 @@
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#ifndef _TD_TVK_ROCKSDB_H_
#define _TD_TVK_ROCKSDB_H_
#ifndef _TD_COMMON_CFG_H_
#define _TD_COMMON_CFG_H_
#ifdef __cplusplus
extern "C" {
#endif
#include "tdef.h"
typedef struct {
char dir[TSDB_FILENAME_LEN];
int level;
int primary;
} SDiskCfg;
#ifdef __cplusplus
}
#endif
#endif /*_TD_TVK_ROCKSDB_H_*/
\ No newline at end of file
#endif /*_TD_COMMON_CFG_H_*/
......@@ -21,6 +21,7 @@ extern "C" {
#endif
#include "tdef.h"
#include "tcfg.h"
// cluster
extern char tsFirst[];
......@@ -105,11 +106,6 @@ extern uint32_t tsMaxRange;
extern uint32_t tsCurRange;
extern char tsCompressor[];
typedef struct {
char dir[TSDB_FILENAME_LEN];
int level;
int primary;
} SDiskCfg;
extern int32_t tsDiskCfgNum;
extern SDiskCfg tsDiskCfg[];
......
......@@ -58,6 +58,7 @@ int metaCommit(SMeta *pMeta);
STbCfg * metaGetTbInfoByUid(SMeta *pMeta, tb_uid_t uid);
STbCfg * metaGetTbInfoByName(SMeta *pMeta, char *tbname, tb_uid_t *uid);
SSchemaWrapper *metaGetTableSchema(SMeta *pMeta, tb_uid_t uid, int32_t sver, bool isinline);
STSchema * metaGetTbTSchema(SMeta *pMeta, tb_uid_t uid, int32_t sver);
SMTbCursor *metaOpenTbCursor(SMeta *pMeta);
void metaCloseTbCursor(SMTbCursor *pTbCur);
......
......@@ -17,6 +17,7 @@
#define _TD_TSDB_H_
#include "mallocator.h"
#include "meta.h"
#ifdef __cplusplus
extern "C" {
......@@ -33,7 +34,7 @@ typedef struct SDataStatis {
} SDataStatis;
typedef struct STable {
int32_t tid;
uint64_t tid;
uint64_t uid;
STSchema *pSchema;
} STable;
......@@ -54,10 +55,11 @@ typedef struct STsdbCfg {
int32_t keep1;
int32_t keep2;
int8_t update;
int8_t compression;
} STsdbCfg;
// STsdb
STsdb *tsdbOpen(const char *path, const STsdbCfg *pTsdbCfg, SMemAllocatorFactory *pMAF);
STsdb *tsdbOpen(const char *path, int32_t vgId, const STsdbCfg *pTsdbCfg, SMemAllocatorFactory *pMAF, SMeta *pMeta);
void tsdbClose(STsdb *);
void tsdbRemove(const char *path);
int tsdbInsertData(STsdb *pTsdb, SSubmitMsg *pMsg, SSubmitRsp *pRsp);
......
/*
* Copyright (c) 2019 TAOS Data, Inc. <jhtao@taosdata.com>
*
* This program is free software: you can use, redistribute, and/or modify
* it under the terms of the GNU Affero General Public License, version 3
* or later ("AGPL"), as published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#ifndef _TD_TSDB_H_
#define _TD_TSDB_H_
#include <pthread.h>
#include <stdbool.h>
#include <stdint.h>
#include "common.h"
#include "taosdef.h"
#include "tarray.h"
#include "tdataformat.h"
#include "thash.h"
#include "tlist.h"
#include "tlockfree.h"
#include "tmsg.h"
#include "tname.h"
#ifdef __cplusplus
extern "C" {
#endif
#define TSDB_VERSION_MAJOR 1
#define TSDB_VERSION_MINOR 0
#define TSDB_INVALID_SUPER_TABLE_ID -1
#define TSDB_STATUS_COMMIT_START 1
#define TSDB_STATUS_COMMIT_OVER 2
#define TSDB_STATUS_COMMIT_NOBLOCK 3 // commit no block, need to be solved
// TSDB STATE DEFINITION
#define TSDB_STATE_OK 0x0
#define TSDB_STATE_BAD_META 0x1
#define TSDB_STATE_BAD_DATA 0x2
typedef struct SDataStatis {
int16_t colId;
int64_t sum;
int64_t max;
int64_t min;
int16_t maxIndex;
int16_t minIndex;
int16_t numOfNull;
} SDataStatis;
// --------- TSDB APPLICATION HANDLE DEFINITION
// --------- TSDB REPOSITORY CONFIGURATION DEFINITION
typedef struct {
int32_t tsdbId;
int32_t cacheBlockSize;
int32_t totalBlocks;
int32_t daysPerFile; // day per file sharding policy
int32_t keep; // day of data to keep
int32_t keep1;
int32_t keep2;
int32_t lruCacheSize;
int32_t minRowsPerFileBlock; // minimum rows per file block
int32_t maxRowsPerFileBlock; // maximum rows per file block
int8_t precision;
int8_t compression;
int8_t update;
int8_t cacheLastRow; // 0:no cache, 1: cache last row, 2: cache last NULL column 3: 1&2
} STsdbCfg;
#define CACHE_NO_LAST(c) ((c)->cacheLastRow == 0)
#define CACHE_LAST_ROW(c) (((c)->cacheLastRow & 1) > 0)
#define CACHE_LAST_NULL_COLUMN(c) (((c)->cacheLastRow & 2) > 0)
// --------- TSDB REPOSITORY USAGE STATISTICS
typedef struct {
int64_t totalStorage; // total bytes occupie
int64_t compStorage;
int64_t pointsWritten; // total data points written
} STsdbStat;
typedef struct STsdb STsdb;
STsdbCfg *tsdbGetCfg(const STsdb *repo);
// --------- TSDB REPOSITORY DEFINITION
// int32_t tsdbCreateRepo(int repoid);
// int32_t tsdbDropRepo(int repoid);
STsdb * tsdbOpen(STsdbCfg *pCfg, STsdbAppH *pAppH);
int tsdbClose(STsdb *repo, int toCommit);
int32_t tsdbConfigRepo(STsdb *repo, STsdbCfg *pCfg);
int tsdbGetState(STsdb *repo);
int8_t tsdbGetCompactState(STsdb *repo);
// --------- TSDB TABLE DEFINITION
typedef struct {
uint64_t uid; // the unique table ID
int32_t tid; // the table ID in the repository.
} STableId;
// --------- TSDB TABLE configuration
typedef struct {
ETableType type;
char * name;
STableId tableId;
int32_t sversion;
char * sname; // super table name
uint64_t superUid;
STSchema * schema;
STSchema * tagSchema;
SKVRow tagValues;
char * sql;
} STableCfg;
void tsdbClearTableCfg(STableCfg *config);
void *tsdbGetTableTagVal(const void *pTable, int32_t colId, int16_t type);
char *tsdbGetTableName(void *pTable);
#define TSDB_TABLEID(_table) ((STableId *)(_table))
#define TSDB_PREV_ROW 0x1
#define TSDB_NEXT_ROW 0x2
STableCfg *tsdbCreateTableCfgFromMsg(SMDCreateTableMsg *pMsg);
int tsdbCreateTable(STsdb *repo, STableCfg *pCfg);
int tsdbDropTable(STsdb *pRepo, STableId tableId);
int tsdbUpdateTableTagValue(STsdb *repo, SUpdateTableTagValMsg *pMsg);
uint32_t tsdbGetFileInfo(STsdb *repo, char *name, uint32_t *index, uint32_t eindex, int64_t *size);
// the TSDB repository info
typedef struct STsdbRepoInfo {
STsdbCfg tsdbCfg;
uint64_t version; // version of the repository
int64_t tsdbTotalDataSize; // the original inserted data size
int64_t tsdbTotalDiskSize; // the total disk size taken by this TSDB repository
// TODO: Other informations to add
} STsdbRepoInfo;
STsdbRepoInfo *tsdbGetStatus(STsdb *pRepo);
// the meter information report structure
typedef struct {
STableCfg tableCfg;
uint64_t version;
int64_t tableTotalDataSize; // In bytes
int64_t tableTotalDiskSize; // In bytes
} STableInfo;
// -- FOR INSERT DATA
/**
* Insert data to a table in a repository
* @param pRepo the TSDB repository handle
* @param pData the data to insert (will give a more specific description)
*
* @return the number of points inserted, -1 for failure and the error number is set
*/
int32_t tsdbInsertData(STsdb *repo, SSubmitMsg *pMsg, SShellSubmitRspMsg *pRsp);
// -- FOR QUERY TIME SERIES DATA
typedef void *TsdbQueryHandleT; // Use void to hide implementation details
#define BLOCK_LOAD_OFFSET_SEQ_ORDER 1
#define BLOCK_LOAD_TABLE_SEQ_ORDER 2
#define BLOCK_LOAD_TABLE_RR_ORDER 3
// query condition to build multi-table data block iterator
typedef struct STsdbQueryCond {
STimeWindow twindow;
int32_t order; // desc|asc order to iterate the data block
int64_t offset; // skip offset put down to tsdb
int32_t numOfCols;
SColumnInfo *colList;
bool loadExternalRows; // load external rows or not
int32_t type; // data block load type:
} STsdbQueryCond;
typedef struct STableData STableData;
typedef struct {
T_REF_DECLARE()
SRWLatch latch;
TSKEY keyFirst;
TSKEY keyLast;
int64_t numOfRows;
int32_t maxTables;
STableData **tData;
SList * actList;
SList * extraBuffList;
SList * bufBlockList;
int64_t pointsAdd; // TODO
int64_t storageAdd; // TODO
} SMemTable;
typedef struct {
SMemTable *mem;
SMemTable *imem;
SMemTable mtable;
SMemTable *omem;
} SMemSnapshot;
typedef struct SMemRef {
int32_t ref;
SMemSnapshot snapshot;
} SMemRef;
#if 0
typedef struct SFileBlockInfo {
int32_t numBlocksOfStep;
} SFileBlockInfo;
typedef struct {
void *pTable;
TSKEY lastKey;
} STableKeyInfo;
typedef struct {
uint32_t numOfTables;
SArray * pGroupList;
SHashObj *map; // speedup acquire the tableQueryInfo by table uid
} STableGroupInfo;
#define TSDB_BLOCK_DIST_STEP_ROWS 16
typedef struct {
uint16_t rowSize;
uint16_t numOfFiles;
uint32_t numOfTables;
uint64_t totalSize;
uint64_t totalRows;
int32_t maxRows;
int32_t minRows;
int32_t firstSeekTimeUs;
uint32_t numOfRowsInMemTable;
uint32_t numOfSmallBlocks;
SArray * dataBlockInfos;
} STableBlockDist;
/**
* Get the data block iterator, starting from position according to the query condition
*
* @param tsdb tsdb handle
* @param pCond query condition, including time window, result set order, and basic required columns for each block
* @param tableInfoGroup table object list in the form of set, grouped into different sets according to the
* group by condition
* @param qinfo query info handle from query processor
* @return
*/
TsdbQueryHandleT *tsdbQueryTables(STsdb *tsdb, STsdbQueryCond *pCond, STableGroupInfo *tableInfoGroup, uint64_t qId,
SMemRef *pRef);
/**
* Get the last row of the given query time window for all the tables in STableGroupInfo object.
* Note that only one data block with only row will be returned while invoking retrieve data block function for
* all tables in this group.
*
* @param tsdb tsdb handle
* @param pCond query condition, including time window, result set order, and basic required columns for each block
* @param tableInfo table list.
* @return
*/
TsdbQueryHandleT tsdbQueryLastRow(STsdb *tsdb, STsdbQueryCond *pCond, STableGroupInfo *tableInfo, uint64_t qId,
SMemRef *pRef);
TsdbQueryHandleT tsdbQueryCacheLast(STsdb *tsdb, STsdbQueryCond *pCond, STableGroupInfo *groupList, uint64_t qId,
SMemRef *pMemRef);
bool isTsdbCacheLastRow(TsdbQueryHandleT *pQueryHandle);
/**
* get the queried table object list
* @param pHandle
* @return
*/
SArray *tsdbGetQueriedTableList(TsdbQueryHandleT *pHandle);
/**
* get the group list according to table id from client
* @param tsdb
* @param pCond
* @param groupList
* @param qinfo
* @return
*/
TsdbQueryHandleT tsdbQueryRowsInExternalWindow(STsdb *tsdb, STsdbQueryCond *pCond, STableGroupInfo *groupList,
uint64_t qId, SMemRef *pRef);
/**
* get num of rows in mem table
*
* @param pHandle
* @return row size
*/
int64_t tsdbGetNumOfRowsInMemTable(TsdbQueryHandleT *pHandle);
/**
* move to next block if exists
*
* @param pQueryHandle
* @return
*/
bool tsdbNextDataBlock(TsdbQueryHandleT pQueryHandle);
/**
* Get current data block information
*
* @param pQueryHandle
* @param pBlockInfo
* @return
*/
void tsdbRetrieveDataBlockInfo(TsdbQueryHandleT *pQueryHandle, SDataBlockInfo *pBlockInfo);
/**
*
* Get the pre-calculated information w.r.t. current data block.
*
* In case of data block in cache, the pBlockStatis will always be NULL.
* If a block is not completed loaded from disk, the pBlockStatis will be NULL.
* @pBlockStatis the pre-calculated value for current data blocks. if the block is a cache block, always return 0
* @return
*/
int32_t tsdbRetrieveDataBlockStatisInfo(TsdbQueryHandleT *pQueryHandle, SDataStatis **pBlockStatis);
/**
*
* The query condition with primary timestamp is passed to iterator during its constructor function,
* the returned data block must be satisfied with the time window condition in any cases,
* which means the SData data block is not actually the completed disk data blocks.
*
* @param pQueryHandle query handle
* @param pColumnIdList required data columns id list
* @return
*/
SArray *tsdbRetrieveDataBlock(TsdbQueryHandleT *pQueryHandle, SArray *pColumnIdList);
/**
* Get the qualified table id for a super table according to the tag query expression.
* @param stableid. super table sid
* @param pTagCond. tag query condition
*/
int32_t tsdbQuerySTableByTagCond(STsdb *tsdb, uint64_t uid, TSKEY key, const char *pTagCond, size_t len,
STableGroupInfo *pGroupList, SColIndex *pColIndex, int32_t numOfCols);
/**
* destroy the created table group list, which is generated by tag query
* @param pGroupList
*/
void tsdbDestroyTableGroup(STableGroupInfo *pGroupList);
/**
* create the table group result including only one table, used to handle the normal table query
*
* @param tsdb tsdbHandle
* @param uid table uid
* @param pGroupInfo the generated result
* @return
*/
int32_t tsdbGetOneTableGroup(STsdb *tsdb, uint64_t uid, TSKEY startKey, STableGroupInfo *pGroupInfo);
/**
*
* @param tsdb
* @param pTableIdList
* @param pGroupInfo
* @return
*/
int32_t tsdbGetTableGroupFromIdList(STsdb *tsdb, SArray *pTableIdList, STableGroupInfo *pGroupInfo);
/**
* clean up the query handle
* @param queryHandle
*/
void tsdbCleanupQueryHandle(TsdbQueryHandleT queryHandle);
void tsdbResetQueryHandle(TsdbQueryHandleT queryHandle, STsdbQueryCond *pCond);
void tsdbResetQueryHandleForNewTable(TsdbQueryHandleT queryHandle, STsdbQueryCond *pCond, STableGroupInfo *groupList);
int32_t tsdbGetFileBlocksDistInfo(TsdbQueryHandleT *queryHandle, STableBlockDist *pTableBlockInfo);
// obtain queryHandle attribute
int64_t tsdbSkipOffset(TsdbQueryHandleT queryHandle);
/**
* get the statistics of repo usage
* @param repo. point to the tsdbrepo
* @param totalPoints. total data point written
* @param totalStorage. total bytes took by the tsdb
* @param compStorage. total bytes took by the tsdb after compressed
*/
void tsdbReportStat(void *repo, int64_t *totalPoints, int64_t *totalStorage, int64_t *compStorage);
int tsdbInitCommitQueue();
void tsdbDestroyCommitQueue();
int tsdbSyncCommit(STsdb *repo);
void tsdbIncCommitRef(int vgId);
void tsdbDecCommitRef(int vgId);
void tsdbSwitchTable(TsdbQueryHandleT pQueryHandle);
// For TSDB file sync
int tsdbSyncSend(void *pRepo, SOCKET socketFd);
int tsdbSyncRecv(void *pRepo, SOCKET socketFd);
// // For TSDB Compact
// int tsdbCompact(STsdb *pRepo);
// For TSDB Health Monitor
// // no problem return true
// bool tsdbNoProblem(STsdb *pRepo);
// // unit of walSize: MB
// int tsdbCheckWal(STsdb *pRepo, uint32_t walSize);
// // for json tag
// void *getJsonTagValueElment(void *data, char *key, int32_t keyLen, char *out, int16_t bytes);
// void getJsonTagValueAll(void *data, void *dst, int16_t bytes);
// char *parseTagDatatoJson(void *p);
#endif
#ifdef __cplusplus
}
#endif
#endif // _TD_TSDB_H_
......@@ -89,7 +89,7 @@ void vnodeClear();
* @param pVnodeCfg options of the vnode
* @return SVnode* The vnode object
*/
SVnode *vnodeOpen(const char *path, const SVnodeCfg *pVnodeCfg);
SVnode *vnodeOpen(const char *path, const SVnodeCfg *pVnodeCfg, int32_t vid);
/**
* @brief Close a VNODE
......
......@@ -16,6 +16,7 @@
#ifndef _TD_TKV_H_
#define _TD_TKV_H_
#if 0
#include "os.h"
#ifdef __cplusplus
......@@ -59,4 +60,5 @@ void tkvWriteOptsDestroy(STkvWriteOpts *);
}
#endif
#endif
#endif /*_TD_TKV_H_*/
\ No newline at end of file
......@@ -54,7 +54,7 @@ int32_t taosFtruncateFile(FileFd fd, int64_t length);
int32_t taosFsyncFile(FileFd fd);
int64_t taosReadFile(FileFd fd, void *buf, int64_t count);
int64_t taosWriteFile(FileFd fd, void *buf, int64_t count);
int64_t taosWriteFile(FileFd fd, const void *buf, int64_t count);
void taosCloseFile(FileFd fd);
......
......@@ -26,14 +26,10 @@ extern "C" {
#define TD_MOD_UNINITIALIZED 0
#define TD_MOD_INITIALIZED 1
#define TD_MOD_UNCLEARD 0
#define TD_MOD_CLEARD 1
typedef int8_t td_mode_flag_t;
#define TD_CHECK_AND_SET_MODE_INIT(FLAG) atomic_val_compare_exchange_8((FLAG), TD_MOD_UNINITIALIZED, TD_MOD_INITIALIZED)
#define TD_CHECK_AND_SET_MOD_CLEAR(FLAG) atomic_val_compare_exchange_8((FLAG), TD_MOD_UNCLEARD, TD_MOD_CLEARD)
#define TD_CHECK_AND_SET_MOD_CLEAR(FLAG) atomic_val_compare_exchange_8((FLAG), TD_MOD_INITIALIZED, TD_MOD_UNINITIALIZED)
#define TD_IS_NULL(PTR) ((PTR) == NULL)
......
......@@ -305,6 +305,8 @@ TAOS_RES *taos_query_l(TAOS *taos, const char *sql, int sqlLen) {
} else {
CHECK_CODE_GOTO(getPlan(pRequest, pQuery, &pDag), _return);
CHECK_CODE_GOTO(scheduleQuery(pRequest, pDag, &pJob), _return);
pRequest->code = terrno;
return pRequest;
}
_return:
......
......@@ -11,6 +11,7 @@ target_link_libraries(
PUBLIC wal
PUBLIC sync
PUBLIC taos
PUBLIC tfs
)
target_include_directories(
dnode
......
......@@ -21,8 +21,9 @@ extern "C" {
#endif
#include "dndInt.h"
int32_t dndInitDnode(SDnode *pDnode);
void dndCleanupDnode(SDnode *pDnode);
int32_t dndInitMgmt(SDnode *pDnode);
void dndStopMgmt(SDnode *pDnode);
void dndCleanupMgmt(SDnode *pDnode);
int32_t dndGetDnodeId(SDnode *pDnode);
int64_t dndGetClusterId(SDnode *pDnode);
......
......@@ -497,7 +497,7 @@ static void *dnodeThreadRoutine(void *param) {
}
}
int32_t dndInitDnode(SDnode *pDnode) {
int32_t dndInitMgmt(SDnode *pDnode) {
SDnodeMgmt *pMgmt = &pDnode->dmgmt;
pMgmt->dnodeId = 0;
......@@ -547,16 +547,18 @@ int32_t dndInitDnode(SDnode *pDnode) {
return 0;
}
void dndCleanupDnode(SDnode *pDnode) {
void dndStopMgmt(SDnode *pDnode) {
SDnodeMgmt *pMgmt = &pDnode->dmgmt;
dndCleanupWorker(&pMgmt->mgmtWorker);
if (pMgmt->threadId != NULL) {
taosDestoryThread(pMgmt->threadId);
pMgmt->threadId = NULL;
}
}
void dndCleanupMgmt(SDnode *pDnode) {
SDnodeMgmt *pMgmt = &pDnode->dmgmt;
taosWLockLatch(&pMgmt->latch);
if (pMgmt->dnodeEps != NULL) {
......
......@@ -33,9 +33,9 @@ typedef struct {
int8_t dropped;
int8_t accessState;
uint64_t dbUid;
char *db;
char *path;
SVnode *pImpl;
char * db;
char * path;
SVnode * pImpl;
STaosQueue *pWriteQ;
STaosQueue *pSyncQ;
STaosQueue *pApplyQ;
......@@ -49,7 +49,7 @@ typedef struct {
int32_t failed;
int32_t threadIndex;
pthread_t thread;
SDnode *pDnode;
SDnode * pDnode;
SWrapperCfg *pCfgs;
} SVnodeThread;
......@@ -67,7 +67,7 @@ void dndProcessVnodeWriteMsg(SDnode *pDnode, SRpcMsg *pMsg, SEpSet *pE
void dndProcessVnodeSyncMsg(SDnode *pDnode, SRpcMsg *pMsg, SEpSet *pEpSet);
static int32_t dndPutMsgIntoVnodeApplyQueue(SDnode *pDnode, int32_t vgId, SRpcMsg *pMsg);
static SVnodeObj *dndAcquireVnode(SDnode *pDnode, int32_t vgId);
static SVnodeObj * dndAcquireVnode(SDnode *pDnode, int32_t vgId);
static void dndReleaseVnode(SDnode *pDnode, SVnodeObj *pVnode);
static int32_t dndOpenVnode(SDnode *pDnode, SWrapperCfg *pCfg, SVnode *pImpl);
static void dndCloseVnode(SDnode *pDnode, SVnodeObj *pVnode);
......@@ -80,7 +80,7 @@ static void dndCloseVnodes(SDnode *pDnode);
static SVnodeObj *dndAcquireVnode(SDnode *pDnode, int32_t vgId) {
SVnodesMgmt *pMgmt = &pDnode->vmgmt;
SVnodeObj *pVnode = NULL;
SVnodeObj * pVnode = NULL;
int32_t refCount = 0;
taosRLockLatch(&pMgmt->latch);
......@@ -111,7 +111,7 @@ static void dndReleaseVnode(SDnode *pDnode, SVnodeObj *pVnode) {
static int32_t dndOpenVnode(SDnode *pDnode, SWrapperCfg *pCfg, SVnode *pImpl) {
SVnodesMgmt *pMgmt = &pDnode->vmgmt;
SVnodeObj *pVnode = calloc(1, sizeof(SVnodeObj));
SVnodeObj * pVnode = calloc(1, sizeof(SVnodeObj));
if (pVnode == NULL) {
terrno = TSDB_CODE_OUT_OF_MEMORY;
return -1;
......@@ -167,6 +167,11 @@ static void dndCloseVnode(SDnode *pDnode, SVnodeObj *pVnode) {
dDebug("vgId:%d, vnode is closed", pVnode->vgId);
if (pVnode->dropped) {
dDebug("vgId:%d, vnode is destroyed for dropped:%d", pVnode->vgId, pVnode->dropped);
vnodeDestroy(pVnode->path);
}
free(pVnode->path);
free(pVnode->db);
free(pVnode);
......@@ -183,7 +188,7 @@ static SVnodeObj **dndGetVnodesFromHash(SDnode *pDnode, int32_t *numOfVnodes) {
void *pIter = taosHashIterate(pMgmt->hash, NULL);
while (pIter) {
SVnodeObj **ppVnode = pIter;
SVnodeObj *pVnode = *ppVnode;
SVnodeObj * pVnode = *ppVnode;
if (pVnode && num < size) {
int32_t refCount = atomic_add_fetch_32(&pVnode->refCount, 1);
dTrace("vgId:%d, acquire vnode, refCount:%d", pVnode->vgId, refCount);
......@@ -205,9 +210,9 @@ static int32_t dndGetVnodesFromFile(SDnode *pDnode, SWrapperCfg **ppCfgs, int32_
int32_t code = TSDB_CODE_DND_VNODE_READ_FILE_ERROR;
int32_t len = 0;
int32_t maxLen = 30000;
char *content = calloc(1, maxLen + 1);
cJSON *root = NULL;
FILE *fp = NULL;
char * content = calloc(1, maxLen + 1);
cJSON * root = NULL;
FILE * fp = NULL;
char file[PATH_MAX + 20] = {0};
SWrapperCfg *pCfgs = NULL;
......@@ -320,7 +325,7 @@ static int32_t dndWriteVnodesToFile(SDnode *pDnode) {
int32_t len = 0;
int32_t maxLen = 65536;
char *content = calloc(1, maxLen + 1);
char * content = calloc(1, maxLen + 1);
len += snprintf(content + len, maxLen - len, "{\n");
len += snprintf(content + len, maxLen - len, " \"vnodes\": [\n");
......@@ -362,8 +367,8 @@ static int32_t dndWriteVnodesToFile(SDnode *pDnode) {
static void *dnodeOpenVnodeFunc(void *param) {
SVnodeThread *pThread = param;
SDnode *pDnode = pThread->pDnode;
SVnodesMgmt *pMgmt = &pDnode->vmgmt;
SDnode * pDnode = pThread->pDnode;
SVnodesMgmt * pMgmt = &pDnode->vmgmt;
dDebug("thread:%d, start to open %d vnodes", pThread->threadIndex, pThread->vnodeNum);
setThreadName("open-vnodes");
......@@ -376,10 +381,7 @@ static void *dnodeOpenVnodeFunc(void *param) {
pMgmt->openVnodes, pMgmt->totalVnodes);
dndReportStartup(pDnode, "open-vnodes", stepDesc);
SVnodeCfg vnodeCfg = {0};
vnodeCfg.vgId = pCfg->vgId;
SVnode *pImpl = vnodeOpen(pCfg->path, &vnodeCfg);
SVnode *pImpl = vnodeOpen(pCfg->path, NULL, pCfg->vgId);
if (pImpl == NULL) {
dError("vgId:%d, failed to open vnode by thread:%d", pCfg->vgId, pThread->threadIndex);
pThread->failed++;
......@@ -469,6 +471,7 @@ static int32_t dndOpenVnodes(SDnode *pDnode) {
}
static void dndCloseVnodes(SDnode *pDnode) {
dInfo("start to close all vnodes");
SVnodesMgmt *pMgmt = &pDnode->vmgmt;
int32_t numOfVnodes = 0;
......@@ -578,7 +581,7 @@ int32_t dndProcessCreateVnodeReq(SDnode *pDnode, SRpcMsg *pReq) {
return -1;
}
SVnode *pImpl = vnodeOpen(wrapperCfg.path, &vnodeCfg);
SVnode *pImpl = vnodeOpen(wrapperCfg.path, NULL /*pCfg*/, pCreate->vgId);
if (pImpl == NULL) {
dError("vgId:%d, failed to create vnode since %s", pCreate->vgId, terrstr());
return -1;
......@@ -661,8 +664,6 @@ int32_t dndProcessDropVnodeReq(SDnode *pDnode, SRpcMsg *pReq) {
}
dndCloseVnode(pDnode, pVnode);
vnodeClose(pVnode->pImpl);
vnodeDestroy(pVnode->path);
dndWriteVnodesToFile(pDnode);
return 0;
......@@ -992,12 +993,12 @@ void dndGetVnodeLoads(SDnode *pDnode, SVnodeLoads *pLoads) {
pLoads->num = taosHashGetSize(pMgmt->hash);
int32_t v = 0;
void *pIter = taosHashIterate(pMgmt->hash, NULL);
void * pIter = taosHashIterate(pMgmt->hash, NULL);
while (pIter) {
SVnodeObj **ppVnode = pIter;
if (ppVnode == NULL || *ppVnode == NULL) continue;
SVnodeObj *pVnode = *ppVnode;
SVnodeObj * pVnode = *ppVnode;
SVnodeLoad *pLoad = &pLoads->data[v++];
vnodeGetLoad(pVnode->pImpl, pLoad);
......
......@@ -23,6 +23,7 @@
#include "dndVnodes.h"
#include "sync.h"
#include "wal.h"
#include "tfs.h"
EStat dndGetStat(SDnode *pDnode) { return pDnode->stat; }
......@@ -185,13 +186,23 @@ SDnode *dndInit(SDnodeOpt *pOption) {
return NULL;
}
SDiskCfg dCfg;
strcpy(dCfg.dir, pDnode->opt.dataDir);
dCfg.level = 0;
dCfg.primary = 1;
if (tfsInit(&dCfg, 1) != 0) {
dError("failed to init tfs env");
dndCleanup(pDnode);
return NULL;
}
if (vnodeInit(pDnode->opt.numOfCommitThreads) != 0) {
dError("failed to init vnode env");
dndCleanup(pDnode);
return NULL;
}
if (dndInitDnode(pDnode) != 0) {
if (dndInitMgmt(pDnode) != 0) {
dError("failed to init dnode");
dndCleanup(pDnode);
return NULL;
......@@ -252,13 +263,15 @@ void dndCleanup(SDnode *pDnode) {
dInfo("start to cleanup TDengine");
dndSetStat(pDnode, DND_STAT_STOPPED);
dndCleanupTrans(pDnode);
dndStopMgmt(pDnode);
dndCleanupMnode(pDnode);
dndCleanupBnode(pDnode);
dndCleanupSnode(pDnode);
dndCleanupQnode(pDnode);
dndCleanupVnodes(pDnode);
dndCleanupDnode(pDnode);
dndCleanupMgmt(pDnode);
vnodeClear();
tfsDestroy();
walCleanUp();
rpcCleanup();
......
......@@ -23,6 +23,7 @@
#include "tlockfree.h"
#include "tmacro.h"
#include "wal.h"
#include "tfs.h"
#include "vnode.h"
......@@ -48,7 +49,6 @@ typedef struct SVnodeTask {
typedef struct SVnodeMgr {
td_mode_flag_t vnodeInitFlag;
td_mode_flag_t vnodeClearFlag;
// For commit
bool stop;
uint16_t nthreads;
......
......@@ -97,14 +97,17 @@ int vnodeBufPoolSwitch(SVnode *pVnode) {
pVnode->pBufPool->inuse = NULL;
if (pvma) {
TD_DLIST_APPEND(&(pVnode->pBufPool->incycle), pvma);
}
return 0;
}
int vnodeBufPoolRecycle(SVnode *pVnode) {
SVBufPool * pBufPool = pVnode->pBufPool;
SVMemAllocator *pvma = TD_DLIST_HEAD(&(pBufPool->incycle));
ASSERT(pvma != NULL);
if (pvma == NULL) return 0;
// ASSERT(pvma != NULL);
TD_DLIST_POP(&(pBufPool->incycle), pvma);
vmaReset(pvma);
......
......@@ -40,6 +40,7 @@ int vnodeAsyncCommit(SVnode *pVnode) {
int vnodeSyncCommit(SVnode *pVnode) {
vnodeAsyncCommit(pVnode);
vnodeWaitCommit(pVnode);
tsem_post(&(pVnode->canCommit));
return 0;
}
......
......@@ -15,12 +15,12 @@
#include "vnodeDef.h"
static SVnode *vnodeNew(const char *path, const SVnodeCfg *pVnodeCfg);
static SVnode *vnodeNew(const char *path, const SVnodeCfg *pVnodeCfg, int32_t vid);
static void vnodeFree(SVnode *pVnode);
static int vnodeOpenImpl(SVnode *pVnode);
static void vnodeCloseImpl(SVnode *pVnode);
SVnode *vnodeOpen(const char *path, const SVnodeCfg *pVnodeCfg) {
SVnode *vnodeOpen(const char *path, const SVnodeCfg *pVnodeCfg, int32_t vid) {
SVnode *pVnode = NULL;
// Set default options
......@@ -35,7 +35,7 @@ SVnode *vnodeOpen(const char *path, const SVnodeCfg *pVnodeCfg) {
}
// Create the handle
pVnode = vnodeNew(path, pVnodeCfg);
pVnode = vnodeNew(path, pVnodeCfg, vid);
if (pVnode == NULL) {
// TODO: handle error
return NULL;
......@@ -62,7 +62,7 @@ void vnodeClose(SVnode *pVnode) {
void vnodeDestroy(const char *path) { taosRemoveDir(path); }
/* ------------------------ STATIC METHODS ------------------------ */
static SVnode *vnodeNew(const char *path, const SVnodeCfg *pVnodeCfg) {
static SVnode *vnodeNew(const char *path, const SVnodeCfg *pVnodeCfg, int32_t vid) {
SVnode *pVnode = NULL;
pVnode = (SVnode *)calloc(1, sizeof(*pVnode));
......@@ -71,6 +71,7 @@ static SVnode *vnodeNew(const char *path, const SVnodeCfg *pVnodeCfg) {
return NULL;
}
pVnode->vgId = vid;
pVnode->path = strdup(path);
vnodeOptionsCopy(&(pVnode->config), pVnodeCfg);
......@@ -105,7 +106,7 @@ static int vnodeOpenImpl(SVnode *pVnode) {
// Open tsdb
sprintf(dir, "%s/tsdb", pVnode->path);
pVnode->pTsdb = tsdbOpen(dir, &(pVnode->config.tsdbCfg), vBufPoolGetMAF(pVnode));
pVnode->pTsdb = tsdbOpen(dir, pVnode->vgId, &(pVnode->config.tsdbCfg), vBufPoolGetMAF(pVnode), pVnode->pMeta);
if (pVnode->pTsdb == NULL) {
// TODO: handle error
return -1;
......@@ -137,7 +138,7 @@ static int vnodeOpenImpl(SVnode *pVnode) {
}
static void vnodeCloseImpl(SVnode *pVnode) {
// vnodeSyncCommit(pVnode);
vnodeSyncCommit(pVnode);
if (pVnode) {
vnodeCloseBufPool(pVnode);
metaClose(pVnode->pMeta);
......
......@@ -15,7 +15,7 @@
#include "vnodeDef.h"
SVnodeMgr vnodeMgr = {.vnodeInitFlag = TD_MOD_UNINITIALIZED, .vnodeClearFlag = TD_MOD_UNCLEARD, .stop = false};
SVnodeMgr vnodeMgr = {.vnodeInitFlag = TD_MOD_UNINITIALIZED};
static void* loop(void* arg);
......@@ -24,6 +24,8 @@ int vnodeInit(uint16_t nthreads) {
return 0;
}
vnodeMgr.stop = false;
// Start commit handers
if (nthreads > 0) {
vnodeMgr.nthreads = nthreads;
......@@ -38,6 +40,7 @@ int vnodeInit(uint16_t nthreads) {
for (uint16_t i = 0; i < nthreads; i++) {
pthread_create(&(vnodeMgr.threads[i]), NULL, loop, NULL);
pthread_setname_np(vnodeMgr.threads[i], "VND Commit Thread");
}
} else {
// TODO: if no commit thread is set, then another mechanism should be
......@@ -53,7 +56,7 @@ int vnodeInit(uint16_t nthreads) {
}
void vnodeClear() {
if (TD_CHECK_AND_SET_MOD_CLEAR(&(vnodeMgr.vnodeClearFlag)) == TD_MOD_CLEARD) {
if (TD_CHECK_AND_SET_MOD_CLEAR(&(vnodeMgr.vnodeInitFlag)) == TD_MOD_UNINITIALIZED) {
return;
}
......
......@@ -590,3 +590,35 @@ char *metaTbCursorNext(SMTbCursor *pTbCur) {
}
}
}
STSchema *metaGetTbTSchema(SMeta *pMeta, tb_uid_t uid, int32_t sver) {
STSchemaBuilder sb;
STSchema * pTSchema = NULL;
SSchema * pSchema;
SSchemaWrapper *pSW;
STbCfg * pTbCfg;
tb_uid_t quid;
pTbCfg = metaGetTbInfoByUid(pMeta, uid);
if (pTbCfg->type == META_CHILD_TABLE) {
quid = pTbCfg->ctbCfg.suid;
} else {
quid = uid;
}
pSW = metaGetTableSchema(pMeta, quid, sver, true);
if (pSW == NULL) {
return NULL;
}
// Rebuild a schema
tdInitTSchemaBuilder(&sb, 0);
for (int32_t i = 0; i < pSW->nCols; i++) {
pSchema = pSW->pSchema + i;
tdAddColToSchema(&sb, pSchema->type, pSchema->colId, pSchema->bytes);
}
pTSchema = tdGetSchemaFromBuilder(&sb);
tdDestroyTSchemaBuilder(&sb);
return pTSchema;
}
\ No newline at end of file
......@@ -2,7 +2,7 @@ aux_source_directory(src TSDB_SRC)
if(0)
add_library(tsdb ${TSDB_SRC})
else(0)
add_library(tsdb "")
add_library(tsdb STATIC "")
target_sources(tsdb
PRIVATE
"src/tsdbCommit.c"
......@@ -29,4 +29,5 @@ target_link_libraries(
PUBLIC common
PUBLIC tkv
PUBLIC tfs
PUBLIC meta
)
\ No newline at end of file
......@@ -39,6 +39,18 @@ static FORCE_INLINE int TSDB_KEY_FID(TSKEY key, int32_t days, int8_t precision)
}
}
static FORCE_INLINE int tsdbGetFidLevel(int fid, SRtn *pRtn) {
if (fid >= pRtn->maxFid) {
return 0;
} else if (fid >= pRtn->midFid) {
return 1;
} else if (fid >= pRtn->minFid) {
return 2;
} else {
return -1;
}
}
#if 0
#define TSDB_DEFAULT_BLOCK_ROWS(maxRows) ((maxRows)*4 / 5)
......@@ -52,17 +64,6 @@ int tsdbWriteBlockImpl(STsdbRepo *pRepo, STable *pTable, SDFile *pDFile, SDataCo
bool isLast, bool isSuper, void **ppBuf, void **ppCBuf);
int tsdbApplyRtn(STsdbRepo *pRepo);
static FORCE_INLINE int tsdbGetFidLevel(int fid, SRtn *pRtn) {
if (fid >= pRtn->maxFid) {
return 0;
} else if (fid >= pRtn->midFid) {
return 1;
} else if (fid >= pRtn->minFid) {
return 2;
} else {
return -1;
}
}
#endif
#endif /* _TD_TSDB_COMMIT_H_ */
\ No newline at end of file
......@@ -12,6 +12,8 @@
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#if 0
#ifndef _TD_TSDB_COMPACT_H_
#define _TD_TSDB_COMPACT_H_
......@@ -19,14 +21,12 @@
extern "C" {
#endif
#if 0
void *tsdbCompactImpl(STsdbRepo *pRepo);
#endif
#ifdef __cplusplus
}
#endif
#endif /* _TD_TSDB_COMPACT_H_ */
#endif
\ No newline at end of file
......@@ -17,6 +17,7 @@
#define _TD_TSDB_DEF_H_
#include "mallocator.h"
#include "meta.h"
#include "tcompression.h"
#include "tglobal.h"
#include "thash.h"
......@@ -47,12 +48,17 @@ struct STsdb {
STsdbMemTable * imem;
SRtn rtn;
SMemAllocatorFactory *pmaf;
STsdbFS fs;
STsdbFS * fs;
SMeta * pMeta;
};
#define REPO_ID(r) 0
#define REPO_ID(r) ((r)->vgId)
#define REPO_CFG(r) (&(r)->config)
#define REPO_FS(r) (&(r)->fs)
#define REPO_FS(r) (r)->fs
static FORCE_INLINE STSchema *tsdbGetTableSchemaImpl(STable *pTable, bool lock, bool copy, int32_t version) {
return pTable->pSchema;
}
#ifdef __cplusplus
}
......
......@@ -70,7 +70,7 @@ typedef struct {
#define TSDB_FS_ITER_FORWARD TSDB_ORDER_ASC
#define TSDB_FS_ITER_BACKWARD TSDB_ORDER_DESC
STsdbFS *tsdbNewFS(STsdbCfg *pCfg);
STsdbFS *tsdbNewFS(const STsdbCfg *pCfg);
void * tsdbFreeFS(STsdbFS *pfs);
int tsdbOpenFS(STsdb *pRepo);
void tsdbCloseFS(STsdb *pRepo);
......
......@@ -22,11 +22,57 @@
extern "C" {
#endif
typedef struct STsdbMemTable STsdbMemTable;
typedef struct {
int rowsInserted;
int rowsUpdated;
int rowsDeleteSucceed;
int rowsDeleteFailed;
int nOperations;
TSKEY keyFirst;
TSKEY keyLast;
} SMergeInfo;
typedef struct STbData {
tb_uid_t uid;
TSKEY keyMin;
TSKEY keyMax;
int64_t nrows;
SSkipList *pData;
} STbData;
typedef struct STsdbMemTable {
T_REF_DECLARE()
SRWLatch latch;
TSKEY keyMin;
TSKEY keyMax;
uint64_t nRow;
SMemAllocator *pMA;
// Container
SSkipList *pSlIdx; // SSkiplist<STbData>
SHashObj * pHashIdx;
} STsdbMemTable;
STsdbMemTable *tsdbNewMemTable(STsdb *pTsdb);
void tsdbFreeMemTable(STsdb *pTsdb, STsdbMemTable *pMemTable);
int tsdbMemTableInsert(STsdb *pTsdb, STsdbMemTable *pMemTable, SSubmitMsg *pMsg, SShellSubmitRspMsg *pRsp);
int tsdbLoadDataFromCache(STable *pTable, SSkipListIterator *pIter, TSKEY maxKey, int maxRowsToRead, SDataCols *pCols,
TKEY *filterKeys, int nFilterKeys, bool keepDup, SMergeInfo *pMergeInfo);
static FORCE_INLINE SMemRow tsdbNextIterRow(SSkipListIterator *pIter) {
if (pIter == NULL) return NULL;
SSkipListNode *node = tSkipListIterGet(pIter);
if (node == NULL) return NULL;
return (SMemRow)SL_GET_NODE_DATA(node);
}
static FORCE_INLINE TSKEY tsdbNextIterKey(SSkipListIterator *pIter) {
SMemRow row = tsdbNextIterRow(pIter);
if (row == NULL) return TSDB_DATA_TIMESTAMP_NULL;
return memRowKey(row);
}
#ifdef __cplusplus
}
......
......@@ -15,67 +15,12 @@
#include "tsdbDef.h"
int tsdbPrepareCommit(STsdb *pTsdb) {
if (pTsdb->mem == NULL) return 0;
// tsem_wait(&(pTsdb->canCommit));
ASSERT(pTsdb->imem == NULL);
pTsdb->imem = pTsdb->mem;
pTsdb->mem = NULL;
}
int tsdbCommit(STsdb *pTsdb) {
// TODO
pTsdb->imem = NULL;
// tsem_post(&(pTsdb->canCommit));
return 0;
}
void tsdbGetRtnSnap(STsdb *pRepo, SRtn *pRtn) {
STsdbCfg *pCfg = REPO_CFG(pRepo);
TSKEY minKey, midKey, maxKey, now;
now = taosGetTimestamp(pCfg->precision);
minKey = now - pCfg->keep * tsTickPerDay[pCfg->precision];
midKey = now - pCfg->keep2 * tsTickPerDay[pCfg->precision];
maxKey = now - pCfg->keep1 * tsTickPerDay[pCfg->precision];
pRtn->minKey = minKey;
pRtn->minFid = (int)(TSDB_KEY_FID(minKey, pCfg->daysPerFile, pCfg->precision));
pRtn->midFid = (int)(TSDB_KEY_FID(midKey, pCfg->daysPerFile, pCfg->precision));
pRtn->maxFid = (int)(TSDB_KEY_FID(maxKey, pCfg->daysPerFile, pCfg->precision));
tsdbDebug("vgId:%d now:%" PRId64 " minKey:%" PRId64 " minFid:%d, midFid:%d, maxFid:%d", REPO_ID(pRepo), now, minKey,
pRtn->minFid, pRtn->midFid, pRtn->maxFid);
}
#if 0
/*
* Copyright (c) 2019 TAOS Data, Inc. <jhtao@taosdata.com>
*
* This program is free software: you can use, redistribute, and/or modify
* it under the terms of the GNU Affero General Public License, version 3
* or later ("AGPL"), as published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#include "tsdbint.h"
extern int32_t tsTsdbMetaCompactRatio;
#define TSDB_MAX_SUBBLOCKS 8
static FORCE_INLINE int TSDB_KEY_FID(TSKEY key, int32_t days, int8_t precision) {
if (key < 0) {
return (int)((key + 1) / tsTickPerDay[precision] / days - 1);
} else {
return (int)((key / tsTickPerDay[precision] / days));
}
}
typedef struct {
STable * pTable;
SSkipListIterator *pIter;
} SCommitIter;
typedef struct {
SRtn rtn; // retention snapshot
......@@ -96,6 +41,8 @@ typedef struct {
SDataCols * pDataCols;
} SCommitH;
#define TSDB_DEFAULT_BLOCK_ROWS(maxRows) ((maxRows)*4 / 5)
#define TSDB_COMMIT_REPO(ch) TSDB_READ_REPO(&(ch->readh))
#define TSDB_COMMIT_REPO_ID(ch) REPO_ID(TSDB_READ_REPO(&(ch->readh)))
#define TSDB_COMMIT_WRITE_FSET(ch) (&((ch)->wSet))
......@@ -108,21 +55,21 @@ typedef struct {
#define TSDB_COMMIT_DEFAULT_ROWS(ch) TSDB_DEFAULT_BLOCK_ROWS(TSDB_COMMIT_REPO(ch)->config.maxRowsPerFileBlock)
#define TSDB_COMMIT_TXN_VERSION(ch) FS_TXN_VERSION(REPO_FS(TSDB_COMMIT_REPO(ch)))
static int tsdbCommitMeta(STsdbRepo *pRepo);
static int tsdbUpdateMetaRecord(STsdbFS *pfs, SMFile *pMFile, uint64_t uid, void *cont, int contLen, bool compact);
static int tsdbDropMetaRecord(STsdbFS *pfs, SMFile *pMFile, uint64_t uid);
static int tsdbCompactMetaFile(STsdbRepo *pRepo, STsdbFS *pfs, SMFile *pMFile);
static int tsdbCommitTSData(STsdbRepo *pRepo);
static void tsdbStartCommit(STsdbRepo *pRepo);
static void tsdbEndCommit(STsdbRepo *pRepo, int eno);
static int tsdbCommitToFile(SCommitH *pCommith, SDFileSet *pSet, int fid);
static int tsdbCreateCommitIters(SCommitH *pCommith);
static void tsdbDestroyCommitIters(SCommitH *pCommith);
static void tsdbStartCommit(STsdb *pRepo);
static void tsdbEndCommit(STsdb *pTsdb, int eno);
static int tsdbInitCommitH(SCommitH *pCommith, STsdb *pRepo);
static void tsdbSeekCommitIter(SCommitH *pCommith, TSKEY key);
static int tsdbInitCommitH(SCommitH *pCommith, STsdbRepo *pRepo);
static void tsdbDestroyCommitH(SCommitH *pCommith);
static int tsdbGetFidLevel(int fid, SRtn *pRtn);
static int tsdbNextCommitFid(SCommitH *pCommith);
static void tsdbDestroyCommitH(SCommitH *pCommith);
static int tsdbCreateCommitIters(SCommitH *pCommith);
static void tsdbDestroyCommitIters(SCommitH *pCommith);
static int tsdbCommitToFile(SCommitH *pCommith, SDFileSet *pSet, int fid);
static void tsdbResetCommitFile(SCommitH *pCommith);
static int tsdbSetAndOpenCommitFile(SCommitH *pCommith, SDFileSet *pSet, int fid);
// static int tsdbCommitMeta(STsdbRepo *pRepo);
// static int tsdbUpdateMetaRecord(STsdbFS *pfs, SMFile *pMFile, uint64_t uid, void *cont, int contLen, bool compact);
// static int tsdbDropMetaRecord(STsdbFS *pfs, SMFile *pMFile, uint64_t uid);
// static int tsdbCompactMetaFile(STsdbRepo *pRepo, STsdbFS *pfs, SMFile *pMFile);
static int tsdbCommitToTable(SCommitH *pCommith, int tid);
static int tsdbSetCommitTable(SCommitH *pCommith, STable *pTable);
static int tsdbComparKeyBlock(const void *arg1, const void *arg2);
......@@ -133,44 +80,14 @@ static int tsdbMoveBlock(SCommitH *pCommith, int bidx);
static int tsdbCommitAddBlock(SCommitH *pCommith, const SBlock *pSupBlock, const SBlock *pSubBlocks, int nSubBlocks);
static int tsdbMergeBlockData(SCommitH *pCommith, SCommitIter *pIter, SDataCols *pDataCols, TSKEY keyLimit,
bool isLastOneBlock);
static void tsdbResetCommitFile(SCommitH *pCommith);
static void tsdbResetCommitTable(SCommitH *pCommith);
static int tsdbSetAndOpenCommitFile(SCommitH *pCommith, SDFileSet *pSet, int fid);
static void tsdbCloseCommitFile(SCommitH *pCommith, bool hasError);
static bool tsdbCanAddSubBlock(SCommitH *pCommith, SBlock *pBlock, SMergeInfo *pInfo);
static void tsdbLoadAndMergeFromCache(SDataCols *pDataCols, int *iter, SCommitIter *pCommitIter, SDataCols *pTarget,
TSKEY maxKey, int maxRows, int8_t update);
int tsdbWriteBlockIdx(SDFile *pHeadf, SArray *pIdxA, void **ppBuf);
void *tsdbCommitData(STsdbRepo *pRepo) {
if (pRepo->imem == NULL) {
return NULL;
}
tsdbStartCommit(pRepo);
// Commit to update meta file
if (tsdbCommitMeta(pRepo) < 0) {
tsdbError("vgId:%d error occurs while committing META data since %s", REPO_ID(pRepo), tstrerror(terrno));
goto _err;
}
// Create the iterator to read from cache
if (tsdbCommitTSData(pRepo) < 0) {
tsdbError("vgId:%d error occurs while committing TS data since %s", REPO_ID(pRepo), tstrerror(terrno));
goto _err;
}
tsdbEndCommit(pRepo, TSDB_CODE_SUCCESS);
return NULL;
_err:
ASSERT(terrno != TSDB_CODE_SUCCESS);
pRepo->code = terrno;
tsdbEndCommit(pRepo, terrno);
return NULL;
}
int tsdbApplyRtnOnFSet(STsdbRepo *pRepo, SDFileSet *pSet, SRtn *pRtn) {
int tsdbApplyRtnOnFSet(STsdb *pRepo, SDFileSet *pSet, SRtn *pRtn) {
SDiskID did;
SDFileSet nSet;
STsdbFS * pfs = REPO_FS(pRepo);
......@@ -212,548 +129,216 @@ int tsdbApplyRtnOnFSet(STsdbRepo *pRepo, SDFileSet *pSet, SRtn *pRtn) {
return 0;
}
int tsdbWriteBlockInfoImpl(SDFile *pHeadf, STable *pTable, SArray *pSupA, SArray *pSubA, void **ppBuf,
SBlockIdx *pIdx) {
size_t nSupBlocks;
size_t nSubBlocks;
uint32_t tlen;
SBlockInfo *pBlkInfo;
int64_t offset;
SBlock * pBlock;
memset(pIdx, 0, sizeof(*pIdx));
nSupBlocks = taosArrayGetSize(pSupA);
nSubBlocks = (pSubA == NULL) ? 0 : taosArrayGetSize(pSubA);
if (nSupBlocks <= 0) {
// No data (data all deleted)
return 0;
}
int tsdbPrepareCommit(STsdb *pTsdb) {
if (pTsdb->mem == NULL) return 0;
tlen = (uint32_t)(sizeof(SBlockInfo) + sizeof(SBlock) * (nSupBlocks + nSubBlocks) + sizeof(TSCKSUM));
if (tsdbMakeRoom(ppBuf, tlen) < 0) return -1;
pBlkInfo = *ppBuf;
ASSERT(pTsdb->imem == NULL);
pBlkInfo->delimiter = TSDB_FILE_DELIMITER;
pBlkInfo->tid = TABLE_TID(pTable);
pBlkInfo->uid = TABLE_UID(pTable);
pTsdb->imem = pTsdb->mem;
pTsdb->mem = NULL;
}
memcpy((void *)(pBlkInfo->blocks), taosArrayGet(pSupA, 0), nSupBlocks * sizeof(SBlock));
if (nSubBlocks > 0) {
memcpy((void *)(pBlkInfo->blocks + nSupBlocks), taosArrayGet(pSubA, 0), nSubBlocks * sizeof(SBlock));
int tsdbCommit(STsdb *pRepo) {
STsdbMemTable *pMem = pRepo->imem;
SCommitH commith = {0};
SDFileSet * pSet = NULL;
int fid;
for (int i = 0; i < nSupBlocks; i++) {
pBlock = pBlkInfo->blocks + i;
if (pRepo->imem == NULL) return 0;
if (pBlock->numOfSubBlocks > 1) {
pBlock->offset += (sizeof(SBlockInfo) + sizeof(SBlock) * nSupBlocks);
tsdbStartCommit(pRepo);
// Resource initialization
if (tsdbInitCommitH(&commith, pRepo) < 0) {
return -1;
}
// Skip expired memory data and expired FSET
tsdbSeekCommitIter(&commith, commith.rtn.minKey);
while ((pSet = tsdbFSIterNext(&(commith.fsIter)))) {
if (pSet->fid < commith.rtn.minFid) {
tsdbInfo("vgId:%d FSET %d on level %d disk id %d expires, remove it", REPO_ID(pRepo), pSet->fid,
TSDB_FSET_LEVEL(pSet), TSDB_FSET_ID(pSet));
} else {
break;
}
}
taosCalcChecksumAppend(0, (uint8_t *)pBlkInfo, tlen);
// Loop to commit to each file
fid = tsdbNextCommitFid(&(commith));
while (true) {
// Loop over both on disk and memory
if (pSet == NULL && fid == TSDB_IVLD_FID) break;
if (tsdbAppendDFile(pHeadf, (void *)pBlkInfo, tlen, &offset) < 0) {
if (pSet && (fid == TSDB_IVLD_FID || pSet->fid < fid)) {
// Only has existing FSET but no memory data to commit in this
// existing FSET, only check if file in correct retention
if (tsdbApplyRtnOnFSet(pRepo, pSet, &(commith.rtn)) < 0) {
tsdbDestroyCommitH(&commith);
return -1;
}
tsdbUpdateDFileMagic(pHeadf, POINTER_SHIFT(pBlkInfo, tlen - sizeof(TSCKSUM)));
// Set pIdx
pBlock = taosArrayGetLast(pSupA);
pIdx->tid = TABLE_TID(pTable);
pIdx->uid = TABLE_UID(pTable);
pIdx->hasLast = pBlock->last ? 1 : 0;
pIdx->maxKey = pBlock->keyLast;
pIdx->numOfBlocks = (uint32_t)nSupBlocks;
pIdx->len = tlen;
pIdx->offset = (uint32_t)offset;
pSet = tsdbFSIterNext(&(commith.fsIter));
} else {
// Has memory data to commit
SDFileSet *pCSet;
int cfid;
return 0;
}
if (pSet == NULL || pSet->fid > fid) {
// Commit to a new FSET with fid: fid
pCSet = NULL;
cfid = fid;
} else {
// Commit to an existing FSET
pCSet = pSet;
cfid = pSet->fid;
pSet = tsdbFSIterNext(&(commith.fsIter));
}
int tsdbWriteBlockIdx(SDFile *pHeadf, SArray *pIdxA, void **ppBuf) {
SBlockIdx *pBlkIdx;
size_t nidx = taosArrayGetSize(pIdxA);
int tlen = 0, size;
int64_t offset;
if (tsdbCommitToFile(&commith, pCSet, cfid) < 0) {
tsdbDestroyCommitH(&commith);
return -1;
}
if (nidx <= 0) {
// All data are deleted
pHeadf->info.offset = 0;
pHeadf->info.len = 0;
return 0;
fid = tsdbNextCommitFid(&commith);
}
}
for (size_t i = 0; i < nidx; i++) {
pBlkIdx = (SBlockIdx *)taosArrayGet(pIdxA, i);
tsdbDestroyCommitH(&commith);
tsdbEndCommit(pRepo, TSDB_CODE_SUCCESS);
size = tsdbEncodeSBlockIdx(NULL, pBlkIdx);
if (tsdbMakeRoom(ppBuf, tlen + size) < 0) return -1;
return 0;
}
void *ptr = POINTER_SHIFT(*ppBuf, tlen);
tsdbEncodeSBlockIdx(&ptr, pBlkIdx);
void tsdbGetRtnSnap(STsdb *pRepo, SRtn *pRtn) {
STsdbCfg *pCfg = REPO_CFG(pRepo);
TSKEY minKey, midKey, maxKey, now;
tlen += size;
}
now = taosGetTimestamp(pCfg->precision);
minKey = now - pCfg->keep * tsTickPerDay[pCfg->precision];
midKey = now - pCfg->keep2 * tsTickPerDay[pCfg->precision];
maxKey = now - pCfg->keep1 * tsTickPerDay[pCfg->precision];
tlen += sizeof(TSCKSUM);
if (tsdbMakeRoom(ppBuf, tlen) < 0) return -1;
taosCalcChecksumAppend(0, (uint8_t *)(*ppBuf), tlen);
pRtn->minKey = minKey;
pRtn->minFid = (int)(TSDB_KEY_FID(minKey, pCfg->daysPerFile, pCfg->precision));
pRtn->midFid = (int)(TSDB_KEY_FID(midKey, pCfg->daysPerFile, pCfg->precision));
pRtn->maxFid = (int)(TSDB_KEY_FID(maxKey, pCfg->daysPerFile, pCfg->precision));
tsdbDebug("vgId:%d now:%" PRId64 " minKey:%" PRId64 " minFid:%d, midFid:%d, maxFid:%d", REPO_ID(pRepo), now, minKey,
pRtn->minFid, pRtn->midFid, pRtn->maxFid);
}
if (tsdbAppendDFile(pHeadf, *ppBuf, tlen, &offset) < tlen) {
return -1;
}
static void tsdbStartCommit(STsdb *pRepo) {
STsdbMemTable *pMem = pRepo->imem;
tsdbUpdateDFileMagic(pHeadf, POINTER_SHIFT(*ppBuf, tlen - sizeof(TSCKSUM)));
pHeadf->info.offset = (uint32_t)offset;
pHeadf->info.len = tlen;
tsdbInfo("vgId:%d start to commit", REPO_ID(pRepo));
return 0;
tsdbStartFSTxn(pRepo, 0, 0);
}
static void tsdbEndCommit(STsdb *pTsdb, int eno) {
tsdbEndFSTxn(pTsdb);
tsdbFreeMemTable(pTsdb, pTsdb->imem);
pTsdb->imem = NULL;
tsdbInfo("vgId:%d commit over, %s", REPO_ID(pTsdb), (eno == TSDB_CODE_SUCCESS) ? "succeed" : "failed");
}
// =================== Commit Meta Data
static int tsdbInitCommitMetaFile(STsdbRepo *pRepo, SMFile* pMf, bool open) {
STsdbFS * pfs = REPO_FS(pRepo);
SMFile * pOMFile = pfs->cstatus->pmf;
SDiskID did;
static int tsdbInitCommitH(SCommitH *pCommith, STsdb *pRepo) {
STsdbCfg *pCfg = REPO_CFG(pRepo);
// Create/Open a meta file or open the existing file
if (pOMFile == NULL) {
// Create a new meta file
did.level = TFS_PRIMARY_LEVEL;
did.id = TFS_PRIMARY_ID;
tsdbInitMFile(pMf, did, REPO_ID(pRepo), FS_TXN_VERSION(REPO_FS(pRepo)));
memset(pCommith, 0, sizeof(*pCommith));
tsdbGetRtnSnap(pRepo, &(pCommith->rtn));
if (open && tsdbCreateMFile(pMf, true) < 0) {
tsdbError("vgId:%d failed to create META file since %s", REPO_ID(pRepo), tstrerror(terrno));
return -1;
}
TSDB_FSET_SET_CLOSED(TSDB_COMMIT_WRITE_FSET(pCommith));
tsdbInfo("vgId:%d meta file %s is created to commit", REPO_ID(pRepo), TSDB_FILE_FULL_NAME(pMf));
} else {
tsdbInitMFileEx(pMf, pOMFile);
if (open && tsdbOpenMFile(pMf, O_WRONLY) < 0) {
tsdbError("vgId:%d failed to open META file since %s", REPO_ID(pRepo), tstrerror(terrno));
// Init read handle
if (tsdbInitReadH(&(pCommith->readh), pRepo) < 0) {
return -1;
}
}
return 0;
}
// Init file iterator
tsdbFSIterInit(&(pCommith->fsIter), REPO_FS(pRepo), TSDB_FS_ITER_FORWARD);
static int tsdbCommitMeta(STsdbRepo *pRepo) {
STsdbFS * pfs = REPO_FS(pRepo);
SMemTable *pMem = pRepo->imem;
SMFile * pOMFile = pfs->cstatus->pmf;
SMFile mf;
SActObj * pAct = NULL;
SActCont * pCont = NULL;
SListNode *pNode = NULL;
ASSERT(pOMFile != NULL || listNEles(pMem->actList) > 0);
if (listNEles(pMem->actList) <= 0) {
// no meta data to commit, just keep the old meta file
tsdbUpdateMFile(pfs, pOMFile);
if (tsTsdbMetaCompactRatio > 0) {
if (tsdbInitCommitMetaFile(pRepo, &mf, false) < 0) {
if (tsdbCreateCommitIters(pCommith) < 0) {
tsdbDestroyCommitH(pCommith);
return -1;
}
int ret = tsdbCompactMetaFile(pRepo, pfs, &mf);
if (ret < 0) tsdbError("compact meta file error");
return ret;
}
return 0;
} else {
if (tsdbInitCommitMetaFile(pRepo, &mf, true) < 0) {
pCommith->aBlkIdx = taosArrayInit(1024, sizeof(SBlockIdx));
if (pCommith->aBlkIdx == NULL) {
terrno = TSDB_CODE_TDB_OUT_OF_MEMORY;
tsdbDestroyCommitH(pCommith);
return -1;
}
}
// Loop to write
while ((pNode = tdListPopHead(pMem->actList)) != NULL) {
pAct = (SActObj *)pNode->data;
if (pAct->act == TSDB_UPDATE_META) {
pCont = (SActCont *)POINTER_SHIFT(pAct, sizeof(SActObj));
if (tsdbUpdateMetaRecord(pfs, &mf, pAct->uid, (void *)(pCont->cont), pCont->len, false) < 0) {
tsdbError("vgId:%d failed to update META record, uid %" PRIu64 " since %s", REPO_ID(pRepo), pAct->uid,
tstrerror(terrno));
tsdbCloseMFile(&mf);
(void)tsdbApplyMFileChange(&mf, pOMFile);
// TODO: need to reload metaCache
pCommith->aSupBlk = taosArrayInit(1024, sizeof(SBlock));
if (pCommith->aSupBlk == NULL) {
terrno = TSDB_CODE_TDB_OUT_OF_MEMORY;
tsdbDestroyCommitH(pCommith);
return -1;
}
} else if (pAct->act == TSDB_DROP_META) {
if (tsdbDropMetaRecord(pfs, &mf, pAct->uid) < 0) {
tsdbError("vgId:%d failed to drop META record, uid %" PRIu64 " since %s", REPO_ID(pRepo), pAct->uid,
tstrerror(terrno));
tsdbCloseMFile(&mf);
tsdbApplyMFileChange(&mf, pOMFile);
// TODO: need to reload metaCache
return -1;
}
} else {
ASSERT(false);
}
}
if (tsdbUpdateMFileHeader(&mf) < 0) {
tsdbError("vgId:%d failed to update META file header since %s, revert it", REPO_ID(pRepo), tstrerror(terrno));
tsdbApplyMFileChange(&mf, pOMFile);
// TODO: need to reload metaCache
return -1;
}
TSDB_FILE_FSYNC(&mf);
tsdbCloseMFile(&mf);
tsdbUpdateMFile(pfs, &mf);
if (tsTsdbMetaCompactRatio > 0 && tsdbCompactMetaFile(pRepo, pfs, &mf) < 0) {
tsdbError("compact meta file error");
}
return 0;
}
int tsdbEncodeKVRecord(void **buf, SKVRecord *pRecord) {
int tlen = 0;
tlen += taosEncodeFixedU64(buf, pRecord->uid);
tlen += taosEncodeFixedI64(buf, pRecord->offset);
tlen += taosEncodeFixedI64(buf, pRecord->size);
return tlen;
}
void *tsdbDecodeKVRecord(void *buf, SKVRecord *pRecord) {
buf = taosDecodeFixedU64(buf, &(pRecord->uid));
buf = taosDecodeFixedI64(buf, &(pRecord->offset));
buf = taosDecodeFixedI64(buf, &(pRecord->size));
return buf;
}
static int tsdbUpdateMetaRecord(STsdbFS *pfs, SMFile *pMFile, uint64_t uid, void *cont, int contLen, bool compact) {
char buf[64] = "\0";
void * pBuf = buf;
SKVRecord rInfo;
int64_t offset;
// Seek to end of meta file
offset = tsdbSeekMFile(pMFile, 0, SEEK_END);
if (offset < 0) {
return -1;
}
rInfo.offset = offset;
rInfo.uid = uid;
rInfo.size = contLen;
int tlen = tsdbEncodeKVRecord((void **)(&pBuf), &rInfo);
if (tsdbAppendMFile(pMFile, buf, tlen, NULL) < tlen) {
return -1;
}
if (tsdbAppendMFile(pMFile, cont, contLen, NULL) < contLen) {
return -1;
}
tsdbUpdateMFileMagic(pMFile, POINTER_SHIFT(cont, contLen - sizeof(TSCKSUM)));
SHashObj* cache = compact ? pfs->metaCacheComp : pfs->metaCache;
pMFile->info.nRecords++;
SKVRecord *pRecord = taosHashGet(cache, (void *)&uid, sizeof(uid));
if (pRecord != NULL) {
pMFile->info.tombSize += (pRecord->size + sizeof(SKVRecord));
} else {
pMFile->info.nRecords++;
}
taosHashPut(cache, (void *)(&uid), sizeof(uid), (void *)(&rInfo), sizeof(rInfo));
return 0;
}
static int tsdbDropMetaRecord(STsdbFS *pfs, SMFile *pMFile, uint64_t uid) {
SKVRecord rInfo = {0};
char buf[128] = "\0";
SKVRecord *pRecord = taosHashGet(pfs->metaCache, (void *)(&uid), sizeof(uid));
if (pRecord == NULL) {
tsdbError("failed to drop META record with key %" PRIu64 " since not find", uid);
pCommith->aSubBlk = taosArrayInit(1024, sizeof(SBlock));
if (pCommith->aSubBlk == NULL) {
terrno = TSDB_CODE_TDB_OUT_OF_MEMORY;
tsdbDestroyCommitH(pCommith);
return -1;
}
rInfo.offset = -pRecord->offset;
rInfo.uid = pRecord->uid;
rInfo.size = pRecord->size;
void *pBuf = buf;
tsdbEncodeKVRecord(&pBuf, &rInfo);
if (tsdbAppendMFile(pMFile, buf, sizeof(SKVRecord), NULL) < 0) {
pCommith->pDataCols = tdNewDataCols(0, pCfg->maxRowsPerFileBlock);
if (pCommith->pDataCols == NULL) {
terrno = TSDB_CODE_TDB_OUT_OF_MEMORY;
tsdbDestroyCommitH(pCommith);
return -1;
}
pMFile->info.magic = taosCalcChecksum(pMFile->info.magic, (uint8_t *)buf, sizeof(SKVRecord));
pMFile->info.nDels++;
pMFile->info.nRecords--;
pMFile->info.tombSize += (rInfo.size + sizeof(SKVRecord) * 2);
taosHashRemove(pfs->metaCache, (void *)(&uid), sizeof(uid));
return 0;
}
static int tsdbCompactMetaFile(STsdbRepo *pRepo, STsdbFS *pfs, SMFile *pMFile) {
float delPercent = (float)(pMFile->info.nDels) / (float)(pMFile->info.nRecords);
float tombPercent = (float)(pMFile->info.tombSize) / (float)(pMFile->info.size);
float compactRatio = (float)(tsTsdbMetaCompactRatio)/100;
if (delPercent < compactRatio && tombPercent < compactRatio) {
return 0;
}
if (tsdbOpenMFile(pMFile, O_RDONLY) < 0) {
tsdbError("open meta file %s compact fail", pMFile->f.rname);
return -1;
}
tsdbInfo("begin compact tsdb meta file, ratio:%d, nDels:%" PRId64 ",nRecords:%" PRId64 ",tombSize:%" PRId64 ",size:%" PRId64,
tsTsdbMetaCompactRatio, pMFile->info.nDels,pMFile->info.nRecords,pMFile->info.tombSize,pMFile->info.size);
SMFile mf;
SDiskID did;
// first create tmp meta file
did.level = TFS_PRIMARY_LEVEL;
did.id = TFS_PRIMARY_ID;
tsdbInitMFile(&mf, did, REPO_ID(pRepo), FS_TXN_VERSION(REPO_FS(pRepo)) + 1);
if (tsdbCreateMFile(&mf, true) < 0) {
tsdbError("vgId:%d failed to create META file since %s", REPO_ID(pRepo), tstrerror(terrno));
return -1;
}
tsdbInfo("vgId:%d meta file %s is created to compact meta data", REPO_ID(pRepo), TSDB_FILE_FULL_NAME(&mf));
// second iterator metaCache
int code = -1;
int64_t maxBufSize = 1024;
SKVRecord *pRecord;
void *pBuf = NULL;
pBuf = malloc((size_t)maxBufSize);
if (pBuf == NULL) {
goto _err;
}
// init Comp
assert(pfs->metaCacheComp == NULL);
pfs->metaCacheComp = taosHashInit(4096, taosGetDefaultHashFunction(TSDB_DATA_TYPE_BIGINT), true, HASH_NO_LOCK);
if (pfs->metaCacheComp == NULL) {
goto _err;
}
pRecord = taosHashIterate(pfs->metaCache, NULL);
while (pRecord) {
if (tsdbSeekMFile(pMFile, pRecord->offset + sizeof(SKVRecord), SEEK_SET) < 0) {
tsdbError("vgId:%d failed to seek file %s since %s", REPO_ID(pRepo), TSDB_FILE_FULL_NAME(pMFile),
tstrerror(terrno));
goto _err;
}
if (pRecord->size > maxBufSize) {
maxBufSize = pRecord->size;
void* tmp = realloc(pBuf, (size_t)maxBufSize);
if (tmp == NULL) {
goto _err;
}
pBuf = tmp;
}
int nread = (int)tsdbReadMFile(pMFile, pBuf, pRecord->size);
if (nread < 0) {
tsdbError("vgId:%d failed to read file %s since %s", REPO_ID(pRepo), TSDB_FILE_FULL_NAME(pMFile),
tstrerror(terrno));
goto _err;
}
if (nread < pRecord->size) {
tsdbError("vgId:%d failed to read file %s since file corrupted, expected read:%" PRId64 " actual read:%d",
REPO_ID(pRepo), TSDB_FILE_FULL_NAME(pMFile), pRecord->size, nread);
goto _err;
}
if (tsdbUpdateMetaRecord(pfs, &mf, pRecord->uid, pBuf, (int)pRecord->size, true) < 0) {
tsdbError("vgId:%d failed to update META record, uid %" PRIu64 " since %s", REPO_ID(pRepo), pRecord->uid,
tstrerror(terrno));
goto _err;
}
pRecord = taosHashIterate(pfs->metaCache, pRecord);
}
code = 0;
_err:
if (code == 0) TSDB_FILE_FSYNC(&mf);
tsdbCloseMFile(&mf);
tsdbCloseMFile(pMFile);
if (code == 0) {
// rename meta.tmp -> meta
tsdbInfo("vgId:%d meta file rename %s -> %s", REPO_ID(pRepo), TSDB_FILE_FULL_NAME(&mf), TSDB_FILE_FULL_NAME(pMFile));
taosRename(mf.f.aname,pMFile->f.aname);
tstrncpy(mf.f.aname, pMFile->f.aname, TSDB_FILENAME_LEN);
tstrncpy(mf.f.rname, pMFile->f.rname, TSDB_FILENAME_LEN);
// update current meta file info
pfs->nstatus->pmf = NULL;
tsdbUpdateMFile(pfs, &mf);
// Skip all keys until key (not included)
static void tsdbSeekCommitIter(SCommitH *pCommith, TSKEY key) {
for (int i = 0; i < pCommith->niters; i++) {
SCommitIter *pIter = pCommith->iters + i;
if (pIter->pTable == NULL || pIter->pIter == NULL) continue;
taosHashCleanup(pfs->metaCache);
pfs->metaCache = pfs->metaCacheComp;
pfs->metaCacheComp = NULL;
} else {
// remove meta.tmp file
remove(mf.f.aname);
taosHashCleanup(pfs->metaCacheComp);
pfs->metaCacheComp = NULL;
tsdbLoadDataFromCache(pIter->pTable, pIter->pIter, key - 1, INT32_MAX, NULL, NULL, 0, true, NULL);
}
tfree(pBuf);
ASSERT(mf.info.nDels == 0);
ASSERT(mf.info.tombSize == 0);
tsdbInfo("end compact tsdb meta file,code:%d,nRecords:%" PRId64 ",size:%" PRId64,
code,mf.info.nRecords,mf.info.size);
return code;
}
// =================== Commit Time-Series Data
static int tsdbCommitTSData(STsdbRepo *pRepo) {
SMemTable *pMem = pRepo->imem;
SCommitH commith;
SDFileSet *pSet = NULL;
int fid;
memset(&commith, 0, sizeof(commith));
if (pMem->numOfRows <= 0) {
// No memory data, just apply retention on each file on disk
if (tsdbApplyRtn(pRepo) < 0) {
return -1;
}
return 0;
}
// Resource initialization
if (tsdbInitCommitH(&commith, pRepo) < 0) {
return -1;
}
// Skip expired memory data and expired FSET
tsdbSeekCommitIter(&commith, commith.rtn.minKey);
while ((pSet = tsdbFSIterNext(&(commith.fsIter)))) {
if (pSet->fid < commith.rtn.minFid) {
tsdbInfo("vgId:%d FSET %d on level %d disk id %d expires, remove it", REPO_ID(pRepo), pSet->fid,
TSDB_FSET_LEVEL(pSet), TSDB_FSET_ID(pSet));
} else {
break;
}
}
// Loop to commit to each file
fid = tsdbNextCommitFid(&(commith));
while (true) {
// Loop over both on disk and memory
if (pSet == NULL && fid == TSDB_IVLD_FID) break;
if (pSet && (fid == TSDB_IVLD_FID || pSet->fid < fid)) {
// Only has existing FSET but no memory data to commit in this
// existing FSET, only check if file in correct retention
if (tsdbApplyRtnOnFSet(pRepo, pSet, &(commith.rtn)) < 0) {
tsdbDestroyCommitH(&commith);
return -1;
}
static int tsdbNextCommitFid(SCommitH *pCommith) {
STsdb * pRepo = TSDB_COMMIT_REPO(pCommith);
STsdbCfg *pCfg = REPO_CFG(pRepo);
int fid = TSDB_IVLD_FID;
pSet = tsdbFSIterNext(&(commith.fsIter));
} else {
// Has memory data to commit
SDFileSet *pCSet;
int cfid;
for (int i = 0; i < pCommith->niters; i++) {
SCommitIter *pIter = pCommith->iters + i;
// if (pIter->pTable == NULL || pIter->pIter == NULL) continue;
if (pSet == NULL || pSet->fid > fid) {
// Commit to a new FSET with fid: fid
pCSet = NULL;
cfid = fid;
TSKEY nextKey = tsdbNextIterKey(pIter->pIter);
if (nextKey == TSDB_DATA_TIMESTAMP_NULL) {
continue;
} else {
// Commit to an existing FSET
pCSet = pSet;
cfid = pSet->fid;
pSet = tsdbFSIterNext(&(commith.fsIter));
}
if (tsdbCommitToFile(&commith, pCSet, cfid) < 0) {
tsdbDestroyCommitH(&commith);
return -1;
}
fid = tsdbNextCommitFid(&commith);
int tfid = (int)(TSDB_KEY_FID(nextKey, pCfg->daysPerFile, pCfg->precision));
if (fid == TSDB_IVLD_FID || fid > tfid) {
fid = tfid;
}
}
tsdbDestroyCommitH(&commith);
return 0;
}
static void tsdbStartCommit(STsdbRepo *pRepo) {
SMemTable *pMem = pRepo->imem;
ASSERT(pMem->numOfRows > 0 || listNEles(pMem->actList) > 0);
tsdbInfo("vgId:%d start to commit! keyFirst %" PRId64 " keyLast %" PRId64 " numOfRows %" PRId64 " meta rows: %d",
REPO_ID(pRepo), pMem->keyFirst, pMem->keyLast, pMem->numOfRows, listNEles(pMem->actList));
tsdbStartFSTxn(pRepo, pMem->pointsAdd, pMem->storageAdd);
pRepo->code = TSDB_CODE_SUCCESS;
}
static void tsdbEndCommit(STsdbRepo *pRepo, int eno) {
if (eno != TSDB_CODE_SUCCESS) {
tsdbEndFSTxnWithError(REPO_FS(pRepo));
} else {
tsdbEndFSTxn(pRepo);
}
tsdbInfo("vgId:%d commit over, %s", REPO_ID(pRepo), (eno == TSDB_CODE_SUCCESS) ? "succeed" : "failed");
if (pRepo->appH.notifyStatus) pRepo->appH.notifyStatus(pRepo->appH.appH, TSDB_STATUS_COMMIT_OVER, eno);
SMemTable *pIMem = pRepo->imem;
(void)tsdbLockRepo(pRepo);
pRepo->imem = NULL;
(void)tsdbUnlockRepo(pRepo);
tsdbUnRefMemTable(pRepo, pIMem);
tsem_post(&(pRepo->readyToCommit));
return fid;
}
#if 0
static bool tsdbHasDataToCommit(SCommitIter *iters, int nIters, TSKEY minKey, TSKEY maxKey) {
for (int i = 0; i < nIters; i++) {
TSKEY nextKey = tsdbNextIterKey((iters + i)->pIter);
if (nextKey != TSDB_DATA_TIMESTAMP_NULL && (nextKey >= minKey && nextKey <= maxKey)) return true;
}
return false;
static void tsdbDestroyCommitH(SCommitH *pCommith) {
pCommith->pDataCols = tdFreeDataCols(pCommith->pDataCols);
pCommith->aSubBlk = taosArrayDestroy(pCommith->aSubBlk);
pCommith->aSupBlk = taosArrayDestroy(pCommith->aSupBlk);
pCommith->aBlkIdx = taosArrayDestroy(pCommith->aBlkIdx);
tsdbDestroyCommitIters(pCommith);
tsdbDestroyReadH(&(pCommith->readh));
tsdbCloseDFileSet(TSDB_COMMIT_WRITE_FSET(pCommith));
}
#endif
static int tsdbCommitToFile(SCommitH *pCommith, SDFileSet *pSet, int fid) {
STsdbRepo *pRepo = TSDB_COMMIT_REPO(pCommith);
STsdbCfg * pCfg = REPO_CFG(pRepo);
STsdb * pRepo = TSDB_COMMIT_REPO(pCommith);
STsdbCfg *pCfg = REPO_CFG(pRepo);
ASSERT(pSet == NULL || pSet->fid == fid);
......@@ -766,7 +351,7 @@ static int tsdbCommitToFile(SCommitH *pCommith, SDFileSet *pSet, int fid) {
}
// Loop to commit each table data
for (int tid = 1; tid < pCommith->niters; tid++) {
for (int tid = 0; tid < pCommith->niters; tid++) {
SCommitIter *pIter = pCommith->iters + tid;
if (pIter->pTable == NULL) continue;
......@@ -807,39 +392,39 @@ static int tsdbCommitToFile(SCommitH *pCommith, SDFileSet *pSet, int fid) {
}
static int tsdbCreateCommitIters(SCommitH *pCommith) {
STsdbRepo *pRepo = TSDB_COMMIT_REPO(pCommith);
SMemTable *pMem = pRepo->imem;
STsdbMeta *pMeta = pRepo->tsdbMeta;
pCommith->niters = pMem->maxTables;
pCommith->iters = (SCommitIter *)calloc(pMem->maxTables, sizeof(SCommitIter));
STsdb * pRepo = TSDB_COMMIT_REPO(pCommith);
STsdbMemTable * pMem = pRepo->imem;
SSkipListIterator *pSlIter;
SCommitIter * pCommitIter;
SSkipListNode * pNode;
STbData * pTbData;
pCommith->niters = SL_SIZE(pMem->pSlIdx);
pCommith->iters = (SCommitIter *)calloc(pCommith->niters, sizeof(SCommitIter));
if (pCommith->iters == NULL) {
terrno = TSDB_CODE_TDB_OUT_OF_MEMORY;
return -1;
}
if (tsdbRLockRepoMeta(pRepo) < 0) return -1;
// reference all tables
for (int i = 0; i < pMem->maxTables; i++) {
if (pMeta->tables[i] != NULL) {
tsdbRefTable(pMeta->tables[i]);
pCommith->iters[i].pTable = pMeta->tables[i];
}
}
if (tsdbUnlockRepoMeta(pRepo) < 0) return -1;
for (int i = 0; i < pMem->maxTables; i++) {
if ((pCommith->iters[i].pTable != NULL) && (pMem->tData[i] != NULL) &&
(TABLE_UID(pCommith->iters[i].pTable) == pMem->tData[i]->uid)) {
if ((pCommith->iters[i].pIter = tSkipListCreateIter(pMem->tData[i]->pData)) == NULL) {
// Loop to create iters for each skiplist
pSlIter = tSkipListCreateIter(pMem->pSlIdx);
if (pSlIter == NULL) {
terrno = TSDB_CODE_TDB_OUT_OF_MEMORY;
return -1;
}
for (int i = 0; i < pCommith->niters; i++) {
tSkipListIterNext(pSlIter);
pNode = tSkipListIterGet(pSlIter);
pTbData = (STbData *)pNode->pData;
tSkipListIterNext(pCommith->iters[i].pIter);
}
pCommitIter = pCommith->iters + i;
pCommitIter->pIter = tSkipListCreateIter(pTbData->pData);
tSkipListIterNext(pCommitIter->pIter);
pCommitIter->pTable = (STable *)malloc(sizeof(STable));
pCommitIter->pTable->uid = pTbData->uid;
pCommitIter->pTable->tid = pTbData->uid;
pCommitIter->pTable->pSchema = metaGetTbTSchema(pRepo->pMeta, pTbData->uid, 0);
}
return 0;
......@@ -849,10 +434,9 @@ static void tsdbDestroyCommitIters(SCommitH *pCommith) {
if (pCommith->iters == NULL) return;
for (int i = 1; i < pCommith->niters; i++) {
if (pCommith->iters[i].pTable != NULL) {
tsdbUnRefTable(pCommith->iters[i].pTable);
tSkipListDestroyIter(pCommith->iters[i].pIter);
}
tdFreeSchema(pCommith->iters[i].pTable->pSchema);
free(pCommith->iters[i].pTable);
}
free(pCommith->iters);
......@@ -860,118 +444,583 @@ static void tsdbDestroyCommitIters(SCommitH *pCommith) {
pCommith->niters = 0;
}
// Skip all keys until key (not included)
static void tsdbSeekCommitIter(SCommitH *pCommith, TSKEY key) {
for (int i = 0; i < pCommith->niters; i++) {
SCommitIter *pIter = pCommith->iters + i;
if (pIter->pTable == NULL || pIter->pIter == NULL) continue;
static void tsdbResetCommitFile(SCommitH *pCommith) {
pCommith->isRFileSet = false;
pCommith->isDFileSame = false;
pCommith->isLFileSame = false;
taosArrayClear(pCommith->aBlkIdx);
}
tsdbLoadDataFromCache(pIter->pTable, pIter->pIter, key - 1, INT32_MAX, NULL, NULL, 0, true, NULL);
static int tsdbSetAndOpenCommitFile(SCommitH *pCommith, SDFileSet *pSet, int fid) {
SDiskID did;
STsdb * pRepo = TSDB_COMMIT_REPO(pCommith);
SDFileSet *pWSet = TSDB_COMMIT_WRITE_FSET(pCommith);
tfsAllocDisk(tsdbGetFidLevel(fid, &(pCommith->rtn)), &(did.level), &(did.id));
if (did.level == TFS_UNDECIDED_LEVEL) {
terrno = TSDB_CODE_TDB_NO_AVAIL_DISK;
return -1;
}
}
static int tsdbInitCommitH(SCommitH *pCommith, STsdbRepo *pRepo) {
STsdbCfg *pCfg = REPO_CFG(pRepo);
// Open read FSET
if (pSet) {
if (tsdbSetAndOpenReadFSet(&(pCommith->readh), pSet) < 0) {
return -1;
}
memset(pCommith, 0, sizeof(*pCommith));
tsdbGetRtnSnap(pRepo, &(pCommith->rtn));
pCommith->isRFileSet = true;
TSDB_FSET_SET_CLOSED(TSDB_COMMIT_WRITE_FSET(pCommith));
if (tsdbLoadBlockIdx(&(pCommith->readh)) < 0) {
tsdbCloseAndUnsetFSet(&(pCommith->readh));
return -1;
}
// Init read handle
if (tsdbInitReadH(&(pCommith->readh), pRepo) < 0) {
tsdbDebug("vgId:%d FSET %d at level %d disk id %d is opened to read to commit", REPO_ID(pRepo), TSDB_FSET_FID(pSet),
TSDB_FSET_LEVEL(pSet), TSDB_FSET_ID(pSet));
} else {
pCommith->isRFileSet = false;
}
// Set and open commit FSET
if (pSet == NULL || did.level > TSDB_FSET_LEVEL(pSet)) {
// Create a new FSET to write data
tsdbInitDFileSet(pWSet, did, REPO_ID(pRepo), fid, FS_TXN_VERSION(REPO_FS(pRepo)));
if (tsdbCreateDFileSet(pWSet, true) < 0) {
tsdbError("vgId:%d failed to create FSET %d at level %d disk id %d since %s", REPO_ID(pRepo),
TSDB_FSET_FID(pWSet), TSDB_FSET_LEVEL(pWSet), TSDB_FSET_ID(pWSet), tstrerror(terrno));
if (pCommith->isRFileSet) {
tsdbCloseAndUnsetFSet(&(pCommith->readh));
}
return -1;
}
// Init file iterator
tsdbFSIterInit(&(pCommith->fsIter), REPO_FS(pRepo), TSDB_FS_ITER_FORWARD);
pCommith->isDFileSame = false;
pCommith->isLFileSame = false;
tsdbDebug("vgId:%d FSET %d at level %d disk id %d is created to commit", REPO_ID(pRepo), TSDB_FSET_FID(pWSet),
TSDB_FSET_LEVEL(pWSet), TSDB_FSET_ID(pWSet));
} else {
did.level = TSDB_FSET_LEVEL(pSet);
did.id = TSDB_FSET_ID(pSet);
pCommith->wSet.fid = fid;
pCommith->wSet.state = 0;
// TSDB_FILE_HEAD
SDFile *pWHeadf = TSDB_COMMIT_HEAD_FILE(pCommith);
tsdbInitDFile(pWHeadf, did, REPO_ID(pRepo), fid, FS_TXN_VERSION(REPO_FS(pRepo)), TSDB_FILE_HEAD);
if (tsdbCreateDFile(pWHeadf, true) < 0) {
tsdbError("vgId:%d failed to create file %s to commit since %s", REPO_ID(pRepo), TSDB_FILE_FULL_NAME(pWHeadf),
tstrerror(terrno));
if (pCommith->isRFileSet) {
tsdbCloseAndUnsetFSet(&(pCommith->readh));
return -1;
}
}
// TSDB_FILE_DATA
SDFile *pRDataf = TSDB_READ_DATA_FILE(&(pCommith->readh));
SDFile *pWDataf = TSDB_COMMIT_DATA_FILE(pCommith);
tsdbInitDFileEx(pWDataf, pRDataf);
if (tsdbOpenDFile(pWDataf, O_WRONLY) < 0) {
tsdbError("vgId:%d failed to open file %s to commit since %s", REPO_ID(pRepo), TSDB_FILE_FULL_NAME(pWDataf),
tstrerror(terrno));
tsdbCloseDFileSet(pWSet);
tsdbRemoveDFile(pWHeadf);
if (pCommith->isRFileSet) {
tsdbCloseAndUnsetFSet(&(pCommith->readh));
return -1;
}
}
pCommith->isDFileSame = true;
// TSDB_FILE_LAST
SDFile *pRLastf = TSDB_READ_LAST_FILE(&(pCommith->readh));
SDFile *pWLastf = TSDB_COMMIT_LAST_FILE(pCommith);
if (pRLastf->info.size < 32 * 1024) {
tsdbInitDFileEx(pWLastf, pRLastf);
pCommith->isLFileSame = true;
if (tsdbOpenDFile(pWLastf, O_WRONLY) < 0) {
tsdbError("vgId:%d failed to open file %s to commit since %s", REPO_ID(pRepo), TSDB_FILE_FULL_NAME(pWLastf),
tstrerror(terrno));
tsdbCloseDFileSet(pWSet);
tsdbRemoveDFile(pWHeadf);
if (pCommith->isRFileSet) {
tsdbCloseAndUnsetFSet(&(pCommith->readh));
return -1;
}
}
} else {
tsdbInitDFile(pWLastf, did, REPO_ID(pRepo), fid, FS_TXN_VERSION(REPO_FS(pRepo)), TSDB_FILE_LAST);
pCommith->isLFileSame = false;
if (tsdbCreateDFile(pWLastf, true) < 0) {
tsdbError("vgId:%d failed to create file %s to commit since %s", REPO_ID(pRepo), TSDB_FILE_FULL_NAME(pWLastf),
tstrerror(terrno));
tsdbCloseDFileSet(pWSet);
(void)tsdbRemoveDFile(pWHeadf);
if (pCommith->isRFileSet) {
tsdbCloseAndUnsetFSet(&(pCommith->readh));
return -1;
}
}
}
}
return 0;
}
// extern int32_t tsTsdbMetaCompactRatio;
int tsdbWriteBlockInfoImpl(SDFile *pHeadf, STable *pTable, SArray *pSupA, SArray *pSubA, void **ppBuf,
SBlockIdx *pIdx) {
size_t nSupBlocks;
size_t nSubBlocks;
uint32_t tlen;
SBlockInfo *pBlkInfo;
int64_t offset;
SBlock * pBlock;
memset(pIdx, 0, sizeof(*pIdx));
nSupBlocks = taosArrayGetSize(pSupA);
nSubBlocks = (pSubA == NULL) ? 0 : taosArrayGetSize(pSubA);
if (nSupBlocks <= 0) {
// No data (data all deleted)
return 0;
}
tlen = (uint32_t)(sizeof(SBlockInfo) + sizeof(SBlock) * (nSupBlocks + nSubBlocks) + sizeof(TSCKSUM));
if (tsdbMakeRoom(ppBuf, tlen) < 0) return -1;
pBlkInfo = *ppBuf;
pBlkInfo->delimiter = TSDB_FILE_DELIMITER;
pBlkInfo->tid = TABLE_TID(pTable);
pBlkInfo->uid = TABLE_UID(pTable);
memcpy((void *)(pBlkInfo->blocks), taosArrayGet(pSupA, 0), nSupBlocks * sizeof(SBlock));
if (nSubBlocks > 0) {
memcpy((void *)(pBlkInfo->blocks + nSupBlocks), taosArrayGet(pSubA, 0), nSubBlocks * sizeof(SBlock));
for (int i = 0; i < nSupBlocks; i++) {
pBlock = pBlkInfo->blocks + i;
if (tsdbCreateCommitIters(pCommith) < 0) {
tsdbDestroyCommitH(pCommith);
return -1;
if (pBlock->numOfSubBlocks > 1) {
pBlock->offset += (sizeof(SBlockInfo) + sizeof(SBlock) * nSupBlocks);
}
pCommith->aBlkIdx = taosArrayInit(1024, sizeof(SBlockIdx));
if (pCommith->aBlkIdx == NULL) {
terrno = TSDB_CODE_TDB_OUT_OF_MEMORY;
tsdbDestroyCommitH(pCommith);
return -1;
}
pCommith->aSupBlk = taosArrayInit(1024, sizeof(SBlock));
if (pCommith->aSupBlk == NULL) {
terrno = TSDB_CODE_TDB_OUT_OF_MEMORY;
tsdbDestroyCommitH(pCommith);
return -1;
}
pCommith->aSubBlk = taosArrayInit(1024, sizeof(SBlock));
if (pCommith->aSubBlk == NULL) {
terrno = TSDB_CODE_TDB_OUT_OF_MEMORY;
tsdbDestroyCommitH(pCommith);
return -1;
}
taosCalcChecksumAppend(0, (uint8_t *)pBlkInfo, tlen);
pCommith->pDataCols = tdNewDataCols(0, pCfg->maxRowsPerFileBlock);
if (pCommith->pDataCols == NULL) {
terrno = TSDB_CODE_TDB_OUT_OF_MEMORY;
tsdbDestroyCommitH(pCommith);
if (tsdbAppendDFile(pHeadf, (void *)pBlkInfo, tlen, &offset) < 0) {
return -1;
}
tsdbUpdateDFileMagic(pHeadf, POINTER_SHIFT(pBlkInfo, tlen - sizeof(TSCKSUM)));
// Set pIdx
pBlock = taosArrayGetLast(pSupA);
pIdx->tid = TABLE_TID(pTable);
pIdx->uid = TABLE_UID(pTable);
pIdx->hasLast = pBlock->last ? 1 : 0;
pIdx->maxKey = pBlock->keyLast;
pIdx->numOfBlocks = (uint32_t)nSupBlocks;
pIdx->len = tlen;
pIdx->offset = (uint32_t)offset;
return 0;
}
static void tsdbDestroyCommitH(SCommitH *pCommith) {
pCommith->pDataCols = tdFreeDataCols(pCommith->pDataCols);
pCommith->aSubBlk = taosArrayDestroy(pCommith->aSubBlk);
pCommith->aSupBlk = taosArrayDestroy(pCommith->aSupBlk);
pCommith->aBlkIdx = taosArrayDestroy(pCommith->aBlkIdx);
tsdbDestroyCommitIters(pCommith);
tsdbDestroyReadH(&(pCommith->readh));
tsdbCloseDFileSet(TSDB_COMMIT_WRITE_FSET(pCommith));
}
int tsdbWriteBlockIdx(SDFile *pHeadf, SArray *pIdxA, void **ppBuf) {
SBlockIdx *pBlkIdx;
size_t nidx = taosArrayGetSize(pIdxA);
int tlen = 0, size;
int64_t offset;
static int tsdbNextCommitFid(SCommitH *pCommith) {
STsdbRepo *pRepo = TSDB_COMMIT_REPO(pCommith);
STsdbCfg * pCfg = REPO_CFG(pRepo);
int fid = TSDB_IVLD_FID;
if (nidx <= 0) {
// All data are deleted
pHeadf->info.offset = 0;
pHeadf->info.len = 0;
return 0;
}
for (int i = 0; i < pCommith->niters; i++) {
SCommitIter *pIter = pCommith->iters + i;
if (pIter->pTable == NULL || pIter->pIter == NULL) continue;
for (size_t i = 0; i < nidx; i++) {
pBlkIdx = (SBlockIdx *)taosArrayGet(pIdxA, i);
TSKEY nextKey = tsdbNextIterKey(pIter->pIter);
if (nextKey == TSDB_DATA_TIMESTAMP_NULL) {
continue;
} else {
int tfid = (int)(TSDB_KEY_FID(nextKey, pCfg->daysPerFile, pCfg->precision));
if (fid == TSDB_IVLD_FID || fid > tfid) {
fid = tfid;
}
size = tsdbEncodeSBlockIdx(NULL, pBlkIdx);
if (tsdbMakeRoom(ppBuf, tlen + size) < 0) return -1;
void *ptr = POINTER_SHIFT(*ppBuf, tlen);
tsdbEncodeSBlockIdx(&ptr, pBlkIdx);
tlen += size;
}
tlen += sizeof(TSCKSUM);
if (tsdbMakeRoom(ppBuf, tlen) < 0) return -1;
taosCalcChecksumAppend(0, (uint8_t *)(*ppBuf), tlen);
if (tsdbAppendDFile(pHeadf, *ppBuf, tlen, &offset) < tlen) {
return -1;
}
return fid;
tsdbUpdateDFileMagic(pHeadf, POINTER_SHIFT(*ppBuf, tlen - sizeof(TSCKSUM)));
pHeadf->info.offset = (uint32_t)offset;
pHeadf->info.len = tlen;
return 0;
}
// // =================== Commit Meta Data
// static int tsdbInitCommitMetaFile(STsdbRepo *pRepo, SMFile* pMf, bool open) {
// STsdbFS * pfs = REPO_FS(pRepo);
// SMFile * pOMFile = pfs->cstatus->pmf;
// SDiskID did;
// // Create/Open a meta file or open the existing file
// if (pOMFile == NULL) {
// // Create a new meta file
// did.level = TFS_PRIMARY_LEVEL;
// did.id = TFS_PRIMARY_ID;
// tsdbInitMFile(pMf, did, REPO_ID(pRepo), FS_TXN_VERSION(REPO_FS(pRepo)));
// if (open && tsdbCreateMFile(pMf, true) < 0) {
// tsdbError("vgId:%d failed to create META file since %s", REPO_ID(pRepo), tstrerror(terrno));
// return -1;
// }
// tsdbInfo("vgId:%d meta file %s is created to commit", REPO_ID(pRepo), TSDB_FILE_FULL_NAME(pMf));
// } else {
// tsdbInitMFileEx(pMf, pOMFile);
// if (open && tsdbOpenMFile(pMf, O_WRONLY) < 0) {
// tsdbError("vgId:%d failed to open META file since %s", REPO_ID(pRepo), tstrerror(terrno));
// return -1;
// }
// }
// return 0;
// }
// static int tsdbCommitMeta(STsdbRepo *pRepo) {
// STsdbFS * pfs = REPO_FS(pRepo);
// SMemTable *pMem = pRepo->imem;
// SMFile * pOMFile = pfs->cstatus->pmf;
// SMFile mf;
// SActObj * pAct = NULL;
// SActCont * pCont = NULL;
// SListNode *pNode = NULL;
// ASSERT(pOMFile != NULL || listNEles(pMem->actList) > 0);
// if (listNEles(pMem->actList) <= 0) {
// // no meta data to commit, just keep the old meta file
// tsdbUpdateMFile(pfs, pOMFile);
// if (tsTsdbMetaCompactRatio > 0) {
// if (tsdbInitCommitMetaFile(pRepo, &mf, false) < 0) {
// return -1;
// }
// int ret = tsdbCompactMetaFile(pRepo, pfs, &mf);
// if (ret < 0) tsdbError("compact meta file error");
// return ret;
// }
// return 0;
// } else {
// if (tsdbInitCommitMetaFile(pRepo, &mf, true) < 0) {
// return -1;
// }
// }
// // Loop to write
// while ((pNode = tdListPopHead(pMem->actList)) != NULL) {
// pAct = (SActObj *)pNode->data;
// if (pAct->act == TSDB_UPDATE_META) {
// pCont = (SActCont *)POINTER_SHIFT(pAct, sizeof(SActObj));
// if (tsdbUpdateMetaRecord(pfs, &mf, pAct->uid, (void *)(pCont->cont), pCont->len, false) < 0) {
// tsdbError("vgId:%d failed to update META record, uid %" PRIu64 " since %s", REPO_ID(pRepo), pAct->uid,
// tstrerror(terrno));
// tsdbCloseMFile(&mf);
// (void)tsdbApplyMFileChange(&mf, pOMFile);
// // TODO: need to reload metaCache
// return -1;
// }
// } else if (pAct->act == TSDB_DROP_META) {
// if (tsdbDropMetaRecord(pfs, &mf, pAct->uid) < 0) {
// tsdbError("vgId:%d failed to drop META record, uid %" PRIu64 " since %s", REPO_ID(pRepo), pAct->uid,
// tstrerror(terrno));
// tsdbCloseMFile(&mf);
// tsdbApplyMFileChange(&mf, pOMFile);
// // TODO: need to reload metaCache
// return -1;
// }
// } else {
// ASSERT(false);
// }
// }
// if (tsdbUpdateMFileHeader(&mf) < 0) {
// tsdbError("vgId:%d failed to update META file header since %s, revert it", REPO_ID(pRepo), tstrerror(terrno));
// tsdbApplyMFileChange(&mf, pOMFile);
// // TODO: need to reload metaCache
// return -1;
// }
// TSDB_FILE_FSYNC(&mf);
// tsdbCloseMFile(&mf);
// tsdbUpdateMFile(pfs, &mf);
// if (tsTsdbMetaCompactRatio > 0 && tsdbCompactMetaFile(pRepo, pfs, &mf) < 0) {
// tsdbError("compact meta file error");
// }
// return 0;
// }
// int tsdbEncodeKVRecord(void **buf, SKVRecord *pRecord) {
// int tlen = 0;
// tlen += taosEncodeFixedU64(buf, pRecord->uid);
// tlen += taosEncodeFixedI64(buf, pRecord->offset);
// tlen += taosEncodeFixedI64(buf, pRecord->size);
// return tlen;
// }
// void *tsdbDecodeKVRecord(void *buf, SKVRecord *pRecord) {
// buf = taosDecodeFixedU64(buf, &(pRecord->uid));
// buf = taosDecodeFixedI64(buf, &(pRecord->offset));
// buf = taosDecodeFixedI64(buf, &(pRecord->size));
// return buf;
// }
// static int tsdbUpdateMetaRecord(STsdbFS *pfs, SMFile *pMFile, uint64_t uid, void *cont, int contLen, bool compact) {
// char buf[64] = "\0";
// void * pBuf = buf;
// SKVRecord rInfo;
// int64_t offset;
// // Seek to end of meta file
// offset = tsdbSeekMFile(pMFile, 0, SEEK_END);
// if (offset < 0) {
// return -1;
// }
// rInfo.offset = offset;
// rInfo.uid = uid;
// rInfo.size = contLen;
// int tlen = tsdbEncodeKVRecord((void **)(&pBuf), &rInfo);
// if (tsdbAppendMFile(pMFile, buf, tlen, NULL) < tlen) {
// return -1;
// }
// if (tsdbAppendMFile(pMFile, cont, contLen, NULL) < contLen) {
// return -1;
// }
// tsdbUpdateMFileMagic(pMFile, POINTER_SHIFT(cont, contLen - sizeof(TSCKSUM)));
// SHashObj* cache = compact ? pfs->metaCacheComp : pfs->metaCache;
// pMFile->info.nRecords++;
// SKVRecord *pRecord = taosHashGet(cache, (void *)&uid, sizeof(uid));
// if (pRecord != NULL) {
// pMFile->info.tombSize += (pRecord->size + sizeof(SKVRecord));
// } else {
// pMFile->info.nRecords++;
// }
// taosHashPut(cache, (void *)(&uid), sizeof(uid), (void *)(&rInfo), sizeof(rInfo));
// return 0;
// }
// static int tsdbDropMetaRecord(STsdbFS *pfs, SMFile *pMFile, uint64_t uid) {
// SKVRecord rInfo = {0};
// char buf[128] = "\0";
// SKVRecord *pRecord = taosHashGet(pfs->metaCache, (void *)(&uid), sizeof(uid));
// if (pRecord == NULL) {
// tsdbError("failed to drop META record with key %" PRIu64 " since not find", uid);
// return -1;
// }
// rInfo.offset = -pRecord->offset;
// rInfo.uid = pRecord->uid;
// rInfo.size = pRecord->size;
// void *pBuf = buf;
// tsdbEncodeKVRecord(&pBuf, &rInfo);
// if (tsdbAppendMFile(pMFile, buf, sizeof(SKVRecord), NULL) < 0) {
// return -1;
// }
// pMFile->info.magic = taosCalcChecksum(pMFile->info.magic, (uint8_t *)buf, sizeof(SKVRecord));
// pMFile->info.nDels++;
// pMFile->info.nRecords--;
// pMFile->info.tombSize += (rInfo.size + sizeof(SKVRecord) * 2);
// taosHashRemove(pfs->metaCache, (void *)(&uid), sizeof(uid));
// return 0;
// }
// static int tsdbCompactMetaFile(STsdbRepo *pRepo, STsdbFS *pfs, SMFile *pMFile) {
// float delPercent = (float)(pMFile->info.nDels) / (float)(pMFile->info.nRecords);
// float tombPercent = (float)(pMFile->info.tombSize) / (float)(pMFile->info.size);
// float compactRatio = (float)(tsTsdbMetaCompactRatio)/100;
// if (delPercent < compactRatio && tombPercent < compactRatio) {
// return 0;
// }
// if (tsdbOpenMFile(pMFile, O_RDONLY) < 0) {
// tsdbError("open meta file %s compact fail", pMFile->f.rname);
// return -1;
// }
// tsdbInfo("begin compact tsdb meta file, ratio:%d, nDels:%" PRId64 ",nRecords:%" PRId64 ",tombSize:%" PRId64
// ",size:%" PRId64,
// tsTsdbMetaCompactRatio, pMFile->info.nDels,pMFile->info.nRecords,pMFile->info.tombSize,pMFile->info.size);
// SMFile mf;
// SDiskID did;
// // first create tmp meta file
// did.level = TFS_PRIMARY_LEVEL;
// did.id = TFS_PRIMARY_ID;
// tsdbInitMFile(&mf, did, REPO_ID(pRepo), FS_TXN_VERSION(REPO_FS(pRepo)) + 1);
// if (tsdbCreateMFile(&mf, true) < 0) {
// tsdbError("vgId:%d failed to create META file since %s", REPO_ID(pRepo), tstrerror(terrno));
// return -1;
// }
// tsdbInfo("vgId:%d meta file %s is created to compact meta data", REPO_ID(pRepo), TSDB_FILE_FULL_NAME(&mf));
// // second iterator metaCache
// int code = -1;
// int64_t maxBufSize = 1024;
// SKVRecord *pRecord;
// void *pBuf = NULL;
// pBuf = malloc((size_t)maxBufSize);
// if (pBuf == NULL) {
// goto _err;
// }
// // init Comp
// assert(pfs->metaCacheComp == NULL);
// pfs->metaCacheComp = taosHashInit(4096, taosGetDefaultHashFunction(TSDB_DATA_TYPE_BIGINT), true, HASH_NO_LOCK);
// if (pfs->metaCacheComp == NULL) {
// goto _err;
// }
// pRecord = taosHashIterate(pfs->metaCache, NULL);
// while (pRecord) {
// if (tsdbSeekMFile(pMFile, pRecord->offset + sizeof(SKVRecord), SEEK_SET) < 0) {
// tsdbError("vgId:%d failed to seek file %s since %s", REPO_ID(pRepo), TSDB_FILE_FULL_NAME(pMFile),
// tstrerror(terrno));
// goto _err;
// }
// if (pRecord->size > maxBufSize) {
// maxBufSize = pRecord->size;
// void* tmp = realloc(pBuf, (size_t)maxBufSize);
// if (tmp == NULL) {
// goto _err;
// }
// pBuf = tmp;
// }
// int nread = (int)tsdbReadMFile(pMFile, pBuf, pRecord->size);
// if (nread < 0) {
// tsdbError("vgId:%d failed to read file %s since %s", REPO_ID(pRepo), TSDB_FILE_FULL_NAME(pMFile),
// tstrerror(terrno));
// goto _err;
// }
// if (nread < pRecord->size) {
// tsdbError("vgId:%d failed to read file %s since file corrupted, expected read:%" PRId64 " actual read:%d",
// REPO_ID(pRepo), TSDB_FILE_FULL_NAME(pMFile), pRecord->size, nread);
// goto _err;
// }
// if (tsdbUpdateMetaRecord(pfs, &mf, pRecord->uid, pBuf, (int)pRecord->size, true) < 0) {
// tsdbError("vgId:%d failed to update META record, uid %" PRIu64 " since %s", REPO_ID(pRepo), pRecord->uid,
// tstrerror(terrno));
// goto _err;
// }
// pRecord = taosHashIterate(pfs->metaCache, pRecord);
// }
// code = 0;
// _err:
// if (code == 0) TSDB_FILE_FSYNC(&mf);
// tsdbCloseMFile(&mf);
// tsdbCloseMFile(pMFile);
// if (code == 0) {
// // rename meta.tmp -> meta
// tsdbInfo("vgId:%d meta file rename %s -> %s", REPO_ID(pRepo), TSDB_FILE_FULL_NAME(&mf),
// TSDB_FILE_FULL_NAME(pMFile)); taosRename(mf.f.aname,pMFile->f.aname); tstrncpy(mf.f.aname, pMFile->f.aname,
// TSDB_FILENAME_LEN); tstrncpy(mf.f.rname, pMFile->f.rname, TSDB_FILENAME_LEN);
// // update current meta file info
// pfs->nstatus->pmf = NULL;
// tsdbUpdateMFile(pfs, &mf);
// taosHashCleanup(pfs->metaCache);
// pfs->metaCache = pfs->metaCacheComp;
// pfs->metaCacheComp = NULL;
// } else {
// // remove meta.tmp file
// remove(mf.f.aname);
// taosHashCleanup(pfs->metaCacheComp);
// pfs->metaCacheComp = NULL;
// }
// tfree(pBuf);
// ASSERT(mf.info.nDels == 0);
// ASSERT(mf.info.tombSize == 0);
// tsdbInfo("end compact tsdb meta file,code:%d,nRecords:%" PRId64 ",size:%" PRId64,
// code,mf.info.nRecords,mf.info.size);
// return code;
// }
// // =================== Commit Time-Series Data
// #if 0
// static bool tsdbHasDataToCommit(SCommitIter *iters, int nIters, TSKEY minKey, TSKEY maxKey) {
// for (int i = 0; i < nIters; i++) {
// TSKEY nextKey = tsdbNextIterKey((iters + i)->pIter);
// if (nextKey != TSDB_DATA_TIMESTAMP_NULL && (nextKey >= minKey && nextKey <= maxKey)) return true;
// }
// return false;
// }
// #endif
static int tsdbCommitToTable(SCommitH *pCommith, int tid) {
SCommitIter *pIter = pCommith->iters + tid;
TSKEY nextKey = tsdbNextIterKey(pIter->pIter);
tsdbResetCommitTable(pCommith);
TSDB_RLOCK_TABLE(pIter->pTable);
// Set commit table
if (tsdbSetCommitTable(pCommith, pIter->pTable) < 0) {
TSDB_RUNLOCK_TABLE(pIter->pTable);
return -1;
}
// No disk data and no memory data, just return
if (pCommith->readh.pBlkIdx == NULL && (nextKey == TSDB_DATA_TIMESTAMP_NULL || nextKey > pCommith->maxKey)) {
TSDB_RUNLOCK_TABLE(pIter->pTable);
return 0;
}
......@@ -982,7 +1031,6 @@ static int tsdbCommitToTable(SCommitH *pCommith, int tid) {
if (pCommith->readh.pBlkIdx) {
if (tsdbLoadBlockInfo(&(pCommith->readh), NULL) < 0) {
TSDB_RUNLOCK_TABLE(pIter->pTable);
return -1;
}
......@@ -1003,7 +1051,6 @@ static int tsdbCommitToTable(SCommitH *pCommith, int tid) {
if ((nextKey == TSDB_DATA_TIMESTAMP_NULL || nextKey > pCommith->maxKey) ||
(pBlock && (!pBlock->last) && tsdbComparKeyBlock((void *)(&nextKey), pBlock) > 0)) {
if (tsdbMoveBlock(pCommith, bidx) < 0) {
TSDB_RUNLOCK_TABLE(pIter->pTable);
return -1;
}
......@@ -1016,7 +1063,6 @@ static int tsdbCommitToTable(SCommitH *pCommith, int tid) {
} else if (pBlock && (pBlock->last || tsdbComparKeyBlock((void *)(&nextKey), pBlock) == 0)) {
// merge pBlock data and memory data
if (tsdbMergeMemData(pCommith, pIter, bidx) < 0) {
TSDB_RUNLOCK_TABLE(pIter->pTable);
return -1;
}
......@@ -1031,12 +1077,10 @@ static int tsdbCommitToTable(SCommitH *pCommith, int tid) {
// Only commit memory data
if (pBlock == NULL) {
if (tsdbCommitMemData(pCommith, pIter, pCommith->maxKey, false) < 0) {
TSDB_RUNLOCK_TABLE(pIter->pTable);
return -1;
}
} else {
if (tsdbCommitMemData(pCommith, pIter, pBlock->keyFirst - 1, true) < 0) {
TSDB_RUNLOCK_TABLE(pIter->pTable);
return -1;
}
}
......@@ -1044,8 +1088,6 @@ static int tsdbCommitToTable(SCommitH *pCommith, int tid) {
}
}
TSDB_RUNLOCK_TABLE(pIter->pTable);
if (tsdbWriteBlockInfo(pCommith) < 0) {
tsdbError("vgId:%d failed to write SBlockInfo part into file %s since %s", TSDB_COMMIT_REPO_ID(pCommith),
TSDB_FILE_FULL_NAME(TSDB_COMMIT_HEAD_FILE(pCommith)), tstrerror(terrno));
......@@ -1088,8 +1130,8 @@ static int tsdbComparKeyBlock(const void *arg1, const void *arg2) {
}
}
int tsdbWriteBlockImpl(STsdbRepo *pRepo, STable *pTable, SDFile *pDFile, SDataCols *pDataCols, SBlock *pBlock,
bool isLast, bool isSuper, void **ppBuf, void **ppCBuf) {
int tsdbWriteBlockImpl(STsdb *pRepo, STable *pTable, SDFile *pDFile, SDataCols *pDataCols, SBlock *pBlock, bool isLast,
bool isSuper, void **ppBuf, void **ppCBuf) {
STsdbCfg * pCfg = REPO_CFG(pRepo);
SBlockData *pBlockData;
int64_t offset = 0;
......@@ -1155,8 +1197,7 @@ int tsdbWriteBlockImpl(STsdbRepo *pRepo, STable *pTable, SDFile *pDFile, SDataCo
pBlockCol = pBlockData->cols + tcol;
tptr = POINTER_SHIFT(pBlockData, lsize);
if (pCfg->compression == TWO_STAGE_COMP &&
tsdbMakeRoom(ppCBuf, tlen + COMP_OVERFLOW_BYTES) < 0) {
if (pCfg->compression == TWO_STAGE_COMP && tsdbMakeRoom(ppCBuf, tlen + COMP_OVERFLOW_BYTES) < 0) {
return -1;
}
......@@ -1227,7 +1268,6 @@ static int tsdbWriteBlock(SCommitH *pCommith, SDFile *pDFile, SDataCols *pDataCo
(void **)(&(TSDB_COMMIT_COMP_BUF(pCommith))));
}
static int tsdbWriteBlockInfo(SCommitH *pCommih) {
SDFile * pHeadf = TSDB_COMMIT_HEAD_FILE(pCommih);
SBlockIdx blkIdx;
......@@ -1251,7 +1291,7 @@ static int tsdbWriteBlockInfo(SCommitH *pCommih) {
}
static int tsdbCommitMemData(SCommitH *pCommith, SCommitIter *pIter, TSKEY keyLimit, bool toData) {
STsdbRepo *pRepo = TSDB_COMMIT_REPO(pCommith);
STsdb * pRepo = TSDB_COMMIT_REPO(pCommith);
STsdbCfg * pCfg = REPO_CFG(pRepo);
SMergeInfo mInfo;
int32_t defaultRows = TSDB_COMMIT_DEFAULT_ROWS(pCommith);
......@@ -1284,7 +1324,7 @@ static int tsdbCommitMemData(SCommitH *pCommith, SCommitIter *pIter, TSKEY keyLi
}
static int tsdbMergeMemData(SCommitH *pCommith, SCommitIter *pIter, int bidx) {
STsdbRepo *pRepo = TSDB_COMMIT_REPO(pCommith);
STsdb * pRepo = TSDB_COMMIT_REPO(pCommith);
STsdbCfg * pCfg = REPO_CFG(pRepo);
int nBlocks = pCommith->readh.pBlkIdx->numOfBlocks;
SBlock * pBlock = pCommith->readh.pBlkInfo->blocks + bidx;
......@@ -1407,9 +1447,10 @@ static int tsdbCommitAddBlock(SCommitH *pCommith, const SBlock *pSupBlock, const
return 0;
}
static int tsdbMergeBlockData(SCommitH *pCommith, SCommitIter *pIter, SDataCols *pDataCols, TSKEY keyLimit, bool isLastOneBlock) {
STsdbRepo *pRepo = TSDB_COMMIT_REPO(pCommith);
STsdbCfg * pCfg = REPO_CFG(pRepo);
static int tsdbMergeBlockData(SCommitH *pCommith, SCommitIter *pIter, SDataCols *pDataCols, TSKEY keyLimit,
bool isLastOneBlock) {
STsdb * pRepo = TSDB_COMMIT_REPO(pCommith);
STsdbCfg *pCfg = REPO_CFG(pRepo);
SBlock block;
SDFile * pDFile;
bool isLast;
......@@ -1464,7 +1505,7 @@ static void tsdbLoadAndMergeFromCache(SDataCols *pDataCols, int *iter, SCommitIt
if (key1 < key2) {
for (int i = 0; i < pDataCols->numOfCols; i++) {
//TODO: dataColAppendVal may fail
// TODO: dataColAppendVal may fail
dataColAppendVal(pTarget->cols + i, tdGetColDataOfRow(pDataCols->cols + i, *iter), pTarget->numOfRows,
pTarget->maxPoints);
}
......@@ -1482,17 +1523,17 @@ static void tsdbLoadAndMergeFromCache(SDataCols *pDataCols, int *iter, SCommitIt
tSkipListIterNext(pCommitIter->pIter);
} else {
if (update != TD_ROW_OVERWRITE_UPDATE) {
//copy disk data
// copy disk data
for (int i = 0; i < pDataCols->numOfCols; i++) {
//TODO: dataColAppendVal may fail
// TODO: dataColAppendVal may fail
dataColAppendVal(pTarget->cols + i, tdGetColDataOfRow(pDataCols->cols + i, *iter), pTarget->numOfRows,
pTarget->maxPoints);
}
if(update == TD_ROW_DISCARD_UPDATE) pTarget->numOfRows++;
if (update == TD_ROW_DISCARD_UPDATE) pTarget->numOfRows++;
}
if (update != TD_ROW_DISCARD_UPDATE) {
//copy mem data
// copy mem data
if (pSchema == NULL || schemaVersion(pSchema) != memRowVersion(row)) {
pSchema = tsdbGetTableSchemaImpl(pCommitIter->pTable, false, false, memRowVersion(row));
ASSERT(pSchema != NULL);
......@@ -1508,144 +1549,12 @@ static void tsdbLoadAndMergeFromCache(SDataCols *pDataCols, int *iter, SCommitIt
}
}
static void tsdbResetCommitFile(SCommitH *pCommith) {
pCommith->isRFileSet = false;
pCommith->isDFileSame = false;
pCommith->isLFileSame = false;
taosArrayClear(pCommith->aBlkIdx);
}
static void tsdbResetCommitTable(SCommitH *pCommith) {
taosArrayClear(pCommith->aSubBlk);
taosArrayClear(pCommith->aSupBlk);
pCommith->pTable = NULL;
}
static int tsdbSetAndOpenCommitFile(SCommitH *pCommith, SDFileSet *pSet, int fid) {
SDiskID did;
STsdbRepo *pRepo = TSDB_COMMIT_REPO(pCommith);
SDFileSet *pWSet = TSDB_COMMIT_WRITE_FSET(pCommith);
tfsAllocDisk(tsdbGetFidLevel(fid, &(pCommith->rtn)), &(did.level), &(did.id));
if (did.level == TFS_UNDECIDED_LEVEL) {
terrno = TSDB_CODE_TDB_NO_AVAIL_DISK;
return -1;
}
// Open read FSET
if (pSet) {
if (tsdbSetAndOpenReadFSet(&(pCommith->readh), pSet) < 0) {
return -1;
}
pCommith->isRFileSet = true;
if (tsdbLoadBlockIdx(&(pCommith->readh)) < 0) {
tsdbCloseAndUnsetFSet(&(pCommith->readh));
return -1;
}
tsdbDebug("vgId:%d FSET %d at level %d disk id %d is opened to read to commit", REPO_ID(pRepo), TSDB_FSET_FID(pSet),
TSDB_FSET_LEVEL(pSet), TSDB_FSET_ID(pSet));
} else {
pCommith->isRFileSet = false;
}
// Set and open commit FSET
if (pSet == NULL || did.level > TSDB_FSET_LEVEL(pSet)) {
// Create a new FSET to write data
tsdbInitDFileSet(pWSet, did, REPO_ID(pRepo), fid, FS_TXN_VERSION(REPO_FS(pRepo)));
if (tsdbCreateDFileSet(pWSet, true) < 0) {
tsdbError("vgId:%d failed to create FSET %d at level %d disk id %d since %s", REPO_ID(pRepo),
TSDB_FSET_FID(pWSet), TSDB_FSET_LEVEL(pWSet), TSDB_FSET_ID(pWSet), tstrerror(terrno));
if (pCommith->isRFileSet) {
tsdbCloseAndUnsetFSet(&(pCommith->readh));
}
return -1;
}
pCommith->isDFileSame = false;
pCommith->isLFileSame = false;
tsdbDebug("vgId:%d FSET %d at level %d disk id %d is created to commit", REPO_ID(pRepo), TSDB_FSET_FID(pWSet),
TSDB_FSET_LEVEL(pWSet), TSDB_FSET_ID(pWSet));
} else {
did.level = TSDB_FSET_LEVEL(pSet);
did.id = TSDB_FSET_ID(pSet);
pCommith->wSet.fid = fid;
pCommith->wSet.state = 0;
// TSDB_FILE_HEAD
SDFile *pWHeadf = TSDB_COMMIT_HEAD_FILE(pCommith);
tsdbInitDFile(pWHeadf, did, REPO_ID(pRepo), fid, FS_TXN_VERSION(REPO_FS(pRepo)), TSDB_FILE_HEAD);
if (tsdbCreateDFile(pWHeadf, true) < 0) {
tsdbError("vgId:%d failed to create file %s to commit since %s", REPO_ID(pRepo), TSDB_FILE_FULL_NAME(pWHeadf),
tstrerror(terrno));
if (pCommith->isRFileSet) {
tsdbCloseAndUnsetFSet(&(pCommith->readh));
return -1;
}
}
// TSDB_FILE_DATA
SDFile *pRDataf = TSDB_READ_DATA_FILE(&(pCommith->readh));
SDFile *pWDataf = TSDB_COMMIT_DATA_FILE(pCommith);
tsdbInitDFileEx(pWDataf, pRDataf);
if (tsdbOpenDFile(pWDataf, O_WRONLY) < 0) {
tsdbError("vgId:%d failed to open file %s to commit since %s", REPO_ID(pRepo), TSDB_FILE_FULL_NAME(pWDataf),
tstrerror(terrno));
tsdbCloseDFileSet(pWSet);
tsdbRemoveDFile(pWHeadf);
if (pCommith->isRFileSet) {
tsdbCloseAndUnsetFSet(&(pCommith->readh));
return -1;
}
}
pCommith->isDFileSame = true;
// TSDB_FILE_LAST
SDFile *pRLastf = TSDB_READ_LAST_FILE(&(pCommith->readh));
SDFile *pWLastf = TSDB_COMMIT_LAST_FILE(pCommith);
if (pRLastf->info.size < 32 * 1024) {
tsdbInitDFileEx(pWLastf, pRLastf);
pCommith->isLFileSame = true;
if (tsdbOpenDFile(pWLastf, O_WRONLY) < 0) {
tsdbError("vgId:%d failed to open file %s to commit since %s", REPO_ID(pRepo), TSDB_FILE_FULL_NAME(pWLastf),
tstrerror(terrno));
tsdbCloseDFileSet(pWSet);
tsdbRemoveDFile(pWHeadf);
if (pCommith->isRFileSet) {
tsdbCloseAndUnsetFSet(&(pCommith->readh));
return -1;
}
}
} else {
tsdbInitDFile(pWLastf, did, REPO_ID(pRepo), fid, FS_TXN_VERSION(REPO_FS(pRepo)), TSDB_FILE_LAST);
pCommith->isLFileSame = false;
if (tsdbCreateDFile(pWLastf, true) < 0) {
tsdbError("vgId:%d failed to create file %s to commit since %s", REPO_ID(pRepo), TSDB_FILE_FULL_NAME(pWLastf),
tstrerror(terrno));
tsdbCloseDFileSet(pWSet);
(void)tsdbRemoveDFile(pWHeadf);
if (pCommith->isRFileSet) {
tsdbCloseAndUnsetFSet(&(pCommith->readh));
return -1;
}
}
}
}
return 0;
}
static void tsdbCloseCommitFile(SCommitH *pCommith, bool hasError) {
if (pCommith->isRFileSet) {
tsdbCloseAndUnsetFSet(&(pCommith->readh));
......@@ -1658,8 +1567,8 @@ static void tsdbCloseCommitFile(SCommitH *pCommith, bool hasError) {
}
static bool tsdbCanAddSubBlock(SCommitH *pCommith, SBlock *pBlock, SMergeInfo *pInfo) {
STsdbRepo *pRepo = TSDB_COMMIT_REPO(pCommith);
STsdbCfg * pCfg = REPO_CFG(pRepo);
STsdb * pRepo = TSDB_COMMIT_REPO(pCommith);
STsdbCfg *pCfg = REPO_CFG(pRepo);
int mergeRows = pBlock->numOfRows + pInfo->rowsInserted - pInfo->rowsDeleteSucceed;
ASSERT(mergeRows > 0);
......@@ -1675,28 +1584,27 @@ static bool tsdbCanAddSubBlock(SCommitH *pCommith, SBlock *pBlock, SMergeInfo *p
return false;
}
int tsdbApplyRtn(STsdbRepo *pRepo) {
SRtn rtn;
SFSIter fsiter;
STsdbFS * pfs = REPO_FS(pRepo);
SDFileSet *pSet;
// Get retention snapshot
tsdbGetRtnSnap(pRepo, &rtn);
tsdbFSIterInit(&fsiter, pfs, TSDB_FS_ITER_FORWARD);
while ((pSet = tsdbFSIterNext(&fsiter))) {
if (pSet->fid < rtn.minFid) {
tsdbInfo("vgId:%d FSET %d at level %d disk id %d expires, remove it", REPO_ID(pRepo), pSet->fid,
TSDB_FSET_LEVEL(pSet), TSDB_FSET_ID(pSet));
continue;
}
if (tsdbApplyRtnOnFSet(pRepo, pSet, &rtn) < 0) {
return -1;
}
}
return 0;
}
#endif
\ No newline at end of file
// int tsdbApplyRtn(STsdbRepo *pRepo) {
// SRtn rtn;
// SFSIter fsiter;
// STsdbFS * pfs = REPO_FS(pRepo);
// SDFileSet *pSet;
// // Get retention snapshot
// tsdbGetRtnSnap(pRepo, &rtn);
// tsdbFSIterInit(&fsiter, pfs, TSDB_FS_ITER_FORWARD);
// while ((pSet = tsdbFSIterNext(&fsiter))) {
// if (pSet->fid < rtn.minFid) {
// tsdbInfo("vgId:%d FSET %d at level %d disk id %d expires, remove it", REPO_ID(pRepo), pSet->fid,
// TSDB_FSET_LEVEL(pSet), TSDB_FSET_ID(pSet));
// continue;
// }
// if (tsdbApplyRtnOnFSet(pRepo, pSet, &rtn) < 0) {
// return -1;
// }
// }
// return 0;
// }
\ No newline at end of file
......@@ -12,6 +12,7 @@
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#if 0
#include "tsdbint.h"
typedef struct {
......@@ -528,3 +529,5 @@ static int tsdbCompactMeta(STsdbRepo *pRepo) {
return 0;
}
#endif
......@@ -191,7 +191,7 @@ static int tsdbAddDFileSetToStatus(SFSStatus *pStatus, const SDFileSet *pSet) {
}
// ================== STsdbFS
STsdbFS *tsdbNewFS(STsdbCfg *pCfg) {
STsdbFS *tsdbNewFS(const STsdbCfg *pCfg) {
int keep = pCfg->keep;
int days = pCfg->daysPerFile;
int maxFSet = TSDB_MAX_FSETS(keep, days);
......@@ -1287,7 +1287,7 @@ static int tsdbRestoreCurrent(STsdb *pRepo) {
return -1;
}
if (tsdbSaveFSStatus(pRepo->fs.cstatus, REPO_ID(pRepo)) < 0) {
if (tsdbSaveFSStatus(pRepo->fs->cstatus, REPO_ID(pRepo)) < 0) {
tsdbError("vgId:%d failed to restore corrent since %s", REPO_ID(pRepo), tstrerror(terrno));
return -1;
}
......
......@@ -15,18 +15,19 @@
#include "tsdbDef.h"
static STsdb *tsdbNew(const char *path, const STsdbCfg *pTsdbCfg, SMemAllocatorFactory *pMAF);
static STsdb *tsdbNew(const char *path, int32_t vgId, const STsdbCfg *pTsdbCfg, SMemAllocatorFactory *pMAF,
SMeta *pMeta);
static void tsdbFree(STsdb *pTsdb);
static int tsdbOpenImpl(STsdb *pTsdb);
static void tsdbCloseImpl(STsdb *pTsdb);
STsdb *tsdbOpen(const char *path, const STsdbCfg *pTsdbCfg, SMemAllocatorFactory *pMAF) {
STsdb *tsdbOpen(const char *path, int32_t vgId, const STsdbCfg *pTsdbCfg, SMemAllocatorFactory *pMAF, SMeta *pMeta) {
STsdb *pTsdb = NULL;
// Set default TSDB Options
if (pTsdbCfg == NULL) {
// if (pTsdbCfg == NULL) {
pTsdbCfg = &defautlTsdbOptions;
}
// }
// Validate the options
if (tsdbValidateOptions(pTsdbCfg) < 0) {
......@@ -35,7 +36,7 @@ STsdb *tsdbOpen(const char *path, const STsdbCfg *pTsdbCfg, SMemAllocatorFactory
}
// Create the handle
pTsdb = tsdbNew(path, pTsdbCfg, pMAF);
pTsdb = tsdbNew(path, vgId, pTsdbCfg, pMAF, pMeta);
if (pTsdb == NULL) {
// TODO: handle error
return NULL;
......@@ -62,7 +63,8 @@ void tsdbClose(STsdb *pTsdb) {
void tsdbRemove(const char *path) { taosRemoveDir(path); }
/* ------------------------ STATIC METHODS ------------------------ */
static STsdb *tsdbNew(const char *path, const STsdbCfg *pTsdbCfg, SMemAllocatorFactory *pMAF) {
static STsdb *tsdbNew(const char *path, int32_t vgId, const STsdbCfg *pTsdbCfg, SMemAllocatorFactory *pMAF,
SMeta *pMeta) {
STsdb *pTsdb = NULL;
pTsdb = (STsdb *)calloc(1, sizeof(STsdb));
......@@ -72,25 +74,32 @@ static STsdb *tsdbNew(const char *path, const STsdbCfg *pTsdbCfg, SMemAllocatorF
}
pTsdb->path = strdup(path);
pTsdb->vgId = vgId;
tsdbOptionsCopy(&(pTsdb->config), pTsdbCfg);
pTsdb->pmaf = pMAF;
pTsdb->pMeta = pMeta;
pTsdb->fs = tsdbNewFS(pTsdbCfg);
return pTsdb;
}
static void tsdbFree(STsdb *pTsdb) {
if (pTsdb) {
tsdbFreeFS(pTsdb->fs);
tfree(pTsdb->path);
free(pTsdb);
}
}
static int tsdbOpenImpl(STsdb *pTsdb) {
tsdbOpenFS(pTsdb);
// TODO
return 0;
}
static void tsdbCloseImpl(STsdb *pTsdb) {
tsdbCloseFS(pTsdb);
// TODO
}
#if 0
......@@ -112,8 +121,8 @@ static void tsdbCloseImpl(STsdb *pTsdb) {
// no test file errors here
#include "taosdef.h"
#include "tsdbint.h"
#include "ttimer.h"
#include "tthread.h"
#include "ttimer.h"
#define IS_VALID_PRECISION(precision) \
(((precision) >= TSDB_TIME_PRECISION_MILLI) && ((precision) <= TSDB_TIME_PRECISION_NANO))
......
......@@ -15,30 +15,6 @@
#include "tsdbDef.h"
typedef struct STbData {
tb_uid_t uid;
TSKEY keyMin;
TSKEY keyMax;
int64_t nrows;
SSkipList *pData;
} STbData;
struct STsdbMemTable {
T_REF_DECLARE()
SRWLatch latch;
TSKEY keyMin;
TSKEY keyMax;
uint64_t nRow;
SMemAllocator *pMA;
// Container
#if 1
SSkipList *pSlIdx; // SSkiplist<STbData>
SHashObj * pHashIdx;
#else
TD_SLIST(STbData) list;
#endif
};
static int tsdbScanAndConvertSubmitMsg(STsdb *pTsdb, SSubmitMsg *pMsg);
static int tsdbMemTableInsertTbData(STsdb *pRepo, SSubmitBlk *pBlock, int32_t *pAffectedRows);
static STbData *tsdbNewTbData(tb_uid_t uid);
......@@ -46,6 +22,7 @@ static void tsdbFreeTbData(STbData *pTbData);
static char * tsdbGetTsTupleKey(const void *data);
static int tsdbTbDataComp(const void *arg1, const void *arg2);
static char * tsdbTbDataGetUid(const void *arg);
static int tsdbAppendTableRowToCols(STable *pTable, SDataCols *pCols, STSchema **ppSchema, SMemRow row);
STsdbMemTable *tsdbNewMemTable(STsdb *pTsdb) {
STsdbMemTable *pMemTable = (STsdbMemTable *)calloc(1, sizeof(*pMemTable));
......@@ -127,6 +104,129 @@ int tsdbMemTableInsert(STsdb *pTsdb, STsdbMemTable *pMemTable, SSubmitMsg *pMsg,
return 0;
}
/**
* This is an important function to load data or try to load data from memory skiplist iterator.
*
* This function load memory data until:
* 1. iterator ends
* 2. data key exceeds maxKey
* 3. rowsIncreased = rowsInserted - rowsDeleteSucceed >= maxRowsToRead
* 4. operations in pCols not exceeds its max capacity if pCols is given
*
* The function tries to procceed AS MUCH AS POSSIBLE.
*/
int tsdbLoadDataFromCache(STable *pTable, SSkipListIterator *pIter, TSKEY maxKey, int maxRowsToRead, SDataCols *pCols,
TKEY *filterKeys, int nFilterKeys, bool keepDup, SMergeInfo *pMergeInfo) {
ASSERT(maxRowsToRead > 0 && nFilterKeys >= 0);
if (pIter == NULL) return 0;
STSchema * pSchema = NULL;
TSKEY rowKey = 0;
TSKEY fKey = 0;
bool isRowDel = false;
int filterIter = 0;
SMemRow row = NULL;
SMergeInfo mInfo;
if (pMergeInfo == NULL) pMergeInfo = &mInfo;
memset(pMergeInfo, 0, sizeof(*pMergeInfo));
pMergeInfo->keyFirst = INT64_MAX;
pMergeInfo->keyLast = INT64_MIN;
if (pCols) tdResetDataCols(pCols);
row = tsdbNextIterRow(pIter);
if (row == NULL || memRowKey(row) > maxKey) {
rowKey = INT64_MAX;
isRowDel = false;
} else {
rowKey = memRowKey(row);
isRowDel = memRowDeleted(row);
}
if (filterIter >= nFilterKeys) {
fKey = INT64_MAX;
} else {
fKey = tdGetKey(filterKeys[filterIter]);
}
while (true) {
if (fKey == INT64_MAX && rowKey == INT64_MAX) break;
if (fKey < rowKey) {
pMergeInfo->keyFirst = MIN(pMergeInfo->keyFirst, fKey);
pMergeInfo->keyLast = MAX(pMergeInfo->keyLast, fKey);
filterIter++;
if (filterIter >= nFilterKeys) {
fKey = INT64_MAX;
} else {
fKey = tdGetKey(filterKeys[filterIter]);
}
} else if (fKey > rowKey) {
if (isRowDel) {
pMergeInfo->rowsDeleteFailed++;
} else {
if (pMergeInfo->rowsInserted - pMergeInfo->rowsDeleteSucceed >= maxRowsToRead) break;
if (pCols && pMergeInfo->nOperations >= pCols->maxPoints) break;
pMergeInfo->rowsInserted++;
pMergeInfo->nOperations++;
pMergeInfo->keyFirst = MIN(pMergeInfo->keyFirst, rowKey);
pMergeInfo->keyLast = MAX(pMergeInfo->keyLast, rowKey);
tsdbAppendTableRowToCols(pTable, pCols, &pSchema, row);
}
tSkipListIterNext(pIter);
row = tsdbNextIterRow(pIter);
if (row == NULL || memRowKey(row) > maxKey) {
rowKey = INT64_MAX;
isRowDel = false;
} else {
rowKey = memRowKey(row);
isRowDel = memRowDeleted(row);
}
} else {
if (isRowDel) {
ASSERT(!keepDup);
if (pCols && pMergeInfo->nOperations >= pCols->maxPoints) break;
pMergeInfo->rowsDeleteSucceed++;
pMergeInfo->nOperations++;
tsdbAppendTableRowToCols(pTable, pCols, &pSchema, row);
} else {
if (keepDup) {
if (pCols && pMergeInfo->nOperations >= pCols->maxPoints) break;
pMergeInfo->rowsUpdated++;
pMergeInfo->nOperations++;
pMergeInfo->keyFirst = MIN(pMergeInfo->keyFirst, rowKey);
pMergeInfo->keyLast = MAX(pMergeInfo->keyLast, rowKey);
tsdbAppendTableRowToCols(pTable, pCols, &pSchema, row);
} else {
pMergeInfo->keyFirst = MIN(pMergeInfo->keyFirst, fKey);
pMergeInfo->keyLast = MAX(pMergeInfo->keyLast, fKey);
}
}
tSkipListIterNext(pIter);
row = tsdbNextIterRow(pIter);
if (row == NULL || memRowKey(row) > maxKey) {
rowKey = INT64_MAX;
isRowDel = false;
} else {
rowKey = memRowKey(row);
isRowDel = memRowDeleted(row);
}
filterIter++;
if (filterIter >= nFilterKeys) {
fKey = INT64_MAX;
} else {
fKey = tdGetKey(filterKeys[filterIter]);
}
}
}
return 0;
}
static int tsdbScanAndConvertSubmitMsg(STsdb *pTsdb, SSubmitMsg *pMsg) {
ASSERT(pMsg != NULL);
// STsdbMeta * pMeta = pTsdb->tsdbMeta;
......@@ -337,6 +437,21 @@ static char *tsdbTbDataGetUid(const void *arg) {
STbData *pTbData = (STbData *)arg;
return (char *)(&(pTbData->uid));
}
static int tsdbAppendTableRowToCols(STable *pTable, SDataCols *pCols, STSchema **ppSchema, SMemRow row) {
if (pCols) {
if (*ppSchema == NULL || schemaVersion(*ppSchema) != memRowVersion(row)) {
*ppSchema = tsdbGetTableSchemaImpl(pTable, false, false, memRowVersion(row));
if (*ppSchema == NULL) {
ASSERT(false);
return -1;
}
}
tdAppendMemRowToDataCol(row, *ppSchema, pCols, true);
}
return 0;
}
/* ------------------------ REFACTORING ------------------------ */
#if 0
......@@ -674,21 +789,6 @@ int tsdbLoadDataFromCache(STable *pTable, SSkipListIterator *pIter, TSKEY maxKey
}
// ---------------- LOCAL FUNCTIONS ----------------
static int tsdbAppendTableRowToCols(STable *pTable, SDataCols *pCols, STSchema **ppSchema, SMemRow row) {
if (pCols) {
if (*ppSchema == NULL || schemaVersion(*ppSchema) != memRowVersion(row)) {
*ppSchema = tsdbGetTableSchemaImpl(pTable, false, false, memRowVersion(row), (int8_t)memRowType(row));
if (*ppSchema == NULL) {
ASSERT(false);
return -1;
}
}
tdAppendMemRowToDataCol(row, *ppSchema, pCols, true, 0);
}
return 0;
}
static FORCE_INLINE int tsdbCheckRowRange(STsdbRepo *pRepo, STable *pTable, SMemRow row, TSKEY minKey, TSKEY maxKey,
TSKEY now) {
......
......@@ -15,7 +15,16 @@
#include "tsdbDef.h"
const STsdbCfg defautlTsdbOptions = {.lruCacheSize = 0};
const STsdbCfg defautlTsdbOptions = {.precision = 0,
.lruCacheSize = 0,
.daysPerFile = 10,
.minRowsPerFileBlock = 100,
.maxRowsPerFileBlock = 4096,
.keep = 3650,
.keep1 = 3650,
.keep2 = 3650,
.update = 0,
.compression = TWO_STAGE_COMP};
int tsdbOptionsInit(STsdbCfg *pTsdbOptions) {
// TODO
......
......@@ -25,7 +25,6 @@ static int tsdbCheckAndDecodeColumnData(SDataCol *pDataCol, void *content, int3
static int tsdbLoadBlockDataColsImpl(SReadH *pReadh, SBlock *pBlock, SDataCols *pDataCols, int16_t *colIds,
int numOfColIds);
static int tsdbLoadColData(SReadH *pReadh, SDFile *pDFile, SBlock *pBlock, SBlockCol *pBlockCol, SDataCol *pDataCol);
static STSchema *tsdbGetTableSchemaImpl(STable *pTable, bool lock, bool copy, int32_t version) { return NULL; }
int tsdbInitReadH(SReadH *pReadh, STsdb *pRepo) {
ASSERT(pReadh != NULL && pRepo != NULL);
......@@ -667,4 +666,3 @@ static int tsdbLoadColData(SReadH *pReadh, SDFile *pDFile, SBlock *pBlock, SBloc
return 0;
}
aux_source_directory(src TSDB_SRC)
add_library(tsdb STATIC ${TSDB_SRC})
target_include_directories(
tsdb
PUBLIC "${CMAKE_SOURCE_DIR}/include/dnode/vnode/tsdb2"
PRIVATE "${CMAKE_CURRENT_SOURCE_DIR}/inc"
)
target_link_libraries(tsdb os util common tfs)
\ No newline at end of file
/*
* Copyright (c) 2019 TAOS Data, Inc. <jhtao@taosdata.com>
*
* This program is free software: you can use, redistribute, and/or modify
* it under the terms of the GNU Affero General Public License, version 3
* or later ("AGPL"), as published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
// #ifndef _TD_TSDB_BUFFER_H_
// #define _TD_TSDB_BUFFER_H_
// typedef struct {
// int64_t blockId;
// int offset;
// int remain;
// char data[];
// } STsdbBufBlock;
// typedef struct {
// pthread_cond_t poolNotEmpty;
// int bufBlockSize;
// int tBufBlocks;
// int nBufBlocks;
// int nRecycleBlocks;
// int nElasticBlocks;
// int64_t index;
// SList* bufBlockList;
// } STsdbBufPool;
// #define TSDB_BUFFER_RESERVE 1024 // Reseve 1K as commit threshold
// STsdbBufPool* tsdbNewBufPool();
// void tsdbFreeBufPool(STsdbBufPool* pBufPool);
// int tsdbOpenBufPool(STsdb* pRepo);
// void tsdbCloseBufPool(STsdb* pRepo);
// SListNode* tsdbAllocBufBlockFromPool(STsdb* pRepo);
// int tsdbExpandPool(STsdb* pRepo, int32_t oldTotalBlocks);
// void tsdbRecycleBufferBlock(STsdbBufPool* pPool, SListNode *pNode, bool bELastic);
// // health cite
// STsdbBufBlock *tsdbNewBufBlock(int bufBlockSize);
// void tsdbFreeBufBlock(STsdbBufBlock *pBufBlock);
// #endif /* _TD_TSDB_BUFFER_H_ */
/*
* Copyright (c) 2019 TAOS Data, Inc. <jhtao@taosdata.com>
*
* This program is free software: you can use, redistribute, and/or modify
* it under the terms of the GNU Affero General Public License, version 3
* or later ("AGPL"), as published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#ifndef _TD_TSDB_COMMIT_H_
#define _TD_TSDB_COMMIT_H_
typedef struct {
int minFid;
int midFid;
int maxFid;
TSKEY minKey;
} SRtn;
typedef struct {
uint64_t uid;
int64_t offset;
int64_t size;
} SKVRecord;
#define TSDB_DEFAULT_BLOCK_ROWS(maxRows) ((maxRows)*4 / 5)
void tsdbGetRtnSnap(STsdb *pRepo, SRtn *pRtn);
int tsdbEncodeKVRecord(void **buf, SKVRecord *pRecord);
void *tsdbDecodeKVRecord(void *buf, SKVRecord *pRecord);
void *tsdbCommitData(STsdb *pRepo);
int tsdbApplyRtnOnFSet(STsdb *pRepo, SDFileSet *pSet, SRtn *pRtn);
int tsdbWriteBlockInfoImpl(SDFile *pHeadf, STable *pTable, SArray *pSupA, SArray *pSubA, void **ppBuf, SBlockIdx *pIdx);
int tsdbWriteBlockIdx(SDFile *pHeadf, SArray *pIdxA, void **ppBuf);
int tsdbWriteBlockImpl(STsdb *pRepo, STable *pTable, SDFile *pDFile, SDFile *pDFileAggr, SDataCols *pDataCols,
SBlock *pBlock, bool isLast, bool isSuper, void **ppBuf, void **ppCBuf, void **ppExBuf);
int tsdbApplyRtn(STsdb *pRepo);
static FORCE_INLINE int tsdbGetFidLevel(int fid, SRtn *pRtn) {
if (fid >= pRtn->maxFid) {
return 0;
} else if (fid >= pRtn->midFid) {
return 1;
} else if (fid >= pRtn->minFid) {
return 2;
} else {
return -1;
}
}
#endif /* _TD_TSDB_COMMIT_H_ */
\ No newline at end of file
/*
* Copyright (c) 2019 TAOS Data, Inc. <jhtao@taosdata.com>
*
* This program is free software: you can use, redistribute, and/or modify
* it under the terms of the GNU Affero General Public License, version 3
* or later ("AGPL"), as published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#ifndef _TD_TSDB_FS_H_
#define _TD_TSDB_FS_H_
/**
* 1. The fileset .head/.data/.last use the same fver 0 before 2021.10.10.
* 2. .head fver is 1 when extract aggregate block data from .data/.last file and save to separate .smad/.smal file
* since 2021.10.10
* // TODO update date and add release version.
*/
typedef enum {
TSDB_FS_VER_0 = 0,
TSDB_FS_VER_1,
} ETsdbFsVer;
#define TSDB_FVER_TYPE uint32_t
#define TSDB_LATEST_FVER TSDB_FS_VER_1 // latest version for DFile
#define TSDB_LATEST_SFS_VER TSDB_FS_VER_1 // latest version for 'current' file
static FORCE_INLINE uint32_t tsdbGetDFSVersion(TSDB_FILE_T fType) { // latest version for DFile
switch (fType) {
case TSDB_FILE_HEAD:
return TSDB_FS_VER_1;
default:
return TSDB_FS_VER_0;
}
}
// ================== TSDB global config
extern bool tsdbForceKeepFile;
// ================== CURRENT file header info
typedef struct {
uint32_t version; // Current file system version (relating to code)
uint32_t len; // Encode content length (including checksum)
} SFSHeader;
// ================== TSDB File System Meta
typedef struct {
uint32_t version; // Commit version from 0 to increase
int64_t totalPoints; // total points
int64_t totalStorage; // Uncompressed total storage
} STsdbFSMeta;
// ==================
typedef struct {
STsdbFSMeta meta; // FS meta
SMFile* pmf; // meta file pointer
SMFile mf; // meta file
SArray* df; // data file array
} SFSStatus;
typedef struct {
pthread_rwlock_t lock;
SFSStatus* cstatus; // current status
SHashObj* metaCache; // meta cache
SHashObj* metaCacheComp; // meta cache for compact
bool intxn;
SFSStatus* nstatus; // new status
} STsdbFS;
#define FS_CURRENT_STATUS(pfs) ((pfs)->cstatus)
#define FS_NEW_STATUS(pfs) ((pfs)->nstatus)
#define FS_IN_TXN(pfs) (pfs)->intxn
#define FS_VERSION(pfs) ((pfs)->cstatus->meta.version)
#define FS_TXN_VERSION(pfs) ((pfs)->nstatus->meta.version)
typedef struct {
int direction;
uint64_t version; // current FS version
STsdbFS* pfs;
int index; // used to position next fset when version the same
int fid; // used to seek when version is changed
SDFileSet* pSet;
} SFSIter;
#define TSDB_FS_ITER_FORWARD TSDB_ORDER_ASC
#define TSDB_FS_ITER_BACKWARD TSDB_ORDER_DESC
STsdbFS *tsdbNewFS(STsdbCfg *pCfg);
void * tsdbFreeFS(STsdbFS *pfs);
int tsdbOpenFS(STsdb *pRepo);
void tsdbCloseFS(STsdb *pRepo);
void tsdbStartFSTxn(STsdb *pRepo, int64_t pointsAdd, int64_t storageAdd);
int tsdbEndFSTxn(STsdb *pRepo);
int tsdbEndFSTxnWithError(STsdbFS *pfs);
void tsdbUpdateFSTxnMeta(STsdbFS *pfs, STsdbFSMeta *pMeta);
void tsdbUpdateMFile(STsdbFS *pfs, const SMFile *pMFile);
int tsdbUpdateDFileSet(STsdbFS *pfs, const SDFileSet *pSet);
void tsdbFSIterInit(SFSIter *pIter, STsdbFS *pfs, int direction);
void tsdbFSIterSeek(SFSIter *pIter, int fid);
SDFileSet *tsdbFSIterNext(SFSIter *pIter);
int tsdbLoadMetaCache(STsdb *pRepo, bool recoverMeta);
static FORCE_INLINE int tsdbRLockFS(STsdbFS* pFs) {
int code = pthread_rwlock_rdlock(&(pFs->lock));
if (code != 0) {
terrno = TAOS_SYSTEM_ERROR(code);
return -1;
}
return 0;
}
static FORCE_INLINE int tsdbWLockFS(STsdbFS* pFs) {
int code = pthread_rwlock_wrlock(&(pFs->lock));
if (code != 0) {
terrno = TAOS_SYSTEM_ERROR(code);
return -1;
}
return 0;
}
static FORCE_INLINE int tsdbUnLockFS(STsdbFS* pFs) {
int code = pthread_rwlock_unlock(&(pFs->lock));
if (code != 0) {
terrno = TAOS_SYSTEM_ERROR(code);
return -1;
}
return 0;
}
#endif /* _TD_TSDB_FS_H_ */
/*
* Copyright (c) 2019 TAOS Data, Inc. <jhtao@taosdata.com>
*
* This program is free software: you can use, redistribute, and/or modify
* it under the terms of the GNU Affero General Public License, version 3
* or later ("AGPL"), as published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#ifndef _TS_TSDB_FILE_H_
#define _TS_TSDB_FILE_H_
#include "os.h"
#define TSDB_FILE_HEAD_SIZE 512
#define TSDB_FILE_DELIMITER 0xF00AFA0F
#define TSDB_FILE_INIT_MAGIC 0xFFFFFFFF
#define TSDB_IVLD_FID INT_MIN
#define TSDB_FILE_STATE_OK 0
#define TSDB_FILE_STATE_BAD 1
#define TSDB_FILE_INFO(tf) (&((tf)->info))
#define TSDB_FILE_F(tf) (&((tf)->f))
#define TSDB_FILE_FD(tf) ((tf)->fd)
#define TSDB_FILE_FULL_NAME(tf) TFILE_NAME(TSDB_FILE_F(tf))
#define TSDB_FILE_OPENED(tf) (TSDB_FILE_FD(tf) >= 0)
#define TSDB_FILE_CLOSED(tf) (!TSDB_FILE_OPENED(tf))
#define TSDB_FILE_SET_CLOSED(f) (TSDB_FILE_FD(f) = -1)
#define TSDB_FILE_LEVEL(tf) TFILE_LEVEL(TSDB_FILE_F(tf))
#define TSDB_FILE_ID(tf) TFILE_ID(TSDB_FILE_F(tf))
#define TSDB_FILE_FSYNC(tf) taosFsyncFile(TSDB_FILE_FD(tf))
#define TSDB_FILE_STATE(tf) ((tf)->state)
#define TSDB_FILE_SET_STATE(tf, s) ((tf)->state = (s))
#define TSDB_FILE_IS_OK(tf) (TSDB_FILE_STATE(tf) == TSDB_FILE_STATE_OK)
#define TSDB_FILE_IS_BAD(tf) (TSDB_FILE_STATE(tf) == TSDB_FILE_STATE_BAD)
#define ASSERT_TSDB_FSET_NFILES_VALID(s) \
do { \
uint8_t nDFiles = tsdbGetNFiles(s); \
ASSERT((nDFiles >= TSDB_FILE_MIN) && (nDFiles <= TSDB_FILE_MAX)); \
} while (0)
typedef enum {
TSDB_FILE_HEAD = 0,
TSDB_FILE_DATA,
TSDB_FILE_LAST,
TSDB_FILE_SMAD, // sma for .data
TSDB_FILE_SMAL, // sma for .last
TSDB_FILE_MAX,
TSDB_FILE_META
} TSDB_FILE_T;
#define TSDB_FILE_MIN 3U // min valid number of files in one DFileSet(.head/.data/.last)
// =============== SMFile
typedef struct {
int64_t size;
int64_t tombSize;
int64_t nRecords;
int64_t nDels;
uint32_t magic;
} SMFInfo;
typedef struct {
SMFInfo info;
TFILE f;
int fd;
uint8_t state;
} SMFile;
void tsdbInitMFile(SMFile* pMFile, SDiskID did, int vid, uint32_t ver);
void tsdbInitMFileEx(SMFile* pMFile, const SMFile* pOMFile);
int tsdbEncodeSMFile(void** buf, SMFile* pMFile);
void* tsdbDecodeSMFile(void* buf, SMFile* pMFile);
int tsdbEncodeSMFileEx(void** buf, SMFile* pMFile);
void* tsdbDecodeSMFileEx(void* buf, SMFile* pMFile);
int tsdbApplyMFileChange(SMFile* from, SMFile* to);
int tsdbCreateMFile(SMFile* pMFile, bool updateHeader);
int tsdbUpdateMFileHeader(SMFile* pMFile);
int tsdbLoadMFileHeader(SMFile* pMFile, SMFInfo* pInfo);
int tsdbScanAndTryFixMFile(STsdb* pRepo);
int tsdbEncodeMFInfo(void** buf, SMFInfo* pInfo);
void* tsdbDecodeMFInfo(void* buf, SMFInfo* pInfo);
static FORCE_INLINE void tsdbSetMFileInfo(SMFile* pMFile, SMFInfo* pInfo) { pMFile->info = *pInfo; }
static FORCE_INLINE int tsdbOpenMFile(SMFile* pMFile, int flags) {
ASSERT(TSDB_FILE_CLOSED(pMFile));
pMFile->fd = open(TSDB_FILE_FULL_NAME(pMFile), flags | O_BINARY);
if (pMFile->fd < 0) {
terrno = TAOS_SYSTEM_ERROR(errno);
return -1;
}
return 0;
}
static FORCE_INLINE void tsdbCloseMFile(SMFile* pMFile) {
if (TSDB_FILE_OPENED(pMFile)) {
close(pMFile->fd);
TSDB_FILE_SET_CLOSED(pMFile);
}
}
static FORCE_INLINE int64_t tsdbSeekMFile(SMFile* pMFile, int64_t offset, int whence) {
ASSERT(TSDB_FILE_OPENED(pMFile));
int64_t loffset = taosLSeekFile(TSDB_FILE_FD(pMFile), offset, whence);
if (loffset < 0) {
terrno = TAOS_SYSTEM_ERROR(errno);
return -1;
}
return loffset;
}
static FORCE_INLINE int64_t tsdbWriteMFile(SMFile* pMFile, void* buf, int64_t nbyte) {
ASSERT(TSDB_FILE_OPENED(pMFile));
int64_t nwrite = taosWriteFile(pMFile->fd, buf, nbyte);
if (nwrite < nbyte) {
terrno = TAOS_SYSTEM_ERROR(errno);
return -1;
}
return nwrite;
}
static FORCE_INLINE void tsdbUpdateMFileMagic(SMFile* pMFile, void* pCksum) {
pMFile->info.magic = taosCalcChecksum(pMFile->info.magic, (uint8_t*)(pCksum), sizeof(TSCKSUM));
}
static FORCE_INLINE int tsdbAppendMFile(SMFile* pMFile, void* buf, int64_t nbyte, int64_t* offset) {
ASSERT(TSDB_FILE_OPENED(pMFile));
int64_t toffset;
if ((toffset = tsdbSeekMFile(pMFile, 0, SEEK_END)) < 0) {
return -1;
}
ASSERT(pMFile->info.size == toffset);
if (offset) {
*offset = toffset;
}
if (tsdbWriteMFile(pMFile, buf, nbyte) < 0) {
return -1;
}
pMFile->info.size += nbyte;
return (int)nbyte;
}
static FORCE_INLINE int tsdbRemoveMFile(SMFile* pMFile) { return tfsremove(TSDB_FILE_F(pMFile)); }
static FORCE_INLINE int64_t tsdbReadMFile(SMFile* pMFile, void* buf, int64_t nbyte) {
ASSERT(TSDB_FILE_OPENED(pMFile));
int64_t nread = taosReadFile(pMFile->fd, buf, nbyte);
if (nread < 0) {
terrno = TAOS_SYSTEM_ERROR(errno);
return -1;
}
return nread;
}
// =============== SDFile
typedef struct {
uint32_t magic;
uint32_t len;
uint32_t totalBlocks;
uint32_t totalSubBlocks;
uint32_t offset;
uint64_t size;
uint64_t tombSize;
uint32_t fver;
} SDFInfo;
typedef struct {
SDFInfo info;
TFILE f;
int fd;
uint8_t state;
} SDFile;
void tsdbInitDFile(SDFile* pDFile, SDiskID did, int vid, int fid, uint32_t ver, TSDB_FILE_T ftype);
void tsdbInitDFileEx(SDFile* pDFile, SDFile* pODFile);
int tsdbEncodeSDFile(void** buf, SDFile* pDFile);
void* tsdbDecodeSDFile(void* buf, SDFile* pDFile, uint32_t sfver);
int tsdbCreateDFile(SDFile* pDFile, bool updateHeader, TSDB_FILE_T ftype);
int tsdbUpdateDFileHeader(SDFile* pDFile);
int tsdbLoadDFileHeader(SDFile* pDFile, SDFInfo* pInfo);
int tsdbParseDFilename(const char* fname, int* vid, int* fid, TSDB_FILE_T* ftype, uint32_t* version);
static FORCE_INLINE void tsdbSetDFileInfo(SDFile* pDFile, SDFInfo* pInfo) { pDFile->info = *pInfo; }
static FORCE_INLINE int tsdbOpenDFile(SDFile* pDFile, int flags) {
ASSERT(!TSDB_FILE_OPENED(pDFile));
pDFile->fd = open(TSDB_FILE_FULL_NAME(pDFile), flags | O_BINARY);
if (pDFile->fd < 0) {
terrno = TAOS_SYSTEM_ERROR(errno);
return -1;
}
return 0;
}
static FORCE_INLINE void tsdbCloseDFile(SDFile* pDFile) {
if (TSDB_FILE_OPENED(pDFile)) {
close(pDFile->fd);
TSDB_FILE_SET_CLOSED(pDFile);
}
}
static FORCE_INLINE int64_t tsdbSeekDFile(SDFile* pDFile, int64_t offset, int whence) {
ASSERT(TSDB_FILE_OPENED(pDFile));
int64_t loffset = taosLSeekFile(TSDB_FILE_FD(pDFile), offset, whence);
if (loffset < 0) {
terrno = TAOS_SYSTEM_ERROR(errno);
return -1;
}
return loffset;
}
static FORCE_INLINE int64_t tsdbWriteDFile(SDFile* pDFile, void* buf, int64_t nbyte) {
ASSERT(TSDB_FILE_OPENED(pDFile));
int64_t nwrite = taosWriteFile(pDFile->fd, buf, nbyte);
if (nwrite < nbyte) {
terrno = TAOS_SYSTEM_ERROR(errno);
return -1;
}
return nwrite;
}
static FORCE_INLINE void tsdbUpdateDFileMagic(SDFile* pDFile, void* pCksm) {
pDFile->info.magic = taosCalcChecksum(pDFile->info.magic, (uint8_t*)(pCksm), sizeof(TSCKSUM));
}
static FORCE_INLINE int tsdbAppendDFile(SDFile* pDFile, void* buf, int64_t nbyte, int64_t* offset) {
ASSERT(TSDB_FILE_OPENED(pDFile));
int64_t toffset;
if ((toffset = tsdbSeekDFile(pDFile, 0, SEEK_END)) < 0) {
return -1;
}
ASSERT(pDFile->info.size == toffset);
if (offset) {
*offset = toffset;
}
if (tsdbWriteDFile(pDFile, buf, nbyte) < 0) {
return -1;
}
pDFile->info.size += nbyte;
return (int)nbyte;
}
static FORCE_INLINE int tsdbRemoveDFile(SDFile* pDFile) { return tfsremove(TSDB_FILE_F(pDFile)); }
static FORCE_INLINE int64_t tsdbReadDFile(SDFile* pDFile, void* buf, int64_t nbyte) {
ASSERT(TSDB_FILE_OPENED(pDFile));
int64_t nread = taosReadFile(pDFile->fd, buf, nbyte);
if (nread < 0) {
terrno = TAOS_SYSTEM_ERROR(errno);
return -1;
}
return nread;
}
static FORCE_INLINE int tsdbCopyDFile(SDFile* pSrc, SDFile* pDest) {
if (tfscopy(TSDB_FILE_F(pSrc), TSDB_FILE_F(pDest)) < 0) {
terrno = TAOS_SYSTEM_ERROR(errno);
return -1;
}
tsdbSetDFileInfo(pDest, TSDB_FILE_INFO(pSrc));
return 0;
}
// =============== SDFileSet
typedef struct {
int fid;
int state;
uint16_t ver; // fset version
SDFile files[TSDB_FILE_MAX];
} SDFileSet;
typedef enum {
TSDB_FSET_VER_0 = 0, // .head/.data/.last
TSDB_FSET_VER_1, // .head/.data/.last/.smad/.smal
} ETsdbFSetVer;
#define TSDB_LATEST_FSET_VER TSDB_FSET_VER_1
// get nDFiles in SDFileSet
static FORCE_INLINE uint8_t tsdbGetNFiles(SDFileSet* pSet) {
switch (pSet->ver) {
case TSDB_FSET_VER_0:
return TSDB_FILE_MIN;
case TSDB_FSET_VER_1:
default:
return TSDB_FILE_MAX;
}
}
#define TSDB_FSET_FID(s) ((s)->fid)
#define TSDB_DFILE_IN_SET(s, t) ((s)->files + (t))
#define TSDB_FSET_LEVEL(s) TSDB_FILE_LEVEL(TSDB_DFILE_IN_SET(s, 0))
#define TSDB_FSET_ID(s) TSDB_FILE_ID(TSDB_DFILE_IN_SET(s, 0))
#define TSDB_FSET_SET_CLOSED(s) \
do { \
for (TSDB_FILE_T ftype = TSDB_FILE_HEAD; ftype < TSDB_FILE_MAX; ftype++) { \
TSDB_FILE_SET_CLOSED(TSDB_DFILE_IN_SET(s, ftype)); \
} \
} while (0);
#define TSDB_FSET_FSYNC(s) \
do { \
for (TSDB_FILE_T ftype = TSDB_FILE_HEAD; ftype < tsdbGetNFiles(s); ftype++) { \
TSDB_FILE_FSYNC(TSDB_DFILE_IN_SET(s, ftype)); \
} \
} while (0);
void tsdbInitDFileSet(SDFileSet* pSet, SDiskID did, int vid, int fid, uint32_t ver, uint16_t fsetVer);
void tsdbInitDFileSetEx(SDFileSet* pSet, SDFileSet* pOSet);
int tsdbEncodeDFileSet(void** buf, SDFileSet* pSet);
void* tsdbDecodeDFileSet(void* buf, SDFileSet* pSet, uint32_t sfver);
int tsdbEncodeDFileSetEx(void** buf, SDFileSet* pSet);
void* tsdbDecodeDFileSetEx(void* buf, SDFileSet* pSet);
int tsdbApplyDFileSetChange(SDFileSet* from, SDFileSet* to);
int tsdbCreateDFileSet(SDFileSet* pSet, bool updateHeader);
int tsdbUpdateDFileSetHeader(SDFileSet* pSet);
int tsdbScanAndTryFixDFileSet(STsdb* pRepo, SDFileSet* pSet);
static FORCE_INLINE void tsdbCloseDFileSet(SDFileSet* pSet) {
ASSERT_TSDB_FSET_NFILES_VALID(pSet);
for (TSDB_FILE_T ftype = 0; ftype < tsdbGetNFiles(pSet); ftype++) {
tsdbCloseDFile(TSDB_DFILE_IN_SET(pSet, ftype));
}
}
static FORCE_INLINE int tsdbOpenDFileSet(SDFileSet* pSet, int flags) {
ASSERT_TSDB_FSET_NFILES_VALID(pSet);
for (TSDB_FILE_T ftype = 0; ftype < tsdbGetNFiles(pSet); ftype++) {
if (tsdbOpenDFile(TSDB_DFILE_IN_SET(pSet, ftype), flags) < 0) {
tsdbCloseDFileSet(pSet);
return -1;
}
}
return 0;
}
static FORCE_INLINE void tsdbRemoveDFileSet(SDFileSet* pSet) {
ASSERT_TSDB_FSET_NFILES_VALID(pSet);
for (TSDB_FILE_T ftype = 0; ftype < tsdbGetNFiles(pSet); ftype++) {
(void)tsdbRemoveDFile(TSDB_DFILE_IN_SET(pSet, ftype));
}
}
static FORCE_INLINE int tsdbCopyDFileSet(SDFileSet* pSrc, SDFileSet* pDest) {
ASSERT_TSDB_FSET_NFILES_VALID(pSrc);
for (TSDB_FILE_T ftype = 0; ftype < tsdbGetNFiles(pSrc); ftype++) {
if (tsdbCopyDFile(TSDB_DFILE_IN_SET(pSrc, ftype), TSDB_DFILE_IN_SET(pDest, ftype)) < 0) {
tsdbRemoveDFileSet(pDest);
return -1;
}
}
return 0;
}
static FORCE_INLINE void tsdbGetFidKeyRange(int days, int8_t precision, int fid, TSKEY* minKey, TSKEY* maxKey) {
*minKey = fid * days * tsTickPerDay[precision];
*maxKey = *minKey + days * tsTickPerDay[precision] - 1;
}
static FORCE_INLINE bool tsdbFSetIsOk(SDFileSet* pSet) {
for (TSDB_FILE_T ftype = 0; ftype < TSDB_FILE_MAX; ftype++) {
if (TSDB_FILE_IS_BAD(TSDB_DFILE_IN_SET(pSet, ftype))) {
return false;
}
}
return true;
}
#endif /* _TS_TSDB_FILE_H_ */
\ No newline at end of file
/*
* Copyright (c) 2019 TAOS Data, Inc. <jhtao@taosdata.com>
*
* This program is free software: you can use, redistribute, and/or modify
* it under the terms of the GNU Affero General Public License, version 3
* or later ("AGPL"), as published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#ifndef _TD_TSDB_LOG_H_
#define _TD_TSDB_LOG_H_
extern int32_t tsdbDebugFlag;
#define tsdbFatal(...) do { if (tsdbDebugFlag & DEBUG_FATAL) { taosPrintLog("TDB FATAL ", 255, __VA_ARGS__); }} while(0)
#define tsdbError(...) do { if (tsdbDebugFlag & DEBUG_ERROR) { taosPrintLog("TDB ERROR ", 255, __VA_ARGS__); }} while(0)
#define tsdbWarn(...) do { if (tsdbDebugFlag & DEBUG_WARN) { taosPrintLog("TDB WARN ", 255, __VA_ARGS__); }} while(0)
#define tsdbInfo(...) do { if (tsdbDebugFlag & DEBUG_INFO) { taosPrintLog("TDB ", 255, __VA_ARGS__); }} while(0)
#define tsdbDebug(...) do { if (tsdbDebugFlag & DEBUG_DEBUG) { taosPrintLog("TDB ", tsdbDebugFlag, __VA_ARGS__); }} while(0)
#define tsdbTrace(...) do { if (tsdbDebugFlag & DEBUG_TRACE) { taosPrintLog("TDB ", tsdbDebugFlag, __VA_ARGS__); }} while(0)
#endif /* _TD_TSDB_LOG_H_ */
\ No newline at end of file
/*
* Copyright (c) 2019 TAOS Data, Inc. <jhtao@taosdata.com>
*
* This program is free software: you can use, redistribute, and/or modify
* it under the terms of the GNU Affero General Public License, version 3
* or later ("AGPL"), as published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#ifndef _TD_TSDB_MEMTABLE_H_
#define _TD_TSDB_MEMTABLE_H_
typedef struct {
int rowsInserted;
int rowsUpdated;
int rowsDeleteSucceed;
int rowsDeleteFailed;
int nOperations;
TSKEY keyFirst;
TSKEY keyLast;
} SMergeInfo;
typedef struct {
STable * pTable;
SSkipListIterator *pIter;
} SCommitIter;
struct STableData {
uint64_t uid;
TSKEY keyFirst;
TSKEY keyLast;
int64_t numOfRows;
SSkipList* pData;
T_REF_DECLARE()
};
enum { TSDB_UPDATE_META, TSDB_DROP_META };
#ifdef WINDOWS
#pragma pack(push ,1)
typedef struct {
#else
typedef struct __attribute__((packed)){
#endif
char act;
uint64_t uid;
} SActObj;
#ifdef WINDOWS
#pragma pack(pop)
#endif
typedef struct {
int len;
char cont[];
} SActCont;
int tsdbRefMemTable(STsdb* pRepo, SMemTable* pMemTable);
int tsdbUnRefMemTable(STsdb* pRepo, SMemTable* pMemTable);
int tsdbTakeMemSnapshot(STsdb* pRepo, SMemSnapshot* pSnapshot, SArray* pATable);
void tsdbUnTakeMemSnapShot(STsdb* pRepo, SMemSnapshot* pSnapshot);
void* tsdbAllocBytes(STsdb* pRepo, int bytes);
int tsdbAsyncCommit(STsdb* pRepo);
int tsdbSyncCommitConfig(STsdb* pRepo);
int tsdbLoadDataFromCache(STable* pTable, SSkipListIterator* pIter, TSKEY maxKey, int maxRowsToRead, SDataCols* pCols,
TKEY* filterKeys, int nFilterKeys, bool keepDup, SMergeInfo* pMergeInfo);
void* tsdbCommitData(STsdb* pRepo);
static FORCE_INLINE SMemRow tsdbNextIterRow(SSkipListIterator* pIter) {
if (pIter == NULL) return NULL;
SSkipListNode* node = tSkipListIterGet(pIter);
if (node == NULL) return NULL;
return (SMemRow)SL_GET_NODE_DATA(node);
}
static FORCE_INLINE TSKEY tsdbNextIterKey(SSkipListIterator* pIter) {
SMemRow row = tsdbNextIterRow(pIter);
if (row == NULL) return TSDB_DATA_TIMESTAMP_NULL;
return memRowKey(row);
}
static FORCE_INLINE TKEY tsdbNextIterTKey(SSkipListIterator* pIter) {
SMemRow row = tsdbNextIterRow(pIter);
if (row == NULL) return TKEY_NULL;
return memRowTKey(row);
}
#endif /* _TD_TSDB_MEMTABLE_H_ */
\ No newline at end of file
/*
* Copyright (c) 2019 TAOS Data, Inc. <jhtao@taosdata.com>
*
* This program is free software: you can use, redistribute, and/or modify
* it under the terms of the GNU Affero General Public License, version 3
* or later ("AGPL"), as published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#ifndef _TD_TSDB_MEMORY_H_
#define _TD_TSDB_MEMORY_H_
static void * taosTMalloc(size_t size);
static void * taosTCalloc(size_t nmemb, size_t size);
static void * taosTRealloc(void *ptr, size_t size);
static void * taosTZfree(void *ptr);
static size_t taosTSizeof(void *ptr);
static void taosTMemset(void *ptr, int c);
static FORCE_INLINE void *taosTMalloc(size_t size) {
if (size <= 0) return NULL;
void *ret = malloc(size + sizeof(size_t));
if (ret == NULL) return NULL;
*(size_t *)ret = size;
return (void *)((char *)ret + sizeof(size_t));
}
static FORCE_INLINE void *taosTCalloc(size_t nmemb, size_t size) {
size_t tsize = nmemb * size;
void * ret = taosTMalloc(tsize);
if (ret == NULL) return NULL;
taosTMemset(ret, 0);
return ret;
}
static FORCE_INLINE size_t taosTSizeof(void *ptr) { return (ptr) ? (*(size_t *)((char *)ptr - sizeof(size_t))) : 0; }
static FORCE_INLINE void taosTMemset(void *ptr, int c) { memset(ptr, c, taosTSizeof(ptr)); }
static FORCE_INLINE void * taosTRealloc(void *ptr, size_t size) {
if (ptr == NULL) return taosTMalloc(size);
if (size <= taosTSizeof(ptr)) return ptr;
void * tptr = (void *)((char *)ptr - sizeof(size_t));
size_t tsize = size + sizeof(size_t);
void* tptr1 = realloc(tptr, tsize);
if (tptr1 == NULL) return NULL;
tptr = tptr1;
*(size_t *)tptr = size;
return (void *)((char *)tptr + sizeof(size_t));
}
static FORCE_INLINE void* taosTZfree(void* ptr) {
if (ptr) {
free((void*)((char*)ptr - sizeof(size_t)));
}
return NULL;
}
#endif /* _TD_TSDB_MEMORY_H_ */
\ No newline at end of file
/*
* Copyright (c) 2019 TAOS Data, Inc. <jhtao@taosdata.com>
*
* This program is free software: you can use, redistribute, and/or modify
* it under the terms of the GNU Affero General Public License, version 3
* or later ("AGPL"), as published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#ifndef _TD_TSDB_META_H_
#define _TD_TSDB_META_H_
#include "tskiplist.h"
#define TSDB_MAX_TABLE_SCHEMAS 16
#pragma pack(push, 1)
typedef struct jsonMapValue {
void* table; // STable *
int16_t colId; // the json col ID.
} JsonMapValue;
#pragma pack(pop)
typedef struct STable {
STableId tableId;
ETableType type;
tstr* name; // NOTE: there a flexible string here
uint64_t suid;
struct STable* pSuper; // super table pointer
SArray* schema;
STSchema* tagSchema;
SKVRow tagVal;
SSkipList* pIndex; // For TSDB_SUPER_TABLE, it is the skiplist index
SHashObj* jsonKeyMap; // For json tag key {"key":[t1, t2, t3]}
void* eventHandler; // TODO
void* streamHandler; // TODO
TSKEY lastKey;
SMemRow lastRow;
char* sql;
void* cqhandle;
SRWLatch latch; // TODO: implementa latch functions
SDataCol* lastCols;
int16_t maxColNum;
int16_t restoreColumnNum;
bool hasRestoreLastColumn;
int lastColSVersion;
int16_t cacheLastConfigVersion;
T_REF_DECLARE()
} STable;
typedef struct {
pthread_rwlock_t rwLock;
int32_t nTables;
int32_t maxTables;
STable** tables;
SList* superList;
SHashObj* uidMap;
int maxRowBytes;
int maxCols;
} STsdbMeta;
#define TSDB_INIT_NTABLES 1024
#define TABLE_TYPE(t) (t)->type
#define TABLE_NAME(t) (t)->name
#define TABLE_CHAR_NAME(t) TABLE_NAME(t)->data
#define TABLE_UID(t) (t)->tableId.uid
#define TABLE_TID(t) (t)->tableId.tid
#define TABLE_SUID(t) (t)->suid
// #define TSDB_META_FILE_MAGIC(m) KVSTORE_MAGIC((m)->pStore)
#define TSDB_RLOCK_TABLE(t) taosRLockLatch(&((t)->latch))
#define TSDB_RUNLOCK_TABLE(t) taosRUnLockLatch(&((t)->latch))
#define TSDB_WLOCK_TABLE(t) taosWLockLatch(&((t)->latch))
#define TSDB_WUNLOCK_TABLE(t) taosWUnLockLatch(&((t)->latch))
STsdbMeta* tsdbNewMeta(STsdbCfg* pCfg);
void tsdbFreeMeta(STsdbMeta* pMeta);
int tsdbOpenMeta(STsdb* pRepo);
int tsdbCloseMeta(STsdb* pRepo);
STable* tsdbGetTableByUid(STsdbMeta* pMeta, uint64_t uid);
STSchema* tsdbGetTableSchemaByVersion(STable* pTable, int16_t _version, int8_t rowType);
int tsdbWLockRepoMeta(STsdb* pRepo);
int tsdbRLockRepoMeta(STsdb* pRepo);
int tsdbUnlockRepoMeta(STsdb* pRepo);
void tsdbRefTable(STable* pTable);
void tsdbUnRefTable(STable* pTable);
void tsdbUpdateTableSchema(STsdb* pRepo, STable* pTable, STSchema* pSchema, bool insertAct);
int tsdbRestoreTable(STsdb* pRepo, void* cont, int contLen);
void tsdbOrgMeta(STsdb* pRepo);
int tsdbInitColIdCacheWithSchema(STable* pTable, STSchema* pSchema);
int16_t tsdbGetLastColumnsIndexByColId(STable* pTable, int16_t colId);
int tsdbUpdateLastColSchema(STable* pTable, STSchema* pNewSchema);
STSchema* tsdbGetTableLatestSchema(STable* pTable);
void tsdbFreeLastColumns(STable* pTable);
int tsdbCompareJsonMapValue(const void* a, const void* b);
void* tsdbGetJsonTagValue(STable* pTable, char* key, int32_t keyLen, int16_t* colId);
static FORCE_INLINE int tsdbCompareSchemaVersion(const void* key1, const void* key2) {
if (*(int16_t*)key1 < schemaVersion(*(STSchema**)key2)) {
return -1;
} else if (*(int16_t*)key1 > schemaVersion(*(STSchema**)key2)) {
return 1;
} else {
return 0;
}
}
static FORCE_INLINE STSchema* tsdbGetTableSchemaImpl(STable* pTable, bool lock, bool copy, int16_t _version,
int8_t rowType) {
STable* pDTable = (pTable->pSuper != NULL) ? pTable->pSuper : pTable; // for performance purpose
STSchema* pSchema = NULL;
STSchema* pTSchema = NULL;
if (lock) TSDB_RLOCK_TABLE(pDTable);
if (_version < 0) { // get the latest version of schema
pTSchema = *(STSchema**)taosArrayGetLast(pDTable->schema);
} else { // get the schema with version
void* ptr = taosArraySearch(pDTable->schema, &_version, tsdbCompareSchemaVersion, TD_EQ);
if (ptr == NULL) {
if (rowType == SMEM_ROW_KV) {
ptr = taosArrayGetLast(pDTable->schema);
} else {
terrno = TSDB_CODE_TDB_IVD_TB_SCHEMA_VERSION;
goto _exit;
}
}
pTSchema = *(STSchema**)ptr;
}
ASSERT(pTSchema != NULL);
if (copy) {
if ((pSchema = tdDupSchema(pTSchema)) == NULL) terrno = TSDB_CODE_TDB_OUT_OF_MEMORY;
} else {
pSchema = pTSchema;
}
_exit:
if (lock) TSDB_RUNLOCK_TABLE(pDTable);
return pSchema;
}
static FORCE_INLINE STSchema* tsdbGetTableSchema(STable* pTable) {
return tsdbGetTableSchemaImpl(pTable, false, false, -1, -1);
}
static FORCE_INLINE STSchema* tsdbGetTableTagSchema(STable* pTable) {
if (pTable->type == TSDB_CHILD_TABLE) { // check child table first
STable* pSuper = pTable->pSuper;
if (pSuper == NULL) return NULL;
return pSuper->tagSchema;
} else if (pTable->type == TSDB_SUPER_TABLE) {
return pTable->tagSchema;
} else {
return NULL;
}
}
static FORCE_INLINE TSKEY tsdbGetTableLastKeyImpl(STable* pTable) {
ASSERT((pTable->lastRow == NULL) || (pTable->lastKey == memRowKey(pTable->lastRow)));
return pTable->lastKey;
}
#endif /* _TD_TSDB_META_H_ */
\ No newline at end of file
/*
* Copyright (c) 2019 TAOS Data, Inc. <jhtao@taosdata.com>
*
* This program is free software: you can use, redistribute, and/or modify
* it under the terms of the GNU Affero General Public License, version 3
* or later ("AGPL"), as published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#ifndef _TD_TSDB_READ_IMPL_H_
#define _TD_TSDB_READ_IMPL_H_
#include "os.h"
#include "tfs.h"
#include "tsdb.h"
#include "tsdbFile.h"
#include "tsdbMemory.h"
#include "tsdbMeta.h"
#include "tskiplist.h"
typedef struct SReadH SReadH;
typedef struct {
int32_t tid;
uint32_t len;
uint32_t offset;
uint32_t hasLast : 2;
uint32_t numOfBlocks : 30;
uint64_t uid;
TSKEY maxKey;
} SBlockIdx;
#if 0
typedef struct {
int64_t last : 1;
int64_t offset : 63;
int32_t algorithm : 8;
int32_t numOfRows : 24;
int32_t len;
int32_t keyLen; // key column length, keyOffset = offset+sizeof(SBlockData)+sizeof(SBlockCol)*numOfCols
int16_t numOfSubBlocks;
int16_t numOfCols; // not including timestamp column
TSKEY keyFirst;
TSKEY keyLast;
} SBlock;
#endif
/**
* keyLen; // key column length, keyOffset = offset+sizeof(SBlockData)+sizeof(SBlockCol)*numOfCols
* numOfCols; // not including timestamp column
*/
#define SBlockFieldsP0 \
int64_t last : 1; \
int64_t offset : 63; \
int32_t algorithm : 8; \
int32_t numOfRows : 24; \
int32_t len; \
int32_t keyLen; \
int16_t numOfSubBlocks; \
int16_t numOfCols; \
TSKEY keyFirst; \
TSKEY keyLast
/**
* aggrStat; // only valid when blkVer > 0. 0 - no aggr part in .data/.last/.smad/.smal, 1 - has aggr in .smad/.smal
* blkVer; // 0 - original block, 1 - block since importing .smad/.smal
* aggrOffset; // only valid when blkVer > 0 and aggrStat > 0
*/
#define SBlockFieldsP1 \
uint64_t aggrStat : 1; \
uint64_t blkVer : 7; \
uint64_t aggrOffset : 56
typedef struct {
SBlockFieldsP0;
} SBlockV0;
typedef struct {
SBlockFieldsP0;
SBlockFieldsP1;
} SBlockV1;
typedef enum {
TSDB_SBLK_VER_0 = 0,
TSDB_SBLK_VER_1,
} ESBlockVer;
#define SBlockVerLatest TSDB_SBLK_VER_1
#define SBlock SBlockV1 // latest SBlock definition
// lastest SBlockInfo definition
typedef struct {
int32_t delimiter; // For recovery usage
int32_t tid;
uint64_t uid;
SBlock blocks[];
} SBlockInfo;
typedef struct {
int16_t colId;
int32_t len;
uint32_t type : 8;
uint32_t offset : 24;
int64_t sum;
int64_t max;
int64_t min;
int16_t maxIndex;
int16_t minIndex;
int16_t numOfNull;
uint8_t offsetH;
char padding[1];
} SBlockColV0;
typedef struct {
int16_t colId;
uint8_t offsetH;
uint8_t reserved; // reserved field, not used
int32_t len;
uint32_t type : 8;
uint32_t offset : 24;
} SBlockColV1;
#define SBlockCol SBlockColV1 // latest SBlockCol definition
typedef struct {
int16_t colId;
int16_t maxIndex;
int16_t minIndex;
int16_t numOfNull;
int64_t sum;
int64_t max;
int64_t min;
} SAggrBlkColV1;
#define SAggrBlkCol SAggrBlkColV1 // latest SAggrBlkCol definition
// Code here just for back-ward compatibility
static FORCE_INLINE void tsdbSetBlockColOffset(SBlockCol *pBlockCol, uint32_t offset) {
pBlockCol->offset = offset & ((((uint32_t)1) << 24) - 1);
pBlockCol->offsetH = (uint8_t)(offset >> 24);
}
static FORCE_INLINE uint32_t tsdbGetBlockColOffset(SBlockCol *pBlockCol) {
uint32_t offset1 = pBlockCol->offset;
uint32_t offset2 = pBlockCol->offsetH;
return (offset1 | (offset2 << 24));
}
typedef struct {
int32_t delimiter; // For recovery usage
int32_t numOfCols; // For recovery usage
uint64_t uid; // For recovery usage
SBlockCol cols[];
} SBlockData;
typedef void SAggrBlkData; // SBlockCol cols[];
struct SReadH {
STsdb * pRepo;
SDFileSet rSet; // FSET to read
SArray * aBlkIdx; // SBlockIdx array
STable * pTable; // table to read
SBlockIdx * pBlkIdx; // current reading table SBlockIdx
int cidx;
SBlockInfo * pBlkInfo; // SBlockInfoV#
SBlockData * pBlkData; // Block info
SAggrBlkData *pAggrBlkData; // Aggregate Block info
SDataCols * pDCols[2];
void * pBuf; // buffer
void * pCBuf; // compression buffer
void * pExBuf; // extra buffer
};
#define TSDB_READ_REPO(rh) ((rh)->pRepo)
#define TSDB_READ_REPO_ID(rh) REPO_ID(TSDB_READ_REPO(rh))
#define TSDB_READ_FSET(rh) (&((rh)->rSet))
#define TSDB_READ_TABLE(rh) ((rh)->pTable)
#define TSDB_READ_HEAD_FILE(rh) TSDB_DFILE_IN_SET(TSDB_READ_FSET(rh), TSDB_FILE_HEAD)
#define TSDB_READ_DATA_FILE(rh) TSDB_DFILE_IN_SET(TSDB_READ_FSET(rh), TSDB_FILE_DATA)
#define TSDB_READ_LAST_FILE(rh) TSDB_DFILE_IN_SET(TSDB_READ_FSET(rh), TSDB_FILE_LAST)
#define TSDB_READ_SMAD_FILE(rh) TSDB_DFILE_IN_SET(TSDB_READ_FSET(rh), TSDB_FILE_SMAD)
#define TSDB_READ_SMAL_FILE(rh) TSDB_DFILE_IN_SET(TSDB_READ_FSET(rh), TSDB_FILE_SMAL)
#define TSDB_READ_BUF(rh) ((rh)->pBuf)
#define TSDB_READ_COMP_BUF(rh) ((rh)->pCBuf)
#define TSDB_READ_EXBUF(rh) ((rh)->pExBuf)
#define TSDB_BLOCK_STATIS_SIZE(ncols, blkVer) \
(sizeof(SBlockData) + sizeof(SBlockColV##blkVer) * (ncols) + sizeof(TSCKSUM))
static FORCE_INLINE size_t tsdbBlockStatisSize(int nCols, uint32_t blkVer) {
switch (blkVer) {
case TSDB_SBLK_VER_0:
return TSDB_BLOCK_STATIS_SIZE(nCols, 0);
case TSDB_SBLK_VER_1:
default:
return TSDB_BLOCK_STATIS_SIZE(nCols, 1);
}
}
#define TSDB_BLOCK_AGGR_SIZE(ncols, blkVer) (sizeof(SAggrBlkColV##blkVer) * (ncols) + sizeof(TSCKSUM))
static FORCE_INLINE size_t tsdbBlockAggrSize(int nCols, uint32_t blkVer) {
switch (blkVer) {
case TSDB_SBLK_VER_0:
ASSERT(false);
return 0;
case TSDB_SBLK_VER_1:
default:
return TSDB_BLOCK_AGGR_SIZE(nCols, 1);
}
}
int tsdbInitReadH(SReadH *pReadh, STsdb *pRepo);
void tsdbDestroyReadH(SReadH *pReadh);
int tsdbSetAndOpenReadFSet(SReadH *pReadh, SDFileSet *pSet);
void tsdbCloseAndUnsetFSet(SReadH *pReadh);
int tsdbLoadBlockIdx(SReadH *pReadh);
int tsdbSetReadTable(SReadH *pReadh, STable *pTable);
int tsdbLoadBlockInfo(SReadH *pReadh, void **pTarget, uint32_t *extendedLen);
int tsdbLoadBlockData(SReadH *pReadh, SBlock *pBlock, SBlockInfo *pBlockInfo);
int tsdbLoadBlockDataCols(SReadH *pReadh, SBlock *pBlock, SBlockInfo *pBlkInfo, int16_t *colIds, int numOfColsIds);
int tsdbLoadBlockStatis(SReadH *pReadh, SBlock *pBlock);
int tsdbLoadBlockOffset(SReadH *pReadh, SBlock *pBlock);
int tsdbEncodeSBlockIdx(void **buf, SBlockIdx *pIdx);
void *tsdbDecodeSBlockIdx(void *buf, SBlockIdx *pIdx);
void tsdbGetBlockStatis(SReadH *pReadh, SDataStatis *pStatis, int numOfCols, SBlock *pBlock);
static FORCE_INLINE int tsdbMakeRoom(void **ppBuf, size_t size) {
void * pBuf = *ppBuf;
size_t tsize = taosTSizeof(pBuf);
if (tsize < size) {
if (tsize == 0) tsize = 1024;
while (tsize < size) {
tsize *= 2;
}
*ppBuf = taosTRealloc(pBuf, tsize);
if (*ppBuf == NULL) {
terrno = TSDB_CODE_TDB_OUT_OF_MEMORY;
return -1;
}
}
return 0;
}
static FORCE_INLINE SBlockCol *tsdbGetSBlockCol(SBlock *pBlock, SBlockCol **pDestBlkCol, SBlockCol *pBlkCols,
int colIdx) {
if (pBlock->blkVer == SBlockVerLatest) {
*pDestBlkCol = pBlkCols + colIdx;
return *pDestBlkCol;
}
if (pBlock->blkVer == TSDB_SBLK_VER_0) {
SBlockColV0 *pBlkCol = (SBlockColV0 *)pBlkCols + colIdx;
(*pDestBlkCol)->colId = pBlkCol->colId;
(*pDestBlkCol)->len = pBlkCol->len;
(*pDestBlkCol)->type = pBlkCol->type;
(*pDestBlkCol)->offset = pBlkCol->offset;
(*pDestBlkCol)->offsetH = pBlkCol->offsetH;
}
return *pDestBlkCol;
}
#endif /*_TD_TSDB_READ_IMPL_H_*/
/*
* Copyright (c) 2019 TAOS Data, Inc. <jhtao@taosdata.com>
*
* This program is free software: you can use, redistribute, and/or modify
* it under the terms of the GNU Affero General Public License, version 3
* or later ("AGPL"), as published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#ifndef _TD_TSDB_INT_H_
#define _TD_TSDB_INT_H_
#include "os.h"
#include "taosdef.h"
#include "taoserror.h"
#include "tarray.h"
#include "tchecksum.h"
#include "tcoding.h"
#include "tcompression.h"
#include "tdataformat.h"
#include "tfs.h"
#include "thash.h"
#include "tlist.h"
#include "tlockfree.h"
#include "tlog.h"
#include "tsdbMemory.h"
#include "tskiplist.h"
#include "tsdb.h"
#ifdef __cplusplus
extern "C" {
#endif
// Log
#include "tsdbLog.h"
// Meta
#include "tsdbMeta.h"
// // Buffer
// #include "tsdbBuffer.h"
// MemTable
#include "tsdbMemTable.h"
// File
#include "tsdbFile.h"
// FS
#include "tsdbFS.h"
// ReadImpl
#include "tsdbReadImpl.h"
// Commit
#include "tsdbCommit.h"
// Compact
#include "tsdbCompact.h"
#include "tsdbRowMergeBuf.h"
// Main definitions
struct STsdb {
uint8_t state;
STsdbCfg config;
STsdbStat stat;
STsdbMeta* tsdbMeta;
SMemTable* mem;
SMemTable* imem;
STsdbFS* fs;
SRtn rtn;
SMergeBuf mergeBuf; // used when update=2
};
#define REPO_ID(r) (r)->config.tsdbId
#define REPO_CFG(r) (&((r)->config))
#define REPO_FS(r) ((r)->fs)
#define IS_REPO_LOCKED(r) (r)->repoLocked
#define TSDB_SUBMIT_MSG_HEAD_SIZE sizeof(SSubmitMsg)
int tsdbLockRepo(STsdb* pRepo);
int tsdbUnlockRepo(STsdb* pRepo);
STsdbMeta* tsdbGetMeta(STsdb* pRepo);
int tsdbCheckCommit(STsdb* pRepo);
int tsdbRestoreInfo(STsdb* pRepo);
UNUSED_FUNC int tsdbCacheLastData(STsdb* pRepo, STsdbCfg* oldCfg);
int32_t tsdbLoadLastCache(STsdb* pRepo, STable* pTable);
void tsdbGetRootDir(int repoid, char dirName[]);
void tsdbGetDataDir(int repoid, char dirName[]);
#ifdef __cplusplus
}
#endif
#endif /* _TD_TSDB_INT_H_ */
/*
* Copyright (c) 2019 TAOS Data, Inc. <jhtao@taosdata.com>
*
* This program is free software: you can use, redistribute, and/or modify
* it under the terms of the GNU Affero General Public License, version 3
* or later ("AGPL"), as published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#if 0
#include "tsdbHealth.h"
#include "tsdbint.h"
#define POOL_IS_EMPTY(b) (listNEles((b)->bufBlockList) == 0)
// ---------------- INTERNAL FUNCTIONS ----------------
STsdbBufPool *tsdbNewBufPool() {
STsdbBufPool *pBufPool = (STsdbBufPool *)calloc(1, sizeof(*pBufPool));
if (pBufPool == NULL) {
terrno = TSDB_CODE_TDB_OUT_OF_MEMORY;
goto _err;
}
int code = pthread_cond_init(&(pBufPool->poolNotEmpty), NULL);
if (code != 0) {
terrno = TAOS_SYSTEM_ERROR(code);
goto _err;
}
pBufPool->bufBlockList = tdListNew(sizeof(STsdbBufBlock *));
if (pBufPool->bufBlockList == NULL) {
terrno = TSDB_CODE_TDB_OUT_OF_MEMORY;
goto _err;
}
return pBufPool;
_err:
tsdbFreeBufPool(pBufPool);
return NULL;
}
void tsdbFreeBufPool(STsdbBufPool *pBufPool) {
if (pBufPool) {
if (pBufPool->bufBlockList) {
ASSERT(listNEles(pBufPool->bufBlockList) == 0);
tdListFree(pBufPool->bufBlockList);
}
pthread_cond_destroy(&pBufPool->poolNotEmpty);
free(pBufPool);
}
}
int tsdbOpenBufPool(STsdb *pRepo) {
STsdbCfg * pCfg = &(pRepo->config);
STsdbBufPool *pPool = pRepo->pPool;
ASSERT(pPool != NULL);
pPool->bufBlockSize = pCfg->cacheBlockSize * 1024 * 1024; // MB
pPool->tBufBlocks = pCfg->totalBlocks;
pPool->nBufBlocks = 0;
pPool->nElasticBlocks = 0;
pPool->index = 0;
pPool->nRecycleBlocks = 0;
for (int i = 0; i < pCfg->totalBlocks; i++) {
STsdbBufBlock *pBufBlock = tsdbNewBufBlock(pPool->bufBlockSize);
if (pBufBlock == NULL) goto _err;
if (tdListAppend(pPool->bufBlockList, (void *)(&pBufBlock)) < 0) {
tsdbFreeBufBlock(pBufBlock);
terrno = TSDB_CODE_TDB_OUT_OF_MEMORY;
goto _err;
}
pPool->nBufBlocks++;
}
tsdbDebug("vgId:%d buffer pool is opened! bufBlockSize:%d tBufBlocks:%d nBufBlocks:%d", REPO_ID(pRepo),
pPool->bufBlockSize, pPool->tBufBlocks, pPool->nBufBlocks);
return 0;
_err:
tsdbCloseBufPool(pRepo);
return -1;
}
void tsdbCloseBufPool(STsdb *pRepo) {
if (pRepo == NULL) return;
STsdbBufPool * pBufPool = pRepo->pPool;
STsdbBufBlock *pBufBlock = NULL;
if (pBufPool) {
SListNode *pNode = NULL;
while ((pNode = tdListPopHead(pBufPool->bufBlockList)) != NULL) {
tdListNodeGetData(pBufPool->bufBlockList, pNode, (void *)(&pBufBlock));
tsdbFreeBufBlock(pBufBlock);
free(pNode);
}
}
tsdbDebug("vgId:%d, buffer pool is closed", REPO_ID(pRepo));
}
SListNode *tsdbAllocBufBlockFromPool(STsdb *pRepo) {
ASSERT(pRepo != NULL && pRepo->pPool != NULL);
ASSERT(IS_REPO_LOCKED(pRepo));
STsdbBufPool *pBufPool = pRepo->pPool;
while (POOL_IS_EMPTY(pBufPool)) {
if (tsDeadLockKillQuery) {
// supply new Block
if (tsdbInsertNewBlock(pRepo) > 0) {
tsdbWarn("vgId:%d add new elastic block . elasticBlocks=%d cur free Blocks=%d", REPO_ID(pRepo),
pBufPool->nElasticBlocks, TD_DLIST_NELES(pBufPool->bufBlockList));
break;
} else {
// no newBlock, kill query free
if (!tsdbUrgeQueryFree(pRepo)) tsdbWarn("vgId:%d Urge query free thread start failed.", REPO_ID(pRepo));
}
}
pRepo->repoLocked = false;
pthread_cond_wait(&(pBufPool->poolNotEmpty), &(pRepo->mutex));
pRepo->repoLocked = true;
}
SListNode *pNode = tdListPopHead(pBufPool->bufBlockList);
ASSERT(pNode != NULL);
STsdbBufBlock *pBufBlock = NULL;
tdListNodeGetData(pBufPool->bufBlockList, pNode, (void *)(&pBufBlock));
pBufBlock->blockId = pBufPool->index++;
pBufBlock->offset = 0;
pBufBlock->remain = pBufPool->bufBlockSize;
tsdbDebug("vgId:%d, buffer block is allocated, blockId:%" PRId64, REPO_ID(pRepo), pBufBlock->blockId);
return pNode;
}
// ---------------- LOCAL FUNCTIONS ----------------
STsdbBufBlock *tsdbNewBufBlock(int bufBlockSize) {
STsdbBufBlock *pBufBlock = (STsdbBufBlock *)malloc(sizeof(*pBufBlock) + bufBlockSize);
if (pBufBlock == NULL) {
terrno = TSDB_CODE_TDB_OUT_OF_MEMORY;
return NULL;
}
pBufBlock->blockId = 0;
pBufBlock->offset = 0;
pBufBlock->remain = bufBlockSize;
return pBufBlock;
}
void tsdbFreeBufBlock(STsdbBufBlock *pBufBlock) { tfree(pBufBlock); }
int tsdbExpandPool(STsdb *pRepo, int32_t oldTotalBlocks) {
if (oldTotalBlocks == pRepo->config.totalBlocks) {
return TSDB_CODE_SUCCESS;
}
int err = TSDB_CODE_SUCCESS;
if (tsdbLockRepo(pRepo) < 0) return terrno;
STsdbBufPool *pPool = pRepo->pPool;
if (pRepo->config.totalBlocks > oldTotalBlocks) {
for (int i = 0; i < pRepo->config.totalBlocks - oldTotalBlocks; i++) {
STsdbBufBlock *pBufBlock = tsdbNewBufBlock(pPool->bufBlockSize);
if (pBufBlock == NULL) goto err;
if (tdListAppend(pPool->bufBlockList, (void *)(&pBufBlock)) < 0) {
tsdbFreeBufBlock(pBufBlock);
terrno = TSDB_CODE_TDB_OUT_OF_MEMORY;
err = TSDB_CODE_TDB_OUT_OF_MEMORY;
goto err;
}
pPool->nBufBlocks++;
}
pthread_cond_signal(&pPool->poolNotEmpty);
} else {
pPool->nRecycleBlocks = oldTotalBlocks - pRepo->config.totalBlocks;
}
err:
tsdbUnlockRepo(pRepo);
return err;
}
void tsdbRecycleBufferBlock(STsdbBufPool *pPool, SListNode *pNode, bool bELastic) {
STsdbBufBlock *pBufBlock = NULL;
tdListNodeGetData(pPool->bufBlockList, pNode, (void *)(&pBufBlock));
tsdbFreeBufBlock(pBufBlock);
free(pNode);
if (bELastic) {
pPool->nElasticBlocks--;
tsdbWarn("pPool=%p elastic block reduce one . nElasticBlocks=%d cur free Blocks=%d", pPool, pPool->nElasticBlocks,
TD_DLIST_NELES(pPool->bufBlockList));
} else
pPool->nBufBlocks--;
}
#endif
\ No newline at end of file
/*
* Copyright (c) 2019 TAOS Data, Inc. <jhtao@taosdata.com>
*
* This program is free software: you can use, redistribute, and/or modify
* it under the terms of the GNU Affero General Public License, version 3
* or later ("AGPL"), as published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#include "tsdbint.h"
#include "ttime.h"
extern int32_t tsTsdbMetaCompactRatio;
#define TSDB_MAX_SUBBLOCKS 8
static FORCE_INLINE int TSDB_KEY_FID(TSKEY key, int32_t days, int8_t precision) {
if (key < 0) {
return (int)((key + 1) / tsTickPerDay[precision] / days - 1);
} else {
return (int)((key / tsTickPerDay[precision] / days));
}
}
typedef struct {
SRtn rtn; // retention snapshot
SFSIter fsIter; // tsdb file iterator
int niters; // memory iterators
SCommitIter *iters;
bool isRFileSet; // read and commit FSET
SReadH readh;
SDFileSet wSet;
bool isDFileSame;
bool isLFileSame;
TSKEY minKey;
TSKEY maxKey;
SArray * aBlkIdx; // SBlockIdx array
STable * pTable;
SArray * aSupBlk; // Table super-block array
SArray * aSubBlk; // table sub-block array
SDataCols * pDataCols;
} SCommitH;
#define TSDB_COMMIT_REPO(ch) TSDB_READ_REPO(&(ch->readh))
#define TSDB_COMMIT_REPO_ID(ch) REPO_ID(TSDB_READ_REPO(&(ch->readh)))
#define TSDB_COMMIT_WRITE_FSET(ch) (&((ch)->wSet))
#define TSDB_COMMIT_TABLE(ch) ((ch)->pTable)
#define TSDB_COMMIT_HEAD_FILE(ch) TSDB_DFILE_IN_SET(TSDB_COMMIT_WRITE_FSET(ch), TSDB_FILE_HEAD)
#define TSDB_COMMIT_DATA_FILE(ch) TSDB_DFILE_IN_SET(TSDB_COMMIT_WRITE_FSET(ch), TSDB_FILE_DATA)
#define TSDB_COMMIT_LAST_FILE(ch) TSDB_DFILE_IN_SET(TSDB_COMMIT_WRITE_FSET(ch), TSDB_FILE_LAST)
#define TSDB_COMMIT_SMAD_FILE(ch) TSDB_DFILE_IN_SET(TSDB_COMMIT_WRITE_FSET(ch), TSDB_FILE_SMAD)
#define TSDB_COMMIT_SMAL_FILE(ch) TSDB_DFILE_IN_SET(TSDB_COMMIT_WRITE_FSET(ch), TSDB_FILE_SMAL)
#define TSDB_COMMIT_BUF(ch) TSDB_READ_BUF(&((ch)->readh))
#define TSDB_COMMIT_COMP_BUF(ch) TSDB_READ_COMP_BUF(&((ch)->readh))
#define TSDB_COMMIT_EXBUF(ch) TSDB_READ_EXBUF(&((ch)->readh))
#define TSDB_COMMIT_DEFAULT_ROWS(ch) TSDB_DEFAULT_BLOCK_ROWS(TSDB_COMMIT_REPO(ch)->config.maxRowsPerFileBlock)
#define TSDB_COMMIT_TXN_VERSION(ch) FS_TXN_VERSION(REPO_FS(TSDB_COMMIT_REPO(ch)))
static int tsdbCommitMeta(STsdb *pRepo);
static int tsdbUpdateMetaRecord(STsdbFS *pfs, SMFile *pMFile, uint64_t uid, void *cont, int contLen, bool compact);
static int tsdbDropMetaRecord(STsdbFS *pfs, SMFile *pMFile, uint64_t uid);
static int tsdbCompactMetaFile(STsdb *pRepo, STsdbFS *pfs, SMFile *pMFile);
static int tsdbCommitTSData(STsdb *pRepo);
static void tsdbStartCommit(STsdb *pRepo);
static void tsdbEndCommit(STsdb *pRepo, int eno);
static int tsdbCommitToFile(SCommitH *pCommith, SDFileSet *pSet, int fid);
static int tsdbCreateCommitIters(SCommitH *pCommith);
static void tsdbDestroyCommitIters(SCommitH *pCommith);
static void tsdbSeekCommitIter(SCommitH *pCommith, TSKEY key);
static int tsdbInitCommitH(SCommitH *pCommith, STsdb *pRepo);
static void tsdbDestroyCommitH(SCommitH *pCommith);
static int tsdbGetFidLevel(int fid, SRtn *pRtn);
static int tsdbNextCommitFid(SCommitH *pCommith);
static int tsdbCommitToTable(SCommitH *pCommith, int tid);
static int tsdbSetCommitTable(SCommitH *pCommith, STable *pTable);
static int tsdbComparKeyBlock(const void *arg1, const void *arg2);
static int tsdbWriteBlockInfo(SCommitH *pCommih);
static int tsdbCommitMemData(SCommitH *pCommith, SCommitIter *pIter, TSKEY keyLimit, bool toData);
static int tsdbMergeMemData(SCommitH *pCommith, SCommitIter *pIter, int bidx);
static int tsdbMoveBlock(SCommitH *pCommith, int bidx);
static int tsdbCommitAddBlock(SCommitH *pCommith, const SBlock *pSupBlock, const SBlock *pSubBlocks, int nSubBlocks);
static int tsdbMergeBlockData(SCommitH *pCommith, SCommitIter *pIter, SDataCols *pDataCols, TSKEY keyLimit,
bool isLastOneBlock);
static void tsdbResetCommitFile(SCommitH *pCommith);
static void tsdbResetCommitTable(SCommitH *pCommith);
static int tsdbSetAndOpenCommitFile(SCommitH *pCommith, SDFileSet *pSet, int fid);
static void tsdbCloseCommitFile(SCommitH *pCommith, bool hasError);
static bool tsdbCanAddSubBlock(SCommitH *pCommith, SBlock *pBlock, SMergeInfo *pInfo);
static void tsdbLoadAndMergeFromCache(SDataCols *pDataCols, int *iter, SCommitIter *pCommitIter, SDataCols *pTarget,
TSKEY maxKey, int maxRows, int8_t update);
void *tsdbCommitData(STsdb *pRepo) {
if (pRepo->imem == NULL) {
return NULL;
}
tsdbStartCommit(pRepo);
// Commit to update meta file
if (tsdbCommitMeta(pRepo) < 0) {
tsdbError("vgId:%d error occurs while committing META data since %s", REPO_ID(pRepo), tstrerror(terrno));
goto _err;
}
// Create the iterator to read from cache
if (tsdbCommitTSData(pRepo) < 0) {
tsdbError("vgId:%d error occurs while committing TS data since %s", REPO_ID(pRepo), tstrerror(terrno));
goto _err;
}
tsdbEndCommit(pRepo, TSDB_CODE_SUCCESS);
return NULL;
_err:
ASSERT(terrno != TSDB_CODE_SUCCESS);
pRepo->code = terrno;
tsdbEndCommit(pRepo, terrno);
return NULL;
}
int tsdbApplyRtnOnFSet(STsdb *pRepo, SDFileSet *pSet, SRtn *pRtn) {
SDiskID did;
SDFileSet nSet;
STsdbFS * pfs = REPO_FS(pRepo);
int level;
ASSERT(pSet->fid >= pRtn->minFid);
level = tsdbGetFidLevel(pSet->fid, pRtn);
tfsAllocDisk(level, &(did.level), &(did.id));
if (did.level == TFS_UNDECIDED_LEVEL) {
terrno = TSDB_CODE_TDB_NO_AVAIL_DISK;
return -1;
}
if (did.level > TSDB_FSET_LEVEL(pSet)) {
// Need to move the FSET to higher level
tsdbInitDFileSet(&nSet, did, REPO_ID(pRepo), pSet->fid, FS_TXN_VERSION(pfs), pSet->ver);
if (tsdbCopyDFileSet(pSet, &nSet) < 0) {
tsdbError("vgId:%d failed to copy FSET %d from level %d to level %d since %s", REPO_ID(pRepo), pSet->fid,
TSDB_FSET_LEVEL(pSet), did.level, tstrerror(terrno));
return -1;
}
if (tsdbUpdateDFileSet(pfs, &nSet) < 0) {
return -1;
}
tsdbInfo("vgId:%d FSET %d is copied from level %d disk id %d to level %d disk id %d", REPO_ID(pRepo), pSet->fid,
TSDB_FSET_LEVEL(pSet), TSDB_FSET_ID(pSet), did.level, did.id);
} else {
// On a correct level
if (tsdbUpdateDFileSet(pfs, pSet) < 0) {
return -1;
}
}
return 0;
}
int tsdbWriteBlockInfoImpl(SDFile *pHeadf, STable *pTable, SArray *pSupA, SArray *pSubA, void **ppBuf,
SBlockIdx *pIdx) {
size_t nSupBlocks;
size_t nSubBlocks;
uint32_t tlen;
SBlockInfo *pBlkInfo;
int64_t offset;
SBlock * pBlock;
memset(pIdx, 0, sizeof(*pIdx));
nSupBlocks = taosArrayGetSize(pSupA);
nSubBlocks = (pSubA == NULL) ? 0 : taosArrayGetSize(pSubA);
if (nSupBlocks <= 0) {
// No data (data all deleted)
return 0;
}
tlen = (uint32_t)(sizeof(SBlockInfo) + sizeof(SBlock) * (nSupBlocks + nSubBlocks) + sizeof(TSCKSUM));
if (tsdbMakeRoom(ppBuf, tlen) < 0) return -1;
pBlkInfo = *ppBuf;
pBlkInfo->delimiter = TSDB_FILE_DELIMITER;
pBlkInfo->tid = TABLE_TID(pTable);
pBlkInfo->uid = TABLE_UID(pTable);
memcpy((void *)(pBlkInfo->blocks), taosArrayGet(pSupA, 0), nSupBlocks * sizeof(SBlock));
if (nSubBlocks > 0) {
memcpy((void *)(pBlkInfo->blocks + nSupBlocks), taosArrayGet(pSubA, 0), nSubBlocks * sizeof(SBlock));
for (int i = 0; i < nSupBlocks; i++) {
pBlock = pBlkInfo->blocks + i;
if (pBlock->numOfSubBlocks > 1) {
pBlock->offset += (sizeof(SBlockInfo) + sizeof(SBlock) * nSupBlocks);
}
}
}
taosCalcChecksumAppend(0, (uint8_t *)pBlkInfo, tlen);
if (tsdbAppendDFile(pHeadf, (void *)pBlkInfo, tlen, &offset) < 0) {
return -1;
}
tsdbUpdateDFileMagic(pHeadf, POINTER_SHIFT(pBlkInfo, tlen - sizeof(TSCKSUM)));
// Set pIdx
pBlock = taosArrayGetLast(pSupA);
pIdx->tid = TABLE_TID(pTable);
pIdx->uid = TABLE_UID(pTable);
pIdx->hasLast = pBlock->last ? 1 : 0;
pIdx->maxKey = pBlock->keyLast;
pIdx->numOfBlocks = (uint32_t)nSupBlocks;
pIdx->len = tlen;
pIdx->offset = (uint32_t)offset;
return 0;
}
int tsdbWriteBlockIdx(SDFile *pHeadf, SArray *pIdxA, void **ppBuf) {
SBlockIdx *pBlkIdx;
size_t nidx = taosArrayGetSize(pIdxA);
int tlen = 0, size;
int64_t offset = 0;
if (nidx <= 0) {
// All data are deleted
pHeadf->info.offset = 0;
pHeadf->info.len = 0;
return 0;
}
for (size_t i = 0; i < nidx; i++) {
pBlkIdx = (SBlockIdx *)taosArrayGet(pIdxA, i);
size = tsdbEncodeSBlockIdx(NULL, pBlkIdx);
if (tsdbMakeRoom(ppBuf, tlen + size) < 0) return -1;
void *ptr = POINTER_SHIFT(*ppBuf, tlen);
tsdbEncodeSBlockIdx(&ptr, pBlkIdx);
tlen += size;
}
tlen += sizeof(TSCKSUM);
if (tsdbMakeRoom(ppBuf, tlen) < 0) return -1;
taosCalcChecksumAppend(0, (uint8_t *)(*ppBuf), tlen);
if (tsdbAppendDFile(pHeadf, *ppBuf, tlen, &offset) < tlen) {
return -1;
}
tsdbUpdateDFileMagic(pHeadf, POINTER_SHIFT(*ppBuf, tlen - sizeof(TSCKSUM)));
pHeadf->info.offset = (uint32_t)offset;
pHeadf->info.len = tlen;
return 0;
}
// =================== Commit Meta Data
static int tsdbInitCommitMetaFile(STsdb *pRepo, SMFile *pMf, bool open) {
STsdbFS *pfs = REPO_FS(pRepo);
SMFile * pOMFile = pfs->cstatus->pmf;
SDiskID did;
// Create/Open a meta file or open the existing file
if (pOMFile == NULL) {
// Create a new meta file
did.level = TFS_PRIMARY_LEVEL;
did.id = TFS_PRIMARY_ID;
tsdbInitMFile(pMf, did, REPO_ID(pRepo), FS_TXN_VERSION(REPO_FS(pRepo)));
if (open && tsdbCreateMFile(pMf, true) < 0) {
tsdbError("vgId:%d failed to create META file since %s", REPO_ID(pRepo), tstrerror(terrno));
return -1;
}
tsdbInfo("vgId:%d meta file %s is created to commit", REPO_ID(pRepo), TSDB_FILE_FULL_NAME(pMf));
} else {
tsdbInitMFileEx(pMf, pOMFile);
if (open && tsdbOpenMFile(pMf, O_WRONLY) < 0) {
tsdbError("vgId:%d failed to open META file since %s", REPO_ID(pRepo), tstrerror(terrno));
return -1;
}
}
return 0;
}
static int tsdbCommitMeta(STsdb *pRepo) {
STsdbFS * pfs = REPO_FS(pRepo);
SMemTable *pMem = pRepo->imem;
SMFile * pOMFile = pfs->cstatus->pmf;
SMFile mf;
SActObj * pAct = NULL;
SActCont * pCont = NULL;
SListNode *pNode = NULL;
ASSERT(pOMFile != NULL || listNEles(pMem->actList) > 0);
if (listNEles(pMem->actList) <= 0) {
// no meta data to commit, just keep the old meta file
tsdbUpdateMFile(pfs, pOMFile);
if (tsTsdbMetaCompactRatio > 0) {
if (tsdbInitCommitMetaFile(pRepo, &mf, false) < 0) {
return -1;
}
int ret = tsdbCompactMetaFile(pRepo, pfs, &mf);
if (ret < 0) tsdbError("compact meta file error");
return ret;
}
return 0;
} else {
if (tsdbInitCommitMetaFile(pRepo, &mf, true) < 0) {
return -1;
}
}
// Loop to write
while ((pNode = tdListPopHead(pMem->actList)) != NULL) {
pAct = (SActObj *)pNode->data;
if (pAct->act == TSDB_UPDATE_META) {
pCont = (SActCont *)POINTER_SHIFT(pAct, sizeof(SActObj));
if (tsdbUpdateMetaRecord(pfs, &mf, pAct->uid, (void *)(pCont->cont), pCont->len, false) < 0) {
tsdbError("vgId:%d failed to update META record, uid %" PRIu64 " since %s", REPO_ID(pRepo), pAct->uid,
tstrerror(terrno));
tsdbCloseMFile(&mf);
(void)tsdbApplyMFileChange(&mf, pOMFile);
// TODO: need to reload metaCache
return -1;
}
} else if (pAct->act == TSDB_DROP_META) {
if (tsdbDropMetaRecord(pfs, &mf, pAct->uid) < 0) {
tsdbError("vgId:%d failed to drop META record, uid %" PRIu64 " since %s", REPO_ID(pRepo), pAct->uid,
tstrerror(terrno));
tsdbCloseMFile(&mf);
tsdbApplyMFileChange(&mf, pOMFile);
// TODO: need to reload metaCache
return -1;
}
} else {
ASSERT(false);
}
}
if (tsdbUpdateMFileHeader(&mf) < 0) {
tsdbError("vgId:%d failed to update META file header since %s, revert it", REPO_ID(pRepo), tstrerror(terrno));
tsdbApplyMFileChange(&mf, pOMFile);
// TODO: need to reload metaCache
return -1;
}
TSDB_FILE_FSYNC(&mf);
tsdbCloseMFile(&mf);
tsdbUpdateMFile(pfs, &mf);
if (tsTsdbMetaCompactRatio > 0 && tsdbCompactMetaFile(pRepo, pfs, &mf) < 0) {
tsdbError("compact meta file error");
}
return 0;
}
int tsdbEncodeKVRecord(void **buf, SKVRecord *pRecord) {
int tlen = 0;
tlen += taosEncodeFixedU64(buf, pRecord->uid);
tlen += taosEncodeFixedI64(buf, pRecord->offset);
tlen += taosEncodeFixedI64(buf, pRecord->size);
return tlen;
}
void *tsdbDecodeKVRecord(void *buf, SKVRecord *pRecord) {
buf = taosDecodeFixedU64(buf, &(pRecord->uid));
buf = taosDecodeFixedI64(buf, &(pRecord->offset));
buf = taosDecodeFixedI64(buf, &(pRecord->size));
return buf;
}
void tsdbGetRtnSnap(STsdb *pRepo, SRtn *pRtn) {
STsdbCfg *pCfg = REPO_CFG(pRepo);
TSKEY minKey, midKey, maxKey, now;
now = taosGetTimestamp(pCfg->precision);
minKey = now - pCfg->keep * tsTickPerDay[pCfg->precision];
midKey = now - pCfg->keep2 * tsTickPerDay[pCfg->precision];
maxKey = now - pCfg->keep1 * tsTickPerDay[pCfg->precision];
pRtn->minKey = minKey;
pRtn->minFid = (int)(TSDB_KEY_FID(minKey, pCfg->daysPerFile, pCfg->precision));
pRtn->midFid = (int)(TSDB_KEY_FID(midKey, pCfg->daysPerFile, pCfg->precision));
pRtn->maxFid = (int)(TSDB_KEY_FID(maxKey, pCfg->daysPerFile, pCfg->precision));
tsdbDebug("vgId:%d now:%" PRId64 " minKey:%" PRId64 " minFid:%d, midFid:%d, maxFid:%d", REPO_ID(pRepo), now, minKey,
pRtn->minFid, pRtn->midFid, pRtn->maxFid);
}
static int tsdbUpdateMetaRecord(STsdbFS *pfs, SMFile *pMFile, uint64_t uid, void *cont, int contLen, bool compact) {
char buf[64] = "\0";
void * pBuf = buf;
SKVRecord rInfo;
int64_t offset;
// Seek to end of meta file
offset = tsdbSeekMFile(pMFile, 0, SEEK_END);
if (offset < 0) {
return -1;
}
rInfo.offset = offset;
rInfo.uid = uid;
rInfo.size = contLen;
int tlen = tsdbEncodeKVRecord((void **)(&pBuf), &rInfo);
if (tsdbAppendMFile(pMFile, buf, tlen, NULL) < tlen) {
return -1;
}
if (tsdbAppendMFile(pMFile, cont, contLen, NULL) < contLen) {
return -1;
}
tsdbUpdateMFileMagic(pMFile, POINTER_SHIFT(cont, contLen - sizeof(TSCKSUM)));
SHashObj *cache = compact ? pfs->metaCacheComp : pfs->metaCache;
pMFile->info.nRecords++;
SKVRecord *pRecord = taosHashGet(cache, (void *)&uid, sizeof(uid));
if (pRecord != NULL) {
pMFile->info.tombSize += (pRecord->size + sizeof(SKVRecord));
} else {
pMFile->info.nRecords++;
}
taosHashPut(cache, (void *)(&uid), sizeof(uid), (void *)(&rInfo), sizeof(rInfo));
return 0;
}
static int tsdbDropMetaRecord(STsdbFS *pfs, SMFile *pMFile, uint64_t uid) {
SKVRecord rInfo = {0};
char buf[128] = "\0";
SKVRecord *pRecord = taosHashGet(pfs->metaCache, (void *)(&uid), sizeof(uid));
if (pRecord == NULL) {
tsdbError("failed to drop META record with key %" PRIu64 " since not find", uid);
return -1;
}
rInfo.offset = -pRecord->offset;
rInfo.uid = pRecord->uid;
rInfo.size = pRecord->size;
void *pBuf = buf;
tsdbEncodeKVRecord(&pBuf, &rInfo);
if (tsdbAppendMFile(pMFile, buf, sizeof(SKVRecord), NULL) < 0) {
return -1;
}
pMFile->info.magic = taosCalcChecksum(pMFile->info.magic, (uint8_t *)buf, sizeof(SKVRecord));
pMFile->info.nDels++;
pMFile->info.nRecords--;
pMFile->info.tombSize += (rInfo.size + sizeof(SKVRecord) * 2);
taosHashRemove(pfs->metaCache, (void *)(&uid), sizeof(uid));
return 0;
}
static int tsdbCompactMetaFile(STsdb *pRepo, STsdbFS *pfs, SMFile *pMFile) {
float delPercent = (float)(pMFile->info.nDels) / (float)(pMFile->info.nRecords);
float tombPercent = (float)(pMFile->info.tombSize) / (float)(pMFile->info.size);
float compactRatio = (float)(tsTsdbMetaCompactRatio) / 100;
if (delPercent < compactRatio && tombPercent < compactRatio) {
return 0;
}
if (tsdbOpenMFile(pMFile, O_RDONLY) < 0) {
tsdbError("open meta file %s compact fail", pMFile->f.rname);
return -1;
}
tsdbInfo("begin compact tsdb meta file, ratio:%d, nDels:%" PRId64 ",nRecords:%" PRId64 ",tombSize:%" PRId64
",size:%" PRId64,
tsTsdbMetaCompactRatio, pMFile->info.nDels, pMFile->info.nRecords, pMFile->info.tombSize, pMFile->info.size);
SMFile mf;
SDiskID did;
// first create tmp meta file
did.level = TFS_PRIMARY_LEVEL;
did.id = TFS_PRIMARY_ID;
tsdbInitMFile(&mf, did, REPO_ID(pRepo), FS_TXN_VERSION(REPO_FS(pRepo)) + 1);
if (tsdbCreateMFile(&mf, true) < 0) {
tsdbError("vgId:%d failed to create META file since %s", REPO_ID(pRepo), tstrerror(terrno));
return -1;
}
tsdbInfo("vgId:%d meta file %s is created to compact meta data", REPO_ID(pRepo), TSDB_FILE_FULL_NAME(&mf));
// second iterator metaCache
int code = -1;
int64_t maxBufSize = 1024;
SKVRecord *pRecord;
void * pBuf = NULL;
pBuf = malloc((size_t)maxBufSize);
if (pBuf == NULL) {
goto _err;
}
// init Comp
assert(pfs->metaCacheComp == NULL);
pfs->metaCacheComp = taosHashInit(4096, taosGetDefaultHashFunction(TSDB_DATA_TYPE_BIGINT), true, HASH_NO_LOCK);
if (pfs->metaCacheComp == NULL) {
goto _err;
}
pRecord = taosHashIterate(pfs->metaCache, NULL);
while (pRecord) {
if (tsdbSeekMFile(pMFile, pRecord->offset + sizeof(SKVRecord), SEEK_SET) < 0) {
tsdbError("vgId:%d failed to seek file %s since %s", REPO_ID(pRepo), TSDB_FILE_FULL_NAME(pMFile),
tstrerror(terrno));
goto _err;
}
if (pRecord->size > maxBufSize) {
maxBufSize = pRecord->size;
void *tmp = realloc(pBuf, (size_t)maxBufSize);
if (tmp == NULL) {
goto _err;
}
pBuf = tmp;
}
int nread = (int)tsdbReadMFile(pMFile, pBuf, pRecord->size);
if (nread < 0) {
tsdbError("vgId:%d failed to read file %s since %s", REPO_ID(pRepo), TSDB_FILE_FULL_NAME(pMFile),
tstrerror(terrno));
goto _err;
}
if (nread < pRecord->size) {
tsdbError("vgId:%d failed to read file %s since file corrupted, expected read:%" PRId64 " actual read:%d",
REPO_ID(pRepo), TSDB_FILE_FULL_NAME(pMFile), pRecord->size, nread);
goto _err;
}
if (tsdbUpdateMetaRecord(pfs, &mf, pRecord->uid, pBuf, (int)pRecord->size, true) < 0) {
tsdbError("vgId:%d failed to update META record, uid %" PRIu64 " since %s", REPO_ID(pRepo), pRecord->uid,
tstrerror(terrno));
goto _err;
}
pRecord = taosHashIterate(pfs->metaCache, pRecord);
}
code = 0;
_err:
if (code == 0) TSDB_FILE_FSYNC(&mf);
tsdbCloseMFile(&mf);
tsdbCloseMFile(pMFile);
if (code == 0) {
// rename meta.tmp -> meta
tsdbInfo("vgId:%d meta file rename %s -> %s", REPO_ID(pRepo), TSDB_FILE_FULL_NAME(&mf),
TSDB_FILE_FULL_NAME(pMFile));
taosRenameFile(mf.f.aname, pMFile->f.aname);
tstrncpy(mf.f.aname, pMFile->f.aname, TSDB_FILENAME_LEN);
tstrncpy(mf.f.rname, pMFile->f.rname, TSDB_FILENAME_LEN);
// update current meta file info
pfs->nstatus->pmf = NULL;
tsdbUpdateMFile(pfs, &mf);
taosHashCleanup(pfs->metaCache);
pfs->metaCache = pfs->metaCacheComp;
pfs->metaCacheComp = NULL;
} else {
// remove meta.tmp file
remove(mf.f.aname);
taosHashCleanup(pfs->metaCacheComp);
pfs->metaCacheComp = NULL;
}
tfree(pBuf);
ASSERT(mf.info.nDels == 0);
ASSERT(mf.info.tombSize == 0);
tsdbInfo("end compact tsdb meta file,code:%d,nRecords:%" PRId64 ",size:%" PRId64, code, mf.info.nRecords,
mf.info.size);
return code;
}
// =================== Commit Time-Series Data
static int tsdbCommitTSData(STsdb *pRepo) {
SMemTable *pMem = pRepo->imem;
SCommitH commith;
SDFileSet *pSet = NULL;
int fid;
memset(&commith, 0, sizeof(commith));
if (pMem->numOfRows <= 0) {
// No memory data, just apply retention on each file on disk
if (tsdbApplyRtn(pRepo) < 0) {
return -1;
}
return 0;
}
// Resource initialization
if (tsdbInitCommitH(&commith, pRepo) < 0) {
return -1;
}
// Skip expired memory data and expired FSET
tsdbSeekCommitIter(&commith, commith.rtn.minKey);
while ((pSet = tsdbFSIterNext(&(commith.fsIter)))) {
if (pSet->fid < commith.rtn.minFid) {
tsdbInfo("vgId:%d FSET %d on level %d disk id %d expires, remove it", REPO_ID(pRepo), pSet->fid,
TSDB_FSET_LEVEL(pSet), TSDB_FSET_ID(pSet));
} else {
break;
}
}
// Loop to commit to each file
fid = tsdbNextCommitFid(&(commith));
while (true) {
// Loop over both on disk and memory
if (pSet == NULL && fid == TSDB_IVLD_FID) break;
if (pSet && (fid == TSDB_IVLD_FID || pSet->fid < fid)) {
// Only has existing FSET but no memory data to commit in this
// existing FSET, only check if file in correct retention
if (tsdbApplyRtnOnFSet(pRepo, pSet, &(commith.rtn)) < 0) {
tsdbDestroyCommitH(&commith);
return -1;
}
pSet = tsdbFSIterNext(&(commith.fsIter));
} else {
// Has memory data to commit
SDFileSet *pCSet;
int cfid;
if (pSet == NULL || pSet->fid > fid) {
// Commit to a new FSET with fid: fid
pCSet = NULL;
cfid = fid;
} else {
// Commit to an existing FSET
pCSet = pSet;
cfid = pSet->fid;
pSet = tsdbFSIterNext(&(commith.fsIter));
}
if (tsdbCommitToFile(&commith, pCSet, cfid) < 0) {
tsdbDestroyCommitH(&commith);
return -1;
}
fid = tsdbNextCommitFid(&commith);
}
}
tsdbDestroyCommitH(&commith);
return 0;
}
static void tsdbStartCommit(STsdb *pRepo) {
SMemTable *pMem = pRepo->imem;
ASSERT(pMem->numOfRows > 0 || listNEles(pMem->actList) > 0);
tsdbInfo("vgId:%d start to commit! keyFirst %" PRId64 " keyLast %" PRId64 " numOfRows %" PRId64 " meta rows: %d",
REPO_ID(pRepo), pMem->keyFirst, pMem->keyLast, pMem->numOfRows, listNEles(pMem->actList));
tsdbStartFSTxn(pRepo, pMem->pointsAdd, pMem->storageAdd);
pRepo->code = TSDB_CODE_SUCCESS;
}
static void tsdbEndCommit(STsdb *pRepo, int eno) {
if (eno != TSDB_CODE_SUCCESS) {
tsdbEndFSTxnWithError(REPO_FS(pRepo));
} else {
tsdbEndFSTxn(pRepo);
}
tsdbInfo("vgId:%d commit over, %s", REPO_ID(pRepo), (eno == TSDB_CODE_SUCCESS) ? "succeed" : "failed");
if (pRepo->appH.notifyStatus) pRepo->appH.notifyStatus(pRepo->appH.appH, TSDB_STATUS_COMMIT_OVER, eno);
SMemTable *pIMem = pRepo->imem;
(void)tsdbLockRepo(pRepo);
pRepo->imem = NULL;
(void)tsdbUnlockRepo(pRepo);
tsdbUnRefMemTable(pRepo, pIMem);
tsem_post(&(pRepo->readyToCommit));
}
#if 0
static bool tsdbHasDataToCommit(SCommitIter *iters, int nIters, TSKEY minKey, TSKEY maxKey) {
for (int i = 0; i < nIters; i++) {
TSKEY nextKey = tsdbNextIterKey((iters + i)->pIter);
if (nextKey != TSDB_DATA_TIMESTAMP_NULL && (nextKey >= minKey && nextKey <= maxKey)) return true;
}
return false;
}
#endif
static int tsdbCommitToFile(SCommitH *pCommith, SDFileSet *pSet, int fid) {
STsdb * pRepo = TSDB_COMMIT_REPO(pCommith);
STsdbCfg *pCfg = REPO_CFG(pRepo);
ASSERT(pSet == NULL || pSet->fid == fid);
tsdbResetCommitFile(pCommith);
tsdbGetFidKeyRange(pCfg->daysPerFile, pCfg->precision, fid, &(pCommith->minKey), &(pCommith->maxKey));
// Set and open files
if (tsdbSetAndOpenCommitFile(pCommith, pSet, fid) < 0) {
return -1;
}
// Loop to commit each table data
for (int tid = 1; tid < pCommith->niters; tid++) {
SCommitIter *pIter = pCommith->iters + tid;
if (pIter->pTable == NULL) continue;
if (tsdbCommitToTable(pCommith, tid) < 0) {
tsdbCloseCommitFile(pCommith, true);
// revert the file change
tsdbApplyDFileSetChange(TSDB_COMMIT_WRITE_FSET(pCommith), pSet);
return -1;
}
}
if (tsdbWriteBlockIdx(TSDB_COMMIT_HEAD_FILE(pCommith), pCommith->aBlkIdx, (void **)(&(TSDB_COMMIT_BUF(pCommith)))) <
0) {
tsdbError("vgId:%d failed to write SBlockIdx part to FSET %d since %s", REPO_ID(pRepo), fid, tstrerror(terrno));
tsdbCloseCommitFile(pCommith, true);
// revert the file change
tsdbApplyDFileSetChange(TSDB_COMMIT_WRITE_FSET(pCommith), pSet);
return -1;
}
if (tsdbUpdateDFileSetHeader(&(pCommith->wSet)) < 0) {
tsdbError("vgId:%d failed to update FSET %d header since %s", REPO_ID(pRepo), fid, tstrerror(terrno));
tsdbCloseCommitFile(pCommith, true);
// revert the file change
tsdbApplyDFileSetChange(TSDB_COMMIT_WRITE_FSET(pCommith), pSet);
return -1;
}
// Close commit file
tsdbCloseCommitFile(pCommith, false);
if (tsdbUpdateDFileSet(REPO_FS(pRepo), &(pCommith->wSet)) < 0) {
return -1;
}
return 0;
}
static int tsdbCreateCommitIters(SCommitH *pCommith) {
STsdb * pRepo = TSDB_COMMIT_REPO(pCommith);
SMemTable *pMem = pRepo->imem;
STsdbMeta *pMeta = pRepo->tsdbMeta;
pCommith->niters = pMem->maxTables;
pCommith->iters = (SCommitIter *)calloc(pMem->maxTables, sizeof(SCommitIter));
if (pCommith->iters == NULL) {
terrno = TSDB_CODE_TDB_OUT_OF_MEMORY;
return -1;
}
if (tsdbRLockRepoMeta(pRepo) < 0) return -1;
// reference all tables
for (int i = 0; i < pMem->maxTables; i++) {
if (pMeta->tables[i] != NULL) {
tsdbRefTable(pMeta->tables[i]);
pCommith->iters[i].pTable = pMeta->tables[i];
}
}
if (tsdbUnlockRepoMeta(pRepo) < 0) return -1;
for (int i = 0; i < pMem->maxTables; i++) {
if ((pCommith->iters[i].pTable != NULL) && (pMem->tData[i] != NULL) &&
(TABLE_UID(pCommith->iters[i].pTable) == pMem->tData[i]->uid)) {
if ((pCommith->iters[i].pIter = tSkipListCreateIter(pMem->tData[i]->pData)) == NULL) {
terrno = TSDB_CODE_TDB_OUT_OF_MEMORY;
return -1;
}
tSkipListIterNext(pCommith->iters[i].pIter);
}
}
return 0;
}
static void tsdbDestroyCommitIters(SCommitH *pCommith) {
if (pCommith->iters == NULL) return;
for (int i = 1; i < pCommith->niters; i++) {
if (pCommith->iters[i].pTable != NULL) {
tsdbUnRefTable(pCommith->iters[i].pTable);
tSkipListDestroyIter(pCommith->iters[i].pIter);
}
}
free(pCommith->iters);
pCommith->iters = NULL;
pCommith->niters = 0;
}
// Skip all keys until key (not included)
static void tsdbSeekCommitIter(SCommitH *pCommith, TSKEY key) {
for (int i = 0; i < pCommith->niters; i++) {
SCommitIter *pIter = pCommith->iters + i;
if (pIter->pTable == NULL || pIter->pIter == NULL) continue;
tsdbLoadDataFromCache(pIter->pTable, pIter->pIter, key - 1, INT32_MAX, NULL, NULL, 0, true, NULL);
}
}
static int tsdbInitCommitH(SCommitH *pCommith, STsdb *pRepo) {
STsdbCfg *pCfg = REPO_CFG(pRepo);
memset(pCommith, 0, sizeof(*pCommith));
tsdbGetRtnSnap(pRepo, &(pCommith->rtn));
TSDB_FSET_SET_CLOSED(TSDB_COMMIT_WRITE_FSET(pCommith));
// Init read handle
if (tsdbInitReadH(&(pCommith->readh), pRepo) < 0) {
return -1;
}
// Init file iterator
tsdbFSIterInit(&(pCommith->fsIter), REPO_FS(pRepo), TSDB_FS_ITER_FORWARD);
if (tsdbCreateCommitIters(pCommith) < 0) {
tsdbDestroyCommitH(pCommith);
return -1;
}
pCommith->aBlkIdx = taosArrayInit(1024, sizeof(SBlockIdx));
if (pCommith->aBlkIdx == NULL) {
terrno = TSDB_CODE_TDB_OUT_OF_MEMORY;
tsdbDestroyCommitH(pCommith);
return -1;
}
pCommith->aSupBlk = taosArrayInit(1024, sizeof(SBlock));
if (pCommith->aSupBlk == NULL) {
terrno = TSDB_CODE_TDB_OUT_OF_MEMORY;
tsdbDestroyCommitH(pCommith);
return -1;
}
pCommith->aSubBlk = taosArrayInit(1024, sizeof(SBlock));
if (pCommith->aSubBlk == NULL) {
terrno = TSDB_CODE_TDB_OUT_OF_MEMORY;
tsdbDestroyCommitH(pCommith);
return -1;
}
pCommith->pDataCols = tdNewDataCols(0, pCfg->maxRowsPerFileBlock);
if (pCommith->pDataCols == NULL) {
terrno = TSDB_CODE_TDB_OUT_OF_MEMORY;
tsdbDestroyCommitH(pCommith);
return -1;
}
return 0;
}
static void tsdbDestroyCommitH(SCommitH *pCommith) {
pCommith->pDataCols = tdFreeDataCols(pCommith->pDataCols);
pCommith->aSubBlk = taosArrayDestroy(pCommith->aSubBlk);
pCommith->aSupBlk = taosArrayDestroy(pCommith->aSupBlk);
pCommith->aBlkIdx = taosArrayDestroy(pCommith->aBlkIdx);
tsdbDestroyCommitIters(pCommith);
tsdbDestroyReadH(&(pCommith->readh));
tsdbCloseDFileSet(TSDB_COMMIT_WRITE_FSET(pCommith));
}
static int tsdbNextCommitFid(SCommitH *pCommith) {
STsdb * pRepo = TSDB_COMMIT_REPO(pCommith);
STsdbCfg *pCfg = REPO_CFG(pRepo);
int fid = TSDB_IVLD_FID;
for (int i = 0; i < pCommith->niters; i++) {
SCommitIter *pIter = pCommith->iters + i;
if (pIter->pTable == NULL || pIter->pIter == NULL) continue;
TSKEY nextKey = tsdbNextIterKey(pIter->pIter);
if (nextKey == TSDB_DATA_TIMESTAMP_NULL) {
continue;
} else {
int tfid = (int)(TSDB_KEY_FID(nextKey, pCfg->daysPerFile, pCfg->precision));
if (fid == TSDB_IVLD_FID || fid > tfid) {
fid = tfid; // find the least fid
}
}
}
return fid;
}
static int tsdbCommitToTable(SCommitH *pCommith, int tid) {
SCommitIter *pIter = pCommith->iters + tid;
TSKEY nextKey = tsdbNextIterKey(pIter->pIter);
tsdbResetCommitTable(pCommith);
TSDB_RLOCK_TABLE(pIter->pTable);
// Set commit table
if (tsdbSetCommitTable(pCommith, pIter->pTable) < 0) {
TSDB_RUNLOCK_TABLE(pIter->pTable);
return -1;
}
// No disk data and no memory data, just return
if (pCommith->readh.pBlkIdx == NULL && (nextKey == TSDB_DATA_TIMESTAMP_NULL || nextKey > pCommith->maxKey)) {
TSDB_RUNLOCK_TABLE(pIter->pTable);
return 0;
}
// Must has disk data or has memory data
int nBlocks;
int bidx = 0;
SBlock *pBlock;
if (pCommith->readh.pBlkIdx) {
if (tsdbLoadBlockInfo(&(pCommith->readh), NULL, NULL) < 0) {
TSDB_RUNLOCK_TABLE(pIter->pTable);
return -1;
}
nBlocks = pCommith->readh.pBlkIdx->numOfBlocks;
} else {
nBlocks = 0;
}
if (bidx < nBlocks) {
pBlock = pCommith->readh.pBlkInfo->blocks + bidx;
} else {
pBlock = NULL;
}
while (true) {
if (pBlock == NULL && (nextKey == TSDB_DATA_TIMESTAMP_NULL || nextKey > pCommith->maxKey)) break;
if ((nextKey == TSDB_DATA_TIMESTAMP_NULL || nextKey > pCommith->maxKey) ||
(pBlock && (!pBlock->last) && tsdbComparKeyBlock((void *)(&nextKey), pBlock) > 0)) {
if (tsdbMoveBlock(pCommith, bidx) < 0) {
TSDB_RUNLOCK_TABLE(pIter->pTable);
return -1;
}
bidx++;
if (bidx < nBlocks) {
pBlock = pCommith->readh.pBlkInfo->blocks + bidx;
} else {
pBlock = NULL;
}
} else if (pBlock && (pBlock->last || tsdbComparKeyBlock((void *)(&nextKey), pBlock) == 0)) {
// merge pBlock data and memory data
if (tsdbMergeMemData(pCommith, pIter, bidx) < 0) {
TSDB_RUNLOCK_TABLE(pIter->pTable);
return -1;
}
bidx++;
if (bidx < nBlocks) {
pBlock = pCommith->readh.pBlkInfo->blocks + bidx;
} else {
pBlock = NULL;
}
nextKey = tsdbNextIterKey(pIter->pIter);
} else {
// Only commit memory data
if (pBlock == NULL) {
if (tsdbCommitMemData(pCommith, pIter, pCommith->maxKey, false) < 0) {
TSDB_RUNLOCK_TABLE(pIter->pTable);
return -1;
}
} else {
if (tsdbCommitMemData(pCommith, pIter, pBlock->keyFirst - 1, true) < 0) {
TSDB_RUNLOCK_TABLE(pIter->pTable);
return -1;
}
}
nextKey = tsdbNextIterKey(pIter->pIter);
}
}
TSDB_RUNLOCK_TABLE(pIter->pTable);
if (tsdbWriteBlockInfo(pCommith) < 0) {
tsdbError("vgId:%d failed to write SBlockInfo part into file %s since %s", TSDB_COMMIT_REPO_ID(pCommith),
TSDB_FILE_FULL_NAME(TSDB_COMMIT_HEAD_FILE(pCommith)), tstrerror(terrno));
return -1;
}
return 0;
}
static int tsdbSetCommitTable(SCommitH *pCommith, STable *pTable) {
STSchema *pSchema = tsdbGetTableSchemaImpl(pTable, false, false, -1, -1);
pCommith->pTable = pTable;
if (tdInitDataCols(pCommith->pDataCols, pSchema) < 0) {
terrno = TSDB_CODE_TDB_OUT_OF_MEMORY;
return -1;
}
if (pCommith->isRFileSet) {
if (tsdbSetReadTable(&(pCommith->readh), pTable) < 0) {
return -1;
}
} else {
pCommith->readh.pBlkIdx = NULL;
}
return 0;
}
static int tsdbComparKeyBlock(const void *arg1, const void *arg2) {
TSKEY key = *(TSKEY *)arg1;
SBlock *pBlock = (SBlock *)arg2;
if (key < pBlock->keyFirst) {
return -1;
} else if (key > pBlock->keyLast) {
return 1;
} else {
return 0;
}
}
int tsdbWriteBlockImpl(STsdb *pRepo, STable *pTable, SDFile *pDFile, SDFile *pDFileAggr, SDataCols *pDataCols,
SBlock *pBlock, bool isLast, bool isSuper, void **ppBuf, void **ppCBuf, void **ppExBuf) {
STsdbCfg * pCfg = REPO_CFG(pRepo);
SBlockData * pBlockData;
SAggrBlkData *pAggrBlkData = NULL;
int64_t offset = 0, offsetAggr = 0;
int rowsToWrite = pDataCols->numOfRows;
ASSERT(rowsToWrite > 0 && rowsToWrite <= pCfg->maxRowsPerFileBlock);
ASSERT((!isLast) || rowsToWrite < pCfg->minRowsPerFileBlock);
// Make buffer space
if (tsdbMakeRoom(ppBuf, tsdbBlockStatisSize(pDataCols->numOfCols, SBlockVerLatest)) < 0) {
return -1;
}
pBlockData = (SBlockData *)(*ppBuf);
if (tsdbMakeRoom(ppExBuf, tsdbBlockAggrSize(pDataCols->numOfCols, SBlockVerLatest)) < 0) {
return -1;
}
pAggrBlkData = (SAggrBlkData *)(*ppExBuf);
// Get # of cols not all NULL(not including key column)
int nColsNotAllNull = 0;
for (int ncol = 1; ncol < pDataCols->numOfCols; ncol++) { // ncol from 1, we skip the timestamp column
SDataCol * pDataCol = pDataCols->cols + ncol;
SBlockCol * pBlockCol = pBlockData->cols + nColsNotAllNull;
SAggrBlkCol *pAggrBlkCol = (SAggrBlkCol *)pAggrBlkData + nColsNotAllNull;
if (isAllRowsNull(pDataCol)) { // all data to commit are NULL, just ignore it
continue;
}
memset(pBlockCol, 0, sizeof(*pBlockCol));
memset(pAggrBlkCol, 0, sizeof(*pAggrBlkCol));
pBlockCol->colId = pDataCol->colId;
pBlockCol->type = pDataCol->type;
pAggrBlkCol->colId = pDataCol->colId;
if (tDataTypes[pDataCol->type].statisFunc) {
#if 0
(*tDataTypes[pDataCol->type].statisFunc)(pDataCol->pData, rowsToWrite, &(pBlockCol->min), &(pBlockCol->max),
&(pBlockCol->sum), &(pBlockCol->minIndex), &(pBlockCol->maxIndex),
&(pBlockCol->numOfNull));
#endif
(*tDataTypes[pDataCol->type].statisFunc)(pDataCol->pData, rowsToWrite, &(pAggrBlkCol->min), &(pAggrBlkCol->max),
&(pAggrBlkCol->sum), &(pAggrBlkCol->minIndex), &(pAggrBlkCol->maxIndex),
&(pAggrBlkCol->numOfNull));
}
nColsNotAllNull++;
}
ASSERT(nColsNotAllNull >= 0 && nColsNotAllNull <= pDataCols->numOfCols);
// Compress the data if neccessary
int tcol = 0; // counter of not all NULL and written columns
uint32_t toffset = 0;
int32_t tsize = (int32_t)tsdbBlockStatisSize(nColsNotAllNull, SBlockVerLatest);
int32_t lsize = tsize;
int32_t keyLen = 0;
uint32_t tsizeAggr = (uint32_t)tsdbBlockAggrSize(nColsNotAllNull, SBlockVerLatest);
for (int ncol = 0; ncol < pDataCols->numOfCols; ncol++) {
// All not NULL columns finish
if (ncol != 0 && tcol >= nColsNotAllNull) break;
SDataCol * pDataCol = pDataCols->cols + ncol;
SBlockCol *pBlockCol = pBlockData->cols + tcol;
if (ncol != 0 && (pDataCol->colId != pBlockCol->colId)) continue;
int32_t flen; // final length
int32_t tlen = dataColGetNEleLen(pDataCol, rowsToWrite);
void * tptr;
// Make room
if (tsdbMakeRoom(ppBuf, lsize + tlen + COMP_OVERFLOW_BYTES + sizeof(TSCKSUM)) < 0) {
return -1;
}
pBlockData = (SBlockData *)(*ppBuf);
pBlockCol = pBlockData->cols + tcol;
tptr = POINTER_SHIFT(pBlockData, lsize);
if (pCfg->compression == TWO_STAGE_COMP && tsdbMakeRoom(ppCBuf, tlen + COMP_OVERFLOW_BYTES) < 0) {
return -1;
}
// Compress or just copy
if (pCfg->compression) {
flen = (*(tDataTypes[pDataCol->type].compFunc))((char *)pDataCol->pData, tlen, rowsToWrite, tptr,
tlen + COMP_OVERFLOW_BYTES, pCfg->compression, *ppCBuf,
tlen + COMP_OVERFLOW_BYTES);
} else {
flen = tlen;
memcpy(tptr, pDataCol->pData, flen);
}
// Add checksum
ASSERT(flen > 0);
flen += sizeof(TSCKSUM);
taosCalcChecksumAppend(0, (uint8_t *)tptr, flen);
tsdbUpdateDFileMagic(pDFile, POINTER_SHIFT(tptr, flen - sizeof(TSCKSUM)));
if (ncol != 0) {
tsdbSetBlockColOffset(pBlockCol, toffset);
pBlockCol->len = flen;
tcol++;
} else {
keyLen = flen;
}
toffset += flen;
lsize += flen;
}
pBlockData->delimiter = TSDB_FILE_DELIMITER;
pBlockData->uid = TABLE_UID(pTable);
pBlockData->numOfCols = nColsNotAllNull;
taosCalcChecksumAppend(0, (uint8_t *)pBlockData, tsize);
tsdbUpdateDFileMagic(pDFile, POINTER_SHIFT(pBlockData, tsize - sizeof(TSCKSUM)));
// Write the whole block to file
if (tsdbAppendDFile(pDFile, (void *)pBlockData, lsize, &offset) < lsize) {
return -1;
}
uint32_t aggrStatus = nColsNotAllNull > 0 ? 1 : 0;
if (aggrStatus > 0) {
taosCalcChecksumAppend(0, (uint8_t *)pAggrBlkData, tsizeAggr);
tsdbUpdateDFileMagic(pDFileAggr, POINTER_SHIFT(pAggrBlkData, tsizeAggr - sizeof(TSCKSUM)));
// Write the whole block to file
if (tsdbAppendDFile(pDFileAggr, (void *)pAggrBlkData, tsizeAggr, &offsetAggr) < tsizeAggr) {
return -1;
}
}
// Update pBlock membership variables
pBlock->last = isLast;
pBlock->offset = offset;
pBlock->algorithm = pCfg->compression;
pBlock->numOfRows = rowsToWrite;
pBlock->len = lsize;
pBlock->keyLen = keyLen;
pBlock->numOfSubBlocks = isSuper ? 1 : 0;
pBlock->numOfCols = nColsNotAllNull;
pBlock->keyFirst = dataColsKeyFirst(pDataCols);
pBlock->keyLast = dataColsKeyLast(pDataCols);
// since blkVer1
pBlock->aggrStat = aggrStatus;
pBlock->blkVer = SBlockVerLatest;
pBlock->aggrOffset = (uint64_t)offsetAggr;
tsdbDebug("vgId:%d tid:%d a block of data is written to file %s, offset %" PRId64
" numOfRows %d len %d numOfCols %" PRId16 " keyFirst %" PRId64 " keyLast %" PRId64,
REPO_ID(pRepo), TABLE_TID(pTable), TSDB_FILE_FULL_NAME(pDFile), offset, rowsToWrite, pBlock->len,
pBlock->numOfCols, pBlock->keyFirst, pBlock->keyLast);
return 0;
}
static int tsdbWriteBlock(SCommitH *pCommith, SDFile *pDFile, SDataCols *pDataCols, SBlock *pBlock, bool isLast,
bool isSuper) {
return tsdbWriteBlockImpl(TSDB_COMMIT_REPO(pCommith), TSDB_COMMIT_TABLE(pCommith), pDFile,
isLast ? TSDB_COMMIT_SMAL_FILE(pCommith) : TSDB_COMMIT_SMAD_FILE(pCommith), pDataCols,
pBlock, isLast, isSuper, (void **)(&(TSDB_COMMIT_BUF(pCommith))),
(void **)(&(TSDB_COMMIT_COMP_BUF(pCommith))), (void **)(&(TSDB_COMMIT_EXBUF(pCommith))));
}
static int tsdbWriteBlockInfo(SCommitH *pCommih) {
SDFile * pHeadf = TSDB_COMMIT_HEAD_FILE(pCommih);
SBlockIdx blkIdx;
STable * pTable = TSDB_COMMIT_TABLE(pCommih);
if (tsdbWriteBlockInfoImpl(pHeadf, pTable, pCommih->aSupBlk, pCommih->aSubBlk, (void **)(&(TSDB_COMMIT_BUF(pCommih))),
&blkIdx) < 0) {
return -1;
}
if (blkIdx.numOfBlocks == 0) {
return 0;
}
if (taosArrayPush(pCommih->aBlkIdx, (void *)(&blkIdx)) == NULL) {
terrno = TSDB_CODE_TDB_OUT_OF_MEMORY;
return -1;
}
return 0;
}
static int tsdbCommitMemData(SCommitH *pCommith, SCommitIter *pIter, TSKEY keyLimit, bool toData) {
STsdb * pRepo = TSDB_COMMIT_REPO(pCommith);
STsdbCfg * pCfg = REPO_CFG(pRepo);
SMergeInfo mInfo;
int32_t defaultRows = TSDB_COMMIT_DEFAULT_ROWS(pCommith);
SDFile * pDFile;
bool isLast;
SBlock block;
while (true) {
tsdbLoadDataFromCache(pIter->pTable, pIter->pIter, keyLimit, defaultRows, pCommith->pDataCols, NULL, 0,
pCfg->update, &mInfo);
if (pCommith->pDataCols->numOfRows <= 0) break;
if (toData || pCommith->pDataCols->numOfRows >= pCfg->minRowsPerFileBlock) {
pDFile = TSDB_COMMIT_DATA_FILE(pCommith);
isLast = false;
} else {
pDFile = TSDB_COMMIT_LAST_FILE(pCommith);
isLast = true;
}
if (tsdbWriteBlock(pCommith, pDFile, pCommith->pDataCols, &block, isLast, true) < 0) return -1;
if (tsdbCommitAddBlock(pCommith, &block, NULL, 0) < 0) {
return -1;
}
}
return 0;
}
static int tsdbMergeMemData(SCommitH *pCommith, SCommitIter *pIter, int bidx) {
STsdb * pRepo = TSDB_COMMIT_REPO(pCommith);
STsdbCfg * pCfg = REPO_CFG(pRepo);
int nBlocks = pCommith->readh.pBlkIdx->numOfBlocks;
SBlock * pBlock = pCommith->readh.pBlkInfo->blocks + bidx;
TSKEY keyLimit;
int16_t colId = 0;
SMergeInfo mInfo;
SBlock subBlocks[TSDB_MAX_SUBBLOCKS];
SBlock block, supBlock;
SDFile * pDFile;
if (bidx == nBlocks - 1) {
keyLimit = pCommith->maxKey;
} else {
keyLimit = pBlock[1].keyFirst - 1;
}
SSkipListIterator titer = *(pIter->pIter);
if (tsdbLoadBlockDataCols(&(pCommith->readh), pBlock, NULL, &colId, 1) < 0) return -1;
tsdbLoadDataFromCache(pIter->pTable, &titer, keyLimit, INT32_MAX, NULL, pCommith->readh.pDCols[0]->cols[0].pData,
pCommith->readh.pDCols[0]->numOfRows, pCfg->update, &mInfo);
if (mInfo.nOperations == 0) {
// no new data to insert (all updates denied)
if (tsdbMoveBlock(pCommith, bidx) < 0) {
return -1;
}
*(pIter->pIter) = titer;
} else if (pBlock->numOfRows + mInfo.rowsInserted - mInfo.rowsDeleteSucceed == 0) {
// Ignore the block
ASSERT(0);
*(pIter->pIter) = titer;
} else if (tsdbCanAddSubBlock(pCommith, pBlock, &mInfo)) {
// Add a sub-block
tsdbLoadDataFromCache(pIter->pTable, pIter->pIter, keyLimit, INT32_MAX, pCommith->pDataCols,
pCommith->readh.pDCols[0]->cols[0].pData, pCommith->readh.pDCols[0]->numOfRows, pCfg->update,
&mInfo);
if (pBlock->last) {
pDFile = TSDB_COMMIT_LAST_FILE(pCommith);
} else {
pDFile = TSDB_COMMIT_DATA_FILE(pCommith);
}
if (tsdbWriteBlock(pCommith, pDFile, pCommith->pDataCols, &block, pBlock->last, false) < 0) return -1;
if (pBlock->numOfSubBlocks == 1) {
subBlocks[0] = *pBlock;
subBlocks[0].numOfSubBlocks = 0;
} else {
memcpy(subBlocks, POINTER_SHIFT(pCommith->readh.pBlkInfo, pBlock->offset),
sizeof(SBlock) * pBlock->numOfSubBlocks);
}
subBlocks[pBlock->numOfSubBlocks] = block;
supBlock = *pBlock;
supBlock.keyFirst = mInfo.keyFirst;
supBlock.keyLast = mInfo.keyLast;
supBlock.numOfSubBlocks++;
supBlock.numOfRows = pBlock->numOfRows + mInfo.rowsInserted - mInfo.rowsDeleteSucceed;
supBlock.offset = taosArrayGetSize(pCommith->aSubBlk) * sizeof(SBlock);
if (tsdbCommitAddBlock(pCommith, &supBlock, subBlocks, supBlock.numOfSubBlocks) < 0) return -1;
} else {
if (tsdbLoadBlockData(&(pCommith->readh), pBlock, NULL) < 0) return -1;
if (tsdbMergeBlockData(pCommith, pIter, pCommith->readh.pDCols[0], keyLimit, bidx == (nBlocks - 1)) < 0) return -1;
}
return 0;
}
static int tsdbMoveBlock(SCommitH *pCommith, int bidx) {
SBlock *pBlock = pCommith->readh.pBlkInfo->blocks + bidx;
SDFile *pDFile;
SBlock block;
bool isSameFile;
ASSERT(pBlock->numOfSubBlocks > 0);
if (pBlock->last) {
pDFile = TSDB_COMMIT_LAST_FILE(pCommith);
isSameFile = pCommith->isLFileSame;
} else {
pDFile = TSDB_COMMIT_DATA_FILE(pCommith);
isSameFile = pCommith->isDFileSame;
}
if (isSameFile) {
if (pBlock->numOfSubBlocks == 1) {
if (tsdbCommitAddBlock(pCommith, pBlock, NULL, 0) < 0) {
return -1;
}
} else {
block = *pBlock;
block.offset = sizeof(SBlock) * taosArrayGetSize(pCommith->aSubBlk);
if (tsdbCommitAddBlock(pCommith, &block, POINTER_SHIFT(pCommith->readh.pBlkInfo, pBlock->offset),
pBlock->numOfSubBlocks) < 0) {
return -1;
}
}
} else {
if (tsdbLoadBlockData(&(pCommith->readh), pBlock, NULL) < 0) return -1;
if (tsdbWriteBlock(pCommith, pDFile, pCommith->readh.pDCols[0], &block, pBlock->last, true) < 0) return -1;
if (tsdbCommitAddBlock(pCommith, &block, NULL, 0) < 0) return -1;
}
return 0;
}
static int tsdbCommitAddBlock(SCommitH *pCommith, const SBlock *pSupBlock, const SBlock *pSubBlocks, int nSubBlocks) {
if (taosArrayPush(pCommith->aSupBlk, pSupBlock) == NULL) {
terrno = TSDB_CODE_TDB_OUT_OF_MEMORY;
return -1;
}
if (pSubBlocks && taosArrayAddBatch(pCommith->aSubBlk, pSubBlocks, nSubBlocks) == NULL) {
terrno = TSDB_CODE_TDB_OUT_OF_MEMORY;
return -1;
}
return 0;
}
static int tsdbMergeBlockData(SCommitH *pCommith, SCommitIter *pIter, SDataCols *pDataCols, TSKEY keyLimit,
bool isLastOneBlock) {
STsdb * pRepo = TSDB_COMMIT_REPO(pCommith);
STsdbCfg *pCfg = REPO_CFG(pRepo);
SBlock block;
SDFile * pDFile;
bool isLast;
int32_t defaultRows = TSDB_COMMIT_DEFAULT_ROWS(pCommith);
int biter = 0;
while (true) {
tsdbLoadAndMergeFromCache(pCommith->readh.pDCols[0], &biter, pIter, pCommith->pDataCols, keyLimit, defaultRows,
pCfg->update);
if (pCommith->pDataCols->numOfRows == 0) break;
if (isLastOneBlock) {
if (pCommith->pDataCols->numOfRows < pCfg->minRowsPerFileBlock) {
pDFile = TSDB_COMMIT_LAST_FILE(pCommith);
isLast = true;
} else {
pDFile = TSDB_COMMIT_DATA_FILE(pCommith);
isLast = false;
}
} else {
pDFile = TSDB_COMMIT_DATA_FILE(pCommith);
isLast = false;
}
if (tsdbWriteBlock(pCommith, pDFile, pCommith->pDataCols, &block, isLast, true) < 0) return -1;
if (tsdbCommitAddBlock(pCommith, &block, NULL, 0) < 0) return -1;
}
return 0;
}
static void tsdbLoadAndMergeFromCache(SDataCols *pDataCols, int *iter, SCommitIter *pCommitIter, SDataCols *pTarget,
TSKEY maxKey, int maxRows, int8_t update) {
TSKEY key1 = INT64_MAX;
TSKEY key2 = INT64_MAX;
STSchema *pSchema = NULL;
ASSERT(maxRows > 0 && dataColsKeyLast(pDataCols) <= maxKey);
tdResetDataCols(pTarget);
while (true) {
key1 = (*iter >= pDataCols->numOfRows) ? INT64_MAX : dataColsKeyAt(pDataCols, *iter);
SMemRow row = tsdbNextIterRow(pCommitIter->pIter);
if (row == NULL || memRowKey(row) > maxKey) {
key2 = INT64_MAX;
} else {
key2 = memRowKey(row);
}
if (key1 == INT64_MAX && key2 == INT64_MAX) break;
if (key1 < key2) {
for (int i = 0; i < pDataCols->numOfCols; i++) {
// TODO: dataColAppendVal may fail
dataColAppendVal(pTarget->cols + i, tdGetColDataOfRow(pDataCols->cols + i, *iter), pTarget->numOfRows,
pTarget->maxPoints);
}
pTarget->numOfRows++;
(*iter)++;
} else if (key1 > key2) {
if (pSchema == NULL || schemaVersion(pSchema) != memRowVersion(row)) {
pSchema =
tsdbGetTableSchemaImpl(pCommitIter->pTable, false, false, memRowVersion(row), (int8_t)memRowType(row));
ASSERT(pSchema != NULL);
}
tdAppendMemRowToDataCol(row, pSchema, pTarget, true);
tSkipListIterNext(pCommitIter->pIter);
} else {
if (update != TD_ROW_OVERWRITE_UPDATE) {
// copy disk data
for (int i = 0; i < pDataCols->numOfCols; i++) {
// TODO: dataColAppendVal may fail
dataColAppendVal(pTarget->cols + i, tdGetColDataOfRow(pDataCols->cols + i, *iter), pTarget->numOfRows,
pTarget->maxPoints);
}
if (update == TD_ROW_DISCARD_UPDATE) pTarget->numOfRows++;
}
if (update != TD_ROW_DISCARD_UPDATE) {
// copy mem data
if (pSchema == NULL || schemaVersion(pSchema) != memRowVersion(row)) {
pSchema =
tsdbGetTableSchemaImpl(pCommitIter->pTable, false, false, memRowVersion(row), (int8_t)memRowType(row));
ASSERT(pSchema != NULL);
}
tdAppendMemRowToDataCol(row, pSchema, pTarget, update == TD_ROW_OVERWRITE_UPDATE);
}
(*iter)++;
tSkipListIterNext(pCommitIter->pIter);
}
if (pTarget->numOfRows >= maxRows) break;
}
}
static void tsdbResetCommitFile(SCommitH *pCommith) {
pCommith->isRFileSet = false;
pCommith->isDFileSame = false;
pCommith->isLFileSame = false;
taosArrayClear(pCommith->aBlkIdx);
}
static void tsdbResetCommitTable(SCommitH *pCommith) {
taosArrayClear(pCommith->aSubBlk);
taosArrayClear(pCommith->aSupBlk);
pCommith->pTable = NULL;
}
static int tsdbSetAndOpenCommitFile(SCommitH *pCommith, SDFileSet *pSet, int fid) {
SDiskID did;
STsdb * pRepo = TSDB_COMMIT_REPO(pCommith);
SDFileSet *pWSet = TSDB_COMMIT_WRITE_FSET(pCommith);
tfsAllocDisk(tsdbGetFidLevel(fid, &(pCommith->rtn)), &(did.level), &(did.id));
if (did.level == TFS_UNDECIDED_LEVEL) {
terrno = TSDB_CODE_TDB_NO_AVAIL_DISK;
return -1;
}
// Open read FSET
if (pSet) {
if (tsdbSetAndOpenReadFSet(&(pCommith->readh), pSet) < 0) {
return -1;
}
pCommith->isRFileSet = true;
if (tsdbLoadBlockIdx(&(pCommith->readh)) < 0) {
tsdbCloseAndUnsetFSet(&(pCommith->readh));
return -1;
}
tsdbDebug("vgId:%d FSET %d at level %d disk id %d is opened to read to commit", REPO_ID(pRepo), TSDB_FSET_FID(pSet),
TSDB_FSET_LEVEL(pSet), TSDB_FSET_ID(pSet));
} else {
pCommith->isRFileSet = false;
}
// Set and open commit FSET
if (pSet == NULL || did.level > TSDB_FSET_LEVEL(pSet)) {
// Create a new FSET to write data
tsdbInitDFileSet(pWSet, did, REPO_ID(pRepo), fid, FS_TXN_VERSION(REPO_FS(pRepo)), TSDB_LATEST_FSET_VER);
if (tsdbCreateDFileSet(pWSet, true) < 0) {
tsdbError("vgId:%d failed to create FSET %d at level %d disk id %d since %s", REPO_ID(pRepo),
TSDB_FSET_FID(pWSet), TSDB_FSET_LEVEL(pWSet), TSDB_FSET_ID(pWSet), tstrerror(terrno));
if (pCommith->isRFileSet) {
tsdbCloseAndUnsetFSet(&(pCommith->readh));
}
return -1;
}
pCommith->isDFileSame = false;
pCommith->isLFileSame = false;
tsdbDebug("vgId:%d FSET %d at level %d disk id %d is created to commit", REPO_ID(pRepo), TSDB_FSET_FID(pWSet),
TSDB_FSET_LEVEL(pWSet), TSDB_FSET_ID(pWSet));
} else {
did.level = TSDB_FSET_LEVEL(pSet);
did.id = TSDB_FSET_ID(pSet);
pCommith->wSet.fid = fid;
pCommith->wSet.state = 0;
pCommith->wSet.ver = TSDB_LATEST_FSET_VER;
// TSDB_FILE_HEAD
SDFile *pWHeadf = TSDB_COMMIT_HEAD_FILE(pCommith);
tsdbInitDFile(pWHeadf, did, REPO_ID(pRepo), fid, FS_TXN_VERSION(REPO_FS(pRepo)), TSDB_FILE_HEAD);
if (tsdbCreateDFile(pWHeadf, true, TSDB_FILE_HEAD) < 0) {
tsdbError("vgId:%d failed to create file %s to commit since %s", REPO_ID(pRepo), TSDB_FILE_FULL_NAME(pWHeadf),
tstrerror(terrno));
if (pCommith->isRFileSet) {
tsdbCloseAndUnsetFSet(&(pCommith->readh));
return -1;
}
}
// TSDB_FILE_DATA
SDFile *pRDataf = TSDB_READ_DATA_FILE(&(pCommith->readh));
SDFile *pWDataf = TSDB_COMMIT_DATA_FILE(pCommith);
tsdbInitDFileEx(pWDataf, pRDataf);
if (tsdbOpenDFile(pWDataf, O_WRONLY) < 0) {
tsdbError("vgId:%d failed to open file %s to commit since %s", REPO_ID(pRepo), TSDB_FILE_FULL_NAME(pWDataf),
tstrerror(terrno));
tsdbCloseDFileSet(pWSet);
tsdbRemoveDFile(pWHeadf);
if (pCommith->isRFileSet) {
tsdbCloseAndUnsetFSet(&(pCommith->readh));
return -1;
}
}
pCommith->isDFileSame = true;
// TSDB_FILE_LAST
SDFile *pRLastf = TSDB_READ_LAST_FILE(&(pCommith->readh));
SDFile *pWLastf = TSDB_COMMIT_LAST_FILE(pCommith);
if (pRLastf->info.size < 32 * 1024) {
tsdbInitDFileEx(pWLastf, pRLastf);
pCommith->isLFileSame = true;
if (tsdbOpenDFile(pWLastf, O_WRONLY) < 0) {
tsdbError("vgId:%d failed to open file %s to commit since %s", REPO_ID(pRepo), TSDB_FILE_FULL_NAME(pWLastf),
tstrerror(terrno));
tsdbCloseDFileSet(pWSet);
tsdbRemoveDFile(pWHeadf);
if (pCommith->isRFileSet) {
tsdbCloseAndUnsetFSet(&(pCommith->readh));
return -1;
}
}
} else {
tsdbInitDFile(pWLastf, did, REPO_ID(pRepo), fid, FS_TXN_VERSION(REPO_FS(pRepo)), TSDB_FILE_LAST);
pCommith->isLFileSame = false;
if (tsdbCreateDFile(pWLastf, true, TSDB_FILE_LAST) < 0) {
tsdbError("vgId:%d failed to create file %s to commit since %s", REPO_ID(pRepo), TSDB_FILE_FULL_NAME(pWLastf),
tstrerror(terrno));
tsdbCloseDFileSet(pWSet);
(void)tsdbRemoveDFile(pWHeadf);
if (pCommith->isRFileSet) {
tsdbCloseAndUnsetFSet(&(pCommith->readh));
return -1;
}
}
}
// TSDB_FILE_SMAD
SDFile *pRSmadF = TSDB_READ_SMAD_FILE(&(pCommith->readh));
SDFile *pWSmadF = TSDB_COMMIT_SMAD_FILE(pCommith);
if (access(TSDB_FILE_FULL_NAME(pRSmadF), F_OK) != 0) {
tsdbDebug("vgId:%d create data file %s as not exist", REPO_ID(pRepo), TSDB_FILE_FULL_NAME(pRSmadF));
tsdbInitDFile(pWSmadF, did, REPO_ID(pRepo), fid, FS_TXN_VERSION(REPO_FS(pRepo)), TSDB_FILE_SMAD);
if (tsdbCreateDFile(pWSmadF, true, TSDB_FILE_SMAD) < 0) {
tsdbError("vgId:%d failed to create file %s to commit since %s", REPO_ID(pRepo), TSDB_FILE_FULL_NAME(pWSmadF),
tstrerror(terrno));
tsdbCloseDFileSet(pWSet);
(void)tsdbRemoveDFile(pWHeadf);
if (pCommith->isRFileSet) {
tsdbCloseAndUnsetFSet(&(pCommith->readh));
return -1;
}
}
} else {
tsdbInitDFileEx(pWSmadF, pRSmadF);
if (tsdbOpenDFile(pWSmadF, O_RDWR) < 0) {
tsdbError("vgId:%d failed to open file %s to commit since %s", REPO_ID(pRepo), TSDB_FILE_FULL_NAME(pWSmadF),
tstrerror(terrno));
tsdbCloseDFileSet(pWSet);
tsdbRemoveDFile(pWHeadf);
if (pCommith->isRFileSet) {
tsdbCloseAndUnsetFSet(&(pCommith->readh));
return -1;
}
}
}
// TSDB_FILE_SMAL
ASSERT(tsdbGetNFiles(pWSet) >= TSDB_FILE_SMAL);
SDFile *pRSmalF = TSDB_READ_SMAL_FILE(&(pCommith->readh));
SDFile *pWSmalF = TSDB_COMMIT_SMAL_FILE(pCommith);
if ((pCommith->isLFileSame) && access(TSDB_FILE_FULL_NAME(pRSmalF), F_OK) == 0) {
tsdbInitDFileEx(pWSmalF, pRSmalF);
if (tsdbOpenDFile(pWSmalF, O_RDWR) < 0) {
tsdbError("vgId:%d failed to open file %s to commit since %s", REPO_ID(pRepo), TSDB_FILE_FULL_NAME(pWSmalF),
tstrerror(terrno));
tsdbCloseDFileSet(pWSet);
tsdbRemoveDFile(pWHeadf);
if (pCommith->isRFileSet) {
tsdbCloseAndUnsetFSet(&(pCommith->readh));
return -1;
}
}
} else {
tsdbDebug("vgId:%d create data file %s as not exist", REPO_ID(pRepo), TSDB_FILE_FULL_NAME(pRSmalF));
tsdbInitDFile(pWSmalF, did, REPO_ID(pRepo), fid, FS_TXN_VERSION(REPO_FS(pRepo)), TSDB_FILE_SMAL);
if (tsdbCreateDFile(pWSmalF, true, TSDB_FILE_SMAL) < 0) {
tsdbError("vgId:%d failed to create file %s to commit since %s", REPO_ID(pRepo), TSDB_FILE_FULL_NAME(pWSmalF),
tstrerror(terrno));
tsdbCloseDFileSet(pWSet);
(void)tsdbRemoveDFile(pWHeadf);
if (pCommith->isRFileSet) {
tsdbCloseAndUnsetFSet(&(pCommith->readh));
return -1;
}
}
}
}
return 0;
}
static void tsdbCloseCommitFile(SCommitH *pCommith, bool hasError) {
if (pCommith->isRFileSet) {
tsdbCloseAndUnsetFSet(&(pCommith->readh));
}
if (!hasError) {
TSDB_FSET_FSYNC(TSDB_COMMIT_WRITE_FSET(pCommith));
}
tsdbCloseDFileSet(TSDB_COMMIT_WRITE_FSET(pCommith));
}
static bool tsdbCanAddSubBlock(SCommitH *pCommith, SBlock *pBlock, SMergeInfo *pInfo) {
STsdb * pRepo = TSDB_COMMIT_REPO(pCommith);
STsdbCfg *pCfg = REPO_CFG(pRepo);
int mergeRows = pBlock->numOfRows + pInfo->rowsInserted - pInfo->rowsDeleteSucceed;
ASSERT(mergeRows > 0);
if (pBlock->numOfSubBlocks < TSDB_MAX_SUBBLOCKS && pInfo->nOperations <= pCfg->maxRowsPerFileBlock) {
if (pBlock->last) {
if (pCommith->isLFileSame && mergeRows < pCfg->minRowsPerFileBlock) return true;
} else {
if (pCommith->isDFileSame && mergeRows <= pCfg->maxRowsPerFileBlock) return true;
}
}
return false;
}
int tsdbApplyRtn(STsdb *pRepo) {
SRtn rtn;
SFSIter fsiter;
STsdbFS * pfs = REPO_FS(pRepo);
SDFileSet *pSet;
// Get retention snapshot
tsdbGetRtnSnap(pRepo, &rtn);
tsdbFSIterInit(&fsiter, pfs, TSDB_FS_ITER_FORWARD);
while ((pSet = tsdbFSIterNext(&fsiter))) {
if (pSet->fid < rtn.minFid) {
tsdbInfo("vgId:%d FSET %d at level %d disk id %d expires, remove it", REPO_ID(pRepo), pSet->fid,
TSDB_FSET_LEVEL(pSet), TSDB_FSET_ID(pSet));
continue;
}
if (tsdbApplyRtnOnFSet(pRepo, pSet, &rtn) < 0) {
return -1;
}
}
return 0;
}
/*
* Copyright (c) 2019 TAOS Data, Inc. <jhtao@taosdata.com>
*
* This program is free software: you can use, redistribute, and/or modify
* it under the terms of the GNU Affero General Public License, version 3
* or later ("AGPL"), as published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#if 0
#include "tsdbint.h"
typedef struct {
STable * pTable;
SBlockIdx * pBlkIdx;
SBlockIdx bindex;
SBlockInfo *pInfo;
} STableCompactH;
typedef struct {
SRtn rtn;
SFSIter fsIter;
SArray * tbArray; // table array to cache table obj and block indexes
SReadH readh;
SDFileSet wSet;
SArray * aBlkIdx;
SArray * aSupBlk;
SDataCols *pDataCols;
} SCompactH;
#define TSDB_COMPACT_WSET(pComph) (&((pComph)->wSet))
#define TSDB_COMPACT_REPO(pComph) TSDB_READ_REPO(&((pComph)->readh))
#define TSDB_COMPACT_HEAD_FILE(pComph) TSDB_DFILE_IN_SET(TSDB_COMPACT_WSET(pComph), TSDB_FILE_HEAD)
#define TSDB_COMPACT_DATA_FILE(pComph) TSDB_DFILE_IN_SET(TSDB_COMPACT_WSET(pComph), TSDB_FILE_DATA)
#define TSDB_COMPACT_LAST_FILE(pComph) TSDB_DFILE_IN_SET(TSDB_COMPACT_WSET(pComph), TSDB_FILE_LAST)
#define TSDB_COMPACT_SMAD_FILE(pComph) TSDB_DFILE_IN_SET(TSDB_COMPACT_WSET(pComph), TSDB_FILE_SMAD)
#define TSDB_COMPACT_SMAL_FILE(pComph) TSDB_DFILE_IN_SET(TSDB_COMPACT_WSET(pComph), TSDB_FILE_SMAL)
#define TSDB_COMPACT_BUF(pComph) TSDB_READ_BUF(&((pComph)->readh))
#define TSDB_COMPACT_COMP_BUF(pComph) TSDB_READ_COMP_BUF(&((pComph)->readh))
#define TSDB_COMPACT_EXBUF(pComph) TSDB_READ_EXBUF(&((pComph)->readh))
// static int tsdbAsyncCompact(STsdb *pRepo);
static void tsdbStartCompact(STsdb *pRepo);
static void tsdbEndCompact(STsdb *pRepo, int eno);
static int tsdbCompactMeta(STsdb *pRepo);
static int tsdbCompactTSData(STsdb *pRepo);
static int tsdbCompactFSet(SCompactH *pComph, SDFileSet *pSet);
static bool tsdbShouldCompact(SCompactH *pComph);
static int tsdbInitCompactH(SCompactH *pComph, STsdb *pRepo);
static void tsdbDestroyCompactH(SCompactH *pComph);
static int tsdbInitCompTbArray(SCompactH *pComph);
static void tsdbDestroyCompTbArray(SCompactH *pComph);
static int tsdbCacheFSetIndex(SCompactH *pComph);
static int tsdbCompactFSetInit(SCompactH *pComph, SDFileSet *pSet);
static void tsdbCompactFSetEnd(SCompactH *pComph);
static int tsdbCompactFSetImpl(SCompactH *pComph);
static int tsdbWriteBlockToRightFile(SCompactH *pComph, STable *pTable, SDataCols *pDataCols, void **ppBuf,
void **ppCBuf, void **ppExBuf);
enum { TSDB_NO_COMPACT, TSDB_IN_COMPACT, TSDB_WAITING_COMPACT};
// int tsdbCompact(STsdb *pRepo) { return tsdbAsyncCompact(pRepo); }
void *tsdbCompactImpl(STsdb *pRepo) {
// Check if there are files in TSDB FS to compact
if (REPO_FS(pRepo)->cstatus->pmf == NULL) {
pRepo->compactState = TSDB_NO_COMPACT;
tsem_post(&(pRepo->readyToCommit));
tsdbInfo("vgId:%d compact over, no file to compact in FS", REPO_ID(pRepo));
return NULL;
}
tsdbStartCompact(pRepo);
if (tsdbCompactMeta(pRepo) < 0) {
tsdbError("vgId:%d failed to compact META data since %s", REPO_ID(pRepo), tstrerror(terrno));
goto _err;
}
if (tsdbCompactTSData(pRepo) < 0) {
tsdbError("vgId:%d failed to compact TS data since %s", REPO_ID(pRepo), tstrerror(terrno));
goto _err;
}
tsdbEndCompact(pRepo, TSDB_CODE_SUCCESS);
return NULL;
_err:
pRepo->code = terrno;
tsdbEndCompact(pRepo, terrno);
return NULL;
}
// static int tsdbAsyncCompact(STsdb *pRepo) {
// if (pRepo->compactState != TSDB_NO_COMPACT) {
// tsdbInfo("vgId:%d not compact tsdb again ", REPO_ID(pRepo));
// return 0;
// }
// pRepo->compactState = TSDB_WAITING_COMPACT;
// tsem_wait(&(pRepo->readyToCommit));
// return tsdbScheduleCommit(pRepo, COMPACT_REQ);
// }
static void tsdbStartCompact(STsdb *pRepo) {
assert(pRepo->compactState != TSDB_IN_COMPACT);
tsdbInfo("vgId:%d start to compact!", REPO_ID(pRepo));
tsdbStartFSTxn(pRepo, 0, 0);
pRepo->code = TSDB_CODE_SUCCESS;
pRepo->compactState = TSDB_IN_COMPACT;
}
static void tsdbEndCompact(STsdb *pRepo, int eno) {
if (eno != TSDB_CODE_SUCCESS) {
tsdbEndFSTxnWithError(REPO_FS(pRepo));
} else {
tsdbEndFSTxn(pRepo);
}
pRepo->compactState = TSDB_NO_COMPACT;
tsdbInfo("vgId:%d compact over, %s", REPO_ID(pRepo), (eno == TSDB_CODE_SUCCESS) ? "succeed" : "failed");
tsem_post(&(pRepo->readyToCommit));
}
static int tsdbCompactMeta(STsdb *pRepo) {
STsdbFS *pfs = REPO_FS(pRepo);
tsdbUpdateMFile(pfs, pfs->cstatus->pmf);
return 0;
}
static int tsdbCompactTSData(STsdb *pRepo) {
SCompactH compactH;
SDFileSet *pSet = NULL;
tsdbDebug("vgId:%d start to compact TS data", REPO_ID(pRepo));
// If no file, just return 0;
if (taosArrayGetSize(REPO_FS(pRepo)->cstatus->df) <= 0) {
tsdbDebug("vgId:%d no TS data file to compact, compact over", REPO_ID(pRepo));
return 0;
}
if (tsdbInitCompactH(&compactH, pRepo) < 0) {
return -1;
}
while ((pSet = tsdbFSIterNext(&(compactH.fsIter)))) {
// Remove those expired files
if (pSet->fid < compactH.rtn.minFid) {
tsdbInfo("vgId:%d FSET %d on level %d disk id %d expires, remove it", REPO_ID(pRepo), pSet->fid,
TSDB_FSET_LEVEL(pSet), TSDB_FSET_ID(pSet));
continue;
}
if (TSDB_FSET_LEVEL(pSet) == TFS_MAX_LEVEL) {
tsdbDebug("vgId:%d FSET %d on level %d, should not compact", REPO_ID(pRepo), pSet->fid, TFS_MAX_LEVEL);
tsdbUpdateDFileSet(REPO_FS(pRepo), pSet);
continue;
}
if (tsdbCompactFSet(&compactH, pSet) < 0) {
tsdbDestroyCompactH(&compactH);
tsdbError("vgId:%d failed to compact FSET %d since %s", REPO_ID(pRepo), pSet->fid, tstrerror(terrno));
return -1;
}
}
tsdbDestroyCompactH(&compactH);
tsdbDebug("vgId:%d compact TS data over", REPO_ID(pRepo));
return 0;
}
static int tsdbCompactFSet(SCompactH *pComph, SDFileSet *pSet) {
STsdb *pRepo = TSDB_COMPACT_REPO(pComph);
SDiskID did;
tsdbDebug("vgId:%d start to compact FSET %d on level %d id %d", REPO_ID(pRepo), pSet->fid, TSDB_FSET_LEVEL(pSet),
TSDB_FSET_ID(pSet));
if (tsdbCompactFSetInit(pComph, pSet) < 0) {
return -1;
}
if (!tsdbShouldCompact(pComph)) {
tsdbDebug("vgId:%d no need to compact FSET %d", REPO_ID(pRepo), pSet->fid);
if (tsdbApplyRtnOnFSet(TSDB_COMPACT_REPO(pComph), pSet, &(pComph->rtn)) < 0) {
tsdbCompactFSetEnd(pComph);
return -1;
}
} else {
// Create new fset as compacted fset
tfsAllocDisk(tsdbGetFidLevel(pSet->fid, &(pComph->rtn)), &(did.level), &(did.id));
if (did.level == TFS_UNDECIDED_LEVEL) {
terrno = TSDB_CODE_TDB_NO_AVAIL_DISK;
tsdbError("vgId:%d failed to compact FSET %d since %s", REPO_ID(pRepo), pSet->fid, tstrerror(terrno));
tsdbCompactFSetEnd(pComph);
return -1;
}
tsdbInitDFileSet(TSDB_COMPACT_WSET(pComph), did, REPO_ID(pRepo), TSDB_FSET_FID(pSet),
FS_TXN_VERSION(REPO_FS(pRepo)), TSDB_LATEST_FSET_VER);
if (tsdbCreateDFileSet(TSDB_COMPACT_WSET(pComph), true) < 0) {
tsdbError("vgId:%d failed to compact FSET %d since %s", REPO_ID(pRepo), pSet->fid, tstrerror(terrno));
tsdbCompactFSetEnd(pComph);
return -1;
}
if (tsdbCompactFSetImpl(pComph) < 0) {
tsdbCloseDFileSet(TSDB_COMPACT_WSET(pComph));
tsdbRemoveDFileSet(TSDB_COMPACT_WSET(pComph));
tsdbCompactFSetEnd(pComph);
return -1;
}
tsdbCloseDFileSet(TSDB_COMPACT_WSET(pComph));
tsdbUpdateDFileSet(REPO_FS(pRepo), TSDB_COMPACT_WSET(pComph));
tsdbDebug("vgId:%d FSET %d compact over", REPO_ID(pRepo), pSet->fid);
}
tsdbCompactFSetEnd(pComph);
return 0;
}
static bool tsdbShouldCompact(SCompactH *pComph) {
// if (tsdbForceCompactFile) {
// return true;
// }
STsdb * pRepo = TSDB_COMPACT_REPO(pComph);
STsdbCfg * pCfg = REPO_CFG(pRepo);
SReadH * pReadh = &(pComph->readh);
STableCompactH *pTh;
SBlock * pBlock;
int defaultRows = TSDB_DEFAULT_BLOCK_ROWS(pCfg->maxRowsPerFileBlock);
SDFile * pDataF = TSDB_READ_DATA_FILE(pReadh);
SDFile * pLastF = TSDB_READ_LAST_FILE(pReadh);
int tblocks = 0; // total blocks
int nSubBlocks = 0; // # of blocks with sub-blocks
int nSmallBlocks = 0; // # of blocks with rows < defaultRows
int64_t tsize = 0;
for (size_t i = 0; i < taosArrayGetSize(pComph->tbArray); i++) {
pTh = (STableCompactH *)taosArrayGet(pComph->tbArray, i);
if (pTh->pTable == NULL || pTh->pBlkIdx == NULL) continue;
for (size_t bidx = 0; bidx < pTh->pBlkIdx->numOfBlocks; bidx++) {
tblocks++;
pBlock = pTh->pInfo->blocks + bidx;
if (pBlock->numOfRows < defaultRows) {
nSmallBlocks++;
}
if (pBlock->numOfSubBlocks > 1) {
nSubBlocks++;
for (int k = 0; k < pBlock->numOfSubBlocks; k++) {
SBlock *iBlock = ((SBlock *)POINTER_SHIFT(pTh->pInfo, pBlock->offset)) + k;
tsize = tsize + iBlock->len;
}
} else if (pBlock->numOfSubBlocks == 1) {
tsize += pBlock->len;
} else {
ASSERT(0);
}
}
}
return (((nSubBlocks * 1.0 / tblocks) > 0.33) || ((nSmallBlocks * 1.0 / tblocks) > 0.33) ||
(tsize * 1.0 / (pDataF->info.size + pLastF->info.size - 2 * TSDB_FILE_HEAD_SIZE) < 0.85));
}
static int tsdbInitCompactH(SCompactH *pComph, STsdb *pRepo) {
STsdbCfg *pCfg = REPO_CFG(pRepo);
memset(pComph, 0, sizeof(*pComph));
TSDB_FSET_SET_CLOSED(TSDB_COMPACT_WSET(pComph));
tsdbGetRtnSnap(pRepo, &(pComph->rtn));
tsdbFSIterInit(&(pComph->fsIter), REPO_FS(pRepo), TSDB_FS_ITER_FORWARD);
if (tsdbInitReadH(&(pComph->readh), pRepo) < 0) {
return -1;
}
if (tsdbInitCompTbArray(pComph) < 0) {
tsdbDestroyCompactH(pComph);
return -1;
}
pComph->aBlkIdx = taosArrayInit(1024, sizeof(SBlockIdx));
if (pComph->aBlkIdx == NULL) {
terrno = TSDB_CODE_TDB_OUT_OF_MEMORY;
tsdbDestroyCompactH(pComph);
return -1;
}
pComph->aSupBlk = taosArrayInit(1024, sizeof(SBlock));
if (pComph->aSupBlk == NULL) {
terrno = TSDB_CODE_TDB_OUT_OF_MEMORY;
tsdbDestroyCompactH(pComph);
return -1;
}
pComph->pDataCols = tdNewDataCols(0, pCfg->maxRowsPerFileBlock);
if (pComph->pDataCols == NULL) {
terrno = TSDB_CODE_TDB_OUT_OF_MEMORY;
tsdbDestroyCompactH(pComph);
return -1;
}
return 0;
}
static void tsdbDestroyCompactH(SCompactH *pComph) {
pComph->pDataCols = tdFreeDataCols(pComph->pDataCols);
pComph->aSupBlk = taosArrayDestroy(pComph->aSupBlk);
pComph->aBlkIdx = taosArrayDestroy(pComph->aBlkIdx);
tsdbDestroyCompTbArray(pComph);
tsdbDestroyReadH(&(pComph->readh));
tsdbCloseDFileSet(TSDB_COMPACT_WSET(pComph));
}
static int tsdbInitCompTbArray(SCompactH *pComph) { // Init pComp->tbArray
STsdb *pRepo = TSDB_COMPACT_REPO(pComph);
STsdbMeta *pMeta = pRepo->tsdbMeta;
if (tsdbRLockRepoMeta(pRepo) < 0) return -1;
pComph->tbArray = taosArrayInit(pMeta->maxTables, sizeof(STableCompactH));
if (pComph->tbArray == NULL) {
terrno = TSDB_CODE_TDB_OUT_OF_MEMORY;
tsdbUnlockRepoMeta(pRepo);
return -1;
}
// Note here must start from 0
for (int i = 0; i < pMeta->maxTables; i++) {
STableCompactH ch = {0};
if (pMeta->tables[i] != NULL) {
tsdbRefTable(pMeta->tables[i]);
ch.pTable = pMeta->tables[i];
}
if (taosArrayPush(pComph->tbArray, &ch) == NULL) {
terrno = TSDB_CODE_TDB_OUT_OF_MEMORY;
tsdbUnlockRepoMeta(pRepo);
return -1;
}
}
if (tsdbUnlockRepoMeta(pRepo) < 0) return -1;
return 0;
}
static void tsdbDestroyCompTbArray(SCompactH *pComph) {
STableCompactH *pTh;
if (pComph->tbArray == NULL) return;
for (size_t i = 0; i < taosArrayGetSize(pComph->tbArray); i++) {
pTh = (STableCompactH *)taosArrayGet(pComph->tbArray, i);
if (pTh->pTable) {
tsdbUnRefTable(pTh->pTable);
}
// pTh->pInfo = taosTZfree(pTh->pInfo);
tfree(pTh->pInfo);
}
pComph->tbArray = taosArrayDestroy(pComph->tbArray);
}
static int tsdbCacheFSetIndex(SCompactH *pComph) {
SReadH *pReadH = &(pComph->readh);
if (tsdbLoadBlockIdx(pReadH) < 0) {
return -1;
}
for (int tid = 1; tid < taosArrayGetSize(pComph->tbArray); tid++) {
STableCompactH *pTh = (STableCompactH *)taosArrayGet(pComph->tbArray, tid);
pTh->pBlkIdx = NULL;
if (pTh->pTable == NULL) continue;
if (tsdbSetReadTable(pReadH, pTh->pTable) < 0) {
return -1;
}
if (pReadH->pBlkIdx == NULL) continue;
pTh->bindex = *(pReadH->pBlkIdx);
pTh->pBlkIdx = &(pTh->bindex);
uint32_t originLen = 0;
if (tsdbLoadBlockInfo(pReadH, (void **)(&(pTh->pInfo)), &originLen) < 0) {
return -1;
}
}
return 0;
}
static int tsdbCompactFSetInit(SCompactH *pComph, SDFileSet *pSet) {
taosArrayClear(pComph->aBlkIdx);
taosArrayClear(pComph->aSupBlk);
if (tsdbSetAndOpenReadFSet(&(pComph->readh), pSet) < 0) {
return -1;
}
if (tsdbCacheFSetIndex(pComph) < 0) {
tsdbCloseAndUnsetFSet(&(pComph->readh));
return -1;
}
return 0;
}
static void tsdbCompactFSetEnd(SCompactH *pComph) { tsdbCloseAndUnsetFSet(&(pComph->readh)); }
static int tsdbCompactFSetImpl(SCompactH *pComph) {
STsdb *pRepo = TSDB_COMPACT_REPO(pComph);
STsdbCfg * pCfg = REPO_CFG(pRepo);
SReadH * pReadh = &(pComph->readh);
SBlockIdx blkIdx;
void ** ppBuf = &(TSDB_COMPACT_BUF(pComph));
void ** ppCBuf = &(TSDB_COMPACT_COMP_BUF(pComph));
void ** ppExBuf = &(TSDB_COMPACT_EXBUF(pComph));
int defaultRows = TSDB_DEFAULT_BLOCK_ROWS(pCfg->maxRowsPerFileBlock);
taosArrayClear(pComph->aBlkIdx);
for (int tid = 1; tid < taosArrayGetSize(pComph->tbArray); tid++) {
STableCompactH *pTh = (STableCompactH *)taosArrayGet(pComph->tbArray, tid);
STSchema * pSchema;
if (pTh->pTable == NULL || pTh->pBlkIdx == NULL) continue;
pSchema = tsdbGetTableSchemaImpl(pTh->pTable, true, true, -1, -1);
taosArrayClear(pComph->aSupBlk);
if ((tdInitDataCols(pComph->pDataCols, pSchema) < 0) || (tdInitDataCols(pReadh->pDCols[0], pSchema) < 0) ||
(tdInitDataCols(pReadh->pDCols[1], pSchema) < 0)) {
terrno = TSDB_CODE_TDB_OUT_OF_MEMORY;
tdFreeSchema(pSchema);
return -1;
}
tdFreeSchema(pSchema);
// Loop to compact each block data
for (int i = 0; i < pTh->pBlkIdx->numOfBlocks; i++) {
SBlock *pBlock = pTh->pInfo->blocks + i;
// Load the block data
if (tsdbLoadBlockData(pReadh, pBlock, pTh->pInfo) < 0) {
return -1;
}
// Merge pComph->pDataCols and pReadh->pDCols[0] and write data to file
if (pComph->pDataCols->numOfRows == 0 && pBlock->numOfRows >= defaultRows) {
if (tsdbWriteBlockToRightFile(pComph, pTh->pTable, pReadh->pDCols[0], ppBuf, ppCBuf, ppExBuf) < 0) {
return -1;
}
} else {
int ridx = 0;
while (true) {
if (pReadh->pDCols[0]->numOfRows - ridx == 0) break;
int rowsToMerge = MIN(pReadh->pDCols[0]->numOfRows - ridx, defaultRows - pComph->pDataCols->numOfRows);
tdMergeDataCols(pComph->pDataCols, pReadh->pDCols[0], rowsToMerge, &ridx, pCfg->update != TD_ROW_PARTIAL_UPDATE);
if (pComph->pDataCols->numOfRows < defaultRows) {
break;
}
if (tsdbWriteBlockToRightFile(pComph, pTh->pTable, pComph->pDataCols, ppBuf, ppCBuf, ppExBuf) < 0) {
return -1;
}
tdResetDataCols(pComph->pDataCols);
}
}
}
if (pComph->pDataCols->numOfRows > 0 &&
tsdbWriteBlockToRightFile(pComph, pTh->pTable, pComph->pDataCols, ppBuf, ppCBuf, ppExBuf) < 0) {
return -1;
}
if (tsdbWriteBlockInfoImpl(TSDB_COMPACT_HEAD_FILE(pComph), pTh->pTable, pComph->aSupBlk, NULL, ppBuf, &blkIdx) <
0) {
return -1;
}
if ((blkIdx.numOfBlocks > 0) && (taosArrayPush(pComph->aBlkIdx, (void *)(&blkIdx)) == NULL)) {
terrno = TSDB_CODE_TDB_OUT_OF_MEMORY;
return -1;
}
}
if (tsdbWriteBlockIdx(TSDB_COMPACT_HEAD_FILE(pComph), pComph->aBlkIdx, ppBuf) < 0) {
return -1;
}
return 0;
}
static int tsdbWriteBlockToRightFile(SCompactH *pComph, STable *pTable, SDataCols *pDataCols, void **ppBuf,
void **ppCBuf, void **ppExBuf) {
STsdb *pRepo = TSDB_COMPACT_REPO(pComph);
STsdbCfg * pCfg = REPO_CFG(pRepo);
SDFile * pDFile;
bool isLast;
SBlock block;
ASSERT(pDataCols->numOfRows > 0);
if (pDataCols->numOfRows < pCfg->minRowsPerFileBlock) {
pDFile = TSDB_COMPACT_LAST_FILE(pComph);
isLast = true;
} else {
pDFile = TSDB_COMPACT_DATA_FILE(pComph);
isLast = false;
}
if (tsdbWriteBlockImpl(pRepo, pTable, pDFile,
isLast ? TSDB_COMPACT_SMAL_FILE(pComph) : TSDB_COMPACT_SMAD_FILE(pComph), pDataCols, &block,
isLast, true, ppBuf, ppCBuf, ppExBuf) < 0) {
return -1;
}
if (taosArrayPush(pComph->aSupBlk, (void *)(&block)) == NULL) {
terrno = TSDB_CODE_TDB_OUT_OF_MEMORY;
return -1;
}
return 0;
}
#endif
\ No newline at end of file
/*
* Copyright (c) 2019 TAOS Data, Inc. <jhtao@taosdata.com>
*
* This program is free software: you can use, redistribute, and/or modify
* it under the terms of the GNU Affero General Public License, version 3
* or later ("AGPL"), as published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#include "os.h"
#include "tsdbint.h"
#include <regex.h>
typedef enum { TSDB_TXN_TEMP_FILE = 0, TSDB_TXN_CURR_FILE } TSDB_TXN_FILE_T;
static const char *tsdbTxnFname[] = {"current.t", "current"};
#define TSDB_MAX_FSETS(keep, days) ((keep) / (days) + 3)
static int tsdbComparFidFSet(const void *arg1, const void *arg2);
static void tsdbResetFSStatus(SFSStatus *pStatus);
static int tsdbSaveFSStatus(SFSStatus *pStatus, int vid);
static void tsdbApplyFSTxnOnDisk(SFSStatus *pFrom, SFSStatus *pTo);
static void tsdbGetTxnFname(int repoid, TSDB_TXN_FILE_T ftype, char fname[]);
static int tsdbOpenFSFromCurrent(STsdb *pRepo);
static int tsdbScanAndTryFixFS(STsdb *pRepo);
static int tsdbScanRootDir(STsdb *pRepo);
static int tsdbScanDataDir(STsdb *pRepo);
static bool tsdbIsTFileInFS(STsdbFS *pfs, const TFILE *pf);
static int tsdbRestoreCurrent(STsdb *pRepo);
static int tsdbComparTFILE(const void *arg1, const void *arg2);
static void tsdbScanAndTryFixDFilesHeader(STsdb *pRepo, int32_t *nExpired);
static int tsdbProcessExpiredFS(STsdb *pRepo);
static int tsdbCreateMeta(STsdb *pRepo);
static int tsdbFetchTFileSet(STsdb *pRepo, SArray **fArray);
// For backward compatibility
// ================== CURRENT file header info
static int tsdbEncodeFSHeader(void **buf, SFSHeader *pHeader) {
int tlen = 0;
tlen += taosEncodeFixedU32(buf, pHeader->version);
tlen += taosEncodeFixedU32(buf, pHeader->len);
return tlen;
}
static void *tsdbDecodeFSHeader(void *buf, SFSHeader *pHeader) {
buf = taosDecodeFixedU32(buf, &(pHeader->version));
buf = taosDecodeFixedU32(buf, &(pHeader->len));
return buf;
}
// ================== STsdbFSMeta
static int tsdbEncodeFSMeta(void **buf, STsdbFSMeta *pMeta) {
int tlen = 0;
tlen += taosEncodeFixedU32(buf, pMeta->version);
tlen += taosEncodeFixedI64(buf, pMeta->totalPoints);
tlen += taosEncodeFixedI64(buf, pMeta->totalStorage);
return tlen;
}
static void *tsdbDecodeFSMeta(void *buf, STsdbFSMeta *pMeta) {
buf = taosDecodeFixedU32(buf, &(pMeta->version));
buf = taosDecodeFixedI64(buf, &(pMeta->totalPoints));
buf = taosDecodeFixedI64(buf, &(pMeta->totalStorage));
return buf;
}
// ================== SFSStatus
static int tsdbEncodeDFileSetArray(void **buf, SArray *pArray) {
int tlen = 0;
uint64_t nset = taosArrayGetSize(pArray);
tlen += taosEncodeFixedU64(buf, nset);
for (size_t i = 0; i < nset; i++) {
SDFileSet *pSet = taosArrayGet(pArray, i);
tlen += tsdbEncodeDFileSet(buf, pSet);
}
return tlen;
}
static int tsdbDecodeDFileSetArray(void **originBuf, void *buf, SArray *pArray, SFSHeader *pSFSHeader) {
uint64_t nset;
SDFileSet dset;
dset.ver = TSDB_FSET_VER_0; // default value
taosArrayClear(pArray);
buf = taosDecodeFixedU64(buf, &nset);
if (pSFSHeader->version == TSDB_FS_VER_0) {
// record fver in new version of 'current' file
uint64_t extendedSize = pSFSHeader->len + nset * TSDB_FILE_MAX * sizeof(TSDB_FVER_TYPE);
if (taosTSizeof(*originBuf) < extendedSize) {
size_t ptrDistance = POINTER_DISTANCE(buf, *originBuf);
if (tsdbMakeRoom(originBuf, (size_t)extendedSize) < 0) {
terrno = TSDB_CODE_FS_OUT_OF_MEMORY;
return -1;
}
buf = POINTER_SHIFT(*originBuf, ptrDistance);
}
}
for (size_t i = 0; i < nset; i++) {
buf = tsdbDecodeDFileSet(buf, &dset, pSFSHeader->version);
taosArrayPush(pArray, (void *)(&dset));
}
return TSDB_CODE_SUCCESS;
}
static int tsdbEncodeFSStatus(void **buf, SFSStatus *pStatus) {
ASSERT(pStatus->pmf);
int tlen = 0;
tlen += tsdbEncodeSMFile(buf, pStatus->pmf);
tlen += tsdbEncodeDFileSetArray(buf, pStatus->df);
return tlen;
}
static int tsdbDecodeFSStatus(void **originBuf, void *buf, SFSStatus *pStatus, SFSHeader *pSFSHeader) {
tsdbResetFSStatus(pStatus);
pStatus->pmf = &(pStatus->mf);
buf = tsdbDecodeSMFile(buf, pStatus->pmf);
return tsdbDecodeDFileSetArray(originBuf, buf, pStatus->df, pSFSHeader);
}
static SFSStatus *tsdbNewFSStatus(int maxFSet) {
SFSStatus *pStatus = (SFSStatus *)calloc(1, sizeof(*pStatus));
if (pStatus == NULL) {
terrno = TSDB_CODE_TDB_OUT_OF_MEMORY;
return NULL;
}
TSDB_FILE_SET_CLOSED(&(pStatus->mf));
pStatus->df = taosArrayInit(maxFSet, sizeof(SDFileSet));
if (pStatus->df == NULL) {
terrno = TSDB_CODE_TDB_OUT_OF_MEMORY;
free(pStatus);
return NULL;
}
return pStatus;
}
static SFSStatus *tsdbFreeFSStatus(SFSStatus *pStatus) {
if (pStatus) {
pStatus->df = taosArrayDestroy(pStatus->df);
free(pStatus);
}
return NULL;
}
static void tsdbResetFSStatus(SFSStatus *pStatus) {
if (pStatus == NULL) {
return;
}
TSDB_FILE_SET_CLOSED(&(pStatus->mf));
pStatus->pmf = NULL;
taosArrayClear(pStatus->df);
}
static void tsdbSetStatusMFile(SFSStatus *pStatus, const SMFile *pMFile) {
ASSERT(pStatus->pmf == NULL);
pStatus->pmf = &(pStatus->mf);
tsdbInitMFileEx(pStatus->pmf, (SMFile *)pMFile);
}
static int tsdbAddDFileSetToStatus(SFSStatus *pStatus, const SDFileSet *pSet) {
if (taosArrayPush(pStatus->df, (void *)pSet) == NULL) {
terrno = TSDB_CODE_TDB_OUT_OF_MEMORY;
return -1;
}
TSDB_FSET_SET_CLOSED(((SDFileSet *)taosArrayGetLast(pStatus->df)));
return 0;
}
// ================== STsdbFS
STsdbFS *tsdbNewFS(STsdbCfg *pCfg) {
int keep = pCfg->keep;
int days = pCfg->daysPerFile;
int maxFSet = TSDB_MAX_FSETS(keep, days);
STsdbFS *pfs;
pfs = (STsdbFS *)calloc(1, sizeof(*pfs));
if (pfs == NULL) {
terrno = TSDB_CODE_TDB_OUT_OF_MEMORY;
return NULL;
}
int code = pthread_rwlock_init(&(pfs->lock), NULL);
if (code) {
terrno = TAOS_SYSTEM_ERROR(code);
free(pfs);
return NULL;
}
pfs->cstatus = tsdbNewFSStatus(maxFSet);
if (pfs->cstatus == NULL) {
tsdbFreeFS(pfs);
return NULL;
}
pfs->metaCache = taosHashInit(4096, taosGetDefaultHashFunction(TSDB_DATA_TYPE_BIGINT), true, HASH_NO_LOCK);
if (pfs->metaCache == NULL) {
terrno = TSDB_CODE_TDB_OUT_OF_MEMORY;
tsdbFreeFS(pfs);
return NULL;
}
pfs->intxn = false;
pfs->metaCacheComp = NULL;
pfs->nstatus = tsdbNewFSStatus(maxFSet);
if (pfs->nstatus == NULL) {
tsdbFreeFS(pfs);
return NULL;
}
return pfs;
}
void *tsdbFreeFS(STsdbFS *pfs) {
if (pfs) {
pfs->nstatus = tsdbFreeFSStatus(pfs->nstatus);
taosHashCleanup(pfs->metaCache);
pfs->metaCache = NULL;
pfs->cstatus = tsdbFreeFSStatus(pfs->cstatus);
pthread_rwlock_destroy(&(pfs->lock));
free(pfs);
}
return NULL;
}
static int tsdbProcessExpiredFS(STsdb *pRepo) {
tsdbStartFSTxn(pRepo, 0, 0);
if (tsdbCreateMeta(pRepo) < 0) {
tsdbError("vgId:%d failed to create meta since %s", REPO_ID(pRepo), tstrerror(terrno));
return -1;
}
if (tsdbApplyRtn(pRepo) < 0) {
tsdbEndFSTxnWithError(REPO_FS(pRepo));
tsdbError("vgId:%d failed to apply rtn since %s", REPO_ID(pRepo), tstrerror(terrno));
return -1;
}
if (tsdbEndFSTxn(pRepo) < 0) {
tsdbError("vgId:%d failed to end fs txn since %s", REPO_ID(pRepo), tstrerror(terrno));
return -1;
}
return 0;
}
static int tsdbCreateMeta(STsdb *pRepo) {
STsdbFS *pfs = REPO_FS(pRepo);
SMFile * pOMFile = pfs->cstatus->pmf;
SMFile mf;
SDiskID did;
if (pOMFile != NULL) {
// keep the old meta file
tsdbUpdateMFile(pfs, pOMFile);
return 0;
}
// Create a new meta file
did.level = TFS_PRIMARY_LEVEL;
did.id = TFS_PRIMARY_ID;
tsdbInitMFile(&mf, did, REPO_ID(pRepo), FS_TXN_VERSION(REPO_FS(pRepo)));
if (tsdbCreateMFile(&mf, true) < 0) {
tsdbError("vgId:%d failed to create META file since %s", REPO_ID(pRepo), tstrerror(terrno));
return -1;
}
tsdbInfo("vgId:%d meta file %s is created", REPO_ID(pRepo), TSDB_FILE_FULL_NAME(&mf));
if (tsdbUpdateMFileHeader(&mf) < 0) {
tsdbError("vgId:%d failed to update META file header since %s, revert it", REPO_ID(pRepo), tstrerror(terrno));
tsdbApplyMFileChange(&mf, pOMFile);
return -1;
}
TSDB_FILE_FSYNC(&mf);
tsdbCloseMFile(&mf);
tsdbUpdateMFile(pfs, &mf);
return 0;
}
int tsdbOpenFS(STsdb *pRepo) {
STsdbFS *pfs = REPO_FS(pRepo);
char current[TSDB_FILENAME_LEN] = "\0";
int nExpired = 0;
ASSERT(pfs != NULL);
tsdbGetTxnFname(REPO_ID(pRepo), TSDB_TXN_CURR_FILE, current);
tsdbGetRtnSnap(pRepo, &pRepo->rtn);
if (access(current, F_OK) == 0) {
if (tsdbOpenFSFromCurrent(pRepo) < 0) {
tsdbError("vgId:%d failed to open FS since %s", REPO_ID(pRepo), tstrerror(terrno));
return -1;
}
tsdbScanAndTryFixDFilesHeader(pRepo, &nExpired);
if (nExpired > 0) {
tsdbProcessExpiredFS(pRepo);
}
} else {
// should skip expired fileset inside of the function
if (tsdbRestoreCurrent(pRepo) < 0) {
tsdbError("vgId:%d failed to restore current file since %s", REPO_ID(pRepo), tstrerror(terrno));
return -1;
}
}
if (tsdbScanAndTryFixFS(pRepo) < 0) {
tsdbError("vgId:%d failed to scan and fix FS since %s", REPO_ID(pRepo), tstrerror(terrno));
return -1;
}
// Load meta cache if has meta file
if ((!(pRepo->state & TSDB_STATE_BAD_META)) && tsdbLoadMetaCache(pRepo, true) < 0) {
tsdbError("vgId:%d failed to open FS while loading meta cache since %s", REPO_ID(pRepo), tstrerror(terrno));
return -1;
}
return 0;
}
void tsdbCloseFS(STsdb *pRepo) {
// Do nothing
}
// Start a new transaction to modify the file system
void tsdbStartFSTxn(STsdb *pRepo, int64_t pointsAdd, int64_t storageAdd) {
STsdbFS *pfs = REPO_FS(pRepo);
ASSERT(pfs->intxn == false);
pfs->intxn = true;
tsdbResetFSStatus(pfs->nstatus);
pfs->nstatus->meta = pfs->cstatus->meta;
if (pfs->cstatus->pmf == NULL) {
pfs->nstatus->meta.version = 0;
} else {
pfs->nstatus->meta.version = pfs->cstatus->meta.version + 1;
}
pfs->nstatus->meta.totalPoints = pfs->cstatus->meta.totalPoints + pointsAdd;
pfs->nstatus->meta.totalStorage = pfs->cstatus->meta.totalStorage += storageAdd;
}
void tsdbUpdateFSTxnMeta(STsdbFS *pfs, STsdbFSMeta *pMeta) { pfs->nstatus->meta = *pMeta; }
int tsdbEndFSTxn(STsdb *pRepo) {
STsdbFS *pfs = REPO_FS(pRepo);
ASSERT(FS_IN_TXN(pfs));
SFSStatus *pStatus;
// Write current file system snapshot
if (tsdbSaveFSStatus(pfs->nstatus, REPO_ID(pRepo)) < 0) {
tsdbEndFSTxnWithError(pfs);
return -1;
}
// Make new
tsdbWLockFS(pfs);
pStatus = pfs->cstatus;
pfs->cstatus = pfs->nstatus;
pfs->nstatus = pStatus;
tsdbUnLockFS(pfs);
// Apply actual change to each file and SDFileSet
tsdbApplyFSTxnOnDisk(pfs->nstatus, pfs->cstatus);
pfs->intxn = false;
return 0;
}
int tsdbEndFSTxnWithError(STsdbFS *pfs) {
tsdbApplyFSTxnOnDisk(pfs->nstatus, pfs->cstatus);
// TODO: if mf change, reload pfs->metaCache
pfs->intxn = false;
return 0;
}
void tsdbUpdateMFile(STsdbFS *pfs, const SMFile *pMFile) { tsdbSetStatusMFile(pfs->nstatus, pMFile); }
int tsdbUpdateDFileSet(STsdbFS *pfs, const SDFileSet *pSet) { return tsdbAddDFileSetToStatus(pfs->nstatus, pSet); }
static int tsdbSaveFSStatus(SFSStatus *pStatus, int vid) {
SFSHeader fsheader;
void * pBuf = NULL;
void * ptr;
char hbuf[TSDB_FILE_HEAD_SIZE] = "\0";
char tfname[TSDB_FILENAME_LEN] = "\0";
char cfname[TSDB_FILENAME_LEN] = "\0";
tsdbGetTxnFname(vid, TSDB_TXN_TEMP_FILE, tfname);
tsdbGetTxnFname(vid, TSDB_TXN_CURR_FILE, cfname);
int fd = open(tfname, O_WRONLY | O_CREAT | O_TRUNC | O_BINARY, 0755);
if (fd < 0) {
terrno = TAOS_SYSTEM_ERROR(errno);
return -1;
}
fsheader.version = TSDB_LATEST_SFS_VER;
if (pStatus->pmf == NULL) {
ASSERT(taosArrayGetSize(pStatus->df) == 0);
fsheader.len = 0;
} else {
fsheader.len = tsdbEncodeFSStatus(NULL, pStatus) + sizeof(TSCKSUM);
}
// Encode header part and write
ptr = hbuf;
tsdbEncodeFSHeader(&ptr, &fsheader);
tsdbEncodeFSMeta(&ptr, &(pStatus->meta));
taosCalcChecksumAppend(0, (uint8_t *)hbuf, TSDB_FILE_HEAD_SIZE);
if (taosWriteFile(fd, hbuf, TSDB_FILE_HEAD_SIZE) < TSDB_FILE_HEAD_SIZE) {
terrno = TAOS_SYSTEM_ERROR(errno);
close(fd);
remove(tfname);
return -1;
}
// Encode file status and write to file
if (fsheader.len > 0) {
if (tsdbMakeRoom(&(pBuf), fsheader.len) < 0) {
close(fd);
remove(tfname);
return -1;
}
ptr = pBuf;
tsdbEncodeFSStatus(&ptr, pStatus);
taosCalcChecksumAppend(0, (uint8_t *)pBuf, fsheader.len);
if (taosWriteFile(fd, pBuf, fsheader.len) < fsheader.len) {
terrno = TAOS_SYSTEM_ERROR(errno);
close(fd);
(void)remove(tfname);
taosTZfree(pBuf);
return -1;
}
}
// fsync, close and rename
if (taosFsyncFile(fd) < 0) {
terrno = TAOS_SYSTEM_ERROR(errno);
close(fd);
remove(tfname);
taosTZfree(pBuf);
return -1;
}
(void)close(fd);
(void)taosRenameFile(tfname, cfname);
taosTZfree(pBuf);
return 0;
}
static void tsdbApplyFSTxnOnDisk(SFSStatus *pFrom, SFSStatus *pTo) {
int ifrom = 0;
int ito = 0;
size_t sizeFrom, sizeTo;
SDFileSet *pSetFrom;
SDFileSet *pSetTo;
sizeFrom = taosArrayGetSize(pFrom->df);
sizeTo = taosArrayGetSize(pTo->df);
// Apply meta file change
(void)tsdbApplyMFileChange(pFrom->pmf, pTo->pmf);
// Apply SDFileSet change
if (ifrom >= sizeFrom) {
pSetFrom = NULL;
} else {
pSetFrom = taosArrayGet(pFrom->df, ifrom);
}
if (ito >= sizeTo) {
pSetTo = NULL;
} else {
pSetTo = taosArrayGet(pTo->df, ito);
}
while (true) {
if ((pSetTo == NULL) && (pSetFrom == NULL)) break;
if (pSetTo == NULL || (pSetFrom && pSetFrom->fid < pSetTo->fid)) {
tsdbApplyDFileSetChange(pSetFrom, NULL);
ifrom++;
if (ifrom >= sizeFrom) {
pSetFrom = NULL;
} else {
pSetFrom = taosArrayGet(pFrom->df, ifrom);
}
} else if (pSetFrom == NULL || pSetFrom->fid > pSetTo->fid) {
// Do nothing
ito++;
if (ito >= sizeTo) {
pSetTo = NULL;
} else {
pSetTo = taosArrayGet(pTo->df, ito);
}
} else {
tsdbApplyDFileSetChange(pSetFrom, pSetTo);
ifrom++;
if (ifrom >= sizeFrom) {
pSetFrom = NULL;
} else {
pSetFrom = taosArrayGet(pFrom->df, ifrom);
}
ito++;
if (ito >= sizeTo) {
pSetTo = NULL;
} else {
pSetTo = taosArrayGet(pTo->df, ito);
}
}
}
}
// ================== SFSIter
// ASSUMPTIONS: the FS Should be read locked when calling these functions
void tsdbFSIterInit(SFSIter *pIter, STsdbFS *pfs, int direction) {
pIter->pfs = pfs;
pIter->direction = direction;
size_t size = taosArrayGetSize(pfs->cstatus->df);
pIter->version = pfs->cstatus->meta.version;
if (size == 0) {
pIter->index = -1;
pIter->fid = TSDB_IVLD_FID;
} else {
if (direction == TSDB_FS_ITER_FORWARD) {
pIter->index = 0;
} else {
pIter->index = (int)(size - 1);
}
pIter->fid = ((SDFileSet *)taosArrayGet(pfs->cstatus->df, pIter->index))->fid;
}
}
void tsdbFSIterSeek(SFSIter *pIter, int fid) {
STsdbFS *pfs = pIter->pfs;
size_t size = taosArrayGetSize(pfs->cstatus->df);
int flags;
if (pIter->direction == TSDB_FS_ITER_FORWARD) {
flags = TD_GE;
} else {
flags = TD_LE;
}
void *ptr = taosbsearch(&fid, pfs->cstatus->df->pData, size, sizeof(SDFileSet), tsdbComparFidFSet, flags);
if (ptr == NULL) {
pIter->index = -1;
pIter->fid = TSDB_IVLD_FID;
} else {
pIter->index = (int)(TARRAY_ELEM_IDX(pfs->cstatus->df, ptr));
pIter->fid = ((SDFileSet *)ptr)->fid;
}
}
SDFileSet *tsdbFSIterNext(SFSIter *pIter) {
STsdbFS * pfs = pIter->pfs;
SDFileSet *pSet;
if (pIter->index < 0) {
ASSERT(pIter->fid == TSDB_IVLD_FID);
return NULL;
}
ASSERT(pIter->fid != TSDB_IVLD_FID);
if (pIter->version != pfs->cstatus->meta.version) {
pIter->version = pfs->cstatus->meta.version;
tsdbFSIterSeek(pIter, pIter->fid);
}
if (pIter->index < 0) {
return NULL;
}
pSet = (SDFileSet *)taosArrayGet(pfs->cstatus->df, pIter->index);
ASSERT(pSet->fid == pIter->fid);
if (pIter->direction == TSDB_FS_ITER_FORWARD) {
pIter->index++;
if (pIter->index >= taosArrayGetSize(pfs->cstatus->df)) {
pIter->index = -1;
}
} else {
pIter->index--;
}
if (pIter->index >= 0) {
pIter->fid = ((SDFileSet *)taosArrayGet(pfs->cstatus->df, pIter->index))->fid;
} else {
pIter->fid = TSDB_IVLD_FID;
}
return pSet;
}
static int tsdbComparFidFSet(const void *arg1, const void *arg2) {
int fid = *(int *)arg1;
SDFileSet *pSet = (SDFileSet *)arg2;
if (fid < pSet->fid) {
return -1;
} else if (fid == pSet->fid) {
return 0;
} else {
return 1;
}
}
static void tsdbGetTxnFname(int repoid, TSDB_TXN_FILE_T ftype, char fname[]) {
snprintf(fname, TSDB_FILENAME_LEN, "%s/vnode/vnode%d/tsdb/%s", TFS_PRIMARY_PATH(), repoid, tsdbTxnFname[ftype]);
}
static int tsdbOpenFSFromCurrent(STsdb *pRepo) {
STsdbFS * pfs = REPO_FS(pRepo);
int fd = -1;
void * buffer = NULL;
SFSHeader fsheader;
char current[TSDB_FILENAME_LEN] = "\0";
void * ptr;
tsdbGetTxnFname(REPO_ID(pRepo), TSDB_TXN_CURR_FILE, current);
// current file exists, try to recover
fd = open(current, O_RDONLY | O_BINARY);
if (fd < 0) {
tsdbError("vgId:%d failed to open file %s since %s", REPO_ID(pRepo), current, strerror(errno));
terrno = TAOS_SYSTEM_ERROR(errno);
goto _err;
}
if (tsdbMakeRoom(&buffer, TSDB_FILE_HEAD_SIZE) < 0) {
goto _err;
}
int nread = (int)taosReadFile(fd, buffer, TSDB_FILE_HEAD_SIZE);
if (nread < 0) {
tsdbError("vgId:%d failed to read %d bytes from file %s since %s", REPO_ID(pRepo), TSDB_FILENAME_LEN, current,
strerror(errno));
terrno = TAOS_SYSTEM_ERROR(errno);
goto _err;
}
if (nread < TSDB_FILE_HEAD_SIZE) {
tsdbError("vgId:%d failed to read header of file %s, read bytes:%d", REPO_ID(pRepo), current, nread);
terrno = TSDB_CODE_TDB_FILE_CORRUPTED;
goto _err;
}
if (!taosCheckChecksumWhole((uint8_t *)buffer, TSDB_FILE_HEAD_SIZE)) {
tsdbError("vgId:%d header of file %s failed checksum check", REPO_ID(pRepo), current);
terrno = TSDB_CODE_TDB_FILE_CORRUPTED;
goto _err;
}
SFSStatus *pStatus = pfs->cstatus;
ptr = buffer;
ptr = tsdbDecodeFSHeader(ptr, &fsheader);
ptr = tsdbDecodeFSMeta(ptr, &(pStatus->meta));
if (fsheader.version != TSDB_FS_VER_0) {
// TODO: handle file version change
}
if (fsheader.len > 0) {
if (tsdbMakeRoom(&buffer, fsheader.len) < 0) {
goto _err;
}
nread = (int)taosReadFile(fd, buffer, fsheader.len);
if (nread < 0) {
tsdbError("vgId:%d failed to read file %s since %s", REPO_ID(pRepo), current, strerror(errno));
terrno = TAOS_SYSTEM_ERROR(errno);
goto _err;
}
if (nread < fsheader.len) {
tsdbError("vgId:%d failed to read %d bytes from file %s", REPO_ID(pRepo), fsheader.len, current);
terrno = TSDB_CODE_TDB_FILE_CORRUPTED;
goto _err;
}
if (!taosCheckChecksumWhole((uint8_t *)buffer, fsheader.len)) {
tsdbError("vgId:%d file %s is corrupted since wrong checksum", REPO_ID(pRepo), current);
terrno = TSDB_CODE_TDB_FILE_CORRUPTED;
goto _err;
}
ptr = buffer;
if (tsdbDecodeFSStatus(&buffer, ptr, pStatus, &fsheader) < 0) {
goto _err;
}
} else {
tsdbResetFSStatus(pStatus);
}
taosTZfree(buffer);
close(fd);
return 0;
_err:
if (fd >= 0) {
close(fd);
}
taosTZfree(buffer);
return -1;
}
// Scan and try to fix incorrect files
static int tsdbScanAndTryFixFS(STsdb *pRepo) {
STsdbFS * pfs = REPO_FS(pRepo);
SFSStatus *pStatus = pfs->cstatus;
if (tsdbScanAndTryFixMFile(pRepo) < 0) {
tsdbError("vgId:%d failed to fix MFile since %s", REPO_ID(pRepo), tstrerror(terrno));
return -1;
}
size_t size = taosArrayGetSize(pStatus->df);
for (size_t i = 0; i < size; i++) {
SDFileSet *pSet = (SDFileSet *)taosArrayGet(pStatus->df, i);
if (tsdbScanAndTryFixDFileSet(pRepo, pSet) < 0) {
tsdbError("vgId:%d failed to fix DFileSet since %s", REPO_ID(pRepo), tstrerror(terrno));
return -1;
}
}
// remove those unused files
tsdbScanRootDir(pRepo);
tsdbScanDataDir(pRepo);
return 0;
}
int tsdbLoadMetaCache(STsdb *pRepo, bool recoverMeta) {
char tbuf[128];
STsdbFS * pfs = REPO_FS(pRepo);
SMFile mf;
SMFile * pMFile = &mf;
void * pBuf = NULL;
SKVRecord rInfo;
int64_t maxBufSize = 0;
SMFInfo minfo;
taosHashClear(pfs->metaCache);
// No meta file, just return
if (pfs->cstatus->pmf == NULL) return 0;
mf = pfs->cstatus->mf;
// Load cache first
if (tsdbOpenMFile(pMFile, O_RDONLY) < 0) {
return -1;
}
if (tsdbLoadMFileHeader(pMFile, &minfo) < 0) {
tsdbCloseMFile(pMFile);
return -1;
}
while (true) {
int64_t tsize = tsdbReadMFile(pMFile, tbuf, sizeof(SKVRecord));
if (tsize == 0) break;
if (tsize < 0) {
tsdbError("vgId:%d failed to read META file since %s", REPO_ID(pRepo), tstrerror(terrno));
return -1;
}
if (tsize < sizeof(SKVRecord)) {
tsdbError("vgId:%d failed to read %" PRIzu " bytes from file %s", REPO_ID(pRepo), sizeof(SKVRecord),
TSDB_FILE_FULL_NAME(pMFile));
terrno = TSDB_CODE_TDB_FILE_CORRUPTED;
tsdbCloseMFile(pMFile);
return -1;
}
void *ptr = tsdbDecodeKVRecord(tbuf, &rInfo);
ASSERT(POINTER_DISTANCE(ptr, tbuf) == sizeof(SKVRecord));
// ASSERT((rInfo.offset > 0) ? (pStore->info.size == rInfo.offset) : true);
if (rInfo.offset < 0) {
taosHashRemove(pfs->metaCache, (void *)(&rInfo.uid), sizeof(rInfo.uid));
#if 0
pStore->info.size += sizeof(SKVRecord);
pStore->info.nRecords--;
pStore->info.nDels++;
pStore->info.tombSize += (rInfo.size + sizeof(SKVRecord) * 2);
#endif
} else {
ASSERT(rInfo.offset > 0 && rInfo.size > 0);
if (taosHashPut(pfs->metaCache, (void *)(&rInfo.uid), sizeof(rInfo.uid), &rInfo, sizeof(rInfo)) < 0) {
tsdbError("vgId:%d failed to load meta cache from file %s since OOM", REPO_ID(pRepo),
TSDB_FILE_FULL_NAME(pMFile));
terrno = TSDB_CODE_OUT_OF_MEMORY;
tsdbCloseMFile(pMFile);
return -1;
}
maxBufSize = MAX(maxBufSize, rInfo.size);
if (tsdbSeekMFile(pMFile, rInfo.size, SEEK_CUR) < 0) {
tsdbError("vgId:%d failed to lseek file %s since %s", REPO_ID(pRepo), TSDB_FILE_FULL_NAME(pMFile),
tstrerror(terrno));
tsdbCloseMFile(pMFile);
return -1;
}
#if 0
pStore->info.size += (sizeof(SKVRecord) + rInfo.size);
pStore->info.nRecords++;
#endif
}
}
if (recoverMeta) {
pBuf = malloc((size_t)maxBufSize);
if (pBuf == NULL) {
terrno = TSDB_CODE_TDB_OUT_OF_MEMORY;
tsdbCloseMFile(pMFile);
return -1;
}
SKVRecord *pRecord = taosHashIterate(pfs->metaCache, NULL);
while (pRecord) {
if (tsdbSeekMFile(pMFile, pRecord->offset + sizeof(SKVRecord), SEEK_SET) < 0) {
tsdbError("vgId:%d failed to seek file %s since %s", REPO_ID(pRepo), TSDB_FILE_FULL_NAME(pMFile),
tstrerror(terrno));
tfree(pBuf);
tsdbCloseMFile(pMFile);
return -1;
}
int nread = (int)tsdbReadMFile(pMFile, pBuf, pRecord->size);
if (nread < 0) {
tsdbError("vgId:%d failed to read file %s since %s", REPO_ID(pRepo), TSDB_FILE_FULL_NAME(pMFile),
tstrerror(terrno));
tfree(pBuf);
tsdbCloseMFile(pMFile);
return -1;
}
if (nread < pRecord->size) {
tsdbError("vgId:%d failed to read file %s since file corrupted, expected read:%" PRId64 " actual read:%d",
REPO_ID(pRepo), TSDB_FILE_FULL_NAME(pMFile), pRecord->size, nread);
terrno = TSDB_CODE_TDB_FILE_CORRUPTED;
tfree(pBuf);
tsdbCloseMFile(pMFile);
return -1;
}
if (tsdbRestoreTable(pRepo, pBuf, (int)pRecord->size) < 0) {
tsdbError("vgId:%d failed to restore table, uid %" PRId64 ", since %s" PRIu64, REPO_ID(pRepo), pRecord->uid,
tstrerror(terrno));
tfree(pBuf);
tsdbCloseMFile(pMFile);
return -1;
}
pRecord = taosHashIterate(pfs->metaCache, pRecord);
}
tsdbOrgMeta(pRepo);
}
tsdbCloseMFile(pMFile);
tfree(pBuf);
return 0;
}
static int tsdbScanRootDir(STsdb *pRepo) {
char rootDir[TSDB_FILENAME_LEN];
char bname[TSDB_FILENAME_LEN];
STsdbFS * pfs = REPO_FS(pRepo);
const TFILE *pf;
tsdbGetRootDir(REPO_ID(pRepo), rootDir);
TDIR *tdir = tfsOpendir(rootDir);
if (tdir == NULL) {
tsdbError("vgId:%d failed to open directory %s since %s", REPO_ID(pRepo), rootDir, tstrerror(terrno));
return -1;
}
while ((pf = tfsReaddir(tdir))) {
tfsbasename(pf, bname);
if (strcmp(bname, tsdbTxnFname[TSDB_TXN_CURR_FILE]) == 0 || strcmp(bname, "data") == 0) {
// Skip current file and data directory
continue;
}
if (pfs->cstatus->pmf && tfsIsSameFile(pf, &(pfs->cstatus->pmf->f))) {
continue;
}
(void)tfsremove(pf);
tsdbDebug("vgId:%d invalid file %s is removed", REPO_ID(pRepo), TFILE_NAME(pf));
}
tfsClosedir(tdir);
return 0;
}
static int tsdbScanDataDir(STsdb *pRepo) {
char dataDir[TSDB_FILENAME_LEN];
char bname[TSDB_FILENAME_LEN];
STsdbFS * pfs = REPO_FS(pRepo);
const TFILE *pf;
tsdbGetDataDir(REPO_ID(pRepo), dataDir);
TDIR *tdir = tfsOpendir(dataDir);
if (tdir == NULL) {
tsdbError("vgId:%d failed to open directory %s since %s", REPO_ID(pRepo), dataDir, tstrerror(terrno));
return -1;
}
while ((pf = tfsReaddir(tdir))) {
tfsbasename(pf, bname);
if (!tsdbIsTFileInFS(pfs, pf)) {
(void)tfsremove(pf);
tsdbDebug("vgId:%d invalid file %s is removed", REPO_ID(pRepo), TFILE_NAME(pf));
}
}
tfsClosedir(tdir);
return 0;
}
static bool tsdbIsTFileInFS(STsdbFS *pfs, const TFILE *pf) {
SFSIter fsiter;
tsdbFSIterInit(&fsiter, pfs, TSDB_FS_ITER_FORWARD);
SDFileSet *pSet;
while ((pSet = tsdbFSIterNext(&fsiter))) {
for (TSDB_FILE_T ftype = 0; ftype < tsdbGetNFiles(pSet); ftype++) {
SDFile *pDFile = TSDB_DFILE_IN_SET(pSet, ftype);
if (tfsIsSameFile(pf, TSDB_FILE_F(pDFile))) {
return true;
}
}
}
return false;
}
static int tsdbRestoreMeta(STsdb *pRepo) {
char rootDir[TSDB_FILENAME_LEN];
char bname[TSDB_FILENAME_LEN];
TDIR * tdir = NULL;
const TFILE *pf = NULL;
const char * pattern = "^meta(-ver[0-9]+)?$";
regex_t regex;
STsdbFS * pfs = REPO_FS(pRepo);
regcomp(&regex, pattern, REG_EXTENDED);
tsdbInfo("vgId:%d try to restore meta", REPO_ID(pRepo));
tsdbGetRootDir(REPO_ID(pRepo), rootDir);
tdir = tfsOpendir(rootDir);
if (tdir == NULL) {
tsdbError("vgId:%d failed to open dir %s since %s", REPO_ID(pRepo), rootDir, tstrerror(terrno));
regfree(&regex);
return -1;
}
while ((pf = tfsReaddir(tdir))) {
tfsbasename(pf, bname);
if (strcmp(bname, "data") == 0) {
// Skip the data/ directory
continue;
}
if (strcmp(bname, tsdbTxnFname[TSDB_TXN_TEMP_FILE]) == 0) {
// Skip current.t file
tsdbInfo("vgId:%d file %s exists, remove it", REPO_ID(pRepo), TFILE_NAME(pf));
(void)tfsremove(pf);
continue;
}
int code = regexec(&regex, bname, 0, NULL, 0);
if (code == 0) {
// Match
if (pfs->cstatus->pmf != NULL) {
tsdbError("vgId:%d failed to restore meta since two file exists, file1 %s and file2 %s", REPO_ID(pRepo),
TSDB_FILE_FULL_NAME(pfs->cstatus->pmf), TFILE_NAME(pf));
terrno = TSDB_CODE_TDB_FILE_CORRUPTED;
tfsClosedir(tdir);
regfree(&regex);
return -1;
} else {
uint32_t _version = 0;
if (strcmp(bname, "meta") != 0) {
sscanf(bname, "meta-ver%" PRIu32, &_version);
pfs->cstatus->meta.version = _version;
}
pfs->cstatus->pmf = &(pfs->cstatus->mf);
pfs->cstatus->pmf->f = *pf;
TSDB_FILE_SET_CLOSED(pfs->cstatus->pmf);
if (tsdbOpenMFile(pfs->cstatus->pmf, O_RDONLY) < 0) {
tsdbError("vgId:%d failed to restore meta since %s", REPO_ID(pRepo), tstrerror(terrno));
tfsClosedir(tdir);
regfree(&regex);
return -1;
}
if (tsdbLoadMFileHeader(pfs->cstatus->pmf, &(pfs->cstatus->pmf->info)) < 0) {
tsdbError("vgId:%d failed to restore meta since %s", REPO_ID(pRepo), tstrerror(terrno));
tsdbCloseMFile(pfs->cstatus->pmf);
tfsClosedir(tdir);
regfree(&regex);
return -1;
}
if (tsdbForceKeepFile) {
struct stat tfstat;
// Get real file size
if (fstat(pfs->cstatus->pmf->fd, &tfstat) < 0) {
terrno = TAOS_SYSTEM_ERROR(errno);
tsdbCloseMFile(pfs->cstatus->pmf);
tfsClosedir(tdir);
regfree(&regex);
return -1;
}
if (pfs->cstatus->pmf->info.size != tfstat.st_size) {
int64_t tfsize = pfs->cstatus->pmf->info.size;
pfs->cstatus->pmf->info.size = tfstat.st_size;
tsdbInfo("vgId:%d file %s header size is changed from %" PRId64 " to %" PRId64, REPO_ID(pRepo),
TSDB_FILE_FULL_NAME(pfs->cstatus->pmf), tfsize, pfs->cstatus->pmf->info.size);
}
}
tsdbCloseMFile(pfs->cstatus->pmf);
}
} else if (code == REG_NOMATCH) {
// Not match
tsdbInfo("vgId:%d invalid file %s exists, remove it", REPO_ID(pRepo), TFILE_NAME(pf));
tfsremove(pf);
continue;
} else {
// Has other error
tsdbError("vgId:%d failed to restore meta file while run regexec since %s", REPO_ID(pRepo), strerror(code));
terrno = TAOS_SYSTEM_ERROR(code);
tfsClosedir(tdir);
regfree(&regex);
return -1;
}
}
if (pfs->cstatus->pmf) {
tsdbInfo("vgId:%d meta file %s is restored", REPO_ID(pRepo), TSDB_FILE_FULL_NAME(pfs->cstatus->pmf));
} else {
tsdbInfo("vgId:%d no meta file is restored", REPO_ID(pRepo));
}
tfsClosedir(tdir);
regfree(&regex);
return 0;
}
static int tsdbFetchTFileSet(STsdb *pRepo, SArray **fArray) {
char dataDir[TSDB_FILENAME_LEN];
char bname[TSDB_FILENAME_LEN];
TDIR * tdir = NULL;
const TFILE *pf = NULL;
const char * pattern = "^v[0-9]+f[0-9]+\\.(head|data|last|smad|smal)(-ver[0-9]+)?$";
regex_t regex;
tsdbGetDataDir(REPO_ID(pRepo), dataDir);
// Resource allocation and init
regcomp(&regex, pattern, REG_EXTENDED);
*fArray = taosArrayInit(1024, sizeof(TFILE));
if (*fArray == NULL) {
terrno = TSDB_CODE_TDB_OUT_OF_MEMORY;
tsdbError("vgId:%d failed to fetch TFileSet while open directory %s since %s", REPO_ID(pRepo), dataDir,
tstrerror(terrno));
regfree(&regex);
return -1;
}
tdir = tfsOpendir(dataDir);
if (tdir == NULL) {
tsdbError("vgId:%d failed to fetch TFileSet while open directory %s since %s", REPO_ID(pRepo), dataDir,
tstrerror(terrno));
taosArrayDestroy(*fArray);
regfree(&regex);
return -1;
}
while ((pf = tfsReaddir(tdir))) {
tfsbasename(pf, bname);
int code = regexec(&regex, bname, 0, NULL, 0);
if (code == 0) {
if (taosArrayPush(*fArray, (void *)pf) == NULL) {
terrno = TSDB_CODE_TDB_OUT_OF_MEMORY;
tfsClosedir(tdir);
taosArrayDestroy(*fArray);
regfree(&regex);
return -1;
}
} else if (code == REG_NOMATCH) {
// Not match
tsdbInfo("vgId:%d invalid file %s exists, remove it", REPO_ID(pRepo), TFILE_NAME(pf));
(void)tfsremove(pf);
continue;
} else {
// Has other error
tsdbError("vgId:%d failed to fetch TFileSet Array while run regexec since %s", REPO_ID(pRepo), strerror(code));
terrno = TAOS_SYSTEM_ERROR(code);
tfsClosedir(tdir);
taosArrayDestroy(*fArray);
regfree(&regex);
return -1;
}
}
tfsClosedir(tdir);
regfree(&regex);
// Sort the array according to file name
taosArraySort(*fArray, tsdbComparTFILE);
return 0;
}
// update the function if the DFileSet definition updates
static bool tsdbIsDFileSetValid(int nFiles) {
switch (nFiles) {
case TSDB_FILE_MIN:
case TSDB_FILE_MAX:
return true;
default:
return false;
}
}
static int tsdbRestoreDFileSet(STsdb *pRepo) {
const TFILE *pf = NULL;
SArray * fArray = NULL;
STsdbFS * pfs = REPO_FS(pRepo);
char dataDir[TSDB_FILENAME_LEN] = "\0";
size_t fArraySize = 0;
tsdbGetDataDir(REPO_ID(pRepo), dataDir);
if (tsdbFetchTFileSet(pRepo, &fArray) < 0) {
tsdbError("vgId:%d failed to fetch TFileSet from %s to restore since %s", REPO_ID(pRepo), dataDir,
tstrerror(terrno));
return -1;
}
if ((fArraySize = taosArrayGetSize(fArray)) <= 0) {
taosArrayDestroy(fArray);
tsdbInfo("vgId:%d size of DFileSet from %s is %" PRIu32, REPO_ID(pRepo), dataDir, (uint32_t)fArraySize);
return 0;
}
// Loop to recover each file set
SDFileSet fset = {0};
uint8_t nDFiles = 0;
bool isOneFSetFinish = true;
int lastFType = -1;
// one fileset ends when (1) the array ends or (2) encounter different fid
for (size_t index = 0; index < fArraySize; ++index) {
int tvid = -1, tfid = -1;
TSDB_FILE_T ttype = TSDB_FILE_MAX;
uint32_t tversion = -1;
char bname[TSDB_FILENAME_LEN] = "\0";
pf = taosArrayGet(fArray, index);
tfsbasename(pf, bname);
tsdbParseDFilename(bname, &tvid, &tfid, &ttype, &tversion);
ASSERT(tvid == REPO_ID(pRepo));
SDFile *pDFile = TSDB_DFILE_IN_SET(&fset, ttype);
if (tfid < pRepo->rtn.minFid) { // skip the file expired
continue;
}
if ((isOneFSetFinish == false) && (lastFType == ttype)) { // only fetch the 1st file with same fid and type.
continue;
}
lastFType = ttype;
if (index == 0) {
memset(&fset, 0, sizeof(SDFileSet));
TSDB_FSET_SET_CLOSED(&fset);
nDFiles = 1;
fset.fid = tfid;
pDFile->f = *pf;
isOneFSetFinish = false;
} else {
if (fset.fid == tfid) {
++nDFiles;
pDFile->f = *pf;
// (1) the array ends
if (index == fArraySize - 1) {
if (tsdbIsDFileSetValid(nDFiles)) {
tsdbInfo("vgId:%d DFileSet %d is fetched, nDFiles=%" PRIu8, REPO_ID(pRepo), fset.fid, nDFiles);
isOneFSetFinish = true;
} else {
// return error in case of removing uncomplete DFileSets
// terrno = TSDB_CODE_TDB_INCOMPLETE_DFILESET;
tsdbError("vgId:%d incomplete DFileSet, fid:%d, nDFiles=%" PRIu8, REPO_ID(pRepo), fset.fid, nDFiles);
taosArrayDestroy(fArray);
return -1;
}
}
} else {
// (2) encounter different fid
if (tsdbIsDFileSetValid(nDFiles)) {
tsdbInfo("vgId:%d DFileSet %d is fetched, nDFiles=%" PRIu8, REPO_ID(pRepo), fset.fid, nDFiles);
isOneFSetFinish = true;
} else {
// return error in case of removing uncomplete DFileSets
// terrno = TSDB_CODE_TDB_INCOMPLETE_DFILESET;
tsdbError("vgId:%d incomplete DFileSet, fid:%d, nDFiles=%" PRIu8, REPO_ID(pRepo), fset.fid, nDFiles);
taosArrayDestroy(fArray);
return -1;
#if 0
// next FSet
memset(&fset, 0, sizeof(SDFileSet));
TSDB_FSET_SET_CLOSED(&fset);
nDFiles = 1;
fset.fid = tfid;
pDFile->f = *pf;
isOneFSetFinish = false;
continue;
#endif
}
}
}
if (isOneFSetFinish) {
for (TSDB_FILE_T ftype = 0; ftype < nDFiles; ++ftype) {
SDFile * pDFile1 = TSDB_DFILE_IN_SET(&fset, ftype);
if (tsdbOpenDFile(pDFile1, O_RDONLY) < 0) {
tsdbError("vgId:%d failed to open DFile %s since %s", REPO_ID(pRepo), TSDB_FILE_FULL_NAME(pDFile1),
tstrerror(terrno));
taosArrayDestroy(fArray);
return -1;
}
if (tsdbLoadDFileHeader(pDFile1, &(pDFile1->info)) < 0) {
tsdbError("vgId:%d failed to load DFile %s header since %s", REPO_ID(pRepo), TSDB_FILE_FULL_NAME(pDFile1),
tstrerror(terrno));
taosArrayDestroy(fArray);
return -1;
}
if (tsdbForceKeepFile) {
struct stat tfstat;
// Get real file size
if (fstat(pDFile1->fd, &tfstat) < 0) {
terrno = TAOS_SYSTEM_ERROR(errno);
taosArrayDestroy(fArray);
return -1;
}
if (pDFile1->info.size != tfstat.st_size) {
int64_t tfsize = pDFile1->info.size;
pDFile1->info.size = tfstat.st_size;
tsdbInfo("vgId:%d file %s header size is changed from %" PRId64 " to %" PRId64, REPO_ID(pRepo),
TSDB_FILE_FULL_NAME(pDFile1), tfsize, pDFile1->info.size);
}
}
tsdbCloseDFile(pDFile1);
}
tsdbInfo("vgId:%d FSET %d is restored", REPO_ID(pRepo), fset.fid);
// TODO: update the logic when TSDB_FSET_VER definition update.
if (nDFiles == TSDB_FILE_MIN) {
fset.ver = TSDB_FSET_VER_0;
} else {
fset.ver = TSDB_LATEST_FSET_VER;
}
taosArrayPush(pfs->cstatus->df, &fset);
// next FSet
memset(&fset, 0, sizeof(SDFileSet));
TSDB_FSET_SET_CLOSED(&fset);
nDFiles = 1;
fset.fid = tfid;
pDFile->f = *pf;
isOneFSetFinish = false;
}
}
// Resource release
taosArrayDestroy(fArray);
return 0;
}
static int tsdbRestoreCurrent(STsdb *pRepo) {
// Loop to recover mfile
if (tsdbRestoreMeta(pRepo) < 0) {
tsdbError("vgId:%d failed to restore current since %s", REPO_ID(pRepo), tstrerror(terrno));
return -1;
}
// Loop to recover dfile set
if (tsdbRestoreDFileSet(pRepo) < 0) {
tsdbError("vgId:%d failed to restore DFileSet since %s", REPO_ID(pRepo), tstrerror(terrno));
return -1;
}
if (tsdbSaveFSStatus(pRepo->fs->cstatus, REPO_ID(pRepo)) < 0) {
tsdbError("vgId:%d failed to restore corrent since %s", REPO_ID(pRepo), tstrerror(terrno));
return -1;
}
return 0;
}
static int tsdbComparTFILE(const void *arg1, const void *arg2) {
TFILE *pf1 = (TFILE *)arg1;
TFILE *pf2 = (TFILE *)arg2;
int vid1, fid1, vid2, fid2;
TSDB_FILE_T ftype1, ftype2;
uint32_t version1, version2;
char bname1[TSDB_FILENAME_LEN];
char bname2[TSDB_FILENAME_LEN];
tfsbasename(pf1, bname1);
tfsbasename(pf2, bname2);
tsdbParseDFilename(bname1, &vid1, &fid1, &ftype1, &version1);
tsdbParseDFilename(bname2, &vid2, &fid2, &ftype2, &version2);
if (fid1 < fid2) {
return -1;
} else if (fid1 > fid2) {
return 1;
} else {
if (ftype1 < ftype2) {
return -1;
} else if (ftype1 > ftype2) {
return 1;
} else {
if (version1 < version2) {
return -1;
} else if (version1 > version2) {
return 1;
} else {
return 0;
}
}
}
}
static void tsdbScanAndTryFixDFilesHeader(STsdb *pRepo, int32_t *nExpired) {
STsdbFS * pfs = REPO_FS(pRepo);
SFSStatus *pStatus = pfs->cstatus;
SDFInfo info;
for (size_t i = 0; i < taosArrayGetSize(pStatus->df); i++) {
SDFileSet fset;
tsdbInitDFileSetEx(&fset, (SDFileSet *)taosArrayGet(pStatus->df, i));
if (fset.fid < pRepo->rtn.minFid) {
++*nExpired;
}
tsdbDebug("vgId:%d scan DFileSet %d header", REPO_ID(pRepo), fset.fid);
if (tsdbOpenDFileSet(&fset, O_RDWR) < 0) {
tsdbError("vgId:%d failed to open DFileSet %d since %s, continue", REPO_ID(pRepo), fset.fid, tstrerror(terrno));
continue;
}
for (TSDB_FILE_T ftype = 0; ftype < tsdbGetNFiles(&fset); ftype++) {
SDFile *pDFile = TSDB_DFILE_IN_SET(&fset, ftype);
if ((tsdbLoadDFileHeader(pDFile, &info) < 0) || pDFile->info.size != info.size ||
pDFile->info.magic != info.magic) {
if (tsdbUpdateDFileHeader(pDFile) < 0) {
tsdbError("vgId:%d failed to update DFile header of %s since %s, continue", REPO_ID(pRepo),
TSDB_FILE_FULL_NAME(pDFile), tstrerror(terrno));
} else {
tsdbInfo("vgId:%d DFile header of %s is updated", REPO_ID(pRepo), TSDB_FILE_FULL_NAME(pDFile));
TSDB_FILE_FSYNC(pDFile);
}
} else {
tsdbDebug("vgId:%d DFile header of %s is correct", REPO_ID(pRepo), TSDB_FILE_FULL_NAME(pDFile));
}
}
tsdbCloseDFileSet(&fset);
}
}
/*
* Copyright (c) 2019 TAOS Data, Inc. <jhtao@taosdata.com>
*
* This program is free software: you can use, redistribute, and/or modify
* it under the terms of the GNU Affero General Public License, version 3
* or later ("AGPL"), as published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#include "tsdbint.h"
static const char *TSDB_FNAME_SUFFIX[] = {
"head", // TSDB_FILE_HEAD
"data", // TSDB_FILE_DATA
"last", // TSDB_FILE_LAST
"smad", // TSDB_FILE_SMA_DATA(Small Materialized Aggregate for .data File)
"smal", // TSDB_FILE_SMA_LAST(Small Materialized Aggregate for .last File)
"", // TSDB_FILE_MAX
"meta", // TSDB_FILE_META
};
static void tsdbGetFilename(int vid, int fid, uint32_t ver, TSDB_FILE_T ftype, char *fname);
static int tsdbRollBackMFile(SMFile *pMFile);
static int tsdbEncodeDFInfo(void **buf, SDFInfo *pInfo);
static void *tsdbDecodeDFInfo(void *buf, SDFInfo *pInfo, TSDB_FVER_TYPE sfver);
static int tsdbRollBackDFile(SDFile *pDFile);
// ============== SMFile
void tsdbInitMFile(SMFile *pMFile, SDiskID did, int vid, uint32_t ver) {
char fname[TSDB_FILENAME_LEN];
TSDB_FILE_SET_STATE(pMFile, TSDB_FILE_STATE_OK);
memset(&(pMFile->info), 0, sizeof(pMFile->info));
pMFile->info.magic = TSDB_FILE_INIT_MAGIC;
tsdbGetFilename(vid, 0, ver, TSDB_FILE_META, fname);
tfsInitFile(TSDB_FILE_F(pMFile), did.level, did.id, fname);
}
void tsdbInitMFileEx(SMFile *pMFile, const SMFile *pOMFile) {
*pMFile = *pOMFile;
TSDB_FILE_SET_CLOSED(pMFile);
}
int tsdbEncodeSMFile(void **buf, SMFile *pMFile) {
int tlen = 0;
tlen += tsdbEncodeMFInfo(buf, &(pMFile->info));
tlen += tfsEncodeFile(buf, &(pMFile->f));
return tlen;
}
void *tsdbDecodeSMFile(void *buf, SMFile *pMFile) {
buf = tsdbDecodeMFInfo(buf, &(pMFile->info));
buf = tfsDecodeFile(buf, &(pMFile->f));
TSDB_FILE_SET_CLOSED(pMFile);
return buf;
}
int tsdbEncodeSMFileEx(void **buf, SMFile *pMFile) {
int tlen = 0;
tlen += tsdbEncodeMFInfo(buf, &(pMFile->info));
tlen += taosEncodeString(buf, TSDB_FILE_FULL_NAME(pMFile));
return tlen;
}
void *tsdbDecodeSMFileEx(void *buf, SMFile *pMFile) {
char *aname;
buf = tsdbDecodeMFInfo(buf, &(pMFile->info));
buf = taosDecodeString(buf, &aname);
tstrncpy(TSDB_FILE_FULL_NAME(pMFile), aname, TSDB_FILENAME_LEN);
TSDB_FILE_SET_CLOSED(pMFile);
tfree(aname);
return buf;
}
int tsdbApplyMFileChange(SMFile *from, SMFile *to) {
if (from == NULL && to == NULL) return 0;
if (from != NULL) {
if (to == NULL) {
return tsdbRemoveMFile(from);
} else {
if (tfsIsSameFile(TSDB_FILE_F(from), TSDB_FILE_F(to))) {
if (from->info.size > to->info.size) {
tsdbRollBackMFile(to);
}
} else {
return tsdbRemoveMFile(from);
}
}
}
return 0;
}
int tsdbCreateMFile(SMFile *pMFile, bool updateHeader) {
ASSERT(pMFile->info.size == 0 && pMFile->info.magic == TSDB_FILE_INIT_MAGIC);
pMFile->fd = open(TSDB_FILE_FULL_NAME(pMFile), O_WRONLY | O_CREAT | O_TRUNC | O_BINARY, 0755);
if (pMFile->fd < 0) {
if (errno == ENOENT) {
// Try to create directory recursively
char *s = strdup(TFILE_REL_NAME(&(pMFile->f)));
if (tfsMkdirRecurAt(dirname(s), TSDB_FILE_LEVEL(pMFile), TSDB_FILE_ID(pMFile)) < 0) {
tfree(s);
return -1;
}
tfree(s);
pMFile->fd = open(TSDB_FILE_FULL_NAME(pMFile), O_WRONLY | O_CREAT | O_TRUNC | O_BINARY, 0755);
if (pMFile->fd < 0) {
terrno = TAOS_SYSTEM_ERROR(errno);
return -1;
}
} else {
terrno = TAOS_SYSTEM_ERROR(errno);
return -1;
}
}
if (!updateHeader) {
return 0;
}
pMFile->info.size += TSDB_FILE_HEAD_SIZE;
if (tsdbUpdateMFileHeader(pMFile) < 0) {
tsdbCloseMFile(pMFile);
tsdbRemoveMFile(pMFile);
return -1;
}
return 0;
}
int tsdbUpdateMFileHeader(SMFile *pMFile) {
char buf[TSDB_FILE_HEAD_SIZE] = "\0";
if (tsdbSeekMFile(pMFile, 0, SEEK_SET) < 0) {
return -1;
}
void *ptr = buf;
tsdbEncodeMFInfo(&ptr, TSDB_FILE_INFO(pMFile));
taosCalcChecksumAppend(0, (uint8_t *)buf, TSDB_FILE_HEAD_SIZE);
if (tsdbWriteMFile(pMFile, buf, TSDB_FILE_HEAD_SIZE) < 0) {
return -1;
}
return 0;
}
int tsdbLoadMFileHeader(SMFile *pMFile, SMFInfo *pInfo) {
char buf[TSDB_FILE_HEAD_SIZE] = "\0";
ASSERT(TSDB_FILE_OPENED(pMFile));
if (tsdbSeekMFile(pMFile, 0, SEEK_SET) < 0) {
return -1;
}
if (tsdbReadMFile(pMFile, buf, TSDB_FILE_HEAD_SIZE) < 0) {
return -1;
}
if (!taosCheckChecksumWhole((uint8_t *)buf, TSDB_FILE_HEAD_SIZE)) {
terrno = TSDB_CODE_TDB_FILE_CORRUPTED;
return -1;
}
tsdbDecodeMFInfo(buf, pInfo);
return 0;
}
int tsdbScanAndTryFixMFile(STsdb *pRepo) {
SMFile * pMFile = pRepo->fs->cstatus->pmf;
struct stat mfstat;
SMFile mf;
if (pMFile == NULL) {
// No meta file, no need to scan
return 0;
}
tsdbInitMFileEx(&mf, pMFile);
if (access(TSDB_FILE_FULL_NAME(pMFile), F_OK) != 0) {
tsdbError("vgId:%d meta file %s not exist, report to upper layer to fix it", REPO_ID(pRepo),
TSDB_FILE_FULL_NAME(pMFile));
pRepo->state |= TSDB_STATE_BAD_META;
TSDB_FILE_SET_STATE(pMFile, TSDB_FILE_STATE_BAD);
return 0;
}
if (stat(TSDB_FILE_FULL_NAME(&mf), &mfstat) < 0) {
terrno = TAOS_SYSTEM_ERROR(errno);
return -1;
}
if (pMFile->info.size < mfstat.st_size) {
if (tsdbOpenMFile(&mf, O_WRONLY) < 0) {
return -1;
}
if (taosFtruncateFile(mf.fd, mf.info.size) < 0) {
terrno = TAOS_SYSTEM_ERROR(errno);
tsdbCloseMFile(&mf);
return -1;
}
if (tsdbUpdateMFileHeader(&mf) < 0) {
tsdbCloseMFile(&mf);
return -1;
}
tsdbCloseMFile(&mf);
tsdbInfo("vgId:%d file %s is truncated from %" PRId64 " to %" PRId64, REPO_ID(pRepo), TSDB_FILE_FULL_NAME(pMFile),
mfstat.st_size, pMFile->info.size);
} else if (pMFile->info.size > mfstat.st_size) {
tsdbError("vgId:%d meta file %s has wrong size %" PRId64 " expected %" PRId64 ", report to upper layer to fix it",
REPO_ID(pRepo), TSDB_FILE_FULL_NAME(pMFile), mfstat.st_size, pMFile->info.size);
pRepo->state |= TSDB_STATE_BAD_META;
TSDB_FILE_SET_STATE(pMFile, TSDB_FILE_STATE_BAD);
terrno = TSDB_CODE_TDB_FILE_CORRUPTED;
return 0;
} else {
tsdbDebug("vgId:%d meta file %s passes the scan", REPO_ID(pRepo), TSDB_FILE_FULL_NAME(pMFile));
}
return 0;
}
int tsdbEncodeMFInfo(void **buf, SMFInfo *pInfo) {
int tlen = 0;
tlen += taosEncodeVariantI64(buf, pInfo->size);
tlen += taosEncodeVariantI64(buf, pInfo->tombSize);
tlen += taosEncodeVariantI64(buf, pInfo->nRecords);
tlen += taosEncodeVariantI64(buf, pInfo->nDels);
tlen += taosEncodeFixedU32(buf, pInfo->magic);
return tlen;
}
void *tsdbDecodeMFInfo(void *buf, SMFInfo *pInfo) {
buf = taosDecodeVariantI64(buf, &(pInfo->size));
buf = taosDecodeVariantI64(buf, &(pInfo->tombSize));
buf = taosDecodeVariantI64(buf, &(pInfo->nRecords));
buf = taosDecodeVariantI64(buf, &(pInfo->nDels));
buf = taosDecodeFixedU32(buf, &(pInfo->magic));
return buf;
}
static int tsdbRollBackMFile(SMFile *pMFile) {
SMFile mf;
tsdbInitMFileEx(&mf, pMFile);
if (tsdbOpenMFile(&mf, O_WRONLY) < 0) {
return -1;
}
if (taosFtruncateFile(TSDB_FILE_FD(&mf), pMFile->info.size) < 0) {
terrno = TAOS_SYSTEM_ERROR(errno);
tsdbCloseMFile(&mf);
return -1;
}
if (tsdbUpdateMFileHeader(&mf) < 0) {
tsdbCloseMFile(&mf);
return -1;
}
TSDB_FILE_FSYNC(&mf);
tsdbCloseMFile(&mf);
return 0;
}
// ============== Operations on SDFile
void tsdbInitDFile(SDFile *pDFile, SDiskID did, int vid, int fid, uint32_t ver, TSDB_FILE_T ftype) {
char fname[TSDB_FILENAME_LEN];
TSDB_FILE_SET_STATE(pDFile, TSDB_FILE_STATE_OK);
TSDB_FILE_SET_CLOSED(pDFile);
memset(&(pDFile->info), 0, sizeof(pDFile->info));
pDFile->info.magic = TSDB_FILE_INIT_MAGIC;
pDFile->info.fver = tsdbGetDFSVersion(ftype);
tsdbGetFilename(vid, fid, ver, ftype, fname);
tfsInitFile(&(pDFile->f), did.level, did.id, fname);
}
void tsdbInitDFileEx(SDFile *pDFile, SDFile *pODFile) {
*pDFile = *pODFile;
TSDB_FILE_SET_CLOSED(pDFile);
}
int tsdbEncodeSDFile(void **buf, SDFile *pDFile) {
int tlen = 0;
tlen += tsdbEncodeDFInfo(buf, &(pDFile->info));
tlen += tfsEncodeFile(buf, &(pDFile->f));
return tlen;
}
void *tsdbDecodeSDFile(void *buf, SDFile *pDFile, uint32_t sfver) {
buf = tsdbDecodeDFInfo(buf, &(pDFile->info), sfver);
buf = tfsDecodeFile(buf, &(pDFile->f));
TSDB_FILE_SET_CLOSED(pDFile);
return buf;
}
static int tsdbEncodeSDFileEx(void **buf, SDFile *pDFile) {
int tlen = 0;
tlen += tsdbEncodeDFInfo(buf, &(pDFile->info));
tlen += taosEncodeString(buf, TSDB_FILE_FULL_NAME(pDFile));
return tlen;
}
static void *tsdbDecodeSDFileEx(void *buf, SDFile *pDFile) {
char *aname;
// The sync module would send DFileSet with latest verion.
buf = tsdbDecodeDFInfo(buf, &(pDFile->info), TSDB_LATEST_SFS_VER);
buf = taosDecodeString(buf, &aname);
tstrncpy(TSDB_FILE_FULL_NAME(pDFile), aname, TSDB_FILENAME_LEN);
TSDB_FILE_SET_CLOSED(pDFile);
tfree(aname);
return buf;
}
int tsdbCreateDFile(SDFile *pDFile, bool updateHeader, TSDB_FILE_T fType) {
ASSERT(pDFile->info.size == 0 && pDFile->info.magic == TSDB_FILE_INIT_MAGIC);
pDFile->fd = open(TSDB_FILE_FULL_NAME(pDFile), O_WRONLY | O_CREAT | O_TRUNC | O_BINARY, 0755);
if (pDFile->fd < 0) {
if (errno == ENOENT) {
// Try to create directory recursively
char *s = strdup(TFILE_REL_NAME(&(pDFile->f)));
if (tfsMkdirRecurAt(dirname(s), TSDB_FILE_LEVEL(pDFile), TSDB_FILE_ID(pDFile)) < 0) {
tfree(s);
return -1;
}
tfree(s);
pDFile->fd = open(TSDB_FILE_FULL_NAME(pDFile), O_WRONLY | O_CREAT | O_TRUNC | O_BINARY, 0755);
if (pDFile->fd < 0) {
terrno = TAOS_SYSTEM_ERROR(errno);
return -1;
}
} else {
terrno = TAOS_SYSTEM_ERROR(errno);
return -1;
}
}
if (!updateHeader) {
return 0;
}
pDFile->info.size += TSDB_FILE_HEAD_SIZE;
pDFile->info.fver = tsdbGetDFSVersion(fType);
if (tsdbUpdateDFileHeader(pDFile) < 0) {
tsdbCloseDFile(pDFile);
tsdbRemoveDFile(pDFile);
return -1;
}
return 0;
}
int tsdbUpdateDFileHeader(SDFile *pDFile) {
char buf[TSDB_FILE_HEAD_SIZE] = "\0";
if (tsdbSeekDFile(pDFile, 0, SEEK_SET) < 0) {
return -1;
}
void *ptr = buf;
tsdbEncodeDFInfo(&ptr, &(pDFile->info));
taosCalcChecksumAppend(0, (uint8_t *)buf, TSDB_FILE_HEAD_SIZE);
if (tsdbWriteDFile(pDFile, buf, TSDB_FILE_HEAD_SIZE) < 0) {
return -1;
}
return 0;
}
int tsdbLoadDFileHeader(SDFile *pDFile, SDFInfo *pInfo) {
char buf[TSDB_FILE_HEAD_SIZE] = "\0";
// uint32_t _version;
ASSERT(TSDB_FILE_OPENED(pDFile));
if (tsdbSeekDFile(pDFile, 0, SEEK_SET) < 0) {
return -1;
}
if (tsdbReadDFile(pDFile, buf, TSDB_FILE_HEAD_SIZE) < 0) {
return -1;
}
if (!taosCheckChecksumWhole((uint8_t *)buf, TSDB_FILE_HEAD_SIZE)) {
terrno = TSDB_CODE_TDB_FILE_CORRUPTED;
return -1;
}
void *pBuf = buf;
pBuf = tsdbDecodeDFInfo(pBuf, pInfo, TSDB_LATEST_FVER); // only make sure the parameter sfver > 0
return 0;
}
static int tsdbScanAndTryFixDFile(STsdb *pRepo, SDFile *pDFile) {
struct stat dfstat;
SDFile df;
tsdbInitDFileEx(&df, pDFile);
if (access(TSDB_FILE_FULL_NAME(pDFile), F_OK) != 0) {
tsdbError("vgId:%d data file %s not exist, report to upper layer to fix it", REPO_ID(pRepo),
TSDB_FILE_FULL_NAME(pDFile));
pRepo->state |= TSDB_STATE_BAD_DATA;
TSDB_FILE_SET_STATE(pDFile, TSDB_FILE_STATE_BAD);
return 0;
}
if (stat(TSDB_FILE_FULL_NAME(&df), &dfstat) < 0) {
terrno = TAOS_SYSTEM_ERROR(errno);
return -1;
}
if (pDFile->info.size < dfstat.st_size) {
if (tsdbOpenDFile(&df, O_WRONLY) < 0) {
return -1;
}
if (taosFtruncateFile(df.fd, df.info.size) < 0) {
terrno = TAOS_SYSTEM_ERROR(errno);
tsdbCloseDFile(&df);
return -1;
}
if (tsdbUpdateDFileHeader(&df) < 0) {
tsdbCloseDFile(&df);
return -1;
}
tsdbCloseDFile(&df);
tsdbInfo("vgId:%d file %s is truncated from %" PRId64 " to %" PRId64, REPO_ID(pRepo), TSDB_FILE_FULL_NAME(pDFile),
dfstat.st_size, pDFile->info.size);
} else if (pDFile->info.size > dfstat.st_size) {
tsdbError("vgId:%d data file %s has wrong size %" PRId64 " expected %" PRId64 ", report to upper layer to fix it",
REPO_ID(pRepo), TSDB_FILE_FULL_NAME(pDFile), dfstat.st_size, pDFile->info.size);
pRepo->state |= TSDB_STATE_BAD_DATA;
TSDB_FILE_SET_STATE(pDFile, TSDB_FILE_STATE_BAD);
terrno = TSDB_CODE_TDB_FILE_CORRUPTED;
return 0;
} else {
tsdbDebug("vgId:%d file %s passes the scan", REPO_ID(pRepo), TSDB_FILE_FULL_NAME(pDFile));
}
return 0;
}
static int tsdbEncodeDFInfo(void **buf, SDFInfo *pInfo) {
int tlen = 0;
tlen += taosEncodeFixedU32(buf, pInfo->fver);
tlen += taosEncodeFixedU32(buf, pInfo->magic);
tlen += taosEncodeFixedU32(buf, pInfo->len);
tlen += taosEncodeFixedU32(buf, pInfo->totalBlocks);
tlen += taosEncodeFixedU32(buf, pInfo->totalSubBlocks);
tlen += taosEncodeFixedU32(buf, pInfo->offset);
tlen += taosEncodeFixedU64(buf, pInfo->size);
tlen += taosEncodeFixedU64(buf, pInfo->tombSize);
return tlen;
}
static void *tsdbDecodeDFInfo(void *buf, SDFInfo *pInfo, TSDB_FVER_TYPE sfver) {
if (sfver > TSDB_FS_VER_0) {
buf = taosDecodeFixedU32(buf, &(pInfo->fver));
} else {
pInfo->fver = TSDB_FS_VER_0; // default value
}
buf = taosDecodeFixedU32(buf, &(pInfo->magic));
buf = taosDecodeFixedU32(buf, &(pInfo->len));
buf = taosDecodeFixedU32(buf, &(pInfo->totalBlocks));
buf = taosDecodeFixedU32(buf, &(pInfo->totalSubBlocks));
buf = taosDecodeFixedU32(buf, &(pInfo->offset));
buf = taosDecodeFixedU64(buf, &(pInfo->size));
buf = taosDecodeFixedU64(buf, &(pInfo->tombSize));
return buf;
}
static int tsdbApplyDFileChange(SDFile *from, SDFile *to) {
ASSERT(from != NULL || to != NULL);
if (from != NULL) {
if (to == NULL) {
tsdbRemoveDFile(from);
} else {
if (tfsIsSameFile(TSDB_FILE_F(from), TSDB_FILE_F(to))) {
if (from->info.size > to->info.size) {
tsdbRollBackDFile(to);
}
} else {
(void)tsdbRemoveDFile(from);
}
}
}
return 0;
}
static int tsdbRollBackDFile(SDFile *pDFile) {
SDFile df = *pDFile;
if (tsdbOpenDFile(&df, O_WRONLY) < 0) {
return -1;
}
if (taosFtruncateFile(TSDB_FILE_FD(&df), pDFile->info.size) < 0) {
terrno = TAOS_SYSTEM_ERROR(errno);
tsdbCloseDFile(&df);
return -1;
}
if (tsdbUpdateDFileHeader(&df) < 0) {
tsdbCloseDFile(&df);
return -1;
}
TSDB_FILE_FSYNC(&df);
tsdbCloseDFile(&df);
return 0;
}
// ============== Operations on SDFileSet
void tsdbInitDFileSet(SDFileSet *pSet, SDiskID did, int vid, int fid, uint32_t ver, uint16_t fsetVer) {
pSet->fid = fid;
pSet->state = 0;
pSet->ver = fsetVer;
for (TSDB_FILE_T ftype = 0; ftype < tsdbGetNFiles(pSet); ftype++) {
SDFile *pDFile = TSDB_DFILE_IN_SET(pSet, ftype);
tsdbInitDFile(pDFile, did, vid, fid, ver, ftype);
}
}
void tsdbInitDFileSetEx(SDFileSet *pSet, SDFileSet *pOSet) {
ASSERT_TSDB_FSET_NFILES_VALID(pOSet);
pSet->fid = pOSet->fid;
pSet->ver = pOSet->ver;
for (TSDB_FILE_T ftype = 0; ftype < tsdbGetNFiles(pSet); ftype++) {
tsdbInitDFileEx(TSDB_DFILE_IN_SET(pSet, ftype), TSDB_DFILE_IN_SET(pOSet, ftype));
}
}
int tsdbEncodeDFileSet(void **buf, SDFileSet *pSet) {
int tlen = 0;
tlen += taosEncodeFixedI32(buf, pSet->fid);
tlen += taosEncodeFixedU16(buf, pSet->ver);
for (TSDB_FILE_T ftype = 0; ftype < tsdbGetNFiles(pSet); ftype++) {
tlen += tsdbEncodeSDFile(buf, TSDB_DFILE_IN_SET(pSet, ftype));
}
return tlen;
}
void *tsdbDecodeDFileSet(void *buf, SDFileSet *pSet, uint32_t sfver) {
int32_t fid;
buf = taosDecodeFixedI32(buf, &(fid));
pSet->state = 0;
pSet->fid = fid;
if (sfver > TSDB_FS_VER_0) {
buf = taosDecodeFixedU16(buf, &(pSet->ver));
}
ASSERT_TSDB_FSET_NFILES_VALID(pSet);
for (TSDB_FILE_T ftype = 0; ftype < tsdbGetNFiles(pSet); ftype++) {
buf = tsdbDecodeSDFile(buf, TSDB_DFILE_IN_SET(pSet, ftype), sfver);
}
return buf;
}
int tsdbEncodeDFileSetEx(void **buf, SDFileSet *pSet) {
int tlen = 0;
tlen += taosEncodeFixedI32(buf, pSet->fid);
tlen += taosEncodeFixedU16(buf, pSet->ver);
for (TSDB_FILE_T ftype = 0; ftype < tsdbGetNFiles(pSet); ftype++) {
tlen += tsdbEncodeSDFileEx(buf, TSDB_DFILE_IN_SET(pSet, ftype));
}
return tlen;
}
void *tsdbDecodeDFileSetEx(void *buf, SDFileSet *pSet) {
int32_t fid;
buf = taosDecodeFixedI32(buf, &(fid));
buf = taosDecodeFixedU16(buf, &(pSet->ver));
pSet->fid = fid;
for (TSDB_FILE_T ftype = 0; ftype < tsdbGetNFiles(pSet); ftype++) {
buf = tsdbDecodeSDFileEx(buf, TSDB_DFILE_IN_SET(pSet, ftype));
}
return buf;
}
int tsdbApplyDFileSetChange(SDFileSet *from, SDFileSet *to) {
uint8_t nFilesFrom = from ? tsdbGetNFiles(from) : 0;
uint8_t nFilesTo = to ? tsdbGetNFiles(to) : 0;
for (TSDB_FILE_T ftype = 0; ftype < MAX(nFilesFrom, nFilesTo); ftype++) {
SDFile *pDFileFrom = ftype < nFilesFrom ? TSDB_DFILE_IN_SET(from, ftype) : NULL;
SDFile *pDFileTo = ftype < nFilesTo ? TSDB_DFILE_IN_SET(to, ftype) : NULL;
if (tsdbApplyDFileChange(pDFileFrom, pDFileTo) < 0) {
return -1;
}
}
return 0;
}
int tsdbCreateDFileSet(SDFileSet *pSet, bool updateHeader) {
for (TSDB_FILE_T ftype = 0; ftype < tsdbGetNFiles(pSet); ftype++) {
if (tsdbCreateDFile(TSDB_DFILE_IN_SET(pSet, ftype), updateHeader, ftype) < 0) {
tsdbCloseDFileSet(pSet);
tsdbRemoveDFileSet(pSet);
return -1;
}
}
return 0;
}
int tsdbUpdateDFileSetHeader(SDFileSet *pSet) {
for (TSDB_FILE_T ftype = 0; ftype < tsdbGetNFiles(pSet); ftype++) {
if (tsdbUpdateDFileHeader(TSDB_DFILE_IN_SET(pSet, ftype)) < 0) {
return -1;
}
}
return 0;
}
int tsdbScanAndTryFixDFileSet(STsdb *pRepo, SDFileSet *pSet) {
ASSERT_TSDB_FSET_NFILES_VALID(pSet);
for (TSDB_FILE_T ftype = 0; ftype < tsdbGetNFiles(pSet); ftype++) {
if (tsdbScanAndTryFixDFile(pRepo, TSDB_DFILE_IN_SET(pSet, ftype)) < 0) {
return -1;
}
}
return 0;
}
int tsdbParseDFilename(const char *fname, int *vid, int *fid, TSDB_FILE_T *ftype, uint32_t *_version) {
char *p = NULL;
*_version = 0;
*ftype = TSDB_FILE_MAX;
sscanf(fname, "v%df%d.%m[a-z]-ver%" PRIu32, vid, fid, &p, _version);
for (TSDB_FILE_T i = 0; i < TSDB_FILE_MAX; i++) {
if (strcmp(p, TSDB_FNAME_SUFFIX[i]) == 0) {
*ftype = i;
break;
}
}
tfree(p);
return 0;
}
static void tsdbGetFilename(int vid, int fid, uint32_t ver, TSDB_FILE_T ftype, char *fname) {
ASSERT(ftype != TSDB_FILE_MAX);
if (ftype < TSDB_FILE_MAX) {
if (ver == 0) {
snprintf(fname, TSDB_FILENAME_LEN, "vnode/vnode%d/tsdb/data/v%df%d.%s", vid, vid, fid, TSDB_FNAME_SUFFIX[ftype]);
} else {
snprintf(fname, TSDB_FILENAME_LEN, "vnode/vnode%d/tsdb/data/v%df%d.%s-ver%" PRIu32, vid, vid, fid,
TSDB_FNAME_SUFFIX[ftype], ver);
}
} else {
if (ver == 0) {
snprintf(fname, TSDB_FILENAME_LEN, "vnode/vnode%d/tsdb/%s", vid, TSDB_FNAME_SUFFIX[ftype]);
} else {
snprintf(fname, TSDB_FILENAME_LEN, "vnode/vnode%d/tsdb/%s-ver%" PRIu32, vid, TSDB_FNAME_SUFFIX[ftype], ver);
}
}
}
/*
* Copyright (c) 2019 TAOS Data, Inc. <jhtao@taosdata.com>
*
* This program is free software: you can use, redistribute, and/or modify
* it under the terms of the GNU Affero General Public License, version 3
* or later ("AGPL"), as published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
// no test file errors here
#include "taosdef.h"
#include "tsdbint.h"
#include "tthread.h"
#include "ttimer.h"
#define IS_VALID_PRECISION(precision) \
(((precision) >= TSDB_TIME_PRECISION_MILLI) && ((precision) <= TSDB_TIME_PRECISION_NANO))
#define TSDB_DEFAULT_COMPRESSION TWO_STAGE_COMP
#define IS_VALID_COMPRESSION(compression) (((compression) >= NO_COMPRESSION) && ((compression) <= TWO_STAGE_COMP))
static int32_t tsdbCheckAndSetDefaultCfg(STsdbCfg *pCfg);
static STsdb * tsdbNewRepo(STsdbCfg *pCfg, STsdbAppH *pAppH);
static void tsdbFreeRepo(STsdb *pRepo);
static void tsdbStartStream(STsdb *pRepo);
static void tsdbStopStream(STsdb *pRepo);
static int tsdbRestoreLastColumns(STsdb *pRepo, STable *pTable, SReadH *pReadh);
static int tsdbRestoreLastRow(STsdb *pRepo, STable *pTable, SReadH *pReadh, SBlockIdx *pIdx);
STsdb *tsdbOpen(const char *path, STsdbCfg *pCfg) {
STsdb * pTsdb;
STsdbCfg config = *pCfg;
terrno = TSDB_CODE_SUCCESS;
// Check and set default configurations
if (tsdbCheckAndSetDefaultCfg(&config) < 0) {
tsdbError("vgId:%d failed to open TSDB repository since %s", config.tsdbId, tstrerror(terrno));
return NULL;
}
// Create new TSDB object
if ((pTsdb = tsdbNewRepo(&config, pAppH)) == NULL) {
tsdbError("vgId:%d failed to open TSDB repository while creating TSDB object since %s", config.tsdbId,
tstrerror(terrno));
return NULL;
}
// Open meta
if (tsdbOpenMeta(pTsdb) < 0) {
tsdbError("vgId:%d failed to open TSDB repository while opening Meta since %s", config.tsdbId, tstrerror(terrno));
tsdbClose(pTsdb, false);
return NULL;
}
if (tsdbOpenFS(pTsdb) < 0) {
tsdbError("vgId:%d failed to open TSDB repository while opening FS since %s", config.tsdbId, tstrerror(terrno));
tsdbClose(pTsdb, false);
return NULL;
}
// TODO: Restore information from data
if ((!(pTsdb->state & TSDB_STATE_BAD_DATA)) && tsdbRestoreInfo(pTsdb) < 0) {
tsdbError("vgId:%d failed to open TSDB repository while restore info since %s", config.tsdbId, tstrerror(terrno));
tsdbClose(pTsdb, false);
return NULL;
}
pTsdb->mergeBuf = NULL;
tsdbStartStream(pTsdb);
tsdbDebug("vgId:%d, TSDB repository opened", REPO_ID(pTsdb));
return pTsdb;
}
// Note: all working thread and query thread must stopped when calling this function
int tsdbClose(STsdb *repo, int toCommit) {
if (repo == NULL) return 0;
STsdb *pRepo = repo;
int vgId = REPO_ID(pRepo);
terrno = TSDB_CODE_SUCCESS;
tsdbStopStream(pRepo);
if (pRepo->pthread) {
taosDestoryThread(pRepo->pthread);
pRepo->pthread = NULL;
}
if (toCommit) {
tsdbSyncCommit(repo);
}
tsem_wait(&(pRepo->readyToCommit));
tsdbUnRefMemTable(pRepo, pRepo->mem);
tsdbUnRefMemTable(pRepo, pRepo->imem);
pRepo->mem = NULL;
pRepo->imem = NULL;
tsdbCloseFS(pRepo);
tsdbCloseMeta(pRepo);
tsdbFreeRepo(pRepo);
tsdbDebug("vgId:%d repository is closed", vgId);
if (terrno != TSDB_CODE_SUCCESS) {
return -1;
} else {
return 0;
}
}
STsdbCfg *tsdbGetCfg(const STsdb *repo) {
ASSERT(repo != NULL);
return &((STsdb *)repo)->config;
}
int tsdbLockRepo(STsdb *pRepo) {
int code = pthread_mutex_lock(&pRepo->mutex);
if (code != 0) {
tsdbError("vgId:%d failed to lock tsdb since %s", REPO_ID(pRepo), strerror(errno));
terrno = TAOS_SYSTEM_ERROR(code);
return -1;
}
pRepo->repoLocked = true;
return 0;
}
int tsdbUnlockRepo(STsdb *pRepo) {
ASSERT(IS_REPO_LOCKED(pRepo));
pRepo->repoLocked = false;
int code = pthread_mutex_unlock(&pRepo->mutex);
if (code != 0) {
tsdbError("vgId:%d failed to unlock tsdb since %s", REPO_ID(pRepo), strerror(errno));
terrno = TAOS_SYSTEM_ERROR(code);
return -1;
}
return 0;
}
// int tsdbCheckWal(STsdbRepo *pRepo, uint32_t walSize) { // MB
// STsdbCfg *pCfg = &(pRepo->config);
// if ((walSize > tsdbWalFlushSize) && (walSize > (pCfg->totalBlocks / 2 * pCfg->cacheBlockSize))) {
// if (tsdbAsyncCommit(pRepo) < 0) return -1;
// }
// return 0;
// }
// int tsdbCheckCommit(STsdb *pRepo) {
// ASSERT(pRepo->mem != NULL);
// STsdbCfg *pCfg = &(pRepo->config);
// STsdbBufBlock *pBufBlock = tsdbGetCurrBufBlock(pRepo);
// ASSERT(pBufBlock != NULL);
// if ((pRepo->mem->extraBuffList != NULL) ||
// ((listNEles(pRepo->mem->bufBlockList) >= pCfg->totalBlocks / 3) && (pBufBlock->remain < TSDB_BUFFER_RESERVE)))
// {
// // trigger commit
// if (tsdbAsyncCommit(pRepo) < 0) return -1;
// }
// return 0;
// }
STsdbMeta *tsdbGetMeta(STsdb *pRepo) { return pRepo->tsdbMeta; }
STsdbRepoInfo *tsdbGetStatus(STsdb *pRepo) { return NULL; }
int tsdbGetState(STsdb *repo) { return repo->state; }
int8_t tsdbGetCompactState(STsdb *repo) { return (int8_t)(repo->compactState); }
void tsdbReportStat(void *repo, int64_t *totalPoints, int64_t *totalStorage, int64_t *compStorage) {
ASSERT(repo != NULL);
STsdb *pRepo = repo;
*totalPoints = pRepo->stat.pointsWritten;
*totalStorage = pRepo->stat.totalStorage;
*compStorage = pRepo->stat.compStorage;
}
int32_t tsdbConfigRepo(STsdb *repo, STsdbCfg *pCfg) {
// TODO: think about multithread cases
if (tsdbCheckAndSetDefaultCfg(pCfg) < 0) return -1;
STsdbCfg *pRCfg = &repo->config;
ASSERT(pRCfg->tsdbId == pCfg->tsdbId);
ASSERT(pRCfg->cacheBlockSize == pCfg->cacheBlockSize);
ASSERT(pRCfg->daysPerFile == pCfg->daysPerFile);
ASSERT(pRCfg->minRowsPerFileBlock == pCfg->minRowsPerFileBlock);
ASSERT(pRCfg->maxRowsPerFileBlock == pCfg->maxRowsPerFileBlock);
ASSERT(pRCfg->precision == pCfg->precision);
bool configChanged = false;
if (pRCfg->compression != pCfg->compression) {
configChanged = true;
}
if (pRCfg->keep != pCfg->keep) {
configChanged = true;
}
if (pRCfg->keep1 != pCfg->keep1) {
configChanged = true;
}
if (pRCfg->keep2 != pCfg->keep2) {
configChanged = true;
}
if (pRCfg->cacheLastRow != pCfg->cacheLastRow) {
configChanged = true;
}
if (pRCfg->totalBlocks != pCfg->totalBlocks) {
configChanged = true;
}
if (!configChanged) {
tsdbError("vgId:%d no config changed", REPO_ID(repo));
}
int code = pthread_mutex_lock(&repo->save_mutex);
if (code != 0) {
tsdbError("vgId:%d failed to lock tsdb save config mutex since %s", REPO_ID(repo), strerror(errno));
terrno = TAOS_SYSTEM_ERROR(code);
return -1;
}
STsdbCfg *pSaveCfg = &repo->save_config;
*pSaveCfg = repo->config;
pSaveCfg->compression = pCfg->compression;
pSaveCfg->keep = pCfg->keep;
pSaveCfg->keep1 = pCfg->keep1;
pSaveCfg->keep2 = pCfg->keep2;
pSaveCfg->cacheLastRow = pCfg->cacheLastRow;
pSaveCfg->totalBlocks = pCfg->totalBlocks;
tsdbInfo("vgId:%d old config: compression(%d), keep(%d,%d,%d), cacheLastRow(%d),totalBlocks(%d)", REPO_ID(repo),
pRCfg->compression, pRCfg->keep, pRCfg->keep1, pRCfg->keep2, pRCfg->cacheLastRow, pRCfg->totalBlocks);
tsdbInfo("vgId:%d new config: compression(%d), keep(%d,%d,%d), cacheLastRow(%d),totalBlocks(%d)", REPO_ID(repo),
pSaveCfg->compression, pSaveCfg->keep, pSaveCfg->keep1, pSaveCfg->keep2, pSaveCfg->cacheLastRow,
pSaveCfg->totalBlocks);
repo->config_changed = true;
pthread_mutex_unlock(&repo->save_mutex);
// schedule a commit msg and wait for the new config applied
tsdbSyncCommitConfig(repo);
return 0;
#if 0
STsdbRepo *pRepo = (STsdbRepo *)repo;
STsdbCfg config = pRepo->config;
STsdbCfg * pRCfg = &pRepo->config;
if (tsdbCheckAndSetDefaultCfg(pCfg) < 0) return -1;
ASSERT(pRCfg->tsdbId == pCfg->tsdbId);
ASSERT(pRCfg->cacheBlockSize == pCfg->cacheBlockSize);
ASSERT(pRCfg->daysPerFile == pCfg->daysPerFile);
ASSERT(pRCfg->minRowsPerFileBlock == pCfg->minRowsPerFileBlock);
ASSERT(pRCfg->maxRowsPerFileBlock == pCfg->maxRowsPerFileBlock);
ASSERT(pRCfg->precision == pCfg->precision);
bool configChanged = false;
if (pRCfg->compression != pCfg->compression) {
tsdbAlterCompression(pRepo, pCfg->compression);
config.compression = pCfg->compression;
configChanged = true;
}
if (pRCfg->keep != pCfg->keep) {
if (tsdbAlterKeep(pRepo, pCfg->keep) < 0) {
tsdbError("vgId:%d failed to configure repo when alter keep since %s", REPO_ID(pRepo), tstrerror(terrno));
config.keep = pCfg->keep;
return -1;
}
configChanged = true;
}
if (pRCfg->totalBlocks != pCfg->totalBlocks) {
tsdbAlterCacheTotalBlocks(pRepo, pCfg->totalBlocks);
config.totalBlocks = pCfg->totalBlocks;
configChanged = true;
}
if (pRCfg->cacheLastRow != pCfg->cacheLastRow) {
config.cacheLastRow = pCfg->cacheLastRow;
configChanged = true;
}
if (configChanged) {
if (tsdbSaveConfig(pRepo->rootDir, &config) < 0) {
tsdbError("vgId:%d failed to configure repository while save config since %s", REPO_ID(pRepo), tstrerror(terrno));
return -1;
}
}
return 0;
#endif
}
void tsdbGetRootDir(int repoid, char dirName[]) { snprintf(dirName, TSDB_FILENAME_LEN, "vnode/vnode%d/tsdb", repoid); }
void tsdbGetDataDir(int repoid, char dirName[]) {
snprintf(dirName, TSDB_FILENAME_LEN, "vnode/vnode%d/tsdb/data", repoid);
}
static int32_t tsdbCheckAndSetDefaultCfg(STsdbCfg *pCfg) {
// Check tsdbId
if (pCfg->tsdbId < 0) {
tsdbError("vgId:%d invalid vgroup ID", pCfg->tsdbId);
terrno = TSDB_CODE_TDB_INVALID_CONFIG;
return -1;
}
// Check precision
if (pCfg->precision == -1) {
pCfg->precision = TSDB_DEFAULT_PRECISION;
} else {
if (!IS_VALID_PRECISION(pCfg->precision)) {
tsdbError("vgId:%d invalid precision configuration %d", pCfg->tsdbId, pCfg->precision);
terrno = TSDB_CODE_TDB_INVALID_CONFIG;
return -1;
}
}
// Check compression
if (pCfg->compression == -1) {
pCfg->compression = TSDB_DEFAULT_COMPRESSION;
} else {
if (!IS_VALID_COMPRESSION(pCfg->compression)) {
tsdbError("vgId:%d invalid compression configuration %d", pCfg->tsdbId, pCfg->precision);
terrno = TSDB_CODE_TDB_INVALID_CONFIG;
return -1;
}
}
// Check daysPerFile
if (pCfg->daysPerFile == -1) {
pCfg->daysPerFile = TSDB_DEFAULT_DAYS_PER_FILE;
} else {
if (pCfg->daysPerFile < TSDB_MIN_DAYS_PER_FILE || pCfg->daysPerFile > TSDB_MAX_DAYS_PER_FILE) {
tsdbError(
"vgId:%d invalid daysPerFile configuration! daysPerFile %d TSDB_MIN_DAYS_PER_FILE %d TSDB_MAX_DAYS_PER_FILE "
"%d",
pCfg->tsdbId, pCfg->daysPerFile, TSDB_MIN_DAYS_PER_FILE, TSDB_MAX_DAYS_PER_FILE);
terrno = TSDB_CODE_TDB_INVALID_CONFIG;
return -1;
}
}
// Check minRowsPerFileBlock and maxRowsPerFileBlock
if (pCfg->minRowsPerFileBlock == -1) {
pCfg->minRowsPerFileBlock = TSDB_DEFAULT_MIN_ROW_FBLOCK;
} else {
if (pCfg->minRowsPerFileBlock < TSDB_MIN_MIN_ROW_FBLOCK || pCfg->minRowsPerFileBlock > TSDB_MAX_MIN_ROW_FBLOCK) {
tsdbError(
"vgId:%d invalid minRowsPerFileBlock configuration! minRowsPerFileBlock %d TSDB_MIN_MIN_ROW_FBLOCK %d "
"TSDB_MAX_MIN_ROW_FBLOCK %d",
pCfg->tsdbId, pCfg->minRowsPerFileBlock, TSDB_MIN_MIN_ROW_FBLOCK, TSDB_MAX_MIN_ROW_FBLOCK);
terrno = TSDB_CODE_TDB_INVALID_CONFIG;
return -1;
}
}
if (pCfg->maxRowsPerFileBlock == -1) {
pCfg->maxRowsPerFileBlock = TSDB_DEFAULT_MAX_ROW_FBLOCK;
} else {
if (pCfg->maxRowsPerFileBlock < TSDB_MIN_MAX_ROW_FBLOCK || pCfg->maxRowsPerFileBlock > TSDB_MAX_MAX_ROW_FBLOCK) {
tsdbError(
"vgId:%d invalid maxRowsPerFileBlock configuration! maxRowsPerFileBlock %d TSDB_MIN_MAX_ROW_FBLOCK %d "
"TSDB_MAX_MAX_ROW_FBLOCK %d",
pCfg->tsdbId, pCfg->maxRowsPerFileBlock, TSDB_MIN_MIN_ROW_FBLOCK, TSDB_MAX_MIN_ROW_FBLOCK);
terrno = TSDB_CODE_TDB_INVALID_CONFIG;
return -1;
}
}
if (pCfg->minRowsPerFileBlock > pCfg->maxRowsPerFileBlock) {
tsdbError("vgId:%d invalid configuration! minRowsPerFileBlock %d maxRowsPerFileBlock %d", pCfg->tsdbId,
pCfg->minRowsPerFileBlock, pCfg->maxRowsPerFileBlock);
terrno = TSDB_CODE_TDB_INVALID_CONFIG;
return -1;
}
// Check keep
#if 0 // already checked and set in mnodeSetDefaultDbCfg
if (pCfg->keep == -1) {
pCfg->keep = TSDB_DEFAULT_KEEP;
} else {
if (pCfg->keep < TSDB_MIN_KEEP || pCfg->keep > TSDB_MAX_KEEP) {
tsdbError(
"vgId:%d invalid keep configuration! keep %d TSDB_MIN_KEEP %d "
"TSDB_MAX_KEEP %d",
pCfg->tsdbId, pCfg->keep, TSDB_MIN_KEEP, TSDB_MAX_KEEP);
terrno = TSDB_CODE_TDB_INVALID_CONFIG;
return -1;
}
}
if (pCfg->keep1 == 0) {
pCfg->keep1 = pCfg->keep;
}
if (pCfg->keep2 == 0) {
pCfg->keep2 = pCfg->keep;
}
#endif
int32_t keepMin = pCfg->keep1;
int32_t keepMid = pCfg->keep2;
int32_t keepMax = pCfg->keep;
if (keepMin > keepMid) {
SWAP(keepMin, keepMid, int32_t);
}
if (keepMin > keepMax) {
SWAP(keepMin, keepMax, int32_t);
}
if (keepMid > keepMax) {
SWAP(keepMid, keepMax, int32_t);
}
pCfg->keep = keepMax;
pCfg->keep1 = keepMin;
pCfg->keep2 = keepMid;
// update check
if (pCfg->update < TD_ROW_DISCARD_UPDATE || pCfg->update > TD_ROW_PARTIAL_UPDATE)
pCfg->update = TD_ROW_DISCARD_UPDATE;
// update cacheLastRow
if (pCfg->cacheLastRow != 0) {
if (pCfg->cacheLastRow > 3) pCfg->cacheLastRow = 1;
}
return 0;
}
static STsdb *tsdbNewRepo(STsdbCfg *pCfg, STsdbAppH *pAppH) {
STsdb *pRepo = (STsdb *)calloc(1, sizeof(*pRepo));
if (pRepo == NULL) {
terrno = TSDB_CODE_TDB_OUT_OF_MEMORY;
return NULL;
}
pRepo->state = TSDB_STATE_OK;
pRepo->code = TSDB_CODE_SUCCESS;
pRepo->compactState = 0;
pRepo->config = *pCfg;
if (pAppH) {
pRepo->appH = *pAppH;
}
pRepo->repoLocked = false;
pRepo->pthread = NULL;
int code = pthread_mutex_init(&(pRepo->mutex), NULL);
if (code != 0) {
terrno = TAOS_SYSTEM_ERROR(code);
tsdbFreeRepo(pRepo);
return NULL;
}
code = pthread_mutex_init(&(pRepo->save_mutex), NULL);
if (code != 0) {
terrno = TAOS_SYSTEM_ERROR(code);
tsdbFreeRepo(pRepo);
return NULL;
}
pRepo->config_changed = false;
pRepo->cacheLastConfigVersion = 0;
code = tsem_init(&(pRepo->readyToCommit), 0, 1);
if (code != 0) {
code = errno;
terrno = TAOS_SYSTEM_ERROR(code);
tsdbFreeRepo(pRepo);
return NULL;
}
pRepo->tsdbMeta = tsdbNewMeta(pCfg);
if (pRepo->tsdbMeta == NULL) {
tsdbError("vgId:%d failed to create meta since %s", REPO_ID(pRepo), tstrerror(terrno));
tsdbFreeRepo(pRepo);
return NULL;
}
pRepo->fs = tsdbNewFS(pCfg);
if (pRepo->fs == NULL) {
tsdbError("vgId:%d failed to TSDB file system since %s", REPO_ID(pRepo), tstrerror(terrno));
tsdbFreeRepo(pRepo);
return NULL;
}
return pRepo;
}
static void tsdbFreeRepo(STsdb *pRepo) {
if (pRepo) {
tsdbFreeFS(pRepo->fs);
tsdbFreeMeta(pRepo->tsdbMeta);
tsdbFreeMergeBuf(pRepo->mergeBuf);
// tsdbFreeMemTable(pRepo->mem);
// tsdbFreeMemTable(pRepo->imem);
tsem_destroy(&(pRepo->readyToCommit));
pthread_mutex_destroy(&pRepo->mutex);
free(pRepo);
}
}
static void tsdbStartStream(STsdb *pRepo) {
STsdbMeta *pMeta = pRepo->tsdbMeta;
for (int i = 0; i < pMeta->maxTables; i++) {
STable *pTable = pMeta->tables[i];
if (pTable && pTable->type == TSDB_STREAM_TABLE) {
pTable->cqhandle =
(*pRepo->appH.cqCreateFunc)(pRepo->appH.cqH, TABLE_UID(pTable), TABLE_TID(pTable), TABLE_NAME(pTable)->data,
pTable->sql, tsdbGetTableSchemaImpl(pTable, false, false, -1, -1), 0);
}
}
}
static void tsdbStopStream(STsdb *pRepo) {
STsdbMeta *pMeta = pRepo->tsdbMeta;
for (int i = 0; i < pMeta->maxTables; i++) {
STable *pTable = pMeta->tables[i];
if (pTable && pTable->type == TSDB_STREAM_TABLE) {
(*pRepo->appH.cqDropFunc)(pTable->cqhandle);
}
}
}
static int tsdbRestoreLastColumns(STsdb *pRepo, STable *pTable, SReadH *pReadh) {
// tsdbInfo("tsdbRestoreLastColumns of table %s", pTable->name->data);
STSchema *pSchema = tsdbGetTableLatestSchema(pTable);
if (pSchema == NULL) {
tsdbError("tsdbGetTableLatestSchema of table %s fail", pTable->name->data);
return 0;
}
SBlock * pBlock;
int numColumns;
int32_t blockIdx;
SDataStatis *pBlockStatis = NULL;
// SMemRow row = NULL;
// restore last column data with last schema
int err = 0;
numColumns = schemaNCols(pSchema);
if (numColumns <= pTable->restoreColumnNum) {
pTable->hasRestoreLastColumn = true;
return 0;
}
if (pTable->lastColSVersion != schemaVersion(pSchema)) {
if (tsdbInitColIdCacheWithSchema(pTable, pSchema) < 0) {
return -1;
}
}
// row = taosTMalloc(memRowMaxBytesFromSchema(pSchema));
// if (row == NULL) {
// terrno = TSDB_CODE_TDB_OUT_OF_MEMORY;
// err = -1;
// goto out;
// }
// memRowSetType(row, SMEM_ROW_DATA);
// tdInitDataRow(memRowDataBody(row), pSchema);
// first load block index info
if (tsdbLoadBlockInfo(pReadh, NULL, NULL) < 0) {
err = -1;
goto out;
}
pBlockStatis = calloc(numColumns, sizeof(SDataStatis));
if (pBlockStatis == NULL) {
terrno = TSDB_CODE_TDB_OUT_OF_MEMORY;
err = -1;
goto out;
}
memset(pBlockStatis, 0, numColumns * sizeof(SDataStatis));
for (int32_t i = 0; i < numColumns; ++i) {
STColumn *pCol = schemaColAt(pSchema, i);
pBlockStatis[i].colId = pCol->colId;
}
// load block from backward
SBlockIdx *pIdx = pReadh->pBlkIdx;
blockIdx = (int32_t)(pIdx->numOfBlocks - 1);
while (numColumns > pTable->restoreColumnNum && blockIdx >= 0) {
bool loadStatisData = false;
pBlock = pReadh->pBlkInfo->blocks + blockIdx;
blockIdx -= 1;
// load block data
if (tsdbLoadBlockData(pReadh, pBlock, NULL) < 0) {
err = -1;
goto out;
}
// file block with sub-blocks has no statistics data
if (pBlock->numOfSubBlocks <= 1) {
if (tsdbLoadBlockStatis(pReadh, pBlock) == 0) {
tsdbGetBlockStatis(pReadh, pBlockStatis, (int)numColumns, pBlock);
loadStatisData = true;
}
}
TSDB_WLOCK_TABLE(pTable); // lock when update pTable->lastCols[]
for (int16_t i = 0; i < numColumns && numColumns > pTable->restoreColumnNum; ++i) {
STColumn *pCol = schemaColAt(pSchema, i);
// ignore loaded columns
if (pTable->lastCols[i].bytes != 0) {
continue;
}
// ignore block which has no not-null colId column
if (loadStatisData && pBlockStatis[i].numOfNull == pBlock->numOfRows) {
continue;
}
// OK,let's load row from backward to get not-null column
for (int32_t rowId = pBlock->numOfRows - 1; rowId >= 0; rowId--) {
SDataCol * pDataCol = pReadh->pDCols[0]->cols + i;
const void *pColData = tdGetColDataOfRow(pDataCol, rowId);
// tdAppendColVal(memRowDataBody(row), pColData, pCol->type, pCol->offset);
// SDataCol *pDataCol = readh.pDCols[0]->cols + j;
// void *value = tdGetRowDataOfCol(memRowDataBody(row), (int8_t)pCol->type, TD_DATA_ROW_HEAD_SIZE +
//
// pCol->offset);
if (isNull(pColData, pCol->type)) {
continue;
}
int16_t idx = tsdbGetLastColumnsIndexByColId(pTable, pCol->colId);
if (idx == -1) {
tsdbError("tsdbRestoreLastColumns restore vgId:%d,table:%s cache column %d fail", REPO_ID(pRepo),
pTable->name->data, pCol->colId);
continue;
}
// save not-null column
uint16_t bytes = IS_VAR_DATA_TYPE(pCol->type) ? varDataTLen(pColData) : pCol->bytes;
SDataCol *pLastCol = &(pTable->lastCols[idx]);
pLastCol->pData = malloc(bytes);
pLastCol->bytes = bytes;
pLastCol->colId = pCol->colId;
memcpy(pLastCol->pData, pColData, bytes);
// save row ts(in column 0)
pDataCol = pReadh->pDCols[0]->cols + 0;
// pCol = schemaColAt(pSchema, 0);
// tdAppendColVal(memRowDataBody(row), tdGetColDataOfRow(pDataCol, rowId), pCol->type, pCol->offset);
// pLastCol->ts = memRowKey(row);
pLastCol->ts = tdGetKey(*(TKEY *)(tdGetColDataOfRow(pDataCol, rowId)));
pTable->restoreColumnNum += 1;
tsdbDebug("tsdbRestoreLastColumns restore vgId:%d,table:%s cache column %d, %" PRId64, REPO_ID(pRepo),
pTable->name->data, pLastCol->colId, pLastCol->ts);
break;
}
}
TSDB_WUNLOCK_TABLE(pTable);
}
out:
// taosTZfree(row);
tfree(pBlockStatis);
if (err == 0 && numColumns <= pTable->restoreColumnNum) {
pTable->hasRestoreLastColumn = true;
}
return err;
}
static int tsdbRestoreLastRow(STsdb *pRepo, STable *pTable, SReadH *pReadh, SBlockIdx *pIdx) {
ASSERT(pTable->lastRow == NULL);
if (tsdbLoadBlockInfo(pReadh, NULL, NULL) < 0) {
return -1;
}
SBlock *pBlock = pReadh->pBlkInfo->blocks + pIdx->numOfBlocks - 1;
if (tsdbLoadBlockData(pReadh, pBlock, NULL) < 0) {
return -1;
}
// Get the data in row
STSchema *pSchema = tsdbGetTableSchema(pTable);
SMemRow lastRow = taosTMalloc(memRowMaxBytesFromSchema(pSchema));
if (lastRow == NULL) {
terrno = TSDB_CODE_TDB_OUT_OF_MEMORY;
return -1;
}
memRowSetType(lastRow, SMEM_ROW_DATA);
tdInitDataRow(memRowDataBody(lastRow), pSchema);
for (int icol = 0; icol < schemaNCols(pSchema); icol++) {
STColumn *pCol = schemaColAt(pSchema, icol);
SDataCol *pDataCol = pReadh->pDCols[0]->cols + icol;
tdAppendColVal(memRowDataBody(lastRow), tdGetColDataOfRow(pDataCol, pBlock->numOfRows - 1), pCol->type,
pCol->offset);
}
TSKEY lastKey = memRowKey(lastRow);
// during the load data in file, new data would be inserted and last row has been updated
TSDB_WLOCK_TABLE(pTable);
if (pTable->lastRow == NULL) {
pTable->lastKey = lastKey;
pTable->lastRow = lastRow;
TSDB_WUNLOCK_TABLE(pTable);
} else {
TSDB_WUNLOCK_TABLE(pTable);
taosTZfree(lastRow);
}
return 0;
}
int tsdbRestoreInfo(STsdb *pRepo) {
SFSIter fsiter;
SReadH readh;
SDFileSet *pSet;
STsdbMeta *pMeta = pRepo->tsdbMeta;
STsdbCfg * pCfg = REPO_CFG(pRepo);
if (tsdbInitReadH(&readh, pRepo) < 0) {
return -1;
}
tsdbFSIterInit(&fsiter, REPO_FS(pRepo), TSDB_FS_ITER_BACKWARD);
if (CACHE_LAST_NULL_COLUMN(pCfg)) {
for (int i = 1; i < pMeta->maxTables; i++) {
STable *pTable = pMeta->tables[i];
if (pTable == NULL) continue;
pTable->restoreColumnNum = 0;
pTable->hasRestoreLastColumn = false;
}
}
while ((pSet = tsdbFSIterNext(&fsiter)) != NULL) {
if (tsdbSetAndOpenReadFSet(&readh, pSet) < 0) {
tsdbDestroyReadH(&readh);
return -1;
}
if (tsdbLoadBlockIdx(&readh) < 0) {
tsdbDestroyReadH(&readh);
return -1;
}
for (int i = 1; i < pMeta->maxTables; i++) {
STable *pTable = pMeta->tables[i];
if (pTable == NULL) continue;
// tsdbInfo("tsdbRestoreInfo restore vgId:%d,table:%s", REPO_ID(pRepo), pTable->name->data);
if (tsdbSetReadTable(&readh, pTable) < 0) {
tsdbDestroyReadH(&readh);
return -1;
}
TSKEY lastKey = tsdbGetTableLastKeyImpl(pTable);
SBlockIdx *pIdx = readh.pBlkIdx;
if (pIdx && lastKey < pIdx->maxKey) {
pTable->lastKey = pIdx->maxKey;
if (CACHE_LAST_ROW(pCfg) && tsdbRestoreLastRow(pRepo, pTable, &readh, pIdx) != 0) {
tsdbDestroyReadH(&readh);
return -1;
}
}
// restore NULL columns
if (pIdx && CACHE_LAST_NULL_COLUMN(pCfg) && !pTable->hasRestoreLastColumn) {
if (tsdbRestoreLastColumns(pRepo, pTable, &readh) != 0) {
tsdbDestroyReadH(&readh);
return -1;
}
}
}
}
tsdbDestroyReadH(&readh);
// if (CACHE_LAST_NULL_COLUMN(pCfg)) {
// atomic_store_8(&pRepo->hasCachedLastColumn, 1);
// }
return 0;
}
int32_t tsdbLoadLastCache(STsdb *pRepo, STable *pTable) {
SFSIter fsiter;
SReadH readh;
SDFileSet *pSet;
int cacheLastRowTableNum = 0;
int cacheLastColTableNum = 0;
bool cacheLastRow = CACHE_LAST_ROW(&(pRepo->config));
bool cacheLastCol = CACHE_LAST_NULL_COLUMN(&(pRepo->config));
tsdbDebug("tsdbLoadLastCache for %s, cacheLastRow:%d, cacheLastCol:%d", pTable->name->data, cacheLastRow,
cacheLastCol);
pTable->cacheLastConfigVersion = pRepo->cacheLastConfigVersion;
if (!cacheLastRow && pTable->lastRow != NULL) {
taosTZfree(pTable->lastRow);
pTable->lastRow = NULL;
}
if (!cacheLastCol && pTable->lastCols != NULL) {
tsdbFreeLastColumns(pTable);
}
if (!cacheLastRow && !cacheLastCol) {
return 0;
}
cacheLastRowTableNum = (cacheLastRow && pTable->lastRow == NULL) ? 1 : 0;
cacheLastColTableNum = (cacheLastCol && pTable->lastCols == NULL) ? 1 : 0;
if (cacheLastRowTableNum == 0 && cacheLastColTableNum == 0) {
return 0;
}
if (tsdbInitReadH(&readh, pRepo) < 0) {
return -1;
}
tsdbRLockFS(REPO_FS(pRepo));
tsdbFSIterInit(&fsiter, REPO_FS(pRepo), TSDB_FS_ITER_BACKWARD);
while ((cacheLastRowTableNum > 0 || cacheLastColTableNum > 0) && (pSet = tsdbFSIterNext(&fsiter)) != NULL) {
if (tsdbSetAndOpenReadFSet(&readh, pSet) < 0) {
tsdbUnLockFS(REPO_FS(pRepo));
tsdbDestroyReadH(&readh);
return -1;
}
if (tsdbLoadBlockIdx(&readh) < 0) {
tsdbUnLockFS(REPO_FS(pRepo));
tsdbDestroyReadH(&readh);
return -1;
}
// tsdbDebug("tsdbRestoreInfo restore vgId:%d,table:%s", REPO_ID(pRepo), pTable->name->data);
if (tsdbSetReadTable(&readh, pTable) < 0) {
tsdbUnLockFS(REPO_FS(pRepo));
tsdbDestroyReadH(&readh);
return -1;
}
SBlockIdx *pIdx = readh.pBlkIdx;
if (pIdx && (cacheLastRowTableNum > 0) && (pTable->lastRow == NULL)) {
if (tsdbRestoreLastRow(pRepo, pTable, &readh, pIdx) != 0) {
tsdbUnLockFS(REPO_FS(pRepo));
tsdbDestroyReadH(&readh);
return -1;
}
cacheLastRowTableNum -= 1;
}
// restore NULL columns
if (pIdx && (cacheLastColTableNum > 0) && !pTable->hasRestoreLastColumn) {
if (tsdbRestoreLastColumns(pRepo, pTable, &readh) != 0) {
tsdbUnLockFS(REPO_FS(pRepo));
tsdbDestroyReadH(&readh);
return -1;
}
if (pTable->hasRestoreLastColumn) {
cacheLastColTableNum -= 1;
}
}
}
tsdbUnLockFS(REPO_FS(pRepo));
tsdbDestroyReadH(&readh);
return 0;
}
UNUSED_FUNC int tsdbCacheLastData(STsdb *pRepo, STsdbCfg *oldCfg) {
bool cacheLastRow = false, cacheLastCol = false;
SFSIter fsiter;
SReadH readh;
SDFileSet *pSet;
STsdbMeta *pMeta = pRepo->tsdbMeta;
int tableNum = 0;
int maxTableIdx = 0;
int cacheLastRowTableNum = 0;
int cacheLastColTableNum = 0;
bool need_free_last_row = CACHE_LAST_ROW(oldCfg) && !CACHE_LAST_ROW(&(pRepo->config));
bool need_free_last_col = CACHE_LAST_NULL_COLUMN(oldCfg) && !CACHE_LAST_NULL_COLUMN(&(pRepo->config));
if (CACHE_LAST_ROW(&(pRepo->config)) || CACHE_LAST_NULL_COLUMN(&(pRepo->config))) {
tsdbInfo("tsdbCacheLastData cache last data since cacheLast option changed");
cacheLastRow = !CACHE_LAST_ROW(oldCfg) && CACHE_LAST_ROW(&(pRepo->config));
cacheLastCol = !CACHE_LAST_NULL_COLUMN(oldCfg) && CACHE_LAST_NULL_COLUMN(&(pRepo->config));
}
// calc max table idx and table num
for (int i = 1; i < pMeta->maxTables; i++) {
STable *pTable = pMeta->tables[i];
if (pTable == NULL) continue;
tableNum += 1;
maxTableIdx = i;
if (cacheLastCol) {
pTable->restoreColumnNum = 0;
pTable->hasRestoreLastColumn = false;
}
}
// if close last option,need to free data
if (need_free_last_row || need_free_last_col) {
// if (need_free_last_col) {
// atomic_store_8(&pRepo->hasCachedLastColumn, 0);
// }
tsdbInfo("free cache last data since cacheLast option changed");
for (int i = 1; i <= maxTableIdx; i++) {
STable *pTable = pMeta->tables[i];
if (pTable == NULL) continue;
if (need_free_last_row) {
taosTZfree(pTable->lastRow);
pTable->lastRow = NULL;
}
if (need_free_last_col) {
tsdbFreeLastColumns(pTable);
pTable->hasRestoreLastColumn = false;
}
}
}
if (!cacheLastRow && !cacheLastCol) {
return 0;
}
cacheLastRowTableNum = cacheLastRow ? tableNum : 0;
cacheLastColTableNum = cacheLastCol ? tableNum : 0;
if (tsdbInitReadH(&readh, pRepo) < 0) {
return -1;
}
tsdbFSIterInit(&fsiter, REPO_FS(pRepo), TSDB_FS_ITER_BACKWARD);
while ((pSet = tsdbFSIterNext(&fsiter)) != NULL && (cacheLastRowTableNum > 0 || cacheLastColTableNum > 0)) {
if (tsdbSetAndOpenReadFSet(&readh, pSet) < 0) {
tsdbDestroyReadH(&readh);
return -1;
}
if (tsdbLoadBlockIdx(&readh) < 0) {
tsdbDestroyReadH(&readh);
return -1;
}
for (int i = 1; i <= maxTableIdx; i++) {
STable *pTable = pMeta->tables[i];
if (pTable == NULL) continue;
// tsdbInfo("tsdbRestoreInfo restore vgId:%d,table:%s", REPO_ID(pRepo), pTable->name->data);
if (tsdbSetReadTable(&readh, pTable) < 0) {
tsdbDestroyReadH(&readh);
return -1;
}
SBlockIdx *pIdx = readh.pBlkIdx;
if (pIdx && cacheLastRowTableNum > 0 && pTable->lastRow == NULL) {
pTable->lastKey = pIdx->maxKey;
if (tsdbRestoreLastRow(pRepo, pTable, &readh, pIdx) != 0) {
tsdbDestroyReadH(&readh);
return -1;
}
cacheLastRowTableNum -= 1;
}
// restore NULL columns
if (pIdx && cacheLastColTableNum > 0 && !pTable->hasRestoreLastColumn) {
if (tsdbRestoreLastColumns(pRepo, pTable, &readh) != 0) {
tsdbDestroyReadH(&readh);
return -1;
}
if (pTable->hasRestoreLastColumn) {
cacheLastColTableNum -= 1;
}
}
}
}
tsdbDestroyReadH(&readh);
// if (cacheLastCol) {
// atomic_store_8(&pRepo->hasCachedLastColumn, 1);
// }
return 0;
}
/*
* Copyright (c) 2019 TAOS Data, Inc. <jhtao@taosdata.com>
*
* This program is free software: you can use, redistribute, and/or modify
* it under the terms of the GNU Affero General Public License, version 3
* or later ("AGPL"), as published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#if 0
#include "tdataformat.h"
#include "tfunctional.h"
#include "tsdbRowMergeBuf.h"
#include "tsdbint.h"
#include "tskiplist.h"
#include "ttime.h"
#define TSDB_DATA_SKIPLIST_LEVEL 5
#define TSDB_MAX_INSERT_BATCH 512
static SMemTable * tsdbNewMemTable(STsdb *pRepo);
static void tsdbFreeMemTable(SMemTable *pMemTable);
static STableData *tsdbNewTableData(STsdbCfg *pCfg, STable *pTable);
static void tsdbFreeTableData(STableData *pTableData);
static char * tsdbGetTsTupleKey(const void *data);
static int tsdbAdjustMemMaxTables(SMemTable *pMemTable, int maxTables);
static int tsdbAppendTableRowToCols(STable *pTable, SDataCols *pCols, STSchema **ppSchema, SMemRow row);
static int tsdbScanAndConvertSubmitMsg(STsdb *pRepo, SSubmitMsg *pMsg);
static int tsdbInsertDataToTable(STsdb *pRepo, SSubmitBlk *pBlock, int32_t *affectedrows);
static int tsdbCheckTableSchema(STsdb *pRepo, SSubmitBlk *pBlock, STable *pTable);
static int tsdbUpdateTableLatestInfo(STsdb *pRepo, STable *pTable, SMemRow row);
static FORCE_INLINE int tsdbCheckRowRange(STsdb *pRepo, STable *pTable, SMemRow row, TSKEY minKey, TSKEY maxKey,
TSKEY now);
int32_t tsdbInsertData(STsdb *repo, SSubmitMsg *pMsg, SShellSubmitRspMsg *pRsp) {
STsdb * pRepo = repo;
SSubmitMsgIter msgIter = {0};
SSubmitBlk * pBlock = NULL;
int32_t affectedrows = 0, numOfRows = 0;
if (tsdbScanAndConvertSubmitMsg(pRepo, pMsg) < 0) {
if (terrno != TSDB_CODE_TDB_TABLE_RECONFIGURE) {
tsdbError("vgId:%d failed to insert data since %s", REPO_ID(pRepo), tstrerror(terrno));
}
return -1;
}
tInitSubmitMsgIter(pMsg, &msgIter);
while (true) {
tGetSubmitMsgNext(&msgIter, &pBlock);
if (pBlock == NULL) break;
if (tsdbInsertDataToTable(pRepo, pBlock, &affectedrows) < 0) {
return -1;
}
numOfRows += pBlock->numOfRows;
}
if (pRsp != NULL) {
pRsp->affectedRows = htonl(affectedrows);
pRsp->numOfRows = htonl(numOfRows);
}
if (tsdbCheckCommit(pRepo) < 0) return -1;
return 0;
}
// ---------------- INTERNAL FUNCTIONS ----------------
int tsdbRefMemTable(STsdb *pRepo, SMemTable *pMemTable) {
if (pMemTable == NULL) return 0;
int ref = T_REF_INC(pMemTable);
tsdbDebug("vgId:%d ref memtable %p ref %d", REPO_ID(pRepo), pMemTable, ref);
return 0;
}
// Need to lock the repository
int tsdbUnRefMemTable(STsdb *pRepo, SMemTable *pMemTable) {
if (pMemTable == NULL) return 0;
int ref = T_REF_DEC(pMemTable);
tsdbDebug("vgId:%d unref memtable %p ref %d", REPO_ID(pRepo), pMemTable, ref);
if (ref == 0) {
STsdbBufPool *pBufPool = pRepo->pPool;
SListNode *pNode = NULL;
bool addNew = false;
if (tsdbLockRepo(pRepo) < 0) return -1;
while ((pNode = tdListPopHead(pMemTable->bufBlockList)) != NULL) {
if (pBufPool->nRecycleBlocks > 0) {
tsdbRecycleBufferBlock(pBufPool, pNode, false);
pBufPool->nRecycleBlocks -= 1;
} else {
if (pBufPool->nElasticBlocks > 0 && listNEles(pBufPool->bufBlockList) > 2) {
tsdbRecycleBufferBlock(pBufPool, pNode, true);
} else {
tdListAppendNode(pBufPool->bufBlockList, pNode);
addNew = true;
}
}
}
if (addNew) {
int code = pthread_cond_signal(&pBufPool->poolNotEmpty);
if (code != 0) {
if (tsdbUnlockRepo(pRepo) < 0) return -1;
tsdbError("vgId:%d failed to signal pool not empty since %s", REPO_ID(pRepo), strerror(code));
terrno = TAOS_SYSTEM_ERROR(code);
return -1;
}
}
if (tsdbUnlockRepo(pRepo) < 0) return -1;
for (int i = 0; i < pMemTable->maxTables; i++) {
if (pMemTable->tData[i] != NULL) {
tsdbFreeTableData(pMemTable->tData[i]);
}
}
tdListDiscard(pMemTable->actList);
tdListDiscard(pMemTable->bufBlockList);
tsdbFreeMemTable(pMemTable);
}
return 0;
}
int tsdbTakeMemSnapshot(STsdb *pRepo, SMemSnapshot *pSnapshot, SArray *pATable) {
memset(pSnapshot, 0, sizeof(*pSnapshot));
if (tsdbLockRepo(pRepo) < 0) return -1;
pSnapshot->omem = pRepo->mem;
pSnapshot->imem = pRepo->imem;
tsdbRefMemTable(pRepo, pRepo->mem);
tsdbRefMemTable(pRepo, pRepo->imem);
if (tsdbUnlockRepo(pRepo) < 0) return -1;
if (pSnapshot->omem) {
taosRLockLatch(&(pSnapshot->omem->latch));
pSnapshot->mem = &(pSnapshot->mtable);
pSnapshot->mem->tData = (STableData **)calloc(pSnapshot->omem->maxTables, sizeof(STableData *));
if (pSnapshot->mem->tData == NULL) {
terrno = TSDB_CODE_TDB_OUT_OF_MEMORY;
taosRUnLockLatch(&(pSnapshot->omem->latch));
tsdbUnRefMemTable(pRepo, pSnapshot->omem);
tsdbUnRefMemTable(pRepo, pSnapshot->imem);
pSnapshot->mem = NULL;
pSnapshot->imem = NULL;
pSnapshot->omem = NULL;
return -1;
}
pSnapshot->mem->keyFirst = pSnapshot->omem->keyFirst;
pSnapshot->mem->keyLast = pSnapshot->omem->keyLast;
pSnapshot->mem->numOfRows = pSnapshot->omem->numOfRows;
pSnapshot->mem->maxTables = pSnapshot->omem->maxTables;
for (size_t i = 0; i < taosArrayGetSize(pATable); i++) {
STable * pTable = *(STable **)taosArrayGet(pATable, i);
int32_t tid = TABLE_TID(pTable);
STableData *pTableData = (tid < pSnapshot->omem->maxTables) ? pSnapshot->omem->tData[tid] : NULL;
if ((pTableData == NULL) || (TABLE_UID(pTable) != pTableData->uid)) continue;
pSnapshot->mem->tData[tid] = pTableData;
T_REF_INC(pTableData);
}
taosRUnLockLatch(&(pSnapshot->omem->latch));
}
tsdbDebug("vgId:%d take memory snapshot, pMem %p pIMem %p", REPO_ID(pRepo), pSnapshot->omem, pSnapshot->imem);
return 0;
}
void tsdbUnTakeMemSnapShot(STsdb *pRepo, SMemSnapshot *pSnapshot) {
tsdbDebug("vgId:%d untake memory snapshot, pMem %p pIMem %p", REPO_ID(pRepo), pSnapshot->omem, pSnapshot->imem);
if (pSnapshot->mem) {
ASSERT(pSnapshot->omem != NULL);
for (size_t i = 0; i < pSnapshot->mem->maxTables; i++) {
STableData *pTableData = pSnapshot->mem->tData[i];
if (pTableData) {
tsdbFreeTableData(pTableData);
}
}
tfree(pSnapshot->mem->tData);
tsdbUnRefMemTable(pRepo, pSnapshot->omem);
}
tsdbUnRefMemTable(pRepo, pSnapshot->imem);
pSnapshot->mem = NULL;
pSnapshot->imem = NULL;
pSnapshot->omem = NULL;
}
void *tsdbAllocBytes(STsdb *pRepo, int bytes) {
STsdbCfg * pCfg = &pRepo->config;
STsdbBufBlock *pBufBlock = NULL;
void * ptr = NULL;
// Either allocate from buffer blocks or from SYSTEM memory pool
if (pRepo->mem == NULL) {
SMemTable *pMemTable = tsdbNewMemTable(pRepo);
if (pMemTable == NULL) return NULL;
pRepo->mem = pMemTable;
}
ASSERT(pRepo->mem != NULL);
pBufBlock = tsdbGetCurrBufBlock(pRepo);
if ((pRepo->mem->extraBuffList != NULL) ||
((listNEles(pRepo->mem->bufBlockList) >= pCfg->totalBlocks / 3) && (pBufBlock->remain < bytes))) {
// allocate from SYSTEM buffer pool
if (pRepo->mem->extraBuffList == NULL) {
pRepo->mem->extraBuffList = tdListNew(0);
if (pRepo->mem->extraBuffList == NULL) {
terrno = TSDB_CODE_TDB_OUT_OF_MEMORY;
return NULL;
}
}
ASSERT(pRepo->mem->extraBuffList != NULL);
SListNode *pNode = (SListNode *)malloc(sizeof(SListNode) + bytes);
if (pNode == NULL) {
if (listNEles(pRepo->mem->extraBuffList) == 0) {
tdListFree(pRepo->mem->extraBuffList);
pRepo->mem->extraBuffList = NULL;
}
terrno = TSDB_CODE_TDB_OUT_OF_MEMORY;
return NULL;
}
TD_DLIST_NODE_NEXT(pNode) = TD_DLIST_NODE_PREV(pNode) = NULL;
tdListAppendNode(pRepo->mem->extraBuffList, pNode);
ptr = (void *)(pNode->data);
tsdbTrace("vgId:%d allocate %d bytes from SYSTEM buffer block", REPO_ID(pRepo), bytes);
} else { // allocate from TSDB buffer pool
if (pBufBlock == NULL || pBufBlock->remain < bytes) {
ASSERT(listNEles(pRepo->mem->bufBlockList) < pCfg->totalBlocks / 3);
if (tsdbLockRepo(pRepo) < 0) return NULL;
SListNode *pNode = tsdbAllocBufBlockFromPool(pRepo);
tdListAppendNode(pRepo->mem->bufBlockList, pNode);
if (tsdbUnlockRepo(pRepo) < 0) return NULL;
pBufBlock = tsdbGetCurrBufBlock(pRepo);
}
ASSERT(pBufBlock->remain >= bytes);
ptr = POINTER_SHIFT(pBufBlock->data, pBufBlock->offset);
pBufBlock->offset += bytes;
pBufBlock->remain -= bytes;
tsdbTrace("vgId:%d allocate %d bytes from TSDB buffer block, nBlocks %d offset %d remain %d", REPO_ID(pRepo), bytes,
listNEles(pRepo->mem->bufBlockList), pBufBlock->offset, pBufBlock->remain);
}
return ptr;
}
int tsdbSyncCommitConfig(STsdb *pRepo) {
ASSERT(pRepo->config_changed == true);
tsem_wait(&(pRepo->readyToCommit));
if (pRepo->code != TSDB_CODE_SUCCESS) {
tsdbWarn("vgId:%d try to commit config when TSDB not in good state: %s", REPO_ID(pRepo), tstrerror(terrno));
}
if (tsdbLockRepo(pRepo) < 0) return -1;
tsdbScheduleCommit(pRepo, COMMIT_CONFIG_REQ);
if (tsdbUnlockRepo(pRepo) < 0) return -1;
tsem_wait(&(pRepo->readyToCommit));
tsem_post(&(pRepo->readyToCommit));
if (pRepo->code != TSDB_CODE_SUCCESS) {
terrno = pRepo->code;
return -1;
}
terrno = TSDB_CODE_SUCCESS;
return 0;
}
int tsdbAsyncCommit(STsdb *pRepo) {
tsem_wait(&(pRepo->readyToCommit));
ASSERT(pRepo->imem == NULL);
if (pRepo->mem == NULL) {
tsem_post(&(pRepo->readyToCommit));
return 0;
}
if (pRepo->code != TSDB_CODE_SUCCESS) {
tsdbWarn("vgId:%d try to commit when TSDB not in good state: %s", REPO_ID(pRepo), tstrerror(terrno));
}
if (pRepo->appH.notifyStatus) pRepo->appH.notifyStatus(pRepo->appH.appH, TSDB_STATUS_COMMIT_START, TSDB_CODE_SUCCESS);
if (tsdbLockRepo(pRepo) < 0) return -1;
pRepo->imem = pRepo->mem;
pRepo->mem = NULL;
tsdbScheduleCommit(pRepo, COMMIT_REQ);
if (tsdbUnlockRepo(pRepo) < 0) return -1;
return 0;
}
int tsdbSyncCommit(STsdb *repo) {
STsdb *pRepo = repo;
tsdbAsyncCommit(pRepo);
tsem_wait(&(pRepo->readyToCommit));
tsem_post(&(pRepo->readyToCommit));
if (pRepo->code != TSDB_CODE_SUCCESS) {
terrno = pRepo->code;
return -1;
} else {
terrno = TSDB_CODE_SUCCESS;
return 0;
}
}
/**
* This is an important function to load data or try to load data from memory skiplist iterator.
*
* This function load memory data until:
* 1. iterator ends
* 2. data key exceeds maxKey
* 3. rowsIncreased = rowsInserted - rowsDeleteSucceed >= maxRowsToRead
* 4. operations in pCols not exceeds its max capacity if pCols is given
*
* The function tries to procceed AS MUCH AS POSSIBLE.
*/
int tsdbLoadDataFromCache(STable *pTable, SSkipListIterator *pIter, TSKEY maxKey, int maxRowsToRead, SDataCols *pCols,
TKEY *filterKeys, int nFilterKeys, bool keepDup, SMergeInfo *pMergeInfo) {
ASSERT(maxRowsToRead > 0 && nFilterKeys >= 0);
if (pIter == NULL) return 0;
STSchema * pSchema = NULL;
TSKEY rowKey = 0;
TSKEY fKey = 0;
bool isRowDel = false;
int filterIter = 0;
SMemRow row = NULL;
SMergeInfo mInfo;
if (pMergeInfo == NULL) pMergeInfo = &mInfo;
memset(pMergeInfo, 0, sizeof(*pMergeInfo));
pMergeInfo->keyFirst = INT64_MAX;
pMergeInfo->keyLast = INT64_MIN;
if (pCols) tdResetDataCols(pCols);
row = tsdbNextIterRow(pIter);
if (row == NULL || memRowKey(row) > maxKey) {
rowKey = INT64_MAX;
isRowDel = false;
} else {
rowKey = memRowKey(row);
isRowDel = memRowDeleted(row);
}
if (filterIter >= nFilterKeys) {
fKey = INT64_MAX;
} else {
fKey = tdGetKey(filterKeys[filterIter]);
}
while (true) {
if (fKey == INT64_MAX && rowKey == INT64_MAX) break;
if (fKey < rowKey) {
pMergeInfo->keyFirst = MIN(pMergeInfo->keyFirst, fKey);
pMergeInfo->keyLast = MAX(pMergeInfo->keyLast, fKey);
filterIter++;
if (filterIter >= nFilterKeys) {
fKey = INT64_MAX;
} else {
fKey = tdGetKey(filterKeys[filterIter]);
}
} else if (fKey > rowKey) {
if (isRowDel) {
pMergeInfo->rowsDeleteFailed++;
} else {
if (pMergeInfo->rowsInserted - pMergeInfo->rowsDeleteSucceed >= maxRowsToRead) break;
if (pCols && pMergeInfo->nOperations >= pCols->maxPoints) break;
pMergeInfo->rowsInserted++;
pMergeInfo->nOperations++;
pMergeInfo->keyFirst = MIN(pMergeInfo->keyFirst, rowKey);
pMergeInfo->keyLast = MAX(pMergeInfo->keyLast, rowKey);
tsdbAppendTableRowToCols(pTable, pCols, &pSchema, row);
}
tSkipListIterNext(pIter);
row = tsdbNextIterRow(pIter);
if (row == NULL || memRowKey(row) > maxKey) {
rowKey = INT64_MAX;
isRowDel = false;
} else {
rowKey = memRowKey(row);
isRowDel = memRowDeleted(row);
}
} else {
if (isRowDel) {
ASSERT(!keepDup);
if (pCols && pMergeInfo->nOperations >= pCols->maxPoints) break;
pMergeInfo->rowsDeleteSucceed++;
pMergeInfo->nOperations++;
tsdbAppendTableRowToCols(pTable, pCols, &pSchema, row);
} else {
if (keepDup) {
if (pCols && pMergeInfo->nOperations >= pCols->maxPoints) break;
pMergeInfo->rowsUpdated++;
pMergeInfo->nOperations++;
pMergeInfo->keyFirst = MIN(pMergeInfo->keyFirst, rowKey);
pMergeInfo->keyLast = MAX(pMergeInfo->keyLast, rowKey);
tsdbAppendTableRowToCols(pTable, pCols, &pSchema, row);
} else {
pMergeInfo->keyFirst = MIN(pMergeInfo->keyFirst, fKey);
pMergeInfo->keyLast = MAX(pMergeInfo->keyLast, fKey);
}
}
tSkipListIterNext(pIter);
row = tsdbNextIterRow(pIter);
if (row == NULL || memRowKey(row) > maxKey) {
rowKey = INT64_MAX;
isRowDel = false;
} else {
rowKey = memRowKey(row);
isRowDel = memRowDeleted(row);
}
filterIter++;
if (filterIter >= nFilterKeys) {
fKey = INT64_MAX;
} else {
fKey = tdGetKey(filterKeys[filterIter]);
}
}
}
return 0;
}
// ---------------- LOCAL FUNCTIONS ----------------
static SMemTable *tsdbNewMemTable(STsdb *pRepo) {
STsdbMeta *pMeta = pRepo->tsdbMeta;
SMemTable *pMemTable = (SMemTable *)calloc(1, sizeof(*pMemTable));
if (pMemTable == NULL) {
terrno = TSDB_CODE_TDB_OUT_OF_MEMORY;
goto _err;
}
pMemTable->keyFirst = INT64_MAX;
pMemTable->keyLast = 0;
pMemTable->numOfRows = 0;
pMemTable->maxTables = pMeta->maxTables;
pMemTable->tData = (STableData **)calloc(pMemTable->maxTables, sizeof(STableData *));
if (pMemTable->tData == NULL) {
terrno = TSDB_CODE_TDB_OUT_OF_MEMORY;
goto _err;
}
pMemTable->actList = tdListNew(0);
if (pMemTable->actList == NULL) {
terrno = TSDB_CODE_TDB_OUT_OF_MEMORY;
goto _err;
}
pMemTable->bufBlockList = tdListNew(sizeof(STsdbBufBlock *));
if (pMemTable->bufBlockList == NULL) {
terrno = TSDB_CODE_TDB_OUT_OF_MEMORY;
goto _err;
}
T_REF_INC(pMemTable);
return pMemTable;
_err:
tsdbFreeMemTable(pMemTable);
return NULL;
}
static void tsdbFreeMemTable(SMemTable *pMemTable) {
if (pMemTable) {
ASSERT((pMemTable->bufBlockList == NULL) ? true : (listNEles(pMemTable->bufBlockList) == 0));
ASSERT((pMemTable->actList == NULL) ? true : (listNEles(pMemTable->actList) == 0));
tdListFree(pMemTable->extraBuffList);
tdListFree(pMemTable->bufBlockList);
tdListFree(pMemTable->actList);
tfree(pMemTable->tData);
free(pMemTable);
}
}
static STableData *tsdbNewTableData(STsdbCfg *pCfg, STable *pTable) {
STableData *pTableData = (STableData *)calloc(1, sizeof(*pTableData));
if (pTableData == NULL) {
terrno = TSDB_CODE_TDB_OUT_OF_MEMORY;
return NULL;
}
pTableData->uid = TABLE_UID(pTable);
pTableData->keyFirst = INT64_MAX;
pTableData->keyLast = 0;
pTableData->numOfRows = 0;
uint8_t skipListCreateFlags;
if (pCfg->update == TD_ROW_DISCARD_UPDATE)
skipListCreateFlags = SL_DISCARD_DUP_KEY;
else
skipListCreateFlags = SL_UPDATE_DUP_KEY;
pTableData->pData =
tSkipListCreate(TSDB_DATA_SKIPLIST_LEVEL, TSDB_DATA_TYPE_TIMESTAMP, TYPE_BYTES[TSDB_DATA_TYPE_TIMESTAMP],
tkeyComparFn, skipListCreateFlags, tsdbGetTsTupleKey);
if (pTableData->pData == NULL) {
terrno = TSDB_CODE_TDB_OUT_OF_MEMORY;
free(pTableData);
return NULL;
}
T_REF_INC(pTableData);
return pTableData;
}
static void tsdbFreeTableData(STableData *pTableData) {
if (pTableData) {
int32_t ref = T_REF_DEC(pTableData);
if (ref == 0) {
tSkipListDestroy(pTableData->pData);
free(pTableData);
}
}
}
static char *tsdbGetTsTupleKey(const void *data) { return memRowKeys((SMemRow)data); }
static int tsdbAdjustMemMaxTables(SMemTable *pMemTable, int maxTables) {
ASSERT(pMemTable->maxTables < maxTables);
STableData **pTableData = (STableData **)calloc(maxTables, sizeof(STableData *));
if (pTableData == NULL) {
terrno = TSDB_CODE_TDB_OUT_OF_MEMORY;
return -1;
}
memcpy((void *)pTableData, (void *)pMemTable->tData, sizeof(STableData *) * pMemTable->maxTables);
STableData **tData = pMemTable->tData;
taosWLockLatch(&(pMemTable->latch));
pMemTable->maxTables = maxTables;
pMemTable->tData = pTableData;
taosWUnLockLatch(&(pMemTable->latch));
tfree(tData);
return 0;
}
static int tsdbAppendTableRowToCols(STable *pTable, SDataCols *pCols, STSchema **ppSchema, SMemRow row) {
if (pCols) {
if (*ppSchema == NULL || schemaVersion(*ppSchema) != memRowVersion(row)) {
*ppSchema = tsdbGetTableSchemaImpl(pTable, false, false, memRowVersion(row), (int8_t)memRowType(row));
if (*ppSchema == NULL) {
ASSERT(false);
return -1;
}
}
tdAppendMemRowToDataCol(row, *ppSchema, pCols, true);
}
return 0;
}
static FORCE_INLINE int tsdbCheckRowRange(STsdb *pRepo, STable *pTable, SMemRow row, TSKEY minKey, TSKEY maxKey,
TSKEY now) {
TSKEY rowKey = memRowKey(row);
if (rowKey < minKey || rowKey > maxKey) {
tsdbError("vgId:%d table %s tid %d uid %" PRIu64 " timestamp is out of range! now %" PRId64 " minKey %" PRId64
" maxKey %" PRId64 " row key %" PRId64,
REPO_ID(pRepo), TABLE_CHAR_NAME(pTable), TABLE_TID(pTable), TABLE_UID(pTable), now, minKey, maxKey,
rowKey);
terrno = TSDB_CODE_TDB_TIMESTAMP_OUT_OF_RANGE;
return -1;
}
return 0;
}
static int tsdbScanAndConvertSubmitMsg(STsdb *pRepo, SSubmitMsg *pMsg) {
ASSERT(pMsg != NULL);
STsdbMeta * pMeta = pRepo->tsdbMeta;
SSubmitMsgIter msgIter = {0};
SSubmitBlk * pBlock = NULL;
SSubmitBlkIter blkIter = {0};
SMemRow row = NULL;
TSKEY now = taosGetTimestamp(pRepo->config.precision);
TSKEY minKey = now - tsTickPerDay[pRepo->config.precision] * pRepo->config.keep;
TSKEY maxKey = now + tsTickPerDay[pRepo->config.precision] * pRepo->config.daysPerFile;
terrno = TSDB_CODE_SUCCESS;
pMsg->length = htonl(pMsg->length);
pMsg->numOfBlocks = htonl(pMsg->numOfBlocks);
if (tInitSubmitMsgIter(pMsg, &msgIter) < 0) return -1;
while (true) {
if (tGetSubmitMsgNext(&msgIter, &pBlock) < 0) return -1;
if (pBlock == NULL) break;
pBlock->uid = htobe64(pBlock->uid);
pBlock->tid = htonl(pBlock->tid);
pBlock->sversion = htonl(pBlock->sversion);
pBlock->dataLen = htonl(pBlock->dataLen);
pBlock->schemaLen = htonl(pBlock->schemaLen);
pBlock->numOfRows = htons(pBlock->numOfRows);
if (pBlock->tid <= 0 || pBlock->tid >= pMeta->maxTables) {
tsdbError("vgId:%d failed to get table to insert data, uid %" PRIu64 " tid %d", REPO_ID(pRepo), pBlock->uid,
pBlock->tid);
terrno = TSDB_CODE_TDB_INVALID_TABLE_ID;
return -1;
}
STable *pTable = pMeta->tables[pBlock->tid];
if (pTable == NULL || TABLE_UID(pTable) != pBlock->uid) {
tsdbError("vgId:%d failed to get table to insert data, uid %" PRIu64 " tid %d", REPO_ID(pRepo), pBlock->uid,
pBlock->tid);
terrno = TSDB_CODE_TDB_INVALID_TABLE_ID;
return -1;
}
if (TABLE_TYPE(pTable) == TSDB_SUPER_TABLE) {
tsdbError("vgId:%d invalid action trying to insert a super table %s", REPO_ID(pRepo), TABLE_CHAR_NAME(pTable));
terrno = TSDB_CODE_TDB_INVALID_ACTION;
return -1;
}
// Check schema version and update schema if needed
if (tsdbCheckTableSchema(pRepo, pBlock, pTable) < 0) {
if (terrno == TSDB_CODE_TDB_TABLE_RECONFIGURE) {
continue;
} else {
return -1;
}
}
tInitSubmitBlkIter(pBlock, &blkIter);
while ((row = tGetSubmitBlkNext(&blkIter)) != NULL) {
if (tsdbCheckRowRange(pRepo, pTable, row, minKey, maxKey, now) < 0) {
return -1;
}
}
}
if (terrno != TSDB_CODE_SUCCESS) return -1;
return 0;
}
// row1 has higher priority
static SMemRow tsdbInsertDupKeyMerge(SMemRow row1, SMemRow row2, STsdb *pRepo, STSchema **ppSchema1,
STSchema **ppSchema2, STable *pTable, int32_t *pPoints, SMemRow *pLastRow) {
// for compatiblity, duplicate key inserted when update=0 should be also calculated as affected rows!
if (row1 == NULL && row2 == NULL && pRepo->config.update == TD_ROW_DISCARD_UPDATE) {
(*pPoints)++;
return NULL;
}
tsdbTrace("vgId:%d a row is %s table %s tid %d uid %" PRIu64 " key %" PRIu64, REPO_ID(pRepo), "updated in",
TABLE_CHAR_NAME(pTable), TABLE_TID(pTable), TABLE_UID(pTable), memRowKey(row1));
if (row2 == NULL || pRepo->config.update != TD_ROW_PARTIAL_UPDATE) {
void *pMem = tsdbAllocBytes(pRepo, memRowTLen(row1));
if (pMem == NULL) return NULL;
memRowCpy(pMem, row1);
(*pPoints)++;
*pLastRow = pMem;
return pMem;
}
STSchema * pSchema1 = *ppSchema1;
STSchema * pSchema2 = *ppSchema2;
SMergeBuf *pBuf = &pRepo->mergeBuf;
int dv1 = memRowVersion(row1);
int dv2 = memRowVersion(row2);
if (pSchema1 == NULL || schemaVersion(pSchema1) != dv1) {
if (pSchema2 != NULL && schemaVersion(pSchema2) == dv1) {
*ppSchema1 = pSchema2;
} else {
*ppSchema1 = tsdbGetTableSchemaImpl(pTable, false, false, memRowVersion(row1), (int8_t)memRowType(row1));
}
pSchema1 = *ppSchema1;
}
if (pSchema2 == NULL || schemaVersion(pSchema2) != dv2) {
if (schemaVersion(pSchema1) == dv2) {
pSchema2 = pSchema1;
} else {
*ppSchema2 = tsdbGetTableSchemaImpl(pTable, false, false, memRowVersion(row2), (int8_t)memRowType(row2));
pSchema2 = *ppSchema2;
}
}
SMemRow tmp = tsdbMergeTwoRows(pBuf, row1, row2, pSchema1, pSchema2);
void *pMem = tsdbAllocBytes(pRepo, memRowTLen(tmp));
if (pMem == NULL) return NULL;
memRowCpy(pMem, tmp);
(*pPoints)++;
*pLastRow = pMem;
return pMem;
}
static void *tsdbInsertDupKeyMergePacked(void **args) {
return tsdbInsertDupKeyMerge(args[0], args[1], args[2], (STSchema **)&args[3], (STSchema **)&args[4], args[5],
args[6], args[7]);
}
static void tsdbSetupSkipListHookFns(SSkipList *pSkipList, STsdb *pRepo, STable *pTable, int32_t *pPoints,
SMemRow *pLastRow) {
if (pSkipList->insertHandleFn == NULL) {
tGenericSavedFunc *dupHandleSavedFunc = genericSavedFuncInit((GenericVaFunc)&tsdbInsertDupKeyMergePacked, 9);
dupHandleSavedFunc->args[2] = pRepo;
dupHandleSavedFunc->args[3] = NULL;
dupHandleSavedFunc->args[4] = NULL;
dupHandleSavedFunc->args[5] = pTable;
pSkipList->insertHandleFn = dupHandleSavedFunc;
}
pSkipList->insertHandleFn->args[6] = pPoints;
pSkipList->insertHandleFn->args[7] = pLastRow;
}
static int tsdbInsertDataToTable(STsdb *pRepo, SSubmitBlk *pBlock, int32_t *pAffectedRows) {
STsdbMeta * pMeta = pRepo->tsdbMeta;
int32_t points = 0;
STable * pTable = NULL;
SSubmitBlkIter blkIter = {0};
SMemTable * pMemTable = NULL;
STableData * pTableData = NULL;
STsdbCfg * pCfg = &(pRepo->config);
tInitSubmitBlkIter(pBlock, &blkIter);
if (blkIter.row == NULL) return 0;
TSKEY firstRowKey = memRowKey(blkIter.row);
tsdbAllocBytes(pRepo, 0);
pMemTable = pRepo->mem;
ASSERT(pMemTable != NULL);
ASSERT(pBlock->tid < pMeta->maxTables);
pTable = pMeta->tables[pBlock->tid];
ASSERT(pTable != NULL && TABLE_UID(pTable) == pBlock->uid);
if (TABLE_TID(pTable) >= pMemTable->maxTables) {
if (tsdbAdjustMemMaxTables(pMemTable, pMeta->maxTables) < 0) {
return -1;
}
}
pTableData = pMemTable->tData[TABLE_TID(pTable)];
if (pTableData == NULL || pTableData->uid != TABLE_UID(pTable)) {
if (pTableData != NULL) {
taosWLockLatch(&(pMemTable->latch));
pMemTable->tData[TABLE_TID(pTable)] = NULL;
tsdbFreeTableData(pTableData);
taosWUnLockLatch(&(pMemTable->latch));
}
pTableData = tsdbNewTableData(pCfg, pTable);
if (pTableData == NULL) {
tsdbError("vgId:%d failed to insert data to table %s uid %" PRId64 " tid %d since %s", REPO_ID(pRepo),
TABLE_CHAR_NAME(pTable), TABLE_UID(pTable), TABLE_TID(pTable), tstrerror(terrno));
return -1;
}
pRepo->mem->tData[TABLE_TID(pTable)] = pTableData;
}
ASSERT((pTableData != NULL) && pTableData->uid == TABLE_UID(pTable));
SMemRow lastRow = NULL;
int64_t osize = SL_SIZE(pTableData->pData);
tsdbSetupSkipListHookFns(pTableData->pData, pRepo, pTable, &points, &lastRow);
tSkipListPutBatchByIter(pTableData->pData, &blkIter, (iter_next_fn_t)tGetSubmitBlkNext);
int64_t dsize = SL_SIZE(pTableData->pData) - osize;
(*pAffectedRows) += points;
if (lastRow != NULL) {
TSKEY lastRowKey = memRowKey(lastRow);
if (pMemTable->keyFirst > firstRowKey) pMemTable->keyFirst = firstRowKey;
pMemTable->numOfRows += dsize;
if (pTableData->keyFirst > firstRowKey) pTableData->keyFirst = firstRowKey;
pTableData->numOfRows += dsize;
if (pMemTable->keyLast < lastRowKey) pMemTable->keyLast = lastRowKey;
if (pTableData->keyLast < lastRowKey) pTableData->keyLast = lastRowKey;
if (tsdbUpdateTableLatestInfo(pRepo, pTable, lastRow) < 0) {
return -1;
}
}
STSchema *pSchema = tsdbGetTableSchemaByVersion(pTable, pBlock->sversion, -1);
pRepo->stat.pointsWritten += points * schemaNCols(pSchema);
pRepo->stat.totalStorage += points * schemaVLen(pSchema);
return 0;
}
static int tsdbCheckTableSchema(STsdb *pRepo, SSubmitBlk *pBlock, STable *pTable) {
ASSERT(pTable != NULL);
STSchema *pSchema = tsdbGetTableSchemaImpl(pTable, false, false, -1, -1);
int sversion = schemaVersion(pSchema);
if (pBlock->sversion == sversion) {
return 0;
} else {
if (TABLE_TYPE(pTable) == TSDB_STREAM_TABLE) { // stream table is not allowed to change schema
terrno = TSDB_CODE_TDB_IVD_TB_SCHEMA_VERSION;
return -1;
}
}
if (pBlock->sversion > sversion) { // may need to update table schema
if (pBlock->schemaLen > 0) {
tsdbDebug(
"vgId:%d table %s tid %d uid %" PRIu64 " schema version %d is out of data, client version %d, update...",
REPO_ID(pRepo), TABLE_CHAR_NAME(pTable), TABLE_TID(pTable), TABLE_UID(pTable), sversion, pBlock->sversion);
ASSERT(pBlock->schemaLen % sizeof(STColumn) == 0);
int numOfCols = pBlock->schemaLen / sizeof(STColumn);
STColumn *pTCol = (STColumn *)pBlock->data;
STSchemaBuilder schemaBuilder = {0};
if (tdInitTSchemaBuilder(&schemaBuilder, pBlock->sversion) < 0) {
terrno = TSDB_CODE_TDB_OUT_OF_MEMORY;
tsdbError("vgId:%d failed to update schema of table %s since %s", REPO_ID(pRepo), TABLE_CHAR_NAME(pTable),
tstrerror(terrno));
return -1;
}
for (int i = 0; i < numOfCols; i++) {
if (tdAddColToSchema(&schemaBuilder, pTCol[i].type, htons(pTCol[i].colId), htons(pTCol[i].bytes)) < 0) {
terrno = TSDB_CODE_TDB_OUT_OF_MEMORY;
tsdbError("vgId:%d failed to update schema of table %s since %s", REPO_ID(pRepo), TABLE_CHAR_NAME(pTable),
tstrerror(terrno));
tdDestroyTSchemaBuilder(&schemaBuilder);
return -1;
}
}
STSchema *pNSchema = tdGetSchemaFromBuilder(&schemaBuilder);
if (pNSchema == NULL) {
terrno = TSDB_CODE_TDB_OUT_OF_MEMORY;
tdDestroyTSchemaBuilder(&schemaBuilder);
return -1;
}
tdDestroyTSchemaBuilder(&schemaBuilder);
tsdbUpdateTableSchema(pRepo, pTable, pNSchema, true);
} else {
tsdbDebug(
"vgId:%d table %s tid %d uid %" PRIu64 " schema version %d is out of data, client version %d, reconfigure...",
REPO_ID(pRepo), TABLE_CHAR_NAME(pTable), TABLE_TID(pTable), TABLE_UID(pTable), sversion, pBlock->sversion);
terrno = TSDB_CODE_TDB_TABLE_RECONFIGURE;
return -1;
}
} else {
ASSERT(pBlock->sversion >= 0);
if (tsdbGetTableSchemaImpl(pTable, false, false, pBlock->sversion, -1) == NULL) {
tsdbError("vgId:%d invalid submit schema version %d to table %s tid %d from client", REPO_ID(pRepo),
pBlock->sversion, TABLE_CHAR_NAME(pTable), TABLE_TID(pTable));
terrno = TSDB_CODE_TDB_IVD_TB_SCHEMA_VERSION;
return -1;
}
}
return 0;
}
static void updateTableLatestColumn(STsdb *pRepo, STable *pTable, SMemRow row) {
tsdbDebug("vgId:%d updateTableLatestColumn, %s row version:%d", REPO_ID(pRepo), pTable->name->data,
memRowVersion(row));
STSchema *pSchema = tsdbGetTableLatestSchema(pTable);
if (tsdbUpdateLastColSchema(pTable, pSchema) < 0) {
return;
}
pSchema = tsdbGetTableSchemaByVersion(pTable, memRowVersion(row), (int8_t)memRowType(row));
if (pSchema == NULL) {
return;
}
SDataCol *pLatestCols = pTable->lastCols;
int32_t kvIdx = 0;
for (int16_t j = 0; j < schemaNCols(pSchema); j++) {
STColumn *pTCol = schemaColAt(pSchema, j);
// ignore not exist colId
int16_t idx = tsdbGetLastColumnsIndexByColId(pTable, pTCol->colId);
if (idx == -1) {
continue;
}
void *value = NULL;
value = tdGetMemRowDataOfColEx(row, pTCol->colId, (int8_t)pTCol->type,
TD_DATA_ROW_HEAD_SIZE + pSchema->columns[j].offset, &kvIdx);
if ((value == NULL) || isNull(value, pTCol->type)) {
continue;
}
// lock
TSDB_WLOCK_TABLE(pTable);
SDataCol *pDataCol = &(pLatestCols[idx]);
if (pDataCol->pData == NULL) {
pDataCol->pData = malloc(pTCol->bytes);
pDataCol->bytes = pTCol->bytes;
} else if (pDataCol->bytes < pTCol->bytes) {
pDataCol->pData = realloc(pDataCol->pData, pTCol->bytes);
pDataCol->bytes = pTCol->bytes;
}
// the actual value size
uint16_t bytes = IS_VAR_DATA_TYPE(pTCol->type) ? varDataTLen(value) : pTCol->bytes;
// the actual data size CANNOT larger than column size
assert(pTCol->bytes >= bytes);
memcpy(pDataCol->pData, value, bytes);
// tsdbInfo("updateTableLatestColumn vgId:%d cache column %d for %d,%s", REPO_ID(pRepo), j, pDataCol->bytes,
// (char*)pDataCol->pData);
pDataCol->ts = memRowKey(row);
// unlock
TSDB_WUNLOCK_TABLE(pTable);
}
}
static int tsdbUpdateTableLatestInfo(STsdb *pRepo, STable *pTable, SMemRow row) {
STsdbCfg *pCfg = &pRepo->config;
// if cacheLastRow config has been reset, free the lastRow
if (!pCfg->cacheLastRow && pTable->lastRow != NULL) {
SMemRow cachedLastRow = pTable->lastRow;
TSDB_WLOCK_TABLE(pTable);
pTable->lastRow = NULL;
TSDB_WUNLOCK_TABLE(pTable);
taosTZfree(cachedLastRow);
}
if (tsdbGetTableLastKeyImpl(pTable) <= memRowKey(row)) {
if (CACHE_LAST_ROW(pCfg) || pTable->lastRow != NULL) {
SMemRow nrow = pTable->lastRow;
if (taosTSizeof(nrow) < memRowTLen(row)) {
SMemRow orow = nrow;
nrow = taosTMalloc(memRowTLen(row));
if (nrow == NULL) {
terrno = TSDB_CODE_TDB_OUT_OF_MEMORY;
return -1;
}
memRowCpy(nrow, row);
TSDB_WLOCK_TABLE(pTable);
pTable->lastKey = memRowKey(row);
pTable->lastRow = nrow;
TSDB_WUNLOCK_TABLE(pTable);
taosTZfree(orow);
} else {
TSDB_WLOCK_TABLE(pTable);
pTable->lastKey = memRowKey(row);
memRowCpy(nrow, row);
TSDB_WUNLOCK_TABLE(pTable);
}
} else {
pTable->lastKey = memRowKey(row);
}
if (CACHE_LAST_NULL_COLUMN(pCfg)) {
updateTableLatestColumn(pRepo, pTable, row);
}
}
pTable->cacheLastConfigVersion = pRepo->cacheLastConfigVersion;
return 0;
}
#endif
\ No newline at end of file
/*
* Copyright (c) 2019 TAOS Data, Inc. <jhtao@taosdata.com>
*
* This program is free software: you can use, redistribute, and/or modify
* it under the terms of the GNU Affero General Public License, version 3
* or later ("AGPL"), as published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#if 0
#include "tcompare.h"
#include "tsdbint.h"
#include "tutil.h"
#define TSDB_SUPER_TABLE_SL_LEVEL 5
#define DEFAULT_TAG_INDEX_COLUMN 0
static char * getTagIndexKey(const void *pData);
static STable *tsdbNewTable();
static STable *tsdbCreateTableFromCfg(STableCfg *pCfg, bool isSuper, STable *pSTable);
static void tsdbFreeTable(STable *pTable);
static int tsdbAddTableToMeta(STsdbRepo *pRepo, STable *pTable, bool addIdx, bool lock);
static void tsdbRemoveTableFromMeta(STsdbRepo *pRepo, STable *pTable, bool rmFromIdx, bool lock);
static int tsdbAddTableIntoIndex(STsdbMeta *pMeta, STable *pTable, bool refSuper);
static int tsdbRemoveTableFromIndex(STsdbMeta *pMeta, STable *pTable);
static int tsdbInitTableCfg(STableCfg *config, ETableType type, uint64_t uid, int32_t tid);
static int tsdbTableSetSchema(STableCfg *config, STSchema *pSchema, bool dup);
static int tsdbTableSetName(STableCfg *config, char *name, bool dup);
static int tsdbTableSetTagSchema(STableCfg *config, STSchema *pSchema, bool dup);
static int tsdbTableSetSName(STableCfg *config, char *sname, bool dup);
static int tsdbTableSetSuperUid(STableCfg *config, uint64_t uid);
static int tsdbTableSetTagValue(STableCfg *config, SKVRow row, bool dup);
static int tsdbTableSetStreamSql(STableCfg *config, char *sql, bool dup);
static int tsdbEncodeTableName(void **buf, tstr *name);
static void * tsdbDecodeTableName(void *buf, tstr **name);
static int tsdbEncodeTable(void **buf, STable *pTable);
static void * tsdbDecodeTable(void *buf, STable **pRTable);
static int tsdbGetTableEncodeSize(int8_t act, STable *pTable);
static void * tsdbInsertTableAct(STsdbRepo *pRepo, int8_t act, void *buf, STable *pTable);
static int tsdbRemoveTableFromStore(STsdbRepo *pRepo, STable *pTable);
static int tsdbRmTableFromMeta(STsdbRepo *pRepo, STable *pTable);
static int tsdbAdjustMetaTables(STsdbRepo *pRepo, int tid);
static int tsdbCheckTableTagVal(SKVRow *pKVRow, STSchema *pSchema);
static int tsdbInsertNewTableAction(STsdbRepo *pRepo, STable *pTable);
static int tsdbAddSchema(STable *pTable, STSchema *pSchema);
static void tsdbFreeTableSchema(STable *pTable);
// ------------------ OUTER FUNCTIONS ------------------
int tsdbCreateTable(STsdbRepo *repo, STableCfg *pCfg) {
STsdbRepo *pRepo = (STsdbRepo *)repo;
STsdbMeta *pMeta = pRepo->tsdbMeta;
STable * super = NULL;
STable * table = NULL;
bool newSuper = false;
bool superChanged = false;
int tid = pCfg->tableId.tid;
STable * pTable = NULL;
if (tid < 1 || tid > TSDB_MAX_TABLES) {
tsdbError("vgId:%d failed to create table since invalid tid %d", REPO_ID(pRepo), tid);
terrno = TSDB_CODE_TDB_IVD_CREATE_TABLE_INFO;
goto _err;
}
if (tid < pMeta->maxTables && pMeta->tables[tid] != NULL) {
if (TABLE_UID(pMeta->tables[tid]) == pCfg->tableId.uid) {
tsdbError("vgId:%d table %s already exists, tid %d uid %" PRId64, REPO_ID(pRepo),
TABLE_CHAR_NAME(pMeta->tables[tid]), TABLE_TID(pMeta->tables[tid]), TABLE_UID(pMeta->tables[tid]));
return 0;
} else {
tsdbInfo("vgId:%d table %s at tid %d uid %" PRIu64
" exists, replace it with new table, this can be not reasonable",
REPO_ID(pRepo), TABLE_CHAR_NAME(pMeta->tables[tid]), TABLE_TID(pMeta->tables[tid]),
TABLE_UID(pMeta->tables[tid]));
tsdbDropTable(pRepo, pMeta->tables[tid]->tableId);
}
}
pTable = tsdbGetTableByUid(pMeta, pCfg->tableId.uid);
if (pTable != NULL) {
tsdbError("vgId:%d table %s already exists, tid %d uid %" PRId64, REPO_ID(pRepo), TABLE_CHAR_NAME(pTable),
TABLE_TID(pTable), TABLE_UID(pTable));
terrno = TSDB_CODE_TDB_TABLE_ALREADY_EXIST;
goto _err;
}
if (pCfg->type == TSDB_CHILD_TABLE) {
super = tsdbGetTableByUid(pMeta, pCfg->superUid);
if (super == NULL) { // super table not exists, try to create it
newSuper = true;
super = tsdbCreateTableFromCfg(pCfg, true, NULL);
if (super == NULL) goto _err;
} else {
if (TABLE_TYPE(super) != TSDB_SUPER_TABLE || TABLE_UID(super) != pCfg->superUid) {
terrno = TSDB_CODE_TDB_IVD_CREATE_TABLE_INFO;
goto _err;
}
if (schemaVersion(pCfg->tagSchema) > schemaVersion(super->tagSchema)) {
// tag schema out of date, need to update super table tag version
STSchema *pOldSchema = super->tagSchema;
TSDB_WLOCK_TABLE(super);
super->tagSchema = tdDupSchema(pCfg->tagSchema);
TSDB_WUNLOCK_TABLE(super);
tdFreeSchema(pOldSchema);
superChanged = true;
}
}
}
table = tsdbCreateTableFromCfg(pCfg, false, super);
if (table == NULL) goto _err;
// Register to meta
tsdbWLockRepoMeta(pRepo);
if (newSuper) {
if (tsdbAddTableToMeta(pRepo, super, true, false) < 0) {
super = NULL;
tsdbUnlockRepoMeta(pRepo);
goto _err;
}
}
if (tsdbAddTableToMeta(pRepo, table, true, false) < 0) {
table = NULL;
tsdbUnlockRepoMeta(pRepo);
goto _err;
}
tsdbUnlockRepoMeta(pRepo);
// Write to memtable action
if (newSuper || superChanged) {
// add insert new super table action
if (tsdbInsertNewTableAction(pRepo, super) != 0) {
goto _err;
}
}
// add insert new table action
if (tsdbInsertNewTableAction(pRepo, table) != 0) {
goto _err;
}
if (tsdbCheckCommit(pRepo) < 0) return -1;
return 0;
_err:
if (newSuper) {
tsdbFreeTable(super);
}
tsdbFreeTable(table);
return -1;
}
int tsdbDropTable(STsdbRepo *repo, STableId tableId) {
STsdbRepo *pRepo = (STsdbRepo *)repo;
STsdbMeta *pMeta = pRepo->tsdbMeta;
uint64_t uid = tableId.uid;
int tid = 0;
char * tbname = NULL;
STable *pTable = tsdbGetTableByUid(pMeta, uid);
if (pTable == NULL) {
tsdbError("vgId:%d failed to drop table since table not exists! tid:%d uid %" PRIu64, REPO_ID(pRepo), tableId.tid,
uid);
terrno = TSDB_CODE_TDB_INVALID_TABLE_ID;
return -1;
}
tsdbDebug("vgId:%d try to drop table %s type %d", REPO_ID(pRepo), TABLE_CHAR_NAME(pTable), TABLE_TYPE(pTable));
tid = TABLE_TID(pTable);
tbname = strdup(TABLE_CHAR_NAME(pTable));
if (tbname == NULL) {
terrno = TSDB_CODE_TDB_OUT_OF_MEMORY;
return -1;
}
// Write to KV store first
if (tsdbRemoveTableFromStore(pRepo, pTable) < 0) {
tsdbError("vgId:%d failed to drop table %s since %s", REPO_ID(pRepo), tbname, tstrerror(terrno));
goto _err;
}
// Remove table from Meta
if (tsdbRmTableFromMeta(pRepo, pTable) < 0) {
tsdbError("vgId:%d failed to drop table %s since %s", REPO_ID(pRepo), tbname, tstrerror(terrno));
goto _err;
}
tsdbDebug("vgId:%d, table %s is dropped! tid:%d, uid:%" PRId64, pRepo->config.tsdbId, tbname, tid, uid);
free(tbname);
if (tsdbCheckCommit(pRepo) < 0) goto _err;
return 0;
_err:
tfree(tbname);
return -1;
}
void *tsdbGetTableTagVal(const void *pTable, int32_t colId, int16_t type) {
// TODO: this function should be changed also
STSchema *pSchema = tsdbGetTableTagSchema((STable *)pTable);
STColumn *pCol = tdGetColOfID(pSchema, colId);
if (pCol == NULL) {
return NULL; // No matched tag volumn
}
char *val = NULL;
if (pCol->type == TSDB_DATA_TYPE_JSON) {
val = ((STable *)pTable)->tagVal;
} else {
val = tdGetKVRowValOfCol(((STable *)pTable)->tagVal, colId);
assert(type == pCol->type);
}
return val;
}
char *tsdbGetTableName(void *pTable) {
// TODO: need to change as thread-safe
if (pTable == NULL) {
return NULL;
} else {
return (char *)(((STable *)pTable)->name);
}
}
STableCfg *tsdbCreateTableCfgFromMsg(SMDCreateTableMsg *pMsg) {
if (pMsg == NULL) return NULL;
SSchema *pSchema = (SSchema *)pMsg->data;
int16_t numOfCols = htons(pMsg->numOfColumns);
int16_t numOfTags = htons(pMsg->numOfTags);
STSchemaBuilder schemaBuilder = {0};
STableCfg *pCfg = (STableCfg *)calloc(1, sizeof(STableCfg));
if (pCfg == NULL) {
terrno = TSDB_CODE_TDB_OUT_OF_MEMORY;
return NULL;
}
if (tsdbInitTableCfg(pCfg, pMsg->tableType, htobe64(pMsg->uid), htonl(pMsg->tid)) < 0) goto _err;
if (tdInitTSchemaBuilder(&schemaBuilder, htonl(pMsg->sversion)) < 0) {
terrno = TSDB_CODE_TDB_OUT_OF_MEMORY;
goto _err;
}
for (int i = 0; i < numOfCols; i++) {
if (tdAddColToSchema(&schemaBuilder, pSchema[i].type, htons(pSchema[i].colId), htons(pSchema[i].bytes)) < 0) {
terrno = TSDB_CODE_TDB_OUT_OF_MEMORY;
goto _err;
}
}
if (tsdbTableSetSchema(pCfg, tdGetSchemaFromBuilder(&schemaBuilder), false) < 0) goto _err;
if (tsdbTableSetName(pCfg, pMsg->tableFname, true) < 0) goto _err;
if (numOfTags > 0) {
// Decode tag schema
tdResetTSchemaBuilder(&schemaBuilder, htonl(pMsg->tversion));
for (int i = numOfCols; i < numOfCols + numOfTags; i++) {
if (tdAddColToSchema(&schemaBuilder, pSchema[i].type, htons(pSchema[i].colId), htons(pSchema[i].bytes)) < 0) {
terrno = TSDB_CODE_TDB_OUT_OF_MEMORY;
goto _err;
}
}
if (tsdbTableSetTagSchema(pCfg, tdGetSchemaFromBuilder(&schemaBuilder), false) < 0) goto _err;
if (tsdbTableSetSName(pCfg, pMsg->stableFname, true) < 0) goto _err;
if (tsdbTableSetSuperUid(pCfg, htobe64(pMsg->superTableUid)) < 0) goto _err;
int32_t tagDataLen = htonl(pMsg->tagDataLen);
if (tagDataLen) {
char *pTagData = pMsg->data + (numOfCols + numOfTags) * sizeof(SSchema);
tsdbTableSetTagValue(pCfg, pTagData, true);
}
}
if (pMsg->tableType == TSDB_STREAM_TABLE) {
char *sql = pMsg->data + (numOfCols + numOfTags) * sizeof(SSchema);
tsdbTableSetStreamSql(pCfg, sql, true);
}
tdDestroyTSchemaBuilder(&schemaBuilder);
return pCfg;
_err:
tdDestroyTSchemaBuilder(&schemaBuilder);
tsdbClearTableCfg(pCfg);
return NULL;
}
static UNUSED_FUNC int32_t colIdCompar(const void *left, const void *right) {
int16_t colId = *(int16_t *)left;
STColumn *p2 = (STColumn *)right;
if (colId == p2->colId) {
return 0;
}
return (colId < p2->colId) ? -1 : 1;
}
int tsdbUpdateTableTagValue(STsdbRepo *repo, SUpdateTableTagValMsg *pMsg) {
STsdbRepo *pRepo = (STsdbRepo *)repo;
STsdbMeta *pMeta = pRepo->tsdbMeta;
STSchema * pNewSchema = NULL;
pMsg->uid = htobe64(pMsg->uid);
pMsg->tid = htonl(pMsg->tid);
pMsg->tversion = htons(pMsg->tversion);
pMsg->colId = htons(pMsg->colId);
pMsg->bytes = htons(pMsg->bytes);
pMsg->tagValLen = htonl(pMsg->tagValLen);
pMsg->numOfTags = htons(pMsg->numOfTags);
pMsg->schemaLen = htonl(pMsg->schemaLen);
for (int i = 0; i < pMsg->numOfTags; i++) {
STColumn *pTCol = (STColumn *)pMsg->data + i;
pTCol->bytes = htons(pTCol->bytes);
pTCol->colId = htons(pTCol->colId);
}
STable *pTable = tsdbGetTableByUid(pMeta, pMsg->uid);
if (pTable == NULL || TABLE_TID(pTable) != pMsg->tid) {
tsdbError("vgId:%d failed to update table tag value since invalid table id %d uid %" PRIu64, REPO_ID(pRepo),
pMsg->tid, pMsg->uid);
terrno = TSDB_CODE_TDB_INVALID_TABLE_ID;
return -1;
}
if (TABLE_TYPE(pTable) != TSDB_CHILD_TABLE) {
tsdbError("vgId:%d try to update tag value of a non-child table, invalid action", REPO_ID(pRepo));
terrno = TSDB_CODE_TDB_INVALID_ACTION;
return -1;
}
if (schemaVersion(pTable->pSuper->tagSchema) > pMsg->tversion) {
tsdbError(
"vgId:%d failed to update tag value of table %s since version out of date, client tag version %d server tag "
"version %d",
REPO_ID(pRepo), TABLE_CHAR_NAME(pTable), pMsg->tversion, schemaVersion(pTable->pSuper->tagSchema));
terrno = TSDB_CODE_TDB_TAG_VER_OUT_OF_DATE;
return -1;
}
if (schemaVersion(pTable->pSuper->tagSchema) < pMsg->tversion) { // tag schema out of data,
tsdbDebug("vgId:%d need to update tag schema of table %s tid %d uid %" PRIu64
" since out of date, current version %d new version %d",
REPO_ID(pRepo), TABLE_CHAR_NAME(pTable), TABLE_TID(pTable), TABLE_UID(pTable),
schemaVersion(pTable->pSuper->tagSchema), pMsg->tversion);
STSchemaBuilder schemaBuilder = {0};
STColumn *pTCol = (STColumn *)pMsg->data;
ASSERT(pMsg->schemaLen % sizeof(STColumn) == 0 &&
pTCol[0].colId == colColId(schemaColAt(pTable->pSuper->tagSchema, 0)));
if (tdInitTSchemaBuilder(&schemaBuilder, pMsg->tversion) < 0) {
tsdbDebug("vgId:%d failed to update tag schema of table %s tid %d uid %" PRIu64 " since out of memory",
REPO_ID(pRepo), TABLE_CHAR_NAME(pTable), TABLE_TID(pTable), TABLE_UID(pTable));
terrno = TSDB_CODE_TDB_OUT_OF_MEMORY;
return -1;
}
for (int i = 0; i < (pMsg->schemaLen / sizeof(STColumn)); i++) {
if (tdAddColToSchema(&schemaBuilder, pTCol[i].type, pTCol[i].colId, pTCol[i].bytes) < 0) {
tdDestroyTSchemaBuilder(&schemaBuilder);
terrno = TSDB_CODE_TDB_OUT_OF_MEMORY;
return -1;
}
}
pNewSchema = tdGetSchemaFromBuilder(&schemaBuilder);
if (pNewSchema == NULL) {
tdDestroyTSchemaBuilder(&schemaBuilder);
terrno = TSDB_CODE_TDB_OUT_OF_MEMORY;
return -1;
}
tdDestroyTSchemaBuilder(&schemaBuilder);
}
// Change in memory
if (pNewSchema != NULL) { // change super table tag schema
TSDB_WLOCK_TABLE(pTable->pSuper);
STSchema *pOldSchema = pTable->pSuper->tagSchema;
pTable->pSuper->tagSchema = pNewSchema;
tdFreeSchema(pOldSchema);
TSDB_WUNLOCK_TABLE(pTable->pSuper);
}
bool isChangeIndexCol =
(pMsg->colId == colColId(schemaColAt(pTable->pSuper->tagSchema, 0))) || pMsg->type == TSDB_DATA_TYPE_JSON;
// STColumn *pCol = bsearch(&(pMsg->colId), pMsg->data, pMsg->numOfTags, sizeof(STColumn), colIdCompar);
// ASSERT(pCol != NULL);
if (isChangeIndexCol) {
tsdbWLockRepoMeta(pRepo);
tsdbRemoveTableFromIndex(pMeta, pTable);
}
TSDB_WLOCK_TABLE(pTable);
if (pMsg->type == TSDB_DATA_TYPE_JSON) {
kvRowFree(pTable->tagVal);
pTable->tagVal = tdKVRowDup(POINTER_SHIFT(pMsg->data, pMsg->schemaLen));
} else {
tdSetKVRowDataOfCol(&(pTable->tagVal), pMsg->colId, pMsg->type, POINTER_SHIFT(pMsg->data, pMsg->schemaLen));
}
TSDB_WUNLOCK_TABLE(pTable);
if (isChangeIndexCol) {
tsdbAddTableIntoIndex(pMeta, pTable, false);
tsdbUnlockRepoMeta(pRepo);
}
// Update on file
int tlen1 = (pNewSchema) ? tsdbGetTableEncodeSize(TSDB_UPDATE_META, pTable->pSuper) : 0;
int tlen2 = tsdbGetTableEncodeSize(TSDB_UPDATE_META, pTable);
void *buf = tsdbAllocBytes(pRepo, tlen1 + tlen2);
ASSERT(buf != NULL);
if (pNewSchema) {
void *pBuf = tsdbInsertTableAct(pRepo, TSDB_UPDATE_META, buf, pTable->pSuper);
ASSERT(POINTER_DISTANCE(pBuf, buf) == tlen1);
buf = pBuf;
}
tsdbInsertTableAct(pRepo, TSDB_UPDATE_META, buf, pTable);
if (tsdbCheckCommit(pRepo) < 0) return -1;
return 0;
}
// ------------------ INTERNAL FUNCTIONS ------------------
static int tsdbInsertNewTableAction(STsdbRepo *pRepo, STable *pTable) {
int tlen = 0;
void *pBuf = NULL;
tlen = tsdbGetTableEncodeSize(TSDB_UPDATE_META, pTable);
pBuf = tsdbAllocBytes(pRepo, tlen);
if (pBuf == NULL) {
return -1;
}
void *tBuf = tsdbInsertTableAct(pRepo, TSDB_UPDATE_META, pBuf, pTable);
ASSERT(POINTER_DISTANCE(tBuf, pBuf) == tlen);
return 0;
}
STsdbMeta *tsdbNewMeta(STsdbCfg *pCfg) {
STsdbMeta *pMeta = (STsdbMeta *)calloc(1, sizeof(*pMeta));
if (pMeta == NULL) {
terrno = TSDB_CODE_TDB_OUT_OF_MEMORY;
goto _err;
}
int code = pthread_rwlock_init(&pMeta->rwLock, NULL);
if (code != 0) {
tsdbError("vgId:%d failed to init TSDB meta r/w lock since %s", pCfg->tsdbId, strerror(code));
terrno = TAOS_SYSTEM_ERROR(code);
goto _err;
}
pMeta->maxTables = TSDB_INIT_NTABLES + 1;
pMeta->tables = (STable **)calloc(pMeta->maxTables, sizeof(STable *));
if (pMeta->tables == NULL) {
terrno = TSDB_CODE_TDB_OUT_OF_MEMORY;
goto _err;
}
pMeta->superList = tdListNew(sizeof(STable *));
if (pMeta->superList == NULL) {
terrno = TSDB_CODE_TDB_OUT_OF_MEMORY;
goto _err;
}
pMeta->uidMap =
taosHashInit((size_t)(TSDB_INIT_NTABLES * 1.1), taosGetDefaultHashFunction(TSDB_DATA_TYPE_BIGINT), true, false);
if (pMeta->uidMap == NULL) {
terrno = TSDB_CODE_TDB_OUT_OF_MEMORY;
goto _err;
}
return pMeta;
_err:
tsdbFreeMeta(pMeta);
return NULL;
}
void tsdbFreeMeta(STsdbMeta *pMeta) {
if (pMeta) {
taosHashCleanup(pMeta->uidMap);
tdListFree(pMeta->superList);
tfree(pMeta->tables);
pthread_rwlock_destroy(&pMeta->rwLock);
free(pMeta);
}
}
int tsdbOpenMeta(STsdbRepo *pRepo) {
return 0;
#if 0
char * fname = NULL;
STsdbMeta *pMeta = pRepo->tsdbMeta;
ASSERT(pMeta != NULL);
fname = tsdbGetMetaFileName(pRepo->rootDir);
if (fname == NULL) {
terrno = TSDB_CODE_TDB_OUT_OF_MEMORY;
goto _err;
}
// pMeta->pStore = tdOpenKVStore(fname, tsdbRestoreTable, tsdbOrgMeta, (void *)pRepo);
// if (pMeta->pStore == NULL) {
// tsdbError("vgId:%d failed to open TSDB meta while open the kv store since %s", REPO_ID(pRepo), tstrerror(terrno));
// goto _err;
// }
tsdbDebug("vgId:%d open TSDB meta succeed", REPO_ID(pRepo));
tfree(fname);
return 0;
_err:
tfree(fname);
return -1;
#endif
}
int tsdbCloseMeta(STsdbRepo *pRepo) {
STsdbMeta *pMeta = pRepo->tsdbMeta;
SListNode *pNode = NULL;
STable * pTable = NULL;
if (pMeta == NULL) return 0;
// tdCloseKVStore(pMeta->pStore);
for (int i = 1; i < pMeta->maxTables; i++) {
tsdbFreeTable(pMeta->tables[i]);
}
while ((pNode = tdListPopHead(pMeta->superList)) != NULL) {
tdListNodeGetData(pMeta->superList, pNode, (void *)(&pTable));
tsdbFreeTable(pTable);
listNodeFree(pNode);
}
tsdbDebug("vgId:%d TSDB meta is closed", REPO_ID(pRepo));
return 0;
}
STable *tsdbGetTableByUid(STsdbMeta *pMeta, uint64_t uid) {
void *ptr = taosHashGet(pMeta->uidMap, (char *)(&uid), sizeof(uid));
if (ptr == NULL) return NULL;
return *(STable **)ptr;
}
STSchema *tsdbGetTableSchemaByVersion(STable *pTable, int16_t _version, int8_t rowType) {
return tsdbGetTableSchemaImpl(pTable, true, false, _version, rowType);
}
int tsdbWLockRepoMeta(STsdbRepo *pRepo) {
int code = pthread_rwlock_wrlock(&(pRepo->tsdbMeta->rwLock));
if (code != 0) {
tsdbError("vgId:%d failed to write lock TSDB meta since %s", REPO_ID(pRepo), strerror(code));
terrno = TAOS_SYSTEM_ERROR(code);
return -1;
}
return 0;
}
int tsdbRLockRepoMeta(STsdbRepo *pRepo) {
int code = pthread_rwlock_rdlock(&(pRepo->tsdbMeta->rwLock));
if (code != 0) {
tsdbError("vgId:%d failed to read lock TSDB meta since %s", REPO_ID(pRepo), strerror(code));
terrno = TAOS_SYSTEM_ERROR(code);
return -1;
}
return 0;
}
int tsdbUnlockRepoMeta(STsdbRepo *pRepo) {
int code = pthread_rwlock_unlock(&(pRepo->tsdbMeta->rwLock));
if (code != 0) {
tsdbError("vgId:%d failed to unlock TSDB meta since %s", REPO_ID(pRepo), strerror(code));
terrno = TAOS_SYSTEM_ERROR(code);
return -1;
}
return 0;
}
void tsdbRefTable(STable *pTable) {
int32_t ref = T_REF_INC(pTable);
UNUSED(ref);
tsdbDebug("ref table %s uid %" PRIu64 " tid:%d, refCount:%d", TABLE_CHAR_NAME(pTable), TABLE_UID(pTable),
TABLE_TID(pTable), ref);
}
void tsdbUnRefTable(STable *pTable) {
uint64_t uid = TABLE_UID(pTable);
int32_t tid = TABLE_TID(pTable);
int32_t ref = T_REF_DEC(pTable);
tsdbDebug("unref table, uid:%" PRIu64 " tid:%d, refCount:%d", uid, tid, ref);
if (ref == 0) {
if (TABLE_TYPE(pTable) == TSDB_CHILD_TABLE) {
tsdbUnRefTable(pTable->pSuper);
}
tsdbFreeTable(pTable);
}
}
void tsdbFreeLastColumns(STable *pTable) {
if (pTable->lastCols == NULL) {
return;
}
for (int i = 0; i < pTable->maxColNum; ++i) {
if (pTable->lastCols[i].bytes == 0) {
continue;
}
tfree(pTable->lastCols[i].pData);
pTable->lastCols[i].bytes = 0;
pTable->lastCols[i].pData = NULL;
}
tfree(pTable->lastCols);
pTable->lastCols = NULL;
pTable->maxColNum = 0;
pTable->lastColSVersion = -1;
pTable->restoreColumnNum = 0;
pTable->hasRestoreLastColumn = false;
}
int16_t tsdbGetLastColumnsIndexByColId(STable *pTable, int16_t colId) {
if (pTable->lastCols == NULL) {
return -1;
}
// TODO: use binary search instead
for (int16_t i = 0; i < pTable->maxColNum; ++i) {
if (pTable->lastCols[i].colId == colId) {
return i;
}
}
return -1;
}
int tsdbInitColIdCacheWithSchema(STable *pTable, STSchema *pSchema) {
TSDB_WLOCK_TABLE(pTable);
if (pTable->lastCols == NULL) {
int16_t numOfColumn = pSchema->numOfCols;
pTable->lastCols = (SDataCol *)malloc(numOfColumn * sizeof(SDataCol));
if (pTable->lastCols == NULL) {
TSDB_WUNLOCK_TABLE(pTable);
return -1;
}
for (int16_t i = 0; i < numOfColumn; ++i) {
STColumn *pCol = schemaColAt(pSchema, i);
SDataCol *pDataCol = &(pTable->lastCols[i]);
pDataCol->bytes = 0;
pDataCol->pData = NULL;
pDataCol->colId = pCol->colId;
}
pTable->lastColSVersion = schemaVersion(pSchema);
pTable->maxColNum = numOfColumn;
pTable->restoreColumnNum = 0;
pTable->hasRestoreLastColumn = false;
}
TSDB_WUNLOCK_TABLE(pTable);
return 0;
}
STSchema *tsdbGetTableLatestSchema(STable *pTable) { return tsdbGetTableSchemaByVersion(pTable, -1, -1); }
int tsdbUpdateLastColSchema(STable *pTable, STSchema *pNewSchema) {
if (pTable->lastColSVersion == schemaVersion(pNewSchema)) {
return 0;
}
tsdbDebug("tsdbUpdateLastColSchema:%s,%d->%d", pTable->name->data, pTable->lastColSVersion,
schemaVersion(pNewSchema));
int16_t numOfCols = pNewSchema->numOfCols;
SDataCol *lastCols = (SDataCol *)malloc(numOfCols * sizeof(SDataCol));
if (lastCols == NULL) {
return -1;
}
TSDB_WLOCK_TABLE(pTable);
for (int16_t i = 0; i < numOfCols; ++i) {
STColumn *pCol = schemaColAt(pNewSchema, i);
int16_t idx = tsdbGetLastColumnsIndexByColId(pTable, pCol->colId);
SDataCol *pDataCol = &(lastCols[i]);
if (idx != -1) {
// move col data to new last column array
SDataCol *pOldDataCol = &(pTable->lastCols[idx]);
memcpy(pDataCol, pOldDataCol, sizeof(SDataCol));
} else {
// init new colid data
pDataCol->colId = pCol->colId;
pDataCol->bytes = 0;
pDataCol->pData = NULL;
}
}
SDataCol *oldLastCols = pTable->lastCols;
int16_t oldLastColNum = pTable->maxColNum;
pTable->lastColSVersion = schemaVersion(pNewSchema);
pTable->lastCols = lastCols;
pTable->maxColNum = numOfCols;
if (oldLastCols == NULL) {
TSDB_WUNLOCK_TABLE(pTable);
return 0;
}
// free old schema last column datas
for (int16_t i = 0; i < oldLastColNum; ++i) {
SDataCol *pDataCol = &(oldLastCols[i]);
if (pDataCol->bytes == 0) {
continue;
}
int16_t idx = tsdbGetLastColumnsIndexByColId(pTable, pDataCol->colId);
if (idx != -1) {
continue;
}
// free not exist column data
tfree(pDataCol->pData);
}
TSDB_WUNLOCK_TABLE(pTable);
tfree(oldLastCols);
return 0;
}
void tsdbUpdateTableSchema(STsdbRepo *pRepo, STable *pTable, STSchema *pSchema, bool insertAct) {
ASSERT(TABLE_TYPE(pTable) != TSDB_STREAM_TABLE && TABLE_TYPE(pTable) != TSDB_SUPER_TABLE);
STsdbMeta *pMeta = pRepo->tsdbMeta;
STable *pCTable = (TABLE_TYPE(pTable) == TSDB_CHILD_TABLE) ? pTable->pSuper : pTable;
ASSERT(schemaVersion(pSchema) > schemaVersion(*(STSchema **)taosArrayGetLast(pCTable->schema)));
TSDB_WLOCK_TABLE(pCTable);
tsdbAddSchema(pCTable, pSchema);
if (schemaNCols(pSchema) > pMeta->maxCols) pMeta->maxCols = schemaNCols(pSchema);
if (schemaTLen(pSchema) > pMeta->maxRowBytes) pMeta->maxRowBytes = schemaTLen(pSchema);
TSDB_WUNLOCK_TABLE(pCTable);
if (insertAct) {
if (tsdbInsertNewTableAction(pRepo, pCTable) != 0) {
tsdbError("vgId:%d table %s tid %d uid %" PRIu64 " tsdbInsertNewTableAction fail", REPO_ID(pRepo),
TABLE_CHAR_NAME(pTable), TABLE_TID(pTable), TABLE_UID(pTable));
}
}
}
int tsdbRestoreTable(STsdbRepo *pRepo, void *cont, int contLen) {
STable *pTable = NULL;
if (!taosCheckChecksumWhole((uint8_t *)cont, contLen)) {
terrno = TSDB_CODE_TDB_FILE_CORRUPTED;
return -1;
}
tsdbDecodeTable(cont, &pTable);
if (tsdbAddTableToMeta(pRepo, pTable, false, false) < 0) {
tsdbFreeTable(pTable);
return -1;
}
tsdbTrace("vgId:%d table %s tid %d uid %" PRIu64 " is restored from file", REPO_ID(pRepo), TABLE_CHAR_NAME(pTable),
TABLE_TID(pTable), TABLE_UID(pTable));
return 0;
}
void tsdbOrgMeta(STsdbRepo *pRepo) {
STsdbMeta *pMeta = pRepo->tsdbMeta;
for (int i = 1; i < pMeta->maxTables; i++) {
STable *pTable = pMeta->tables[i];
if (pTable != NULL && pTable->type == TSDB_CHILD_TABLE) {
tsdbAddTableIntoIndex(pMeta, pTable, true);
}
}
}
// ------------------ LOCAL FUNCTIONS ------------------
static char *getTagIndexKey(const void *pData) {
STable *pTable = (STable *)pData;
STSchema *pSchema = tsdbGetTableTagSchema(pTable);
STColumn *pCol = schemaColAt(pSchema, DEFAULT_TAG_INDEX_COLUMN);
void * res = tdGetKVRowValOfCol(pTable->tagVal, pCol->colId);
if (res == NULL) {
// treat the column as NULL if we cannot find it
res = (char *)getNullValue(pCol->type);
}
return res;
}
static STable *tsdbNewTable() {
STable *pTable = (STable *)calloc(1, sizeof(*pTable));
if (pTable == NULL) {
terrno = TSDB_CODE_TDB_OUT_OF_MEMORY;
return NULL;
}
pTable->lastKey = TSKEY_INITIAL_VAL;
pTable->lastCols = NULL;
pTable->restoreColumnNum = 0;
pTable->cacheLastConfigVersion = 0;
pTable->maxColNum = 0;
pTable->hasRestoreLastColumn = false;
pTable->lastColSVersion = -1;
return pTable;
}
static STable *tsdbCreateTableFromCfg(STableCfg *pCfg, bool isSuper, STable *pSTable) {
STable *pTable = NULL;
size_t tsize = 0;
pTable = tsdbNewTable();
if (pTable == NULL) goto _err;
if (isSuper) {
pTable->type = TSDB_SUPER_TABLE;
tsize = strnlen(pCfg->sname, TSDB_TABLE_NAME_LEN - 1);
pTable->name = calloc(1, tsize + VARSTR_HEADER_SIZE + 1);
if (pTable->name == NULL) {
terrno = TSDB_CODE_TDB_OUT_OF_MEMORY;
goto _err;
}
STR_WITH_SIZE_TO_VARSTR(pTable->name, pCfg->sname, (VarDataLenT)tsize);
TABLE_UID(pTable) = pCfg->superUid;
TABLE_TID(pTable) = -1;
TABLE_SUID(pTable) = -1;
pTable->pSuper = NULL;
if (tsdbAddSchema(pTable, tdDupSchema(pCfg->schema)) < 0) {
terrno = TSDB_CODE_TDB_OUT_OF_MEMORY;
goto _err;
}
pTable->tagSchema = tdDupSchema(pCfg->tagSchema);
if (pTable->tagSchema == NULL) {
terrno = TSDB_CODE_TDB_OUT_OF_MEMORY;
goto _err;
}
pTable->tagVal = NULL;
STColumn *pCol = schemaColAt(pTable->tagSchema, DEFAULT_TAG_INDEX_COLUMN);
if (pCol->type == TSDB_DATA_TYPE_JSON) {
assert(pTable->tagSchema->numOfCols == 1);
pTable->jsonKeyMap = taosHashInit(8, taosGetDefaultHashFunction(TSDB_DATA_TYPE_BINARY), true, HASH_ENTRY_LOCK);
if (pTable->jsonKeyMap == NULL) {
terrno = TSDB_CODE_TDB_OUT_OF_MEMORY;
tsdbFreeTable(pTable);
return NULL;
}
// taosHashSetFreeFp(pTable->jsonKeyMap, taosArrayDestroyForHash);
} else {
pTable->pIndex = tSkipListCreate(TSDB_SUPER_TABLE_SL_LEVEL, colType(pCol), (uint8_t)(colBytes(pCol)), NULL,
SL_ALLOW_DUP_KEY, getTagIndexKey);
if (pTable->pIndex == NULL) {
terrno = TSDB_CODE_TDB_OUT_OF_MEMORY;
goto _err;
}
}
} else {
pTable->type = pCfg->type;
tsize = strnlen(pCfg->name, TSDB_TABLE_NAME_LEN - 1);
pTable->name = calloc(1, tsize + VARSTR_HEADER_SIZE + 1);
if (pTable->name == NULL) {
terrno = TSDB_CODE_TDB_OUT_OF_MEMORY;
goto _err;
}
STR_WITH_SIZE_TO_VARSTR(pTable->name, pCfg->name, (VarDataLenT)tsize);
TABLE_UID(pTable) = pCfg->tableId.uid;
TABLE_TID(pTable) = pCfg->tableId.tid;
if (pCfg->type == TSDB_CHILD_TABLE) {
TABLE_SUID(pTable) = pCfg->superUid;
if (tsdbCheckTableTagVal(pCfg->tagValues, pSTable->tagSchema) < 0) {
goto _err;
}
pTable->tagVal = tdKVRowDup(pCfg->tagValues);
if (pTable->tagVal == NULL) {
terrno = TSDB_CODE_TDB_OUT_OF_MEMORY;
goto _err;
}
} else {
TABLE_SUID(pTable) = -1;
if (tsdbAddSchema(pTable, tdDupSchema(pCfg->schema)) < 0) {
terrno = TSDB_CODE_TDB_OUT_OF_MEMORY;
goto _err;
}
if (TABLE_TYPE(pTable) == TSDB_STREAM_TABLE) {
pTable->sql = strdup(pCfg->sql);
if (pTable->sql == NULL) {
terrno = TSDB_CODE_TDB_OUT_OF_MEMORY;
goto _err;
}
}
}
}
T_REF_INC(pTable);
tsdbDebug("table %s tid %d uid %" PRIu64 " is created", TABLE_CHAR_NAME(pTable), TABLE_TID(pTable),
TABLE_UID(pTable));
return pTable;
_err:
tsdbFreeTable(pTable);
return NULL;
}
static void tsdbFreeTable(STable *pTable) {
if (pTable) {
if (pTable->name != NULL)
tsdbTrace("table %s tid %d uid %" PRIu64 " is freed", TABLE_CHAR_NAME(pTable), TABLE_TID(pTable),
TABLE_UID(pTable));
tfree(TABLE_NAME(pTable));
if (TABLE_TYPE(pTable) != TSDB_CHILD_TABLE) {
tsdbFreeTableSchema(pTable);
if (TABLE_TYPE(pTable) == TSDB_SUPER_TABLE) {
tdFreeSchema(pTable->tagSchema);
}
}
kvRowFree(pTable->tagVal);
tSkipListDestroy(pTable->pIndex);
taosHashCleanup(pTable->jsonKeyMap);
taosTZfree(pTable->lastRow);
tfree(pTable->sql);
tsdbFreeLastColumns(pTable);
free(pTable);
}
}
static int tsdbAddTableToMeta(STsdbRepo *pRepo, STable *pTable, bool addIdx, bool lock) {
STsdbMeta *pMeta = pRepo->tsdbMeta;
if (lock && tsdbWLockRepoMeta(pRepo) < 0) {
tsdbError("vgId:%d failed to add table %s to meta since %s", REPO_ID(pRepo), TABLE_CHAR_NAME(pTable),
tstrerror(terrno));
return -1;
}
if (TABLE_TYPE(pTable) == TSDB_SUPER_TABLE) {
if (tdListAppend(pMeta->superList, (void *)(&pTable)) < 0) {
terrno = TSDB_CODE_TDB_OUT_OF_MEMORY;
tsdbError("vgId:%d failed to add table %s to meta since %s", REPO_ID(pRepo), TABLE_CHAR_NAME(pTable),
tstrerror(terrno));
goto _err;
}
} else {
if (TABLE_TID(pTable) >= pMeta->maxTables) {
if (tsdbAdjustMetaTables(pRepo, TABLE_TID(pTable)) < 0) goto _err;
}
if (TABLE_TYPE(pTable) == TSDB_CHILD_TABLE && addIdx) { // add STABLE to the index
if (tsdbAddTableIntoIndex(pMeta, pTable, true) < 0) {
tsdbDebug("vgId:%d failed to add table %s to meta while add table to index since %s", REPO_ID(pRepo),
TABLE_CHAR_NAME(pTable), tstrerror(terrno));
goto _err;
}
}
ASSERT(TABLE_TID(pTable) < pMeta->maxTables);
pMeta->tables[TABLE_TID(pTable)] = pTable;
pMeta->nTables++;
}
if (taosHashPut(pMeta->uidMap, (char *)(&pTable->tableId.uid), sizeof(pTable->tableId.uid), (void *)(&pTable),
sizeof(pTable)) < 0) {
terrno = TSDB_CODE_TDB_OUT_OF_MEMORY;
tsdbError("vgId:%d failed to add table %s to meta while put into uid map since %s", REPO_ID(pRepo),
TABLE_CHAR_NAME(pTable), tstrerror(terrno));
goto _err;
}
if (TABLE_TYPE(pTable) != TSDB_CHILD_TABLE) {
STSchema *pSchema = tsdbGetTableSchemaImpl(pTable, false, false, -1, -1);
if (schemaNCols(pSchema) > pMeta->maxCols) pMeta->maxCols = schemaNCols(pSchema);
if (schemaTLen(pSchema) > pMeta->maxRowBytes) pMeta->maxRowBytes = schemaTLen(pSchema);
}
if (lock && tsdbUnlockRepoMeta(pRepo) < 0) return -1;
if (TABLE_TYPE(pTable) == TSDB_STREAM_TABLE && addIdx) {
pTable->cqhandle =
(*pRepo->appH.cqCreateFunc)(pRepo->appH.cqH, TABLE_UID(pTable), TABLE_TID(pTable), TABLE_NAME(pTable)->data,
pTable->sql, tsdbGetTableSchemaImpl(pTable, false, false, -1, -1), 1);
}
tsdbDebug("vgId:%d table %s tid %d uid %" PRIu64 " is added to meta", REPO_ID(pRepo), TABLE_CHAR_NAME(pTable),
TABLE_TID(pTable), TABLE_UID(pTable));
return 0;
_err:
tsdbRemoveTableFromMeta(pRepo, pTable, false, false);
if (lock) tsdbUnlockRepoMeta(pRepo);
return -1;
}
static void tsdbRemoveTableFromMeta(STsdbRepo *pRepo, STable *pTable, bool rmFromIdx, bool lock) {
STsdbMeta *pMeta = pRepo->tsdbMeta;
SListIter lIter = {0};
SListNode *pNode = NULL;
STable * tTable = NULL;
STSchema *pSchema = tsdbGetTableSchemaImpl(pTable, false, false, -1, -1);
int maxCols = schemaNCols(pSchema);
int maxRowBytes = schemaTLen(pSchema);
if (lock) tsdbWLockRepoMeta(pRepo);
if (TABLE_TYPE(pTable) == TSDB_SUPER_TABLE) {
tdListInitIter(pMeta->superList, &lIter, TD_LIST_BACKWARD);
while ((pNode = tdListNext(&lIter)) != NULL) {
tdListNodeGetData(pMeta->superList, pNode, (void *)(&tTable));
if (pTable == tTable) {
tdListPopNode(pMeta->superList, pNode);
free(pNode);
break;
}
}
} else {
pMeta->tables[pTable->tableId.tid] = NULL;
if (TABLE_TYPE(pTable) == TSDB_CHILD_TABLE && rmFromIdx) {
tsdbRemoveTableFromIndex(pMeta, pTable);
}
pMeta->nTables--;
}
taosHashRemove(pMeta->uidMap, (char *)(&(TABLE_UID(pTable))), sizeof(TABLE_UID(pTable)));
if (maxCols == pMeta->maxCols || maxRowBytes == pMeta->maxRowBytes) {
maxCols = 0;
maxRowBytes = 0;
for (int i = 0; i < pMeta->maxTables; i++) {
STable *_pTable = pMeta->tables[i];
if (_pTable != NULL) {
pSchema = tsdbGetTableSchemaImpl(_pTable, false, false, -1, -1);
maxCols = MAX(maxCols, schemaNCols(pSchema));
maxRowBytes = MAX(maxRowBytes, schemaTLen(pSchema));
}
}
}
pMeta->maxCols = maxCols;
pMeta->maxRowBytes = maxRowBytes;
if (lock) tsdbUnlockRepoMeta(pRepo);
tsdbDebug("vgId:%d table %s uid %" PRIu64 " is removed from meta", REPO_ID(pRepo), TABLE_CHAR_NAME(pTable),
TABLE_UID(pTable));
tsdbUnRefTable(pTable);
}
void *tsdbGetJsonTagValue(STable *pTable, char *key, int32_t keyLen, int16_t *retColId) {
assert(TABLE_TYPE(pTable) == TSDB_CHILD_TABLE);
STable * superTable = pTable->pSuper;
SArray **data = (SArray **)taosHashGet(superTable->jsonKeyMap, key, keyLen);
if (data == NULL) return NULL;
JsonMapValue jmvalue = {pTable, 0};
JsonMapValue *p = taosArraySearch(*data, &jmvalue, tsdbCompareJsonMapValue, TD_EQ);
if (p == NULL) return NULL;
int16_t colId = p->colId + 1;
if (retColId) *retColId = p->colId;
return tdGetKVRowValOfCol(pTable->tagVal, colId);
}
int tsdbCompareJsonMapValue(const void *a, const void *b) {
const JsonMapValue *x = (const JsonMapValue *)a;
const JsonMapValue *y = (const JsonMapValue *)b;
if (x->table > y->table) return 1;
if (x->table < y->table) return -1;
return 0;
}
static int tsdbAddTableIntoIndex(STsdbMeta *pMeta, STable *pTable, bool refSuper) {
ASSERT(pTable->type == TSDB_CHILD_TABLE && pTable != NULL);
STable *pSTable = tsdbGetTableByUid(pMeta, TABLE_SUID(pTable));
ASSERT(pSTable != NULL);
pTable->pSuper = pSTable;
if (refSuper) T_REF_INC(pSTable);
if (pSTable->tagSchema->columns[0].type == TSDB_DATA_TYPE_JSON) {
ASSERT(pSTable->tagSchema->numOfCols == 1);
int16_t nCols = kvRowNCols(pTable->tagVal);
ASSERT(nCols % 2 == 1);
// check first
for (int j = 0; j < nCols; ++j) {
if (j != 0 && j % 2 == 0) continue; // jump value
SColIdx *pColIdx = kvRowColIdxAt(pTable->tagVal, j);
void * val = (kvRowColVal(pTable->tagVal, pColIdx));
if (j == 0) { // json value is the first
int8_t jsonPlaceHolder = *(int8_t *)val;
ASSERT(jsonPlaceHolder == TSDB_DATA_JSON_PLACEHOLDER);
continue;
}
if (j == 1) {
uint32_t jsonNULL = *(uint32_t *)(varDataVal(val));
ASSERT(jsonNULL == TSDB_DATA_JSON_NULL);
}
// then insert
char keyMd5[TSDB_MAX_JSON_KEY_MD5_LEN] = {0};
jsonKeyMd5(varDataVal(val), varDataLen(val), keyMd5);
SArray * tablistNew = NULL;
SArray **tablist = (SArray **)taosHashGet(pSTable->jsonKeyMap, keyMd5, TSDB_MAX_JSON_KEY_MD5_LEN);
if (tablist == NULL) {
tablistNew = taosArrayInit(8, sizeof(JsonMapValue));
if (tablistNew == NULL) {
terrno = TSDB_CODE_TDB_OUT_OF_MEMORY;
tsdbError("out of memory when alloc json tag array");
return -1;
}
if (taosHashPut(pSTable->jsonKeyMap, keyMd5, TSDB_MAX_JSON_KEY_MD5_LEN, &tablistNew, sizeof(void *)) < 0) {
terrno = TSDB_CODE_TDB_OUT_OF_MEMORY;
tsdbError("out of memory when put json tag array");
return -1;
}
} else {
tablistNew = *tablist;
}
JsonMapValue jmvalue = {pTable, pColIdx->colId};
void * p = taosArraySearch(tablistNew, &jmvalue, tsdbCompareJsonMapValue, TD_EQ);
if (p == NULL) {
p = taosArraySearch(tablistNew, &jmvalue, tsdbCompareJsonMapValue, TD_GE);
if (p == NULL) {
taosArrayPush(tablistNew, &jmvalue);
} else {
taosArrayInsert(tablistNew, TARRAY_ELEM_IDX(tablistNew, p), &jmvalue);
}
} else {
tsdbError("insert dumplicate");
}
}
} else {
tSkipListPut(pSTable->pIndex, (void *)pTable);
}
return 0;
}
static int tsdbRemoveTableFromIndex(STsdbMeta *pMeta, STable *pTable) {
ASSERT(pTable->type == TSDB_CHILD_TABLE && pTable != NULL);
STable *pSTable = pTable->pSuper;
ASSERT(pSTable != NULL);
if (pSTable->tagSchema->columns[0].type == TSDB_DATA_TYPE_JSON) {
ASSERT(pSTable->tagSchema->numOfCols == 1);
int16_t nCols = kvRowNCols(pTable->tagVal);
ASSERT(nCols % 2 == 1);
for (int j = 0; j < nCols; ++j) {
if (j != 0 && j % 2 == 0) continue; // jump value
SColIdx *pColIdx = kvRowColIdxAt(pTable->tagVal, j);
void * val = (kvRowColVal(pTable->tagVal, pColIdx));
if (j == 0) { // json value is the first
int8_t jsonPlaceHolder = *(int8_t *)val;
ASSERT(jsonPlaceHolder == TSDB_DATA_JSON_PLACEHOLDER);
continue;
}
if (j == 1) {
uint32_t jsonNULL = *(uint32_t *)(varDataVal(val));
ASSERT(jsonNULL == TSDB_DATA_JSON_NULL);
}
char keyMd5[TSDB_MAX_JSON_KEY_MD5_LEN] = {0};
jsonKeyMd5(varDataVal(val), varDataLen(val), keyMd5);
SArray **tablist = (SArray **)taosHashGet(pSTable->jsonKeyMap, keyMd5, TSDB_MAX_JSON_KEY_MD5_LEN);
if (tablist == NULL) {
tsdbError("json tag no key error,%d", j);
continue;
}
JsonMapValue jmvalue = {pTable, pColIdx->colId};
void * p = taosArraySearch(*tablist, &jmvalue, tsdbCompareJsonMapValue, TD_EQ);
if (p == NULL) {
tsdbError("json tag no tableid error,%d", j);
continue;
}
taosArrayRemove(*tablist, TARRAY_ELEM_IDX(*tablist, p));
}
} else {
char * key = getTagIndexKey(pTable);
SArray *res = tSkipListGet(pSTable->pIndex, key);
size_t size = taosArrayGetSize(res);
ASSERT(size > 0);
for (int32_t i = 0; i < size; ++i) {
SSkipListNode *pNode = taosArrayGetP(res, i);
// STableIndexElem* pElem = (STableIndexElem*) SL_GET_NODE_DATA(pNode);
if ((STable *)SL_GET_NODE_DATA(pNode) == pTable) { // this is the exact what we need
tSkipListRemoveNode(pSTable->pIndex, pNode);
}
}
taosArrayDestroy(res);
}
return 0;
}
static int tsdbInitTableCfg(STableCfg *config, ETableType type, uint64_t uid, int32_t tid) {
if (type != TSDB_CHILD_TABLE && type != TSDB_NORMAL_TABLE && type != TSDB_STREAM_TABLE) {
terrno = TSDB_CODE_TDB_INVALID_TABLE_TYPE;
return -1;
}
memset((void *)config, 0, sizeof(*config));
config->type = type;
config->superUid = TSDB_INVALID_SUPER_TABLE_ID;
config->tableId.uid = uid;
config->tableId.tid = tid;
return 0;
}
static int tsdbTableSetSchema(STableCfg *config, STSchema *pSchema, bool dup) {
if (dup) {
config->schema = tdDupSchema(pSchema);
if (config->schema == NULL) {
terrno = TSDB_CODE_TDB_OUT_OF_MEMORY;
return -1;
}
} else {
config->schema = pSchema;
}
return 0;
}
static int tsdbTableSetName(STableCfg *config, char *name, bool dup) {
if (dup) {
config->name = strdup(name);
if (config->name == NULL) {
terrno = TSDB_CODE_TDB_OUT_OF_MEMORY;
return -1;
}
} else {
config->name = name;
}
return 0;
}
static int tsdbTableSetTagSchema(STableCfg *config, STSchema *pSchema, bool dup) {
if (config->type != TSDB_CHILD_TABLE) {
terrno = TSDB_CODE_TDB_INVALID_CREATE_TB_MSG;
return -1;
}
if (dup) {
config->tagSchema = tdDupSchema(pSchema);
if (config->tagSchema == NULL) {
terrno = TSDB_CODE_TDB_OUT_OF_MEMORY;
return -1;
}
} else {
config->tagSchema = pSchema;
}
return 0;
}
static int tsdbTableSetSName(STableCfg *config, char *sname, bool dup) {
if (config->type != TSDB_CHILD_TABLE) {
terrno = TSDB_CODE_TDB_INVALID_CREATE_TB_MSG;
return -1;
}
if (dup) {
config->sname = strdup(sname);
if (config->sname == NULL) {
terrno = TSDB_CODE_TDB_OUT_OF_MEMORY;
return -1;
}
} else {
config->sname = sname;
}
return 0;
}
static int tsdbTableSetSuperUid(STableCfg *config, uint64_t uid) {
if (config->type != TSDB_CHILD_TABLE || uid == TSDB_INVALID_SUPER_TABLE_ID) {
terrno = TSDB_CODE_TDB_INVALID_CREATE_TB_MSG;
return -1;
}
config->superUid = uid;
return 0;
}
static int tsdbTableSetTagValue(STableCfg *config, SKVRow row, bool dup) {
if (config->type != TSDB_CHILD_TABLE) {
terrno = TSDB_CODE_TDB_INVALID_CREATE_TB_MSG;
return -1;
}
if (dup) {
config->tagValues = tdKVRowDup(row);
if (config->tagValues == NULL) {
terrno = TSDB_CODE_TDB_OUT_OF_MEMORY;
return -1;
}
} else {
config->tagValues = row;
}
return 0;
}
static int tsdbTableSetStreamSql(STableCfg *config, char *sql, bool dup) {
if (config->type != TSDB_STREAM_TABLE) {
terrno = TSDB_CODE_TDB_INVALID_CREATE_TB_MSG;
return -1;
}
if (dup) {
config->sql = strdup(sql);
if (config->sql == NULL) {
terrno = TSDB_CODE_TDB_OUT_OF_MEMORY;
return -1;
}
} else {
config->sql = sql;
}
return 0;
}
void tsdbClearTableCfg(STableCfg *config) {
if (config) {
if (config->schema) tdFreeSchema(config->schema);
if (config->tagSchema) tdFreeSchema(config->tagSchema);
if (config->tagValues) kvRowFree(config->tagValues);
tfree(config->name);
tfree(config->sname);
tfree(config->sql);
free(config);
}
}
static int tsdbEncodeTableName(void **buf, tstr *name) {
int tlen = 0;
tlen += taosEncodeFixedI16(buf, name->len);
if (buf != NULL) {
memcpy(*buf, name->data, name->len);
*buf = POINTER_SHIFT(*buf, name->len);
}
tlen += name->len;
return tlen;
}
static void *tsdbDecodeTableName(void *buf, tstr **name) {
VarDataLenT len = 0;
buf = taosDecodeFixedI16(buf, &len);
*name = calloc(1, sizeof(tstr) + len + 1);
if (*name == NULL) {
terrno = TSDB_CODE_TDB_OUT_OF_MEMORY;
return NULL;
}
(*name)->len = len;
memcpy((*name)->data, buf, len);
buf = POINTER_SHIFT(buf, len);
return buf;
}
static int tsdbEncodeTable(void **buf, STable *pTable) {
ASSERT(pTable != NULL);
int tlen = 0;
tlen += taosEncodeFixedU8(buf, pTable->type);
tlen += tsdbEncodeTableName(buf, pTable->name);
tlen += taosEncodeFixedU64(buf, TABLE_UID(pTable));
tlen += taosEncodeFixedI32(buf, TABLE_TID(pTable));
if (TABLE_TYPE(pTable) == TSDB_CHILD_TABLE) {
tlen += taosEncodeFixedU64(buf, TABLE_SUID(pTable));
tlen += tdEncodeKVRow(buf, pTable->tagVal);
} else {
uint32_t arraySize = (uint32_t)taosArrayGetSize(pTable->schema);
if (arraySize > UINT8_MAX) {
tlen += taosEncodeFixedU8(buf, 0);
tlen += taosEncodeFixedU32(buf, arraySize);
} else {
tlen += taosEncodeFixedU8(buf, (uint8_t)arraySize);
}
for (uint32_t i = 0; i < arraySize; i++) {
STSchema *pSchema = taosArrayGetP(pTable->schema, i);
tlen += tdEncodeSchema(buf, pSchema);
}
if (TABLE_TYPE(pTable) == TSDB_SUPER_TABLE) {
tlen += tdEncodeSchema(buf, pTable->tagSchema);
}
if (TABLE_TYPE(pTable) == TSDB_STREAM_TABLE) {
tlen += taosEncodeString(buf, pTable->sql);
}
}
return tlen;
}
static void *tsdbDecodeTable(void *buf, STable **pRTable) {
STable *pTable = tsdbNewTable();
if (pTable == NULL) return NULL;
uint8_t type = 0;
buf = taosDecodeFixedU8(buf, &type);
pTable->type = type;
buf = tsdbDecodeTableName(buf, &(pTable->name));
buf = taosDecodeFixedU64(buf, &TABLE_UID(pTable));
buf = taosDecodeFixedI32(buf, &TABLE_TID(pTable));
if (TABLE_TYPE(pTable) == TSDB_CHILD_TABLE) {
buf = taosDecodeFixedU64(buf, &TABLE_SUID(pTable));
buf = tdDecodeKVRow(buf, &(pTable->tagVal));
} else {
uint32_t nSchemas = 0;
buf = taosDecodeFixedU8(buf, (uint8_t *)&nSchemas);
if (nSchemas == 0) {
buf = taosDecodeFixedU32(buf, &nSchemas);
}
for (int i = 0; i < nSchemas; i++) {
STSchema *pSchema;
buf = tdDecodeSchema(buf, &pSchema);
tsdbAddSchema(pTable, pSchema);
}
if (TABLE_TYPE(pTable) == TSDB_SUPER_TABLE) {
buf = tdDecodeSchema(buf, &(pTable->tagSchema));
STColumn *pCol = schemaColAt(pTable->tagSchema, DEFAULT_TAG_INDEX_COLUMN);
if (pCol->type == TSDB_DATA_TYPE_JSON) {
assert(pTable->tagSchema->numOfCols == 1);
pTable->jsonKeyMap = taosHashInit(8, taosGetDefaultHashFunction(TSDB_DATA_TYPE_BINARY), true, HASH_ENTRY_LOCK);
if (pTable->jsonKeyMap == NULL) {
terrno = TSDB_CODE_TDB_OUT_OF_MEMORY;
tsdbFreeTable(pTable);
return NULL;
}
// taosHashSetFreeFp(pTable->jsonKeyMap, taosArrayDestroyForHash);
} else {
pTable->pIndex = tSkipListCreate(TSDB_SUPER_TABLE_SL_LEVEL, colType(pCol), (uint8_t)(colBytes(pCol)), NULL,
SL_ALLOW_DUP_KEY, getTagIndexKey);
if (pTable->pIndex == NULL) {
terrno = TSDB_CODE_TDB_OUT_OF_MEMORY;
tsdbFreeTable(pTable);
return NULL;
}
}
}
if (TABLE_TYPE(pTable) == TSDB_STREAM_TABLE) {
buf = taosDecodeString(buf, &(pTable->sql));
}
}
T_REF_INC(pTable);
*pRTable = pTable;
return buf;
}
static SArray *getJsonTagTableList(STable *pTable) {
uint32_t key = TSDB_DATA_JSON_NULL;
char keyMd5[TSDB_MAX_JSON_KEY_MD5_LEN] = {0};
jsonKeyMd5(&key, INT_BYTES, keyMd5);
SArray **tablist = (SArray **)taosHashGet(pTable->jsonKeyMap, keyMd5, TSDB_MAX_JSON_KEY_MD5_LEN);
return *tablist;
}
static int tsdbGetTableEncodeSize(int8_t act, STable *pTable) {
int tlen = 0;
if (act == TSDB_UPDATE_META) {
tlen = sizeof(SListNode) + sizeof(SActObj) + sizeof(SActCont) + tsdbEncodeTable(NULL, pTable) + sizeof(TSCKSUM);
} else {
if (TABLE_TYPE(pTable) == TSDB_SUPER_TABLE) {
size_t tableSize = 0;
if (pTable->tagSchema->columns[0].type == TSDB_DATA_TYPE_JSON) {
SArray *tablist = getJsonTagTableList(pTable);
tableSize = taosArrayGetSize(tablist);
} else {
tableSize = SL_SIZE(pTable->pIndex);
}
tlen = (int)((sizeof(SListNode) + sizeof(SActObj)) * (tableSize + 1));
} else {
tlen = sizeof(SListNode) + sizeof(SActObj);
}
}
return tlen;
}
static void *tsdbInsertTableAct(STsdbRepo *pRepo, int8_t act, void *buf, STable *pTable) {
SListNode *pNode = (SListNode *)buf;
SActObj * pAct = (SActObj *)(pNode->data);
SActCont * pCont = (SActCont *)POINTER_SHIFT(pAct, sizeof(*pAct));
void * pBuf = (void *)pCont;
TD_DLIST_NODE_PREV(pNode) = TD_DLIST_NODE_NEXT(pNode) = NULL;
pAct->act = act;
pAct->uid = TABLE_UID(pTable);
if (act == TSDB_UPDATE_META) {
pBuf = (void *)(pCont->cont);
pCont->len = tsdbEncodeTable(&pBuf, pTable) + sizeof(TSCKSUM);
taosCalcChecksumAppend(0, (uint8_t *)pCont->cont, pCont->len);
pBuf = POINTER_SHIFT(pBuf, sizeof(TSCKSUM));
}
tdListAppendNode(pRepo->mem->actList, pNode);
return pBuf;
}
static int tsdbRemoveTableFromStore(STsdbRepo *pRepo, STable *pTable) {
int tlen = tsdbGetTableEncodeSize(TSDB_DROP_META, pTable);
void *buf = tsdbAllocBytes(pRepo, tlen);
if (buf == NULL) {
return -1;
}
void *pBuf = buf;
if (TABLE_TYPE(pTable) == TSDB_SUPER_TABLE) {
if (pTable->tagSchema->columns[0].type == TSDB_DATA_TYPE_JSON) {
SArray *tablist = getJsonTagTableList(pTable);
for (int i = 0; i < taosArrayGetSize(tablist); ++i) {
JsonMapValue *p = taosArrayGet(tablist, i);
ASSERT(TABLE_TYPE((STable *)(p->table)) == TSDB_CHILD_TABLE);
pBuf = tsdbInsertTableAct(pRepo, TSDB_DROP_META, pBuf, p->table);
}
} else {
SSkipListIterator *pIter = tSkipListCreateIter(pTable->pIndex);
if (pIter == NULL) {
terrno = TSDB_CODE_TDB_OUT_OF_MEMORY;
return -1;
}
while (tSkipListIterNext(pIter)) {
STable *tTable = (STable *)SL_GET_NODE_DATA(tSkipListIterGet(pIter));
ASSERT(TABLE_TYPE(tTable) == TSDB_CHILD_TABLE);
pBuf = tsdbInsertTableAct(pRepo, TSDB_DROP_META, pBuf, tTable);
}
tSkipListDestroyIter(pIter);
}
}
pBuf = tsdbInsertTableAct(pRepo, TSDB_DROP_META, pBuf, pTable);
ASSERT(POINTER_DISTANCE(pBuf, buf) == tlen);
return 0;
}
static int tsdbRmTableFromMeta(STsdbRepo *pRepo, STable *pTable) {
if (TABLE_TYPE(pTable) == TSDB_SUPER_TABLE) {
tsdbWLockRepoMeta(pRepo);
if (pTable->tagSchema->columns[0].type == TSDB_DATA_TYPE_JSON) {
SArray *tablist = getJsonTagTableList(pTable);
for (int i = 0; i < taosArrayGetSize(tablist); ++i) {
JsonMapValue *p = taosArrayGet(tablist, i);
tsdbRemoveTableFromMeta(pRepo, p->table, false, false);
}
} else {
SSkipListIterator *pIter = tSkipListCreateIter(pTable->pIndex);
if (pIter == NULL) {
terrno = TSDB_CODE_TDB_OUT_OF_MEMORY;
return -1;
}
while (tSkipListIterNext(pIter)) {
STable *tTable = (STable *)SL_GET_NODE_DATA(tSkipListIterGet(pIter));
tsdbRemoveTableFromMeta(pRepo, tTable, false, false);
}
tSkipListDestroyIter(pIter);
}
tsdbRemoveTableFromMeta(pRepo, pTable, false, false);
tsdbUnlockRepoMeta(pRepo);
} else {
if ((TABLE_TYPE(pTable) == TSDB_STREAM_TABLE) && pTable->cqhandle) pRepo->appH.cqDropFunc(pTable->cqhandle);
tsdbRemoveTableFromMeta(pRepo, pTable, true, true);
}
return 0;
}
static int tsdbAdjustMetaTables(STsdbRepo *pRepo, int tid) {
#if 0
STsdbMeta *pMeta = pRepo->tsdbMeta;
ASSERT(tid >= pMeta->maxTables);
int maxTables = tsdbGetNextMaxTables(tid);
STable **tables = (STable **)calloc(maxTables, sizeof(STable *));
if (tables == NULL) {
terrno = TSDB_CODE_TDB_OUT_OF_MEMORY;
return -1;
}
memcpy((void *)tables, (void *)pMeta->tables, sizeof(STable *) * pMeta->maxTables);
pMeta->maxTables = maxTables;
STable **tTables = pMeta->tables;
pMeta->tables = tables;
tfree(tTables);
tsdbDebug("vgId:%d tsdb meta maxTables is adjusted as %d", REPO_ID(pRepo), maxTables);
#endif
return 0;
}
static int tsdbCheckTableTagVal(SKVRow *pKVRow, STSchema *pSchema) {
for (size_t i = 0; i < kvRowNCols(pKVRow); i++) {
SColIdx * pColIdx = kvRowColIdxAt(pKVRow, i);
STColumn *pCol = tdGetColOfID(pSchema, pColIdx->colId);
if ((pCol == NULL) || (!IS_VAR_DATA_TYPE(pCol->type))) continue;
void *pValue = tdGetKVRowValOfCol(pKVRow, pCol->colId);
if (varDataTLen(pValue) > pCol->bytes) {
terrno = TSDB_CODE_TDB_IVLD_TAG_VAL;
return -1;
}
}
return 0;
}
static int tsdbAddSchema(STable *pTable, STSchema *pSchema) {
ASSERT(TABLE_TYPE(pTable) != TSDB_CHILD_TABLE);
if (pTable->schema == NULL) {
pTable->schema = taosArrayInit(TSDB_MAX_TABLE_SCHEMAS, sizeof(SSchema *));
if (pTable->schema == NULL) {
terrno = TAOS_SYSTEM_ERROR(errno);
return -1;
}
}
ASSERT(taosArrayGetSize(pTable->schema) == 0 ||
schemaVersion(pSchema) > schemaVersion(*(STSchema **)taosArrayGetLast(pTable->schema)));
if (taosArrayPush(pTable->schema, &pSchema) == NULL) {
terrno = TAOS_SYSTEM_ERROR(errno);
return -1;
}
return 0;
}
static void tsdbFreeTableSchema(STable *pTable) {
ASSERT(pTable != NULL);
if (pTable->schema) {
for (size_t i = 0; i < taosArrayGetSize(pTable->schema); i++) {
STSchema *pSchema = taosArrayGetP(pTable->schema, i);
tdFreeSchema(pSchema);
}
taosArrayDestroy(pTable->schema);
}
}
#endif
\ No newline at end of file
/*
* Copyright (c) 2019 TAOS Data, Inc. <jhtao@taosdata.com>
*
* This program is free software: you can use, redistribute, and/or modify
* it under the terms of the GNU Affero General Public License, version 3
* or later ("AGPL"), as published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#if 0
#include "os.h"
#include "tdataformat.h"
#include "tskiplist.h"
#include "talgo.h"
#include "tcompare.h"
#include "exception.h"
#include "taosdef.h"
#include "tlosertree.h"
#include "tsdbint.h"
// #include "texpr.h"
// #include "qFilter.h"
#include "cJSON.h"
#define EXTRA_BYTES 2
#define ASCENDING_TRAVERSE(o) (o == TSDB_ORDER_ASC)
#define QH_GET_NUM_OF_COLS(handle) ((size_t)(taosArrayGetSize((handle)->pColumns)))
#define GET_FILE_DATA_BLOCK_INFO(_checkInfo, _block) \
((SDataBlockInfo){.window = {.skey = (_block)->keyFirst, .ekey = (_block)->keyLast}, \
.numOfCols = (_block)->numOfCols, \
.rows = (_block)->numOfRows, \
.tid = (_checkInfo)->tableId.tid, \
.uid = (_checkInfo)->tableId.uid})
// limit offset start optimization for rows read over this value
#define OFFSET_SKIP_THRESHOLD 5000
enum {
TSDB_QUERY_TYPE_ALL = 1,
TSDB_QUERY_TYPE_LAST = 2,
};
enum {
TSDB_CACHED_TYPE_NONE = 0,
TSDB_CACHED_TYPE_LASTROW = 1,
TSDB_CACHED_TYPE_LAST = 2,
};
typedef struct SQueryFilePos {
int32_t fid;
int32_t slot;
int32_t pos;
int64_t lastKey;
int32_t rows;
bool mixBlock;
bool blockCompleted;
STimeWindow win;
} SQueryFilePos;
typedef struct SDataBlockLoadInfo {
SDFileSet* fileGroup;
int32_t slot;
int32_t tid;
SArray* pLoadedCols;
} SDataBlockLoadInfo;
typedef struct SLoadCompBlockInfo {
int32_t tid; /* table tid */
int32_t fileId;
} SLoadCompBlockInfo;
enum {
CHECKINFO_CHOSEN_MEM = 0,
CHECKINFO_CHOSEN_IMEM = 1,
CHECKINFO_CHOSEN_BOTH = 2 //for update=2(merge case)
};
typedef struct STableCheckInfo {
STableId tableId;
TSKEY lastKey;
STable* pTableObj;
SBlockInfo* pCompInfo;
int32_t compSize;
int32_t numOfBlocks:29; // number of qualified data blocks not the original blocks
uint8_t chosen:2; // indicate which iterator should move forward
bool initBuf; // whether to initialize the in-memory skip list iterator or not
SSkipListIterator* iter; // mem buffer skip list iterator
SSkipListIterator* iiter; // imem buffer skip list iterator
} STableCheckInfo;
typedef struct STableBlockInfo {
SBlock *compBlock;
STableCheckInfo *pTableCheckInfo;
} STableBlockInfo;
typedef struct SBlockOrderSupporter {
int32_t numOfTables;
STableBlockInfo** pDataBlockInfo;
int32_t* blockIndexArray;
int32_t* numOfBlocksPerTable;
} SBlockOrderSupporter;
typedef struct SIOCostSummary {
int64_t blockLoadTime;
int64_t statisInfoLoadTime;
int64_t checkForNextTime;
int64_t headFileLoad;
int64_t headFileLoadTime;
} SIOCostSummary;
typedef struct STsdbQueryHandle {
STsdbRepo* pTsdb;
SQueryFilePos cur; // current position
int16_t order;
int64_t offset; // limit offset
int64_t srows; // skip offset rows
int64_t frows; // forbid skip offset rows
STimeWindow window; // the primary query time window that applies to all queries
SDataStatis* statis; // query level statistics, only one table block statistics info exists at any time
int32_t numOfBlocks;
SArray* pColumns; // column list, SColumnInfoData array list
bool locateStart;
int32_t outputCapacity;
int32_t realNumOfRows;
SArray* pTableCheckInfo; // SArray<STableCheckInfo>
int32_t activeIndex;
bool checkFiles; // check file stage
int8_t cachelastrow; // check if last row cached
bool loadExternalRow; // load time window external data rows
bool currentLoadExternalRows; // current load external rows
int32_t loadType; // block load type
uint64_t qId; // query info handle, for debug purpose
int32_t type; // query type: retrieve all data blocks, 2. retrieve only last row, 3. retrieve direct prev|next rows
SDFileSet* pFileGroup;
SFSIter fileIter;
SReadH rhelper;
STableBlockInfo* pDataBlockInfo;
SDataCols *pDataCols; // in order to hold current file data block
int32_t allocSize; // allocated data block size
SMemRef *pMemRef;
SArray *defaultLoadColumn;// default load column
SDataBlockLoadInfo dataBlockLoadInfo; /* record current block load information */
SLoadCompBlockInfo compBlockLoadInfo; /* record current compblock information in SQueryAttr */
SArray *prev; // previous row which is before than time window
SArray *next; // next row which is after the query time window
SIOCostSummary cost;
} STsdbQueryHandle;
typedef struct STableGroupSupporter {
int32_t numOfCols;
SColIndex* pCols;
STSchema* pTagSchema;
} STableGroupSupporter;
typedef struct SRange {
int32_t from;
int32_t to;
} SRange;
static STimeWindow updateLastrowForEachGroup(STableGroupInfo *groupList);
static int32_t checkForCachedLastRow(STsdbQueryHandle* pQueryHandle, STableGroupInfo *groupList);
static int32_t checkForCachedLast(STsdbQueryHandle* pQueryHandle);
static int32_t lazyLoadCacheLast(STsdbQueryHandle* pQueryHandle);
static int32_t tsdbGetCachedLastRow(STable* pTable, SMemRow* pRes, TSKEY* lastKey);
static void changeQueryHandleForInterpQuery(TsdbQueryHandleT pHandle);
static void doMergeTwoLevelData(STsdbQueryHandle* pQueryHandle, STableCheckInfo* pCheckInfo, SBlock* pBlock);
static int32_t binarySearchForKey(char* pValue, int num, TSKEY key, int order);
static int32_t tsdbReadRowsFromCache(STableCheckInfo* pCheckInfo, TSKEY maxKey, int maxRowsToRead, STimeWindow* win, STsdbQueryHandle* pQueryHandle);
static int32_t tsdbCheckInfoCompar(const void* key1, const void* key2);
static int32_t doGetExternalRow(STsdbQueryHandle* pQueryHandle, int16_t type, SMemRef* pMemRef);
static void* doFreeColumnInfoData(SArray* pColumnInfoData);
static void* destroyTableCheckInfo(SArray* pTableCheckInfo);
static bool tsdbGetExternalRow(TsdbQueryHandleT pHandle);
static int32_t tsdbQueryTableList(STable* pTable, SArray* pRes, void* filterInfo);
static void tsdbInitDataBlockLoadInfo(SDataBlockLoadInfo* pBlockLoadInfo) {
pBlockLoadInfo->slot = -1;
pBlockLoadInfo->tid = -1;
pBlockLoadInfo->fileGroup = NULL;
}
static void tsdbInitCompBlockLoadInfo(SLoadCompBlockInfo* pCompBlockLoadInfo) {
pCompBlockLoadInfo->tid = -1;
pCompBlockLoadInfo->fileId = -1;
}
static SArray* getColumnIdList(STsdbQueryHandle* pQueryHandle) {
size_t numOfCols = QH_GET_NUM_OF_COLS(pQueryHandle);
assert(numOfCols <= TSDB_MAX_COLUMNS);
SArray* pIdList = taosArrayInit(numOfCols, sizeof(int16_t));
for (int32_t i = 0; i < numOfCols; ++i) {
SColumnInfoData* pCol = taosArrayGet(pQueryHandle->pColumns, i);
taosArrayPush(pIdList, &pCol->info.colId);
}
return pIdList;
}
static SArray* getDefaultLoadColumns(STsdbQueryHandle* pQueryHandle, bool loadTS) {
SArray* pLocalIdList = getColumnIdList(pQueryHandle);
// check if the primary time stamp column needs to load
int16_t colId = *(int16_t*)taosArrayGet(pLocalIdList, 0);
// the primary timestamp column does not be included in the the specified load column list, add it
if (loadTS && colId != 0) {
int16_t columnId = 0;
taosArrayInsert(pLocalIdList, 0, &columnId);
}
return pLocalIdList;
}
static void tsdbMayTakeMemSnapshot(STsdbQueryHandle* pQueryHandle, SArray* psTable) {
assert(pQueryHandle != NULL && pQueryHandle->pMemRef != NULL);
SMemRef* pMemRef = pQueryHandle->pMemRef;
if (pQueryHandle->pMemRef->ref++ == 0) {
tsdbTakeMemSnapshot(pQueryHandle->pTsdb, &(pMemRef->snapshot), psTable);
}
taosArrayDestroy(&psTable);
}
static void tsdbMayUnTakeMemSnapshot(STsdbQueryHandle* pQueryHandle) {
assert(pQueryHandle != NULL);
SMemRef* pMemRef = pQueryHandle->pMemRef;
if (pMemRef == NULL) { // it has been freed
return;
}
if (--pMemRef->ref == 0) {
tsdbUnTakeMemSnapShot(pQueryHandle->pTsdb, &(pMemRef->snapshot));
}
pQueryHandle->pMemRef = NULL;
}
int64_t tsdbGetNumOfRowsInMemTable(TsdbQueryHandleT* pHandle) {
STsdbQueryHandle* pQueryHandle = (STsdbQueryHandle*) pHandle;
int64_t rows = 0;
SMemRef* pMemRef = pQueryHandle->pMemRef;
if (pMemRef == NULL) { return rows; }
STableData* pMem = NULL;
STableData* pIMem = NULL;
SMemTable* pMemT = pMemRef->snapshot.mem;
SMemTable* pIMemT = pMemRef->snapshot.imem;
size_t size = taosArrayGetSize(pQueryHandle->pTableCheckInfo);
for (int32_t i = 0; i < size; ++i) {
STableCheckInfo* pCheckInfo = taosArrayGet(pQueryHandle->pTableCheckInfo, i);
if (pMemT && pCheckInfo->tableId.tid < pMemT->maxTables) {
pMem = pMemT->tData[pCheckInfo->tableId.tid];
rows += (pMem && pMem->uid == pCheckInfo->tableId.uid) ? pMem->numOfRows : 0;
}
if (pIMemT && pCheckInfo->tableId.tid < pIMemT->maxTables) {
pIMem = pIMemT->tData[pCheckInfo->tableId.tid];
rows += (pIMem && pIMem->uid == pCheckInfo->tableId.uid) ? pIMem->numOfRows : 0;
}
}
return rows;
}
static SArray* createCheckInfoFromTableGroup(STsdbQueryHandle* pQueryHandle, STableGroupInfo* pGroupList, STsdbMeta* pMeta, SArray** psTable) {
size_t sizeOfGroup = taosArrayGetSize(pGroupList->pGroupList);
assert(sizeOfGroup >= 1 && pMeta != NULL);
// allocate buffer in order to load data blocks from file
SArray* pTableCheckInfo = taosArrayInit(pGroupList->numOfTables, sizeof(STableCheckInfo));
if (pTableCheckInfo == NULL) {
return NULL;
}
SArray* pTable = taosArrayInit(4, sizeof(STable*));
if (pTable == NULL) {
taosArrayDestroy(&pTableCheckInfo);
return NULL;
}
// todo apply the lastkey of table check to avoid to load header file
for (int32_t i = 0; i < sizeOfGroup; ++i) {
SArray* group = *(SArray**) taosArrayGet(pGroupList->pGroupList, i);
size_t gsize = taosArrayGetSize(group);
assert(gsize > 0);
for (int32_t j = 0; j < gsize; ++j) {
STableKeyInfo* pKeyInfo = (STableKeyInfo*) taosArrayGet(group, j);
STableCheckInfo info = { .lastKey = pKeyInfo->lastKey, .pTableObj = pKeyInfo->pTable };
assert(info.pTableObj != NULL && (info.pTableObj->type == TSDB_NORMAL_TABLE ||
info.pTableObj->type == TSDB_CHILD_TABLE || info.pTableObj->type == TSDB_STREAM_TABLE));
info.tableId.tid = info.pTableObj->tableId.tid;
info.tableId.uid = info.pTableObj->tableId.uid;
if (ASCENDING_TRAVERSE(pQueryHandle->order)) {
if (info.lastKey == INT64_MIN || info.lastKey < pQueryHandle->window.skey) {
info.lastKey = pQueryHandle->window.skey;
}
assert(info.lastKey >= pQueryHandle->window.skey && info.lastKey <= pQueryHandle->window.ekey);
} else {
assert(info.lastKey >= pQueryHandle->window.ekey && info.lastKey <= pQueryHandle->window.skey);
}
taosArrayPush(pTableCheckInfo, &info);
tsdbDebug("%p check table uid:%"PRId64", tid:%d from lastKey:%"PRId64" 0x%"PRIx64, pQueryHandle, info.tableId.uid,
info.tableId.tid, info.lastKey, pQueryHandle->qId);
}
}
taosArraySort(pTableCheckInfo, tsdbCheckInfoCompar);
size_t gsize = taosArrayGetSize(pTableCheckInfo);
for (int32_t i = 0; i < gsize; ++i) {
STableCheckInfo* pInfo = (STableCheckInfo*) taosArrayGet(pTableCheckInfo, i);
taosArrayPush(pTable, &pInfo->pTableObj);
}
*psTable = pTable;
return pTableCheckInfo;
}
static void resetCheckInfo(STsdbQueryHandle* pQueryHandle) {
size_t numOfTables = taosArrayGetSize(pQueryHandle->pTableCheckInfo);
assert(numOfTables >= 1);
// todo apply the lastkey of table check to avoid to load header file
for (int32_t i = 0; i < numOfTables; ++i) {
STableCheckInfo* pCheckInfo = (STableCheckInfo*) taosArrayGet(pQueryHandle->pTableCheckInfo, i);
pCheckInfo->lastKey = pQueryHandle->window.skey;
pCheckInfo->iter = tSkipListDestroyIter(pCheckInfo->iter);
pCheckInfo->iiter = tSkipListDestroyIter(pCheckInfo->iiter);
pCheckInfo->initBuf = false;
if (ASCENDING_TRAVERSE(pQueryHandle->order)) {
assert(pCheckInfo->lastKey >= pQueryHandle->window.skey);
} else {
assert(pCheckInfo->lastKey <= pQueryHandle->window.skey);
}
}
}
// only one table, not need to sort again
static SArray* createCheckInfoFromCheckInfo(STableCheckInfo* pCheckInfo, TSKEY skey, SArray** psTable) {
SArray* pNew = taosArrayInit(1, sizeof(STableCheckInfo));
SArray* pTable = taosArrayInit(1, sizeof(STable*));
STableCheckInfo info = { .lastKey = skey, .pTableObj = pCheckInfo->pTableObj};
info.tableId = pCheckInfo->tableId;
taosArrayPush(pNew, &info);
taosArrayPush(pTable, &pCheckInfo->pTableObj);
*psTable = pTable;
return pNew;
}
static bool emptyQueryTimewindow(STsdbQueryHandle* pQueryHandle) {
assert(pQueryHandle != NULL);
STimeWindow* w = &pQueryHandle->window;
bool asc = ASCENDING_TRAVERSE(pQueryHandle->order);
return ((asc && w->skey > w->ekey) || (!asc && w->ekey > w->skey));
}
// Update the query time window according to the data time to live(TTL) information, in order to avoid to return
// the expired data to client, even it is queried already.
static int64_t getEarliestValidTimestamp(STsdbRepo* pTsdb) {
STsdbCfg* pCfg = &pTsdb->config;
int64_t now = taosGetTimestamp(pCfg->precision);
return now - (tsTickPerDay[pCfg->precision] * pCfg->keep) + 1; // needs to add one tick
}
static void setQueryTimewindow(STsdbQueryHandle* pQueryHandle, STsdbQueryCond* pCond) {
pQueryHandle->window = pCond->twindow;
bool updateTs = false;
int64_t startTs = getEarliestValidTimestamp(pQueryHandle->pTsdb);
if (ASCENDING_TRAVERSE(pQueryHandle->order)) {
if (startTs > pQueryHandle->window.skey) {
pQueryHandle->window.skey = startTs;
pCond->twindow.skey = startTs;
updateTs = true;
}
} else {
if (startTs > pQueryHandle->window.ekey) {
pQueryHandle->window.ekey = startTs;
pCond->twindow.ekey = startTs;
updateTs = true;
}
}
if (updateTs) {
tsdbDebug("%p update the query time window, old:%" PRId64 " - %" PRId64 ", new:%" PRId64 " - %" PRId64
", 0x%" PRIx64, pQueryHandle, pCond->twindow.skey, pCond->twindow.ekey, pQueryHandle->window.skey,
pQueryHandle->window.ekey, pQueryHandle->qId);
}
}
static STsdbQueryHandle* tsdbQueryTablesImpl(STsdbRepo* tsdb, STsdbQueryCond* pCond, uint64_t qId, SMemRef* pMemRef) {
STsdbQueryHandle* pQueryHandle = calloc(1, sizeof(STsdbQueryHandle));
if (pQueryHandle == NULL) {
goto _end;
}
pQueryHandle->order = pCond->order;
pQueryHandle->offset = pCond->offset;
pQueryHandle->srows = 0;
pQueryHandle->frows = 0;
pQueryHandle->pTsdb = tsdb;
pQueryHandle->type = TSDB_QUERY_TYPE_ALL;
pQueryHandle->cur.fid = INT32_MIN;
pQueryHandle->cur.win = TSWINDOW_INITIALIZER;
pQueryHandle->checkFiles = true;
pQueryHandle->activeIndex = 0; // current active table index
pQueryHandle->qId = qId;
pQueryHandle->allocSize = 0;
pQueryHandle->locateStart = false;
pQueryHandle->pMemRef = pMemRef;
pQueryHandle->loadType = pCond->type;
pQueryHandle->outputCapacity = ((STsdbRepo*)tsdb)->config.maxRowsPerFileBlock;
pQueryHandle->loadExternalRow = pCond->loadExternalRows;
pQueryHandle->currentLoadExternalRows = pCond->loadExternalRows;
if (tsdbInitReadH(&pQueryHandle->rhelper, (STsdbRepo*)tsdb) != 0) {
goto _end;
}
assert(pCond != NULL && pMemRef != NULL);
setQueryTimewindow(pQueryHandle, pCond);
if (pCond->numOfCols > 0) {
// allocate buffer in order to load data blocks from file
pQueryHandle->statis = calloc(pCond->numOfCols, sizeof(SDataStatis));
if (pQueryHandle->statis == NULL) {
goto _end;
}
// todo: use list instead of array?
pQueryHandle->pColumns = taosArrayInit(pCond->numOfCols, sizeof(SColumnInfoData));
if (pQueryHandle->pColumns == NULL) {
goto _end;
}
for (int32_t i = 0; i < pCond->numOfCols; ++i) {
SColumnInfoData colInfo = {{0}, 0};
colInfo.info = pCond->colList[i];
colInfo.pData = calloc(1, EXTRA_BYTES + pQueryHandle->outputCapacity * pCond->colList[i].bytes);
if (colInfo.pData == NULL) {
goto _end;
}
taosArrayPush(pQueryHandle->pColumns, &colInfo);
pQueryHandle->statis[i].colId = colInfo.info.colId;
}
pQueryHandle->defaultLoadColumn = getDefaultLoadColumns(pQueryHandle, true);
}
STsdbMeta* pMeta = tsdbGetMeta(tsdb);
assert(pMeta != NULL);
pQueryHandle->pDataCols = tdNewDataCols(pMeta->maxCols, pQueryHandle->pTsdb->config.maxRowsPerFileBlock);
if (pQueryHandle->pDataCols == NULL) {
tsdbError("%p failed to malloc buf for pDataCols, %"PRIu64, pQueryHandle, pQueryHandle->qId);
terrno = TSDB_CODE_TDB_OUT_OF_MEMORY;
goto _end;
}
tsdbInitDataBlockLoadInfo(&pQueryHandle->dataBlockLoadInfo);
tsdbInitCompBlockLoadInfo(&pQueryHandle->compBlockLoadInfo);
return (TsdbQueryHandleT) pQueryHandle;
_end:
tsdbCleanupQueryHandle(pQueryHandle);
terrno = TSDB_CODE_TDB_OUT_OF_MEMORY;
return NULL;
}
TsdbQueryHandleT* tsdbQueryTables(STsdbRepo* tsdb, STsdbQueryCond* pCond, STableGroupInfo* groupList, uint64_t qId, SMemRef* pRef) {
STsdbQueryHandle* pQueryHandle = tsdbQueryTablesImpl(tsdb, pCond, qId, pRef);
if (pQueryHandle == NULL) {
return NULL;
}
if (emptyQueryTimewindow(pQueryHandle)) {
return (TsdbQueryHandleT*) pQueryHandle;
}
STsdbMeta* pMeta = tsdbGetMeta(tsdb);
assert(pMeta != NULL);
SArray* psTable = NULL;
// todo apply the lastkey of table check to avoid to load header file
pQueryHandle->pTableCheckInfo = createCheckInfoFromTableGroup(pQueryHandle, groupList, pMeta, &psTable);
if (pQueryHandle->pTableCheckInfo == NULL) {
tsdbCleanupQueryHandle(pQueryHandle);
taosArrayDestroy(&psTable);
terrno = TSDB_CODE_TDB_OUT_OF_MEMORY;
return NULL;
}
tsdbMayTakeMemSnapshot(pQueryHandle, psTable);
tsdbDebug("%p total numOfTable:%" PRIzu " in query, 0x%"PRIx64, pQueryHandle, taosArrayGetSize(pQueryHandle->pTableCheckInfo), pQueryHandle->qId);
return (TsdbQueryHandleT) pQueryHandle;
}
void tsdbResetQueryHandle(TsdbQueryHandleT queryHandle, STsdbQueryCond *pCond) {
STsdbQueryHandle* pQueryHandle = queryHandle;
if (emptyQueryTimewindow(pQueryHandle)) {
if (pCond->order != pQueryHandle->order) {
pQueryHandle->order = pCond->order;
SWAP(pQueryHandle->window.skey, pQueryHandle->window.ekey, int64_t);
}
return;
}
pQueryHandle->order = pCond->order;
pQueryHandle->offset = pCond->offset;
pQueryHandle->srows = 0;
pQueryHandle->frows = 0;
pQueryHandle->window = pCond->twindow;
pQueryHandle->type = TSDB_QUERY_TYPE_ALL;
pQueryHandle->cur.fid = -1;
pQueryHandle->cur.win = TSWINDOW_INITIALIZER;
pQueryHandle->checkFiles = true;
pQueryHandle->activeIndex = 0; // current active table index
pQueryHandle->locateStart = false;
pQueryHandle->loadExternalRow = pCond->loadExternalRows;
if (ASCENDING_TRAVERSE(pCond->order)) {
assert(pQueryHandle->window.skey <= pQueryHandle->window.ekey);
} else {
assert(pQueryHandle->window.skey >= pQueryHandle->window.ekey);
}
// allocate buffer in order to load data blocks from file
memset(pQueryHandle->statis, 0, sizeof(SDataStatis));
tsdbInitDataBlockLoadInfo(&pQueryHandle->dataBlockLoadInfo);
tsdbInitCompBlockLoadInfo(&pQueryHandle->compBlockLoadInfo);
resetCheckInfo(pQueryHandle);
}
void tsdbResetQueryHandleForNewTable(TsdbQueryHandleT queryHandle, STsdbQueryCond *pCond, STableGroupInfo* groupList) {
STsdbQueryHandle* pQueryHandle = queryHandle;
pQueryHandle->order = pCond->order;
pQueryHandle->window = pCond->twindow;
pQueryHandle->type = TSDB_QUERY_TYPE_ALL;
pQueryHandle->cur.fid = -1;
pQueryHandle->cur.win = TSWINDOW_INITIALIZER;
pQueryHandle->checkFiles = true;
pQueryHandle->activeIndex = 0; // current active table index
pQueryHandle->locateStart = false;
pQueryHandle->loadExternalRow = pCond->loadExternalRows;
if (ASCENDING_TRAVERSE(pCond->order)) {
assert(pQueryHandle->window.skey <= pQueryHandle->window.ekey);
} else {
assert(pQueryHandle->window.skey >= pQueryHandle->window.ekey);
}
// allocate buffer in order to load data blocks from file
memset(pQueryHandle->statis, 0, sizeof(SDataStatis));
tsdbInitDataBlockLoadInfo(&pQueryHandle->dataBlockLoadInfo);
tsdbInitCompBlockLoadInfo(&pQueryHandle->compBlockLoadInfo);
SArray* pTable = NULL;
STsdbMeta* pMeta = tsdbGetMeta(pQueryHandle->pTsdb);
pQueryHandle->pTableCheckInfo = destroyTableCheckInfo(pQueryHandle->pTableCheckInfo);
pQueryHandle->pTableCheckInfo = createCheckInfoFromTableGroup(pQueryHandle, groupList, pMeta, &pTable);
if (pQueryHandle->pTableCheckInfo == NULL) {
tsdbCleanupQueryHandle(pQueryHandle);
terrno = TSDB_CODE_TDB_OUT_OF_MEMORY;
}
pQueryHandle->prev = doFreeColumnInfoData(pQueryHandle->prev);
pQueryHandle->next = doFreeColumnInfoData(pQueryHandle->next);
}
static int32_t lazyLoadCacheLast(STsdbQueryHandle* pQueryHandle) {
STsdbRepo* pRepo = pQueryHandle->pTsdb;
if (!pQueryHandle->pTableCheckInfo) {
tsdbError("%p table check info is NULL", pQueryHandle);
terrno = TSDB_CODE_QRY_APP_ERROR;
return -1;
}
size_t numOfTables = taosArrayGetSize(pQueryHandle->pTableCheckInfo);
int32_t code = 0;
for (size_t i = 0; i < numOfTables; ++i) {
STableCheckInfo* pCheckInfo = taosArrayGet(pQueryHandle->pTableCheckInfo, i);
STable* pTable = pCheckInfo->pTableObj;
if (pTable->cacheLastConfigVersion == pRepo->cacheLastConfigVersion) {
continue;
}
code = tsdbLoadLastCache(pRepo, pTable);
if (code != 0) {
tsdbError("%p uid:%" PRId64 ", tid:%d, failed to load last cache since %s", pQueryHandle, pTable->tableId.uid,
pTable->tableId.tid, tstrerror(terrno));
break;
}
}
return code;
}
TsdbQueryHandleT tsdbQueryLastRow(STsdbRepo *tsdb, STsdbQueryCond *pCond, STableGroupInfo *groupList, uint64_t qId, SMemRef* pMemRef) {
pCond->twindow = updateLastrowForEachGroup(groupList);
// no qualified table
if (groupList->numOfTables == 0) {
return NULL;
}
STsdbQueryHandle *pQueryHandle = (STsdbQueryHandle*) tsdbQueryTables(tsdb, pCond, groupList, qId, pMemRef);
if (pQueryHandle == NULL) {
return NULL;
}
if (lazyLoadCacheLast(pQueryHandle) != TSDB_CODE_SUCCESS) {
return NULL;
}
int32_t code = checkForCachedLastRow(pQueryHandle, groupList);
if (code != TSDB_CODE_SUCCESS) { // set the numOfTables to be 0
terrno = code;
return NULL;
}
assert(pCond->order == TSDB_ORDER_ASC && pCond->twindow.skey <= pCond->twindow.ekey);
if (pQueryHandle->cachelastrow) {
pQueryHandle->type = TSDB_QUERY_TYPE_LAST;
}
return pQueryHandle;
}
TsdbQueryHandleT tsdbQueryCacheLast(STsdbRepo *tsdb, STsdbQueryCond *pCond, STableGroupInfo *groupList, uint64_t qId, SMemRef* pMemRef) {
STsdbQueryHandle *pQueryHandle = (STsdbQueryHandle*) tsdbQueryTables(tsdb, pCond, groupList, qId, pMemRef);
if (pQueryHandle == NULL) {
return NULL;
}
if (lazyLoadCacheLast(pQueryHandle) != TSDB_CODE_SUCCESS) {
return NULL;
}
int32_t code = checkForCachedLast(pQueryHandle);
if (code != TSDB_CODE_SUCCESS) { // set the numOfTables to be 0
terrno = code;
return NULL;
}
if (pQueryHandle->cachelastrow) {
pQueryHandle->type = TSDB_QUERY_TYPE_LAST;
}
return pQueryHandle;
}
SArray* tsdbGetQueriedTableList(TsdbQueryHandleT *pHandle) {
assert(pHandle != NULL);
STsdbQueryHandle *pQueryHandle = (STsdbQueryHandle*) pHandle;
size_t size = taosArrayGetSize(pQueryHandle->pTableCheckInfo);
SArray* res = taosArrayInit(size, POINTER_BYTES);
for(int32_t i = 0; i < size; ++i) {
STableCheckInfo* pCheckInfo = taosArrayGet(pQueryHandle->pTableCheckInfo, i);
taosArrayPush(res, &pCheckInfo->pTableObj);
}
return res;
}
TsdbQueryHandleT tsdbQueryRowsInExternalWindow(STsdbRepo *tsdb, STsdbQueryCond* pCond, STableGroupInfo *groupList, uint64_t qId, SMemRef* pRef) {
STsdbQueryHandle *pQueryHandle = (STsdbQueryHandle*) tsdbQueryTables(tsdb, pCond, groupList, qId, pRef);
//pQueryHandle->loadExternalRow = true;
//pQueryHandle->currentLoadExternalRows = true;
return pQueryHandle;
}
static bool initTableMemIterator(STsdbQueryHandle* pHandle, STableCheckInfo* pCheckInfo) {
STable* pTable = pCheckInfo->pTableObj;
assert(pTable != NULL);
if (pCheckInfo->initBuf) {
return true;
}
pCheckInfo->initBuf = true;
int32_t order = pHandle->order;
// no data in buffer, abort
if (pHandle->pMemRef->snapshot.mem == NULL && pHandle->pMemRef->snapshot.imem == NULL) {
return false;
}
assert(pCheckInfo->iter == NULL && pCheckInfo->iiter == NULL);
STableData* pMem = NULL;
STableData* pIMem = NULL;
SMemTable* pMemT = pHandle->pMemRef->snapshot.mem;
SMemTable* pIMemT = pHandle->pMemRef->snapshot.imem;
if (pMemT && pCheckInfo->tableId.tid < pMemT->maxTables) {
pMem = pMemT->tData[pCheckInfo->tableId.tid];
if (pMem != NULL && pMem->uid == pCheckInfo->tableId.uid) { // check uid
TKEY tLastKey = keyToTkey(pCheckInfo->lastKey);
pCheckInfo->iter =
tSkipListCreateIterFromVal(pMem->pData, (const char*)&tLastKey, TSDB_DATA_TYPE_TIMESTAMP, order);
}
}
if (pIMemT && pCheckInfo->tableId.tid < pIMemT->maxTables) {
pIMem = pIMemT->tData[pCheckInfo->tableId.tid];
if (pIMem != NULL && pIMem->uid == pCheckInfo->tableId.uid) { // check uid
TKEY tLastKey = keyToTkey(pCheckInfo->lastKey);
pCheckInfo->iiter =
tSkipListCreateIterFromVal(pIMem->pData, (const char*)&tLastKey, TSDB_DATA_TYPE_TIMESTAMP, order);
}
}
// both iterators are NULL, no data in buffer right now
if (pCheckInfo->iter == NULL && pCheckInfo->iiter == NULL) {
return false;
}
bool memEmpty = (pCheckInfo->iter == NULL) || (pCheckInfo->iter != NULL && !tSkipListIterNext(pCheckInfo->iter));
bool imemEmpty = (pCheckInfo->iiter == NULL) || (pCheckInfo->iiter != NULL && !tSkipListIterNext(pCheckInfo->iiter));
if (memEmpty && imemEmpty) { // buffer is empty
return false;
}
if (!memEmpty) {
SSkipListNode* node = tSkipListIterGet(pCheckInfo->iter);
assert(node != NULL);
SMemRow row = (SMemRow)SL_GET_NODE_DATA(node);
TSKEY key = memRowKey(row); // first timestamp in buffer
tsdbDebug("%p uid:%" PRId64 ", tid:%d check data in mem from skey:%" PRId64 ", order:%d, ts range in buf:%" PRId64
"-%" PRId64 ", lastKey:%" PRId64 ", numOfRows:%"PRId64", 0x%"PRIx64,
pHandle, pCheckInfo->tableId.uid, pCheckInfo->tableId.tid, key, order, pMem->keyFirst, pMem->keyLast,
pCheckInfo->lastKey, pMem->numOfRows, pHandle->qId);
if (ASCENDING_TRAVERSE(order)) {
assert(pCheckInfo->lastKey <= key);
} else {
assert(pCheckInfo->lastKey >= key);
}
} else {
tsdbDebug("%p uid:%"PRId64", tid:%d no data in mem, 0x%"PRIx64, pHandle, pCheckInfo->tableId.uid, pCheckInfo->tableId.tid,
pHandle->qId);
}
if (!imemEmpty) {
SSkipListNode* node = tSkipListIterGet(pCheckInfo->iiter);
assert(node != NULL);
SMemRow row = (SMemRow)SL_GET_NODE_DATA(node);
TSKEY key = memRowKey(row); // first timestamp in buffer
tsdbDebug("%p uid:%" PRId64 ", tid:%d check data in imem from skey:%" PRId64 ", order:%d, ts range in buf:%" PRId64
"-%" PRId64 ", lastKey:%" PRId64 ", numOfRows:%"PRId64", 0x%"PRIx64,
pHandle, pCheckInfo->tableId.uid, pCheckInfo->tableId.tid, key, order, pIMem->keyFirst, pIMem->keyLast,
pCheckInfo->lastKey, pIMem->numOfRows, pHandle->qId);
if (ASCENDING_TRAVERSE(order)) {
assert(pCheckInfo->lastKey <= key);
} else {
assert(pCheckInfo->lastKey >= key);
}
} else {
tsdbDebug("%p uid:%"PRId64", tid:%d no data in imem, 0x%"PRIx64, pHandle, pCheckInfo->tableId.uid, pCheckInfo->tableId.tid,
pHandle->qId);
}
return true;
}
static void destroyTableMemIterator(STableCheckInfo* pCheckInfo) {
tSkipListDestroyIter(pCheckInfo->iter);
tSkipListDestroyIter(pCheckInfo->iiter);
}
static TSKEY extractFirstTraverseKey(STableCheckInfo* pCheckInfo, int32_t order, int32_t update) {
SMemRow rmem = NULL, rimem = NULL;
if (pCheckInfo->iter) {
SSkipListNode* node = tSkipListIterGet(pCheckInfo->iter);
if (node != NULL) {
rmem = (SMemRow)SL_GET_NODE_DATA(node);
}
}
if (pCheckInfo->iiter) {
SSkipListNode* node = tSkipListIterGet(pCheckInfo->iiter);
if (node != NULL) {
rimem = (SMemRow)SL_GET_NODE_DATA(node);
}
}
if (rmem == NULL && rimem == NULL) {
return TSKEY_INITIAL_VAL;
}
if (rmem != NULL && rimem == NULL) {
pCheckInfo->chosen = CHECKINFO_CHOSEN_MEM;
return memRowKey(rmem);
}
if (rmem == NULL && rimem != NULL) {
pCheckInfo->chosen = CHECKINFO_CHOSEN_IMEM;
return memRowKey(rimem);
}
TSKEY r1 = memRowKey(rmem);
TSKEY r2 = memRowKey(rimem);
if (r1 == r2) {
if(update == TD_ROW_DISCARD_UPDATE){
pCheckInfo->chosen = CHECKINFO_CHOSEN_IMEM;
tSkipListIterNext(pCheckInfo->iter);
return r2;
}
else if(update == TD_ROW_OVERWRITE_UPDATE) {
pCheckInfo->chosen = CHECKINFO_CHOSEN_MEM;
tSkipListIterNext(pCheckInfo->iiter);
return r1;
} else {
pCheckInfo->chosen = CHECKINFO_CHOSEN_BOTH;
return r1;
}
} else {
if (ASCENDING_TRAVERSE(order)) {
if (r1 < r2) {
pCheckInfo->chosen = CHECKINFO_CHOSEN_MEM;
return r1;
} else {
pCheckInfo->chosen = CHECKINFO_CHOSEN_IMEM;
return r2;
}
} else {
if (r1 < r2) {
pCheckInfo->chosen = CHECKINFO_CHOSEN_IMEM;
return r2;
} else {
pCheckInfo->chosen = CHECKINFO_CHOSEN_MEM;
return r1;
}
}
}
}
static SMemRow getSMemRowInTableMem(STableCheckInfo* pCheckInfo, int32_t order, int32_t update, SMemRow* extraRow) {
SMemRow rmem = NULL, rimem = NULL;
if (pCheckInfo->iter) {
SSkipListNode* node = tSkipListIterGet(pCheckInfo->iter);
if (node != NULL) {
rmem = (SMemRow)SL_GET_NODE_DATA(node);
}
}
if (pCheckInfo->iiter) {
SSkipListNode* node = tSkipListIterGet(pCheckInfo->iiter);
if (node != NULL) {
rimem = (SMemRow)SL_GET_NODE_DATA(node);
}
}
if (rmem == NULL && rimem == NULL) {
return NULL;
}
if (rmem != NULL && rimem == NULL) {
pCheckInfo->chosen = 0;
return rmem;
}
if (rmem == NULL && rimem != NULL) {
pCheckInfo->chosen = 1;
return rimem;
}
TSKEY r1 = memRowKey(rmem);
TSKEY r2 = memRowKey(rimem);
if (r1 == r2) {
if (update == TD_ROW_DISCARD_UPDATE) {
tSkipListIterNext(pCheckInfo->iter);
pCheckInfo->chosen = CHECKINFO_CHOSEN_IMEM;
return rimem;
} else if(update == TD_ROW_OVERWRITE_UPDATE){
tSkipListIterNext(pCheckInfo->iiter);
pCheckInfo->chosen = CHECKINFO_CHOSEN_MEM;
return rmem;
} else {
pCheckInfo->chosen = CHECKINFO_CHOSEN_BOTH;
extraRow = rimem;
return rmem;
}
} else {
if (ASCENDING_TRAVERSE(order)) {
if (r1 < r2) {
pCheckInfo->chosen = CHECKINFO_CHOSEN_MEM;
return rmem;
} else {
pCheckInfo->chosen = CHECKINFO_CHOSEN_IMEM;
return rimem;
}
} else {
if (r1 < r2) {
pCheckInfo->chosen = CHECKINFO_CHOSEN_IMEM;
return rimem;
} else {
pCheckInfo->chosen = CHECKINFO_CHOSEN_MEM;
return rmem;
}
}
}
}
static bool moveToNextRowInMem(STableCheckInfo* pCheckInfo) {
bool hasNext = false;
if (pCheckInfo->chosen == CHECKINFO_CHOSEN_MEM) {
if (pCheckInfo->iter != NULL) {
hasNext = tSkipListIterNext(pCheckInfo->iter);
}
if (hasNext) {
return hasNext;
}
if (pCheckInfo->iiter != NULL) {
return tSkipListIterGet(pCheckInfo->iiter) != NULL;
}
} else if (pCheckInfo->chosen == CHECKINFO_CHOSEN_IMEM){
if (pCheckInfo->iiter != NULL) {
hasNext = tSkipListIterNext(pCheckInfo->iiter);
}
if (hasNext) {
return hasNext;
}
if (pCheckInfo->iter != NULL) {
return tSkipListIterGet(pCheckInfo->iter) != NULL;
}
} else {
if (pCheckInfo->iter != NULL) {
hasNext = tSkipListIterNext(pCheckInfo->iter);
}
if (pCheckInfo->iiter != NULL) {
hasNext = tSkipListIterNext(pCheckInfo->iiter) || hasNext;
}
}
return hasNext;
}
static bool hasMoreDataInCache(STsdbQueryHandle* pHandle) {
STsdbCfg *pCfg = &pHandle->pTsdb->config;
size_t size = taosArrayGetSize(pHandle->pTableCheckInfo);
assert(pHandle->activeIndex < size && pHandle->activeIndex >= 0 && size >= 1);
pHandle->cur.fid = INT32_MIN;
STableCheckInfo* pCheckInfo = taosArrayGet(pHandle->pTableCheckInfo, pHandle->activeIndex);
STable* pTable = pCheckInfo->pTableObj;
assert(pTable != NULL);
if (!pCheckInfo->initBuf) {
initTableMemIterator(pHandle, pCheckInfo);
}
SMemRow row = getSMemRowInTableMem(pCheckInfo, pHandle->order, pCfg->update, NULL);
if (row == NULL) {
return false;
}
pCheckInfo->lastKey = memRowKey(row); // first timestamp in buffer
tsdbDebug("%p uid:%" PRId64", tid:%d check data in buffer from skey:%" PRId64 ", order:%d, 0x%"PRIx64, pHandle,
pCheckInfo->tableId.uid, pCheckInfo->tableId.tid, pCheckInfo->lastKey, pHandle->order, pHandle->qId);
// all data in mem are checked already.
if ((pCheckInfo->lastKey > pHandle->window.ekey && ASCENDING_TRAVERSE(pHandle->order)) ||
(pCheckInfo->lastKey < pHandle->window.ekey && !ASCENDING_TRAVERSE(pHandle->order))) {
return false;
}
int32_t step = ASCENDING_TRAVERSE(pHandle->order)? 1:-1;
STimeWindow* win = &pHandle->cur.win;
pHandle->cur.rows = tsdbReadRowsFromCache(pCheckInfo, pHandle->window.ekey, pHandle->outputCapacity, win, pHandle);
// update the last key value
pCheckInfo->lastKey = win->ekey + step;
pHandle->cur.lastKey = win->ekey + step;
pHandle->cur.mixBlock = true;
if (!ASCENDING_TRAVERSE(pHandle->order)) {
SWAP(win->skey, win->ekey, TSKEY);
}
return true;
}
static int32_t getFileIdFromKey(TSKEY key, int32_t daysPerFile, int32_t precision) {
assert(precision >= TSDB_TIME_PRECISION_MICRO || precision <= TSDB_TIME_PRECISION_NANO);
if (key == TSKEY_INITIAL_VAL) {
return INT32_MIN;
}
if (key < 0) {
key -= (daysPerFile * tsTickPerDay[precision]);
}
int64_t fid = (int64_t)(key / (daysPerFile * tsTickPerDay[precision])); // set the starting fileId
if (fid < 0L && llabs(fid) > INT32_MAX) { // data value overflow for INT32
fid = INT32_MIN;
}
if (fid > 0L && fid > INT32_MAX) {
fid = INT32_MAX;
}
return (int32_t)fid;
}
static int32_t binarySearchForBlock(SBlock* pBlock, int32_t numOfBlocks, TSKEY skey, int32_t order) {
int32_t firstSlot = 0;
int32_t lastSlot = numOfBlocks - 1;
int32_t midSlot = firstSlot;
while (1) {
numOfBlocks = lastSlot - firstSlot + 1;
midSlot = (firstSlot + (numOfBlocks >> 1));
if (numOfBlocks == 1) break;
if (skey > pBlock[midSlot].keyLast) {
if (numOfBlocks == 2) break;
if ((order == TSDB_ORDER_DESC) && (skey < pBlock[midSlot + 1].keyFirst)) break;
firstSlot = midSlot + 1;
} else if (skey < pBlock[midSlot].keyFirst) {
if ((order == TSDB_ORDER_ASC) && (skey > pBlock[midSlot - 1].keyLast)) break;
lastSlot = midSlot - 1;
} else {
break; // got the slot
}
}
return midSlot;
}
// array :1 2 3 5 7 -2 (8 9) skip 4 and 6
int32_t memMoveByArray(SBlock *blocks, SArray *pArray) {
// pArray is NULL or size is zero , no need block to move
if(pArray == NULL)
return 0;
size_t count = taosArrayGetSize(pArray);
if(count == 0)
return 0;
// memmove
int32_t num = 0;
SRange* ranges = (SRange*)TARRAY_GET_START(pArray);
for(size_t i = 0; i < count; i++) {
int32_t step = ranges[i].to - ranges[i].from + 1;
memmove(blocks + num, blocks + ranges[i].from, sizeof(SBlock) * step);
num += step;
}
return num;
}
// if block data in memory return false else true
bool blockNoItemInMem(STsdbQueryHandle* q, SBlock* pBlock) {
if(q->pMemRef == NULL) {
return false;
}
// mem
if(q->pMemRef->snapshot.mem) {
SMemTable* mem = q->pMemRef->snapshot.mem;
if(timeIntersect(mem->keyFirst, mem->keyLast, pBlock->keyFirst, pBlock->keyLast))
return false;
}
// imem
if(q->pMemRef->snapshot.imem) {
SMemTable* imem = q->pMemRef->snapshot.imem;
if(timeIntersect(imem->keyFirst, imem->keyLast, pBlock->keyFirst, pBlock->keyLast))
return false;
}
return true;
}
#define MAYBE_IN_MEMORY_ROWS 4000 // approximately the capacity of one block
// skip blocks . return value is skip blocks number, skip rows reduce from *pOffset
static int32_t offsetSkipBlock(STsdbQueryHandle* q, SBlockInfo* pBlockInfo, int64_t skey, int64_t ekey,
int32_t sblock, int32_t eblock, SArray** ppArray, bool order) {
int32_t num = 0;
SBlock* blocks = pBlockInfo->blocks;
SArray* pArray = NULL;
SRange range;
range.from = -1;
//
// ASC
//
if(order) {
for(int32_t i = sblock; i < eblock; i++) {
bool skip = false;
SBlock* pBlock = &blocks[i];
if(i == sblock && skey > pBlock->keyFirst) {
q->frows += pBlock->numOfRows; // some rows time < s
} else {
// check can skip
if(q->srows + q->frows + pBlock->numOfRows + MAYBE_IN_MEMORY_ROWS < q->offset) { // approximately calculate
if(blockNoItemInMem(q, pBlock)) {
// can skip
q->srows += pBlock->numOfRows;
skip = true;
} else {
q->frows += pBlock->numOfRows; // maybe have some row in memroy
}
} else {
// the remainder be put to pArray
if(pArray == NULL)
pArray = taosArrayInit(1, sizeof(SRange));
if(range.from == -1) {
range.from = i;
} else {
if(range.to + 1 != i) {
// add the previous
taosArrayPush(pArray, &range);
range.from = i;
}
}
range.to = eblock - 1;
taosArrayPush(pArray, &range);
range.from = -1;
break;
}
}
if(skip) {
num ++;
} else {
// can't skip, append block index to pArray
if(pArray == NULL)
pArray = taosArrayInit(10, sizeof(SRange));
if(range.from == -1) {
range.from = i;
} else {
if(range.to + 1 != i) {
// add the previous
taosArrayPush(pArray, &range);
range.from = i;
}
}
range.to = i;
}
}
// end append
if(range.from != -1) {
if(pArray == NULL)
pArray = taosArrayInit(1, sizeof(SRange));
taosArrayPush(pArray, &range);
}
// ASC return
*ppArray = pArray;
return num;
}
// DES
for(int32_t i = eblock - 1; i >= sblock; i--) {
bool skip = false;
SBlock* pBlock = &blocks[i];
if(i == eblock - 1 && ekey < pBlock->keyLast) {
q->frows += pBlock->numOfRows; // some rows time > e
} else {
// check can skip
if(q->srows + q->frows + pBlock->numOfRows + MAYBE_IN_MEMORY_ROWS < q->offset) { // approximately calculate
if(blockNoItemInMem(q, pBlock)) {
// can skip
q->srows += pBlock->numOfRows;
skip = true;
} else {
q->frows += pBlock->numOfRows; // maybe have some row in memroy
}
} else {
// the remainder be put to pArray
if(pArray == NULL)
pArray = taosArrayInit(1, sizeof(SRange));
if(range.from == -1) {
range.from = i;
} else {
if(range.to - 1 != i) {
// add the previous
taosArrayPush(pArray, &range);
range.from = i;
}
}
range.to = 0;
taosArrayPush(pArray, &range);
range.from = -1;
break;
}
}
if(skip) {
num ++;
} else {
// can't skip, append block index to pArray
if(pArray == NULL)
pArray = taosArrayInit(10, sizeof(SRange));
if(range.from == -1) {
range.from = i;
} else {
if(range.to + 1 != i) {
// add the previous
taosArrayPush(pArray, &range);
range.from = i;
}
}
range.to = i;
}
}
// end append
if(range.from != -1) {
if(pArray == NULL)
pArray = taosArrayInit(1, sizeof(SRange));
taosArrayPush(pArray, &range);
}
if(pArray == NULL)
return num;
// reverse array
size_t count = taosArrayGetSize(pArray);
SRange* ranges = TARRAY_GET_START(pArray);
SArray* pArray1 = taosArrayInit(count, sizeof(SRange));
size_t i = count - 1;
while(i >= 0) {
range.from = ranges[i].to;
range.to = ranges[i].from;
taosArrayPush(pArray1, &range);
if(i == 0)
break;
i --;
}
*ppArray = pArray1;
taosArrayDestroy(&pArray);
return num;
}
// shrink blocks by condition of query
static void shrinkBlocksByQuery(STsdbQueryHandle *pQueryHandle, STableCheckInfo *pCheckInfo) {
SBlockInfo *pCompInfo = pCheckInfo->pCompInfo;
SBlockIdx *compIndex = pQueryHandle->rhelper.pBlkIdx;
bool order = ASCENDING_TRAVERSE(pQueryHandle->order);
if (order) {
assert(pCheckInfo->lastKey <= pQueryHandle->window.ekey && pQueryHandle->window.skey <= pQueryHandle->window.ekey);
} else {
assert(pCheckInfo->lastKey >= pQueryHandle->window.ekey && pQueryHandle->window.skey >= pQueryHandle->window.ekey);
}
TSKEY s = TSKEY_INITIAL_VAL, e = TSKEY_INITIAL_VAL;
s = MIN(pCheckInfo->lastKey, pQueryHandle->window.ekey);
e = MAX(pCheckInfo->lastKey, pQueryHandle->window.ekey);
// discard the unqualified data block based on the query time window
int32_t start = binarySearchForBlock(pCompInfo->blocks, compIndex->numOfBlocks, s, TSDB_ORDER_ASC);
if (s > pCompInfo->blocks[start].keyLast) {
return ;
}
int32_t end = start;
// locate e index of blocks -> end
while (end < (int32_t)compIndex->numOfBlocks && (pCompInfo->blocks[end].keyFirst <= e)) {
end += 1;
}
// calc offset can skip blocks number
int32_t nSkip = 0;
SArray *pArray = NULL;
if(pQueryHandle->offset > 0) {
nSkip = offsetSkipBlock(pQueryHandle, pCompInfo, s, e, start, end, &pArray, order);
}
if(nSkip > 0) { // have offset and can skip
pCheckInfo->numOfBlocks = memMoveByArray(pCompInfo->blocks, pArray);
} else { // no offset
pCheckInfo->numOfBlocks = end - start;
if(start > 0)
memmove(pCompInfo->blocks, &pCompInfo->blocks[start], pCheckInfo->numOfBlocks * sizeof(SBlock));
}
if(pArray)
taosArrayDestroy(&pArray);
}
// load one table (tsd_index point to) need load blocks info and put into pCheckInfo->pCompInfo->blocks
static int32_t loadBlockInfo(STsdbQueryHandle * pQueryHandle, int32_t tsd_index, int32_t* numOfBlocks) {
//
// ONE PART. Load all blocks info from one table of tsd_index
//
int32_t code = 0;
STableCheckInfo* pCheckInfo = taosArrayGet(pQueryHandle->pTableCheckInfo, tsd_index);
pCheckInfo->numOfBlocks = 0;
if (tsdbSetReadTable(&pQueryHandle->rhelper, pCheckInfo->pTableObj) != TSDB_CODE_SUCCESS) {
code = terrno;
return code;
}
SBlockIdx* compIndex = pQueryHandle->rhelper.pBlkIdx;
// no data block in this file, try next file
if (compIndex == NULL || compIndex->uid != pCheckInfo->tableId.uid) {
return 0; // no data blocks in the file belongs to pCheckInfo->pTable
}
if (pCheckInfo->compSize < (int32_t)compIndex->len) {
assert(compIndex->len > 0);
char* t = realloc(pCheckInfo->pCompInfo, compIndex->len);
if (t == NULL) {
terrno = TSDB_CODE_TDB_OUT_OF_MEMORY;
code = TSDB_CODE_TDB_OUT_OF_MEMORY;
return code;
}
pCheckInfo->pCompInfo = (SBlockInfo*)t;
pCheckInfo->compSize = compIndex->len;
}
if (tsdbLoadBlockInfo(&(pQueryHandle->rhelper), (void**)(&pCheckInfo->pCompInfo),
(uint32_t*)(&pCheckInfo->compSize)) < 0) {
return terrno;
}
//
// TWO PART. shrink no need blocks from all blocks by condition of query
//
shrinkBlocksByQuery(pQueryHandle, pCheckInfo);
(*numOfBlocks) += pCheckInfo->numOfBlocks;
return 0;
}
static int32_t getFileCompInfo(STsdbQueryHandle* pQueryHandle, int32_t* numOfBlocks) {
// load all the comp offset value for all tables in this file
int32_t code = TSDB_CODE_SUCCESS;
*numOfBlocks = 0;
pQueryHandle->cost.headFileLoad += 1;
int64_t s = taosGetTimestampUs();
size_t numOfTables = 0;
if (pQueryHandle->loadType == BLOCK_LOAD_TABLE_SEQ_ORDER) {
code = loadBlockInfo(pQueryHandle, pQueryHandle->activeIndex, numOfBlocks);
} else if (pQueryHandle->loadType == BLOCK_LOAD_OFFSET_SEQ_ORDER) {
numOfTables = taosArrayGetSize(pQueryHandle->pTableCheckInfo);
for (int32_t i = 0; i < numOfTables; ++i) {
code = loadBlockInfo(pQueryHandle, i, numOfBlocks);
if (code != TSDB_CODE_SUCCESS) {
int64_t e = taosGetTimestampUs();
pQueryHandle->cost.headFileLoadTime += (e - s);
return code;
}
}
} else {
assert(0);
}
int64_t e = taosGetTimestampUs();
pQueryHandle->cost.headFileLoadTime += (e - s);
return code;
}
static int32_t doLoadFileDataBlock(STsdbQueryHandle* pQueryHandle, SBlock* pBlock, STableCheckInfo* pCheckInfo, int32_t slotIndex) {
int64_t st = taosGetTimestampUs();
STSchema *pSchema = tsdbGetTableSchema(pCheckInfo->pTableObj);
int32_t code = tdInitDataCols(pQueryHandle->pDataCols, pSchema);
if (code != TSDB_CODE_SUCCESS) {
tsdbError("%p failed to malloc buf for pDataCols, 0x%"PRIx64, pQueryHandle, pQueryHandle->qId);
terrno = TSDB_CODE_TDB_OUT_OF_MEMORY;
goto _error;
}
code = tdInitDataCols(pQueryHandle->rhelper.pDCols[0], pSchema);
if (code != TSDB_CODE_SUCCESS) {
tsdbError("%p failed to malloc buf for rhelper.pDataCols[0], 0x%"PRIx64, pQueryHandle, pQueryHandle->qId);
terrno = TSDB_CODE_TDB_OUT_OF_MEMORY;
goto _error;
}
code = tdInitDataCols(pQueryHandle->rhelper.pDCols[1], pSchema);
if (code != TSDB_CODE_SUCCESS) {
tsdbError("%p failed to malloc buf for rhelper.pDataCols[1], 0x%"PRIx64, pQueryHandle, pQueryHandle->qId);
terrno = TSDB_CODE_TDB_OUT_OF_MEMORY;
goto _error;
}
int16_t* colIds = pQueryHandle->defaultLoadColumn->pData;
int32_t ret = tsdbLoadBlockDataCols(&(pQueryHandle->rhelper), pBlock, pCheckInfo->pCompInfo, colIds, (int)(QH_GET_NUM_OF_COLS(pQueryHandle)));
if (ret != TSDB_CODE_SUCCESS) {
int32_t c = terrno;
assert(c != TSDB_CODE_SUCCESS);
goto _error;
}
SDataBlockLoadInfo* pBlockLoadInfo = &pQueryHandle->dataBlockLoadInfo;
pBlockLoadInfo->fileGroup = pQueryHandle->pFileGroup;
pBlockLoadInfo->slot = pQueryHandle->cur.slot;
pBlockLoadInfo->tid = pCheckInfo->pTableObj->tableId.tid;
SDataCols* pCols = pQueryHandle->rhelper.pDCols[0];
assert(pCols->numOfRows != 0 && pCols->numOfRows <= pBlock->numOfRows);
pBlock->numOfRows = pCols->numOfRows;
// Convert from TKEY to TSKEY for primary timestamp column if current block has timestamp before 1970-01-01T00:00:00Z
if(pBlock->keyFirst < 0 && colIds[0] == PRIMARYKEY_TIMESTAMP_COL_INDEX) {
int64_t* src = pCols->cols[0].pData;
for(int32_t i = 0; i < pBlock->numOfRows; ++i) {
src[i] = tdGetKey(src[i]);
}
}
int64_t elapsedTime = (taosGetTimestampUs() - st);
pQueryHandle->cost.blockLoadTime += elapsedTime;
tsdbDebug("%p load file block into buffer, index:%d, brange:%"PRId64"-%"PRId64", rows:%d, elapsed time:%"PRId64 " us, 0x%"PRIx64,
pQueryHandle, slotIndex, pBlock->keyFirst, pBlock->keyLast, pBlock->numOfRows, elapsedTime, pQueryHandle->qId);
return TSDB_CODE_SUCCESS;
_error:
pBlock->numOfRows = 0;
tsdbError("%p error occurs in loading file block, index:%d, brange:%"PRId64"-%"PRId64", rows:%d, 0x%"PRIx64,
pQueryHandle, slotIndex, pBlock->keyFirst, pBlock->keyLast, pBlock->numOfRows, pQueryHandle->qId);
return terrno;
}
static int32_t getEndPosInDataBlock(STsdbQueryHandle* pQueryHandle, SDataBlockInfo* pBlockInfo);
static int32_t doCopyRowsFromFileBlock(STsdbQueryHandle* pQueryHandle, int32_t capacity, int32_t numOfRows, int32_t start, int32_t end);
static void moveDataToFront(STsdbQueryHandle* pQueryHandle, int32_t numOfRows, int32_t numOfCols);
static void doCheckGeneratedBlockRange(STsdbQueryHandle* pQueryHandle);
static void copyAllRemainRowsFromFileBlock(STsdbQueryHandle* pQueryHandle, STableCheckInfo* pCheckInfo, SDataBlockInfo* pBlockInfo, int32_t endPos);
static int32_t handleDataMergeIfNeeded(STsdbQueryHandle* pQueryHandle, SBlock* pBlock, STableCheckInfo* pCheckInfo){
SQueryFilePos* cur = &pQueryHandle->cur;
STsdbCfg* pCfg = &pQueryHandle->pTsdb->config;
SDataBlockInfo binfo = GET_FILE_DATA_BLOCK_INFO(pCheckInfo, pBlock);
TSKEY key;
int32_t code = TSDB_CODE_SUCCESS;
/*bool hasData = */ initTableMemIterator(pQueryHandle, pCheckInfo);
assert(cur->pos >= 0 && cur->pos <= binfo.rows);
key = extractFirstTraverseKey(pCheckInfo, pQueryHandle->order, pCfg->update);
if (key != TSKEY_INITIAL_VAL) {
tsdbDebug("%p key in mem:%"PRId64", 0x%"PRIx64, pQueryHandle, key, pQueryHandle->qId);
} else {
tsdbDebug("%p no data in mem, 0x%"PRIx64, pQueryHandle, pQueryHandle->qId);
}
if ((ASCENDING_TRAVERSE(pQueryHandle->order) && (key != TSKEY_INITIAL_VAL && key <= binfo.window.ekey)) ||
(!ASCENDING_TRAVERSE(pQueryHandle->order) && (key != TSKEY_INITIAL_VAL && key >= binfo.window.skey))) {
if ((ASCENDING_TRAVERSE(pQueryHandle->order) && (key != TSKEY_INITIAL_VAL && key < binfo.window.skey)) ||
(!ASCENDING_TRAVERSE(pQueryHandle->order) && (key != TSKEY_INITIAL_VAL && key > binfo.window.ekey))) {
// do not load file block into buffer
int32_t step = ASCENDING_TRAVERSE(pQueryHandle->order) ? 1 : -1;
TSKEY maxKey = ASCENDING_TRAVERSE(pQueryHandle->order)? (binfo.window.skey - step):(binfo.window.ekey - step);
cur->rows = tsdbReadRowsFromCache(pCheckInfo, maxKey, pQueryHandle->outputCapacity, &cur->win, pQueryHandle);
pQueryHandle->realNumOfRows = cur->rows;
// update the last key value
pCheckInfo->lastKey = cur->win.ekey + step;
if (!ASCENDING_TRAVERSE(pQueryHandle->order)) {
SWAP(cur->win.skey, cur->win.ekey, TSKEY);
}
cur->mixBlock = true;
cur->blockCompleted = false;
return code;
}
// return error, add test cases
if ((code = doLoadFileDataBlock(pQueryHandle, pBlock, pCheckInfo, cur->slot)) != TSDB_CODE_SUCCESS) {
return code;
}
doMergeTwoLevelData(pQueryHandle, pCheckInfo, pBlock);
} else {
/*
* no data in cache, only load data from file
* during the query processing, data in cache will not be checked anymore.
*
* Here the buffer is not enough, so only part of file block can be loaded into memory buffer
*/
assert(pQueryHandle->outputCapacity >= binfo.rows);
int32_t endPos = getEndPosInDataBlock(pQueryHandle, &binfo);
if ((cur->pos == 0 && endPos == binfo.rows -1 && ASCENDING_TRAVERSE(pQueryHandle->order)) ||
(cur->pos == (binfo.rows - 1) && endPos == 0 && (!ASCENDING_TRAVERSE(pQueryHandle->order)))) {
pQueryHandle->realNumOfRows = binfo.rows;
cur->rows = binfo.rows;
cur->win = binfo.window;
cur->mixBlock = false;
cur->blockCompleted = true;
if (ASCENDING_TRAVERSE(pQueryHandle->order)) {
cur->lastKey = binfo.window.ekey + 1;
cur->pos = binfo.rows;
} else {
cur->lastKey = binfo.window.skey - 1;
cur->pos = -1;
}
} else { // partially copy to dest buffer
copyAllRemainRowsFromFileBlock(pQueryHandle, pCheckInfo, &binfo, endPos);
cur->mixBlock = true;
}
assert(cur->blockCompleted);
if (cur->rows == binfo.rows) {
tsdbDebug("%p whole file block qualified, brange:%"PRId64"-%"PRId64", rows:%d, lastKey:%"PRId64", tid:%d, %"PRIx64,
pQueryHandle, cur->win.skey, cur->win.ekey, cur->rows, cur->lastKey, binfo.tid, pQueryHandle->qId);
} else {
tsdbDebug("%p create data block from remain file block, brange:%"PRId64"-%"PRId64", rows:%d, total:%d, lastKey:%"PRId64", tid:%d, %"PRIx64,
pQueryHandle, cur->win.skey, cur->win.ekey, cur->rows, binfo.rows, cur->lastKey, binfo.tid, pQueryHandle->qId);
}
}
return code;
}
static int32_t loadFileDataBlock(STsdbQueryHandle* pQueryHandle, SBlock* pBlock, STableCheckInfo* pCheckInfo, bool* exists) {
SQueryFilePos* cur = &pQueryHandle->cur;
int32_t code = TSDB_CODE_SUCCESS;
bool asc = ASCENDING_TRAVERSE(pQueryHandle->order);
if (asc) {
// query ended in/started from current block
if (pQueryHandle->window.ekey < pBlock->keyLast || pCheckInfo->lastKey > pBlock->keyFirst) {
if ((code = doLoadFileDataBlock(pQueryHandle, pBlock, pCheckInfo, cur->slot)) != TSDB_CODE_SUCCESS) {
*exists = false;
return code;
}
SDataCols* pTSCol = pQueryHandle->rhelper.pDCols[0];
assert(pTSCol->cols->type == TSDB_DATA_TYPE_TIMESTAMP && pTSCol->numOfRows == pBlock->numOfRows);
if (pCheckInfo->lastKey > pBlock->keyFirst) {
cur->pos =
binarySearchForKey(pTSCol->cols[0].pData, pBlock->numOfRows, pCheckInfo->lastKey, pQueryHandle->order);
} else {
cur->pos = 0;
}
assert(pCheckInfo->lastKey <= pBlock->keyLast);
doMergeTwoLevelData(pQueryHandle, pCheckInfo, pBlock);
} else { // the whole block is loaded in to buffer
cur->pos = asc? 0:(pBlock->numOfRows - 1);
code = handleDataMergeIfNeeded(pQueryHandle, pBlock, pCheckInfo);
}
} else { //desc order, query ended in current block
if (pQueryHandle->window.ekey > pBlock->keyFirst || pCheckInfo->lastKey < pBlock->keyLast) {
if ((code = doLoadFileDataBlock(pQueryHandle, pBlock, pCheckInfo, cur->slot)) != TSDB_CODE_SUCCESS) {
*exists = false;
return code;
}
SDataCols* pTsCol = pQueryHandle->rhelper.pDCols[0];
if (pCheckInfo->lastKey < pBlock->keyLast) {
cur->pos = binarySearchForKey(pTsCol->cols[0].pData, pBlock->numOfRows, pCheckInfo->lastKey, pQueryHandle->order);
} else {
cur->pos = pBlock->numOfRows - 1;
}
assert(pCheckInfo->lastKey >= pBlock->keyFirst);
doMergeTwoLevelData(pQueryHandle, pCheckInfo, pBlock);
} else {
cur->pos = asc? 0:(pBlock->numOfRows-1);
code = handleDataMergeIfNeeded(pQueryHandle, pBlock, pCheckInfo);
}
}
*exists = pQueryHandle->realNumOfRows > 0;
return code;
}
// search last keyList[ret] < key order asc and keyList[ret] > key order desc
static int doBinarySearchKey(TSKEY* keyList, int num, int pos, TSKEY key, int order) {
// start end posistion
int s, e;
s = pos;
// check
assert(pos >=0 && pos < num);
assert(num > 0);
if (order == TSDB_ORDER_ASC) {
// find the first position which is smaller than the key
e = num - 1;
if (key < keyList[pos])
return -1;
while (1) {
// check can return
if (key >= keyList[e])
return e;
if (key <= keyList[s])
return s;
if (e - s <= 1)
return s;
// change start or end position
int mid = s + (e - s + 1)/2;
if (keyList[mid] > key)
e = mid;
else if(keyList[mid] < key)
s = mid;
else
return mid;
}
} else { // DESC
// find the first position which is bigger than the key
e = 0;
if (key > keyList[pos])
return -1;
while (1) {
// check can return
if (key <= keyList[e])
return e;
if (key >= keyList[s])
return s;
if (s - e <= 1)
return s;
// change start or end position
int mid = s - (s - e + 1)/2;
if (keyList[mid] < key)
e = mid;
else if(keyList[mid] > key)
s = mid;
else
return mid;
}
}
}
static int32_t doCopyRowsFromFileBlock(STsdbQueryHandle* pQueryHandle, int32_t capacity, int32_t numOfRows, int32_t start, int32_t end) {
char* pData = NULL;
int32_t step = ASCENDING_TRAVERSE(pQueryHandle->order)? 1 : -1;
SDataCols* pCols = pQueryHandle->rhelper.pDCols[0];
TSKEY* tsArray = pCols->cols[0].pData;
int32_t num = end - start + 1;
assert(num >= 0);
if (num == 0) {
return numOfRows;
}
int32_t requiredNumOfCols = (int32_t)taosArrayGetSize(pQueryHandle->pColumns);
//data in buffer has greater timestamp, copy data in file block
int32_t i = 0, j = 0;
while(i < requiredNumOfCols && j < pCols->numOfCols) {
SColumnInfoData* pColInfo = taosArrayGet(pQueryHandle->pColumns, i);
SDataCol* src = &pCols->cols[j];
if (src->colId < pColInfo->info.colId) {
j++;
continue;
}
int32_t bytes = pColInfo->info.bytes;
if (ASCENDING_TRAVERSE(pQueryHandle->order)) {
pData = (char*)pColInfo->pData + numOfRows * pColInfo->info.bytes;
} else {
pData = (char*)pColInfo->pData + (capacity - numOfRows - num) * pColInfo->info.bytes;
}
if (!isAllRowsNull(src) && pColInfo->info.colId == src->colId) {
if (pColInfo->info.type != TSDB_DATA_TYPE_BINARY && pColInfo->info.type != TSDB_DATA_TYPE_NCHAR) {
memmove(pData, (char*)src->pData + bytes * start, bytes * num);
} else { // handle the var-string
char* dst = pData;
// todo refactor, only copy one-by-one
for (int32_t k = start; k < num + start; ++k) {
const char* p = tdGetColDataOfRow(src, k);
memcpy(dst, p, varDataTLen(p));
dst += bytes;
}
}
j++;
i++;
} else { // pColInfo->info.colId < src->colId, it is a NULL data
if (pColInfo->info.type == TSDB_DATA_TYPE_BINARY || pColInfo->info.type == TSDB_DATA_TYPE_NCHAR) {
char* dst = pData;
for(int32_t k = start; k < num + start; ++k) {
setVardataNull(dst, pColInfo->info.type);
dst += bytes;
}
} else {
setNullN(pData, pColInfo->info.type, pColInfo->info.bytes, num);
}
i++;
}
}
while (i < requiredNumOfCols) { // the remain columns are all null data
SColumnInfoData* pColInfo = taosArrayGet(pQueryHandle->pColumns, i);
if (ASCENDING_TRAVERSE(pQueryHandle->order)) {
pData = (char*)pColInfo->pData + numOfRows * pColInfo->info.bytes;
} else {
pData = (char*)pColInfo->pData + (capacity - numOfRows - num) * pColInfo->info.bytes;
}
if (pColInfo->info.type == TSDB_DATA_TYPE_BINARY || pColInfo->info.type == TSDB_DATA_TYPE_NCHAR) {
char* dst = pData;
for(int32_t k = start; k < num + start; ++k) {
setVardataNull(dst, pColInfo->info.type);
dst += pColInfo->info.bytes;
}
} else {
setNullN(pData, pColInfo->info.type, pColInfo->info.bytes, num);
}
i++;
}
pQueryHandle->cur.win.ekey = tsArray[end];
pQueryHandle->cur.lastKey = tsArray[end] + step;
return numOfRows + num;
}
// Note: row1 always has high priority
static void mergeTwoRowFromMem(STsdbQueryHandle* pQueryHandle, int32_t capacity, int32_t numOfRows,
SMemRow row1, SMemRow row2, int32_t numOfCols, STable* pTable,
STSchema* pSchema1, STSchema* pSchema2, bool forceSetNull) {
char* pData = NULL;
STSchema* pSchema;
SMemRow row;
int16_t colId;
int16_t offset;
bool isRow1DataRow = isDataRow(row1);
bool isRow2DataRow = false;
bool isChosenRowDataRow;
int32_t chosen_itr;
void *value;
// the schema version info is embeded in SDataRow
int32_t numOfColsOfRow1 = 0;
if (pSchema1 == NULL) {
pSchema1 = tsdbGetTableSchemaByVersion(pTable, memRowVersion(row1), (int8_t)memRowType(row1));
}
if(isRow1DataRow) {
numOfColsOfRow1 = schemaNCols(pSchema1);
} else {
numOfColsOfRow1 = kvRowNCols(memRowKvBody(row1));
}
int32_t numOfColsOfRow2 = 0;
if(row2) {
isRow2DataRow = isDataRow(row2);
if (pSchema2 == NULL) {
pSchema2 = tsdbGetTableSchemaByVersion(pTable, memRowVersion(row2), (int8_t)memRowType(row2));
}
if(isRow2DataRow) {
numOfColsOfRow2 = schemaNCols(pSchema2);
} else {
numOfColsOfRow2 = kvRowNCols(memRowKvBody(row2));
}
}
int32_t i = 0, j = 0, k = 0;
while(i < numOfCols && (j < numOfColsOfRow1 || k < numOfColsOfRow2)) {
SColumnInfoData* pColInfo = taosArrayGet(pQueryHandle->pColumns, i);
if (ASCENDING_TRAVERSE(pQueryHandle->order)) {
pData = (char*)pColInfo->pData + numOfRows * pColInfo->info.bytes;
} else {
pData = (char*)pColInfo->pData + (capacity - numOfRows - 1) * pColInfo->info.bytes;
}
int32_t colIdOfRow1;
if(j >= numOfColsOfRow1) {
colIdOfRow1 = INT32_MAX;
} else if(isRow1DataRow) {
colIdOfRow1 = pSchema1->columns[j].colId;
} else {
void *rowBody = memRowKvBody(row1);
SColIdx *pColIdx = kvRowColIdxAt(rowBody, j);
colIdOfRow1 = pColIdx->colId;
}
int32_t colIdOfRow2;
if(k >= numOfColsOfRow2) {
colIdOfRow2 = INT32_MAX;
} else if(isRow2DataRow) {
colIdOfRow2 = pSchema2->columns[k].colId;
} else {
void *rowBody = memRowKvBody(row2);
SColIdx *pColIdx = kvRowColIdxAt(rowBody, k);
colIdOfRow2 = pColIdx->colId;
}
if(colIdOfRow1 == colIdOfRow2) {
if(colIdOfRow1 < pColInfo->info.colId) {
j++;
k++;
continue;
}
row = row1;
pSchema = pSchema1;
isChosenRowDataRow = isRow1DataRow;
chosen_itr = j;
} else if(colIdOfRow1 < colIdOfRow2) {
if(colIdOfRow1 < pColInfo->info.colId) {
j++;
continue;
}
row = row1;
pSchema = pSchema1;
isChosenRowDataRow = isRow1DataRow;
chosen_itr = j;
} else {
if(colIdOfRow2 < pColInfo->info.colId) {
k++;
continue;
}
row = row2;
pSchema = pSchema2;
chosen_itr = k;
isChosenRowDataRow = isRow2DataRow;
}
if(isChosenRowDataRow) {
colId = pSchema->columns[chosen_itr].colId;
offset = pSchema->columns[chosen_itr].offset;
void *rowBody = memRowDataBody(row);
value = tdGetRowDataOfCol(rowBody, (int8_t)pColInfo->info.type, TD_DATA_ROW_HEAD_SIZE + offset);
} else {
void *rowBody = memRowKvBody(row);
SColIdx *pColIdx = kvRowColIdxAt(rowBody, chosen_itr);
colId = pColIdx->colId;
offset = pColIdx->offset;
value = tdGetKvRowDataOfCol(rowBody, offset);
}
if (colId == pColInfo->info.colId) {
if(forceSetNull || (!isNull(value, (int8_t)pColInfo->info.type))) {
switch (pColInfo->info.type) {
case TSDB_DATA_TYPE_BINARY:
case TSDB_DATA_TYPE_NCHAR:
memcpy(pData, value, varDataTLen(value));
break;
case TSDB_DATA_TYPE_NULL:
case TSDB_DATA_TYPE_BOOL:
case TSDB_DATA_TYPE_TINYINT:
case TSDB_DATA_TYPE_UTINYINT:
*(uint8_t *)pData = *(uint8_t *)value;
break;
case TSDB_DATA_TYPE_SMALLINT:
case TSDB_DATA_TYPE_USMALLINT:
*(uint16_t *)pData = *(uint16_t *)value;
break;
case TSDB_DATA_TYPE_INT:
case TSDB_DATA_TYPE_UINT:
*(uint32_t *)pData = *(uint32_t *)value;
break;
case TSDB_DATA_TYPE_BIGINT:
case TSDB_DATA_TYPE_UBIGINT:
*(uint64_t *)pData = *(uint64_t *)value;
break;
case TSDB_DATA_TYPE_FLOAT:
SET_FLOAT_PTR(pData, value);
break;
case TSDB_DATA_TYPE_DOUBLE:
SET_DOUBLE_PTR(pData, value);
break;
case TSDB_DATA_TYPE_TIMESTAMP:
if (pColInfo->info.colId == PRIMARYKEY_TIMESTAMP_COL_INDEX) {
*(TSKEY *)pData = tdGetKey(*(TKEY *)value);
} else {
*(TSKEY *)pData = *(TSKEY *)value;
}
break;
default:
memcpy(pData, value, pColInfo->info.bytes);
}
}
i++;
if(row == row1) {
j++;
} else {
k++;
}
} else {
if(forceSetNull) {
if (pColInfo->info.type == TSDB_DATA_TYPE_BINARY || pColInfo->info.type == TSDB_DATA_TYPE_NCHAR) {
setVardataNull(pData, pColInfo->info.type);
} else {
setNull(pData, pColInfo->info.type, pColInfo->info.bytes);
}
}
i++;
}
}
if(forceSetNull) {
while (i < numOfCols) { // the remain columns are all null data
SColumnInfoData* pColInfo = taosArrayGet(pQueryHandle->pColumns, i);
if (ASCENDING_TRAVERSE(pQueryHandle->order)) {
pData = (char*)pColInfo->pData + numOfRows * pColInfo->info.bytes;
} else {
pData = (char*)pColInfo->pData + (capacity - numOfRows - 1) * pColInfo->info.bytes;
}
if (pColInfo->info.type == TSDB_DATA_TYPE_BINARY || pColInfo->info.type == TSDB_DATA_TYPE_NCHAR) {
setVardataNull(pData, pColInfo->info.type);
} else {
setNull(pData, pColInfo->info.type, pColInfo->info.bytes);
}
i++;
}
}
}
static void moveDataToFront(STsdbQueryHandle* pQueryHandle, int32_t numOfRows, int32_t numOfCols) {
if (numOfRows == 0 || ASCENDING_TRAVERSE(pQueryHandle->order)) {
return;
}
// if the buffer is not full in case of descending order query, move the data in the front of the buffer
if (numOfRows < pQueryHandle->outputCapacity) {
int32_t emptySize = pQueryHandle->outputCapacity - numOfRows;
for(int32_t i = 0; i < numOfCols; ++i) {
SColumnInfoData* pColInfo = taosArrayGet(pQueryHandle->pColumns, i);
memmove((char*)pColInfo->pData, (char*)pColInfo->pData + emptySize * pColInfo->info.bytes, numOfRows * pColInfo->info.bytes);
}
}
}
static void getQualifiedRowsPos(STsdbQueryHandle* pQueryHandle, int32_t startPos, int32_t endPos, int32_t numOfExisted,
int32_t* start, int32_t* end) {
*start = -1;
if (ASCENDING_TRAVERSE(pQueryHandle->order)) {
int32_t remain = endPos - startPos + 1;
if (remain + numOfExisted > pQueryHandle->outputCapacity) {
*end = (pQueryHandle->outputCapacity - numOfExisted) + startPos - 1;
} else {
*end = endPos;
}
*start = startPos;
} else {
int32_t remain = (startPos - endPos) + 1;
if (remain + numOfExisted > pQueryHandle->outputCapacity) {
*end = startPos + 1 - (pQueryHandle->outputCapacity - numOfExisted);
} else {
*end = endPos;
}
*start = *end;
*end = startPos;
}
}
static void updateInfoAfterMerge(STsdbQueryHandle* pQueryHandle, STableCheckInfo* pCheckInfo, int32_t numOfRows, int32_t endPos) {
SQueryFilePos* cur = &pQueryHandle->cur;
pCheckInfo->lastKey = cur->lastKey;
pQueryHandle->realNumOfRows = numOfRows;
cur->rows = numOfRows;
cur->pos = endPos;
}
static void doCheckGeneratedBlockRange(STsdbQueryHandle* pQueryHandle) {
SQueryFilePos* cur = &pQueryHandle->cur;
if (cur->rows > 0) {
if (ASCENDING_TRAVERSE(pQueryHandle->order)) {
assert(cur->win.skey >= pQueryHandle->window.skey && cur->win.ekey <= pQueryHandle->window.ekey);
} else {
assert(cur->win.skey >= pQueryHandle->window.ekey && cur->win.ekey <= pQueryHandle->window.skey);
}
SColumnInfoData* pColInfoData = taosArrayGet(pQueryHandle->pColumns, 0);
assert(cur->win.skey == ((TSKEY*)pColInfoData->pData)[0] && cur->win.ekey == ((TSKEY*)pColInfoData->pData)[cur->rows-1]);
} else {
cur->win = pQueryHandle->window;
int32_t step = ASCENDING_TRAVERSE(pQueryHandle->order)? 1:-1;
cur->lastKey = pQueryHandle->window.ekey + step;
}
}
static void copyAllRemainRowsFromFileBlock(STsdbQueryHandle* pQueryHandle, STableCheckInfo* pCheckInfo, SDataBlockInfo* pBlockInfo, int32_t endPos) {
SQueryFilePos* cur = &pQueryHandle->cur;
SDataCols* pCols = pQueryHandle->rhelper.pDCols[0];
TSKEY* tsArray = pCols->cols[0].pData;
int32_t step = ASCENDING_TRAVERSE(pQueryHandle->order)? 1:-1;
int32_t numOfCols = (int32_t)(QH_GET_NUM_OF_COLS(pQueryHandle));
int32_t pos = cur->pos;
int32_t start = cur->pos;
int32_t end = endPos;
if (!ASCENDING_TRAVERSE(pQueryHandle->order)) {
SWAP(start, end, int32_t);
}
assert(pQueryHandle->outputCapacity >= (end - start + 1));
int32_t numOfRows = doCopyRowsFromFileBlock(pQueryHandle, pQueryHandle->outputCapacity, 0, start, end);
// the time window should always be ascending order: skey <= ekey
cur->win = (STimeWindow) {.skey = tsArray[start], .ekey = tsArray[end]};
cur->mixBlock = (numOfRows != pBlockInfo->rows);
cur->lastKey = tsArray[endPos] + step;
cur->blockCompleted = true;
// if the buffer is not full in case of descending order query, move the data in the front of the buffer
moveDataToFront(pQueryHandle, numOfRows, numOfCols);
// The value of pos may be -1 or pBlockInfo->rows, and it is invalid in both cases.
pos = endPos + step;
updateInfoAfterMerge(pQueryHandle, pCheckInfo, numOfRows, pos);
doCheckGeneratedBlockRange(pQueryHandle);
tsdbDebug("%p uid:%" PRIu64",tid:%d data block created, mixblock:%d, brange:%"PRIu64"-%"PRIu64" rows:%d, 0x%"PRIx64,
pQueryHandle, pCheckInfo->tableId.uid, pCheckInfo->tableId.tid, cur->mixBlock, cur->win.skey,
cur->win.ekey, cur->rows, pQueryHandle->qId);
}
int32_t getEndPosInDataBlock(STsdbQueryHandle* pQueryHandle, SDataBlockInfo* pBlockInfo) {
// NOTE: reverse the order to find the end position in data block
int32_t endPos = -1;
SQueryFilePos* cur = &pQueryHandle->cur;
SDataCols* pCols = pQueryHandle->rhelper.pDCols[0];
if (ASCENDING_TRAVERSE(pQueryHandle->order) && pQueryHandle->window.ekey >= pBlockInfo->window.ekey) {
endPos = pBlockInfo->rows - 1;
cur->mixBlock = (cur->pos != 0);
} else if (!ASCENDING_TRAVERSE(pQueryHandle->order) && pQueryHandle->window.ekey <= pBlockInfo->window.skey) {
endPos = 0;
cur->mixBlock = (cur->pos != pBlockInfo->rows - 1);
} else {
assert(pCols->numOfRows > 0);
int pos = ASCENDING_TRAVERSE(pQueryHandle->order)? 0 : pBlockInfo->rows - 1;
endPos = doBinarySearchKey(pCols->cols[0].pData, pCols->numOfRows, pos, pQueryHandle->window.ekey, pQueryHandle->order);
assert(endPos != -1);
cur->mixBlock = true;
}
return endPos;
}
// only return the qualified data to client in terms of query time window, data rows in the same block but do not
// be included in the query time window will be discarded
static void doMergeTwoLevelData(STsdbQueryHandle* pQueryHandle, STableCheckInfo* pCheckInfo, SBlock* pBlock) {
SQueryFilePos* cur = &pQueryHandle->cur;
SDataBlockInfo blockInfo = GET_FILE_DATA_BLOCK_INFO(pCheckInfo, pBlock);
STsdbCfg* pCfg = &pQueryHandle->pTsdb->config;
initTableMemIterator(pQueryHandle, pCheckInfo);
SDataCols* pCols = pQueryHandle->rhelper.pDCols[0];
assert(pCols->cols[0].type == TSDB_DATA_TYPE_TIMESTAMP && pCols->cols[0].colId == PRIMARYKEY_TIMESTAMP_COL_INDEX &&
cur->pos >= 0 && cur->pos < pBlock->numOfRows);
// key read from file
TSKEY* keyFile = pCols->cols[0].pData;
assert(pCols->numOfRows == pBlock->numOfRows && keyFile[0] == pBlock->keyFirst && keyFile[pBlock->numOfRows-1] == pBlock->keyLast);
int32_t step = ASCENDING_TRAVERSE(pQueryHandle->order)? 1:-1;
int32_t numOfCols = (int32_t)(QH_GET_NUM_OF_COLS(pQueryHandle));
STable* pTable = pCheckInfo->pTableObj;
int32_t endPos = getEndPosInDataBlock(pQueryHandle, &blockInfo);
tsdbDebug("%p uid:%" PRIu64",tid:%d start merge data block, file block range:%"PRIu64"-%"PRIu64" rows:%d, start:%d,"
"end:%d, 0x%"PRIx64,
pQueryHandle, pCheckInfo->tableId.uid, pCheckInfo->tableId.tid, blockInfo.window.skey, blockInfo.window.ekey,
blockInfo.rows, cur->pos, endPos, pQueryHandle->qId);
// compared with the data from in-memory buffer, to generate the correct timestamp array list
int32_t numOfRows = 0;
int16_t rv1 = -1;
int16_t rv2 = -1;
STSchema* pSchema1 = NULL;
STSchema* pSchema2 = NULL;
// position in file ->fpos
int32_t pos = cur->pos;
cur->win = TSWINDOW_INITIALIZER;
// no data in buffer, load data from file directly
if (pCheckInfo->iiter == NULL && pCheckInfo->iter == NULL) {
copyAllRemainRowsFromFileBlock(pQueryHandle, pCheckInfo, &blockInfo, endPos);
return;
} else if (pCheckInfo->iter != NULL || pCheckInfo->iiter != NULL) {
SSkipListNode* node = NULL;
do {
SMemRow row2 = NULL;
SMemRow row1 = getSMemRowInTableMem(pCheckInfo, pQueryHandle->order, pCfg->update, &row2);
if (row1 == NULL) {
break;
}
TSKEY keyMem = memRowKey(row1);
if ((keyMem > pQueryHandle->window.ekey && ASCENDING_TRAVERSE(pQueryHandle->order)) ||
(keyMem < pQueryHandle->window.ekey && !ASCENDING_TRAVERSE(pQueryHandle->order))) {
break;
}
// break if pos not in this block endPos range. note old code when pos is -1 can crash.
if(ASCENDING_TRAVERSE(pQueryHandle->order)) { //ASC
if(pos > endPos || keyFile[pos] > pQueryHandle->window.ekey)
break;
} else { //DESC
if(pos < endPos || keyFile[pos] < pQueryHandle->window.ekey)
break;
}
if ((keyMem < keyFile[pos] && ASCENDING_TRAVERSE(pQueryHandle->order)) ||
(keyMem > keyFile[pos] && !ASCENDING_TRAVERSE(pQueryHandle->order))) {
if (rv1 != memRowVersion(row1)) {
pSchema1 = tsdbGetTableSchemaByVersion(pTable, memRowVersion(row1), (int8_t)memRowType(row1));
rv1 = memRowVersion(row1);
}
if(row2 && rv2 != memRowVersion(row2)) {
pSchema2 = tsdbGetTableSchemaByVersion(pTable, memRowVersion(row2), (int8_t)memRowType(row2));
rv2 = memRowVersion(row2);
}
mergeTwoRowFromMem(pQueryHandle, pQueryHandle->outputCapacity, numOfRows, row1, row2, numOfCols, pTable, pSchema1, pSchema2, true);
numOfRows += 1;
// record start key with memory key if not
if (cur->win.skey == TSKEY_INITIAL_VAL) {
cur->win.skey = keyMem;
}
cur->win.ekey = keyMem;
cur->lastKey = keyMem + step;
cur->mixBlock = true;
moveToNextRowInMem(pCheckInfo);
// same select mem key if update is true
} else if (keyMem == keyFile[pos]) {
if (pCfg->update) {
if(pCfg->update == TD_ROW_PARTIAL_UPDATE) {
doCopyRowsFromFileBlock(pQueryHandle, pQueryHandle->outputCapacity, numOfRows, pos, pos);
}
if (rv1 != memRowVersion(row1)) {
pSchema1 = tsdbGetTableSchemaByVersion(pTable, memRowVersion(row1), (int8_t)memRowType(row1));
rv1 = memRowVersion(row1);
}
if(row2 && rv2 != memRowVersion(row2)) {
pSchema2 = tsdbGetTableSchemaByVersion(pTable, memRowVersion(row2), (int8_t)memRowType(row2));
rv2 = memRowVersion(row2);
}
bool forceSetNull = pCfg->update != TD_ROW_PARTIAL_UPDATE;
mergeTwoRowFromMem(pQueryHandle, pQueryHandle->outputCapacity, numOfRows, row1, row2, numOfCols, pTable, pSchema1, pSchema2, forceSetNull);
numOfRows += 1;
if (cur->win.skey == TSKEY_INITIAL_VAL) {
cur->win.skey = keyMem;
}
cur->win.ekey = keyMem;
cur->lastKey = keyMem + step;
cur->mixBlock = true;
//mem move next
moveToNextRowInMem(pCheckInfo);
//file move next, discard file row
pos += step;
} else {
// not update, only mem move to next, discard mem row
moveToNextRowInMem(pCheckInfo);
}
// put file row
} else if ((keyMem > keyFile[pos] && ASCENDING_TRAVERSE(pQueryHandle->order)) ||
(keyMem < keyFile[pos] && !ASCENDING_TRAVERSE(pQueryHandle->order))) {
if (cur->win.skey == TSKEY_INITIAL_VAL) {
cur->win.skey = keyFile[pos];
}
int32_t end = doBinarySearchKey(pCols->cols[0].pData, pCols->numOfRows, pos, keyMem, pQueryHandle->order);
assert(end != -1);
if (keyFile[end] == keyMem) { // the value of key in cache equals to the end timestamp value, ignore it
if (pCfg->update == TD_ROW_DISCARD_UPDATE) {
moveToNextRowInMem(pCheckInfo);
} else {
// can update, don't copy then deal on next loop with keyMem == keyFile[pos]
end -= step;
}
}
int32_t qstart = 0, qend = 0;
getQualifiedRowsPos(pQueryHandle, pos, end, numOfRows, &qstart, &qend);
if(qend >= qstart) {
// copy qend - qstart + 1 rows from file
numOfRows = doCopyRowsFromFileBlock(pQueryHandle, pQueryHandle->outputCapacity, numOfRows, qstart, qend);
int32_t num = qend - qstart + 1;
pos += num * step;
} else {
// nothing copy from file
pos += step;
}
cur->win.ekey = ASCENDING_TRAVERSE(pQueryHandle->order)? keyFile[qend] : keyFile[qstart];
cur->lastKey = cur->win.ekey + step;
}
} while (numOfRows < pQueryHandle->outputCapacity);
if (numOfRows < pQueryHandle->outputCapacity) {
/**
* if cache is empty, load remain file block data. In contrast, if there are remain data in cache, do NOT
* copy them all to result buffer, since it may be overlapped with file data block.
*/
if (node == NULL ||
((memRowKey((SMemRow)SL_GET_NODE_DATA(node)) > pQueryHandle->window.ekey) &&
ASCENDING_TRAVERSE(pQueryHandle->order)) ||
((memRowKey((SMemRow)SL_GET_NODE_DATA(node)) < pQueryHandle->window.ekey) &&
!ASCENDING_TRAVERSE(pQueryHandle->order))) {
// no data in cache or data in cache is greater than the ekey of time window, load data from file block
if (cur->win.skey == TSKEY_INITIAL_VAL) {
cur->win.skey = keyFile[pos];
}
int32_t start = -1, end = -1;
getQualifiedRowsPos(pQueryHandle, pos, endPos, numOfRows, &start, &end);
numOfRows = doCopyRowsFromFileBlock(pQueryHandle, pQueryHandle->outputCapacity, numOfRows, start, end);
pos += (end - start + 1) * step;
cur->win.ekey = ASCENDING_TRAVERSE(pQueryHandle->order)? keyFile[end] : keyFile[start];
cur->lastKey = cur->win.ekey + step;
cur->mixBlock = true;
}
}
}
cur->blockCompleted =
(((pos > endPos || cur->lastKey > pQueryHandle->window.ekey) && ASCENDING_TRAVERSE(pQueryHandle->order)) ||
((pos < endPos || cur->lastKey < pQueryHandle->window.ekey) && !ASCENDING_TRAVERSE(pQueryHandle->order)));
if (!ASCENDING_TRAVERSE(pQueryHandle->order)) {
SWAP(cur->win.skey, cur->win.ekey, TSKEY);
}
moveDataToFront(pQueryHandle, numOfRows, numOfCols);
updateInfoAfterMerge(pQueryHandle, pCheckInfo, numOfRows, pos);
doCheckGeneratedBlockRange(pQueryHandle);
tsdbDebug("%p uid:%" PRIu64",tid:%d data block created, mixblock:%d, brange:%"PRIu64"-%"PRIu64" rows:%d, 0x%"PRIx64,
pQueryHandle, pCheckInfo->tableId.uid, pCheckInfo->tableId.tid, cur->mixBlock, cur->win.skey,
cur->win.ekey, cur->rows, pQueryHandle->qId);
}
int32_t binarySearchForKey(char* pValue, int num, TSKEY key, int order) {
int firstPos, lastPos, midPos = -1;
int numOfRows;
TSKEY* keyList;
if (num <= 0) return -1;
keyList = (TSKEY*)pValue;
firstPos = 0;
lastPos = num - 1;
if (order == TSDB_ORDER_DESC) {
// find the first position which is smaller than the key
while (1) {
if (key >= keyList[lastPos]) return lastPos;
if (key == keyList[firstPos]) return firstPos;
if (key < keyList[firstPos]) return firstPos - 1;
numOfRows = lastPos - firstPos + 1;
midPos = (numOfRows >> 1) + firstPos;
if (key < keyList[midPos]) {
lastPos = midPos - 1;
} else if (key > keyList[midPos]) {
firstPos = midPos + 1;
} else {
break;
}
}
} else {
// find the first position which is bigger than the key
while (1) {
if (key <= keyList[firstPos]) return firstPos;
if (key == keyList[lastPos]) return lastPos;
if (key > keyList[lastPos]) {
lastPos = lastPos + 1;
if (lastPos >= num)
return -1;
else
return lastPos;
}
numOfRows = lastPos - firstPos + 1;
midPos = (numOfRows >> 1) + firstPos;
if (key < keyList[midPos]) {
lastPos = midPos - 1;
} else if (key > keyList[midPos]) {
firstPos = midPos + 1;
} else {
break;
}
}
}
return midPos;
}
static void cleanBlockOrderSupporter(SBlockOrderSupporter* pSupporter, int32_t numOfTables) {
tfree(pSupporter->numOfBlocksPerTable);
tfree(pSupporter->blockIndexArray);
for (int32_t i = 0; i < numOfTables; ++i) {
STableBlockInfo* pBlockInfo = pSupporter->pDataBlockInfo[i];
tfree(pBlockInfo);
}
tfree(pSupporter->pDataBlockInfo);
}
static int32_t dataBlockOrderCompar(const void* pLeft, const void* pRight, void* param) {
int32_t leftTableIndex = *(int32_t*)pLeft;
int32_t rightTableIndex = *(int32_t*)pRight;
SBlockOrderSupporter* pSupporter = (SBlockOrderSupporter*)param;
int32_t leftTableBlockIndex = pSupporter->blockIndexArray[leftTableIndex];
int32_t rightTableBlockIndex = pSupporter->blockIndexArray[rightTableIndex];
if (leftTableBlockIndex > pSupporter->numOfBlocksPerTable[leftTableIndex]) {
/* left block is empty */
return 1;
} else if (rightTableBlockIndex > pSupporter->numOfBlocksPerTable[rightTableIndex]) {
/* right block is empty */
return -1;
}
STableBlockInfo* pLeftBlockInfoEx = &pSupporter->pDataBlockInfo[leftTableIndex][leftTableBlockIndex];
STableBlockInfo* pRightBlockInfoEx = &pSupporter->pDataBlockInfo[rightTableIndex][rightTableBlockIndex];
// assert(pLeftBlockInfoEx->compBlock->offset != pRightBlockInfoEx->compBlock->offset);
#if 0 // TODO: temporarily comment off requested by Dr. Liao
if (pLeftBlockInfoEx->compBlock->offset == pRightBlockInfoEx->compBlock->offset &&
pLeftBlockInfoEx->compBlock->last == pRightBlockInfoEx->compBlock->last) {
tsdbError("error in header file, two block with same offset:%" PRId64, (int64_t)pLeftBlockInfoEx->compBlock->offset);
}
#endif
return pLeftBlockInfoEx->compBlock->offset > pRightBlockInfoEx->compBlock->offset ? 1 : -1;
}
static int32_t createDataBlocksInfo(STsdbQueryHandle* pQueryHandle, int32_t numOfBlocks, int32_t* numOfAllocBlocks) {
size_t size = sizeof(STableBlockInfo) * numOfBlocks;
if (pQueryHandle->allocSize < size) {
pQueryHandle->allocSize = (int32_t)size;
char* tmp = realloc(pQueryHandle->pDataBlockInfo, pQueryHandle->allocSize);
if (tmp == NULL) {
return TSDB_CODE_TDB_OUT_OF_MEMORY;
}
pQueryHandle->pDataBlockInfo = (STableBlockInfo*) tmp;
}
memset(pQueryHandle->pDataBlockInfo, 0, size);
*numOfAllocBlocks = numOfBlocks;
// access data blocks according to the offset of each block in asc/desc order.
int32_t numOfTables = (int32_t)taosArrayGetSize(pQueryHandle->pTableCheckInfo);
SBlockOrderSupporter sup = {0};
sup.numOfTables = numOfTables;
sup.numOfBlocksPerTable = calloc(1, sizeof(int32_t) * numOfTables);
sup.blockIndexArray = calloc(1, sizeof(int32_t) * numOfTables);
sup.pDataBlockInfo = calloc(1, POINTER_BYTES * numOfTables);
if (sup.numOfBlocksPerTable == NULL || sup.blockIndexArray == NULL || sup.pDataBlockInfo == NULL) {
cleanBlockOrderSupporter(&sup, 0);
return TSDB_CODE_TDB_OUT_OF_MEMORY;
}
int32_t cnt = 0;
int32_t numOfQualTables = 0;
for (int32_t j = 0; j < numOfTables; ++j) {
STableCheckInfo* pTableCheck = (STableCheckInfo*)taosArrayGet(pQueryHandle->pTableCheckInfo, j);
if (pTableCheck->numOfBlocks <= 0) {
continue;
}
SBlock* pBlock = pTableCheck->pCompInfo->blocks;
sup.numOfBlocksPerTable[numOfQualTables] = pTableCheck->numOfBlocks;
char* buf = malloc(sizeof(STableBlockInfo) * pTableCheck->numOfBlocks);
if (buf == NULL) {
cleanBlockOrderSupporter(&sup, numOfQualTables);
return TSDB_CODE_TDB_OUT_OF_MEMORY;
}
sup.pDataBlockInfo[numOfQualTables] = (STableBlockInfo*)buf;
for (int32_t k = 0; k < pTableCheck->numOfBlocks; ++k) {
STableBlockInfo* pBlockInfo = &sup.pDataBlockInfo[numOfQualTables][k];
pBlockInfo->compBlock = &pBlock[k];
pBlockInfo->pTableCheckInfo = pTableCheck;
cnt++;
}
numOfQualTables++;
}
assert(numOfBlocks == cnt);
// since there is only one table qualified, blocks are not sorted
if (numOfQualTables == 1) {
memcpy(pQueryHandle->pDataBlockInfo, sup.pDataBlockInfo[0], sizeof(STableBlockInfo) * numOfBlocks);
cleanBlockOrderSupporter(&sup, numOfQualTables);
tsdbDebug("%p create data blocks info struct completed for 1 table, %d blocks not sorted 0x%"PRIx64, pQueryHandle, cnt,
pQueryHandle->qId);
return TSDB_CODE_SUCCESS;
}
tsdbDebug("%p create data blocks info struct completed, %d blocks in %d tables 0x%"PRIx64, pQueryHandle, cnt,
numOfQualTables, pQueryHandle->qId);
assert(cnt <= numOfBlocks && numOfQualTables <= numOfTables); // the pTableQueryInfo[j]->numOfBlocks may be 0
sup.numOfTables = numOfQualTables;
SLoserTreeInfo* pTree = NULL;
uint8_t ret = tLoserTreeCreate(&pTree, sup.numOfTables, &sup, dataBlockOrderCompar);
if (ret != TSDB_CODE_SUCCESS) {
cleanBlockOrderSupporter(&sup, numOfTables);
return TSDB_CODE_TDB_OUT_OF_MEMORY;
}
int32_t numOfTotal = 0;
while (numOfTotal < cnt) {
int32_t pos = pTree->pNode[0].index;
int32_t index = sup.blockIndexArray[pos]++;
STableBlockInfo* pBlocksInfo = sup.pDataBlockInfo[pos];
pQueryHandle->pDataBlockInfo[numOfTotal++] = pBlocksInfo[index];
// set data block index overflow, in order to disable the offset comparator
if (sup.blockIndexArray[pos] >= sup.numOfBlocksPerTable[pos]) {
sup.blockIndexArray[pos] = sup.numOfBlocksPerTable[pos] + 1;
}
tLoserTreeAdjust(pTree, pos + sup.numOfTables);
}
/*
* available when no import exists
* for(int32_t i = 0; i < cnt - 1; ++i) {
* assert((*pDataBlockInfo)[i].compBlock->offset < (*pDataBlockInfo)[i+1].compBlock->offset);
* }
*/
tsdbDebug("%p %d data blocks sort completed, 0x%"PRIx64, pQueryHandle, cnt, pQueryHandle->qId);
cleanBlockOrderSupporter(&sup, numOfTables);
free(pTree);
return TSDB_CODE_SUCCESS;
}
static int32_t getFirstFileDataBlock(STsdbQueryHandle* pQueryHandle, bool* exists);
static int32_t getDataBlockRv(STsdbQueryHandle* pQueryHandle, STableBlockInfo* pNext, bool *exists) {
int32_t step = ASCENDING_TRAVERSE(pQueryHandle->order)? 1 : -1;
SQueryFilePos* cur = &pQueryHandle->cur;
while(1) {
int32_t code = loadFileDataBlock(pQueryHandle, pNext->compBlock, pNext->pTableCheckInfo, exists);
if (code != TSDB_CODE_SUCCESS || *exists) {
return code;
}
if ((cur->slot == pQueryHandle->numOfBlocks - 1 && ASCENDING_TRAVERSE(pQueryHandle->order)) ||
(cur->slot == 0 && !ASCENDING_TRAVERSE(pQueryHandle->order))) {
// all data blocks in current file has been checked already, try next file if exists
return getFirstFileDataBlock(pQueryHandle, exists);
} else { // next block of the same file
cur->slot += step;
cur->mixBlock = false;
cur->blockCompleted = false;
pNext = &pQueryHandle->pDataBlockInfo[cur->slot];
}
}
}
static int32_t getFirstFileDataBlock(STsdbQueryHandle* pQueryHandle, bool* exists) {
pQueryHandle->numOfBlocks = 0;
SQueryFilePos* cur = &pQueryHandle->cur;
int32_t code = TSDB_CODE_SUCCESS;
int32_t numOfBlocks = 0;
int32_t numOfTables = (int32_t)taosArrayGetSize(pQueryHandle->pTableCheckInfo);
STsdbCfg* pCfg = &pQueryHandle->pTsdb->config;
STimeWindow win = TSWINDOW_INITIALIZER;
while (true) {
tsdbRLockFS(REPO_FS(pQueryHandle->pTsdb));
if ((pQueryHandle->pFileGroup = tsdbFSIterNext(&pQueryHandle->fileIter)) == NULL) {
tsdbUnLockFS(REPO_FS(pQueryHandle->pTsdb));
break;
}
tsdbGetFidKeyRange(pCfg->daysPerFile, pCfg->precision, pQueryHandle->pFileGroup->fid, &win.skey, &win.ekey);
// current file are not overlapped with query time window, ignore remain files
if ((ASCENDING_TRAVERSE(pQueryHandle->order) && win.skey > pQueryHandle->window.ekey) ||
(!ASCENDING_TRAVERSE(pQueryHandle->order) && win.ekey < pQueryHandle->window.ekey)) {
tsdbUnLockFS(REPO_FS(pQueryHandle->pTsdb));
tsdbDebug("%p remain files are not qualified for qrange:%" PRId64 "-%" PRId64 ", ignore, 0x%"PRIx64, pQueryHandle,
pQueryHandle->window.skey, pQueryHandle->window.ekey, pQueryHandle->qId);
pQueryHandle->pFileGroup = NULL;
assert(pQueryHandle->numOfBlocks == 0);
break;
}
if (tsdbSetAndOpenReadFSet(&pQueryHandle->rhelper, pQueryHandle->pFileGroup) < 0) {
tsdbUnLockFS(REPO_FS(pQueryHandle->pTsdb));
code = terrno;
break;
}
tsdbUnLockFS(REPO_FS(pQueryHandle->pTsdb));
if (tsdbLoadBlockIdx(&pQueryHandle->rhelper) < 0) {
code = terrno;
break;
}
if ((code = getFileCompInfo(pQueryHandle, &numOfBlocks)) != TSDB_CODE_SUCCESS) {
break;
}
tsdbDebug("%p %d blocks found in file for %d table(s), fid:%d, 0x%"PRIx64, pQueryHandle, numOfBlocks, numOfTables,
pQueryHandle->pFileGroup->fid, pQueryHandle->qId);
assert(numOfBlocks >= 0);
if (numOfBlocks == 0) {
continue;
}
// todo return error code to query engine
if ((code = createDataBlocksInfo(pQueryHandle, numOfBlocks, &pQueryHandle->numOfBlocks)) != TSDB_CODE_SUCCESS) {
break;
}
assert(numOfBlocks >= pQueryHandle->numOfBlocks);
if (pQueryHandle->numOfBlocks > 0) {
break;
}
}
// no data in file anymore
if (pQueryHandle->numOfBlocks <= 0 || code != TSDB_CODE_SUCCESS) {
if (code == TSDB_CODE_SUCCESS) {
assert(pQueryHandle->pFileGroup == NULL);
}
cur->fid = INT32_MIN; // denote that there are no data in file anymore
*exists = false;
return code;
}
assert(pQueryHandle->pFileGroup != NULL && pQueryHandle->numOfBlocks > 0);
cur->slot = ASCENDING_TRAVERSE(pQueryHandle->order)? 0:pQueryHandle->numOfBlocks-1;
cur->fid = pQueryHandle->pFileGroup->fid;
STableBlockInfo* pBlockInfo = &pQueryHandle->pDataBlockInfo[cur->slot];
return getDataBlockRv(pQueryHandle, pBlockInfo, exists);
}
static bool isEndFileDataBlock(SQueryFilePos* cur, int32_t numOfBlocks, bool ascTrav) {
assert(cur != NULL && numOfBlocks > 0);
return (cur->slot == numOfBlocks - 1 && ascTrav) || (cur->slot == 0 && !ascTrav);
}
static void moveToNextDataBlockInCurrentFile(STsdbQueryHandle* pQueryHandle) {
int32_t step = ASCENDING_TRAVERSE(pQueryHandle->order)? 1 : -1;
SQueryFilePos* cur = &pQueryHandle->cur;
assert(cur->slot < pQueryHandle->numOfBlocks && cur->slot >= 0);
cur->slot += step;
cur->mixBlock = false;
cur->blockCompleted = false;
}
int32_t tsdbGetFileBlocksDistInfo(TsdbQueryHandleT* queryHandle, STableBlockDist* pTableBlockInfo) {
STsdbQueryHandle* pQueryHandle = (STsdbQueryHandle*) queryHandle;
pTableBlockInfo->totalSize = 0;
pTableBlockInfo->totalRows = 0;
STsdbFS* pFileHandle = REPO_FS(pQueryHandle->pTsdb);
// find the start data block in file
pQueryHandle->locateStart = true;
STsdbCfg* pCfg = &pQueryHandle->pTsdb->config;
int32_t fid = getFileIdFromKey(pQueryHandle->window.skey, pCfg->daysPerFile, pCfg->precision);
tsdbRLockFS(pFileHandle);
tsdbFSIterInit(&pQueryHandle->fileIter, pFileHandle, pQueryHandle->order);
tsdbFSIterSeek(&pQueryHandle->fileIter, fid);
tsdbUnLockFS(pFileHandle);
pTableBlockInfo->numOfFiles += 1;
int32_t code = TSDB_CODE_SUCCESS;
int32_t numOfBlocks = 0;
int32_t numOfTables = (int32_t)taosArrayGetSize(pQueryHandle->pTableCheckInfo);
int defaultRows = TSDB_DEFAULT_BLOCK_ROWS(pCfg->maxRowsPerFileBlock);
STimeWindow win = TSWINDOW_INITIALIZER;
while (true) {
numOfBlocks = 0;
tsdbRLockFS(REPO_FS(pQueryHandle->pTsdb));
if ((pQueryHandle->pFileGroup = tsdbFSIterNext(&pQueryHandle->fileIter)) == NULL) {
tsdbUnLockFS(REPO_FS(pQueryHandle->pTsdb));
break;
}
tsdbGetFidKeyRange(pCfg->daysPerFile, pCfg->precision, pQueryHandle->pFileGroup->fid, &win.skey, &win.ekey);
// current file are not overlapped with query time window, ignore remain files
if ((ASCENDING_TRAVERSE(pQueryHandle->order) && win.skey > pQueryHandle->window.ekey) ||
(!ASCENDING_TRAVERSE(pQueryHandle->order) && win.ekey < pQueryHandle->window.ekey)) {
tsdbUnLockFS(REPO_FS(pQueryHandle->pTsdb));
tsdbDebug("%p remain files are not qualified for qrange:%" PRId64 "-%" PRId64 ", ignore, 0x%"PRIx64, pQueryHandle,
pQueryHandle->window.skey, pQueryHandle->window.ekey, pQueryHandle->qId);
pQueryHandle->pFileGroup = NULL;
break;
}
pTableBlockInfo->numOfFiles += 1;
if (tsdbSetAndOpenReadFSet(&pQueryHandle->rhelper, pQueryHandle->pFileGroup) < 0) {
tsdbUnLockFS(REPO_FS(pQueryHandle->pTsdb));
code = terrno;
break;
}
tsdbUnLockFS(REPO_FS(pQueryHandle->pTsdb));
if (tsdbLoadBlockIdx(&pQueryHandle->rhelper) < 0) {
code = terrno;
break;
}
if ((code = getFileCompInfo(pQueryHandle, &numOfBlocks)) != TSDB_CODE_SUCCESS) {
break;
}
tsdbDebug("%p %d blocks found in file for %d table(s), fid:%d, 0x%"PRIx64, pQueryHandle, numOfBlocks, numOfTables,
pQueryHandle->pFileGroup->fid, pQueryHandle->qId);
if (numOfBlocks == 0) {
continue;
}
for (int32_t i = 0; i < numOfTables; ++i) {
STableCheckInfo* pCheckInfo = taosArrayGet(pQueryHandle->pTableCheckInfo, i);
SBlock* pBlock = pCheckInfo->pCompInfo->blocks;
for (int32_t j = 0; j < pCheckInfo->numOfBlocks; ++j) {
pTableBlockInfo->totalSize += pBlock[j].len;
int32_t numOfRows = pBlock[j].numOfRows;
pTableBlockInfo->totalRows += numOfRows;
if (numOfRows > pTableBlockInfo->maxRows) pTableBlockInfo->maxRows = numOfRows;
if (numOfRows < pTableBlockInfo->minRows) pTableBlockInfo->minRows = numOfRows;
if (numOfRows < defaultRows) pTableBlockInfo->numOfSmallBlocks+=1;
int32_t stepIndex = (numOfRows-1)/TSDB_BLOCK_DIST_STEP_ROWS;
SFileBlockInfo *blockInfo = (SFileBlockInfo*)taosArrayGet(pTableBlockInfo->dataBlockInfos, stepIndex);
blockInfo->numBlocksOfStep++;
}
}
}
return code;
}
static int32_t getDataBlocksInFiles(STsdbQueryHandle* pQueryHandle, bool* exists) {
STsdbFS* pFileHandle = REPO_FS(pQueryHandle->pTsdb);
SQueryFilePos* cur = &pQueryHandle->cur;
// find the start data block in file
if (!pQueryHandle->locateStart) {
pQueryHandle->locateStart = true;
STsdbCfg* pCfg = &pQueryHandle->pTsdb->config;
int32_t fid = getFileIdFromKey(pQueryHandle->window.skey, pCfg->daysPerFile, pCfg->precision);
tsdbRLockFS(pFileHandle);
tsdbFSIterInit(&pQueryHandle->fileIter, pFileHandle, pQueryHandle->order);
tsdbFSIterSeek(&pQueryHandle->fileIter, fid);
tsdbUnLockFS(pFileHandle);
return getFirstFileDataBlock(pQueryHandle, exists);
} else {
// check if current file block is all consumed
STableBlockInfo* pBlockInfo = &pQueryHandle->pDataBlockInfo[cur->slot];
STableCheckInfo* pCheckInfo = pBlockInfo->pTableCheckInfo;
// current block is done, try next
if ((!cur->mixBlock) || cur->blockCompleted) {
// all data blocks in current file has been checked already, try next file if exists
} else {
tsdbDebug("%p continue in current data block, index:%d, pos:%d, 0x%"PRIx64, pQueryHandle, cur->slot, cur->pos,
pQueryHandle->qId);
int32_t code = handleDataMergeIfNeeded(pQueryHandle, pBlockInfo->compBlock, pCheckInfo);
*exists = (pQueryHandle->realNumOfRows > 0);
if (code != TSDB_CODE_SUCCESS || *exists) {
return code;
}
}
// current block is empty, try next block in file
// all data blocks in current file has been checked already, try next file if exists
if (isEndFileDataBlock(cur, pQueryHandle->numOfBlocks, ASCENDING_TRAVERSE(pQueryHandle->order))) {
return getFirstFileDataBlock(pQueryHandle, exists);
} else {
moveToNextDataBlockInCurrentFile(pQueryHandle);
STableBlockInfo* pNext = &pQueryHandle->pDataBlockInfo[cur->slot];
return getDataBlockRv(pQueryHandle, pNext, exists);
}
}
}
static bool doHasDataInBuffer(STsdbQueryHandle* pQueryHandle) {
size_t numOfTables = taosArrayGetSize(pQueryHandle->pTableCheckInfo);
while (pQueryHandle->activeIndex < numOfTables) {
if (hasMoreDataInCache(pQueryHandle)) {
return true;
}
pQueryHandle->activeIndex += 1;
}
// no data in memtable or imemtable, decrease the memory reference.
// TODO !!
// tsdbMayUnTakeMemSnapshot(pQueryHandle);
return false;
}
//todo not unref yet, since it is not support multi-group interpolation query
static UNUSED_FUNC void changeQueryHandleForInterpQuery(TsdbQueryHandleT pHandle) {
// filter the queried time stamp in the first place
STsdbQueryHandle* pQueryHandle = (STsdbQueryHandle*) pHandle;
// starts from the buffer in case of descending timestamp order check data blocks
size_t numOfTables = taosArrayGetSize(pQueryHandle->pTableCheckInfo);
int32_t i = 0;
while(i < numOfTables) {
STableCheckInfo* pCheckInfo = taosArrayGet(pQueryHandle->pTableCheckInfo, i);
// the first qualified table for interpolation query
if ((pQueryHandle->window.skey <= pCheckInfo->pTableObj->lastKey) &&
(pCheckInfo->pTableObj->lastKey != TSKEY_INITIAL_VAL)) {
break;
}
i++;
}
// there are no data in all the tables
if (i == numOfTables) {
return;
}
STableCheckInfo info = *(STableCheckInfo*) taosArrayGet(pQueryHandle->pTableCheckInfo, i);
taosArrayClear(pQueryHandle->pTableCheckInfo);
info.lastKey = pQueryHandle->window.skey;
taosArrayPush(pQueryHandle->pTableCheckInfo, &info);
}
static int tsdbReadRowsFromCache(STableCheckInfo* pCheckInfo, TSKEY maxKey, int maxRowsToRead, STimeWindow* win,
STsdbQueryHandle* pQueryHandle) {
int numOfRows = 0;
int32_t numOfCols = (int32_t)taosArrayGetSize(pQueryHandle->pColumns);
STsdbCfg* pCfg = &pQueryHandle->pTsdb->config;
win->skey = TSKEY_INITIAL_VAL;
int64_t st = taosGetTimestampUs();
STable* pTable = pCheckInfo->pTableObj;
int16_t rv = -1;
STSchema* pSchema = NULL;
do {
SMemRow row = getSMemRowInTableMem(pCheckInfo, pQueryHandle->order, pCfg->update, NULL);
if (row == NULL) {
break;
}
TSKEY key = memRowKey(row);
if ((key > maxKey && ASCENDING_TRAVERSE(pQueryHandle->order)) || (key < maxKey && !ASCENDING_TRAVERSE(pQueryHandle->order))) {
tsdbDebug("%p key:%"PRIu64" beyond qrange:%"PRId64" - %"PRId64", no more data in buffer", pQueryHandle, key, pQueryHandle->window.skey,
pQueryHandle->window.ekey);
break;
}
if (win->skey == INT64_MIN) {
win->skey = key;
}
win->ekey = key;
if (rv != memRowVersion(row)) {
pSchema = tsdbGetTableSchemaByVersion(pTable, memRowVersion(row), (int8_t)memRowType(row));
rv = memRowVersion(row);
}
mergeTwoRowFromMem(pQueryHandle, maxRowsToRead, numOfRows, row, NULL, numOfCols, pTable, pSchema, NULL, true);
if (++numOfRows >= maxRowsToRead) {
moveToNextRowInMem(pCheckInfo);
break;
}
} while(moveToNextRowInMem(pCheckInfo));
assert(numOfRows <= maxRowsToRead);
// if the buffer is not full in case of descending order query, move the data in the front of the buffer
if (!ASCENDING_TRAVERSE(pQueryHandle->order) && numOfRows < maxRowsToRead) {
int32_t emptySize = maxRowsToRead - numOfRows;
for(int32_t i = 0; i < numOfCols; ++i) {
SColumnInfoData* pColInfo = taosArrayGet(pQueryHandle->pColumns, i);
memmove((char*)pColInfo->pData, (char*)pColInfo->pData + emptySize * pColInfo->info.bytes, numOfRows * pColInfo->info.bytes);
}
}
int64_t elapsedTime = taosGetTimestampUs() - st;
tsdbDebug("%p build data block from cache completed, elapsed time:%"PRId64" us, numOfRows:%d, numOfCols:%d, 0x%"PRIx64, pQueryHandle,
elapsedTime, numOfRows, numOfCols, pQueryHandle->qId);
return numOfRows;
}
static int32_t getAllTableList(STable* pSuperTable, SArray* list) {
STSchema* pTagSchema = tsdbGetTableTagSchema(pSuperTable);
if(pTagSchema && pTagSchema->numOfCols == 1 && pTagSchema->columns[0].type == TSDB_DATA_TYPE_JSON){
uint32_t key = TSDB_DATA_JSON_NULL;
char keyMd5[TSDB_MAX_JSON_KEY_MD5_LEN] = {0};
jsonKeyMd5(&key, INT_BYTES, keyMd5);
SArray** tablist = (SArray**)taosHashGet(pSuperTable->jsonKeyMap, keyMd5, TSDB_MAX_JSON_KEY_MD5_LEN);
for (int i = 0; i < taosArrayGetSize(*tablist); ++i) {
JsonMapValue* p = taosArrayGet(*tablist, i);
STableKeyInfo info = {.pTable = p->table, .lastKey = TSKEY_INITIAL_VAL};
taosArrayPush(list, &info);
}
}else{
SSkipListIterator* iter = tSkipListCreateIter(pSuperTable->pIndex);
while (tSkipListIterNext(iter)) {
SSkipListNode* pNode = tSkipListIterGet(iter);
STable* pTable = (STable*) SL_GET_NODE_DATA((SSkipListNode*) pNode);
STableKeyInfo info = {.pTable = pTable, .lastKey = TSKEY_INITIAL_VAL};
taosArrayPush(list, &info);
}
tSkipListDestroyIter(iter);
}
return TSDB_CODE_SUCCESS;
}
static bool loadBlockOfActiveTable(STsdbQueryHandle* pQueryHandle) {
if (pQueryHandle->checkFiles) {
// check if the query range overlaps with the file data block
bool exists = true;
int32_t code = getDataBlocksInFiles(pQueryHandle, &exists);
if (code != TSDB_CODE_SUCCESS) {
pQueryHandle->checkFiles = false;
return false;
}
if (exists) {
tsdbRetrieveDataBlock((TsdbQueryHandleT*) pQueryHandle, NULL);
if (pQueryHandle->currentLoadExternalRows && pQueryHandle->window.skey == pQueryHandle->window.ekey) {
SColumnInfoData* pColInfo = taosArrayGet(pQueryHandle->pColumns, 0);
assert(*(int64_t*)pColInfo->pData == pQueryHandle->window.skey);
}
pQueryHandle->currentLoadExternalRows = false; // clear the flag, since the exact matched row is found.
return exists;
}
pQueryHandle->checkFiles = false;
}
if (hasMoreDataInCache(pQueryHandle)) {
pQueryHandle->currentLoadExternalRows = false;
return true;
}
// current result is empty
if (pQueryHandle->currentLoadExternalRows && pQueryHandle->window.skey == pQueryHandle->window.ekey && pQueryHandle->cur.rows == 0) {
SMemRef* pMemRef = pQueryHandle->pMemRef;
doGetExternalRow(pQueryHandle, TSDB_PREV_ROW, pMemRef);
doGetExternalRow(pQueryHandle, TSDB_NEXT_ROW, pMemRef);
bool result = tsdbGetExternalRow(pQueryHandle);
pQueryHandle->prev = doFreeColumnInfoData(pQueryHandle->prev);
pQueryHandle->next = doFreeColumnInfoData(pQueryHandle->next);
pQueryHandle->currentLoadExternalRows = false;
return result;
}
return false;
}
static bool loadCachedLastRow(STsdbQueryHandle* pQueryHandle) {
// the last row is cached in buffer, return it directly.
// here note that the pQueryHandle->window must be the TS_INITIALIZER
int32_t numOfCols = (int32_t)(QH_GET_NUM_OF_COLS(pQueryHandle));
size_t numOfTables = taosArrayGetSize(pQueryHandle->pTableCheckInfo);
assert(numOfTables > 0 && numOfCols > 0);
SQueryFilePos* cur = &pQueryHandle->cur;
SMemRow pRow = NULL;
TSKEY key = TSKEY_INITIAL_VAL;
int32_t step = ASCENDING_TRAVERSE(pQueryHandle->order)? 1:-1;
if (++pQueryHandle->activeIndex < numOfTables) {
STableCheckInfo* pCheckInfo = taosArrayGet(pQueryHandle->pTableCheckInfo, pQueryHandle->activeIndex);
int32_t ret = tsdbGetCachedLastRow(pCheckInfo->pTableObj, &pRow, &key);
if (ret != TSDB_CODE_SUCCESS) {
return false;
}
mergeTwoRowFromMem(pQueryHandle, pQueryHandle->outputCapacity, 0, pRow, NULL, numOfCols, pCheckInfo->pTableObj, NULL, NULL, true);
tfree(pRow);
// update the last key value
pCheckInfo->lastKey = key + step;
cur->rows = 1; // only one row
cur->lastKey = key + step;
cur->mixBlock = true;
cur->win.skey = key;
cur->win.ekey = key;
return true;
}
return false;
}
static bool loadCachedLast(STsdbQueryHandle* pQueryHandle) {
// the last row is cached in buffer, return it directly.
// here note that the pQueryHandle->window must be the TS_INITIALIZER
int32_t tgNumOfCols = (int32_t)QH_GET_NUM_OF_COLS(pQueryHandle);
size_t numOfTables = taosArrayGetSize(pQueryHandle->pTableCheckInfo);
int32_t numOfRows = 0;
assert(numOfTables > 0 && tgNumOfCols > 0);
SQueryFilePos* cur = &pQueryHandle->cur;
TSKEY priKey = TSKEY_INITIAL_VAL;
int32_t priIdx = -1;
SColumnInfoData* pColInfo = NULL;
while (++pQueryHandle->activeIndex < numOfTables) {
STableCheckInfo* pCheckInfo = taosArrayGet(pQueryHandle->pTableCheckInfo, pQueryHandle->activeIndex);
STable* pTable = pCheckInfo->pTableObj;
char* pData = NULL;
int32_t numOfCols = pTable->maxColNum;
if (pTable->lastCols == NULL || pTable->maxColNum <= 0) {
tsdbWarn("no last cached for table %s, uid:%" PRIu64 ",tid:%d", pTable->name->data, pTable->tableId.uid, pTable->tableId.tid);
continue;
}
int32_t i = 0, j = 0;
// lock pTable->lastCols[i] as it would be released when schema update(tsdbUpdateLastColSchema)
TSDB_RLOCK_TABLE(pTable);
while(i < tgNumOfCols && j < numOfCols) {
pColInfo = taosArrayGet(pQueryHandle->pColumns, i);
if (pTable->lastCols[j].colId < pColInfo->info.colId) {
j++;
continue;
} else if (pTable->lastCols[j].colId > pColInfo->info.colId) {
i++;
continue;
}
pData = (char*)pColInfo->pData + numOfRows * pColInfo->info.bytes;
if (pTable->lastCols[j].bytes > 0) {
void* value = pTable->lastCols[j].pData;
switch (pColInfo->info.type) {
case TSDB_DATA_TYPE_BINARY:
case TSDB_DATA_TYPE_NCHAR:
memcpy(pData, value, varDataTLen(value));
break;
case TSDB_DATA_TYPE_NULL:
case TSDB_DATA_TYPE_BOOL:
case TSDB_DATA_TYPE_TINYINT:
case TSDB_DATA_TYPE_UTINYINT:
*(uint8_t *)pData = *(uint8_t *)value;
break;
case TSDB_DATA_TYPE_SMALLINT:
case TSDB_DATA_TYPE_USMALLINT:
*(uint16_t *)pData = *(uint16_t *)value;
break;
case TSDB_DATA_TYPE_INT:
case TSDB_DATA_TYPE_UINT:
*(uint32_t *)pData = *(uint32_t *)value;
break;
case TSDB_DATA_TYPE_BIGINT:
case TSDB_DATA_TYPE_UBIGINT:
*(uint64_t *)pData = *(uint64_t *)value;
break;
case TSDB_DATA_TYPE_FLOAT:
SET_FLOAT_PTR(pData, value);
break;
case TSDB_DATA_TYPE_DOUBLE:
SET_DOUBLE_PTR(pData, value);
break;
case TSDB_DATA_TYPE_TIMESTAMP:
if (pColInfo->info.colId == PRIMARYKEY_TIMESTAMP_COL_INDEX) {
priKey = tdGetKey(*(TKEY *)value);
priIdx = i;
i++;
j++;
continue;
} else {
*(TSKEY *)pData = *(TSKEY *)value;
}
break;
default:
memcpy(pData, value, pColInfo->info.bytes);
}
for (int32_t n = 0; n < tgNumOfCols; ++n) {
if (n == i) {
continue;
}
pColInfo = taosArrayGet(pQueryHandle->pColumns, n);
pData = (char*)pColInfo->pData + numOfRows * pColInfo->info.bytes;;
if (pColInfo->info.colId == PRIMARYKEY_TIMESTAMP_COL_INDEX) {
*(TSKEY *)pData = pTable->lastCols[j].ts;
continue;
}
if (pColInfo->info.type == TSDB_DATA_TYPE_BINARY || pColInfo->info.type == TSDB_DATA_TYPE_NCHAR) {
setVardataNull(pData, pColInfo->info.type);
} else {
setNull(pData, pColInfo->info.type, pColInfo->info.bytes);
}
}
numOfRows++;
assert(numOfRows < pQueryHandle->outputCapacity);
}
i++;
j++;
}
TSDB_RUNLOCK_TABLE(pTable);
// leave the real ts column as the last row, because last function only (not stable) use the last row as res
if (priKey != TSKEY_INITIAL_VAL) {
pColInfo = taosArrayGet(pQueryHandle->pColumns, priIdx);
pData = (char*)pColInfo->pData + numOfRows * pColInfo->info.bytes;
*(TSKEY *)pData = priKey;
for (int32_t n = 0; n < tgNumOfCols; ++n) {
if (n == priIdx) {
continue;
}
pColInfo = taosArrayGet(pQueryHandle->pColumns, n);
pData = (char*)pColInfo->pData + numOfRows * pColInfo->info.bytes;;
assert (pColInfo->info.colId != PRIMARYKEY_TIMESTAMP_COL_INDEX);
if (pColInfo->info.type == TSDB_DATA_TYPE_BINARY || pColInfo->info.type == TSDB_DATA_TYPE_NCHAR) {
setVardataNull(pData, pColInfo->info.type);
} else {
setNull(pData, pColInfo->info.type, pColInfo->info.bytes);
}
}
numOfRows++;
}
if (numOfRows > 0) {
cur->rows = numOfRows;
cur->mixBlock = true;
return true;
}
}
return false;
}
void tsdbSwitchTable(TsdbQueryHandleT queryHandle) {
STsdbQueryHandle* pQueryHandle = (STsdbQueryHandle*) queryHandle;
STableCheckInfo* pCheckInfo = taosArrayGet(pQueryHandle->pTableCheckInfo, pQueryHandle->activeIndex);
pCheckInfo->numOfBlocks = 0;
pQueryHandle->locateStart = false;
pQueryHandle->checkFiles = true;
pQueryHandle->cur.rows = 0;
pQueryHandle->currentLoadExternalRows = pQueryHandle->loadExternalRow;
terrno = TSDB_CODE_SUCCESS;
++pQueryHandle->activeIndex;
}
static bool loadDataBlockFromTableSeq(STsdbQueryHandle* pQueryHandle) {
size_t numOfTables = taosArrayGetSize(pQueryHandle->pTableCheckInfo);
assert(numOfTables > 0);
int64_t stime = taosGetTimestampUs();
while(pQueryHandle->activeIndex < numOfTables) {
if (loadBlockOfActiveTable(pQueryHandle)) {
return true;
}
STableCheckInfo* pCheckInfo = taosArrayGet(pQueryHandle->pTableCheckInfo, pQueryHandle->activeIndex);
pCheckInfo->numOfBlocks = 0;
pQueryHandle->activeIndex += 1;
pQueryHandle->locateStart = false;
pQueryHandle->checkFiles = true;
pQueryHandle->cur.rows = 0;
pQueryHandle->currentLoadExternalRows = pQueryHandle->loadExternalRow;
terrno = TSDB_CODE_SUCCESS;
int64_t elapsedTime = taosGetTimestampUs() - stime;
pQueryHandle->cost.checkForNextTime += elapsedTime;
}
return false;
}
// handle data in cache situation
bool tsdbNextDataBlock(TsdbQueryHandleT pHandle) {
STsdbQueryHandle* pQueryHandle = (STsdbQueryHandle*) pHandle;
if (pQueryHandle == NULL) {
return false;
}
if (emptyQueryTimewindow(pQueryHandle)) {
tsdbDebug("%p query window not overlaps with the data set, no result returned, 0x%"PRIx64, pQueryHandle, pQueryHandle->qId);
return false;
}
int64_t stime = taosGetTimestampUs();
int64_t elapsedTime = stime;
// TODO refactor: remove "type"
if (pQueryHandle->type == TSDB_QUERY_TYPE_LAST) {
if (pQueryHandle->cachelastrow == TSDB_CACHED_TYPE_LASTROW) {
return loadCachedLastRow(pQueryHandle);
} else if (pQueryHandle->cachelastrow == TSDB_CACHED_TYPE_LAST) {
return loadCachedLast(pQueryHandle);
}
}
if (pQueryHandle->loadType == BLOCK_LOAD_TABLE_SEQ_ORDER) {
return loadDataBlockFromTableSeq(pQueryHandle);
} else { // loadType == RR and Offset Order
if (pQueryHandle->checkFiles) {
// check if the query range overlaps with the file data block
bool exists = true;
int32_t code = getDataBlocksInFiles(pQueryHandle, &exists);
if (code != TSDB_CODE_SUCCESS) {
pQueryHandle->activeIndex = 0;
pQueryHandle->checkFiles = false;
return false;
}
if (exists) {
pQueryHandle->cost.checkForNextTime += (taosGetTimestampUs() - stime);
return exists;
}
pQueryHandle->activeIndex = 0;
pQueryHandle->checkFiles = false;
}
// TODO: opt by consider the scan order
bool ret = doHasDataInBuffer(pQueryHandle);
terrno = TSDB_CODE_SUCCESS;
elapsedTime = taosGetTimestampUs() - stime;
pQueryHandle->cost.checkForNextTime += elapsedTime;
return ret;
}
}
static int32_t doGetExternalRow(STsdbQueryHandle* pQueryHandle, int16_t type, SMemRef* pMemRef) {
STsdbQueryHandle* pSecQueryHandle = NULL;
if (type == TSDB_PREV_ROW && pQueryHandle->prev) {
return TSDB_CODE_SUCCESS;
}
if (type == TSDB_NEXT_ROW && pQueryHandle->next) {
return TSDB_CODE_SUCCESS;
}
// prepare the structure
int32_t numOfCols = (int32_t) QH_GET_NUM_OF_COLS(pQueryHandle);
if (type == TSDB_PREV_ROW) {
pQueryHandle->prev = taosArrayInit(numOfCols, sizeof(SColumnInfoData));
if (pQueryHandle->prev == NULL) {
terrno = TSDB_CODE_QRY_OUT_OF_MEMORY;
goto out_of_memory;
}
} else {
pQueryHandle->next = taosArrayInit(numOfCols, sizeof(SColumnInfoData));
if (pQueryHandle->next == NULL) {
terrno = TSDB_CODE_QRY_OUT_OF_MEMORY;
goto out_of_memory;
}
}
SArray* row = (type == TSDB_PREV_ROW)? pQueryHandle->prev : pQueryHandle->next;
for (int32_t i = 0; i < numOfCols; ++i) {
SColumnInfoData* pCol = taosArrayGet(pQueryHandle->pColumns, i);
SColumnInfoData colInfo = {{0}, 0};
colInfo.info = pCol->info;
colInfo.pData = calloc(1, pCol->info.bytes);
if (colInfo.pData == NULL) {
terrno = TSDB_CODE_QRY_OUT_OF_MEMORY;
goto out_of_memory;
}
taosArrayPush(row, &colInfo);
}
// load the previous row
STsdbQueryCond cond = {.numOfCols = numOfCols, .loadExternalRows = false, .type = BLOCK_LOAD_OFFSET_SEQ_ORDER};
if (type == TSDB_PREV_ROW) {
cond.order = TSDB_ORDER_DESC;
cond.twindow = (STimeWindow){pQueryHandle->window.skey, INT64_MIN};
} else {
cond.order = TSDB_ORDER_ASC;
cond.twindow = (STimeWindow){pQueryHandle->window.skey, INT64_MAX};
}
cond.colList = calloc(cond.numOfCols, sizeof(SColumnInfo));
if (cond.colList == NULL) {
terrno = TSDB_CODE_QRY_OUT_OF_MEMORY;
goto out_of_memory;
}
for (int32_t i = 0; i < cond.numOfCols; ++i) {
SColumnInfoData* pColInfoData = taosArrayGet(pQueryHandle->pColumns, i);
memcpy(&cond.colList[i], &pColInfoData->info, sizeof(SColumnInfo));
}
pSecQueryHandle = tsdbQueryTablesImpl(pQueryHandle->pTsdb, &cond, pQueryHandle->qId, pMemRef);
tfree(cond.colList);
if (pSecQueryHandle == NULL) {
goto out_of_memory;
}
// current table, only one table
STableCheckInfo* pCurrent = taosArrayGet(pQueryHandle->pTableCheckInfo, pQueryHandle->activeIndex);
SArray* psTable = NULL;
pSecQueryHandle->pTableCheckInfo = createCheckInfoFromCheckInfo(pCurrent, pSecQueryHandle->window.skey, &psTable);
if (pSecQueryHandle->pTableCheckInfo == NULL) {
taosArrayDestroy(&psTable);
terrno = TSDB_CODE_QRY_OUT_OF_MEMORY;
goto out_of_memory;
}
tsdbMayTakeMemSnapshot(pSecQueryHandle, psTable);
if (!tsdbNextDataBlock((void*)pSecQueryHandle)) {
// no result in current query, free the corresponding result rows structure
if (type == TSDB_PREV_ROW) {
pQueryHandle->prev = doFreeColumnInfoData(pQueryHandle->prev);
} else {
pQueryHandle->next = doFreeColumnInfoData(pQueryHandle->next);
}
goto out_of_memory;
}
SDataBlockInfo blockInfo = {{0}, 0};
tsdbRetrieveDataBlockInfo((void*)pSecQueryHandle, &blockInfo);
tsdbRetrieveDataBlock((void*)pSecQueryHandle, pSecQueryHandle->defaultLoadColumn);
row = (type == TSDB_PREV_ROW)? pQueryHandle->prev:pQueryHandle->next;
int32_t pos = (type == TSDB_PREV_ROW)?pSecQueryHandle->cur.rows - 1:0;
for (int32_t i = 0; i < numOfCols; ++i) {
SColumnInfoData* pCol = taosArrayGet(row, i);
SColumnInfoData* s = taosArrayGet(pSecQueryHandle->pColumns, i);
memcpy((char*)pCol->pData, (char*)s->pData + s->info.bytes * pos, pCol->info.bytes);
}
out_of_memory:
tsdbCleanupQueryHandle(pSecQueryHandle);
return terrno;
}
bool tsdbGetExternalRow(TsdbQueryHandleT pHandle) {
STsdbQueryHandle* pQueryHandle = (STsdbQueryHandle*) pHandle;
SQueryFilePos* cur = &pQueryHandle->cur;
cur->fid = INT32_MIN;
cur->mixBlock = true;
if (pQueryHandle->prev == NULL || pQueryHandle->next == NULL) {
cur->rows = 0;
return false;
}
int32_t numOfCols = (int32_t) QH_GET_NUM_OF_COLS(pQueryHandle);
for (int32_t i = 0; i < numOfCols; ++i) {
SColumnInfoData* pColInfoData = taosArrayGet(pQueryHandle->pColumns, i);
SColumnInfoData* first = taosArrayGet(pQueryHandle->prev, i);
memcpy(pColInfoData->pData, first->pData, pColInfoData->info.bytes);
SColumnInfoData* sec = taosArrayGet(pQueryHandle->next, i);
memcpy(((char*)pColInfoData->pData) + pColInfoData->info.bytes, sec->pData, pColInfoData->info.bytes);
if (i == 0 && pColInfoData->info.type == TSDB_DATA_TYPE_TIMESTAMP) {
cur->win.skey = *(TSKEY*)pColInfoData->pData;
cur->win.ekey = *(TSKEY*)(((char*)pColInfoData->pData) + TSDB_KEYSIZE);
}
}
cur->rows = 2;
return true;
}
/*
* if lastRow == NULL, return TSDB_CODE_TDB_NO_CACHE_LAST_ROW
* else set pRes and return TSDB_CODE_SUCCESS and save lastKey
*/
int32_t tsdbGetCachedLastRow(STable* pTable, SMemRow* pRes, TSKEY* lastKey) {
int32_t code = TSDB_CODE_SUCCESS;
TSDB_RLOCK_TABLE(pTable);
if (!pTable->lastRow) {
code = TSDB_CODE_TDB_NO_CACHE_LAST_ROW;
goto out;
}
if (pRes) {
*pRes = tdMemRowDup(pTable->lastRow);
if (*pRes == NULL) {
code = TSDB_CODE_TDB_OUT_OF_MEMORY;
}
}
out:
TSDB_RUNLOCK_TABLE(pTable);
return code;
}
bool isTsdbCacheLastRow(TsdbQueryHandleT* pQueryHandle) {
return ((STsdbQueryHandle *)pQueryHandle)->cachelastrow > TSDB_CACHED_TYPE_NONE;
}
int32_t checkForCachedLastRow(STsdbQueryHandle* pQueryHandle, STableGroupInfo *groupList) {
assert(pQueryHandle != NULL && groupList != NULL);
TSKEY key = TSKEY_INITIAL_VAL;
SArray* group = taosArrayGetP(groupList->pGroupList, 0);
assert(group != NULL);
STableKeyInfo* pInfo = (STableKeyInfo*)taosArrayGet(group, 0);
int32_t code = 0;
if (((STable*)pInfo->pTable)->lastRow) {
code = tsdbGetCachedLastRow(pInfo->pTable, NULL, &key);
if (code != TSDB_CODE_SUCCESS) {
pQueryHandle->cachelastrow = TSDB_CACHED_TYPE_NONE;
} else {
pQueryHandle->cachelastrow = TSDB_CACHED_TYPE_LASTROW;
}
}
// update the tsdb query time range
if (pQueryHandle->cachelastrow != TSDB_CACHED_TYPE_NONE) {
pQueryHandle->window = TSWINDOW_INITIALIZER;
pQueryHandle->checkFiles = false;
pQueryHandle->activeIndex = -1; // start from -1
}
return code;
}
int32_t checkForCachedLast(STsdbQueryHandle* pQueryHandle) {
assert(pQueryHandle != NULL);
int32_t code = 0;
STsdbRepo* pRepo = pQueryHandle->pTsdb;
if (pRepo && CACHE_LAST_NULL_COLUMN(&(pRepo->config))) {
pQueryHandle->cachelastrow = TSDB_CACHED_TYPE_LAST;
}
// update the tsdb query time range
if (pQueryHandle->cachelastrow) {
pQueryHandle->checkFiles = false;
pQueryHandle->activeIndex = -1; // start from -1
}
return code;
}
STimeWindow updateLastrowForEachGroup(STableGroupInfo *groupList) {
STimeWindow window = {INT64_MAX, INT64_MIN};
int32_t totalNumOfTable = 0;
SArray* emptyGroup = taosArrayInit(16, sizeof(int32_t));
// NOTE: starts from the buffer in case of descending timestamp order check data blocks
size_t numOfGroups = taosArrayGetSize(groupList->pGroupList);
for(int32_t j = 0; j < numOfGroups; ++j) {
SArray* pGroup = taosArrayGetP(groupList->pGroupList, j);
TSKEY key = TSKEY_INITIAL_VAL;
STableKeyInfo keyInfo = {0};
size_t numOfTables = taosArrayGetSize(pGroup);
for(int32_t i = 0; i < numOfTables; ++i) {
STableKeyInfo* pInfo = (STableKeyInfo*) taosArrayGet(pGroup, i);
// if the lastKey equals to INT64_MIN, there is no data in this table
TSKEY lastKey = ((STable*)(pInfo->pTable))->lastKey;
if (key < lastKey) {
key = lastKey;
keyInfo.pTable = pInfo->pTable;
keyInfo.lastKey = key;
pInfo->lastKey = key;
if (key < window.skey) {
window.skey = key;
}
if (key > window.ekey) {
window.ekey = key;
}
}
}
// clear current group, unref unused table
for (int32_t i = 0; i < numOfTables; ++i) {
STableKeyInfo* pInfo = (STableKeyInfo*)taosArrayGet(pGroup, i);
// keyInfo.pTable may be NULL here.
if (pInfo->pTable != keyInfo.pTable) {
tsdbUnRefTable(pInfo->pTable);
}
}
// more than one table in each group, only one table left for each group
if (keyInfo.pTable != NULL) {
totalNumOfTable++;
if (taosArrayGetSize(pGroup) == 1) {
// do nothing
} else {
taosArrayClear(pGroup);
taosArrayPush(pGroup, &keyInfo);
}
} else { // mark all the empty groups, and remove it later
taosArrayDestroy(&pGroup);
taosArrayPush(emptyGroup, &j);
}
}
// window does not being updated, so set the original
if (window.skey == INT64_MAX && window.ekey == INT64_MIN) {
window = TSWINDOW_INITIALIZER;
assert(totalNumOfTable == 0 && taosArrayGetSize(groupList->pGroupList) == numOfGroups);
}
taosArrayRemoveBatch(groupList->pGroupList, TARRAY_GET_START(emptyGroup), (int32_t) taosArrayGetSize(emptyGroup));
taosArrayDestroy(&emptyGroup);
groupList->numOfTables = totalNumOfTable;
return window;
}
void tsdbRetrieveDataBlockInfo(TsdbQueryHandleT* pQueryHandle, SDataBlockInfo* pDataBlockInfo) {
STsdbQueryHandle* pHandle = (STsdbQueryHandle*)pQueryHandle;
SQueryFilePos* cur = &pHandle->cur;
STable* pTable = NULL;
// there are data in file
if (pHandle->cur.fid != INT32_MIN) {
STableBlockInfo* pBlockInfo = &pHandle->pDataBlockInfo[cur->slot];
pTable = pBlockInfo->pTableCheckInfo->pTableObj;
} else {
STableCheckInfo* pCheckInfo = taosArrayGet(pHandle->pTableCheckInfo, pHandle->activeIndex);
pTable = pCheckInfo->pTableObj;
}
pDataBlockInfo->uid = pTable->tableId.uid;
pDataBlockInfo->tid = pTable->tableId.tid;
pDataBlockInfo->rows = cur->rows;
pDataBlockInfo->window = cur->win;
pDataBlockInfo->numOfCols = (int32_t)(QH_GET_NUM_OF_COLS(pHandle));
}
/*
* return null for mixed data block, if not a complete file data block, the statistics value will always return NULL
*/
int32_t tsdbRetrieveDataBlockStatisInfo(TsdbQueryHandleT* pQueryHandle, SDataStatis** pBlockStatis) {
STsdbQueryHandle* pHandle = (STsdbQueryHandle*) pQueryHandle;
SQueryFilePos* c = &pHandle->cur;
if (c->mixBlock) {
*pBlockStatis = NULL;
return TSDB_CODE_SUCCESS;
}
STableBlockInfo* pBlockInfo = &pHandle->pDataBlockInfo[c->slot];
assert((c->slot >= 0 && c->slot < pHandle->numOfBlocks) || ((c->slot == pHandle->numOfBlocks) && (c->slot == 0)));
// file block with sub-blocks has no statistics data
if (pBlockInfo->compBlock->numOfSubBlocks > 1) {
*pBlockStatis = NULL;
return TSDB_CODE_SUCCESS;
}
int64_t stime = taosGetTimestampUs();
int statisStatus = tsdbLoadBlockStatis(&pHandle->rhelper, pBlockInfo->compBlock);
if (statisStatus < TSDB_STATIS_OK) {
return terrno;
} else if (statisStatus > TSDB_STATIS_OK) {
*pBlockStatis = NULL;
return TSDB_CODE_SUCCESS;
}
int16_t* colIds = pHandle->defaultLoadColumn->pData;
size_t numOfCols = QH_GET_NUM_OF_COLS(pHandle);
memset(pHandle->statis, 0, numOfCols * sizeof(SDataStatis));
for(int32_t i = 0; i < numOfCols; ++i) {
pHandle->statis[i].colId = colIds[i];
}
tsdbGetBlockStatis(&pHandle->rhelper, pHandle->statis, (int)numOfCols, pBlockInfo->compBlock);
// always load the first primary timestamp column data
SDataStatis* pPrimaryColStatis = &pHandle->statis[0];
assert(pPrimaryColStatis->colId == PRIMARYKEY_TIMESTAMP_COL_INDEX);
pPrimaryColStatis->numOfNull = 0;
pPrimaryColStatis->min = pBlockInfo->compBlock->keyFirst;
pPrimaryColStatis->max = pBlockInfo->compBlock->keyLast;
//update the number of NULL data rows
for(int32_t i = 1; i < numOfCols; ++i) {
if (pHandle->statis[i].numOfNull == -1) { // set the column data are all NULL
pHandle->statis[i].numOfNull = pBlockInfo->compBlock->numOfRows;
}
}
int64_t elapsed = taosGetTimestampUs() - stime;
pHandle->cost.statisInfoLoadTime += elapsed;
*pBlockStatis = pHandle->statis;
return TSDB_CODE_SUCCESS;
}
SArray* tsdbRetrieveDataBlock(TsdbQueryHandleT* pQueryHandle, SArray* pIdList) {
/**
* In the following two cases, the data has been loaded to SColumnInfoData.
* 1. data is from cache, 2. data block is not completed qualified to query time range
*/
STsdbQueryHandle* pHandle = (STsdbQueryHandle*)pQueryHandle;
if (pHandle->cur.fid == INT32_MIN) {
return pHandle->pColumns;
} else {
STableBlockInfo* pBlockInfo = &pHandle->pDataBlockInfo[pHandle->cur.slot];
STableCheckInfo* pCheckInfo = pBlockInfo->pTableCheckInfo;
if (pHandle->cur.mixBlock) {
return pHandle->pColumns;
} else {
SDataBlockInfo binfo = GET_FILE_DATA_BLOCK_INFO(pCheckInfo, pBlockInfo->compBlock);
assert(pHandle->realNumOfRows <= binfo.rows);
// data block has been loaded, todo extract method
SDataBlockLoadInfo* pBlockLoadInfo = &pHandle->dataBlockLoadInfo;
if (pBlockLoadInfo->slot == pHandle->cur.slot && pBlockLoadInfo->fileGroup->fid == pHandle->cur.fid &&
pBlockLoadInfo->tid == pCheckInfo->pTableObj->tableId.tid) {
return pHandle->pColumns;
} else { // only load the file block
SBlock* pBlock = pBlockInfo->compBlock;
if (doLoadFileDataBlock(pHandle, pBlock, pCheckInfo, pHandle->cur.slot) != TSDB_CODE_SUCCESS) {
return NULL;
}
// todo refactor
int32_t numOfRows = doCopyRowsFromFileBlock(pHandle, pHandle->outputCapacity, 0, 0, pBlock->numOfRows - 1);
// if the buffer is not full in case of descending order query, move the data in the front of the buffer
if (!ASCENDING_TRAVERSE(pHandle->order) && numOfRows < pHandle->outputCapacity) {
int32_t emptySize = pHandle->outputCapacity - numOfRows;
int32_t reqNumOfCols = (int32_t)taosArrayGetSize(pHandle->pColumns);
for(int32_t i = 0; i < reqNumOfCols; ++i) {
SColumnInfoData* pColInfo = taosArrayGet(pHandle->pColumns, i);
memmove((char*)pColInfo->pData, (char*)pColInfo->pData + emptySize * pColInfo->info.bytes, numOfRows * pColInfo->info.bytes);
}
}
return pHandle->pColumns;
}
}
}
}
void filterPrepare(void* expr, void* param) {
tExprNode* pExpr = (tExprNode*)expr;
if (pExpr->_node.info != NULL) {
return;
}
pExpr->_node.info = calloc(1, sizeof(tQueryInfo));
STSchema* pTSSchema = (STSchema*) param;
tQueryInfo* pInfo = pExpr->_node.info;
tVariant* pCond = pExpr->_node.pRight->pVal;
SSchema* pSchema = pExpr->_node.pLeft->pSchema;
pInfo->sch = *pSchema;
pInfo->optr = pExpr->_node.optr;
pInfo->compare = getComparFunc(pInfo->sch.type, pInfo->optr);
pInfo->indexed = pTSSchema->columns->colId == pInfo->sch.colId;
if (pInfo->optr == TSDB_RELATION_IN) {
int dummy = -1;
SHashObj *pObj = NULL;
if (pInfo->sch.colId == TSDB_TBNAME_COLUMN_INDEX) {
SArray *arr = (SArray *)(pCond->arr);
size_t size = taosArrayGetSize(arr);
pObj = taosHashInit(size * 2, taosGetDefaultHashFunction(pInfo->sch.type), true, false);
for (size_t i = 0; i < size; i++) {
char* p = taosArrayGetP(arr, i);
strntolower_s(varDataVal(p), varDataVal(p), varDataLen(p));
taosHashPut(pObj, varDataVal(p), varDataLen(p), &dummy, sizeof(dummy));
}
} else {
buildFilterSetFromBinary((void **)&pObj, pCond->pz, pCond->nLen);
}
pInfo->q = (char *)pObj;
} else if (pCond != NULL) {
uint32_t size = pCond->nLen * TSDB_NCHAR_SIZE;
if (size < (uint32_t)pSchema->bytes) {
size = pSchema->bytes;
}
// to make sure tonchar does not cause invalid write, since the '\0' needs at least sizeof(wchar_t) space.
pInfo->q = calloc(1, size + TSDB_NCHAR_SIZE + VARSTR_HEADER_SIZE);
tVariantDump(pCond, pInfo->q, pSchema->type, true);
}
}
static int32_t tableGroupComparFn(const void *p1, const void *p2, const void *param) {
STableGroupSupporter* pTableGroupSupp = (STableGroupSupporter*) param;
STable* pTable1 = ((STableKeyInfo*) p1)->pTable;
STable* pTable2 = ((STableKeyInfo*) p2)->pTable;
for (int32_t i = 0; i < pTableGroupSupp->numOfCols; ++i) {
SColIndex* pColIndex = &pTableGroupSupp->pCols[i];
int32_t colIndex = pColIndex->colIndex;
assert(colIndex >= TSDB_TBNAME_COLUMN_INDEX);
char * f1 = NULL;
char * f2 = NULL;
int32_t type = 0;
int32_t bytes = 0;
if (colIndex == TSDB_TBNAME_COLUMN_INDEX) {
f1 = (char*) TABLE_NAME(pTable1);
f2 = (char*) TABLE_NAME(pTable2);
type = TSDB_DATA_TYPE_BINARY;
bytes = tGetTbnameColumnSchema()->bytes;
} else {
if (pTableGroupSupp->pTagSchema && colIndex < pTableGroupSupp->pTagSchema->numOfCols) {
STColumn* pCol = schemaColAt(pTableGroupSupp->pTagSchema, colIndex);
bytes = pCol->bytes;
type = pCol->type;
if (type == TSDB_DATA_TYPE_JSON){
f1 = getJsonTagValueElment(pTable1, pColIndex->name, (int32_t)strlen(pColIndex->name), NULL, TSDB_MAX_JSON_TAGS_LEN);
f2 = getJsonTagValueElment(pTable2, pColIndex->name, (int32_t)strlen(pColIndex->name), NULL, TSDB_MAX_JSON_TAGS_LEN);
}else{
f1 = tdGetKVRowValOfCol(pTable1->tagVal, pCol->colId);
f2 = tdGetKVRowValOfCol(pTable2->tagVal, pCol->colId);
}
}
}
// this tags value may be NULL
if (f1 == NULL && f2 == NULL) {
continue;
}
if (f1 == NULL) {
return -1;
}
if (f2 == NULL) {
return 1;
}
int32_t ret = doCompare(f1, f2, type, bytes);
if (ret == 0) {
continue;
} else {
return ret;
}
}
return 0;
}
static int tsdbCheckInfoCompar(const void* key1, const void* key2) {
if (((STableCheckInfo*)key1)->tableId.tid < ((STableCheckInfo*)key2)->tableId.tid) {
return -1;
} else if (((STableCheckInfo*)key1)->tableId.tid > ((STableCheckInfo*)key2)->tableId.tid) {
return 1;
} else {
ASSERT(false);
return 0;
}
}
void createTableGroupImpl(SArray* pGroups, SArray* pTableList, size_t numOfTables, TSKEY skey,
STableGroupSupporter* pSupp, __ext_compar_fn_t compareFn) {
STable* pTable = taosArrayGetP(pTableList, 0);
SArray* g = taosArrayInit(16, sizeof(STableKeyInfo));
STableKeyInfo info = {.pTable = pTable, .lastKey = skey};
taosArrayPush(g, &info);
tsdbRefTable(pTable);
for (int32_t i = 1; i < numOfTables; ++i) {
STable** prev = taosArrayGet(pTableList, i - 1);
STable** p = taosArrayGet(pTableList, i);
int32_t ret = compareFn(prev, p, pSupp);
assert(ret == 0 || ret == -1);
tsdbRefTable(*p);
assert((*p)->type == TSDB_CHILD_TABLE);
if (ret == 0) {
STableKeyInfo info1 = {.pTable = *p, .lastKey = skey};
taosArrayPush(g, &info1);
} else {
taosArrayPush(pGroups, &g); // current group is ended, start a new group
g = taosArrayInit(16, sizeof(STableKeyInfo));
STableKeyInfo info1 = {.pTable = *p, .lastKey = skey};
taosArrayPush(g, &info1);
}
}
taosArrayPush(pGroups, &g);
}
SArray* createTableGroup(SArray* pTableList, STSchema* pTagSchema, SColIndex* pCols, int32_t numOfOrderCols, TSKEY skey) {
assert(pTableList != NULL);
SArray* pTableGroup = taosArrayInit(1, POINTER_BYTES);
size_t size = taosArrayGetSize(pTableList);
if (size == 0) {
tsdbDebug("no qualified tables");
return pTableGroup;
}
if (numOfOrderCols == 0 || size == 1) { // no group by tags clause or only one table
SArray* sa = taosArrayInit(size, sizeof(STableKeyInfo));
if (sa == NULL) {
taosArrayDestroy(&pTableGroup);
return NULL;
}
for(int32_t i = 0; i < size; ++i) {
STableKeyInfo *pKeyInfo = taosArrayGet(pTableList, i);
tsdbRefTable(pKeyInfo->pTable);
STableKeyInfo info = {.pTable = pKeyInfo->pTable, .lastKey = skey};
taosArrayPush(sa, &info);
}
taosArrayPush(pTableGroup, &sa);
tsdbDebug("all %" PRIzu " tables belong to one group", size);
} else {
STableGroupSupporter sup = {0};
sup.numOfCols = numOfOrderCols;
sup.pTagSchema = pTagSchema;
sup.pCols = pCols;
taosqsort(pTableList->pData, size, sizeof(STableKeyInfo), &sup, tableGroupComparFn);
createTableGroupImpl(pTableGroup, pTableList, size, skey, &sup, tableGroupComparFn);
}
return pTableGroup;
}
int32_t tsdbQuerySTableByTagCond(STsdbRepo* tsdb, uint64_t uid, TSKEY skey, const char* pTagCond, size_t len,
STableGroupInfo* pGroupInfo, SColIndex* pColIndex, int32_t numOfCols) {
SArray* res = NULL;
if (tsdbRLockRepoMeta(tsdb) < 0) goto _error;
STable* pTable = tsdbGetTableByUid(tsdbGetMeta(tsdb), uid);
if (pTable == NULL) {
tsdbError("%p failed to get stable, uid:%" PRIu64, tsdb, uid);
terrno = TSDB_CODE_TDB_INVALID_TABLE_ID;
tsdbUnlockRepoMeta(tsdb);
goto _error;
}
if (pTable->type != TSDB_SUPER_TABLE) {
tsdbError("%p query normal tag not allowed, uid:%" PRIu64 ", tid:%d, name:%s", tsdb, uid, pTable->tableId.tid,
pTable->name->data);
terrno = TSDB_CODE_COM_OPS_NOT_SUPPORT; //basically, this error is caused by invalid sql issued by client
tsdbUnlockRepoMeta(tsdb);
goto _error;
}
//NOTE: not add ref count for super table
res = taosArrayInit(8, sizeof(STableKeyInfo));
STSchema* pTagSchema = tsdbGetTableTagSchema(pTable);
// no tags and tbname condition, all child tables of this stable are involved
if (pTagCond == NULL || len == 0) {
int32_t ret = getAllTableList(pTable, res);
if (ret != TSDB_CODE_SUCCESS) {
tsdbUnlockRepoMeta(tsdb);
goto _error;
}
pGroupInfo->numOfTables = (uint32_t) taosArrayGetSize(res);
pGroupInfo->pGroupList = createTableGroup(res, pTagSchema, pColIndex, numOfCols, skey);
tsdbDebug("%p no table name/tag condition, all tables qualified, numOfTables:%u, group:%zu", tsdb,
pGroupInfo->numOfTables, taosArrayGetSize(pGroupInfo->pGroupList));
taosArrayDestroy(&res);
if (tsdbUnlockRepoMeta(tsdb) < 0) goto _error;
return ret;
}
int32_t ret = TSDB_CODE_SUCCESS;
tExprNode* expr = NULL;
TRY(TSDB_MAX_TAG_CONDITIONS) {
expr = exprTreeFromBinary(pTagCond, len);
CLEANUP_EXECUTE();
} CATCH( code ) {
CLEANUP_EXECUTE();
terrno = code;
tsdbUnlockRepoMeta(tsdb); // unlock tsdb in any cases
goto _error;
// TODO: more error handling
} END_TRY
void *filterInfo = calloc(1, sizeof(SFilterInfo));
((SFilterInfo*)filterInfo)->pTable = pTable;
ret = filterInitFromTree(expr, &filterInfo, 0);
tExprTreeDestroy(expr, NULL);
if (ret != TSDB_CODE_SUCCESS) {
terrno = ret;
tsdbUnlockRepoMeta(tsdb);
filterFreeInfo(filterInfo);
goto _error;
}
ret = tsdbQueryTableList(pTable, res, filterInfo);
if (ret != TSDB_CODE_SUCCESS) {
terrno = ret;
tsdbUnlockRepoMeta(tsdb);
filterFreeInfo(filterInfo);
goto _error;
}
filterFreeInfo(filterInfo);
pGroupInfo->numOfTables = (uint32_t)taosArrayGetSize(res);
pGroupInfo->pGroupList = createTableGroup(res, pTagSchema, pColIndex, numOfCols, skey);
tsdbDebug("%p stable tid:%d, uid:%"PRIu64" query, numOfTables:%u, belong to %" PRIzu " groups", tsdb, pTable->tableId.tid,
pTable->tableId.uid, pGroupInfo->numOfTables, taosArrayGetSize(pGroupInfo->pGroupList));
taosArrayDestroy(&res);
if (tsdbUnlockRepoMeta(tsdb) < 0) goto _error;
return ret;
_error:
taosArrayDestroy(&res);
return terrno;
}
int32_t tsdbGetOneTableGroup(STsdbRepo* tsdb, uint64_t uid, TSKEY startKey, STableGroupInfo* pGroupInfo) {
if (tsdbRLockRepoMeta(tsdb) < 0) goto _error;
STable* pTable = tsdbGetTableByUid(tsdbGetMeta(tsdb), uid);
if (pTable == NULL) {
terrno = TSDB_CODE_TDB_INVALID_TABLE_ID;
tsdbUnlockRepoMeta(tsdb);
goto _error;
}
assert(pTable->type == TSDB_CHILD_TABLE || pTable->type == TSDB_NORMAL_TABLE || pTable->type == TSDB_STREAM_TABLE);
tsdbRefTable(pTable);
if (tsdbUnlockRepoMeta(tsdb) < 0) goto _error;
pGroupInfo->numOfTables = 1;
pGroupInfo->pGroupList = taosArrayInit(1, POINTER_BYTES);
SArray* group = taosArrayInit(1, sizeof(STableKeyInfo));
STableKeyInfo info = {.pTable = pTable, .lastKey = startKey};
taosArrayPush(group, &info);
taosArrayPush(pGroupInfo->pGroupList, &group);
return TSDB_CODE_SUCCESS;
_error:
return terrno;
}
int32_t tsdbGetTableGroupFromIdList(STsdbRepo* tsdb, SArray* pTableIdList, STableGroupInfo* pGroupInfo) {
if (tsdbRLockRepoMeta(tsdb) < 0) {
return terrno;
}
assert(pTableIdList != NULL);
size_t size = taosArrayGetSize(pTableIdList);
pGroupInfo->pGroupList = taosArrayInit(1, POINTER_BYTES);
SArray* group = taosArrayInit(1, sizeof(STableKeyInfo));
for(int32_t i = 0; i < size; ++i) {
STableIdInfo *id = taosArrayGet(pTableIdList, i);
STable* pTable = tsdbGetTableByUid(tsdbGetMeta(tsdb), id->uid);
if (pTable == NULL) {
tsdbWarn("table uid:%"PRIu64", tid:%d has been drop already", id->uid, id->tid);
continue;
}
if (pTable->type == TSDB_SUPER_TABLE) {
tsdbError("direct query on super tale is not allowed, table uid:%"PRIu64", tid:%d", id->uid, id->tid);
terrno = TSDB_CODE_QRY_INVALID_MSG;
tsdbUnlockRepoMeta(tsdb);
taosArrayDestroy(&group);
return terrno;
}
tsdbRefTable(pTable);
STableKeyInfo info = {.pTable = pTable, .lastKey = id->key};
taosArrayPush(group, &info);
}
if (tsdbUnlockRepoMeta(tsdb) < 0) {
taosArrayDestroy(&group);
return terrno;
}
pGroupInfo->numOfTables = (uint32_t) taosArrayGetSize(group);
if (pGroupInfo->numOfTables > 0) {
taosArrayPush(pGroupInfo->pGroupList, &group);
} else {
taosArrayDestroy(&group);
}
return TSDB_CODE_SUCCESS;
}
static void* doFreeColumnInfoData(SArray* pColumnInfoData) {
if (pColumnInfoData == NULL) {
return NULL;
}
size_t cols = taosArrayGetSize(pColumnInfoData);
for (int32_t i = 0; i < cols; ++i) {
SColumnInfoData* pColInfo = taosArrayGet(pColumnInfoData, i);
tfree(pColInfo->pData);
}
taosArrayDestroy(&pColumnInfoData);
return NULL;
}
static void* destroyTableCheckInfo(SArray* pTableCheckInfo) {
size_t size = taosArrayGetSize(pTableCheckInfo);
for (int32_t i = 0; i < size; ++i) {
STableCheckInfo* p = taosArrayGet(pTableCheckInfo, i);
destroyTableMemIterator(p);
tfree(p->pCompInfo);
}
taosArrayDestroy(&pTableCheckInfo);
return NULL;
}
void tsdbCleanupQueryHandle(TsdbQueryHandleT queryHandle) {
STsdbQueryHandle* pQueryHandle = (STsdbQueryHandle*)queryHandle;
if (pQueryHandle == NULL) {
return;
}
pQueryHandle->pColumns = doFreeColumnInfoData(pQueryHandle->pColumns);
taosArrayDestroy(&pQueryHandle->defaultLoadColumn);
tfree(pQueryHandle->pDataBlockInfo);
tfree(pQueryHandle->statis);
if (!emptyQueryTimewindow(pQueryHandle)) {
tsdbMayUnTakeMemSnapshot(pQueryHandle);
} else {
assert(pQueryHandle->pTableCheckInfo == NULL);
}
if (pQueryHandle->pTableCheckInfo != NULL) {
pQueryHandle->pTableCheckInfo = destroyTableCheckInfo(pQueryHandle->pTableCheckInfo);
}
tsdbDestroyReadH(&pQueryHandle->rhelper);
tdFreeDataCols(pQueryHandle->pDataCols);
pQueryHandle->pDataCols = NULL;
pQueryHandle->prev = doFreeColumnInfoData(pQueryHandle->prev);
pQueryHandle->next = doFreeColumnInfoData(pQueryHandle->next);
SIOCostSummary* pCost = &pQueryHandle->cost;
tsdbDebug("%p :io-cost summary: head-file read cnt:%"PRIu64", head-file time:%"PRIu64" us, statis-info:%"PRId64" us, datablock:%" PRId64" us, check data:%"PRId64" us, 0x%"PRIx64,
pQueryHandle, pCost->headFileLoad, pCost->headFileLoadTime, pCost->statisInfoLoadTime, pCost->blockLoadTime, pCost->checkForNextTime, pQueryHandle->qId);
tfree(pQueryHandle);
}
void tsdbDestroyTableGroup(STableGroupInfo *pGroupList) {
assert(pGroupList != NULL);
size_t numOfGroup = taosArrayGetSize(pGroupList->pGroupList);
for(int32_t i = 0; i < numOfGroup; ++i) {
SArray* p = taosArrayGetP(pGroupList->pGroupList, i);
size_t numOfTables = taosArrayGetSize(p);
for(int32_t j = 0; j < numOfTables; ++j) {
STable* pTable = taosArrayGetP(p, j);
if (pTable != NULL) { // in case of handling retrieve data from tsdb
tsdbUnRefTable(pTable);
}
//assert(pTable != NULL);
}
taosArrayDestroy(&p);
}
taosHashCleanup(pGroupList->map);
taosArrayDestroy(&pGroupList->pGroupList);
pGroupList->numOfTables = 0;
}
static FORCE_INLINE int32_t tsdbGetTagDataFromId(void *param, int32_t id, void **data) {
STable* pTable = (STable*)(SL_GET_NODE_DATA((SSkipListNode *)param));
if (id == TSDB_TBNAME_COLUMN_INDEX) {
*data = TABLE_NAME(pTable);
} else {
*data = tdGetKVRowValOfCol(pTable->tagVal, id);
}
return TSDB_CODE_SUCCESS;
}
static void queryIndexedColumn(SSkipList* pSkipList, void* filterInfo, SArray* res) {
SSkipListIterator* iter = NULL;
char *startVal = NULL;
int32_t order = 0;
int32_t inRange = 0;
int32_t flag = 0;
bool all = false;
int8_t *addToResult = NULL;
filterGetIndexedColumnInfo(filterInfo, &startVal, &order, &flag);
tsdbDebug("filter index column start, order:%d, flag:%d", order, flag);
while (order) {
if (FILTER_GET_FLAG(order, TSDB_ORDER_ASC)) {
iter = tSkipListCreateIterFromVal(pSkipList, startVal, pSkipList->type, TSDB_ORDER_ASC);
FILTER_CLR_FLAG(order, TSDB_ORDER_ASC);
} else {
iter = tSkipListCreateIterFromVal(pSkipList, startVal, pSkipList->type, TSDB_ORDER_DESC);
FILTER_CLR_FLAG(order, TSDB_ORDER_DESC);
}
while (tSkipListIterNext(iter)) {
SSkipListNode *pNode = tSkipListIterGet(iter);
if (inRange == 0 || !FILTER_GET_FLAG(flag, FI_ACTION_NO_NEED)) {
tsdbDebug("filter index column, filter it");
filterSetColFieldData(filterInfo, pNode, tsdbGetTagDataFromId);
all = filterExecute(filterInfo, 1, &addToResult, NULL, 0);
}
char *pData = SL_GET_NODE_DATA(pNode);
tsdbDebug("filter index column, table:%s, result:%d", ((STable *)pData)->name->data, all);
if (all || (addToResult && *addToResult)) {
STableKeyInfo info = {.pTable = (void*)pData, .lastKey = TSKEY_INITIAL_VAL};
taosArrayPush(res, &info);
inRange = 1;
} else if (inRange){
break;
}
}
inRange = 0;
tfree(addToResult);
tSkipListDestroyIter(iter);
}
tsdbDebug("filter index column end");
}
static void queryIndexlessColumn(SSkipList* pSkipList, void* filterInfo, SArray* res) {
SSkipListIterator* iter = tSkipListCreateIter(pSkipList);
int8_t *addToResult = NULL;
while (tSkipListIterNext(iter)) {
SSkipListNode *pNode = tSkipListIterGet(iter);
filterSetColFieldData(filterInfo, pNode, tsdbGetTagDataFromId);
char *pData = SL_GET_NODE_DATA(pNode);
bool all = filterExecute(filterInfo, 1, &addToResult, NULL, 0);
if (all || (addToResult && *addToResult)) {
STableKeyInfo info = {.pTable = (void*)pData, .lastKey = TSKEY_INITIAL_VAL};
taosArrayPush(res, &info);
}
}
tfree(addToResult);
tSkipListDestroyIter(iter);
}
static FORCE_INLINE int32_t tsdbGetJsonTagDataFromId(void *param, int32_t id, char* name, void **data) {
JsonMapValue* jsonMapV = (JsonMapValue*)(param);
STable* pTable = (STable*)(jsonMapV->table);
if (id == TSDB_TBNAME_COLUMN_INDEX) {
*data = TABLE_NAME(pTable);
} else {
void* jsonData = tsdbGetJsonTagValue(pTable, name, TSDB_MAX_JSON_KEY_MD5_LEN, NULL);
// jsonData == NULL for ? operation
// if(jsonData != NULL) jsonData += CHAR_BYTES; // jump type
*data = jsonData;
}
return TSDB_CODE_SUCCESS;
}
static int32_t queryByJsonTag(STable* pTable, void* filterInfo, SArray* res){
// get all table in fields, and dumplicate it
SArray* tabList = NULL;
bool needQueryAll = false;
SFilterInfo* info = (SFilterInfo*)filterInfo;
for (uint16_t i = 0; i < info->fields[FLD_TYPE_COLUMN].num; ++i) {
SFilterField* fi = &info->fields[FLD_TYPE_COLUMN].fields[i];
SSchema* sch = fi->desc;
if (sch->colId == TSDB_TBNAME_COLUMN_INDEX) {
tabList = taosArrayInit(32, sizeof(JsonMapValue));
getAllTableList(pTable, tabList); // query all table
needQueryAll = true;
break;
}
}
for (uint16_t i = 0; i < info->unitNum; ++i) { // is null operation need query all table
SFilterUnit* unit = &info->units[i];
if (unit->compare.optr == TSDB_RELATION_ISNULL) {
tabList = taosArrayInit(32, sizeof(JsonMapValue));
getAllTableList(pTable, tabList); // query all table
needQueryAll = true;
break;
}
}
for (uint16_t i = 0; i < info->fields[FLD_TYPE_COLUMN].num; ++i) {
if (needQueryAll) break; // query all table
SFilterField* fi = &info->fields[FLD_TYPE_COLUMN].fields[i];
SSchema* sch = fi->desc;
char* key = sch->name;
SArray** data = (SArray**)taosHashGet(pTable->jsonKeyMap, key, TSDB_MAX_JSON_KEY_MD5_LEN);
if(data == NULL) continue;
if(tabList == NULL) {
tabList = taosArrayDup(*data);
}else{
for(int j = 0; j < taosArrayGetSize(*data); j++){
void* element = taosArrayGet(*data, j);
void* p = taosArraySearch(tabList, element, tsdbCompareJsonMapValue, TD_EQ);
if (p == NULL) {
p = taosArraySearch(tabList, element, tsdbCompareJsonMapValue, TD_GE);
if(p == NULL){
taosArrayPush(tabList, element);
}else{
taosArrayInsert(tabList, TARRAY_ELEM_IDX(tabList, p), element);
}
}
}
}
}
if(tabList == NULL){
tsdbError("json key not exist, no candidate table");
return TSDB_CODE_SUCCESS;
}
size_t size = taosArrayGetSize(tabList);
int8_t *addToResult = NULL;
for(int i = 0; i < size; i++){
JsonMapValue* data = taosArrayGet(tabList, i);
filterSetJsonColFieldData(filterInfo, data, tsdbGetJsonTagDataFromId);
bool all = filterExecute(filterInfo, 1, &addToResult, NULL, 0);
if (all || (addToResult && *addToResult)) {
STableKeyInfo kInfo = {.pTable = (void*)(data->table), .lastKey = TSKEY_INITIAL_VAL};
taosArrayPush(res, &kInfo);
}
}
tfree(addToResult);
taosArrayDestroy(&tabList);
return TSDB_CODE_SUCCESS;
}
static int32_t tsdbQueryTableList(STable* pTable, SArray* pRes, void* filterInfo) {
STSchema* pTSSchema = pTable->tagSchema;
if(pTSSchema->columns->type == TSDB_DATA_TYPE_JSON){
return queryByJsonTag(pTable, filterInfo, pRes);
}else{
bool indexQuery = false;
SSkipList *pSkipList = pTable->pIndex;
filterIsIndexedColumnQuery(filterInfo, pTSSchema->columns->colId, &indexQuery);
if (indexQuery) {
queryIndexedColumn(pSkipList, filterInfo, pRes);
} else {
queryIndexlessColumn(pSkipList, filterInfo, pRes);
}
}
return TSDB_CODE_SUCCESS;
}
void* getJsonTagValueElment(void* data, char* key, int32_t keyLen, char* dst, int16_t bytes){
char keyMd5[TSDB_MAX_JSON_KEY_MD5_LEN] = {0};
jsonKeyMd5(key, keyLen, keyMd5);
void* result = tsdbGetJsonTagValue(data, keyMd5, TSDB_MAX_JSON_KEY_MD5_LEN, NULL);
if (result == NULL){ // json key no result
if(!dst) return NULL;
*dst = TSDB_DATA_TYPE_JSON;
setNull(dst + CHAR_BYTES, TSDB_DATA_TYPE_JSON, 0);
return dst;
}
char* realData = POINTER_SHIFT(result, CHAR_BYTES);
if(*(char*)result == TSDB_DATA_TYPE_NCHAR || *(char*)result == TSDB_DATA_TYPE_BINARY) {
assert(varDataTLen(realData) < bytes);
if(!dst) return result;
memcpy(dst, result, CHAR_BYTES + varDataTLen(realData));
return dst;
}else if (*(char*)result == TSDB_DATA_TYPE_DOUBLE || *(char*)result == TSDB_DATA_TYPE_BIGINT) {
if(!dst) return result;
memcpy(dst, result, CHAR_BYTES + LONG_BYTES);
return dst;
}else if (*(char*)result == TSDB_DATA_TYPE_BOOL) {
if(!dst) return result;
memcpy(dst, result, CHAR_BYTES + CHAR_BYTES);
return dst;
}else {
assert(0);
}
return result;
}
void getJsonTagValueAll(void* data, void* dst, int16_t bytes) {
char* json = parseTagDatatoJson(data);
char* tagData = POINTER_SHIFT(dst, CHAR_BYTES);
*(char*)dst = TSDB_DATA_TYPE_JSON;
if(json == NULL){
setNull(tagData, TSDB_DATA_TYPE_JSON, 0);
return;
}
int32_t length = 0;
if(!taosMbsToUcs4(json, strlen(json), varDataVal(tagData), bytes - VARSTR_HEADER_SIZE - CHAR_BYTES, &length)){
tsdbError("getJsonTagValueAll mbstoucs4 error! length:%d", length);
}
varDataSetLen(tagData, length);
assert(varDataTLen(tagData) <= bytes);
tfree(json);
}
char* parseTagDatatoJson(void *p){
char* string = NULL;
cJSON *json = cJSON_CreateObject();
if (json == NULL)
{
goto end;
}
int16_t nCols = kvRowNCols(p);
ASSERT(nCols%2 == 1);
char tagJsonKey[TSDB_MAX_JSON_KEY_LEN + 1] = {0};
for (int j = 0; j < nCols; ++j) {
SColIdx * pColIdx = kvRowColIdxAt(p, j);
void* val = (kvRowColVal(p, pColIdx));
if (j == 0){
int8_t jsonPlaceHolder = *(int8_t*)val;
ASSERT(jsonPlaceHolder == TSDB_DATA_JSON_PLACEHOLDER);
continue;
}
if(j == 1){
uint32_t jsonNULL = *(uint32_t*)(varDataVal(val));
ASSERT(jsonNULL == TSDB_DATA_JSON_NULL);
continue;
}
if (j == 2){
if(*(uint32_t*)(varDataVal(val + CHAR_BYTES)) == TSDB_DATA_JSON_NULL) goto end;
continue;
}
if (j%2 == 1) { // json key encode by binary
ASSERT(varDataLen(val) <= TSDB_MAX_JSON_KEY_LEN);
memset(tagJsonKey, 0, sizeof(tagJsonKey));
memcpy(tagJsonKey, varDataVal(val), varDataLen(val));
}else{ // json value
char* realData = POINTER_SHIFT(val, CHAR_BYTES);
char type = *(char*)val;
if(type == TSDB_DATA_TYPE_BINARY) {
assert(*(uint32_t*)varDataVal(realData) == TSDB_DATA_JSON_null); // json null value
assert(varDataLen(realData) == INT_BYTES);
cJSON* value = cJSON_CreateNull();
if (value == NULL)
{
goto end;
}
cJSON_AddItemToObject(json, tagJsonKey, value);
}else if(type == TSDB_DATA_TYPE_NCHAR) {
cJSON* value = NULL;
if (varDataLen(realData) > 0){
char *tagJsonValue = calloc(varDataLen(realData), 1);
int32_t length = taosUcs4ToMbs(varDataVal(realData), varDataLen(realData), tagJsonValue);
if (length < 0) {
tsdbError("charset:%s to %s. val:%s convert json value failed.", DEFAULT_UNICODE_ENCODEC, tsCharset,
(char*)val);
free(tagJsonValue);
goto end;
}
value = cJSON_CreateString(tagJsonValue);
free(tagJsonValue);
if (value == NULL)
{
goto end;
}
}else if(varDataLen(realData) == 0){
value = cJSON_CreateString("");
}else{
assert(0);
}
cJSON_AddItemToObject(json, tagJsonKey, value);
}else if(type == TSDB_DATA_TYPE_DOUBLE){
double jsonVd = *(double*)(realData);
cJSON* value = cJSON_CreateNumber(jsonVd);
if (value == NULL)
{
goto end;
}
cJSON_AddItemToObject(json, tagJsonKey, value);
}else if(type == TSDB_DATA_TYPE_BIGINT){
int64_t jsonVd = *(int64_t*)(realData);
cJSON* value = cJSON_CreateNumber((double)jsonVd);
if (value == NULL)
{
goto end;
}
cJSON_AddItemToObject(json, tagJsonKey, value);
}else if (type == TSDB_DATA_TYPE_BOOL) {
char jsonVd = *(char*)(realData);
cJSON* value = cJSON_CreateBool(jsonVd);
if (value == NULL)
{
goto end;
}
cJSON_AddItemToObject(json, tagJsonKey, value);
}
else{
tsdbError("unsupportted json value");
}
}
}
string = cJSON_PrintUnformatted(json);
end:
cJSON_Delete(json);
return string;
}
// obtain queryHandle attribute
int64_t tsdbSkipOffset(TsdbQueryHandleT queryHandle) {
STsdbQueryHandle* pQueryHandle = (STsdbQueryHandle*)queryHandle;
if (pQueryHandle) {
return pQueryHandle->srows;
}
return 0;
}
#endif
\ No newline at end of file
/*
* Copyright (c) 2019 TAOS Data, Inc. <jhtao@taosdata.com>
*
* This program is free software: you can use, redistribute, and/or modify
* it under the terms of the GNU Affero General Public License, version 3
* or later ("AGPL"), as published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#include "tsdbint.h"
#define TSDB_KEY_COL_OFFSET 0
static void tsdbResetReadTable(SReadH *pReadh);
static void tsdbResetReadFile(SReadH *pReadh);
static int tsdbLoadBlockDataImpl(SReadH *pReadh, SBlock *pBlock, SDataCols *pDataCols);
static int tsdbCheckAndDecodeColumnData(SDataCol *pDataCol, void *content, int32_t len, int8_t comp, int numOfRows,
int maxPoints, char *buffer, int bufferSize);
static int tsdbLoadBlockDataColsImpl(SReadH *pReadh, SBlock *pBlock, SDataCols *pDataCols, int16_t *colIds,
int numOfColIds);
static int tsdbLoadColData(SReadH *pReadh, SDFile *pDFile, SBlock *pBlock, SBlockCol *pBlockCol, SDataCol *pDataCol);
static int tsdbLoadBlockStatisFromDFile(SReadH *pReadh, SBlock *pBlock);
static int tsdbLoadBlockStatisFromAggr(SReadH *pReadh, SBlock *pBlock);
int tsdbInitReadH(SReadH *pReadh, STsdb *pRepo) {
ASSERT(pReadh != NULL && pRepo != NULL);
STsdbCfg *pCfg = REPO_CFG(pRepo);
memset((void *)pReadh, 0, sizeof(*pReadh));
pReadh->pRepo = pRepo;
TSDB_FSET_SET_CLOSED(TSDB_READ_FSET(pReadh));
pReadh->aBlkIdx = taosArrayInit(1024, sizeof(SBlockIdx));
if (pReadh->aBlkIdx == NULL) {
terrno = TSDB_CODE_TDB_OUT_OF_MEMORY;
return -1;
}
pReadh->pDCols[0] = tdNewDataCols(0, pCfg->maxRowsPerFileBlock);
if (pReadh->pDCols[0] == NULL) {
terrno = TSDB_CODE_TDB_OUT_OF_MEMORY;
tsdbDestroyReadH(pReadh);
return -1;
}
pReadh->pDCols[1] = tdNewDataCols(0, pCfg->maxRowsPerFileBlock);
if (pReadh->pDCols[1] == NULL) {
terrno = TSDB_CODE_TDB_OUT_OF_MEMORY;
tsdbDestroyReadH(pReadh);
return -1;
}
return 0;
}
void tsdbDestroyReadH(SReadH *pReadh) {
if (pReadh == NULL) return;
pReadh->pExBuf = taosTZfree(pReadh->pExBuf);
pReadh->pCBuf = taosTZfree(pReadh->pCBuf);
pReadh->pBuf = taosTZfree(pReadh->pBuf);
pReadh->pDCols[0] = tdFreeDataCols(pReadh->pDCols[0]);
pReadh->pDCols[1] = tdFreeDataCols(pReadh->pDCols[1]);
pReadh->pAggrBlkData = taosTZfree(pReadh->pAggrBlkData);
pReadh->pBlkData = taosTZfree(pReadh->pBlkData);
pReadh->pBlkInfo = taosTZfree(pReadh->pBlkInfo);
pReadh->cidx = 0;
pReadh->pBlkIdx = NULL;
pReadh->pTable = NULL;
pReadh->aBlkIdx = taosArrayDestroy(pReadh->aBlkIdx);
tsdbCloseDFileSet(TSDB_READ_FSET(pReadh));
pReadh->pRepo = NULL;
}
int tsdbSetAndOpenReadFSet(SReadH *pReadh, SDFileSet *pSet) {
ASSERT(pSet != NULL);
tsdbResetReadFile(pReadh);
pReadh->rSet = *pSet;
TSDB_FSET_SET_CLOSED(TSDB_READ_FSET(pReadh));
if (tsdbOpenDFileSet(TSDB_READ_FSET(pReadh), O_RDONLY) < 0) {
tsdbError("vgId:%d failed to open file set %d since %s", TSDB_READ_REPO_ID(pReadh), TSDB_FSET_FID(pSet),
tstrerror(terrno));
return -1;
}
return 0;
}
void tsdbCloseAndUnsetFSet(SReadH *pReadh) { tsdbResetReadFile(pReadh); }
int tsdbLoadBlockIdx(SReadH *pReadh) {
SDFile * pHeadf = TSDB_READ_HEAD_FILE(pReadh);
SBlockIdx blkIdx;
ASSERT(taosArrayGetSize(pReadh->aBlkIdx) == 0);
// No data at all, just return
if (pHeadf->info.offset <= 0) return 0;
if (tsdbSeekDFile(pHeadf, pHeadf->info.offset, SEEK_SET) < 0) {
tsdbError("vgId:%d failed to load SBlockIdx part while seek file %s since %s, offset:%u len :%u",
TSDB_READ_REPO_ID(pReadh), TSDB_FILE_FULL_NAME(pHeadf), tstrerror(terrno), pHeadf->info.offset,
pHeadf->info.len);
return -1;
}
if (tsdbMakeRoom((void **)(&TSDB_READ_BUF(pReadh)), pHeadf->info.len) < 0) return -1;
int64_t nread = tsdbReadDFile(pHeadf, TSDB_READ_BUF(pReadh), pHeadf->info.len);
if (nread < 0) {
tsdbError("vgId:%d failed to load SBlockIdx part while read file %s since %s, offset:%u len :%u",
TSDB_READ_REPO_ID(pReadh), TSDB_FILE_FULL_NAME(pHeadf), tstrerror(terrno), pHeadf->info.offset,
pHeadf->info.len);
return -1;
}
if (nread < pHeadf->info.len) {
terrno = TSDB_CODE_TDB_FILE_CORRUPTED;
tsdbError("vgId:%d SBlockIdx part in file %s is corrupted, offset:%u expected bytes:%u read bytes: %" PRId64,
TSDB_READ_REPO_ID(pReadh), TSDB_FILE_FULL_NAME(pHeadf), pHeadf->info.offset, pHeadf->info.len, nread);
return -1;
}
if (!taosCheckChecksumWhole((uint8_t *)TSDB_READ_BUF(pReadh), pHeadf->info.len)) {
terrno = TSDB_CODE_TDB_FILE_CORRUPTED;
tsdbError("vgId:%d SBlockIdx part in file %s is corrupted since wrong checksum, offset:%u len :%u",
TSDB_READ_REPO_ID(pReadh), TSDB_FILE_FULL_NAME(pHeadf), pHeadf->info.offset, pHeadf->info.len);
return -1;
}
void *ptr = TSDB_READ_BUF(pReadh);
int tsize = 0;
while (POINTER_DISTANCE(ptr, TSDB_READ_BUF(pReadh)) < (pHeadf->info.len - sizeof(TSCKSUM))) {
ptr = tsdbDecodeSBlockIdx(ptr, &blkIdx);
ASSERT(ptr != NULL);
if (taosArrayPush(pReadh->aBlkIdx, (void *)(&blkIdx)) == NULL) {
terrno = TSDB_CODE_TDB_OUT_OF_MEMORY;
return -1;
}
tsize++;
ASSERT(tsize == 1 || ((SBlockIdx *)taosArrayGet(pReadh->aBlkIdx, tsize - 2))->tid <
((SBlockIdx *)taosArrayGet(pReadh->aBlkIdx, tsize - 1))->tid);
}
return 0;
}
int tsdbSetReadTable(SReadH *pReadh, STable *pTable) {
STSchema *pSchema = tsdbGetTableSchemaImpl(pTable, false, false, -1, -1);
pReadh->pTable = pTable;
if (tdInitDataCols(pReadh->pDCols[0], pSchema) < 0) {
terrno = TSDB_CODE_TDB_OUT_OF_MEMORY;
return -1;
}
if (tdInitDataCols(pReadh->pDCols[1], pSchema) < 0) {
terrno = TSDB_CODE_TDB_OUT_OF_MEMORY;
return -1;
}
size_t size = taosArrayGetSize(pReadh->aBlkIdx);
if (size > 0) {
while (true) {
if (pReadh->cidx >= size) {
pReadh->pBlkIdx = NULL;
break;
}
SBlockIdx *pBlkIdx = taosArrayGet(pReadh->aBlkIdx, pReadh->cidx);
if (pBlkIdx->tid == TABLE_TID(pTable)) {
if (pBlkIdx->uid == TABLE_UID(pTable)) {
pReadh->pBlkIdx = pBlkIdx;
} else {
pReadh->pBlkIdx = NULL;
}
pReadh->cidx++;
break;
} else if (pBlkIdx->tid > TABLE_TID(pTable)) {
pReadh->pBlkIdx = NULL;
break;
} else {
pReadh->cidx++;
}
}
} else {
pReadh->pBlkIdx = NULL;
}
return 0;
}
#if 0
int tsdbLoadBlockInfo(SReadH *pReadh, void *pTarget) {
ASSERT(pReadh->pBlkIdx != NULL);
SDFile * pHeadf = TSDB_READ_HEAD_FILE(pReadh);
SBlockIdx *pBlkIdx = pReadh->pBlkIdx;
if (tsdbSeekDFile(pHeadf, pBlkIdx->offset, SEEK_SET) < 0) {
tsdbError("vgId:%d failed to load SBlockInfo part while seek file %s since %s, offset:%u len:%u",
TSDB_READ_REPO_ID(pReadh), TSDB_FILE_FULL_NAME(pHeadf), tstrerror(terrno), pBlkIdx->offset, pBlkIdx->len);
return -1;
}
if (tsdbMakeRoom((void **)(&(pReadh->pBlkInfo)), pBlkIdx->len) < 0) return -1;
int64_t nread = tsdbReadDFile(pHeadf, (void *)(pReadh->pBlkInfo), pBlkIdx->len);
if (nread < 0) {
tsdbError("vgId:%d failed to load SBlockInfo part while read file %s since %s, offset:%u len :%u",
TSDB_READ_REPO_ID(pReadh), TSDB_FILE_FULL_NAME(pHeadf), tstrerror(terrno), pBlkIdx->offset, pBlkIdx->len);
return -1;
}
if (nread < pBlkIdx->len) {
terrno = TSDB_CODE_TDB_FILE_CORRUPTED;
tsdbError("vgId:%d SBlockInfo part in file %s is corrupted, offset:%u expected bytes:%u read bytes:%" PRId64,
TSDB_READ_REPO_ID(pReadh), TSDB_FILE_FULL_NAME(pHeadf), pBlkIdx->offset, pBlkIdx->len, nread);
return -1;
}
if (!taosCheckChecksumWhole((uint8_t *)(pReadh->pBlkInfo), pBlkIdx->len)) {
terrno = TSDB_CODE_TDB_FILE_CORRUPTED;
tsdbError("vgId:%d SBlockInfo part in file %s is corrupted since wrong checksum, offset:%u len :%u",
TSDB_READ_REPO_ID(pReadh), TSDB_FILE_FULL_NAME(pHeadf), pBlkIdx->offset, pBlkIdx->len);
return -1;
}
ASSERT(pBlkIdx->tid == pReadh->pBlkInfo->tid && pBlkIdx->uid == pReadh->pBlkInfo->uid);
if (pTarget) {
memcpy(pTarget, (void *)(pReadh->pBlkInfo), pBlkIdx->len);
}
return 0;
}
#endif
static FORCE_INLINE int32_t tsdbGetSBlockVer(int32_t fver) {
switch (fver) {
case TSDB_FS_VER_0:
return TSDB_SBLK_VER_0;
case TSDB_FS_VER_1:
return TSDB_SBLK_VER_1;
default:
return SBlockVerLatest;
}
}
static FORCE_INLINE size_t tsdbSizeOfSBlock(int32_t sBlkVer) {
switch (sBlkVer) {
case TSDB_SBLK_VER_0:
return sizeof(SBlockV0);
case TSDB_SBLK_VER_1:
return sizeof(SBlockV1);
default:
return sizeof(SBlock);
}
}
static int tsdbSBlkInfoRefactor(SDFile *pHeadf, SBlockInfo **pDstBlkInfo, SBlockIdx *pBlkIdx, uint32_t *dstBlkInfoLen) {
int sBlkVer = tsdbGetSBlockVer(pHeadf->info.fver);
if (sBlkVer > TSDB_SBLK_VER_0) {
*dstBlkInfoLen = pBlkIdx->len;
return TSDB_CODE_SUCCESS;
}
size_t originBlkSize = tsdbSizeOfSBlock(sBlkVer);
size_t nBlks = (pBlkIdx->len - sizeof(SBlockInfo)) / originBlkSize;
*dstBlkInfoLen = (uint32_t)(sizeof(SBlockInfo) + nBlks * sizeof(SBlock));
if (pBlkIdx->len == *dstBlkInfoLen) {
return TSDB_CODE_SUCCESS;
}
ASSERT(*dstBlkInfoLen >= pBlkIdx->len);
SBlockInfo *tmpBlkInfo = NULL;
if (tsdbMakeRoom((void **)(&tmpBlkInfo), *dstBlkInfoLen) < 0) return -1;
memset(tmpBlkInfo, 0, *dstBlkInfoLen); // the blkVer is set to 0
memcpy(tmpBlkInfo, *pDstBlkInfo, sizeof(SBlockInfo)); // copy header
uint32_t nSubBlks = 0;
for (int i = 0; i < nBlks; ++i) {
SBlock *tmpBlk = tmpBlkInfo->blocks + i;
memcpy(tmpBlk, POINTER_SHIFT((*pDstBlkInfo)->blocks, i * originBlkSize), originBlkSize);
if (i < pBlkIdx->numOfBlocks) { // super blocks
if (tmpBlk->numOfSubBlocks > 1) { // has sub blocks
tmpBlk->offset = sizeof(SBlockInfo) + (pBlkIdx->numOfBlocks + nSubBlks) * sizeof(SBlock);
nSubBlks += tmpBlk->numOfSubBlocks;
}
}
// TODO: update the fields if the SBlock definition change later
}
taosTZfree(*pDstBlkInfo);
*pDstBlkInfo = tmpBlkInfo;
return TSDB_CODE_SUCCESS;
}
int tsdbLoadBlockInfo(SReadH *pReadh, void **pTarget, uint32_t *extendedLen) {
ASSERT(pReadh->pBlkIdx != NULL);
SDFile * pHeadf = TSDB_READ_HEAD_FILE(pReadh);
SBlockIdx * pBlkIdx = pReadh->pBlkIdx;
if (tsdbSeekDFile(pHeadf, pBlkIdx->offset, SEEK_SET) < 0) {
tsdbError("vgId:%d failed to load SBlockInfo part while seek file %s since %s, offset:%u len:%u",
TSDB_READ_REPO_ID(pReadh), TSDB_FILE_FULL_NAME(pHeadf), tstrerror(terrno), pBlkIdx->offset, pBlkIdx->len);
return -1;
}
if (tsdbMakeRoom((void **)(&pReadh->pBlkInfo), pBlkIdx->len) < 0) return -1;
int64_t nread = tsdbReadDFile(pHeadf, (void *)(pReadh->pBlkInfo), pBlkIdx->len);
if (nread < 0) {
tsdbError("vgId:%d failed to load SBlockInfo part while read file %s since %s, offset:%u len :%u",
TSDB_READ_REPO_ID(pReadh), TSDB_FILE_FULL_NAME(pHeadf), tstrerror(terrno), pBlkIdx->offset, pBlkIdx->len);
return -1;
}
if (nread < pBlkIdx->len) {
terrno = TSDB_CODE_TDB_FILE_CORRUPTED;
tsdbError("vgId:%d SBlockInfo part in file %s is corrupted, offset:%u expected bytes:%u read bytes:%" PRId64,
TSDB_READ_REPO_ID(pReadh), TSDB_FILE_FULL_NAME(pHeadf), pBlkIdx->offset, pBlkIdx->len, nread);
return -1;
}
if (!taosCheckChecksumWhole((uint8_t *)(pReadh->pBlkInfo), pBlkIdx->len)) {
terrno = TSDB_CODE_TDB_FILE_CORRUPTED;
tsdbError("vgId:%d SBlockInfo part in file %s is corrupted since wrong checksum, offset:%u len :%u",
TSDB_READ_REPO_ID(pReadh), TSDB_FILE_FULL_NAME(pHeadf), pBlkIdx->offset, pBlkIdx->len);
return -1;
}
ASSERT(pBlkIdx->tid == pReadh->pBlkInfo->tid && pBlkIdx->uid == pReadh->pBlkInfo->uid);
uint32_t dstBlkInfoLen = 0;
if (tsdbSBlkInfoRefactor(pHeadf, &(pReadh->pBlkInfo), pBlkIdx, &dstBlkInfoLen) < 0) {
return -1;
}
if (extendedLen != NULL) {
if (pTarget != NULL) {
if (*extendedLen < dstBlkInfoLen) {
char *t = realloc(*pTarget, dstBlkInfoLen);
if (t == NULL) {
terrno = TSDB_CODE_TDB_OUT_OF_MEMORY;
return -1;
}
*pTarget = t;
}
memcpy(*pTarget, (void *)(pReadh->pBlkInfo), dstBlkInfoLen);
}
*extendedLen = dstBlkInfoLen;
}
return TSDB_CODE_SUCCESS;
}
int tsdbLoadBlockData(SReadH *pReadh, SBlock *pBlock, SBlockInfo *pBlkInfo) {
ASSERT(pBlock->numOfSubBlocks > 0);
int8_t update = pReadh->pRepo->config.update;
SBlock *iBlock = pBlock;
if (pBlock->numOfSubBlocks > 1) {
if (pBlkInfo) {
iBlock = (SBlock *)POINTER_SHIFT(pBlkInfo, pBlock->offset);
} else {
iBlock = (SBlock *)POINTER_SHIFT(pReadh->pBlkInfo, pBlock->offset);
}
}
if (tsdbLoadBlockDataImpl(pReadh, iBlock, pReadh->pDCols[0]) < 0) return -1;
for (int i = 1; i < pBlock->numOfSubBlocks; i++) {
iBlock++;
if (tsdbLoadBlockDataImpl(pReadh, iBlock, pReadh->pDCols[1]) < 0) return -1;
if (tdMergeDataCols(pReadh->pDCols[0], pReadh->pDCols[1], pReadh->pDCols[1]->numOfRows, NULL, update != TD_ROW_PARTIAL_UPDATE) < 0) return -1;
}
ASSERT(pReadh->pDCols[0]->numOfRows == pBlock->numOfRows);
ASSERT(dataColsKeyFirst(pReadh->pDCols[0]) == pBlock->keyFirst);
ASSERT(dataColsKeyLast(pReadh->pDCols[0]) == pBlock->keyLast);
return 0;
}
int tsdbLoadBlockDataCols(SReadH *pReadh, SBlock *pBlock, SBlockInfo *pBlkInfo, int16_t *colIds, int numOfColsIds) {
ASSERT(pBlock->numOfSubBlocks > 0);
int8_t update = pReadh->pRepo->config.update;
SBlock *iBlock = pBlock;
if (pBlock->numOfSubBlocks > 1) {
if (pBlkInfo) {
iBlock = POINTER_SHIFT(pBlkInfo, pBlock->offset);
} else {
iBlock = POINTER_SHIFT(pReadh->pBlkInfo, pBlock->offset);
}
}
if (tsdbLoadBlockDataColsImpl(pReadh, iBlock, pReadh->pDCols[0], colIds, numOfColsIds) < 0) return -1;
for (int i = 1; i < pBlock->numOfSubBlocks; i++) {
iBlock++;
if (tsdbLoadBlockDataColsImpl(pReadh, iBlock, pReadh->pDCols[1], colIds, numOfColsIds) < 0) return -1;
if (tdMergeDataCols(pReadh->pDCols[0], pReadh->pDCols[1], pReadh->pDCols[1]->numOfRows, NULL, update != TD_ROW_PARTIAL_UPDATE) < 0) return -1;
}
ASSERT(pReadh->pDCols[0]->numOfRows == pBlock->numOfRows);
ASSERT(dataColsKeyFirst(pReadh->pDCols[0]) == pBlock->keyFirst);
ASSERT(dataColsKeyLast(pReadh->pDCols[0]) == pBlock->keyLast);
return 0;
}
static int tsdbLoadBlockStatisFromDFile(SReadH *pReadh, SBlock *pBlock) {
SDFile *pDFile = (pBlock->last) ? TSDB_READ_LAST_FILE(pReadh) : TSDB_READ_DATA_FILE(pReadh);
if (tsdbSeekDFile(pDFile, pBlock->offset, SEEK_SET) < 0) {
tsdbError("vgId:%d failed to load block statis part while seek file %s to offset %" PRId64 " since %s",
TSDB_READ_REPO_ID(pReadh), TSDB_FILE_FULL_NAME(pDFile), (int64_t)pBlock->offset, tstrerror(terrno));
return -1;
}
size_t size = tsdbBlockStatisSize(pBlock->numOfCols, (uint32_t)pBlock->blkVer);
if (tsdbMakeRoom((void **)(&(pReadh->pBlkData)), size) < 0) return -1;
int64_t nread = tsdbReadDFile(pDFile, (void *)(pReadh->pBlkData), size);
if (nread < 0) {
tsdbError("vgId:%d failed to load block statis part while read file %s since %s, offset:%" PRId64 " len :%" PRIzu,
TSDB_READ_REPO_ID(pReadh), TSDB_FILE_FULL_NAME(pDFile), tstrerror(terrno), (int64_t)pBlock->offset, size);
return -1;
}
if (nread < size) {
terrno = TSDB_CODE_TDB_FILE_CORRUPTED;
tsdbError("vgId:%d block statis part in file %s is corrupted, offset:%" PRId64 " expected bytes:%" PRIzu
" read bytes: %" PRId64,
TSDB_READ_REPO_ID(pReadh), TSDB_FILE_FULL_NAME(pDFile), (int64_t)pBlock->offset, size, nread);
return -1;
}
if (!taosCheckChecksumWhole((uint8_t *)(pReadh->pBlkData), (uint32_t)size)) {
terrno = TSDB_CODE_TDB_FILE_CORRUPTED;
tsdbError("vgId:%d block statis part in file %s is corrupted since wrong checksum, offset:%" PRId64 " len :%" PRIzu,
TSDB_READ_REPO_ID(pReadh), TSDB_FILE_FULL_NAME(pDFile), (int64_t)pBlock->offset, size);
return -1;
}
return 0;
}
static int tsdbLoadBlockStatisFromAggr(SReadH *pReadh, SBlock *pBlock) {
ASSERT((pBlock->blkVer > TSDB_SBLK_VER_0) && (pBlock->aggrStat)); // TODO: remove after pass all the test
SDFile *pDFileAggr = pBlock->last ? TSDB_READ_SMAL_FILE(pReadh) : TSDB_READ_SMAD_FILE(pReadh);
if (tsdbSeekDFile(pDFileAggr, pBlock->aggrOffset, SEEK_SET) < 0) {
tsdbError("vgId:%d failed to load block aggr part while seek file %s to offset %" PRIu64 " since %s",
TSDB_READ_REPO_ID(pReadh), TSDB_FILE_FULL_NAME(pDFileAggr), (uint64_t)pBlock->aggrOffset,
tstrerror(terrno));
return -1;
}
size_t sizeAggr = tsdbBlockAggrSize(pBlock->numOfCols, (uint32_t)pBlock->blkVer);
if (tsdbMakeRoom((void **)(&(pReadh->pAggrBlkData)), sizeAggr) < 0) return -1;
int64_t nreadAggr = tsdbReadDFile(pDFileAggr, (void *)(pReadh->pAggrBlkData), sizeAggr);
if (nreadAggr < 0) {
tsdbError("vgId:%d failed to load block aggr part while read file %s since %s, offset:%" PRIu64 " len :%" PRIzu,
TSDB_READ_REPO_ID(pReadh), TSDB_FILE_FULL_NAME(pDFileAggr), tstrerror(terrno),
(uint64_t)pBlock->aggrOffset, sizeAggr);
return -1;
}
if (nreadAggr < sizeAggr) {
terrno = TSDB_CODE_TDB_FILE_CORRUPTED;
tsdbError("vgId:%d block aggr part in file %s is corrupted, offset:%" PRIu64 " expected bytes:%" PRIzu
" read bytes: %" PRId64,
TSDB_READ_REPO_ID(pReadh), TSDB_FILE_FULL_NAME(pDFileAggr), (uint64_t)pBlock->aggrOffset, sizeAggr,
nreadAggr);
return -1;
}
if (!taosCheckChecksumWhole((uint8_t *)(pReadh->pAggrBlkData), (uint32_t)sizeAggr)) {
terrno = TSDB_CODE_TDB_FILE_CORRUPTED;
tsdbError("vgId:%d block aggr part in file %s is corrupted since wrong checksum, offset:%" PRIu64 " len :%" PRIzu,
TSDB_READ_REPO_ID(pReadh), TSDB_FILE_FULL_NAME(pDFileAggr), (uint64_t)pBlock->aggrOffset, sizeAggr);
return -1;
}
return 0;
}
int tsdbLoadBlockStatis(SReadH *pReadh, SBlock *pBlock) {
ASSERT(pBlock->numOfSubBlocks <= 1);
if (pBlock->blkVer > TSDB_SBLK_VER_0) {
if (pBlock->aggrStat) {
return tsdbLoadBlockStatisFromAggr(pReadh, pBlock);
}
return 1;
}
return tsdbLoadBlockStatisFromDFile(pReadh, pBlock);
}
int tsdbLoadBlockOffset(SReadH *pReadh, SBlock *pBlock) {
ASSERT(pBlock->numOfSubBlocks <= 1);
return tsdbLoadBlockStatisFromDFile(pReadh, pBlock);
}
int tsdbEncodeSBlockIdx(void **buf, SBlockIdx *pIdx) {
int tlen = 0;
tlen += taosEncodeVariantI32(buf, pIdx->tid);
tlen += taosEncodeVariantU32(buf, pIdx->len);
tlen += taosEncodeVariantU32(buf, pIdx->offset);
tlen += taosEncodeFixedU8(buf, pIdx->hasLast);
tlen += taosEncodeVariantU32(buf, pIdx->numOfBlocks);
tlen += taosEncodeFixedU64(buf, pIdx->uid);
tlen += taosEncodeFixedU64(buf, pIdx->maxKey);
return tlen;
}
void *tsdbDecodeSBlockIdx(void *buf, SBlockIdx *pIdx) {
uint8_t hasLast = 0;
uint32_t numOfBlocks = 0;
uint64_t value = 0;
if ((buf = taosDecodeVariantI32(buf, &(pIdx->tid))) == NULL) return NULL;
if ((buf = taosDecodeVariantU32(buf, &(pIdx->len))) == NULL) return NULL;
if ((buf = taosDecodeVariantU32(buf, &(pIdx->offset))) == NULL) return NULL;
if ((buf = taosDecodeFixedU8(buf, &(hasLast))) == NULL) return NULL;
pIdx->hasLast = hasLast;
if ((buf = taosDecodeVariantU32(buf, &(numOfBlocks))) == NULL) return NULL;
pIdx->numOfBlocks = numOfBlocks;
if ((buf = taosDecodeFixedU64(buf, &value)) == NULL) return NULL;
pIdx->uid = (int64_t)value;
if ((buf = taosDecodeFixedU64(buf, &value)) == NULL) return NULL;
pIdx->maxKey = (TSKEY)value;
return buf;
}
void tsdbGetBlockStatis(SReadH *pReadh, SDataStatis *pStatis, int numOfCols, SBlock *pBlock) {
if (pBlock->blkVer == TSDB_SBLK_VER_0) {
SBlockData *pBlockData = pReadh->pBlkData;
for (int i = 0, j = 0; i < numOfCols;) {
if (j >= pBlockData->numOfCols) {
pStatis[i].numOfNull = -1;
i++;
continue;
}
SBlockColV0 *pSBlkCol = ((SBlockColV0 *)(pBlockData->cols)) + j;
if (pStatis[i].colId == pSBlkCol->colId) {
pStatis[i].sum = pSBlkCol->sum;
pStatis[i].max = pSBlkCol->max;
pStatis[i].min = pSBlkCol->min;
pStatis[i].maxIndex = pSBlkCol->maxIndex;
pStatis[i].minIndex = pSBlkCol->minIndex;
pStatis[i].numOfNull = pSBlkCol->numOfNull;
i++;
j++;
} else if (pStatis[i].colId < pSBlkCol->colId) {
pStatis[i].numOfNull = -1;
i++;
} else {
j++;
}
}
} else if (pBlock->aggrStat) {
SAggrBlkData *pAggrBlkData = pReadh->pAggrBlkData;
for (int i = 0, j = 0; i < numOfCols;) {
if (j >= pBlock->numOfCols) {
pStatis[i].numOfNull = -1;
i++;
continue;
}
SAggrBlkCol *pAggrBlkCol = ((SAggrBlkCol *)(pAggrBlkData)) + j;
if (pStatis[i].colId == pAggrBlkCol->colId) {
pStatis[i].sum = pAggrBlkCol->sum;
pStatis[i].max = pAggrBlkCol->max;
pStatis[i].min = pAggrBlkCol->min;
pStatis[i].maxIndex = pAggrBlkCol->maxIndex;
pStatis[i].minIndex = pAggrBlkCol->minIndex;
pStatis[i].numOfNull = pAggrBlkCol->numOfNull;
i++;
j++;
} else if (pStatis[i].colId < pAggrBlkCol->colId) {
pStatis[i].numOfNull = -1;
i++;
} else {
j++;
}
}
}
}
static void tsdbResetReadTable(SReadH *pReadh) {
tdResetDataCols(pReadh->pDCols[0]);
tdResetDataCols(pReadh->pDCols[1]);
pReadh->cidx = 0;
pReadh->pBlkIdx = NULL;
pReadh->pTable = NULL;
}
static void tsdbResetReadFile(SReadH *pReadh) {
tsdbResetReadTable(pReadh);
taosArrayClear(pReadh->aBlkIdx);
tsdbCloseDFileSet(TSDB_READ_FSET(pReadh));
}
static int tsdbLoadBlockDataImpl(SReadH *pReadh, SBlock *pBlock, SDataCols *pDataCols) {
ASSERT(pBlock->numOfSubBlocks == 0 || pBlock->numOfSubBlocks == 1);
SDFile *pDFile = (pBlock->last) ? TSDB_READ_LAST_FILE(pReadh) : TSDB_READ_DATA_FILE(pReadh);
tdResetDataCols(pDataCols);
if (tsdbMakeRoom((void **)(&TSDB_READ_BUF(pReadh)), pBlock->len) < 0) return -1;
SBlockData *pBlockData = (SBlockData *)TSDB_READ_BUF(pReadh);
if (tsdbSeekDFile(pDFile, pBlock->offset, SEEK_SET) < 0) {
tsdbError("vgId:%d failed to load block data part while seek file %s to offset %" PRId64 " since %s",
TSDB_READ_REPO_ID(pReadh), TSDB_FILE_FULL_NAME(pDFile), (int64_t)pBlock->offset, tstrerror(terrno));
return -1;
}
int64_t nread = tsdbReadDFile(pDFile, TSDB_READ_BUF(pReadh), pBlock->len);
if (nread < 0) {
tsdbError("vgId:%d failed to load block data part while read file %s since %s, offset:%" PRId64 " len :%d",
TSDB_READ_REPO_ID(pReadh), TSDB_FILE_FULL_NAME(pDFile), tstrerror(terrno), (int64_t)pBlock->offset,
pBlock->len);
return -1;
}
if (nread < pBlock->len) {
terrno = TSDB_CODE_TDB_FILE_CORRUPTED;
tsdbError("vgId:%d block data part in file %s is corrupted, offset:%" PRId64
" expected bytes:%d read bytes: %" PRId64,
TSDB_READ_REPO_ID(pReadh), TSDB_FILE_FULL_NAME(pDFile), (int64_t)pBlock->offset, pBlock->len, nread);
return -1;
}
int32_t tsize = (int32_t)tsdbBlockStatisSize(pBlock->numOfCols, (uint32_t)pBlock->blkVer);
if (!taosCheckChecksumWhole((uint8_t *)TSDB_READ_BUF(pReadh), tsize)) {
terrno = TSDB_CODE_TDB_FILE_CORRUPTED;
tsdbError("vgId:%d block statis part in file %s is corrupted since wrong checksum, offset:%" PRId64 " len :%d",
TSDB_READ_REPO_ID(pReadh), TSDB_FILE_FULL_NAME(pDFile), (int64_t)pBlock->offset, tsize);
return -1;
}
ASSERT(tsize < pBlock->len);
ASSERT(pBlockData->numOfCols == pBlock->numOfCols);
pDataCols->numOfRows = pBlock->numOfRows;
// Recover the data
int ccol = 0; // loop iter for SBlockCol object
int dcol = 0; // loop iter for SDataCols object
SBlockCol blockCol = {0};
SBlockCol *pBlockCol = &blockCol;
while (dcol < pDataCols->numOfCols) {
SDataCol *pDataCol = &(pDataCols->cols[dcol]);
if (dcol != 0 && ccol >= pBlockData->numOfCols) {
// Set current column as NULL and forward
dataColReset(pDataCol);
dcol++;
continue;
}
int16_t tcolId = 0;
uint32_t toffset = TSDB_KEY_COL_OFFSET;
int32_t tlen = pBlock->keyLen;
if (dcol != 0) {
tsdbGetSBlockCol(pBlock, &pBlockCol, pBlockData->cols, ccol);
tcolId = pBlockCol->colId;
toffset = tsdbGetBlockColOffset(pBlockCol);
tlen = pBlockCol->len;
} else {
ASSERT(pDataCol->colId == tcolId);
}
if (tcolId == pDataCol->colId) {
if (pBlock->algorithm == TWO_STAGE_COMP) {
int zsize = pDataCol->bytes * pBlock->numOfRows + COMP_OVERFLOW_BYTES;
if (tsdbMakeRoom((void **)(&TSDB_READ_COMP_BUF(pReadh)), zsize) < 0) return -1;
}
if (tsdbCheckAndDecodeColumnData(pDataCol, POINTER_SHIFT(pBlockData, tsize + toffset), tlen, pBlock->algorithm,
pBlock->numOfRows, pDataCols->maxPoints, TSDB_READ_COMP_BUF(pReadh),
(int)taosTSizeof(TSDB_READ_COMP_BUF(pReadh))) < 0) {
tsdbError("vgId:%d file %s is broken at column %d block offset %" PRId64 " column offset %u",
TSDB_READ_REPO_ID(pReadh), TSDB_FILE_FULL_NAME(pDFile), tcolId, (int64_t)pBlock->offset, toffset);
return -1;
}
if (dcol != 0) {
ccol++;
}
dcol++;
} else if (tcolId < pDataCol->colId) {
ccol++;
} else {
// Set current column as NULL and forward
dataColReset(pDataCol);
dcol++;
}
}
return 0;
}
static int tsdbCheckAndDecodeColumnData(SDataCol *pDataCol, void *content, int32_t len, int8_t comp, int numOfRows,
int maxPoints, char *buffer, int bufferSize) {
if (!taosCheckChecksumWhole((uint8_t *)content, len)) {
terrno = TSDB_CODE_TDB_FILE_CORRUPTED;
return -1;
}
tdAllocMemForCol(pDataCol, maxPoints);
// Decode the data
if (comp) {
// Need to decompress
int tlen = (*(tDataTypes[pDataCol->type].decompFunc))(content, len - sizeof(TSCKSUM), numOfRows, pDataCol->pData,
pDataCol->spaceSize, comp, buffer, bufferSize);
if (tlen <= 0) {
tsdbError("Failed to decompress column, file corrupted, len:%d comp:%d numOfRows:%d maxPoints:%d bufferSize:%d",
len, comp, numOfRows, maxPoints, bufferSize);
terrno = TSDB_CODE_TDB_FILE_CORRUPTED;
return -1;
}
pDataCol->len = tlen;
} else {
// No need to decompress, just memcpy it
pDataCol->len = len - sizeof(TSCKSUM);
memcpy(pDataCol->pData, content, pDataCol->len);
}
if (IS_VAR_DATA_TYPE(pDataCol->type)) {
dataColSetOffset(pDataCol, numOfRows);
}
return 0;
}
static int tsdbLoadBlockDataColsImpl(SReadH *pReadh, SBlock *pBlock, SDataCols *pDataCols, int16_t *colIds,
int numOfColIds) {
ASSERT(pBlock->numOfSubBlocks == 0 || pBlock->numOfSubBlocks == 1);
ASSERT(colIds[0] == 0);
SDFile * pDFile = (pBlock->last) ? TSDB_READ_LAST_FILE(pReadh) : TSDB_READ_DATA_FILE(pReadh);
SBlockCol blockCol = {0};
tdResetDataCols(pDataCols);
// If only load timestamp column, no need to load SBlockData part
if (numOfColIds > 1 && tsdbLoadBlockOffset(pReadh, pBlock) < 0) return -1;
pDataCols->numOfRows = pBlock->numOfRows;
int dcol = 0;
int ccol = 0;
for (int i = 0; i < numOfColIds; i++) {
int16_t colId = colIds[i];
SDataCol * pDataCol = NULL;
SBlockCol *pBlockCol = NULL;
while (true) {
if (dcol >= pDataCols->numOfCols) {
pDataCol = NULL;
break;
}
pDataCol = &pDataCols->cols[dcol];
if (pDataCol->colId > colId) {
pDataCol = NULL;
break;
} else {
dcol++;
if (pDataCol->colId == colId) break;
}
}
if (pDataCol == NULL) continue;
ASSERT(pDataCol->colId == colId);
if (colId == 0) { // load the key row
blockCol.colId = colId;
blockCol.len = pBlock->keyLen;
blockCol.type = pDataCol->type;
blockCol.offset = TSDB_KEY_COL_OFFSET;
pBlockCol = &blockCol;
} else { // load non-key rows
while (true) {
if (ccol >= pBlock->numOfCols) {
pBlockCol = NULL;
break;
}
pBlockCol = &blockCol;
tsdbGetSBlockCol(pBlock, &pBlockCol, pReadh->pBlkData->cols, ccol);
if (pBlockCol->colId > colId) {
pBlockCol = NULL;
break;
} else {
ccol++;
if (pBlockCol->colId == colId) break;
}
}
if (pBlockCol == NULL) {
dataColReset(pDataCol);
continue;
}
ASSERT(pBlockCol->colId == pDataCol->colId);
}
if (tsdbLoadColData(pReadh, pDFile, pBlock, pBlockCol, pDataCol) < 0) return -1;
}
return 0;
}
static int tsdbLoadColData(SReadH *pReadh, SDFile *pDFile, SBlock *pBlock, SBlockCol *pBlockCol, SDataCol *pDataCol) {
ASSERT(pDataCol->colId == pBlockCol->colId);
STsdb *pRepo = TSDB_READ_REPO(pReadh);
STsdbCfg * pCfg = REPO_CFG(pRepo);
int tsize = pDataCol->bytes * pBlock->numOfRows + COMP_OVERFLOW_BYTES;
if (tsdbMakeRoom((void **)(&TSDB_READ_BUF(pReadh)), pBlockCol->len) < 0) return -1;
if (tsdbMakeRoom((void **)(&TSDB_READ_COMP_BUF(pReadh)), tsize) < 0) return -1;
int64_t offset = pBlock->offset + tsdbBlockStatisSize(pBlock->numOfCols, (uint32_t)pBlock->blkVer) +
tsdbGetBlockColOffset(pBlockCol);
if (tsdbSeekDFile(pDFile, offset, SEEK_SET) < 0) {
tsdbError("vgId:%d failed to load block column data while seek file %s to offset %" PRId64 " since %s",
TSDB_READ_REPO_ID(pReadh), TSDB_FILE_FULL_NAME(pDFile), offset, tstrerror(terrno));
return -1;
}
int64_t nread = tsdbReadDFile(pDFile, TSDB_READ_BUF(pReadh), pBlockCol->len);
if (nread < 0) {
tsdbError("vgId:%d failed to load block column data while read file %s since %s, offset:%" PRId64 " len :%d",
TSDB_READ_REPO_ID(pReadh), TSDB_FILE_FULL_NAME(pDFile), tstrerror(terrno), offset, pBlockCol->len);
return -1;
}
if (nread < pBlockCol->len) {
terrno = TSDB_CODE_TDB_FILE_CORRUPTED;
tsdbError("vgId:%d block column data in file %s is corrupted, offset:%" PRId64 " expected bytes:%d" PRIzu
" read bytes: %" PRId64,
TSDB_READ_REPO_ID(pReadh), TSDB_FILE_FULL_NAME(pDFile), offset, pBlockCol->len, nread);
return -1;
}
if (tsdbCheckAndDecodeColumnData(pDataCol, pReadh->pBuf, pBlockCol->len, pBlock->algorithm, pBlock->numOfRows,
pCfg->maxRowsPerFileBlock, pReadh->pCBuf, (int32_t)taosTSizeof(pReadh->pCBuf)) < 0) {
tsdbError("vgId:%d file %s is broken at column %d offset %" PRId64, REPO_ID(pRepo), TSDB_FILE_FULL_NAME(pDFile),
pBlockCol->colId, offset);
return -1;
}
return 0;
}
/*
* Copyright (c) 2019 TAOS Data, Inc. <jhtao@taosdata.com>
*
* This program is free software: you can use, redistribute, and/or modify
* it under the terms of the GNU Affero General Public License, version 3
* or later ("AGPL"), as published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
\ No newline at end of file
AUX_SOURCE_DIRECTORY(${CMAKE_CURRENT_SOURCE_DIR} SOURCE_LIST)
add_executable(tsdbTests ${SOURCE_LIST})
target_link_libraries(tsdbTests gtest gtest_main pthread common tsdb tutil trpc)
add_test(NAME unit COMMAND ${CMAKE_CURRENT_BINARY_DIR}/tsdbTests)
\ No newline at end of file
#include <gtest/gtest.h>
#include <stdlib.h>
#include <sys/time.h>
#include "tsdb.h"
#include "tsdbMain.h"
static double getCurTime() {
struct timeval tv;
gettimeofday(&tv, NULL);
return tv.tv_sec + tv.tv_usec * 1E-6;
}
typedef struct {
STsdbRepo *pRepo;
bool isAscend;
int tid;
uint64_t uid;
int sversion;
TSKEY startTime;
TSKEY interval;
int totalRows;
int rowsPerSubmit;
STSchema * pSchema;
} SInsertInfo;
static int insertData(SInsertInfo *pInfo) {
SSubmitMsg *pMsg =
(SSubmitMsg *)malloc(sizeof(SSubmitMsg) + sizeof(SSubmitBlk) + dataRowMaxBytesFromSchema(pInfo->pSchema) * pInfo->rowsPerSubmit);
if (pMsg == NULL) return -1;
TSKEY start_time = pInfo->startTime;
// Loop to write data
double stime = getCurTime();
for (int k = 0; k < pInfo->totalRows/pInfo->rowsPerSubmit; k++) {
memset((void *)pMsg, 0, sizeof(SSubmitMsg));
SSubmitBlk *pBlock = (SSubmitBlk *)pMsg->blocks;
pBlock->uid = pInfo->uid;
pBlock->tid = pInfo->tid;
pBlock->sversion = pInfo->sversion;
pBlock->dataLen = 0;
pBlock->schemaLen = 0;
pBlock->numOfRows = 0;
for (int i = 0; i < pInfo->rowsPerSubmit; i++) {
// start_time += 1000;
if (pInfo->isAscend) {
start_time += pInfo->interval;
} else {
start_time -= pInfo->interval;
}
SDataRow row = (SDataRow)(pBlock->data + pBlock->dataLen);
tdInitDataRow(row, pInfo->pSchema);
for (int j = 0; j < schemaNCols(pInfo->pSchema); j++) {
STColumn *pTCol = schemaColAt(pInfo->pSchema, j);
if (j == 0) { // Just for timestamp
tdAppendColVal(row, (void *)(&start_time), pTCol->type, pTCol->offset);
} else { // For int
int val = 10;
tdAppendColVal(row, (void *)(&val), pTCol->type, pTCol->offset);
}
}
pBlock->dataLen += dataRowLen(row);
pBlock->numOfRows++;
}
pMsg->length = sizeof(SSubmitMsg) + sizeof(SSubmitBlk) + pBlock->dataLen;
pMsg->numOfBlocks = 1;
pBlock->dataLen = htonl(pBlock->dataLen);
pBlock->numOfRows = htonl(pBlock->numOfRows);
pBlock->schemaLen = htonl(pBlock->schemaLen);
pBlock->uid = htobe64(pBlock->uid);
pBlock->tid = htonl(pBlock->tid);
pBlock->sversion = htonl(pBlock->sversion);
pBlock->padding = htonl(pBlock->padding);
pMsg->length = htonl(pMsg->length);
pMsg->numOfBlocks = htonl(pMsg->numOfBlocks);
if (tsdbInsertData(pInfo->pRepo, pMsg, NULL) < 0) {
tfree(pMsg);
return -1;
}
}
double etime = getCurTime();
printf("Spent %f seconds to write %d records\n", etime - stime, pInfo->totalRows);
tfree(pMsg);
return 0;
}
static void tsdbSetCfg(STsdbCfg *pCfg, int32_t tsdbId, int32_t cacheBlockSize, int32_t totalBlocks, int32_t maxTables,
int32_t daysPerFile, int32_t keep, int32_t minRows, int32_t maxRows, int8_t precision,
int8_t compression) {
pCfg->tsdbId = tsdbId;
pCfg->cacheBlockSize = cacheBlockSize;
pCfg->totalBlocks = totalBlocks;
// pCfg->maxTables = maxTables;
pCfg->daysPerFile = daysPerFile;
pCfg->keep = keep;
pCfg->minRowsPerFileBlock = minRows;
pCfg->maxRowsPerFileBlock = maxRows;
pCfg->precision = precision;
pCfg->compression = compression;
}
static void tsdbSetTableCfg(STableCfg *pCfg) {
STSchemaBuilder schemaBuilder = {0};
pCfg->type = TSDB_NORMAL_TABLE;
pCfg->superUid = TSDB_INVALID_SUPER_TABLE_ID;
pCfg->tableId.tid = 1;
pCfg->tableId.uid = 5849583783847394;
tdInitTSchemaBuilder(&schemaBuilder, 0);
int colId = 0;
for (int i = 0; i < 5; i++) {
tdAddColToSchema(&schemaBuilder, (colId == 0) ? TSDB_DATA_TYPE_TIMESTAMP : TSDB_DATA_TYPE_INT, colId, 0);
colId++;
}
pCfg->schema = tdGetSchemaFromBuilder(&schemaBuilder);
pCfg->name = strdup("t1");
tdDestroyTSchemaBuilder(&schemaBuilder);
}
TEST(TsdbTest, testInsertSpeed) {
int vnode = 1;
int ret = 0;
STsdbCfg tsdbCfg;
STableCfg tableCfg;
std::string testDir = "./test";
char * rootDir = strdup((testDir + "/vnode" + std::to_string(vnode)).c_str());
tsdbDebugFlag = 131; //NOTE: you must set the flag
taosRemoveDir(rootDir);
// Create and open repository
tsdbSetCfg(&tsdbCfg, 1, 16, 4, -1, -1, -1, -1, -1, -1, -1);
tsdbCreateRepo(rootDir, &tsdbCfg);
STsdbRepo *repo = tsdbOpenRepo(rootDir, NULL);
ASSERT_NE(repo, nullptr);
// Create table
tsdbSetTableCfg(&tableCfg);
tsdbCreateTable(repo, &tableCfg);
// Insert data
SInsertInfo iInfo = {repo, true, 1, 5849583783847394, 0, 1590000000000, 10, 10000000, 100, tableCfg.schema};
insertData(&iInfo);
tsdbCloseRepo(repo, 1);
}
static char *getTKey(const void *data) {
return (char *)data;
}
\ No newline at end of file
......@@ -112,6 +112,9 @@ public:
int32_t catalogGetTableHashVgroup(const SName* pTableName, SVgroupInfo* vgInfo) const {
// todo
vgInfo->vgId = 1;
vgInfo->numOfEps = 1;
vgInfo->epAddr[0].port = 6030;
strcpy(vgInfo->epAddr[0].fqdn, "node1");
return 0;
}
......
......@@ -234,7 +234,7 @@ static SSubplan* initSubplan(SPlanContext* pCxt, int32_t type) {
static void vgroupInfoToEpSet(const SVgroupInfo* vg, SQueryNodeAddr* execNode) {
execNode->nodeId = vg->vgId;
execNode->inUse = 0; // todo
execNode->inUse = vg->inUse;
execNode->numOfEps = vg->numOfEps;
for (int8_t i = 0; i < vg->numOfEps; ++i) {
execNode->epAddr[i] = vg->epAddr[i];
......
......@@ -62,7 +62,7 @@ static bool fromObjectWithAlloc(const cJSON* json, const char* name, FFromJson f
return func(jObj, *obj);
}
static bool addArray(cJSON* json, const char* name, FToJson func, const SArray* array) {
static bool addTarray(cJSON* json, const char* name, FToJson func, const SArray* array, bool isPoint) {
size_t size = (NULL == array) ? 0 : taosArrayGetSize(array);
if (size > 0) {
cJSON* jArray = cJSON_AddArrayToObject(json, name);
......@@ -70,7 +70,7 @@ static bool addArray(cJSON* json, const char* name, FToJson func, const SArray*
return false;
}
for (size_t i = 0; i < size; ++i) {
if (!addItem(jArray, func, taosArrayGetP(array, i))) {
if (!addItem(jArray, func, isPoint ? taosArrayGetP(array, i) : taosArrayGet(array, i))) {
return false;
}
}
......@@ -78,11 +78,19 @@ static bool addArray(cJSON* json, const char* name, FToJson func, const SArray*
return true;
}
static bool fromArray(const cJSON* json, const char* name, FFromJson func, SArray** array, int32_t itemSize) {
static bool addInlineArray(cJSON* json, const char* name, FToJson func, const SArray* array) {
return addTarray(json, name, func, array, false);
}
static bool addArray(cJSON* json, const char* name, FToJson func, const SArray* array) {
return addTarray(json, name, func, array, true);
}
static bool fromTarray(const cJSON* json, const char* name, FFromJson func, SArray** array, int32_t itemSize, bool isPoint) {
const cJSON* jArray = cJSON_GetObjectItem(json, name);
int32_t size = (NULL == jArray ? 0 : cJSON_GetArraySize(jArray));
if (size > 0) {
*array = taosArrayInit(size, POINTER_BYTES);
*array = taosArrayInit(size, isPoint ? POINTER_BYTES : itemSize);
if (NULL == *array) {
return false;
}
......@@ -92,11 +100,19 @@ static bool fromArray(const cJSON* json, const char* name, FFromJson func, SArra
if (NULL == item || !func(cJSON_GetArrayItem(jArray, i), item)) {
return false;
}
taosArrayPush(*array, &item);
taosArrayPush(*array, isPoint ? &item : item);
}
return true;
}
static bool fromInlineArray(const cJSON* json, const char* name, FFromJson func, SArray** array, int32_t itemSize) {
return fromTarray(json, name, func, array, itemSize, false);
}
static bool fromArray(const cJSON* json, const char* name, FFromJson func, SArray** array, int32_t itemSize) {
return fromTarray(json, name, func, array, itemSize, true);
}
static bool addRawArray(cJSON* json, const char* name, FToJson func, const void* array, int32_t itemSize, int32_t size) {
if (size > 0) {
cJSON* jArray = cJSON_AddArrayToObject(json, name);
......@@ -556,6 +572,32 @@ static bool epAddrFromJson(const cJSON* json, void* obj) {
return true;
}
static const char* jkNodeAddrId = "NodeId";
static const char* jkNodeAddrInUse = "InUse";
static const char* jkNodeAddrEpAddrs = "EpAddrs";
static bool nodeAddrToJson(const void* obj, cJSON* json) {
const SQueryNodeAddr* ep = (const SQueryNodeAddr*)obj;
bool res = cJSON_AddNumberToObject(json, jkNodeAddrId, ep->nodeId);
if (res) {
res = cJSON_AddNumberToObject(json, jkNodeAddrInUse, ep->inUse);
}
if (res) {
res = addRawArray(json, jkNodeAddrEpAddrs, epAddrToJson, ep->epAddr, ep->numOfEps, sizeof(SEpAddrMsg));
}
return res;
}
static bool nodeAddrFromJson(const cJSON* json, void* obj) {
SQueryNodeAddr* ep = (SQueryNodeAddr*)obj;
ep->nodeId = getNumber(json, jkNodeAddrId);
ep->inUse = getNumber(json, jkNodeAddrInUse);
int32_t numOfEps = 0;
bool res = fromRawArray(json, jkNodeAddrEpAddrs, nodeAddrFromJson, &ep->epAddr, sizeof(SEpAddrMsg), &numOfEps);
ep->numOfEps = numOfEps;
return res;
}
static const char* jkExchangeNodeSrcTemplateId = "SrcTemplateId";
static const char* jkExchangeNodeSrcEndPoints = "SrcEndPoints";
......@@ -563,7 +605,7 @@ static bool exchangeNodeToJson(const void* obj, cJSON* json) {
const SExchangePhyNode* exchange = (const SExchangePhyNode*)obj;
bool res = cJSON_AddNumberToObject(json, jkExchangeNodeSrcTemplateId, exchange->srcTemplateId);
if (res) {
res = addArray(json, jkExchangeNodeSrcEndPoints, epAddrToJson, exchange->pSrcEndPoints);
res = addInlineArray(json, jkExchangeNodeSrcEndPoints, nodeAddrToJson, exchange->pSrcEndPoints);
}
return res;
}
......@@ -571,7 +613,7 @@ static bool exchangeNodeToJson(const void* obj, cJSON* json) {
static bool exchangeNodeFromJson(const cJSON* json, void* obj) {
SExchangePhyNode* exchange = (SExchangePhyNode*)obj;
exchange->srcTemplateId = getNumber(json, jkExchangeNodeSrcTemplateId);
return fromArray(json, jkExchangeNodeSrcEndPoints, epAddrFromJson, &exchange->pSrcEndPoints, sizeof(SEpAddrMsg));
return fromInlineArray(json, jkExchangeNodeSrcEndPoints, nodeAddrFromJson, &exchange->pSrcEndPoints, sizeof(SQueryNodeAddr));
}
static bool specificPhyNodeToJson(const void* obj, cJSON* json) {
......@@ -803,7 +845,6 @@ static cJSON* subplanToJson(const SSubplan* subplan) {
if (res) {
res = addObject(jSubplan, jkSubplanDataSink, dataSinkToJson, subplan->pDataSink);
}
if (!res) {
cJSON_Delete(jSubplan);
return NULL;
......
......@@ -9,9 +9,3 @@ target_link_libraries(
tkv
PUBLIC os
)
\ No newline at end of file
if(${BUILD_WITH_ROCKSDB})
target_link_libraries(
tkv
PUBLIC rocksdb
)
endif(${BUILD_WITH_ROCKSDB})
......@@ -13,18 +13,27 @@
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#include "tsdbRowMergeBuf.h"
#include "tdataformat.h"
// row1 has higher priority
SMemRow tsdbMergeTwoRows(SMergeBuf *pBuf, SMemRow row1, SMemRow row2, STSchema *pSchema1, STSchema *pSchema2) {
if(row2 == NULL) return row1;
if(row1 == NULL) return row2;
ASSERT(pSchema1->version == memRowVersion(row1));
ASSERT(pSchema2->version == memRowVersion(row2));
if(tsdbMergeBufMakeSureRoom(pBuf, pSchema1, pSchema2) < 0) {
return NULL;
}
return mergeTwoMemRows(*pBuf, row1, row2, pSchema1, pSchema2);
#ifndef _TD_TDISK_MGR_H_
#define _TD_TDISK_MGR_H_
#ifdef __cplusplus
extern "C" {
#endif
#include "os.h"
#include "tkvDef.h"
typedef struct SDiskMgr SDiskMgr;
int tdmOpen(SDiskMgr **ppDiskMgr, const char *fname, uint16_t pgsize);
int tdmClose(SDiskMgr *pDiskMgr);
int tdmReadPage(SDiskMgr *pDiskMgr, pgid_t pgid, void *pData);
int tdmWritePage(SDiskMgr *pDiskMgr, pgid_t pgid, const void *pData);
int32_t tdmAllocPage(SDiskMgr *pDiskMgr);
#ifdef __cplusplus
}
#endif
#endif /*_TD_TDISK_MGR_H_*/
\ No newline at end of file
......@@ -12,17 +12,30 @@
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
// #ifndef _TD_TSDB_COMPACT_H_
// #define _TD_TSDB_COMPACT_H_
// #ifdef __cplusplus
// extern "C" {
// #endif
#ifndef _TD_TKV_PAGE_H_
#define _TD_TKV_PAGE_H_
// void *tsdbCompactImpl(STsdb *pRepo);
#include "os.h"
// #ifdef __cplusplus
// }
// #endif
#ifdef __cplusplus
extern "C" {
#endif
// #endif /* _TD_TSDB_COMPACT_H_ */
\ No newline at end of file
typedef struct {
uint16_t dbver;
uint16_t pgsize;
uint32_t cksm;
} SPgHdr;
typedef struct {
SPgHdr chdr;
uint16_t used; // number of used slots
uint16_t loffset; // the offset of the starting location of the last slot used
} SSlottedPgHdr;
#ifdef __cplusplus
}
#endif
#endif /*_TD_TKV_PAGE_H_*/
\ No newline at end of file
......@@ -16,40 +16,15 @@
#ifndef _TD_TKV_DEF_H_
#define _TD_TKV_DEF_H_
#ifdef USE_ROCKSDB
#include <rocksdb/c.h>
#endif
#include "os.h"
#ifdef __cplusplus
extern "C" {
#endif
struct STkvDb {
#ifdef USE_ROCKSDB
rocksdb_t *db;
#endif
};
struct STkvOpts {
#ifdef USE_ROCKSDB
rocksdb_options_t *opts;
#endif
};
struct STkvCache {
// TODO
};
struct STkvReadOpts {
#ifdef USE_ROCKSDB
rocksdb_readoptions_t *ropts;
#endif
};
struct STkvWriteOpts {
#ifdef USE_ROCKSDB
rocksdb_writeoptions_t *wopts;
#endif
};
// pgid_t
typedef int32_t pgid_t;
#define TKV_IVLD_PGID ((pgid_t)-1)
#ifdef __cplusplus
}
......
......@@ -13,33 +13,37 @@
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#ifndef TSDB_ROW_MERGE_BUF_H
#define TSDB_ROW_MERGE_BUF_H
#include "tDiskMgr.h"
#ifdef __cplusplus
extern "C" {
#endif
struct SDiskMgr {
const char *fname;
uint16_t pgsize;
FileFd fd;
int32_t npgid;
};
#include "tsdb.h"
#include "tchecksum.h"
#include "tsdbReadImpl.h"
#define PAGE_OFFSET(PGID, PGSIZE) ((PGID) * (PGSIZE))
typedef void* SMergeBuf;
SDataRow tsdbMergeTwoRows(SMergeBuf *pBuf, SMemRow row1, SMemRow row2, STSchema *pSchema1, STSchema *pSchema2);
int tdmOpen(SDiskMgr **ppDiskMgr, const char *fname, uint16_t pgsize) {
// TODO
return 0;
}
static FORCE_INLINE int tsdbMergeBufMakeSureRoom(SMergeBuf *pBuf, STSchema* pSchema1, STSchema* pSchema2) {
size_t len1 = dataRowMaxBytesFromSchema(pSchema1);
size_t len2 = dataRowMaxBytesFromSchema(pSchema2);
return tsdbMakeRoom(pBuf, MAX(len1, len2));
int tdmClose(SDiskMgr *pDiskMgr) {
// TODO
return 0;
}
static FORCE_INLINE void tsdbFreeMergeBuf(SMergeBuf buf) {
taosTZfree(buf);
int tdmReadPage(SDiskMgr *pDiskMgr, pgid_t pgid, void *pData) {
taosLSeekFile(pDiskMgr->fd, PAGE_OFFSET(pgid, pDiskMgr->pgsize), SEEK_SET);
taosReadFile(pDiskMgr->fd, pData, pDiskMgr->pgsize);
return 0;
}
#ifdef __cplusplus
int tdmWritePage(SDiskMgr *pDiskMgr, pgid_t pgid, const void *pData) {
taosLSeekFile(pDiskMgr->fd, PAGE_OFFSET(pgid, pDiskMgr->pgsize), SEEK_SET);
taosWriteFile(pDiskMgr->fd, pData, pDiskMgr->pgsize);
return 0;
}
#endif
#endif /* ifndef TSDB_ROW_MERGE_BUF_H */
int32_t tdmAllocPage(SDiskMgr *pDiskMgr) { return pDiskMgr->npgid++; }
\ No newline at end of file
/*
* Copyright (c) 2019 TAOS Data, Inc. <jhtao@taosdata.com>
*
* This program is free software: you can use, redistribute, and/or modify
* it under the terms of the GNU Affero General Public License, version 3
* or later ("AGPL"), as published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#include "tkv.h"
#include "tkvDef.h"
static pthread_once_t isInit = PTHREAD_ONCE_INIT;
static STkvReadOpts defaultReadOpts;
static STkvWriteOpts defaultWriteOpts;
static void tkvInit();
STkvDb *tkvOpen(const STkvOpts *options, const char *path) {
pthread_once(&isInit, tkvInit);
STkvDb *pDb = NULL;
pDb = (STkvDb *)malloc(sizeof(*pDb));
if (pDb == NULL) {
return NULL;
}
#ifdef USE_ROCKSDB
char *err = NULL;
pDb->db = rocksdb_open(options->opts, path, &err);
// TODO: check err
#endif
return pDb;
}
void tkvClose(STkvDb *pDb) {
if (pDb) {
#ifdef USE_ROCKSDB
rocksdb_close(pDb->db);
#endif
free(pDb);
}
}
void tkvPut(STkvDb *pDb, const STkvWriteOpts *pwopts, const char *key, size_t keylen, const char *val, size_t vallen) {
#ifdef USE_ROCKSDB
char *err = NULL;
rocksdb_put(pDb->db, pwopts ? pwopts->wopts : defaultWriteOpts.wopts, key, keylen, val, vallen, &err);
// TODO: check error
#endif
}
char *tkvGet(STkvDb *pDb, const STkvReadOpts *propts, const char *key, size_t keylen, size_t *vallen) {
char *ret = NULL;
#ifdef USE_ROCKSDB
char *err = NULL;
ret = rocksdb_get(pDb->db, propts ? propts->ropts : defaultReadOpts.ropts, key, keylen, vallen, &err);
// TODD: check error
#endif
return ret;
}
STkvOpts *tkvOptsCreate() {
STkvOpts *pOpts = NULL;
pOpts = (STkvOpts *)malloc(sizeof(*pOpts));
if (pOpts == NULL) {
return NULL;
}
#ifdef USE_ROCKSDB
pOpts->opts = rocksdb_options_create();
// TODO: check error
#endif
return pOpts;
}
void tkvOptsDestroy(STkvOpts *pOpts) {
if (pOpts) {
#ifdef USE_ROCKSDB
rocksdb_options_destroy(pOpts->opts);
#endif
free(pOpts);
}
}
void tkvOptionsSetCache(STkvOpts *popts, STkvCache *pCache) {
// TODO
}
void tkvOptsSetCreateIfMissing(STkvOpts *pOpts, unsigned char c) {
#ifdef USE_ROCKSDB
rocksdb_options_set_create_if_missing(pOpts->opts, c);
#endif
}
STkvReadOpts *tkvReadOptsCreate() {
STkvReadOpts *pReadOpts = NULL;
pReadOpts = (STkvReadOpts *)malloc(sizeof(*pReadOpts));
if (pReadOpts == NULL) {
return NULL;
}
#ifdef USE_ROCKSDB
pReadOpts->ropts = rocksdb_readoptions_create();
#endif
return pReadOpts;
}
void tkvReadOptsDestroy(STkvReadOpts *pReadOpts) {
if (pReadOpts) {
#ifdef USE_ROCKSDB
rocksdb_readoptions_destroy(pReadOpts->ropts);
#endif
free(pReadOpts);
}
}
STkvWriteOpts *tkvWriteOptsCreate() {
STkvWriteOpts *pWriteOpts = NULL;
pWriteOpts = (STkvWriteOpts *)malloc(sizeof(*pWriteOpts));
if (pWriteOpts == NULL) {
return NULL;
}
#ifdef USE_ROCKSDB
pWriteOpts->wopts = rocksdb_writeoptions_create();
#endif
return pWriteOpts;
}
void tkvWriteOptsDestroy(STkvWriteOpts *pWriteOpts) {
if (pWriteOpts) {
#ifdef USE_ROCKSDB
rocksdb_writeoptions_destroy(pWriteOpts->wopts);
#endif
free(pWriteOpts);
}
}
/* ------------------------ STATIC METHODS ------------------------ */
static void tkvInit() {
#ifdef USE_ROCKSDB
defaultReadOpts.ropts = rocksdb_readoptions_create();
defaultWriteOpts.wopts = rocksdb_writeoptions_create();
rocksdb_writeoptions_disable_WAL(defaultWriteOpts.wopts, true);
#endif
}
static void tkvClear() {
#ifdef USE_ROCKSDB
rocksdb_readoptions_destroy(defaultReadOpts.ropts);
rocksdb_writeoptions_destroy(defaultWriteOpts.wopts);
#endif
}
/*
* Copyright (c) 2019 TAOS Data, Inc. <jhtao@taosdata.com>
*
* This program is free software: you can use, redistribute, and/or modify
* it under the terms of the GNU Affero General Public License, version 3
* or later ("AGPL"), as published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
\ No newline at end of file
#include "gtest/gtest.h"
#include "iostream"
#include "tDiskMgr.h"
TEST(tDiskMgrTest, simple_test) {
// TODO
std::cout << "This is in tDiskMgrTest::simple_test" << std::endl;
}
\ No newline at end of file
......@@ -120,7 +120,7 @@ int64_t taosReadFile(FileFd fd, void *buf, int64_t count) {
return count;
}
int64_t taosWriteFile(FileFd fd, void *buf, int64_t n) {
int64_t taosWriteFile(FileFd fd, const void *buf, int64_t n) {
int64_t nleft = n;
int64_t nwritten = 0;
char * tbuf = (char *)buf;
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册