From 3a4a91a053edb1377ca053034fd3d17d603f4ff1 Mon Sep 17 00:00:00 2001 From: slguan Date: Fri, 27 Mar 2020 00:18:47 +0800 Subject: [PATCH] remove unsed files --- src/vnode/detail/CMakeLists.txt | 25 - src/vnode/detail/inc/vnode.h | 574 -- src/vnode/detail/inc/vnodeCache.h | 61 - src/vnode/detail/inc/vnodeDataFilterFunc.h | 35 - src/vnode/detail/inc/vnodeFile.h | 99 - src/vnode/detail/inc/vnodeQueryImpl.h | 293 - src/vnode/detail/inc/vnodeRead.h | 300 - src/vnode/detail/inc/vnodeShell.h | 29 - src/vnode/detail/inc/vnodeStore.h | 35 - src/vnode/detail/inc/vnodeSupertableQuery.h | 27 - src/vnode/detail/inc/vnodeSystem.h | 29 - src/vnode/detail/inc/vnodeTagMgmt.h | 58 - src/vnode/detail/inc/vnodeUtil.h | 94 - src/vnode/detail/src/vnodeCache.c | 1165 --- src/vnode/detail/src/vnodeCommit.c | 292 - src/vnode/detail/src/vnodeFile.c | 1880 ----- src/vnode/detail/src/vnodeFile.spec.c | 110 - src/vnode/detail/src/vnodeFileUtil.c | 237 - src/vnode/detail/src/vnodeFilterFunc.c | 558 -- src/vnode/detail/src/vnodeImport.c | 1573 ---- src/vnode/detail/src/vnodeMeter.c | 825 -- src/vnode/detail/src/vnodeQueryImpl.c | 7714 ------------------- src/vnode/detail/src/vnodeQueryProcess.c | 1364 ---- src/vnode/detail/src/vnodeRead.c | 1153 --- src/vnode/detail/src/vnodeStore.c | 409 - src/vnode/detail/src/vnodeStore.spec.c | 22 - src/vnode/detail/src/vnodeStream.c | 207 - src/vnode/detail/src/vnodeSupertableQuery.c | 874 --- src/vnode/detail/src/vnodeTagMgmt.c | 391 - src/vnode/detail/src/vnodeUtil.c | 767 -- 30 files changed, 21200 deletions(-) delete mode 100644 src/vnode/detail/CMakeLists.txt delete mode 100644 src/vnode/detail/inc/vnode.h delete mode 100644 src/vnode/detail/inc/vnodeCache.h delete mode 100644 src/vnode/detail/inc/vnodeDataFilterFunc.h delete mode 100644 src/vnode/detail/inc/vnodeFile.h delete mode 100644 src/vnode/detail/inc/vnodeQueryImpl.h delete mode 100644 src/vnode/detail/inc/vnodeRead.h delete mode 100644 src/vnode/detail/inc/vnodeShell.h delete mode 100644 
src/vnode/detail/inc/vnodeStore.h delete mode 100644 src/vnode/detail/inc/vnodeSupertableQuery.h delete mode 100644 src/vnode/detail/inc/vnodeSystem.h delete mode 100644 src/vnode/detail/inc/vnodeTagMgmt.h delete mode 100644 src/vnode/detail/inc/vnodeUtil.h delete mode 100644 src/vnode/detail/src/vnodeCache.c delete mode 100644 src/vnode/detail/src/vnodeCommit.c delete mode 100644 src/vnode/detail/src/vnodeFile.c delete mode 100644 src/vnode/detail/src/vnodeFile.spec.c delete mode 100644 src/vnode/detail/src/vnodeFileUtil.c delete mode 100644 src/vnode/detail/src/vnodeFilterFunc.c delete mode 100644 src/vnode/detail/src/vnodeImport.c delete mode 100644 src/vnode/detail/src/vnodeMeter.c delete mode 100644 src/vnode/detail/src/vnodeQueryImpl.c delete mode 100644 src/vnode/detail/src/vnodeQueryProcess.c delete mode 100644 src/vnode/detail/src/vnodeRead.c delete mode 100644 src/vnode/detail/src/vnodeStore.c delete mode 100644 src/vnode/detail/src/vnodeStore.spec.c delete mode 100644 src/vnode/detail/src/vnodeStream.c delete mode 100644 src/vnode/detail/src/vnodeSupertableQuery.c delete mode 100644 src/vnode/detail/src/vnodeTagMgmt.c delete mode 100644 src/vnode/detail/src/vnodeUtil.c diff --git a/src/vnode/detail/CMakeLists.txt b/src/vnode/detail/CMakeLists.txt deleted file mode 100644 index 7b186bda53..0000000000 --- a/src/vnode/detail/CMakeLists.txt +++ /dev/null @@ -1,25 +0,0 @@ -CMAKE_MINIMUM_REQUIRED(VERSION 2.8) -PROJECT(TDengine) - -IF ((TD_LINUX_64) OR (TD_LINUX_32 AND TD_ARM)) - INCLUDE_DIRECTORIES(${TD_COMMUNITY_DIR}/src/inc) - INCLUDE_DIRECTORIES(${TD_COMMUNITY_DIR}/src/dnode/inc) - INCLUDE_DIRECTORIES(${TD_COMMUNITY_DIR}/src/mnode/detail/inc) - INCLUDE_DIRECTORIES(${TD_COMMUNITY_DIR}/src/vnode/detail/inc) - INCLUDE_DIRECTORIES(${TD_COMMUNITY_DIR}/src/client/inc) - INCLUDE_DIRECTORIES(${TD_OS_DIR}/inc) - INCLUDE_DIRECTORIES(inc) - AUX_SOURCE_DIRECTORY(./src SRC) - LIST(REMOVE_ITEM SRC ./src/vnodeFileUtil.c) - LIST(REMOVE_ITEM SRC ./src/taosGrant.c) - - 
ADD_LIBRARY(vnode ${SRC}) - - IF (TD_CLUSTER) - TARGET_LINK_LIBRARIES(vnode vcluster) - ELSEIF (TD_LITE) - TARGET_LINK_LIBRARIES(vnode vlite) - ENDIF () -ENDIF () - - diff --git a/src/vnode/detail/inc/vnode.h b/src/vnode/detail/inc/vnode.h deleted file mode 100644 index ecd9e3f392..0000000000 --- a/src/vnode/detail/inc/vnode.h +++ /dev/null @@ -1,574 +0,0 @@ -/* - * Copyright (c) 2019 TAOS Data, Inc. - * - * This program is free software: you can use, redistribute, and/or modify - * it under the terms of the GNU Affero General Public License, version 3 - * or later ("AGPL"), as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. - * - * You should have received a copy of the GNU Affero General Public License - * along with this program. If not, see . - */ - -#ifndef TDENGINE_VNODE_H -#define TDENGINE_VNODE_H - -#ifdef __cplusplus -extern "C" { -#endif - -#include "os.h" - -#include "tglobalcfg.h" -#include "tidpool.h" -#include "tlog.h" -#include "tmempool.h" -#include "trpc.h" -#include "tsclient.h" -#include "taosdef.h" -#include "tsocket.h" -#include "ttime.h" -#include "ttimer.h" -#include "tutil.h" -#include "vnodeCache.h" -#include "vnodeFile.h" -#include "vnodePeer.h" -#include "vnodeShell.h" - -#define TSDB_FILE_HEADER_LEN 512 -#define TSDB_FILE_HEADER_VERSION_SIZE 32 -#define TSDB_CACHE_POS_BITS 13 -#define TSDB_CACHE_POS_MASK 0x1FFF - -#define TSDB_ACTION_INSERT 0 -#define TSDB_ACTION_IMPORT 1 -#define TSDB_ACTION_DELETE 2 -#define TSDB_ACTION_UPDATE 3 -#define TSDB_ACTION_MAX 4 - -enum _data_source { - TSDB_DATA_SOURCE_METER, - TSDB_DATA_SOURCE_VNODE, - TSDB_DATA_SOURCE_SHELL, - TSDB_DATA_SOURCE_QUEUE, - TSDB_DATA_SOURCE_LOG, -}; - -enum _sync_cmd { - TSDB_SYNC_CMD_FILE, - TSDB_SYNC_CMD_CACHE, - TSDB_SYNC_CMD_CREATE, - TSDB_SYNC_CMD_REMOVE, -}; - -typedef struct { - 
int64_t offset : 48; - int64_t length : 16; -} SMeterObjHeader; - -typedef struct { - int64_t len; - char data[]; -} SData; - -#pragma pack(push, 8) -typedef struct { - SVnodeStatisticInfo vnodeStatistic; - int vnode; - SVnodeCfg cfg; - // SDiskDesc tierDisk[TSDB_MAX_TIER]; - SVPeerDesc vpeers[TSDB_VNODES_SUPPORT]; - SVnodePeer * peerInfo[TSDB_VNODES_SUPPORT]; - char selfIndex; - char vnodeStatus; - char accessState; // Vnode access state, Readable/Writable - char syncStatus; - char commitInProcess; - pthread_t commitThread; - TSKEY firstKey; // minimum key uncommitted, it may be smaller than - // commitFirstKey - TSKEY commitFirstKey; // minimum key for a commit file, it shall be - // xxxx00000, calculated from fileId - TSKEY commitLastKey; // maximum key for a commit file, it shall be xxxx99999, - // calculated fromm fileId - int commitFileId; - TSKEY lastCreate; - TSKEY lastRemove; - TSKEY lastKey; // last key for the whole vnode, updated by every insert - // operation - uint64_t version; - - int streamRole; - int numOfStreams; - void *streamTimer; - - TSKEY lastKeyOnFile; // maximum key on the last file, is shall be xxxx99999 - int fileId; - int badFileId; - int numOfFiles; - int maxFiles; - int maxFile1; - int maxFile2; - int nfd; // temp head file FD - int hfd; // head file FD - int lfd; // last file FD - int tfd; // temp last file FD - int dfd; // data file FD - int64_t dfSize; - int64_t lfSize; - uint64_t * fmagic; // hold magic number for each file - char cfn[TSDB_FILENAME_LEN]; - char nfn[TSDB_FILENAME_LEN]; - char lfn[TSDB_FILENAME_LEN]; // last file name - char tfn[TSDB_FILENAME_LEN]; // temp last file name - pthread_mutex_t vmutex; - - int logFd; - char * pMem; - char * pWrite; - pthread_mutex_t logMutex; - char logFn[TSDB_FILENAME_LEN]; - char logOFn[TSDB_FILENAME_LEN]; - int64_t mappingSize; - int64_t mappingThreshold; - - void * commitTimer; - void ** meterList; - void * pCachePool; - void * pQueue; - pthread_t thread; - int peersOnline; - int 
shellConns; - int meterConns; - struct _qinfo *pQInfoList; - - TAOS * dbConn; - SMeterObjHeader *meterIndex; -} SVnodeObj; -#pragma pack(pop) - -typedef struct SColumn { - short colId; - short bytes; - char type; -} SColumn; - -typedef struct _meter_obj { - uint64_t uid; - char meterId[TSDB_TABLE_ID_LEN]; - int sid; - short vnode; - short numOfColumns; - short bytesPerPoint; - short maxBytes; - int32_t pointsPerBlock; - int32_t pointsPerFileBlock; - int freePoints; - TSKEY lastKey; // updated by insert operation - TSKEY lastKeyOnFile; // last key on file, updated by commit action - TSKEY timeStamp; // delete or added time - uint64_t commitCount; - int32_t sversion; - short sqlLen; - char searchAlgorithm : 4; - char compAlgorithm : 4; - char status; // 0: ok, 1: stop stream computing - - char reserved[16]; - int state; - int numOfQueries; - char * pSql; - void * pStream; - void * pCache; - SColumn *schema; -} SMeterObj; - -typedef struct { - char type; - char pversion; // protocol version - char action; // insert, import, delete, update - int32_t sversion; // only for insert - int32_t sid; - int32_t len; - uint64_t lastVersion; // latest version - char cont[]; -} SVMsgHeader; - -struct tSQLBinaryExpr; - -typedef struct SColumnInfoEx { - SColumnInfo data; - int16_t colIdx; - int16_t colIdxInBuf; - - /* - * 0: denotes if its is required in the first round of scan of data block - * 1: denotes if its is required in the secondary scan - */ - int16_t req[2]; -} SColumnInfoEx; - -struct SColumnFilterElem; - -typedef bool (*__filter_func_t)(struct SColumnFilterElem *pFilter, char *val1, char *val2); - -typedef struct SColumnFilterElem { - int16_t bytes; // column length - __filter_func_t fp; - SColumnFilterInfo filterInfo; -} SColumnFilterElem; - -typedef struct SSingleColumnFilterInfo { - SColumnInfoEx info; - int32_t numOfFilters; - SColumnFilterElem *pFilters; - char * pData; -} SSingleColumnFilterInfo; - -typedef struct SQuery { - short numOfCols; - SOrderVal order; - 
char keyIsMet; // if key is met, it will be set - char over; - int fileId; // only for query in file - int hfd; // only for query in file, head file handle - int dfd; // only for query in file, data file handle - int lfd; // only for query in file, last file handle - SCompBlock *pBlock; // only for query in file - SField ** pFields; - - int numOfBlocks; // only for query in file - int blockBufferSize; // length of pBlock buffer - int currentSlot; - int firstSlot; - - /* - * the two parameters are utilized to handle the data missing situation, caused by import operation. - * When the commit slot is the first slot, and commitPoints != 0 - */ - int32_t commitSlot; // which slot is committed, - int32_t commitPoint; // starting point for next commit - - int slot; - int pos; - TSKEY key; - int compBlockLen; // only for import - int64_t blockId; - TSKEY skey; - TSKEY ekey; - int64_t intervalTime; - int64_t slidingTime; // sliding time for sliding window query - char intervalTimeUnit; // interval data type, used for daytime revise - int8_t precision; - int16_t numOfOutputCols; - int16_t interpoType; - int16_t checkBufferInLoop; // check if the buffer is full during scan each block - SLimitVal limit; - int32_t rowSize; - - SSqlGroupbyExpr * pGroupbyExpr; - SSqlFunctionExpr * pSelectExpr; - SColumnInfoEx * colList; - int32_t numOfFilterCols; - SSingleColumnFilterInfo *pFilterInfo; - int64_t * defaultVal; - TSKEY lastKey; - - // buffer info - int64_t pointsRead; // the number of points returned - int64_t pointsToRead; // maximum number of points to read - int64_t pointsOffset; // the number of points offset to save read data - SData **sdata; - SData * tsData; // timestamp column/primary key column -} SQuery; - -typedef struct { - char spi; - char encrypt; - char secret[TSDB_KEY_LEN]; - char cipheringKey[TSDB_KEY_LEN]; -} SConnSec; - -typedef struct { - char * buffer; - char * offset; - int trans; - int bufferSize; - pthread_mutex_t qmutex; -} STranQueue; - -// internal 
globals -extern int tsMeterSizeOnFile; - -extern void * tsQueryQhandle; -extern int tsVnodePeers; -extern int tsMaxVnode; -extern int tsMaxQueues; -extern int tsOpenVnodes; -extern SVnodeObj *vnodeList; -extern void * vnodeTmrCtrl; - -// read API -extern int (*vnodeSearchKeyFunc[])(char *pValue, int num, TSKEY key, int order); - -void *vnodeQueryOnSingleTable(SMeterObj **pMeterObj, SSqlGroupbyExpr *pGroupbyExpr, SSqlFunctionExpr *sqlExprs, - SQueryMeterMsg *pQueryMsg, int *code); - -void *vnodeQueryOnMultiMeters(SMeterObj **pMeterObj, SSqlGroupbyExpr *pGroupbyExpr, SSqlFunctionExpr *pSqlExprs, - SQueryMeterMsg *pQueryMsg, int *code); - -// assistant/tool functions -SSqlGroupbyExpr *vnodeCreateGroupbyExpr(SQueryMeterMsg *pQuery, int32_t *code); - -SSqlFunctionExpr *vnodeCreateSqlFunctionExpr(SQueryMeterMsg *pQuery, int32_t *code); -bool vnodeValidateExprColumnInfo(SQueryMeterMsg *pQueryMsg, SSqlFuncExprMsg *pExprMsg); - -bool vnodeIsValidVnodeCfg(SVnodeCfg *pCfg); - -int32_t vnodeGetResultSize(void *handle, int32_t *numOfRows); - -int32_t vnodeCopyQueryResultToMsg(void *handle, char *data, int32_t numOfRows); - -int64_t vnodeGetOffsetVal(void *thandle); - -bool vnodeHasRemainResults(void *handle); - -int vnodeRetrieveQueryResult(void *handle, int *pNum, char *argv[]); - -int vnodeSaveQueryResult(void *handle, char *data, int32_t* size); - -int vnodeRetrieveQueryInfo(void *handle, int *numOfRows, int *rowSize, int16_t *timePrec); - -void vnodeFreeQInfo(void *, bool); - -void vnodeFreeQInfoInQueue(void *param); - -bool vnodeIsQInfoValid(void *param); -void vnodeDecRefCount(void *param); -void vnodeAddRefCount(void *param); - -int32_t vnodeConvertQueryMeterMsg(SQueryMeterMsg *pQuery); - -void vnodeQueryData(SSchedMsg *pMsg); - -// meter API -int vnodeOpenMetersVnode(int vnode); - -void vnodeCloseMetersVnode(int vnode); - -int vnodeCreateMeterObj(SMeterObj *pNew, SConnSec *pSec); - -int vnodeRemoveMeterObj(int vnode, int sid); - -int vnodeInsertPoints(SMeterObj *pObj, 
char *cont, int contLen, char source, void *, int sversion, int *numOfPoints, TSKEY now); - -int vnodeImportPoints(SMeterObj *pObj, char *cont, int contLen, char source, void *, int sversion, int *numOfPoints, TSKEY now); - -int vnodeInsertBufferedPoints(int vnode); - -int vnodeSaveAllMeterObjToFile(int vnode); - -int vnodeSaveMeterObjToFile(SMeterObj *pObj); - -int vnodeSaveVnodeCfg(int vnode, SVnodeCfg *pCfg, SVPeerDesc *pDesc); - -int vnodeSaveVnodeInfo(int vnode); - -// cache API -void *vnodeOpenCachePool(int vnode); - -void vnodeCloseCachePool(int vnode); - -void *vnodeAllocateCacheInfo(SMeterObj *pObj); - -void vnodeFreeCacheInfo(SMeterObj *pObj); - -void vnodeSetCommitQuery(SMeterObj *pObj, SQuery *pQuery); - -int vnodeInsertPointToCache(SMeterObj *pObj, char *pData); - -int vnodeQueryFromCache(SMeterObj *pObj, SQuery *pQuery); - -uint64_t vnodeGetPoolCount(SVnodeObj *pVnode); - -void vnodeUpdateCommitInfo(SMeterObj *pObj, int slot, int pos, uint64_t count); - -void vnodeCommitOver(SVnodeObj *pVnode); - -TSKEY vnodeGetFirstKey(int vnode); - -int vnodeSyncRetrieveCache(int vnode, int fd); - -int vnodeSyncRestoreCache(int vnode, int fd); - -pthread_t vnodeCreateCommitThread(SVnodeObj *pVnode); - -void vnodeCancelCommit(SVnodeObj *pVnode); - -void vnodeCloseStream(SVnodeObj *pVnode); - -void vnodeProcessCommitTimer(void *param, void *tmrId); - -void vnodeSearchPointInCache(SMeterObj *pObj, SQuery *pQuery); - -int vnodeAllocateCacheBlock(SMeterObj *pObj); - -int vnodeFreeCacheBlock(SCacheBlock *pCacheBlock); - -int vnodeIsCacheCommitted(SMeterObj *pObj); - -// file API -int vnodeInitFile(int vnode); - -int vnodeQueryFromFile(SMeterObj *pObj, SQuery *pQuery); - -void *vnodeCommitToFile(void *param); - -void *vnodeCommitMultiToFile(SVnodeObj *pVnode, int ssid, int esid); - -int vnodeSyncRetrieveFile(int vnode, int fd, uint32_t fileId, uint64_t *fmagic); - -int vnodeSyncRestoreFile(int vnode, int sfd); - -int vnodeWriteBlockToFile(SMeterObj *pObj, SCompBlock 
*pBlock, SData *data[], SData *cdata[], int pointsRead); - -int vnodeSearchPointInFile(SMeterObj *pObj, SQuery *pQuery); - -int vnodeReadCompBlockToMem(SMeterObj *pObj, SQuery *pQuery, SData *sdata[]); - -int vnodeOpenCommitFiles(SVnodeObj *pVnode, int noTempLast); - -void vnodeCloseCommitFiles(SVnodeObj *pVnode); - -int vnodeReadLastBlockToMem(SMeterObj *pObj, SCompBlock *pBlock, SData *sdata[]); - -// vnode API -void vnodeUpdateStreamRole(SVnodeObj *pVnode); - -int vnodeInitPeer(int numOfThreads); - -void vnodeCleanUpPeer(); - -int vnodeOpenPeerVnode(int vnode); - -void vnodeClosePeerVnode(int vnode); - -void *vnodeGetMeterPeerConnection(SMeterObj *pObj, int index); - -int vnodeForwardToPeer(SMeterObj *pObj, char *msg, int msgLen, char action, int sversion); - -void vnodeCloseAllSyncFds(int vnode); - -void vnodeConfigVPeers(int vnode, int numOfPeers, SVPeerDesc peerDesc[]); - -void vnodeStartSyncProcess(SVnodeObj *pVnode); - -void vnodeCancelSync(int vnode); - -void vnodeListPeerStatus(char *buffer); - -void vnodeCheckOwnStatus(SVnodeObj *pVnode); - -int vnodeSaveMeterObjToFile(SMeterObj *pObj); - -int vnodeRecoverFromPeer(SVnodeObj *pVnode, int fileId); - -// vnodes API -int vnodeInitVnodes(); - -int vnodeInitStore(); - -void vnodeCleanUpVnodes(); - -int vnodeRemoveVnode(int vnode); - -int vnodeCreateVnode(int vnode, SVnodeCfg *pCfg, SVPeerDesc *pDesc); - -void vnodeOpenStreams(void *param, void *tmrId); - -void vnodeCreateStream(SMeterObj *pObj); - -void vnodeRemoveStream(SMeterObj *pObj); - -// shell API -int vnodeInitShell(); - -void vnodeCleanUpShell(); - -int vnodeOpenShellVnode(int vnode); - -void vnodeCloseShellVnode(int vnode); - -// memter mgmt -int vnodeInitMeterMgmt(); - -void vnodeCleanUpMeterMgmt(); - -int vnodeOpenMeterMgmtVnode(int vnode); - -int vnodeOpenMeterMgmtStoreVnode(int vnode); - -void vnodeCloseMeterMgmtVnode(int vnode); - -int vnodeCreateMeterMgmt(SMeterObj *pObj, SConnSec *pSec); - -void vnodeRemoveMeterMgmt(SMeterObj *pObj); - 
-SConnSec *vnodeGetMeterSec(int vnode, int sid); - -int vnodeCreateMeterObjFile(int vnode); - -// mgmt - -void vnodeCleanUpMgmt(); - -int vnodeRetrieveMissedCreateMsg(int vnode, int fd, uint64_t stime); - -int vnodeRestoreMissedCreateMsg(int vnode, int fd); - -int vnodeRetrieveMissedRemoveMsg(int vid, int fd, uint64_t stime); - -int vnodeRestoreMissedRemoveMsg(int vnode, int fd); - -int vnodeProcessBufferedCreateMsgs(int vnode); - -void vnodeSendVpeerCfgMsg(int vnode); - -int vnodeSendMeterCfgMsg(int vnode, int sid); - -int vnodeMgmtConns(); - -void vnodeRemoveFile(int vnode, int fileId); - -// commit -int vnodeInitCommit(int vnode); - -void vnodeCleanUpCommit(int vnode); - -int vnodeRenewCommitLog(int vnode); - -void vnodeRemoveCommitLog(int vnode); - -int vnodeWriteToCommitLog(SMeterObj *pObj, char action, char *cont, int contLen, int sversion); - -extern int (*vnodeProcessAction[])(SMeterObj *, char *, int, char, void *, int, int *, TSKEY); - -extern int (*pCompFunc[])(const char *const input, int inputSize, const int elements, char *const output, - int outputSize, char algorithm, char *const buffer, int bufferSize); - -extern int (*pDecompFunc[])(const char *const input, int compressedSize, const int elements, char *const output, - int outputSize, char algorithm, char *const buffer, int bufferSize); - -// global variable and APIs provided by mgmt -extern char mgmtStatus; -extern char tsMgmtDirectory[]; -extern const int16_t vnodeFileVersion; - -#ifdef __cplusplus -} -#endif - -#endif // TDENGINE_VNODE_H diff --git a/src/vnode/detail/inc/vnodeCache.h b/src/vnode/detail/inc/vnodeCache.h deleted file mode 100644 index 8fb6d15647..0000000000 --- a/src/vnode/detail/inc/vnodeCache.h +++ /dev/null @@ -1,61 +0,0 @@ -/* - * Copyright (c) 2019 TAOS Data, Inc. 
- * - * This program is free software: you can use, redistribute, and/or modify - * it under the terms of the GNU Affero General Public License, version 3 - * or later ("AGPL"), as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. - * - * You should have received a copy of the GNU Affero General Public License - * along with this program. If not, see . - */ - -#ifndef TDENGINE_VNODECACHE_H -#define TDENGINE_VNODECACHE_H - -#ifdef __cplusplus -extern "C" { -#endif - -typedef struct { - short notFree; - short numOfPoints; - int slot; - int index; - int64_t blockId; - struct _meter_obj *pMeterObj; - char * offset[]; -} SCacheBlock; - -typedef struct { - int64_t blocks; - int maxBlocks; - int numOfBlocks; - int unCommittedBlocks; - int32_t currentSlot; - int32_t commitSlot; // which slot is committed - int32_t commitPoint; // starting point for next commit - SCacheBlock **cacheBlocks; // cache block list, circular list -} SCacheInfo; - -typedef struct { - int vnode; - char ** pMem; - int64_t freeSlot; - pthread_mutex_t vmutex; - uint64_t count; // kind of transcation ID - int64_t notFreeSlots; - int64_t threshold; - char commitInProcess; - int cacheBlockSize; - int cacheNumOfBlocks; -} SCachePool; - -#ifdef __cplusplus -} -#endif - -#endif // TDENGINE_VNODECACHE_H diff --git a/src/vnode/detail/inc/vnodeDataFilterFunc.h b/src/vnode/detail/inc/vnodeDataFilterFunc.h deleted file mode 100644 index c2cd06597a..0000000000 --- a/src/vnode/detail/inc/vnodeDataFilterFunc.h +++ /dev/null @@ -1,35 +0,0 @@ -/* - * Copyright (c) 2019 TAOS Data, Inc. - * - * This program is free software: you can use, redistribute, and/or modify - * it under the terms of the GNU Affero General Public License, version 3 - * or later ("AGPL"), as published by the Free Software Foundation. 
- * - * This program is distributed in the hope that it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. - * - * You should have received a copy of the GNU Affero General Public License - * along with this program. If not, see . - */ - -#ifndef TDENGINE_VNODEDATAFILTERFUNC_H -#define TDENGINE_VNODEDATAFILTERFUNC_H - -#ifdef __cplusplus -extern "C" { -#endif - -#include "vnode.h" - -__filter_func_t *vnodeGetRangeFilterFuncArray(int32_t type); - -__filter_func_t *vnodeGetValueFilterFuncArray(int32_t type); - -bool vnodeSupportPrefilter(int32_t type); - -#ifdef __cplusplus -} -#endif - -#endif // TDENGINE_VNODEDATAFILTERFUNC_H diff --git a/src/vnode/detail/inc/vnodeFile.h b/src/vnode/detail/inc/vnodeFile.h deleted file mode 100644 index 3202f80ed6..0000000000 --- a/src/vnode/detail/inc/vnodeFile.h +++ /dev/null @@ -1,99 +0,0 @@ -/* - * Copyright (c) 2019 TAOS Data, Inc. - * - * This program is free software: you can use, redistribute, and/or modify - * it under the terms of the GNU Affero General Public License, version 3 - * or later ("AGPL"), as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. - * - * You should have received a copy of the GNU Affero General Public License - * along with this program. If not, see . 
- */ - -#ifndef TDENGINE_VNODEFILE_H -#define TDENGINE_VNODEFILE_H - -#ifdef __cplusplus -extern "C" { -#endif - -#include "tchecksum.h" - -#define TSDB_VNODE_DELIMITER 0xF00AFA0F - -typedef struct { int64_t compInfoOffset; } SCompHeader; - -typedef struct { - short colId; - short bytes; - int32_t numOfNullPoints; - int32_t type : 8; - int32_t offset : 24; - int32_t len; // data length - int64_t sum; - int64_t max; - int64_t min; - int16_t maxIndex; - int16_t minIndex; - char reserved[20]; -} SField; - -typedef struct { - int64_t last : 1; - int64_t offset : 63; - int32_t algorithm : 8; // compression algorithm can be changed - int32_t numOfPoints : 24; // how many points have been written into this block - int32_t sversion; - int32_t len; // total length of this data block - uint16_t numOfCols; - char reserved[16]; - TSKEY keyFirst; // time stamp for the first point - TSKEY keyLast; // time stamp for the last point -} SCompBlock; - -typedef struct { - SCompBlock *compBlock; - SField * fields; -} SCompBlockFields; - -typedef struct { - uint64_t uid; - int64_t last : 1; - int64_t numOfBlocks : 62; - uint32_t delimiter; // delimiter for recovery - TSCKSUM checksum; - SCompBlock compBlocks[]; // comp block list -} SCompInfo; - -typedef struct { - int64_t tempHeadOffset; - int64_t compInfoOffset; - int64_t oldCompBlockOffset; - - int64_t oldNumOfBlocks; - int64_t newNumOfBlocks; - int64_t finalNumOfBlocks; - - int64_t oldCompBlockLen; - int64_t newCompBlockLen; - int64_t finalCompBlockLen; - - int64_t committedPoints; - int commitSlot; - int32_t last : 1; - int32_t changed : 1; - int32_t commitPos : 30; - int64_t commitCount; - SCompBlock lastBlock; -} SMeterInfo; - -typedef struct { int64_t totalStorage; } SVnodeHeadInfo; - -#ifdef __cplusplus -} -#endif - -#endif // TDENGINE_VNODEFILE_H diff --git a/src/vnode/detail/inc/vnodeQueryImpl.h b/src/vnode/detail/inc/vnodeQueryImpl.h deleted file mode 100644 index 9c28af22c7..0000000000 --- 
a/src/vnode/detail/inc/vnodeQueryImpl.h +++ /dev/null @@ -1,293 +0,0 @@ -/* - * Copyright (c) 2019 TAOS Data, Inc. - * - * This program is free software: you can use, redistribute, and/or modify - * it under the terms of the GNU Affero General Public License, version 3 - * or later ("AGPL"), as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. - * - * You should have received a copy of the GNU Affero General Public License - * along with this program. If not, see . - */ - -#ifndef TDENGINE_VNODEQUERYIMPL_H -#define TDENGINE_VNODEQUERYIMPL_H - -#ifdef __cplusplus -extern "C" { -#endif - -#include "os.h" - -#include "hash.h" -#include "hashfunc.h" - -#define GET_QINFO_ADDR(x) ((char*)(x)-offsetof(SQInfo, query)) -#define Q_STATUS_EQUAL(p, s) (((p) & (s)) != 0) - -/* - * set the output buffer page size is 16k - * The page size should be sufficient for at least one output result or intermediate result. - * Some intermediate results may be extremely large, such as top/bottom(100) query. - */ -#define DEFAULT_INTERN_BUF_SIZE 16384L - -#define INIT_ALLOCATE_DISK_PAGES 60L -#define DEFAULT_DATA_FILE_MAPPING_PAGES 2L -#define DEFAULT_DATA_FILE_MMAP_WINDOW_SIZE (DEFAULT_DATA_FILE_MAPPING_PAGES * DEFAULT_INTERN_BUF_SIZE) - -#define IO_ENGINE_MMAP 0 -#define IO_ENGINE_SYNC 1 - -#define DEFAULT_IO_ENGINE IO_ENGINE_SYNC - -/** - * check if the primary column is load by default, otherwise, the program will - * forced to load primary column explicitly. - */ -#define PRIMARY_TSCOL_LOADED(query) ((query)->colList[0].data.colId == PRIMARYKEY_TIMESTAMP_COL_INDEX) - -typedef enum { - - /* - * the program will call this function again, if this status is set. 
- * used to transfer from QUERY_RESBUF_FULL - */ - QUERY_NOT_COMPLETED = 0x1u, - - /* - * output buffer is full, so, the next query will be employed, - * in this case, we need to set the appropriated start scan point for - * the next query. - * - * this status is only exist in group-by clause and - * diff/add/division/multiply/ query. - */ - QUERY_RESBUF_FULL = 0x2u, - - /* - * query is over - * 1. this status is used in one row result query process, e.g., - * count/sum/first/last/ - * avg...etc. - * 2. when the query range on timestamp is satisfied, it is also denoted as - * query_compeleted - */ - QUERY_COMPLETED = 0x4u, - - /* - * all data has been scanned, so current search is stopped, - * At last, the function will transfer this status to QUERY_COMPLETED - */ - QUERY_NO_DATA_TO_CHECK = 0x8u, -} vnodeQueryStatus; - -typedef struct SPointInterpoSupporter { - int32_t numOfCols; - char** pPrevPoint; - char** pNextPoint; -} SPointInterpoSupporter; - -typedef struct SBlockInfo { - TSKEY keyFirst; - TSKEY keyLast; - int32_t numOfCols; - int32_t size; -} SBlockInfo; - -typedef struct SMeterDataBlockInfoEx { - SCompBlockFields pBlock; - SMeterDataInfo* pMeterDataInfo; - int32_t blockIndex; - int32_t groupIdx; /* number of group is less than the total number of meters */ -} SMeterDataBlockInfoEx; - -typedef enum { - DISK_DATA_LOAD_FAILED = -0x1, - DISK_DATA_LOADED = 0x0, - DISK_DATA_DISCARDED = 0x01, -} vnodeDiskLoadStatus; - -#define IS_MASTER_SCAN(runtime) (((runtime)->scanFlag & 1u) == MASTER_SCAN) -#define IS_SUPPLEMENT_SCAN(runtime) ((runtime)->scanFlag == SUPPLEMENTARY_SCAN) -#define SET_SUPPLEMENT_SCAN_FLAG(runtime) ((runtime)->scanFlag = SUPPLEMENTARY_SCAN) -#define SET_MASTER_SCAN_FLAG(runtime) ((runtime)->scanFlag = MASTER_SCAN) - -typedef int (*__block_search_fn_t)(char* data, int num, int64_t key, int order); - -static FORCE_INLINE SMeterObj* getMeterObj(void* hashHandle, int32_t sid) { - return *(SMeterObj**)taosHashGet(hashHandle, (const char*)&sid, 
sizeof(sid)); -} - -bool isQueryKilled(SQuery* pQuery); -bool isFixedOutputQuery(SQuery* pQuery); -bool isPointInterpoQuery(SQuery* pQuery); -bool isSumAvgRateQuery(SQuery *pQuery); -bool isTopBottomQuery(SQuery* pQuery); -bool isFirstLastRowQuery(SQuery* pQuery); -bool isTSCompQuery(SQuery* pQuery); -bool notHasQueryTimeRange(SQuery* pQuery); - -bool needSupplementaryScan(SQuery* pQuery); -bool onDemandLoadDatablock(SQuery* pQuery, int16_t queryRangeSet); - -void setQueryStatus(SQuery* pQuery, int8_t status); - -bool doRevisedResultsByLimit(SQInfo* pQInfo); -void truncateResultByLimit(SQInfo* pQInfo, int64_t* final, int32_t* interpo); - -void initCtxOutputBuf(SQueryRuntimeEnv* pRuntimeEnv); -void resetCtxOutputBuf(SQueryRuntimeEnv* pRuntimeEnv); -void forwardCtxOutputBuf(SQueryRuntimeEnv* pRuntimeEnv, int64_t output); - -bool needPrimaryTimestampCol(SQuery* pQuery, SBlockInfo* pBlockInfo); -void vnodeScanAllData(SQueryRuntimeEnv* pRuntimeEnv); - -int32_t vnodeQueryResultInterpolate(SQInfo* pQInfo, tFilePage** pDst, tFilePage** pDataSrc, int32_t numOfRows, - int32_t* numOfInterpo); -void copyResToQueryResultBuf(STableQuerySupportObj* pSupporter, SQuery* pQuery); - -void doSkipResults(SQueryRuntimeEnv* pRuntimeEnv); -void doFinalizeResult(SQueryRuntimeEnv* pRuntimeEnv); -int64_t getNumOfResult(SQueryRuntimeEnv* pRuntimeEnv); - -void forwardQueryStartPosition(SQueryRuntimeEnv* pRuntimeEnv); - -bool normalizedFirstQueryRange(bool dataInDisk, bool dataInCache, STableQuerySupportObj* pSupporter, - SPointInterpoSupporter* pPointInterpSupporter, int64_t* key); - -void pointInterpSupporterInit(SQuery* pQuery, SPointInterpoSupporter* pInterpoSupport); -void pointInterpSupporterDestroy(SPointInterpoSupporter* pPointInterpSupport); -void pointInterpSupporterSetData(SQInfo* pQInfo, SPointInterpoSupporter* pPointInterpSupport); - -int64_t loadRequiredBlockIntoMem(SQueryRuntimeEnv* pRuntimeEnv, SPositionInfo* position); -void disableFunctForSuppleScan(STableQuerySupportObj* 
pSupporter, int32_t order); -void enableFunctForMasterScan(SQueryRuntimeEnv* pRuntimeEnv, int32_t order); - -int32_t mergeMetersResultToOneGroups(STableQuerySupportObj* pSupporter); -void copyFromWindowResToSData(SQInfo* pQInfo, SWindowResult* result); - -SBlockInfo getBlockInfo(SQueryRuntimeEnv *pRuntimeEnv); -SBlockInfo getBlockBasicInfo(SQueryRuntimeEnv *pRuntimeEnv, void* pBlock, int32_t type); - -SCacheBlock* getCacheDataBlock(SMeterObj* pMeterObj, SQueryRuntimeEnv* pRuntimeEnv, int32_t slot); - -void stableApplyFunctionsOnBlock(STableQuerySupportObj* pSupporter, SMeterDataInfo* pMeterDataInfo, - SBlockInfo* pBlockInfo, SField* pFields, __block_search_fn_t searchFn); - -int32_t vnodeFilterQualifiedMeters(SQInfo* pQInfo, int32_t vid, tSidSet* pSidSet, SMeterDataInfo* pMeterDataInfo, - int32_t* numOfMeters, SMeterDataInfo*** pReqMeterDataInfo); -int32_t vnodeGetVnodeHeaderFileIndex(int32_t* fid, SQueryRuntimeEnv* pRuntimeEnv, int32_t order); - -int32_t createDataBlocksInfoEx(SMeterDataInfo** pMeterDataInfo, int32_t numOfMeters, - SMeterDataBlockInfoEx** pDataBlockInfoEx, int32_t numOfCompBlocks, - int32_t* nAllocBlocksInfoSize, int64_t addr); -void freeMeterBlockInfoEx(SMeterDataBlockInfoEx* pDataBlockInfoEx, int32_t len); - -void setExecutionContext(STableQuerySupportObj* pSupporter, SMeterQueryInfo* pMeterQueryInfo, int32_t meterIdx, - int32_t groupIdx, TSKEY nextKey); -int32_t setAdditionalInfo(STableQuerySupportObj *pSupporter, int32_t meterIdx, SMeterQueryInfo *pMeterQueryInfo); -void doGetAlignedIntervalQueryRangeImpl(SQuery* pQuery, int64_t pKey, int64_t keyFirst, int64_t keyLast, - int64_t* actualSkey, int64_t* actualEkey, int64_t* skey, int64_t* ekey); - -int64_t getQueryStartPositionInCache(SQueryRuntimeEnv* pRuntimeEnv, int32_t* slot, int32_t* pos, bool ignoreQueryRange); - -int32_t getDataBlocksForMeters(STableQuerySupportObj* pSupporter, SQuery* pQuery, int32_t numOfMeters, - const char* filePath, SMeterDataInfo** pMeterDataInfo, uint32_t* 
numOfBlocks); -int32_t LoadDatablockOnDemand(SCompBlock* pBlock, SField** pFields, uint8_t* blkStatus, SQueryRuntimeEnv* pRuntimeEnv, - int32_t fileIdx, int32_t slotIdx, __block_search_fn_t searchFn, bool onDemand); -int32_t vnodeGetHeaderFile(SQueryRuntimeEnv* pRuntimeEnv, int32_t fileIndex); - -/** - * Create SMeterQueryInfo. - * The MeterQueryInfo is created one for each table during super table query - * - * @param skey - * @param ekey - * @return - */ -SMeterQueryInfo* createMeterQueryInfo(STableQuerySupportObj* pSupporter, int32_t sid, TSKEY skey, TSKEY ekey); - -/** - * Destroy meter query info - * @param pMeterQInfo - * @param numOfCols - */ -void destroyMeterQueryInfo(SMeterQueryInfo* pMeterQueryInfo, int32_t numOfCols); - -/** - * change the meter query info for supplement scan - * @param pMeterQueryInfo - * @param skey - * @param ekey - */ -void changeMeterQueryInfoForSuppleQuery(SQuery* pQuery, SMeterQueryInfo* pMeterQueryInfo, - TSKEY skey, TSKEY ekey); - -/** - * add the new allocated disk page to meter query info - * the new allocated disk page is used to keep the intermediate (interval) results - * @param pQuery - * @param pMeterQueryInfo - * @param pSupporter - */ -tFilePage* addDataPageForMeterQueryInfo(SQuery* pQuery, SMeterQueryInfo* pMeterQueryInfo, - STableQuerySupportObj* pSupporter); - -/** - * restore the query range data from SMeterQueryInfo to runtime environment - * - * @param pRuntimeEnv - * @param pMeterQueryInfo - */ -void restoreIntervalQueryRange(SQueryRuntimeEnv* pRuntimeEnv, SMeterQueryInfo* pMeterQueryInfo); - -/** - * set the interval query range for the interval query, when handling a data(cache) block - * - * @param pMeterQueryInfo - * @param pSupporter - * @param key - */ -void setIntervalQueryRange(SMeterQueryInfo* pMeterQueryInfo, STableQuerySupportObj* pSupporter, int64_t key); - -/** - * set the meter data information - * @param pMeterDataInfo - * @param pMeterObj current query meter object - * @param meterIdx meter index 
in the sid list - * @param groupId group index, which the meter is belonged to - */ -void setMeterDataInfo(SMeterDataInfo* pMeterDataInfo, SMeterObj* pMeterObj, int32_t meterIdx, int32_t groupId); - -void vnodeSetTagValueInParam(tSidSet* pSidSet, SQueryRuntimeEnv* pRuntimeEnv, SMeterSidExtInfo* pMeterInfo); - -void vnodeCheckIfDataExists(SQueryRuntimeEnv* pRuntimeEnv, SMeterObj* pMeterObj, bool* dataInDisk, bool* dataInCache); - -void displayInterResult(SData** pdata, SQuery* pQuery, int32_t numOfRows); - -void vnodePrintQueryStatistics(STableQuerySupportObj* pSupporter); - -void clearTimeWindowResBuf(SQueryRuntimeEnv* pRuntimeEnv, SWindowResult* pOneOutputRes); -void copyTimeWindowResBuf(SQueryRuntimeEnv* pRuntimeEnv, SWindowResult* dst, const SWindowResult* src); - -int32_t initWindowResInfo(SWindowResInfo* pWindowResInfo, SQueryRuntimeEnv* pRuntimeEnv, int32_t size, - int32_t threshold, int16_t type); - -void cleanupTimeWindowInfo(SWindowResInfo* pWindowResInfo, SQueryRuntimeEnv* pRuntimeEnv); -void resetTimeWindowInfo(SQueryRuntimeEnv* pRuntimeEnv, SWindowResInfo* pWindowResInfo); -void clearFirstNTimeWindow(SQueryRuntimeEnv *pRuntimeEnv, int32_t num); - -void clearClosedTimeWindow(SQueryRuntimeEnv* pRuntimeEnv); -int32_t numOfClosedTimeWindow(SWindowResInfo* pWindowResInfo); -void closeTimeWindow(SWindowResInfo* pWindowResInfo, int32_t slot); -void closeAllTimeWindow(SWindowResInfo* pWindowResInfo); - -#ifdef __cplusplus -} -#endif - -#endif // TDENGINE_VNODEQUERYIMPL_H diff --git a/src/vnode/detail/inc/vnodeRead.h b/src/vnode/detail/inc/vnodeRead.h deleted file mode 100644 index 4e6e04208d..0000000000 --- a/src/vnode/detail/inc/vnodeRead.h +++ /dev/null @@ -1,300 +0,0 @@ -/* - * Copyright (c) 2019 TAOS Data, Inc. - * - * This program is free software: you can use, redistribute, and/or modify - * it under the terms of the GNU Affero General Public License, version 3 - * or later ("AGPL"), as published by the Free Software Foundation. 
- * - * This program is distributed in the hope that it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. - * - * You should have received a copy of the GNU Affero General Public License - * along with this program. If not, see . - */ - -#ifndef TDENGINE_VNODEREAD_H -#define TDENGINE_VNODEREAD_H - -#ifdef __cplusplus -extern "C" { -#endif - -#include "os.h" -#include "qresultBuf.h" - -#include "qinterpolation.h" -#include "vnodeTagMgmt.h" - -/* - * use to keep the first point position, consisting of position in blk and block - * id, file id - */ -typedef struct { - int32_t pos; - int32_t slot; - int32_t fileId; -} SPositionInfo; - -typedef struct SLoadDataBlockInfo { - int32_t fileListIndex; /* index of this file in files list of this vnode */ - int32_t fileId; - int32_t slotIdx; - int32_t sid; - bool tsLoaded; // if timestamp column of current block is loaded or not -} SLoadDataBlockInfo; - -typedef struct SLoadCompBlockInfo { - int32_t sid; /* meter sid */ - int32_t fileId; - int32_t fileListIndex; -} SLoadCompBlockInfo; - -/* - * the header file info for one vnode - */ -typedef struct SHeaderFileInfo { - int32_t fileID; // file id -} SHeaderFileInfo; - -typedef struct SQueryCostSummary { - double cacheTimeUs; - double fileTimeUs; - - int64_t numOfFiles; // opened files during query - int64_t numOfTables; // num of queries tables - int64_t numOfSeek; // number of seek operation - - int64_t readDiskBlocks; // accessed disk block - int64_t skippedFileBlocks; // skipped blocks - int64_t blocksInCache; // accessed cache blocks - - int64_t readField; // field size - int64_t totalFieldSize; // total read fields size - double loadFieldUs; // total elapsed time to read fields info - - int64_t totalBlockSize; // read data blocks - double loadBlocksUs; // total elapsed time to read data blocks - - int64_t totalGenData; // in-memory generated data - - int64_t readCompInfo; // read 
compblock info - int64_t totalCompInfoSize; // total comp block size - double loadCompInfoUs; // total elapsed time to read comp block info - - int64_t tmpBufferInDisk; // size of buffer for intermediate result -} SQueryCostSummary; - -typedef struct SPosInfo { - int16_t pageId; - int16_t rowId; -} SPosInfo; - -typedef struct STimeWindow { - TSKEY skey; - TSKEY ekey; -} STimeWindow; - -typedef struct SWindowStatus { - bool closed; -} SWindowStatus; - -typedef struct SWindowResult { - uint16_t numOfRows; - SPosInfo pos; // Position of current result in disk-based output buffer - SResultInfo* resultInfo; // For each result column, there is a resultInfo - STimeWindow window; // The time window that current result covers. - SWindowStatus status; -} SWindowResult; - -/* - * header files info, avoid to iterate the directory, the data is acquired - * during in query preparation function - */ -typedef struct SQueryFilesInfo { - SHeaderFileInfo* pFileInfo; - uint32_t numOfFiles; // the total available number of files for this virtual node during query execution - int32_t current; // the memory mapped header file, NOTE: only one header file can be mmap. 
- int32_t vnodeId; - - int32_t headerFd; // header file fd - int64_t headerFileSize; - int32_t dataFd; - int32_t lastFd; - - char headerFilePath[PATH_MAX]; // current opened header file name - char dataFilePath[PATH_MAX]; // current opened data file name - char lastFilePath[PATH_MAX]; // current opened last file path - char dbFilePathPrefix[PATH_MAX]; -} SQueryFilesInfo; - -typedef struct SWindowResInfo { - SWindowResult* pResult; // reference to SQuerySupporter->pResult - void* hashList; // hash list for quick access - int16_t type; // data type for hash key - int32_t capacity; // max capacity - int32_t curIndex; // current start active index - int32_t size; - - int64_t startTime; // start time of the first time window for sliding query - int64_t prevSKey; // previous (not completed) sliding window start key - int64_t threshold; // threshold for return completed results. -} SWindowResInfo; - -typedef struct SQueryRuntimeEnv { - SPositionInfo startPos; /* the start position, used for secondary/third iteration */ - SPositionInfo endPos; /* the last access position in query, served as the start pos of reversed order query */ - SPositionInfo nextPos; /* start position of the next scan */ - SData* colDataBuffer[TSDB_MAX_COLUMNS]; - SResultInfo* resultInfo; // todo refactor to merge with SWindowResInfo - uint8_t blockStatus; // Indicate if data block is loaded, the block is first/last/internal block - int32_t unzipBufSize; - SData* primaryColBuffer; - char* unzipBuffer; - char* secondaryUnzipBuffer; - SQuery* pQuery; - SMeterObj* pMeterObj; - SQLFunctionCtx* pCtx; - SLoadDataBlockInfo loadBlockInfo; /* record current block load information */ - SLoadCompBlockInfo loadCompBlockInfo; /* record current compblock information in SQuery */ - SQueryFilesInfo vnodeFileInfo; - int16_t numOfRowsPerPage; - int16_t offset[TSDB_MAX_COLUMNS]; - uint16_t scanFlag; // denotes reversed scan of data or not - SInterpolationInfo interpoInfo; - SData** pInterpoBuf; - - SWindowResInfo 
windowResInfo; - - STSBuf* pTSBuf; - STSCursor cur; - SQueryCostSummary summary; - bool stableQuery; // is super table query or not - SDiskbasedResultBuf* pResultBuf; // query result buffer based on blocked-wised disk file - - /* - * Temporarily hold the in-memory cache block info during scan cache blocks - * Here we do not use the cache block info from pMeterObj, simple because it may change anytime - * during the query by the submit/insert handling threads. - * So we keep a copy of the support structure as well as the cache block data itself. - */ - SCacheBlock cacheBlock; -} SQueryRuntimeEnv; - -/* intermediate pos during multimeter query involves interval */ -typedef struct SMeterQueryInfo { - int64_t lastKey; - int64_t skey; - int64_t ekey; - int32_t numOfRes; - int16_t queryRangeSet; // denote if the query range is set, only available for interval query - int64_t tag; - STSCursor cur; - int32_t sid; // for retrieve the page id list - - SWindowResInfo windowResInfo; -} SMeterQueryInfo; - -typedef struct SMeterDataInfo { - uint64_t offsetInHeaderFile; - int32_t numOfBlocks; - int32_t start; // start block index - SCompBlock* pBlock; - int32_t meterOrderIdx; - SMeterObj* pMeterObj; - int32_t groupIdx; // group id in meter list - SMeterQueryInfo* pMeterQInfo; -} SMeterDataInfo; - -typedef struct STableQuerySupportObj { - void* pMetersHashTable; // meter table hash list - - SMeterSidExtInfo** pMeterSidExtInfo; - int32_t numOfMeters; - - /* - * multimeter query resultset. - * In multimeter queries, the result is temporarily stored on this structure, instead of - * directly put result into output buffer, since we have no idea how many number of - * rows may be generated by a specific subgroup. When query on all subgroups is executed, - * the result is copy to output buffer. This attribution is not used during single meter query processing. 
- */ - SQueryRuntimeEnv runtimeEnv; - int64_t rawSKey; - int64_t rawEKey; - int32_t subgroupIdx; - int32_t offset; /* offset in group result set of subgroup */ - tSidSet* pSidSet; - - /* - * the query is executed position on which meter of the whole list. - * when the index reaches the last one of the list, it means the query is completed. - * We later may refactor to remove this attribution by using another flag to denote - * whether a multimeter query is completed or not. - */ - int32_t meterIdx; - - int32_t numOfGroupResultPages; - int32_t groupResultSize; - SMeterDataInfo* pMeterDataInfo; - - TSKEY* tsList; -} STableQuerySupportObj; - -typedef struct _qinfo { - uint64_t signature; - int32_t refCount; // QInfo reference count, when the value is 0, it can be released safely - char user[TSDB_TABLE_ID_LEN + 1]; - char sql[TSDB_SHOW_SQL_LEN]; - uint8_t stream; - uint16_t port; - uint32_t ip; - uint64_t startTime; - int64_t useconds; - int killed; - struct _qinfo *prev, *next; - SQuery query; - int totalPoints; - int pointsRead; - int pointsReturned; - int pointsInterpo; - int code; - char bufIndex; - char changed; - char over; - SMeterObj* pObj; - sem_t dataReady; - - STableQuerySupportObj* pTableQuerySupporter; - int (*fp)(SMeterObj*, SQuery*); -} SQInfo; - -int32_t vnodeQueryTablePrepare(SQInfo* pQInfo, SMeterObj* pMeterObj, STableQuerySupportObj* pSMultiMeterObj, - void* param); - -void vnodeQueryFreeQInfoEx(SQInfo* pQInfo); - -bool vnodeParametersSafetyCheck(SQuery* pQuery); - -int32_t vnodeSTableQueryPrepare(SQInfo* pQInfo, SQuery* pQuery, void* param); - -/** - * decrease the numofQuery of each table that is queried, enable the - * remove/close operation can be executed - * @param pQInfo - */ -void vnodeDecMeterRefcnt(SQInfo* pQInfo); - -/* sql query handle in dnode */ -void vnodeSingleTableQuery(SSchedMsg* pMsg); - -/* - * handle multi-meter query process - */ -void vnodeMultiMeterQuery(SSchedMsg* pMsg); - -#ifdef __cplusplus -} -#endif - -#endif // 
TDENGINE_VNODEREAD_H diff --git a/src/vnode/detail/inc/vnodeShell.h b/src/vnode/detail/inc/vnodeShell.h deleted file mode 100644 index b03634ca8f..0000000000 --- a/src/vnode/detail/inc/vnodeShell.h +++ /dev/null @@ -1,29 +0,0 @@ -/* - * Copyright (c) 2019 TAOS Data, Inc. - * - * This program is free software: you can use, redistribute, and/or modify - * it under the terms of the GNU Affero General Public License, version 3 - * or later ("AGPL"), as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. - * - * You should have received a copy of the GNU Affero General Public License - * along with this program. If not, see . - */ - -#ifndef TDENGINE_VNODESHELL_H -#define TDENGINE_VNODESHELL_H - -#ifdef __cplusplus -extern "C" { -#endif - -#include "os.h" - -#ifdef __cplusplus -} -#endif - -#endif // TDENGINE_VNODESHELL_H diff --git a/src/vnode/detail/inc/vnodeStore.h b/src/vnode/detail/inc/vnodeStore.h deleted file mode 100644 index 638bcb54bb..0000000000 --- a/src/vnode/detail/inc/vnodeStore.h +++ /dev/null @@ -1,35 +0,0 @@ -/* - * Copyright (c) 2019 TAOS Data, Inc. - * - * This program is free software: you can use, redistribute, and/or modify - * it under the terms of the GNU Affero General Public License, version 3 - * or later ("AGPL"), as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. - * - * You should have received a copy of the GNU Affero General Public License - * along with this program. If not, see . 
- */ - -#ifndef TDENGINE_VNODESTORE_H -#define TDENGINE_VNODESTORE_H - -#ifdef __cplusplus -extern "C" { -#endif - -void vnodeProcessDataFromVnode(SIntMsg *msg, void *tcpHandle); - -void vnodeCalcOpenVnodes(); - -bool vnodeRemoveDataFileFromLinkFile(char* linkFile, char* de_name); - -int vnodeInitInfo(); - -#ifdef __cplusplus -} -#endif - -#endif // TDEGINE_VNODESTORE_H diff --git a/src/vnode/detail/inc/vnodeSupertableQuery.h b/src/vnode/detail/inc/vnodeSupertableQuery.h deleted file mode 100644 index cc2d21871c..0000000000 --- a/src/vnode/detail/inc/vnodeSupertableQuery.h +++ /dev/null @@ -1,27 +0,0 @@ -/* - * Copyright (c) 2019 TAOS Data, Inc. - * - * This program is free software: you can use, redistribute, and/or modify - * it under the terms of the GNU Affero General Public License, version 3 - * or later ("AGPL"), as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. - * - * You should have received a copy of the GNU Affero General Public License - * along with this program. If not, see . - */ - -#ifndef TBASE_MNODE_SUPER_TABLE_QUERY_H -#define TBASE_MNODE_SUPER_TABLE_QUERY_H - -#include "os.h" -#include "mnode.h" -#include "qast.h" - -int32_t mgmtDoJoin(SSuperTableMetaMsg* pSuperTableMetaMsg, tQueryResultset* pRes); -void mgmtReorganizeMetersInMetricMeta(SSuperTableMetaMsg* pInfo, int32_t index, tQueryResultset* pRes); - - -#endif diff --git a/src/vnode/detail/inc/vnodeSystem.h b/src/vnode/detail/inc/vnodeSystem.h deleted file mode 100644 index c951be0b26..0000000000 --- a/src/vnode/detail/inc/vnodeSystem.h +++ /dev/null @@ -1,29 +0,0 @@ -/* - * Copyright (c) 2019 TAOS Data, Inc. 
- * - * This program is free software: you can use, redistribute, and/or modify - * it under the terms of the GNU Affero General Public License, version 3 - * or later ("AGPL"), as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. - * - * You should have received a copy of the GNU Affero General Public License - * along with this program. If not, see . - */ - -#ifndef TDENGINE_VNODESYSTEM_H -#define TDENGINE_VNODESYSTEM_H - -#ifdef __cplusplus -extern "C" { -#endif - - - -#ifdef __cplusplus -} -#endif - -#endif // TDENGINE_VNODESYSTEM_H diff --git a/src/vnode/detail/inc/vnodeTagMgmt.h b/src/vnode/detail/inc/vnodeTagMgmt.h deleted file mode 100644 index 88eb7ccb6e..0000000000 --- a/src/vnode/detail/inc/vnodeTagMgmt.h +++ /dev/null @@ -1,58 +0,0 @@ -/* - * Copyright (c) 2019 TAOS Data, Inc. - * - * This program is free software: you can use, redistribute, and/or modify - * it under the terms of the GNU Affero General Public License, version 3 - * or later ("AGPL"), as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. - * - * You should have received a copy of the GNU Affero General Public License - * along with this program. If not, see . - */ - -#ifndef TDENGINE_VNODETAGMGMT_H -#define TDENGINE_VNODETAGMGMT_H - -#ifdef __cplusplus -extern "C" { -#endif - -/* - * @version 0.1 - - * @date 2018/01/02 - * @author liaohj - * management of the tag value of tables - * in query, client need the vnode to aggregate results according to tags - * values, - * the grouping operation is done here. - * Note: - * 1. we implement a quick sort algorithm, may remove it later. 
- */ - -typedef int32_t (*__ext_compar_fn_t)(const void *p1, const void *p2, void *param); - -tSidSet *tSidSetCreate(struct SMeterSidExtInfo **pMeterSidExtInfo, int32_t numOfMeters, SSchema *pSchema, - int32_t numOfTags, SColIndexEx *colList, int32_t numOfOrderCols); - -int32_t *calculateSubGroup(void **pSids, int32_t numOfMeters, int32_t *numOfSubset, tOrderDescriptor *pOrderDesc, - __ext_compar_fn_t compareFn); - -void tSidSetDestroy(tSidSet **pSets); - -void tSidSetSort(tSidSet *pSets); - -int32_t meterSidComparator(const void *s1, const void *s2, void *param); - -int32_t doCompare(char *f1, char *f2, int32_t type, int32_t size); - -void tQSortEx(void **pMeterSids, size_t size, int32_t start, int32_t end, void *param, __ext_compar_fn_t compareFn); - -#ifdef __cplusplus -} -#endif - -#endif // TDENGINE_VNODETAGMGMT_H diff --git a/src/vnode/detail/inc/vnodeUtil.h b/src/vnode/detail/inc/vnodeUtil.h deleted file mode 100644 index b0f573ba2d..0000000000 --- a/src/vnode/detail/inc/vnodeUtil.h +++ /dev/null @@ -1,94 +0,0 @@ -/* - * Copyright (c) 2019 TAOS Data, Inc. - * - * This program is free software: you can use, redistribute, and/or modify - * it under the terms of the GNU Affero General Public License, version 3 - * or later ("AGPL"), as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. - * - * You should have received a copy of the GNU Affero General Public License - * along with this program. If not, see . 
- */ - -#ifndef TDENGINE_VNODE_UTIL_H -#define TDENGINE_VNODE_UTIL_H - -#ifdef __cplusplus -extern "C" { -#endif - -/* get the qinfo struct address from the query struct address */ -#define GET_COLUMN_BYTES(query, colidx) \ - ((query)->colList[(query)->pSelectExpr[colidx].pBase.colInfo.colIdxInBuf].data.bytes) -#define GET_COLUMN_TYPE(query, colidx) \ - ((query)->colList[(query)->pSelectExpr[colidx].pBase.colInfo.colIdxInBuf].data.type) - -#define QUERY_IS_ASC_QUERY(q) (GET_FORWARD_DIRECTION_FACTOR((q)->order.order) == QUERY_ASC_FORWARD_STEP) -#define EXTRA_BYTES 2 // for possible compression deflation - -#define GET_COL_DATA_POS(query, index, step) ((query)->pos + (index)*(step)) - -int vnodeGetEid(int days); - -int vnodeCheckFileIntegrity(FILE *fp); - -void vnodeCreateFileHeader(FILE *fp); - -void vnodeCreateFileHeaderFd(int fd); - -void vnodeGetHeadFileHeaderInfo(int fd, SVnodeHeadInfo *pHeadInfo); - -void vnodeUpdateHeadFileHeader(int fd, SVnodeHeadInfo *pHeadInfo); - -/** - * check if two schema is identical or not - * This function does not check if a schema is valid or not - * - * @param pSSchemaFirst - * @param numOfCols1 - * @param pSSchemaSecond - * @param numOfCols2 - * @return - */ -bool vnodeMeterSchemaIdentical(SColumn *pSchema1, int32_t numOfCols1, SColumn *pSchema2, int32_t numOfCols2); - -/** - * free SFields in SQuery - * vnodeFreeFields must be called before free(pQuery->pBlock); - * @param pQuery - */ -void vnodeFreeFields(SQuery *pQuery); - -void vnodeUpdateFilterColumnIndex(SQuery* pQuery); -void vnodeUpdateQueryColumnIndex(SQuery* pQuery, SMeterObj* pMeterObj); - -int32_t vnodeCreateFilterInfo(void* pQInfo, SQuery *pQuery); - -bool vnodeFilterData(SQuery* pQuery, int32_t* numOfActualRead, int32_t index); -bool vnodeDoFilterData(SQuery* pQuery, int32_t elemPos); - -bool vnodeIsProjectionQuery(SSqlFunctionExpr *pExpr, int32_t numOfOutput); - -int32_t vnodeIncQueryRefCount(SQueryMeterMsg *pQueryMsg, SMeterSidExtInfo **pSids, SMeterObj 
**pMeterObjList, - int32_t *numOfInc); - -void vnodeDecQueryRefCount(SQueryMeterMsg *pQueryMsg, SMeterObj **pMeterObjList, int32_t numOfInc); - -int32_t vnodeSetMeterState(SMeterObj* pMeterObj, int32_t state); -void vnodeClearMeterState(SMeterObj* pMeterObj, int32_t state); -bool vnodeIsMeterState(SMeterObj* pMeterObj, int32_t state); -void vnodeSetMeterDeleting(SMeterObj* pMeterObj); -int32_t vnodeSetMeterInsertImportStateEx(SMeterObj* pObj, int32_t st); - -bool vnodeIsSafeToDeleteMeter(SVnodeObj* pVnode, int32_t sid); -void vnodeFreeColumnInfo(SColumnInfo* pColumnInfo); -bool isGroupbyNormalCol(SSqlGroupbyExpr* pExpr); - -#ifdef __cplusplus -} -#endif - -#endif // TDENGINE_VNODE_UTIL_H diff --git a/src/vnode/detail/src/vnodeCache.c b/src/vnode/detail/src/vnodeCache.c deleted file mode 100644 index 9f078b09ff..0000000000 --- a/src/vnode/detail/src/vnodeCache.c +++ /dev/null @@ -1,1165 +0,0 @@ -/* - * Copyright (c) 2019 TAOS Data, Inc. - * - * This program is free software: you can use, redistribute, and/or modify - * it under the terms of the GNU Affero General Public License, version 3 - * or later ("AGPL"), as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. - * - * You should have received a copy of the GNU Affero General Public License - * along with this program. If not, see . 
- */ - -#define _DEFAULT_SOURCE -#include "os.h" - -#include "taosmsg.h" -#include "vnode.h" -#include "vnodeCache.h" -#include "vnodeUtil.h" -#include "vnodeStatus.h" - -void vnodeSearchPointInCache(SMeterObj *pObj, SQuery *pQuery); -void vnodeProcessCommitTimer(void *param, void *tmrId); - -void *vnodeOpenCachePool(int vnode) { - SCachePool *pCachePool; - SVnodeCfg * pCfg = &vnodeList[vnode].cfg; - int blockId = 0; - char * pMem = NULL; - - pCachePool = (SCachePool *)malloc(sizeof(SCachePool)); - if (pCachePool == NULL) { - dError("no memory to allocate cache pool!"); - return NULL; - } - - memset(pCachePool, 0, sizeof(SCachePool)); - pCachePool->count = 1; - pCachePool->vnode = vnode; - - pthread_mutex_init(&(pCachePool->vmutex), NULL); - - size_t size = sizeof(char *) * pCfg->cacheNumOfBlocks.totalBlocks; - pCachePool->pMem = malloc(size); - if (pCachePool->pMem == NULL) { - dError("no memory to allocate cache blocks!"); - pthread_mutex_destroy(&(pCachePool->vmutex)); - tfree(pCachePool); - return NULL; - } - - memset(pCachePool->pMem, 0, size); - pCachePool->threshold = pCfg->cacheNumOfBlocks.totalBlocks * 0.6; - - int maxAllocBlock = (1024 * 1024 * 1024) / pCfg->cacheBlockSize; - if (maxAllocBlock < 1) { - dError("Cache block size is too large"); - pthread_mutex_destroy(&(pCachePool->vmutex)); - tfree(pCachePool->pMem); - tfree(pCachePool); - return NULL; - } - while (blockId < pCfg->cacheNumOfBlocks.totalBlocks) { - // TODO : Allocate real blocks - int allocBlocks = MIN(pCfg->cacheNumOfBlocks.totalBlocks - blockId, maxAllocBlock); - pMem = calloc(allocBlocks, pCfg->cacheBlockSize); - if (pMem == NULL) { - dError("failed to allocate cache memory: %d", allocBlocks*pCfg->cacheBlockSize); - goto _err_exit; - } - - for (int i = 0; i < allocBlocks; i++) { - pCachePool->pMem[blockId] = pMem + i * pCfg->cacheBlockSize; - blockId++; - } - } - - dPrint("vid:%d, cache pool is allocated:0x%x", vnode, pCachePool); - - return pCachePool; - -_err_exit: - 
pthread_mutex_destroy(&(pCachePool->vmutex)); - // TODO : Free the cache blocks and return - blockId = 0; - while (blockId < pCfg->cacheNumOfBlocks.totalBlocks) { - tfree(pCachePool->pMem[blockId]); - blockId = blockId + (MIN(maxAllocBlock, pCfg->cacheNumOfBlocks.totalBlocks - blockId)); - } - tfree(pCachePool->pMem); - tfree(pCachePool); - return NULL; -} - -void vnodeCloseCachePool(int vnode) { - SVnodeObj * pVnode = vnodeList + vnode; - SCachePool *pCachePool = (SCachePool *)pVnode->pCachePool; - int blockId = 0; - - taosTmrStopA(&pVnode->commitTimer); - if (pVnode->commitInProcess) pthread_cancel(pVnode->commitThread); - - dPrint("vid:%d, cache pool closed, count:%d", vnode, pCachePool->count); - - int maxAllocBlock = (1024 * 1024 * 1024) / pVnode->cfg.cacheBlockSize; - while (blockId < pVnode->cfg.cacheNumOfBlocks.totalBlocks) { - tfree(pCachePool->pMem[blockId]); - blockId = blockId + (MIN(maxAllocBlock, pVnode->cfg.cacheNumOfBlocks.totalBlocks - blockId)); - } - tfree(pCachePool->pMem); - pthread_mutex_destroy(&(pCachePool->vmutex)); - tfree(pCachePool); - pVnode->pCachePool = NULL; -} - -void *vnodeAllocateCacheInfo(SMeterObj *pObj) { - SCacheInfo *pInfo; - size_t size; - SVnodeCfg * pCfg = &vnodeList[pObj->vnode].cfg; - - size = sizeof(SCacheInfo); - pInfo = (SCacheInfo *)malloc(size); - if (pInfo == NULL) { - dError("id:%s, no memory for cacheInfo", pObj->meterId); - return NULL; - } - memset(pInfo, 0, size); - pInfo->maxBlocks = vnodeList[pObj->vnode].cfg.blocksPerMeter; - size = sizeof(SCacheBlock *) * pInfo->maxBlocks; - pInfo->cacheBlocks = (SCacheBlock **)malloc(size); - if (pInfo->cacheBlocks == NULL) { - dError("id:%s, no memory for cacheBlocks", pObj->meterId); - tfree(pInfo); - return NULL; - } - memset(pInfo->cacheBlocks, 0, size); - pInfo->currentSlot = -1; - - pObj->pointsPerBlock = - (pCfg->cacheBlockSize - sizeof(SCacheBlock) - pObj->numOfColumns * sizeof(char *)) / pObj->bytesPerPoint; - if (pObj->pointsPerBlock > pObj->pointsPerFileBlock) 
pObj->pointsPerBlock = pObj->pointsPerFileBlock; - pObj->pCache = (void *)pInfo; - - pObj->freePoints = pObj->pointsPerBlock * pInfo->maxBlocks; - - return (void *)pInfo; -} - -int vnodeFreeCacheBlock(SCacheBlock *pCacheBlock) { - SMeterObj * pObj; - SCacheInfo *pInfo; - - if (pCacheBlock == NULL) return -1; - - pObj = pCacheBlock->pMeterObj; - pInfo = (SCacheInfo *)pObj->pCache; - - if (pObj) { - pInfo->numOfBlocks--; - - if (pInfo->numOfBlocks < 0) { - dError("vid:%d sid:%d id:%s, numOfBlocks:%d shall never be negative", pObj->vnode, pObj->sid, pObj->meterId, - pInfo->numOfBlocks); - } - - if (pCacheBlock->blockId == 0) { - dError("vid:%d sid:%d id:%s, double free", pObj->vnode, pObj->sid, pObj->meterId); - } - - SCachePool *pPool = (SCachePool *)vnodeList[pObj->vnode].pCachePool; - if (pCacheBlock->notFree) { - pPool->notFreeSlots--; - pInfo->unCommittedBlocks--; - dTrace("vid:%d sid:%d id:%s, cache block is not free, slot:%d, index:%d notFreeSlots:%d", - pObj->vnode, pObj->sid, pObj->meterId, pCacheBlock->slot, pCacheBlock->index, pPool->notFreeSlots); - } - - dTrace("vid:%d sid:%d id:%s, free a cache block, numOfBlocks:%d, slot:%d, index:%d notFreeSlots:%d", - pObj->vnode, pObj->sid, pObj->meterId, pInfo->numOfBlocks, pCacheBlock->slot, pCacheBlock->index, - pPool->notFreeSlots); - - memset(pCacheBlock, 0, sizeof(SCacheBlock)); - - } else { - dError("BUG, pObj is null"); - } - - return 0; -} - -void vnodeFreeCacheInfo(SMeterObj *pObj) { - SCacheInfo * pInfo; - SCacheBlock *pCacheBlock; - SCachePool * pPool; - int slot, numOfBlocks; - - if (pObj == NULL || pObj->pCache == NULL) return; - - pPool = (SCachePool *)vnodeList[pObj->vnode].pCachePool; - pInfo = (SCacheInfo *)pObj->pCache; - if (pPool == NULL || pInfo == NULL) return; - - pthread_mutex_lock(&pPool->vmutex); - numOfBlocks = pInfo->numOfBlocks; - slot = pInfo->currentSlot; - - for (int i = 0; i < numOfBlocks; ++i) { - pCacheBlock = pInfo->cacheBlocks[slot]; - vnodeFreeCacheBlock(pCacheBlock); - slot = 
(slot - 1 + pInfo->maxBlocks) % pInfo->maxBlocks; - } - - pObj->pCache = NULL; - tfree(pInfo->cacheBlocks); - tfree(pInfo); - pthread_mutex_unlock(&pPool->vmutex); -} - -uint64_t vnodeGetPoolCount(SVnodeObj *pVnode) { - SCachePool *pPool; - - pPool = (SCachePool *)pVnode->pCachePool; - - return pPool->count; -} - -void vnodeUpdateCommitInfo(SMeterObj *pObj, int slot, int pos, uint64_t count) { - SCacheInfo * pInfo; - SCacheBlock *pBlock; - SCachePool * pPool; - - pInfo = (SCacheInfo *)pObj->pCache; - pPool = (SCachePool *)vnodeList[pObj->vnode].pCachePool; - - int tslot = - (pInfo->commitPoint == pObj->pointsPerBlock) ? (pInfo->commitSlot + 1) % pInfo->maxBlocks : pInfo->commitSlot; - int slots = 0; - - while (tslot != slot || ((tslot == slot) && (pos == pObj->pointsPerBlock))) { - slots++; - pthread_mutex_lock(&pPool->vmutex); - pBlock = pInfo->cacheBlocks[tslot]; - assert(pBlock->notFree); - pBlock->notFree = 0; - pInfo->unCommittedBlocks--; - pPool->notFreeSlots--; - pthread_mutex_unlock(&pPool->vmutex); - - dTrace("vid:%d sid:%d id:%s, cache block is committed, slot:%d, index:%d notFreeSlots:%d, unCommittedBlocks:%d", - pObj->vnode, pObj->sid, pObj->meterId, pBlock->slot, pBlock->index, pPool->notFreeSlots, - pInfo->unCommittedBlocks); - if (tslot == slot) break; - tslot = (tslot + 1) % pInfo->maxBlocks; - } - - atomic_fetch_add_32(&pObj->freePoints, pObj->pointsPerBlock * slots); - pInfo->commitSlot = slot; - pInfo->commitPoint = pos; - pObj->commitCount = count; -} - -TSKEY vnodeGetFirstKey(int vnode) { - SMeterObj * pObj; - SCacheInfo * pInfo; - SCacheBlock *pCacheBlock; - - SVnodeCfg *pCfg = &vnodeList[vnode].cfg; - TSKEY key = taosGetTimestamp(pCfg->precision); - - for (int sid = 0; sid < pCfg->maxSessions; ++sid) { - pObj = vnodeList[vnode].meterList[sid]; - if (pObj == NULL || pObj->pCache == NULL) continue; - - pInfo = (SCacheInfo *)pObj->pCache; - pCacheBlock = pInfo->cacheBlocks[0]; - - if (pCacheBlock == NULL || pCacheBlock->numOfPoints <= 0) 
continue; - - if (*((TSKEY *)(pCacheBlock->offset[0])) < key) key = *((TSKEY *)(pCacheBlock->offset[0])); - } - - return key; -} - -pthread_t vnodeCreateCommitThread(SVnodeObj *pVnode) { - // this function has to mutex locked before it is called - - pthread_attr_t thattr; - SCachePool * pPool = (SCachePool *)pVnode->pCachePool; - - if (pPool->commitInProcess) { - dTrace("vid:%d, commit is already in process", pVnode->vnode); - return pVnode->commitThread; - } - - taosTmrStopA(&pVnode->commitTimer); - - if (pVnode->vnodeStatus == TSDB_VN_STATUS_UNSYNCED) { - taosTmrReset(vnodeProcessCommitTimer, pVnode->cfg.commitTime * 1000, pVnode, vnodeTmrCtrl, &pVnode->commitTimer); - dTrace("vid:%d, it is in unsyc state, commit later", pVnode->vnode); - return pVnode->commitThread; - } - - pthread_attr_init(&thattr); - pthread_attr_setdetachstate(&thattr, PTHREAD_CREATE_DETACHED); - if (pthread_create(&(pVnode->commitThread), &thattr, vnodeCommitToFile, pVnode) != 0) { - dError("vid:%d, failed to create thread to commit file, reason:%s", pVnode->vnode, strerror(errno)); - } else { - pPool->commitInProcess = 1; - dTrace("vid:%d, commit thread: 0x%lx is created", pVnode->vnode, pVnode->commitThread); - } - - pthread_attr_destroy(&thattr); - - return pVnode->commitThread; -} - -void vnodeProcessCommitTimer(void *param, void *tmrId) { - SVnodeObj * pVnode = (SVnodeObj *)param; - SCachePool *pPool = (SCachePool *)pVnode->pCachePool; - - pthread_mutex_lock(&pPool->vmutex); - - vnodeCreateCommitThread(pVnode); - - pthread_mutex_unlock(&pPool->vmutex); -} - -void vnodeCommitOver(SVnodeObj *pVnode) { - SCachePool *pPool = (SCachePool *)(pVnode->pCachePool); - - taosTmrReset(vnodeProcessCommitTimer, pVnode->cfg.commitTime * 1000, pVnode, vnodeTmrCtrl, &pVnode->commitTimer); - - pthread_mutex_lock(&pPool->vmutex); - - pPool->commitInProcess = 0; - dTrace("vid:%d, commit is over, notFreeSlots:%d", pPool->vnode, pPool->notFreeSlots); - - pthread_mutex_unlock(&pPool->vmutex); -} - -static 
void vnodeWaitForCommitComplete(SVnodeObj *pVnode) { - SCachePool *pPool = (SCachePool *)(pVnode->pCachePool); - - // wait for 100s at most - const int32_t totalCount = 1000; - int32_t count = 0; - - // all meter is marked as dropped, so the commit will abort very quickly - while(count++ < totalCount) { - int32_t commitInProcess = 0; - - pthread_mutex_lock(&pPool->vmutex); - commitInProcess = pPool->commitInProcess; - pthread_mutex_unlock(&pPool->vmutex); - - if (commitInProcess) { - dWarn("vid:%d still in commit, wait for completed", pVnode->vnode); - taosMsleep(10); - } - } -} - -void vnodeCancelCommit(SVnodeObj *pVnode) { - SCachePool *pPool = (SCachePool *)(pVnode->pCachePool); - if (pPool == NULL) return; - - vnodeWaitForCommitComplete(pVnode); - taosTmrReset(vnodeProcessCommitTimer, pVnode->cfg.commitTime * 1000, pVnode, vnodeTmrCtrl, &pVnode->commitTimer); -} - -/* The vnode cache lock should be hold before calling this interface - */ -SCacheBlock *vnodeGetFreeCacheBlock(SVnodeObj *pVnode) { - SCachePool *pPool = (SCachePool *)(pVnode->pCachePool); - SVnodeCfg *pCfg = &(pVnode->cfg); - SCacheBlock *pCacheBlock = NULL; - int skipped = 0; - - while (1) { - pCacheBlock = (SCacheBlock *)(pPool->pMem[((int64_t)pPool->freeSlot)]); - if (pCacheBlock->blockId == 0) break; - - if (pCacheBlock->notFree) { - pPool->freeSlot++; - pPool->freeSlot = pPool->freeSlot % pCfg->cacheNumOfBlocks.totalBlocks; - skipped++; - if (skipped > pPool->threshold) { - vnodeCreateCommitThread(pVnode); - pthread_mutex_unlock(&pPool->vmutex); - dError("vid:%d committing process is too slow, notFreeSlots:%d....", pVnode->vnode, pPool->notFreeSlots); - return NULL; - } - } else { - SMeterObj * pRelObj = pCacheBlock->pMeterObj; - SCacheInfo *pRelInfo = (SCacheInfo *)pRelObj->pCache; - int firstSlot = (pRelInfo->currentSlot - pRelInfo->numOfBlocks + 1 + pRelInfo->maxBlocks) % pRelInfo->maxBlocks; - pCacheBlock = pRelInfo->cacheBlocks[firstSlot]; - if (pCacheBlock) { - pPool->freeSlot = 
pCacheBlock->index; - vnodeFreeCacheBlock(pCacheBlock); - break; - } else { - pPool->freeSlot = (pPool->freeSlot + 1) % pCfg->cacheNumOfBlocks.totalBlocks; - skipped++; - } - } - } - - pCacheBlock = (SCacheBlock *)(pPool->pMem[pPool->freeSlot]); - pCacheBlock->index = pPool->freeSlot; - pCacheBlock->notFree = 1; - pPool->freeSlot = (pPool->freeSlot + 1) % pCfg->cacheNumOfBlocks.totalBlocks; - pPool->notFreeSlots++; - - return pCacheBlock; -} - -int vnodeAllocateCacheBlock(SMeterObj *pObj) { - int index; - SCachePool * pPool; - SCacheBlock *pCacheBlock; - SCacheInfo * pInfo; - SVnodeObj * pVnode; - int commit = 0; - - pVnode = vnodeList + pObj->vnode; - pPool = (SCachePool *)pVnode->pCachePool; - pInfo = (SCacheInfo *)pObj->pCache; - SVnodeCfg *pCfg = &(vnodeList[pObj->vnode].cfg); - - if (pPool == NULL) return -1; - pthread_mutex_lock(&pPool->vmutex); - - if (pInfo == NULL || pInfo->cacheBlocks == NULL) { - pthread_mutex_unlock(&pPool->vmutex); - dError("vid:%d sid:%d id:%s, meter is not there", pObj->vnode, pObj->sid, pObj->meterId); - return -1; - } - - if (pPool->count <= 1) { - if (pVnode->commitTimer == NULL) - pVnode->commitTimer = taosTmrStart(vnodeProcessCommitTimer, pCfg->commitTime * 1000, pVnode, vnodeTmrCtrl); - } - - if (pInfo->unCommittedBlocks >= pInfo->maxBlocks-1) { - vnodeCreateCommitThread(pVnode); - pthread_mutex_unlock(&pPool->vmutex); - dError("vid:%d sid:%d id:%s, all blocks are not committed yet....", pObj->vnode, pObj->sid, pObj->meterId); - return -1; - } - - if ((pCacheBlock = vnodeGetFreeCacheBlock(pVnode)) == NULL) return -1; - - index = pCacheBlock->index; - pCacheBlock->pMeterObj = pObj; - - pCacheBlock->offset[0] = ((char *)(pCacheBlock)) + sizeof(SCacheBlock) + pObj->numOfColumns * sizeof(char *); - for (int col = 1; col < pObj->numOfColumns; ++col) - pCacheBlock->offset[col] = pCacheBlock->offset[col - 1] + pObj->schema[col - 1].bytes * pObj->pointsPerBlock; - - pInfo->numOfBlocks++; - pInfo->blocks++; - pInfo->unCommittedBlocks++; 
- pInfo->currentSlot = (pInfo->currentSlot + 1) % pInfo->maxBlocks; - pCacheBlock->blockId = pInfo->blocks; - pCacheBlock->slot = pInfo->currentSlot; - if (pInfo->numOfBlocks > pInfo->maxBlocks) { - pCacheBlock = pInfo->cacheBlocks[pInfo->currentSlot]; - vnodeFreeCacheBlock(pCacheBlock); - } - - pInfo->cacheBlocks[pInfo->currentSlot] = (SCacheBlock *)(pPool->pMem[(int64_t)index]); - dTrace("vid:%d sid:%d id:%s, allocate a cache block, numOfBlocks:%d, slot:%d, index:%d notFreeSlots:%d blocks:%d", - pObj->vnode, pObj->sid, pObj->meterId, pInfo->numOfBlocks, pInfo->currentSlot, index, pPool->notFreeSlots, - pInfo->blocks); - - if (((pPool->notFreeSlots > pPool->threshold) || (pInfo->unCommittedBlocks >= pInfo->maxBlocks / 2))) { - dTrace("vid:%d sid:%d id:%s, too many unCommitted slots, unCommitted:%d notFreeSlots:%d", - pObj->vnode, pObj->sid, pObj->meterId, pInfo->unCommittedBlocks, pPool->notFreeSlots); - vnodeCreateCommitThread(pVnode); - commit = 1; - } - - pthread_mutex_unlock(&pPool->vmutex); - - return commit; -} - -int vnodeInsertPointToCache(SMeterObj *pObj, char *pData) { - SCacheBlock *pCacheBlock; - SCacheInfo * pInfo; - SCachePool * pPool; - - pInfo = (SCacheInfo *)pObj->pCache; - pPool = (SCachePool *)vnodeList[pObj->vnode].pCachePool; - - if (pInfo->numOfBlocks == 0) { - if (vnodeAllocateCacheBlock(pObj) < 0) { - return -1; - } - } - - if (pInfo->currentSlot < 0) return -1; - pCacheBlock = pInfo->cacheBlocks[pInfo->currentSlot]; - if (pCacheBlock->numOfPoints >= pObj->pointsPerBlock) { - if (vnodeAllocateCacheBlock(pObj) < 0) return -1; - pCacheBlock = pInfo->cacheBlocks[pInfo->currentSlot]; - } - - for (int col = 0; col < pObj->numOfColumns; ++col) { - memcpy(pCacheBlock->offset[col] + pCacheBlock->numOfPoints * pObj->schema[col].bytes, pData, - pObj->schema[col].bytes); - pData += pObj->schema[col].bytes; - } - - atomic_fetch_sub_32(&pObj->freePoints, 1); - pCacheBlock->numOfPoints++; - pPool->count++; - - return 0; -} - -void 
vnodeUpdateQuerySlotPos(SCacheInfo *pInfo, SQuery *pQuery) { - SCacheBlock *pCacheBlock; - - int step = QUERY_IS_ASC_QUERY(pQuery) ? -1 : 1; - - if ((QUERY_IS_ASC_QUERY(pQuery) && (pQuery->slot == pQuery->currentSlot)) || - (!QUERY_IS_ASC_QUERY(pQuery) && (pQuery->slot == pQuery->firstSlot))) { - pQuery->over = 1; - - } else { - pQuery->slot = (pQuery->slot - step + pInfo->maxBlocks) % pInfo->maxBlocks; - pCacheBlock = pInfo->cacheBlocks[pQuery->slot]; - pQuery->pos = QUERY_IS_ASC_QUERY(pQuery) ? 0 : pCacheBlock->numOfPoints - 1; - } -} - -static FORCE_INLINE TSKEY vnodeGetTSInCacheBlock(SCacheBlock *pCacheBlock, int32_t pos) { - return *(TSKEY *)(pCacheBlock->offset[PRIMARYKEY_TIMESTAMP_COL_INDEX] + pos * TSDB_KEYSIZE); -} - -int vnodeQueryFromCache(SMeterObj *pObj, SQuery *pQuery) { - SCacheBlock *pCacheBlock; - int col, step; - char * pRead, *pData; - SCacheInfo * pInfo; - int lastPos = -1; - int startPos, numOfReads, numOfPoints; - - pQuery->pointsRead = 0; - if (pQuery->over) return 0; - - vnodeFreeFields(pQuery); - - pInfo = (SCacheInfo *)pObj->pCache; - if ((pInfo == NULL) || (pInfo->numOfBlocks == 0)) { - pQuery->over = 1; - return 0; - } - - if (pQuery->slot < 0 || pQuery->pos < 0) // it means a new query, we need to find the point first - vnodeSearchPointInCache(pObj, pQuery); - - if (pQuery->slot < 0 || pQuery->pos < 0) { - pQuery->over = 1; - return 0; - } - - step = QUERY_IS_ASC_QUERY(pQuery) ? -1 : 1; - pCacheBlock = pInfo->cacheBlocks[pQuery->slot]; - numOfPoints = pCacheBlock->numOfPoints; - - int maxReads = QUERY_IS_ASC_QUERY(pQuery) ? 
numOfPoints - pQuery->pos : pQuery->pos + 1; - if (maxReads <= 0) { - vnodeUpdateQuerySlotPos(pInfo, pQuery); - return 0; - } - - TSKEY startkey = vnodeGetTSInCacheBlock(pCacheBlock, 0); - TSKEY endkey = vnodeGetTSInCacheBlock(pCacheBlock, numOfPoints - 1); - - if (QUERY_IS_ASC_QUERY(pQuery)) { - if (endkey < pQuery->ekey) { - numOfReads = maxReads; - } else { - lastPos = (*vnodeSearchKeyFunc[pObj->searchAlgorithm])( - pCacheBlock->offset[PRIMARYKEY_TIMESTAMP_COL_INDEX] + TSDB_KEYSIZE * pQuery->pos, maxReads, pQuery->ekey, 0); - numOfReads = (lastPos >= 0) ? lastPos + 1 : 0; - } - } else { - if (startkey > pQuery->ekey) { - numOfReads = maxReads; - } else { - lastPos = (*vnodeSearchKeyFunc[pObj->searchAlgorithm])(pCacheBlock->offset[PRIMARYKEY_TIMESTAMP_COL_INDEX], - maxReads, pQuery->ekey, 1); - numOfReads = (lastPos >= 0) ? pQuery->pos - lastPos + 1 : 0; - } - } - - if (numOfReads > pQuery->pointsToRead - pQuery->pointsRead) { - numOfReads = pQuery->pointsToRead - pQuery->pointsRead; - } else { - if (lastPos >= 0 || numOfReads == 0) { - pQuery->keyIsMet = 1; - pQuery->over = 1; - } - } - - startPos = QUERY_IS_ASC_QUERY(pQuery) ? 
pQuery->pos : pQuery->pos - numOfReads + 1; - - int32_t numOfQualifiedPoints = 0; - int32_t numOfActualRead = numOfReads; - - if (pQuery->numOfFilterCols == 0) { - for (col = 0; col < pQuery->numOfOutputCols; ++col) { - int16_t colIdx = pQuery->pSelectExpr[col].pBase.colInfo.colIdx; - - int16_t bytes = GET_COLUMN_BYTES(pQuery, col); - int16_t type = GET_COLUMN_TYPE(pQuery, col); - - pData = pQuery->sdata[col]->data + pQuery->pointsOffset * bytes; - /* this column is absent from current block, fill this block with null value */ - if (colIdx < 0 || colIdx >= pObj->numOfColumns || - pObj->schema[colIdx].colId != pQuery->pSelectExpr[col].pBase.colInfo.colId) { // set null - setNullN(pData, type, bytes, pCacheBlock->numOfPoints); - } else { - pRead = pCacheBlock->offset[colIdx] + startPos * bytes; - - if (QUERY_IS_ASC_QUERY(pQuery)) { - memcpy(pData, pRead, numOfReads * bytes); - } else { - for(int32_t j = 0; j < numOfReads; ++j) { - memcpy(pData + bytes * j, pRead + (numOfReads - 1 - j) * bytes, bytes); - } - } - } - } - numOfQualifiedPoints = numOfReads; - } else { // check each data one by one - // set the input column data - for (int32_t k = 0; k < pQuery->numOfFilterCols; ++k) { - int16_t colIdx = pQuery->pFilterInfo[k].info.colIdx; - - if (colIdx < 0) { // current data has not specified column - pQuery->pFilterInfo[k].pData = NULL; - } else { - pQuery->pFilterInfo[k].pData = pCacheBlock->offset[colIdx]; - } - } - - int32_t *ids = calloc(1, numOfReads * sizeof(int32_t)); - numOfActualRead = 0; - - if (QUERY_IS_ASC_QUERY(pQuery)) { - for (int32_t j = startPos; j < pCacheBlock->numOfPoints; ++j) { - TSKEY key = vnodeGetTSInCacheBlock(pCacheBlock, j); - if (key < startkey || key > endkey) { - dError("vid:%d sid:%d id:%s, timestamp in cache slot is disordered. 
slot:%d, pos:%d, ts:%" PRId64 ", block " - "range:%" PRId64 "-%" PRId64, pObj->vnode, pObj->sid, pObj->meterId, pQuery->slot, j, key, startkey, endkey); - tfree(ids); - return -TSDB_CODE_FILE_BLOCK_TS_DISORDERED; - } - - if (key > pQuery->ekey) { - break; - } - - if (!vnodeFilterData(pQuery, &numOfActualRead, j)) { - continue; - } - - ids[numOfQualifiedPoints] = j; - if (++numOfQualifiedPoints == numOfReads) { // qualified data are enough - break; - } - } - } else { - startPos = pQuery->pos; - for (int32_t j = startPos; j >= 0; --j) { - TSKEY key = vnodeGetTSInCacheBlock(pCacheBlock, j); - if (key < startkey || key > endkey) { - dError("vid:%d sid:%d id:%s, timestamp in cache slot is disordered. slot:%d, pos:%d, ts:%" PRId64 ", block " - "range:%" PRId64 "-%" PRId64, pObj->vnode, pObj->sid, pObj->meterId, pQuery->slot, j, key, startkey, endkey); - tfree(ids); - return -TSDB_CODE_FILE_BLOCK_TS_DISORDERED; - } - - if (key < pQuery->ekey) { - break; - } - - if (!vnodeFilterData(pQuery, &numOfActualRead, j)) { - continue; - } - - ids[numOfQualifiedPoints] = j; - if (++numOfQualifiedPoints == numOfReads) { // qualified data are enough - break; - } - } - } - -// int32_t start = QUERY_IS_ASC_QUERY(pQuery) ? 
0 : numOfReads - numOfQualifiedPoints; - for (int32_t j = 0; j < numOfQualifiedPoints; ++j) { - for (int32_t col = 0; col < pQuery->numOfOutputCols; ++col) { - int16_t colIndex = pQuery->pSelectExpr[col].pBase.colInfo.colIdx; - - int32_t bytes = pObj->schema[colIndex].bytes; - pData = pQuery->sdata[col]->data + (pQuery->pointsOffset + j) * bytes; - pRead = pCacheBlock->offset[colIndex] + ids[j/* + start*/] * bytes; - - memcpy(pData, pRead, bytes); - } - } - - tfree(ids); - assert(numOfQualifiedPoints <= numOfReads); - } - - pQuery->pointsRead += numOfQualifiedPoints; - pQuery->pos -= numOfActualRead * step; - - // update the skey/lastkey - int32_t lastAccessPos = pQuery->pos + step; - pQuery->lastKey = vnodeGetTSInCacheBlock(pCacheBlock, lastAccessPos); - pQuery->skey = pQuery->lastKey - step; - - int update = 0; // go to next slot after this round - if ((pQuery->pos < 0 || pQuery->pos >= pObj->pointsPerBlock || numOfReads == 0) && (pQuery->over == 0)) update = 1; - - // if block is changed, it shall be thrown away, it won't happen for committing - if (pObj != pCacheBlock->pMeterObj || pCacheBlock->blockId > pQuery->blockId) { - update = 1; - pQuery->pointsRead = 0; - dWarn("vid:%d sid:%d id:%s, cache block is overwritten, slot:%d blockId:%d qBlockId:%d", - pObj->vnode, pObj->sid, pObj->meterId, pQuery->slot, pCacheBlock->blockId, pQuery->blockId); - } - - if (update) vnodeUpdateQuerySlotPos(pInfo, pQuery); - - for (col = 0; col < pQuery->numOfOutputCols; ++col) { - int16_t bytes = GET_COLUMN_BYTES(pQuery, col); - pQuery->sdata[col]->len = bytes * (pQuery->pointsRead + pQuery->pointsOffset); - } - return pQuery->pointsRead; -} - -void vnodeSearchPointInCache(SMeterObj *pObj, SQuery *pQuery) { - int numOfBlocks; - int firstSlot, lastSlot, midSlot; - TSKEY keyFirst, keyLast; - SCacheBlock *pBlock; - SCacheInfo * pInfo = (SCacheInfo *)pObj->pCache; - SCachePool * pPool = (SCachePool *)vnodeList[pObj->vnode].pCachePool; - - pQuery->slot = -1; - pQuery->pos = -1; - - // 
save these variables first in case it may be changed by write operation - pthread_mutex_lock(&pPool->vmutex); - numOfBlocks = pInfo->numOfBlocks; - lastSlot = pInfo->currentSlot; - pthread_mutex_unlock(&pPool->vmutex); - if (numOfBlocks <= 0) return; - - firstSlot = (lastSlot - numOfBlocks + 1 + pInfo->maxBlocks) % pInfo->maxBlocks; - - // make sure it is there, otherwise, return right away - pBlock = pInfo->cacheBlocks[firstSlot]; - keyFirst = vnodeGetTSInCacheBlock(pBlock, 0); - - pBlock = pInfo->cacheBlocks[lastSlot]; - keyLast = vnodeGetTSInCacheBlock(pBlock, pBlock->numOfPoints - 1); - - pQuery->blockId = pBlock->blockId; - pQuery->currentSlot = lastSlot; - pQuery->numOfBlocks = numOfBlocks; - pQuery->firstSlot = firstSlot; - - if (!QUERY_IS_ASC_QUERY(pQuery)) { - if (pQuery->skey < keyFirst) return; - if (pQuery->ekey > keyLast) return; - } else { - if (pQuery->skey > keyLast) return; - if (pQuery->ekey < keyFirst) return; - } - - while (1) { - numOfBlocks = (lastSlot - firstSlot + 1 + pInfo->maxBlocks) % pInfo->maxBlocks; - if (numOfBlocks == 0) numOfBlocks = pInfo->maxBlocks; - midSlot = (firstSlot + (numOfBlocks >> 1)) % pInfo->maxBlocks; - pBlock = pInfo->cacheBlocks[midSlot]; - - keyFirst = vnodeGetTSInCacheBlock(pBlock, 0); - keyLast = vnodeGetTSInCacheBlock(pBlock, pBlock->numOfPoints - 1); - - if (numOfBlocks == 1) break; - - if (pQuery->skey > keyLast) { - if (numOfBlocks == 2) break; - if (!QUERY_IS_ASC_QUERY(pQuery)) { - int nextSlot = (midSlot + 1 + pInfo->maxBlocks) % pInfo->maxBlocks; - SCacheBlock *pNextBlock = pInfo->cacheBlocks[nextSlot]; - TSKEY nextKeyFirst = vnodeGetTSInCacheBlock(pNextBlock, 0); - if (pQuery->skey < nextKeyFirst) break; - } - firstSlot = (midSlot + 1) % pInfo->maxBlocks; - } else if (pQuery->skey < keyFirst) { - if (QUERY_IS_ASC_QUERY(pQuery)) { - int prevSlot = (midSlot - 1 + pInfo->maxBlocks) % pInfo->maxBlocks; - SCacheBlock *pPrevBlock = pInfo->cacheBlocks[prevSlot]; - TSKEY prevKeyLast = 
vnodeGetTSInCacheBlock(pPrevBlock, pPrevBlock->numOfPoints - 1); - - if (pQuery->skey > prevKeyLast) break; - } - lastSlot = (midSlot - 1 + pInfo->maxBlocks) % pInfo->maxBlocks; - } else { - break; // got the slot - } - } - - pQuery->slot = midSlot; - if (!QUERY_IS_ASC_QUERY(pQuery)) { - if (pQuery->skey < keyFirst) return; - - if (pQuery->ekey > keyLast) { - pQuery->slot = (midSlot + 1 + pInfo->maxBlocks) % pInfo->maxBlocks; - return; - } - } else { - if (pQuery->skey > keyLast) { - pQuery->slot = (midSlot + 1 + pInfo->maxBlocks) % pInfo->maxBlocks; - return; - } - - if (pQuery->ekey < keyFirst) return; - } - - // midSlot and pBlock is the search result - - pBlock = pInfo->cacheBlocks[midSlot]; - pQuery->pos = (*vnodeSearchKeyFunc[pObj->searchAlgorithm])(pBlock->offset[0], pBlock->numOfPoints, pQuery->skey, - pQuery->order.order); - pQuery->key = vnodeGetTSInCacheBlock(pBlock, pQuery->pos); - - if (pQuery->limit.offset > 0 && pQuery->numOfFilterCols == 0) { - int maxReads = QUERY_IS_ASC_QUERY(pQuery) ? pBlock->numOfPoints - pQuery->pos : pQuery->pos + 1; - - if (pQuery->limit.offset < maxReads) { // start position in current block - if (QUERY_IS_ASC_QUERY(pQuery)) { - pQuery->pos += pQuery->limit.offset; - } else { - pQuery->pos -= pQuery->limit.offset; - } - - pQuery->key = vnodeGetTSInCacheBlock(pBlock, pQuery->pos); - pQuery->limit.offset = 0; - } else if (pInfo->numOfBlocks == 1) { - pQuery->pos = -1; // no qualified data - } else { - int step = QUERY_IS_ASC_QUERY(pQuery) ? 
1 : -1; - - pQuery->limit.offset -= maxReads; - midSlot = (midSlot + step + pInfo->maxBlocks) % pInfo->maxBlocks; - - bool hasData = true; - while (pQuery->limit.offset > pInfo->cacheBlocks[midSlot]->numOfPoints) { - pQuery->limit.offset -= pInfo->cacheBlocks[midSlot]->numOfPoints; - - if ((QUERY_IS_ASC_QUERY(pQuery) && midSlot == pQuery->currentSlot) || - (!QUERY_IS_ASC_QUERY(pQuery) && midSlot == pQuery->firstSlot)) { // no qualified data in cache - hasData = false; - break; - } - midSlot = (midSlot + step + pInfo->maxBlocks) % pInfo->maxBlocks; - } - - if (hasData) { - if (QUERY_IS_ASC_QUERY(pQuery)) { - pQuery->pos = pQuery->limit.offset; - } else { - pQuery->pos = pInfo->cacheBlocks[midSlot]->numOfPoints - pQuery->limit.offset - 1; - } - pQuery->limit.offset = 0; - pQuery->slot = midSlot; - - pQuery->key = vnodeGetTSInCacheBlock(pInfo->cacheBlocks[midSlot], pQuery->pos); - } else { - pQuery->pos = -1; // no qualified data - - pBlock = pInfo->cacheBlocks[midSlot]; - if (QUERY_IS_ASC_QUERY(pQuery)) { - pQuery->lastKey = vnodeGetTSInCacheBlock(pBlock, pBlock->numOfPoints - 1); - pQuery->skey = pQuery->lastKey + 1; - } else { - pQuery->lastKey = vnodeGetTSInCacheBlock(pBlock, 0); - pQuery->skey = pQuery->lastKey - 1; - } - } - } - } - - return; -} - -void vnodeSetCommitQuery(SMeterObj *pObj, SQuery *pQuery) { - SCacheInfo *pInfo = (SCacheInfo *)pObj->pCache; - SCachePool *pPool = (SCachePool *)vnodeList[pObj->vnode].pCachePool; - SVnodeObj * pVnode = vnodeList + pObj->vnode; - - pQuery->order.order = TSQL_SO_ASC; - pQuery->numOfCols = pObj->numOfColumns; - pQuery->numOfOutputCols = pObj->numOfColumns; - - for (int16_t col = 0; col < pObj->numOfColumns; ++col) { - pQuery->colList[col].colIdxInBuf = col; - - pQuery->colList[col].data.colId = pObj->schema[col].colId; - pQuery->colList[col].data.bytes = pObj->schema[col].bytes; - pQuery->colList[col].data.type = pObj->schema[col].type; - - SColIndexEx *pColIndexEx = &pQuery->pSelectExpr[col].pBase.colInfo; - - 
pColIndexEx->colId = pObj->schema[col].colId; - pColIndexEx->colIdx = col; - pColIndexEx->colIdxInBuf = col; - pColIndexEx->flag = TSDB_COL_NORMAL; - } - - pQuery->slot = pInfo->commitSlot; - pQuery->pos = pInfo->commitPoint; - pQuery->over = 0; - - pthread_mutex_lock(&pPool->vmutex); - pQuery->currentSlot = pInfo->currentSlot; - pQuery->numOfBlocks = pInfo->numOfBlocks; - pthread_mutex_unlock(&pPool->vmutex); - - if (pQuery->numOfBlocks <= 0 || pQuery->firstSlot < 0) { - pQuery->over = 1; - return; - } - - pQuery->firstSlot = (pQuery->currentSlot - pQuery->numOfBlocks + 1 + pInfo->maxBlocks) % pInfo->maxBlocks; - pQuery->blockId = pInfo->cacheBlocks[pQuery->currentSlot]->blockId; - - SCacheBlock *pCacheBlock; - pCacheBlock = pInfo->cacheBlocks[pInfo->commitSlot]; - if (pInfo->commitSlot == pQuery->currentSlot && pInfo->commitPoint == pCacheBlock->numOfPoints) { - dTrace("vid:%d sid:%d id:%s, no new data to commit", pObj->vnode, pObj->sid, pObj->meterId); - pQuery->over = 1; - return; - } - - if (pQuery->pos == pObj->pointsPerBlock) { - pQuery->slot = (pQuery->slot + 1) % pInfo->maxBlocks; - pQuery->pos = 0; - } - - pCacheBlock = pInfo->cacheBlocks[pQuery->slot]; - TSKEY firstKey = *((TSKEY *)(pCacheBlock->offset[0] + pQuery->pos * pObj->schema[0].bytes)); - - if (firstKey < pQuery->skey) { - pQuery->over = 1; - dTrace("vid:%d sid:%d id:%s, first key is small, keyFirst:%" PRId64 " commitFirstKey:%" PRId64 "", - pObj->vnode, pObj->sid, pObj->meterId, firstKey, pQuery->skey); - pthread_mutex_lock(&(pVnode->vmutex)); - if (firstKey < pVnode->firstKey) pVnode->firstKey = firstKey; - assert(pVnode->firstKey > 0); - pthread_mutex_unlock(&(pVnode->vmutex)); - } -} - -int vnodeSyncRetrieveVnodeStatistic(int vnode, int fd) { - SVnodeObj *pVnode = vnodeList + vnode; - if (taosWriteMsg(fd, &(pVnode->vnodeStatistic.pointsWritten), sizeof(int64_t)) < 0) return -1; - if (taosWriteMsg(fd, &(pVnode->vnodeStatistic.totalStorage), sizeof(int64_t)) < 0) return -1; - if 
(taosWriteMsg(fd, &(pVnode->vnodeStatistic.compStorage), sizeof(int64_t)) < 0) return -1; - - return 0; -} - -int vnodeSyncRestoreVnodeStatistic(int vnode, int fd) { - SVnodeObj *pVnode = vnodeList + vnode; - if (taosReadMsg(fd, &(pVnode->vnodeStatistic.pointsWritten), sizeof(int64_t)) < 0) return -1; - if (taosReadMsg(fd, &(pVnode->vnodeStatistic.totalStorage), sizeof(int64_t)) < 0) return -1; - if (taosReadMsg(fd, &(pVnode->vnodeStatistic.compStorage), sizeof(int64_t)) < 0) return -1; - - return 0; -} - -int vnodeSyncRetrieveCache(int vnode, int fd) { - int32_t sid, slot, points; - SVnodeObj * pVnode; - SMeterObj * pObj; - SCacheInfo * pInfo; - SCacheBlock *pBlock; - int blocksSent, pointsSent; - - pVnode = vnodeList + vnode; - points = 0; - SVnodeCfg *pCfg = &vnodeList[vnode].cfg; - - for (sid = 0; sid < pCfg->maxSessions; ++sid) { - pObj = pVnode->meterList[sid]; - if (pObj == NULL) continue; - - pInfo = (SCacheInfo *)pObj->pCache; - if (pInfo == NULL) continue; - - // write sid first - if (taosWriteMsg(fd, &sid, sizeof(sid)) <= 0) return -1; - if (taosWriteMsg(fd, &(pObj->lastKey), sizeof(pObj->lastKey)) <= 0) return -1; - if (taosWriteMsg(fd, &(pObj->lastKeyOnFile), sizeof(pObj->lastKeyOnFile)) <= 0) return -1; - if (taosWriteMsg(fd, &(pInfo->commitPoint), sizeof(pInfo->commitPoint)) <= 0) return -1; - - dTrace("vid:%d sid:%d id:%s, send lastKey:%" PRId64 " lastKeyOnFile:%" PRId64, vnode, sid, pObj->meterId, pObj->lastKey, - pObj->lastKeyOnFile); - - slot = pInfo->commitSlot; - blocksSent = 0; - pointsSent = 0; - - while (pInfo->numOfBlocks > 0) { - pBlock = pInfo->cacheBlocks[slot]; - if (pBlock->numOfPoints == 0) break; - - // write the number of points - points = pBlock->numOfPoints; - if (taosWriteMsg(fd, &(points), sizeof(points)) <= 0) return -1; - - // write the data - for (int col = 0; col < pObj->numOfColumns; ++col) - if (taosWriteMsg(fd, pBlock->offset[col], pObj->schema[col].bytes * points) <= 0) return -1; - - TSKEY lastKey = *((TSKEY 
*)(pBlock->offset[0] + pObj->schema[0].bytes * (points - 1))); - dTrace("vid:%d sid:%d id:%s, cache block is sent, points:%d lastKey:%" PRId64, vnode, sid, pObj->meterId, points, - lastKey); - - blocksSent++; - pointsSent += pBlock->numOfPoints; - if (slot == pInfo->currentSlot) break; - - slot = (slot + 1) % pInfo->maxBlocks; - } - - // set number of points as zero at the end - points = 0; - if (taosWriteMsg(fd, &(points), sizeof(points)) <= 0) return -1; - } - - sid = -1; - if (taosWriteMsg(fd, &sid, sizeof(sid)) < 0) return -1; - if (vnodeSyncRetrieveVnodeStatistic(vnode, fd) < 0) return -1; - - return 0; -} - -int vnodeSyncRestoreCache(int vnode, int fd) { - int32_t sid, points, i, slot; - SMeterObj * pObj; - SCacheInfo * pInfo; - SCacheBlock *pBlock; - int blocksReceived, pointsReceived; - int numOfBlocks; - SVnodeCfg * pCfg = &vnodeList[vnode].cfg; - SCachePool * pPool = (SCachePool *)vnodeList[vnode].pCachePool; - - while (1) { - // read sid first - - if (taosReadMsg(fd, &sid, sizeof(sid)) <= 0) return -1; - if (sid >= pCfg->maxSessions) { - dError("vid:%d, restore cache, sid:%d is messed up", vnode, sid); - return -1; - } - if (sid < 0) break; - - pObj = vnodeList[vnode].meterList[sid]; - if (pObj == NULL) { - dError("vid:%d sid:%d, meter is not there", vnode, sid); - vnodeSendMeterCfgMsg(vnode, sid); - return -1; - } - - pInfo = (SCacheInfo *)pObj->pCache; - numOfBlocks = pInfo->numOfBlocks; - pthread_mutex_lock(&pPool->vmutex); - for (i = 0; i < numOfBlocks; ++i) { - slot = (pInfo->currentSlot - i + pInfo->maxBlocks) % pInfo->maxBlocks; - pBlock = pInfo->cacheBlocks[slot]; - vnodeFreeCacheBlock(pBlock); - } - pthread_mutex_unlock(&pPool->vmutex); - - pInfo->unCommittedBlocks = 0; - if (taosReadMsg(fd, &(pObj->lastKey), sizeof(pObj->lastKey)) <= 0) return -1; - if (taosReadMsg(fd, &(pObj->lastKeyOnFile), sizeof(pObj->lastKeyOnFile)) <= 0) return -1; - if (taosReadMsg(fd, &(pInfo->commitPoint), sizeof(pInfo->commitPoint)) <= 0) return -1; - - dTrace("vid:%d 
sid:%d id:%s, commitPoint:%d lastKeyOnFile:%" PRId64, vnode, sid, pObj->meterId, pInfo->commitPoint, - pObj->lastKeyOnFile); - - if (vnodeList[pObj->vnode].lastKey < pObj->lastKey) vnodeList[pObj->vnode].lastKey = pObj->lastKey; - - if (vnodeList[pObj->vnode].lastKeyOnFile < pObj->lastKeyOnFile) - vnodeList[pObj->vnode].lastKeyOnFile = pObj->lastKeyOnFile; - - pInfo->currentSlot = -1; - pInfo->commitSlot = 0; - memset(pInfo->cacheBlocks, 0, sizeof(SCacheBlock *) * pInfo->maxBlocks); - blocksReceived = 0; - pointsReceived = 0; - pObj->freePoints = pObj->pointsPerBlock * pInfo->maxBlocks; - - while (1) { - // read number of points; - points = 0; - if (taosReadMsg(fd, &points, sizeof(points)) <= 0) return -1; - if (points == 0) break; - - if (vnodeAllocateCacheBlock(pObj) < 0) return -1; - pBlock = pInfo->cacheBlocks[pInfo->currentSlot]; - pBlock->numOfPoints = points; - - // read the data - for (int col = 0; col < pObj->numOfColumns; ++col) - if (taosReadMsg(fd, pBlock->offset[col], pObj->schema[col].bytes * points) <= 0) return -1; - - atomic_fetch_sub_32(&pObj->freePoints, points); - blocksReceived++; - pointsReceived += points; - pObj->lastKey = *((TSKEY *)(pBlock->offset[0] + pObj->schema[0].bytes * (points - 1))); - if (vnodeList[pObj->vnode].lastKey < pObj->lastKey) vnodeList[pObj->vnode].lastKey = pObj->lastKey; - - if (vnodeList[pObj->vnode].firstKey > *(TSKEY *)(pBlock->offset[0])) - vnodeList[pObj->vnode].firstKey = *(TSKEY *)(pBlock->offset[0]); - - dTrace("vid:%d sid:%d id:%s, cache block is received, points:%d lastKey:%" PRId64, vnode, sid, pObj->meterId, points, - pObj->lastKey); - } - } - - if (vnodeSyncRestoreVnodeStatistic(pObj->vnode, fd) < 0) return -1; - - return 0; -} - -int vnodeIsCacheCommitted(SMeterObj *pObj) { - if (pObj->pCache == NULL) return 1; - - SCacheInfo *pInfo = (SCacheInfo *)pObj->pCache; - if (pInfo->currentSlot < 0) return 1; - - SCacheBlock *pBlock = pInfo->cacheBlocks[pInfo->currentSlot]; - if (pInfo->commitSlot != 
pInfo->currentSlot) return 0; - if (pInfo->commitPoint != pBlock->numOfPoints) return 0; - - return 1; -} diff --git a/src/vnode/detail/src/vnodeCommit.c b/src/vnode/detail/src/vnodeCommit.c deleted file mode 100644 index a43ad3728a..0000000000 --- a/src/vnode/detail/src/vnodeCommit.c +++ /dev/null @@ -1,292 +0,0 @@ -/* - * Copyright (c) 2019 TAOS Data, Inc. - * - * This program is free software: you can use, redistribute, and/or modify - * it under the terms of the GNU Affero General Public License, version 3 - * or later ("AGPL"), as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. - * - * You should have received a copy of the GNU Affero General Public License - * along with this program. If not, see . - */ - -#define _GNU_SOURCE /* See feature_test_macros(7) */ -#include "os.h" - -#include "taosdef.h" -#include "vnode.h" -#include "vnodeUtil.h" -#include "vnodeStatus.h" - -typedef struct { - int sversion; - int sid; - int contLen; - int action:8; - int simpleCheck:24; -} SCommitHead; - -int vnodeOpenCommitLog(int vnode, uint64_t firstV) { - SVnodeObj *pVnode = vnodeList + vnode; - char * fileName = pVnode->logFn; - - pVnode->logFd = open(fileName, O_RDWR | O_CREAT, S_IRWXU | S_IRWXG | S_IRWXO); - if (pVnode->logFd < 0) { - dError("vid:%d, failed to open file:%s, reason:%s", vnode, fileName, strerror(errno)); - return -1; - } - - dTrace("vid:%d, logfd:%d, open file:%s success", vnode, pVnode->logFd, fileName); - if (posix_fallocate64(pVnode->logFd, 0, pVnode->mappingSize) != 0) { - dError("vid:%d, logfd:%d, failed to alloc file size:%d, reason:%s", vnode, pVnode->logFd, pVnode->mappingSize, strerror(errno)); - perror("fallocate failed"); - goto _err_log_open; - } - - struct stat statbuf; - stat(fileName, &statbuf); - int64_t length = statbuf.st_size; - - if (length != 
pVnode->mappingSize) { - dError("vid:%d, logfd:%d, alloc file size:%" PRId64 " not equal to mapping size:%" PRId64, vnode, pVnode->logFd, length, - pVnode->mappingSize); - goto _err_log_open; - } - - pVnode->pMem = mmap(0, pVnode->mappingSize, PROT_WRITE | PROT_READ, MAP_SHARED, pVnode->logFd, 0); - if (pVnode->pMem == MAP_FAILED) { - dError("vid:%d, logfd:%d, failed to map file, reason:%s", vnode, pVnode->logFd, strerror(errno)); - goto _err_log_open; - } - - pVnode->pWrite = pVnode->pMem; - memcpy(pVnode->pWrite, &(firstV), sizeof(firstV)); - pVnode->pWrite += sizeof(firstV); - - return pVnode->logFd; - - _err_log_open: - close(pVnode->logFd); - remove(fileName); - pVnode->logFd = -1; - return -1; -} - -int vnodeRenewCommitLog(int vnode) { - SVnodeObj *pVnode = vnodeList + vnode; - char * fileName = pVnode->logFn; - char * oldName = pVnode->logOFn; - - pthread_mutex_lock(&(pVnode->logMutex)); - - if (FD_VALID(pVnode->logFd)) { - munmap(pVnode->pMem, pVnode->mappingSize); - close(pVnode->logFd); - rename(fileName, oldName); - } - - if (pVnode->cfg.commitLog) vnodeOpenCommitLog(vnode, vnodeList[vnode].version); - - pthread_mutex_unlock(&(pVnode->logMutex)); - - return pVnode->logFd; -} - -void vnodeRemoveCommitLog(int vnode) { remove(vnodeList[vnode].logOFn); } - -size_t vnodeRestoreDataFromLog(int vnode, char *fileName, uint64_t *firstV) { - int fd, ret; - char * cont = NULL; - size_t totalLen = 0; - int actions = 0; - - SVnodeObj *pVnode = vnodeList + vnode; - if (pVnode->meterList == NULL) { - dError("vid:%d, vnode is not initialized!!!", vnode); - return 0; - } - - struct stat fstat; - if (stat(fileName, &fstat) < 0) { - dTrace("vid:%d, no log file:%s", vnode, fileName); - return 0; - } - - dTrace("vid:%d, uncommitted data in file:%s, restore them ...", vnode, fileName); - - fd = open(fileName, O_RDWR); - if (fd < 0) { - dError("vid:%d, failed to open:%s, reason:%s", vnode, fileName, strerror(errno)); - goto _error; - } - - ret = read(fd, firstV, 
sizeof(pVnode->version)); - if (ret <= 0) { - dError("vid:%d, failed to read version", vnode); - goto _error; - } - pVnode->version = *firstV; - - int32_t bufLen = TSDB_PAYLOAD_SIZE; - cont = calloc(1, bufLen); - if (cont == NULL) { - dError("vid:%d, out of memory", vnode); - goto _error; - } - TSKEY now = taosGetTimestamp(pVnode->cfg.precision); - - SCommitHead head; - int simpleCheck = 0; - while (1) { - ret = read(fd, &head, sizeof(head)); - if (ret < 0) goto _error; - if (ret == 0) break; - if (((head.sversion+head.sid+head.contLen+head.action) & 0xFFFFFF) != head.simpleCheck) break; - simpleCheck = head.simpleCheck; - - // head.contLen validation is removed - if (head.sid >= pVnode->cfg.maxSessions || head.sid < 0 || head.action >= TSDB_ACTION_MAX) { - dError("vid, invalid commit head, sid:%d contLen:%d action:%d", head.sid, head.contLen, head.action); - } else { - if (head.contLen > 0) { - if (bufLen < head.contLen+sizeof(simpleCheck)) { // pre-allocated buffer is not enough - cont = realloc(cont, head.contLen+sizeof(simpleCheck)); - bufLen = head.contLen+sizeof(simpleCheck); - } - - if (read(fd, cont, head.contLen+sizeof(simpleCheck)) < 0) goto _error; - if (*(int *)(cont+head.contLen) != simpleCheck) break; - SMeterObj *pObj = pVnode->meterList[head.sid]; - if (pObj == NULL) { - dError("vid:%d, sid:%d not exists, ignore data in commit log, contLen:%d action:%d", - vnode, head.sid, head.contLen, head.action); - continue; - } - - if (vnodeIsMeterState(pObj, TSDB_METER_STATE_DROPPING)) { - dWarn("vid:%d sid:%d id:%s, meter is dropped, ignore data in commit log, contLen:%d action:%d", - vnode, head.sid, head.contLen, head.action); - continue; - } - - int32_t numOfPoints = 0; - (*vnodeProcessAction[head.action])(pObj, cont, head.contLen, TSDB_DATA_SOURCE_LOG, NULL, head.sversion, - &numOfPoints, now); - actions++; - } else { - break; - } - } - - totalLen += sizeof(head) + head.contLen + sizeof(simpleCheck); - } - - tclose(fd); - tfree(cont); - dTrace("vid:%d, %d 
pieces of uncommitted data are restored", vnode, actions); - - return totalLen; - -_error: - tclose(fd); - tfree(cont); - dError("vid:%d, failed to restore %s, remove this node...", vnode, fileName); - - // rename to error file for future process - char *f = NULL; - taosFileRename(fileName, "error", '/', &f); - free(f); - - return -1; -} - -int vnodeInitCommit(int vnode) { - size_t size = 0; - uint64_t firstV = 0; - SVnodeObj *pVnode = vnodeList + vnode; - - pthread_mutex_init(&(pVnode->logMutex), NULL); - - sprintf(pVnode->logFn, "%s/vnode%d/db/submit%d.log", tsDirectory, vnode, vnode); - sprintf(pVnode->logOFn, "%s/vnode%d/db/submit%d.olog", tsDirectory, vnode, vnode); - pVnode->mappingSize = ((int64_t)pVnode->cfg.cacheBlockSize) * pVnode->cfg.cacheNumOfBlocks.totalBlocks * 1.5; - pVnode->mappingThreshold = pVnode->mappingSize * 0.7; - - // restore from .olog file and commit to file - size = vnodeRestoreDataFromLog(vnode, pVnode->logOFn, &firstV); - if (size < 0) return -1; - if (size > 0) { - if (pVnode->commitInProcess == 0) vnodeCommitToFile(pVnode); - remove(pVnode->logOFn); - } - - // restore from .log file to cache - size = vnodeRestoreDataFromLog(vnode, pVnode->logFn, &firstV); - if (size < 0) return -1; - - if (pVnode->cfg.commitLog == 0) return 0; - - if (size == 0) firstV = pVnode->version; - if (vnodeOpenCommitLog(vnode, firstV) < 0) { - dError("vid:%d, commit log init failed", vnode); - return -1; - } - - pVnode->pWrite += size; - dPrint("vid:%d, commit log is initialized", vnode); - - return 0; -} - -void vnodeCleanUpCommit(int vnode) { - SVnodeObj *pVnode = vnodeList + vnode; - - if (FD_VALID(pVnode->logFd)) close(pVnode->logFd); - - if (pVnode->cfg.commitLog && (pVnode->logFd > 0 && remove(pVnode->logFn) < 0)) { - dError("vid:%d, failed to remove:%s", vnode, pVnode->logFn); - taosLogError("vid:%d, failed to remove:%s", vnode, pVnode->logFn); - } - - pthread_mutex_destroy(&(pVnode->logMutex)); -} - -int vnodeWriteToCommitLog(SMeterObj *pObj, char 
action, char *cont, int contLen, int sverion) { - SVnodeObj *pVnode = vnodeList + pObj->vnode; - if (pVnode->pWrite == NULL) return 0; - - SCommitHead head; - head.sid = pObj->sid; - head.action = action; - head.sversion = pObj->sversion; - head.contLen = contLen; - head.simpleCheck = (head.sversion+head.sid+head.contLen+head.action) & 0xFFFFFF; - int simpleCheck = head.simpleCheck; - - pthread_mutex_lock(&(pVnode->logMutex)); - // 100 bytes redundant mem space - if (pVnode->mappingSize - (pVnode->pWrite - pVnode->pMem) < contLen + sizeof(SCommitHead) + sizeof(simpleCheck) + 100) { - pthread_mutex_unlock(&(pVnode->logMutex)); - dTrace("vid:%d, mem mapping space is not enough, wait for commit", pObj->vnode); - vnodeProcessCommitTimer(pVnode, NULL); - return TSDB_CODE_ACTION_IN_PROGRESS; - } - char *pWrite = pVnode->pWrite; - pVnode->pWrite += sizeof(head) + contLen + sizeof(simpleCheck); - memcpy(pWrite, (char *)&head, sizeof(head)); - memcpy(pWrite + sizeof(head), cont, contLen); - memcpy(pWrite + sizeof(head) + contLen, &simpleCheck, sizeof(simpleCheck)); - pthread_mutex_unlock(&(pVnode->logMutex)); - - if (pVnode->pWrite - pVnode->pMem > pVnode->mappingThreshold) { - dTrace("vid:%d, mem mapping is close to limit, commit", pObj->vnode); - vnodeProcessCommitTimer(pVnode, NULL); - } - - dTrace("vid:%d sid:%d, data is written to commit log", pObj->vnode, pObj->sid); - - return 0; -} diff --git a/src/vnode/detail/src/vnodeFile.c b/src/vnode/detail/src/vnodeFile.c deleted file mode 100644 index 5bda602370..0000000000 --- a/src/vnode/detail/src/vnodeFile.c +++ /dev/null @@ -1,1880 +0,0 @@ -/* - * Copyright (c) 2019 TAOS Data, Inc. - * - * This program is free software: you can use, redistribute, and/or modify - * it under the terms of the GNU Affero General Public License, version 3 - * or later ("AGPL"), as published by the Free Software Foundation. 
- * - * This program is distributed in the hope that it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. - * - * You should have received a copy of the GNU Affero General Public License - * along with this program. If not, see . - */ - -#define _DEFAULT_SOURCE -#include "os.h" - -#include "tscompression.h" -#include "tutil.h" -#include "vnode.h" -#include "vnodeFile.h" -#include "vnodeUtil.h" -#include "vnodeStatus.h" - -#define FILE_QUERY_NEW_BLOCK -5 // a special negative number - -const int16_t vnodeFileVersion = 0; - -int (*pCompFunc[])(const char *const input, int inputSize, const int elements, char *const output, int outputSize, - char algorithm, char *const buffer, int bufferSize) = {NULL, - tsCompressBool, - tsCompressTinyint, - tsCompressSmallint, - tsCompressInt, - tsCompressBigint, - tsCompressFloat, - tsCompressDouble, - tsCompressString, - tsCompressTimestamp, - tsCompressString}; - -int (*pDecompFunc[])(const char *const input, int compressedSize, const int elements, char *const output, - int outputSize, char algorithm, char *const buffer, int bufferSize) = {NULL, - tsDecompressBool, - tsDecompressTinyint, - tsDecompressSmallint, - tsDecompressInt, - tsDecompressBigint, - tsDecompressFloat, - tsDecompressDouble, - tsDecompressString, - tsDecompressTimestamp, - tsDecompressString}; - -int vnodeUpdateFileMagic(int vnode, int fileId); -int vnodeRecoverCompHeader(int vnode, int fileId); -int vnodeRecoverHeadFile(int vnode, int fileId); -int vnodeRecoverDataFile(int vnode, int fileId); -int vnodeForwardStartPosition(SQuery *pQuery, SCompBlock *pBlock, int32_t slotIdx, SVnodeObj *pVnode, SMeterObj *pObj); -int vnodeCheckNewHeaderFile(int fd, SVnodeObj *pVnode); -char* vnodeGetDataDir(int vnode, int fileId); -char* vnodeGetDiskFromHeadFile(char *headName); -void vnodeAdustVnodeFile(SVnodeObj *pVnode); -int vnodeSyncRetrieveFile(int vnode, int fd, uint32_t peerFid, 
uint64_t *fmagic); -int vnodeSyncRestoreFile(int vnode, int sfd); -void vnodeAdjustFileTier(int vnode); - -void vnodeGetHeadDataLname(char *headName, char *dataName, char *lastName, int vnode, int fileId) { - if (headName != NULL) sprintf(headName, "%s/vnode%d/db/v%df%d.head", tsDirectory, vnode, vnode, fileId); - if (dataName != NULL) sprintf(dataName, "%s/vnode%d/db/v%df%d.data", tsDirectory, vnode, vnode, fileId); - if (lastName != NULL) sprintf(lastName, "%s/vnode%d/db/v%df%d.last", tsDirectory, vnode, vnode, fileId); -} - -void vnodeGetHeadDataDname(char *dHeadName, char *dDataName, char *dLastName, int vnode, int fileId, char *path) { - if (dHeadName != NULL) sprintf(dHeadName, "%s/data/vnode%d/v%df%d.head0", path, vnode, vnode, fileId); - if (dDataName != NULL) sprintf(dDataName, "%s/data/vnode%d/v%df%d.data", path, vnode, vnode, fileId); - if (dLastName != NULL) sprintf(dLastName, "%s/data/vnode%d/v%df%d.last0", path, vnode, vnode, fileId); -} - -void vnodeGetDnameFromLname(char *lhead, char *ldata, char *llast, char *dhead, char *ddata, char *dlast) { - if (lhead != NULL) { - assert(dhead != NULL); - readlink(lhead, dhead, TSDB_FILENAME_LEN); - } - - if (ldata != NULL) { - assert(ddata != NULL); - readlink(ldata, ddata, TSDB_FILENAME_LEN); - } - - if (llast != NULL) { - assert(dlast != NULL); - readlink(llast, dlast, TSDB_FILENAME_LEN); - } -} - -void vnodeGetHeadTname(char *nHeadName, char *nLastName, int vnode, int fileId) { - if (nHeadName != NULL) sprintf(nHeadName, "%s/vnode%d/db/v%df%d.t", tsDirectory, vnode, vnode, fileId); - if (nLastName != NULL) sprintf(nLastName, "%s/vnode%d/db/v%df%d.l", tsDirectory, vnode, vnode, fileId); -} - -void vnodeCreateDataDirIfNeeded(int vnode, char *path) { - char directory[TSDB_FILENAME_LEN] = "\0"; - - sprintf(directory, "%s/data/vnode%d", path, vnode); - - if (access(directory, F_OK) != 0) mkdir(directory, 0755); -} - -int vnodeCreateHeadDataFile(int vnode, int fileId, char *headName, char *dataName, char 
*lastName) { - char dHeadName[TSDB_FILENAME_LEN]; - char dDataName[TSDB_FILENAME_LEN]; - char dLastName[TSDB_FILENAME_LEN]; - - char *path = vnodeGetDataDir(vnode, fileId); - if (path == NULL) { - dError("vid:%d, fileId:%d, failed to get dataDir", vnode, fileId); - return -1; - } - - vnodeCreateDataDirIfNeeded(vnode, path); - - vnodeGetHeadDataLname(headName, dataName, lastName, vnode, fileId); - vnodeGetHeadDataDname(dHeadName, dDataName, dLastName, vnode, fileId, path); - if (symlink(dHeadName, headName) != 0) return -1; - if (symlink(dDataName, dataName) != 0) return -1; - if (symlink(dLastName, lastName) != 0) return -1; - - dPrint("vid:%d, fileId:%d, empty header file:%s file:%s lastFile:%s on disk:%s is created ", - vnode, fileId, headName, dataName, lastName, path); - - return 0; -} - -int vnodeCreateEmptyCompFile(int vnode, int fileId) { - char headName[TSDB_FILENAME_LEN]; - char dataName[TSDB_FILENAME_LEN]; - char lastName[TSDB_FILENAME_LEN]; - int tfd; - char *temp; - - if (vnodeCreateHeadDataFile(vnode, fileId, headName, dataName, lastName) < 0) { - dError("failed to create head data file, vnode: %d, fileId: %d", vnode, fileId); - return -1; - } - - tfd = open(headName, O_WRONLY | O_CREAT | O_TRUNC, S_IRWXU | S_IRWXG | S_IRWXO); - if (tfd < 0) { - dError("failed to create head file:%s, reason:%s", headName, strerror(errno)); - return -1; - } - - vnodeCreateFileHeaderFd(tfd); - int size = sizeof(SCompHeader) * vnodeList[vnode].cfg.maxSessions + sizeof(TSCKSUM); - temp = malloc(size); - memset(temp, 0, size); - taosCalcChecksumAppend(0, (uint8_t *)temp, size); - - lseek(tfd, TSDB_FILE_HEADER_LEN, SEEK_SET); - twrite(tfd, temp, size); - free(temp); - close(tfd); - - tfd = open(dataName, O_WRONLY | O_CREAT | O_TRUNC, S_IRWXU | S_IRWXG | S_IRWXO); - if (tfd < 0) { - dError("failed to create data file:%s, reason:%s", dataName, strerror(errno)); - return -1; - } - vnodeCreateFileHeaderFd(tfd); - close(tfd); - - tfd = open(lastName, O_WRONLY | O_CREAT | O_TRUNC, 
S_IRWXU | S_IRWXG | S_IRWXO); - if (tfd < 0) { - dError("failed to create last file:%s, reason:%s", lastName, strerror(errno)); - return -1; - } - vnodeCreateFileHeaderFd(tfd); - close(tfd); - - return 0; -} - -int vnodeCreateNeccessaryFiles(SVnodeObj *pVnode) { - int numOfFiles = 0, fileId, filesAdded = 0; - int vnode = pVnode->vnode; - SVnodeCfg *pCfg = &(pVnode->cfg); - - if (pVnode->lastKeyOnFile == 0) { - if (pCfg->daysPerFile == 0) pCfg->daysPerFile = 10; - pVnode->fileId = pVnode->firstKey / tsMsPerDay[(uint8_t)pVnode->cfg.precision] / pCfg->daysPerFile; - pVnode->lastKeyOnFile = (int64_t)(pVnode->fileId + 1) * pCfg->daysPerFile * tsMsPerDay[(uint8_t)pVnode->cfg.precision] - 1; - pVnode->numOfFiles = 1; - if (vnodeCreateEmptyCompFile(vnode, pVnode->fileId) < 0) return -1; - } - - numOfFiles = (pVnode->lastKeyOnFile - pVnode->commitFirstKey) / tsMsPerDay[(uint8_t)pVnode->cfg.precision] / pCfg->daysPerFile; - if (pVnode->commitFirstKey > pVnode->lastKeyOnFile) numOfFiles = -1; - - dTrace("vid:%d, commitFirstKey:%" PRId64 " lastKeyOnFile:%" PRId64 " numOfFiles:%d fileId:%d vnodeNumOfFiles:%d", pVnode->vnode, - pVnode->commitFirstKey, pVnode->lastKeyOnFile, numOfFiles, pVnode->fileId, pVnode->numOfFiles); - - if (numOfFiles >= pVnode->numOfFiles) { - // create empty header files backward - filesAdded = numOfFiles - pVnode->numOfFiles + 1; - assert(filesAdded <= pVnode->maxFiles + 2); - for (int i = 0; i < filesAdded; ++i) { - fileId = pVnode->fileId - pVnode->numOfFiles - i; - if (vnodeCreateEmptyCompFile(vnode, fileId) < 0) -#ifdef CLUSTER - return vnodeRecoverFromPeer(pVnode, fileId); -#else - return -1; -#endif - } - } else if (numOfFiles < 0) { - // create empty header files forward - pVnode->fileId++; - if (vnodeCreateEmptyCompFile(vnode, pVnode->fileId) < 0) -#ifdef CLUSTER - return vnodeRecoverFromPeer(pVnode, pVnode->fileId); -#else - return -1; -#endif - pVnode->lastKeyOnFile += (int64_t)tsMsPerDay[(uint8_t)pVnode->cfg.precision] * pCfg->daysPerFile; - 
filesAdded = 1; - numOfFiles = 0; // hacker way - } - - fileId = pVnode->fileId - numOfFiles; - pVnode->commitLastKey = - pVnode->lastKeyOnFile - (int64_t)numOfFiles * tsMsPerDay[(uint8_t)pVnode->cfg.precision] * pCfg->daysPerFile; - pVnode->commitFirstKey = pVnode->commitLastKey - (int64_t)tsMsPerDay[(uint8_t)pVnode->cfg.precision] * pCfg->daysPerFile + 1; - pVnode->commitFileId = fileId; - pVnode->numOfFiles = pVnode->numOfFiles + filesAdded; - - return 0; -} - - -int vnodeOpenCommitFiles(SVnodeObj *pVnode, int noTempLast) { - char name[TSDB_FILENAME_LEN]; - char dHeadName[TSDB_FILENAME_LEN] = "\0"; - char dLastName[TSDB_FILENAME_LEN] = "\0"; - int len = 0; - struct stat filestat; - int vnode = pVnode->vnode; - int fileId; - - if (vnodeCreateNeccessaryFiles(pVnode) < 0) return -1; - - fileId = pVnode->commitFileId; - - dTrace("vid:%d, commit fileId:%d, commitLastKey:%" PRId64 ", vnodeLastKey:%" PRId64 ", lastKeyOnFile:%" PRId64 " numOfFiles:%d", - vnode, fileId, pVnode->commitLastKey, pVnode->lastKey, pVnode->lastKeyOnFile, pVnode->numOfFiles); - - int minSize = sizeof(SCompHeader) * pVnode->cfg.maxSessions + sizeof(TSCKSUM) + TSDB_FILE_HEADER_LEN; - - vnodeGetHeadDataLname(pVnode->cfn, name, pVnode->lfn, vnode, fileId); - readlink(pVnode->cfn, dHeadName, TSDB_FILENAME_LEN); - readlink(pVnode->lfn, dLastName, TSDB_FILENAME_LEN); - len = strlen(dHeadName); - if (dHeadName[len - 1] == 'd') { - dHeadName[len] = '0'; - dHeadName[len + 1] = '\0'; - } else { - dHeadName[len - 1] = '0' + (dHeadName[len - 1] + 1 - '0') % 2; - } - len = strlen(dLastName); - if (dLastName[len - 1] == 't') { - dLastName[len] = '0'; - dLastName[len + 1] = '\0'; - } else { - dLastName[len - 1] = '0' + (dLastName[len - 1] + 1 - '0') % 2; - } - vnodeGetHeadTname(pVnode->nfn, pVnode->tfn, vnode, fileId); - symlink(dHeadName, pVnode->nfn); - if (!noTempLast) symlink(dLastName, pVnode->tfn); - - // open head file - pVnode->hfd = open(pVnode->cfn, O_RDONLY); - if (pVnode->hfd < 0) { - 
dError("vid:%d, failed to open head file:%s, reason:%s", vnode, pVnode->cfn, strerror(errno)); - taosLogError("vid:%d, failed to open head file:%s, reason:%s", vnode, pVnode->cfn, strerror(errno)); - vnodeRecoverFromPeer(pVnode, fileId); - goto _error; - } - - // verify head file, check size - fstat(pVnode->hfd, &filestat); - if (filestat.st_size < minSize) { - dError("vid:%d, head file:%s corrupted", vnode, pVnode->cfn); - taosLogError("vid:%d, head file:%s corrupted", vnode, pVnode->cfn); - vnodeRecoverFromPeer(pVnode, fileId); - goto _error; - } - - // open a new header file - pVnode->nfd = open(pVnode->nfn, O_RDWR | O_CREAT | O_TRUNC, S_IRWXU | S_IRWXG | S_IRWXO); - if (pVnode->nfd < 0) { - dError("vid:%d, failed to open new head file:%s, reason:%s", vnode, pVnode->nfn, strerror(errno)); - taosLogError("vid:%d, failed to open new head file:%s, reason:%s", vnode, pVnode->nfn, strerror(errno)); - goto _error; - } - vnodeCreateFileHeaderFd(pVnode->nfd); - - // open existing data file - pVnode->dfd = open(name, O_WRONLY | O_CREAT, S_IRWXU | S_IRWXG | S_IRWXO); - if (pVnode->dfd < 0) { - dError("vid:%d, failed to open data file:%s, reason:%s", vnode, name, strerror(errno)); - taosLogError("vid:%d, failed to open data file:%s, reason:%s", vnode, name, strerror(errno)); - vnodeRecoverFromPeer(pVnode, fileId); - goto _error; - } - - // verify data file, check size - fstat(pVnode->dfd, &filestat); - if (filestat.st_size < TSDB_FILE_HEADER_LEN) { - dError("vid:%d, data file:%s corrupted", vnode, name); - taosLogError("vid:%d, data file:%s corrupted", vnode, name); - vnodeRecoverFromPeer(pVnode, fileId); - goto _error; - } else { - dPrint("vid:%d, data file:%s is opened to write", vnode, name); - } - - // open last file - pVnode->lfd = open(pVnode->lfn, O_RDWR); - if (pVnode->lfd < 0) { - dError("vid:%d, failed to open last file:%s, reason:%s", vnode, pVnode->lfn, strerror(errno)); - taosLogError("vid:%d, failed to open last file:%s, reason:%s", vnode, pVnode->lfn, 
strerror(errno)); - vnodeRecoverFromPeer(pVnode, fileId); - goto _error; - } - - // verify last file, check size - fstat(pVnode->lfd, &filestat); - if (filestat.st_size < TSDB_FILE_HEADER_LEN) { - dError("vid:%d, last file:%s corrupted", vnode, pVnode->lfn); - taosLogError("vid:%d, last file:%s corrupted", vnode, pVnode->lfn); - vnodeRecoverFromPeer(pVnode, fileId); - goto _error; - } - - // open a new last file - if (noTempLast) { - pVnode->tfd = -1; // do not open temporary last file - } else { - pVnode->tfd = open(pVnode->tfn, O_RDWR | O_CREAT | O_TRUNC, S_IRWXU | S_IRWXG | S_IRWXO); - if (pVnode->tfd < 0) { - dError("vid:%d, failed to open new last file:%s, reason:%s", vnode, pVnode->tfn, strerror(errno)); - taosLogError("vid:%d, failed to open new last file:%s, reason:%s", vnode, pVnode->tfn, strerror(errno)); - goto _error; - } - vnodeCreateFileHeaderFd(pVnode->tfd); - pVnode->lfSize = lseek(pVnode->tfd, 0, SEEK_END); - } - - int size = sizeof(SCompHeader) * pVnode->cfg.maxSessions + sizeof(TSCKSUM); - char *temp = malloc(size); - if (NULL == temp) { - dError("vid:%d, malloc failed", vnode); - taosLogError("vid:%d, malloc failed", vnode); - //vnodeRecoverFromPeer(pVnode, fileId); - goto _error; - } - memset(temp, 0, size); - - taosCalcChecksumAppend(0, (uint8_t *)temp, size); - twrite(pVnode->nfd, temp, size); - free(temp); - - pVnode->dfSize = lseek(pVnode->dfd, 0, SEEK_END); - - return 0; - -_error: - if (pVnode->dfd > 0) close(pVnode->dfd); - pVnode->dfd = 0; - - if (pVnode->hfd > 0) close(pVnode->hfd); - pVnode->hfd = 0; - - if (pVnode->nfd > 0) close(pVnode->nfd); - pVnode->nfd = 0; - - if (pVnode->lfd > 0) close(pVnode->lfd); - pVnode->lfd = 0; - - if (pVnode->tfd > 0) close(pVnode->tfd); - pVnode->tfd = 0; - - return -1; -} - -void vnodeRemoveFile(int vnode, int fileId) { - char headName[TSDB_FILENAME_LEN] = "\0"; - char dataName[TSDB_FILENAME_LEN] = "\0"; - char lastName[TSDB_FILENAME_LEN] = "\0"; - char dHeadName[TSDB_FILENAME_LEN] = "\0"; - char 
dDataName[TSDB_FILENAME_LEN] = "\0"; - char dLastName[TSDB_FILENAME_LEN] = "\0"; - SVnodeObj * pVnode = NULL; - SVnodeHeadInfo headInfo; - - pVnode = vnodeList + vnode; - - vnodeGetHeadDataLname(headName, dataName, lastName, vnode, fileId); - char *path = vnodeGetDiskFromHeadFile(headName); - if (path == NULL) { - return ; - } - vnodeGetDnameFromLname(headName, dataName, lastName, dHeadName, dDataName, dLastName); - - int fd = open(headName, O_RDWR | O_CREAT, S_IRWXU | S_IRWXG | S_IRWXO); - if (fd > 0) { - vnodeGetHeadFileHeaderInfo(fd, &headInfo); - atomic_fetch_add_64(&(pVnode->vnodeStatistic.totalStorage), -headInfo.totalStorage); - close(fd); - } - - remove(headName); - remove(dataName); - remove(lastName); - remove(dHeadName); - remove(dDataName); - remove(dLastName); - - dPrint("vid:%d fileId:%d on disk: %s is removed, numOfFiles:%d maxFiles:%d", vnode, fileId, path, - pVnode->numOfFiles, pVnode->maxFiles); -} - -void vnodeCloseCommitFiles(SVnodeObj *pVnode) { - char dpath[TSDB_FILENAME_LEN] = "\0"; - int ret; - - // Check new if new header file is correct - if (tsCheckHeaderFile != 0) { - assert(vnodeCheckNewHeaderFile(pVnode->nfd, pVnode) == 0); - } - - close(pVnode->nfd); - pVnode->nfd = 0; - - close(pVnode->hfd); - pVnode->hfd = 0; - - close(pVnode->dfd); - pVnode->dfd = 0; - - close(pVnode->lfd); - pVnode->lfd = 0; - - if (pVnode->tfd > 0) close(pVnode->tfd); - - pthread_mutex_lock(&(pVnode->vmutex)); - - readlink(pVnode->cfn, dpath, TSDB_FILENAME_LEN); - ret = rename(pVnode->nfn, pVnode->cfn); - if (ret < 0) { - dError("vid:%d, failed to rename:%s, reason:%s", pVnode->vnode, pVnode->nfn, strerror(errno)); - } - remove(dpath); - - if (pVnode->tfd > 0) { - memset(dpath, 0, TSDB_FILENAME_LEN); - readlink(pVnode->lfn, dpath, TSDB_FILENAME_LEN); - ret = rename(pVnode->tfn, pVnode->lfn); - if (ret < 0) { - dError("vid:%d, failed to rename:%s, reason:%s", pVnode->vnode, pVnode->tfn, strerror(errno)); - } - remove(dpath); - } - - 
pthread_mutex_unlock(&(pVnode->vmutex)); - - pVnode->tfd = 0; - - dTrace("vid:%d, %s and %s is saved", pVnode->vnode, pVnode->cfn, pVnode->lfn); - vnodeAdustVnodeFile(pVnode); - vnodeSaveAllMeterObjToFile(pVnode->vnode); - - return; -} - -void vnodeBroadcastStatusToUnsyncedPeer(SVnodeObj *pVnode); - -void *vnodeCommitMultiToFile(SVnodeObj *pVnode, int ssid, int esid) { - int vnode = pVnode->vnode; - SData * data[TSDB_MAX_COLUMNS], *cdata[TSDB_MAX_COLUMNS]; // first 4 bytes are length - char * buffer = NULL, *dmem = NULL, *cmem = NULL, *hmem = NULL, *tmem = NULL; - SMeterObj * pObj = NULL; - SCompInfo compInfo = {0}; - SCompHeader * pHeader; - SMeterInfo * meterInfo = NULL, *pTable = NULL; - SQuery query; - SColumnInfoEx colList[TSDB_MAX_COLUMNS] = {0}; - SSqlFunctionExpr pExprs[TSDB_MAX_COLUMNS] = {0}; - int commitAgain; - int headLen, sid, col; - int64_t pointsRead; - int64_t pointsReadLast; - SCompBlock * pCompBlock = NULL; - SVnodeCfg * pCfg = &pVnode->cfg; - TSCKSUM chksum; - SVnodeHeadInfo headInfo; - uint8_t * pOldCompBlocks; - - dPrint("vid:%d, committing to file, firstKey:%" PRId64 " lastKey:%" PRId64 " ssid:%d esid:%d", vnode, pVnode->firstKey, - pVnode->lastKey, ssid, esid); - if (pVnode->lastKey == 0) goto _over; - - vnodeCloseAllSyncFds(vnode); - vnodeRenewCommitLog(vnode); - - // get the MAX consumption buffer for this vnode - int32_t maxBytesPerPoint = 0; - int32_t minBytesPerPoint = INT32_MAX; - for (sid = ssid; sid <= esid; ++sid) { - pObj = (SMeterObj *)(pVnode->meterList[sid]); - if ((pObj == NULL) || (pObj->pCache == NULL)) continue; - - if (maxBytesPerPoint < pObj->bytesPerPoint) { - maxBytesPerPoint = pObj->bytesPerPoint; - } - if (minBytesPerPoint > pObj->bytesPerPoint) { - minBytesPerPoint = pObj->bytesPerPoint; - } - } - - // buffer to hold the temp head - int tcachblocks = pCfg->cacheBlockSize / (minBytesPerPoint * pCfg->rowsInFileBlock); - - int hmsize = - (pCfg->cacheNumOfBlocks.totalBlocks * (MAX(tcachblocks, 1) + 1) + pCfg->maxSessions) 
* sizeof(SCompBlock); - - // buffer to hold the uncompressed data - int dmsize = - maxBytesPerPoint * pCfg->rowsInFileBlock + (sizeof(SData) + EXTRA_BYTES + sizeof(TSCKSUM)) * TSDB_MAX_COLUMNS; - - // buffer to hold the compressed data - int cmsize = - maxBytesPerPoint * pCfg->rowsInFileBlock + (sizeof(SData) + EXTRA_BYTES + sizeof(TSCKSUM)) * TSDB_MAX_COLUMNS; - - // buffer to hold compHeader - int tmsize = sizeof(SCompHeader) * pCfg->maxSessions + sizeof(TSCKSUM); - - // buffer to hold meterInfo - int misize = pVnode->cfg.maxSessions * sizeof(SMeterInfo); - - int totalSize = hmsize + dmsize + cmsize + misize + tmsize; - buffer = malloc(totalSize); - if (buffer == NULL) { - dError("no enough memory for committing buffer"); - return NULL; - } - - hmem = buffer; - dmem = hmem + hmsize; - cmem = dmem + dmsize; - tmem = cmem + cmsize; - meterInfo = (SMeterInfo *)(tmem + tmsize); - - pthread_mutex_lock(&(pVnode->vmutex)); - pVnode->commitFirstKey = pVnode->firstKey; - pVnode->firstKey = pVnode->lastKey + 1; - pthread_mutex_unlock(&(pVnode->vmutex)); - -_again: - pVnode->commitInProcess = 1; - commitAgain = 0; - memset(hmem, 0, totalSize); - memset(&query, 0, sizeof(query)); - - if (vnodeOpenCommitFiles(pVnode, ssid) < 0) goto _over; - dTrace("vid:%d, start to commit, commitFirstKey:%" PRId64 " commitLastKey:%" PRId64, vnode, pVnode->commitFirstKey, - pVnode->commitLastKey); - - headLen = 0; - vnodeGetHeadFileHeaderInfo(pVnode->hfd, &headInfo); - int maxOldBlocks = 1; - - // read head info - if (pVnode->hfd) { - lseek(pVnode->hfd, TSDB_FILE_HEADER_LEN, SEEK_SET); - if (read(pVnode->hfd, tmem, tmsize) <= 0) { - dError("vid:%d, failed to read old header file:%s", vnode, pVnode->cfn); - taosLogError("vid:%d, failed to read old header file:%s", vnode, pVnode->cfn); - vnodeRecoverFromPeer(pVnode, pVnode->commitFileId); - goto _over; - } else { - if (!taosCheckChecksumWhole((uint8_t *)tmem, tmsize)) { - dError("vid:%d, failed to read old header file:%s since comp header 
offset is broken", vnode, pVnode->cfn); - taosLogError("vid:%d, failed to read old header file:%s since comp header offset is broken", - vnode, pVnode->cfn); - - vnodeRecoverFromPeer(pVnode, pVnode->commitFileId); - goto _over; - } - } - } - - // read compInfo - for (sid = 0; sid < pCfg->maxSessions; ++sid) { - if (pVnode->meterList == NULL) { // vnode is being freed, abort - goto _over; - } - - pObj = (SMeterObj *)(pVnode->meterList[sid]); - if (pObj == NULL) { - continue; - } - - // meter is going to be deleted, abort - if (vnodeIsMeterState(pObj, TSDB_METER_STATE_DROPPING)) { - dWarn("vid:%d sid:%d is dropped, ignore this meter", vnode, sid); - continue; - } - - pTable = meterInfo + sid; - pHeader = ((SCompHeader *)tmem) + sid; - - if (pVnode->hfd > 0) { - if (pHeader->compInfoOffset > 0) { - lseek(pVnode->hfd, pHeader->compInfoOffset, SEEK_SET); - if (read(pVnode->hfd, &compInfo, sizeof(compInfo)) == sizeof(compInfo)) { - if (!taosCheckChecksumWhole((uint8_t *)(&compInfo), sizeof(SCompInfo))) { - dError("vid:%d sid:%d id:%s, failed to read compinfo in file:%s since checksum mismatch", - vnode, sid, pObj->meterId, pVnode->cfn); - taosLogError("vid:%d sid:%d id:%s, failed to read compinfo in file:%s since checksum mismatch", - vnode, sid, pObj->meterId, pVnode->cfn); - vnodeRecoverFromPeer(pVnode, pVnode->commitFileId); - goto _over; - } else { - if (pObj->uid == compInfo.uid) { - pTable->oldNumOfBlocks = compInfo.numOfBlocks; - pTable->oldCompBlockOffset = pHeader->compInfoOffset + sizeof(SCompInfo); - pTable->last = compInfo.last; - if (compInfo.numOfBlocks > maxOldBlocks) maxOldBlocks = compInfo.numOfBlocks; - if (pTable->last) { - lseek(pVnode->hfd, sizeof(SCompBlock) * (compInfo.numOfBlocks - 1), SEEK_CUR); - read(pVnode->hfd, &pTable->lastBlock, sizeof(SCompBlock)); - } - } else { - dTrace("vid:%d sid:%d id:%s, uid:%" PRIu64 " is not matched with old:%" PRIu64 ", old data will be thrown away", - vnode, sid, pObj->meterId, pObj->uid, compInfo.uid); - 
pTable->oldNumOfBlocks = 0; - } - } - } else { - dError("vid:%d sid:%d id:%s, failed to read compinfo in file:%s", vnode, sid, pObj->meterId, pVnode->cfn); - vnodeRecoverFromPeer(pVnode, pVnode->commitFileId); - goto _over; - } - } - } - } - // Loop To write data to fileId - for (sid = ssid; sid <= esid; ++sid) { - pObj = (SMeterObj *)(pVnode->meterList[sid]); - if ((pObj == NULL) || (pObj->pCache == NULL)) continue; - - data[0] = (SData *)dmem; - cdata[0] = (SData *)cmem; - for (col = 1; col < pObj->numOfColumns; ++col) { - data[col] = (SData *)(((char *)data[col - 1]) + sizeof(SData) + - pObj->pointsPerFileBlock * pObj->schema[col - 1].bytes + EXTRA_BYTES + sizeof(TSCKSUM)); - cdata[col] = (SData *)(((char *)cdata[col - 1]) + sizeof(SData) + - pObj->pointsPerFileBlock * pObj->schema[col - 1].bytes + EXTRA_BYTES + sizeof(TSCKSUM)); - } - - pTable = meterInfo + sid; - pTable->tempHeadOffset = headLen; - - memset(&query, 0, sizeof(query)); - query.colList = colList; - query.pSelectExpr = pExprs; - - query.ekey = pVnode->commitLastKey; - query.skey = pVnode->commitFirstKey; - query.lastKey = query.skey; - - query.sdata = data; - vnodeSetCommitQuery(pObj, &query); - - dTrace("vid:%d sid:%d id:%s, start to commit, startKey:%" PRId64 " slot:%d pos:%d", pObj->vnode, pObj->sid, pObj->meterId, - pObj->lastKeyOnFile, query.slot, query.pos); - - pointsRead = 0; - pointsReadLast = 0; - - // last block is at last file - if (pTable->last) { - if ((pTable->lastBlock.sversion != pObj->sversion) || (query.over)) { - // TODO : Check the correctness of this code. 
write the last block to - // .data file - pCompBlock = (SCompBlock *)(hmem + headLen); - assert(dmem - (char *)pCompBlock >= sizeof(SCompBlock)); - *pCompBlock = pTable->lastBlock; - if (pTable->lastBlock.sversion != pObj->sversion) { - pCompBlock->last = 0; - pCompBlock->offset = lseek(pVnode->dfd, 0, SEEK_END); - pTable->last = 0; - lseek(pVnode->lfd, pTable->lastBlock.offset, SEEK_SET); - tsendfile(pVnode->dfd, pVnode->lfd, NULL, pTable->lastBlock.len); - pVnode->dfSize = pCompBlock->offset + pTable->lastBlock.len; - } else { - if (ssid == 0) { - assert(pCompBlock->last && pVnode->tfd != -1); - pCompBlock->offset = lseek(pVnode->tfd, 0, SEEK_END); - lseek(pVnode->lfd, pTable->lastBlock.offset, SEEK_SET); - tsendfile(pVnode->tfd, pVnode->lfd, NULL, pTable->lastBlock.len); - pVnode->lfSize = pCompBlock->offset + pTable->lastBlock.len; - } else { - assert(pVnode->tfd == -1); - } - - } - - headLen += sizeof(SCompBlock); - pTable->newNumOfBlocks++; - } else { - // read last block into memory - if (vnodeReadLastBlockToMem(pObj, &pTable->lastBlock, data) < 0) goto _over; - pTable->last = 0; - pointsReadLast = pTable->lastBlock.numOfPoints; - query.over = 0; - headInfo.totalStorage -= (pointsReadLast * pObj->bytesPerPoint); - - dTrace("vid:%d sid:%d id:%s, points:%d in last block will be merged to new block", - pObj->vnode, pObj->sid, pObj->meterId, pointsReadLast); - } - - pTable->changed = 1; - pTable->oldNumOfBlocks--; - } - - while (query.over == 0) { - pCompBlock = (SCompBlock *)(hmem + headLen); - assert(dmem - (char *)pCompBlock >= sizeof(SCompBlock)); - pointsRead += pointsReadLast; - - while (pointsRead < pObj->pointsPerFileBlock) { - query.pointsToRead = pObj->pointsPerFileBlock - pointsRead; - query.pointsOffset = pointsRead; - pointsRead += vnodeQueryFromCache(pObj, &query); - if (query.over) break; - } - - if (pointsRead == 0) break; - - headInfo.totalStorage += ((pointsRead - pointsReadLast) * pObj->bytesPerPoint); - pCompBlock->last = 1; - if 
(vnodeWriteBlockToFile(pObj, pCompBlock, data, cdata, pointsRead) < 0) goto _over; - if (pCompBlock->keyLast > pObj->lastKeyOnFile) pObj->lastKeyOnFile = pCompBlock->keyLast; - pTable->last = pCompBlock->last; - - // write block info into header buffer - headLen += sizeof(SCompBlock); - pTable->newNumOfBlocks++; - pTable->committedPoints += (pointsRead - pointsReadLast); - - dTrace("vid:%d sid:%d id:%s, pointsRead:%d, pointsReadLast:%d lastKey:%" PRId64 ", " - "slot:%d pos:%d newNumOfBlocks:%d headLen:%d", - pObj->vnode, pObj->sid, pObj->meterId, pointsRead, pointsReadLast, pObj->lastKeyOnFile, query.slot, query.pos, - pTable->newNumOfBlocks, headLen); - - if (pointsRead < pObj->pointsPerFileBlock || query.keyIsMet) break; - - pointsRead = 0; - pointsReadLast = 0; - } - - dTrace("vid:%d sid:%d id:%s, %d points are committed, lastKey:%" PRId64 " slot:%d pos:%d newNumOfBlocks:%d", - pObj->vnode, pObj->sid, pObj->meterId, pTable->committedPoints, pObj->lastKeyOnFile, query.slot, query.pos, - pTable->newNumOfBlocks); - - if (pTable->committedPoints > 0) { - pTable->commitSlot = query.slot; - pTable->commitPos = query.pos; - } - - TSKEY nextKey = 0; - if (pObj->lastKey > pVnode->commitLastKey) - nextKey = pVnode->commitLastKey + 1; - else if (pObj->lastKey > pObj->lastKeyOnFile) - nextKey = pObj->lastKeyOnFile + 1; - - pthread_mutex_lock(&(pVnode->vmutex)); - if (nextKey < pVnode->firstKey && nextKey > 1) pVnode->firstKey = nextKey; - pthread_mutex_unlock(&(pVnode->vmutex)); - } - - if (pVnode->lastKey > pVnode->commitLastKey) commitAgain = 1; - - dTrace("vid:%d, finish appending the data file", vnode); - - // calculate the new compInfoOffset - int compInfoOffset = TSDB_FILE_HEADER_LEN + tmsize; - for (sid = 0; sid < pCfg->maxSessions; ++sid) { - pObj = (SMeterObj *)(pVnode->meterList[sid]); - pHeader = ((SCompHeader *)tmem) + sid; - if (pObj == NULL) { - pHeader->compInfoOffset = 0; - continue; - } - - pTable = meterInfo + sid; - pTable->compInfoOffset = 
compInfoOffset; - pTable->finalNumOfBlocks = pTable->oldNumOfBlocks + pTable->newNumOfBlocks; - - if (pTable->finalNumOfBlocks > 0) { - pHeader->compInfoOffset = pTable->compInfoOffset; - compInfoOffset += sizeof(SCompInfo) + pTable->finalNumOfBlocks * sizeof(SCompBlock) + sizeof(TSCKSUM); - } else { - pHeader->compInfoOffset = 0; - } - - dTrace("vid:%d sid:%d id:%s, oldBlocks:%d numOfBlocks:%d compInfoOffset:%d", pObj->vnode, pObj->sid, pObj->meterId, - pTable->oldNumOfBlocks, pTable->finalNumOfBlocks, compInfoOffset); - } - - // write the comp header into new file - vnodeUpdateHeadFileHeader(pVnode->nfd, &headInfo); - lseek(pVnode->nfd, TSDB_FILE_HEADER_LEN, SEEK_SET); - taosCalcChecksumAppend(0, (uint8_t *)tmem, tmsize); - if (twrite(pVnode->nfd, tmem, tmsize) <= 0) { - dError("vid:%d sid:%d id:%s, failed to write:%s, error:%s", vnode, sid, pObj->meterId, pVnode->nfn, - strerror(errno)); - vnodeRecoverFromPeer(pVnode, pVnode->commitFileId); - goto _over; - } - - pOldCompBlocks = (uint8_t *)malloc(sizeof(SCompBlock) * maxOldBlocks); - - // write the comp block list in new file - for (sid = 0; sid < pCfg->maxSessions; ++sid) { - pObj = (SMeterObj *)(pVnode->meterList[sid]); - if (pObj == NULL) continue; - - pTable = meterInfo + sid; - if (pTable->finalNumOfBlocks <= 0) continue; - - compInfo.last = pTable->last; - compInfo.uid = pObj->uid; - compInfo.numOfBlocks = pTable->finalNumOfBlocks; - /* compInfo.compBlockLen = pTable->finalCompBlockLen; */ - compInfo.delimiter = TSDB_VNODE_DELIMITER; - taosCalcChecksumAppend(0, (uint8_t *)(&compInfo), sizeof(SCompInfo)); - lseek(pVnode->nfd, pTable->compInfoOffset, SEEK_SET); - if (twrite(pVnode->nfd, &compInfo, sizeof(compInfo)) <= 0) { - dError("vid:%d sid:%d id:%s, failed to write:%s, reason:%s", vnode, sid, pObj->meterId, pVnode->nfn, - strerror(errno)); - vnodeRecoverFromPeer(pVnode, pVnode->commitFileId); - goto _over; - } - - // write the old comp blocks - chksum = 0; - if (pVnode->hfd && pTable->oldNumOfBlocks) { - 
lseek(pVnode->hfd, pTable->oldCompBlockOffset, SEEK_SET); - if (pTable->changed) { - int compBlockLen = pTable->oldNumOfBlocks * sizeof(SCompBlock); - read(pVnode->hfd, pOldCompBlocks, compBlockLen); - twrite(pVnode->nfd, pOldCompBlocks, compBlockLen); - chksum = taosCalcChecksum(0, pOldCompBlocks, compBlockLen); - } else { - tsendfile(pVnode->nfd, pVnode->hfd, NULL, pTable->oldNumOfBlocks * sizeof(SCompBlock)); - read(pVnode->hfd, &chksum, sizeof(TSCKSUM)); - } - } - - if (pTable->newNumOfBlocks) { - chksum = taosCalcChecksum(chksum, (uint8_t *)(hmem + pTable->tempHeadOffset), - pTable->newNumOfBlocks * sizeof(SCompBlock)); - if (twrite(pVnode->nfd, hmem + pTable->tempHeadOffset, pTable->newNumOfBlocks * sizeof(SCompBlock)) <= 0) { - dError("vid:%d sid:%d id:%s, failed to write:%s, reason:%s", vnode, sid, pObj->meterId, pVnode->nfn, - strerror(errno)); - vnodeRecoverFromPeer(pVnode, pVnode->commitFileId); - goto _over; - } - } - twrite(pVnode->nfd, &chksum, sizeof(TSCKSUM)); - } - - tfree(pOldCompBlocks); - dTrace("vid:%d, finish writing the new header file:%s", vnode, pVnode->nfn); - vnodeCloseCommitFiles(pVnode); - - for (sid = ssid; sid <= esid; ++sid) { - pObj = (SMeterObj *)(pVnode->meterList[sid]); - if (pObj == NULL) continue; - - pTable = meterInfo + sid; - if (pTable->finalNumOfBlocks <= 0) continue; - - if (pTable->committedPoints > 0) { - vnodeUpdateCommitInfo(pObj, pTable->commitSlot, pTable->commitPos, pTable->commitCount); - } - } - - if (commitAgain) { - pVnode->commitFirstKey = pVnode->commitLastKey + 1; - goto _again; - } - - vnodeRemoveCommitLog(vnode); - -_over: - pVnode->commitInProcess = 0; - vnodeCommitOver(pVnode); - memset(&(vnodeList[vnode].commitThread), 0, sizeof(vnodeList[vnode].commitThread)); - tfree(buffer); - tfree(pOldCompBlocks); - - vnodeBroadcastStatusToUnsyncedPeer(pVnode); - dPrint("vid:%d, committing is over", vnode); - - return pVnode; -} - -void *vnodeCommitToFile(void *param) { - SVnodeObj *pVnode = (SVnodeObj *)param; - - 
return vnodeCommitMultiToFile(pVnode, 0, pVnode->cfg.maxSessions - 1); -} - -int vnodeGetCompBlockInfo(SMeterObj *pObj, SQuery *pQuery) { - char prefix[TSDB_FILENAME_LEN]; - char fileName[TSDB_FILENAME_LEN]; - SCompHeader compHeader; - SCompInfo compInfo; - struct stat fstat; - SVnodeObj * pVnode = &vnodeList[pObj->vnode]; - char * buffer = NULL; - TSCKSUM chksum; - - vnodeFreeFields(pQuery); - tfree(pQuery->pBlock); - - pQuery->numOfBlocks = 0; - SVnodeCfg *pCfg = &vnodeList[pObj->vnode].cfg; - - if (pQuery->hfd > 0) close(pQuery->hfd); - sprintf(prefix, "%s/vnode%d/db/v%df%d", tsDirectory, pObj->vnode, pObj->vnode, pQuery->fileId); - - sprintf(fileName, "%s.head", prefix); - pthread_mutex_lock(&(pVnode->vmutex)); - pQuery->hfd = open(fileName, O_RDONLY); - pthread_mutex_unlock(&(pVnode->vmutex)); - - if (pQuery->hfd < 0) { - dError("vid:%d sid:%d id:%s, failed to open head file:%s, reason:%s", pObj->vnode, pObj->sid, pObj->meterId, - fileName, strerror(errno)); - return vnodeRecoverFromPeer(pVnode, pQuery->fileId); - } - - int tmsize = sizeof(SCompHeader) * pCfg->maxSessions + sizeof(TSCKSUM); - buffer = (char *)calloc(1, tmsize); - if (buffer == NULL) { - dError("vid:%d sid:%d id:%s, failed to allocate memory to buffer", pObj->vnode, pObj->sid, pObj->meterId); - return -TSDB_CODE_APP_ERROR; - } - - lseek(pQuery->hfd, TSDB_FILE_HEADER_LEN, SEEK_SET); - if (read(pQuery->hfd, buffer, tmsize) != tmsize) { - dError("vid:%d sid:%d id:%s, file:%s failed to read comp header, reason:%s", pObj->vnode, pObj->sid, pObj->meterId, - fileName, strerror(errno)); - taosLogError("vid:%d sid:%d id:%s, file:%s failed to read comp header", pObj->vnode, pObj->sid, pObj->meterId, - fileName); - tfree(buffer); - return vnodeRecoverFromPeer(pVnode, pQuery->fileId); - } - - if (!taosCheckChecksumWhole((uint8_t *)buffer, tmsize)) { - dError("vid:%d sid:%d id:%s, file:%s comp header offset is broken", pObj->vnode, pObj->sid, pObj->meterId, - fileName); - taosLogError("vid:%d sid:%d id:%s, 
file:%s comp header offset is broken", pObj->vnode, pObj->sid, pObj->meterId, - fileName); - tfree(buffer); - return vnodeRecoverFromPeer(pVnode, pQuery->fileId); - } - compHeader = ((SCompHeader *)buffer)[pObj->sid]; - tfree(buffer); - if (compHeader.compInfoOffset == 0) return 0; - - lseek(pQuery->hfd, compHeader.compInfoOffset, SEEK_SET); - read(pQuery->hfd, &compInfo, sizeof(SCompInfo)); - if (!taosCheckChecksumWhole((uint8_t *)(&compInfo), sizeof(SCompInfo))) { - dError("vid:%d sid:%d id:%s, file:%s compInfo checksum mismatch", pObj->vnode, pObj->sid, pObj->meterId, fileName); - taosLogError("vid:%d sid:%d id:%s, file:%s compInfo checksum mismatch", pObj->vnode, pObj->sid, pObj->meterId, - fileName); - return vnodeRecoverFromPeer(pVnode, pQuery->fileId); - } - if (compInfo.numOfBlocks <= 0) return 0; - if (compInfo.uid != pObj->uid) return 0; - - pQuery->numOfBlocks = compInfo.numOfBlocks; - pQuery->pBlock = (SCompBlock *)calloc(1, (sizeof(SCompBlock) + sizeof(SField *)) * compInfo.numOfBlocks); - pQuery->pFields = (SField **)((char *)pQuery->pBlock + sizeof(SCompBlock) * compInfo.numOfBlocks); - - /* char *pBlock = (char *)pQuery->pBlockFields + - * sizeof(SCompBlockFields)*compInfo.numOfBlocks; */ - read(pQuery->hfd, pQuery->pBlock, compInfo.numOfBlocks * sizeof(SCompBlock)); - read(pQuery->hfd, &chksum, sizeof(TSCKSUM)); - if (chksum != taosCalcChecksum(0, (uint8_t *)(pQuery->pBlock), compInfo.numOfBlocks * sizeof(SCompBlock))) { - dError("vid:%d sid:%d id:%s, head file comp block broken, fileId: %d", pObj->vnode, pObj->sid, pObj->meterId, - pQuery->fileId); - taosLogError("vid:%d sid:%d id:%s, head file comp block broken, fileId: %d", pObj->vnode, pObj->sid, pObj->meterId, - pQuery->fileId); - return vnodeRecoverFromPeer(pVnode, pQuery->fileId); - } - - close(pQuery->hfd); - pQuery->hfd = -1; - - sprintf(fileName, "%s.data", prefix); - if (stat(fileName, &fstat) < 0) { - dError("vid:%d sid:%d id:%s, data file:%s not there!", pObj->vnode, pObj->sid, 
pObj->meterId, fileName); - return vnodeRecoverFromPeer(pVnode, pQuery->fileId); - } - - if (pQuery->dfd > 0) close(pQuery->dfd); - pQuery->dfd = open(fileName, O_RDONLY); - if (pQuery->dfd < 0) { - dError("vid:%d sid:%d id:%s, failed to open data file:%s, reason:%s", pObj->vnode, pObj->sid, pObj->meterId, - fileName, strerror(errno)); - return vnodeRecoverFromPeer(pVnode, pQuery->fileId); - } - - sprintf(fileName, "%s.last", prefix); - if (stat(fileName, &fstat) < 0) { - dError("vid:%d sid:%d id:%s, last file:%s not there!", pObj->vnode, pObj->sid, pObj->meterId, fileName); - return vnodeRecoverFromPeer(pVnode, pQuery->fileId); - } - - if (pQuery->lfd > 0) close(pQuery->lfd); - pQuery->lfd = open(fileName, O_RDONLY); - if (pQuery->lfd < 0) { - dError("vid:%d sid:%d id:%s, failed to open last file:%s, reason:%s", pObj->vnode, pObj->sid, pObj->meterId, - fileName, strerror(errno)); - return vnodeRecoverFromPeer(pVnode, pQuery->fileId); - } - - return pQuery->numOfBlocks; -} - -int vnodeReadColumnToMem(int fd, SCompBlock *pBlock, SField **fields, int col, char *data, int dataSize, - char *temp, char *buffer, int bufferSize) { - int len = 0, size = 0; - SField *tfields = NULL; - TSCKSUM chksum = 0; - - if (*fields == NULL) { - size = sizeof(SField) * (pBlock->numOfCols) + sizeof(TSCKSUM); - *fields = (SField *)calloc(1, size); - lseek(fd, pBlock->offset, SEEK_SET); - read(fd, *fields, size); - if (!taosCheckChecksumWhole((uint8_t *)(*fields), size)) { - dError("SField checksum error, col: %d", col); - taosLogError("SField checksum error, col: %d", col); - return -1; - } - } - - tfields = *fields; - - /* If data is NULL, that means only to read SField content. So no need to read data part. 
*/ - if (data == NULL) return 0; - - lseek(fd, pBlock->offset + tfields[col].offset, SEEK_SET); - - if (pBlock->algorithm) { - len = read(fd, temp, tfields[col].len); - read(fd, &chksum, sizeof(TSCKSUM)); - if (chksum != taosCalcChecksum(0, (uint8_t *)temp, tfields[col].len)) { - dError("data column checksum error, col: %d", col); - taosLogError("data column checksum error, col: %d", col); - return -1; - } - - (*pDecompFunc[tfields[col].type])(temp, tfields[col].len, pBlock->numOfPoints, data, dataSize, - pBlock->algorithm, buffer, bufferSize); - - } else { - len = read(fd, data, tfields[col].len); - read(fd, &chksum, sizeof(TSCKSUM)); - if (chksum != taosCalcChecksum(0, (uint8_t *)data, tfields[col].len)) { - dError("data column checksum error, col: %d", col); - taosLogError("data column checksum error, col: %d", col); - return -1; - } - } - - if (len <= 0) { - dError("failed to read col:%d, offset:%d, reason:%s", col, (int32_t)(tfields[col].offset), strerror(errno)); - return -1; - } - - return 0; -} - -int vnodeReadCompBlockToMem(SMeterObj *pObj, SQuery *pQuery, SData *sdata[]) { - char * temp = NULL; - int i = 0, col = 0, code = 0; - SCompBlock *pBlock = NULL; - SField ** pFields = NULL; - char * buffer = NULL; - int bufferSize = 0; - int dfd = pQuery->dfd; - - tfree(pQuery->pFields[pQuery->slot]); - - pBlock = pQuery->pBlock + pQuery->slot; - pFields = pQuery->pFields + pQuery->slot; - temp = malloc(pObj->bytesPerPoint * (pBlock->numOfPoints + 1)); - - if (pBlock->last) dfd = pQuery->lfd; - - if (pBlock->algorithm == TWO_STAGE_COMP) { - bufferSize = pObj->maxBytes * pBlock->numOfPoints + EXTRA_BYTES; - buffer = (char *)calloc(1, bufferSize); - } - - if (pQuery->colList[0].colIdx != PRIMARYKEY_TIMESTAMP_COL_INDEX) { - // load timestamp column first in any cases. 
- code = vnodeReadColumnToMem(dfd, pBlock, pFields, PRIMARYKEY_TIMESTAMP_COL_INDEX, - pQuery->tsData->data + pQuery->pointsOffset * TSDB_KEYSIZE, - TSDB_KEYSIZE*pBlock->numOfPoints, temp, buffer, bufferSize); - col = 1; - } else { - // Read the SField data for this block first, if timestamp column is retrieved in this query, we ignore this process - code = vnodeReadColumnToMem(dfd, pBlock, pFields, 0, NULL, 0, NULL, buffer, bufferSize); - } - - if (code < 0) goto _over; - - while (col < pBlock->numOfCols && i < pQuery->numOfCols) { - SColumnInfo *pColumnInfo = &pQuery->colList[i].data; - if ((*pFields)[col].colId < pColumnInfo->colId) { - ++col; - } else if ((*pFields)[col].colId == pColumnInfo->colId) { - code = vnodeReadColumnToMem(dfd, pBlock, pFields, col, sdata[i]->data, pColumnInfo->bytes*pBlock->numOfPoints, temp, buffer, bufferSize); - if (code < 0) goto _over; - ++i; - ++col; - } else { - /* - * pQuery->colList[i].colIdx < (*pFields)[col].colId, this column is not existed in current block, - * fill space with NULL value - */ - char * output = sdata[i]->data; - int32_t bytes = pQuery->colList[i].data.bytes; - int32_t type = pQuery->colList[i].data.type; - - setNullN(output, type, bytes, pBlock->numOfPoints); - ++i; - } - } - - if (col >= pBlock->numOfCols && i < pQuery->numOfCols) { - // remain columns need to set null value - while (i < pQuery->numOfCols) { - char * output = sdata[i]->data; - int32_t bytes = pQuery->colList[i].data.bytes; - int32_t type = pQuery->colList[i].data.type; - - setNullN(output, type, bytes, pBlock->numOfPoints); - ++i; - } - } - -_over: - tfree(buffer); - tfree(temp); - if (code < 0) code = vnodeRecoverFromPeer(vnodeList + pObj->vnode, pQuery->fileId); - return code; -} - -int vnodeReadLastBlockToMem(SMeterObj *pObj, SCompBlock *pBlock, SData *sdata[]) { - char * temp = NULL; - int col = 0, code = 0; - SField *pFields = NULL; - char * buffer = NULL; - int bufferSize = 0; - - SVnodeObj *pVnode = vnodeList + pObj->vnode; - temp = 
malloc(pObj->bytesPerPoint * (pBlock->numOfPoints + 1)); - if (pBlock->algorithm == TWO_STAGE_COMP) { - bufferSize = pObj->maxBytes*pBlock->numOfPoints+EXTRA_BYTES; - buffer = (char *)calloc(1, pObj->maxBytes * pBlock->numOfPoints + EXTRA_BYTES); - } - - for (col = 0; col < pBlock->numOfCols; ++col) { - code = vnodeReadColumnToMem(pVnode->lfd, pBlock, &pFields, col, sdata[col]->data, - pObj->pointsPerFileBlock*pObj->schema[col].bytes+EXTRA_BYTES, temp, buffer, bufferSize); - if (code < 0) break; - sdata[col]->len = pObj->schema[col].bytes * pBlock->numOfPoints; - } - - tfree(buffer); - tfree(temp); - tfree(pFields); - if (code < 0) code = vnodeRecoverFromPeer(pVnode, pVnode->fileId); - return code; -} - -int vnodeWriteBlockToFile(SMeterObj *pObj, SCompBlock *pCompBlock, SData *data[], SData *cdata[], int points) { - SVnodeObj *pVnode = &vnodeList[pObj->vnode]; - SVnodeCfg *pCfg = &pVnode->cfg; - int wlen = 0; - SField * fields = NULL; - int size = sizeof(SField) * pObj->numOfColumns + sizeof(TSCKSUM); - int32_t offset = size; - char * buffer = NULL; - int bufferSize = 0; - - int dfd = pVnode->dfd; - - if (pCompBlock->last && (points < pObj->pointsPerFileBlock * tsFileBlockMinPercent)) { - dTrace("vid:%d sid:%d id:%s, points:%d are written to last block, block stime: %" PRId64 ", block etime: %" PRId64, - pObj->vnode, pObj->sid, pObj->meterId, points, *((TSKEY *)(data[0]->data)), - *((TSKEY * )(data[0]->data + (points - 1) * pObj->schema[0].bytes))); - pCompBlock->last = 1; - dfd = pVnode->tfd > 0 ? 
pVnode->tfd : pVnode->lfd; - } else { - pCompBlock->last = 0; - } - - pCompBlock->offset = lseek(dfd, 0, SEEK_END); - pCompBlock->len = 0; - - fields = (SField *)calloc(1, size); - if (fields == NULL) return -1; - - if (pCfg->compression == TWO_STAGE_COMP){ - bufferSize = pObj->maxBytes * points + EXTRA_BYTES; - buffer = (char *)malloc(bufferSize); - } - - for (int i = 0; i < pObj->numOfColumns; ++i) { - fields[i].colId = pObj->schema[i].colId; - fields[i].type = pObj->schema[i].type; - fields[i].bytes = pObj->schema[i].bytes; - fields[i].offset = offset; - // assert(data[i]->len == points*pObj->schema[i].bytes); - - if (pCfg->compression) { - cdata[i]->len = (*pCompFunc[(uint8_t)pObj->schema[i].type])(data[i]->data, points * pObj->schema[i].bytes, points, - cdata[i]->data, pObj->schema[i].bytes*pObj->pointsPerFileBlock+EXTRA_BYTES, - pCfg->compression, buffer, bufferSize); - fields[i].len = cdata[i]->len; - taosCalcChecksumAppend(0, (uint8_t *)(cdata[i]->data), cdata[i]->len + sizeof(TSCKSUM)); - offset += (cdata[i]->len + sizeof(TSCKSUM)); - - } else { - data[i]->len = pObj->schema[i].bytes * points; - fields[i].len = data[i]->len; - taosCalcChecksumAppend(0, (uint8_t *)(data[i]->data), data[i]->len + sizeof(TSCKSUM)); - offset += (data[i]->len + sizeof(TSCKSUM)); - } - - getStatistics(data[0]->data, data[i]->data, pObj->schema[i].bytes, points, pObj->schema[i].type, &fields[i].min, - &fields[i].max, &fields[i].sum, &fields[i].minIndex, &fields[i].maxIndex, &fields[i].numOfNullPoints); - } - - tfree(buffer); - - // Write SField part - taosCalcChecksumAppend(0, (uint8_t *)fields, size); - wlen = twrite(dfd, fields, size); - if (wlen <= 0) { - tfree(fields); - dError("vid:%d sid:%d id:%s, failed to write block, wlen:%d reason:%s", pObj->vnode, pObj->sid, pObj->meterId, wlen, - strerror(errno)); -#ifdef CLUSTER - return vnodeRecoverFromPeer(pVnode, pVnode->commitFileId); -#else - return -1; -#endif - } - pVnode->vnodeStatistic.compStorage += wlen; - pVnode->dfSize 
+= wlen; - pCompBlock->len += wlen; - tfree(fields); - - // Write data part - for (int i = 0; i < pObj->numOfColumns; ++i) { - if (pCfg->compression) { - wlen = twrite(dfd, cdata[i]->data, cdata[i]->len + sizeof(TSCKSUM)); - } else { - wlen = twrite(dfd, data[i]->data, data[i]->len + sizeof(TSCKSUM)); - } - - if (wlen <= 0) { - dError("vid:%d sid:%d id:%s, failed to write block, wlen:%d points:%d reason:%s", - pObj->vnode, pObj->sid, pObj->meterId, wlen, points, strerror(errno)); - return vnodeRecoverFromPeer(pVnode, pVnode->commitFileId); - } - - pVnode->vnodeStatistic.compStorage += wlen; - pVnode->dfSize += wlen; - pCompBlock->len += wlen; - } - - dTrace("vid:%d, vnode compStorage size is: %" PRId64, pObj->vnode, pVnode->vnodeStatistic.compStorage); - - pCompBlock->algorithm = pCfg->compression; - pCompBlock->numOfPoints = points; - pCompBlock->numOfCols = pObj->numOfColumns; - pCompBlock->keyFirst = *((TSKEY *)(data[0]->data)); // hack way to get the key - pCompBlock->keyLast = *((TSKEY *)(data[0]->data + (points - 1) * pObj->schema[0].bytes)); - pCompBlock->sversion = pObj->sversion; - assert(pCompBlock->keyFirst <= pCompBlock->keyLast); - - return 0; -} - -static int forwardInFile(SQuery *pQuery, int32_t midSlot, int32_t step, SVnodeObj *pVnode, SMeterObj *pObj); - -int vnodeSearchPointInFile(SMeterObj *pObj, SQuery *pQuery) { - TSKEY latest, oldest; - int ret = 0; - int64_t delta = 0; - int firstSlot, lastSlot, midSlot; - int numOfBlocks; - char * temp = NULL, *data = NULL; - SCompBlock *pBlock = NULL; - SVnodeObj * pVnode = &vnodeList[pObj->vnode]; - int step; - char * buffer = NULL; - int bufferSize = 0; - int dfd; - - // if file is broken, pQuery->slot = -2; if not found, pQuery->slot = -1; - - pQuery->slot = -1; - pQuery->pos = -1; - if (pVnode->numOfFiles <= 0) return 0; - - SVnodeCfg *pCfg = &pVnode->cfg; - delta = (int64_t)pCfg->daysPerFile * tsMsPerDay[(uint8_t)pVnode->cfg.precision]; - latest = pObj->lastKeyOnFile; - oldest = (pVnode->fileId - 
pVnode->numOfFiles + 1) * delta; - - if (latest < oldest) return 0; - - if (!QUERY_IS_ASC_QUERY(pQuery)) { - if (pQuery->skey < oldest) return 0; - if (pQuery->ekey > latest) return 0; - if (pQuery->skey > latest) pQuery->skey = latest; - } else { - if (pQuery->skey > latest) return 0; - if (pQuery->ekey < oldest) return 0; - if (pQuery->skey < oldest) pQuery->skey = oldest; - } - - dTrace("vid:%d sid:%d id:%s, skey:%" PRId64 " ekey:%" PRId64 " oldest:%" PRId64 " latest:%" PRId64 " fileId:%d numOfFiles:%d", - pObj->vnode, pObj->sid, pObj->meterId, pQuery->skey, pQuery->ekey, oldest, latest, pVnode->fileId, - pVnode->numOfFiles); - - step = QUERY_IS_ASC_QUERY(pQuery) ? 1 : -1; - - pQuery->fileId = pQuery->skey / delta; // starting fileId - pQuery->fileId -= step; // hacker way to make while loop below works - - bufferSize = pCfg->rowsInFileBlock*sizeof(TSKEY)+EXTRA_BYTES; - buffer = (char *)calloc(1, bufferSize); - - while (1) { - pQuery->fileId += step; - - if ((pQuery->fileId > pVnode->fileId) || (pQuery->fileId < pVnode->fileId - pVnode->numOfFiles + 1)) { - tfree(buffer); - return 0; - } - - ret = vnodeGetCompBlockInfo(pObj, pQuery); - if (ret == 0) continue; - if (ret < 0) break; // file broken - - pBlock = pQuery->pBlock; - - firstSlot = 0; - lastSlot = pQuery->numOfBlocks - 1; - //numOfBlocks = pQuery->numOfBlocks; - if (QUERY_IS_ASC_QUERY(pQuery) && pBlock[lastSlot].keyLast < pQuery->skey) continue; - if (!QUERY_IS_ASC_QUERY(pQuery) && pBlock[firstSlot].keyFirst > pQuery->skey) continue; - - while (1) { - numOfBlocks = lastSlot - firstSlot + 1; - midSlot = (firstSlot + (numOfBlocks >> 1)); - - if (numOfBlocks == 1) break; - - if (pQuery->skey > pBlock[midSlot].keyLast) { - if (numOfBlocks == 2) break; - if (!QUERY_IS_ASC_QUERY(pQuery) && (pQuery->skey < pBlock[midSlot + 1].keyFirst)) break; - firstSlot = midSlot + 1; - } else if (pQuery->skey < pBlock[midSlot].keyFirst) { - if (QUERY_IS_ASC_QUERY(pQuery) && (pQuery->skey > pBlock[midSlot - 1].keyLast)) 
break; - lastSlot = midSlot - 1; - } else { - break; // got the slot - } - } - - pQuery->slot = midSlot; - if (!QUERY_IS_ASC_QUERY(pQuery)) { - if (pQuery->skey < pBlock[midSlot].keyFirst) break; - - if (pQuery->ekey > pBlock[midSlot].keyLast) { - pQuery->slot = midSlot + 1; - break; - } - } else { - if (pQuery->skey > pBlock[midSlot].keyLast) { - pQuery->slot = midSlot + 1; - break; - } - - if (pQuery->ekey < pBlock[midSlot].keyFirst) break; - } - - temp = malloc(pObj->pointsPerFileBlock * TSDB_KEYSIZE + EXTRA_BYTES); // only first column - data = malloc(pObj->pointsPerFileBlock * TSDB_KEYSIZE + EXTRA_BYTES); // only first column - dfd = pBlock[midSlot].last ? pQuery->lfd : pQuery->dfd; - ret = vnodeReadColumnToMem(dfd, pBlock + midSlot, pQuery->pFields + midSlot, 0, data, - pObj->pointsPerFileBlock*TSDB_KEYSIZE+EXTRA_BYTES, - temp, buffer, bufferSize); - if (ret < 0) { - ret = vnodeRecoverFromPeer(pVnode, pQuery->fileId); - break; - } // file broken - - pQuery->pos = (*vnodeSearchKeyFunc[pObj->searchAlgorithm])(data, pBlock[midSlot].numOfPoints, pQuery->skey, - pQuery->order.order); - pQuery->key = *((TSKEY *)(data + pObj->schema[0].bytes * pQuery->pos)); - - ret = vnodeForwardStartPosition(pQuery, pBlock, midSlot, pVnode, pObj); - break; - } - - tfree(buffer); - tfree(temp); - tfree(data); - - return ret; -} - -int vnodeForwardStartPosition(SQuery *pQuery, SCompBlock *pBlock, int32_t slotIdx, SVnodeObj *pVnode, SMeterObj *pObj) { - int step = QUERY_IS_ASC_QUERY(pQuery) ? 1 : -1; - - if (pQuery->limit.offset > 0 && pQuery->numOfFilterCols == 0) { - int maxReads = QUERY_IS_ASC_QUERY(pQuery) ? 
pBlock->numOfPoints - pQuery->pos : pQuery->pos + 1; - - if (pQuery->limit.offset < maxReads) { // start position in current block - if (QUERY_IS_ASC_QUERY(pQuery)) { - pQuery->pos += pQuery->limit.offset; - } else { - pQuery->pos -= pQuery->limit.offset; - } - - pQuery->limit.offset = 0; - - } else { - pQuery->limit.offset -= maxReads; - slotIdx += step; - - return forwardInFile(pQuery, slotIdx, step, pVnode, pObj); - } - } - - return pQuery->numOfBlocks; -} - -int forwardInFile(SQuery *pQuery, int32_t slotIdx, int32_t step, SVnodeObj *pVnode, SMeterObj *pObj) { - SCompBlock *pBlock = pQuery->pBlock; - - while (slotIdx < pQuery->numOfBlocks && slotIdx >= 0 && pQuery->limit.offset >= pBlock[slotIdx].numOfPoints) { - pQuery->limit.offset -= pBlock[slotIdx].numOfPoints; - slotIdx += step; - } - - if (slotIdx < pQuery->numOfBlocks && slotIdx >= 0) { - if (QUERY_IS_ASC_QUERY(pQuery)) { - pQuery->pos = pQuery->limit.offset; - } else { - pQuery->pos = pBlock[slotIdx].numOfPoints - pQuery->limit.offset - 1; - } - pQuery->slot = slotIdx; - pQuery->limit.offset = 0; - - return pQuery->numOfBlocks; - } else { // continue in next file, forward pQuery->limit.offset points - int ret = 0; - pQuery->slot = -1; - pQuery->pos = -1; - - while (1) { - pQuery->fileId += step; - if ((pQuery->fileId > pVnode->fileId) || (pQuery->fileId < pVnode->fileId - pVnode->numOfFiles + 1)) { - pQuery->lastKey = pObj->lastKeyOnFile; - pQuery->skey = pQuery->lastKey + 1; - return 0; - } - - ret = vnodeGetCompBlockInfo(pObj, pQuery); - if (ret == 0) continue; - if (ret > 0) break; // qualified file - } - - if (ret > 0) { - int startSlot = QUERY_IS_ASC_QUERY(pQuery) ? 
0 : pQuery->numOfBlocks - 1; - return forwardInFile(pQuery, startSlot, step, pVnode, pObj); - } else { - return ret; - } - } -} - -static FORCE_INLINE TSKEY vnodeGetTSInDataBlock(SQuery *pQuery, int32_t pos, int32_t factor) { - return *(TSKEY *)(pQuery->tsData->data + (pQuery->pointsOffset * factor + pos) * TSDB_KEYSIZE); -} - -int vnodeQueryFromFile(SMeterObj *pObj, SQuery *pQuery) { - int numOfReads = 0; - - int lastPos = -1, startPos; - int col, step, code = 0; - char * pRead, *pData; - SData * sdata[TSDB_MAX_COLUMNS]; - SCompBlock *pBlock = NULL; - SVnodeObj * pVnode = &vnodeList[pObj->vnode]; - pQuery->pointsRead = 0; - int keyLen = TSDB_KEYSIZE; - - if (pQuery->over) return 0; - - if (pQuery->slot < 0) // it means a new query, we need to find the point first - code = vnodeSearchPointInFile(pObj, pQuery); - - if (code < 0 || pQuery->slot < 0 || pQuery->pos == -1) { - pQuery->over = 1; - return code; - } - - step = QUERY_IS_ASC_QUERY(pQuery) ? -1 : 1; - pBlock = pQuery->pBlock + pQuery->slot; - - if (pQuery->pos == FILE_QUERY_NEW_BLOCK) { - if (!QUERY_IS_ASC_QUERY(pQuery)) { - if (pQuery->ekey > pBlock->keyLast) pQuery->over = 1; - if (pQuery->skey < pBlock->keyFirst) pQuery->over = 1; - } else { - if (pQuery->ekey < pBlock->keyFirst) pQuery->over = 1; - if (pQuery->skey > pBlock->keyLast) pQuery->over = 1; - } - - pQuery->pos = QUERY_IS_ASC_QUERY(pQuery) ? 0 : pBlock->numOfPoints - 1; - } - - if (pQuery->over) return 0; - - // To make sure the start position of each buffer is aligned to 4bytes in 32-bit ARM system. - for(col = 0; col < pQuery->numOfCols; ++col) { - sdata[col] = calloc(1, sizeof(SData) + pBlock->numOfPoints * pQuery->colList[col].data.bytes + EXTRA_BYTES); - } - - /* - * timestamp column is fetched in any cases. Therefore, if the query does not fetch primary column, - * we allocate tsData buffer with twice size of the other ordinary pQuery->sdata. 
- * Otherwise, the query function may over-write buffer area while retrieve function has not packed the results into - * message to send to client yet. - * - * So the startPositionFactor is needed to denote which half part is used to store the result, and which - * part is available for keep data during query process. - * - * Note: the startPositionFactor must be used in conjunction with pQuery->pointsOffset - */ - int32_t startPositionFactor = 1; - if (pQuery->colList[0].colIdx == PRIMARYKEY_TIMESTAMP_COL_INDEX) { - pQuery->tsData = sdata[0]; - startPositionFactor = 0; - } - - code = vnodeReadCompBlockToMem(pObj, pQuery, sdata); - if (code < 0) { - dError("vid:%d sid:%d id:%s, failed to read block:%d numOfPoints:%d", pObj->vnode, pObj->sid, pObj->meterId, - pQuery->slot, pBlock->numOfPoints); - goto _next; - } - - int maxReads = QUERY_IS_ASC_QUERY(pQuery) ? pBlock->numOfPoints - pQuery->pos : pQuery->pos + 1; - - TSKEY startKey = vnodeGetTSInDataBlock(pQuery, 0, startPositionFactor); - TSKEY endKey = vnodeGetTSInDataBlock(pQuery, pBlock->numOfPoints - 1, startPositionFactor); - - if (QUERY_IS_ASC_QUERY(pQuery)) { - if (endKey < pQuery->ekey) { - numOfReads = maxReads; - } else { - lastPos = (*vnodeSearchKeyFunc[pObj->searchAlgorithm])( - pQuery->tsData->data + keyLen * (pQuery->pos + pQuery->pointsOffset * startPositionFactor), maxReads, - pQuery->ekey, TSQL_SO_DESC); - numOfReads = (lastPos >= 0) ? lastPos + 1 : 0; - } - } else { - if (startKey > pQuery->ekey) { - numOfReads = maxReads; - } else { - lastPos = (*vnodeSearchKeyFunc[pObj->searchAlgorithm])( - pQuery->tsData->data + keyLen * pQuery->pointsOffset * startPositionFactor, maxReads, pQuery->ekey, - TSQL_SO_ASC); - numOfReads = (lastPos >= 0) ? 
pQuery->pos - lastPos + 1 : 0; - } - } - - if (numOfReads > pQuery->pointsToRead - pQuery->pointsRead) { - numOfReads = pQuery->pointsToRead - pQuery->pointsRead; - } else { - if (lastPos >= 0 || numOfReads == 0) { - pQuery->keyIsMet = 1; - pQuery->over = 1; - } - } - - startPos = QUERY_IS_ASC_QUERY(pQuery) ? pQuery->pos : pQuery->pos - numOfReads + 1; - - int32_t numOfQualifiedPoints = 0; - int32_t numOfActualRead = numOfReads; - - // copy data to result buffer - if (pQuery->numOfFilterCols == 0) { - // no filter condition on ordinary columns - for (int32_t i = 0; i < pQuery->numOfOutputCols; ++i) { - int16_t colBufferIndex = pQuery->pSelectExpr[i].pBase.colInfo.colIdxInBuf; - int32_t bytes = GET_COLUMN_BYTES(pQuery, i); - - pData = pQuery->sdata[i]->data + pQuery->pointsOffset * bytes; - pRead = sdata[colBufferIndex]->data + startPos * bytes; - - if (QUERY_IS_ASC_QUERY(pQuery)) { - memcpy(pData, pRead, numOfReads * bytes); - } else { //reversed copy to output buffer - for(int32_t j = 0; j < numOfReads; ++j) { - memcpy(pData + bytes * j, pRead + (numOfReads - 1 - j) * bytes, bytes); - } - } - } - numOfQualifiedPoints = numOfReads; - } else { - // check each data one by one set the input column data - for (int32_t k = 0; k < pQuery->numOfFilterCols; ++k) { - struct SSingleColumnFilterInfo *pFilterInfo = &pQuery->pFilterInfo[k]; - pFilterInfo->pData = sdata[pFilterInfo->info.colIdxInBuf]->data; - } - - int32_t *ids = calloc(1, numOfReads * sizeof(int32_t)); - numOfActualRead = 0; - - if (QUERY_IS_ASC_QUERY(pQuery)) { - for (int32_t j = startPos; j < pBlock->numOfPoints; j -= step) { - TSKEY key = vnodeGetTSInDataBlock(pQuery, j, startPositionFactor); - if (key < startKey || key > endKey) { - dError("vid:%d sid:%d id:%s, timestamp in file block disordered. 
slot:%d, pos:%d, ts:%" PRId64 ", block " - "range:%" PRId64 "-%" PRId64, pObj->vnode, pObj->sid, pObj->meterId, pQuery->slot, j, key, startKey, endKey); - tfree(ids); - return -TSDB_CODE_FILE_BLOCK_TS_DISORDERED; - } - - // out of query range, quit - if (key > pQuery->ekey) { - break; - } - - if (!vnodeFilterData(pQuery, &numOfActualRead, j)) { - continue; - } - - ids[numOfQualifiedPoints] = j; - if (++numOfQualifiedPoints == numOfReads) { // qualified data are enough - break; - } - } - } else { - for (int32_t j = pQuery->pos; j >= 0; --j) { - TSKEY key = vnodeGetTSInDataBlock(pQuery, j, startPositionFactor); - if (key < startKey || key > endKey) { - dError("vid:%d sid:%d id:%s, timestamp in file block disordered. slot:%d, pos:%d, ts:%" PRId64 ", block " - "range:%" PRId64 "-%" PRId64, pObj->vnode, pObj->sid, pObj->meterId, pQuery->slot, j, key, startKey, endKey); - tfree(ids); - return -TSDB_CODE_FILE_BLOCK_TS_DISORDERED; - } - - // out of query range, quit - if (key < pQuery->ekey) { - break; - } - - if (!vnodeFilterData(pQuery, &numOfActualRead, j)) { - continue; - } - - ids[numOfQualifiedPoints] = j; - if (++numOfQualifiedPoints == numOfReads) { // qualified data are enough - break; - } - } - } - -// int32_t start = QUERY_IS_ASC_QUERY(pQuery) ? 
0 : numOfReads - numOfQualifiedPoints; - for (int32_t j = 0; j < numOfQualifiedPoints; ++j) { - for (int32_t col = 0; col < pQuery->numOfOutputCols; ++col) { - int16_t colIndexInBuffer = pQuery->pSelectExpr[col].pBase.colInfo.colIdxInBuf; - int32_t bytes = GET_COLUMN_BYTES(pQuery, col); - pData = pQuery->sdata[col]->data + (pQuery->pointsOffset + j) * bytes; - pRead = sdata[colIndexInBuffer]->data + ids[j/* + start*/] * bytes; - - memcpy(pData, pRead, bytes); - } - } - - tfree(ids); - assert(numOfQualifiedPoints <= numOfReads); - } - - // Note: numOfQualifiedPoints may be 0, since no data in this block are qualified - assert(pQuery->pointsRead == 0); - - pQuery->pointsRead += numOfQualifiedPoints; - for (col = 0; col < pQuery->numOfOutputCols; ++col) { - int16_t bytes = GET_COLUMN_BYTES(pQuery, col); - pQuery->sdata[col]->len = bytes * (pQuery->pointsOffset + pQuery->pointsRead); - } - pQuery->pos -= numOfActualRead * step; - - // update the lastkey/skey - int32_t lastAccessPos = pQuery->pos + step; - pQuery->lastKey = vnodeGetTSInDataBlock(pQuery, lastAccessPos, startPositionFactor); - pQuery->skey = pQuery->lastKey - step; - -_next: - if ((pQuery->pos < 0 || pQuery->pos >= pBlock->numOfPoints || numOfReads == 0) && (pQuery->over == 0)) { - pQuery->slot = pQuery->slot - step; - pQuery->pos = FILE_QUERY_NEW_BLOCK; - } - - if ((pQuery->slot < 0 || pQuery->slot >= pQuery->numOfBlocks) && (pQuery->over == 0)) { - int ret; - - while (1) { - ret = -1; - pQuery->fileId -= step; // jump to next file - - if (QUERY_IS_ASC_QUERY(pQuery)) { - if (pQuery->fileId > pVnode->fileId) { - // to do: - // check if file is updated, if updated, open again and check if this Meter is updated - // if meter is updated, read in new block info, and - break; - } - } else { - if ((pVnode->fileId - pQuery->fileId + 1) > pVnode->numOfFiles) break; - } - - ret = vnodeGetCompBlockInfo(pObj, pQuery); - if (ret > 0) break; - if (ret < 0) code = ret; - } - - if (ret <= 0) pQuery->over = 1; - - 
pQuery->slot = QUERY_IS_ASC_QUERY(pQuery) ? 0 : pQuery->numOfBlocks - 1; - } - - for(int32_t i = 0; i < pQuery->numOfCols; ++i) { - tfree(sdata[i]); - } - - return code; -} - -int vnodeUpdateFileMagic(int vnode, int fileId) { - struct stat fstat; - char fileName[256]; - - SVnodeObj *pVnode = vnodeList + vnode; - uint64_t magic = 0; - - vnodeGetHeadDataLname(fileName, NULL, NULL, vnode, fileId); - if (stat(fileName, &fstat) != 0) { - dError("vid:%d, head file:%s is not there", vnode, fileName); - return -1; - } - - int size = sizeof(SCompHeader) * pVnode->cfg.maxSessions + sizeof(TSCKSUM) + TSDB_FILE_HEADER_LEN; - if (fstat.st_size < size) { - dError("vid:%d, head file:%s is corrupted", vnode, fileName); - return -1; - } - -#ifdef CLUSTER - //if (fstat.st_size == size) return 0; -#else - if (fstat.st_size == size) return 0; -#endif - vnodeGetHeadDataLname(NULL, fileName, NULL, vnode, fileId); - if (stat(fileName, &fstat) == 0) { - magic = fstat.st_size; - } else { - dError("vid:%d, data file:%s is not there", vnode, fileName); - return -1; - } - - vnodeGetHeadDataLname(NULL, NULL, fileName, vnode, fileId); - if (stat(fileName, &fstat) == 0) { - magic += fstat.st_size; - } - - int slot = fileId % pVnode->maxFiles; - pVnode->fmagic[slot] = magic; - - return 0; -} - -int vnodeInitFile(int vnode) { - int code = TSDB_CODE_SUCCESS; - SVnodeObj *pVnode = vnodeList + vnode; - - pVnode->maxFiles = pVnode->cfg.daysToKeep / pVnode->cfg.daysPerFile + 1; - pVnode->maxFile1 = pVnode->cfg.daysToKeep1 / pVnode->cfg.daysPerFile; - pVnode->maxFile2 = pVnode->cfg.daysToKeep2 / pVnode->cfg.daysPerFile; - pVnode->fmagic = (uint64_t *)calloc(pVnode->maxFiles + 1, sizeof(uint64_t)); - int fileId = pVnode->fileId; - - /* - * The actual files will far exceed the files that need to exist - */ - if (pVnode->numOfFiles > pVnode->maxFiles) { - dError("vid:%d numOfFiles:%d should not larger than maxFiles:%d", vnode, pVnode->numOfFiles, pVnode->maxFiles); - } - - int numOfFiles = 
MIN(pVnode->numOfFiles, pVnode->maxFiles); - for (int i = 0; i < numOfFiles; ++i) { - if (vnodeUpdateFileMagic(vnode, fileId) < 0) { - if (pVnode->cfg.replications > 1) { - pVnode->badFileId = fileId; - } - dError("vid:%d fileId:%d is corrupted", vnode, fileId); - } else { - dTrace("vid:%d fileId:%d is checked", vnode, fileId); - } - - fileId--; - } - - return code; -} - -int vnodeRecoverCompHeader(int vnode, int fileId) { - // TODO: try to recover SCompHeader part - dTrace("starting to recover vnode head file comp header part, vnode: %d fileId: %d", vnode, fileId); - assert(0); - return 0; -} - -int vnodeRecoverHeadFile(int vnode, int fileId) { - // TODO: try to recover SCompHeader part - dTrace("starting to recover vnode head file, vnode: %d, fileId: %d", vnode, fileId); - assert(0); - return 0; -} - -int vnodeRecoverDataFile(int vnode, int fileId) { - // TODO: try to recover SCompHeader part - dTrace("starting to recover vnode data file, vnode: %d, fileId: %d", vnode, fileId); - assert(0); - return 0; -} diff --git a/src/vnode/detail/src/vnodeFile.spec.c b/src/vnode/detail/src/vnodeFile.spec.c deleted file mode 100644 index 4ad624d2ad..0000000000 --- a/src/vnode/detail/src/vnodeFile.spec.c +++ /dev/null @@ -1,110 +0,0 @@ -/* - * Copyright (c) 2019 TAOS Data, Inc. - * - * This program is free software: you can use, redistribute, and/or modify - * it under the terms of the GNU Affero General Public License, version 3 - * or later ("AGPL"), as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. - * - * You should have received a copy of the GNU Affero General Public License - * along with this program. If not, see . 
- */ - -#define _DEFAULT_SOURCE -#include "vnode.h" -#include "vnodeFile.h" - -char* vnodeGetDiskFromHeadFile(char *headName) { return tsDirectory; } - -char* vnodeGetDataDir(int vnode, int fileId) { return dataDir; } - -void vnodeAdustVnodeFile(SVnodeObj *pVnode) { - // Retention policy here - int fileId = pVnode->fileId - pVnode->numOfFiles + 1; - int cfile = taosGetTimestamp(pVnode->cfg.precision)/pVnode->cfg.daysPerFile/tsMsPerDay[(uint8_t)pVnode->cfg.precision]; - while (fileId <= cfile - pVnode->maxFiles) { - vnodeRemoveFile(pVnode->vnode, fileId); - pVnode->numOfFiles--; - fileId++; - } -} - -int vnodeCheckNewHeaderFile(int fd, SVnodeObj *pVnode) { - SCompHeader *pHeader = NULL; - SCompBlock *pBlocks = NULL; - int blockSize = 0; - SCompInfo compInfo; - int tmsize = 0; - - tmsize = sizeof(SCompHeader) * pVnode->cfg.maxSessions + sizeof(TSCKSUM); - - pHeader = (SCompHeader *)malloc(tmsize); - if (pHeader == NULL) return 0; - - lseek(fd, TSDB_FILE_HEADER_LEN, SEEK_SET); - if (read(fd, (void *)pHeader, tmsize) != tmsize) { - goto _broken_exit; - } - - if (!taosCheckChecksumWhole((uint8_t *)pHeader, tmsize)) { - goto _broken_exit; - } - - for (int sid = 0; sid < pVnode->cfg.maxSessions; sid++) { - if (pVnode->meterList == NULL) goto _correct_exit; - if (pVnode->meterList[sid] == NULL || pHeader[sid].compInfoOffset == 0) continue; - lseek(fd, pHeader[sid].compInfoOffset, SEEK_SET); - - if (read(fd, (void *)(&compInfo), sizeof(SCompInfo)) != sizeof(SCompInfo)) { - goto _broken_exit; - } - - if (!taosCheckChecksumWhole((uint8_t *)(&compInfo), sizeof(SCompInfo))) { - goto _broken_exit; - } - - if (compInfo.uid != ((SMeterObj *)pVnode->meterList[sid])->uid) continue; - - int expectedSize = sizeof(SCompBlock) * compInfo.numOfBlocks + sizeof(TSCKSUM); - if (blockSize < expectedSize) { - pBlocks = (SCompBlock *)realloc(pBlocks, expectedSize); - if (pBlocks == NULL) { - tfree(pHeader); - return 0; - } - - blockSize = expectedSize; - } - - if (read(fd, (void *)pBlocks, 
expectedSize) != expectedSize) { - dError("failed to read block part"); - goto _broken_exit; - } - if (!taosCheckChecksumWhole((uint8_t *)pBlocks, expectedSize)) { - dError("block part is broken"); - goto _broken_exit; - } - - for (int i = 0; i < compInfo.numOfBlocks; i++) { - if (pBlocks[i].last && i != compInfo.numOfBlocks-1) { - dError("last block in middle, block:%d", i); - goto _broken_exit; - } - } - } - - _correct_exit: - dPrint("vid: %d new header file %s is correct", pVnode->vnode, pVnode->nfn); - tfree(pBlocks); - tfree(pHeader); - return 0; - - _broken_exit: - dError("vid: %d new header file %s is broken", pVnode->vnode, pVnode->nfn); - tfree(pBlocks); - tfree(pHeader); - return -1; -} \ No newline at end of file diff --git a/src/vnode/detail/src/vnodeFileUtil.c b/src/vnode/detail/src/vnodeFileUtil.c deleted file mode 100644 index b40e7cfd41..0000000000 --- a/src/vnode/detail/src/vnodeFileUtil.c +++ /dev/null @@ -1,237 +0,0 @@ -/* - * Copyright (c) 2019 TAOS Data, Inc. - * - * This program is free software: you can use, redistribute, and/or modify - * it under the terms of the GNU Affero General Public License, version 3 - * or later ("AGPL"), as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. - * - * You should have received a copy of the GNU Affero General Public License - * along with this program. If not, see . 
- */ - -#define _DEFAULT_SOURCE -#include "os.h" - -#include "vnode.h" - -int vnodeCheckHeaderFile(int fd, int dfd, SVnodeCfg cfg, int mode) { - SCompHeader *pHeaders = NULL; - SVnodeCfg *pCfg = &cfg; - SCompInfo compInfo; - SCompBlock *pBlocks = NULL; - int blockSize = 0; - SField *pFields = NULL; - char *pBuf = NULL; - int size = 0; - int ret = 0; - - if (fd < 0 || dfd < 0) return -1; - - lseek(fd, TSDB_FILE_HEADER_LEN, SEEK_SET); - size = pCfg->maxSessions*sizeof(SCompHeader)+sizeof(TSCKSUM); - pHeaders = calloc(1, size); - if (pHeaders == NULL) { - return -1; - } - - read(fd, pHeaders, size); - if (!taosCheckChecksumWhole((uint8_t *)pHeaders, size)) { - return -1; - } - - for (int i = 0; i < pCfg->maxSessions; i++) { - if (pHeaders[i].compInfoOffset == 0) continue; - if (pHeaders[i].compInfoOffset < 0) { - // TODO : report error here - ret = -1; - continue; - } - lseek(fd, pHeaders[i].compInfoOffset, SEEK_SET); - read(fd, &compInfo, sizeof(SCompInfo)); - if (!taosCheckChecksumWhole((uint8_t *)&compInfo, sizeof(SCompInfo))) { - // TODO : report error - ret = -1; - continue; - } - - int tsize = sizeof(SCompBlock) * compInfo.numOfBlocks + sizeof(TSCKSUM); - if (tsize > blockSize) { - if (pBlocks == NULL) { - pBlocks = calloc(1, tsize); - } else { - pBlocks = realloc(pBlocks, tsize); - } - blockSize = tsize; - } - - read(fd, tsize); - if (!taosCheckChecksumWhole(pBlocks, tsize)) { - // TODO: Report error - ret = -1; - continue; - } - - TSKEY keyLast = 0; - for (int j = 0; j < compInfo.numOfBlocks; j++) { - SCompBlock *pBlock = pBlocks + j; - if (pBlock->last != 0 && j < compInfo.numOfBlocks-1) { - // TODO: report error - ret = -1; - break; - } - - if (pBlock->offset < TSDB_FILE_HEADER_LEN) { - // TODO : report erro - ret = -1; - break; - } - - if (pBlock->keyLast < pBlock->keyFirst) { - // TODO : report error - ret = -1; - break; - } - - if (pBlock->keyFirst <= keyLast) { - // TODO : report error - ret = -1; - break; - } - keyLast = pBlock->keyLast; - - // Check 
block in data - lseek(dfd, pBlock->offset, SEEK_SET); - tsize = sizeof(SField) * pBlock->numOfCols + sizeof(TSCKSUM); - pFields = realloc(pFields, tsize); - - read(dfd, pFields, tsize); - if (!taosCheckChecksumWhole((uint8_t*)pFields, tsize)) { - // TODO : report error - ret = -1; - continue; - } - - for (int k = 0; k < pBlock->numOfCols; k++) { - // TODO: Check pFields[k] content - - pBuf = realloc(pBuf, pFields[k].len); - - if (!taosCheckChecksumWhole((uint8_t *)pBuf, pFields[k].len)) { - // TODO : report error; - ret = -1; - continue; - } - } - } - } - - tfree(pBuf); - tfree(pFields); - tfree(pBlocks); - tfree(pHeaders); - return ret; -} - -int vnodePackDataFile(int vnode, int fileId) { - // TODO: check if it is able to pack current file - - // TODO: assign value to headerFile and dataFile - char *headerFile = NULL; - char *dataFile = NULL; - char *lastFile = NULL; - SVnodeObj *pVnode = vnodeList+vnode; - SCompHeader *pHeaders = NULL; - SCompBlock *pBlocks = NULL; - int blockSize = 0; - char *pBuff = 0; - int buffSize = 0; - SCompInfo compInfo; - int size = 0; - - int hfd = open(headerFile, O_RDONLY); - if (hfd < 0) { - dError("vid: %d, failed to open header file:%s\n", vnode, headerFile); - return -1; - } - int dfd = open(dataFile, O_RDONLY); - if (dfd < 0) { - dError("vid: %d, failed to open data file:%s\n", vnode, dataFile); - return -1; - } - int lfd = open(lastFile, O_RDONLY); - if (lfd < 0) { - dError("vid: %d, failed to open data file:%s\n", vnode, lastFile); - return -1; - } - - lseek(hfd, TSDB_FILE_HEADER_LEN, SEEK_SET); - size = sizeof(SCompHeader)*pVnode->cfg.maxSessions+sizeof(TSCKSUM); - pHeaders = malloc(size); - if (pHeaders == NULL) goto _exit_failure; - read(hfd, pHeaders, size); - if (!taosCheckChecksumWhole((uint8_t *)pHeaders, size)) { - dError("vid: %d, header file %s is broken", vnode, headerFile); - goto _exit_failure; - } - - for (size_t i = 0; i < pVnode->cfg.maxSessions; i++) - { - if (pHeaders[i].compInfoOffset <= 0) continue; - 
SMeterObj *pObj = (SMeterObj *)pVnode->meterList[i]; - // read compInfo part - lseek(hfd, pHeaders[i].compInfoOffset, SEEK_SET); - read(hfd, &compInfo, sizeof(SCompInfo)); - if (!taosCheckChecksumWhole((uint8_t *)&compInfo, sizeof(SCompInfo))) { - dError("vid: %d sid:%d fileId:%d compInfo is broken", vnode, i, fileId); - goto _exit_failure; - } - - // read compBlock part - int tsize = compInfo.numOfBlocks * sizeof(SCompBlock) + sizeof(TSCKSUM); - if (tsize > blockSize) { - if (blockSize == 0) { - pBlocks = malloc(tsize); - } else { - pBlocks = realloc(pBlocks, tsize); - } - blockSize = tsize; - } - read(hfd, pBlocks, tsize); - if (!taosCheckChecksumWhole((uint8_t *)pBlocks, tsize)) { - dError("vid:%d sid:%d fileId:%d block part is broken", vnode, i, fileId); - goto _exit_failure; - } - - assert(compInfo.numOfBlocks > 0); - // Loop to scan the blocks and merge block when neccessary. - tsize = sizeof(SCompInfo) + compInfo.numOfBlocks *sizeof(SCompBlock) + sizeof(TSCKSUM); - pBuff = realloc(pBuff, tsize); - SCompInfo *pInfo = (SCompInfo *)pBuff; - SCompBlock *pNBlocks = pBuff + sizeof(SCompInfo); - int nCounter = 0; - for (int j; j < compInfo.numOfBlocks; j++) { - // TODO : Check if it is the last block - // if (j == compInfo.numOfBlocks - 1) {} - if (pBlocks[j].numOfPoints + pNBlocks[nCounter].numOfPoints <= pObj->pointsPerFileBlock) { - // Merge current block to current new block - } else { - // Write new block to new data file - // pNBlocks[nCounter]. - nCounter++; - } - } - } - - return 0; - -_exit_failure: - tfree(pHeaders); - if (hfd > 0) close(hfd); - if (dfd > 0) close(dfd); - if (lfd > 0) close(lfd); - return -1; -} \ No newline at end of file diff --git a/src/vnode/detail/src/vnodeFilterFunc.c b/src/vnode/detail/src/vnodeFilterFunc.c deleted file mode 100644 index 08fe78c188..0000000000 --- a/src/vnode/detail/src/vnodeFilterFunc.c +++ /dev/null @@ -1,558 +0,0 @@ -/* - * Copyright (c) 2019 TAOS Data, Inc. 
- * - * This program is free software: you can use, redistribute, and/or modify - * it under the terms of the GNU Affero General Public License, version 3 - * or later ("AGPL"), as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. - * - * You should have received a copy of the GNU Affero General Public License - * along with this program. If not, see . - */ - -#define _DEFAULT_SOURCE -#include "os.h" - -#include "taosmsg.h" -#include "tsqlfunction.h" -#include "vnode.h" -#include "vnodeDataFilterFunc.h" - -bool less_i8(SColumnFilterElem *pFilter, char *minval, char *maxval) { - return (*(int8_t *)minval < pFilter->filterInfo.upperBndi); -} - -bool less_i16(SColumnFilterElem *pFilter, char *minval, char *maxval) { - return (*(int16_t *)minval < pFilter->filterInfo.upperBndi); -} - -bool less_i32(SColumnFilterElem *pFilter, char *minval, char *maxval) { - return (*(int32_t *)minval < pFilter->filterInfo.upperBndi); -} - -bool less_i64(SColumnFilterElem *pFilter, char *minval, char *maxval) { - return (*(int64_t *)minval < pFilter->filterInfo.upperBndi); -} - -bool less_ds(SColumnFilterElem *pFilter, char *minval, char *maxval) { - return (*(float *)minval < pFilter->filterInfo.upperBndd); -} - -bool less_dd(SColumnFilterElem *pFilter, char *minval, char *maxval) { - return (*(double *)minval < pFilter->filterInfo.upperBndd); -} - -////////////////////////////////////////////////////////////////// -bool large_i8(SColumnFilterElem *pFilter, char *minval, char *maxval) { - return (*(int8_t *)maxval > pFilter->filterInfo.lowerBndi); -} - -bool large_i16(SColumnFilterElem *pFilter, char *minval, char *maxval) { - return (*(int16_t *)maxval > pFilter->filterInfo.lowerBndi); -} - -bool large_i32(SColumnFilterElem *pFilter, char *minval, char *maxval) { - return (*(int32_t *)maxval > 
pFilter->filterInfo.lowerBndi); -} - -bool large_i64(SColumnFilterElem *pFilter, char *minval, char *maxval) { - return (*(int64_t *)maxval > pFilter->filterInfo.lowerBndi); -} - -bool large_ds(SColumnFilterElem *pFilter, char *minval, char *maxval) { - return (*(float *)maxval > pFilter->filterInfo.lowerBndd); -} - -bool large_dd(SColumnFilterElem *pFilter, char *minval, char *maxval) { - return (*(double *)maxval > pFilter->filterInfo.lowerBndd); -} -///////////////////////////////////////////////////////////////////// - -bool lessEqual_i8(SColumnFilterElem *pFilter, char *minval, char *maxval) { - return (*(int8_t *)minval <= pFilter->filterInfo.upperBndi); -} - -bool lessEqual_i16(SColumnFilterElem *pFilter, char *minval, char *maxval) { - return (*(int16_t *)minval <= pFilter->filterInfo.upperBndi); -} - -bool lessEqual_i32(SColumnFilterElem *pFilter, char *minval, char *maxval) { - return (*(int32_t *)minval <= pFilter->filterInfo.upperBndi); -} - -bool lessEqual_i64(SColumnFilterElem *pFilter, char *minval, char *maxval) { - return (*(int64_t *)minval <= pFilter->filterInfo.upperBndi); -} - -bool lessEqual_ds(SColumnFilterElem *pFilter, char *minval, char *maxval) { - return (*(float *)minval <= pFilter->filterInfo.upperBndd); -} - -bool lessEqual_dd(SColumnFilterElem *pFilter, char *minval, char *maxval) { - return (*(double *)minval <= pFilter->filterInfo.upperBndd); -} - -////////////////////////////////////////////////////////////////////////// -bool largeEqual_i8(SColumnFilterElem *pFilter, char *minval, char *maxval) { - return (*(int8_t *)maxval >= pFilter->filterInfo.lowerBndi); -} - -bool largeEqual_i16(SColumnFilterElem *pFilter, char *minval, char *maxval) { - return (*(int16_t *)maxval >= pFilter->filterInfo.lowerBndi); -} - -bool largeEqual_i32(SColumnFilterElem *pFilter, char *minval, char *maxval) { - return (*(int32_t *)maxval >= pFilter->filterInfo.lowerBndi); -} - -bool largeEqual_i64(SColumnFilterElem *pFilter, char *minval, char *maxval) 
{ - return (*(int64_t *)maxval >= pFilter->filterInfo.lowerBndi); -} - -bool largeEqual_ds(SColumnFilterElem *pFilter, char *minval, char *maxval) { - return (*(float *)maxval >= pFilter->filterInfo.lowerBndd); -} - -bool largeEqual_dd(SColumnFilterElem *pFilter, char *minval, char *maxval) { - return (*(double *)maxval >= pFilter->filterInfo.lowerBndd); -} - -//////////////////////////////////////////////////////////////////////// - -bool equal_i8(SColumnFilterElem *pFilter, char *minval, char *maxval) { - if (*(int8_t *)minval == *(int8_t *)maxval) { - return (*(int8_t *)minval == pFilter->filterInfo.lowerBndi); - } else { /* range filter */ - assert(*(int8_t *)minval < *(int8_t *)maxval); - - return *(int8_t *)minval <= pFilter->filterInfo.lowerBndi && *(int8_t *)maxval >= pFilter->filterInfo.lowerBndi; - } -} - -bool equal_i16(SColumnFilterElem *pFilter, char *minval, char *maxval) { - if (*(int16_t *)minval == *(int16_t *)maxval) { - return (*(int16_t *)minval == pFilter->filterInfo.lowerBndi); - } else { /* range filter */ - assert(*(int16_t *)minval < *(int16_t *)maxval); - - return *(int16_t *)minval <= pFilter->filterInfo.lowerBndi && *(int16_t *)maxval >= pFilter->filterInfo.lowerBndi; - } -} - -bool equal_i32(SColumnFilterElem *pFilter, char *minval, char *maxval) { - if (*(int32_t *)minval == *(int32_t *)maxval) { - return (*(int32_t *)minval == pFilter->filterInfo.lowerBndi); - } else { /* range filter */ - assert(*(int32_t *)minval < *(int32_t *)maxval); - - return *(int32_t *)minval <= pFilter->filterInfo.lowerBndi && *(int32_t *)maxval >= pFilter->filterInfo.lowerBndi; - } -} - -bool equal_i64(SColumnFilterElem *pFilter, char *minval, char *maxval) { - if (*(int64_t *)minval == *(int64_t *)maxval) { - return (*(int64_t *)minval == pFilter->filterInfo.lowerBndi); - } else { /* range filter */ - assert(*(int64_t *)minval < *(int64_t *)maxval); - - return *(int64_t *)minval <= pFilter->filterInfo.lowerBndi && *(int64_t *)maxval >= 
pFilter->filterInfo.lowerBndi; - } -} - -bool equal_ds(SColumnFilterElem *pFilter, char *minval, char *maxval) { - if (*(float *)minval == *(float *)maxval) { - return (fabs(*(float *)minval - pFilter->filterInfo.lowerBndd) <= FLT_EPSILON); - } else { /* range filter */ - assert(*(float *)minval < *(float *)maxval); - return *(float *)minval <= pFilter->filterInfo.lowerBndd && *(float *)maxval >= pFilter->filterInfo.lowerBndd; - } -} - -bool equal_dd(SColumnFilterElem *pFilter, char *minval, char *maxval) { - if (*(double *)minval == *(double *)maxval) { - return (*(double *)minval == pFilter->filterInfo.lowerBndd); - } else { /* range filter */ - assert(*(double *)minval < *(double *)maxval); - - return *(double *)minval <= pFilter->filterInfo.lowerBndi && *(double *)maxval >= pFilter->filterInfo.lowerBndi; - } -} - -bool equal_str(SColumnFilterElem *pFilter, char *minval, char *maxval) { - // query condition string is greater than the max length of string, not qualified data - if (pFilter->filterInfo.len > pFilter->bytes) { - return false; - } - - return strncmp((char *)pFilter->filterInfo.pz, minval, pFilter->bytes) == 0; -} - -bool equal_nchar(SColumnFilterElem *pFilter, char *minval, char *maxval) { - // query condition string is greater than the max length of string, not qualified data - if (pFilter->filterInfo.len > pFilter->bytes) { - return false; - } - - return wcsncmp((wchar_t *)pFilter->filterInfo.pz, (wchar_t*) minval, pFilter->bytes/TSDB_NCHAR_SIZE) == 0; -} - -//////////////////////////////////////////////////////////////// -bool like_str(SColumnFilterElem *pFilter, char *minval, char *maxval) { - SPatternCompareInfo info = PATTERN_COMPARE_INFO_INITIALIZER; - - return patternMatch((char *)pFilter->filterInfo.pz, minval, pFilter->bytes, &info) == TSDB_PATTERN_MATCH; -} - -bool like_nchar(SColumnFilterElem* pFilter, char* minval, char *maxval) { - SPatternCompareInfo info = PATTERN_COMPARE_INFO_INITIALIZER; - - return WCSPatternMatch((wchar_t*) 
pFilter->filterInfo.pz, (wchar_t*) minval, pFilter->bytes/TSDB_NCHAR_SIZE, &info) == TSDB_PATTERN_MATCH; -} - -//////////////////////////////////////////////////////////////// -/** - * If minval equals to maxval, it may serve as the one element filter, - * or all elements of an array are identical during pref-filter stage. - * Otherwise, it must be pre-filter of array list of elements. - * - * During pre-filter stage, if there is one element that locates in [minval, maxval], - * the filter function will return true. - */ -bool nequal_i8(SColumnFilterElem *pFilter, char *minval, char *maxval) { - if (*(int8_t *)minval == *(int8_t *)maxval) { - return (*(int8_t *)minval != pFilter->filterInfo.lowerBndi); - } - - return true; -} - -bool nequal_i16(SColumnFilterElem *pFilter, char *minval, char *maxval) { - if (*(int16_t *)minval == *(int16_t *)maxval) { - return (*(int16_t *)minval != pFilter->filterInfo.lowerBndi); - } - - return true; -} - -bool nequal_i32(SColumnFilterElem *pFilter, char *minval, char *maxval) { - if (*(int32_t *)minval == *(int32_t *)maxval) { - return (*(int32_t *)minval != pFilter->filterInfo.lowerBndi); - } - - return true; -} - -bool nequal_i64(SColumnFilterElem *pFilter, char *minval, char *maxval) { - if (*(int64_t *)minval == *(int64_t *)maxval) { - return (*(int64_t *)minval != pFilter->filterInfo.lowerBndi); - } - - return true; -} - -bool nequal_ds(SColumnFilterElem *pFilter, char *minval, char *maxval) { - if (*(float *)minval == *(float *)maxval) { - return (*(float *)minval != pFilter->filterInfo.lowerBndd); - } - - return true; -} - -bool nequal_dd(SColumnFilterElem *pFilter, char *minval, char *maxval) { - if (*(double *)minval == *(double *)maxval) { - return (*(double *)minval != pFilter->filterInfo.lowerBndd); - } - - return true; -} - -bool nequal_str(SColumnFilterElem *pFilter, char *minval, char *maxval) { - if (pFilter->filterInfo.len > pFilter->bytes) { - return true; - } - - return strncmp((char *)pFilter->filterInfo.pz, 
minval, pFilter->bytes) != 0; -} - -bool nequal_nchar(SColumnFilterElem *pFilter, char* minval, char *maxval) { - if (pFilter->filterInfo.len > pFilter->bytes) { - return true; - } - - return wcsncmp((wchar_t *)pFilter->filterInfo.pz, (wchar_t*)minval, pFilter->bytes/TSDB_NCHAR_SIZE) != 0; -} - -//////////////////////////////////////////////////////////////// - -bool rangeFilter_i32_ii(SColumnFilterElem *pFilter, char *minval, char *maxval) { - return (*(int32_t *)minval <= pFilter->filterInfo.upperBndi && *(int32_t *)maxval >= pFilter->filterInfo.lowerBndi); -} - -bool rangeFilter_i32_ee(SColumnFilterElem *pFilter, char *minval, char *maxval) { - return (*(int32_t *)minvalfilterInfo.upperBndi &&*(int32_t *)maxval> pFilter->filterInfo.lowerBndi); -} - -bool rangeFilter_i32_ie(SColumnFilterElem *pFilter, char *minval, char *maxval) { - return (*(int32_t *)minval < pFilter->filterInfo.upperBndi && *(int32_t *)maxval >= pFilter->filterInfo.lowerBndi); -} - -bool rangeFilter_i32_ei(SColumnFilterElem *pFilter, char *minval, char *maxval) { - return (*(int32_t *)minval <= pFilter->filterInfo.upperBndi && *(int32_t *)maxval > pFilter->filterInfo.lowerBndi); -} - -/////////////////////////////////////////////////////////////////////////////// -bool rangeFilter_i8_ii(SColumnFilterElem *pFilter, char *minval, char *maxval) { - return (*(int8_t *)minval <= pFilter->filterInfo.upperBndi && *(int8_t *)maxval >= pFilter->filterInfo.lowerBndi); -} - -bool rangeFilter_i8_ee(SColumnFilterElem *pFilter, char *minval, char *maxval) { - return (*(int8_t *)minvalfilterInfo.upperBndi &&*(int8_t *)maxval> pFilter->filterInfo.lowerBndi); -} - -bool rangeFilter_i8_ie(SColumnFilterElem *pFilter, char *minval, char *maxval) { - return (*(int8_t *)minval < pFilter->filterInfo.upperBndi && *(int8_t *)maxval >= pFilter->filterInfo.lowerBndi); -} - -bool rangeFilter_i8_ei(SColumnFilterElem *pFilter, char *minval, char *maxval) { - return (*(int8_t *)minval <= pFilter->filterInfo.upperBndi && 
*(int8_t *)maxval > pFilter->filterInfo.lowerBndi); -} - -///////////////////////////////////////////////////////////////////////////////////// -bool rangeFilter_i16_ii(SColumnFilterElem *pFilter, char *minval, char *maxval) { - return (*(int16_t *)minval <= pFilter->filterInfo.upperBndi && *(int16_t *)maxval >= pFilter->filterInfo.lowerBndi); -} - -bool rangeFilter_i16_ee(SColumnFilterElem *pFilter, char *minval, char *maxval) { - return (*(int16_t *)minvalfilterInfo.upperBndi &&*(int16_t *)maxval> pFilter->filterInfo.lowerBndi); -} - -bool rangeFilter_i16_ie(SColumnFilterElem *pFilter, char *minval, char *maxval) { - return (*(int16_t *)minval < pFilter->filterInfo.upperBndi && *(int16_t *)maxval >= pFilter->filterInfo.lowerBndi); -} - -bool rangeFilter_i16_ei(SColumnFilterElem *pFilter, char *minval, char *maxval) { - return (*(int16_t *)minval <= pFilter->filterInfo.upperBndi && *(int16_t *)maxval > pFilter->filterInfo.lowerBndi); -} - -//////////////////////////////////////////////////////////////////////// -bool rangeFilter_i64_ii(SColumnFilterElem *pFilter, char *minval, char *maxval) { - return (*(int64_t *)minval <= pFilter->filterInfo.upperBndi && *(int64_t *)maxval >= pFilter->filterInfo.lowerBndi); -} - -bool rangeFilter_i64_ee(SColumnFilterElem *pFilter, char *minval, char *maxval) { - return (*(int64_t *)minvalfilterInfo.upperBndi &&*(int64_t *)maxval> pFilter->filterInfo.lowerBndi); -} - -bool rangeFilter_i64_ie(SColumnFilterElem *pFilter, char *minval, char *maxval) { - return (*(int64_t *)minval < pFilter->filterInfo.upperBndi && *(int64_t *)maxval >= pFilter->filterInfo.lowerBndi); -} - -bool rangeFilter_i64_ei(SColumnFilterElem *pFilter, char *minval, char *maxval) { - return (*(int64_t *)minval <= pFilter->filterInfo.upperBndi && *(int64_t *)maxval > pFilter->filterInfo.lowerBndi); -} - -//////////////////////////////////////////////////////////////////////// -bool rangeFilter_ds_ii(SColumnFilterElem *pFilter, char *minval, char *maxval) { - 
return (*(float *)minval <= pFilter->filterInfo.upperBndd && *(float *)maxval >= pFilter->filterInfo.lowerBndd); -} - -bool rangeFilter_ds_ee(SColumnFilterElem *pFilter, char *minval, char *maxval) { - return (*(float *)minvalfilterInfo.upperBndd &&*(float *)maxval> pFilter->filterInfo.lowerBndd); -} - -bool rangeFilter_ds_ie(SColumnFilterElem *pFilter, char *minval, char *maxval) { - return (*(float *)minval < pFilter->filterInfo.upperBndd && *(float *)maxval >= pFilter->filterInfo.lowerBndd); -} - -bool rangeFilter_ds_ei(SColumnFilterElem *pFilter, char *minval, char *maxval) { - return (*(float *)minval <= pFilter->filterInfo.upperBndd && *(float *)maxval > pFilter->filterInfo.lowerBndd); -} - -////////////////////////////////////////////////////////////////////////// -bool rangeFilter_dd_ii(SColumnFilterElem *pFilter, char *minval, char *maxval) { - return (*(double *)minval <= pFilter->filterInfo.upperBndd && *(double *)maxval >= pFilter->filterInfo.lowerBndd); -} - -bool rangeFilter_dd_ee(SColumnFilterElem *pFilter, char *minval, char *maxval) { - return (*(double *)minvalfilterInfo.upperBndd &&*(double *)maxval> pFilter->filterInfo.lowerBndd); -} - -bool rangeFilter_dd_ie(SColumnFilterElem *pFilter, char *minval, char *maxval) { - return (*(double *)minval < pFilter->filterInfo.upperBndd && *(double *)maxval >= pFilter->filterInfo.lowerBndd); -} - -bool rangeFilter_dd_ei(SColumnFilterElem *pFilter, char *minval, char *maxval) { - return (*(double *)minval <= pFilter->filterInfo.upperBndd && *(double *)maxval > pFilter->filterInfo.lowerBndd); -} - -//////////////////////////////////////////////////////////////////////////// -bool (*filterFunc_i8[])(SColumnFilterElem *pFilter, char *minval, char *maxval) = { - NULL, - less_i8, - large_i8, - equal_i8, - lessEqual_i8, - largeEqual_i8, - nequal_i8, - NULL, -}; - -bool (*filterFunc_i16[])(SColumnFilterElem *pFilter, char *minval, char *maxval) = { - NULL, - less_i16, - large_i16, - equal_i16, - lessEqual_i16, - 
largeEqual_i16, - nequal_i16, - NULL, -}; - -bool (*filterFunc_i32[])(SColumnFilterElem *pFilter, char *minval, char *maxval) = { - NULL, - less_i32, - large_i32, - equal_i32, - lessEqual_i32, - largeEqual_i32, - nequal_i32, - NULL, -}; - -bool (*filterFunc_i64[])(SColumnFilterElem *pFilter, char *minval, char *maxval) = { - NULL, - less_i64, - large_i64, - equal_i64, - lessEqual_i64, - largeEqual_i64, - nequal_i64, - NULL, -}; - -bool (*filterFunc_ds[])(SColumnFilterElem *pFilter, char *minval, char *maxval) = { - NULL, - less_ds, - large_ds, - equal_ds, - lessEqual_ds, - largeEqual_ds, - nequal_ds, - NULL, -}; - -bool (*filterFunc_dd[])(SColumnFilterElem *pFilter, char *minval, char *maxval) = { - NULL, - less_dd, - large_dd, - equal_dd, - lessEqual_dd, - largeEqual_dd, - nequal_dd, - NULL, -}; - -bool (*filterFunc_str[])(SColumnFilterElem* pFilter, char* minval, char *maxval) = { - NULL, - NULL, - NULL, - equal_str, - NULL, - NULL, - nequal_str, - like_str, -}; - -bool (*filterFunc_nchar[])(SColumnFilterElem* pFitler, char* minval, char* maxval) = { - NULL, - NULL, - NULL, - equal_nchar, - NULL, - NULL, - nequal_nchar, - like_nchar, -}; - -bool (*rangeFilterFunc_i8[])(SColumnFilterElem *pFilter, char *minval, char *maxval) = { - NULL, - rangeFilter_i8_ee, - rangeFilter_i8_ie, - rangeFilter_i8_ei, - rangeFilter_i8_ii, -}; - -bool (*rangeFilterFunc_i16[])(SColumnFilterElem *pFilter, char *minval, char *maxval) = { - NULL, - rangeFilter_i16_ee, - rangeFilter_i16_ie, - rangeFilter_i16_ei, - rangeFilter_i16_ii, -}; - -bool (*rangeFilterFunc_i32[])(SColumnFilterElem *pFilter, char *minval, char *maxval) = { - NULL, - rangeFilter_i32_ee, - rangeFilter_i32_ie, - rangeFilter_i32_ei, - rangeFilter_i32_ii, -}; - -bool (*rangeFilterFunc_i64[])(SColumnFilterElem *pFilter, char *minval, char *maxval) = { - NULL, - rangeFilter_i64_ee, - rangeFilter_i64_ie, - rangeFilter_i64_ei, - rangeFilter_i64_ii, -}; - -bool (*rangeFilterFunc_ds[])(SColumnFilterElem *pFilter, char *minval, 
char *maxval) = { - NULL, - rangeFilter_ds_ee, - rangeFilter_ds_ie, - rangeFilter_ds_ei, - rangeFilter_ds_ii, -}; - -bool (*rangeFilterFunc_dd[])(SColumnFilterElem *pFilter, char *minval, char *maxval) = { - NULL, - rangeFilter_dd_ee, - rangeFilter_dd_ie, - rangeFilter_dd_ei, - rangeFilter_dd_ii, -}; - -__filter_func_t* vnodeGetRangeFilterFuncArray(int32_t type) { - switch(type) { - case TSDB_DATA_TYPE_BOOL: return rangeFilterFunc_i8; - case TSDB_DATA_TYPE_TINYINT: return rangeFilterFunc_i8; - case TSDB_DATA_TYPE_SMALLINT: return rangeFilterFunc_i16; - case TSDB_DATA_TYPE_INT: return rangeFilterFunc_i32; - case TSDB_DATA_TYPE_TIMESTAMP: //timestamp uses bigint filter - case TSDB_DATA_TYPE_BIGINT: return rangeFilterFunc_i64; - case TSDB_DATA_TYPE_FLOAT: return rangeFilterFunc_ds; - case TSDB_DATA_TYPE_DOUBLE: return rangeFilterFunc_dd; - default:return NULL; - } -} - -__filter_func_t* vnodeGetValueFilterFuncArray(int32_t type) { - switch(type) { - case TSDB_DATA_TYPE_BOOL: return filterFunc_i8; - case TSDB_DATA_TYPE_TINYINT: return filterFunc_i8; - case TSDB_DATA_TYPE_SMALLINT: return filterFunc_i16; - case TSDB_DATA_TYPE_INT: return filterFunc_i32; - case TSDB_DATA_TYPE_TIMESTAMP: //timestamp uses bigint filter - case TSDB_DATA_TYPE_BIGINT: return filterFunc_i64; - case TSDB_DATA_TYPE_FLOAT: return filterFunc_ds; - case TSDB_DATA_TYPE_DOUBLE: return filterFunc_dd; - case TSDB_DATA_TYPE_BINARY: return filterFunc_str; - case TSDB_DATA_TYPE_NCHAR: return filterFunc_nchar; - default: return NULL; - } -} - -bool vnodeSupportPrefilter(int32_t type) { return type != TSDB_DATA_TYPE_BINARY && type != TSDB_DATA_TYPE_NCHAR; } diff --git a/src/vnode/detail/src/vnodeImport.c b/src/vnode/detail/src/vnodeImport.c deleted file mode 100644 index 9179a89ca7..0000000000 --- a/src/vnode/detail/src/vnodeImport.c +++ /dev/null @@ -1,1573 +0,0 @@ -/* - * Copyright (c) 2019 TAOS Data, Inc. 
- * - * This program is free software: you can use, redistribute, and/or modify - * it under the terms of the GNU Affero General Public License, version 3 - * or later ("AGPL"), as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. - * - * You should have received a copy of the GNU Affero General Public License - * along with this program. If not, see . - */ - -#define _DEFAULT_SOURCE -#include "os.h" - -#include "vnode.h" -#include "vnodeUtil.h" -#include "vnodeStatus.h" - -extern void vnodeGetHeadTname(char *nHeadName, char *nLastName, int vnode, int fileId); -extern int vnodeReadColumnToMem(int fd, SCompBlock *pBlock, SField **fields, int col, char *data, int dataSize, - char *temp, char *buffer, int bufferSize); -extern int vnodeSendShellSubmitRspMsg(SShellObj *pObj, int code, int numOfPoints); -extern void vnodeGetHeadDataLname(char *headName, char *dataName, char *lastName, int vnode, int fileId); -extern int vnodeCreateEmptyCompFile(int vnode, int fileId); -extern int vnodeUpdateFreeSlot(SVnodeObj *pVnode); -extern SCacheBlock *vnodeGetFreeCacheBlock(SVnodeObj *pVnode); -extern int vnodeCreateNeccessaryFiles(SVnodeObj *pVnode); - -#define KEY_AT_INDEX(payload, step, idx) (*(TSKEY *)((char *)(payload) + (step) * (idx))) -typedef struct { - void * signature; - SShellObj *pShell; - SMeterObj *pObj; - int retry; - TSKEY firstKey; - TSKEY lastKey; - int importedRows; - int commit; // start to commit if it is set to 1 - - int slot; // slot/block to start writing the import data - int pos; // pos to start writing the import data in the slot/block - TSKEY key; - - // only for file - int numOfPoints; - int64_t offset; // offset in data file - char * payload; - char * opayload; // allocated space for payload from client - int rows; -} SImportInfo; - -typedef struct { - // in .head 
file - SCompHeader *pHeader; - size_t pHeaderSize; - - SCompInfo compInfo; - SCompBlock *pBlocks; - // in .data file - int blockId; - uint8_t blockLoadState; - - SField *pField; - size_t pFieldSize; - - SData *data[TSDB_MAX_COLUMNS]; - char * buffer; - - char *temp; - - char * tempBuffer; - size_t tempBufferSize; - // Variables for sendfile - int64_t compInfoOffset; - int64_t nextNo0Offset; // next sid whose compInfoOffset > 0 - int64_t hfSize; - int64_t driftOffset; - - int oldNumOfBlocks; - int newNumOfBlocks; - int last; -} SImportHandle; - -typedef struct { - int slot; - int pos; - int oslot; // old slot - TSKEY nextKey; -} SBlockIter; - -typedef struct { - int64_t spos; - int64_t epos; - int64_t totalRows; - char * offset[]; -} SMergeBuffer; - -int vnodeImportData(SMeterObj *pObj, SImportInfo *pImport); - -int vnodeFindKeyInCache(SImportInfo *pImport, int order) { - SMeterObj * pObj = pImport->pObj; - int code = 0; - SQuery query; - SCacheInfo *pInfo = (SCacheInfo *)pObj->pCache; - - TSKEY key = order ? pImport->firstKey : pImport->lastKey; - memset(&query, 0, sizeof(query)); - query.order.order = order; - query.skey = key; - query.ekey = order ? 
pImport->lastKey : pImport->firstKey; - vnodeSearchPointInCache(pObj, &query); - - if (query.slot < 0) { - pImport->slot = pInfo->commitSlot; - if (pInfo->commitPoint >= pObj->pointsPerBlock) pImport->slot = (pImport->slot + 1) % pInfo->maxBlocks; - pImport->pos = 0; - pImport->key = 0; - dTrace("vid:%d sid:%d id:%s, key:%" PRId64 ", import to head of cache", pObj->vnode, pObj->sid, pObj->meterId, key); - code = 0; - } else { - pImport->slot = query.slot; - pImport->pos = query.pos; - pImport->key = query.key; - - if (key != query.key) { - if (order == 0) { - // since pos is the position which has smaller key, data shall be imported after it - pImport->pos++; - if (pImport->pos >= pObj->pointsPerBlock) { - pImport->slot = (pImport->slot + 1) % pInfo->maxBlocks; - pImport->pos = 0; - } - } else { - if (pImport->pos < 0) pImport->pos = 0; - } - } - code = 0; - } - - return code; -} - -void vnodeGetValidDataRange(int vnode, TSKEY now, TSKEY *minKey, TSKEY *maxKey) { - SVnodeObj *pVnode = vnodeList + vnode; - - int64_t delta = pVnode->cfg.daysPerFile * tsMsPerDay[(uint8_t)pVnode->cfg.precision]; - int fid = now / delta; - *minKey = (fid - pVnode->maxFiles + 1) * delta; - *maxKey = (fid + 2) * delta - 1; - return; -} - -int vnodeImportPoints(SMeterObj *pObj, char *cont, int contLen, char source, void *param, int sversion, - int *pNumOfPoints, TSKEY now) { - SSubmitMsg *pSubmit = (SSubmitMsg *)cont; - SVnodeObj * pVnode = vnodeList + pObj->vnode; - int rows = 0; - char * payload = NULL; - int code = TSDB_CODE_SUCCESS; - SCachePool *pPool = (SCachePool *)(pVnode->pCachePool); - SShellObj * pShell = (SShellObj *)param; - TSKEY firstKey, lastKey; - - payload = pSubmit->payLoad; - - rows = htons(pSubmit->numOfRows); - assert(rows > 0); - int expectedLen = rows * pObj->bytesPerPoint + sizeof(pSubmit->numOfRows); - if (expectedLen != contLen) { - dError("vid:%d sid:%d id:%s, invalid import, expected:%d, contLen:%d", pObj->vnode, pObj->sid, pObj->meterId, - expectedLen, 
contLen); - return TSDB_CODE_WRONG_MSG_SIZE; - } - - // Check timestamp context. - TSKEY minKey = 0, maxKey = 0; - firstKey = KEY_AT_INDEX(payload, pObj->bytesPerPoint, 0); - lastKey = KEY_AT_INDEX(payload, pObj->bytesPerPoint, rows - 1); - assert(firstKey <= lastKey); - vnodeGetValidDataRange(pObj->vnode, now, &minKey, &maxKey); - if (firstKey < minKey || firstKey > maxKey || lastKey < minKey || lastKey > maxKey) { - dError( - "vid:%d sid:%d id:%s, invalid timestamp to import, rows:%d firstKey: %" PRId64 " lastKey: %" PRId64 " minAllowedKey:%" PRId64 " " - "maxAllowedKey:%" PRId64, - pObj->vnode, pObj->sid, pObj->meterId, rows, firstKey, lastKey, minKey, maxKey); - return TSDB_CODE_TIMESTAMP_OUT_OF_RANGE; - } - // forward to peers - if (pShell && pVnode->cfg.replications > 1) { - code = vnodeForwardToPeer(pObj, cont, contLen, TSDB_ACTION_IMPORT, sversion); - if (code != 0) return code; - } - - if (pVnode->cfg.commitLog && source != TSDB_DATA_SOURCE_LOG) { - if (pVnode->logFd < 0) return TSDB_CODE_INVALID_COMMIT_LOG; - code = vnodeWriteToCommitLog(pObj, TSDB_ACTION_IMPORT, cont, contLen, sversion); - if (code != 0) return code; - } - - /* - * The timestamp of all records in a submit payload are always in ascending order, guaranteed by client, so here only - * the first key. 
- */ - if (firstKey > pObj->lastKey) { // Just call insert - code = vnodeInsertPoints(pObj, cont, contLen, TSDB_DATA_SOURCE_LOG, NULL, sversion, pNumOfPoints, now); - } else { // trigger import - if (sversion != pObj->sversion) { - dError("vid:%d sid:%d id:%s, invalid sversion, expected:%d received:%d", pObj->vnode, pObj->sid, pObj->meterId, - pObj->sversion, sversion); - return TSDB_CODE_OTHERS; - } - - // check the table status for perform import historical data - if ((code = vnodeSetMeterInsertImportStateEx(pObj, TSDB_METER_STATE_IMPORTING)) != TSDB_CODE_SUCCESS) { - return code; - } - - SImportInfo import = {0}; - - dTrace("vid:%d sid:%d id:%s, try to import %d rows data, firstKey:%" PRId64 ", lastKey:%" PRId64 ", object lastKey:%" PRId64, - pObj->vnode, pObj->sid, pObj->meterId, rows, firstKey, lastKey, pObj->lastKey); - - import.firstKey = firstKey; - import.lastKey = lastKey; - import.pObj = pObj; - import.pShell = pShell; - import.payload = payload; - import.rows = rows; - - // FIXME: mutex here seems meaningless and num here still can be changed - int32_t num = 0; - pthread_mutex_lock(&pVnode->vmutex); - num = pObj->numOfQueries; - pthread_mutex_unlock(&pVnode->vmutex); - - int32_t commitInProcess = 0; - - pthread_mutex_lock(&pPool->vmutex); - if (((commitInProcess = pPool->commitInProcess) == 1) || num > 0) { - // mutual exclusion with read (need to change here) - pthread_mutex_unlock(&pPool->vmutex); - vnodeClearMeterState(pObj, TSDB_METER_STATE_IMPORTING); - return TSDB_CODE_ACTION_IN_PROGRESS; - - } else { - pPool->commitInProcess = 1; - pthread_mutex_unlock(&pPool->vmutex); - code = vnodeImportData(pObj, &import); - *pNumOfPoints = import.importedRows; - } - pVnode->version++; - vnodeClearMeterState(pObj, TSDB_METER_STATE_IMPORTING); - } - - return code; -} - -/* Function to search keys in a range - * - * Assumption: keys in payload are in ascending order - * - * @payload: data records, key in ascending order - * @step: bytes each record takes - * 
@rows: number of data records - * @skey: range start (included) - * @ekey: range end (included) - * @srows: rtype, start index of records - * @nrows: rtype, number of records in range - * - * @rtype: 0 means find data in the range - * -1 means find no data in the range - */ -static int vnodeSearchKeyInRange(char *payload, int step, int rows, TSKEY skey, TSKEY ekey, int *srow, int *nrows) { - if (rows <= 0 || KEY_AT_INDEX(payload, step, 0) > ekey || KEY_AT_INDEX(payload, step, rows - 1) < skey || skey > ekey) - return -1; - - int left = 0; - int right = rows - 1; - int mid; - - // Binary search the first key in payload >= skey - do { - mid = (left + right) / 2; - if (skey < KEY_AT_INDEX(payload, step, mid)) { - right = mid; - } else if (skey > KEY_AT_INDEX(payload, step, mid)) { - left = mid + 1; - } else { - break; - } - } while (left < right); - - if (skey <= KEY_AT_INDEX(payload, step, mid)) { - *srow = mid; - } else { - if (mid + 1 >= rows) { - return -1; - } else { - *srow = mid + 1; - } - } - - assert(skey <= KEY_AT_INDEX(payload, step, *srow)); - - *nrows = 0; - for (int i = *srow; i < rows; i++) { - if (KEY_AT_INDEX(payload, step, i) <= ekey) { - (*nrows)++; - } else { - break; - } - } - - if (*nrows == 0) return -1; - - return 0; -} - -int vnodeOpenMinFilesForImport(int vnode, int fid) { - char dname[TSDB_FILENAME_LEN] = "\0"; - SVnodeObj * pVnode = vnodeList + vnode; - struct stat filestat; - int minFileSize; - - minFileSize = TSDB_FILE_HEADER_LEN + sizeof(SCompHeader) * pVnode->cfg.maxSessions + sizeof(TSCKSUM); - - vnodeGetHeadDataLname(pVnode->cfn, dname, pVnode->lfn, vnode, fid); - - // Open .head file - pVnode->hfd = open(pVnode->cfn, O_RDONLY); - if (pVnode->hfd < 0) { - dError("vid:%d, failed to open head file:%s, reason:%s", vnode, pVnode->cfn, strerror(errno)); - taosLogError("vid:%d, failed to open head file:%s, reason:%s", vnode, pVnode->cfn, strerror(errno)); - goto _error_open; - } - - fstat(pVnode->hfd, &filestat); - if (filestat.st_size < 
minFileSize) { - dError("vid:%d, head file:%s is corrupted", vnode, pVnode->cfn); - taosLogError("vid:%d, head file:%s corrupted", vnode, pVnode->cfn); - goto _error_open; - } - - // Open .data file - pVnode->dfd = open(dname, O_RDWR); - if (pVnode->dfd < 0) { - dError("vid:%d, failed to open data file:%s, reason:%s", vnode, dname, strerror(errno)); - taosLogError("vid:%d, failed to open data file:%s, reason:%s", vnode, dname, strerror(errno)); - goto _error_open; - } - - fstat(pVnode->dfd, &filestat); - if (filestat.st_size < TSDB_FILE_HEADER_LEN) { - dError("vid:%d, data file:%s corrupted", vnode, dname); - taosLogError("vid:%d, data file:%s corrupted", vnode, dname); - goto _error_open; - } - - // Open .last file - pVnode->lfd = open(pVnode->lfn, O_RDWR); - if (pVnode->lfd < 0) { - dError("vid:%d, failed to open last file:%s, reason:%s", vnode, pVnode->lfn, strerror(errno)); - taosLogError("vid:%d, failed to open last file:%s, reason:%s", vnode, pVnode->lfn, strerror(errno)); - goto _error_open; - } - - fstat(pVnode->lfd, &filestat); - if (filestat.st_size < TSDB_FILE_HEADER_LEN) { - dError("vid:%d, last file:%s corrupted", vnode, pVnode->lfn); - taosLogError("vid:%d, last file:%s corrupted", vnode, pVnode->lfn); - goto _error_open; - } - - return 0; - -_error_open: - if (pVnode->hfd > 0) close(pVnode->hfd); - pVnode->hfd = 0; - - if (pVnode->dfd > 0) close(pVnode->dfd); - pVnode->dfd = 0; - - if (pVnode->lfd > 0) close(pVnode->lfd); - pVnode->lfd = 0; - - return -1; -} - -/* Function to open .t file and sendfile the first part - */ -int vnodeOpenTempFilesForImport(SImportHandle *pHandle, SMeterObj *pObj, int fid) { - char dHeadName[TSDB_FILENAME_LEN] = "\0"; - SVnodeObj * pVnode = vnodeList + pObj->vnode; - struct stat filestat; - int sid; - - // cfn: .head - if (readlink(pVnode->cfn, dHeadName, TSDB_FILENAME_LEN) < 0) return -1; - - size_t len = strlen(dHeadName); - // switch head name - switch (dHeadName[len - 1]) { - case '0': - dHeadName[len - 1] = '1'; - 
break; - case '1': - dHeadName[len - 1] = '0'; - break; - default: - dError("vid: %d, fid: %d, head target filename not end with 0 or 1", pVnode->vnode, fid); - return -1; - } - - vnodeGetHeadTname(pVnode->nfn, NULL, pVnode->vnode, fid); - if (symlink(dHeadName, pVnode->nfn) < 0) return -1; - - pVnode->nfd = open(pVnode->nfn, O_RDWR | O_CREAT | O_TRUNC, S_IRWXU | S_IRWXG | S_IRWXO); - if (pVnode->nfd < 0) { - dError("vid:%d, failed to open new head file:%s, reason:%s", pVnode->vnode, pVnode->nfn, strerror(errno)); - taosLogError("vid:%d, failed to open new head file:%s, reason:%s", pVnode->vnode, pVnode->nfn, strerror(errno)); - return -1; - } - - fstat(pVnode->hfd, &filestat); - pHandle->hfSize = filestat.st_size; - - // Find the next sid whose compInfoOffset > 0 - for (sid = pObj->sid + 1; sid < pVnode->cfg.maxSessions; sid++) { - if (pHandle->pHeader[sid].compInfoOffset > 0) break; - } - - pHandle->nextNo0Offset = (sid == pVnode->cfg.maxSessions) ? pHandle->hfSize : pHandle->pHeader[sid].compInfoOffset; - - // FIXME: sendfile the original part - // TODO: Here, we need to take the deleted table case in consideration, this function - // just assume the case is handled before calling this function - if (pHandle->pHeader[pObj->sid].compInfoOffset > 0) { - pHandle->compInfoOffset = pHandle->pHeader[pObj->sid].compInfoOffset; - } else { - pHandle->compInfoOffset = pHandle->nextNo0Offset; - } - - assert(pHandle->compInfoOffset <= pHandle->hfSize); - - lseek(pVnode->hfd, 0, SEEK_SET); - lseek(pVnode->nfd, 0, SEEK_SET); - if (tsendfile(pVnode->nfd, pVnode->hfd, NULL, pHandle->compInfoOffset) < 0) { - return -1; - } - - // Leave a SCompInfo space here - lseek(pVnode->nfd, sizeof(SCompInfo), SEEK_CUR); - - return 0; -} - -typedef enum { DATA_LOAD_TIMESTAMP = 0x1, DATA_LOAD_OTHER_DATA = 0x2 } DataLoadMod; - -/* Function to load a block data at the requirement of mod - */ -static int vnodeLoadNeededBlockData(SMeterObj *pObj, SImportHandle *pHandle, int blockId, uint8_t 
loadMod, int *code) { - size_t size; - SCompBlock *pBlock = pHandle->pBlocks + blockId; - *code = TSDB_CODE_SUCCESS; - - SVnodeObj *pVnode = vnodeList + pObj->vnode; - - int dfd = pBlock->last ? pVnode->lfd : pVnode->dfd; - - if (pHandle->blockId != blockId) { - pHandle->blockId = blockId; - pHandle->blockLoadState = 0; - } - - if (pHandle->blockLoadState == 0){ // Reload pField - size = sizeof(SField) * pBlock->numOfCols + sizeof(TSCKSUM); - if (pHandle->pFieldSize < size) { - pHandle->pField = (SField *)realloc((void *)(pHandle->pField), size); - if (pHandle->pField == NULL) { - dError("vid: %d, sid: %d, meterId: %s, failed to allocate memory, size: %ul", pObj->vnode, pObj->sid, - pObj->meterId, size); - *code = TSDB_CODE_SERV_OUT_OF_MEMORY; - return -1; - } - pHandle->pFieldSize = size; - } - - lseek(dfd, pBlock->offset, SEEK_SET); - if (read(dfd, (void *)(pHandle->pField), pHandle->pFieldSize) < 0) { - dError("vid:%d sid:%d meterId:%s, failed to read data file, size:%zu reason:%s", pVnode->vnode, pObj->sid, - pObj->meterId, pHandle->pFieldSize, strerror(errno)); - *code = TSDB_CODE_FILE_CORRUPTED; - return -1; - } - - if (!taosCheckChecksumWhole((uint8_t *)(pHandle->pField), pHandle->pFieldSize)) { - dError("vid:%d sid:%d meterId:%s, data file %s is broken since checksum mismatch", pVnode->vnode, pObj->sid, - pObj->meterId, pVnode->lfn); - *code = TSDB_CODE_FILE_CORRUPTED; - return -1; - } - } - - { // Allocate necessary buffer - size = pObj->bytesPerPoint * pObj->pointsPerFileBlock + - (sizeof(SData) + EXTRA_BYTES + sizeof(TSCKSUM)) * pObj->numOfColumns; - if (pHandle->buffer == NULL) { - pHandle->buffer = malloc(size); - if (pHandle->buffer == NULL) { - dError("vid: %d, sid: %d, meterId: %s, failed to allocate memory, size: %ul", pObj->vnode, pObj->sid, - pObj->meterId, size); - *code = TSDB_CODE_SERV_OUT_OF_MEMORY; - return -1; - } - - // TODO: Init data - pHandle->data[0] = (SData *)(pHandle->buffer); - for (int col = 1; col < pObj->numOfColumns; col++) { - 
pHandle->data[col] = (SData *)((char *)(pHandle->data[col - 1]) + sizeof(SData) + EXTRA_BYTES + - sizeof(TSCKSUM) + pObj->pointsPerFileBlock * pObj->schema[col - 1].bytes); - } - } - - if (pHandle->temp == NULL) { - pHandle->temp = malloc(size); - if (pHandle->temp == NULL) { - dError("vid: %d, sid: %d, meterId: %s, failed to allocate memory, size: %ul", pObj->vnode, pObj->sid, - pObj->meterId, size); - *code = TSDB_CODE_SERV_OUT_OF_MEMORY; - return -1; - } - } - - if (pHandle->tempBuffer == NULL) { - pHandle->tempBufferSize = pObj->maxBytes * pObj->pointsPerFileBlock + EXTRA_BYTES + sizeof(TSCKSUM); - pHandle->tempBuffer = malloc(pHandle->tempBufferSize); - if (pHandle->tempBuffer == NULL) { - dError("vid: %d, sid: %d, meterId: %s, failed to allocate memory, size: %ul", pObj->vnode, pObj->sid, - pObj->meterId, pHandle->tempBufferSize); - *code = TSDB_CODE_SERV_OUT_OF_MEMORY; - return -1; - } - } - } - - if ((loadMod & DATA_LOAD_TIMESTAMP) && - (~(pHandle->blockLoadState & DATA_LOAD_TIMESTAMP))) { // load only timestamp part - if (vnodeReadColumnToMem(dfd, pBlock, &(pHandle->pField), PRIMARYKEY_TIMESTAMP_COL_INDEX, - pHandle->data[PRIMARYKEY_TIMESTAMP_COL_INDEX]->data, sizeof(TSKEY) * pBlock->numOfPoints, - pHandle->temp, pHandle->tempBuffer, pHandle->tempBufferSize) < 0) { - *code = TSDB_CODE_FILE_CORRUPTED; - return -1; - } - - pHandle->blockLoadState |= DATA_LOAD_TIMESTAMP; - } - - if ((loadMod & DATA_LOAD_OTHER_DATA) && (~(pHandle->blockLoadState & DATA_LOAD_OTHER_DATA))) { // load other columns - for (int col = 1; col < pBlock->numOfCols; col++) { - if (vnodeReadColumnToMem(dfd, pBlock, &(pHandle->pField), col, pHandle->data[col]->data, - pBlock->numOfPoints * pObj->schema[col].bytes, pHandle->temp, pHandle->tempBuffer, - pHandle->tempBufferSize) < 0) { - *code = TSDB_CODE_FILE_CORRUPTED; - return -1; - } - } - - pHandle->blockLoadState |= DATA_LOAD_OTHER_DATA; - } - - return 0; -} - -static int vnodeCloseImportFiles(SMeterObj *pObj, SImportHandle *pHandle) { 
- SVnodeObj *pVnode = vnodeList + pObj->vnode; - char dpath[TSDB_FILENAME_LEN] = "\0"; - SCompInfo compInfo; - -#ifdef _ALPINE - off_t offset = 0; -#else - __off_t offset = 0; -#endif - - if (pVnode->nfd > 0) { - offset = lseek(pVnode->nfd, 0, SEEK_CUR); - assert(offset == pHandle->nextNo0Offset + pHandle->driftOffset); - - { // Write the SCompInfo part - compInfo.uid = pObj->uid; - compInfo.last = pHandle->last; - compInfo.numOfBlocks = pHandle->newNumOfBlocks + pHandle->oldNumOfBlocks; - compInfo.delimiter = TSDB_VNODE_DELIMITER; - taosCalcChecksumAppend(0, (uint8_t *)(&compInfo), sizeof(SCompInfo)); - - lseek(pVnode->nfd, pHandle->compInfoOffset, SEEK_SET); - if (twrite(pVnode->nfd, (void *)(&compInfo), sizeof(SCompInfo)) < 0) { - dError("vid:%d sid:%d meterId:%s, failed to wirte SCompInfo, reason:%s", pObj->vnode, pObj->sid, pObj->meterId, - strerror(errno)); - return -1; - } - } - - // Write the rest of the SCompBlock part - if (pHandle->hfSize > pHandle->nextNo0Offset) { - lseek(pVnode->nfd, 0, SEEK_END); - lseek(pVnode->hfd, pHandle->nextNo0Offset, SEEK_SET); - if (tsendfile(pVnode->nfd, pVnode->hfd, NULL, pHandle->hfSize - pHandle->nextNo0Offset) < 0) { - dError("vid:%d sid:%d meterId:%s, failed to sendfile, size:%" PRId64 ", reason:%s", pObj->vnode, pObj->sid, - pObj->meterId, pHandle->hfSize - pHandle->nextNo0Offset, strerror(errno)); - return -1; - } - } - - // Write SCompHeader part - pHandle->pHeader[pObj->sid].compInfoOffset = pHandle->compInfoOffset; - for (int sid = pObj->sid + 1; sid < pVnode->cfg.maxSessions; ++sid) { - if (pHandle->pHeader[sid].compInfoOffset > 0) { - pHandle->pHeader[sid].compInfoOffset += pHandle->driftOffset; - } - } - - taosCalcChecksumAppend(0, (uint8_t *)(pHandle->pHeader), pHandle->pHeaderSize); - lseek(pVnode->nfd, TSDB_FILE_HEADER_LEN, SEEK_SET); - if (twrite(pVnode->nfd, (void *)(pHandle->pHeader), pHandle->pHeaderSize) < 0) { - dError("vid:%d sid:%d meterId:%s, failed to wirte SCompHeader part, size:%zu, reason:%s", 
pObj->vnode, pObj->sid, - pObj->meterId, pHandle->pHeaderSize, strerror(errno)); - return -1; - } - } - - // Close opened files - close(pVnode->dfd); - pVnode->dfd = 0; - - close(pVnode->hfd); - pVnode->hfd = 0; - - close(pVnode->lfd); - pVnode->lfd = 0; - - if (pVnode->nfd > 0) { - close(pVnode->nfd); - pVnode->nfd = 0; - - readlink(pVnode->cfn, dpath, TSDB_FILENAME_LEN); - rename(pVnode->nfn, pVnode->cfn); - remove(dpath); - } - - return 0; -} - -static void vnodeConvertRowsToCols(SMeterObj *pObj, const char *payload, int rows, SData *data[], int rowOffset) { - int sdataRow; - int offset; - - for (int row = 0; row < rows; ++row) { - sdataRow = row + rowOffset; - offset = 0; - for (int col = 0; col < pObj->numOfColumns; ++col) { - memcpy(data[col]->data + sdataRow * pObj->schema[col].bytes, payload + pObj->bytesPerPoint * row + offset, - pObj->schema[col].bytes); - - offset += pObj->schema[col].bytes; - } - } -} - -static int vnodeMergeDataIntoFile(SImportInfo *pImport, const char *payload, int rows, int fid) { - SMeterObj * pObj = (SMeterObj *)(pImport->pObj); - SVnodeObj * pVnode = vnodeList + pObj->vnode; - SImportHandle importHandle; - size_t size = 0; - SData * data[TSDB_MAX_COLUMNS]; - char * buffer = NULL; - SData * cdata[TSDB_MAX_COLUMNS]; - char * cbuffer = NULL; - SCompBlock compBlock; - TSCKSUM checksum = 0; - int pointsImported = 0; - int code = TSDB_CODE_SUCCESS; - SCachePool * pPool = (SCachePool *)pVnode->pCachePool; - SCacheInfo * pInfo = (SCacheInfo *)(pObj->pCache); - TSKEY lastKeyImported = 0; - - TSKEY delta = pVnode->cfg.daysPerFile * tsMsPerDay[(uint8_t)pVnode->cfg.precision]; - TSKEY minFileKey = fid * delta; - TSKEY maxFileKey = minFileKey + delta - 1; - TSKEY firstKey = KEY_AT_INDEX(payload, pObj->bytesPerPoint, 0); - TSKEY lastKey = KEY_AT_INDEX(payload, pObj->bytesPerPoint, rows - 1); - - assert(firstKey >= minFileKey && firstKey <= maxFileKey && lastKey >= minFileKey && lastKey <= maxFileKey); - - // create neccessary files - 
pVnode->commitFirstKey = firstKey; - if (vnodeCreateNeccessaryFiles(pVnode) < 0) return TSDB_CODE_OTHERS; - - assert(pVnode->commitFileId == fid); - - // Open least files to import .head(hfd) .data(dfd) .last(lfd) - if (vnodeOpenMinFilesForImport(pObj->vnode, fid) < 0) return TSDB_CODE_FILE_CORRUPTED; - - memset(&importHandle, 0, sizeof(SImportHandle)); - - { // Load SCompHeader part from .head file - importHandle.pHeaderSize = sizeof(SCompHeader) * pVnode->cfg.maxSessions + sizeof(TSCKSUM); - importHandle.pHeader = (SCompHeader *)malloc(importHandle.pHeaderSize); - if (importHandle.pHeader == NULL) { - dError("vid: %d, sid: %d, meterId: %s, failed to allocate memory, size: %ul", pObj->vnode, pObj->sid, - pObj->meterId, importHandle.pHeaderSize); - code = TSDB_CODE_SERV_OUT_OF_MEMORY; - goto _error_merge; - } - - lseek(pVnode->hfd, TSDB_FILE_HEADER_LEN, SEEK_SET); - if (read(pVnode->hfd, (void *)(importHandle.pHeader), importHandle.pHeaderSize) < importHandle.pHeaderSize) { - dError("vid: %d, sid: %d, meterId: %s, fid: %d failed to read SCompHeader part, reason:%s", pObj->vnode, - pObj->sid, pObj->meterId, fid, strerror(errno)); - code = TSDB_CODE_FILE_CORRUPTED; - goto _error_merge; - } - - if (!taosCheckChecksumWhole((uint8_t *)(importHandle.pHeader), importHandle.pHeaderSize)) { - dError("vid: %d, sid: %d, meterId: %s, fid: %d SCompHeader part is broken", pObj->vnode, pObj->sid, pObj->meterId, - fid); - code = TSDB_CODE_FILE_CORRUPTED; - goto _error_merge; - } - } - - { // Initialize data[] and cdata[], which is used to hold data to write to data file - size = pObj->bytesPerPoint * pVnode->cfg.rowsInFileBlock + (sizeof(SData) + EXTRA_BYTES + sizeof(TSCKSUM)) * pObj->numOfColumns; - - buffer = (char *)malloc(size); - if (buffer == NULL) { - dError("vid: %d, sid: %d, meterId: %s, failed to allocate memory, size: %ul", pObj->vnode, pObj->sid, - pObj->meterId, size); - code = TSDB_CODE_SERV_OUT_OF_MEMORY; - goto _error_merge; - } - - cbuffer = (char *)malloc(size); 
- if (cbuffer == NULL) { - dError("vid: %d, sid: %d, meterId: %s, failed to allocate memory, size: %ul", pObj->vnode, pObj->sid, - pObj->meterId, size); - code = TSDB_CODE_SERV_OUT_OF_MEMORY; - goto _error_merge; - } - - data[0] = (SData *)buffer; - cdata[0] = (SData *)cbuffer; - - for (int col = 1; col < pObj->numOfColumns; col++) { - data[col] = (SData *)((char *)data[col - 1] + sizeof(SData) + EXTRA_BYTES + sizeof(TSCKSUM) + - pObj->pointsPerFileBlock * pObj->schema[col - 1].bytes); - cdata[col] = (SData *)((char *)cdata[col - 1] + sizeof(SData) + EXTRA_BYTES + sizeof(TSCKSUM) + - pObj->pointsPerFileBlock * pObj->schema[col - 1].bytes); - } - } - - if (importHandle.pHeader[pObj->sid].compInfoOffset == 0) { // No data in this file, just write it - _write_empty_point: - if (vnodeOpenTempFilesForImport(&importHandle, pObj, fid) < 0) { - code = TSDB_CODE_OTHERS; - goto _error_merge; - } - importHandle.oldNumOfBlocks = 0; - importHandle.driftOffset += sizeof(SCompInfo); - lastKeyImported = lastKey; - - for (int rowsWritten = 0; rowsWritten < rows;) { - int rowsToWrite = MIN(pVnode->cfg.rowsInFileBlock, (rows - rowsWritten) /* the rows left */); - vnodeConvertRowsToCols(pObj, payload + rowsWritten * pObj->bytesPerPoint, rowsToWrite, data, 0); - pointsImported += rowsToWrite; - - compBlock.last = 1; - if (vnodeWriteBlockToFile(pObj, &compBlock, data, cdata, rowsToWrite) < 0) { - // TODO: deal with ERROR here - } - - importHandle.last = compBlock.last; - - checksum = taosCalcChecksum(checksum, (uint8_t *)(&compBlock), sizeof(SCompBlock)); - twrite(pVnode->nfd, &compBlock, sizeof(SCompBlock)); - importHandle.newNumOfBlocks++; - importHandle.driftOffset += sizeof(SCompBlock); - - rowsWritten += rowsToWrite; - } - twrite(pVnode->nfd, &checksum, sizeof(TSCKSUM)); - importHandle.driftOffset += sizeof(TSCKSUM); - } else { // Else if there are old data in this file. 
- { // load SCompInfo and SCompBlock part - lseek(pVnode->hfd, importHandle.pHeader[pObj->sid].compInfoOffset, SEEK_SET); - if (read(pVnode->hfd, (void *)(&(importHandle.compInfo)), sizeof(SCompInfo)) < sizeof(SCompInfo)) { - dError("vid:%d sid:%d meterId:%s, failed to read .head file, reason:%s", pVnode->vnode, pObj->sid, - pObj->meterId, strerror(errno)); - code = TSDB_CODE_FILE_CORRUPTED; - goto _error_merge; - } - - if ((importHandle.compInfo.delimiter != TSDB_VNODE_DELIMITER) || - (!taosCheckChecksumWhole((uint8_t *)(&(importHandle.compInfo)), sizeof(SCompInfo)))) { - dError("vid:%d sid:%d meterId:%s, .head file %s is broken, delemeter:%x", pVnode->vnode, pObj->sid, - pObj->meterId, pVnode->cfn, importHandle.compInfo.delimiter); - code = TSDB_CODE_FILE_CORRUPTED; - goto _error_merge; - } - - // Check the context of SCompInfo part - if (importHandle.compInfo.uid != pObj->uid) { // The data belongs to the other meter - goto _write_empty_point; - } - - importHandle.oldNumOfBlocks = importHandle.compInfo.numOfBlocks; - importHandle.last = importHandle.compInfo.last; - - size = sizeof(SCompBlock) * importHandle.compInfo.numOfBlocks + sizeof(TSCKSUM); - importHandle.pBlocks = (SCompBlock *)malloc(size); - if (importHandle.pBlocks == NULL) { - dError("vid:%d sid:%d meterId:%s, failed to allocate importHandle.pBlock, size:%ul", pVnode->vnode, pObj->sid, - pObj->meterId, size); - code = TSDB_CODE_SERV_OUT_OF_MEMORY; - goto _error_merge; - } - - if (read(pVnode->hfd, (void *)(importHandle.pBlocks), size) < size) { - dError("vid:%d sid:%d meterId:%s, failed to read importHandle.pBlock, reason:%s", pVnode->vnode, pObj->sid, - pObj->meterId, strerror(errno)); - code = TSDB_CODE_FILE_CORRUPTED; - goto _error_merge; - } - - if (!taosCheckChecksumWhole((uint8_t *)(importHandle.pBlocks), size)) { - dError("vid:%d sid:%d meterId:%s, pBlock part is broken in %s", pVnode->vnode, pObj->sid, pObj->meterId, - pVnode->cfn); - code = TSDB_CODE_FILE_CORRUPTED; - goto _error_merge; - } 
- } - - /* Now we have _payload_, we have _importHandle.pBlocks_, just merge payload into the importHandle.pBlocks - * - * Input: payload, pObj->bytesPerBlock, rows, importHandle.pBlocks - */ - { - int payloadIter = 0; - SBlockIter blockIter = {0, 0, 0, 0}; - - while (1) { - if (payloadIter >= rows) { // payload end, break - // write the remaining blocks to the file - if (pVnode->nfd > 0) { - int blocksLeft = importHandle.compInfo.numOfBlocks - blockIter.oslot; - if (blocksLeft > 0) { - checksum = taosCalcChecksum(checksum, (uint8_t *)(importHandle.pBlocks + blockIter.oslot), - sizeof(SCompBlock) * blocksLeft); - if (twrite(pVnode->nfd, (void *)(importHandle.pBlocks + blockIter.oslot), - sizeof(SCompBlock) * blocksLeft) < 0) { - dError("vid:%d sid:%d meterId:%s, failed to write %s file, size:%ul, reason:%s", pVnode->vnode, - pObj->sid, pObj->meterId, pVnode->nfn, sizeof(SCompBlock) * blocksLeft, strerror(errno)); - code = TSDB_CODE_OTHERS; - goto _error_merge; - } - } - - if (twrite(pVnode->nfd, (void *)(&checksum), sizeof(TSCKSUM)) < 0) { - dError("vid:%d sid:%d meterId:%s, failed to write %s file, size:%ul, reason:%s", pVnode->vnode, pObj->sid, - pObj->meterId, pVnode->nfn, sizeof(TSCKSUM), strerror(errno)); - code = TSDB_CODE_OTHERS; - goto _error_merge; - } - } - break; - } - - if (blockIter.slot >= importHandle.compInfo.numOfBlocks) { // blocks end, break - // Should never come here - assert(false); - } - - TSKEY key = KEY_AT_INDEX(payload, pObj->bytesPerPoint, payloadIter); - - { // Binary search the (slot, pos) which is >= key as well as nextKey - int left = blockIter.slot; - int right = importHandle.compInfo.numOfBlocks - 1; - TSKEY minKey = importHandle.pBlocks[left].keyFirst; - TSKEY maxKey = importHandle.pBlocks[right].keyLast; - - assert(minKey <= maxKey); - - if (key < minKey) { // Case 1. write just ahead the blockIter.slot - blockIter.slot = left; - blockIter.pos = 0; - blockIter.nextKey = minKey; - } else if (key > maxKey) { // Case 2. 
write to the end - if (importHandle.pBlocks[right].last) { // Case 2.1 last block in .last file, need to merge - assert(importHandle.last != 0); - importHandle.last = 0; - blockIter.slot = right; - blockIter.pos = importHandle.pBlocks[right].numOfPoints; - } else { // Case 2.2 just write after the last block - blockIter.slot = right + 1; - blockIter.pos = 0; - } - blockIter.nextKey = maxFileKey + 1; - } else { // Case 3. need to search the block for slot and pos - if (key == minKey || key == maxKey) { - if (tsAffectedRowsMod) pointsImported++; - payloadIter++; - continue; - } - - // Here: minKey < key < maxKey - - int mid; - TSKEY blockMinKey; - TSKEY blockMaxKey; - - // Binary search the slot - do { - mid = (left + right) / 2; - blockMinKey = importHandle.pBlocks[mid].keyFirst; - blockMaxKey = importHandle.pBlocks[mid].keyLast; - - assert(blockMinKey <= blockMaxKey); - - if (key < blockMinKey) { - right = mid; - } else if (key > blockMaxKey) { - left = mid + 1; - } else { /* blockMinKey <= key <= blockMaxKey */ - break; - } - } while (left < right); - - if (key == blockMinKey || key == blockMaxKey) { // duplicate key - if (tsAffectedRowsMod) pointsImported++; - payloadIter++; - continue; - } - - // Get the slot - if (key > blockMaxKey) { /* pos = 0 or pos = ? */ - blockIter.slot = mid + 1; - } else { /* key < blockMinKey (pos = 0) || (key > blockMinKey && key < blockMaxKey) (pos=?) 
*/ - blockIter.slot = mid; - } - - // Get the pos - assert(blockIter.slot < importHandle.compInfo.numOfBlocks); - - if (key == importHandle.pBlocks[blockIter.slot].keyFirst || - key == importHandle.pBlocks[blockIter.slot].keyLast) { - if (tsAffectedRowsMod) pointsImported++; - payloadIter++; - continue; - } - - assert(key < importHandle.pBlocks[blockIter.slot].keyLast); - - /* */ - if (key < importHandle.pBlocks[blockIter.slot].keyFirst) { - blockIter.pos = 0; - blockIter.nextKey = importHandle.pBlocks[blockIter.slot].keyFirst; - } else { - SCompBlock *pBlock = importHandle.pBlocks + blockIter.slot; - if (pBlock->sversion != pObj->sversion) { /*TODO*/ - } - if (vnodeLoadNeededBlockData(pObj, &importHandle, blockIter.slot, DATA_LOAD_TIMESTAMP, &code) < 0) { - goto _error_merge; - } - int pos = (*vnodeSearchKeyFunc[pObj->searchAlgorithm])( - importHandle.data[PRIMARYKEY_TIMESTAMP_COL_INDEX]->data, pBlock->numOfPoints, key, TSQL_SO_ASC); - assert(pos != 0); - if (KEY_AT_INDEX(importHandle.data[PRIMARYKEY_TIMESTAMP_COL_INDEX]->data, sizeof(TSKEY), pos) == key) { - if (tsAffectedRowsMod) pointsImported++; - payloadIter++; - continue; - } - - blockIter.pos = pos; - blockIter.nextKey = (blockIter.slot + 1 < importHandle.compInfo.numOfBlocks) - ? importHandle.pBlocks[blockIter.slot + 1].keyFirst - : maxFileKey + 1; - // Need to merge with this block - if (importHandle.pBlocks[blockIter.slot].last) { // this is to merge with the last block - assert((blockIter.slot == (importHandle.compInfo.numOfBlocks - 1))); - importHandle.last = 0; - } - } - } - } - - int aslot = MIN(blockIter.slot, importHandle.compInfo.numOfBlocks - 1); - int64_t sversion = importHandle.pBlocks[aslot].sversion; - if (sversion != pObj->sversion) { - code = TSDB_CODE_OTHERS; - goto _error_merge; - } - - // Open the new .t file if not opened yet. 
- if (pVnode->nfd <= 0) { - if (vnodeOpenTempFilesForImport(&importHandle, pObj, fid) < 0) { - code = TSDB_CODE_OTHERS; - goto _error_merge; - } - } - - if (blockIter.slot > blockIter.oslot) { // write blocks in range [blockIter.oslot, blockIter.slot) to .t file - checksum = taosCalcChecksum(checksum, (uint8_t *)(importHandle.pBlocks + blockIter.oslot), - sizeof(SCompBlock) * (blockIter.slot - blockIter.oslot)); - if (twrite(pVnode->nfd, (void *)(importHandle.pBlocks + blockIter.oslot), - sizeof(SCompBlock) * (blockIter.slot - blockIter.oslot)) < 0) { - dError("vid:%d sid:%d meterId:%s, failed to write %s file, size:%ul, reason:%s", pVnode->vnode, pObj->sid, - pObj->meterId, pVnode->nfn, sizeof(SCompBlock) * (blockIter.slot - blockIter.oslot), - strerror(errno)); - code = TSDB_CODE_OTHERS; - goto _error_merge; - } - - blockIter.oslot = blockIter.slot; - } - - if (blockIter.pos == 0) { // No need to merge - // copy payload part to data - int rowOffset = 0; - for (; payloadIter < rows; rowOffset++) { - if (KEY_AT_INDEX(payload, pObj->bytesPerPoint, payloadIter) >= blockIter.nextKey) break; - - vnodeConvertRowsToCols(pObj, payload + pObj->bytesPerPoint * payloadIter, 1, data, rowOffset); - pointsImported++; - lastKeyImported = KEY_AT_INDEX(payload, pObj->bytesPerPoint, payloadIter); - payloadIter++; - } - - // write directly to .data file - compBlock.last = 0; - if (vnodeWriteBlockToFile(pObj, &compBlock, data, cdata, rowOffset) < 0) { - // TODO: Deal with the ERROR here - } - - checksum = taosCalcChecksum(checksum, (uint8_t *)(&compBlock), sizeof(SCompBlock)); - if (twrite(pVnode->nfd, &compBlock, sizeof(SCompBlock)) < 0) { - // TODO : deal with the ERROR here - } - importHandle.newNumOfBlocks++; - importHandle.driftOffset += sizeof(SCompBlock); - } else { // Merge block and payload from payloadIter - - if (vnodeLoadNeededBlockData(pObj, &importHandle, blockIter.slot, - DATA_LOAD_TIMESTAMP | DATA_LOAD_OTHER_DATA, &code) < 0) { // Load neccessary blocks - goto 
_error_merge; - } - - importHandle.oldNumOfBlocks--; - importHandle.driftOffset -= sizeof(SCompBlock); - - int rowOffset = blockIter.pos; // counter for data - - // Copy the front part - for (int col = 0; col < pObj->numOfColumns; col++) { - memcpy((void *)(data[col]->data), (void *)(importHandle.data[col]->data), - pObj->schema[col].bytes * blockIter.pos); - } - - // Merge part - while (1) { - if (rowOffset >= pVnode->cfg.rowsInFileBlock) { // data full in a block to commit - compBlock.last = 0; - if (vnodeWriteBlockToFile(pObj, &compBlock, data, cdata, rowOffset) < 0) { - // TODO : deal with the ERROR here - } - - checksum = taosCalcChecksum(checksum, (uint8_t *)(&compBlock), sizeof(SCompBlock)); - if (twrite(pVnode->nfd, (void *)(&compBlock), sizeof(SCompBlock)) < 0) { - dError("vid:%d sid:%d meterId:%s, failed to write %s file, size:%ul, reason:%s", pVnode->vnode, - pObj->sid, pObj->meterId, pVnode->nfn, sizeof(SCompBlock), strerror(errno)); - goto _error_merge; - } - importHandle.newNumOfBlocks++; - importHandle.driftOffset += sizeof(SCompBlock); - rowOffset = 0; - } - - if ((payloadIter >= rows || KEY_AT_INDEX(payload, pObj->bytesPerPoint, payloadIter) >= blockIter.nextKey) && - blockIter.pos >= importHandle.pBlocks[blockIter.slot].numOfPoints) - break; - - if (payloadIter >= rows || - KEY_AT_INDEX(payload, pObj->bytesPerPoint, payloadIter) >= blockIter.nextKey) { // payload end - for (int col = 0; col < pObj->numOfColumns; col++) { - memcpy(data[col]->data + rowOffset * pObj->schema[col].bytes, - importHandle.data[col]->data + pObj->schema[col].bytes * blockIter.pos, pObj->schema[col].bytes); - } - blockIter.pos++; - rowOffset++; - } else if (blockIter.pos >= importHandle.pBlocks[blockIter.slot].numOfPoints) { // block end - vnodeConvertRowsToCols(pObj, payload + pObj->bytesPerPoint * payloadIter, 1, data, rowOffset); - pointsImported++; - lastKeyImported = KEY_AT_INDEX(payload, pObj->bytesPerPoint, payloadIter); - payloadIter++; - rowOffset++; - } else { - 
if (KEY_AT_INDEX(payload, pObj->bytesPerPoint, payloadIter) == - KEY_AT_INDEX(importHandle.data[PRIMARYKEY_TIMESTAMP_COL_INDEX]->data, sizeof(TSKEY), - blockIter.pos)) { // duplicate key - if (tsAffectedRowsMod) pointsImported++; - payloadIter++; - continue; - } else if (KEY_AT_INDEX(payload, pObj->bytesPerPoint, payloadIter) < - KEY_AT_INDEX(importHandle.data[PRIMARYKEY_TIMESTAMP_COL_INDEX]->data, sizeof(TSKEY), - blockIter.pos)) { - vnodeConvertRowsToCols(pObj, payload + pObj->bytesPerPoint * payloadIter, 1, data, rowOffset); - pointsImported++; - lastKeyImported = KEY_AT_INDEX(payload, pObj->bytesPerPoint, payloadIter); - payloadIter++; - rowOffset++; - } else { - for (int col = 0; col < pObj->numOfColumns; col++) { - memcpy(data[col]->data + rowOffset * pObj->schema[col].bytes, - importHandle.data[col]->data + pObj->schema[col].bytes * blockIter.pos, - pObj->schema[col].bytes); - } - blockIter.pos++; - rowOffset++; - } - } - } - if (rowOffset > 0) { // data full in a block to commit - compBlock.last = 0; - if (vnodeWriteBlockToFile(pObj, &compBlock, data, cdata, rowOffset) < 0) { - // TODO : deal with the ERROR here - } - - checksum = taosCalcChecksum(checksum, (uint8_t *)(&compBlock), sizeof(SCompBlock)); - if (twrite(pVnode->nfd, (void *)(&compBlock), sizeof(SCompBlock)) < 0) { - dError("vid:%d sid:%d meterId:%s, failed to write %s file, size:%ul, reason:%s", pVnode->vnode, pObj->sid, - pObj->meterId, pVnode->nfn, sizeof(SCompBlock), strerror(errno)); - goto _error_merge; - } - importHandle.newNumOfBlocks++; - importHandle.driftOffset += sizeof(SCompBlock); - rowOffset = 0; - } - - blockIter.slot++; - blockIter.oslot = blockIter.slot; - } - } - } - } - - // Write the SCompInfo part - if (vnodeCloseImportFiles(pObj, &importHandle) < 0) { - code = TSDB_CODE_OTHERS; - goto _error_merge; - } - - pImport->importedRows += pointsImported; - - pthread_mutex_lock(&(pPool->vmutex)); - if (pInfo->numOfBlocks > 0) { - int slot = (pInfo->currentSlot - pInfo->numOfBlocks + 
1 + pInfo->maxBlocks) % pInfo->maxBlocks; - TSKEY firstKeyInCache = *((TSKEY *)(pInfo->cacheBlocks[slot]->offset[0])); - - // data may be in commited cache, cache shall be released - if (lastKeyImported > firstKeyInCache) { - while (slot != pInfo->commitSlot) { - SCacheBlock *pCacheBlock = pInfo->cacheBlocks[slot]; - vnodeFreeCacheBlock(pCacheBlock); - slot = (slot + 1 + pInfo->maxBlocks) % pInfo->maxBlocks; - } - - if (pInfo->commitPoint == pObj->pointsPerBlock) { - if (pInfo->cacheBlocks[pInfo->commitSlot]->pMeterObj == pObj) { - vnodeFreeCacheBlock(pInfo->cacheBlocks[pInfo->commitSlot]); - } - } - } - } - pthread_mutex_unlock(&(pPool->vmutex)); - - // TODO: free the allocated memory - tfree(buffer); - tfree(cbuffer); - tfree(importHandle.pHeader); - tfree(importHandle.pBlocks); - tfree(importHandle.pField); - tfree(importHandle.buffer); - tfree(importHandle.temp); - tfree(importHandle.tempBuffer); - - return code; - -_error_merge: - tfree(buffer); - tfree(cbuffer); - tfree(importHandle.pHeader); - tfree(importHandle.pBlocks); - tfree(importHandle.pField); - tfree(importHandle.buffer); - tfree(importHandle.temp); - tfree(importHandle.tempBuffer); - - close(pVnode->dfd); - pVnode->dfd = 0; - - close(pVnode->hfd); - pVnode->hfd = 0; - - close(pVnode->lfd); - pVnode->lfd = 0; - - if (pVnode->nfd > 0) { - close(pVnode->nfd); - pVnode->nfd = 0; - remove(pVnode->nfn); - } - - return code; -} - -#define FORWARD_ITER(iter, step, slotLimit, posLimit) \ - { \ - if ((iter.pos) + (step) < (posLimit)) { \ - (iter.pos) = (iter.pos) + (step); \ - } else { \ - (iter.pos) = 0; \ - (iter.slot) = ((iter.slot) + 1) % (slotLimit); \ - } \ - } - -int isCacheEnd(SBlockIter iter, SMeterObj *pTable) { - SCacheInfo *pInfo = (SCacheInfo *)(pTable->pCache); - int slot = 0; - int pos = 0; - - if (pInfo->cacheBlocks[pInfo->currentSlot]->numOfPoints == pTable->pointsPerBlock) { - slot = (pInfo->currentSlot + 1) % (pInfo->maxBlocks); - pos = 0; - } else { - slot = pInfo->currentSlot; - pos = 
pInfo->cacheBlocks[pInfo->currentSlot]->numOfPoints; - } - return ((iter.slot == slot) && (iter.pos == pos)); -} - -static void vnodeFlushMergeBuffer(SMergeBuffer *pBuffer, SBlockIter *pWriteIter, SBlockIter *pCacheIter, - SMeterObj *pObj, SCacheInfo *pInfo, int checkBound) { - // Function to flush the merge buffer data to cache - if (pWriteIter->pos == pObj->pointsPerBlock) { - pWriteIter->pos = 0; - pWriteIter->slot = (pWriteIter->slot + 1) % pInfo->maxBlocks; - } - - while (pBuffer->spos != pBuffer->epos) { - if (checkBound && pWriteIter->slot == pCacheIter->slot && pWriteIter->pos == pCacheIter->pos) break; - for (int col = 0; col < pObj->numOfColumns; col++) { - memcpy(pInfo->cacheBlocks[pWriteIter->slot]->offset[col] + pObj->schema[col].bytes * pWriteIter->pos, - pBuffer->offset[col] + pObj->schema[col].bytes * pBuffer->spos, pObj->schema[col].bytes); - } - - if (pWriteIter->pos + 1 < pObj->pointsPerBlock) { - (pWriteIter->pos)++; - } else { - pInfo->cacheBlocks[pWriteIter->slot]->numOfPoints = pWriteIter->pos + 1; - pWriteIter->slot = (pWriteIter->slot + 1) % pInfo->maxBlocks; - pWriteIter->pos = 0; - } - - pBuffer->spos = (pBuffer->spos + 1) % pBuffer->totalRows; - } - - if ((!checkBound) && pWriteIter->pos != 0) { - pInfo->cacheBlocks[pWriteIter->slot]->numOfPoints = pWriteIter->pos; - } -} - -int vnodeImportDataToCache(SImportInfo *pImport, const char *payload, const int rows) { - SMeterObj * pObj = pImport->pObj; - SVnodeObj * pVnode = vnodeList + pObj->vnode; - int code = -1; - SCacheInfo * pInfo = (SCacheInfo *)(pObj->pCache); - int payloadIter; - SCachePool * pPool = (SCachePool *)(pVnode->pCachePool); - int isCacheIterEnd = 0; - int spayloadIter = 0; - int isAppendData = 0; - int rowsImported = 0; - int totalRows = 0; - size_t size = 0; - SMergeBuffer *pBuffer = NULL; - - TSKEY firstKey = KEY_AT_INDEX(payload, pObj->bytesPerPoint, 0); - TSKEY lastKey = KEY_AT_INDEX(payload, pObj->bytesPerPoint, rows - 1); - - assert(firstKey <= lastKey && firstKey > 
pObj->lastKeyOnFile); - - // TODO: make this condition less strict - if (pObj->freePoints < rows || pObj->freePoints < (pObj->pointsPerBlock << 1)) { // No free room to hold the data - dError("vid:%d sid:%d id:%s, import failed, cache is full, freePoints:%d", pObj->vnode, pObj->sid, pObj->meterId, - pObj->freePoints); - pImport->importedRows = 0; - pImport->commit = 1; - code = TSDB_CODE_ACTION_IN_PROGRESS; - return code; - } - - if (pInfo->numOfBlocks == 0) { - if (vnodeAllocateCacheBlock(pObj) < 0) { - pImport->importedRows = 0; - pImport->commit = 1; - code = TSDB_CODE_ACTION_IN_PROGRESS; - return code; - } - } - - // Find the first importable record from payload - pImport->lastKey = lastKey; - for (payloadIter = 0; payloadIter < rows; payloadIter++) { - TSKEY key = KEY_AT_INDEX(payload, pObj->bytesPerPoint, payloadIter); - if (key == pObj->lastKey) { - if (tsAffectedRowsMod) rowsImported++; - continue; - } - if (key > pObj->lastKey) { // Just as insert - pImport->slot = pInfo->currentSlot; - pImport->pos = pInfo->cacheBlocks[pImport->slot]->numOfPoints; - isCacheIterEnd = 1; - break; - } else { - pImport->firstKey = key; - if (vnodeFindKeyInCache(pImport, 1) < 0) { - goto _exit; - } - - if (pImport->firstKey != pImport->key) break; - if (tsAffectedRowsMod) rowsImported++; - } - } - - if (payloadIter == rows) { - pImport->importedRows += rowsImported; - code = 0; - goto _exit; - } - - spayloadIter = payloadIter; - if (pImport->pos == pObj->pointsPerBlock) assert(isCacheIterEnd); - - // Allocate a new merge buffer work as buffer - totalRows = pObj->pointsPerBlock + rows - payloadIter + 1; - size = sizeof(SMergeBuffer) + sizeof(char *) * pObj->numOfColumns + pObj->bytesPerPoint * totalRows; - pBuffer = (SMergeBuffer *)malloc(size); - if (pBuffer == NULL) { - dError("vid:%d sid:%d meterId:%s, failed to allocate memory, size:%d", pObj->vnode, pObj->sid, pObj->meterId, size); - return TSDB_CODE_SERV_OUT_OF_MEMORY; - } - pBuffer->spos = 0; - pBuffer->epos = 0; - 
pBuffer->totalRows = totalRows; - pBuffer->offset[0] = (char *)pBuffer + sizeof(SMergeBuffer) + sizeof(char *) * pObj->numOfColumns; - for (int col = 1; col < pObj->numOfColumns; col++) { - pBuffer->offset[col] = pBuffer->offset[col - 1] + pObj->schema[col - 1].bytes * totalRows; - } - - // TODO: take pImport->pos = pObj->pointsPerBlock into consideration - { // Do the merge staff - SBlockIter cacheIter = {pImport->slot, pImport->pos, 0, 0}; // Iter to traverse old cache data - SBlockIter writeIter = {pImport->slot, pImport->pos, 0, 0}; // Iter to write data to cache - int availPoints = pObj->pointsPerBlock - pInfo->cacheBlocks[pInfo->currentSlot]->numOfPoints; - - assert(availPoints >= 0); - - while (1) { - if ((payloadIter >= rows) && isCacheIterEnd) break; - - if ((pBuffer->epos + 1) % pBuffer->totalRows == pBuffer->spos) { // merge buffer is full, flush - vnodeFlushMergeBuffer(pBuffer, &writeIter, &cacheIter, pObj, pInfo, 1); - } - - TSKEY payloadKey = (payloadIter < rows) ? KEY_AT_INDEX(payload, pObj->bytesPerPoint, payloadIter) : INT64_MAX; - TSKEY cacheKey = (isCacheIterEnd) ? 
INT64_MAX : KEY_AT_INDEX(pInfo->cacheBlocks[cacheIter.slot]->offset[0], sizeof(TSKEY), cacheIter.pos); - - if (cacheKey < payloadKey) { // if (payload end || (cacheIter not end && payloadKey > blockKey)), consume cache - for (int col = 0; col < pObj->numOfColumns; col++) { - memcpy(pBuffer->offset[col] + pObj->schema[col].bytes * pBuffer->epos, - pInfo->cacheBlocks[cacheIter.slot]->offset[col] + pObj->schema[col].bytes * cacheIter.pos, - pObj->schema[col].bytes); - } - FORWARD_ITER(cacheIter, 1, pInfo->maxBlocks, pObj->pointsPerBlock); - isCacheIterEnd = isCacheEnd(cacheIter, pObj); - } else if (cacheKey > payloadKey) { // cacheIter end || (payloadIter not end && payloadKey < blockKey), consume payload - if (availPoints == 0) { // Need to allocate a new cache block - pthread_mutex_lock(&(pPool->vmutex)); - // TODO: Need to check if there are enough slots to hold a new one - SCacheBlock *pNewBlock = vnodeGetFreeCacheBlock(pVnode); - if (pNewBlock == NULL) { // Failed to allocate a new cache block, need to commit and loop over the remaining cache records - pthread_mutex_unlock(&(pPool->vmutex)); - payloadIter = rows; - code = TSDB_CODE_ACTION_IN_PROGRESS; - pImport->commit = 1; - continue; - } - - assert(pInfo->numOfBlocks <= pInfo->maxBlocks); - if (pInfo->numOfBlocks == pInfo->maxBlocks) { - vnodeFreeCacheBlock(pInfo->cacheBlocks[(pInfo->currentSlot + 1) % pInfo->maxBlocks]); - } - - pNewBlock->pMeterObj = pObj; - pNewBlock->offset[0] = (char *)pNewBlock + sizeof(SCacheBlock) + sizeof(char *) * pObj->numOfColumns; - for (int col = 1; col < pObj->numOfColumns; col++) - pNewBlock->offset[col] = pNewBlock->offset[col - 1] + pObj->schema[col - 1].bytes * pObj->pointsPerBlock; - - int newSlot = (writeIter.slot + 1) % pInfo->maxBlocks; - pInfo->blocks++; - int tblockId = pInfo->blocks; - - if (writeIter.slot != pInfo->currentSlot) { - for (int tslot = pInfo->currentSlot; tslot != writeIter.slot;) { - int nextSlot = (tslot + 1) % pInfo->maxBlocks; - 
pInfo->cacheBlocks[nextSlot] = pInfo->cacheBlocks[tslot]; - pInfo->cacheBlocks[nextSlot]->slot = nextSlot; - pInfo->cacheBlocks[nextSlot]->blockId = tblockId--; - tslot = (tslot - 1 + pInfo->maxBlocks) % pInfo->maxBlocks; - } - } - - int index = pNewBlock->index; - if (cacheIter.slot == writeIter.slot) { - pNewBlock->numOfPoints = pInfo->cacheBlocks[cacheIter.slot]->numOfPoints; - int pointsLeft = pInfo->cacheBlocks[cacheIter.slot]->numOfPoints - cacheIter.pos; - if (pointsLeft > 0) { - for (int col = 0; col < pObj->numOfColumns; col++) { - memcpy((void *)(pNewBlock->offset[col] + pObj->schema[col].bytes*cacheIter.pos), - pInfo->cacheBlocks[cacheIter.slot]->offset[col] + pObj->schema[col].bytes * cacheIter.pos, - pObj->schema[col].bytes * pointsLeft); - } - } - } - pNewBlock->blockId = tblockId; - pNewBlock->slot = newSlot; - pNewBlock->index = index; - pInfo->cacheBlocks[newSlot] = pNewBlock; - pInfo->numOfBlocks++; - pInfo->unCommittedBlocks++; - pInfo->currentSlot = (pInfo->currentSlot + 1) % pInfo->maxBlocks; - pthread_mutex_unlock(&(pPool->vmutex)); - cacheIter.slot = (cacheIter.slot + 1) % pInfo->maxBlocks; - // move a cache of data forward - availPoints = pObj->pointsPerBlock; - } - - int offset = 0; - for (int col = 0; col < pObj->numOfColumns; col++) { - memcpy(pBuffer->offset[col] + pObj->schema[col].bytes * pBuffer->epos, - payload + pObj->bytesPerPoint * payloadIter + offset, pObj->schema[col].bytes); - offset += pObj->schema[col].bytes; - } - if (spayloadIter == payloadIter) {// update pVnode->firstKey - pthread_mutex_lock(&(pVnode->vmutex)); - if (KEY_AT_INDEX(payload, pObj->bytesPerPoint, payloadIter) < pVnode->firstKey) pVnode->firstKey = firstKey; - pthread_mutex_unlock(&(pVnode->vmutex)); - } - if (isCacheIterEnd) { - pObj->lastKey = KEY_AT_INDEX(payload, pObj->bytesPerPoint, payloadIter); - if (!isAppendData) isAppendData = 1; - } - - rowsImported++; - availPoints--; - payloadIter++; - - } else { - if (tsAffectedRowsMod) rowsImported++; - 
payloadIter++; - continue; - } - pBuffer->epos = (pBuffer->epos + 1) % pBuffer->totalRows; - } - - if (pBuffer->spos != pBuffer->epos) { // Flush the remaining data in the merge buffer - vnodeFlushMergeBuffer(pBuffer, &writeIter, &cacheIter, pObj, pInfo, 0); - } else { - // Should never come here - assert(false); - } - - if (isAppendData) { - pthread_mutex_lock(&(pVnode->vmutex)); - if (pObj->lastKey > pVnode->lastKey) pVnode->lastKey = pObj->lastKey; - pthread_mutex_unlock(&(pVnode->vmutex)); - } - } - pImport->importedRows += rowsImported; - atomic_fetch_sub_32(&(pObj->freePoints), rowsImported); - - code = TSDB_CODE_SUCCESS; - -_exit: - tfree(pBuffer); - return code; -} - -int vnodeImportDataToFiles(SImportInfo *pImport, char *payload, const int rows) { - int code = 0; - // TODO : Check the correctness of pObj and pVnode - SMeterObj *pObj = (SMeterObj *)(pImport->pObj); - SVnodeObj *pVnode = vnodeList + pObj->vnode; - - int64_t delta = pVnode->cfg.daysPerFile * tsMsPerDay[(uint8_t)pVnode->cfg.precision]; - int sfid = KEY_AT_INDEX(payload, pObj->bytesPerPoint, 0) / delta; - int efid = KEY_AT_INDEX(payload, pObj->bytesPerPoint, rows - 1) / delta; - - for (int fid = sfid; fid <= efid; fid++) { - TSKEY skey = fid * delta; - TSKEY ekey = skey + delta - 1; - int srow = 0, nrows = 0; - - if (vnodeSearchKeyInRange(payload, pObj->bytesPerPoint, rows, skey, ekey, &srow, &nrows) < 0) continue; - - assert(nrows > 0); - - dTrace("vid:%d sid:%d meterId:%s, %d rows of data will be imported to file %d, srow:%d firstKey:%" PRId64 " lastKey:%" PRId64, - pObj->vnode, pObj->sid, pObj->meterId, nrows, fid, srow, KEY_AT_INDEX(payload, pObj->bytesPerPoint, srow), - KEY_AT_INDEX(payload, pObj->bytesPerPoint, (srow + nrows - 1))); - - code = vnodeMergeDataIntoFile(pImport, payload + (srow * pObj->bytesPerPoint), nrows, fid); - if (code != TSDB_CODE_SUCCESS) break; - } - - return code; -} - -// TODO : add offset in pShell to make it avoid repeatedly deal with messages -int 
vnodeImportData(SMeterObj *pObj, SImportInfo *pImport) { - int code = 0; - int srow = 0, nrows = 0; - SVnodeObj * pVnode = vnodeList + pObj->vnode; - SCachePool *pPool = (SCachePool *)(pVnode->pCachePool); - - // 1. import data in range (pObj->lastKeyOnFile, INT64_MAX) into cache - if (vnodeSearchKeyInRange(pImport->payload, pObj->bytesPerPoint, pImport->rows, pObj->lastKeyOnFile + 1, INT64_MAX, - &srow, &nrows) >= 0) { - assert(nrows > 0); - code = vnodeImportDataToCache(pImport, pImport->payload + pObj->bytesPerPoint * srow, nrows); - if (pImport->commit) { // Need to commit now - pPool->commitInProcess = 0; - vnodeProcessCommitTimer(pVnode, NULL); - return code; - } - - if (code != TSDB_CODE_SUCCESS) return code; - } - - // 2. import data (0, pObj->lastKeyOnFile) into files - if (vnodeSearchKeyInRange(pImport->payload, pObj->bytesPerPoint, pImport->rows, 0, pObj->lastKeyOnFile - 1, &srow, - &nrows) >= 0) { - assert(nrows > 0); - code = vnodeImportDataToFiles(pImport, pImport->payload + pObj->bytesPerPoint * srow, nrows); - } - - pPool->commitInProcess = 0; - - return code; -} diff --git a/src/vnode/detail/src/vnodeMeter.c b/src/vnode/detail/src/vnodeMeter.c deleted file mode 100644 index e03a40995c..0000000000 --- a/src/vnode/detail/src/vnodeMeter.c +++ /dev/null @@ -1,825 +0,0 @@ -/* - * Copyright (c) 2019 TAOS Data, Inc. - * - * This program is free software: you can use, redistribute, and/or modify - * it under the terms of the GNU Affero General Public License, version 3 - * or later ("AGPL"), as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. - * - * You should have received a copy of the GNU Affero General Public License - * along with this program. If not, see . 
- */ - -#define _DEFAULT_SOURCE -#include "os.h" - -#include "trpc.h" -#include "tschemautil.h" -#include "ttime.h" -#include "tutil.h" -#include "vnode.h" -#include "vnodeShell.h" -#include "vnodeUtil.h" -#include "vnodeStatus.h" - -#define VALID_TIMESTAMP(key, curKey, prec) (((key) >= 0) && ((key) <= ((curKey) + 36500 * tsMsPerDay[prec]))) - -int tsMeterSizeOnFile; -void vnodeUpdateMeter(void *param, void *tmdId); -void vnodeRecoverMeterObjectFile(int vnode); - -int (*vnodeProcessAction[])(SMeterObj *, char *, int, char, void *, int, int *, TSKEY) = {vnodeInsertPoints, - vnodeImportPoints}; - -void vnodeFreeMeterObj(SMeterObj *pObj) { - if (pObj == NULL) return; - - dTrace("vid:%d sid:%d id:%s, meter is cleaned up", pObj->vnode, pObj->sid, pObj->meterId); - - vnodeFreeCacheInfo(pObj); - if (vnodeList[pObj->vnode].meterList != NULL) { - vnodeList[pObj->vnode].meterList[pObj->sid] = NULL; - } - - memset(pObj->meterId, 0, tListLen(pObj->meterId)); - tfree(pObj); -} - -int vnodeUpdateVnodeStatistic(FILE *fp, SVnodeObj *pVnode) { - fseek(fp, TSDB_FILE_HEADER_VERSION_SIZE, SEEK_SET); - fwrite(&(pVnode->vnodeStatistic), sizeof(SVnodeStatisticInfo), 1, fp); - - return 0; -} - -void vnodeUpdateVnodeFileHeader(FILE *fp, SVnodeObj *pVnode) { - fseek(fp, TSDB_FILE_HEADER_LEN * 1 / 4, SEEK_SET); - -#ifdef _TD_ARM_32_ - fprintf(fp, "%lld %lld %lld ", pVnode->lastCreate, pVnode->lastRemove, pVnode->version); - fprintf(fp, "%lld %d %d ", pVnode->lastKeyOnFile, pVnode->fileId, pVnode->numOfFiles); -#else - fprintf(fp, "%ld %ld %ld ", pVnode->lastCreate, pVnode->lastRemove, pVnode->version); - fprintf(fp, "%ld %d %d ", pVnode->lastKeyOnFile, pVnode->fileId, pVnode->numOfFiles); -#endif -} - -int vnodeCreateMeterObjFile(int vnode) { - FILE * fp; - char fileName[TSDB_FILENAME_LEN]; - int32_t size; - // SMeterObj *pObj; - - sprintf(fileName, "%s/vnode%d/meterObj.v%d", tsDirectory, vnode, vnode); - fp = fopen(fileName, "w+"); - if (fp == NULL) { - dError("failed to create vnode:%d 
file:%s, errno:%d, reason:%s", vnode, fileName, errno, strerror(errno)); - if (errno == EACCES) { - return TSDB_CODE_NO_DISK_PERMISSIONS; - } else if (errno == ENOSPC) { - return TSDB_CODE_SERV_NO_DISKSPACE; - } else { - return TSDB_CODE_VG_INIT_FAILED; - } - } else { - vnodeCreateFileHeader(fp); - vnodeUpdateVnodeFileHeader(fp, vnodeList + vnode); - fseek(fp, TSDB_FILE_HEADER_LEN, SEEK_SET); - - size = sizeof(SMeterObjHeader) * vnodeList[vnode].cfg.maxSessions + sizeof(TSCKSUM); - tfree(vnodeList[vnode].meterIndex); - vnodeList[vnode].meterIndex = calloc(1, size); - taosCalcChecksumAppend(0, (uint8_t *)(vnodeList[vnode].meterIndex), size); - fwrite(vnodeList[vnode].meterIndex, size, 1, fp); - - fclose(fp); - } - - return TSDB_CODE_SUCCESS; -} - -FILE *vnodeOpenMeterObjFile(int vnode) { - FILE * fp; - char fileName[TSDB_FILENAME_LEN]; - struct stat fstat; - - // check if directory exists - sprintf(fileName, "%s/vnode%d", tsDirectory, vnode); - if (stat(fileName, &fstat) < 0) return NULL; - - sprintf(fileName, "%s/vnode%d/meterObj.v%d", tsDirectory, vnode, vnode); - if (stat(fileName, &fstat) < 0) return NULL; - - fp = fopen(fileName, "r+"); - if (fp != NULL) { - if (vnodeCheckFileIntegrity(fp) < 0) { - dError("file:%s is corrupted, need to restore it first, exit program", fileName); - fclose(fp); - - // todo: how to recover - exit(1); - } - } else { - dError("failed to open %s, reason:%s", fileName, strerror(errno)); - } - - return fp; -} - -int vnodeSaveMeterObjToFile(SMeterObj *pObj) { - int64_t offset, length, new_length, new_offset; - FILE * fp; - SVnodeObj *pVnode = &vnodeList[pObj->vnode]; - char * buffer = NULL; - - fp = vnodeOpenMeterObjFile(pObj->vnode); - if (fp == NULL) return -1; - - buffer = (char *)malloc(tsMeterSizeOnFile); - if (buffer == NULL) { - dError("Failed to allocate memory while saving meter object to file, meterId", pObj->meterId); - fclose(fp); - return -1; - } - - offset = pVnode->meterIndex[pObj->sid].offset; - length = 
pVnode->meterIndex[pObj->sid].length; - - new_length = offsetof(SMeterObj, reserved) + pObj->numOfColumns * sizeof(SColumn) + pObj->sqlLen + sizeof(TSCKSUM); - - memcpy(buffer, pObj, offsetof(SMeterObj, reserved)); - memcpy(buffer + offsetof(SMeterObj, reserved), pObj->schema, pObj->numOfColumns * sizeof(SColumn)); - memcpy(buffer + offsetof(SMeterObj, reserved) + pObj->numOfColumns * sizeof(SColumn), pObj->pSql, pObj->sqlLen); - taosCalcChecksumAppend(0, (uint8_t *)buffer, new_length); - - if (offset == 0 || length < new_length) { // New, append to file end - fseek(fp, 0, SEEK_END); - new_offset = ftell(fp); - fwrite(buffer, new_length, 1, fp); - pVnode->meterIndex[pObj->sid].offset = new_offset; - pVnode->meterIndex[pObj->sid].length = new_length; - } else if (offset < 0) { // deleted meter, append to end of file - fseek(fp, -offset, SEEK_SET); - fwrite(buffer, new_length, 1, fp); - pVnode->meterIndex[pObj->sid].offset = -offset; - pVnode->meterIndex[pObj->sid].length = new_length; - } else { // meter exists, overwrite it, offset > 0 - fseek(fp, offset, SEEK_SET); - fwrite(buffer, new_length, 1, fp); - pVnode->meterIndex[pObj->sid].offset = (pObj->meterId[0] == 0) ? 
-offset : offset; - pVnode->meterIndex[pObj->sid].length = new_length; - } - // taosCalcChecksumAppend(0, pVnode->meterIndex, sizeof(SMeterObjHeader)*pVnode->cfg.maxSessions+sizeof(TSCKSUM)); - // NOTE: no checksum, since it makes creating table slow - fseek(fp, TSDB_FILE_HEADER_LEN + sizeof(SMeterObjHeader) * pObj->sid, SEEK_SET); - fwrite(&(pVnode->meterIndex[pObj->sid]), sizeof(SMeterObjHeader), 1, fp); - // update checksum - // fseek(fp, TSDB_FILE_HEADER_LEN+sizeof(SMeterObjHeader)*(pVnode->cfg.maxSessions), SEEK_SET); - // fwrite(((char *)(pVnode->meterIndex) + sizeof(SMeterObjHeader)*(pVnode->cfg.maxSessions)), sizeof(TSCKSUM), 1, fp); - - tfree(buffer); - - vnodeUpdateVnodeStatistic(fp, pVnode); - vnodeUpdateVnodeFileHeader(fp, pVnode); - /* vnodeUpdateFileCheckSum(fp); */ - fclose(fp); - - return 0; -} - -int vnodeSaveAllMeterObjToFile(int vnode) { - int64_t offset, length, new_length, new_offset; - FILE * fp; - SMeterObj *pObj; - SVnodeObj *pVnode = &vnodeList[vnode]; - char * buffer = NULL; - - fp = vnodeOpenMeterObjFile(vnode); - if (fp == NULL) return -1; - - buffer = (char *)malloc(tsMeterSizeOnFile); - if (buffer == NULL) { - dError("Failed to allocate memory while saving all meter objects to file"); - return -1; - } - - for (int sid = 0; sid < pVnode->cfg.maxSessions; ++sid) { - pObj = pVnode->meterList[sid]; - if (pObj == NULL) continue; - - offset = pVnode->meterIndex[sid].offset; - length = pVnode->meterIndex[sid].length; - - new_length = offsetof(SMeterObj, reserved) + pObj->numOfColumns * sizeof(SColumn) + pObj->sqlLen + sizeof(TSCKSUM); - - memcpy(buffer, pObj, offsetof(SMeterObj, reserved)); - memcpy(buffer + offsetof(SMeterObj, reserved), pObj->schema, pObj->numOfColumns * sizeof(SColumn)); - memcpy(buffer + offsetof(SMeterObj, reserved) + pObj->numOfColumns * sizeof(SColumn), pObj->pSql, pObj->sqlLen); - taosCalcChecksumAppend(0, (uint8_t *)buffer, new_length); - - if (offset == 0 || length > new_length) { // New, append to file end - 
new_offset = fseek(fp, 0, SEEK_END); - fwrite(buffer, new_length, 1, fp); - pVnode->meterIndex[sid].offset = new_offset; - pVnode->meterIndex[sid].length = new_length; - } else if (offset < 0) { // deleted meter, append to end of file - fseek(fp, -offset, SEEK_SET); - fwrite(buffer, new_length, 1, fp); - pVnode->meterIndex[sid].offset = -offset; - pVnode->meterIndex[sid].length = new_length; - } else { // meter exists, overwrite it, offset > 0 - fseek(fp, offset, SEEK_SET); - fwrite(buffer, new_length, 1, fp); - pVnode->meterIndex[sid].offset = offset; - pVnode->meterIndex[sid].length = new_length; - } - } - // taosCalcChecksumAppend(0, pVnode->meterIndex, sizeof(SMeterObjHeader)*pVnode->cfg.maxSessions+sizeof(TSCKSUM)); - fseek(fp, TSDB_FILE_HEADER_LEN, SEEK_SET); - fwrite(pVnode->meterIndex, sizeof(SMeterObjHeader) * pVnode->cfg.maxSessions + sizeof(TSCKSUM), 1, fp); - - tfree(buffer); - - vnodeUpdateVnodeStatistic(fp, pVnode); - vnodeUpdateVnodeFileHeader(fp, pVnode); - /* vnodeUpdateFileCheckSum(fp); */ - fclose(fp); - - return 0; -} - -int vnodeSaveVnodeCfg(int vnode, SVnodeCfg *pCfg, SVPeerDesc *pDesc) { - FILE *fp; - - fp = vnodeOpenMeterObjFile(vnode); - if (fp == NULL) { - dError("failed to open vnode:%d file", vnode); - return -1; - } - - fseek(fp, TSDB_FILE_HEADER_LEN * 2 / 4, SEEK_SET); - fwrite(pCfg, sizeof(SVnodeCfg), 1, fp); - - char temp[TSDB_FILE_HEADER_LEN / 4]; - memset(temp, 0, sizeof(temp)); - fseek(fp, TSDB_FILE_HEADER_LEN * 3 / 4, SEEK_SET); - fwrite(temp, sizeof(temp), 1, fp); - - if (pCfg->replications >= 1) { - fseek(fp, TSDB_FILE_HEADER_LEN * 3 / 4, SEEK_SET); - fwrite(pDesc, sizeof(SVPeerDesc), pCfg->replications, fp); - } - - /* vnodeUpdateFileCheckSum(fp); */ - fclose(fp); - - return TSDB_CODE_SUCCESS; -} - -int vnodeSaveVnodeInfo(int vnode) { - FILE * fp; - SVnodeObj *pVnode = &vnodeList[vnode]; - - fp = vnodeOpenMeterObjFile(vnode); - if (fp == NULL) return -1; - - vnodeUpdateVnodeFileHeader(fp, pVnode); - /* 
vnodeUpdateFileCheckSum(fp); */ - fclose(fp); - - return 0; -} - -int vnodeRestoreMeterObj(char *buffer, int64_t length) { - SMeterObj *pSavedObj, *pObj; - int size; - - pSavedObj = (SMeterObj *)buffer; - if (pSavedObj->vnode < 0 || pSavedObj->vnode >= TSDB_MAX_VNODES) { - dTrace("vid:%d is out of range, corrupted meter obj file", pSavedObj->vnode); - return -1; - } - - SVnodeCfg *pCfg = &vnodeList[pSavedObj->vnode].cfg; - if (pSavedObj->sid < 0 || pSavedObj->sid >= pCfg->maxSessions) { - dTrace("vid:%d, sid:%d is larger than max:%d", pSavedObj->vnode, pSavedObj->sid, pCfg->maxSessions); - return -1; - } - - if (pSavedObj->meterId[0] == 0) return TSDB_CODE_SUCCESS; - - size = sizeof(SMeterObj) + pSavedObj->sqlLen + 1; - pObj = (SMeterObj *)malloc(size); - if (pObj == NULL) { - dError("vid:%d sid:%d, no memory to allocate", pSavedObj->vnode, pSavedObj->sid); - return TSDB_CODE_SERV_OUT_OF_MEMORY; - } - - pObj->schema = (SColumn *)malloc(pSavedObj->numOfColumns * sizeof(SColumn)); - if (NULL == pObj->schema){ - dError("vid:%d sid:%d, no memory to allocate for schema", pSavedObj->vnode, pSavedObj->sid); - free(pObj); - return TSDB_CODE_SERV_OUT_OF_MEMORY; - } - - memcpy(pObj, pSavedObj, offsetof(SMeterObj, reserved)); - pObj->numOfQueries = 0; - pObj->pCache = vnodeAllocateCacheInfo(pObj); - if (NULL == pObj->pCache){ - dError("vid:%d sid:%d, no memory to allocate for cache", pSavedObj->vnode, pSavedObj->sid); - tfree(pObj->schema); - tfree(pObj); - return TSDB_CODE_SERV_OUT_OF_MEMORY; - } - - vnodeList[pSavedObj->vnode].meterList[pSavedObj->sid] = pObj; - pObj->pStream = NULL; - - memcpy(pObj->schema, buffer + offsetof(SMeterObj, reserved), pSavedObj->numOfColumns * sizeof(SColumn)); - pObj->state = TSDB_METER_STATE_READY; - - if (pObj->sqlLen > 0) - memcpy((char *)pObj + sizeof(SMeterObj), - ((char *)pSavedObj) + offsetof(SMeterObj, reserved) + sizeof(SColumn) * pSavedObj->numOfColumns, - pSavedObj->sqlLen); - pObj->pSql = (char *)pObj + sizeof(SMeterObj); - - 
pObj->lastKey = pObj->lastKeyOnFile; - if (pObj->lastKey > vnodeList[pObj->vnode].lastKey) vnodeList[pObj->vnode].lastKey = pObj->lastKey; - - // taosSetSecurityInfo(pObj->vnode, pObj->sid, pObj->meterId, pObj->spi, pObj->encrypt, pObj->secret, pObj->cipheringKey); - - dTrace("vid:%d sid:%d id:%s, meter is restored, uid:%" PRIu64 "", pObj->vnode, pObj->sid, pObj->meterId, pObj->uid); - return TSDB_CODE_SUCCESS; -} - -int vnodeOpenMetersVnode(int vnode) { - FILE * fp; - char * buffer; - int64_t sid; - int64_t offset, length; - SVnodeObj *pVnode = &vnodeList[vnode]; - - fp = vnodeOpenMeterObjFile(vnode); - if (fp == NULL) return 0; - - fseek(fp, TSDB_FILE_HEADER_VERSION_SIZE, SEEK_SET); - fread(&(pVnode->vnodeStatistic), sizeof(SVnodeStatisticInfo), 1, fp); - - fseek(fp, TSDB_FILE_HEADER_LEN * 1 / 4, SEEK_SET); -#ifdef _TD_ARM_32_ - fscanf(fp, "%lld %lld %lld ", &(pVnode->lastCreate), &(pVnode->lastRemove), &(pVnode->version)); - fscanf(fp, "%lld %d %d ", &(pVnode->lastKeyOnFile), &(pVnode->fileId), &(pVnode->numOfFiles)); -#else - fscanf(fp, "%ld %ld %ld ", &(pVnode->lastCreate), &(pVnode->lastRemove), &(pVnode->version)); - fscanf(fp, "%ld %d %d ", &(pVnode->lastKeyOnFile), &(pVnode->fileId), &(pVnode->numOfFiles)); -#endif - - fseek(fp, TSDB_FILE_HEADER_LEN * 2 / 4, SEEK_SET); - fread(&pVnode->cfg, sizeof(SVnodeCfg), 1, fp); - - if (vnodeIsValidVnodeCfg(&pVnode->cfg) == false) { - dError("vid:%d, maxSessions:%d cacheBlockSize:%d replications:%d daysPerFile:%d daysToKeep:%d invalid, clear it", - vnode, pVnode->cfg.maxSessions, pVnode->cfg.cacheBlockSize, pVnode->cfg.replications, - pVnode->cfg.daysPerFile, pVnode->cfg.daysToKeep); - pVnode->cfg.maxSessions = 0; // error in vnode file - return 0; - } - - fseek(fp, TSDB_FILE_HEADER_LEN * 3 / 4, SEEK_SET); - fread(&pVnode->vpeers, sizeof(SVPeerDesc), TSDB_VNODES_SUPPORT, fp); - - fseek(fp, TSDB_FILE_HEADER_LEN, SEEK_SET); - - tsMeterSizeOnFile = sizeof(SMeterObj) + TSDB_MAX_COLUMNS * sizeof(SColumn) + 
TSDB_MAX_SAVED_SQL_LEN + sizeof(TSCKSUM); - - int size = sizeof(SMeterObj *) * pVnode->cfg.maxSessions; - pVnode->meterList = (void *)malloc(size); - if (pVnode->meterList == NULL) return -1; - memset(pVnode->meterList, 0, size); - size = sizeof(SMeterObjHeader) * pVnode->cfg.maxSessions + sizeof(TSCKSUM); - pVnode->meterIndex = (SMeterObjHeader *)calloc(1, size); - if (pVnode->meterIndex == NULL) { - tfree(pVnode->meterList); - return -1; - } - - // Read SMeterObjHeader list from file - if (fread(pVnode->meterIndex, size, 1, fp) < 0) return -1; - // if (!taosCheckChecksumWhole(pVnode->meterIndex, size)) { - // dError("vid: %d meter obj file header is broken since checksum mismatch", vnode); - // return -1; - // } - - // Read the meter object from file and recover the structure - buffer = malloc(tsMeterSizeOnFile); - memset(buffer, 0, tsMeterSizeOnFile); - for (sid = 0; sid < pVnode->cfg.maxSessions; ++sid) { - offset = pVnode->meterIndex[sid].offset; - length = pVnode->meterIndex[sid].length; - if (offset <= 0 || length <= 0) continue; - - fseek(fp, offset, SEEK_SET); - if (fread(buffer, length, 1, fp) <= 0) break; - if (taosCheckChecksumWhole((uint8_t *)buffer, length)) { - vnodeRestoreMeterObj(buffer, length - sizeof(TSCKSUM)); - } else { - dError("meter object file is broken since checksum mismatch, vnode: %d sid: %d, try to recover", vnode, sid); - continue; - /* vnodeRecoverMeterObjectFile(vnode); */ - } - } - - tfree(buffer); - fclose(fp); - - return 0; -} - -void vnodeCloseMetersVnode(int vnode) { - SVnodeObj *pVnode = vnodeList + vnode; - SMeterObj *pObj; - - if (pVnode->meterList) { - for (int sid = 0; sid < pVnode->cfg.maxSessions; ++sid) { - pObj = pVnode->meterList[sid]; - if (pObj == NULL) continue; - vnodeFreeCacheInfo(pObj); - tfree(pObj->schema); - tfree(pObj); - } - - tfree(pVnode->meterList); - } - - pVnode->meterList = NULL; -} - -int vnodeCreateMeterObj(SMeterObj *pNew, SConnSec *pSec) { - SMeterObj *pObj; - int code; - - pObj = 
vnodeList[pNew->vnode].meterList[pNew->sid]; - code = TSDB_CODE_SUCCESS; - - if (pObj && pObj->uid == pNew->uid) { - if (pObj->sversion == pNew->sversion) { - dTrace("vid:%d sid:%d id:%s sversion:%d, identical meterObj, ignore create", pNew->vnode, pNew->sid, - pNew->meterId, pNew->sversion); - return -1; - } - - dTrace("vid:%d sid:%d id:%s, update schema", pNew->vnode, pNew->sid, pNew->meterId); - if (!vnodeIsMeterState(pObj, TSDB_METER_STATE_UPDATING)) vnodeUpdateMeter(pNew, NULL); - return TSDB_CODE_SUCCESS; - } - - if (pObj) { - dWarn("vid:%d sid:%d id:%s, old meter is there, remove it", pNew->vnode, pNew->sid, pNew->meterId); - vnodeRemoveMeterObj(pNew->vnode, pNew->sid); - } - - pNew->pCache = vnodeAllocateCacheInfo(pNew); - if (pNew->pCache == NULL) { - code = TSDB_CODE_NO_RESOURCE; - } else { - vnodeList[pNew->vnode].meterList[pNew->sid] = pNew; - pNew->state = TSDB_METER_STATE_READY; - if (pNew->timeStamp > vnodeList[pNew->vnode].lastCreate) vnodeList[pNew->vnode].lastCreate = pNew->timeStamp; - vnodeSaveMeterObjToFile(pNew); - // vnodeCreateMeterMgmt(pNew, pSec); - vnodeCreateStream(pNew); - dTrace("vid:%d, sid:%d id:%s, meterObj is created, uid:%" PRIu64 "", pNew->vnode, pNew->sid, pNew->meterId, pNew->uid); - } - - return code; -} - -int vnodeRemoveMeterObj(int vnode, int sid) { - SMeterObj *pObj; - - if (vnode < 0 || vnode >= TSDB_MAX_VNODES) { - dError("vid:%d is out of range", vnode); - return 0; - } - - SVnodeCfg *pCfg = &vnodeList[vnode].cfg; - if (sid < 0 || sid >= pCfg->maxSessions) { - dError("vid:%d, sid:%d is larger than max:%d or less than 0", vnode, sid, pCfg->maxSessions); - return 0; - } - - // vnode has been closed, no meters in this vnode - if (vnodeList[vnode].meterList == NULL) return 0; - - pObj = vnodeList[vnode].meterList[sid]; - if (pObj == NULL) { - return TSDB_CODE_SUCCESS; - } - - if (!vnodeIsSafeToDeleteMeter(&vnodeList[vnode], sid)) { - return TSDB_CODE_ACTION_IN_PROGRESS; - } - - // after remove this meter, change its state 
to DELETED - pObj->state = TSDB_METER_STATE_DROPPED; - pObj->timeStamp = taosGetTimestampMs(); - vnodeList[vnode].lastRemove = pObj->timeStamp; - - vnodeRemoveStream(pObj); - vnodeSaveMeterObjToFile(pObj); - vnodeFreeMeterObj(pObj); - - return 0; -} - -int vnodeInsertPoints(SMeterObj *pObj, char *cont, int contLen, char source, void *param, int sversion, - int *numOfInsertPoints, TSKEY now) { - int expectedLen, i; - short numOfPoints; - SSubmitMsg *pSubmit = (SSubmitMsg *)cont; - char * pData; - TSKEY tsKey; - int points = 0; - int code = TSDB_CODE_SUCCESS; - SVnodeObj * pVnode = vnodeList + pObj->vnode; - - numOfPoints = htons(pSubmit->numOfRows); - expectedLen = numOfPoints * pObj->bytesPerPoint + sizeof(pSubmit->numOfRows); - if (expectedLen != contLen) { - dError("vid:%d sid:%d id:%s, invalid submit msg length:%d, expected:%d, bytesPerPoint: %d", - pObj->vnode, pObj->sid, pObj->meterId, contLen, expectedLen, pObj->bytesPerPoint); - code = TSDB_CODE_WRONG_MSG_SIZE; - goto _over; - } - - // to guarantee time stamp is the same for all vnodes - pData = pSubmit->payLoad; - tsKey = now; - if (*((TSKEY *)pData) == 0) { - for (i = 0; i < numOfPoints; ++i) { - *((TSKEY *)pData) = tsKey++; - pData += pObj->bytesPerPoint; - } - } - - if (numOfPoints >= (pVnode->cfg.blocksPerMeter - 2) * pObj->pointsPerBlock) { - code = TSDB_CODE_BATCH_SIZE_TOO_BIG; - dError("vid:%d sid:%d id:%s, batch size too big, insert points:%d, it shall be smaller than:%d", pObj->vnode, pObj->sid, - pObj->meterId, numOfPoints, (pVnode->cfg.blocksPerMeter - 2) * pObj->pointsPerBlock); - return code; - } - - /* - * please refer to TBASE-926, data may be lost when the cache is full - */ - if (source == TSDB_DATA_SOURCE_SHELL && pVnode->cfg.replications > 1) { - code = vnodeForwardToPeer(pObj, cont, contLen, TSDB_ACTION_INSERT, sversion); - if (code != TSDB_CODE_SUCCESS) return code; - } - - SCachePool *pPool = (SCachePool *)pVnode->pCachePool; - if (pObj->freePoints < numOfPoints || pObj->freePoints < 
(pObj->pointsPerBlock << 1) || - pPool->notFreeSlots > pVnode->cfg.cacheNumOfBlocks.totalBlocks - 2) { - code = TSDB_CODE_ACTION_IN_PROGRESS; - dTrace("vid:%d sid:%d id:%s, cache is full, freePoints:%d, notFreeSlots:%d", pObj->vnode, pObj->sid, pObj->meterId, - pObj->freePoints, pPool->notFreeSlots); - vnodeProcessCommitTimer(pVnode, NULL); - return code; - } - - // FIXME: Here should be after the comparison of sversions. - if (pVnode->cfg.commitLog && source != TSDB_DATA_SOURCE_LOG) { - if (pVnode->logFd < 0) return TSDB_CODE_INVALID_COMMIT_LOG; - code = vnodeWriteToCommitLog(pObj, TSDB_ACTION_INSERT, cont, contLen, sversion); - if (code != TSDB_CODE_SUCCESS) return code; - } - - if (pObj->sversion < sversion) { - dTrace("vid:%d sid:%d id:%s, schema is changed, new:%d old:%d", pObj->vnode, pObj->sid, pObj->meterId, sversion, - pObj->sversion); - vnodeSendMeterCfgMsg(pObj->vnode, pObj->sid); - code = TSDB_CODE_ACTION_IN_PROGRESS; - return code; - } else if (pObj->sversion > sversion) { - dTrace("vid:%d sid:%d id:%s, client schema out of date, sql is invalid. 
client sversion:%d vnode sversion:%d", - pObj->vnode, pObj->sid, pObj->meterId, pObj->sversion, sversion); - code = TSDB_CODE_INVALID_SQL; - return code; - } - - pData = pSubmit->payLoad; - - TSKEY firstKey = *((TSKEY *)pData); - TSKEY lastKey = *((TSKEY *)(pData + pObj->bytesPerPoint * (numOfPoints - 1))); - int cfid = now/pVnode->cfg.daysPerFile/tsMsPerDay[(uint8_t)pVnode->cfg.precision]; - - TSKEY minAllowedKey = (cfid - pVnode->maxFiles + 1)*pVnode->cfg.daysPerFile*tsMsPerDay[(uint8_t)pVnode->cfg.precision]; - TSKEY maxAllowedKey = (cfid + 2)*pVnode->cfg.daysPerFile*tsMsPerDay[(uint8_t)pVnode->cfg.precision] - 2; - if (firstKey < minAllowedKey || firstKey > maxAllowedKey || lastKey < minAllowedKey || lastKey > maxAllowedKey) { - dError("vid:%d sid:%d id:%s, vnode lastKeyOnFile:%" PRId64 ", data is out of range, numOfPoints:%d firstKey:%" PRId64 " lastKey:%" PRId64 " minAllowedKey:%" PRId64 " maxAllowedKey:%" PRId64, - pObj->vnode, pObj->sid, pObj->meterId, pVnode->lastKeyOnFile, numOfPoints,firstKey, lastKey, minAllowedKey, maxAllowedKey); - return TSDB_CODE_TIMESTAMP_OUT_OF_RANGE; - } - - if ((code = vnodeSetMeterInsertImportStateEx(pObj, TSDB_METER_STATE_INSERTING)) != TSDB_CODE_SUCCESS) { - goto _over; - } - - for (i = 0; i < numOfPoints; ++i) { // meter will be dropped, abort current insertion - if (vnodeIsMeterState(pObj, TSDB_METER_STATE_DROPPING)) { - dWarn("vid:%d sid:%d id:%s, meter is dropped, abort insert, state:%d", pObj->vnode, pObj->sid, pObj->meterId, - pObj->state); - - code = TSDB_CODE_NOT_ACTIVE_TABLE; - break; - } - - if (*((TSKEY *)pData) <= pObj->lastKey) { - dWarn("vid:%d sid:%d id:%s, received key:%" PRId64 " not larger than lastKey:%" PRId64, pObj->vnode, pObj->sid, pObj->meterId, - *((TSKEY *)pData), pObj->lastKey); - pData += pObj->bytesPerPoint; - continue; - } - - if (!VALID_TIMESTAMP(*((TSKEY *)pData), tsKey, (uint8_t)pVnode->cfg.precision)) { - code = TSDB_CODE_TIMESTAMP_OUT_OF_RANGE; - break; - } - - if 
(vnodeInsertPointToCache(pObj, pData) < 0) { - code = TSDB_CODE_ACTION_IN_PROGRESS; - break; - } - - pObj->lastKey = *((TSKEY *)pData); - pData += pObj->bytesPerPoint; - points++; - } - - atomic_fetch_add_64(&(pVnode->vnodeStatistic.pointsWritten), points * (pObj->numOfColumns - 1)); - atomic_fetch_add_64(&(pVnode->vnodeStatistic.totalStorage), points * pObj->bytesPerPoint); - - pthread_mutex_lock(&(pVnode->vmutex)); - - if (pObj->lastKey > pVnode->lastKey) pVnode->lastKey = pObj->lastKey; - - if (firstKey < pVnode->firstKey) pVnode->firstKey = firstKey; - assert(pVnode->firstKey > 0); - - pVnode->version++; - - pthread_mutex_unlock(&(pVnode->vmutex)); - - vnodeClearMeterState(pObj, TSDB_METER_STATE_INSERTING); - -_over: - dTrace("vid:%d sid:%d id:%s, %d out of %d points are inserted, lastKey:%" PRId64 " source:%d, vnode total storage: %" PRId64 "", - pObj->vnode, pObj->sid, pObj->meterId, points, numOfPoints, pObj->lastKey, source, - pVnode->vnodeStatistic.totalStorage); - - *numOfInsertPoints = points; - return code; -} - -/** - * continue running of the function may cause the free vnode crash with high probability - * todo fix it by set flag to disable commit in any cases - * - * @param param - * @param tmrId - */ -void vnodeProcessUpdateSchemaTimer(void *param, void *tmrId) { - SMeterObj * pObj = (SMeterObj *)param; - SVnodeObj * pVnode = vnodeList + pObj->vnode; - - /* - * vnode may have been dropped, check it in the first place - * if the vnode is freed, the pObj is not valid any more, the pObj->vnode is meanless - * so may be the vid should be passed into this function as a parameter? 
- */ - if (pVnode->meterList == NULL) { - dTrace("vnode is deleted, abort update schema"); - return; - } - - SCachePool *pPool = (SCachePool *)pVnode->pCachePool; - - pthread_mutex_lock(&pPool->vmutex); - if (pPool->commitInProcess) { - dTrace("vid:%d sid:%d mid:%s, committing in process, commit later", pObj->vnode, pObj->sid, pObj->meterId); - if (taosTmrStart(vnodeProcessUpdateSchemaTimer, 10, pObj, vnodeTmrCtrl) == NULL) { - vnodeClearMeterState(pObj, TSDB_METER_STATE_UPDATING); - } - - pthread_mutex_unlock(&pPool->vmutex); - return; - } - - pPool->commitInProcess = 1; - pthread_mutex_unlock(&pPool->vmutex); - - vnodeCommitMultiToFile(pVnode, pObj->sid, pObj->sid); -} - -void vnodeUpdateMeter(void *param, void *tmrId) { - SMeterObj *pNew = (SMeterObj *)param; - if (pNew == NULL || pNew->vnode < 0 || pNew->sid < 0) return; - - SVnodeObj* pVnode = &vnodeList[pNew->vnode]; - - if (pVnode->meterList == NULL) { - dTrace("vid:%d sid:%d id:%s, vnode is deleted, status:%s, abort update schema", - pNew->vnode, pNew->sid, pNew->meterId, taosGetVnodeStatusStr(vnodeList[pNew->vnode].vnodeStatus)); - free(pNew->schema); - free(pNew); - return; - } - - SMeterObj *pObj = pVnode->meterList[pNew->sid]; - if (pObj == NULL || vnodeIsMeterState(pObj, TSDB_METER_STATE_DROPPING)) { - dTrace("vid:%d sid:%d id:%s, meter is deleted, abort update schema", pNew->vnode, pNew->sid, pNew->meterId); - free(pNew->schema); - free(pNew); - return; - } - - int32_t state = vnodeSetMeterState(pObj, TSDB_METER_STATE_UPDATING); - if (state >= TSDB_METER_STATE_DROPPING) { - dError("vid:%d sid:%d id:%s, meter is deleted, failed to update, state:%d", - pObj->vnode, pObj->sid, pObj->meterId, state); - return; - } - - int32_t num = 0; - pthread_mutex_lock(&pVnode->vmutex); - num = pObj->numOfQueries; - pthread_mutex_unlock(&pVnode->vmutex); - - if (num > 0 || state != TSDB_METER_STATE_READY) { - // the state may have been changed by vnodeSetMeterState, recover it in the first place - 
vnodeClearMeterState(pObj, TSDB_METER_STATE_UPDATING); - dTrace("vid:%d sid:%d id:%s, update failed, retry later, numOfQueries:%d, state:%d", - pNew->vnode, pNew->sid, pNew->meterId, num, state); - - // retry update meter in 50ms - if (taosTmrStart(vnodeUpdateMeter, 50, pNew, vnodeTmrCtrl) == NULL) { - dError("vid:%d sid:%d id:%s, failed to start update timer, no retry", pNew->vnode, pNew->sid, pNew->meterId); - free(pNew->schema); - free(pNew); - } - return; - } - - // commit first - if (!vnodeIsCacheCommitted(pObj)) { - // commit data first - if (taosTmrStart(vnodeProcessUpdateSchemaTimer, 0, pObj, vnodeTmrCtrl) == NULL) { - dError("vid:%d sid:%d id:%s, failed to start commit timer", pObj->vnode, pObj->sid, pObj->meterId); - vnodeClearMeterState(pObj, TSDB_METER_STATE_UPDATING); - free(pNew->schema); - free(pNew); - return; - } - - if (taosTmrStart(vnodeUpdateMeter, 50, pNew, vnodeTmrCtrl) == NULL) { - dError("vid:%d sid:%d id:%s, failed to start update timer", pNew->vnode, pNew->sid, pNew->meterId); - vnodeClearMeterState(pObj, TSDB_METER_STATE_UPDATING); - free(pNew->schema); - free(pNew); - } - - dTrace("vid:%d sid:%d meterId:%s, there are data in cache, commit first, update later", - pNew->vnode, pNew->sid, pNew->meterId); - vnodeClearMeterState(pObj, TSDB_METER_STATE_UPDATING); - return; - } - - strcpy(pObj->meterId, pNew->meterId); - pObj->numOfColumns = pNew->numOfColumns; - pObj->timeStamp = pNew->timeStamp; - pObj->bytesPerPoint = pNew->bytesPerPoint; - pObj->maxBytes = pNew->maxBytes; - if (pObj->timeStamp > vnodeList[pObj->vnode].lastCreate) vnodeList[pObj->vnode].lastCreate = pObj->timeStamp; - - tfree(pObj->schema); - pObj->schema = pNew->schema; - - vnodeFreeCacheInfo(pObj); - pObj->pCache = vnodeAllocateCacheInfo(pObj); - - pObj->sversion = pNew->sversion; - vnodeSaveMeterObjToFile(pObj); - vnodeClearMeterState(pObj, TSDB_METER_STATE_UPDATING); - - dTrace("vid:%d sid:%d id:%s, schema is updated, state:%d", pObj->vnode, pObj->sid, pObj->meterId, 
pObj->state); - free(pNew); -} - -void vnodeRecoverMeterObjectFile(int vnode) { - // TODO: start the recovery process - assert(0); -} diff --git a/src/vnode/detail/src/vnodeQueryImpl.c b/src/vnode/detail/src/vnodeQueryImpl.c deleted file mode 100644 index f3e5cc27b3..0000000000 --- a/src/vnode/detail/src/vnodeQueryImpl.c +++ /dev/null @@ -1,7714 +0,0 @@ -/* - * Copyright (c) 2019 TAOS Data, Inc. - * - * This program is free software: you can use, redistribute, and/or modify - * it under the terms of the GNU Affero General Public License, version 3 - * or later ("AGPL"), as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. - * - * You should have received a copy of the GNU Affero General Public License - * along with this program. If not, see . - */ - -#include "hash.h" -#include "hashfunc.h" -#include "os.h" -#include "qextbuffer.h" -#include "taosmsg.h" -#include "ttime.h" - -#include "qinterpolation.h" -#include "tscJoinProcess.h" -#include "tscSecondaryMerge.h" -#include "tscompression.h" -#include "ttime.h" -#include "vnode.h" -#include "vnodeRead.h" -#include "vnodeUtil.h" - -#include "vnodeCache.h" -#include "vnodeDataFilterFunc.h" -#include "vnodeFile.h" -#include "vnodeQueryImpl.h" -#include "vnodeStatus.h" - -enum { - TS_JOIN_TS_EQUAL = 0, - TS_JOIN_TS_NOT_EQUALS = 1, - TS_JOIN_TAG_NOT_EQUALS = 2, -}; - -enum { - DISK_BLOCK_NO_NEED_TO_LOAD = 0, - DISK_BLOCK_LOAD_TS = 1, - DISK_BLOCK_LOAD_BLOCK = 2, -}; - -#define IS_DISK_DATA_BLOCK(q) ((q)->fileId >= 0) - -static int32_t readDataFromDiskFile(int fd, SQInfo *pQInfo, SQueryFilesInfo *pQueryFile, char *buf, uint64_t offset, - int32_t size); - -static void vnodeInitLoadCompBlockInfo(SLoadCompBlockInfo *pCompBlockLoadInfo); -static int32_t moveToNextBlock(SQueryRuntimeEnv *pRuntimeEnv, int32_t step, __block_search_fn_t 
searchFn, - bool loadData); -static int32_t doMergeMetersResultsToGroupRes(STableQuerySupportObj *pSupporter, SQuery *pQuery, - SQueryRuntimeEnv *pRuntimeEnv, SMeterDataInfo *pMeterDataInfo, - int32_t start, int32_t end); - -static TSKEY getTimestampInCacheBlock(SQueryRuntimeEnv *pRuntimeEnv, SCacheBlock *pBlock, int32_t index); -static TSKEY getTimestampInDiskBlock(SQueryRuntimeEnv *pRuntimeEnv, int32_t index); - -static void savePointPosition(SPositionInfo *position, int32_t fileId, int32_t slot, int32_t pos); -static int32_t getNextDataFileCompInfo(SQueryRuntimeEnv *pRuntimeEnv, SMeterObj *pMeterObj, int32_t step); - -static void setWindowResOutputBuf(SQueryRuntimeEnv *pRuntimeEnv, SWindowResult *pResult); - -static void resetMergeResultBuf(SQuery *pQuery, SQLFunctionCtx *pCtx, SResultInfo *pResultInfo); -static int32_t flushFromResultBuf(STableQuerySupportObj *pSupporter, const SQuery *pQuery, - const SQueryRuntimeEnv *pRuntimeEnv); -static void getBasicCacheInfoSnapshot(SQuery *pQuery, SCacheInfo *pCacheInfo, int32_t vid); -static TSKEY getQueryPositionForCacheInvalid(SQueryRuntimeEnv *pRuntimeEnv, __block_search_fn_t searchFn); -static bool functionNeedToExecute(SQueryRuntimeEnv *pRuntimeEnv, SQLFunctionCtx *pCtx, int32_t functionId); -static void getNextTimeWindow(SQuery *pQuery, STimeWindow *pTimeWindow); - -static int32_t getGroupResultId(int32_t groupIndex) { - int32_t base = 200000; - return base + (groupIndex * 10000); -} - -static FORCE_INLINE bool isIntervalQuery(SQuery *pQuery) { return pQuery->intervalTime > 0; } - -// check the offset value integrity -static FORCE_INLINE int32_t validateHeaderOffsetSegment(SQInfo *pQInfo, char *filePath, int32_t vid, char *data, - int32_t size) { - if (!taosCheckChecksumWhole((uint8_t *)data + TSDB_FILE_HEADER_LEN, size)) { - dLError("QInfo:%p vid:%d, failed to read header file:%s, file offset area is broken", pQInfo, vid, filePath); - return -1; - } - return 0; -} - -static FORCE_INLINE int32_t 
getCompHeaderSegSize(SVnodeCfg *pCfg) { - return pCfg->maxSessions * sizeof(SCompHeader) + sizeof(TSCKSUM); -} - -static FORCE_INLINE int32_t getCompHeaderStartPosition(SVnodeCfg *pCfg) { - return TSDB_FILE_HEADER_LEN + getCompHeaderSegSize(pCfg); -} - -static FORCE_INLINE int32_t validateCompBlockOffset(SQInfo *pQInfo, SMeterObj *pMeterObj, SCompHeader *pCompHeader, - SQueryFilesInfo *pQueryFileInfo, int32_t headerSize) { - if (pCompHeader->compInfoOffset < headerSize || pCompHeader->compInfoOffset > pQueryFileInfo->headerFileSize) { - dError("QInfo:%p vid:%d sid:%d id:%s, compInfoOffset:%" PRId64 " is not valid, size:%" PRId64, pQInfo, - pMeterObj->vnode, pMeterObj->sid, pMeterObj->meterId, pCompHeader->compInfoOffset, - pQueryFileInfo->headerFileSize); - - return -1; - } - - return 0; -} - -// check compinfo integrity -static FORCE_INLINE int32_t validateCompBlockInfoSegment(SQInfo *pQInfo, const char *filePath, int32_t vid, - SCompInfo *compInfo, int64_t offset) { - if (!taosCheckChecksumWhole((uint8_t *)compInfo, sizeof(SCompInfo))) { - dLError("QInfo:%p vid:%d, failed to read header file:%s, file compInfo broken, offset:%" PRId64, pQInfo, vid, - filePath, offset); - return -1; - } - return 0; -} - -static FORCE_INLINE int32_t validateCompBlockSegment(SQInfo *pQInfo, const char *filePath, SCompInfo *compInfo, - char *pBlock, int32_t vid, TSCKSUM checksum) { - uint32_t size = compInfo->numOfBlocks * sizeof(SCompBlock); - - if (checksum != taosCalcChecksum(0, (uint8_t *)pBlock, size)) { - dLError("QInfo:%p vid:%d, failed to read header file:%s, file compblock is broken:%zu", pQInfo, vid, filePath, - (char *)compInfo + sizeof(SCompInfo)); - return -1; - } - - return 0; -} - -bool isGroupbyNormalCol(SSqlGroupbyExpr *pGroupbyExpr) { - if (pGroupbyExpr == NULL || pGroupbyExpr->numOfGroupCols == 0) { - return false; - } - - for (int32_t i = 0; i < pGroupbyExpr->numOfGroupCols; ++i) { - SColIndexEx *pColIndex = &pGroupbyExpr->columnInfo[i]; - if (pColIndex->flag == 
TSDB_COL_NORMAL) { - /* - * make sure the normal column locates at the second position if tbname exists in group by clause - */ - if (pGroupbyExpr->numOfGroupCols > 1) { - assert(pColIndex->colIdx > 0); - } - - return true; - } - } - - return false; -} - -int16_t getGroupbyColumnType(SQuery *pQuery, SSqlGroupbyExpr *pGroupbyExpr) { - assert(pGroupbyExpr != NULL); - - int32_t colId = -2; - int16_t type = TSDB_DATA_TYPE_NULL; - - for (int32_t i = 0; i < pGroupbyExpr->numOfGroupCols; ++i) { - SColIndexEx *pColIndex = &pGroupbyExpr->columnInfo[i]; - if (pColIndex->flag == TSDB_COL_NORMAL) { - colId = pColIndex->colId; - break; - } - } - - for (int32_t i = 0; i < pQuery->numOfCols; ++i) { - if (colId == pQuery->colList[i].data.colId) { - type = pQuery->colList[i].data.type; - break; - } - } - - return type; -} - -bool isSelectivityWithTagsQuery(SQuery *pQuery) { - bool hasTags = false; - int32_t numOfSelectivity = 0; - - for (int32_t i = 0; i < pQuery->numOfOutputCols; ++i) { - int32_t functId = pQuery->pSelectExpr[i].pBase.functionId; - if (functId == TSDB_FUNC_TAG_DUMMY || functId == TSDB_FUNC_TS_DUMMY) { - hasTags = true; - continue; - } - - if ((aAggs[functId].nStatus & TSDB_FUNCSTATE_SELECTIVITY) != 0) { - numOfSelectivity++; - } - } - - if (numOfSelectivity > 0 && hasTags) { - return true; - } - - return false; -} - -static void vnodeFreeFieldsEx(SQueryRuntimeEnv *pRuntimeEnv) { - SQuery *pQuery = pRuntimeEnv->pQuery; - vnodeFreeFields(pQuery); - - vnodeInitLoadCompBlockInfo(&pRuntimeEnv->loadCompBlockInfo); -} - -static bool vnodeIsCompBlockInfoLoaded(SQueryRuntimeEnv *pRuntimeEnv, SMeterObj *pMeterObj, int32_t fileIndex) { - SQuery *pQuery = pRuntimeEnv->pQuery; - - // check if data file header of this table has been loaded into memory, avoid to reloaded comp Block info - SLoadCompBlockInfo *pLoadCompBlockInfo = &pRuntimeEnv->loadCompBlockInfo; - - // if vnodeFreeFields is called, the pQuery->pFields is NULL - if (pLoadCompBlockInfo->fileListIndex == fileIndex 
&& pLoadCompBlockInfo->sid == pMeterObj->sid && - pQuery->pFields != NULL && pQuery->fileId > 0) { - assert(pRuntimeEnv->vnodeFileInfo.pFileInfo[fileIndex].fileID == pLoadCompBlockInfo->fileId && - pQuery->numOfBlocks > 0); - return true; - } - - return false; -} - -static void vnodeSetCompBlockInfoLoaded(SQueryRuntimeEnv *pRuntimeEnv, int32_t fileIndex, int32_t sid) { - SLoadCompBlockInfo *pCompBlockLoadInfo = &pRuntimeEnv->loadCompBlockInfo; - - pCompBlockLoadInfo->sid = sid; - pCompBlockLoadInfo->fileListIndex = fileIndex; - pCompBlockLoadInfo->fileId = pRuntimeEnv->vnodeFileInfo.pFileInfo[fileIndex].fileID; -} - -static void vnodeInitLoadCompBlockInfo(SLoadCompBlockInfo *pCompBlockLoadInfo) { - pCompBlockLoadInfo->sid = -1; - pCompBlockLoadInfo->fileId = -1; - pCompBlockLoadInfo->fileListIndex = -1; -} - -static int32_t vnodeIsDatablockLoaded(SQueryRuntimeEnv *pRuntimeEnv, SMeterObj *pMeterObj, int32_t fileIndex, - bool loadTS) { - SQuery * pQuery = pRuntimeEnv->pQuery; - SLoadDataBlockInfo *pLoadInfo = &pRuntimeEnv->loadBlockInfo; - - /* this block has been loaded into memory, return directly */ - if (pLoadInfo->fileId == pQuery->fileId && pLoadInfo->slotIdx == pQuery->slot && pQuery->slot != -1 && - pLoadInfo->sid == pMeterObj->sid && pLoadInfo->fileListIndex == fileIndex) { - // previous load operation does not load the primary timestamp column, we only need to load the timestamp column - if (pLoadInfo->tsLoaded == false && pLoadInfo->tsLoaded != loadTS) { - return DISK_BLOCK_LOAD_TS; - } else { - return DISK_BLOCK_NO_NEED_TO_LOAD; - } - } - - return DISK_BLOCK_LOAD_BLOCK; -} - -static void vnodeSetDataBlockInfoLoaded(SQueryRuntimeEnv *pRuntimeEnv, SMeterObj *pMeterObj, int32_t fileIndex, - bool tsLoaded) { - SQuery * pQuery = pRuntimeEnv->pQuery; - SLoadDataBlockInfo *pLoadInfo = &pRuntimeEnv->loadBlockInfo; - - pLoadInfo->fileId = pQuery->fileId; - pLoadInfo->slotIdx = pQuery->slot; - pLoadInfo->fileListIndex = fileIndex; - pLoadInfo->sid = pMeterObj->sid; 
- pLoadInfo->tsLoaded = tsLoaded; -} - -static void vnodeInitDataBlockInfo(SLoadDataBlockInfo *pBlockLoadInfo) { - pBlockLoadInfo->slotIdx = -1; - pBlockLoadInfo->fileId = -1; - pBlockLoadInfo->sid = -1; - pBlockLoadInfo->fileListIndex = -1; -} - -static void vnodeSetCurrentFileNames(SQueryFilesInfo *pVnodeFilesInfo) { - assert(pVnodeFilesInfo->current >= 0 && pVnodeFilesInfo->current < pVnodeFilesInfo->numOfFiles); - - SHeaderFileInfo *pCurrentFileInfo = &pVnodeFilesInfo->pFileInfo[pVnodeFilesInfo->current]; - - /* - * set the full file path for current opened files - * the maximum allowed path string length is PATH_MAX in Linux, 100 bytes is used to - * suppress the compiler warnings - */ - char str[PATH_MAX + 100] = {0}; - int32_t PATH_WITH_EXTRA = PATH_MAX + 100; - - int32_t vnodeId = pVnodeFilesInfo->vnodeId; - int32_t fileId = pCurrentFileInfo->fileID; - - int32_t len = snprintf(str, PATH_WITH_EXTRA, "%sv%df%d.head", pVnodeFilesInfo->dbFilePathPrefix, vnodeId, fileId); - assert(len <= PATH_MAX); - - strncpy(pVnodeFilesInfo->headerFilePath, str, PATH_MAX); - - len = snprintf(str, PATH_WITH_EXTRA, "%sv%df%d.data", pVnodeFilesInfo->dbFilePathPrefix, vnodeId, fileId); - assert(len <= PATH_MAX); - - strncpy(pVnodeFilesInfo->dataFilePath, str, PATH_MAX); - - len = snprintf(str, PATH_WITH_EXTRA, "%sv%df%d.last", pVnodeFilesInfo->dbFilePathPrefix, vnodeId, fileId); - assert(len <= PATH_MAX); - - strncpy(pVnodeFilesInfo->lastFilePath, str, PATH_MAX); -} - -/** - * if the header is smaller than a threshold value(header size + initial offset value) - * - * @param vnodeId - * @param headerFileSize - * @return - */ -static FORCE_INLINE bool isHeaderFileEmpty(int32_t vnodeId, size_t headerFileSize) { - SVnodeCfg *pVnodeCfg = &vnodeList[vnodeId].cfg; - return headerFileSize <= getCompHeaderStartPosition(pVnodeCfg); -} - -static bool checkIsHeaderFileEmpty(SQueryFilesInfo *pVnodeFilesInfo) { - struct stat fstat = {0}; - if (stat(pVnodeFilesInfo->headerFilePath, &fstat) < 0) 
{ - return true; - } - - pVnodeFilesInfo->headerFileSize = fstat.st_size; - return isHeaderFileEmpty(pVnodeFilesInfo->vnodeId, pVnodeFilesInfo->headerFileSize); -} - -static void doCloseQueryFileInfoFD(SQueryFilesInfo *pVnodeFilesInfo) { - tclose(pVnodeFilesInfo->headerFd); - tclose(pVnodeFilesInfo->dataFd); - tclose(pVnodeFilesInfo->lastFd); - - pVnodeFilesInfo->current = -1; - pVnodeFilesInfo->headerFileSize = -1; -} - -static void doInitQueryFileInfoFD(SQueryFilesInfo *pVnodeFilesInfo) { - pVnodeFilesInfo->current = -1; - pVnodeFilesInfo->headerFileSize = -1; - - pVnodeFilesInfo->headerFd = FD_INITIALIZER; // set the initial value - pVnodeFilesInfo->dataFd = FD_INITIALIZER; - pVnodeFilesInfo->lastFd = FD_INITIALIZER; -} - -/* - * close the opened fd are delegated to invoker - */ -static int32_t doOpenQueryFile(SQInfo *pQInfo, SQueryFilesInfo *pVnodeFileInfo) { - SHeaderFileInfo *pHeaderFileInfo = &pVnodeFileInfo->pFileInfo[pVnodeFileInfo->current]; - - /* - * current header file is empty or broken, return directly. - * - * if the header is smaller than or equals to the minimum file size value, this file is empty. No need to open this - * file and the corresponding files. 
- */ - if (checkIsHeaderFileEmpty(pVnodeFileInfo)) { - qTrace("QInfo:%p vid:%d, fileId:%d, index:%d, size:%d, ignore file, empty or broken", pQInfo, - pVnodeFileInfo->vnodeId, pHeaderFileInfo->fileID, pVnodeFileInfo->current, pVnodeFileInfo->headerFileSize); - - return -1; - } - - pVnodeFileInfo->headerFd = open(pVnodeFileInfo->headerFilePath, O_RDONLY); - if (!FD_VALID(pVnodeFileInfo->headerFd)) { - dError("QInfo:%p failed open head file:%s reason:%s", pQInfo, pVnodeFileInfo->headerFilePath, strerror(errno)); - return -1; - } - - pVnodeFileInfo->dataFd = open(pVnodeFileInfo->dataFilePath, O_RDONLY); - if (!FD_VALID(pVnodeFileInfo->dataFd)) { - dError("QInfo:%p failed open data file:%s reason:%s", pQInfo, pVnodeFileInfo->dataFilePath, strerror(errno)); - return -1; - } - - pVnodeFileInfo->lastFd = open(pVnodeFileInfo->lastFilePath, O_RDONLY); - if (!FD_VALID(pVnodeFileInfo->lastFd)) { - dError("QInfo:%p failed open last file:%s reason:%s", pQInfo, pVnodeFileInfo->lastFilePath, strerror(errno)); - return -1; - } - - return TSDB_CODE_SUCCESS; -} - -static void doCloseQueryFiles(SQueryFilesInfo *pVnodeFileInfo) { - if (pVnodeFileInfo->current >= 0) { - assert(pVnodeFileInfo->current < pVnodeFileInfo->numOfFiles && pVnodeFileInfo->current >= 0); - - pVnodeFileInfo->headerFileSize = -1; - doCloseQueryFileInfoFD(pVnodeFileInfo); - } - - assert(pVnodeFileInfo->current == -1); -} - -/** - * For each query, only one header file along with corresponding files is opened, in order to - * avoid too many memory files opened at the same time. 
- * - * @param pRuntimeEnv - * @param fileIndex - * @return -1 failed, 0 success - */ -int32_t vnodeGetHeaderFile(SQueryRuntimeEnv *pRuntimeEnv, int32_t fileIndex) { - assert(fileIndex >= 0 && fileIndex < pRuntimeEnv->vnodeFileInfo.numOfFiles); - - SQuery *pQuery = pRuntimeEnv->pQuery; - SQInfo *pQInfo = (SQInfo *)GET_QINFO_ADDR(pQuery); // only for log output - - SQueryFilesInfo *pVnodeFileInfo = &pRuntimeEnv->vnodeFileInfo; - - if (pVnodeFileInfo->current != fileIndex) { - if (pVnodeFileInfo->current >= 0) { - assert(pVnodeFileInfo->headerFileSize > 0); - } - - // do close the current memory mapped header file and corresponding fd - doCloseQueryFiles(pVnodeFileInfo); - assert(pVnodeFileInfo->headerFileSize == -1); - - // set current opened file Index - pVnodeFileInfo->current = fileIndex; - - // set the current opened files(header, data, last) path - vnodeSetCurrentFileNames(pVnodeFileInfo); - - if (doOpenQueryFile(pQInfo, pVnodeFileInfo) != TSDB_CODE_SUCCESS) { - doCloseQueryFiles(pVnodeFileInfo); // all the fds may be partially opened, close them anyway. 
- return -1; - } - } - - return TSDB_CODE_SUCCESS; -} - -/* - * read comp block info from header file - * - */ -static int vnodeGetCompBlockInfo(SMeterObj *pMeterObj, SQueryRuntimeEnv *pRuntimeEnv, int32_t fileIndex) { - SQuery *pQuery = pRuntimeEnv->pQuery; - SQInfo *pQInfo = (SQInfo *)GET_QINFO_ADDR(pQuery); - - SVnodeCfg * pCfg = &vnodeList[pMeterObj->vnode].cfg; - SHeaderFileInfo *pHeadeFileInfo = &pRuntimeEnv->vnodeFileInfo.pFileInfo[fileIndex]; - - int64_t st = taosGetTimestampUs(); - - // if the corresponding data/header files are already closed, re-open them here - if (vnodeIsCompBlockInfoLoaded(pRuntimeEnv, pMeterObj, fileIndex) && - pRuntimeEnv->vnodeFileInfo.current == fileIndex) { - dTrace("QInfo:%p vid:%d sid:%d id:%s, fileId:%d compBlock info is loaded, not reload", GET_QINFO_ADDR(pQuery), - pMeterObj->vnode, pMeterObj->sid, pMeterObj->meterId, pHeadeFileInfo->fileID); - return pQuery->numOfBlocks; - } - - SQueryCostSummary *pSummary = &pRuntimeEnv->summary; - pSummary->readCompInfo++; - pSummary->numOfSeek++; - - int32_t ret = vnodeGetHeaderFile(pRuntimeEnv, fileIndex); - if (ret != TSDB_CODE_SUCCESS) { - return -1; // failed to load the header file data into memory - } - - char * buf = calloc(1, getCompHeaderSegSize(pCfg)); - SQueryFilesInfo *pVnodeFileInfo = &pRuntimeEnv->vnodeFileInfo; - - lseek(pVnodeFileInfo->headerFd, TSDB_FILE_HEADER_LEN, SEEK_SET); - read(pVnodeFileInfo->headerFd, buf, getCompHeaderSegSize(pCfg)); - - // check the offset value integrity - if (validateHeaderOffsetSegment(pQInfo, pRuntimeEnv->vnodeFileInfo.headerFilePath, pMeterObj->vnode, - buf - TSDB_FILE_HEADER_LEN, getCompHeaderSegSize(pCfg)) < 0) { - free(buf); - return -1; - } - - SCompHeader *compHeader = (SCompHeader *)(buf + sizeof(SCompHeader) * pMeterObj->sid); - - // no data in this file for specified meter, abort - if (compHeader->compInfoOffset == 0) { - free(buf); - return 0; - } - - // corrupted file may cause the invalid compInfoOffset, check needs - if 
(validateCompBlockOffset(pQInfo, pMeterObj, compHeader, &pRuntimeEnv->vnodeFileInfo, - getCompHeaderStartPosition(pCfg)) < 0) { - free(buf); - return -1; - } - - lseek(pVnodeFileInfo->headerFd, compHeader->compInfoOffset, SEEK_SET); - - SCompInfo compInfo = {0}; - read(pVnodeFileInfo->headerFd, &compInfo, sizeof(SCompInfo)); - - // check compblock info integrity - if (validateCompBlockInfoSegment(pQInfo, pRuntimeEnv->vnodeFileInfo.headerFilePath, pMeterObj->vnode, &compInfo, - compHeader->compInfoOffset) < 0) { - free(buf); - return -1; - } - - if (compInfo.numOfBlocks <= 0 || compInfo.uid != pMeterObj->uid) { - free(buf); - return 0; - } - - // free allocated SField data - vnodeFreeFieldsEx(pRuntimeEnv); - pQuery->numOfBlocks = (int32_t)compInfo.numOfBlocks; - - /* - * +-------------+-----------+----------------+ - * | comp block | checksum | SField Pointer | - * +-------------+-----------+----------------+ - */ - int32_t compBlockSize = compInfo.numOfBlocks * sizeof(SCompBlock); - size_t bufferSize = compBlockSize + sizeof(TSCKSUM) + POINTER_BYTES * pQuery->numOfBlocks; - - // prepare buffer to hold compblock data - if (pQuery->blockBufferSize != bufferSize) { - pQuery->pBlock = realloc(pQuery->pBlock, bufferSize); - pQuery->blockBufferSize = (int32_t)bufferSize; - } - - memset(pQuery->pBlock, 0, bufferSize); - - // read data: comp block + checksum - read(pVnodeFileInfo->headerFd, pQuery->pBlock, compBlockSize + sizeof(TSCKSUM)); - TSCKSUM checksum = *(TSCKSUM *)((char *)pQuery->pBlock + compBlockSize); - - // check comp block integrity - if (validateCompBlockSegment(pQInfo, pRuntimeEnv->vnodeFileInfo.headerFilePath, &compInfo, (char *)pQuery->pBlock, - pMeterObj->vnode, checksum) < 0) { - free(buf); - return -1; - } - - pQuery->pFields = (SField **)((char *)pQuery->pBlock + compBlockSize + sizeof(TSCKSUM)); - vnodeSetCompBlockInfoLoaded(pRuntimeEnv, fileIndex, pMeterObj->sid); - - int64_t et = taosGetTimestampUs(); - qTrace("QInfo:%p vid:%d sid:%d id:%s, 
fileId:%d, load compblock info, size:%d, elapsed:%f ms", pQInfo, - pMeterObj->vnode, pMeterObj->sid, pMeterObj->meterId, pRuntimeEnv->vnodeFileInfo.pFileInfo[fileIndex].fileID, - compBlockSize, (et - st) / 1000.0); - - pSummary->totalCompInfoSize += compBlockSize; - pSummary->loadCompInfoUs += (et - st); - - free(buf); - return pQuery->numOfBlocks; -} - -bool doRevisedResultsByLimit(SQInfo *pQInfo) { - SQuery *pQuery = &pQInfo->query; - - if ((pQuery->limit.limit > 0) && (pQuery->pointsRead + pQInfo->pointsRead > pQuery->limit.limit)) { - pQuery->pointsRead = pQuery->limit.limit - pQInfo->pointsRead; - - setQueryStatus(pQuery, QUERY_COMPLETED); // query completed - return true; - } - - return false; -} - -static void setExecParams(SQuery *pQuery, SQLFunctionCtx *pCtx, int64_t StartQueryTimestamp, void *inputData, - char *primaryColumnData, int32_t size, int32_t functionId, SField *pField, bool hasNull, - int32_t blockStatus, void *param, int32_t scanFlag); - -void createQueryResultInfo(SQuery *pQuery, SWindowResult *pResultRow, bool isSTableQuery, SPosInfo *posInfo); - -static void destroyTimeWindowRes(SWindowResult *pOneOutputRes, int32_t nOutputCols); - -static int32_t binarySearchForBlockImpl(SCompBlock *pBlock, int32_t numOfBlocks, TSKEY skey, int32_t order) { - int32_t firstSlot = 0; - int32_t lastSlot = numOfBlocks - 1; - - int32_t midSlot = firstSlot; - - while (1) { - numOfBlocks = lastSlot - firstSlot + 1; - midSlot = (firstSlot + (numOfBlocks >> 1)); - - if (numOfBlocks == 1) break; - - if (skey > pBlock[midSlot].keyLast) { - if (numOfBlocks == 2) break; - if ((order == TSQL_SO_DESC) && (skey < pBlock[midSlot + 1].keyFirst)) break; - firstSlot = midSlot + 1; - } else if (skey < pBlock[midSlot].keyFirst) { - if ((order == TSQL_SO_ASC) && (skey > pBlock[midSlot - 1].keyLast)) break; - lastSlot = midSlot - 1; - } else { - break; // got the slot - } - } - - return midSlot; -} - -static int32_t binarySearchForBlock(SQuery *pQuery, int64_t key) { - return 
binarySearchForBlockImpl(pQuery->pBlock, pQuery->numOfBlocks, key, pQuery->order.order); -} - -#if 0 -/* unmap previous buffer */ -static UNUSED_FUNC int32_t resetMMapWindow(SHeaderFileInfo *pQueryFileInfo) { - munmap(pQueryFileInfo->pDataFileData, pQueryFileInfo->defaultMappingSize); - - pQueryFileInfo->dtFileMappingOffset = 0; - pQueryFileInfo->pDataFileData = mmap(NULL, pQueryFileInfo->defaultMappingSize, PROT_READ, MAP_PRIVATE | MAP_POPULATE, - pQueryFileInfo->dataFd, pQueryFileInfo->dtFileMappingOffset); - if (pQueryFileInfo->pDataFileData == MAP_FAILED) { - dError("failed to mmaping data file:%s, reason:%s", pQueryFileInfo->dataFilePath, strerror(errno)); - return -1; - } - - return 0; -} - -static int32_t moveMMapWindow(SHeaderFileInfo *pQueryFileInfo, uint64_t offset) { - uint64_t upperBnd = (pQueryFileInfo->dtFileMappingOffset + pQueryFileInfo->defaultMappingSize - 1); - - /* data that are located in current mmapping window */ - if ((offset >= pQueryFileInfo->dtFileMappingOffset && offset <= upperBnd) && - pQueryFileInfo->pDataFileData != MAP_FAILED) { - // if it mapping failed, try again when it is called. - return 0; - } - - /* - * 1. there is import data that locate farther from the beginning, but with less timestamp, so we need to move the - * window backwards - * 2. 
otherwise, move the mmaping window forward - */ - upperBnd = (offset / pQueryFileInfo->defaultMappingSize + 1) * pQueryFileInfo->defaultMappingSize - 1; - - /* unmap previous buffer */ - if (pQueryFileInfo->pDataFileData != MAP_FAILED) { - int32_t ret = munmap(pQueryFileInfo->pDataFileData, pQueryFileInfo->defaultMappingSize); - pQueryFileInfo->pDataFileData = MAP_FAILED; - if (ret != 0) { - dError("failed to unmmaping data file:%s, handle:%d, offset:%ld, reason:%s", pQueryFileInfo->dataFilePath, - pQueryFileInfo->dataFd, pQueryFileInfo->dtFileMappingOffset, strerror(errno)); - return -1; - } - } - - /* mmap from the new position */ - pQueryFileInfo->dtFileMappingOffset = upperBnd - pQueryFileInfo->defaultMappingSize + 1; - pQueryFileInfo->pDataFileData = mmap(NULL, pQueryFileInfo->defaultMappingSize, PROT_READ, MAP_PRIVATE | MAP_POPULATE, - pQueryFileInfo->dataFd, pQueryFileInfo->dtFileMappingOffset); - if (pQueryFileInfo->pDataFileData == MAP_FAILED) { - dError("failed to mmaping data file:%s, handle:%d, offset:%ld, reason:%s", pQueryFileInfo->dataFilePath, - pQueryFileInfo->dataFd, pQueryFileInfo->dtFileMappingOffset, strerror(errno)); - return -1; - } - - /* advise kernel the usage of mmaped data */ - if (madvise(pQueryFileInfo->pDataFileData, pQueryFileInfo->defaultMappingSize, MADV_SEQUENTIAL) == -1) { - dError("failed to advise kernel the usage of data file:%s, handle:%d, reason:%s", pQueryFileInfo->dataFilePath, - pQueryFileInfo->dataFd, strerror(errno)); - } - - return 0; -} - -static int32_t copyDataFromMMapBuffer(int fd, SQInfo *pQInfo, SHeaderFileInfo *pQueryFile, char *buf, uint64_t offset, - int32_t size) { - assert(size >= 0); - - int32_t ret = moveMMapWindow(pQueryFile, offset); - dTrace("QInfo:%p finished move to correct position:%ld", pQInfo, taosGetTimestampUs()); - - if (pQueryFile->pDataFileData == MAP_FAILED || ret != TSDB_CODE_SUCCESS) { - dTrace("QInfo:%p move window failed. 
ret:%d", pQInfo, ret); - return -1; - } - - uint64_t upperBnd = pQueryFile->dtFileMappingOffset + pQueryFile->defaultMappingSize - 1; - - /* data are enclosed in current mmap window */ - if (offset + size <= upperBnd) { - uint64_t startPos = offset - pQueryFile->dtFileMappingOffset; - memcpy(buf, pQueryFile->pDataFileData + startPos, size); - - dTrace("QInfo:%p copy data completed, size:%d, time:%ld", pQInfo, size, taosGetTimestampUs()); - - } else { - uint32_t firstPart = upperBnd - offset + 1; - memcpy(buf, pQueryFile->pDataFileData + (offset - pQueryFile->dtFileMappingOffset), firstPart); - - dTrace("QInfo:%p copy data first part,size:%d, time:%ld", pQInfo, firstPart, taosGetTimestampUs()); - - char *dst = buf + firstPart; - - /* remain data */ - uint32_t remain = size - firstPart; - while (remain > 0) { - int32_t ret1 = moveMMapWindow(pQueryFile, pQueryFile->dtFileMappingOffset + pQueryFile->defaultMappingSize); - if (ret1 != 0) { - return ret1; - } - - uint32_t len = (remain > pQueryFile->defaultMappingSize) ? pQueryFile->defaultMappingSize : remain; - - /* start from the 0 position */ - memcpy(dst, pQueryFile->pDataFileData, len); - remain -= len; - dst += len; - - dTrace("QInfo:%p copy data part,size:%d, time:%ld", pQInfo, len, taosGetTimestampUs()); - } - } - - return 0; -} - -#endif - -static int32_t readDataFromDiskFile(int fd, SQInfo *pQInfo, SQueryFilesInfo *pQueryFile, char *buf, uint64_t offset, - int32_t size) { - assert(size >= 0); - - int32_t ret = (int32_t)lseek(fd, offset, SEEK_SET); - if (ret == -1) { - // qTrace("QInfo:%p seek failed, reason:%s", pQInfo, strerror(errno)); - return -1; - } - - ret = read(fd, buf, size); - // qTrace("QInfo:%p read data %d completed", pQInfo, size); - return 0; -} - -static int32_t loadColumnIntoMem(SQuery *pQuery, SQueryFilesInfo *pQueryFileInfo, SCompBlock *pBlock, SField *pFields, - int32_t col, SData *sdata, void *tmpBuf, char *buffer, int32_t buffersize) { - char *dst = (pBlock->algorithm) ? 
tmpBuf : sdata->data; - - int64_t offset = pBlock->offset + pFields[col].offset; - SQInfo *pQInfo = (SQInfo *)GET_QINFO_ADDR(pQuery); - - int fd = pBlock->last ? pQueryFileInfo->lastFd : pQueryFileInfo->dataFd; - int32_t ret = readDataFromDiskFile(fd, pQInfo, pQueryFileInfo, dst, offset, pFields[col].len); - if (ret != 0) { - return ret; - } - - // load checksum - TSCKSUM checksum = 0; - ret = readDataFromDiskFile(fd, pQInfo, pQueryFileInfo, (char *)&checksum, offset + pFields[col].len, sizeof(TSCKSUM)); - if (ret != 0) { - return ret; - } - - // check column data integrity - if (checksum != taosCalcChecksum(0, (const uint8_t *)dst, pFields[col].len)) { - dLError("QInfo:%p, column data checksum error, file:%s, col: %d, offset:%" PRId64, GET_QINFO_ADDR(pQuery), - pQueryFileInfo->dataFilePath, col, offset); - - return -1; - } - - if (pBlock->algorithm) { - (*pDecompFunc[pFields[col].type])(tmpBuf, pFields[col].len, pBlock->numOfPoints, sdata->data, - pFields[col].bytes * pBlock->numOfPoints, pBlock->algorithm, buffer, buffersize); - } - - return 0; -} - -static int32_t loadDataBlockFieldsInfo(SQueryRuntimeEnv *pRuntimeEnv, SCompBlock *pBlock, SField **pField) { - SQuery * pQuery = pRuntimeEnv->pQuery; - SQInfo * pQInfo = (SQInfo *)GET_QINFO_ADDR(pQuery); - SMeterObj * pMeterObj = pRuntimeEnv->pMeterObj; - SQueryFilesInfo *pVnodeFilesInfo = &pRuntimeEnv->vnodeFileInfo; - - size_t size = sizeof(SField) * (pBlock->numOfCols) + sizeof(TSCKSUM); - - // if *pField != NULL, this block is loaded once, in current query do nothing - if (*pField == NULL) { // load the fields information once - *pField = malloc(size); - } - - SQueryCostSummary *pSummary = &pRuntimeEnv->summary; - pSummary->totalFieldSize += size; - pSummary->readField++; - pSummary->numOfSeek++; - - int64_t st = taosGetTimestampUs(); - - int fd = pBlock->last ? 
pVnodeFilesInfo->lastFd : pVnodeFilesInfo->dataFd; - int32_t ret = readDataFromDiskFile(fd, pQInfo, pVnodeFilesInfo, (char *)(*pField), pBlock->offset, size); - if (ret != 0) { - return ret; - } - - // check fields integrity - if (!taosCheckChecksumWhole((uint8_t *)(*pField), size)) { - dLError("QInfo:%p vid:%d sid:%d id:%s, slot:%d, failed to read sfields, file:%s, sfields area broken:%" PRId64, - pQInfo, pMeterObj->vnode, pMeterObj->sid, pMeterObj->meterId, pQuery->slot, pVnodeFilesInfo->dataFilePath, - pBlock->offset); - return -1; - } - - int64_t et = taosGetTimestampUs(); - qTrace("QInfo:%p vid:%d sid:%d id:%s, slot:%d, load field info, size:%d, elapsed:%f ms", pQInfo, pMeterObj->vnode, - pMeterObj->sid, pMeterObj->meterId, pQuery->slot, size, (et - st) / 1000.0); - - pSummary->loadFieldUs += (et - st); - return 0; -} - -static void fillWithNull(SQuery *pQuery, char *dst, int32_t col, int32_t numOfPoints) { - int32_t bytes = pQuery->colList[col].data.bytes; - int32_t type = pQuery->colList[col].data.type; - - setNullN(dst, type, bytes, numOfPoints); -} - -static int32_t loadPrimaryTSColumn(SQueryRuntimeEnv *pRuntimeEnv, SCompBlock *pBlock, SField **pField, - int32_t *columnBytes) { - SQuery *pQuery = pRuntimeEnv->pQuery; - assert(PRIMARY_TSCOL_LOADED(pQuery) == false); - - if (columnBytes != NULL) { - (*columnBytes) += (*pField)[PRIMARYKEY_TIMESTAMP_COL_INDEX].len + sizeof(TSCKSUM); - } - - int32_t ret = loadColumnIntoMem(pQuery, &pRuntimeEnv->vnodeFileInfo, pBlock, *pField, PRIMARYKEY_TIMESTAMP_COL_INDEX, - pRuntimeEnv->primaryColBuffer, pRuntimeEnv->unzipBuffer, - pRuntimeEnv->secondaryUnzipBuffer, pRuntimeEnv->unzipBufSize); - return ret; -} - -static int32_t loadDataBlockIntoMem(SCompBlock *pBlock, SField **pField, SQueryRuntimeEnv *pRuntimeEnv, int32_t fileIdx, - bool loadPrimaryCol, bool loadSField) { - int32_t i = 0, j = 0; - - SQuery * pQuery = pRuntimeEnv->pQuery; - SMeterObj *pMeterObj = pRuntimeEnv->pMeterObj; - SData ** sdata = 
pRuntimeEnv->colDataBuffer; - - assert(fileIdx == pRuntimeEnv->vnodeFileInfo.current); - - SData **primaryTSBuf = &pRuntimeEnv->primaryColBuffer; - void * tmpBuf = pRuntimeEnv->unzipBuffer; - int32_t columnBytes = 0; - - SQueryCostSummary *pSummary = &pRuntimeEnv->summary; - - int32_t status = vnodeIsDatablockLoaded(pRuntimeEnv, pMeterObj, fileIdx, loadPrimaryCol); - if (status == DISK_BLOCK_NO_NEED_TO_LOAD) { - dTrace( - "QInfo:%p vid:%d sid:%d id:%s, fileId:%d, data block has been loaded, no need to load again, ts:%d, slot:%d," - " brange:%lld-%lld, rows:%d", - GET_QINFO_ADDR(pQuery), pMeterObj->vnode, pMeterObj->sid, pMeterObj->meterId, pQuery->fileId, loadPrimaryCol, - pQuery->slot, pBlock->keyFirst, pBlock->keyLast, pBlock->numOfPoints); - - if (loadSField && (pQuery->pFields == NULL || pQuery->pFields[pQuery->slot] == NULL)) { - loadDataBlockFieldsInfo(pRuntimeEnv, pBlock, &pQuery->pFields[pQuery->slot]); - } - - return TSDB_CODE_SUCCESS; - } else if (status == DISK_BLOCK_LOAD_TS) { - dTrace("QInfo:%p vid:%d sid:%d id:%s, fileId:%d, data block has been loaded, incrementally load ts", - GET_QINFO_ADDR(pQuery), pMeterObj->vnode, pMeterObj->sid, pMeterObj->meterId, pQuery->fileId); - - assert(PRIMARY_TSCOL_LOADED(pQuery) == false && loadSField == true); - if (pQuery->pFields == NULL || pQuery->pFields[pQuery->slot] == NULL) { - loadDataBlockFieldsInfo(pRuntimeEnv, pBlock, &pQuery->pFields[pQuery->slot]); - } - - // load primary timestamp - int32_t ret = loadPrimaryTSColumn(pRuntimeEnv, pBlock, pField, &columnBytes); - - vnodeSetDataBlockInfoLoaded(pRuntimeEnv, pMeterObj, fileIdx, loadPrimaryCol); - return ret; - } - - /* failed to load fields info, return with error info */ - if (loadSField && (loadDataBlockFieldsInfo(pRuntimeEnv, pBlock, pField) != 0)) { - return -1; - } - - int64_t st = taosGetTimestampUs(); - - if (loadPrimaryCol) { - if (PRIMARY_TSCOL_LOADED(pQuery)) { - *primaryTSBuf = sdata[0]; - } else { - int32_t ret = loadPrimaryTSColumn(pRuntimeEnv, 
pBlock, pField, &columnBytes); - if (ret != TSDB_CODE_SUCCESS) { - return ret; - } - - pSummary->numOfSeek++; - j += 1; // first column of timestamp is not needed to be read again - } - } - - int32_t ret = 0; - - /* the first round always be 1, the secondary round is determined by queried function */ - int32_t round = (IS_MASTER_SCAN(pRuntimeEnv)) ? 0 : 1; - - while (j < pBlock->numOfCols && i < pQuery->numOfCols) { - if ((*pField)[j].colId < pQuery->colList[i].data.colId) { - ++j; - } else if ((*pField)[j].colId == pQuery->colList[i].data.colId) { - // add additional check for data type - if ((*pField)[j].type != pQuery->colList[i].data.type) { - ret = TSDB_CODE_INVALID_QUERY_MSG; - break; - } - - /* - * during supplementary scan: - * 1. primary ts column (always loaded) - * 2. query specified columns - * 3. in case of filter column required, filter columns must be loaded. - */ - if (pQuery->colList[i].req[round] == 1 || pQuery->colList[i].data.colId == PRIMARYKEY_TIMESTAMP_COL_INDEX) { - // if data of this column in current block are all null, do NOT read it from disk - if ((*pField)[j].numOfNullPoints == pBlock->numOfPoints) { - fillWithNull(pQuery, sdata[i]->data, i, pBlock->numOfPoints); - } else { - columnBytes += (*pField)[j].len + sizeof(TSCKSUM); - ret = loadColumnIntoMem(pQuery, &pRuntimeEnv->vnodeFileInfo, pBlock, *pField, j, sdata[i], tmpBuf, - pRuntimeEnv->secondaryUnzipBuffer, pRuntimeEnv->unzipBufSize); - - pSummary->numOfSeek++; - } - } - ++i; - ++j; - } else { - /* - * pQuery->colList[i].colIdx < (*pFields)[j].colId this column is not existed in current block, - * fill with NULL value - */ - fillWithNull(pQuery, sdata[i]->data, i, pBlock->numOfPoints); - - pSummary->totalGenData += (pBlock->numOfPoints * pQuery->colList[i].data.bytes); - ++i; - } - } - - if (j >= pBlock->numOfCols && i < pQuery->numOfCols) { - // remain columns need to set null value - while (i < pQuery->numOfCols) { - fillWithNull(pQuery, sdata[i]->data, i, pBlock->numOfPoints); - 
- pSummary->totalGenData += (pBlock->numOfPoints * pQuery->colList[i].data.bytes); - ++i; - } - } - - int64_t et = taosGetTimestampUs(); - qTrace("QInfo:%p vid:%d sid:%d id:%s, slot:%d, load block completed, ts loaded:%d, rec:%d, elapsed:%f ms", - GET_QINFO_ADDR(pQuery), pMeterObj->vnode, pMeterObj->sid, pMeterObj->meterId, pQuery->slot, loadPrimaryCol, - pBlock->numOfPoints, (et - st) / 1000.0); - - pSummary->totalBlockSize += columnBytes; - pSummary->loadBlocksUs += (et - st); - pSummary->readDiskBlocks++; - - vnodeSetDataBlockInfoLoaded(pRuntimeEnv, pMeterObj, fileIdx, loadPrimaryCol); - return ret; -} - -SBlockInfo getBlockBasicInfo(SQueryRuntimeEnv *pRuntimeEnv, void *pBlock, int32_t blockType) { - SBlockInfo blockInfo = {0}; - if (IS_FILE_BLOCK(blockType)) { - SCompBlock *pDiskBlock = (SCompBlock *)pBlock; - - blockInfo.keyFirst = pDiskBlock->keyFirst; - blockInfo.keyLast = pDiskBlock->keyLast; - blockInfo.size = pDiskBlock->numOfPoints; - blockInfo.numOfCols = pDiskBlock->numOfCols; - } else { - SCacheBlock *pCacheBlock = (SCacheBlock *)pBlock; - - blockInfo.keyFirst = getTimestampInCacheBlock(pRuntimeEnv, pCacheBlock, 0); - blockInfo.keyLast = getTimestampInCacheBlock(pRuntimeEnv, pCacheBlock, pCacheBlock->numOfPoints - 1); - blockInfo.size = pCacheBlock->numOfPoints; - blockInfo.numOfCols = pCacheBlock->pMeterObj->numOfColumns; - } - - return blockInfo; -} - -/** - * - * @param pQuery - * @param pBlockInfo - * @param forwardStep - * @return TRUE means query not completed, FALSE means query is completed - */ -static bool queryPausedInCurrentBlock(SQuery *pQuery, SBlockInfo *pBlockInfo, int32_t forwardStep) { - // current query completed - if ((pQuery->lastKey > pQuery->ekey && QUERY_IS_ASC_QUERY(pQuery)) || - (pQuery->lastKey < pQuery->ekey && !QUERY_IS_ASC_QUERY(pQuery))) { - setQueryStatus(pQuery, QUERY_COMPLETED); - return true; - } - - // output buffer is full, pause current query - if (Q_STATUS_EQUAL(pQuery->over, QUERY_RESBUF_FULL)) { - 
assert((QUERY_IS_ASC_QUERY(pQuery) && forwardStep + pQuery->pos <= pBlockInfo->size) || - (!QUERY_IS_ASC_QUERY(pQuery) && pQuery->pos - forwardStep + 1 >= 0)); - - return true; - } - - if (Q_STATUS_EQUAL(pQuery->over, QUERY_COMPLETED)) { - return true; - } - - // query completed - if ((pQuery->ekey <= pBlockInfo->keyLast && QUERY_IS_ASC_QUERY(pQuery)) || - (pQuery->ekey >= pBlockInfo->keyFirst && !QUERY_IS_ASC_QUERY(pQuery))) { - setQueryStatus(pQuery, QUERY_COMPLETED); - return true; - } - - return false; -} - -/** - * save triple tuple of (fileId, slot, pos) to SPositionInfo - */ -void savePointPosition(SPositionInfo *position, int32_t fileId, int32_t slot, int32_t pos) { - /* - * slot == -1 && pos == -1 means no data left anymore - */ - assert(fileId >= -1 && slot >= -1 && pos >= -1); - - position->fileId = fileId; - position->slot = slot; - position->pos = pos; -} - -bool isCacheBlockValid(SQuery *pQuery, SCacheBlock *pBlock, SMeterObj *pMeterObj, int32_t slot) { - if (pMeterObj != pBlock->pMeterObj || pBlock->blockId > pQuery->blockId) { - SMeterObj *pNewMeterObj = pBlock->pMeterObj; - char * id = (pNewMeterObj != NULL) ? pNewMeterObj->meterId : NULL; - - dWarn( - "QInfo:%p vid:%d sid:%d id:%s, cache block is overwritten, slot:%d blockId:%d qBlockId:%d, meterObj:%p, " - "blockMeterObj:%p, blockMeter id:%s, first:%d, last:%d, numOfBlocks:%d", - GET_QINFO_ADDR(pQuery), pMeterObj->vnode, pMeterObj->sid, pMeterObj->meterId, pQuery->slot, pBlock->blockId, - pQuery->blockId, pMeterObj, pNewMeterObj, id, pQuery->firstSlot, pQuery->currentSlot, pQuery->numOfBlocks); - - return false; - } - - /* - * The check for empty block: - * pBlock->numOfPoints == 0. There is a empty block, which is caused by allocate-and-write data into cache - * procedure. The block has been allocated but data has not been put into yet. If the block is the last - * block(newly allocated block), abort query. Otherwise, skip it and go on. 
- */ - if (pBlock->numOfPoints == 0) { - dWarn( - "QInfo:%p vid:%d sid:%d id:%s, cache block is empty. slot:%d first:%d, last:%d, numOfBlocks:%d," - "allocated but not write data yet.", - GET_QINFO_ADDR(pQuery), pMeterObj->vnode, pMeterObj->sid, pMeterObj->meterId, slot, pQuery->firstSlot, - pQuery->currentSlot, pQuery->numOfBlocks); - - return false; - } - - SCacheInfo* pCacheInfo = (SCacheInfo*) pMeterObj->pCache; - if (pCacheInfo->commitPoint == pMeterObj->pointsPerBlock && pQuery->slot == pCacheInfo->currentSlot) { - dWarn("QInfo:%p vid:%d sid:%d id:%s, cache block is committed, ignore. slot:%d first:%d, last:%d, numOfBlocks:%d", - GET_QINFO_ADDR(pQuery), pMeterObj->vnode, pMeterObj->sid, pMeterObj->meterId, slot, pQuery->firstSlot, - pQuery->currentSlot, pQuery->numOfBlocks); - return false; - } - - return true; -} - -// todo all functions that call this function should check the returned data blocks status -SCacheBlock *getCacheDataBlock(SMeterObj *pMeterObj, SQueryRuntimeEnv *pRuntimeEnv, int32_t slot) { - SQuery *pQuery = pRuntimeEnv->pQuery; - - SCacheInfo *pCacheInfo = (SCacheInfo *)pMeterObj->pCache; - if (pCacheInfo == NULL || pCacheInfo->cacheBlocks == NULL || slot < 0 || slot >= pCacheInfo->maxBlocks) { - return NULL; - } - - vnodeFreeFields(pQuery); - getBasicCacheInfoSnapshot(pQuery, pCacheInfo, pMeterObj->vnode); - - SCacheBlock *pBlock = pCacheInfo->cacheBlocks[slot]; - if (pBlock == NULL) { // the cache info snapshot must be existed. 
- int32_t curNumOfBlocks = pCacheInfo->numOfBlocks; - int32_t curSlot = pCacheInfo->currentSlot; - - dError( - "QInfo:%p NULL Block In Cache, snapshot (available blocks:%d, last block:%d), current (available blocks:%d, " - "last block:%d), accessed null block:%d, pBlockId:%d", - GET_QINFO_ADDR(pQuery), pQuery->numOfBlocks, pQuery->currentSlot, curNumOfBlocks, curSlot, slot, - pQuery->blockId); - - return NULL; - } - - // block is empty or block does not belongs to current table, return NULL value - if (!isCacheBlockValid(pQuery, pBlock, pMeterObj, slot)) { - return NULL; - } - - // the accessed cache block has been loaded already, return directly - if (vnodeIsDatablockLoaded(pRuntimeEnv, pMeterObj, -1, true) == DISK_BLOCK_NO_NEED_TO_LOAD) { - TSKEY skey = getTimestampInCacheBlock(pRuntimeEnv, pBlock, 0); - TSKEY ekey = getTimestampInCacheBlock(pRuntimeEnv, pBlock, pBlock->numOfPoints - 1); - - dTrace( - "QInfo:%p vid:%d sid:%d id:%s, fileId:%d, cache block has been loaded, no need to load again, ts:%d, " - "slot:%d, brange:%lld-%lld, rows:%d", - GET_QINFO_ADDR(pQuery), pMeterObj->vnode, pMeterObj->sid, pMeterObj->meterId, pQuery->fileId, 1, pQuery->slot, - skey, ekey, pBlock->numOfPoints); - - return &pRuntimeEnv->cacheBlock; - } - - // keep the structure as well as the block data into local buffer - memcpy(&pRuntimeEnv->cacheBlock, pBlock, sizeof(SCacheBlock)); - - SCacheBlock *pNewBlock = &pRuntimeEnv->cacheBlock; - - // the commit data points will be ignored - int32_t offset = 0; - int32_t numOfPoints = pNewBlock->numOfPoints; - if (pQuery->firstSlot == pQuery->commitSlot) { - assert(pQuery->commitPoint >= 0 && pQuery->commitPoint <= pNewBlock->numOfPoints); - - offset = pQuery->commitPoint; - numOfPoints = pNewBlock->numOfPoints - offset; - - if (offset != 0) { - dTrace( - "%p ignore the data in cache block that are commit already, numOfblock:%d slot:%d ignore points:%d. 
" - "first:%d last:%d", - GET_QINFO_ADDR(pQuery), pQuery->numOfBlocks, pQuery->slot, pQuery->commitPoint, pQuery->firstSlot, - pQuery->currentSlot); - } - - pNewBlock->numOfPoints = numOfPoints; - - // current block are all commit already, ignore it - if (pNewBlock->numOfPoints == 0) { - dTrace( - "%p ignore current in cache block that are all commit already, numOfblock:%d slot:%d" - "first:%d last:%d", - GET_QINFO_ADDR(pQuery), pQuery->numOfBlocks, pQuery->slot, pQuery->firstSlot, pQuery->currentSlot); - return NULL; - } - } - - // keep the data from in cache into the temporarily allocated buffer - for (int32_t i = 0; i < pQuery->numOfCols; ++i) { - SColumnInfoEx *pColumnInfoEx = &pQuery->colList[i]; - - int16_t columnIndex = pColumnInfoEx->colIdx; - int16_t columnIndexInBuf = pColumnInfoEx->colIdxInBuf; - - SColumn *pCol = &pMeterObj->schema[columnIndex]; - - int16_t bytes = pCol->bytes; - int16_t type = pCol->type; - - char *dst = pRuntimeEnv->colDataBuffer[columnIndexInBuf]->data; - - if (pQuery->colList[i].colIdx != -1) { - assert(pCol->colId == pQuery->colList[i].data.colId && bytes == pColumnInfoEx->data.bytes && - type == pColumnInfoEx->data.type); - - memcpy(dst, pBlock->offset[columnIndex] + offset * bytes, numOfPoints * bytes); - } else { - setNullN(dst, type, bytes, numOfPoints); - } - } - - assert(numOfPoints == pNewBlock->numOfPoints); - - // if the primary timestamp are not loaded by default, always load it here into buffer - if (!PRIMARY_TSCOL_LOADED(pQuery)) { - memcpy(pRuntimeEnv->primaryColBuffer->data, pBlock->offset[0] + offset * TSDB_KEYSIZE, TSDB_KEYSIZE * numOfPoints); - } - - pQuery->fileId = -1; - pQuery->slot = slot; - - if (!isCacheBlockValid(pQuery, pNewBlock, pMeterObj, slot)) { - return NULL; - } - - /* - * the accessed cache block still belongs to current meterObj, go on - * update the load data block info - */ - vnodeSetDataBlockInfoLoaded(pRuntimeEnv, pMeterObj, -1, true); - - TSKEY skey = getTimestampInCacheBlock(pRuntimeEnv, 
pNewBlock, 0); - TSKEY ekey = getTimestampInCacheBlock(pRuntimeEnv, pNewBlock, numOfPoints - 1); - - dTrace("QInfo:%p vid:%d sid:%d id:%s, fileId:%d, load cache block, ts:%d, slot:%d, brange:%lld-%lld, rows:%d", - GET_QINFO_ADDR(pQuery), pMeterObj->vnode, pMeterObj->sid, pMeterObj->meterId, pQuery->fileId, 1, pQuery->slot, - skey, ekey, numOfPoints); - - return pNewBlock; -} - -static SCompBlock *getDiskDataBlock(SQuery *pQuery, int32_t slot) { - assert(pQuery->fileId >= 0 && slot >= 0 && slot < pQuery->numOfBlocks && pQuery->pBlock != NULL); - return &pQuery->pBlock[slot]; -} - -static void *getGenericDataBlock(SMeterObj *pMeterObj, SQueryRuntimeEnv *pRuntimeEnv, int32_t slot) { - SQuery *pQuery = pRuntimeEnv->pQuery; - - if (IS_DISK_DATA_BLOCK(pQuery)) { - return getDiskDataBlock(pQuery, slot); - } else { - return getCacheDataBlock(pMeterObj, pRuntimeEnv, slot); - } -} - -SBlockInfo getBlockInfo(SQueryRuntimeEnv *pRuntimeEnv) { - SQuery *pQuery = pRuntimeEnv->pQuery; - - void *pBlock = getGenericDataBlock(pRuntimeEnv->pMeterObj, pRuntimeEnv, pQuery->slot); - assert(pBlock != NULL); - - int32_t blockType = IS_DISK_DATA_BLOCK(pQuery) ? BLK_FILE_BLOCK : BLK_CACHE_BLOCK; - return getBlockBasicInfo(pRuntimeEnv, pBlock, blockType); -} - -static int32_t getFileIdFromKey(int32_t vid, TSKEY key) { - SVnodeObj *pVnode = &vnodeList[vid]; - int64_t delta = (int64_t)pVnode->cfg.daysPerFile * tsMsPerDay[(uint8_t)pVnode->cfg.precision]; - - return (int32_t)(key / delta); // set the starting fileId -} - -enum { - QUERY_RANGE_LESS_EQUAL = 0, - QUERY_RANGE_GREATER_EQUAL = 1, -}; - -static bool getQualifiedDataBlock(SMeterObj *pMeterObj, SQueryRuntimeEnv *pRuntimeEnv, int32_t type, - __block_search_fn_t searchFn) { - int32_t blkIdx = -1; - int32_t fid = -1; - int32_t step = (type == QUERY_RANGE_GREATER_EQUAL) ? 
QUERY_ASC_FORWARD_STEP : QUERY_DESC_FORWARD_STEP; - - SQuery *pQuery = pRuntimeEnv->pQuery; - pQuery->slot = -1; - - TSKEY key = pQuery->lastKey; - - SData *primaryColBuffer = pRuntimeEnv->primaryColBuffer; - pQuery->fileId = getFileIdFromKey(pMeterObj->vnode, key) - step; - - while (1) { - if ((fid = getNextDataFileCompInfo(pRuntimeEnv, pMeterObj, step)) < 0) { - break; - } - - blkIdx = binarySearchForBlock(pQuery, key); - - if (type == QUERY_RANGE_GREATER_EQUAL) { - if (key <= pQuery->pBlock[blkIdx].keyLast) { - break; - } else { - blkIdx = -1; - } - } else { - if (key >= pQuery->pBlock[blkIdx].keyFirst) { - break; - } else { - blkIdx = -1; - } - } - } - - /* failed to find qualified point in file, abort */ - if (blkIdx == -1) { - return false; - } - - assert(blkIdx >= 0 && blkIdx < pQuery->numOfBlocks); - - // load first data block into memory failed, caused by disk block error - bool blockLoaded = false; - while (blkIdx < pQuery->numOfBlocks && blkIdx >= 0) { - pQuery->slot = blkIdx; - if (loadDataBlockIntoMem(&pQuery->pBlock[pQuery->slot], &pQuery->pFields[pQuery->slot], pRuntimeEnv, fid, true, - true) == 0) { - SET_DATA_BLOCK_LOADED(pRuntimeEnv->blockStatus); - blockLoaded = true; - break; - } - - dError("QInfo:%p fileId:%d total numOfBlks:%d blockId:%d load into memory failed due to error in disk files", - GET_QINFO_ADDR(pQuery), pQuery->fileId, pQuery->numOfBlocks, blkIdx); - blkIdx += step; - } - - // failed to load data from disk, abort current query - if (blockLoaded == false) { - return false; - } - - SCompBlock *pBlocks = getDiskDataBlock(pQuery, blkIdx); - - // search qualified points in blk, according to primary key (timestamp) column - pQuery->pos = searchFn(primaryColBuffer->data, pBlocks->numOfPoints, key, pQuery->order.order); - assert(pQuery->pos >= 0 && pQuery->fileId >= 0 && pQuery->slot >= 0); - - return true; -} - -static SField *getFieldInfo(SQuery *pQuery, SBlockInfo *pBlockInfo, SField *pFields, int32_t column) { - // no SField info 
exist, or column index larger than the output column, no result. - if (pFields == NULL || column >= pQuery->numOfOutputCols) { - return NULL; - } - - SColIndexEx *pColIndexEx = &pQuery->pSelectExpr[column].pBase.colInfo; - - // for a tag column, no corresponding field info - if (TSDB_COL_IS_TAG(pColIndexEx->flag)) { - return NULL; - } - - /* - * Choose the right column field info by field id, since the file block may be out of date, - * which means the newest table schema is not equalled to the schema of this block. - */ - for (int32_t i = 0; i < pBlockInfo->numOfCols; ++i) { - if (pColIndexEx->colId == pFields[i].colId) { - return &pFields[i]; - } - } - - return NULL; -} - -/* - * not null data in two cases: - * 1. tags data: isTag == true; - * 2. data locate in file, numOfNullPoints == 0 or pFields does not needed to be loaded - */ -static bool hasNullVal(SQuery *pQuery, int32_t col, SBlockInfo *pBlockInfo, SField *pFields, bool isDiskFileBlock) { - bool ret = true; - - if (TSDB_COL_IS_TAG(pQuery->pSelectExpr[col].pBase.colInfo.flag)) { - ret = false; - } else if (isDiskFileBlock) { - if (pFields == NULL) { - ret = false; - } else { - SField *pField = getFieldInfo(pQuery, pBlockInfo, pFields, col); - if (pField != NULL && pField->numOfNullPoints == 0) { - ret = false; - } - } - } - - return ret; -} - -static char *doGetDataBlocks(SQuery *pQuery, SData **data, int32_t colIdx) { - assert(colIdx >= 0 && colIdx < pQuery->numOfCols); - char *pData = data[colIdx]->data; - return pData; -} - -static char *getDataBlocks(SQueryRuntimeEnv *pRuntimeEnv, SArithmeticSupport *sas, int32_t col, int32_t size) { - SQuery * pQuery = pRuntimeEnv->pQuery; - SQLFunctionCtx *pCtx = pRuntimeEnv->pCtx; - - char *dataBlock = NULL; - - int32_t functionId = pQuery->pSelectExpr[col].pBase.functionId; - - if (functionId == TSDB_FUNC_ARITHM) { - sas->pExpr = &pQuery->pSelectExpr[col]; - - // set the start offset to be the lowest start position, no matter asc/desc query order - if 
(QUERY_IS_ASC_QUERY(pQuery)) { - pCtx->startOffset = pQuery->pos; - } else { - pCtx->startOffset = pQuery->pos - (size - 1); - } - - for (int32_t i = 0; i < pQuery->numOfCols; ++i) { - SColumnInfo *pColMsg = &pQuery->colList[i].data; - char * pData = doGetDataBlocks(pQuery, pRuntimeEnv->colDataBuffer, pQuery->colList[i].colIdxInBuf); - - sas->elemSize[i] = pColMsg->bytes; - sas->data[i] = pData + pCtx->startOffset * sas->elemSize[i]; // start from the offset - } - - sas->numOfCols = pQuery->numOfCols; - sas->offset = 0; - } else { // other type of query function - SColIndexEx *pCol = &pQuery->pSelectExpr[col].pBase.colInfo; - if (TSDB_COL_IS_TAG(pCol->flag)) { - dataBlock = NULL; - } else { - /* - * the colIdx is acquired from the first meter of all qualified meters in this vnode during query prepare stage, - * the remain meter may not have the required column in cache actually. - * So, the validation of required column in cache with the corresponding meter schema is reinforced. - */ - dataBlock = doGetDataBlocks(pQuery, pRuntimeEnv->colDataBuffer, pCol->colIdxInBuf); - } - } - - return dataBlock; -} - -static SWindowResult *getWindowResult(SWindowResInfo *pWindowResInfo, int32_t slot) { - assert(pWindowResInfo != NULL && slot >= 0 && slot < pWindowResInfo->size); - return &pWindowResInfo->pResult[slot]; -} - -static bool isWindowResClosed(SWindowResInfo *pWindowResInfo, int32_t slot) { - return (getWindowResult(pWindowResInfo, slot)->status.closed == true); -} - -static int32_t curTimeWindow(SWindowResInfo *pWindowResInfo) { - assert(pWindowResInfo->curIndex >= 0 && pWindowResInfo->curIndex < pWindowResInfo->size); - return pWindowResInfo->curIndex; -} - -static SWindowResult *doSetTimeWindowFromKey(SQueryRuntimeEnv *pRuntimeEnv, SWindowResInfo *pWindowResInfo, char *pData, - int16_t bytes) { - SQuery *pQuery = pRuntimeEnv->pQuery; - - int32_t *p1 = (int32_t *)taosHashGet(pWindowResInfo->hashList, pData, bytes); - if (p1 != NULL) { - pWindowResInfo->curIndex = 
*p1; - } else { // more than the capacity, reallocate the resources - if (pWindowResInfo->size >= pWindowResInfo->capacity) { - int64_t newCap = pWindowResInfo->capacity * 2; - - char *t = realloc(pWindowResInfo->pResult, newCap * sizeof(SWindowResult)); - if (t != NULL) { - pWindowResInfo->pResult = (SWindowResult *)t; - memset(&pWindowResInfo->pResult[pWindowResInfo->capacity], 0, sizeof(SWindowResult) * pWindowResInfo->capacity); - } else { - // todo - } - - for (int32_t i = pWindowResInfo->capacity; i < newCap; ++i) { - SPosInfo pos = {-1, -1}; - createQueryResultInfo(pQuery, &pWindowResInfo->pResult[i], pRuntimeEnv->stableQuery, &pos); - } - - pWindowResInfo->capacity = newCap; - } - - // add a new result set for a new group - pWindowResInfo->curIndex = pWindowResInfo->size++; - taosHashPut(pWindowResInfo->hashList, pData, bytes, (char *)&pWindowResInfo->curIndex, sizeof(int32_t)); - } - - return getWindowResult(pWindowResInfo, pWindowResInfo->curIndex); -} - -// get the correct time window according to the handled timestamp -static STimeWindow getActiveTimeWindow(SWindowResInfo *pWindowResInfo, int64_t ts, SQuery *pQuery) { - STimeWindow w = {0}; - - if (pWindowResInfo->curIndex == -1) { // the first window, from the previous stored value - w.skey = pWindowResInfo->prevSKey; - w.ekey = w.skey + pQuery->intervalTime - 1; - } else { - int32_t slot = curTimeWindow(pWindowResInfo); - w = getWindowResult(pWindowResInfo, slot)->window; - } - - if (w.skey > ts || w.ekey < ts) { - int64_t st = w.skey; - - if (st > ts) { - st -= ((st - ts + pQuery->slidingTime - 1) / pQuery->slidingTime) * pQuery->slidingTime; - } - - int64_t et = st + pQuery->intervalTime - 1; - if (et < ts) { - st += ((ts - et + pQuery->slidingTime - 1) / pQuery->slidingTime) * pQuery->slidingTime; - } - - w.skey = st; - w.ekey = w.skey + pQuery->intervalTime - 1; - } - - /* - * query border check, skey should not be bounded by the query time range, since the value skey will - * be used as the time 
window index value. So we only change ekey of time window accordingly. - */ - if (w.ekey > pQuery->ekey && QUERY_IS_ASC_QUERY(pQuery)) { - w.ekey = pQuery->ekey; - } - - assert(ts >= w.skey && ts <= w.ekey && w.skey != 0); - - return w; -} - -static int32_t addNewWindowResultBuf(SWindowResult *pWindowRes, SDiskbasedResultBuf *pResultBuf, int32_t sid, - int32_t numOfRowsPerPage) { - if (pWindowRes->pos.pageId != -1) { - return 0; - } - - tFilePage *pData = NULL; - - // in the first scan, new space needed for results - int32_t pageId = -1; - SIDList list = getDataBufPagesIdList(pResultBuf, sid); - - if (list.size == 0) { - pData = getNewDataBuf(pResultBuf, sid, &pageId); - } else { - pageId = getLastPageId(&list); - pData = getResultBufferPageById(pResultBuf, pageId); - - if (pData->numOfElems >= numOfRowsPerPage) { - pData = getNewDataBuf(pResultBuf, sid, &pageId); - if (pData != NULL) { - assert(pData->numOfElems == 0); // number of elements must be 0 for new allocated buffer - } - } - } - - if (pData == NULL) { - return -1; - } - - // set the number of rows in current disk page - if (pWindowRes->pos.pageId == -1) { // not allocated yet, allocate new buffer - pWindowRes->pos.pageId = pageId; - pWindowRes->pos.rowId = pData->numOfElems++; - } - - return 0; -} - -static int32_t setWindowOutputBufByKey(SQueryRuntimeEnv *pRuntimeEnv, SWindowResInfo *pWindowResInfo, int32_t sid, - STimeWindow *win) { - assert(win->skey <= win->ekey); - SDiskbasedResultBuf *pResultBuf = pRuntimeEnv->pResultBuf; - - SWindowResult *pWindowRes = doSetTimeWindowFromKey(pRuntimeEnv, pWindowResInfo, (char *)&win->skey, TSDB_KEYSIZE); - if (pWindowRes == NULL) { - return -1; - } - - // not assign result buffer yet, add new result buffer - if (pWindowRes->pos.pageId == -1) { - int32_t ret = addNewWindowResultBuf(pWindowRes, pResultBuf, sid, pRuntimeEnv->numOfRowsPerPage); - if (ret != 0) { - return -1; - } - } - - // set time window for current result - pWindowRes->window = *win; - - 
setWindowResOutputBuf(pRuntimeEnv, pWindowRes); - initCtxOutputBuf(pRuntimeEnv); - - return TSDB_CODE_SUCCESS; -} - -static SWindowStatus *getTimeWindowResStatus(SWindowResInfo *pWindowResInfo, int32_t slot) { - assert(slot >= 0 && slot < pWindowResInfo->size); - return &pWindowResInfo->pResult[slot].status; -} - -static int32_t getForwardStepsInBlock(int32_t numOfPoints, __block_search_fn_t searchFn, TSKEY ekey, int16_t pos, - int16_t order, int64_t *pData) { - int32_t endPos = searchFn((char *)pData, numOfPoints, ekey, order); - int32_t forwardStep = 0; - - if (endPos >= 0) { - forwardStep = (order == TSQL_SO_ASC) ? (endPos - pos) : (pos - endPos); - assert(forwardStep >= 0); - - // endPos data is equalled to the key so, we do need to read the element in endPos - if (pData[endPos] == ekey) { - forwardStep += 1; - } - } - - return forwardStep; -} - -/** - * NOTE: the query status only set for the first scan of master scan. - */ -static void doCheckQueryCompleted(SQueryRuntimeEnv *pRuntimeEnv, TSKEY lastKey, SWindowResInfo *pWindowResInfo) { - SQuery *pQuery = pRuntimeEnv->pQuery; - if (pRuntimeEnv->scanFlag != MASTER_SCAN || (!isIntervalQuery(pQuery))) { - return; - } - - // no qualified results exist, abort check - if (pWindowResInfo->size == 0) { - return; - } - - // query completed - if ((lastKey >= pQuery->ekey && QUERY_IS_ASC_QUERY(pQuery)) || - (lastKey <= pQuery->ekey && !QUERY_IS_ASC_QUERY(pQuery))) { - closeAllTimeWindow(pWindowResInfo); - - pWindowResInfo->curIndex = pWindowResInfo->size - 1; - setQueryStatus(pQuery, QUERY_COMPLETED | QUERY_RESBUF_FULL); - } else { // set the current index to be the last unclosed window - int32_t i = 0; - int64_t skey = 0; - - for (i = 0; i < pWindowResInfo->size; ++i) { - SWindowResult *pResult = &pWindowResInfo->pResult[i]; - if (pResult->status.closed) { - continue; - } - - if ((pResult->window.ekey <= lastKey && QUERY_IS_ASC_QUERY(pQuery)) || - (pResult->window.skey >= lastKey && !QUERY_IS_ASC_QUERY(pQuery))) { - 
closeTimeWindow(pWindowResInfo, i); - } else { - skey = pResult->window.skey; - break; - } - } - - // all windows are closed, set the last one to be the skey - if (skey == 0) { - assert(i == pWindowResInfo->size); - pWindowResInfo->curIndex = pWindowResInfo->size - 1; - } else { - pWindowResInfo->curIndex = i; - } - - pWindowResInfo->prevSKey = pWindowResInfo->pResult[pWindowResInfo->curIndex].window.skey; - - // the number of completed slots are larger than the threshold, dump to client immediately. - int32_t n = numOfClosedTimeWindow(pWindowResInfo); - if (n > pWindowResInfo->threshold) { - setQueryStatus(pQuery, QUERY_RESBUF_FULL); - } - - dTrace("QInfo:%p total window:%d, closed:%d", GET_QINFO_ADDR(pQuery), pWindowResInfo->size, n); - } - - assert(pWindowResInfo->prevSKey != 0); -} - -static int32_t getNumOfRowsInTimeWindow(SQuery *pQuery, SBlockInfo *pBlockInfo, TSKEY *pPrimaryColumn, int32_t startPos, - TSKEY ekey, __block_search_fn_t searchFn, bool updateLastKey) { - assert(startPos >= 0 && startPos < pBlockInfo->size); - - int32_t num = -1; - int32_t order = pQuery->order.order; - - int32_t step = GET_FORWARD_DIRECTION_FACTOR(order); - - if (QUERY_IS_ASC_QUERY(pQuery)) { - if (ekey < pBlockInfo->keyLast) { - num = getForwardStepsInBlock(pBlockInfo->size, searchFn, ekey, startPos, order, pPrimaryColumn); - if (num == 0) { // no qualified data in current block, do not update the lastKey value - assert(ekey < pPrimaryColumn[startPos]); - } else { - if (updateLastKey) { - pQuery->lastKey = pPrimaryColumn[startPos + (num - 1)] + step; - } - } - } else { - num = pBlockInfo->size - startPos; - if (updateLastKey) { - pQuery->lastKey = pBlockInfo->keyLast + step; - } - } - } else { // desc - if (ekey > pBlockInfo->keyFirst) { - num = getForwardStepsInBlock(pBlockInfo->size, searchFn, ekey, startPos, order, pPrimaryColumn); - if (num == 0) { // no qualified data in current block, do not update the lastKey value - assert(ekey > pPrimaryColumn[startPos]); - } else { - 
if (updateLastKey) { - pQuery->lastKey = pPrimaryColumn[startPos - (num - 1)] + step; - } - } - } else { - num = startPos + 1; - if (updateLastKey) { - pQuery->lastKey = pBlockInfo->keyFirst + step; - } - } - } - - assert(num >= 0); - return num; -} - -static void doBlockwiseApplyFunctions(SQueryRuntimeEnv *pRuntimeEnv, SWindowStatus *pStatus, STimeWindow *pWin, - int32_t startPos, int32_t forwardStep) { - SQuery * pQuery = pRuntimeEnv->pQuery; - SQLFunctionCtx *pCtx = pRuntimeEnv->pCtx; - - if (IS_MASTER_SCAN(pRuntimeEnv) || pStatus->closed) { - for (int32_t k = 0; k < pQuery->numOfOutputCols; ++k) { - pCtx[k].nStartQueryTimestamp = pWin->skey; - pCtx[k].size = forwardStep; - pCtx[k].startOffset = (QUERY_IS_ASC_QUERY(pQuery)) ? startPos : startPos - (forwardStep - 1); - - int32_t functionId = pQuery->pSelectExpr[k].pBase.functionId; - if (functionNeedToExecute(pRuntimeEnv, &pCtx[k], functionId)) { - aAggs[functionId].xFunction(&pCtx[k]); - } - } - } -} - -static void doRowwiseApplyFunctions(SQueryRuntimeEnv *pRuntimeEnv, SWindowStatus *pStatus, STimeWindow *pWin, - int32_t offset) { - SQuery * pQuery = pRuntimeEnv->pQuery; - SQLFunctionCtx *pCtx = pRuntimeEnv->pCtx; - - if (IS_MASTER_SCAN(pRuntimeEnv) || pStatus->closed) { - for (int32_t k = 0; k < pQuery->numOfOutputCols; ++k) { - pCtx[k].nStartQueryTimestamp = pWin->skey; - - int32_t functionId = pQuery->pSelectExpr[k].pBase.functionId; - if (functionNeedToExecute(pRuntimeEnv, &pCtx[k], functionId)) { - aAggs[functionId].xFunctionF(&pCtx[k], offset); - } - } - } -} - -static int32_t getNextQualifiedWindow(SQueryRuntimeEnv *pRuntimeEnv, STimeWindow *pNextWin, - SWindowResInfo *pWindowResInfo, SBlockInfo *pBlockInfo, TSKEY *primaryKeys, - __block_search_fn_t searchFn) { - SQuery *pQuery = pRuntimeEnv->pQuery; - - while (1) { - getNextTimeWindow(pQuery, pNextWin); - - if (pWindowResInfo->startTime > pNextWin->skey || (pNextWin->skey > pQuery->ekey && QUERY_IS_ASC_QUERY(pQuery)) || - (pNextWin->ekey < pQuery->ekey 
&& !QUERY_IS_ASC_QUERY(pQuery))) { - return -1; - } - - // next time window is not in current block - if ((pNextWin->skey > pBlockInfo->keyLast && QUERY_IS_ASC_QUERY(pQuery)) || - (pNextWin->ekey < pBlockInfo->keyFirst && !QUERY_IS_ASC_QUERY(pQuery))) { - return -1; - } - - TSKEY startKey = QUERY_IS_ASC_QUERY(pQuery) ? pNextWin->skey : pNextWin->ekey; - int32_t startPos = searchFn((char *)primaryKeys, pBlockInfo->size, startKey, pQuery->order.order); - - /* - * This time window does not cover any data, try next time window, - * this case may happen when the time window is too small - */ - if ((primaryKeys[startPos] > pNextWin->ekey && QUERY_IS_ASC_QUERY(pQuery)) || - (primaryKeys[startPos] < pNextWin->skey && !QUERY_IS_ASC_QUERY(pQuery))) { - continue; - } - -// if (pNextWin->ekey > pQuery->ekey && QUERY_IS_ASC_QUERY(pQuery)) { -// pNextWin->ekey = pQuery->ekey; -// } -// if (pNextWin->skey < pQuery->ekey && !QUERY_IS_ASC_QUERY(pQuery)) { -// pNextWin->skey = pQuery->ekey; -// } - - return startPos; - } -} - -static TSKEY reviseWindowEkey(SQuery *pQuery, STimeWindow *pWindow) { - TSKEY ekey = -1; - if (QUERY_IS_ASC_QUERY(pQuery)) { - ekey = pWindow->ekey; - if (ekey > pQuery->ekey) { - ekey = pQuery->ekey; - } - } else { - ekey = pWindow->skey; - if (ekey < pQuery->ekey) { - ekey = pQuery->ekey; - } - } - - return ekey; -} - -/** - * - * @param pRuntimeEnv - * @param forwardStep - * @param primaryKeyCol - * @param pFields - * @param isDiskFileBlock - * @return the incremental number of output value, so it maybe 0 for fixed number of query, - * such as count/min/max etc. 
- */ -static int32_t blockwiseApplyAllFunctions(SQueryRuntimeEnv *pRuntimeEnv, int32_t forwardStep, SField *pFields, - SBlockInfo *pBlockInfo, SWindowResInfo *pWindowResInfo, - __block_search_fn_t searchFn) { - SQLFunctionCtx *pCtx = pRuntimeEnv->pCtx; - SQuery * pQuery = pRuntimeEnv->pQuery; - TSKEY * primaryKeyCol = (TSKEY *)pRuntimeEnv->primaryColBuffer->data; - - bool isDiskFileBlock = IS_FILE_BLOCK(pRuntimeEnv->blockStatus); - int64_t prevNumOfRes = getNumOfResult(pRuntimeEnv); - - SArithmeticSupport *sasArray = calloc((size_t)pQuery->numOfOutputCols, sizeof(SArithmeticSupport)); - - for (int32_t k = 0; k < pQuery->numOfOutputCols; ++k) { - int32_t functionId = pQuery->pSelectExpr[k].pBase.functionId; - - SField dummyField = {0}; - - bool hasNull = hasNullVal(pQuery, k, pBlockInfo, pFields, isDiskFileBlock); - char *dataBlock = getDataBlocks(pRuntimeEnv, &sasArray[k], k, forwardStep); - - SField *tpField = NULL; - - if (pFields != NULL) { - tpField = getFieldInfo(pQuery, pBlockInfo, pFields, k); - /* - * Field info not exist, the required column is not present in current block, - * so all data must be null value in current block. 
- */ - if (tpField == NULL) { - tpField = &dummyField; - tpField->numOfNullPoints = (int32_t)forwardStep; - } - } - - setExecParams(pQuery, &pCtx[k], pQuery->skey, dataBlock, (char *)primaryKeyCol, forwardStep, functionId, tpField, - hasNull, pRuntimeEnv->blockStatus, &sasArray[k], pRuntimeEnv->scanFlag); - } - - int32_t step = GET_FORWARD_DIRECTION_FACTOR(pQuery->order.order); - if (isIntervalQuery(pQuery)) { - int32_t offset = GET_COL_DATA_POS(pQuery, 0, step); - TSKEY ts = primaryKeyCol[offset]; - - STimeWindow win = getActiveTimeWindow(pWindowResInfo, ts, pQuery); - if (setWindowOutputBufByKey(pRuntimeEnv, pWindowResInfo, pRuntimeEnv->pMeterObj->sid, &win) != TSDB_CODE_SUCCESS) { - return 0; - } - - TSKEY ekey = reviseWindowEkey(pQuery, &win); - forwardStep = getNumOfRowsInTimeWindow(pQuery, pBlockInfo, primaryKeyCol, pQuery->pos, ekey, searchFn, true); - - SWindowStatus *pStatus = getTimeWindowResStatus(pWindowResInfo, curTimeWindow(pWindowResInfo)); - doBlockwiseApplyFunctions(pRuntimeEnv, pStatus, &win, pQuery->pos, forwardStep); - - int32_t index = pWindowResInfo->curIndex; - STimeWindow nextWin = win; - - while (1) { - int32_t startPos = - getNextQualifiedWindow(pRuntimeEnv, &nextWin, pWindowResInfo, pBlockInfo, primaryKeyCol, searchFn); - if (startPos < 0) { - break; - } - - // null data, failed to allocate more memory buffer - int32_t sid = pRuntimeEnv->pMeterObj->sid; - if (setWindowOutputBufByKey(pRuntimeEnv, pWindowResInfo, sid, &nextWin) != TSDB_CODE_SUCCESS) { - break; - } - - ekey = reviseWindowEkey(pQuery, &nextWin); - forwardStep = getNumOfRowsInTimeWindow(pQuery, pBlockInfo, primaryKeyCol, startPos, ekey, searchFn, true); - - pStatus = getTimeWindowResStatus(pWindowResInfo, curTimeWindow(pWindowResInfo)); - doBlockwiseApplyFunctions(pRuntimeEnv, pStatus, &nextWin, startPos, forwardStep); - } - - pWindowResInfo->curIndex = index; - } else { - /* - * the sqlfunctionCtx parameters should be set done before all functions are invoked, - * since the 
selectivity + tag_prj query needs all parameters been set done. - * tag_prj function are changed to be TSDB_FUNC_TAG_DUMMY - */ - for (int32_t k = 0; k < pQuery->numOfOutputCols; ++k) { - int32_t functionId = pQuery->pSelectExpr[k].pBase.functionId; - if (functionNeedToExecute(pRuntimeEnv, &pCtx[k], functionId)) { - aAggs[functionId].xFunction(&pCtx[k]); - } - } - } - - /* - * No need to calculate the number of output results for group-by normal columns, interval query - * because the results of group by normal column is put into intermediate buffer. - */ - int32_t num = 0; - if (!isIntervalQuery(pQuery)) { - num = getNumOfResult(pRuntimeEnv) - prevNumOfRes; - } - - tfree(sasArray); - return (int32_t)num; -} - -/** - * if sfields is null - * 1. count(*)/spread(ts) is invoked - * 2. this column does not exists - * - * first filter the data block according to the value filter condition, then, if the top/bottom query applied, - * invoke the filter function to decide if the data block need to be accessed or not. 
- * TODO handle the whole data block is NULL situation - * @param pQuery - * @param pField - * @return - */ -static bool needToLoadDataBlock(SQuery *pQuery, SField *pField, SQLFunctionCtx *pCtx, int32_t numOfTotalPoints) { - if (pField == NULL) { - return false; // no need to load data - } - - for (int32_t k = 0; k < pQuery->numOfFilterCols; ++k) { - SSingleColumnFilterInfo *pFilterInfo = &pQuery->pFilterInfo[k]; - int32_t colIndex = pFilterInfo->info.colIdx; - - // this column not valid in current data block - if (colIndex < 0 || pField[colIndex].colId != pFilterInfo->info.data.colId) { - continue; - } - - // not support pre-filter operation on binary/nchar data type - if (!vnodeSupportPrefilter(pFilterInfo->info.data.type)) { - continue; - } - - // all points in current column are NULL, no need to check its boundary value - if (pField[colIndex].numOfNullPoints == numOfTotalPoints) { - continue; - } - - if (pFilterInfo->info.data.type == TSDB_DATA_TYPE_FLOAT) { - float minval = *(double *)(&pField[colIndex].min); - float maxval = *(double *)(&pField[colIndex].max); - - for (int32_t i = 0; i < pFilterInfo->numOfFilters; ++i) { - if (pFilterInfo->pFilters[i].fp(&pFilterInfo->pFilters[i], (char *)&minval, (char *)&maxval)) { - return true; - } - } - } else { - for (int32_t i = 0; i < pFilterInfo->numOfFilters; ++i) { - if (pFilterInfo->pFilters[i].fp(&pFilterInfo->pFilters[i], (char *)&pField[colIndex].min, - (char *)&pField[colIndex].max)) { - return true; - } - } - } - } - - // todo disable this opt code block temporarily - // for (int32_t i = 0; i < pQuery->numOfOutputCols; ++i) { - // int32_t functId = pQuery->pSelectExpr[i].pBase.functionId; - // if (functId == TSDB_FUNC_TOP || functId == TSDB_FUNC_BOTTOM) { - // return top_bot_datablock_filter(&pCtx[i], functId, (char *)&pField[i].min, (char *)&pField[i].max); - // } - // } - - return true; -} - -int32_t initWindowResInfo(SWindowResInfo *pWindowResInfo, SQueryRuntimeEnv *pRuntimeEnv, int32_t size, - int32_t 
threshold, int16_t type) { - if (size < threshold) { - size = threshold; - } - - pWindowResInfo->capacity = size; - pWindowResInfo->threshold = threshold; - - pWindowResInfo->type = type; - - _hash_fn_t fn = taosGetDefaultHashFunction(type); - pWindowResInfo->hashList = taosHashInit(threshold, fn, false); - - pWindowResInfo->curIndex = -1; - pWindowResInfo->size = 0; - - // use the pointer arraylist - pWindowResInfo->pResult = calloc(threshold, sizeof(SWindowResult)); - for (int32_t i = 0; i < threshold; ++i) { - SPosInfo posInfo = {-1, -1}; - createQueryResultInfo(pRuntimeEnv->pQuery, &pWindowResInfo->pResult[i], pRuntimeEnv->stableQuery, &posInfo); - } - - return TSDB_CODE_SUCCESS; -} - -void cleanupTimeWindowInfo(SWindowResInfo *pWindowResInfo, SQueryRuntimeEnv *pRuntimeEnv) { - if (pWindowResInfo == NULL || pWindowResInfo->capacity == 0) { - assert(pWindowResInfo->hashList == NULL && pWindowResInfo->pResult == NULL); - return; - } - - for (int32_t i = 0; i < pWindowResInfo->size; ++i) { - SWindowResult *pResult = &pWindowResInfo->pResult[i]; - destroyTimeWindowRes(pResult, pRuntimeEnv->pQuery->numOfOutputCols); - } - - taosHashCleanup(pWindowResInfo->hashList); - tfree(pWindowResInfo->pResult); -} - -void resetTimeWindowInfo(SQueryRuntimeEnv *pRuntimeEnv, SWindowResInfo *pWindowResInfo) { - if (pWindowResInfo == NULL || pWindowResInfo->capacity == 0) { - return; - } - - for (int32_t i = 0; i < pWindowResInfo->size; ++i) { - SWindowResult *pWindowRes = &pWindowResInfo->pResult[i]; - clearTimeWindowResBuf(pRuntimeEnv, pWindowRes); - } - - pWindowResInfo->curIndex = -1; - taosHashCleanup(pWindowResInfo->hashList); - pWindowResInfo->size = 0; - - _hash_fn_t fn = taosGetDefaultHashFunction(pWindowResInfo->type); - pWindowResInfo->hashList = taosHashInit(pWindowResInfo->capacity, fn, false); - - pWindowResInfo->startTime = 0; - pWindowResInfo->prevSKey = 0; -} - -void clearFirstNTimeWindow(SQueryRuntimeEnv *pRuntimeEnv, int32_t num) { - SWindowResInfo *pWindowResInfo 
= &pRuntimeEnv->windowResInfo; - if (pWindowResInfo == NULL || pWindowResInfo->capacity == 0 || pWindowResInfo->size == 0 || num == 0) { - return; - } - - int32_t numOfClosed = numOfClosedTimeWindow(pWindowResInfo); - assert(num >= 0 && num <= numOfClosed); - - for (int32_t i = 0; i < num; ++i) { - SWindowResult *pResult = &pWindowResInfo->pResult[i]; - if (pResult->status.closed) { // remove the window slot from hash table - taosHashRemove(pWindowResInfo->hashList, (const char *)&pResult->window.skey, TSDB_KEYSIZE); - } else { - break; - } - } - - int32_t remain = pWindowResInfo->size - num; - - // clear all the closed windows from the window list - for (int32_t k = 0; k < remain; ++k) { - copyTimeWindowResBuf(pRuntimeEnv, &pWindowResInfo->pResult[k], &pWindowResInfo->pResult[num + k]); - } - - // move the unclosed window in the front of the window list - for (int32_t k = remain; k < pWindowResInfo->size; ++k) { - SWindowResult *pWindowRes = &pWindowResInfo->pResult[k]; - clearTimeWindowResBuf(pRuntimeEnv, pWindowRes); - } - - pWindowResInfo->size = remain; - - for (int32_t k = 0; k < pWindowResInfo->size; ++k) { - SWindowResult *pResult = &pWindowResInfo->pResult[k]; - int32_t *p = (int32_t *)taosHashGet(pWindowResInfo->hashList, (const char *)&pResult->window.skey, - TSDB_KEYSIZE); - int32_t v = (*p - num); - assert(v >= 0 && v <= pWindowResInfo->size); - - // todo add the update function for hash table - taosHashRemove(pWindowResInfo->hashList, (const char *)&pResult->window.skey, TSDB_KEYSIZE); - taosHashPut(pWindowResInfo->hashList, (const char *)&pResult->window.skey, TSDB_KEYSIZE, (char *)&v, - sizeof(int32_t)); - } - - pWindowResInfo->curIndex = -1; -} - -void clearClosedTimeWindow(SQueryRuntimeEnv *pRuntimeEnv) { - SWindowResInfo *pWindowResInfo = &pRuntimeEnv->windowResInfo; - if (pWindowResInfo == NULL || pWindowResInfo->capacity == 0 || pWindowResInfo->size == 0) { - return; - } - - int32_t numOfClosed = numOfClosedTimeWindow(pWindowResInfo); - 
clearFirstNTimeWindow(pRuntimeEnv, numOfClosed); -} - -int32_t numOfClosedTimeWindow(SWindowResInfo *pWindowResInfo) { - int32_t i = 0; - while (i < pWindowResInfo->size && pWindowResInfo->pResult[i].status.closed) { - ++i; - } - - return i; -} - -void closeTimeWindow(SWindowResInfo *pWindowResInfo, int32_t slot) { - getWindowResult(pWindowResInfo, slot)->status.closed = true; -} - -void closeAllTimeWindow(SWindowResInfo *pWindowResInfo) { - assert(pWindowResInfo->size >= 0 && pWindowResInfo->capacity >= pWindowResInfo->size); - - for (int32_t i = 0; i < pWindowResInfo->size; ++i) { - pWindowResInfo->pResult[i].status.closed = true; - } -} - -static int32_t setGroupResultOutputBuf(SQueryRuntimeEnv *pRuntimeEnv, char *pData, int16_t type, int16_t bytes) { - if (isNull(pData, type)) { // ignore the null value - return -1; - } - - int32_t GROUPRESULTID = 1; - - SDiskbasedResultBuf *pResultBuf = pRuntimeEnv->pResultBuf; - - SWindowResult *pWindowRes = doSetTimeWindowFromKey(pRuntimeEnv, &pRuntimeEnv->windowResInfo, pData, bytes); - if (pWindowRes == NULL) { - return -1; - } - - // not assign result buffer yet, add new result buffer - if (pWindowRes->pos.pageId == -1) { - int32_t ret = addNewWindowResultBuf(pWindowRes, pResultBuf, GROUPRESULTID, pRuntimeEnv->numOfRowsPerPage); - if (ret != 0) { - return -1; - } - } - - setWindowResOutputBuf(pRuntimeEnv, pWindowRes); - initCtxOutputBuf(pRuntimeEnv); - return TSDB_CODE_SUCCESS; -} - -static char *getGroupbyColumnData(SQuery *pQuery, SData **data, int16_t *type, int16_t *bytes) { - char *groupbyColumnData = NULL; - - SSqlGroupbyExpr *pGroupbyExpr = pQuery->pGroupbyExpr; - - for (int32_t k = 0; k < pGroupbyExpr->numOfGroupCols; ++k) { - if (pGroupbyExpr->columnInfo[k].flag == TSDB_COL_TAG) { - continue; - } - - int16_t colIndex = -1; - int32_t colId = pGroupbyExpr->columnInfo[k].colId; - - for (int32_t i = 0; i < pQuery->numOfCols; ++i) { - if (pQuery->colList[i].data.colId == colId) { - colIndex = i; - break; - } - } - - 
assert(colIndex >= 0 && colIndex < pQuery->numOfCols); - - *type = pQuery->colList[colIndex].data.type; - *bytes = pQuery->colList[colIndex].data.bytes; - - groupbyColumnData = doGetDataBlocks(pQuery, data, pQuery->colList[colIndex].colIdxInBuf); - break; - } - - return groupbyColumnData; -} - -static int32_t doTSJoinFilter(SQueryRuntimeEnv *pRuntimeEnv, int32_t offset) { - SQuery *pQuery = pRuntimeEnv->pQuery; - - STSElem elem = tsBufGetElem(pRuntimeEnv->pTSBuf); - SQLFunctionCtx *pCtx = pRuntimeEnv->pCtx; - - // compare tag first - if (pCtx[0].tag.i64Key != elem.tag) { - return TS_JOIN_TAG_NOT_EQUALS; - } - - TSKEY key = *(TSKEY *)(pCtx[0].aInputElemBuf + TSDB_KEYSIZE * offset); - -#if defined(_DEBUG_VIEW) - printf("elem in comp ts file:%" PRId64 ", key:%" PRId64 - ", tag:%d, id:%s, query order:%d, ts order:%d, traverse:%d, index:%d\n", - elem.ts, key, elem.tag, pRuntimeEnv->pMeterObj->meterId, pQuery->order.order, pRuntimeEnv->pTSBuf->tsOrder, - pRuntimeEnv->pTSBuf->cur.order, pRuntimeEnv->pTSBuf->cur.tsIndex); -#endif - - if (QUERY_IS_ASC_QUERY(pQuery)) { - if (key < elem.ts) { - return TS_JOIN_TS_NOT_EQUALS; - } else if (key > elem.ts) { - assert(false); - } - } else { - if (key > elem.ts) { - return TS_JOIN_TS_NOT_EQUALS; - } else if (key < elem.ts) { - assert(false); - } - } - - return TS_JOIN_TS_EQUAL; -} - -static bool functionNeedToExecute(SQueryRuntimeEnv *pRuntimeEnv, SQLFunctionCtx *pCtx, int32_t functionId) { - SResultInfo *pResInfo = GET_RES_INFO(pCtx); - - if (pResInfo->complete || functionId == TSDB_FUNC_TAG_DUMMY || functionId == TSDB_FUNC_TS_DUMMY) { - return false; - } - - // in the supplementary scan, only the following functions need to be executed - if (IS_SUPPLEMENT_SCAN(pRuntimeEnv) && - !(functionId == TSDB_FUNC_LAST_DST || functionId == TSDB_FUNC_FIRST_DST || functionId == TSDB_FUNC_FIRST || - functionId == TSDB_FUNC_LAST || functionId == TSDB_FUNC_TAG || functionId == TSDB_FUNC_TS)) { - return false; - } - - return true; -} - -static 
int32_t rowwiseApplyAllFunctions(SQueryRuntimeEnv *pRuntimeEnv, int32_t *forwardStep, SField *pFields, - SBlockInfo *pBlockInfo, SWindowResInfo *pWindowResInfo) { - SQLFunctionCtx *pCtx = pRuntimeEnv->pCtx; - SQuery * pQuery = pRuntimeEnv->pQuery; - TSKEY * primaryKeyCol = (TSKEY *)pRuntimeEnv->primaryColBuffer->data; - - bool isDiskFileBlock = IS_FILE_BLOCK(pRuntimeEnv->blockStatus); - SData **data = pRuntimeEnv->colDataBuffer; - - int64_t prevNumOfRes = 0; - bool groupbyStateValue = isGroupbyNormalCol(pQuery->pGroupbyExpr); - - if (!groupbyStateValue) { - prevNumOfRes = getNumOfResult(pRuntimeEnv); - } - - SArithmeticSupport *sasArray = calloc((size_t)pQuery->numOfOutputCols, sizeof(SArithmeticSupport)); - - int16_t type = 0; - int16_t bytes = 0; - - char *groupbyColumnData = NULL; - if (groupbyStateValue) { - groupbyColumnData = getGroupbyColumnData(pQuery, data, &type, &bytes); - } - - for (int32_t k = 0; k < pQuery->numOfOutputCols; ++k) { - int32_t functionId = pQuery->pSelectExpr[k].pBase.functionId; - - bool hasNull = hasNullVal(pQuery, k, pBlockInfo, pFields, isDiskFileBlock); - char *dataBlock = getDataBlocks(pRuntimeEnv, &sasArray[k], k, *forwardStep); - - TSKEY ts = pQuery->skey; // QUERY_IS_ASC_QUERY(pQuery) ? 
pRuntimeEnv->intervalWindow.skey : - // pRuntimeEnv->intervalWindow.ekey; - setExecParams(pQuery, &pCtx[k], ts, dataBlock, (char *)primaryKeyCol, (*forwardStep), functionId, pFields, hasNull, - pRuntimeEnv->blockStatus, &sasArray[k], pRuntimeEnv->scanFlag); - } - - // set the input column data - for (int32_t k = 0; k < pQuery->numOfFilterCols; ++k) { - SSingleColumnFilterInfo *pFilterInfo = &pQuery->pFilterInfo[k]; - /* - * NOTE: here the tbname/tags column cannot reach here, since it will never be a filter column, - * so we do NOT check if is a tag or not - */ - pFilterInfo->pData = doGetDataBlocks(pQuery, data, pFilterInfo->info.colIdxInBuf); - } - - int32_t numOfRes = 0; - int32_t step = GET_FORWARD_DIRECTION_FACTOR(pQuery->order.order); - - // from top to bottom in desc - // from bottom to top in asc order - if (pRuntimeEnv->pTSBuf != NULL) { - SQInfo *pQInfo = (SQInfo *)GET_QINFO_ADDR(pQuery); - qTrace("QInfo:%p process data rows, numOfRows:%d, query order:%d, ts comp order:%d", pQInfo, *forwardStep, - pQuery->order.order, pRuntimeEnv->pTSBuf->cur.order); - } - - int32_t j = 0; - TSKEY lastKey = -1; - - for (j = 0; j < (*forwardStep); ++j) { - int32_t offset = GET_COL_DATA_POS(pQuery, j, step); - - if (pRuntimeEnv->pTSBuf != NULL) { - int32_t r = doTSJoinFilter(pRuntimeEnv, offset); - if (r == TS_JOIN_TAG_NOT_EQUALS) { - break; - } else if (r == TS_JOIN_TS_NOT_EQUALS) { - continue; - } else { - assert(r == TS_JOIN_TS_EQUAL); - } - } - - if (pQuery->numOfFilterCols > 0 && (!vnodeDoFilterData(pQuery, offset))) { - continue; - } - - // interval window query - if (isIntervalQuery(pQuery)) { - // decide the time window according to the primary timestamp - int64_t ts = primaryKeyCol[offset]; - STimeWindow win = getActiveTimeWindow(pWindowResInfo, ts, pQuery); - - int32_t ret = setWindowOutputBufByKey(pRuntimeEnv, pWindowResInfo, pRuntimeEnv->pMeterObj->sid, &win); - if (ret != TSDB_CODE_SUCCESS) { // null data, too many state code - continue; - } - - // all 
startOffset are identical - offset -= pCtx[0].startOffset; - - SWindowStatus *pStatus = getTimeWindowResStatus(pWindowResInfo, curTimeWindow(pWindowResInfo)); - doRowwiseApplyFunctions(pRuntimeEnv, pStatus, &win, offset); - - lastKey = ts; - STimeWindow nextWin = win; - int32_t index = pWindowResInfo->curIndex; - int32_t sid = pRuntimeEnv->pMeterObj->sid; - - while (1) { - getNextTimeWindow(pQuery, &nextWin); - if (pWindowResInfo->startTime > nextWin.skey || (nextWin.skey > pQuery->ekey && QUERY_IS_ASC_QUERY(pQuery)) || - (nextWin.skey > pQuery->skey && !QUERY_IS_ASC_QUERY(pQuery))) { - break; - } - - if (ts < nextWin.skey || ts > nextWin.ekey) { - break; - } - - // null data, failed to allocate more memory buffer - if (setWindowOutputBufByKey(pRuntimeEnv, pWindowResInfo, sid, &nextWin) != TSDB_CODE_SUCCESS) { - break; - } - - pStatus = getTimeWindowResStatus(pWindowResInfo, curTimeWindow(pWindowResInfo)); - doRowwiseApplyFunctions(pRuntimeEnv, pStatus, &nextWin, offset); - } - - pWindowResInfo->curIndex = index; - } else { // other queries - // decide which group this rows belongs to according to current state value - if (groupbyStateValue) { - char *stateVal = groupbyColumnData + bytes * offset; - - int32_t ret = setGroupResultOutputBuf(pRuntimeEnv, stateVal, type, bytes); - if (ret != TSDB_CODE_SUCCESS) { // null data, too many state code - continue; - } - } - - // update the lastKey - lastKey = primaryKeyCol[offset]; - - // all startOffset are identical - offset -= pCtx[0].startOffset; - - for (int32_t k = 0; k < pQuery->numOfOutputCols; ++k) { - int32_t functionId = pQuery->pSelectExpr[k].pBase.functionId; - if (functionNeedToExecute(pRuntimeEnv, &pCtx[k], functionId)) { - aAggs[functionId].xFunctionF(&pCtx[k], offset); - } - } - } - - if (pRuntimeEnv->pTSBuf != NULL) { - // if timestamp filter list is empty, quit current query - if (!tsBufNextPos(pRuntimeEnv->pTSBuf)) { - setQueryStatus(pQuery, QUERY_NO_DATA_TO_CHECK); - break; - } - } - - /* - * pointsOffset 
is the maximum available space in result buffer update the actual forward step for query that - * requires checking buffer during loop - */ - if ((pQuery->checkBufferInLoop == 1) && (++numOfRes) >= pQuery->pointsOffset) { - pQuery->lastKey = lastKey + step; - *forwardStep = j + 1; - break; - } - } - - free(sasArray); - - /* - * No need to calculate the number of output results for group-by normal columns, interval query - * because the results of group by normal column is put into intermediate buffer. - */ - int32_t num = 0; - if (!groupbyStateValue && !isIntervalQuery(pQuery)) { - num = getNumOfResult(pRuntimeEnv) - prevNumOfRes; - } - - return num; -} - -static int32_t reviseForwardSteps(SQueryRuntimeEnv *pRuntimeEnv, int32_t forwardStep) { - /* - * 1. If value filter exists, we try all data in current block, and do not set the QUERY_RESBUF_FULL flag. - * - * 2. In case of top/bottom/ts_comp query, the checkBufferInLoop == 1 and pQuery->numOfFilterCols - * may be 0 or not. We do not check the capacity of output buffer, since the filter function will do it. - * - * 3. In handling the query of secondary query of join, tsBuf servers as a ts filter. 
- */ - SQuery *pQuery = pRuntimeEnv->pQuery; - - if (isTopBottomQuery(pQuery) || isTSCompQuery(pQuery) || pQuery->numOfFilterCols > 0 || pRuntimeEnv->pTSBuf != NULL) { - return forwardStep; - } - - // current buffer does not have enough space, try in the next loop - if ((pQuery->checkBufferInLoop == 1) && (pQuery->pointsOffset <= forwardStep)) { - forwardStep = pQuery->pointsOffset; - } - - return forwardStep; -} - -static void validateQueryRangeAndData(SQueryRuntimeEnv *pRuntimeEnv, const TSKEY *pPrimaryColumn, - SBlockInfo *pBlockBasicInfo) { - SQuery *pQuery = pRuntimeEnv->pQuery; - - TSKEY startKey = -1; - // timestamp qualification check - if (IS_DATA_BLOCK_LOADED(pRuntimeEnv->blockStatus) && needPrimaryTimestampCol(pQuery, pBlockBasicInfo)) { - startKey = pPrimaryColumn[pQuery->pos]; - } else { - startKey = pBlockBasicInfo->keyFirst; - TSKEY endKey = pBlockBasicInfo->keyLast; - - assert((endKey <= pQuery->ekey && QUERY_IS_ASC_QUERY(pQuery)) || - (endKey >= pQuery->ekey && !QUERY_IS_ASC_QUERY(pQuery))); - } - - assert((startKey >= pQuery->lastKey && startKey <= pQuery->ekey && pQuery->skey <= pQuery->lastKey && - QUERY_IS_ASC_QUERY(pQuery)) || - (startKey <= pQuery->lastKey && startKey >= pQuery->ekey && pQuery->skey >= pQuery->lastKey && - !QUERY_IS_ASC_QUERY(pQuery))); -} - -static int32_t tableApplyFunctionsOnBlock(SQueryRuntimeEnv *pRuntimeEnv, SBlockInfo *pBlockInfo, SField *pFields, - __block_search_fn_t searchFn, int32_t *numOfRes, - SWindowResInfo *pWindowResInfo) { - SQuery *pQuery = pRuntimeEnv->pQuery; - TSKEY * pPrimaryColumn = (TSKEY *)pRuntimeEnv->primaryColBuffer->data; - - validateQueryRangeAndData(pRuntimeEnv, pPrimaryColumn, pBlockInfo); - - int32_t step = GET_FORWARD_DIRECTION_FACTOR(pQuery->order.order); - int32_t forwardStep = - getNumOfRowsInTimeWindow(pQuery, pBlockInfo, pPrimaryColumn, pQuery->pos, pQuery->ekey, searchFn, true); - assert(forwardStep >= 0); - - int32_t newForwardStep = reviseForwardSteps(pRuntimeEnv, forwardStep); - 
assert(newForwardStep <= forwardStep && newForwardStep >= 0); - - // if buffer limitation is applied, there must be primary column(timestamp) loaded - if (newForwardStep < forwardStep && newForwardStep > 0) { - pQuery->lastKey = pPrimaryColumn[pQuery->pos + (newForwardStep - 1) * step] + step; - } - - if (pQuery->numOfFilterCols > 0 || pRuntimeEnv->pTSBuf != NULL || isGroupbyNormalCol(pQuery->pGroupbyExpr)) { - *numOfRes = rowwiseApplyAllFunctions(pRuntimeEnv, &newForwardStep, pFields, pBlockInfo, pWindowResInfo); - } else { - *numOfRes = blockwiseApplyAllFunctions(pRuntimeEnv, newForwardStep, pFields, pBlockInfo, pWindowResInfo, searchFn); - } - - TSKEY lastKey = (QUERY_IS_ASC_QUERY(pQuery)) ? pBlockInfo->keyLast : pBlockInfo->keyFirst; - doCheckQueryCompleted(pRuntimeEnv, lastKey, pWindowResInfo); // todo refactor merge - - // interval query with limit applied - if (isIntervalQuery(pQuery) && pQuery->limit.limit > 0 && - (pQuery->limit.limit + pQuery->limit.offset) <= numOfClosedTimeWindow(pWindowResInfo) && - pRuntimeEnv->scanFlag == MASTER_SCAN) { - setQueryStatus(pQuery, QUERY_COMPLETED); - } - - assert(*numOfRes >= 0); - - // check if buffer is large enough for accommodating all qualified points - if (*numOfRes > 0 && pQuery->checkBufferInLoop == 1) { - pQuery->pointsOffset -= *numOfRes; - if (pQuery->pointsOffset <= 0) { // todo return correct numOfRes for ts_comp function - pQuery->pointsOffset = 0; - setQueryStatus(pQuery, QUERY_RESBUF_FULL); - } - } - - return newForwardStep; -} - -int32_t vnodeGetVnodeHeaderFileIndex(int32_t *fid, SQueryRuntimeEnv *pRuntimeEnv, int32_t order) { - if (pRuntimeEnv->vnodeFileInfo.numOfFiles == 0) { - return -1; - } - - SQueryFilesInfo *pVnodeFiles = &pRuntimeEnv->vnodeFileInfo; - - /* set the initial file for current query */ - if (order == TSQL_SO_ASC && *fid < pVnodeFiles->pFileInfo[0].fileID) { - *fid = pVnodeFiles->pFileInfo[0].fileID; - return 0; - } else if (order == TSQL_SO_DESC && *fid > 
pVnodeFiles->pFileInfo[pVnodeFiles->numOfFiles - 1].fileID) { - *fid = pVnodeFiles->pFileInfo[pVnodeFiles->numOfFiles - 1].fileID; - return pVnodeFiles->numOfFiles - 1; - } - - int32_t numOfFiles = pVnodeFiles->numOfFiles; - - if (order == TSQL_SO_DESC && *fid > pVnodeFiles->pFileInfo[numOfFiles - 1].fileID) { - *fid = pVnodeFiles->pFileInfo[numOfFiles - 1].fileID; - return numOfFiles - 1; - } - - if (order == TSQL_SO_ASC) { - int32_t i = 0; - int32_t step = QUERY_ASC_FORWARD_STEP; - - while (i < numOfFiles && *fid > pVnodeFiles->pFileInfo[i].fileID) { - i += step; - } - - if (i < numOfFiles && *fid <= pVnodeFiles->pFileInfo[i].fileID) { - *fid = pVnodeFiles->pFileInfo[i].fileID; - return i; - } else { - return -1; - } - } else { - int32_t i = numOfFiles - 1; - int32_t step = QUERY_DESC_FORWARD_STEP; - - while (i >= 0 && *fid < pVnodeFiles->pFileInfo[i].fileID) { - i += step; - } - - if (i >= 0 && *fid >= pVnodeFiles->pFileInfo[i].fileID) { - *fid = pVnodeFiles->pFileInfo[i].fileID; - return i; - } else { - return -1; - } - } -} - -int32_t getNextDataFileCompInfo(SQueryRuntimeEnv *pRuntimeEnv, SMeterObj *pMeterObj, int32_t step) { - SQuery *pQuery = pRuntimeEnv->pQuery; - pQuery->fileId += step; - - int32_t fileIndex = 0; - int32_t order = (step == QUERY_ASC_FORWARD_STEP) ? TSQL_SO_ASC : TSQL_SO_DESC; - while (1) { - fileIndex = vnodeGetVnodeHeaderFileIndex(&pQuery->fileId, pRuntimeEnv, order); - - // no files left, abort - if (fileIndex < 0) { - if (step == QUERY_ASC_FORWARD_STEP) { - dTrace("QInfo:%p no more file to access, try data in cache", GET_QINFO_ADDR(pQuery)); - } else { - dTrace("QInfo:%p no more file to access in desc order, query completed", GET_QINFO_ADDR(pQuery)); - } - - vnodeFreeFieldsEx(pRuntimeEnv); - pQuery->fileId = -1; - break; - } - - // failed to mmap header file into memory will cause the retrieval of compblock info failed - if (vnodeGetCompBlockInfo(pMeterObj, pRuntimeEnv, fileIndex) > 0) { - break; - } - - /* - * 1. 
failed to read blk information from header file or open data file failed - * 2. header file is empty - * - * try next one - */ - pQuery->fileId += step; - - /* for backwards search, if the first file is not valid, abort */ - if (step < 0 && fileIndex == 0) { - vnodeFreeFieldsEx(pRuntimeEnv); - pQuery->fileId = -1; - fileIndex = -1; - break; - } - } - - return fileIndex; -} - -void setExecParams(SQuery *pQuery, SQLFunctionCtx *pCtx, int64_t startQueryTimestamp, void *inputData, - char *primaryColumnData, int32_t size, int32_t functionId, SField *pField, bool hasNull, - int32_t blockStatus, void *param, int32_t scanFlag) { - int32_t startOffset = (QUERY_IS_ASC_QUERY(pQuery)) ? pQuery->pos : pQuery->pos - (size - 1); - - pCtx->nStartQueryTimestamp = startQueryTimestamp; - pCtx->scanFlag = scanFlag; - - pCtx->aInputElemBuf = inputData; - pCtx->hasNull = hasNull; - pCtx->blockStatus = blockStatus; - - if (pField != NULL) { - pCtx->preAggVals.isSet = true; - pCtx->preAggVals.minIndex = pField->minIndex; - pCtx->preAggVals.maxIndex = pField->maxIndex; - pCtx->preAggVals.sum = pField->sum; - pCtx->preAggVals.max = pField->max; - pCtx->preAggVals.min = pField->min; - pCtx->preAggVals.numOfNull = pField->numOfNullPoints; - } else { - pCtx->preAggVals.isSet = false; - } - - if ((aAggs[functionId].nStatus & TSDB_FUNCSTATE_SELECTIVITY) != 0 && (primaryColumnData != NULL)) { - pCtx->ptsList = (int64_t *)(primaryColumnData + startOffset * TSDB_KEYSIZE); - } - - if (functionId >= TSDB_FUNC_FIRST_DST && functionId <= TSDB_FUNC_LAST_DST) { - // last_dist or first_dist function - // store the first&last timestamp into the intermediate buffer [1], the true - // value may be null but timestamp will never be null - pCtx->ptsList = (int64_t *)(primaryColumnData + startOffset * TSDB_KEYSIZE); - } else if (functionId == TSDB_FUNC_TOP || functionId == TSDB_FUNC_BOTTOM || functionId == TSDB_FUNC_TWA || - functionId == TSDB_FUNC_DIFF || (functionId >= TSDB_FUNC_RATE && functionId <= 
TSDB_FUNC_AVG_IRATE)) { - /* - * leastsquares function needs two columns of input, currently, the x value of linear equation is set to - * timestamp column, and the y-value is the column specified in pQuery->pSelectExpr[i].colIdxInBuffer - * - * top/bottom function needs timestamp to indicate when the - * top/bottom values emerge, so does diff function - */ - if (functionId == TSDB_FUNC_TWA) { - STwaInfo *pTWAInfo = GET_RES_INFO(pCtx)->interResultBuf; - pTWAInfo->SKey = pQuery->skey; - pTWAInfo->EKey = pQuery->ekey; - } - - pCtx->ptsList = (int64_t *)(primaryColumnData + startOffset * TSDB_KEYSIZE); - - } else if (functionId == TSDB_FUNC_ARITHM) { - pCtx->param[1].pz = param; - } - - pCtx->startOffset = startOffset; - pCtx->size = size; - -#if defined(_DEBUG_VIEW) - int64_t *tsList = (int64_t *)(primaryColumnData + startOffset * TSDB_KEYSIZE); - int64_t s = tsList[0]; - int64_t e = tsList[size - 1]; - -// if (IS_DATA_BLOCK_LOADED(blockStatus)) { -// dTrace("QInfo:%p query ts:%lld-%lld, offset:%d, rows:%d, bstatus:%d, -// functId:%d", GET_QINFO_ADDR(pQuery), -// s, e, startOffset, size, blockStatus, functionId); -// } else { -// dTrace("QInfo:%p block not loaded, bstatus:%d", -// GET_QINFO_ADDR(pQuery), blockStatus); -// } -#endif -} - -// set the output buffer for the selectivity + tag query -static void setCtxTagColumnInfo(SQuery *pQuery, SQLFunctionCtx *pCtx) { - if (isSelectivityWithTagsQuery(pQuery)) { - int32_t num = 0; - SQLFunctionCtx *p = NULL; - - int16_t tagLen = 0; - - SQLFunctionCtx **pTagCtx = calloc(pQuery->numOfOutputCols, POINTER_BYTES); - for (int32_t i = 0; i < pQuery->numOfOutputCols; ++i) { - SSqlFuncExprMsg *pSqlFuncMsg = &pQuery->pSelectExpr[i].pBase; - if (pSqlFuncMsg->functionId == TSDB_FUNC_TAG_DUMMY || pSqlFuncMsg->functionId == TSDB_FUNC_TS_DUMMY) { - tagLen += pCtx[i].outputBytes; - pTagCtx[num++] = &pCtx[i]; - } else if ((aAggs[pSqlFuncMsg->functionId].nStatus & TSDB_FUNCSTATE_SELECTIVITY) != 0) { - p = &pCtx[i]; - } else if 
(pSqlFuncMsg->functionId == TSDB_FUNC_TS || pSqlFuncMsg->functionId == TSDB_FUNC_TAG) { - // tag function may be the group by tag column - // ts may be the required primary timestamp column - continue; - } else { - // the column may be the normal column, group by normal_column, the functionId is TSDB_FUNC_PRJ - } - } - - p->tagInfo.pTagCtxList = pTagCtx; - p->tagInfo.numOfTagCols = num; - p->tagInfo.tagsLen = tagLen; - } -} - -static void setWindowResultInfo(SResultInfo *pResultInfo, SQuery *pQuery, bool isStableQuery) { - for (int32_t i = 0; i < pQuery->numOfOutputCols; ++i) { - setResultInfoBuf(&pResultInfo[i], pQuery->pSelectExpr[i].interResBytes, isStableQuery); - } -} - -static int32_t setupQueryRuntimeEnv(SMeterObj *pMeterObj, SQuery *pQuery, SQueryRuntimeEnv *pRuntimeEnv, - SColumnModel *pTagsSchema, int16_t order, bool isSTableQuery) { - dTrace("QInfo:%p setup runtime env", GET_QINFO_ADDR(pQuery)); - - pRuntimeEnv->pMeterObj = pMeterObj; - pRuntimeEnv->pQuery = pQuery; - - pRuntimeEnv->resultInfo = calloc(pQuery->numOfOutputCols, sizeof(SResultInfo)); - pRuntimeEnv->pCtx = (SQLFunctionCtx *)calloc(pQuery->numOfOutputCols, sizeof(SQLFunctionCtx)); - - if (pRuntimeEnv->resultInfo == NULL || pRuntimeEnv->pCtx == NULL) { - goto _error_clean; - } - - pRuntimeEnv->offset[0] = 0; - for (int32_t i = 0; i < pQuery->numOfOutputCols; ++i) { - SSqlFuncExprMsg *pSqlFuncMsg = &pQuery->pSelectExpr[i].pBase; - SColIndexEx * pColIndexEx = &pSqlFuncMsg->colInfo; - - SQLFunctionCtx *pCtx = &pRuntimeEnv->pCtx[i]; - - if (TSDB_COL_IS_TAG(pSqlFuncMsg->colInfo.flag)) { // process tag column info - SSchema *pSchema = getColumnModelSchema(pTagsSchema, pColIndexEx->colIdx); - - pCtx->inputType = pSchema->type; - pCtx->inputBytes = pSchema->bytes; - } else { - pCtx->inputType = GET_COLUMN_TYPE(pQuery, i); - pCtx->inputBytes = GET_COLUMN_BYTES(pQuery, i); - } - - pCtx->ptsOutputBuf = NULL; - - pCtx->outputBytes = pQuery->pSelectExpr[i].resBytes; - pCtx->outputType = 
pQuery->pSelectExpr[i].resType; - - pCtx->order = pQuery->order.order; - pCtx->functionId = pSqlFuncMsg->functionId; - - pCtx->numOfParams = pSqlFuncMsg->numOfParams; - for (int32_t j = 0; j < pCtx->numOfParams; ++j) { - int16_t type = pSqlFuncMsg->arg[j].argType; - int16_t bytes = pSqlFuncMsg->arg[j].argBytes; - if (type == TSDB_DATA_TYPE_BINARY || type == TSDB_DATA_TYPE_NCHAR) { - tVariantCreateFromBinary(&pCtx->param[j], pSqlFuncMsg->arg->argValue.pz, bytes, type); - } else { - tVariantCreateFromBinary(&pCtx->param[j], (char *)&pSqlFuncMsg->arg[j].argValue.i64, bytes, type); - } - } - - // set the order information for top/bottom query - int32_t functionId = pCtx->functionId; - - if (functionId == TSDB_FUNC_TOP || functionId == TSDB_FUNC_BOTTOM || functionId == TSDB_FUNC_DIFF) { - int32_t f = pQuery->pSelectExpr[0].pBase.functionId; - assert(f == TSDB_FUNC_TS || f == TSDB_FUNC_TS_DUMMY); - - pCtx->param[2].i64Key = order; - pCtx->param[2].nType = TSDB_DATA_TYPE_BIGINT; - pCtx->param[3].i64Key = functionId; - pCtx->param[3].nType = TSDB_DATA_TYPE_BIGINT; - - pCtx->param[1].i64Key = pQuery->order.orderColId; - } - - if (i > 0) { - pRuntimeEnv->offset[i] = pRuntimeEnv->offset[i - 1] + pRuntimeEnv->pCtx[i - 1].outputBytes; - } - } - - // set the intermediate result output buffer - setWindowResultInfo(pRuntimeEnv->resultInfo, pQuery, isSTableQuery); - - // if it is group by normal column, do not set output buffer, the output buffer is pResult - if (!isGroupbyNormalCol(pQuery->pGroupbyExpr) && !isSTableQuery) { - resetCtxOutputBuf(pRuntimeEnv); - } - - setCtxTagColumnInfo(pQuery, pRuntimeEnv->pCtx); - - // for loading block data in memory - assert(vnodeList[pMeterObj->vnode].cfg.rowsInFileBlock == pMeterObj->pointsPerFileBlock); - return TSDB_CODE_SUCCESS; - -_error_clean: - tfree(pRuntimeEnv->resultInfo); - tfree(pRuntimeEnv->pCtx); - - return TSDB_CODE_SERV_OUT_OF_MEMORY; -} - -static void teardownQueryRuntimeEnv(SQueryRuntimeEnv *pRuntimeEnv) { - if 
(pRuntimeEnv->pQuery == NULL) { - return; - } - - dTrace("QInfo:%p teardown runtime env", GET_QINFO_ADDR(pRuntimeEnv->pQuery)); - for (int32_t i = 0; i < pRuntimeEnv->pQuery->numOfCols; ++i) { - tfree(pRuntimeEnv->colDataBuffer[i]); - } - - tfree(pRuntimeEnv->secondaryUnzipBuffer); - cleanupTimeWindowInfo(&pRuntimeEnv->windowResInfo, pRuntimeEnv); - - if (pRuntimeEnv->pCtx != NULL) { - for (int32_t i = 0; i < pRuntimeEnv->pQuery->numOfOutputCols; ++i) { - SQLFunctionCtx *pCtx = &pRuntimeEnv->pCtx[i]; - - for (int32_t j = 0; j < pCtx->numOfParams; ++j) { - tVariantDestroy(&pCtx->param[j]); - } - - tVariantDestroy(&pCtx->tag); - tfree(pCtx->tagInfo.pTagCtxList); - tfree(pRuntimeEnv->resultInfo[i].interResultBuf); - } - - tfree(pRuntimeEnv->resultInfo); - tfree(pRuntimeEnv->pCtx); - } - - tfree(pRuntimeEnv->unzipBuffer); - - if (pRuntimeEnv->pQuery && (!PRIMARY_TSCOL_LOADED(pRuntimeEnv->pQuery))) { - tfree(pRuntimeEnv->primaryColBuffer); - } - - doCloseQueryFiles(&pRuntimeEnv->vnodeFileInfo); - - if (pRuntimeEnv->vnodeFileInfo.pFileInfo != NULL) { - pRuntimeEnv->vnodeFileInfo.numOfFiles = 0; - free(pRuntimeEnv->vnodeFileInfo.pFileInfo); - } - - taosDestoryInterpoInfo(&pRuntimeEnv->interpoInfo); - - if (pRuntimeEnv->pInterpoBuf != NULL) { - for (int32_t i = 0; i < pRuntimeEnv->pQuery->numOfOutputCols; ++i) { - tfree(pRuntimeEnv->pInterpoBuf[i]); - } - - tfree(pRuntimeEnv->pInterpoBuf); - } - - pRuntimeEnv->pTSBuf = tsBufDestory(pRuntimeEnv->pTSBuf); -} - -// get maximum time interval in each file -static int64_t getOldestKey(int32_t numOfFiles, int64_t fileId, SVnodeCfg *pCfg) { - int64_t duration = pCfg->daysPerFile * tsMsPerDay[(uint8_t)pCfg->precision]; - return (fileId - numOfFiles + 1) * duration; -} - -bool isQueryKilled(SQuery *pQuery) { - SQInfo *pQInfo = (SQInfo *)GET_QINFO_ADDR(pQuery); - - /* - * check if the queried meter is going to be deleted. - * if it will be deleted soon, stop current query ASAP. 
- */ - SMeterObj *pMeterObj = pQInfo->pObj; - if (vnodeIsMeterState(pMeterObj, TSDB_METER_STATE_DROPPING)) { - pQInfo->killed = 1; - return true; - } - - return (pQInfo->killed == 1); -} - -bool isFixedOutputQuery(SQuery *pQuery) { - if (pQuery->intervalTime != 0) { - return false; - } - - // Note:top/bottom query is fixed output query - if (isTopBottomQuery(pQuery) || isGroupbyNormalCol(pQuery->pGroupbyExpr)) { - return true; - } - - for (int32_t i = 0; i < pQuery->numOfOutputCols; ++i) { - SSqlFuncExprMsg *pExprMsg = &pQuery->pSelectExpr[i].pBase; - - // ignore the ts_comp function - if (i == 0 && pExprMsg->functionId == TSDB_FUNC_PRJ && pExprMsg->numOfParams == 1 && - pExprMsg->colInfo.colIdx == PRIMARYKEY_TIMESTAMP_COL_INDEX) { - continue; - } - - if (pExprMsg->functionId == TSDB_FUNC_TS || pExprMsg->functionId == TSDB_FUNC_TS_DUMMY) { - continue; - } - - if (!IS_MULTIOUTPUT(aAggs[pExprMsg->functionId].nStatus)) { - return true; - } - } - - return false; -} - -bool isPointInterpoQuery(SQuery *pQuery) { - for (int32_t i = 0; i < pQuery->numOfOutputCols; ++i) { - int32_t functionID = pQuery->pSelectExpr[i].pBase.functionId; - if (functionID == TSDB_FUNC_INTERP || functionID == TSDB_FUNC_LAST_ROW) { - return true; - } - } - - return false; -} - -// TODO REFACTOR:MERGE WITH CLIENT-SIDE FUNCTION -bool isSumAvgRateQuery(SQuery *pQuery) { - for (int32_t i = 0; i < pQuery->numOfOutputCols; ++i) { - int32_t functionId = pQuery->pSelectExpr[i].pBase.functionId; - if (functionId == TSDB_FUNC_TS) { - continue; - } - - if (functionId == TSDB_FUNC_SUM_RATE || functionId == TSDB_FUNC_SUM_IRATE || functionId == TSDB_FUNC_AVG_RATE || - functionId == TSDB_FUNC_AVG_IRATE) { - return true; - } - } - - return false; -} - -bool isTopBottomQuery(SQuery *pQuery) { - for (int32_t i = 0; i < pQuery->numOfOutputCols; ++i) { - int32_t functionId = pQuery->pSelectExpr[i].pBase.functionId; - if (functionId == TSDB_FUNC_TS) { - continue; - } - - if (functionId == TSDB_FUNC_TOP || functionId 
== TSDB_FUNC_BOTTOM) { - return true; - } - } - - return false; -} - -bool isFirstLastRowQuery(SQuery *pQuery) { - for (int32_t i = 0; i < pQuery->numOfOutputCols; ++i) { - int32_t functionID = pQuery->pSelectExpr[i].pBase.functionId; - if (functionID == TSDB_FUNC_LAST_ROW) { - return true; - } - } - - return false; -} - -bool notHasQueryTimeRange(SQuery *pQuery) { - return (pQuery->skey == 0 && pQuery->ekey == INT64_MAX && QUERY_IS_ASC_QUERY(pQuery)) || - (pQuery->skey == INT64_MAX && pQuery->ekey == 0 && (!QUERY_IS_ASC_QUERY(pQuery))); -} - -bool isTSCompQuery(SQuery *pQuery) { return pQuery->pSelectExpr[0].pBase.functionId == TSDB_FUNC_TS_COMP; } - -bool needSupplementaryScan(SQuery *pQuery) { - for (int32_t i = 0; i < pQuery->numOfOutputCols; ++i) { - int32_t functionId = pQuery->pSelectExpr[i].pBase.functionId; - if (functionId == TSDB_FUNC_TS || functionId == TSDB_FUNC_TS_DUMMY || functionId == TSDB_FUNC_TAG) { - continue; - } - - if (((functionId == TSDB_FUNC_LAST || functionId == TSDB_FUNC_LAST_DST) && QUERY_IS_ASC_QUERY(pQuery)) || - ((functionId == TSDB_FUNC_FIRST || functionId == TSDB_FUNC_FIRST_DST) && !QUERY_IS_ASC_QUERY(pQuery))) { - return true; - } - } - - return false; -} -///////////////////////////////////////////////////////////////////////////////////////////// -static int32_t binarySearchInCacheBlk(SCacheInfo *pCacheInfo, SQuery *pQuery, int32_t keyLen, int32_t firstSlot, - int32_t lastSlot) { - int32_t midSlot = 0; - - while (1) { - int32_t numOfBlocks = (lastSlot - firstSlot + 1 + pCacheInfo->maxBlocks) % pCacheInfo->maxBlocks; - if (numOfBlocks == 0) { - numOfBlocks = pCacheInfo->maxBlocks; - } - - midSlot = (firstSlot + (numOfBlocks >> 1)) % pCacheInfo->maxBlocks; - SCacheBlock *pBlock = pCacheInfo->cacheBlocks[midSlot]; - - TSKEY keyFirst = *((TSKEY *)pBlock->offset[0]); - TSKEY keyLast = *((TSKEY *)(pBlock->offset[0] + (pBlock->numOfPoints - 1) * keyLen)); - - if (numOfBlocks == 1) { - break; - } - - if (pQuery->skey > keyLast) { - if 
(numOfBlocks == 2) break; - if (!QUERY_IS_ASC_QUERY(pQuery)) { - int nextSlot = (midSlot + 1 + pCacheInfo->maxBlocks) % pCacheInfo->maxBlocks; - SCacheBlock *pNextBlock = pCacheInfo->cacheBlocks[nextSlot]; - TSKEY nextKeyFirst = *((TSKEY *)(pNextBlock->offset[0])); - if (pQuery->skey < nextKeyFirst) break; - } - firstSlot = (midSlot + 1) % pCacheInfo->maxBlocks; - } else if (pQuery->skey < keyFirst) { - if (QUERY_IS_ASC_QUERY(pQuery)) { - int prevSlot = (midSlot - 1 + pCacheInfo->maxBlocks) % pCacheInfo->maxBlocks; - SCacheBlock *pPrevBlock = pCacheInfo->cacheBlocks[prevSlot]; - TSKEY prevKeyLast = *((TSKEY *)(pPrevBlock->offset[0] + (pPrevBlock->numOfPoints - 1) * keyLen)); - if (pQuery->skey > prevKeyLast) { - break; - } - } - lastSlot = (midSlot - 1 + pCacheInfo->maxBlocks) % pCacheInfo->maxBlocks; - } else { - break; // got the slot - } - } - - return midSlot; -} - -static void getQueryRange(SQuery *pQuery, TSKEY *min, TSKEY *max) { - *min = pQuery->lastKey < pQuery->ekey ? pQuery->lastKey : pQuery->ekey; - *max = pQuery->lastKey >= pQuery->ekey ? pQuery->lastKey : pQuery->ekey; -} - -static int32_t getFirstCacheSlot(int32_t numOfBlocks, int32_t lastSlot, SCacheInfo *pCacheInfo) { - return (lastSlot - numOfBlocks + 1 + pCacheInfo->maxBlocks) % pCacheInfo->maxBlocks; -} - -static bool cacheBoundaryCheck(SQueryRuntimeEnv *pRuntimeEnv, SMeterObj *pMeterObj) { - /* - * here we get the first slot from the meter cache, not from the cache snapshot from pQuery, since the - * snapshot value in pQuery may have been expired now. 
- */ - SQuery *pQuery = pRuntimeEnv->pQuery; - - SCacheInfo * pCacheInfo = (SCacheInfo *)pMeterObj->pCache; - SCacheBlock *pBlock = NULL; - - // earliest key in cache - TSKEY keyFirst = 0; - TSKEY keyLast = pMeterObj->lastKey; - - while (1) { - // keep the value in local variable, since it may be changed by other thread any time - int32_t numOfBlocks = pCacheInfo->numOfBlocks; - int32_t currentSlot = pCacheInfo->currentSlot; - - // no data in cache, return false directly - if (numOfBlocks == 0) { - return false; - } - - int32_t first = getFirstCacheSlot(numOfBlocks, currentSlot, pCacheInfo); - - /* - * pBlock may be null value since this block is flushed to disk, and re-distributes to - * other meter, so go on until we get the first not flushed cache block. - */ - if ((pBlock = getCacheDataBlock(pMeterObj, pRuntimeEnv, first)) != NULL) { - keyFirst = getTimestampInCacheBlock(pRuntimeEnv, pBlock, 0); - break; - } else { - /* - * there may be only one empty cache block existed caused by import. - */ - if (numOfBlocks == 1) { - return false; - } - } - } - - TSKEY min, max; - getQueryRange(pQuery, &min, &max); - - /* - * The query time range is earlier than the first element or later than the last elements in cache. - * If the query window overlaps with the time range of disk files, the flag needs to be reset. - * Otherwise, this flag will cause error in following processing. 
- */ - if (max < keyFirst || min > keyLast) { - setQueryStatus(pQuery, QUERY_NO_DATA_TO_CHECK); - return false; - } - - return true; -} - -void getBasicCacheInfoSnapshot(SQuery *pQuery, SCacheInfo *pCacheInfo, int32_t vid) { - // commitSlot here denotes the first uncommitted block in cache - int32_t numOfBlocks = 0; - int32_t lastSlot = 0; - int32_t commitSlot = 0; - int32_t commitPoint = 0; - - SCachePool *pPool = (SCachePool *)vnodeList[vid].pCachePool; - pthread_mutex_lock(&pPool->vmutex); - numOfBlocks = pCacheInfo->numOfBlocks; - lastSlot = pCacheInfo->currentSlot; - commitSlot = pCacheInfo->commitSlot; - commitPoint = pCacheInfo->commitPoint; - pthread_mutex_unlock(&pPool->vmutex); - - // make sure it is there, otherwise, return right away - pQuery->currentSlot = lastSlot; - pQuery->numOfBlocks = numOfBlocks; - pQuery->firstSlot = getFirstCacheSlot(numOfBlocks, lastSlot, pCacheInfo); - pQuery->commitSlot = commitSlot; - pQuery->commitPoint = commitPoint; - - /* - * Note: the block id is continuous increasing, never becomes smaller. - * - * blockId is the maximum block id in cache of current meter during query. - * If any blocks' id are greater than this value, those blocks may be reallocated to other meters, - * or assigned new data of this meter, on which the query is performed should be ignored. 
- */ - if (pQuery->numOfBlocks > 0) { - pQuery->blockId = pCacheInfo->cacheBlocks[pQuery->currentSlot]->blockId; - } -} - -int64_t getQueryStartPositionInCache(SQueryRuntimeEnv *pRuntimeEnv, int32_t *slot, int32_t *pos, - bool ignoreQueryRange) { - SQuery * pQuery = pRuntimeEnv->pQuery; - SMeterObj *pMeterObj = pRuntimeEnv->pMeterObj; - - pQuery->fileId = -1; - vnodeFreeFieldsEx(pRuntimeEnv); - - // keep in-memory cache status in local variables in case that it may be changed by write operation - getBasicCacheInfoSnapshot(pQuery, pMeterObj->pCache, pMeterObj->vnode); - - SCacheInfo *pCacheInfo = (SCacheInfo *)pMeterObj->pCache; - if (pCacheInfo == NULL || pCacheInfo->cacheBlocks == NULL || pQuery->numOfBlocks == 0) { - setQueryStatus(pQuery, QUERY_NO_DATA_TO_CHECK); - return -1; - } - - assert((pQuery->lastKey >= pQuery->skey && QUERY_IS_ASC_QUERY(pQuery)) || - (pQuery->lastKey <= pQuery->skey && !QUERY_IS_ASC_QUERY(pQuery))); - - if (!ignoreQueryRange && !cacheBoundaryCheck(pRuntimeEnv, pMeterObj)) { - return -1; - } - - /* find the appropriated slot that contains the requested points */ - TSKEY rawskey = pQuery->skey; - - /* here we actual start to query from pQuery->lastKey */ - pQuery->skey = pQuery->lastKey; - - (*slot) = binarySearchInCacheBlk(pCacheInfo, pQuery, TSDB_KEYSIZE, pQuery->firstSlot, pQuery->currentSlot); - - /* locate the first point of which time stamp is no less than pQuery->skey */ - __block_search_fn_t searchFn = vnodeSearchKeyFunc[pMeterObj->searchAlgorithm]; - - pQuery->slot = *slot; - - // cache block has been flushed to disk, no required data block in cache. 
- SCacheBlock *pBlock = getCacheDataBlock(pMeterObj, pRuntimeEnv, pQuery->slot); - if (pBlock == NULL) { - pQuery->skey = rawskey; // restore the skey - return -1; - } - - (*pos) = searchFn(pRuntimeEnv->primaryColBuffer->data, pBlock->numOfPoints, pQuery->skey, pQuery->order.order); - - // restore skey before return - pQuery->skey = rawskey; - - // all data are less(greater) than the pQuery->lastKey in case of ascending(descending) query - if (*pos == -1) { - return -1; - } - - int64_t nextKey = getTimestampInCacheBlock(pRuntimeEnv, pBlock, *pos); - if ((nextKey < pQuery->lastKey && QUERY_IS_ASC_QUERY(pQuery)) || - (nextKey > pQuery->lastKey && !QUERY_IS_ASC_QUERY(pQuery))) { - // all data are less than the pQuery->lastKey(pQuery->sKey) for asc query - return -1; - } - - SET_CACHE_BLOCK_FLAG(pRuntimeEnv->blockStatus); - return nextKey; -} - -/** - * check if data in disk. - */ -bool hasDataInDisk(SQuery *pQuery, SMeterObj *pMeterObj) { - SVnodeObj *pVnode = &vnodeList[pMeterObj->vnode]; - if (pVnode->numOfFiles <= 0) { - pQuery->fileId = -1; - return false; - } - - int64_t latestKey = pMeterObj->lastKeyOnFile; - int64_t oldestKey = getOldestKey(pVnode->numOfFiles, pVnode->fileId, &pVnode->cfg); - - TSKEY min, max; - getQueryRange(pQuery, &min, &max); - - /* query range is out of current time interval of table */ - if ((min > latestKey) || (max < oldestKey)) { - pQuery->fileId = -1; - return false; - } - - return true; -} - -bool hasDataInCache(SQueryRuntimeEnv *pRuntimeEnv, SMeterObj *pMeterObj) { - SQuery * pQuery = pRuntimeEnv->pQuery; - SCacheInfo *pCacheInfo = (SCacheInfo *)pMeterObj->pCache; - - /* no data in cache, return */ - if ((pCacheInfo == NULL) || (pCacheInfo->cacheBlocks == NULL)) { - return false; - } - - /* numOfBlocks value has been overwrite, release pFields data if exists */ - vnodeFreeFieldsEx(pRuntimeEnv); - getBasicCacheInfoSnapshot(pQuery, pCacheInfo, pMeterObj->vnode); - if (pQuery->numOfBlocks <= 0) { - return false; - } - - return 
cacheBoundaryCheck(pRuntimeEnv, pMeterObj); -} - -/** - * Get cache snapshot will destroy the comp block info in SQuery, in order to speedup the query - * process, we always check cache first. - */ -void vnodeCheckIfDataExists(SQueryRuntimeEnv *pRuntimeEnv, SMeterObj *pMeterObj, bool *dataInDisk, bool *dataInCache) { - SQuery *pQuery = pRuntimeEnv->pQuery; - - *dataInCache = hasDataInCache(pRuntimeEnv, pMeterObj); - *dataInDisk = hasDataInDisk(pQuery, pMeterObj); - - setQueryStatus(pQuery, QUERY_NOT_COMPLETED); -} - -void doGetAlignedIntervalQueryRangeImpl(SQuery *pQuery, int64_t pKey, int64_t keyFirst, int64_t keyLast, - int64_t *actualSkey, int64_t *actualEkey, int64_t *skey, int64_t *ekey) { - assert(pKey >= keyFirst && pKey <= keyLast); - *skey = taosGetIntervalStartTimestamp(pKey, pQuery->intervalTime, pQuery->intervalTimeUnit, pQuery->precision); - - if (keyFirst > (INT64_MAX - pQuery->intervalTime)) { - /* - * if the actualSkey > INT64_MAX - pQuery->intervalTime, the query duration between - * actualSkey and actualEkey must be less than one interval.Therefore, no need to adjust the query ranges. 
- */ - assert(keyLast - keyFirst < pQuery->intervalTime); - - *actualSkey = keyFirst; - *actualEkey = keyLast; - - *ekey = INT64_MAX; - return; - } - - *ekey = *skey + pQuery->intervalTime - 1; - - if (*skey < keyFirst) { - *actualSkey = keyFirst; - } else { - *actualSkey = *skey; - } - - if (*ekey < keyLast) { - *actualEkey = *ekey; - } else { - *actualEkey = keyLast; - } -} - -static void getOneRowFromDataBlock(SQueryRuntimeEnv *pRuntimeEnv, char **dst, int32_t pos) { - SQuery *pQuery = pRuntimeEnv->pQuery; - - for (int32_t i = 0; i < pQuery->numOfCols; ++i) { - int32_t bytes = pQuery->colList[i].data.bytes; - memcpy(dst[i], pRuntimeEnv->colDataBuffer[i]->data + pos * bytes, bytes); - } -} - -static bool getNeighborPoints(STableQuerySupportObj *pSupporter, SMeterObj *pMeterObj, - SPointInterpoSupporter *pPointInterpSupporter) { - SQueryRuntimeEnv *pRuntimeEnv = &pSupporter->runtimeEnv; - SQuery * pQuery = pRuntimeEnv->pQuery; - - if (!isPointInterpoQuery(pQuery)) { - return false; - } - - /* - * for interpolate point query, points that are directly before/after the specified point are required - */ - if (isFirstLastRowQuery(pQuery)) { - assert(!QUERY_IS_ASC_QUERY(pQuery)); - } else { - assert(QUERY_IS_ASC_QUERY(pQuery)); - } - assert(pPointInterpSupporter != NULL && pQuery->skey == pQuery->ekey); - - SCacheBlock *pBlock = NULL; - - qTrace("QInfo:%p get next data point, fileId:%d, slot:%d, pos:%d", GET_QINFO_ADDR(pQuery), pQuery->fileId, - pQuery->slot, pQuery->pos); - - // save the point that is directly after or equals to the specified point - getOneRowFromDataBlock(pRuntimeEnv, pPointInterpSupporter->pNextPoint, pQuery->pos); - - /* - * 1. for last_row query, return immediately. - * 2. the specified timestamp equals to the required key, interpolation according to neighbor points is not necessary - * for interp query. 
- */ - TSKEY actualKey = *(TSKEY *)pPointInterpSupporter->pNextPoint[0]; - if (isFirstLastRowQuery(pQuery) || actualKey == pQuery->skey) { - setQueryStatus(pQuery, QUERY_NOT_COMPLETED); - - /* - * the retrieved ts may not equals to pMeterObj->lastKey due to cache re-allocation - * set the pQuery->ekey/pQuery->skey/pQuery->lastKey to be the new value. - */ - if (pQuery->ekey != actualKey) { - pQuery->skey = actualKey; - pQuery->ekey = actualKey; - pQuery->lastKey = actualKey; - pSupporter->rawSKey = actualKey; - pSupporter->rawEKey = actualKey; - } - return true; - } - - /* the qualified point is not the first point in data block */ - if (pQuery->pos > 0) { - int32_t prevPos = pQuery->pos - 1; - - /* save the point that is directly after the specified point */ - getOneRowFromDataBlock(pRuntimeEnv, pPointInterpSupporter->pPrevPoint, prevPos); - } else { - __block_search_fn_t searchFn = vnodeSearchKeyFunc[pMeterObj->searchAlgorithm]; - - savePointPosition(&pRuntimeEnv->startPos, pQuery->fileId, pQuery->slot, pQuery->pos); - - // backwards movement would not set the pQuery->pos correct. We need to set it manually later. - moveToNextBlock(pRuntimeEnv, QUERY_DESC_FORWARD_STEP, searchFn, true); - - /* - * no previous data exists. 
- * reset the status and load the data block that contains the qualified point - */ - if (Q_STATUS_EQUAL(pQuery->over, QUERY_NO_DATA_TO_CHECK)) { - dTrace("QInfo:%p no previous data block, start fileId:%d, slot:%d, pos:%d, qrange:%" PRId64 "-%" PRId64 - ", out of range", - GET_QINFO_ADDR(pQuery), pRuntimeEnv->startPos.fileId, pRuntimeEnv->startPos.slot, - pRuntimeEnv->startPos.pos, pQuery->skey, pQuery->ekey); - - // no result, return immediately - setQueryStatus(pQuery, QUERY_COMPLETED); - return false; - } else { // prev has been located - if (pQuery->fileId >= 0) { - pQuery->pos = pQuery->pBlock[pQuery->slot].numOfPoints - 1; - getOneRowFromDataBlock(pRuntimeEnv, pPointInterpSupporter->pPrevPoint, pQuery->pos); - - qTrace("QInfo:%p get prev data point, fileId:%d, slot:%d, pos:%d, pQuery->pos:%d", GET_QINFO_ADDR(pQuery), - pQuery->fileId, pQuery->slot, pQuery->pos, pQuery->pos); - } else { - // moveToNextBlock make sure there is a available cache block, if exists - assert(vnodeIsDatablockLoaded(pRuntimeEnv, pMeterObj, -1, true) == DISK_BLOCK_NO_NEED_TO_LOAD); - pBlock = &pRuntimeEnv->cacheBlock; - - pQuery->pos = pBlock->numOfPoints - 1; - getOneRowFromDataBlock(pRuntimeEnv, pPointInterpSupporter->pPrevPoint, pQuery->pos); - - qTrace("QInfo:%p get prev data point, fileId:%d, slot:%d, pos:%d, pQuery->pos:%d", GET_QINFO_ADDR(pQuery), - pQuery->fileId, pQuery->slot, pBlock->numOfPoints - 1, pQuery->pos); - } - } - } - - pQuery->skey = *(TSKEY *)pPointInterpSupporter->pPrevPoint[0]; - pQuery->ekey = *(TSKEY *)pPointInterpSupporter->pNextPoint[0]; - pQuery->lastKey = pQuery->skey; - - return true; -} - -static bool doGetQueryPos(TSKEY key, STableQuerySupportObj *pSupporter, SPointInterpoSupporter *pPointInterpSupporter) { - SQueryRuntimeEnv *pRuntimeEnv = &pSupporter->runtimeEnv; - SQuery * pQuery = pRuntimeEnv->pQuery; - SMeterObj * pMeterObj = pRuntimeEnv->pMeterObj; - - /* key in query range. 
If not, no qualified in disk file */ - if (key != -1 && key <= pQuery->ekey) { - if (isPointInterpoQuery(pQuery)) { /* no qualified data in this query range */ - return getNeighborPoints(pSupporter, pMeterObj, pPointInterpSupporter); - } else { - return true; - } - } else { // key > pQuery->ekey, abort for normal query, continue for interp query - if (isPointInterpoQuery(pQuery)) { - return getNeighborPoints(pSupporter, pMeterObj, pPointInterpSupporter); - } else { - return false; - } - } -} - -static bool doSetDataInfo(STableQuerySupportObj *pSupporter, SPointInterpoSupporter *pPointInterpSupporter, - SMeterObj *pMeterObj, TSKEY nextKey) { - SQueryRuntimeEnv *pRuntimeEnv = &pSupporter->runtimeEnv; - SQuery * pQuery = pRuntimeEnv->pQuery; - - if (isFirstLastRowQuery(pQuery)) { - /* - * if the pQuery->skey != pQuery->ekey for last_row query, - * the query range is existed, so set them both the value of nextKey - */ - if (pQuery->skey != pQuery->ekey) { - assert(pQuery->skey >= pQuery->ekey && !QUERY_IS_ASC_QUERY(pQuery) && nextKey >= pQuery->ekey && - nextKey <= pQuery->skey); - - pQuery->skey = nextKey; - pQuery->ekey = nextKey; - } - - return getNeighborPoints(pSupporter, pMeterObj, pPointInterpSupporter); - } else { - return true; - } -} - -// TODO refactor code, the best way to implement the last_row is utilizing the iterator -bool normalizeUnBoundLastRowQuery(STableQuerySupportObj *pSupporter, SPointInterpoSupporter *pPointInterpSupporter) { - SQueryRuntimeEnv *pRuntimeEnv = &pSupporter->runtimeEnv; - - SQuery * pQuery = pRuntimeEnv->pQuery; - SMeterObj *pMeterObj = pRuntimeEnv->pMeterObj; - - assert(!QUERY_IS_ASC_QUERY(pQuery) && notHasQueryTimeRange(pQuery)); - __block_search_fn_t searchFn = vnodeSearchKeyFunc[pMeterObj->searchAlgorithm]; - - TSKEY lastKey = -1; - - pQuery->fileId = -1; - vnodeFreeFieldsEx(pRuntimeEnv); - - // keep in-memory cache status in local variables in case that it may be changed by write operation - getBasicCacheInfoSnapshot(pQuery, 
pMeterObj->pCache, pMeterObj->vnode); - - SCacheInfo *pCacheInfo = (SCacheInfo *)pMeterObj->pCache; - if (pCacheInfo != NULL && pCacheInfo->cacheBlocks != NULL && pQuery->numOfBlocks > 0) { - pQuery->fileId = -1; - TSKEY key = pMeterObj->lastKey; - - pQuery->skey = key; - pQuery->ekey = key; - pQuery->lastKey = pQuery->skey; - - /* - * cache block may have been flushed to disk, and no data in cache anymore. - * So, copy cache block to local buffer is required. - */ - lastKey = getQueryStartPositionInCache(pRuntimeEnv, &pQuery->slot, &pQuery->pos, false); - if (lastKey < 0) { // data has been flushed to disk, try again search in file - lastKey = getQueryPositionForCacheInvalid(pRuntimeEnv, searchFn); - - if (Q_STATUS_EQUAL(pQuery->over, QUERY_NO_DATA_TO_CHECK | QUERY_COMPLETED)) { - return false; - } - } - } else { // no data in cache, try file - TSKEY key = pMeterObj->lastKeyOnFile; - - pQuery->skey = key; - pQuery->ekey = key; - pQuery->lastKey = pQuery->skey; - - bool ret = getQualifiedDataBlock(pMeterObj, pRuntimeEnv, QUERY_RANGE_LESS_EQUAL, searchFn); - if (!ret) { // no data in file, return false; - return false; - } - - lastKey = getTimestampInDiskBlock(pRuntimeEnv, pQuery->pos); - } - - assert(lastKey <= pQuery->skey); - - pQuery->skey = lastKey; - pQuery->ekey = lastKey; - pQuery->lastKey = pQuery->skey; - - return getNeighborPoints(pSupporter, pMeterObj, pPointInterpSupporter); -} - -static int64_t getGreaterEqualTimestamp(SQueryRuntimeEnv *pRuntimeEnv) { - SQuery * pQuery = pRuntimeEnv->pQuery; - SMeterObj * pMeterObj = pRuntimeEnv->pMeterObj; - __block_search_fn_t searchFn = vnodeSearchKeyFunc[pMeterObj->searchAlgorithm]; - - if (QUERY_IS_ASC_QUERY(pQuery)) { - return -1; - } - - TSKEY key = -1; - - SPositionInfo p = {0}; - { // todo refactor save the context - savePointPosition(&p, pQuery->fileId, pQuery->slot, pQuery->pos); - } - - SWAP(pQuery->skey, pQuery->ekey, TSKEY); - pQuery->lastKey = pQuery->skey; - pQuery->order.order ^= 1u; - - if 
(getQualifiedDataBlock(pMeterObj, pRuntimeEnv, QUERY_RANGE_GREATER_EQUAL, searchFn)) { - key = getTimestampInDiskBlock(pRuntimeEnv, pQuery->pos); - } else { // set no data in file - key = getQueryStartPositionInCache(pRuntimeEnv, &pQuery->slot, &pQuery->pos, false); - } - - SWAP(pQuery->skey, pQuery->ekey, TSKEY); - pQuery->order.order ^= 1u; - pQuery->lastKey = pQuery->skey; - - pQuery->fileId = p.fileId; - pQuery->pos = p.pos; - pQuery->slot = p.slot; - - return key; -} - -/** - * determine the first query range, according to raw query range [skey, ekey] and group-by interval. - * the time interval for aggregating is not enforced to check its validation, the minimum interval is not less than - * 10ms, which is guaranteed by parser at client-side - */ -bool normalizedFirstQueryRange(bool dataInDisk, bool dataInCache, STableQuerySupportObj *pSupporter, - SPointInterpoSupporter *pPointInterpSupporter, int64_t *key) { - SQueryRuntimeEnv * pRuntimeEnv = &pSupporter->runtimeEnv; - SQuery * pQuery = pRuntimeEnv->pQuery; - SMeterObj * pMeterObj = pRuntimeEnv->pMeterObj; - __block_search_fn_t searchFn = vnodeSearchKeyFunc[pMeterObj->searchAlgorithm]; - - if (QUERY_IS_ASC_QUERY(pQuery)) { - // todo: the action return as the getQueryStartPositionInCache function - if (dataInDisk && getQualifiedDataBlock(pMeterObj, pRuntimeEnv, QUERY_RANGE_GREATER_EQUAL, searchFn)) { - TSKEY nextKey = getTimestampInDiskBlock(pRuntimeEnv, pQuery->pos); - assert(nextKey >= pQuery->skey); - - if (key != NULL) { - *key = nextKey; - } - - return doGetQueryPos(nextKey, pSupporter, pPointInterpSupporter); - } - - // set no data in file - pQuery->fileId = -1; - SCacheInfo *pCacheInfo = (SCacheInfo *)pMeterObj->pCache; - - /* if it is a interpolation query, the any points in cache that is greater than the query range is required */ - if (pCacheInfo == NULL || pCacheInfo->cacheBlocks == NULL || pCacheInfo->numOfBlocks == 0 || !dataInCache) { - return false; - } - - TSKEY nextKey = 
getQueryStartPositionInCache(pRuntimeEnv, &pQuery->slot, &pQuery->pos, false); - - if (key != NULL) { - *key = nextKey; - } - - return doGetQueryPos(nextKey, pSupporter, pPointInterpSupporter); - - } else { // descending order - if (dataInCache) { // todo handle error - TSKEY nextKey = getQueryStartPositionInCache(pRuntimeEnv, &pQuery->slot, &pQuery->pos, false); - assert(nextKey == -1 || nextKey <= pQuery->skey); - - if (key != NULL) { - *key = nextKey; - } - - if (nextKey != -1) { // find qualified data in cache - if (nextKey >= pQuery->ekey) { - return doSetDataInfo(pSupporter, pPointInterpSupporter, pMeterObj, nextKey); - } else { - /* - * nextKey < pQuery->ekey && nextKey < pQuery->lastKey, query range is - * larger than all data, abort - * - * NOTE: Interp query does not reach here, since for all interp query, - * the query order is ascending order. - */ - return false; - } - } else { // all data in cache are greater than pQuery->skey, try file - } - } - - if (dataInDisk && getQualifiedDataBlock(pMeterObj, pRuntimeEnv, QUERY_RANGE_LESS_EQUAL, searchFn)) { - TSKEY nextKey = getTimestampInDiskBlock(pRuntimeEnv, pQuery->pos); - assert(nextKey <= pQuery->skey); - - if (key != NULL) { - *key = nextKey; - } - - // key in query range. 
If not, no qualified in disk file - if (nextKey >= pQuery->ekey) { - return doSetDataInfo(pSupporter, pPointInterpSupporter, pMeterObj, nextKey); - } else { // In case of all queries, the value of false will be returned if key < pQuery->ekey - return false; - } - } - } - - return false; -} - -int64_t loadRequiredBlockIntoMem(SQueryRuntimeEnv *pRuntimeEnv, SPositionInfo *position) { - TSKEY nextTimestamp = -1; - - SQuery * pQuery = pRuntimeEnv->pQuery; - SMeterObj *pMeterObj = pRuntimeEnv->pMeterObj; - - pQuery->fileId = position->fileId; - pQuery->slot = position->slot; - pQuery->pos = position->pos; - - if (position->fileId == -1) { - SCacheInfo *pCacheInfo = (SCacheInfo *)pMeterObj->pCache; - if (pCacheInfo == NULL || pCacheInfo->numOfBlocks == 0 || pCacheInfo->cacheBlocks == NULL) { - setQueryStatus(pQuery, QUERY_NO_DATA_TO_CHECK); - return -1; - } - - SCacheBlock *pBlock = getCacheDataBlock(pMeterObj, pRuntimeEnv, pQuery->slot); - if (pBlock != NULL) { - nextTimestamp = getTimestampInCacheBlock(pRuntimeEnv, pBlock, position->pos); - } else { - // todo fix it - } - - SET_CACHE_BLOCK_FLAG(pRuntimeEnv->blockStatus); - } else { - // todo handle the file broken situation - /* - * load the file metadata into buffer first, then the specific data block. - * currently opened file is not the start file, reset to the start file - */ - int32_t fileIdx = vnodeGetVnodeHeaderFileIndex(&pQuery->fileId, pRuntimeEnv, pQuery->order.order); - if (fileIdx < 0) { // ignore the files on disk - dError("QInfo:%p failed to get data file:%d", GET_QINFO_ADDR(pQuery), pQuery->fileId); - position->fileId = -1; - return -1; - } - - /* - * NOTE: - * The compblock information may not be loaded yet, here loaded it firstly. - * If the compBlock info is loaded, it wont be loaded again. 
- * - * If failed to load comp block into memory due some how reasons, e.g., empty header file/not enough memory - */ - if (vnodeGetCompBlockInfo(pMeterObj, pRuntimeEnv, fileIdx) <= 0) { - position->fileId = -1; - return -1; - } - - nextTimestamp = getTimestampInDiskBlock(pRuntimeEnv, pQuery->pos); - } - - return nextTimestamp; -} - -static void setScanLimitationByResultBuffer(SQuery *pQuery) { - if (isTopBottomQuery(pQuery)) { - pQuery->checkBufferInLoop = 0; - } else if (isGroupbyNormalCol(pQuery->pGroupbyExpr)) { - pQuery->checkBufferInLoop = 0; - } else { - bool hasMultioutput = false; - for (int32_t i = 0; i < pQuery->numOfOutputCols; ++i) { - SSqlFuncExprMsg *pExprMsg = &pQuery->pSelectExpr[i].pBase; - if (pExprMsg->functionId == TSDB_FUNC_TS || pExprMsg->functionId == TSDB_FUNC_TS_DUMMY) { - continue; - } - - hasMultioutput = IS_MULTIOUTPUT(aAggs[pExprMsg->functionId].nStatus); - if (!hasMultioutput) { - break; - } - } - - pQuery->checkBufferInLoop = hasMultioutput ? 1 : 0; - } - - pQuery->pointsOffset = pQuery->pointsToRead; -} - -/* - * todo add more parameters to check soon.. - */ -bool vnodeParametersSafetyCheck(SQuery *pQuery) { - // load data column information is incorrect - for (int32_t i = 0; i < pQuery->numOfCols - 1; ++i) { - if (pQuery->colList[i].data.colId == pQuery->colList[i + 1].data.colId) { - dError("QInfo:%p invalid data load column for query", GET_QINFO_ADDR(pQuery)); - return false; - } - } - return true; -} - -static int file_order_comparator(const void *p1, const void *p2) { - SHeaderFileInfo *pInfo1 = (SHeaderFileInfo *)p1; - SHeaderFileInfo *pInfo2 = (SHeaderFileInfo *)p2; - - if (pInfo1->fileID == pInfo2->fileID) { - return 0; - } - - return (pInfo1->fileID > pInfo2->fileID) ? 
1 : -1; -} - -/** - * open a data files and header file for metric meta query - * - * @param pVnodeFiles - * @param fid - * @param index - */ -static FORCE_INLINE void vnodeStoreFileId(SQueryFilesInfo *pVnodeFiles, int32_t fid, int32_t index) { - pVnodeFiles->pFileInfo[index].fileID = fid; -} - -static void vnodeRecordAllFiles(SQInfo *pQInfo, int32_t vnodeId) { - char suffix[] = ".head"; - - struct dirent *pEntry = NULL; - size_t alloc = 4; // default allocated size - - SQueryFilesInfo *pVnodeFilesInfo = &(pQInfo->pTableQuerySupporter->runtimeEnv.vnodeFileInfo); - pVnodeFilesInfo->vnodeId = vnodeId; - - sprintf(pVnodeFilesInfo->dbFilePathPrefix, "%s/vnode%d/db/", tsDirectory, vnodeId); - DIR *pDir = opendir(pVnodeFilesInfo->dbFilePathPrefix); - if (pDir == NULL) { - dError("QInfo:%p failed to open directory:%s, %s", pQInfo, pVnodeFilesInfo->dbFilePathPrefix, strerror(errno)); - return; - } - - pVnodeFilesInfo->pFileInfo = calloc(1, sizeof(SHeaderFileInfo) * alloc); - SVnodeObj *pVnode = &vnodeList[vnodeId]; - - while ((pEntry = readdir(pDir)) != NULL) { - if ((pEntry->d_name[0] == '.' 
&& pEntry->d_name[1] == '\0') || (strcmp(pEntry->d_name, "..") == 0)) { - continue; - } - - if (pEntry->d_type & DT_DIR) { - continue; - } - - size_t len = strlen(pEntry->d_name); - if (strcasecmp(&pEntry->d_name[len - 5], suffix) != 0) { - continue; - } - - int32_t vid = 0; - int32_t fid = 0; - sscanf(pEntry->d_name, "v%df%d", &vid, &fid); - if (vid != vnodeId) { /* ignore error files */ - dError("QInfo:%p error data file:%s in vid:%d, ignore", pQInfo, pEntry->d_name, vnodeId); - continue; - } - - int32_t firstFid = pVnode->fileId - pVnode->numOfFiles + 1; - if (fid > pVnode->fileId || fid < firstFid) { - dError("QInfo:%p error data file:%s in vid:%d, fid:%d, fid range:%d-%d", pQInfo, pEntry->d_name, vnodeId, fid, - firstFid, pVnode->fileId); - continue; - } - - assert(fid >= 0 && vid >= 0); - - if (++pVnodeFilesInfo->numOfFiles > alloc) { - alloc = alloc << 1U; - pVnodeFilesInfo->pFileInfo = realloc(pVnodeFilesInfo->pFileInfo, alloc * sizeof(SHeaderFileInfo)); - memset(&pVnodeFilesInfo->pFileInfo[alloc >> 1U], 0, (alloc >> 1U) * sizeof(SHeaderFileInfo)); - } - - int32_t index = pVnodeFilesInfo->numOfFiles - 1; - vnodeStoreFileId(pVnodeFilesInfo, fid, index); - } - - closedir(pDir); - - dTrace("QInfo:%p find %d data files in %s to be checked", pQInfo, pVnodeFilesInfo->numOfFiles, - pVnodeFilesInfo->dbFilePathPrefix); - - /* order the files information according their names */ - qsort(pVnodeFilesInfo->pFileInfo, (size_t)pVnodeFilesInfo->numOfFiles, sizeof(SHeaderFileInfo), - file_order_comparator); -} - -static void updateOffsetVal(SQueryRuntimeEnv *pRuntimeEnv, SBlockInfo *pBlockInfo, void *pBlock) { - SQuery *pQuery = pRuntimeEnv->pQuery; - - /* - * The actually qualified points that can be skipped needs to be calculated if query is - * done in current data block - */ - if ((pQuery->ekey <= pBlockInfo->keyLast && QUERY_IS_ASC_QUERY(pQuery)) || - (pQuery->ekey >= pBlockInfo->keyFirst && !QUERY_IS_ASC_QUERY(pQuery))) { - // force load timestamp data blocks - if 
(IS_DISK_DATA_BLOCK(pQuery)) { - getTimestampInDiskBlock(pRuntimeEnv, 0); - } - - // update the pQuery->limit.offset value, and pQuery->pos value - TSKEY *keys = (TSKEY *)pRuntimeEnv->primaryColBuffer->data; - - int32_t i = 0; - if (QUERY_IS_ASC_QUERY(pQuery)) { - for (i = pQuery->pos; i < pBlockInfo->size && pQuery->limit.offset > 0; ++i) { - if (keys[i] <= pQuery->ekey) { - pQuery->limit.offset -= 1; - } else { - break; - } - } - - } else { - for (i = pQuery->pos; i >= 0 && pQuery->limit.offset > 0; --i) { - if (keys[i] >= pQuery->ekey) { - pQuery->limit.offset -= 1; - } else { - break; - } - } - } - - if (((i == pBlockInfo->size || keys[i] > pQuery->ekey) && QUERY_IS_ASC_QUERY(pQuery)) || - ((i < 0 || keys[i] < pQuery->ekey) && !QUERY_IS_ASC_QUERY(pQuery))) { - setQueryStatus(pQuery, QUERY_COMPLETED); - pQuery->pos = -1; - } else { - pQuery->pos = i; - } - } else { - if (QUERY_IS_ASC_QUERY(pQuery)) { - pQuery->pos += pQuery->limit.offset; - } else { - pQuery->pos -= pQuery->limit.offset; - } - - assert(pQuery->pos >= 0 && pQuery->pos <= pBlockInfo->size - 1); - - if (IS_DISK_DATA_BLOCK(pQuery)) { - pQuery->skey = getTimestampInDiskBlock(pRuntimeEnv, pQuery->pos); - } else { - pQuery->skey = getTimestampInCacheBlock(pRuntimeEnv, pBlock, pQuery->pos); - } - - // update the offset value - pQuery->lastKey = pQuery->skey; - pQuery->limit.offset = 0; - } -} - -// todo ignore the avg/sum/min/max/count/stddev/top/bottom functions, of which -// the scan order is not matter -static bool onlyOneQueryType(SQuery *pQuery, int32_t functId, int32_t functIdDst) { - for (int32_t i = 0; i < pQuery->numOfOutputCols; ++i) { - int32_t functionId = pQuery->pSelectExpr[i].pBase.functionId; - - if (functionId == TSDB_FUNC_TS || functionId == TSDB_FUNC_TS_DUMMY || functionId == TSDB_FUNC_TAG || - functionId == TSDB_FUNC_TAG_DUMMY) { - continue; - } - - if (functionId != functId && functionId != functIdDst) { - return false; - } - } - - return true; -} - -static bool 
onlyFirstQuery(SQuery *pQuery) { return onlyOneQueryType(pQuery, TSDB_FUNC_FIRST, TSDB_FUNC_FIRST_DST); } - -static bool onlyLastQuery(SQuery *pQuery) { return onlyOneQueryType(pQuery, TSDB_FUNC_LAST, TSDB_FUNC_LAST_DST); } - -static void changeExecuteScanOrder(SQuery *pQuery, bool metricQuery) { - // in case of point-interpolation query, use asc order scan - char msg[] = - "QInfo:%p scan order changed for %s query, old:%d, new:%d, qrange exchanged, old qrange:%" PRId64 "-%" PRId64 - ", new qrange:%" PRId64 "-%" PRId64; - - // todo handle the case the the order irrelevant query type mixed up with order critical query type - // descending order query for last_row query - if (isFirstLastRowQuery(pQuery)) { - dTrace("QInfo:%p scan order changed for last_row query, old:%d, new:%d", GET_QINFO_ADDR(pQuery), - pQuery->order.order, TSQL_SO_DESC); - - pQuery->order.order = TSQL_SO_DESC; - - int64_t skey = MIN(pQuery->skey, pQuery->ekey); - int64_t ekey = MAX(pQuery->skey, pQuery->ekey); - - pQuery->skey = ekey; - pQuery->ekey = skey; - - return; - } - - if (isPointInterpoQuery(pQuery) && pQuery->intervalTime == 0) { - if (!QUERY_IS_ASC_QUERY(pQuery)) { - dTrace(msg, GET_QINFO_ADDR(pQuery), "interp", pQuery->order.order, TSQL_SO_ASC, pQuery->skey, pQuery->ekey, - pQuery->ekey, pQuery->skey); - SWAP(pQuery->skey, pQuery->ekey, TSKEY); - } - - pQuery->order.order = TSQL_SO_ASC; - return; - } - - if (pQuery->intervalTime == 0) { - if (onlyFirstQuery(pQuery)) { - if (!QUERY_IS_ASC_QUERY(pQuery)) { - dTrace(msg, GET_QINFO_ADDR(pQuery), "only-first", pQuery->order.order, TSQL_SO_ASC, pQuery->skey, pQuery->ekey, - pQuery->ekey, pQuery->skey); - - SWAP(pQuery->skey, pQuery->ekey, TSKEY); - } - - pQuery->order.order = TSQL_SO_ASC; - } else if (onlyLastQuery(pQuery)) { - if (QUERY_IS_ASC_QUERY(pQuery)) { - dTrace(msg, GET_QINFO_ADDR(pQuery), "only-last", pQuery->order.order, TSQL_SO_DESC, pQuery->skey, pQuery->ekey, - pQuery->ekey, pQuery->skey); - - SWAP(pQuery->skey, pQuery->ekey, 
TSKEY); - } - - pQuery->order.order = TSQL_SO_DESC; - } - - } else { // interval query - if (metricQuery) { - if (onlyFirstQuery(pQuery)) { - if (!QUERY_IS_ASC_QUERY(pQuery)) { - dTrace(msg, GET_QINFO_ADDR(pQuery), "only-first stable", pQuery->order.order, TSQL_SO_ASC, pQuery->skey, - pQuery->ekey, pQuery->ekey, pQuery->skey); - - SWAP(pQuery->skey, pQuery->ekey, TSKEY); - } - - pQuery->order.order = TSQL_SO_ASC; - } else if (onlyLastQuery(pQuery)) { - if (QUERY_IS_ASC_QUERY(pQuery)) { - dTrace(msg, GET_QINFO_ADDR(pQuery), "only-last stable", pQuery->order.order, TSQL_SO_DESC, pQuery->skey, - pQuery->ekey, pQuery->ekey, pQuery->skey); - - SWAP(pQuery->skey, pQuery->ekey, TSKEY); - } - - pQuery->order.order = TSQL_SO_DESC; - } - } - } -} - -static int32_t doSkipDataBlock(SQueryRuntimeEnv *pRuntimeEnv) { - SMeterObj * pMeterObj = pRuntimeEnv->pMeterObj; - SQuery * pQuery = pRuntimeEnv->pQuery; - int32_t step = GET_FORWARD_DIRECTION_FACTOR(pQuery->order.order); - __block_search_fn_t searchFn = vnodeSearchKeyFunc[pMeterObj->searchAlgorithm]; - - while (1) { - moveToNextBlock(pRuntimeEnv, step, searchFn, false); - if (Q_STATUS_EQUAL(pQuery->over, QUERY_NO_DATA_TO_CHECK)) { - break; - } - - void *pBlock = getGenericDataBlock(pMeterObj, pRuntimeEnv, pQuery->slot); - assert(pBlock != NULL); - - SBlockInfo blockInfo = getBlockInfo(pRuntimeEnv); - - int32_t maxReads = (QUERY_IS_ASC_QUERY(pQuery)) ? blockInfo.size - pQuery->pos : pQuery->pos + 1; - assert(maxReads >= 0); - - if (pQuery->limit.offset < maxReads || (pQuery->ekey <= blockInfo.keyLast && QUERY_IS_ASC_QUERY(pQuery)) || - (pQuery->ekey >= blockInfo.keyFirst && !QUERY_IS_ASC_QUERY(pQuery))) { // start position in current block - updateOffsetVal(pRuntimeEnv, &blockInfo, pBlock); - break; - } else { - pQuery->limit.offset -= maxReads; - pQuery->lastKey = (QUERY_IS_ASC_QUERY(pQuery)) ? 
blockInfo.keyLast : blockInfo.keyFirst; - pQuery->lastKey += step; - - qTrace("QInfo:%p skip rows:%d, offset:%" PRId64 "", GET_QINFO_ADDR(pQuery), maxReads, pQuery->limit.offset); - } - } - - return 0; -} - -void forwardQueryStartPosition(SQueryRuntimeEnv *pRuntimeEnv) { - SQuery * pQuery = pRuntimeEnv->pQuery; - SMeterObj *pMeterObj = pRuntimeEnv->pMeterObj; - - if (pQuery->limit.offset <= 0) { - return; - } - - void *pBlock = getGenericDataBlock(pMeterObj, pRuntimeEnv, pQuery->slot); - assert(pBlock != NULL); - - SBlockInfo blockInfo = getBlockInfo(pRuntimeEnv); - - // get the qualified data that can be skipped - int32_t maxReads = (QUERY_IS_ASC_QUERY(pQuery)) ? blockInfo.size - pQuery->pos : pQuery->pos + 1; - - if (pQuery->limit.offset < maxReads || (pQuery->ekey <= blockInfo.keyLast && QUERY_IS_ASC_QUERY(pQuery)) || - (pQuery->ekey >= blockInfo.keyFirst && !QUERY_IS_ASC_QUERY(pQuery))) { // start position in current block - updateOffsetVal(pRuntimeEnv, &blockInfo, pBlock); - } else { - pQuery->limit.offset -= maxReads; - - // update the lastkey, since the following skip operation may traverse to another media. update the lastkey first. - pQuery->lastKey = (QUERY_IS_ASC_QUERY(pQuery)) ? blockInfo.keyLast + 1 : blockInfo.keyFirst - 1; - doSkipDataBlock(pRuntimeEnv); - } -} - -static bool forwardQueryStartPosIfNeeded(SQInfo *pQInfo, STableQuerySupportObj *pSupporter, bool dataInDisk, - bool dataInCache) { - SQuery * pQuery = &pQInfo->query; - SQueryRuntimeEnv *pRuntimeEnv = &pSupporter->runtimeEnv; - - /* if queried with value filter, do NOT forward query start position */ - if (pQuery->numOfFilterCols > 0 || pRuntimeEnv->pTSBuf != NULL) { - return true; - } - - if (pQuery->limit.offset > 0 && (!isTopBottomQuery(pQuery)) && pQuery->interpoType == TSDB_INTERPO_NONE) { - /* - * 1. for top/bottom query, the offset applies to the final result, not here - * 2. 
for interval without interpolation query we forward pQuery->intervalTime at a time for - * pQuery->limit.offset times. Since hole exists, pQuery->intervalTime*pQuery->limit.offset value is - * not valid. otherwise, we only forward pQuery->limit.offset number of points - */ - if (isIntervalQuery(pQuery)) { - int16_t step = GET_FORWARD_DIRECTION_FACTOR(pQuery->order.order); - __block_search_fn_t searchFn = vnodeSearchKeyFunc[pRuntimeEnv->pMeterObj->searchAlgorithm]; - SWindowResInfo * pWindowResInfo = &pRuntimeEnv->windowResInfo; - - TSKEY * primaryKey = (TSKEY *)pRuntimeEnv->primaryColBuffer->data; - STimeWindow win = getActiveTimeWindow(pWindowResInfo, pWindowResInfo->prevSKey, pQuery); - - while (pQuery->limit.offset > 0) { - SBlockInfo blockInfo = getBlockInfo(pRuntimeEnv); - - STimeWindow tw = win; - getNextTimeWindow(pQuery, &tw); - - // next time window starts from current data block - if ((tw.skey <= blockInfo.keyLast && QUERY_IS_ASC_QUERY(pQuery)) || - (tw.ekey >= blockInfo.keyFirst && !QUERY_IS_ASC_QUERY(pQuery))) { - - // query completed - if ((tw.skey > pQuery->ekey && QUERY_IS_ASC_QUERY(pQuery)) || - (tw.ekey < pQuery->ekey && !QUERY_IS_ASC_QUERY(pQuery))) { - setQueryStatus(pQuery, QUERY_COMPLETED); - break; - } - - // check its position in this block to make sure this time window covers data. 
- if (IS_DISK_DATA_BLOCK(pQuery)) { - getTimestampInDiskBlock(pRuntimeEnv, 0); - } - - tw = win; - int32_t startPos = getNextQualifiedWindow(pRuntimeEnv, &tw, pWindowResInfo, &blockInfo, primaryKey, searchFn); - assert(startPos >= 0); - - pQuery->limit.offset -= 1; - - // set the abort info - pQuery->pos = startPos; - pQuery->lastKey = primaryKey[startPos]; - pWindowResInfo->prevSKey = tw.skey; - win = tw; - continue; - } else { - moveToNextBlock(pRuntimeEnv, step, searchFn, false); - if (Q_STATUS_EQUAL(pQuery->over, QUERY_NO_DATA_TO_CHECK)) { - break; - } - - blockInfo = getBlockInfo(pRuntimeEnv); - if ((blockInfo.keyFirst > pQuery->ekey && QUERY_IS_ASC_QUERY(pQuery)) || - (blockInfo.keyLast < pQuery->ekey && !QUERY_IS_ASC_QUERY(pQuery))) { - setQueryStatus(pQuery, QUERY_COMPLETED); - break; - } - - // set the window that start from the next data block - TSKEY key = (QUERY_IS_ASC_QUERY(pQuery))? blockInfo.keyFirst:blockInfo.keyLast; - STimeWindow n = getActiveTimeWindow(pWindowResInfo, key, pQuery); - - // next data block are still covered by current time window - if (n.skey == win.skey && n.ekey == win.ekey) { - // do nothing - } else { - pQuery->limit.offset -= 1; - - // query completed - if ((n.skey > pQuery->ekey && QUERY_IS_ASC_QUERY(pQuery)) || - (n.ekey < pQuery->ekey && !QUERY_IS_ASC_QUERY(pQuery))) { - setQueryStatus(pQuery, QUERY_COMPLETED); - break; - } - - // set the abort info - pQuery->pos = QUERY_IS_ASC_QUERY(pQuery)? 0:blockInfo.size-1; - pQuery->lastKey = QUERY_IS_ASC_QUERY(pQuery)? 
blockInfo.keyFirst:blockInfo.keyLast; - pWindowResInfo->prevSKey = n.skey; - - win = n; - - if (pQuery->limit.offset == 0 && IS_DISK_DATA_BLOCK(pQuery)) { - getTimestampInDiskBlock(pRuntimeEnv, 0); - } - } - } - -// if (win.ekey <= blockInfo.keyLast) { -// pQuery->limit.offset -= 1; -// -// if (win.ekey == blockInfo.keyLast) { -// moveToNextBlock(pRuntimeEnv, step, searchFn, false); -// if (Q_STATUS_EQUAL(pQuery->over, QUERY_NO_DATA_TO_CHECK)) { -// break; -// } -// -// // next block does not included in time range, abort query -// blockInfo = getBlockInfo(pRuntimeEnv); -// if ((blockInfo.keyFirst > pQuery->ekey && QUERY_IS_ASC_QUERY(pQuery)) || -// (blockInfo.keyLast < pQuery->ekey && !QUERY_IS_ASC_QUERY(pQuery))) { -// setQueryStatus(pQuery, QUERY_COMPLETED); -// break; -// } -// -// // set the window that start from the next data block -// win = getActiveTimeWindow(pWindowResInfo, blockInfo.keyFirst, pQuery); -// } else { -// // the time window is closed in current data block, load disk file block into memory to -// // check the next time window -// if (IS_DISK_DATA_BLOCK(pQuery)) { -// getTimestampInDiskBlock(pRuntimeEnv, 0); -// } -// -// STimeWindow nextWin = win; -// int32_t startPos = -// getNextQualifiedWindow(pRuntimeEnv, &nextWin, pWindowResInfo, &blockInfo, primaryKey, searchFn); -// -// if (startPos < 0) { // failed to find the qualified time window -// assert((nextWin.skey > pQuery->ekey && QUERY_IS_ASC_QUERY(pQuery)) || -// (nextWin.ekey < pQuery->ekey && !QUERY_IS_ASC_QUERY(pQuery))); -// -// setQueryStatus(pQuery, QUERY_COMPLETED); -// break; -// } else { // set the abort info -// pQuery->pos = startPos; -// pQuery->lastKey = primaryKey[startPos]; -// win = nextWin; -// } -// } -// -// continue; -// } -// -// moveToNextBlock(pRuntimeEnv, step, searchFn, false); -// if (Q_STATUS_EQUAL(pQuery->over, QUERY_NO_DATA_TO_CHECK)) { -// break; -// } -// -// blockInfo = getBlockInfo(pRuntimeEnv); -// if ((blockInfo.keyFirst > pQuery->ekey && 
QUERY_IS_ASC_QUERY(pQuery)) || -// (blockInfo.keyLast < pQuery->ekey && !QUERY_IS_ASC_QUERY(pQuery))) { -// setQueryStatus(pQuery, QUERY_COMPLETED); -// break; -// } - } - - if (Q_STATUS_EQUAL(pQuery->over, QUERY_NO_DATA_TO_CHECK | QUERY_COMPLETED) || pQuery->limit.offset > 0) { - setQueryStatus(pQuery, QUERY_COMPLETED); - - sem_post(&pQInfo->dataReady); // hack for next read for empty return; - pQInfo->over = 1; - return false; - } else { - if (IS_DISK_DATA_BLOCK(pQuery)) { - getTimestampInDiskBlock(pRuntimeEnv, 0); - } - } - } else { // forward the start position for projection query - forwardQueryStartPosition(&pSupporter->runtimeEnv); - if (Q_STATUS_EQUAL(pQuery->over, QUERY_NO_DATA_TO_CHECK)) { - setQueryStatus(pQuery, QUERY_COMPLETED); - - sem_post(&pQInfo->dataReady); // hack for next read for empty return; - pQInfo->over = 1; - return false; - } - } - } - - return true; -} - -static void doSetInterpVal(SQLFunctionCtx *pCtx, TSKEY ts, int16_t type, int32_t index, char *data) { - assert(pCtx->param[index].pz == NULL); - - int32_t len = 0; - size_t t = 0; - - if (type == TSDB_DATA_TYPE_BINARY) { - t = strlen(data); - - len = t + 1 + TSDB_KEYSIZE; - pCtx->param[index].pz = calloc(1, len); - } else if (type == TSDB_DATA_TYPE_NCHAR) { - t = wcslen((const wchar_t *)data); - - len = (t + 1) * TSDB_NCHAR_SIZE + TSDB_KEYSIZE; - pCtx->param[index].pz = calloc(1, len); - } else { - len = TSDB_KEYSIZE * 2; - pCtx->param[index].pz = malloc(len); - } - - pCtx->param[index].nType = TSDB_DATA_TYPE_BINARY; - - char *z = pCtx->param[index].pz; - *(TSKEY *)z = ts; - z += TSDB_KEYSIZE; - - switch (type) { - case TSDB_DATA_TYPE_FLOAT: - *(double *)z = GET_FLOAT_VAL(data); - break; - case TSDB_DATA_TYPE_DOUBLE: - *(double *)z = GET_DOUBLE_VAL(data); - break; - case TSDB_DATA_TYPE_INT: - case TSDB_DATA_TYPE_BOOL: - case TSDB_DATA_TYPE_BIGINT: - case TSDB_DATA_TYPE_TINYINT: - case TSDB_DATA_TYPE_SMALLINT: - case TSDB_DATA_TYPE_TIMESTAMP: - *(int64_t *)z = GET_INT64_VAL(data); - 
break; - case TSDB_DATA_TYPE_BINARY: - strncpy(z, data, t); - break; - case TSDB_DATA_TYPE_NCHAR: { - wcsncpy((wchar_t *)z, (const wchar_t *)data, t); - } break; - default: - assert(0); - } - - pCtx->param[index].nLen = len; -} - -/** - * param[1]: default value/previous value of specified timestamp - * param[2]: next value of specified timestamp - * param[3]: denotes if the result is a precious result or interpolation results - * - * @param pQInfo - * @param pSupporter - * @param pInterpoRaw - */ -void pointInterpSupporterSetData(SQInfo *pQInfo, SPointInterpoSupporter *pPointInterpSupport) { - // not point interpolation query, abort - if (!isPointInterpoQuery(&pQInfo->query)) { - return; - } - - SQuery * pQuery = &pQInfo->query; - STableQuerySupportObj *pSupporter = pQInfo->pTableQuerySupporter; - SQueryRuntimeEnv * pRuntimeEnv = &pSupporter->runtimeEnv; - - int32_t count = 1; - TSKEY key = *(TSKEY *)pPointInterpSupport->pNextPoint[0]; - - if (key == pSupporter->rawSKey) { - // the queried timestamp has value, return it directly without interpolation - for (int32_t i = 0; i < pQuery->numOfOutputCols; ++i) { - tVariantCreateFromBinary(&pRuntimeEnv->pCtx[i].param[3], (char *)&count, sizeof(count), TSDB_DATA_TYPE_INT); - - pRuntimeEnv->pCtx[i].param[0].i64Key = key; - pRuntimeEnv->pCtx[i].param[0].nType = TSDB_DATA_TYPE_BIGINT; - } - } else { - // set the direct previous(next) point for process - count = 2; - - if (pQuery->interpoType == TSDB_INTERPO_SET_VALUE) { - for (int32_t i = 0; i < pQuery->numOfOutputCols; ++i) { - SQLFunctionCtx *pCtx = &pRuntimeEnv->pCtx[i]; - - // only the function of interp needs the corresponding information - if (pCtx->functionId != TSDB_FUNC_INTERP) { - continue; - } - - pCtx->numOfParams = 4; - - SInterpInfo *pInterpInfo = (SInterpInfo *)pRuntimeEnv->pCtx[i].aOutputBuf; - pInterpInfo->pInterpDetail = calloc(1, sizeof(SInterpInfoDetail)); - - SInterpInfoDetail *pInterpDetail = pInterpInfo->pInterpDetail; - - // for primary timestamp 
column, set the flag - if (pQuery->pSelectExpr[i].pBase.colInfo.colId == PRIMARYKEY_TIMESTAMP_COL_INDEX) { - pInterpDetail->primaryCol = 1; - } - - tVariantCreateFromBinary(&pCtx->param[3], (char *)&count, sizeof(count), TSDB_DATA_TYPE_INT); - - if (isNull((char *)&pQuery->defaultVal[i], pCtx->inputType)) { - pCtx->param[1].nType = TSDB_DATA_TYPE_NULL; - } else { - tVariantCreateFromBinary(&pCtx->param[1], (char *)&pQuery->defaultVal[i], pCtx->inputBytes, pCtx->inputType); - } - - pInterpDetail->ts = pSupporter->rawSKey; - pInterpDetail->type = pQuery->interpoType; - } - } else { - TSKEY prevKey = *(TSKEY *)pPointInterpSupport->pPrevPoint[0]; - TSKEY nextKey = *(TSKEY *)pPointInterpSupport->pNextPoint[0]; - - for (int32_t i = 0; i < pQuery->numOfOutputCols; ++i) { - SQLFunctionCtx *pCtx = &pRuntimeEnv->pCtx[i]; - - // tag column does not need the interp environment - if (pQuery->pSelectExpr[i].pBase.functionId == TSDB_FUNC_TAG) { - continue; - } - - int32_t colInBuf = pQuery->pSelectExpr[i].pBase.colInfo.colIdxInBuf; - - SInterpInfo *pInterpInfo = (SInterpInfo *)pRuntimeEnv->pCtx[i].aOutputBuf; - - pInterpInfo->pInterpDetail = calloc(1, sizeof(SInterpInfoDetail)); - SInterpInfoDetail *pInterpDetail = pInterpInfo->pInterpDetail; - - int32_t type = GET_COLUMN_TYPE(pQuery, i); - - // for primary timestamp column, set the flag - if (pQuery->pSelectExpr[i].pBase.colInfo.colId == PRIMARYKEY_TIMESTAMP_COL_INDEX) { - pInterpDetail->primaryCol = 1; - } else { - doSetInterpVal(pCtx, prevKey, type, 1, pPointInterpSupport->pPrevPoint[colInBuf]); - doSetInterpVal(pCtx, nextKey, type, 2, pPointInterpSupport->pNextPoint[colInBuf]); - } - - tVariantCreateFromBinary(&pRuntimeEnv->pCtx[i].param[3], (char *)&count, sizeof(count), TSDB_DATA_TYPE_INT); - - pInterpDetail->ts = pSupporter->rawSKey; - pInterpDetail->type = pQuery->interpoType; - } - } - } -} - -void pointInterpSupporterInit(SQuery *pQuery, SPointInterpoSupporter *pInterpoSupport) { - if (isPointInterpoQuery(pQuery)) { - 
pInterpoSupport->pPrevPoint = malloc(pQuery->numOfCols * POINTER_BYTES); - pInterpoSupport->pNextPoint = malloc(pQuery->numOfCols * POINTER_BYTES); - - pInterpoSupport->numOfCols = pQuery->numOfCols; - - /* get appropriated size for one row data source*/ - int32_t len = 0; - for (int32_t i = 0; i < pQuery->numOfCols; ++i) { - len += pQuery->colList[i].data.bytes; - } - - assert(PRIMARY_TSCOL_LOADED(pQuery)); - - void *prev = calloc(1, len); - void *next = calloc(1, len); - - int32_t offset = 0; - - for (int32_t i = 0; i < pQuery->numOfCols; ++i) { - pInterpoSupport->pPrevPoint[i] = prev + offset; - pInterpoSupport->pNextPoint[i] = next + offset; - - offset += pQuery->colList[i].data.bytes; - } - } -} - -void pointInterpSupporterDestroy(SPointInterpoSupporter *pPointInterpSupport) { - if (pPointInterpSupport->numOfCols <= 0 || pPointInterpSupport->pPrevPoint == NULL) { - return; - } - - tfree(pPointInterpSupport->pPrevPoint[0]); - tfree(pPointInterpSupport->pNextPoint[0]); - - tfree(pPointInterpSupport->pPrevPoint); - tfree(pPointInterpSupport->pNextPoint); - - pPointInterpSupport->numOfCols = 0; -} - -static void allocMemForInterpo(STableQuerySupportObj *pSupporter, SQuery *pQuery, SMeterObj *pMeterObj) { - if (pQuery->interpoType != TSDB_INTERPO_NONE) { - assert(isIntervalQuery(pQuery) || (pQuery->intervalTime == 0 && isPointInterpoQuery(pQuery))); - - if (isIntervalQuery(pQuery)) { - pSupporter->runtimeEnv.pInterpoBuf = malloc(POINTER_BYTES * pQuery->numOfOutputCols); - - for (int32_t i = 0; i < pQuery->numOfOutputCols; ++i) { - pSupporter->runtimeEnv.pInterpoBuf[i] = - calloc(1, sizeof(tFilePage) + pQuery->pSelectExpr[i].resBytes * pMeterObj->pointsPerFileBlock); - } - } - } -} - -static int32_t getInitialPageNum(STableQuerySupportObj *pSupporter) { - SQuery *pQuery = pSupporter->runtimeEnv.pQuery; - - int32_t num = 0; - - if (isGroupbyNormalCol(pQuery->pGroupbyExpr)) { - num = 128; - } else if (isIntervalQuery(pQuery)) { // time window query, allocate one page 
for each table - num = pSupporter->numOfMeters; - } else { // for super table query, one page for each subset - num = pSupporter->pSidSet->numOfSubSet; - } - - assert(num > 0); - - return num; -} - -static int32_t allocateRuntimeEnvBuf(SQueryRuntimeEnv *pRuntimeEnv, SMeterObj *pMeterObj) { - SQuery *pQuery = pRuntimeEnv->pQuery; - - // To make sure the start position of each buffer is aligned to 4bytes in 32-bit ARM system. - for (int32_t i = 0; i < pQuery->numOfCols; ++i) { - int32_t bytes = pQuery->colList[i].data.bytes; - pRuntimeEnv->colDataBuffer[i] = calloc(1, sizeof(SData) + EXTRA_BYTES + pMeterObj->pointsPerFileBlock * bytes); - if (pRuntimeEnv->colDataBuffer[i] == NULL) { - goto _error_clean; - } - } - - // record the maximum column width among columns of this meter/metric - int32_t maxColWidth = pQuery->colList[0].data.bytes; - for (int32_t i = 1; i < pQuery->numOfCols; ++i) { - int32_t bytes = pQuery->colList[i].data.bytes; - if (bytes > maxColWidth) { - maxColWidth = bytes; - } - } - - pRuntimeEnv->primaryColBuffer = NULL; - if (PRIMARY_TSCOL_LOADED(pQuery)) { - pRuntimeEnv->primaryColBuffer = pRuntimeEnv->colDataBuffer[0]; - } else { - pRuntimeEnv->primaryColBuffer = - (SData *)malloc(pMeterObj->pointsPerFileBlock * TSDB_KEYSIZE + sizeof(SData) + EXTRA_BYTES); - } - - pRuntimeEnv->unzipBufSize = (size_t)(maxColWidth * pMeterObj->pointsPerFileBlock + EXTRA_BYTES); // plus extra_bytes - - pRuntimeEnv->unzipBuffer = (char *)calloc(1, pRuntimeEnv->unzipBufSize); - pRuntimeEnv->secondaryUnzipBuffer = (char *)calloc(1, pRuntimeEnv->unzipBufSize); - - if (pRuntimeEnv->unzipBuffer == NULL || pRuntimeEnv->secondaryUnzipBuffer == NULL || - pRuntimeEnv->primaryColBuffer == NULL) { - goto _error_clean; - } - - return TSDB_CODE_SUCCESS; - -_error_clean: - for (int32_t i = 0; i < pRuntimeEnv->pQuery->numOfCols; ++i) { - tfree(pRuntimeEnv->colDataBuffer[i]); - } - - tfree(pRuntimeEnv->unzipBuffer); - tfree(pRuntimeEnv->secondaryUnzipBuffer); - - if 
(!PRIMARY_TSCOL_LOADED(pQuery)) { - tfree(pRuntimeEnv->primaryColBuffer); - } - - return TSDB_CODE_SERV_OUT_OF_MEMORY; -} - -static int32_t getRowParamForMultiRowsOutput(SQuery *pQuery, bool isSTableQuery) { - int32_t rowparam = 1; - - if (isTopBottomQuery(pQuery) && (!isSTableQuery)) { - rowparam = pQuery->pSelectExpr[1].pBase.arg->argValue.i64; - } - - return rowparam; -} - -static int32_t getNumOfRowsInResultPage(SQuery *pQuery, bool isSTableQuery) { - int32_t rowSize = pQuery->rowSize * getRowParamForMultiRowsOutput(pQuery, isSTableQuery); - return (DEFAULT_INTERN_BUF_SIZE - sizeof(tFilePage)) / rowSize; -} - -static char *getPosInResultPage(SQueryRuntimeEnv *pRuntimeEnv, int32_t columnIndex, SWindowResult *pResult) { - assert(pResult != NULL && pRuntimeEnv != NULL); - - SQuery * pQuery = pRuntimeEnv->pQuery; - tFilePage *page = getResultBufferPageById(pRuntimeEnv->pResultBuf, pResult->pos.pageId); - - int32_t numOfRows = getNumOfRowsInResultPage(pQuery, pRuntimeEnv->stableQuery); - int32_t realRowId = pResult->pos.rowId * getRowParamForMultiRowsOutput(pQuery, pRuntimeEnv->stableQuery); - - return ((char *)page->data) + pRuntimeEnv->offset[columnIndex] * numOfRows + - pQuery->pSelectExpr[columnIndex].resBytes * realRowId; -} - -int32_t vnodeQueryTablePrepare(SQInfo *pQInfo, SMeterObj *pMeterObj, STableQuerySupportObj *pSupporter, void *param) { - SQuery *pQuery = &pQInfo->query; - int32_t code = TSDB_CODE_SUCCESS; - - /* - * only the successful complete requries the sem_post/over = 1 operations. 
- */ - if ((QUERY_IS_ASC_QUERY(pQuery) && (pQuery->skey > pQuery->ekey)) || - (!QUERY_IS_ASC_QUERY(pQuery) && (pQuery->ekey > pQuery->skey))) { - dTrace("QInfo:%p no result in time range %" PRId64 "-%" PRId64 ", order %d", pQInfo, pQuery->skey, pQuery->ekey, - pQuery->order.order); - - sem_post(&pQInfo->dataReady); - pQInfo->over = 1; - return TSDB_CODE_SUCCESS; - } - - setScanLimitationByResultBuffer(pQuery); - changeExecuteScanOrder(pQuery, false); - - pQInfo->over = 0; - pQInfo->pointsRead = 0; - pQuery->pointsRead = 0; - - // dataInCache requires lastKey value - pQuery->lastKey = pQuery->skey; - - doInitQueryFileInfoFD(&pSupporter->runtimeEnv.vnodeFileInfo); - - vnodeInitDataBlockInfo(&pSupporter->runtimeEnv.loadBlockInfo); - vnodeInitLoadCompBlockInfo(&pSupporter->runtimeEnv.loadCompBlockInfo); - - // check data in file or cache - bool dataInCache = true; - bool dataInDisk = true; - - SQueryRuntimeEnv *pRuntimeEnv = &pSupporter->runtimeEnv; - pRuntimeEnv->pQuery = pQuery; - pRuntimeEnv->pMeterObj = pMeterObj; - - if ((code = allocateRuntimeEnvBuf(pRuntimeEnv, pMeterObj)) != TSDB_CODE_SUCCESS) { - return code; - } - - vnodeCheckIfDataExists(pRuntimeEnv, pMeterObj, &dataInDisk, &dataInCache); - - /* data in file or cache is not qualified for the query. abort */ - if (!(dataInCache || dataInDisk)) { - dTrace("QInfo:%p no result in query", pQInfo); - sem_post(&pQInfo->dataReady); - pQInfo->over = 1; - return code; - } - - pRuntimeEnv->pTSBuf = param; - pRuntimeEnv->cur.vnodeIndex = -1; - if (param != NULL) { - int16_t order = (pQuery->order.order == pRuntimeEnv->pTSBuf->tsOrder) ? 
TSQL_SO_ASC : TSQL_SO_DESC; - tsBufSetTraverseOrder(pRuntimeEnv->pTSBuf, order); - } - - // create runtime environment - code = setupQueryRuntimeEnv(pMeterObj, pQuery, &pSupporter->runtimeEnv, NULL, pQuery->order.order, false); - if (code != TSDB_CODE_SUCCESS) { - return code; - } - - vnodeRecordAllFiles(pQInfo, pMeterObj->vnode); - - pRuntimeEnv->numOfRowsPerPage = getNumOfRowsInResultPage(pQuery, false); - if (isGroupbyNormalCol(pQuery->pGroupbyExpr) || isIntervalQuery(pQuery)) { - int32_t rows = getInitialPageNum(pSupporter); - - code = createDiskbasedResultBuffer(&pRuntimeEnv->pResultBuf, rows, pQuery->rowSize); - if (code != TSDB_CODE_SUCCESS) { - return code; - } - - int16_t type = TSDB_DATA_TYPE_NULL; - if (isGroupbyNormalCol(pQuery->pGroupbyExpr)) { - type = getGroupbyColumnType(pQuery, pQuery->pGroupbyExpr); - } else { - type = TSDB_DATA_TYPE_TIMESTAMP; - } - - initWindowResInfo(&pRuntimeEnv->windowResInfo, pRuntimeEnv, rows, 4096, type); - } - - pSupporter->rawSKey = pQuery->skey; - pSupporter->rawEKey = pQuery->ekey; - - /* query on single table */ - pSupporter->numOfMeters = 1; - setQueryStatus(pQuery, QUERY_NOT_COMPLETED); - - SPointInterpoSupporter interpInfo = {0}; - pointInterpSupporterInit(pQuery, &interpInfo); - - /* - * in case of last_row query without query range, we set the query timestamp to - * pMeterObj->lastKey. Otherwise, keep the initial query time range unchanged. 
- */ - - if (isFirstLastRowQuery(pQuery) && notHasQueryTimeRange(pQuery)) { - if (!normalizeUnBoundLastRowQuery(pSupporter, &interpInfo)) { - sem_post(&pQInfo->dataReady); - pQInfo->over = 1; - - pointInterpSupporterDestroy(&interpInfo); - return TSDB_CODE_SUCCESS; - } - } else { // find the skey and ekey in case of sliding query - if (isIntervalQuery(pQuery)) { - STimeWindow win = {0}; - - // find the minimum value for descending order query - TSKEY minKey = -1; - if (!QUERY_IS_ASC_QUERY(pQuery)) { - minKey = getGreaterEqualTimestamp(pRuntimeEnv); - } - - int64_t skey = 0; - if ((normalizedFirstQueryRange(dataInDisk, dataInCache, pSupporter, &interpInfo, &skey) == false) || - (isFixedOutputQuery(pQuery) && !isTopBottomQuery(pQuery) && (pQuery->limit.offset > 0)) || - (isTopBottomQuery(pQuery) && pQuery->limit.offset >= pQuery->pSelectExpr[1].pBase.arg[0].argValue.i64)) { - sem_post(&pQInfo->dataReady); - pQInfo->over = 1; - - pointInterpSupporterDestroy(&interpInfo); - return TSDB_CODE_SUCCESS; - } - - if (!QUERY_IS_ASC_QUERY(pQuery)) { - win.skey = minKey; - win.ekey = skey; - } else { - win.skey = skey; - win.ekey = pQuery->ekey; - } - - TSKEY skey1, ekey1; - TSKEY windowSKey = 0, windowEKey = 0; - - doGetAlignedIntervalQueryRangeImpl(pQuery, win.skey, win.skey, win.ekey, &skey1, &ekey1, &windowSKey, - &windowEKey); - pRuntimeEnv->windowResInfo.startTime = windowSKey; - - if (QUERY_IS_ASC_QUERY(pQuery)) { - pRuntimeEnv->windowResInfo.prevSKey = windowSKey; - } else { - pRuntimeEnv->windowResInfo.prevSKey = - windowSKey + ((win.ekey - windowSKey) / pQuery->slidingTime) * pQuery->slidingTime; - } - - pQuery->over = QUERY_NOT_COMPLETED; - } else { - int64_t ekey = 0; - if ((normalizedFirstQueryRange(dataInDisk, dataInCache, pSupporter, &interpInfo, &ekey) == false) || - (isFixedOutputQuery(pQuery) && !isTopBottomQuery(pQuery) && (pQuery->limit.offset > 0)) || - (isTopBottomQuery(pQuery) && pQuery->limit.offset >= pQuery->pSelectExpr[1].pBase.arg[0].argValue.i64)) { 
- sem_post(&pQInfo->dataReady); - pQInfo->over = 1; - - pointInterpSupporterDestroy(&interpInfo); - return TSDB_CODE_SUCCESS; - } - } - } - - /* - * here we set the value for before and after the specified time into the - * parameter for interpolation query - */ - pointInterpSupporterSetData(pQInfo, &interpInfo); - pointInterpSupporterDestroy(&interpInfo); - - if (!forwardQueryStartPosIfNeeded(pQInfo, pSupporter, dataInDisk, dataInCache)) { - return TSDB_CODE_SUCCESS; - } - - int64_t rs = taosGetIntervalStartTimestamp(pSupporter->rawSKey, pQuery->intervalTime, pQuery->intervalTimeUnit, - pQuery->precision); - taosInitInterpoInfo(&pRuntimeEnv->interpoInfo, pQuery->order.order, rs, 0, 0); - allocMemForInterpo(pSupporter, pQuery, pMeterObj); - - if (!isPointInterpoQuery(pQuery)) { - assert(pQuery->pos >= 0 && pQuery->slot >= 0); - } - - // the pQuery->skey is changed during normalizedFirstQueryRange, so set the newest lastkey value - pQuery->lastKey = pQuery->skey; - pRuntimeEnv->stableQuery = false; - - return TSDB_CODE_SUCCESS; -} - -void vnodeQueryFreeQInfoEx(SQInfo *pQInfo) { - if (pQInfo == NULL || pQInfo->pTableQuerySupporter == NULL) { - return; - } - - SQuery * pQuery = &pQInfo->query; - STableQuerySupportObj *pSupporter = pQInfo->pTableQuerySupporter; - - teardownQueryRuntimeEnv(&pSupporter->runtimeEnv); - tfree(pSupporter->pMeterSidExtInfo); - - if (pSupporter->pMetersHashTable != NULL) { - taosHashCleanup(pSupporter->pMetersHashTable); - pSupporter->pMetersHashTable = NULL; - } - - if (pSupporter->pSidSet != NULL || isGroupbyNormalCol(pQInfo->query.pGroupbyExpr) || - isIntervalQuery(pQuery)) { - int32_t size = 0; - if (isGroupbyNormalCol(pQInfo->query.pGroupbyExpr) || isIntervalQuery(pQuery)) { - size = 10000; - } else if (pSupporter->pSidSet != NULL) { - size = pSupporter->pSidSet->numOfSubSet; - } - - for (int32_t i = 0; i < size; ++i) { - // destroyTimeWindowRes(&pSupporter->pResult[i], pQInfo->query.numOfOutputCols); - } - } - - 
tSidSetDestroy(&pSupporter->pSidSet); - - if (pSupporter->pMeterDataInfo != NULL) { - for (int32_t j = 0; j < pSupporter->numOfMeters; ++j) { - destroyMeterQueryInfo(pSupporter->pMeterDataInfo[j].pMeterQInfo, pQuery->numOfOutputCols); - free(pSupporter->pMeterDataInfo[j].pBlock); - } - } - - tfree(pSupporter->pMeterDataInfo); - - tfree(pQInfo->pTableQuerySupporter); -} - -int32_t vnodeSTableQueryPrepare(SQInfo *pQInfo, SQuery *pQuery, void *param) { - STableQuerySupportObj *pSupporter = pQInfo->pTableQuerySupporter; - - if ((QUERY_IS_ASC_QUERY(pQuery) && (pQuery->skey > pQuery->ekey)) || - (!QUERY_IS_ASC_QUERY(pQuery) && (pQuery->ekey > pQuery->skey))) { - dTrace("QInfo:%p no result in time range %" PRId64 "-%" PRId64 ", order %d", pQInfo, pQuery->skey, pQuery->ekey, - pQuery->order.order); - - sem_post(&pQInfo->dataReady); - pQInfo->over = 1; - - return TSDB_CODE_SUCCESS; - } - - pQInfo->over = 0; - pQInfo->pointsRead = 0; - pQuery->pointsRead = 0; - - changeExecuteScanOrder(pQuery, true); - SQueryRuntimeEnv *pRuntimeEnv = &pSupporter->runtimeEnv; - - doInitQueryFileInfoFD(&pRuntimeEnv->vnodeFileInfo); - vnodeInitDataBlockInfo(&pRuntimeEnv->loadBlockInfo); - vnodeInitLoadCompBlockInfo(&pRuntimeEnv->loadCompBlockInfo); - - /* - * since we employ the output control mechanism in main loop. - * so, disable it during data block scan procedure. 
- */ - setScanLimitationByResultBuffer(pQuery); - - // save raw query range for applying to each subgroup - pSupporter->rawEKey = pQuery->ekey; - pSupporter->rawSKey = pQuery->skey; - pQuery->lastKey = pQuery->skey; - - // create runtime environment - SColumnModel *pTagSchemaInfo = pSupporter->pSidSet->pColumnModel; - - // get one queried meter - SMeterObj *pTable = getMeterObj(pSupporter->pMetersHashTable, pSupporter->pSidSet->pSids[0]->sid); - - pRuntimeEnv->pTSBuf = param; - pRuntimeEnv->cur.vnodeIndex = -1; - - // set the ts-comp file traverse order - if (param != NULL) { - int16_t order = (pQuery->order.order == pRuntimeEnv->pTSBuf->tsOrder) ? TSQL_SO_ASC : TSQL_SO_DESC; - tsBufSetTraverseOrder(pRuntimeEnv->pTSBuf, order); - } - - int32_t ret = setupQueryRuntimeEnv(pMeter, pQuery, &pSupporter->runtimeEnv, pTagSchemaInfo, TSQL_SO_ASC, true); - if (ret != TSDB_CODE_SUCCESS) { - return ret; - } - - ret = allocateRuntimeEnvBuf(pRuntimeEnv, pTable); - if (ret != TSDB_CODE_SUCCESS) { - return ret; - } - - tSidSetSort(pSupporter->pSidSet); - vnodeRecordAllFiles(pQInfo, pTable->vnode); - - int32_t size = getInitialPageNum(pSupporter); - ret = createDiskbasedResultBuffer(&pRuntimeEnv->pResultBuf, size, pQuery->rowSize); - if (ret != TSDB_CODE_SUCCESS) { - return ret; - } - - if (pQuery->intervalTime == 0) { - int16_t type = TSDB_DATA_TYPE_NULL; - - if (isGroupbyNormalCol(pQuery->pGroupbyExpr)) { // group by columns not tags; - type = getGroupbyColumnType(pQuery, pQuery->pGroupbyExpr); - } else { - type = TSDB_DATA_TYPE_INT; // group id - } - - initWindowResInfo(&pRuntimeEnv->windowResInfo, pRuntimeEnv, 512, 4096, type); - } - - pRuntimeEnv->numOfRowsPerPage = getNumOfRowsInResultPage(pQuery, true); - - // metric query do not invoke interpolation, it will be done at the second-stage merge - if (!isPointInterpoQuery(pQuery)) { - pQuery->interpoType = TSDB_INTERPO_NONE; - } - - TSKEY revisedStime = taosGetIntervalStartTimestamp(pSupporter->rawSKey, pQuery->intervalTime, - 
pQuery->intervalTimeUnit, pQuery->precision); - taosInitInterpoInfo(&pRuntimeEnv->interpoInfo, pQuery->order.order, revisedStime, 0, 0); - pRuntimeEnv->stableQuery = true; - - return TSDB_CODE_SUCCESS; -} - -/** - * decrease the refcount for each table involved in this query - * @param pQInfo - */ -void vnodeDecMeterRefcnt(SQInfo *pQInfo) { - STableQuerySupportObj *pSupporter = pQInfo->pTableQuerySupporter; - - if (pSupporter == NULL || pSupporter->numOfMeters == 1) { - atomic_fetch_sub_32(&pQInfo->pObj->numOfQueries, 1); - dTrace("QInfo:%p vid:%d sid:%d meterId:%s, query is over, numOfQueries:%d", pQInfo, pQInfo->pObj->vnode, - pQInfo->pObj->sid, pQInfo->pObj->meterId, pQInfo->pObj->numOfQueries); - } else { - int32_t num = 0; - for (int32_t i = 0; i < pSupporter->numOfMeters; ++i) { - SMeterObj *pTable = getMeterObj(pSupporter->pMetersHashTable, pSupporter->pSidSet->pSids[i]->sid); - atomic_fetch_sub_32(&(pTable->numOfQueries), 1); - - if (pTable->numOfQueries > 0) { - dTrace("QInfo:%p vid:%d sid:%d meterId:%s, query is over, numOfQueries:%d", pQInfo, pTable->vnode, pTable->sid, - pTable->meterId, pTable->numOfQueries); - num++; - } - } - - /* - * in order to reduce log output, for all meters of which numOfQueries count are 0, - * we do not output corresponding information - */ - num = pSupporter->numOfMeters - num; - dTrace("QInfo:%p metric query is over, dec query ref for %d meters, numOfQueries on %d meters are 0", pQInfo, - pSupporter->numOfMeters, num); - } -} - -TSKEY getTimestampInCacheBlock(SQueryRuntimeEnv *pRuntimeEnv, SCacheBlock *pBlock, int32_t index) { - if (pBlock == NULL || index >= pBlock->numOfPoints || index < 0) { - return -1; - } - - return ((TSKEY *)(pRuntimeEnv->primaryColBuffer->data))[index]; -} - -/* - * NOTE: pQuery->pos will not change, the corresponding data block will be loaded into buffer - * loadDataBlockOnDemand will change the value of pQuery->pos, according to the pQuery->lastKey - */ -TSKEY 
getTimestampInDiskBlock(SQueryRuntimeEnv *pRuntimeEnv, int32_t index) { - SQuery *pQuery = pRuntimeEnv->pQuery; - - /* - * the corresponding compblock info has been loaded already - * todo add check for compblock loaded - */ - SCompBlock *pBlock = getDiskDataBlock(pQuery, pQuery->slot); - - // this block must be loaded into buffer - SLoadDataBlockInfo *pLoadInfo = &pRuntimeEnv->loadBlockInfo; - assert(pQuery->pos >= 0 && pQuery->pos < pBlock->numOfPoints); - - SMeterObj *pMeterObj = pRuntimeEnv->pMeterObj; - - int32_t fileIndex = vnodeGetVnodeHeaderFileIndex(&pQuery->fileId, pRuntimeEnv, pQuery->order.order); - - dTrace("QInfo:%p vid:%d sid:%d id:%s, fileId:%d, slot:%d load data block due to primary key required", - GET_QINFO_ADDR(pQuery), pMeterObj->vnode, pMeterObj->sid, pMeterObj->meterId, pQuery->fileId, pQuery->slot); - - bool loadTS = true; - bool loadFields = true; - int32_t slot = pQuery->slot; - - int32_t ret = loadDataBlockIntoMem(pBlock, &pQuery->pFields[slot], pRuntimeEnv, fileIndex, loadTS, loadFields); - if (ret != TSDB_CODE_SUCCESS) { - return -1; - } - - SET_DATA_BLOCK_LOADED(pRuntimeEnv->blockStatus); - SET_FILE_BLOCK_FLAG(pRuntimeEnv->blockStatus); - - assert(pQuery->fileId == pLoadInfo->fileId && pQuery->slot == pLoadInfo->slotIdx); - return ((TSKEY *)pRuntimeEnv->primaryColBuffer->data)[index]; -} - -// todo remove this function -static TSKEY getFirstDataBlockInCache(SQueryRuntimeEnv *pRuntimeEnv) { - SQuery *pQuery = pRuntimeEnv->pQuery; - assert(pQuery->fileId == -1 && QUERY_IS_ASC_QUERY(pQuery)); - - /* - * get the start position in cache according to the pQuery->lastkey - * - * In case of cache and disk file data overlaps and all required data are commit to disk file, - * there are no qualified data available in cache, we need to set the QUERY_COMPLETED flag. - * - * If cache data and disk-based data are not completely overlapped, cacheBoundaryCheck function will set the - * correct status flag. 
- */ - TSKEY nextTimestamp = getQueryStartPositionInCache(pRuntimeEnv, &pQuery->slot, &pQuery->pos, true); - if (nextTimestamp < 0) { - setQueryStatus(pQuery, QUERY_NO_DATA_TO_CHECK); - } else if (nextTimestamp > pQuery->ekey) { - setQueryStatus(pQuery, QUERY_COMPLETED); - } - - return nextTimestamp; -} - -TSKEY getQueryPositionForCacheInvalid(SQueryRuntimeEnv *pRuntimeEnv, __block_search_fn_t searchFn) { - SQuery * pQuery = pRuntimeEnv->pQuery; - SQInfo * pQInfo = (SQInfo *)GET_QINFO_ADDR(pQuery); - SMeterObj *pMeterObj = pRuntimeEnv->pMeterObj; - int32_t step = GET_FORWARD_DIRECTION_FACTOR(pQuery->order.order); - - dTrace( - "QInfo:%p vid:%d sid:%d id:%s cache block re-allocated to other meter, " - "try get query start position in file/cache, qrange:%" PRId64 "-%" PRId64 ", lastKey:%" PRId64, - pQInfo, pMeterObj->vnode, pMeterObj->sid, pMeterObj->meterId, pQuery->skey, pQuery->ekey, pQuery->lastKey); - - if (step == QUERY_DESC_FORWARD_STEP) { - /* - * In descending order query, if the cache is invalid, it must be flushed to disk. - * Try to find the appropriate position in file, and no need to search cache any more. - */ - bool ret = getQualifiedDataBlock(pMeterObj, pRuntimeEnv, QUERY_RANGE_LESS_EQUAL, searchFn); - - dTrace("QInfo:%p vid:%d sid:%d id:%s find the possible position in file, fileId:%d, slot:%d, pos:%d", pQInfo, - pMeterObj->vnode, pMeterObj->sid, pMeterObj->meterId, pQuery->fileId, pQuery->slot, pQuery->pos); - - if (ret) { - TSKEY key = getTimestampInDiskBlock(pRuntimeEnv, pQuery->pos); - - // key in query range. 
If not, no qualified in disk file - if (key < pQuery->ekey) { - setQueryStatus(pQuery, QUERY_COMPLETED); - } - - return key; - } else { - setQueryStatus(pQuery, QUERY_NO_DATA_TO_CHECK); - return -1; // no data to check - } - } else { // asc query - bool ret = getQualifiedDataBlock(pMeterObj, pRuntimeEnv, QUERY_RANGE_GREATER_EQUAL, searchFn); - if (ret) { - dTrace("QInfo:%p vid:%d sid:%d id:%s find the possible position, fileId:%d, slot:%d, pos:%d", pQInfo, - pMeterObj->vnode, pMeterObj->sid, pMeterObj->meterId, pQuery->fileId, pQuery->slot, pQuery->pos); - - TSKEY key = getTimestampInDiskBlock(pRuntimeEnv, pQuery->pos); - - // key in query range. If not, no qualified in disk file - if (key > pQuery->ekey) { - setQueryStatus(pQuery, QUERY_COMPLETED); - } - - return key; - } else { - /* - * all data in file is less than the pQuery->lastKey, try cache again. - * cache block status will be set in getFirstDataBlockInCache function - */ - TSKEY key = getFirstDataBlockInCache(pRuntimeEnv); - - dTrace("QInfo:%p vid:%d sid:%d id:%s find the new position in cache, fileId:%d, slot:%d, pos:%d", pQInfo, - pMeterObj->vnode, pMeterObj->sid, pMeterObj->meterId, pQuery->fileId, pQuery->slot, pQuery->pos); - return key; - } - } -} - -static int32_t moveToNextBlockInCache(SQueryRuntimeEnv *pRuntimeEnv, int32_t step, __block_search_fn_t searchFn) { - SQuery * pQuery = pRuntimeEnv->pQuery; - SMeterObj *pMeterObj = pRuntimeEnv->pMeterObj; - - SCacheInfo *pCacheInfo = (SCacheInfo *)pMeterObj->pCache; - assert(pQuery->fileId < 0); - - /* - * ascending order to last cache block all data block in cache have been iterated, no need to set - * pRuntimeEnv->nextPos. 
done - */ - if (step == QUERY_ASC_FORWARD_STEP && pQuery->slot == pQuery->currentSlot) { - setQueryStatus(pQuery, QUERY_NO_DATA_TO_CHECK); - return DISK_DATA_LOADED; - } - - /* - * descending order to first cache block, try file - * NOTE: use the real time cache information, not the snapshot - */ - int32_t numOfBlocks = pCacheInfo->numOfBlocks; - int32_t currentSlot = pCacheInfo->currentSlot; - - int32_t firstSlot = getFirstCacheSlot(numOfBlocks, currentSlot, pCacheInfo); - - if (step == QUERY_DESC_FORWARD_STEP && pQuery->slot == firstSlot) { - bool ret = getQualifiedDataBlock(pMeterObj, pRuntimeEnv, QUERY_RANGE_LESS_EQUAL, searchFn); - if (ret) { - TSKEY key = getTimestampInDiskBlock(pRuntimeEnv, pQuery->pos); - - // key in query range. If not, no qualified in disk file - if (key < pQuery->ekey) { - setQueryStatus(pQuery, QUERY_COMPLETED); - } - - // the skip operation does NOT set the startPos yet - // assert(pRuntimeEnv->startPos.fileId < 0); - } else { - setQueryStatus(pQuery, QUERY_NO_DATA_TO_CHECK); - } - return DISK_DATA_LOADED; - } - - /* now still iterate the cache data blocks */ - pQuery->slot = (pQuery->slot + step + pCacheInfo->maxBlocks) % pCacheInfo->maxBlocks; - SCacheBlock *pBlock = getCacheDataBlock(pMeterObj, pRuntimeEnv, pQuery->slot); - - /* - * data in this cache block has been flushed to disk, then we should locate the start position in file. - * In both desc/asc query, this situation may occur. And we need to locate the start query position in file or cache. - */ - if (pBlock == NULL) { - getQueryPositionForCacheInvalid(pRuntimeEnv, searchFn); - - return DISK_DATA_LOADED; - } else { - pQuery->pos = (QUERY_IS_ASC_QUERY(pQuery)) ? 
0 : pBlock->numOfPoints - 1; - - TSKEY startkey = getTimestampInCacheBlock(pRuntimeEnv, pBlock, pQuery->pos); - if (startkey < 0) { - setQueryStatus(pQuery, QUERY_COMPLETED); - } - - SET_CACHE_BLOCK_FLAG(pRuntimeEnv->blockStatus); - - dTrace("QInfo:%p check cache block, blockId:%d slot:%d pos:%d, blockstatus:%d", GET_QINFO_ADDR(pQuery), - pQuery->blockId, pQuery->slot, pQuery->pos, pRuntimeEnv->blockStatus); - } - - return DISK_DATA_LOADED; -} - -/** - * move the cursor to next block and not load - */ -static int32_t moveToNextBlock(SQueryRuntimeEnv *pRuntimeEnv, int32_t step, __block_search_fn_t searchFn, - bool loadData) { - SQuery * pQuery = pRuntimeEnv->pQuery; - SMeterObj *pMeterObj = pRuntimeEnv->pMeterObj; - - SET_DATA_BLOCK_NOT_LOADED(pRuntimeEnv->blockStatus); - - if (pQuery->fileId >= 0) { - int32_t fileIndex = -1; - - /* - * 1. ascending order. The last data block of data file - * 2. descending order. The first block of file - */ - if ((step == QUERY_ASC_FORWARD_STEP && (pQuery->slot == pQuery->numOfBlocks - 1)) || - (step == QUERY_DESC_FORWARD_STEP && (pQuery->slot == 0))) { - fileIndex = getNextDataFileCompInfo(pRuntimeEnv, pMeterObj, step); - /* data maybe in cache */ - - if (fileIndex >= 0) { // next file - pQuery->slot = (step == QUERY_ASC_FORWARD_STEP) ? 0 : pQuery->numOfBlocks - 1; - pQuery->pos = (step == QUERY_ASC_FORWARD_STEP) ? 0 : pQuery->pBlock[pQuery->slot].numOfPoints - 1; - } else { // try data in cache - assert(pQuery->fileId == -1); - - if (step == QUERY_ASC_FORWARD_STEP) { - getFirstDataBlockInCache(pRuntimeEnv); - } else { // no data to check for desc order query - setQueryStatus(pQuery, QUERY_NO_DATA_TO_CHECK); - } - - return DISK_DATA_LOADED; - } - } else { // next block in the same file - int32_t fid = pQuery->fileId; - fileIndex = vnodeGetVnodeHeaderFileIndex(&fid, pRuntimeEnv, pQuery->order.order); - - pQuery->slot += step; - pQuery->pos = (step == QUERY_ASC_FORWARD_STEP) ? 
0 : pQuery->pBlock[pQuery->slot].numOfPoints - 1; - } - - assert(pQuery->pBlock != NULL); - - /* no need to load data, return directly */ - if (!loadData) { - return DISK_DATA_LOADED; - } - - // load data block function will change the value of pQuery->pos - int32_t ret = - LoadDatablockOnDemand(&pQuery->pBlock[pQuery->slot], &pQuery->pFields[pQuery->slot], &pRuntimeEnv->blockStatus, - pRuntimeEnv, fileIndex, pQuery->slot, searchFn, true); - if (ret != DISK_DATA_LOADED) { - return ret; - } - } else { // data in cache - return moveToNextBlockInCache(pRuntimeEnv, step, searchFn); - } - - return DISK_DATA_LOADED; -} - -static int32_t doHandleDataBlockImpl(SQueryRuntimeEnv *pRuntimeEnv, SBlockInfo *pBlockInfo, - __block_search_fn_t searchFn, int32_t blockLoadStatus, int32_t *forwardStep) { - SQuery * pQuery = pRuntimeEnv->pQuery; - SQueryCostSummary *pSummary = &pRuntimeEnv->summary; - int32_t numOfRes = 0; - - if (IS_DISK_DATA_BLOCK(pQuery) && blockLoadStatus != DISK_DATA_LOADED) { - *forwardStep = pBlockInfo->size; - return numOfRes; - } - - SField *pFields = NULL; - if (IS_DISK_DATA_BLOCK(pQuery)) { - pFields = pQuery->pFields[pQuery->slot]; - } else { // in case of cache data block, no need to load operation - assert(vnodeIsDatablockLoaded(pRuntimeEnv, pRuntimeEnv->pMeterObj, -1, true) == DISK_BLOCK_NO_NEED_TO_LOAD); - pFields = NULL; - } - - int64_t start = taosGetTimestampUs(); - - *forwardStep = - tableApplyFunctionsOnBlock(pRuntimeEnv, pBlockInfo, pFields, searchFn, &numOfRes, &pRuntimeEnv->windowResInfo); - - int64_t elapsedTime = taosGetTimestampUs() - start; - - if (IS_DISK_DATA_BLOCK(pQuery)) { - pSummary->fileTimeUs += elapsedTime; - } else { - pSummary->cacheTimeUs += elapsedTime; - } - - return numOfRes; -} - -// previous time window may not be of the same size of pQuery->intervalTime -static void getNextTimeWindow(SQuery *pQuery, STimeWindow *pTimeWindow) { - int32_t factor = GET_FORWARD_DIRECTION_FACTOR(pQuery->order.order); - - pTimeWindow->skey += 
(pQuery->slidingTime * factor); - pTimeWindow->ekey = pTimeWindow->skey + (pQuery->intervalTime - 1); -} - -static int64_t doScanAllDataBlocks(SQueryRuntimeEnv *pRuntimeEnv) { - SQuery * pQuery = pRuntimeEnv->pQuery; - SMeterObj *pMeterObj = pRuntimeEnv->pMeterObj; - - bool LOAD_DATA = true; - int64_t cnt = 0; - - __block_search_fn_t searchFn = vnodeSearchKeyFunc[pMeterObj->searchAlgorithm]; - int32_t blockLoadStatus = DISK_DATA_LOADED; - - int32_t step = GET_FORWARD_DIRECTION_FACTOR(pQuery->order.order); - - // initial data block always be loaded - SPositionInfo *pStartPos = &pRuntimeEnv->startPos; - assert(pQuery->slot == pStartPos->slot); - - dTrace("QInfo:%p query start, qrange:%" PRId64 "-%" PRId64 ", lastkey:%" PRId64 - ", order:%d, start fileId:%d, slot:%d, pos:%d, bstatus:%d", - GET_QINFO_ADDR(pQuery), pQuery->skey, pQuery->ekey, pQuery->lastKey, pQuery->order.order, pStartPos->fileId, - pStartPos->slot, pStartPos->pos, pRuntimeEnv->blockStatus); - - while (1) { - // check if query is killed or not set the status of query to pass the status check - if (isQueryKilled(pQInfo)) { - setQueryStatus(pQuery, QUERY_NO_DATA_TO_CHECK); - return cnt; - } - - int32_t forwardStep = 0; - SBlockInfo blockInfo = getBlockInfo(pRuntimeEnv); - /*int32_t numOfRes = */ doHandleDataBlockImpl(pRuntimeEnv, &blockInfo, searchFn, blockLoadStatus, &forwardStep); - - dTrace("QInfo:%p check data block, brange:%" PRId64 "-%" PRId64 - ", fileId:%d, slot:%d, pos:%d, bstatus:%d, rows:%d, checked:%d", - GET_QINFO_ADDR(pQuery), blockInfo.keyFirst, blockInfo.keyLast, pQuery->fileId, pQuery->slot, pQuery->pos, - pRuntimeEnv->blockStatus, blockInfo.size, forwardStep); - - // save last access position - int32_t accessPos = pQuery->pos + (forwardStep - 1) * step; - savePointPosition(&pRuntimeEnv->endPos, pQuery->fileId, pQuery->slot, accessPos); - - cnt += forwardStep; - - if (queryPausedInCurrentBlock(pQuery, &blockInfo, forwardStep)) { - int32_t nextPos = accessPos + step; - - /* - * set the 
next access position, nextPos only required when the interval query and projection query - * that cause output buffer overflow. When the query is completed, no need to load the next block any more. - */ - if (!Q_STATUS_EQUAL(pQuery->over, QUERY_COMPLETED) && Q_STATUS_EQUAL(pQuery->over, QUERY_RESBUF_FULL)) { - if (nextPos >= blockInfo.size || nextPos < 0) { - moveToNextBlock(pRuntimeEnv, step, searchFn, !LOAD_DATA); - - // slot/pos/fileId is updated in moveToNextBlock function - savePointPosition(&pRuntimeEnv->nextPos, pQuery->fileId, pQuery->slot, pQuery->pos); - } else { - savePointPosition(&pRuntimeEnv->nextPos, pQuery->fileId, pQuery->slot, nextPos); - } - } - - break; - } else { // query not completed, move to next block - blockLoadStatus = moveToNextBlock(pRuntimeEnv, step, searchFn, LOAD_DATA); - if (Q_STATUS_EQUAL(pQuery->over, QUERY_NO_DATA_TO_CHECK | QUERY_COMPLETED)) { - setQueryStatus(pQuery, QUERY_COMPLETED); - break; - } - } - - // check next block - blockInfo = getBlockInfo(pRuntimeEnv); - - if ((QUERY_IS_ASC_QUERY(pQuery) && blockInfo.keyFirst > pQuery->ekey) || - (!QUERY_IS_ASC_QUERY(pQuery) && blockInfo.keyLast < pQuery->ekey)) { - setQueryStatus(pQuery, QUERY_COMPLETED); - break; - } - - if (Q_STATUS_EQUAL(pQuery->over, QUERY_RESBUF_FULL)) { - break; - } - - } // while(1) - - if (isIntervalQuery(pQuery) && IS_MASTER_SCAN(pRuntimeEnv)) { - if (Q_STATUS_EQUAL(pQuery->over, QUERY_COMPLETED | QUERY_NO_DATA_TO_CHECK)) { - closeAllTimeWindow(&pRuntimeEnv->windowResInfo); - pRuntimeEnv->windowResInfo.curIndex = pRuntimeEnv->windowResInfo.size - 1; - } else if (Q_STATUS_EQUAL(pQuery->over, QUERY_RESBUF_FULL)) { // check if window needs to be closed - SBlockInfo blockInfo = getBlockInfo(pRuntimeEnv); - - // check if need to close window result or not - TSKEY t = (QUERY_IS_ASC_QUERY(pQuery)) ? 
blockInfo.keyFirst : blockInfo.keyLast; - doCheckQueryCompleted(pRuntimeEnv, t, &pRuntimeEnv->windowResInfo); - } - } - - return cnt; -} - -static void updatelastkey(SQuery *pQuery, SMeterQueryInfo *pMeterQInfo) { pMeterQInfo->lastKey = pQuery->lastKey; } - -/* - * set tag value in SQLFunctionCtx - * e.g.,tag information into input buffer - */ -static void doSetTagValueInParam(SColumnModel *pTagSchema, int32_t tagColIdx, SMeterSidExtInfo *pMeterSidInfo, - tVariant *param) { - assert(tagColIdx >= 0); - - int16_t offset = getColumnModelOffset(pTagSchema, tagColIdx); - - void * pStr = (char *)pMeterSidInfo->tags + offset; - SSchema *pCol = getColumnModelSchema(pTagSchema, tagColIdx); - - tVariantDestroy(param); - - if (isNull(pStr, pCol->type)) { - param->nType = TSDB_DATA_TYPE_NULL; - } else { - tVariantCreateFromBinary(param, pStr, pCol->bytes, pCol->type); - } -} - -void vnodeSetTagValueInParam(tSidSet *pSidSet, SQueryRuntimeEnv *pRuntimeEnv, SMeterSidExtInfo *pMeterSidInfo) { - SQuery * pQuery = pRuntimeEnv->pQuery; - SColumnModel *pTagSchema = pSidSet->pColumnModel; - - SSqlFuncExprMsg *pFuncMsg = &pQuery->pSelectExpr[0].pBase; - if (pQuery->numOfOutputCols == 1 && pFuncMsg->functionId == TSDB_FUNC_TS_COMP) { - assert(pFuncMsg->numOfParams == 1); - doSetTagValueInParam(pTagSchema, pFuncMsg->arg->argValue.i64, pMeterSidInfo, &pRuntimeEnv->pCtx[0].tag); - } else { - // set tag value, by which the results are aggregated. 
- for (int32_t idx = 0; idx < pQuery->numOfOutputCols; ++idx) { - SColIndexEx *pColEx = &pQuery->pSelectExpr[idx].pBase.colInfo; - - // ts_comp column required the tag value for join filter - if (!TSDB_COL_IS_TAG(pColEx->flag)) { - continue; - } - - doSetTagValueInParam(pTagSchema, pColEx->colIdx, pMeterSidInfo, &pRuntimeEnv->pCtx[idx].tag); - } - - // set the join tag for first column - SSqlFuncExprMsg *pFuncMsg = &pQuery->pSelectExpr[0].pBase; - if (pFuncMsg->functionId == TSDB_FUNC_TS && pFuncMsg->colInfo.colIdx == PRIMARYKEY_TIMESTAMP_COL_INDEX && - pRuntimeEnv->pTSBuf != NULL) { - assert(pFuncMsg->numOfParams == 1); - doSetTagValueInParam(pTagSchema, pFuncMsg->arg->argValue.i64, pMeterSidInfo, &pRuntimeEnv->pCtx[0].tag); - } - } -} - -static void doMerge(SQueryRuntimeEnv *pRuntimeEnv, int64_t timestamp, SWindowResult *pWindowRes, bool mergeFlag) { - SQuery * pQuery = pRuntimeEnv->pQuery; - SQLFunctionCtx *pCtx = pRuntimeEnv->pCtx; - - for (int32_t i = 0; i < pQuery->numOfOutputCols; ++i) { - int32_t functionId = pQuery->pSelectExpr[i].pBase.functionId; - if (!mergeFlag) { - pCtx[i].aOutputBuf = pCtx[i].aOutputBuf + pCtx[i].outputBytes; - pCtx[i].currentStage = FIRST_STAGE_MERGE; - - resetResultInfo(pCtx[i].resultInfo); - aAggs[functionId].init(&pCtx[i]); - } - - pCtx[i].hasNull = true; - pCtx[i].nStartQueryTimestamp = timestamp; - pCtx[i].aInputElemBuf = getPosInResultPage(pRuntimeEnv, i, pWindowRes); - // pCtx[i].aInputElemBuf = ((char *)inputSrc->data) + - // ((int32_t)pRuntimeEnv->offset[i] * pRuntimeEnv->numOfRowsPerPage) + - // pCtx[i].outputBytes * inputIdx; - - // in case of tag column, the tag information should be extracted from input buffer - if (functionId == TSDB_FUNC_TAG_DUMMY || functionId == TSDB_FUNC_TAG) { - tVariantDestroy(&pCtx[i].tag); - tVariantCreateFromBinary(&pCtx[i].tag, pCtx[i].aInputElemBuf, pCtx[i].inputBytes, pCtx[i].inputType); - } - } - - for (int32_t i = 0; i < pQuery->numOfOutputCols; ++i) { - int32_t functionId = 
pQuery->pSelectExpr[i].pBase.functionId; - if (functionId == TSDB_FUNC_TAG_DUMMY) { - continue; - } - - aAggs[functionId].distMergeFunc(&pCtx[i]); - } -} - -static void printBinaryData(int32_t functionId, char *data, int32_t srcDataType) { - if (functionId == TSDB_FUNC_FIRST_DST || functionId == TSDB_FUNC_LAST_DST) { - switch (srcDataType) { - case TSDB_DATA_TYPE_BINARY: - printf("%" PRId64 ",%s\t", *(TSKEY *)data, (data + TSDB_KEYSIZE + 1)); - break; - case TSDB_DATA_TYPE_TINYINT: - case TSDB_DATA_TYPE_BOOL: - printf("%" PRId64 ",%d\t", *(TSKEY *)data, *(int8_t *)(data + TSDB_KEYSIZE + 1)); - break; - case TSDB_DATA_TYPE_SMALLINT: - printf("%" PRId64 ",%d\t", *(TSKEY *)data, *(int16_t *)(data + TSDB_KEYSIZE + 1)); - break; - case TSDB_DATA_TYPE_BIGINT: - case TSDB_DATA_TYPE_TIMESTAMP: - printf("%" PRId64 ",%" PRId64 "\t", *(TSKEY *)data, *(TSKEY *)(data + TSDB_KEYSIZE + 1)); - break; - case TSDB_DATA_TYPE_INT: - printf("%" PRId64 ",%d\t", *(TSKEY *)data, *(int32_t *)(data + TSDB_KEYSIZE + 1)); - break; - case TSDB_DATA_TYPE_FLOAT: - printf("%" PRId64 ",%f\t", *(TSKEY *)data, *(float *)(data + TSDB_KEYSIZE + 1)); - break; - case TSDB_DATA_TYPE_DOUBLE: - printf("%" PRId64 ",%lf\t", *(TSKEY *)data, *(double *)(data + TSDB_KEYSIZE + 1)); - break; - } - } else if (functionId == TSDB_FUNC_AVG) { - printf("%lf,%d\t", *(double *)data, *(int32_t *)(data + sizeof(double))); - } else if (functionId == TSDB_FUNC_SPREAD) { - printf("%lf,%lf\t", *(double *)data, *(double *)(data + sizeof(double))); - } else if (functionId == TSDB_FUNC_TWA) { - data += 1; - printf("%lf,%" PRId64 ",%" PRId64 ",%" PRId64 "\t", *(double *)data, *(int64_t *)(data + 8), - *(int64_t *)(data + 16), *(int64_t *)(data + 24)); - } else if (functionId == TSDB_FUNC_MIN || functionId == TSDB_FUNC_MAX) { - switch (srcDataType) { - case TSDB_DATA_TYPE_TINYINT: - case TSDB_DATA_TYPE_BOOL: - printf("%d\t", *(int8_t *)data); - break; - case TSDB_DATA_TYPE_SMALLINT: - printf("%d\t", *(int16_t *)data); - break; - 
case TSDB_DATA_TYPE_BIGINT: - case TSDB_DATA_TYPE_TIMESTAMP: - printf("%" PRId64 "\t", *(int64_t *)data); - break; - case TSDB_DATA_TYPE_INT: - printf("%d\t", *(int *)data); - break; - case TSDB_DATA_TYPE_FLOAT: - printf("%f\t", *(float *)data); - break; - case TSDB_DATA_TYPE_DOUBLE: - printf("%f\t", *(float *)data); - break; - } - } else if (functionId == TSDB_FUNC_SUM) { - if (srcDataType == TSDB_DATA_TYPE_FLOAT || srcDataType == TSDB_DATA_TYPE_DOUBLE) { - printf("%lf\t", *(float *)data); - } else { - printf("%" PRId64 "\t", *(int64_t *)data); - } - } else { - printf("%s\t", data); - } -} - -void UNUSED_FUNC displayInterResult(SData **pdata, SQuery *pQuery, int32_t numOfRows) { - int32_t numOfCols = pQuery->numOfOutputCols; - printf("super table query intermediate result, total:%d\n", numOfRows); - - SQInfo * pQInfo = (SQInfo *)(GET_QINFO_ADDR(pQuery)); - SMeterObj *pMeterObj = pQInfo->pObj; - - for (int32_t j = 0; j < numOfRows; ++j) { - for (int32_t i = 0; i < numOfCols; ++i) { - switch (pQuery->pSelectExpr[i].resType) { - case TSDB_DATA_TYPE_BINARY: { - int32_t colIdx = pQuery->pSelectExpr[i].pBase.colInfo.colIdx; - int32_t type = 0; - - if (TSDB_COL_IS_TAG(pQuery->pSelectExpr[i].pBase.colInfo.flag)) { - type = pQuery->pSelectExpr[i].resType; - } else { - type = pMeterObj->schema[colIdx].type; - } - printBinaryData(pQuery->pSelectExpr[i].pBase.functionId, pdata[i]->data + pQuery->pSelectExpr[i].resBytes * j, - type); - break; - } - case TSDB_DATA_TYPE_TIMESTAMP: - case TSDB_DATA_TYPE_BIGINT: - printf("%" PRId64 "\t", *(int64_t *)(pdata[i]->data + pQuery->pSelectExpr[i].resBytes * j)); - break; - case TSDB_DATA_TYPE_INT: - printf("%d\t", *(int32_t *)(pdata[i]->data + pQuery->pSelectExpr[i].resBytes * j)); - break; - case TSDB_DATA_TYPE_FLOAT: - printf("%f\t", *(float *)(pdata[i]->data + pQuery->pSelectExpr[i].resBytes * j)); - break; - case TSDB_DATA_TYPE_DOUBLE: - printf("%lf\t", *(double *)(pdata[i]->data + pQuery->pSelectExpr[i].resBytes * j)); - break; - } 
- } - printf("\n"); - } -} - -// static tFilePage *getMeterDataPage(SDiskbasedResultBuf *pResultBuf, SMeterQueryInfo *pMeterQueryInfo, -// int32_t index) { -// SIDList pList = getDataBufPagesIdList(pResultBuf, pMeterQueryInfo->sid); -// return getResultBufferPageById(pResultBuf, pList.pData[index]); -//} - -// typedef struct Position { -// int32_t pageIdx; -// int32_t rowIdx; -//} Position; - -typedef struct SCompSupporter { - SMeterDataInfo ** pMeterDataInfo; - int32_t * position; - STableQuerySupportObj *pSupporter; -} SCompSupporter; - -int32_t tableResultComparFn(const void *pLeft, const void *pRight, void *param) { - int32_t left = *(int32_t *)pLeft; - int32_t right = *(int32_t *)pRight; - - SCompSupporter * supporter = (SCompSupporter *)param; - SQueryRuntimeEnv *pRuntimeEnv = &supporter->pSupporter->runtimeEnv; - - int32_t leftPos = supporter->position[left]; - int32_t rightPos = supporter->position[right]; - - /* left source is exhausted */ - if (leftPos == -1) { - return 1; - } - - /* right source is exhausted*/ - if (rightPos == -1) { - return -1; - } - - SWindowResInfo *pWindowResInfo1 = &supporter->pMeterDataInfo[left]->pMeterQInfo->windowResInfo; - SWindowResult * pWindowRes1 = getWindowResult(pWindowResInfo1, leftPos); - - char *b1 = getPosInResultPage(pRuntimeEnv, PRIMARYKEY_TIMESTAMP_COL_INDEX, pWindowRes1); - TSKEY leftTimestamp = GET_INT64_VAL(b1); - - SWindowResInfo *pWindowResInfo2 = &supporter->pMeterDataInfo[right]->pMeterQInfo->windowResInfo; - SWindowResult * pWindowRes2 = getWindowResult(pWindowResInfo2, rightPos); - - char *b2 = getPosInResultPage(pRuntimeEnv, PRIMARYKEY_TIMESTAMP_COL_INDEX, pWindowRes2); - TSKEY rightTimestamp = GET_INT64_VAL(b2); - - if (leftTimestamp == rightTimestamp) { - return 0; - } - - return leftTimestamp > rightTimestamp ? 
1 : -1; -} - -int32_t mergeMetersResultToOneGroups(STableQuerySupportObj *pSupporter) { - SQueryRuntimeEnv *pRuntimeEnv = &pSupporter->runtimeEnv; - SQuery * pQuery = pRuntimeEnv->pQuery; - - int64_t st = taosGetTimestampMs(); - int32_t ret = TSDB_CODE_SUCCESS; - - while (pSupporter->subgroupIdx < pSupporter->pSidSet->numOfSubSet) { - int32_t start = pSupporter->pSidSet->starterPos[pSupporter->subgroupIdx]; - int32_t end = pSupporter->pSidSet->starterPos[pSupporter->subgroupIdx + 1]; - - ret = doMergeMetersResultsToGroupRes(pSupporter, pQuery, pRuntimeEnv, pSupporter->pMeterDataInfo, start, end); - if (ret < 0) { // not enough disk space to save the data into disk - return -1; - } - - pSupporter->subgroupIdx += 1; - - // this group generates at least one result, return results - if (ret > 0) { - break; - } - - assert(pSupporter->numOfGroupResultPages == 0); - dTrace("QInfo:%p no result in group %d, continue", GET_QINFO_ADDR(pQuery), pSupporter->subgroupIdx - 1); - } - - dTrace("QInfo:%p merge res data into group, index:%d, total group:%d, elapsed time:%lldms", GET_QINFO_ADDR(pQuery), - pSupporter->subgroupIdx - 1, pSupporter->pSidSet->numOfSubSet, taosGetTimestampMs() - st); - - return TSDB_CODE_SUCCESS; -} - -void copyResToQueryResultBuf(STableQuerySupportObj *pSupporter, SQuery *pQuery) { - if (pSupporter->offset == pSupporter->numOfGroupResultPages) { - pSupporter->numOfGroupResultPages = 0; - - // current results of group has been sent to client, try next group - if (mergeMetersResultToOneGroups(pSupporter) != TSDB_CODE_SUCCESS) { - return; // failed to save data in the disk - } - - // set current query completed - if (pSupporter->numOfGroupResultPages == 0 && pSupporter->subgroupIdx == pSupporter->pSidSet->numOfSubSet) { - pSupporter->meterIdx = pSupporter->pSidSet->numOfSids; - return; - } - } - - SQueryRuntimeEnv * pRuntimeEnv = &pSupporter->runtimeEnv; - SDiskbasedResultBuf *pResultBuf = pRuntimeEnv->pResultBuf; - - int32_t id = 
getGroupResultId(pSupporter->subgroupIdx - 1); - SIDList list = getDataBufPagesIdList(pResultBuf, pSupporter->offset + id); - - int32_t total = 0; - for (int32_t i = 0; i < list.size; ++i) { - tFilePage *pData = getResultBufferPageById(pResultBuf, list.pData[i]); - total += pData->numOfElems; - } - - pQuery->sdata[0]->len = total; - - int32_t offset = 0; - for (int32_t num = 0; num < list.size; ++num) { - tFilePage *pData = getResultBufferPageById(pResultBuf, list.pData[num]); - - for (int32_t i = 0; i < pQuery->numOfOutputCols; ++i) { - int32_t bytes = pRuntimeEnv->pCtx[i].outputBytes; - char * pDest = pQuery->sdata[i]->data; - - memcpy(pDest + offset * bytes, pData->data + pRuntimeEnv->offset[i] * pData->numOfElems, - bytes * pData->numOfElems); - } - - offset += pData->numOfElems; - } - - assert(pQuery->pointsRead == 0); - - pQuery->pointsRead += pQuery->sdata[0]->len; - pSupporter->offset += 1; -} - -int64_t getNumOfResultWindowRes(SQueryRuntimeEnv *pRuntimeEnv, SWindowResult *pWindowRes) { - SQuery *pQuery = pRuntimeEnv->pQuery; - - int64_t maxOutput = 0; - for (int32_t j = 0; j < pQuery->numOfOutputCols; ++j) { - int32_t functionId = pQuery->pSelectExpr[j].pBase.functionId; - - /* - * ts, tag, tagprj function can not decide the output number of current query - * the number of output result is decided by main output - */ - if (functionId == TSDB_FUNC_TS || functionId == TSDB_FUNC_TAG || functionId == TSDB_FUNC_TAGPRJ) { - continue; - } - - SResultInfo *pResultInfo = &pWindowRes->resultInfo[j]; - if (pResultInfo != NULL && maxOutput < pResultInfo->numOfRes) { - maxOutput = pResultInfo->numOfRes; - } - } - - return maxOutput; -} - -int32_t doMergeMetersResultsToGroupRes(STableQuerySupportObj *pSupporter, SQuery *pQuery, SQueryRuntimeEnv *pRuntimeEnv, - SMeterDataInfo *pMeterDataInfo, int32_t start, int32_t end) { - tFilePage ** buffer = (tFilePage **)pQuery->sdata; - int32_t * posList = calloc((end - start), sizeof(int32_t)); - SMeterDataInfo **pTableList = 
malloc(POINTER_BYTES * (end - start)); - - // todo opt for the case of one table per group - int32_t numOfMeters = 0; - for (int32_t i = start; i < end; ++i) { - int32_t sid = pMeterDataInfo[i].pMeterQInfo->sid; - - SIDList list = getDataBufPagesIdList(pRuntimeEnv->pResultBuf, sid); - if (list.size > 0 && pMeterDataInfo[i].pMeterQInfo->windowResInfo.size > 0) { - pTableList[numOfMeters] = &pMeterDataInfo[i]; - numOfMeters += 1; - } - } - - if (numOfMeters == 0) { - tfree(posList); - tfree(pTableList); - - assert(pSupporter->numOfGroupResultPages == 0); - return 0; - } - - SCompSupporter cs = {pTableList, posList, pSupporter}; - - SLoserTreeInfo *pTree = NULL; - tLoserTreeCreate(&pTree, numOfMeters, &cs, tableResultComparFn); - - SResultInfo *pResultInfo = calloc(pQuery->numOfOutputCols, sizeof(SResultInfo)); - setWindowResultInfo(pResultInfo, pQuery, pRuntimeEnv->stableQuery); - - resetMergeResultBuf(pQuery, pRuntimeEnv->pCtx, pResultInfo); - int64_t lastTimestamp = -1; - - int64_t startt = taosGetTimestampMs(); - - while (1) { - int32_t pos = pTree->pNode[0].index; - - SWindowResInfo *pWindowResInfo = &pTableList[pos]->pMeterQInfo->windowResInfo; - SWindowResult * pWindowRes = getWindowResult(pWindowResInfo, cs.position[pos]); - - char *b = getPosInResultPage(pRuntimeEnv, PRIMARYKEY_TIMESTAMP_COL_INDEX, pWindowRes); - TSKEY ts = GET_INT64_VAL(b); - - assert(ts == pWindowRes->window.skey); - int64_t num = getNumOfResultWindowRes(pRuntimeEnv, pWindowRes); - if (num <= 0) { - cs.position[pos] += 1; - - if (cs.position[pos] >= pWindowResInfo->size) { - cs.position[pos] = -1; - - // all input sources are exhausted - if (--numOfMeters == 0) { - break; - } - } - } else { - if (ts == lastTimestamp) { // merge with the last one - doMerge(pRuntimeEnv, ts, pWindowRes, true); - } else { // copy data to disk buffer - if (buffer[0]->numOfElems == pQuery->pointsToRead) { - if (flushFromResultBuf(pSupporter, pQuery, pRuntimeEnv) != TSDB_CODE_SUCCESS) { - return -1; - } - - 
resetMergeResultBuf(pQuery, pRuntimeEnv->pCtx, pResultInfo); - } - - doMerge(pRuntimeEnv, ts, pWindowRes, false); - buffer[0]->numOfElems += 1; - } - - lastTimestamp = ts; - - cs.position[pos] += 1; - if (cs.position[pos] >= pWindowResInfo->size) { - cs.position[pos] = -1; - - // all input sources are exhausted - if (--numOfMeters == 0) { - break; - } - } - } - - tLoserTreeAdjust(pTree, pos + pTree->numOfEntries); - } - - if (buffer[0]->numOfElems != 0) { // there are data in buffer - if (flushFromResultBuf(pSupporter, pQuery, pRuntimeEnv) != TSDB_CODE_SUCCESS) { - // dError("QInfo:%p failed to flush data into temp file, abort query", GET_QINFO_ADDR(pQuery), - // pSupporter->extBufFile); - tfree(pTree); - tfree(pTableList); - tfree(posList); - tfree(pResultInfo); - - return -1; - } - } - - int64_t endt = taosGetTimestampMs(); - -#ifdef _DEBUG_VIEW - displayInterResult(pQuery->sdata, pQuery, pQuery->sdata[0]->len); -#endif - - dTrace("QInfo:%p result merge completed, elapsed time:%" PRId64 " ms", GET_QINFO_ADDR(pQuery), endt - startt); - tfree(pTree); - tfree(pTableList); - tfree(posList); - tfree(pResultInfo); - - pSupporter->offset = 0; - - return pSupporter->numOfGroupResultPages; -} - -int32_t flushFromResultBuf(STableQuerySupportObj *pSupporter, const SQuery *pQuery, - const SQueryRuntimeEnv *pRuntimeEnv) { - SDiskbasedResultBuf *pResultBuf = pRuntimeEnv->pResultBuf; - int32_t capacity = (DEFAULT_INTERN_BUF_SIZE - sizeof(tFilePage)) / pQuery->rowSize; - - // the base value for group result, since the maximum number of table for each vnode will not exceed 100,000. 
- int32_t pageId = -1; - - int32_t remain = pQuery->sdata[0]->len; - int32_t offset = 0; - - while (remain > 0) { - int32_t r = remain; - if (r > capacity) { - r = capacity; - } - - int32_t id = getGroupResultId(pSupporter->subgroupIdx) + pSupporter->numOfGroupResultPages; - tFilePage *buf = getNewDataBuf(pResultBuf, id, &pageId); - - // pagewise copy to dest buffer - for (int32_t i = 0; i < pQuery->numOfOutputCols; ++i) { - int32_t bytes = pRuntimeEnv->pCtx[i].outputBytes; - buf->numOfElems = r; - - memcpy(buf->data + pRuntimeEnv->offset[i] * buf->numOfElems, ((char *)pQuery->sdata[i]->data) + offset * bytes, - buf->numOfElems * bytes); - } - - offset += r; - remain -= r; - } - - pSupporter->numOfGroupResultPages += 1; - return TSDB_CODE_SUCCESS; -} - -void resetMergeResultBuf(SQuery *pQuery, SQLFunctionCtx *pCtx, SResultInfo *pResultInfo) { - for (int32_t k = 0; k < pQuery->numOfOutputCols; ++k) { - pCtx[k].aOutputBuf = pQuery->sdata[k]->data - pCtx[k].outputBytes; - pCtx[k].size = 1; - pCtx[k].startOffset = 0; - pCtx[k].resultInfo = &pResultInfo[k]; - - pQuery->sdata[k]->len = 0; - } -} - -void setMeterDataInfo(SMeterDataInfo *pMeterDataInfo, SMeterObj *pMeterObj, int32_t meterIdx, int32_t groupId) { - pMeterDataInfo->pMeterObj = pMeterObj; - pMeterDataInfo->groupIdx = groupId; - pMeterDataInfo->meterOrderIdx = meterIdx; -} - -static void doDisableFunctsForSupplementaryScan(SQuery *pQuery, SWindowResInfo *pWindowResInfo, int32_t order) { - for (int32_t i = 0; i < pWindowResInfo->size; ++i) { - SWindowStatus *pStatus = getTimeWindowResStatus(pWindowResInfo, i); - if (!pStatus->closed) { - continue; - } - - SWindowResult *buf = getWindowResult(pWindowResInfo, i); - - // open/close the specified query for each group result - for (int32_t j = 0; j < pQuery->numOfOutputCols; ++j) { - int32_t functId = pQuery->pSelectExpr[j].pBase.functionId; - - if (((functId == TSDB_FUNC_FIRST || functId == TSDB_FUNC_FIRST_DST) && order == TSQL_SO_DESC) || - ((functId == 
TSDB_FUNC_LAST || functId == TSDB_FUNC_LAST_DST) && order == TSQL_SO_ASC)) { - buf->resultInfo[j].complete = false; - } else if (functId != TSDB_FUNC_TS && functId != TSDB_FUNC_TAG) { - buf->resultInfo[j].complete = true; - } - } - } -} - -void disableFunctForTableSuppleScan(SQueryRuntimeEnv *pRuntimeEnv, int32_t order) { - SQuery *pQuery = pRuntimeEnv->pQuery; - - // group by normal columns and interval query on normal table - for (int32_t i = 0; i < pQuery->numOfOutputCols; ++i) { - pRuntimeEnv->pCtx[i].order = (pRuntimeEnv->pCtx[i].order) ^ 1u; - } - - SWindowResInfo *pWindowResInfo = &pRuntimeEnv->windowResInfo; - if (isGroupbyNormalCol(pQuery->pGroupbyExpr) || isIntervalQuery(pQuery)) { - doDisableFunctsForSupplementaryScan(pQuery, pWindowResInfo, order); - } else { // for simple result of table query, - for (int32_t j = 0; j < pQuery->numOfOutputCols; ++j) { - int32_t functId = pQuery->pSelectExpr[j].pBase.functionId; - SQLFunctionCtx* pCtx = &pRuntimeEnv->pCtx[j]; - - if (((functId == TSDB_FUNC_FIRST || functId == TSDB_FUNC_FIRST_DST) && order == TSQL_SO_DESC) || - ((functId == TSDB_FUNC_LAST || functId == TSDB_FUNC_LAST_DST) && order == TSQL_SO_ASC)) { - pCtx->resultInfo->complete = false; - } else if (functId != TSDB_FUNC_TS && functId != TSDB_FUNC_TAG) { - pCtx->resultInfo->complete = true; - } - } - } - - pQuery->order.order = pQuery->order.order ^ 1u; -} - -void disableFunctForSuppleScan(STableQuerySupportObj *pSupporter, int32_t order) { - SQueryRuntimeEnv *pRuntimeEnv = &pSupporter->runtimeEnv; - SQuery * pQuery = pRuntimeEnv->pQuery; - - for (int32_t i = 0; i < pQuery->numOfOutputCols; ++i) { - pRuntimeEnv->pCtx[i].order = (pRuntimeEnv->pCtx[i].order) ^ 1u; - } - - if (isIntervalQuery(pQuery)) { - for (int32_t i = 0; i < pSupporter->numOfMeters; ++i) { - SMeterQueryInfo *pMeterQueryInfo = pSupporter->pMeterDataInfo[i].pMeterQInfo; - SWindowResInfo * pWindowResInfo = &pMeterQueryInfo->windowResInfo; - - doDisableFunctsForSupplementaryScan(pQuery, 
pWindowResInfo, order); - } - } else { - SWindowResInfo *pWindowResInfo = &pRuntimeEnv->windowResInfo; - doDisableFunctsForSupplementaryScan(pQuery, pWindowResInfo, order); - } - - pQuery->order.order = (pQuery->order.order) ^ 1u; -} - -void enableFunctForMasterScan(SQueryRuntimeEnv *pRuntimeEnv, int32_t order) { - SQuery *pQuery = pRuntimeEnv->pQuery; - - for (int32_t i = 0; i < pQuery->numOfOutputCols; ++i) { - pRuntimeEnv->pCtx[i].order = (pRuntimeEnv->pCtx[i].order) ^ 1u; - } - - pQuery->order.order = (pQuery->order.order) ^ 1u; -} - -void createQueryResultInfo(SQuery *pQuery, SWindowResult *pResultRow, bool isSTableQuery, SPosInfo *posInfo) { - int32_t numOfCols = pQuery->numOfOutputCols; - - pResultRow->resultInfo = calloc((size_t)numOfCols, sizeof(SResultInfo)); - pResultRow->pos = *posInfo; - - // set the intermediate result output buffer - setWindowResultInfo(pResultRow->resultInfo, pQuery, isSTableQuery); -} - -void clearTimeWindowResBuf(SQueryRuntimeEnv *pRuntimeEnv, SWindowResult *pWindowRes) { - if (pWindowRes == NULL) { - return; - } - - for (int32_t i = 0; i < pRuntimeEnv->pQuery->numOfOutputCols; ++i) { - SResultInfo *pResultInfo = &pWindowRes->resultInfo[i]; - - char * s = getPosInResultPage(pRuntimeEnv, i, pWindowRes); - size_t size = pRuntimeEnv->pQuery->pSelectExpr[i].resBytes; - memset(s, 0, size); - - resetResultInfo(pResultInfo); - } - - pWindowRes->numOfRows = 0; - // pWindowRes->nAlloc = 0; - pWindowRes->pos = (SPosInfo){-1, -1}; - pWindowRes->status.closed = false; - pWindowRes->window = (STimeWindow){0, 0}; -} - -/** - * The source window result pos attribution of the source window result does not assign to the destination, - * since the attribute of "Pos" is bound to each window result when the window result is created in the - * disk-based result buffer. 
- */ -void copyTimeWindowResBuf(SQueryRuntimeEnv *pRuntimeEnv, SWindowResult *dst, const SWindowResult *src) { - dst->numOfRows = src->numOfRows; - // dst->nAlloc = src->nAlloc; - dst->window = src->window; - dst->status = src->status; - - int32_t nOutputCols = pRuntimeEnv->pQuery->numOfOutputCols; - - for (int32_t i = 0; i < nOutputCols; ++i) { - SResultInfo *pDst = &dst->resultInfo[i]; - SResultInfo *pSrc = &src->resultInfo[i]; - - char *buf = pDst->interResultBuf; - memcpy(pDst, pSrc, sizeof(SResultInfo)); - pDst->interResultBuf = buf; // restore the allocated buffer - - // copy the result info struct - memcpy(pDst->interResultBuf, pSrc->interResultBuf, pDst->bufLen); - - // copy the output buffer data from src to dst, the position info keep unchanged - char * dstBuf = getPosInResultPage(pRuntimeEnv, i, dst); - char * srcBuf = getPosInResultPage(pRuntimeEnv, i, (SWindowResult *)src); - size_t s = pRuntimeEnv->pQuery->pSelectExpr[i].resBytes; - - memcpy(dstBuf, srcBuf, s); - } -} - -void destroyTimeWindowRes(SWindowResult *pWindowRes, int32_t nOutputCols) { - if (pWindowRes == NULL) { - return; - } - - for (int32_t i = 0; i < nOutputCols; ++i) { - free(pWindowRes->resultInfo[i].interResultBuf); - } - - free(pWindowRes->resultInfo); -} - -void resetCtxOutputBuf(SQueryRuntimeEnv *pRuntimeEnv) { - SQuery *pQuery = pRuntimeEnv->pQuery; - int32_t rows = pRuntimeEnv->pMeterObj->pointsPerFileBlock; - - for (int32_t i = 0; i < pQuery->numOfOutputCols; ++i) { - SQLFunctionCtx *pCtx = &pRuntimeEnv->pCtx[i]; - pCtx->aOutputBuf = pQuery->sdata[i]->data; - - /* - * set the output buffer information and intermediate buffer - * not all queries require the interResultBuf, such as COUNT/TAGPRJ/PRJ/TAG etc. 
- */ - resetResultInfo(&pRuntimeEnv->resultInfo[i]); - pCtx->resultInfo = &pRuntimeEnv->resultInfo[i]; - - // set the timestamp output buffer for top/bottom/diff query - int32_t functionId = pQuery->pSelectExpr[i].pBase.functionId; - if (functionId == TSDB_FUNC_TOP || functionId == TSDB_FUNC_BOTTOM || functionId == TSDB_FUNC_DIFF) { - pCtx->ptsOutputBuf = pRuntimeEnv->pCtx[0].aOutputBuf; - } - - memset(pQuery->sdata[i]->data, 0, (size_t)pQuery->pSelectExpr[i].resBytes * rows); - } - - initCtxOutputBuf(pRuntimeEnv); -} - -void forwardCtxOutputBuf(SQueryRuntimeEnv *pRuntimeEnv, int64_t output) { - SQuery *pQuery = pRuntimeEnv->pQuery; - - // reset the execution contexts - for (int32_t j = 0; j < pQuery->numOfOutputCols; ++j) { - int32_t functionId = pQuery->pSelectExpr[j].pBase.functionId; - assert(functionId != TSDB_FUNC_DIFF); - - // set next output position - if (IS_OUTER_FORWARD(aAggs[functionId].nStatus)) { - pRuntimeEnv->pCtx[j].aOutputBuf += pRuntimeEnv->pCtx[j].outputBytes * output; - } - - if (functionId == TSDB_FUNC_TOP || functionId == TSDB_FUNC_BOTTOM) { - /* - * NOTE: for top/bottom query, the value of first column of output (timestamp) are assigned - * in the procedure of top/bottom routine - * the output buffer in top/bottom routine is ptsOutputBuf, so we need to forward the output buffer - * - * diff function is handled in multi-output function - */ - pRuntimeEnv->pCtx[j].ptsOutputBuf += TSDB_KEYSIZE * output; - } - - resetResultInfo(pRuntimeEnv->pCtx[j].resultInfo); - } -} - -void initCtxOutputBuf(SQueryRuntimeEnv *pRuntimeEnv) { - SQuery *pQuery = pRuntimeEnv->pQuery; - - for (int32_t j = 0; j < pQuery->numOfOutputCols; ++j) { - int32_t functionId = pQuery->pSelectExpr[j].pBase.functionId; - pRuntimeEnv->pCtx[j].currentStage = 0; - - aAggs[functionId].init(&pRuntimeEnv->pCtx[j]); - } -} - -void doSkipResults(SQueryRuntimeEnv *pRuntimeEnv) { - SQuery *pQuery = pRuntimeEnv->pQuery; - if (pQuery->pointsRead == 0 || pQuery->limit.offset == 0) { - 
return; - } - - if (pQuery->pointsRead <= pQuery->limit.offset) { - pQuery->limit.offset -= pQuery->pointsRead; - - pQuery->pointsRead = 0; - pQuery->pointsOffset = pQuery->pointsToRead; // clear all data in result buffer - - resetCtxOutputBuf(pRuntimeEnv); - - // clear the buffer is full flag if exists - pQuery->over &= (~QUERY_RESBUF_FULL); - } else { - int32_t numOfSkip = (int32_t)pQuery->limit.offset; - pQuery->pointsRead -= numOfSkip; - - for (int32_t i = 0; i < pQuery->numOfOutputCols; ++i) { - int32_t functionId = pQuery->pSelectExpr[i].pBase.functionId; - int32_t bytes = pRuntimeEnv->pCtx[i].outputBytes; - - memmove(pQuery->sdata[i]->data, pQuery->sdata[i]->data + bytes * numOfSkip, pQuery->pointsRead * bytes); - pRuntimeEnv->pCtx[i].aOutputBuf += bytes * numOfSkip; - - if (functionId == TSDB_FUNC_DIFF || functionId == TSDB_FUNC_TOP || functionId == TSDB_FUNC_BOTTOM) { - pRuntimeEnv->pCtx[i].ptsOutputBuf += TSDB_KEYSIZE * numOfSkip; - } - } - - pQuery->limit.offset = 0; - } -} - -typedef struct SQueryStatus { - SPositionInfo start; - SPositionInfo next; - SPositionInfo end; - int8_t overStatus; - TSKEY lastKey; - STSCursor cur; -} SQueryStatus; -// todo refactor -static void queryStatusSave(SQueryRuntimeEnv *pRuntimeEnv, SQueryStatus *pStatus) { - SQuery *pQuery = pRuntimeEnv->pQuery; - - pStatus->overStatus = pQuery->over; - pStatus->lastKey = pQuery->lastKey; - - pStatus->start = pRuntimeEnv->startPos; - pStatus->next = pRuntimeEnv->nextPos; - pStatus->end = pRuntimeEnv->endPos; - - pStatus->cur = tsBufGetCursor(pRuntimeEnv->pTSBuf); // save the cursor - - if (pRuntimeEnv->pTSBuf) { - pRuntimeEnv->pTSBuf->cur.order ^= 1u; - tsBufNextPos(pRuntimeEnv->pTSBuf); - } - - setQueryStatus(pQuery, QUERY_NOT_COMPLETED); - - SWAP(pQuery->skey, pQuery->ekey, TSKEY); - pQuery->lastKey = pQuery->skey; - pRuntimeEnv->startPos = pRuntimeEnv->endPos; -} - -static void queryStatusRestore(SQueryRuntimeEnv *pRuntimeEnv, SQueryStatus *pStatus) { - SQuery *pQuery = 
pRuntimeEnv->pQuery; - SWAP(pQuery->skey, pQuery->ekey, TSKEY); - - pQuery->lastKey = pStatus->lastKey; - pQuery->over = pStatus->overStatus; - - pRuntimeEnv->startPos = pStatus->start; - pRuntimeEnv->nextPos = pStatus->next; - pRuntimeEnv->endPos = pStatus->end; - - tsBufSetCursor(pRuntimeEnv->pTSBuf, &pStatus->cur); -} - -static void doSingleMeterSupplementScan(SQueryRuntimeEnv *pRuntimeEnv) { - SQuery * pQuery = pRuntimeEnv->pQuery; - SQueryStatus qStatus = {0}; - - if (!needSupplementaryScan(pQuery)) { - return; - } - - dTrace("QInfo:%p start to supp scan", GET_QINFO_ADDR(pQuery)); - - SET_SUPPLEMENT_SCAN_FLAG(pRuntimeEnv); - - // usually this load operation will incur load disk block operation - TSKEY endKey = loadRequiredBlockIntoMem(pRuntimeEnv, &pRuntimeEnv->endPos); - - assert((QUERY_IS_ASC_QUERY(pQuery) && endKey <= pQuery->ekey) || - (!QUERY_IS_ASC_QUERY(pQuery) && endKey >= pQuery->ekey)); - - // close necessary function execution during supplementary scan - disableFunctForTableSuppleScan(pRuntimeEnv, pQuery->order.order); - queryStatusSave(pRuntimeEnv, &qStatus); - - doScanAllDataBlocks(pRuntimeEnv); - - // set the correct start position, and load the corresponding block in buffer if required. 
- TSKEY actKey = loadRequiredBlockIntoMem(pRuntimeEnv, &pRuntimeEnv->startPos); - assert((QUERY_IS_ASC_QUERY(pQuery) && actKey >= pQuery->skey) || - (!QUERY_IS_ASC_QUERY(pQuery) && actKey <= pQuery->skey)); - - queryStatusRestore(pRuntimeEnv, &qStatus); - enableFunctForMasterScan(pRuntimeEnv, pQuery->order.order); - SET_MASTER_SCAN_FLAG(pRuntimeEnv); -} - -void setQueryStatus(SQuery *pQuery, int8_t status) { - if (status == QUERY_NOT_COMPLETED) { - pQuery->over = status; - } else { - // QUERY_NOT_COMPLETED is not compatible with any other status, so clear its position first - pQuery->over &= (~QUERY_NOT_COMPLETED); - pQuery->over |= status; - } -} - -bool needScanDataBlocksAgain(SQueryRuntimeEnv *pRuntimeEnv) { - SQuery *pQuery = pRuntimeEnv->pQuery; - bool toContinue = false; - - if (isGroupbyNormalCol(pQuery->pGroupbyExpr) || isIntervalQuery(pQuery)) { - // for each group result, call the finalize function for each column - SWindowResInfo *pWindowResInfo = &pRuntimeEnv->windowResInfo; - - for (int32_t i = 0; i < pWindowResInfo->size; ++i) { - SWindowResult *pResult = getWindowResult(pWindowResInfo, i); - if (!pResult->status.closed) { - continue; - } - - setWindowResOutputBuf(pRuntimeEnv, pResult); - - for (int32_t j = 0; j < pQuery->numOfOutputCols; ++j) { - int16_t functId = pQuery->pSelectExpr[j].pBase.functionId; - if (functId == TSDB_FUNC_TS) { - continue; - } - - aAggs[functId].xNextStep(&pRuntimeEnv->pCtx[j]); - SResultInfo *pResInfo = GET_RES_INFO(&pRuntimeEnv->pCtx[j]); - - toContinue |= (!pResInfo->complete); - } - } - } else { - for (int32_t j = 0; j < pQuery->numOfOutputCols; ++j) { - int16_t functId = pQuery->pSelectExpr[j].pBase.functionId; - if (functId == TSDB_FUNC_TS) { - continue; - } - - aAggs[functId].xNextStep(&pRuntimeEnv->pCtx[j]); - SResultInfo *pResInfo = GET_RES_INFO(&pRuntimeEnv->pCtx[j]); - - toContinue |= (!pResInfo->complete); - } - } - - return toContinue; -} - -void vnodeScanAllData(SQueryRuntimeEnv *pRuntimeEnv) { - SQuery *pQuery 
= pRuntimeEnv->pQuery; - setQueryStatus(pQuery, QUERY_NOT_COMPLETED); - - /* store the start query position */ - savePointPosition(&pRuntimeEnv->startPos, pQuery->fileId, pQuery->slot, pQuery->pos); - int64_t oldSkey = pQuery->skey; - int64_t oldEkey = pQuery->ekey; - - int64_t skey = pQuery->lastKey; - int32_t status = pQuery->over; - - SET_MASTER_SCAN_FLAG(pRuntimeEnv); - int32_t step = GET_FORWARD_DIRECTION_FACTOR(pQuery->order.order); - - while (1) { - doScanAllDataBlocks(pRuntimeEnv); - - if (!needScanDataBlocksAgain(pRuntimeEnv)) { - // restore the status - if (pRuntimeEnv->scanFlag == REPEAT_SCAN) { - pQuery->over = status; - } - break; - } - - /* - * set the correct start position, and load the corresponding block in buffer for next - * round scan all data blocks. - */ - TSKEY key = loadRequiredBlockIntoMem(pRuntimeEnv, &pRuntimeEnv->startPos); - assert((QUERY_IS_ASC_QUERY(pQuery) && key >= pQuery->skey) || (!QUERY_IS_ASC_QUERY(pQuery) && key <= pQuery->skey)); - - status = pQuery->over; - pQuery->ekey = pQuery->lastKey - step; - pQuery->lastKey = pQuery->skey; - - setQueryStatus(pQuery, QUERY_NOT_COMPLETED); - pRuntimeEnv->scanFlag = REPEAT_SCAN; - - /* check if query is killed or not */ - if (isQueryKilled(pQInfo)) { - setQueryStatus(pQuery, QUERY_NO_DATA_TO_CHECK); - return; - } - } - - // no need to set the end key - int64_t curLastKey = pQuery->lastKey; - pQuery->skey = skey; - pQuery->ekey = pQuery->lastKey - step; - - doSingleMeterSupplementScan(pRuntimeEnv); - - // update the pQuery->skey/pQuery->ekey to limit the scan scope of sliding query during supplementary scan - pQuery->skey = oldSkey; - pQuery->ekey = oldEkey; - pQuery->lastKey = curLastKey; -} - -void doFinalizeResult(SQueryRuntimeEnv *pRuntimeEnv) { - SQuery *pQuery = pRuntimeEnv->pQuery; - - if (isGroupbyNormalCol(pQuery->pGroupbyExpr) || isIntervalQuery(pQuery)) { - // for each group result, call the finalize function for each column - SWindowResInfo *pWindowResInfo = 
&pRuntimeEnv->windowResInfo; - if (isGroupbyNormalCol(pQuery->pGroupbyExpr)) { - closeAllTimeWindow(pWindowResInfo); - } - - for (int32_t i = 0; i < pWindowResInfo->size; ++i) { - SWindowResult *buf = &pWindowResInfo->pResult[i]; - if (!isWindowResClosed(pWindowResInfo, i)) { - continue; - } - - setWindowResOutputBuf(pRuntimeEnv, buf); - - for (int32_t j = 0; j < pQuery->numOfOutputCols; ++j) { - aAggs[pQuery->pSelectExpr[j].pBase.functionId].xFinalize(&pRuntimeEnv->pCtx[j]); - } - - /* - * set the number of output results for group by normal columns, the number of output rows usually is 1 except - * the top and bottom query - */ - buf->numOfRows = getNumOfResult(pRuntimeEnv); - } - - } else { - for (int32_t j = 0; j < pQuery->numOfOutputCols; ++j) { - aAggs[pQuery->pSelectExpr[j].pBase.functionId].xFinalize(&pRuntimeEnv->pCtx[j]); - } - } -} - -static bool hasMainOutput(SQuery *pQuery) { - for (int32_t i = 0; i < pQuery->numOfOutputCols; ++i) { - int32_t functionId = pQuery->pSelectExpr[i].pBase.functionId; - - if (functionId != TSDB_FUNC_TS && functionId != TSDB_FUNC_TAG && functionId != TSDB_FUNC_TAGPRJ) { - return true; - } - } - - return false; -} - -int64_t getNumOfResult(SQueryRuntimeEnv *pRuntimeEnv) { - SQuery *pQuery = pRuntimeEnv->pQuery; - bool hasMainFunction = hasMainOutput(pQuery); - - int64_t maxOutput = 0; - for (int32_t j = 0; j < pQuery->numOfOutputCols; ++j) { - int32_t functionId = pQuery->pSelectExpr[j].pBase.functionId; - - /* - * ts, tag, tagprj function can not decide the output number of current query - * the number of output result is decided by main output - */ - if (hasMainFunction && - (functionId == TSDB_FUNC_TS || functionId == TSDB_FUNC_TAG || functionId == TSDB_FUNC_TAGPRJ)) { - continue; - } - - SResultInfo *pResInfo = GET_RES_INFO(&pRuntimeEnv->pCtx[j]); - if (pResInfo != NULL && maxOutput < pResInfo->numOfRes) { - maxOutput = pResInfo->numOfRes; - } - } - - return maxOutput; -} - -static int32_t offsetComparator(const void 
*pLeft, const void *pRight) { - SMeterDataInfo **pLeft1 = (SMeterDataInfo **)pLeft; - SMeterDataInfo **pRight1 = (SMeterDataInfo **)pRight; - - if ((*pLeft1)->offsetInHeaderFile == (*pRight1)->offsetInHeaderFile) { - return 0; - } - - return ((*pLeft1)->offsetInHeaderFile > (*pRight1)->offsetInHeaderFile) ? 1 : -1; -} - -/** - * - * @param pQInfo - * @param fid - * @param pQueryFileInfo - * @param start - * @param end - * @param pMeterHeadDataInfo - * @return - */ -int32_t vnodeFilterQualifiedMeters(SQInfo *pQInfo, int32_t vid, tSidSet *pSidSet, SMeterDataInfo *pMeterDataInfo, - int32_t *numOfMeters, SMeterDataInfo ***pReqMeterDataInfo) { - SQuery *pQuery = &pQInfo->query; - - STableQuerySupportObj *pSupporter = pQInfo->pTableQuerySupporter; - SMeterSidExtInfo ** pMeterSidExtInfo = pSupporter->pMeterSidExtInfo; - SQueryRuntimeEnv * pRuntimeEnv = &pSupporter->runtimeEnv; - - SVnodeObj *pVnode = &vnodeList[vid]; - - char *buf = calloc(1, getCompHeaderSegSize(&pVnode->cfg)); - if (buf == NULL) { - *numOfMeters = 0; - return TSDB_CODE_SERV_OUT_OF_MEMORY; - } - - SQueryFilesInfo *pVnodeFileInfo = &pRuntimeEnv->vnodeFileInfo; - - int32_t headerSize = getCompHeaderSegSize(&pVnode->cfg); - lseek(pVnodeFileInfo->headerFd, TSDB_FILE_HEADER_LEN, SEEK_SET); - read(pVnodeFileInfo->headerFd, buf, headerSize); - - // check the offset value integrity - if (validateHeaderOffsetSegment(pQInfo, pRuntimeEnv->vnodeFileInfo.headerFilePath, vid, buf - TSDB_FILE_HEADER_LEN, - headerSize) < 0) { - free(buf); - *numOfMeters = 0; - - return TSDB_CODE_FILE_CORRUPTED; - } - - int64_t oldestKey = getOldestKey(pVnode->numOfFiles, pVnode->fileId, &pVnode->cfg); - (*pReqMeterDataInfo) = malloc(POINTER_BYTES * pSidSet->numOfSids); - if (*pReqMeterDataInfo == NULL) { - free(buf); - *numOfMeters = 0; - - return TSDB_CODE_SERV_OUT_OF_MEMORY; - } - - int32_t groupId = 0; - TSKEY skey, ekey; - - for (int32_t i = 0; i < pSidSet->numOfSids; ++i) { // load all meter meta info - SMeterObj *pMeterObj = 
getMeterObj(pSupporter->pMetersHashTable, pMeterSidExtInfo[i]->sid); - if (pMeterObj == NULL) { - dError("QInfo:%p failed to find required sid:%d", pQInfo, pMeterSidExtInfo[i]->sid); - continue; - } - - if (i >= pSidSet->starterPos[groupId + 1]) { - groupId += 1; - } - - SMeterDataInfo *pOneMeterDataInfo = &pMeterDataInfo[i]; - if (pOneMeterDataInfo->pMeterObj == NULL) { - setMeterDataInfo(pOneMeterDataInfo, pMeterObj, i, groupId); - } - - /* restore possible exists new query range for this meter, which starts from cache */ - if (pOneMeterDataInfo->pMeterQInfo != NULL) { - skey = pOneMeterDataInfo->pMeterQInfo->lastKey; - } else { - skey = pSupporter->rawSKey; - } - - // query on disk data files, which actually starts from the lastkey - ekey = pSupporter->rawEKey; - - if (QUERY_IS_ASC_QUERY(pQuery)) { - assert(skey >= pSupporter->rawSKey); - if (ekey < oldestKey || skey > pMeterObj->lastKeyOnFile) { - continue; - } - } else { - assert(skey <= pSupporter->rawSKey); - if (skey < oldestKey || ekey > pMeterObj->lastKeyOnFile) { - continue; - } - } - - int64_t headerOffset = sizeof(SCompHeader) * pMeterObj->sid; - SCompHeader *compHeader = (SCompHeader *)(buf + headerOffset); - if (compHeader->compInfoOffset == 0) { // current table is empty - continue; - } - - // corrupted file may cause the invalid compInfoOffset, check needs - int32_t compHeaderOffset = getCompHeaderStartPosition(&pVnode->cfg); - if (validateCompBlockOffset(pQInfo, pMeterObj, compHeader, &pRuntimeEnv->vnodeFileInfo, compHeaderOffset) != - TSDB_CODE_SUCCESS) { - free(buf); - *numOfMeters = 0; - - return TSDB_CODE_FILE_CORRUPTED; - } - - pOneMeterDataInfo->offsetInHeaderFile = (uint64_t)compHeader->compInfoOffset; - - if (pOneMeterDataInfo->pMeterQInfo == NULL) { - pOneMeterDataInfo->pMeterQInfo = - createMeterQueryInfo(pSupporter, pMeterObj->sid, pSupporter->rawSKey, pSupporter->rawEKey); - } - - (*pReqMeterDataInfo)[*numOfMeters] = pOneMeterDataInfo; - (*numOfMeters) += 1; - } - - assert(*numOfMeters 
<= pSidSet->numOfSids); - - /* enable sequentially access*/ - if (*numOfMeters > 1) { - qsort((*pReqMeterDataInfo), *numOfMeters, POINTER_BYTES, offsetComparator); - } - - free(buf); - - return TSDB_CODE_SUCCESS; -} - -SMeterQueryInfo *createMeterQueryInfo(STableQuerySupportObj *pSupporter, int32_t sid, TSKEY skey, TSKEY ekey) { - SQueryRuntimeEnv *pRuntimeEnv = &pSupporter->runtimeEnv; - - SMeterQueryInfo *pMeterQueryInfo = calloc(1, sizeof(SMeterQueryInfo)); - - pMeterQueryInfo->skey = skey; - pMeterQueryInfo->ekey = ekey; - pMeterQueryInfo->lastKey = skey; - - pMeterQueryInfo->sid = sid; - pMeterQueryInfo->cur.vnodeIndex = -1; - - initWindowResInfo(&pMeterQueryInfo->windowResInfo, pRuntimeEnv, 100, 100, TSDB_DATA_TYPE_INT); - return pMeterQueryInfo; -} - -void destroyMeterQueryInfo(SMeterQueryInfo *pMeterQueryInfo, int32_t numOfCols) { - if (pMeterQueryInfo == NULL) { - return; - } - - // free(pMeterQueryInfo->pageList); - // for (int32_t i = 0; i < numOfCols; ++i) { - // tfree(pMeterQueryInfo->[i].interResultBuf); - // } - - // free(pMeterQueryInfo->resultInfo); - free(pMeterQueryInfo); -} - -void changeMeterQueryInfoForSuppleQuery(SQuery *pQuery, SMeterQueryInfo *pMeterQueryInfo, TSKEY skey, TSKEY ekey) { - if (pMeterQueryInfo == NULL) { - return; - } - - // order has change already! 
- int32_t step = GET_FORWARD_DIRECTION_FACTOR(pQuery->order.order); - if (!QUERY_IS_ASC_QUERY(pQuery)) { - assert(pMeterQueryInfo->ekey >= pMeterQueryInfo->lastKey + step); - } else { - assert(pMeterQueryInfo->ekey <= pMeterQueryInfo->lastKey + step); - } - - pMeterQueryInfo->ekey = pMeterQueryInfo->lastKey + step; - - SWAP(pMeterQueryInfo->skey, pMeterQueryInfo->ekey, TSKEY); - pMeterQueryInfo->lastKey = pMeterQueryInfo->skey; - - pMeterQueryInfo->cur.order = pMeterQueryInfo->cur.order ^ 1u; - pMeterQueryInfo->cur.vnodeIndex = -1; -} - -void restoreIntervalQueryRange(SQueryRuntimeEnv *pRuntimeEnv, SMeterQueryInfo *pMeterQueryInfo) { - SQuery *pQuery = pRuntimeEnv->pQuery; - - pQuery->skey = pMeterQueryInfo->skey; - pQuery->ekey = pMeterQueryInfo->ekey; - pQuery->lastKey = pMeterQueryInfo->lastKey; - - assert(((pQuery->lastKey >= pQuery->skey) && QUERY_IS_ASC_QUERY(pQuery)) || - ((pQuery->lastKey <= pQuery->skey) && !QUERY_IS_ASC_QUERY(pQuery))); -} - -static void clearAllMeterDataBlockInfo(SMeterDataInfo **pMeterDataInfo, int32_t start, int32_t end) { - for (int32_t i = start; i < end; ++i) { - tfree(pMeterDataInfo[i]->pBlock); - pMeterDataInfo[i]->numOfBlocks = 0; - pMeterDataInfo[i]->start = -1; - } -} - -static bool getValidDataBlocksRangeIndex(SMeterDataInfo *pMeterDataInfo, SQuery *pQuery, SCompBlock *pCompBlock, - int64_t numOfBlocks, TSKEY minval, TSKEY maxval, int32_t *end) { - SMeterObj *pMeterObj = pMeterDataInfo->pMeterObj; - SQInfo * pQInfo = (SQInfo *)GET_QINFO_ADDR(pQuery); - - /* - * search the possible blk that may satisfy the query condition always start from the min value, therefore, - * the order is always ascending order - */ - pMeterDataInfo->start = binarySearchForBlockImpl(pCompBlock, (int32_t)numOfBlocks, minval, TSQL_SO_ASC); - if (minval > pCompBlock[pMeterDataInfo->start].keyLast || maxval < pCompBlock[pMeterDataInfo->start].keyFirst) { - dTrace("QInfo:%p vid:%d sid:%d id:%s, no result in files", pQInfo, pMeterObj->vnode, pMeterObj->sid, 
- pMeterObj->meterId); - return false; - } - - // incremental checks following blocks until whose time range does not overlap with the query range - *end = pMeterDataInfo->start; - while (*end <= (numOfBlocks - 1)) { - if (pCompBlock[*end].keyFirst <= maxval && pCompBlock[*end].keyLast >= maxval) { - break; - } - - if (pCompBlock[*end].keyFirst > maxval) { - *end -= 1; - break; - } - - if (*end == numOfBlocks - 1) { - break; - } else { - ++(*end); - } - } - - return true; -} - -static bool setValidDataBlocks(SMeterDataInfo *pMeterDataInfo, int32_t end) { - int32_t size = (end - pMeterDataInfo->start) + 1; - assert(size > 0); - - if (size != pMeterDataInfo->numOfBlocks) { - memmove(pMeterDataInfo->pBlock, &pMeterDataInfo->pBlock[pMeterDataInfo->start], size * sizeof(SCompBlock)); - - char *tmp = realloc(pMeterDataInfo->pBlock, size * sizeof(SCompBlock)); - if (tmp == NULL) { - return false; - } - - pMeterDataInfo->pBlock = (SCompBlock *)tmp; - pMeterDataInfo->numOfBlocks = size; - } - - return true; -} - -static bool setCurrentQueryRange(SMeterDataInfo *pMeterDataInfo, SQuery *pQuery, TSKEY endKey, TSKEY *minval, - TSKEY *maxval) { - SQInfo * pQInfo = (SQInfo *)GET_QINFO_ADDR(pQuery); - SMeterObj * pMeterObj = pMeterDataInfo->pMeterObj; - SMeterQueryInfo *pMeterQInfo = pMeterDataInfo->pMeterQInfo; - - if (QUERY_IS_ASC_QUERY(pQuery)) { - *minval = pMeterQInfo->lastKey; - *maxval = endKey; - } else { - *minval = endKey; - *maxval = pMeterQInfo->lastKey; - } - - if (*minval > *maxval) { - qTrace("QInfo:%p vid:%d sid:%d id:%s, no result in files, qrange:%" PRId64 "-%" PRId64 ", lastKey:%" PRId64, pQInfo, - pMeterObj->vnode, pMeterObj->sid, pMeterObj->meterId, pMeterQInfo->skey, pMeterQInfo->ekey, - pMeterQInfo->lastKey); - return false; - } else { - qTrace("QInfo:%p vid:%d sid:%d id:%s, query in files, qrange:%" PRId64 "-%" PRId64 ", lastKey:%" PRId64, pQInfo, - pMeterObj->vnode, pMeterObj->sid, pMeterObj->meterId, pMeterQInfo->skey, pMeterQInfo->ekey, - 
pMeterQInfo->lastKey); - return true; - } -} - -/** - * @param pSupporter - * @param pQuery - * @param numOfMeters - * @param filePath - * @param pMeterDataInfo - * @return - */ -int32_t getDataBlocksForMeters(STableQuerySupportObj *pSupporter, SQuery *pQuery, int32_t numOfMeters, - const char *filePath, SMeterDataInfo **pMeterDataInfo, uint32_t *numOfBlocks) { - SQInfo * pQInfo = (SQInfo *)GET_QINFO_ADDR(pQuery); - SQueryCostSummary *pSummary = &pSupporter->runtimeEnv.summary; - - TSKEY minval, maxval; - - *numOfBlocks = 0; - SQueryFilesInfo *pVnodeFileInfo = &pSupporter->runtimeEnv.vnodeFileInfo; - - // sequentially scan this header file to extract the compHeader info - for (int32_t j = 0; j < numOfMeters; ++j) { - SMeterObj *pMeterObj = pMeterDataInfo[j]->pMeterObj; - - lseek(pVnodeFileInfo->headerFd, pMeterDataInfo[j]->offsetInHeaderFile, SEEK_SET); - - SCompInfo compInfo = {0}; - read(pVnodeFileInfo->headerFd, &compInfo, sizeof(SCompInfo)); - - int32_t ret = validateCompBlockInfoSegment(pQInfo, filePath, pMeterObj->vnode, &compInfo, - pMeterDataInfo[j]->offsetInHeaderFile); - if (ret != TSDB_CODE_SUCCESS) { // file corrupted - clearAllMeterDataBlockInfo(pMeterDataInfo, 0, numOfMeters); - return TSDB_CODE_FILE_CORRUPTED; - } - - if (compInfo.numOfBlocks <= 0 || compInfo.uid != pMeterDataInfo[j]->pMeterObj->uid) { - clearAllMeterDataBlockInfo(pMeterDataInfo, 0, numOfMeters); - continue; - } - - int32_t size = compInfo.numOfBlocks * sizeof(SCompBlock); - size_t bufferSize = size + sizeof(TSCKSUM); - - pMeterDataInfo[j]->numOfBlocks = compInfo.numOfBlocks; - char *p = realloc(pMeterDataInfo[j]->pBlock, bufferSize); - if (p == NULL) { - clearAllMeterDataBlockInfo(pMeterDataInfo, 0, numOfMeters); - return TSDB_CODE_SERV_OUT_OF_MEMORY; - } else { - memset(p, 0, bufferSize); - pMeterDataInfo[j]->pBlock = (SCompBlock *)p; - } - - read(pVnodeFileInfo->headerFd, pMeterDataInfo[j]->pBlock, bufferSize); - TSCKSUM checksum = *(TSCKSUM *)((char *)pMeterDataInfo[j]->pBlock + 
size); - - int64_t st = taosGetTimestampUs(); - - // check compblock integrity - ret = validateCompBlockSegment(pQInfo, filePath, &compInfo, (char *)pMeterDataInfo[j]->pBlock, pMeterObj->vnode, - checksum); - if (ret != TSDB_CODE_SUCCESS) { - clearAllMeterDataBlockInfo(pMeterDataInfo, 0, numOfMeters); - return TSDB_CODE_FILE_CORRUPTED; - } - - int64_t et = taosGetTimestampUs(); - - pSummary->readCompInfo++; - pSummary->totalCompInfoSize += (size + sizeof(SCompInfo) + sizeof(TSCKSUM)); - pSummary->loadCompInfoUs += (et - st); - - if (!setCurrentQueryRange(pMeterDataInfo[j], pQuery, pSupporter->rawEKey, &minval, &maxval)) { - clearAllMeterDataBlockInfo(pMeterDataInfo, j, j + 1); - continue; - } - - int32_t end = 0; - if (!getValidDataBlocksRangeIndex(pMeterDataInfo[j], pQuery, pMeterDataInfo[j]->pBlock, compInfo.numOfBlocks, - minval, maxval, &end)) { - // current table has no qualified data blocks, erase its information. - clearAllMeterDataBlockInfo(pMeterDataInfo, j, j + 1); - continue; - } - - if (!setValidDataBlocks(pMeterDataInfo[j], end)) { - clearAllMeterDataBlockInfo(pMeterDataInfo, 0, numOfMeters); - - pQInfo->killed = 1; // set query kill, abort current query since no memory available - return TSDB_CODE_SERV_OUT_OF_MEMORY; - } - - qTrace("QInfo:%p vid:%d sid:%d id:%s, startIndex:%d, %d blocks qualified", pQInfo, pMeterObj->vnode, pMeterObj->sid, - pMeterObj->meterId, pMeterDataInfo[j]->start, pMeterDataInfo[j]->numOfBlocks); - - (*numOfBlocks) += pMeterDataInfo[j]->numOfBlocks; - } - - return TSDB_CODE_SUCCESS; -} - -static void freeDataBlockFieldInfo(SMeterDataBlockInfoEx *pDataBlockInfoEx, int32_t len) { - for (int32_t i = 0; i < len; ++i) { - tfree(pDataBlockInfoEx[i].pBlock.fields); - } -} - -void freeMeterBlockInfoEx(SMeterDataBlockInfoEx *pDataBlockInfoEx, int32_t len) { - freeDataBlockFieldInfo(pDataBlockInfoEx, len); - tfree(pDataBlockInfoEx); -} - -typedef struct SBlockOrderSupporter { - int32_t numOfMeters; - SMeterDataBlockInfoEx 
**pDataBlockInfoEx; - int32_t * blockIndexArray; - int32_t * numOfBlocksPerMeter; -} SBlockOrderSupporter; - -static int32_t blockAccessOrderComparator(const void *pLeft, const void *pRight, void *param) { - int32_t leftTableIndex = *(int32_t *)pLeft; - int32_t rightTableIndex = *(int32_t *)pRight; - - SBlockOrderSupporter *pSupporter = (SBlockOrderSupporter *)param; - - int32_t leftTableBlockIndex = pSupporter->blockIndexArray[leftTableIndex]; - int32_t rightTableBlockIndex = pSupporter->blockIndexArray[rightTableIndex]; - - if (leftTableBlockIndex > pSupporter->numOfBlocksPerMeter[leftTableIndex]) { - /* left block is empty */ - return 1; - } else if (rightTableBlockIndex > pSupporter->numOfBlocksPerMeter[rightTableIndex]) { - /* right block is empty */ - return -1; - } - - SMeterDataBlockInfoEx *pLeftBlockInfoEx = &pSupporter->pDataBlockInfoEx[leftTableIndex][leftTableBlockIndex]; - SMeterDataBlockInfoEx *pRightBlockInfoEx = &pSupporter->pDataBlockInfoEx[rightTableIndex][rightTableBlockIndex]; - - // assert(pLeftBlockInfoEx->pBlock.compBlock->offset != pRightBlockInfoEx->pBlock.compBlock->offset); - if (pLeftBlockInfoEx->pBlock.compBlock->offset == pRightBlockInfoEx->pBlock.compBlock->offset && - pLeftBlockInfoEx->pBlock.compBlock->last == pRightBlockInfoEx->pBlock.compBlock->last) { - // todo add more information - dError("error in header file, two block with same offset:%p", pLeftBlockInfoEx->pBlock.compBlock->offset); - } - - return pLeftBlockInfoEx->pBlock.compBlock->offset > pRightBlockInfoEx->pBlock.compBlock->offset ? 
1 : -1; -} - -void cleanBlockOrderSupporter(SBlockOrderSupporter *pSupporter, int32_t numOfTables) { - tfree(pSupporter->numOfBlocksPerMeter); - tfree(pSupporter->blockIndexArray); - - for (int32_t i = 0; i < numOfTables; ++i) { - tfree(pSupporter->pDataBlockInfoEx[i]); - } - - tfree(pSupporter->pDataBlockInfoEx); -} - -int32_t createDataBlocksInfoEx(SMeterDataInfo **pMeterDataInfo, int32_t numOfMeters, - SMeterDataBlockInfoEx **pDataBlockInfoEx, int32_t numOfCompBlocks, - int32_t *numOfAllocBlocks, int64_t addr) { - // release allocated memory first - freeDataBlockFieldInfo(*pDataBlockInfoEx, *numOfAllocBlocks); - - if (*numOfAllocBlocks == 0 || *numOfAllocBlocks < numOfCompBlocks) { - char *tmp = realloc((*pDataBlockInfoEx), sizeof(SMeterDataBlockInfoEx) * numOfCompBlocks); - if (tmp == NULL) { - tfree(*pDataBlockInfoEx); - return TSDB_CODE_SERV_OUT_OF_MEMORY; - } - - *pDataBlockInfoEx = (SMeterDataBlockInfoEx *)tmp; - memset((*pDataBlockInfoEx), 0, sizeof(SMeterDataBlockInfoEx) * numOfCompBlocks); - *numOfAllocBlocks = numOfCompBlocks; - } - - SBlockOrderSupporter supporter = {0}; - supporter.numOfMeters = numOfMeters; - supporter.numOfBlocksPerMeter = calloc(1, sizeof(int32_t) * numOfMeters); - supporter.blockIndexArray = calloc(1, sizeof(int32_t) * numOfMeters); - supporter.pDataBlockInfoEx = calloc(1, POINTER_BYTES * numOfMeters); - - if (supporter.numOfBlocksPerMeter == NULL || supporter.blockIndexArray == NULL || - supporter.pDataBlockInfoEx == NULL) { - cleanBlockOrderSupporter(&supporter, 0); - return TSDB_CODE_SERV_OUT_OF_MEMORY; - } - - int32_t cnt = 0; - int32_t numOfQualMeters = 0; - for (int32_t j = 0; j < numOfMeters; ++j) { - if (pMeterDataInfo[j]->numOfBlocks == 0) { - continue; - } - - SCompBlock *pBlock = pMeterDataInfo[j]->pBlock; - supporter.numOfBlocksPerMeter[numOfQualMeters] = pMeterDataInfo[j]->numOfBlocks; - - char *buf = calloc(1, sizeof(SMeterDataBlockInfoEx) * pMeterDataInfo[j]->numOfBlocks); - if (buf == NULL) { - 
cleanBlockOrderSupporter(&supporter, numOfQualMeters); - return TSDB_CODE_SERV_OUT_OF_MEMORY; - } - - supporter.pDataBlockInfoEx[numOfQualMeters] = (SMeterDataBlockInfoEx *)buf; - - for (int32_t k = 0; k < pMeterDataInfo[j]->numOfBlocks; ++k) { - SMeterDataBlockInfoEx *pBlockInfoEx = &supporter.pDataBlockInfoEx[numOfQualMeters][k]; - - pBlockInfoEx->pBlock.compBlock = &pBlock[k]; - pBlockInfoEx->pBlock.fields = NULL; - - pBlockInfoEx->pMeterDataInfo = pMeterDataInfo[j]; - pBlockInfoEx->groupIdx = pMeterDataInfo[j]->groupIdx; // set the group index - pBlockInfoEx->blockIndex = pMeterDataInfo[j]->start + k; // set the block index in original meter - cnt++; - } - - numOfQualMeters++; - } - - dTrace("QInfo %p create data blocks info struct completed", addr); - - assert(cnt == numOfCompBlocks && numOfQualMeters <= numOfMeters); // the pMeterDataInfo[j]->numOfBlocks may be 0 - supporter.numOfMeters = numOfQualMeters; - SLoserTreeInfo *pTree = NULL; - - uint8_t ret = tLoserTreeCreate(&pTree, supporter.numOfMeters, &supporter, blockAccessOrderComparator); - if (ret != TSDB_CODE_SUCCESS) { - cleanBlockOrderSupporter(&supporter, numOfMeters); - return TSDB_CODE_SERV_OUT_OF_MEMORY; - } - - int32_t numOfTotal = 0; - - while (numOfTotal < cnt) { - int32_t pos = pTree->pNode[0].index; - SMeterDataBlockInfoEx *pBlocksInfoEx = supporter.pDataBlockInfoEx[pos]; - int32_t index = supporter.blockIndexArray[pos]++; - - (*pDataBlockInfoEx)[numOfTotal++] = pBlocksInfoEx[index]; - - // set data block index overflow, in order to disable the offset comparator - if (supporter.blockIndexArray[pos] >= supporter.numOfBlocksPerMeter[pos]) { - supporter.blockIndexArray[pos] = supporter.numOfBlocksPerMeter[pos] + 1; - } - - tLoserTreeAdjust(pTree, pos + supporter.numOfMeters); - } - - /* - * available when no import exists - * for(int32_t i = 0; i < cnt - 1; ++i) { - * assert((*pDataBlockInfoEx)[i].pBlock.compBlock->offset < (*pDataBlockInfoEx)[i+1].pBlock.compBlock->offset); - * } - */ - - 
dTrace("QInfo %p %d data blocks sort completed", addr, cnt); - cleanBlockOrderSupporter(&supporter, numOfMeters); - free(pTree); - - return TSDB_CODE_SUCCESS; -} - -/** - * set output buffer for different group - * @param pRuntimeEnv - * @param pDataBlockInfoEx - */ -void setExecutionContext(STableQuerySupportObj *pSupporter, SMeterQueryInfo *pMeterQueryInfo, int32_t meterIdx, - int32_t groupIdx, TSKEY nextKey) { - SQueryRuntimeEnv *pRuntimeEnv = &pSupporter->runtimeEnv; - SWindowResInfo * pWindowResInfo = &pRuntimeEnv->windowResInfo; - int32_t GROUPRESULTID = 1; - - SWindowResult *pWindowRes = doSetTimeWindowFromKey(pRuntimeEnv, pWindowResInfo, (char *)&groupIdx, sizeof(groupIdx)); - if (pWindowRes == NULL) { - return; - } - - /* - * not assign result buffer yet, add new result buffer - * all group belong to one result set, and each group result has different group id so set the id to be one - */ - if (pWindowRes->pos.pageId == -1) { - if (addNewWindowResultBuf(pWindowRes, pRuntimeEnv->pResultBuf, GROUPRESULTID, pRuntimeEnv->numOfRowsPerPage) != - TSDB_CODE_SUCCESS) { - return; - } - } - - setWindowResOutputBuf(pRuntimeEnv, pWindowRes); - initCtxOutputBuf(pRuntimeEnv); - - pMeterQueryInfo->lastKey = nextKey; - setAdditionalInfo(pSupporter, meterIdx, pMeterQueryInfo); -} - -static void setWindowResOutputBuf(SQueryRuntimeEnv *pRuntimeEnv, SWindowResult *pResult) { - SQuery *pQuery = pRuntimeEnv->pQuery; - - // Note: pResult->pos[i]->numOfElems == 0, there is only fixed number of results for each group - for (int32_t i = 0; i < pQuery->numOfOutputCols; ++i) { - SQLFunctionCtx *pCtx = &pRuntimeEnv->pCtx[i]; - pCtx->aOutputBuf = getPosInResultPage(pRuntimeEnv, i, pResult); - - int32_t functionId = pQuery->pSelectExpr[i].pBase.functionId; - if (functionId == TSDB_FUNC_TOP || functionId == TSDB_FUNC_BOTTOM || functionId == TSDB_FUNC_DIFF) { - pCtx->ptsOutputBuf = pRuntimeEnv->pCtx[0].aOutputBuf; - } - - /* - * set the output buffer information and intermediate buffer - * 
not all queries require the interResultBuf, such as COUNT - */ - pCtx->resultInfo = &pResult->resultInfo[i]; - - // set super table query flag - SResultInfo *pResInfo = GET_RES_INFO(pCtx); - pResInfo->superTableQ = pRuntimeEnv->stableQuery; - } -} - -int32_t setAdditionalInfo(STableQuerySupportObj *pSupporter, int32_t meterIdx, SMeterQueryInfo *pMeterQueryInfo) { - SQueryRuntimeEnv *pRuntimeEnv = &pSupporter->runtimeEnv; - assert(pMeterQueryInfo->lastKey > 0); - - vnodeSetTagValueInParam(pSupporter->pSidSet, pRuntimeEnv, pSupporter->pMeterSidExtInfo[meterIdx]); - - // both the master and supplement scan needs to set the correct ts comp start position - if (pRuntimeEnv->pTSBuf != NULL) { - if (pMeterQueryInfo->cur.vnodeIndex == -1) { - pMeterQueryInfo->tag = pRuntimeEnv->pCtx[0].tag.i64Key; - - tsBufGetElemStartPos(pRuntimeEnv->pTSBuf, 0, pMeterQueryInfo->tag); - - // keep the cursor info of current meter - pMeterQueryInfo->cur = pRuntimeEnv->pTSBuf->cur; - } else { - tsBufSetCursor(pRuntimeEnv->pTSBuf, &pMeterQueryInfo->cur); - } - } - - return 0; -} - -/* - * There are two cases to handle: - * - * 1. Query range is not set yet (queryRangeSet = 0). we need to set the query range info, including pQuery->lastKey, - * pQuery->skey, and pQuery->eKey. - * 2. Query range is set and query is in progress. There may be another result with the same query ranges to be - * merged during merge stage. In this case, we need the pMeterQueryInfo->lastResRows to decide if there - * is a previous result generated or not. - */ -void setIntervalQueryRange(SMeterQueryInfo *pMeterQueryInfo, STableQuerySupportObj *pSupporter, TSKEY key) { - SQueryRuntimeEnv *pRuntimeEnv = &pSupporter->runtimeEnv; - SQuery * pQuery = pRuntimeEnv->pQuery; - - if (pMeterQueryInfo->queryRangeSet) { - pQuery->lastKey = key; - pMeterQueryInfo->lastKey = key; - } else { - pQuery->skey = key; - STimeWindow win = {.skey = key, pSupporter->rawEKey}; - - // for too small query range, no data in this interval. 
- if ((QUERY_IS_ASC_QUERY(pQuery) && (pQuery->ekey < pQuery->skey)) || - (!QUERY_IS_ASC_QUERY(pQuery) && (pQuery->skey < pQuery->ekey))) { - return; - } - - /** - * In handling the both ascending and descending order super table query, we need to find the first qualified - * timestamp of this table, and then set the first qualified start timestamp. - * In ascending query, key is the first qualified timestamp. However, in the descending order query, additional - * operations involve. - */ - if (!QUERY_IS_ASC_QUERY(pQuery)) { - TSKEY k = getGreaterEqualTimestamp(pRuntimeEnv); - win.skey = k; - win.ekey = key; // current key is the last timestamp value that are contained in query time window - - SPositionInfo p = {.fileId = pQuery->fileId, .slot = pQuery->slot, .pos = pQuery->pos}; - loadRequiredBlockIntoMem(pRuntimeEnv, &p); - } - - TSKEY skey1, ekey1; - TSKEY windowSKey = 0, windowEKey = 0; - - SWindowResInfo *pWindowResInfo = &pMeterQueryInfo->windowResInfo; - - doGetAlignedIntervalQueryRangeImpl(pQuery, win.skey, win.skey, win.ekey, &skey1, &ekey1, &windowSKey, &windowEKey); - pWindowResInfo->startTime = windowSKey; // windowSKey may be 0 in case of 1970 timestamp - // assert(pWindowResInfo->startTime > 0); - - if (pWindowResInfo->prevSKey == 0) { - if (QUERY_IS_ASC_QUERY(pQuery)) { - pWindowResInfo->prevSKey = windowSKey; - } else { - assert(win.ekey == pQuery->skey); - pWindowResInfo->prevSKey = windowSKey + ((win.ekey - windowSKey) / pQuery->slidingTime) * pQuery->slidingTime; - } - } - - pMeterQueryInfo->queryRangeSet = 1; - pMeterQueryInfo->lastKey = pQuery->skey; - pMeterQueryInfo->skey = pQuery->skey; - - pQuery->lastKey = pQuery->skey; - } -} - -bool requireTimestamp(SQuery *pQuery) { - for (int32_t i = 0; i < pQuery->numOfOutputCols; i++) { - int32_t functionId = pQuery->pSelectExpr[i].pBase.functionId; - if ((aAggs[functionId].nStatus & TSDB_FUNCSTATE_NEED_TS) != 0) { - return true; - } - } - return false; -} - -static void 
setTimestampRange(SQueryRuntimeEnv *pRuntimeEnv, int64_t stime, int64_t etime) { - SQuery *pQuery = pRuntimeEnv->pQuery; - - for (int32_t i = 0; i < pQuery->numOfOutputCols; ++i) { - int32_t functionId = pQuery->pSelectExpr[i].pBase.functionId; - - if (functionId == TSDB_FUNC_SPREAD) { - pRuntimeEnv->pCtx[i].param[1].dKey = stime; - pRuntimeEnv->pCtx[i].param[2].dKey = etime; - - pRuntimeEnv->pCtx[i].param[1].nType = TSDB_DATA_TYPE_DOUBLE; - pRuntimeEnv->pCtx[i].param[2].nType = TSDB_DATA_TYPE_DOUBLE; - } - } -} - -bool needPrimaryTimestampCol(SQuery *pQuery, SBlockInfo *pBlockInfo) { - /* - * 1. if skey or ekey locates in this block, we need to load the timestamp column to decide the precise position - * 2. if there are top/bottom, first_dst/last_dst functions, we need to load timestamp column in any cases; - */ - bool loadPrimaryTS = (pQuery->lastKey >= pBlockInfo->keyFirst && pQuery->lastKey <= pBlockInfo->keyLast) || - (pQuery->ekey >= pBlockInfo->keyFirst && pQuery->ekey <= pBlockInfo->keyLast) || - requireTimestamp(pQuery); - - return loadPrimaryTS; -} - -int32_t LoadDatablockOnDemand(SCompBlock *pBlock, SField **pFields, uint8_t *blkStatus, SQueryRuntimeEnv *pRuntimeEnv, - int32_t fileIdx, int32_t slotIdx, __block_search_fn_t searchFn, bool onDemand) { - SQuery * pQuery = pRuntimeEnv->pQuery; - SMeterObj *pMeterObj = pRuntimeEnv->pMeterObj; - - TSKEY *primaryKeys = (TSKEY *)pRuntimeEnv->primaryColBuffer->data; - - pQuery->slot = slotIdx; - pQuery->pos = QUERY_IS_ASC_QUERY(pQuery) ? 
0 : pBlock->numOfPoints - 1; - - SET_FILE_BLOCK_FLAG(*blkStatus); - SET_DATA_BLOCK_NOT_LOADED(*blkStatus); - - if (((pQuery->lastKey <= pBlock->keyFirst && pQuery->ekey >= pBlock->keyLast && QUERY_IS_ASC_QUERY(pQuery)) || - (pQuery->ekey <= pBlock->keyFirst && pQuery->lastKey >= pBlock->keyLast && !QUERY_IS_ASC_QUERY(pQuery))) && - onDemand) { - uint32_t req = 0; - if (pQuery->numOfFilterCols > 0) { - req = BLK_DATA_ALL_NEEDED; - } else { - for (int32_t i = 0; i < pQuery->numOfOutputCols; ++i) { - int32_t functID = pQuery->pSelectExpr[i].pBase.functionId; - req |= aAggs[functID].dataReqFunc(&pRuntimeEnv->pCtx[i], pBlock->keyFirst, pBlock->keyLast, - pQuery->pSelectExpr[i].pBase.colInfo.colId, *blkStatus); - } - - if (pRuntimeEnv->pTSBuf > 0 || isIntervalQuery(pQuery)) { - req |= BLK_DATA_ALL_NEEDED; - } - } - - if (req == BLK_DATA_NO_NEEDED) { - qTrace("QInfo:%p vid:%d sid:%d id:%s, slot:%d, data block ignored, brange:%" PRId64 "-%" PRId64 ", rows:%d", - GET_QINFO_ADDR(pQuery), pMeterObj->vnode, pMeterObj->sid, pMeterObj->meterId, pQuery->slot, - pBlock->keyFirst, pBlock->keyLast, pBlock->numOfPoints); - - setTimestampRange(pRuntimeEnv, pBlock->keyFirst, pBlock->keyLast); - } else if (req == BLK_DATA_FILEDS_NEEDED) { - if (loadDataBlockFieldsInfo(pRuntimeEnv, pBlock, pFields) < 0) { - return DISK_DATA_LOAD_FAILED; - } - } else { - assert(req == BLK_DATA_ALL_NEEDED); - goto _load_all; - } - } else { - _load_all: - if (loadDataBlockFieldsInfo(pRuntimeEnv, pBlock, pFields) < 0) { - return DISK_DATA_LOAD_FAILED; - } - - if ((pQuery->lastKey <= pBlock->keyFirst && pQuery->ekey >= pBlock->keyLast && QUERY_IS_ASC_QUERY(pQuery)) || - (pQuery->lastKey >= pBlock->keyLast && pQuery->ekey <= pBlock->keyFirst && !QUERY_IS_ASC_QUERY(pQuery))) { - /* - * if this block is completed included in the query range, do more filter operation - * filter the data block according to the value filter condition. 
- * no need to load the data block, continue for next block - */ - if (!needToLoadDataBlock(pQuery, *pFields, pRuntimeEnv->pCtx, pBlock->numOfPoints)) { -#if defined(_DEBUG_VIEW) - dTrace("QInfo:%p fileId:%d, slot:%d, block discarded by per-filter, ", GET_QINFO_ADDR(pQuery), pQuery->fileId, - pQuery->slot); -#endif - qTrace("QInfo:%p id:%s slot:%d, data block ignored by pre-filter, fields loaded, brange:%" PRId64 "-%" PRId64 - ", rows:%d", - GET_QINFO_ADDR(pQuery), pMeterObj->meterId, pQuery->slot, pBlock->keyFirst, pBlock->keyLast, - pBlock->numOfPoints); - return DISK_DATA_DISCARDED; - } - } - - SBlockInfo binfo = getBlockBasicInfo(pRuntimeEnv, pBlock, BLK_FILE_BLOCK); - bool loadTS = needPrimaryTimestampCol(pQuery, &binfo); - - /* - * the pRuntimeEnv->pMeterObj is not updated during loop, since which meter this block is belonged to is not matter - * in order to enforce to load the data block, we HACK the load check procedure, - * by changing pQuery->slot each time to IGNORE the pLoadInfo data check. It is NOT a normal way. 
- */ - int32_t ret = loadDataBlockIntoMem(pBlock, pFields, pRuntimeEnv, fileIdx, loadTS, false); - SET_DATA_BLOCK_LOADED(*blkStatus); - - if (ret < 0) { - return DISK_DATA_LOAD_FAILED; - } - - /* find first qualified record position in this block */ - if (loadTS) { - pQuery->pos = searchFn((char *)primaryKeys, pBlock->numOfPoints, pQuery->lastKey, pQuery->order.order); - - /* boundary timestamp check */ - assert(pBlock->keyFirst == primaryKeys[0] && pBlock->keyLast == primaryKeys[pBlock->numOfPoints - 1]); - } - - /* - * NOTE: - * if the query of current timestamp window is COMPLETED, the query range condition may not be satisfied - * such as: - * pQuery->lastKey + 1 == pQuery->ekey for descending order interval query - * pQuery->lastKey - 1 == pQuery->ekey for ascending query - */ - assert(((pQuery->ekey >= pQuery->lastKey || pQuery->ekey == pQuery->lastKey - 1) && QUERY_IS_ASC_QUERY(pQuery)) || - ((pQuery->ekey <= pQuery->lastKey || pQuery->ekey == pQuery->lastKey + 1) && !QUERY_IS_ASC_QUERY(pQuery))); - } - - return DISK_DATA_LOADED; -} - -bool onDemandLoadDatablock(SQuery *pQuery, int16_t queryRangeSet) { - return (pQuery->intervalTime == 0) || ((queryRangeSet == 1) && (isIntervalQuery(pQuery))); -} - -static int32_t getNumOfSubset(STableQuerySupportObj *pSupporter) { - SQuery *pQuery = pSupporter->runtimeEnv.pQuery; - - int32_t totalSubset = 0; - if (isGroupbyNormalCol(pQuery->pGroupbyExpr) || (isIntervalQuery(pQuery))) { - totalSubset = numOfClosedTimeWindow(&pSupporter->runtimeEnv.windowResInfo); - } else { - totalSubset = pSupporter->pSidSet->numOfSubSet; - } - - return totalSubset; -} - -static int32_t doCopyToSData(STableQuerySupportObj *pSupporter, SWindowResult *result, int32_t orderType) { - SQueryRuntimeEnv *pRuntimeEnv = &pSupporter->runtimeEnv; - SQuery * pQuery = pRuntimeEnv->pQuery; - - int32_t numOfResult = 0; - int32_t startIdx = 0; - int32_t step = -1; - - dTrace("QInfo:%p start to copy data from windowResInfo to pQuery buf", 
GET_QINFO_ADDR(pQuery)); - int32_t totalSubset = getNumOfSubset(pSupporter); - - if (orderType == TSQL_SO_ASC) { - startIdx = pSupporter->subgroupIdx; - step = 1; - } else { // desc order copy all data - startIdx = totalSubset - pSupporter->subgroupIdx - 1; - step = -1; - } - - for (int32_t i = startIdx; (i < totalSubset) && (i >= 0); i += step) { - if (result[i].numOfRows == 0) { - pSupporter->offset = 0; - pSupporter->subgroupIdx += 1; - continue; - } - - assert(result[i].numOfRows >= 0 && pSupporter->offset <= 1); - - int32_t numOfRowsToCopy = result[i].numOfRows - pSupporter->offset; - int32_t oldOffset = pSupporter->offset; - - /* - * current output space is not enough to keep all the result data of this group, only copy partial results - * to SQuery object's result buffer - */ - if (numOfRowsToCopy > pQuery->pointsToRead - numOfResult) { - numOfRowsToCopy = pQuery->pointsToRead - numOfResult; - pSupporter->offset += numOfRowsToCopy; - } else { - pSupporter->offset = 0; - pSupporter->subgroupIdx += 1; - } - - for (int32_t j = 0; j < pQuery->numOfOutputCols; ++j) { - int32_t size = pRuntimeEnv->pCtx[j].outputBytes; - - char *out = pQuery->sdata[j]->data + numOfResult * size; - char *in = getPosInResultPage(pRuntimeEnv, j, &result[i]); - memcpy(out, in + oldOffset * size, size * numOfRowsToCopy); - } - - numOfResult += numOfRowsToCopy; - if (numOfResult == pQuery->pointsToRead) { - break; - } - } - - dTrace("QInfo:%p copy data to SQuery buf completed", GET_QINFO_ADDR(pQuery)); - -#ifdef _DEBUG_VIEW - displayInterResult(pQuery->sdata, pQuery, numOfResult); -#endif - return numOfResult; -} - -/** - * copyFromWindowResToSData support copy data in ascending/descending order - * For interval query of both super table and table, copy the data in ascending order, since the output results are - * ordered in SWindowResutl already. While handling the group by query for both table and super table, - * all group result are completed already. 
- * - * @param pQInfo - * @param result - */ -void copyFromWindowResToSData(SQInfo *pQInfo, SWindowResult *result) { - SQuery * pQuery = &pQInfo->query; - STableQuerySupportObj *pSupporter = pQInfo->pTableQuerySupporter; - - int32_t orderType = (pQuery->pGroupbyExpr != NULL) ? pQuery->pGroupbyExpr->orderType : TSQL_SO_ASC; - int32_t numOfResult = doCopyToSData(pSupporter, result, orderType); - - pQuery->pointsRead += numOfResult; - assert(pQuery->pointsRead <= pQuery->pointsToRead); -} - -static void updateWindowResNumOfRes(SQueryRuntimeEnv *pRuntimeEnv, SMeterDataInfo *pMeterDataInfo) { - SQuery *pQuery = pRuntimeEnv->pQuery; - - // update the number of result for each, only update the number of rows for the corresponding window result. - if (pQuery->intervalTime == 0) { - int32_t g = pMeterDataInfo->groupIdx; - assert(pRuntimeEnv->windowResInfo.size > 0); - - SWindowResult *pWindowRes = doSetTimeWindowFromKey(pRuntimeEnv, &pRuntimeEnv->windowResInfo, (char *)&g, sizeof(g)); - if (pWindowRes->numOfRows == 0) { - pWindowRes->numOfRows = getNumOfResult(pRuntimeEnv); - } - } -} - -void stableApplyFunctionsOnBlock(STableQuerySupportObj *pSupporter, SMeterDataInfo *pMeterDataInfo, - SBlockInfo *pBlockInfo, SField *pFields, __block_search_fn_t searchFn) { - SQueryRuntimeEnv *pRuntimeEnv = &pSupporter->runtimeEnv; - SQuery * pQuery = pRuntimeEnv->pQuery; - SMeterQueryInfo * pMeterQueryInfo = pMeterDataInfo->pMeterQInfo; - SWindowResInfo * pWindowResInfo = &pMeterQueryInfo->windowResInfo; - - int64_t *pPrimaryKey = (int64_t *)pRuntimeEnv->primaryColBuffer->data; - - int32_t forwardStep = - getNumOfRowsInTimeWindow(pQuery, pBlockInfo, pPrimaryKey, pQuery->pos, pQuery->ekey, searchFn, true); - - int32_t numOfRes = 0; - if (pQuery->numOfFilterCols > 0 || pRuntimeEnv->pTSBuf != NULL) { - numOfRes = rowwiseApplyAllFunctions(pRuntimeEnv, &forwardStep, pFields, pBlockInfo, pWindowResInfo); - } else { - numOfRes = blockwiseApplyAllFunctions(pRuntimeEnv, forwardStep, pFields, 
pBlockInfo, pWindowResInfo, searchFn); - } - - assert(numOfRes >= 0); - - updateWindowResNumOfRes(pRuntimeEnv, pMeterDataInfo); - updatelastkey(pQuery, pMeterQueryInfo); -} - -// we need to split the refstatsult into different packages. -int32_t vnodeGetResultSize(void *thandle, int32_t *numOfRows) { - SQInfo *pQInfo = (SQInfo *)thandle; - SQuery *pQuery = &pQInfo->query; - - /* - * get the file size and set the numOfRows to be the file size, since for tsComp query, - * the returned row size is equalled to 1 - * - * TODO handle the case that the file is too large to send back one time - */ - if (pQInfo->pTableQuerySupporter != NULL && isTSCompQuery(pQuery) && (*numOfRows) > 0) { - struct stat fstat; - if (stat(pQuery->sdata[0]->data, &fstat) == 0) { - *numOfRows = fstat.st_size; - return fstat.st_size; - } else { - dError("QInfo:%p failed to get file info, path:%s, reason:%s", pQInfo, pQuery->sdata[0]->data, strerror(errno)); - return 0; - } - } else { - return pQInfo->query.rowSize * (*numOfRows); - } -} - -int64_t vnodeGetOffsetVal(void *thandle) { - SQInfo *pQInfo = (SQInfo *)thandle; - return pQInfo->query.limit.offset; -} - -bool vnodeHasRemainResults(void *handle) { - SQInfo * pQInfo = (SQInfo *)handle; - STableQuerySupportObj *pSupporter = pQInfo->pTableQuerySupporter; - - if (pSupporter == NULL || pQInfo->query.interpoType == TSDB_INTERPO_NONE) { - return false; - } - - SQueryRuntimeEnv *pRuntimeEnv = &pSupporter->runtimeEnv; - SQuery * pQuery = pRuntimeEnv->pQuery; - - SInterpolationInfo *pInterpoInfo = &pRuntimeEnv->interpoInfo; - if (pQuery->limit.limit > 0 && pQInfo->pointsRead >= pQuery->limit.limit) { - return false; - } - - int32_t remain = taosNumOfRemainPoints(pInterpoInfo); - if (remain > 0) { - return true; - } else { - if (pRuntimeEnv->pInterpoBuf == NULL) { - return false; - } - - // query has completed - if (Q_STATUS_EQUAL(pQuery->over, QUERY_COMPLETED | QUERY_NO_DATA_TO_CHECK)) { - TSKEY ekey = taosGetRevisedEndKey(pSupporter->rawEKey, 
pQuery->order.order, pQuery->intervalTime, - pQuery->intervalTimeUnit, pQuery->precision); - int32_t numOfTotal = taosGetNumOfResultWithInterpo(pInterpoInfo, (TSKEY *)pRuntimeEnv->pInterpoBuf[0]->data, - remain, pQuery->intervalTime, ekey, pQuery->pointsToRead); - return numOfTotal > 0; - } - - return false; - } -} - -static int32_t resultInterpolate(SQInfo *pQInfo, tFilePage **data, tFilePage **pDataSrc, int32_t numOfRows, - int32_t outputRows) { - SQuery * pQuery = &pQInfo->query; - SQueryRuntimeEnv *pRuntimeEnv = &pQInfo->pTableQuerySupporter->runtimeEnv; - - assert(pRuntimeEnv->pCtx[0].outputBytes == TSDB_KEYSIZE); - - // build support structure for performing interpolation - SSchema *pSchema = calloc(1, sizeof(SSchema) * pQuery->numOfOutputCols); - for (int32_t i = 0; i < pQuery->numOfOutputCols; ++i) { - pSchema[i].bytes = pRuntimeEnv->pCtx[i].outputBytes; - pSchema[i].type = pQuery->pSelectExpr[i].resType; - } - - SColumnModel *pModel = createColumnModel(pSchema, pQuery->numOfOutputCols, pQuery->pointsToRead); - - char * srcData[TSDB_MAX_COLUMNS] = {0}; - int32_t functions[TSDB_MAX_COLUMNS] = {0}; - - for (int32_t i = 0; i < pQuery->numOfOutputCols; ++i) { - srcData[i] = pDataSrc[i]->data; - functions[i] = pQuery->pSelectExpr[i].pBase.functionId; - } - - int32_t numOfRes = taosDoInterpoResult(&pRuntimeEnv->interpoInfo, pQuery->interpoType, data, numOfRows, outputRows, - pQuery->intervalTime, (int64_t *)pDataSrc[0]->data, pModel, srcData, - pQuery->defaultVal, functions, pRuntimeEnv->pMeterObj->pointsPerFileBlock); - - destroyColumnModel(pModel); - free(pSchema); - - return numOfRes; -} - -static void doCopyQueryResultToMsg(SQInfo *pQInfo, int32_t numOfRows, char *data) { - SMeterObj *pObj = pQInfo->pObj; - SQuery * pQuery = &pQInfo->query; - - int tnumOfRows = vnodeList[pObj->vnode].cfg.rowsInFileBlock; - - // for metric query, bufIndex always be 0. 
- for (int32_t col = 0; col < pQuery->numOfOutputCols; ++col) { // pQInfo->bufIndex == 0 - int32_t bytes = pQuery->pSelectExpr[col].resBytes; - - memmove(data, pQuery->sdata[col]->data + bytes * tnumOfRows * pQInfo->bufIndex, bytes * numOfRows); - data += bytes * numOfRows; - } -} - -/** - * Copy the result data/file to output message buffer. - * If the result is in file format, read file from disk and copy to output buffer, compression is not involved since - * data in file is already compressed. - * In case of other result in buffer, compress the result before copy once the tsComressMsg is set. - * - * @param handle - * @param data - * @param numOfRows the number of rows that are not returned in current retrieve - * @return - */ -int32_t vnodeCopyQueryResultToMsg(void *handle, char *data, int32_t numOfRows) { - SQInfo *pQInfo = (SQInfo *)handle; - SQuery *pQuery = &pQInfo->query; - - assert(pQuery->pSelectExpr != NULL && pQuery->numOfOutputCols > 0); - - // load data from file to msg buffer - if (isTSCompQuery(pQuery)) { - int32_t fd = open(pQuery->sdata[0]->data, O_RDONLY, 0666); - - // make sure file exist - if (FD_VALID(fd)) { - size_t s = lseek(fd, 0, SEEK_END); - dTrace("QInfo:%p ts comp data return, file:%s, size:%zu", pQInfo, pQuery->sdata[0]->data, s); - - lseek(fd, 0, SEEK_SET); - read(fd, data, s); - close(fd); - - unlink(pQuery->sdata[0]->data); - } else { - dError("QInfo:%p failed to open tmp file to send ts-comp data to client, path:%s, reason:%s", pQInfo, - pQuery->sdata[0]->data, strerror(errno)); - } - } else { - doCopyQueryResultToMsg(pQInfo, numOfRows, data); - } - - return numOfRows; -} - -int32_t vnodeQueryResultInterpolate(SQInfo *pQInfo, tFilePage **pDst, tFilePage **pDataSrc, int32_t numOfRows, - int32_t *numOfInterpo) { - STableQuerySupportObj *pSupporter = pQInfo->pTableQuerySupporter; - SQueryRuntimeEnv * pRuntimeEnv = &pSupporter->runtimeEnv; - SQuery * pQuery = pRuntimeEnv->pQuery; - - while (1) { - numOfRows = 
taosNumOfRemainPoints(&pRuntimeEnv->interpoInfo); - - TSKEY ekey = taosGetRevisedEndKey(pSupporter->rawEKey, pQuery->order.order, pQuery->intervalTime, - pQuery->intervalTimeUnit, pQuery->precision); - int32_t numOfFinalRows = taosGetNumOfResultWithInterpo(&pRuntimeEnv->interpoInfo, (TSKEY *)pDataSrc[0]->data, - numOfRows, pQuery->intervalTime, ekey, pQuery->pointsToRead); - - int32_t ret = resultInterpolate(pQInfo, pDst, pDataSrc, numOfRows, numOfFinalRows); - assert(ret == numOfFinalRows); - - /* reached the start position of according to offset value, return immediately */ - if (pQuery->limit.offset == 0) { - return ret; - } - - if (pQuery->limit.offset < ret) { - ret -= pQuery->limit.offset; - // todo !!!!there exactly number of interpo is not valid. - // todo refactor move to the beginning of buffer - for (int32_t i = 0; i < pQuery->numOfOutputCols; ++i) { - memmove(pDst[i]->data, pDst[i]->data + pQuery->pSelectExpr[i].resBytes * pQuery->limit.offset, - ret * pQuery->pSelectExpr[i].resBytes); - } - pQuery->limit.offset = 0; - return ret; - } else { - pQuery->limit.offset -= ret; - ret = 0; - } - - if (!vnodeHasRemainResults(pQInfo)) { - return ret; - } - } -} - -void vnodePrintQueryStatistics(STableQuerySupportObj *pSupporter) { - SQueryRuntimeEnv *pRuntimeEnv = &pSupporter->runtimeEnv; - - SQuery *pQuery = pRuntimeEnv->pQuery; - SQInfo *pQInfo = (SQInfo *)GET_QINFO_ADDR(pQuery); - - SQueryCostSummary *pSummary = &pRuntimeEnv->summary; - if (pRuntimeEnv->pResultBuf == NULL) { - pSummary->tmpBufferInDisk = 0; - } else { - pSummary->tmpBufferInDisk = getResBufSize(pRuntimeEnv->pResultBuf); - } - - dTrace("QInfo:%p statis: comp blocks:%d, size:%d Bytes, elapsed time:%.2f ms", pQInfo, pSummary->readCompInfo, - pSummary->totalCompInfoSize, pSummary->loadCompInfoUs / 1000.0); - - dTrace("QInfo:%p statis: field info: %d, size:%d Bytes, avg size:%.2f Bytes, elapsed time:%.2f ms", pQInfo, - pSummary->readField, pSummary->totalFieldSize, (double)pSummary->totalFieldSize 
/ pSummary->readField, - pSummary->loadFieldUs / 1000.0); - - dTrace( - "QInfo:%p statis: file blocks:%d, size:%d Bytes, elapsed time:%.2f ms, skipped:%d, in-memory gen null:%d Bytes", - pQInfo, pSummary->readDiskBlocks, pSummary->totalBlockSize, pSummary->loadBlocksUs / 1000.0, - pSummary->skippedFileBlocks, pSummary->totalGenData); - - dTrace("QInfo:%p statis: cache blocks:%d", pQInfo, pSummary->blocksInCache, 0); - dTrace("QInfo:%p statis: temp file:%d Bytes", pQInfo, pSummary->tmpBufferInDisk); - - dTrace("QInfo:%p statis: file:%d, table:%d", pQInfo, pSummary->numOfFiles, pSummary->numOfTables); - dTrace("QInfo:%p statis: seek ops:%d", pQInfo, pSummary->numOfSeek); - - double total = pSummary->fileTimeUs + pSummary->cacheTimeUs; - double io = pSummary->loadCompInfoUs + pSummary->loadBlocksUs + pSummary->loadFieldUs; - // assert(io <= pSummary->fileTimeUs); - - // todo add the intermediate result save cost!! - double computing = total - io; - - dTrace( - "QInfo:%p statis: total elapsed time:%.2f ms, file:%.2f ms(%.2f%), cache:%.2f ms(%.2f%). io:%.2f ms(%.2f%)," - "comput:%.2fms(%.2f%)", - pQInfo, total / 1000.0, pSummary->fileTimeUs / 1000.0, pSummary->fileTimeUs * 100 / total, - pSummary->cacheTimeUs / 1000.0, pSummary->cacheTimeUs * 100 / total, io / 1000.0, io * 100 / total, - computing / 1000.0, computing * 100 / total); -} diff --git a/src/vnode/detail/src/vnodeQueryProcess.c b/src/vnode/detail/src/vnodeQueryProcess.c deleted file mode 100644 index 23520f35a1..0000000000 --- a/src/vnode/detail/src/vnodeQueryProcess.c +++ /dev/null @@ -1,1364 +0,0 @@ -/* - * Copyright (c) 2019 TAOS Data, Inc. - * - * This program is free software: you can use, redistribute, and/or modify - * it under the terms of the GNU Affero General Public License, version 3 - * or later ("AGPL"), as published by the Free Software Foundation. 
- * - * This program is distributed in the hope that it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. - * - * You should have received a copy of the GNU Affero General Public License - * along with this program. If not, see . - */ - -#define _DEFAULT_SOURCE -#include "os.h" - -#include "qextbuffer.h" -#include "taosmsg.h" -#include "tscJoinProcess.h" -#include "ttime.h" -#include "vnode.h" -#include "vnodeRead.h" -#include "vnodeUtil.h" - -#include "vnodeQueryImpl.h" - -#define ALL_CACHE_BLOCKS_CHECKED(q) \ - (((q)->slot == (q)->currentSlot && QUERY_IS_ASC_QUERY(q)) || \ - ((q)->slot == (q)->firstSlot && (!QUERY_IS_ASC_QUERY(q)))) - -#define FORWARD_CACHE_BLOCK_CHECK_SLOT(slot, step, maxblocks) (slot) = ((slot) + (step) + (maxblocks)) % (maxblocks); - -static bool isGroupbyEachTable(SSqlGroupbyExpr *pGroupbyExpr, tSidSet *pSidset) { - if (pGroupbyExpr == NULL || pGroupbyExpr->numOfGroupCols == 0) { - return false; - } - - for (int32_t i = 0; i < pGroupbyExpr->numOfGroupCols; ++i) { - SColIndexEx *pColIndex = &pGroupbyExpr->columnInfo[i]; - if (pColIndex->flag == TSDB_COL_TAG) { - assert(pSidset->numOfSids == pSidset->numOfSubSet); - return true; - } - } - - return false; -} - -static bool doCheckWithPrevQueryRange(SQuery *pQuery, TSKEY nextKey) { - if ((nextKey > pQuery->ekey && QUERY_IS_ASC_QUERY(pQuery)) || - (nextKey < pQuery->ekey && !QUERY_IS_ASC_QUERY(pQuery))) { - return false; - } - - return true; -} - -/** - * The start position of the first check cache block is located before starting the loop. - * And the start position for next cache blocks needs to be decided before checking each cache block. 
- */ -static void setStartPositionForCacheBlock(SQuery *pQuery, SCacheBlock *pBlock, bool *firstCheckSlot) { - if (!(*firstCheckSlot)) { - if (QUERY_IS_ASC_QUERY(pQuery)) { - pQuery->pos = 0; - } else { - pQuery->pos = pBlock->numOfPoints - 1; - } - } else { - (*firstCheckSlot) = false; - } -} - -static void enableExecutionForNextTable(SQueryRuntimeEnv *pRuntimeEnv) { - SQuery *pQuery = pRuntimeEnv->pQuery; - - for (int32_t i = 0; i < pQuery->numOfOutputCols; ++i) { - SResultInfo *pResInfo = GET_RES_INFO(&pRuntimeEnv->pCtx[i]); - if (pResInfo != NULL) { - pResInfo->complete = false; - } - } -} - -static void queryOnMultiDataCache(SQInfo *pQInfo, SMeterDataInfo *pMeterDataInfo) { - SQuery * pQuery = &pQInfo->query; - STableQuerySupportObj *pSupporter = pQInfo->pTableQuerySupporter; - SQueryRuntimeEnv * pRuntimeEnv = &pQInfo->pTableQuerySupporter->runtimeEnv; - - SMeterSidExtInfo **pMeterSidExtInfo = pSupporter->pMeterSidExtInfo; - - SMeterObj *pTempMeterObj = getMeterObj(pSupporter->pMetersHashTable, pMeterSidExtInfo[0]->sid); - assert(pTempMeterObj != NULL); - - __block_search_fn_t searchFn = vnodeSearchKeyFunc[pTempMeterObj->searchAlgorithm]; - int32_t step = GET_FORWARD_DIRECTION_FACTOR(pQuery->order.order); - - dTrace("QInfo:%p start to query data in cache", pQInfo); - int64_t st = taosGetTimestampUs(); - int32_t totalBlocks = 0; - - for (int32_t groupIdx = 0; groupIdx < pSupporter->pSidSet->numOfSubSet; ++groupIdx) { - int32_t start = pSupporter->pSidSet->starterPos[groupIdx]; - int32_t end = pSupporter->pSidSet->starterPos[groupIdx + 1] - 1; - - if (isQueryKilled(pQInfo)) { - return; - } - - for (int32_t k = start; k <= end; ++k) { - SMeterObj *pMeterObj = getMeterObj(pSupporter->pMetersHashTable, pMeterSidExtInfo[k]->sid); - if (pMeterObj == NULL) { - dError("QInfo:%p failed to find meterId:%d, continue", pQInfo, pMeterSidExtInfo[k]->sid); - continue; - } - - pQInfo->pObj = pMeterObj; - pRuntimeEnv->pMeterObj = pMeterObj; - - if (pMeterDataInfo[k].pMeterQInfo 
== NULL) { - pMeterDataInfo[k].pMeterQInfo = - createMeterQueryInfo(pSupporter, pMeterObj->sid, pSupporter->rawSKey, pSupporter->rawEKey); - } - - if (pMeterDataInfo[k].pMeterObj == NULL) { // no data in disk for this meter, set its pointer - setMeterDataInfo(&pMeterDataInfo[k], pMeterObj, k, groupIdx); - } - - assert(pMeterDataInfo[k].meterOrderIdx == k && pMeterObj == pMeterDataInfo[k].pMeterObj); - - SMeterQueryInfo *pMeterQueryInfo = pMeterDataInfo[k].pMeterQInfo; - restoreIntervalQueryRange(pRuntimeEnv, pMeterQueryInfo); - - /* - * Update the query meter column index and the corresponding filter column index - * the original column index info may be inconsistent with current meter in cache. - * - * The stable schema has been changed, but the meter schema, along with the data in cache, - * will not be updated until data with new schema arrive. - */ - vnodeUpdateQueryColumnIndex(pQuery, pMeterObj); - vnodeUpdateFilterColumnIndex(pQuery); - - if ((pQuery->lastKey > pQuery->ekey && QUERY_IS_ASC_QUERY(pQuery)) || - (pQuery->lastKey < pQuery->ekey && !QUERY_IS_ASC_QUERY(pQuery))) { - dTrace("QInfo:%p vid:%d sid:%d id:%s, query completed, ignore data in cache. qrange:%" PRId64 "-%" PRId64 - ", lastKey:%" PRId64, - pQInfo, pMeterObj->vnode, pMeterObj->sid, pMeterObj->meterId, pQuery->skey, pQuery->ekey, - pQuery->lastKey); - - continue; - } - - qTrace("QInfo:%p vid:%d sid:%d id:%s, query in cache, qrange:%" PRId64 "-%" PRId64 ", lastKey:%" PRId64, pQInfo, - pMeterObj->vnode, pMeterObj->sid, pMeterObj->meterId, pQuery->skey, pQuery->ekey, pQuery->lastKey); - - /* - * find the appropriated start position in cache - * NOTE: (taking ascending order query for example) - * for the specific query range [pQuery->lastKey, pQuery->ekey], there may be no qualified result in cache. - * Therefore, we need the first point that is greater(less) than the pQuery->lastKey, so the boundary check - * should be ignored (the fourth parameter). 
- */ - TSKEY nextKey = getQueryStartPositionInCache(pRuntimeEnv, &pQuery->slot, &pQuery->pos, true); - if (nextKey < 0 || !doCheckWithPrevQueryRange(pQuery, nextKey)) { - qTrace("QInfo:%p vid:%d sid:%d id:%s, no data qualified in cache, cache blocks:%d, lastKey:%" PRId64, pQInfo, - pMeterObj->vnode, pMeterObj->sid, pMeterObj->meterId, pQuery->numOfBlocks, pQuery->lastKey); - continue; - } - - // data in this block may be flushed to disk and this block is allocated to other meter - // todo try with remain cache blocks - SCacheBlock *pBlock = getCacheDataBlock(pMeterObj, pRuntimeEnv, pQuery->slot); - if (pBlock == NULL) { - continue; - } - - bool firstCheckSlot = true; - SCacheInfo *pCacheInfo = (SCacheInfo *)pMeterObj->pCache; - - for (int32_t i = 0; i < pCacheInfo->maxBlocks; ++i) { - pBlock = getCacheDataBlock(pMeterObj, pRuntimeEnv, pQuery->slot); - - /* - * 1. pBlock == NULL. The cache block may be flushed to disk, so it is not available, skip and try next - * The check for empty block is refactor to getCacheDataBlock function - */ - if (pBlock == NULL) { - if (ALL_CACHE_BLOCKS_CHECKED(pQuery)) { - break; - } - - FORWARD_CACHE_BLOCK_CHECK_SLOT(pQuery->slot, step, pCacheInfo->maxBlocks); - continue; - } - - setStartPositionForCacheBlock(pQuery, pBlock, &firstCheckSlot); - - TSKEY *primaryKeys = (TSKEY *)pRuntimeEnv->primaryColBuffer->data; - TSKEY key = primaryKeys[pQuery->pos]; - - // in handling file data block, the timestamp range validation is done during fetching candidate file blocks - if ((key > pSupporter->rawEKey && QUERY_IS_ASC_QUERY(pQuery)) || - (key < pSupporter->rawEKey && !QUERY_IS_ASC_QUERY(pQuery))) { - break; - } - - if (pQuery->intervalTime == 0) { - setExecutionContext(pSupporter, pMeterQueryInfo, k, pMeterDataInfo[k].groupIdx, key); - } else { - setIntervalQueryRange(pMeterQueryInfo, pSupporter, key); - int32_t ret = setAdditionalInfo(pSupporter, k, pMeterQueryInfo); - if (ret != TSDB_CODE_SUCCESS) { - pQInfo->killed = 1; - return; - } - } - 
- qTrace("QInfo:%p vid:%d sid:%d id:%s, query in cache, qrange:%" PRId64 "-%" PRId64 ", lastKey:%" PRId64, pQInfo, - pMeterObj->vnode, pMeterObj->sid, pMeterObj->meterId, pQuery->skey, pQuery->ekey, pQuery->lastKey); - - // only record the key on last block - SET_CACHE_BLOCK_FLAG(pRuntimeEnv->blockStatus); - SBlockInfo binfo = getBlockInfo(pRuntimeEnv); - - dTrace("QInfo:%p check data block, brange:%" PRId64 "-%" PRId64 ", fileId:%d, slot:%d, pos:%d, bstatus:%d", - GET_QINFO_ADDR(pQuery), binfo.keyFirst, binfo.keyLast, pQuery->fileId, pQuery->slot, pQuery->pos, - pRuntimeEnv->blockStatus); - - totalBlocks++; - stableApplyFunctionsOnBlock(pSupporter, &pMeterDataInfo[k], &binfo, NULL, searchFn); - - if (ALL_CACHE_BLOCKS_CHECKED(pQuery)) { - break; - } - - FORWARD_CACHE_BLOCK_CHECK_SLOT(pQuery->slot, step, pCacheInfo->maxBlocks); - } - } - } - - int64_t time = taosGetTimestampUs() - st; - SQueryCostSummary *pSummary = &pRuntimeEnv->summary; - - pSummary->blocksInCache += totalBlocks; - pSummary->cacheTimeUs += time; - pSummary->numOfTables = pSupporter->pSidSet->numOfSids; - - dTrace("QInfo:%p complete check %d cache blocks, elapsed time:%.3fms", pQInfo, totalBlocks, time / 1000.0); - - setQueryStatus(pQuery, QUERY_NOT_COMPLETED); -} - -static void queryOnMultiDataFiles(SQInfo *pQInfo, SMeterDataInfo *pMeterDataInfo) { - SQuery * pQuery = &pQInfo->query; - STableQuerySupportObj *pSupporter = pQInfo->pTableQuerySupporter; - SQueryRuntimeEnv * pRuntimeEnv = &pSupporter->runtimeEnv; - SMeterDataBlockInfoEx *pDataBlockInfoEx = NULL; - int32_t nAllocBlocksInfoSize = 0; - - SMeterObj * pTempMeter = getMeterObj(pSupporter->pMetersHashTable, pSupporter->pMeterSidExtInfo[0]->sid); - __block_search_fn_t searchFn = vnodeSearchKeyFunc[pTempMeter->searchAlgorithm]; - - int32_t vnodeId = pTempMeter->vnode; - SQueryFilesInfo *pVnodeFileInfo = &pRuntimeEnv->vnodeFileInfo; - - dTrace("QInfo:%p start to check data blocks in %d files", pQInfo, pVnodeFileInfo->numOfFiles); - - int32_t 
fid = QUERY_IS_ASC_QUERY(pQuery) ? -1 : INT32_MAX; - int32_t step = GET_FORWARD_DIRECTION_FACTOR(pQuery->order.order); - SQueryCostSummary *pSummary = &pRuntimeEnv->summary; - - int64_t totalBlocks = 0; - int64_t st = taosGetTimestampUs(); - - while (1) { - if (isQueryKilled(pQInfo)) { - break; - } - - int32_t fileIdx = vnodeGetVnodeHeaderFileIndex(&fid, pRuntimeEnv, pQuery->order.order); - if (fileIdx < 0) { // no valid file, abort current search - break; - } - - pRuntimeEnv->startPos.fileId = fid; - pQuery->fileId = fid; - pSummary->numOfFiles++; - - if (vnodeGetHeaderFile(pRuntimeEnv, fileIdx) != TSDB_CODE_SUCCESS) { - fid += step; - continue; - } - - int32_t numOfQualifiedMeters = 0; - assert(fileIdx == pRuntimeEnv->vnodeFileInfo.current); - - SMeterDataInfo **pReqMeterDataInfo = NULL; - int32_t ret = vnodeFilterQualifiedMeters(pQInfo, vnodeId, pSupporter->pSidSet, pMeterDataInfo, - &numOfQualifiedMeters, &pReqMeterDataInfo); - if (ret != TSDB_CODE_SUCCESS) { - dError("QInfo:%p failed to create meterdata struct to perform query processing, abort", pQInfo); - - tfree(pReqMeterDataInfo); - pQInfo->code = -ret; - pQInfo->killed = 1; - - return; - } - - dTrace("QInfo:%p file:%s, %d meters qualified", pQInfo, pVnodeFileInfo->dataFilePath, numOfQualifiedMeters); - - // none of meters in query set have pHeaderFileData in this file, try next file - if (numOfQualifiedMeters == 0) { - fid += step; - tfree(pReqMeterDataInfo); - continue; - } - - uint32_t numOfBlocks = 0; - ret = getDataBlocksForMeters(pSupporter, pQuery, numOfQualifiedMeters, pVnodeFileInfo->headerFilePath, - pReqMeterDataInfo, &numOfBlocks); - if (ret != TSDB_CODE_SUCCESS) { - dError("QInfo:%p failed to get data block before scan data blocks, abort", pQInfo); - - tfree(pReqMeterDataInfo); - pQInfo->code = -ret; - pQInfo->killed = 1; - - return; - } - - dTrace("QInfo:%p file:%s, %d meters contains %d blocks to be checked", pQInfo, pVnodeFileInfo->dataFilePath, - numOfQualifiedMeters, numOfBlocks); - - if 
(numOfBlocks == 0) { - fid += step; - tfree(pReqMeterDataInfo); - continue; - } - - ret = createDataBlocksInfoEx(pReqMeterDataInfo, numOfQualifiedMeters, &pDataBlockInfoEx, numOfBlocks, - &nAllocBlocksInfoSize, (int64_t)pQInfo); - if (ret != TSDB_CODE_SUCCESS) { // failed to create data blocks - dError("QInfo:%p build blockInfoEx failed, abort", pQInfo); - tfree(pReqMeterDataInfo); - - pQInfo->code = -ret; - pQInfo->killed = 1; - return; - } - - dTrace("QInfo:%p start to load %d blocks and check", pQInfo, numOfBlocks); - int64_t TRACE_OUTPUT_BLOCK_CNT = 10000; - int64_t stimeUnit = 0; - int64_t etimeUnit = 0; - - totalBlocks += numOfBlocks; - - // sequentially scan the pHeaderFileData file - int32_t j = QUERY_IS_ASC_QUERY(pQuery) ? 0 : numOfBlocks - 1; - - for (; j < numOfBlocks && j >= 0; j += step) { - if (isQueryKilled(pQInfo)) { - break; - } - - /* output elapsed time for log every TRACE_OUTPUT_BLOCK_CNT blocks */ - if (j == 0) { - stimeUnit = taosGetTimestampMs(); - } else if ((j % TRACE_OUTPUT_BLOCK_CNT) == 0) { - etimeUnit = taosGetTimestampMs(); - dTrace("QInfo:%p load and check %" PRId64 " blocks, and continue. elapsed:%" PRId64 " ms", pQInfo, - TRACE_OUTPUT_BLOCK_CNT, etimeUnit - stimeUnit); - stimeUnit = taosGetTimestampMs(); - } - - SMeterDataBlockInfoEx *pInfoEx = &pDataBlockInfoEx[j]; - SMeterDataInfo * pOneMeterDataInfo = pInfoEx->pMeterDataInfo; - SMeterQueryInfo * pMeterQueryInfo = pOneMeterDataInfo->pMeterQInfo; - SMeterObj * pMeterObj = pOneMeterDataInfo->pMeterObj; - - pQInfo->pObj = pMeterObj; - pRuntimeEnv->pMeterObj = pMeterObj; - - restoreIntervalQueryRange(pRuntimeEnv, pMeterQueryInfo); - - if ((pQuery->lastKey > pQuery->ekey && QUERY_IS_ASC_QUERY(pQuery)) || - (pQuery->lastKey < pQuery->ekey && !QUERY_IS_ASC_QUERY(pQuery))) { - qTrace("QInfo:%p vid:%d sid:%d id:%s, query completed, no need to scan this data block. 
qrange:%" PRId64 - "-%" PRId64 ", lastKey:%" PRId64, - pQInfo, pMeterObj->vnode, pMeterObj->sid, pMeterObj->meterId, pQuery->skey, pQuery->ekey, - pQuery->lastKey); - - continue; - } - - SCompBlock *pBlock = pInfoEx->pBlock.compBlock; - bool ondemandLoad = onDemandLoadDatablock(pQuery, pMeterQueryInfo->queryRangeSet); - ret = LoadDatablockOnDemand(pBlock, &pInfoEx->pBlock.fields, &pRuntimeEnv->blockStatus, pRuntimeEnv, fileIdx, - pInfoEx->blockIndex, searchFn, ondemandLoad); - if (ret != DISK_DATA_LOADED) { - pSummary->skippedFileBlocks++; - continue; - } - - SBlockInfo binfo = getBlockBasicInfo(pRuntimeEnv, pBlock, BLK_FILE_BLOCK); - int64_t nextKey = -1; - - assert(pQuery->pos >= 0 && pQuery->pos < pBlock->numOfPoints); - TSKEY *primaryKeys = (TSKEY *)pRuntimeEnv->primaryColBuffer->data; - - if (IS_DATA_BLOCK_LOADED(pRuntimeEnv->blockStatus) && needPrimaryTimestampCol(pQuery, &binfo)) { - nextKey = primaryKeys[pQuery->pos]; - - if (!doCheckWithPrevQueryRange(pQuery, nextKey)) { - qTrace("QInfo:%p vid:%d sid:%d id:%s, no data qualified in data file, lastKey:%" PRId64, pQInfo, - pMeterObj->vnode, pMeterObj->sid, pMeterObj->meterId, pQuery->numOfBlocks, pQuery->lastKey); - continue; - } - } else { - // if data block is not loaded, it must be the intermediate blocks - assert((pBlock->keyFirst >= pQuery->lastKey && pBlock->keyLast <= pQuery->ekey && QUERY_IS_ASC_QUERY(pQuery)) || - (pBlock->keyFirst >= pQuery->ekey && pBlock->keyLast <= pQuery->lastKey && !QUERY_IS_ASC_QUERY(pQuery))); - nextKey = QUERY_IS_ASC_QUERY(pQuery) ? 
pBlock->keyFirst : pBlock->keyLast; - } - - if (pQuery->intervalTime == 0) { - setExecutionContext(pSupporter, pMeterQueryInfo, pOneMeterDataInfo->meterOrderIdx, pOneMeterDataInfo->groupIdx, - nextKey); - } else { // interval query - setIntervalQueryRange(pMeterQueryInfo, pSupporter, nextKey); - ret = setAdditionalInfo(pSupporter, pOneMeterDataInfo->meterOrderIdx, pMeterQueryInfo); - if (ret != TSDB_CODE_SUCCESS) { - tfree(pReqMeterDataInfo); // error code has been set - pQInfo->killed = 1; - return; - } - } - - stableApplyFunctionsOnBlock(pSupporter, pOneMeterDataInfo, &binfo, pInfoEx->pBlock.fields, searchFn); - } - - tfree(pReqMeterDataInfo); - - // try next file - fid += step; - } - - int64_t time = taosGetTimestampUs() - st; - dTrace("QInfo:%p complete check %d files, %d blocks, elapsed time:%.3fms", pQInfo, pVnodeFileInfo->numOfFiles, - totalBlocks, time / 1000.0); - - pSummary->fileTimeUs += time; - pSummary->readDiskBlocks += totalBlocks; - pSummary->numOfTables = pSupporter->pSidSet->numOfSids; - - setQueryStatus(pQuery, QUERY_NOT_COMPLETED); - freeMeterBlockInfoEx(pDataBlockInfoEx, nAllocBlocksInfoSize); -} - -static bool multimeterMultioutputHelper(SQInfo *pQInfo, bool *dataInDisk, bool *dataInCache, int32_t index, - int32_t start) { - STableQuerySupportObj *pSupporter = pQInfo->pTableQuerySupporter; - - SMeterSidExtInfo **pMeterSidExtInfo = pSupporter->pMeterSidExtInfo; - SQueryRuntimeEnv * pRuntimeEnv = &pSupporter->runtimeEnv; - SQuery * pQuery = &pQInfo->query; - - setQueryStatus(pQuery, QUERY_NOT_COMPLETED); - - SMeterObj *pMeterObj = getMeterObj(pSupporter->pMetersHashTable, pMeterSidExtInfo[index]->sid); - if (pMeterObj == NULL) { - dError("QInfo:%p do not find required meter id: %d, all meterObjs id is:", pQInfo, pMeterSidExtInfo[index]->sid); - return false; - } - - vnodeSetTagValueInParam(pSupporter->pSidSet, pRuntimeEnv, pMeterSidExtInfo[index]); - - dTrace("QInfo:%p query on (%d): vid:%d sid:%d meterId:%s, qrange:%" PRId64 "-%" PRId64, 
pQInfo, index - start, - pMeterObj->vnode, pMeterObj->sid, pMeterObj->meterId, pQuery->skey, pQuery->ekey); - - pQInfo->pObj = pMeterObj; - pQuery->lastKey = pQuery->skey; - pRuntimeEnv->pMeterObj = pMeterObj; - - vnodeUpdateQueryColumnIndex(pQuery, pRuntimeEnv->pMeterObj); - vnodeUpdateFilterColumnIndex(pQuery); - - vnodeCheckIfDataExists(pRuntimeEnv, pMeterObj, dataInDisk, dataInCache); - - // data in file or cache is not qualified for the query. abort - if (!(dataInCache || dataInDisk)) { - dTrace("QInfo:%p vid:%d sid:%d meterId:%s, qrange:%" PRId64 "-%" PRId64 ", nores, %p", pQInfo, pMeterObj->vnode, - pMeterObj->sid, pMeterObj->meterId, pQuery->skey, pQuery->ekey, pQuery); - return false; - } - - if (pRuntimeEnv->pTSBuf != NULL) { - if (pRuntimeEnv->cur.vnodeIndex == -1) { - int64_t tag = pRuntimeEnv->pCtx[0].tag.i64Key; - STSElem elem = tsBufGetElemStartPos(pRuntimeEnv->pTSBuf, 0, tag); - - // failed to find data with the specified tag value - if (elem.vnode < 0) { - return false; - } - } else { - tsBufSetCursor(pRuntimeEnv->pTSBuf, &pRuntimeEnv->cur); - } - } - - initCtxOutputBuf(pRuntimeEnv); - return true; -} - -static int64_t doCheckMetersInGroup(SQInfo *pQInfo, int32_t index, int32_t start) { - SQuery * pQuery = &pQInfo->query; - STableQuerySupportObj *pSupporter = pQInfo->pTableQuerySupporter; - SQueryRuntimeEnv * pRuntimeEnv = &pSupporter->runtimeEnv; - - bool dataInDisk = true; - bool dataInCache = true; - if (!multimeterMultioutputHelper(pQInfo, &dataInDisk, &dataInCache, index, start)) { - return 0; - } - -#if DEFAULT_IO_ENGINE == IO_ENGINE_MMAP - for (int32_t i = 0; i < pRuntimeEnv->numOfFiles; ++i) { - resetMMapWindow(&pRuntimeEnv->pVnodeFiles[i]); - } -#endif - - SPointInterpoSupporter pointInterpSupporter = {0}; - pointInterpSupporterInit(pQuery, &pointInterpSupporter); - - if (!normalizedFirstQueryRange(dataInDisk, dataInCache, pSupporter, &pointInterpSupporter, NULL)) { - pointInterpSupporterDestroy(&pointInterpSupporter); - return 0; - } - - 
/* - * here we set the value for before and after the specified time into the - * parameter for interpolation query - */ - pointInterpSupporterSetData(pQInfo, &pointInterpSupporter); - pointInterpSupporterDestroy(&pointInterpSupporter); - - vnodeScanAllData(pRuntimeEnv); - - // first/last_row query, do not invoke the finalize for super table query - doFinalizeResult(pRuntimeEnv); - - int64_t numOfRes = getNumOfResult(pRuntimeEnv); - assert(numOfRes == 1 || numOfRes == 0); - - // accumulate the point interpolation result - if (numOfRes > 0) { - pQuery->pointsRead += numOfRes; - forwardCtxOutputBuf(pRuntimeEnv, numOfRes); - } - - return numOfRes; -} - -/** - * super table query handler - * 1. super table projection query, group-by on normal columns query, ts-comp query - * 2. point interpolation query, last row query - * - * @param pQInfo - */ -static void vnodeSTableSeqProcessor(SQInfo *pQInfo) { - STableQuerySupportObj *pSupporter = pQInfo->pTableQuerySupporter; - - SMeterSidExtInfo **pMeterSidExtInfo = pSupporter->pMeterSidExtInfo; - SQueryRuntimeEnv * pRuntimeEnv = &pSupporter->runtimeEnv; - - SQuery * pQuery = &pQInfo->query; - tSidSet *pSids = pSupporter->pSidSet; - - int32_t vid = getMeterObj(pSupporter->pMetersHashTable, pMeterSidExtInfo[0]->sid)->vnode; - - if (isPointInterpoQuery(pQuery)) { - resetCtxOutputBuf(pRuntimeEnv); - - assert(pQuery->limit.offset == 0 && pQuery->limit.limit != 0); - - while (pSupporter->subgroupIdx < pSids->numOfSubSet) { - int32_t start = pSids->starterPos[pSupporter->subgroupIdx]; - int32_t end = pSids->starterPos[pSupporter->subgroupIdx + 1] - 1; - - if (isFirstLastRowQuery(pQuery)) { - dTrace("QInfo:%p last_row query on vid:%d, numOfGroups:%d, current group:%d", pQInfo, vid, pSids->numOfSubSet, - pSupporter->subgroupIdx); - - TSKEY key = -1; - int32_t index = -1; - - // choose the last key for one group - pSupporter->meterIdx = start; - - for (int32_t k = start; k <= end; ++k, pSupporter->meterIdx++) { - if 
(isQueryKilled(pQInfo)) { - setQueryStatus(pQuery, QUERY_NO_DATA_TO_CHECK); - return; - } - - // get the last key of meters that belongs to this group - SMeterObj *pMeterObj = getMeterObj(pSupporter->pMetersHashTable, pMeterSidExtInfo[k]->sid); - if (pMeterObj != NULL) { - if (key < pMeterObj->lastKey) { - key = pMeterObj->lastKey; - index = k; - } - } - } - - pQuery->skey = key; - pQuery->ekey = key; - pSupporter->rawSKey = key; - pSupporter->rawEKey = key; - - int64_t num = doCheckMetersInGroup(pQInfo, index, start); - assert(num >= 0); - } else { - dTrace("QInfo:%p interp query on vid:%d, numOfGroups:%d, current group:%d", pQInfo, vid, pSids->numOfSubSet, - pSupporter->subgroupIdx); - - for (int32_t k = start; k <= end; ++k) { - if (isQueryKilled(pQInfo)) { - setQueryStatus(pQuery, QUERY_NO_DATA_TO_CHECK); - return; - } - - pQuery->skey = pSupporter->rawSKey; - pQuery->ekey = pSupporter->rawEKey; - - int64_t num = doCheckMetersInGroup(pQInfo, k, start); - if (num == 1) { - break; - } - } - } - - pSupporter->subgroupIdx++; - - // output buffer is full, return to client - if (pQuery->pointsRead >= pQuery->pointsToRead) { - break; - } - } - } else { - /* - * 1. super table projection query, 2. group-by on normal columns query, 3. ts-comp query - */ - assert(pSupporter->meterIdx >= 0); - - /* - * if the subgroup index is larger than 0, results generated by group by tbname,k is existed. - * we need to return it to client in the first place. 
- */ - if (pSupporter->subgroupIdx > 0) { - copyFromWindowResToSData(pQInfo, pRuntimeEnv->windowResInfo.pResult); - pQInfo->pointsRead += pQuery->pointsRead; - - if (pQuery->pointsRead > 0) { - return; - } - } - - if (pSupporter->meterIdx >= pSids->numOfSids) { - return; - } - - resetCtxOutputBuf(pRuntimeEnv); - resetTimeWindowInfo(pRuntimeEnv, &pRuntimeEnv->windowResInfo); - - while (pSupporter->meterIdx < pSupporter->numOfMeters) { - int32_t k = pSupporter->meterIdx; - - if (isQueryKilled(pQInfo)) { - setQueryStatus(pQuery, QUERY_NO_DATA_TO_CHECK); - return; - } - - TSKEY skey = pQInfo->pTableQuerySupporter->pMeterSidExtInfo[k]->key; - if (skey > 0) { - pQuery->skey = skey; - } - - bool dataInDisk = true; - bool dataInCache = true; - if (!multimeterMultioutputHelper(pQInfo, &dataInDisk, &dataInCache, k, 0)) { - pQuery->skey = pSupporter->rawSKey; - pQuery->ekey = pSupporter->rawEKey; - - pSupporter->meterIdx++; - continue; - } - -#if DEFAULT_IO_ENGINE == IO_ENGINE_MMAP - for (int32_t i = 0; i < pRuntimeEnv->numOfFiles; ++i) { - resetMMapWindow(&pRuntimeEnv->pVnodeFiles[i]); - } -#endif - - SPointInterpoSupporter pointInterpSupporter = {0}; - if (normalizedFirstQueryRange(dataInDisk, dataInCache, pSupporter, &pointInterpSupporter, NULL) == false) { - pQuery->skey = pSupporter->rawSKey; - pQuery->ekey = pSupporter->rawEKey; - - pSupporter->meterIdx++; - continue; - } - - // TODO handle the limit problem - if (pQuery->numOfFilterCols == 0 && pQuery->limit.offset > 0) { - forwardQueryStartPosition(pRuntimeEnv); - - if (Q_STATUS_EQUAL(pQuery->over, QUERY_NO_DATA_TO_CHECK | QUERY_COMPLETED)) { - pQuery->skey = pSupporter->rawSKey; - pQuery->ekey = pSupporter->rawEKey; - - pSupporter->meterIdx++; - continue; - } - } - - vnodeScanAllData(pRuntimeEnv); - - pQuery->pointsRead = getNumOfResult(pRuntimeEnv); - doSkipResults(pRuntimeEnv); - - // the limitation of output result is reached, set the query completed - if (doRevisedResultsByLimit(pQInfo)) { - pSupporter->meterIdx 
= pSupporter->pSidSet->numOfSids; - break; - } - - // enable execution for next table, when handling the projection query - enableExecutionForNextTable(pRuntimeEnv); - - if (Q_STATUS_EQUAL(pQuery->over, QUERY_NO_DATA_TO_CHECK | QUERY_COMPLETED)) { - /* - * query range is identical in terms of all meters involved in query, - * so we need to restore them at the *beginning* of query on each meter, - * not the consecutive query on meter on which is aborted due to buffer limitation - * to ensure that, we can reset the query range once query on a meter is completed. - */ - pQuery->skey = pSupporter->rawSKey; - pQuery->ekey = pSupporter->rawEKey; - pSupporter->meterIdx++; - - pQInfo->pTableQuerySupporter->pMeterSidExtInfo[k]->key = pQuery->lastKey; - - // if the buffer is full or group by each table, we need to jump out of the loop - if (Q_STATUS_EQUAL(pQuery->over, QUERY_RESBUF_FULL) || - isGroupbyEachTable(pQuery->pGroupbyExpr, pSupporter->pSidSet)) { - break; - } - - } else { // forward query range - pQuery->skey = pQuery->lastKey; - - // all data in the result buffer are skipped due to the offset, continue to retrieve data from current meter - if (pQuery->pointsRead == 0) { - assert(!Q_STATUS_EQUAL(pQuery->over, QUERY_RESBUF_FULL)); - continue; - } else { - pQInfo->pTableQuerySupporter->pMeterSidExtInfo[k]->key = pQuery->lastKey; - // buffer is full, wait for the next round to retrieve data from current meter - assert(Q_STATUS_EQUAL(pQuery->over, QUERY_RESBUF_FULL)); - break; - } - } - } - } - - /* - * 1. super table projection query, group-by on normal columns query, ts-comp query - * 2. point interpolation query, last row query - * - * group-by on normal columns query and last_row query do NOT invoke the finalizer here, - * since the finalize stage will be done at the client side. - * - * projection query, point interpolation query do not need the finalizer. - * - * Only the ts-comp query requires the finalizer function to be executed here. 
- */ - if (isTSCompQuery(pQuery)) { - doFinalizeResult(pRuntimeEnv); - } - - if (pRuntimeEnv->pTSBuf != NULL) { - pRuntimeEnv->cur = pRuntimeEnv->pTSBuf->cur; - } - - // todo refactor - if (isGroupbyNormalCol(pQuery->pGroupbyExpr)) { - SWindowResInfo *pWindowResInfo = &pRuntimeEnv->windowResInfo; - - for (int32_t i = 0; i < pWindowResInfo->size; ++i) { - SWindowStatus *pStatus = &pWindowResInfo->pResult[i].status; - pStatus->closed = true; // enable return all results for group by normal columns - - SWindowResult *pResult = &pWindowResInfo->pResult[i]; - for (int32_t j = 0; j < pQuery->numOfOutputCols; ++j) { - pResult->numOfRows = MAX(pResult->numOfRows, pResult->resultInfo[j].numOfRes); - } - } - - pQInfo->pTableQuerySupporter->subgroupIdx = 0; - pQuery->pointsRead = 0; - copyFromWindowResToSData(pQInfo, pWindowResInfo->pResult); - } - - pQInfo->pointsRead += pQuery->pointsRead; - pQuery->pointsOffset = pQuery->pointsToRead; - - dTrace( - "QInfo %p vid:%d, numOfMeters:%d, index:%d, numOfGroups:%d, %d points returned, totalRead:%d totalReturn:%d," - "next skey:%" PRId64 ", offset:%" PRId64, - pQInfo, vid, pSids->numOfSids, pSupporter->meterIdx, pSids->numOfSubSet, pQuery->pointsRead, pQInfo->pointsRead, - pQInfo->pointsReturned, pQuery->skey, pQuery->limit.offset); -} - -static void doOrderedScan(SQInfo *pQInfo) { - STableQuerySupportObj *pSupporter = pQInfo->pTableQuerySupporter; - SQuery * pQuery = &pQInfo->query; - - if (QUERY_IS_ASC_QUERY(pQuery)) { - queryOnMultiDataFiles(pQInfo, pSupporter->pMeterDataInfo); - if (pQInfo->code != TSDB_CODE_SUCCESS) { - return; - } - - queryOnMultiDataCache(pQInfo, pSupporter->pMeterDataInfo); - } else { - queryOnMultiDataCache(pQInfo, pSupporter->pMeterDataInfo); - if (pQInfo->code != TSDB_CODE_SUCCESS) { - return; - } - - queryOnMultiDataFiles(pQInfo, pSupporter->pMeterDataInfo); - } -} - -static void setupMeterQueryInfoForSupplementQuery(STableQuerySupportObj *pSupporter) { - SQuery *pQuery = pSupporter->runtimeEnv.pQuery; 
- - for (int32_t i = 0; i < pSupporter->numOfMeters; ++i) { - SMeterQueryInfo *pMeterQueryInfo = pSupporter->pMeterDataInfo[i].pMeterQInfo; - changeMeterQueryInfoForSuppleQuery(pQuery, pMeterQueryInfo, pSupporter->rawSKey, pSupporter->rawEKey); - } -} - -static void doMultiMeterSupplementaryScan(SQInfo *pQInfo) { - STableQuerySupportObj *pSupporter = pQInfo->pTableQuerySupporter; - - SQueryRuntimeEnv *pRuntimeEnv = &pSupporter->runtimeEnv; - SQuery * pQuery = &pQInfo->query; - - if (!needSupplementaryScan(pQuery)) { - dTrace("QInfo:%p no need to do supplementary scan, query completed", pQInfo); - return; - } - - SET_SUPPLEMENT_SCAN_FLAG(pRuntimeEnv); - disableFunctForSuppleScan(pSupporter, pQuery->order.order); - - if (pRuntimeEnv->pTSBuf != NULL) { - pRuntimeEnv->pTSBuf->cur.order = pRuntimeEnv->pTSBuf->cur.order ^ 1u; - } - - SWAP(pSupporter->rawSKey, pSupporter->rawEKey, TSKEY); - setupMeterQueryInfoForSupplementQuery(pSupporter); - - int64_t st = taosGetTimestampMs(); - - doOrderedScan(pQInfo); - - int64_t et = taosGetTimestampMs(); - dTrace("QInfo:%p supplementary scan completed, elapsed time: %lldms", pQInfo, et - st); - - /* - * restore the env - * the meter query info is not reset to the original state - */ - SWAP(pSupporter->rawSKey, pSupporter->rawEKey, TSKEY); - enableFunctForMasterScan(pRuntimeEnv, pQuery->order.order); - - if (pRuntimeEnv->pTSBuf != NULL) { - pRuntimeEnv->pTSBuf->cur.order = pRuntimeEnv->pTSBuf->cur.order ^ 1; - } - - SET_MASTER_SCAN_FLAG(pRuntimeEnv); -} - -static void vnodeMultiMeterQueryProcessor(SQInfo *pQInfo) { - STableQuerySupportObj *pSupporter = pQInfo->pTableQuerySupporter; - SQueryRuntimeEnv * pRuntimeEnv = &pSupporter->runtimeEnv; - SQuery * pQuery = &pQInfo->query; - - if (pSupporter->subgroupIdx > 0) { - /* - * if the subgroupIdx > 0, the query process must be completed yet, we only need to - * copy the data into output buffer - */ - if (pQuery->intervalTime > 0) { - copyResToQueryResultBuf(pSupporter, pQuery); - -#ifdef 
_DEBUG_VIEW - displayInterResult(pQuery->sdata, pQuery, pQuery->sdata[0]->len); -#endif - } else { - copyFromWindowResToSData(pQInfo, pRuntimeEnv->windowResInfo.pResult); - } - - pQInfo->pointsRead += pQuery->pointsRead; - - if (pQuery->pointsRead == 0) { - vnodePrintQueryStatistics(pSupporter); - } - - dTrace("QInfo:%p points returned:%d, totalRead:%d totalReturn:%d", pQInfo, pQuery->pointsRead, pQInfo->pointsRead, - pQInfo->pointsReturned); - return; - } - - pSupporter->pMeterDataInfo = (SMeterDataInfo *)calloc(1, sizeof(SMeterDataInfo) * pSupporter->numOfMeters); - if (pSupporter->pMeterDataInfo == NULL) { - dError("QInfo:%p failed to allocate memory, %s", pQInfo, strerror(errno)); - pQInfo->code = -TSDB_CODE_SERV_OUT_OF_MEMORY; - return; - } - - dTrace("QInfo:%p query start, qrange:%" PRId64 "-%" PRId64 ", order:%d, group:%d", pQInfo, pSupporter->rawSKey, - pSupporter->rawEKey, pQuery->order.order, pSupporter->pSidSet->numOfSubSet); - - dTrace("QInfo:%p main query scan start", pQInfo); - int64_t st = taosGetTimestampMs(); - doOrderedScan(pQInfo); - int64_t et = taosGetTimestampMs(); - dTrace("QInfo:%p main scan completed, elapsed time: %lldms, supplementary scan start, order:%d", pQInfo, et - st, - pQuery->order.order ^ 1u); - - if (pQuery->intervalTime > 0) { - for (int32_t i = 0; i < pSupporter->numOfMeters; ++i) { - SMeterQueryInfo *pMeterQueryInfo = pSupporter->pMeterDataInfo[i].pMeterQInfo; - closeAllTimeWindow(&pMeterQueryInfo->windowResInfo); - } - } else { // close results for group result - closeAllTimeWindow(&pRuntimeEnv->windowResInfo); - } - - doMultiMeterSupplementaryScan(pQInfo); - - if (isQueryKilled(pQInfo)) { - dTrace("QInfo:%p query killed, abort", pQInfo); - return; - } - - if (pQuery->intervalTime > 0 || isSumAvgRateQuery(pQuery)) { - assert(pSupporter->subgroupIdx == 0 && pSupporter->numOfGroupResultPages == 0); - - if (mergeMetersResultToOneGroups(pSupporter) == TSDB_CODE_SUCCESS) { - copyResToQueryResultBuf(pSupporter, pQuery); - -#ifdef 
_DEBUG_VIEW - displayInterResult(pQuery->sdata, pQuery, pQuery->sdata[0]->len); -#endif - } - } else { // not a interval query - copyFromWindowResToSData(pQInfo, pRuntimeEnv->windowResInfo.pResult); - } - - // handle the limitation of output buffer - pQInfo->pointsRead += pQuery->pointsRead; - dTrace("QInfo:%p points returned:%d, totalRead:%d totalReturn:%d", pQInfo, pQuery->pointsRead, pQInfo->pointsRead, - pQInfo->pointsReturned); -} - -/* - * in each query, this function will be called only once, no retry for further result. - * - * select count(*)/top(field,k)/avg(field name) from table_name [where ts>now-1a]; - * select count(*) from table_name group by status_column; - */ -static void vnodeSingleTableFixedOutputProcessor(SQInfo *pQInfo) { - SQuery * pQuery = &pQInfo->query; - SQueryRuntimeEnv *pRuntimeEnv = &pQInfo->pTableQuerySupporter->runtimeEnv; - - assert(pQuery->slot >= 0 && pQuery->pos >= 0); - - vnodeScanAllData(pRuntimeEnv); - doFinalizeResult(pRuntimeEnv); - - if (isQueryKilled(pQInfo)) { - return; - } - - // since the numOfOutputElems must be identical for all sql functions that are allowed to be executed simutanelously. 
- pQuery->pointsRead = getNumOfResult(pRuntimeEnv); - assert(pQuery->pointsRead <= pQuery->pointsToRead && - Q_STATUS_EQUAL(pQuery->over, QUERY_COMPLETED | QUERY_NO_DATA_TO_CHECK)); - - // must be top/bottom query if offset > 0 - if (pQuery->limit.offset > 0) { - assert(isTopBottomQuery(pQuery)); - } - - doSkipResults(pRuntimeEnv); - doRevisedResultsByLimit(pQInfo); - - pQInfo->pointsRead = pQuery->pointsRead; -} - -static void vnodeSingleTableMultiOutputProcessor(SQInfo *pQInfo) { - SQuery * pQuery = &pQInfo->query; - SMeterObj *pMeterObj = pQInfo->pObj; - - SQueryRuntimeEnv *pRuntimeEnv = &pQInfo->pTableQuerySupporter->runtimeEnv; - - // for ts_comp query, re-initialized is not allowed - if (!isTSCompQuery(pQuery)) { - resetCtxOutputBuf(pRuntimeEnv); - } - - while (1) { - vnodeScanAllData(pRuntimeEnv); - doFinalizeResult(pRuntimeEnv); - - if (isQueryKilled(pQInfo)) { - return; - } - - pQuery->pointsRead = getNumOfResult(pRuntimeEnv); - if (pQuery->limit.offset > 0 && pQuery->numOfFilterCols > 0 && pQuery->pointsRead > 0) { - doSkipResults(pRuntimeEnv); - } - - /* - * 1. if pQuery->pointsRead == 0, pQuery->limit.offset >= 0, still need to check data - * 2. 
if pQuery->pointsRead > 0, pQuery->limit.offset must be 0 - */ - if (pQuery->pointsRead > 0 || Q_STATUS_EQUAL(pQuery->over, QUERY_COMPLETED | QUERY_NO_DATA_TO_CHECK)) { - break; - } - - TSKEY nextTimestamp = loadRequiredBlockIntoMem(pRuntimeEnv, &pRuntimeEnv->nextPos); - assert(nextTimestamp > 0 || ((nextTimestamp < 0) && Q_STATUS_EQUAL(pQuery->over, QUERY_NO_DATA_TO_CHECK))); - - dTrace("QInfo:%p vid:%d sid:%d id:%s, skip current result, offset:%" PRId64 ", next qrange:%" PRId64 "-%" PRId64, - pQInfo, pMeterObj->vnode, pMeterObj->sid, pMeterObj->meterId, pQuery->limit.offset, pQuery->lastKey, - pQuery->ekey); - - resetCtxOutputBuf(pRuntimeEnv); - } - - doRevisedResultsByLimit(pQInfo); - pQInfo->pointsRead += pQuery->pointsRead; - - if (Q_STATUS_EQUAL(pQuery->over, QUERY_RESBUF_FULL)) { - TSKEY nextTimestamp = loadRequiredBlockIntoMem(pRuntimeEnv, &pRuntimeEnv->nextPos); - assert(nextTimestamp > 0 || ((nextTimestamp < 0) && Q_STATUS_EQUAL(pQuery->over, QUERY_NO_DATA_TO_CHECK))); - - dTrace("QInfo:%p vid:%d sid:%d id:%s, query abort due to buffer limitation, next qrange:%" PRId64 "-%" PRId64, - pQInfo, pMeterObj->vnode, pMeterObj->sid, pMeterObj->meterId, pQuery->lastKey, pQuery->ekey); - } - - dTrace("QInfo:%p vid:%d sid:%d id:%s, %d points returned, totalRead:%d totalReturn:%d", pQInfo, pMeterObj->vnode, - pMeterObj->sid, pMeterObj->meterId, pQuery->pointsRead, pQInfo->pointsRead, pQInfo->pointsReturned); - - pQuery->pointsOffset = pQuery->pointsToRead; // restore the available buffer - if (!isTSCompQuery(pQuery)) { - assert(pQuery->pointsRead <= pQuery->pointsToRead); - } -} - -static void vnodeSingleMeterIntervalMainLooper(STableQuerySupportObj *pSupporter, SQueryRuntimeEnv *pRuntimeEnv) { - SQuery *pQuery = pRuntimeEnv->pQuery; - - while (1) { - initCtxOutputBuf(pRuntimeEnv); - vnodeScanAllData(pRuntimeEnv); - - if (isQueryKilled(pQInfo)) { - return; - } - - assert(!Q_STATUS_EQUAL(pQuery->over, QUERY_NOT_COMPLETED)); - doFinalizeResult(pRuntimeEnv); - - // here 
we can ignore the records in case of no interpolation - // todo handle offset, in case of top/bottom interval query - if ((pQuery->numOfFilterCols > 0 || pRuntimeEnv->pTSBuf != NULL) && pQuery->limit.offset > 0 && - pQuery->interpoType == TSDB_INTERPO_NONE) { - // maxOutput <= 0, means current query does not generate any results - int32_t numOfClosed = numOfClosedTimeWindow(&pRuntimeEnv->windowResInfo); - - int32_t c = MIN(numOfClosed, pQuery->limit.offset); - clearFirstNTimeWindow(pRuntimeEnv, c); - pQuery->limit.offset -= c; - } - - if (Q_STATUS_EQUAL(pQuery->over, QUERY_NO_DATA_TO_CHECK | QUERY_COMPLETED)) { - break; - } - - // load the data block for the next retrieve - loadRequiredBlockIntoMem(pRuntimeEnv, &pRuntimeEnv->nextPos); - if (Q_STATUS_EQUAL(pQuery->over, QUERY_RESBUF_FULL)) { - break; - } - } -} - -/* handle time interval query on single table */ -static void vnodeSingleTableIntervalProcessor(SQInfo *pQInfo) { - SQuery * pQuery = &(pQInfo->query); - SMeterObj *pMeterObj = pQInfo->pObj; - - STableQuerySupportObj *pSupporter = pQInfo->pTableQuerySupporter; - SQueryRuntimeEnv * pRuntimeEnv = &pSupporter->runtimeEnv; - - int32_t numOfInterpo = 0; - - while (1) { - resetCtxOutputBuf(pRuntimeEnv); - vnodeSingleMeterIntervalMainLooper(pSupporter, pRuntimeEnv); - - if (pQuery->intervalTime > 0) { - pSupporter->subgroupIdx = 0; // always start from 0 - pQuery->pointsRead = 0; - copyFromWindowResToSData(pQInfo, pRuntimeEnv->windowResInfo.pResult); - - clearFirstNTimeWindow(pRuntimeEnv, pSupporter->subgroupIdx); - } - - // the offset is handled at prepare stage if no interpolation involved - if (pQuery->interpoType == TSDB_INTERPO_NONE) { - doRevisedResultsByLimit(pQInfo); - break; - } else { - taosInterpoSetStartInfo(&pRuntimeEnv->interpoInfo, pQuery->pointsRead, pQuery->interpoType); - SData **pInterpoBuf = pRuntimeEnv->pInterpoBuf; - - for (int32_t i = 0; i < pQuery->numOfOutputCols; ++i) { - memcpy(pInterpoBuf[i]->data, pQuery->sdata[i]->data, 
pQuery->pointsRead * pQuery->pSelectExpr[i].resBytes); - } - - numOfInterpo = 0; - pQuery->pointsRead = vnodeQueryResultInterpolate(pQInfo, (tFilePage **)pQuery->sdata, (tFilePage **)pInterpoBuf, - pQuery->pointsRead, &numOfInterpo); - - dTrace("QInfo: %p interpo completed, final:%d", pQInfo, pQuery->pointsRead); - if (pQuery->pointsRead > 0 || Q_STATUS_EQUAL(pQuery->over, QUERY_COMPLETED | QUERY_NO_DATA_TO_CHECK)) { - doRevisedResultsByLimit(pQInfo); - break; - } - - // no result generated yet, continue retrieve data - pQuery->pointsRead = 0; - } - } - - // all data scanned, the group by normal column can return - if (isGroupbyNormalCol(pQuery->pGroupbyExpr)) {//todo refactor with merge interval time result - pSupporter->subgroupIdx = 0; - pQuery->pointsRead = 0; - copyFromWindowResToSData(pQInfo, pRuntimeEnv->windowResInfo.pResult); - clearFirstNTimeWindow(pRuntimeEnv, pSupporter->subgroupIdx); - } - - pQInfo->pointsRead += pQuery->pointsRead; - pQInfo->pointsInterpo += numOfInterpo; - - dTrace("%p vid:%d sid:%d id:%s, %d points returned %d points interpo, totalRead:%d totalInterpo:%d totalReturn:%d", - pQInfo, pMeterObj->vnode, pMeterObj->sid, pMeterObj->meterId, pQuery->pointsRead, numOfInterpo, - pQInfo->pointsRead - pQInfo->pointsInterpo, pQInfo->pointsInterpo, pQInfo->pointsReturned); -} - -void vnodeSingleTableQuery(SSchedMsg *pMsg) { - SQInfo *pQInfo = (SQInfo *)pMsg->ahandle; - - if (pQInfo == NULL || pQInfo->pTableQuerySupporter == NULL) { - dTrace("%p freed abort query", pQInfo); - return; - } - - if (pQInfo->killed) { - dTrace("QInfo:%p it is already killed, abort", pQInfo); - vnodeDecRefCount(pQInfo); - - return; - } - - assert(pQInfo->refCount >= 1); - - SQuery * pQuery = &pQInfo->query; - SMeterObj * pMeterObj = pQInfo->pObj; - STableQuerySupportObj *pSupporter = pQInfo->pTableQuerySupporter; - SQueryRuntimeEnv * pRuntimeEnv = &pSupporter->runtimeEnv; - - assert(pRuntimeEnv->pMeterObj == pMeterObj); - - dTrace("vid:%d sid:%d id:%s, query thread is 
created, numOfQueries:%d, QInfo:%p", pMeterObj->vnode, pMeterObj->sid, - pMeterObj->meterId, pMeterObj->numOfQueries, pQInfo); - - if (vnodeHasRemainResults(pQInfo)) { - /* - * There are remain results that are not returned due to result interpolation - * So, we do keep in this procedure instead of launching retrieve procedure for next results. - */ - int32_t numOfInterpo = 0; - - int32_t remain = taosNumOfRemainPoints(&pRuntimeEnv->interpoInfo); - pQuery->pointsRead = vnodeQueryResultInterpolate(pQInfo, (tFilePage **)pQuery->sdata, - (tFilePage **)pRuntimeEnv->pInterpoBuf, remain, &numOfInterpo); - - doRevisedResultsByLimit(pQInfo); - - pQInfo->pointsInterpo += numOfInterpo; - pQInfo->pointsRead += pQuery->pointsRead; - - dTrace( - "QInfo:%p vid:%d sid:%d id:%s, %d points returned %d points interpo, totalRead:%d totalInterpo:%d " - "totalReturn:%d", - pQInfo, pMeterObj->vnode, pMeterObj->sid, pMeterObj->meterId, pQuery->pointsRead, numOfInterpo, - pQInfo->pointsRead, pQInfo->pointsInterpo, pQInfo->pointsReturned); - - sem_post(&pQInfo->dataReady); - vnodeDecRefCount(pQInfo); - - return; - } - - // here we have scan all qualified data in both data file and cache - if (Q_STATUS_EQUAL(pQuery->over, QUERY_NO_DATA_TO_CHECK | QUERY_COMPLETED)) { - // continue to get push data from the group result - if (isGroupbyNormalCol(pQuery->pGroupbyExpr) || - (pQuery->intervalTime > 0 && pQInfo->pointsReturned < pQuery->limit.limit)) { - //todo limit the output for interval query? 
- pQuery->pointsRead = 0; - pSupporter->subgroupIdx = 0; // always start from 0 - - if (pRuntimeEnv->windowResInfo.size > 0) { - copyFromWindowResToSData(pQInfo, pRuntimeEnv->windowResInfo.pResult); - pQInfo->pointsRead += pQuery->pointsRead; - - clearFirstNTimeWindow(pRuntimeEnv, pSupporter->subgroupIdx); - - if (pQuery->pointsRead > 0) { - dTrace("QInfo:%p vid:%d sid:%d id:%s, %d points returned %d from group results, totalRead:%d totalReturn:%d", - pQInfo, pMeterObj->vnode, pMeterObj->sid, pMeterObj->meterId, pQuery->pointsRead, pQInfo->pointsRead, - pQInfo->pointsInterpo, pQInfo->pointsReturned); - - sem_post(&pQInfo->dataReady); - vnodeDecRefCount(pQInfo); - - return; - } - } - } - - pQInfo->over = 1; - dTrace("QInfo:%p vid:%d sid:%d id:%s, query over, %d points are returned", pQInfo, pMeterObj->vnode, pMeterObj->sid, - pMeterObj->meterId, pQInfo->pointsRead); - - vnodePrintQueryStatistics(pSupporter); - sem_post(&pQInfo->dataReady); - - vnodeDecRefCount(pQInfo); - return; - } - - /* number of points returned during this query */ - pQuery->pointsRead = 0; - assert(pQuery->pos >= 0 && pQuery->slot >= 0); - - int64_t st = taosGetTimestampUs(); - - // group by normal column, sliding window query, interval query are handled by interval query processor - if (pQuery->intervalTime != 0 || isGroupbyNormalCol(pQuery->pGroupbyExpr)) { // interval (down sampling operation) - assert(pQuery->checkBufferInLoop == 0 && pQuery->pointsOffset == pQuery->pointsToRead); - vnodeSingleTableIntervalProcessor(pQInfo); - } else { - if (isFixedOutputQuery(pQuery)) { - assert(pQuery->checkBufferInLoop == 0); - vnodeSingleTableFixedOutputProcessor(pQInfo); - } else { // diff/add/multiply/subtract/division - assert(pQuery->checkBufferInLoop == 1); - vnodeSingleTableMultiOutputProcessor(pQInfo); - } - } - - // record the total elapsed time - pQInfo->useconds += (taosGetTimestampUs() - st); - - /* check if query is killed or not */ - if (isQueryKilled(pQInfo)) { - dTrace("QInfo:%p query is 
killed", pQInfo); - pQInfo->over = 1; - } else { - dTrace("QInfo:%p vid:%d sid:%d id:%s, meter query thread completed, %d points are returned", pQInfo, - pMeterObj->vnode, pMeterObj->sid, pMeterObj->meterId, pQuery->pointsRead); - } - - sem_post(&pQInfo->dataReady); - vnodeDecRefCount(pQInfo); -} - -void vnodeMultiMeterQuery(SSchedMsg *pMsg) { - SQInfo *pQInfo = (SQInfo *)pMsg->ahandle; - - if (pQInfo == NULL || pQInfo->pTableQuerySupporter == NULL) { - return; - } - - if (pQInfo->killed) { - vnodeDecRefCount(pQInfo); - dTrace("QInfo:%p it is already killed, abort", pQInfo); - return; - } - - assert(pQInfo->refCount >= 1); - - SQuery *pQuery = &pQInfo->query; - pQuery->pointsRead = 0; - - int64_t st = taosGetTimestampUs(); - if (pQuery->intervalTime > 0 || - (isFixedOutputQuery(pQuery) && (!isPointInterpoQuery(pQuery)) && !isGroupbyNormalCol(pQuery->pGroupbyExpr))) { - assert(pQuery->checkBufferInLoop == 0); - vnodeMultiMeterQueryProcessor(pQInfo); - } else { - assert((pQuery->checkBufferInLoop == 1 && pQuery->intervalTime == 0) || isPointInterpoQuery(pQuery) || - isGroupbyNormalCol(pQuery->pGroupbyExpr)); - - vnodeSTableSeqProcessor(pQInfo); - } - - /* record the total elapsed time */ - pQInfo->useconds += (taosGetTimestampUs() - st); - pQInfo->over = isQueryKilled(pQInfo) ? 
1 : 0; - - taosInterpoSetStartInfo(&pQInfo->pTableQuerySupporter->runtimeEnv.interpoInfo, pQuery->pointsRead, - pQInfo->query.interpoType); - - STableQuerySupportObj *pSupporter = pQInfo->pTableQuerySupporter; - - if (pQuery->pointsRead == 0) { - pQInfo->over = 1; - dTrace("QInfo:%p over, %d meters queried, %d points are returned", pQInfo, pSupporter->numOfMeters, - pQInfo->pointsRead); - vnodePrintQueryStatistics(pSupporter); - } - - sem_post(&pQInfo->dataReady); - vnodeDecRefCount(pQInfo); -} diff --git a/src/vnode/detail/src/vnodeRead.c b/src/vnode/detail/src/vnodeRead.c deleted file mode 100644 index 86f508dd91..0000000000 --- a/src/vnode/detail/src/vnodeRead.c +++ /dev/null @@ -1,1153 +0,0 @@ -/* - * Copyright (c) 2019 TAOS Data, Inc. - * - * This program is free software: you can use, redistribute, and/or modify - * it under the terms of the GNU Affero General Public License, version 3 - * or later ("AGPL"), as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. - * - * You should have received a copy of the GNU Affero General Public License - * along with this program. If not, see . 
- */ - -#define _DEFAULT_SOURCE -#include "os.h" - -#include "hash.h" -#include "hashfunc.h" -#include "ihash.h" -#include "qast.h" -#include "qextbuffer.h" -#include "taosmsg.h" -#include "tscJoinProcess.h" -#include "tscompression.h" -#include "vnode.h" -#include "vnodeRead.h" -#include "vnodeUtil.h" - -int (*pQueryFunc[])(SMeterObj *, SQuery *) = {vnodeQueryFromCache, vnodeQueryFromFile}; - -int vnodeInterpolationSearchKey(char *pValue, int num, TSKEY key, int order) { - int firstPos, lastPos, midPos = -1; - int delta, numOfPoints; - TSKEY *keyList; - - keyList = (TSKEY *)pValue; - firstPos = 0; - lastPos = num - 1; - - if (order == 0) { - // from latest to oldest - while (1) { - if (key >= keyList[lastPos]) return lastPos; - if (key == keyList[firstPos]) return firstPos; - if (key < keyList[firstPos]) return firstPos - 1; - - numOfPoints = lastPos - firstPos + 1; - delta = keyList[lastPos] - keyList[firstPos]; - midPos = (key - keyList[firstPos]) / delta * numOfPoints + firstPos; - - if (key < keyList[midPos]) { - lastPos = midPos - 1; - } else if (key > keyList[midPos]) { - firstPos = midPos + 1; - } else { - break; - } - } - - } else { - // from oldest to latest - while (1) { - if (key <= keyList[firstPos]) return firstPos; - if (key == keyList[lastPos]) return lastPos; - - if (key > keyList[lastPos]) { - lastPos = lastPos + 1; - if (lastPos >= num) return -1; - } - - numOfPoints = lastPos - firstPos + 1; - delta = keyList[lastPos] - keyList[firstPos]; - midPos = (key - keyList[firstPos]) / delta * numOfPoints + firstPos; - - if (key < keyList[midPos]) { - lastPos = midPos - 1; - } else if (key > keyList[midPos]) { - firstPos = midPos + 1; - } else { - break; - } - } - } - - return midPos; -} - -int vnodeBinarySearchKey(char *pValue, int num, TSKEY key, int order) { - int firstPos, lastPos, midPos = -1; - int numOfPoints; - TSKEY *keyList; - - if (num <= 0) return -1; - - keyList = (TSKEY *)pValue; - firstPos = 0; - lastPos = num - 1; - - if (order == 0) { - 
// find the first position which is smaller than the key - while (1) { - if (key >= keyList[lastPos]) return lastPos; - if (key == keyList[firstPos]) return firstPos; - if (key < keyList[firstPos]) return firstPos - 1; - - numOfPoints = lastPos - firstPos + 1; - midPos = (numOfPoints >> 1) + firstPos; - - if (key < keyList[midPos]) { - lastPos = midPos - 1; - } else if (key > keyList[midPos]) { - firstPos = midPos + 1; - } else { - break; - } - } - - } else { - // find the first position which is bigger than the key - while (1) { - if (key <= keyList[firstPos]) return firstPos; - if (key == keyList[lastPos]) return lastPos; - - if (key > keyList[lastPos]) { - lastPos = lastPos + 1; - if (lastPos >= num) - return -1; - else - return lastPos; - } - - numOfPoints = lastPos - firstPos + 1; - midPos = (numOfPoints >> 1) + firstPos; - - if (key < keyList[midPos]) { - lastPos = midPos - 1; - } else if (key > keyList[midPos]) { - firstPos = midPos + 1; - } else { - break; - } - } - } - - return midPos; -} - -int (*vnodeSearchKeyFunc[])(char *pValue, int num, TSKEY key, int order) = {vnodeBinarySearchKey, - vnodeInterpolationSearchKey}; - -static SQInfo *vnodeAllocateQInfoCommon(SQueryMeterMsg *pQueryMsg, SMeterObj *pMeterObj, SSqlFunctionExpr *pExprs) { - SQInfo *pQInfo = (SQInfo *)calloc(1, sizeof(SQInfo)); - if (pQInfo == NULL) { - return NULL; - } - - SQuery *pQuery = &(pQInfo->query); - - SColumnInfo *colList = pQueryMsg->colList; - - short numOfCols = pQueryMsg->numOfCols; - short numOfOutputCols = pQueryMsg->numOfOutputCols; - - pQuery->numOfCols = numOfCols; - pQuery->numOfOutputCols = numOfOutputCols; - - pQuery->limit.limit = pQueryMsg->limit; - pQuery->limit.offset = pQueryMsg->offset; - - pQuery->order.order = pQueryMsg->order; - pQuery->order.orderColId = pQueryMsg->orderColId; - - pQuery->colList = calloc(1, sizeof(SSingleColumnFilterInfo) * numOfCols); - if (pQuery->colList == NULL) { - goto _clean_memory; - } - - for (int16_t i = 0; i < numOfCols; ++i) { - 
pQuery->colList[i].req[0] = 1; // column required during mater scan of data blocks - pQuery->colList[i].colIdxInBuf = i; - - pQuery->colList[i].data = colList[i]; - SColumnInfo *pColInfo = &pQuery->colList[i].data; - - pColInfo->filters = NULL; - - if (colList[i].numOfFilters > 0) { - pColInfo->filters = calloc(1, colList[i].numOfFilters * sizeof(SColumnFilterInfo)); - - for (int32_t j = 0; j < colList[i].numOfFilters; ++j) { - tscColumnFilterInfoCopy(&pColInfo->filters[j], &colList[i].filters[j]); - } - } else { - pQuery->colList[i].data.filters = NULL; - } - } - - vnodeUpdateQueryColumnIndex(pQuery, pMeterObj); - - for (int16_t col = 0; col < numOfOutputCols; ++col) { - assert(pExprs[col].resBytes > 0); - - pQuery->rowSize += pExprs[col].resBytes; - if (TSDB_COL_IS_TAG(pExprs[col].pBase.colInfo.flag)) { - continue; - } - - int16_t colId = pExprs[col].pBase.colInfo.colId; - int16_t functId = pExprs[col].pBase.functionId; - - // build the projection of actual column data in buffer and the real column index - for (int32_t k = 0; k < numOfCols; ++k) { - if (pQuery->colList[k].data.colId == colId) { - pExprs[col].pBase.colInfo.colIdxInBuf = (int16_t)k; - pExprs[col].pBase.colInfo.colIdx = pQuery->colList[k].colIdx; - - if (((functId == TSDB_FUNC_FIRST_DST || functId == TSDB_FUNC_FIRST) && pQuery->order.order == TSQL_SO_DESC) || - ((functId == TSDB_FUNC_LAST_DST || functId == TSDB_FUNC_LAST) && pQuery->order.order == TSQL_SO_ASC)) { - pQuery->colList[k].req[1] = 1; - } else if (functId == TSDB_FUNC_STDDEV) { - pQuery->colList[k].req[1] = 1; - } - break; - } - } - } - - pQuery->pSelectExpr = pExprs; - - int32_t ret = vnodeCreateFilterInfo(pQInfo, pQuery); - if (ret != TSDB_CODE_SUCCESS) { - goto _clean_memory; - } - - vnodeUpdateFilterColumnIndex(pQuery); - pQuery->precision = vnodeList[pMeterObj->vnode].cfg.precision; - - return pQInfo; - -_clean_memory: - tfree(pQuery->pFilterInfo); - tfree(pQuery->colList); - tfree(pQInfo); - - return NULL; -} - -static SQInfo 
*vnodeAllocateQInfoEx(SQueryMeterMsg *pQueryMsg, SSqlGroupbyExpr *pGroupbyExpr, SSqlFunctionExpr *pExprs, - SMeterObj *pMeterObj) { - SQInfo *pQInfo = vnodeAllocateQInfoCommon(pQueryMsg, pMeterObj, pExprs); - if (pQInfo == NULL) { - tfree(pExprs); - tfree(pGroupbyExpr); - - return NULL; - } - - SQuery *pQuery = &(pQInfo->query); - - /* pQuery->sdata is the results output buffer. */ - pQuery->sdata = (SData **)calloc(pQuery->numOfOutputCols, sizeof(SData *)); - if (pQuery->sdata == NULL) { - goto sign_clean_memory; - } - - pQuery->pGroupbyExpr = pGroupbyExpr; - pQuery->intervalTime = pQueryMsg->intervalTime; - pQuery->slidingTime = pQueryMsg->slidingTime; - pQuery->interpoType = pQueryMsg->interpoType; - pQuery->intervalTimeUnit = pQueryMsg->intervalTimeUnit; - - pQInfo->query.pointsToRead = vnodeList[pMeterObj->vnode].cfg.rowsInFileBlock; - - for (int32_t col = 0; col < pQuery->numOfOutputCols; ++col) { - assert(pExprs[col].interResBytes >= pExprs[col].resBytes); - - // allocate additional memory for interResults that are usually larger then final results - size_t size = (pQInfo->query.pointsToRead + 1) * pExprs[col].resBytes + pExprs[col].interResBytes + sizeof(SData); - pQuery->sdata[col] = (SData *)calloc(1, size); - if (pQuery->sdata[col] == NULL) { - goto sign_clean_memory; - } - } - - if (pQuery->interpoType != TSDB_INTERPO_NONE) { - pQuery->defaultVal = malloc(sizeof(int64_t) * pQuery->numOfOutputCols); - if (pQuery->defaultVal == NULL) { - goto sign_clean_memory; - } - - // the first column is the timestamp - memcpy(pQuery->defaultVal, (char *)pQueryMsg->defaultVal, pQuery->numOfOutputCols * sizeof(int64_t)); - } - - // to make sure third party won't overwrite this structure - pQInfo->signature = (uint64_t)pQInfo; - pQInfo->pObj = pMeterObj; - pQuery->slot = -1; - pQuery->pos = -1; - pQuery->hfd = -1; - pQuery->dfd = -1; - pQuery->lfd = -1; - - dTrace("vid:%d sid:%d meterId:%s, QInfo is allocated:%p", pMeterObj->vnode, pMeterObj->sid, pMeterObj->meterId, - 
pQInfo); - - return pQInfo; - -sign_clean_memory: - tfree(pQuery->defaultVal); - - if (pQuery->sdata != NULL) { - for (int16_t col = 0; col < pQuery->numOfOutputCols; ++col) { - tfree(pQuery->sdata[col]); - } - } - - tfree(pQuery->sdata); - tfree(pQuery->pFilterInfo); - tfree(pQuery->colList); - - tfree(pExprs); - tfree(pGroupbyExpr); - - tfree(pQInfo); - - return NULL; -} - -SQInfo *vnodeAllocateQInfo(SQueryMeterMsg *pQueryMsg, SMeterObj *pObj, SSqlFunctionExpr *pExprs) { - SQInfo *pQInfo = vnodeAllocateQInfoCommon(pQueryMsg, pObj, pExprs); - if (pQInfo == NULL) { - tfree(pExprs); - return NULL; - } - - SQuery *pQuery = &(pQInfo->query); - - pQuery->sdata = (SData **)calloc(1, sizeof(SData *) * pQuery->numOfOutputCols); - if (pQuery->sdata == NULL) { - goto __clean_memory; - } - - size_t size = 0; - int32_t numOfRows = vnodeList[pObj->vnode].cfg.rowsInFileBlock; - for (int col = 0; col < pQuery->numOfOutputCols; ++col) { - size = 2 * (numOfRows * pQuery->pSelectExpr[col].resBytes + sizeof(SData)); - pQuery->sdata[col] = (SData *)malloc(size); - if (pQuery->sdata[col] == NULL) { - goto __clean_memory; - } - } - - if (pQuery->colList[0].data.colId != PRIMARYKEY_TIMESTAMP_COL_INDEX) { - size = 2 * (numOfRows * TSDB_KEYSIZE + sizeof(SData)); - pQuery->tsData = (SData *)malloc(size); - if (pQuery->tsData == NULL) { - goto __clean_memory; - } - } - - // to make sure third party won't overwrite this structure - pQInfo->signature = (uint64_t)pQInfo; - pQInfo->pObj = pObj; - pQuery->slot = -1; - pQuery->hfd = -1; - pQuery->dfd = -1; - pQuery->lfd = -1; - pQuery->pos = -1; - pQuery->interpoType = TSDB_INTERPO_NONE; - - dTrace("vid:%d sid:%d meterId:%s, QInfo is allocated:%p", pObj->vnode, pObj->sid, pObj->meterId, pQInfo); - return pQInfo; - -__clean_memory: - - tfree(pQuery->tsData); - if (pQuery->sdata != NULL) { - for (int col = 0; col < pQuery->numOfOutputCols; ++col) { - tfree(pQuery->sdata[col]); - } - } - tfree(pQuery->sdata); - tfree(pQuery->pFilterInfo); - 
tfree(pQuery->colList); - - tfree(pExprs); - - tfree(pQInfo); - - return NULL; -} - -void vnodeFreeQInfoInQueue(void *param) { - SQInfo *pQInfo = (SQInfo *)param; - - if (!vnodeIsQInfoValid(pQInfo)) return; - - pQInfo->killed = 1; - dTrace("QInfo:%p set kill flag to free QInfo"); - - vnodeDecRefCount(pQInfo); -} - -void vnodeFreeQInfo(void *param, bool decQueryRef) { - SQInfo *pQInfo = (SQInfo *)param; - if (!vnodeIsQInfoValid(param)) return; - - pQInfo->killed = 1; - SMeterObj *pObj = pQInfo->pObj; - dTrace("QInfo:%p start to free SQInfo", pQInfo); - - if (decQueryRef) { - vnodeDecMeterRefcnt(pQInfo); - } - - SQuery *pQuery = &(pQInfo->query); - tclose(pQuery->hfd); - tclose(pQuery->dfd); - tclose(pQuery->lfd); - - vnodeFreeFields(pQuery); - - tfree(pQuery->pBlock); - - for (int col = 0; col < pQuery->numOfOutputCols; ++col) { - tfree(pQuery->sdata[col]); - } - - for (int col = 0; col < pQuery->numOfCols; ++col) { - vnodeFreeColumnInfo(&pQuery->colList[col].data); - } - - if (pQuery->colList[0].colIdx != PRIMARYKEY_TIMESTAMP_COL_INDEX) { - tfree(pQuery->tsData); - } - - sem_destroy(&(pQInfo->dataReady)); - vnodeQueryFreeQInfoEx(pQInfo); - - for (int32_t i = 0; i < pQuery->numOfFilterCols; ++i) { - SSingleColumnFilterInfo *pColFilter = &pQuery->pFilterInfo[i]; - if (pColFilter->numOfFilters > 0) { - tfree(pColFilter->pFilters); - } - } - - tfree(pQuery->pFilterInfo); - tfree(pQuery->colList); - tfree(pQuery->sdata); - - if (pQuery->pSelectExpr != NULL) { - for (int32_t i = 0; i < pQuery->numOfOutputCols; ++i) { - SSqlBinaryExprInfo *pBinExprInfo = &pQuery->pSelectExpr[i].pBinExprInfo; - - if (pBinExprInfo->numOfCols > 0) { - tfree(pBinExprInfo->pReqColumns); - tSQLBinaryExprDestroy(&pBinExprInfo->pBinExpr, NULL); - } - } - - tfree(pQuery->pSelectExpr); - } - - if (pQuery->defaultVal != NULL) { - tfree(pQuery->defaultVal); - } - - tfree(pQuery->pGroupbyExpr); - dTrace("QInfo:%p vid:%d sid:%d meterId:%s, QInfo is freed", pQInfo, pObj->vnode, pObj->sid, 
pObj->meterId); - - //destroy signature, in order to avoid the query process pass the object safety check - memset(pQInfo, 0, sizeof(SQInfo)); - tfree(pQInfo); -} - -bool vnodeIsQInfoValid(void *param) { - SQInfo *pQInfo = (SQInfo *)param; - if (pQInfo == NULL) { - return false; - } - - /* - * pQInfo->signature may be changed by another thread, so we assign value of signature - * into local variable, then compare by using local variable - */ - uint64_t sig = pQInfo->signature; - return (sig == (uint64_t)pQInfo); -} - -void vnodeDecRefCount(void *param) { - SQInfo *pQInfo = (SQInfo*) param; - - assert(vnodeIsQInfoValid(pQInfo)); - - int32_t ref = atomic_sub_fetch_32(&pQInfo->refCount, 1); - assert(ref >= 0); - - dTrace("QInfo:%p decrease obj refcount, %d", pQInfo, ref); - if (ref == 0) { - vnodeFreeQInfo(pQInfo, true); - } -} - -void vnodeAddRefCount(void *param) { - SQInfo *pQInfo = (SQInfo*) param; - - assert(vnodeIsQInfoValid(pQInfo)); - - int32_t ref = atomic_add_fetch_32(&pQInfo->refCount, 1); - dTrace("QInfo:%p add refcount, %d", pQInfo, ref); -} - -void vnodeQueryData(SSchedMsg *pMsg) { - SQuery *pQuery; - SQInfo *pQInfo; - - pQInfo = (SQInfo *)pMsg->ahandle; - - if (pQInfo->killed) { - dTrace("QInfo:%p it is already killed, abort", pQInfo); - vnodeDecRefCount(pQInfo); - return; - } - - pQuery = &(pQInfo->query); - - SMeterObj *pObj = pQInfo->pObj; - - dTrace("QInfo:%p vid:%d sid:%d id:%s, query thread is created, numOfQueries:%d, func:%s", pQInfo, pObj->vnode, - pObj->sid, pObj->meterId, pObj->numOfQueries, __FUNCTION__); - - pQuery->pointsToRead = vnodeList[pObj->vnode].cfg.rowsInFileBlock; - pQuery->pointsOffset = pQInfo->bufIndex * pQuery->pointsToRead; - - int64_t st = taosGetTimestampUs(); - - while (1) { - int64_t potentNumOfRes = pQInfo->pointsRead + pQuery->pointsToRead; - /* limit the potential overflow data */ - if (pQuery->limit.limit > 0 && potentNumOfRes > pQuery->limit.limit) { - pQuery->pointsToRead = pQuery->limit.limit - pQInfo->pointsRead; 
- - if (pQuery->pointsToRead == 0) { - /* reach the limitation, abort */ - pQuery->pointsRead = 0; - pQInfo->over = 1; - break; - } - } - - pQInfo->code = (*pQInfo->fp)(pObj, pQuery); // <0:error - - // has read at least one point - if (pQuery->pointsRead > 0 || pQInfo->code < 0) break; - - if (pQuery->pointsRead == 0 && pQuery->over == 0) continue; - - if (pQInfo->changed) { - pQInfo->over = 1; - break; - } - - // has read all data in file, check data in cache - pQInfo->fp = pQueryFunc[pQuery->order.order ^ 1]; - pQInfo->changed = 1; - - pQuery->slot = -1; // reset the handle - pQuery->over = 0; - - dTrace("vid:%d sid:%d id:%s, query in other media, order:%d, skey:%" PRId64 " query:%p", pObj->vnode, pObj->sid, - pObj->meterId, pQuery->order.order, pQuery->skey, pQuery); - } - - pQInfo->pointsRead += pQuery->pointsRead; - - dTrace("vid:%d sid:%d id:%s, %d points returned, totalRead:%d totalReturn:%d last key:%" PRId64 ", query:%p", pObj->vnode, - pObj->sid, pObj->meterId, pQuery->pointsRead, pQInfo->pointsRead, pQInfo->pointsReturned, pQuery->lastKey, - pQuery); - - int64_t et = taosGetTimestampUs(); - pQInfo->useconds += et - st; - - // close FDs as soon as possible - if (pQInfo->over) { - dTrace("vid:%d sid:%d id:%s, query over, %d points are returned", pObj->vnode, pObj->sid, pObj->meterId, - pQInfo->pointsRead); - tclose(pQInfo->query.hfd); - tclose(pQInfo->query.dfd); - tclose(pQInfo->query.lfd); - } - - sem_post(&pQInfo->dataReady); - vnodeDecRefCount(pQInfo); -} - -void *vnodeQueryOnSingleTable(SMeterObj **pMetersObj, SSqlGroupbyExpr *pGroupbyExpr, SSqlFunctionExpr *pSqlExprs, - SQueryMeterMsg *pQueryMsg, int32_t *code) { - SQInfo *pQInfo; - SQuery *pQuery; - - SMeterObj *pMeterObj = pMetersObj[0]; - bool isProjQuery = vnodeIsProjectionQuery(pSqlExprs, pQueryMsg->numOfOutputCols); - - // todo pass the correct error code - if (isProjQuery && pQueryMsg->tsLen == 0) { - pQInfo = vnodeAllocateQInfo(pQueryMsg, pMeterObj, pSqlExprs); - } else { - pQInfo = 
vnodeAllocateQInfoEx(pQueryMsg, pGroupbyExpr, pSqlExprs, pMetersObj[0]); - } - - if (pQInfo == NULL) { - *code = TSDB_CODE_SERV_OUT_OF_MEMORY; - goto _error; - } - - pQuery = &(pQInfo->query); - dTrace("qmsg:%p create QInfo:%p, QInfo created", pQueryMsg, pQInfo); - - SMeterSidExtInfo** pSids = (SMeterSidExtInfo**)pQueryMsg->pSidExtInfo; - if (pSids != NULL && pSids[0]->key > 0) { - pQuery->skey = pSids[0]->key; - } else { - pQuery->skey = pQueryMsg->skey; - } - - pQuery->ekey = pQueryMsg->ekey; - pQuery->lastKey = pQuery->skey; - - pQInfo->fp = pQueryFunc[pQueryMsg->order]; - - if (sem_init(&(pQInfo->dataReady), 0, 0) != 0) { - dError("QInfo:%p vid:%d sid:%d meterId:%s, init dataReady sem failed, reason:%s", pQInfo, pMeterObj->vnode, - pMeterObj->sid, pMeterObj->meterId, strerror(errno)); - *code = TSDB_CODE_APP_ERROR; - goto _error; - } - - SSchedMsg schedMsg = {0}; - - if (isProjQuery && pQueryMsg->tsLen == 0) { - schedMsg.fp = vnodeQueryData; - } else { - if (vnodeParametersSafetyCheck(pQuery) == false) { - *code = TSDB_CODE_APP_ERROR; - goto _error; - } - - STableQuerySupportObj *pSupporter = (STableQuerySupportObj *)calloc(1, sizeof(STableQuerySupportObj)); - pSupporter->numOfMeters = 1; - - pSupporter->pMetersHashTable = taosHashInit(pSupporter->numOfMeters, taosIntHash_32, false); - taosHashPut(pSupporter->pMetersHashTable, (const char*) &pMetersObj[0]->sid, sizeof(pMeterObj[0].sid), - (char *)&pMetersObj[0], POINTER_BYTES); - - pSupporter->pSidSet = NULL; - pSupporter->subgroupIdx = -1; - pSupporter->pMeterSidExtInfo = NULL; - - pQInfo->pTableQuerySupporter = pSupporter; - - STSBuf *pTSBuf = NULL; - if (pQueryMsg->tsLen > 0) { - // open new file to save the result - char *tsBlock = (char *)pQueryMsg + pQueryMsg->tsOffset; - pTSBuf = tsBufCreateFromCompBlocks(tsBlock, pQueryMsg->tsNumOfBlocks, pQueryMsg->tsLen, pQueryMsg->tsOrder); - tsBufResetPos(pTSBuf); - tsBufNextPos(pTSBuf); - } - - if (((*code) = vnodeQueryTablePrepare(pQInfo, pQInfo->pObj, pSupporter, 
pTSBuf)) != TSDB_CODE_SUCCESS) { - goto _error; - } - - if (pQInfo->over == 1) { - vnodeAddRefCount(pQInfo); // for retrieve procedure - return pQInfo; - } - - schedMsg.fp = vnodeSingleTableQuery; - } - - /* - * The reference count, which is 2, is for both the current query thread and the future retrieve request, - * which will always be issued by client to acquire data or free SQInfo struct. - */ - vnodeAddRefCount(pQInfo); - vnodeAddRefCount(pQInfo); - - schedMsg.msg = NULL; - schedMsg.thandle = (void *)1; - schedMsg.ahandle = pQInfo; - - dTrace("QInfo:%p set query flag and prepare runtime environment completed, ref:%d, wait for schedule", pQInfo, - pQInfo->refCount); - - taosScheduleTask(tsQueryQhandle, &schedMsg); - return pQInfo; - -_error: - // table query ref will be decrease during error handling - vnodeFreeQInfo(pQInfo, false); - return NULL; -} - -/* - * query on multi-meters - */ -void *vnodeQueryOnMultiMeters(SMeterObj **pMetersObj, SSqlGroupbyExpr *pGroupbyExpr, SSqlFunctionExpr *pSqlExprs, - SQueryMeterMsg *pQueryMsg, int32_t *code) { - SQInfo *pQInfo; - SQuery *pQuery; - - assert(QUERY_IS_STABLE_QUERY(pQueryMsg->queryType) && pQueryMsg->numOfCols > 0 && pQueryMsg->pSidExtInfo != 0 && - pQueryMsg->numOfSids >= 1); - - pQInfo = vnodeAllocateQInfoEx(pQueryMsg, pGroupbyExpr, pSqlExprs, *pMetersObj); - if (pQInfo == NULL) { - *code = TSDB_CODE_SERV_OUT_OF_MEMORY; - goto _error; - } - - pQuery = &(pQInfo->query); - dTrace("qmsg:%p create QInfo:%p, QInfo created", pQueryMsg, pQInfo); - - pQuery->skey = pQueryMsg->skey; - pQuery->ekey = pQueryMsg->ekey; - - pQInfo->fp = pQueryFunc[pQueryMsg->order]; - - if (sem_init(&(pQInfo->dataReady), 0, 0) != 0) { - dError("QInfo:%p vid:%d sid:%d id:%s, init dataReady sem failed, reason:%s", pQInfo, pMetersObj[0]->vnode, - pMetersObj[0]->sid, pMetersObj[0]->meterId, strerror(errno)); - *code = TSDB_CODE_APP_ERROR; - goto _error; - } - - SSchedMsg schedMsg = {0}; - - STableQuerySupportObj *pSupporter = 
(STableQuerySupportObj *)calloc(1, sizeof(STableQuerySupportObj)); - pSupporter->numOfMeters = pQueryMsg->numOfSids; - - pSupporter->pMetersHashTable = taosHashInit(pSupporter->numOfMeters, taosIntHash_32, false); - for (int32_t i = 0; i < pSupporter->numOfMeters; ++i) { - taosHashPut(pSupporter->pMetersHashTable, (const char*) &pMetersObj[i]->sid, sizeof(pMetersObj[i]->sid), (char *)&pMetersObj[i], - POINTER_BYTES); - } - - int32_t sidElemLen = pQueryMsg->tagLength + sizeof(SMeterSidExtInfo); - - int32_t size = POINTER_BYTES * pQueryMsg->numOfSids + sidElemLen * pQueryMsg->numOfSids; - pSupporter->pMeterSidExtInfo = (SMeterSidExtInfo **)malloc(size); - if (pSupporter->pMeterSidExtInfo == NULL) { - *code = TSDB_CODE_SERV_OUT_OF_MEMORY; - dError("QInfo:%p failed to allocate memory for meterSid info, size:%d, abort", pQInfo, size); - goto _error; - } - - char *px = ((char *)pSupporter->pMeterSidExtInfo) + POINTER_BYTES * pQueryMsg->numOfSids; - - for (int32_t i = 0; i < pQueryMsg->numOfSids; ++i) { - SMeterSidExtInfo* pSrc = ((SMeterSidExtInfo **)pQueryMsg->pSidExtInfo)[i]; - SMeterSidExtInfo* pDst = (SMeterSidExtInfo *)px; - - pSupporter->pMeterSidExtInfo[i] = pDst; - pDst->sid = pSrc->sid; - pDst->uid = pSrc->uid; - pDst->key = pSrc->key; - - if (pQueryMsg->tagLength > 0) { - memcpy(pDst->tags, pSrc->tags, pQueryMsg->tagLength); - } - px += sidElemLen; - } - - if (pGroupbyExpr != NULL && pGroupbyExpr->numOfGroupCols > 0) { - pSupporter->pSidSet = - tSidSetCreate(pSupporter->pMeterSidExtInfo, pQueryMsg->numOfSids, (SSchema *)pQueryMsg->pTagSchema, - pQueryMsg->numOfTagsCols, pGroupbyExpr->columnInfo, pGroupbyExpr->numOfGroupCols); - } else { - pSupporter->pSidSet = tSidSetCreate(pSupporter->pMeterSidExtInfo, pQueryMsg->numOfSids, - (SSchema *)pQueryMsg->pTagSchema, pQueryMsg->numOfTagsCols, NULL, 0); - } - - pQInfo->pTableQuerySupporter = pSupporter; - - STSBuf *pTSBuf = NULL; - if (pQueryMsg->tsLen > 0) { - // open new file to save the result - char *tsBlock = 
(char *)pQueryMsg + pQueryMsg->tsOffset; - pTSBuf = tsBufCreateFromCompBlocks(tsBlock, pQueryMsg->tsNumOfBlocks, pQueryMsg->tsLen, pQueryMsg->tsOrder); - tsBufResetPos(pTSBuf); - } - - if (((*code) = vnodeSTableQueryPrepare(pQInfo, pQuery, pTSBuf)) != TSDB_CODE_SUCCESS) { - goto _error; - } - - vnodeAddRefCount(pQInfo); - if (pQInfo->over == 1) { - return pQInfo; - } - - vnodeAddRefCount(pQInfo); - - schedMsg.msg = NULL; - schedMsg.thandle = (void *)1; - schedMsg.ahandle = pQInfo; - schedMsg.fp = vnodeMultiMeterQuery; - - dTrace("QInfo:%p set query flag and prepare runtime environment completed, wait for schedule", pQInfo); - - taosScheduleTask(tsQueryQhandle, &schedMsg); - return pQInfo; - -_error: - // table query ref will be decrease during error handling - vnodeFreeQInfo(pQInfo, false); - return NULL; -} - -/* engine provides the storage, the app has to save the data before next - retrieve, *pNum is the number of points retrieved, and argv[] is - the point to retrieved column -*/ - -int vnodeRetrieveQueryInfo(void *handle, int *numOfRows, int *rowSize, int16_t *timePrec) { - SQInfo *pQInfo; - SQuery *pQuery; - - *numOfRows = 0; - *rowSize = 0; - - pQInfo = (SQInfo *)handle; - if (pQInfo == NULL) { - return TSDB_CODE_INVALID_QHANDLE; - } - - pQuery = &(pQInfo->query); - if (!vnodeIsQInfoValid(pQInfo) || (pQuery->sdata == NULL)) { - dError("QInfo:%p %p retrieve memory is corrupted!!! 
QInfo:%p, sign:%p, sdata:%p", pQInfo, pQuery, pQInfo, - pQInfo->signature, pQuery->sdata); - return TSDB_CODE_INVALID_QHANDLE; - } - - if (pQInfo->killed) { - dTrace("QInfo:%p query is killed, %p, code:%d", pQInfo, pQuery, pQInfo->code); - if (pQInfo->code == TSDB_CODE_SUCCESS) { - return TSDB_CODE_QUERY_CANCELLED; - } else { // in case of not TSDB_CODE_SUCCESS, return the code to client - return abs(pQInfo->code); - } - } - - sem_wait(&pQInfo->dataReady); - *numOfRows = pQInfo->pointsRead - pQInfo->pointsReturned; - *rowSize = pQuery->rowSize; - - *timePrec = vnodeList[pQInfo->pObj->vnode].cfg.precision; - - dTrace("QInfo:%p, retrieve data info completed, precision:%d, rowsize:%d, rows:%d, code:%d", pQInfo, *timePrec, - *rowSize, *numOfRows, pQInfo->code); - - if (pQInfo->code < 0) { // less than 0 means there are error existed. - return -pQInfo->code; - } - - return TSDB_CODE_SUCCESS; -} - -// vnodeRetrieveQueryInfo must be called first -int vnodeSaveQueryResult(void *handle, char *data, int32_t *size) { - SQInfo *pQInfo = (SQInfo *)handle; - - // the remained number of retrieved rows, not the interpolated result - int numOfRows = pQInfo->pointsRead - pQInfo->pointsReturned; - - int32_t numOfFinal = vnodeCopyQueryResultToMsg(pQInfo, data, numOfRows); - pQInfo->pointsReturned += numOfFinal; - - dTrace("QInfo:%p %d are returned, totalReturned:%d totalRead:%d", pQInfo, numOfFinal, pQInfo->pointsReturned, - pQInfo->pointsRead); - - if (pQInfo->over == 0) { - #ifdef _TD_ARM_ - dTrace("QInfo:%p set query flag, sig:%" PRIu64 ", func:vnodeSaveQueryResult", pQInfo, pQInfo->signature); - #else - dTrace("QInfo:%p set query flag, sig:%" PRIu64 ", func:%s", pQInfo, pQInfo->signature, __FUNCTION__); - #endif - - if (pQInfo->killed == 1) { - dTrace("%p freed or killed, abort query", pQInfo); - } else { - vnodeAddRefCount(pQInfo); - dTrace("%p add query into task queue for schedule", pQInfo); - - SSchedMsg schedMsg = {0}; - - if (pQInfo->pTableQuerySupporter != NULL) { - if 
(pQInfo->pTableQuerySupporter->pSidSet == NULL) { - schedMsg.fp = vnodeSingleTableQuery; - } else { // group by tag - schedMsg.fp = vnodeMultiMeterQuery; - } - } else { - pQInfo->bufIndex = pQInfo->bufIndex ^ 1; // exchange between 0 and 1 - schedMsg.fp = vnodeQueryData; - } - - schedMsg.msg = NULL; - schedMsg.thandle = (void *)1; - schedMsg.ahandle = pQInfo; - taosScheduleTask(tsQueryQhandle, &schedMsg); - } - } - - return numOfFinal; -} - -static int32_t validateQueryMeterMsg(SQueryMeterMsg *pQueryMsg) { - if (pQueryMsg->intervalTime < 0) { - dError("qmsg:%p illegal value of aggTimeInterval %" PRId64 "", pQueryMsg, pQueryMsg->intervalTime); - return -1; - } - - if (pQueryMsg->numOfTagsCols < 0 || pQueryMsg->numOfTagsCols > TSDB_MAX_TAGS + 1) { - dError("qmsg:%p illegal value of numOfTagsCols %d", pQueryMsg, pQueryMsg->numOfTagsCols); - return -1; - } - - if (pQueryMsg->numOfCols <= 0 || pQueryMsg->numOfCols > TSDB_MAX_COLUMNS) { - dError("qmsg:%p illegal value of numOfCols %d", pQueryMsg, pQueryMsg->numOfCols); - return -1; - } - - if (pQueryMsg->numOfSids <= 0) { - dError("qmsg:%p illegal value of numOfSids %d", pQueryMsg, pQueryMsg->numOfSids); - return -1; - } - - if (pQueryMsg->numOfGroupCols < 0) { - dError("qmsg:%p illegal value of numOfGroupbyCols %d", pQueryMsg, pQueryMsg->numOfGroupCols); - return -1; - } - - if (pQueryMsg->numOfOutputCols > TSDB_MAX_COLUMNS || pQueryMsg->numOfOutputCols <= 0) { - dError("qmsg:%p illegal value of output columns %d", pQueryMsg, pQueryMsg->numOfOutputCols); - return -1; - } - - if (pQueryMsg->tagLength < 0) { - dError("qmsg:%p illegal value of tag length %d", pQueryMsg, pQueryMsg->tagLength); - return -1; - } - - return 0; -} - -int32_t vnodeConvertQueryMeterMsg(SQueryMeterMsg *pQueryMsg) { - pQueryMsg->vnode = htons(pQueryMsg->vnode); - pQueryMsg->numOfSids = htonl(pQueryMsg->numOfSids); - -#ifdef TSKEY32 - pQueryMsg->skey = htonl(pQueryMsg->skey); - pQueryMsg->ekey = htonl(pQueryMsg->ekey); -#else - pQueryMsg->skey = 
htobe64(pQueryMsg->skey); - pQueryMsg->ekey = htobe64(pQueryMsg->ekey); -#endif - - pQueryMsg->order = htons(pQueryMsg->order); - pQueryMsg->orderColId = htons(pQueryMsg->orderColId); - - pQueryMsg->queryType = htons(pQueryMsg->queryType); - - pQueryMsg->intervalTime = htobe64(pQueryMsg->intervalTime); - pQueryMsg->slidingTime = htobe64(pQueryMsg->slidingTime); - - pQueryMsg->numOfTagsCols = htons(pQueryMsg->numOfTagsCols); - pQueryMsg->numOfCols = htons(pQueryMsg->numOfCols); - pQueryMsg->numOfOutputCols = htons(pQueryMsg->numOfOutputCols); - pQueryMsg->numOfGroupCols = htons(pQueryMsg->numOfGroupCols); - pQueryMsg->tagLength = htons(pQueryMsg->tagLength); - - pQueryMsg->limit = htobe64(pQueryMsg->limit); - pQueryMsg->offset = htobe64(pQueryMsg->offset); - pQueryMsg->tsOffset = htonl(pQueryMsg->tsOffset); - pQueryMsg->tsLen = htonl(pQueryMsg->tsLen); - pQueryMsg->tsNumOfBlocks = htonl(pQueryMsg->tsNumOfBlocks); - pQueryMsg->tsOrder = htonl(pQueryMsg->tsOrder); - - // query msg safety check - if (validateQueryMeterMsg(pQueryMsg) != 0) { - return TSDB_CODE_INVALID_QUERY_MSG; - } - - SMeterSidExtInfo **pSids = NULL; - char * pMsg = (char *)(pQueryMsg->colList) + sizeof(SColumnInfo) * pQueryMsg->numOfCols; - - for (int32_t col = 0; col < pQueryMsg->numOfCols; ++col) { - pQueryMsg->colList[col].colId = htons(pQueryMsg->colList[col].colId); - pQueryMsg->colList[col].type = htons(pQueryMsg->colList[col].type); - pQueryMsg->colList[col].bytes = htons(pQueryMsg->colList[col].bytes); - pQueryMsg->colList[col].numOfFilters = htons(pQueryMsg->colList[col].numOfFilters); - - assert(pQueryMsg->colList[col].type >= TSDB_DATA_TYPE_BOOL && pQueryMsg->colList[col].type <= TSDB_DATA_TYPE_NCHAR); - - int32_t numOfFilters = pQueryMsg->colList[col].numOfFilters; - - if (numOfFilters > 0) { - pQueryMsg->colList[col].filters = calloc(numOfFilters, sizeof(SColumnFilterInfo)); - } - - for (int32_t f = 0; f < numOfFilters; ++f) { - SColumnFilterInfo *pFilterInfo = (SColumnFilterInfo *)pMsg; 
- SColumnFilterInfo *pDestFilterInfo = &pQueryMsg->colList[col].filters[f]; - - pDestFilterInfo->filterOnBinary = htons(pFilterInfo->filterOnBinary); - - pMsg += sizeof(SColumnFilterInfo); - - if (pDestFilterInfo->filterOnBinary) { - pDestFilterInfo->len = htobe64(pFilterInfo->len); - - pDestFilterInfo->pz = (int64_t)calloc(1, pDestFilterInfo->len + 1); - memcpy((void*)pDestFilterInfo->pz, pMsg, pDestFilterInfo->len + 1); - pMsg += (pDestFilterInfo->len + 1); - } else { - pDestFilterInfo->lowerBndi = htobe64(pFilterInfo->lowerBndi); - pDestFilterInfo->upperBndi = htobe64(pFilterInfo->upperBndi); - } - - pDestFilterInfo->lowerRelOptr = htons(pFilterInfo->lowerRelOptr); - pDestFilterInfo->upperRelOptr = htons(pFilterInfo->upperRelOptr); - } - } - - bool hasArithmeticFunction = false; - - /* - * 1. simple projection query on meters, we only record the pSqlFuncExprs[i].colIdx value - * 2. for complex queries, whole SqlExprs object is required. - */ - pQueryMsg->pSqlFuncExprs = (int64_t)malloc(POINTER_BYTES * pQueryMsg->numOfOutputCols); - SSqlFuncExprMsg *pExprMsg = (SSqlFuncExprMsg *)pMsg; - - for (int32_t i = 0; i < pQueryMsg->numOfOutputCols; ++i) { - ((SSqlFuncExprMsg **)pQueryMsg->pSqlFuncExprs)[i] = pExprMsg; - - pExprMsg->colInfo.colIdx = htons(pExprMsg->colInfo.colIdx); - pExprMsg->colInfo.colId = htons(pExprMsg->colInfo.colId); - pExprMsg->colInfo.flag = htons(pExprMsg->colInfo.flag); - - pExprMsg->functionId = htons(pExprMsg->functionId); - pExprMsg->numOfParams = htons(pExprMsg->numOfParams); - - pMsg += sizeof(SSqlFuncExprMsg); - - for (int32_t j = 0; j < pExprMsg->numOfParams; ++j) { - pExprMsg->arg[j].argType = htons(pExprMsg->arg[j].argType); - pExprMsg->arg[j].argBytes = htons(pExprMsg->arg[j].argBytes); - - if (pExprMsg->arg[j].argType == TSDB_DATA_TYPE_BINARY) { - pExprMsg->arg[j].argValue.pz = pMsg; - pMsg += pExprMsg->arg[j].argBytes + 1; // one more for the string terminated char. 
- } else { - pExprMsg->arg[j].argValue.i64 = htobe64(pExprMsg->arg[j].argValue.i64); - } - } - - if (pExprMsg->functionId == TSDB_FUNC_ARITHM) { - hasArithmeticFunction = true; - } else if (pExprMsg->functionId == TSDB_FUNC_TAG || - pExprMsg->functionId == TSDB_FUNC_TAGPRJ || - pExprMsg->functionId == TSDB_FUNC_TAG_DUMMY) { - if (pExprMsg->colInfo.flag != TSDB_COL_TAG) { // ignore the column index check for arithmetic expression. - return TSDB_CODE_INVALID_QUERY_MSG; - } - } else { - if (!vnodeValidateExprColumnInfo(pQueryMsg, pExprMsg)) { - return TSDB_CODE_INVALID_QUERY_MSG; - } - } - - pExprMsg = (SSqlFuncExprMsg *)pMsg; - } - - pQueryMsg->colNameLen = htonl(pQueryMsg->colNameLen); - if (hasArithmeticFunction) { // column name array - assert(pQueryMsg->colNameLen > 0); - pQueryMsg->colNameList = (int64_t)pMsg; - pMsg += pQueryMsg->colNameLen; - } - - pSids = (SMeterSidExtInfo **)calloc(pQueryMsg->numOfSids, sizeof(SMeterSidExtInfo *)); - pQueryMsg->pSidExtInfo = (uint64_t)pSids; - - pSids[0] = (SMeterSidExtInfo *)pMsg; - pSids[0]->sid = htonl(pSids[0]->sid); - pSids[0]->uid = htobe64(pSids[0]->uid); - pSids[0]->key = htobe64(pSids[0]->key); - - for (int32_t j = 1; j < pQueryMsg->numOfSids; ++j) { - pSids[j] = (SMeterSidExtInfo *)((char *)pSids[j - 1] + sizeof(SMeterSidExtInfo) + pQueryMsg->tagLength); - pSids[j]->sid = htonl(pSids[j]->sid); - pSids[j]->uid = htobe64(pSids[j]->uid); - pSids[j]->key = htobe64(pSids[j]->key); - } - - pMsg = (char *)pSids[pQueryMsg->numOfSids - 1]; - pMsg += sizeof(SMeterSidExtInfo) + pQueryMsg->tagLength; - - if (pQueryMsg->numOfGroupCols > 0 || pQueryMsg->numOfTagsCols > 0) { // group by tag columns - pQueryMsg->pTagSchema = (uint64_t)pMsg; - SSchema *pTagSchema = (SSchema *)pQueryMsg->pTagSchema; - pMsg += sizeof(SSchema) * pQueryMsg->numOfTagsCols; - - if (pQueryMsg->numOfGroupCols > 0) { - pQueryMsg->groupbyTagIds = (uint64_t) & (pTagSchema[pQueryMsg->numOfTagsCols]); - } else { - pQueryMsg->groupbyTagIds = 0; - } - 
pQueryMsg->orderByIdx = htons(pQueryMsg->orderByIdx); - pQueryMsg->orderType = htons(pQueryMsg->orderType); - - pMsg += sizeof(SColIndexEx) * pQueryMsg->numOfGroupCols; - } else { - pQueryMsg->pTagSchema = 0; - pQueryMsg->groupbyTagIds = 0; - } - - pQueryMsg->interpoType = htons(pQueryMsg->interpoType); - if (pQueryMsg->interpoType != TSDB_INTERPO_NONE) { - pQueryMsg->defaultVal = (uint64_t)(pMsg); - - int64_t *v = (int64_t *)pMsg; - for (int32_t i = 0; i < pQueryMsg->numOfOutputCols; ++i) { - v[i] = htobe64(v[i]); - } - } - - dTrace("qmsg:%p query on %d meter(s), qrange:%" PRId64 "-%" PRId64 ", numOfGroupbyTagCols:%d, numOfTagCols:%d, timestamp order:%d, " - "tags order:%d, tags order col:%d, numOfOutputCols:%d, numOfCols:%d, interval:%" PRId64 ", fillType:%d, comptslen:%d, limit:%" PRId64 ", " - "offset:%" PRId64, - pQueryMsg, pQueryMsg->numOfSids, pQueryMsg->skey, pQueryMsg->ekey, pQueryMsg->numOfGroupCols, - pQueryMsg->numOfTagsCols, pQueryMsg->order, pQueryMsg->orderType, pQueryMsg->orderByIdx, - pQueryMsg->numOfOutputCols, pQueryMsg->numOfCols, pQueryMsg->intervalTime, pQueryMsg->interpoType, - pQueryMsg->tsLen, pQueryMsg->limit, pQueryMsg->offset); - - return 0; -} diff --git a/src/vnode/detail/src/vnodeStore.c b/src/vnode/detail/src/vnodeStore.c deleted file mode 100644 index 5949b1636d..0000000000 --- a/src/vnode/detail/src/vnodeStore.c +++ /dev/null @@ -1,409 +0,0 @@ -/* - * Copyright (c) 2019 TAOS Data, Inc. - * - * This program is free software: you can use, redistribute, and/or modify - * it under the terms of the GNU Affero General Public License, version 3 - * or later ("AGPL"), as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. - * - * You should have received a copy of the GNU Affero General Public License - * along with this program. If not, see . 
- */ - -#define _DEFAULT_SOURCE -#include "os.h" - -#include "dnodeSystem.h" -#include "trpc.h" -#include "ttime.h" -#include "vnode.h" -#include "vnodeStore.h" -#include "vnodeUtil.h" -#include "vnodeStatus.h" - -int tsMaxVnode = -1; -int tsOpenVnodes = 0; -SVnodeObj *vnodeList = NULL; - -static int vnodeInitStoreVnode(int vnode) { - SVnodeObj *pVnode = vnodeList + vnode; - - pVnode->vnode = vnode; - vnodeOpenMetersVnode(vnode); - if (pVnode->cfg.maxSessions <= 0) { - return TSDB_CODE_SUCCESS; - } - - pVnode->firstKey = taosGetTimestamp(pVnode->cfg.precision); - - pVnode->pCachePool = vnodeOpenCachePool(vnode); - if (pVnode->pCachePool == NULL) { - dError("vid:%d, cache pool init failed.", pVnode->vnode); - return TSDB_CODE_SERV_OUT_OF_MEMORY; - } - - if (vnodeInitFile(vnode) != TSDB_CODE_SUCCESS) { - dError("vid:%d, files init failed.", pVnode->vnode); - return TSDB_CODE_VG_INIT_FAILED; - } - - if (vnodeInitCommit(vnode) != TSDB_CODE_SUCCESS) { - dError("vid:%d, commit init failed.", pVnode->vnode); - return TSDB_CODE_VG_INIT_FAILED; - } - - pthread_mutex_init(&(pVnode->vmutex), NULL); - dPrint("vid:%d, storage initialized, version:%" PRIu64 " fileId:%d numOfFiles:%d", vnode, pVnode->version, pVnode->fileId, - pVnode->numOfFiles); - - return TSDB_CODE_SUCCESS; -} - -int vnodeOpenVnode(int vnode) { - int32_t code = TSDB_CODE_SUCCESS; - - SVnodeObj *pVnode = vnodeList + vnode; - - pVnode->vnode = vnode; - pVnode->accessState = TSDB_VN_ALL_ACCCESS; - - // vnode is empty - if (pVnode->cfg.maxSessions <= 0) { - return TSDB_CODE_SUCCESS; - } - - if (!(pVnode->vnodeStatus == TSDB_VN_STATUS_OFFLINE || pVnode->vnodeStatus == TSDB_VN_STATUS_CREATING)) { - dError("vid:%d, status:%s, cannot enter open operation", vnode, taosGetVnodeStatusStr(pVnode->vnodeStatus)); - return TSDB_CODE_INVALID_VNODE_STATUS; - } - - dPrint("vid:%d, status:%s, start to open", vnode, taosGetVnodeStatusStr(pVnode->vnodeStatus)); - pthread_mutex_lock(&dmutex); - - // not enough memory, abort - if 
((code = vnodeOpenShellVnode(vnode)) != TSDB_CODE_SUCCESS) { - pthread_mutex_unlock(&dmutex); - return code; - } - - vnodeOpenPeerVnode(vnode); - - if (vnode > tsMaxVnode) tsMaxVnode = vnode; - - vnodeCalcOpenVnodes(); - - pthread_mutex_unlock(&dmutex); - -#ifndef CLUSTER - vnodeOpenStreams(pVnode, NULL); -#endif - - dPrint("vid:%d, vnode is opened, openVnodes:%d, status:%s", vnode, tsOpenVnodes, taosGetVnodeStatusStr(pVnode->vnodeStatus)); - - return TSDB_CODE_SUCCESS; -} - -static int32_t vnodeMarkAllMetersDropped(SVnodeObj* pVnode) { - if (pVnode->meterList == NULL) { - return TSDB_CODE_SUCCESS; - } - - bool ready = true; - for (int sid = 0; sid < pVnode->cfg.maxSessions; ++sid) { - if (!vnodeIsSafeToDeleteMeter(pVnode, sid)) { - ready = false; - } else { // set the meter is to be deleted - SMeterObj* pObj = pVnode->meterList[sid]; - if (pObj != NULL) { - pObj->state = TSDB_METER_STATE_DROPPED; - } - } - } - - return ready? TSDB_CODE_SUCCESS:TSDB_CODE_ACTION_IN_PROGRESS; -} - -static int vnodeCloseVnode(int vnode) { - if (vnodeList == NULL) return TSDB_CODE_SUCCESS; - - SVnodeObj* pVnode = &vnodeList[vnode]; - - pthread_mutex_lock(&dmutex); - if (pVnode->cfg.maxSessions == 0) { - pthread_mutex_unlock(&dmutex); - return TSDB_CODE_SUCCESS; - } - - if (pVnode->vnodeStatus == TSDB_VN_STATUS_DELETING) { - dPrint("vid:%d, status:%s, another thread performed delete operation", vnode, taosGetVnodeStatusStr(pVnode->vnodeStatus)); - return TSDB_CODE_SUCCESS; - } else { - dPrint("vid:%d, status:%s, enter close operation", vnode, taosGetVnodeStatusStr(pVnode->vnodeStatus)); - pVnode->vnodeStatus = TSDB_VN_STATUS_CLOSING; - } - - // set the meter is dropped flag - if (vnodeMarkAllMetersDropped(pVnode) != TSDB_CODE_SUCCESS) { - pthread_mutex_unlock(&dmutex); - return TSDB_CODE_ACTION_IN_PROGRESS; - } - - dPrint("vid:%d, status:%s, enter delete operation", vnode, taosGetVnodeStatusStr(pVnode->vnodeStatus)); - pVnode->vnodeStatus = TSDB_VN_STATUS_DELETING; - - 
vnodeCloseStream(vnodeList + vnode); - vnodeCancelCommit(vnodeList + vnode); - vnodeClosePeerVnode(vnode); - vnodeCloseMetersVnode(vnode); - vnodeCloseShellVnode(vnode); - vnodeCloseCachePool(vnode); - vnodeCleanUpCommit(vnode); - - pthread_mutex_destroy(&(vnodeList[vnode].vmutex)); - - if (tsMaxVnode == vnode) tsMaxVnode = vnode - 1; - - tfree(vnodeList[vnode].meterIndex); - - pthread_mutex_unlock(&dmutex); - return TSDB_CODE_SUCCESS; -} - -int vnodeCreateVnode(int vnode, SVnodeCfg *pCfg, SVPeerDesc *pDesc) { - char fileName[128]; - - if (vnodeList[vnode].vnodeStatus != TSDB_VN_STATUS_OFFLINE) { - dError("vid:%d, status:%s, cannot enter create operation", vnode, taosGetVnodeStatusStr(vnodeList[vnode].vnodeStatus)); - return TSDB_CODE_INVALID_VNODE_STATUS; - } - - vnodeList[vnode].vnodeStatus = TSDB_VN_STATUS_CREATING; - - sprintf(fileName, "%s/vnode%d", tsDirectory, vnode); - if (mkdir(fileName, 0755) != 0) { - dError("failed to create vnode:%d directory:%s, errno:%d, reason:%s", vnode, fileName, errno, strerror(errno)); - if (errno == EACCES) { - return TSDB_CODE_NO_DISK_PERMISSIONS; - } else if (errno == ENOSPC) { - return TSDB_CODE_SERV_NO_DISKSPACE; - } else if (errno == EEXIST) { - } else { - return TSDB_CODE_VG_INIT_FAILED; - } - } - - sprintf(fileName, "%s/vnode%d/db", tsDirectory, vnode); - if (mkdir(fileName, 0755) != 0) { - dError("failed to create vnode:%d directory:%s, errno:%d, reason:%s", vnode, fileName, errno, strerror(errno)); - if (errno == EACCES) { - return TSDB_CODE_NO_DISK_PERMISSIONS; - } else if (errno == ENOSPC) { - return TSDB_CODE_SERV_NO_DISKSPACE; - } else if (errno == EEXIST) { - } else { - return TSDB_CODE_VG_INIT_FAILED; - } - } - - vnodeList[vnode].cfg = *pCfg; - int code = vnodeCreateMeterObjFile(vnode); - if (code != TSDB_CODE_SUCCESS) { - return code; - } - - code = vnodeSaveVnodeCfg(vnode, pCfg, pDesc); - if (code != TSDB_CODE_SUCCESS) { - return TSDB_CODE_VG_INIT_FAILED; - } - - code = vnodeInitStoreVnode(vnode); - if (code != 
TSDB_CODE_SUCCESS) { - return code; - } - - return vnodeOpenVnode(vnode); -} - -static void vnodeRemoveDataFiles(int vnode) { - char vnodeDir[TSDB_FILENAME_LEN]; - char dfilePath[TSDB_FILENAME_LEN]; - char linkFile[TSDB_FILENAME_LEN]; - struct dirent *de = NULL; - DIR * dir = NULL; - - sprintf(vnodeDir, "%s/vnode%d/db", tsDirectory, vnode); - dir = opendir(vnodeDir); - if (dir == NULL) return; - while ((de = readdir(dir)) != NULL) { - if (strcmp(de->d_name, ".") == 0 || strcmp(de->d_name, "..") == 0) continue; - if ((strcmp(de->d_name + strlen(de->d_name) - strlen(".head"), ".head") == 0 || - strcmp(de->d_name + strlen(de->d_name) - strlen(".data"), ".data") == 0 || - strcmp(de->d_name + strlen(de->d_name) - strlen(".last"), ".last") == 0) && - (de->d_type & DT_LNK)) { - sprintf(linkFile, "%s/%s", vnodeDir, de->d_name); - - if (!vnodeRemoveDataFileFromLinkFile(linkFile, de->d_name)) { - continue; - } - - memset(dfilePath, 0, TSDB_FILENAME_LEN); - int tcode = readlink(linkFile, dfilePath, TSDB_FILENAME_LEN); - remove(linkFile); - - if (tcode >= 0) { - remove(dfilePath); - dPrint("Data file %s is removed, link file %s", dfilePath, linkFile); - } - } else { - remove(de->d_name); - } - } - - closedir(dir); - rmdir(vnodeDir); - - sprintf(vnodeDir, "%s/vnode%d/meterObj.v%d", tsDirectory, vnode, vnode); - remove(vnodeDir); - - sprintf(vnodeDir, "%s/vnode%d", tsDirectory, vnode); - rmdir(vnodeDir); - dPrint("vid:%d, vnode is removed, status:%s", vnode, taosGetVnodeStatusStr(vnodeList[vnode].vnodeStatus)); -} - -int vnodeRemoveVnode(int vnode) { - if (vnodeList == NULL) return TSDB_CODE_SUCCESS; - - if (vnodeList[vnode].cfg.maxSessions > 0) { - SVnodeObj* pVnode = &vnodeList[vnode]; - if (pVnode->vnodeStatus == TSDB_VN_STATUS_CREATING - || pVnode->vnodeStatus == TSDB_VN_STATUS_OFFLINE - || pVnode->vnodeStatus == TSDB_VN_STATUS_DELETING) { - dTrace("vid:%d, status:%s, cannot enter close/delete operation", vnode, taosGetVnodeStatusStr(pVnode->vnodeStatus)); - return 
TSDB_CODE_ACTION_IN_PROGRESS; - } else { - int32_t ret = vnodeCloseVnode(vnode); - if (ret != TSDB_CODE_SUCCESS) { - return ret; - } - - dTrace("vid:%d, status:%s, do delete operation", vnode, taosGetVnodeStatusStr(pVnode->vnodeStatus)); - vnodeRemoveDataFiles(vnode); - } - - } else { - dPrint("vid:%d, max sessions:%d, this vnode already dropped!!!", vnode, vnodeList[vnode].cfg.maxSessions); - vnodeList[vnode].cfg.maxSessions = 0; //reset value - vnodeCalcOpenVnodes(); - } - - return TSDB_CODE_SUCCESS; -} - -int vnodeInitStore() { - int vnode; - int size; - - size = sizeof(SVnodeObj) * TSDB_MAX_VNODES; - vnodeList = (SVnodeObj *)malloc(size); - if (vnodeList == NULL) return -1; - memset(vnodeList, 0, size); - - if (vnodeInitInfo() < 0) return -1; - - for (vnode = 0; vnode < TSDB_MAX_VNODES; ++vnode) { - int code = vnodeInitStoreVnode(vnode); - if (code != TSDB_CODE_SUCCESS) { - // one vnode is failed to recover from commit log, continue for remain - return -1; - } - } - - return 0; -} - -int vnodeInitVnodes() { - int vnode; - - for (vnode = 0; vnode < TSDB_MAX_VNODES; ++vnode) { - if (vnodeOpenVnode(vnode) < 0) return -1; - } - - return 0; -} - -void vnodeCleanUpOneVnode(int vnode) { - static int again = 0; - if (vnodeList == NULL) return; - - pthread_mutex_lock(&dmutex); - - if (again) { - pthread_mutex_unlock(&dmutex); - return; - } - again = 1; - - if (vnodeList[vnode].pCachePool) { - vnodeList[vnode].vnodeStatus = TSDB_VN_STATUS_OFFLINE; - vnodeClosePeerVnode(vnode); - } - - pthread_mutex_unlock(&dmutex); - - if (vnodeList[vnode].pCachePool) { - vnodeProcessCommitTimer(vnodeList + vnode, NULL); - while (vnodeList[vnode].commitThread != 0) { - taosMsleep(10); - } - vnodeCleanUpCommit(vnode); - } -} - -void vnodeCleanUpVnodes() { - static int again = 0; - if (vnodeList == NULL) return; - - pthread_mutex_lock(&dmutex); - - if (again) { - pthread_mutex_unlock(&dmutex); - return; - } - again = 1; - - for (int vnode = 0; vnode < TSDB_MAX_VNODES; ++vnode) { - if 
(vnodeList[vnode].pCachePool) { - vnodeList[vnode].vnodeStatus = TSDB_VN_STATUS_OFFLINE; - vnodeClosePeerVnode(vnode); - } - } - - pthread_mutex_unlock(&dmutex); - - for (int vnode = 0; vnode < TSDB_MAX_VNODES; ++vnode) { - if (vnodeList[vnode].pCachePool) { - vnodeProcessCommitTimer(vnodeList + vnode, NULL); - while (vnodeList[vnode].commitThread != 0) { - taosMsleep(10); - } - vnodeCleanUpCommit(vnode); - } - } -} - -void vnodeCalcOpenVnodes() { - int openVnodes = 0; - for (int vnode = 0; vnode <= tsMaxVnode; ++vnode) { - if (vnodeList[vnode].cfg.maxSessions <= 0) continue; - openVnodes++; - } - - atomic_store_32(&tsOpenVnodes, openVnodes); -} - -void vnodeUpdateHeadFile(int vnode, int oldTables, int newTables) { - //todo rewrite the head file with newTables -} diff --git a/src/vnode/detail/src/vnodeStore.spec.c b/src/vnode/detail/src/vnodeStore.spec.c deleted file mode 100644 index 2259d1267e..0000000000 --- a/src/vnode/detail/src/vnodeStore.spec.c +++ /dev/null @@ -1,22 +0,0 @@ -/* - * Copyright (c) 2019 TAOS Data, Inc. - * - * This program is free software: you can use, redistribute, and/or modify - * it under the terms of the GNU Affero General Public License, version 3 - * or later ("AGPL"), as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. - * - * You should have received a copy of the GNU Affero General Public License - * along with this program. If not, see . 
- */ - -#define _DEFAULT_SOURCE -#include - -int vnodeInitInfo() { return 0; } - -bool vnodeRemoveDataFileFromLinkFile(char* linkFile, char* de_name) { return true; } - diff --git a/src/vnode/detail/src/vnodeStream.c b/src/vnode/detail/src/vnodeStream.c deleted file mode 100644 index d3f1d0fdc1..0000000000 --- a/src/vnode/detail/src/vnodeStream.c +++ /dev/null @@ -1,207 +0,0 @@ -/* - * Copyright (c) 2019 TAOS Data, Inc. - * - * This program is free software: you can use, redistribute, and/or modify - * it under the terms of the GNU Affero General Public License, version 3 - * or later ("AGPL"), as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. - * - * You should have received a copy of the GNU Affero General Public License - * along with this program. If not, see . - */ - -#define _DEFAULT_SOURCE -#include "taosmsg.h" -#include "vnode.h" -#include "vnodeUtil.h" -#include "vnodeStatus.h" - -/* static TAOS *dbConn = NULL; */ -void vnodeCloseStreamCallback(void *param); - -void vnodeProcessStreamRes(void *param, TAOS_RES *tres, TAOS_ROW row) { - SMeterObj *pObj = (SMeterObj *)param; - dTrace("vid:%d sid:%d id:%s, stream result is ready", pObj->vnode, pObj->sid, pObj->meterId); - - // construct data - int32_t contLen = pObj->bytesPerPoint; - char * pTemp = calloc(1, sizeof(SSubmitMsg) + pObj->bytesPerPoint + sizeof(SVMsgHeader)); - SSubmitMsg *pMsg = (SSubmitMsg *)(pTemp + sizeof(SVMsgHeader)); - - pMsg->numOfRows = htons(1); - - char ncharBuf[TSDB_MAX_BYTES_PER_ROW] = {0}; - - int32_t offset = 0; - for (int32_t i = 0; i < pObj->numOfColumns; ++i) { - char *dst = row[i]; - if (dst == NULL) { - setNull(pMsg->payLoad + offset, pObj->schema[i].type, pObj->schema[i].bytes); - } else { - // here, we need to transfer nchar(utf8) to unicode(ucs-4) - if (pObj->schema[i].type == 
TSDB_DATA_TYPE_NCHAR) { - taosMbsToUcs4(row[i], pObj->schema[i].bytes, ncharBuf, TSDB_MAX_BYTES_PER_ROW); - dst = ncharBuf; - } - - memcpy(pMsg->payLoad + offset, dst, pObj->schema[i].bytes); - } - - offset += pObj->schema[i].bytes; - } - - contLen += sizeof(SSubmitMsg); - - int32_t numOfPoints = 0; - int32_t code = vnodeInsertPoints(pObj, (char *)pMsg, contLen, TSDB_DATA_SOURCE_SHELL, NULL, pObj->sversion, - &numOfPoints, taosGetTimestamp(vnodeList[pObj->vnode].cfg.precision)); - - if (code != TSDB_CODE_SUCCESS) { - dError("vid:%d sid:%d id:%s, failed to insert continuous query results", pObj->vnode, pObj->sid, pObj->meterId); - } - - assert(numOfPoints >= 0 && numOfPoints <= 1); - tfree(pTemp); -} - -static void vnodeGetDBFromMeterId(SMeterObj *pObj, char *db) { - char *st = strstr(pObj->meterId, "."); - char *end = strstr(st + 1, "."); - - memcpy(db, st + 1, end - (st + 1)); -} - -void vnodeOpenStreams(void *param, void *tmrId) { - SVnodeObj *pVnode = (SVnodeObj *)param; - SMeterObj *pObj; - - if (pVnode->streamRole == TSDB_VN_STREAM_STATUS_STOP) return; - if (pVnode->meterList == NULL) return; - - taosTmrStopA(&pVnode->streamTimer); - pVnode->streamTimer = NULL; - - for (int sid = 0; sid < pVnode->cfg.maxSessions; ++sid) { - pObj = pVnode->meterList[sid]; - if (pObj == NULL || pObj->sqlLen == 0 || vnodeIsMeterState(pObj, TSDB_METER_STATE_DROPPING)) continue; - - dTrace("vid:%d sid:%d id:%s, open stream:%s", pObj->vnode, sid, pObj->meterId, pObj->pSql); - - if (pVnode->dbConn == NULL) { - char db[64] = {0}; - char user[64] = {0}; - vnodeGetDBFromMeterId(pObj, db); - sprintf(user, "_%s", pVnode->cfg.acct); - pVnode->dbConn = taos_connect(NULL, user, tsInternalPass, db, 0); - } - - if (pVnode->dbConn == NULL) { - dError("vid:%d, failed to connect to mgmt node", pVnode->vnode); - taosTmrReset(vnodeOpenStreams, 1000, param, vnodeTmrCtrl, &pVnode->streamTimer); - return; - } - - if (pObj->pStream == NULL) { - pObj->pStream = taos_open_stream(pVnode->dbConn, 
pObj->pSql, vnodeProcessStreamRes, pObj->lastKey, pObj, - vnodeCloseStreamCallback); - if (pObj->pStream) pVnode->numOfStreams++; - } - } -} - -void vnodeCreateStream(SMeterObj *pObj) { - if (pObj->sqlLen <= 0) return; - - SVnodeObj *pVnode = vnodeList + pObj->vnode; - - if (pVnode->streamRole == TSDB_VN_STREAM_STATUS_STOP) return; - if (pObj->pStream) return; - - dTrace("vid:%d sid:%d id:%s stream:%s is created", pObj->vnode, pObj->sid, pObj->meterId, pObj->pSql); - if (pVnode->dbConn == NULL) { - if (pVnode->streamTimer == NULL) taosTmrReset(vnodeOpenStreams, 1000, pVnode, vnodeTmrCtrl, &pVnode->streamTimer); - } else { - pObj->pStream = taos_open_stream(pVnode->dbConn, pObj->pSql, vnodeProcessStreamRes, pObj->lastKey, pObj, - vnodeCloseStreamCallback); - if (pObj->pStream) pVnode->numOfStreams++; - } -} - -// Close only one stream -void vnodeRemoveStream(SMeterObj *pObj) { - SVnodeObj *pVnode = vnodeList + pObj->vnode; - if (pObj->sqlLen <= 0) return; - - if (pObj->pStream) { - taos_close_stream(pObj->pStream); - pVnode->numOfStreams--; - } - - pObj->pStream = NULL; - if (pVnode->numOfStreams == 0) { - taos_close(pVnode->dbConn); - pVnode->dbConn = NULL; - } - - dTrace("vid:%d sid:%d id:%d stream is removed", pObj->vnode, pObj->sid, pObj->meterId); -} - -// Close all streams in a vnode -void vnodeCloseStream(SVnodeObj *pVnode) { - SMeterObj *pObj; - dPrint("vid:%d, stream is closed, old role %s", pVnode->vnode, taosGetVnodeStreamStatusStr(pVnode->streamRole)); - - // stop stream computing - for (int sid = 0; sid < pVnode->cfg.maxSessions; ++sid) { - pObj = pVnode->meterList[sid]; - if (pObj == NULL) continue; - if (pObj->sqlLen > 0 && pObj->pStream) { - taos_close_stream(pObj->pStream); - pVnode->numOfStreams--; - } - pObj->pStream = NULL; - } -} - -void vnodeUpdateStreamRole(SVnodeObj *pVnode) { - /* SMeterObj *pObj; */ - - int newRole = (pVnode->vnodeStatus == TSDB_VN_STATUS_MASTER) ? 
TSDB_VN_STREAM_STATUS_START : TSDB_VN_STREAM_STATUS_STOP; - if (newRole != pVnode->streamRole) { - dPrint("vid:%d, stream role is changed from %s to %s", - pVnode->vnode, taosGetVnodeStreamStatusStr(pVnode->streamRole), taosGetVnodeStreamStatusStr(newRole)); - pVnode->streamRole = newRole; - if (newRole == TSDB_VN_STREAM_STATUS_START) { - vnodeOpenStreams(pVnode, NULL); - } else { - vnodeCloseStream(pVnode); - } - } else { - dPrint("vid:%d, stream role is keep to %s", pVnode->vnode, taosGetVnodeStreamStatusStr(pVnode->streamRole)); - } -} - -// Callback function called from client -void vnodeCloseStreamCallback(void *param) { - SMeterObj *pTable = (SMeterObj *)param; - SVnodeObj *pVnode = NULL; - - if (pTable == NULL || pTable->sqlLen == 0) return; - pVnode = vnodeList + pTable->vnode; - - pTable->sqlLen = 0; - pTable->pSql = NULL; - pTable->pStream = NULL; - - pVnode->numOfStreams--; - - if (pVnode->numOfStreams == 0) { - taos_close(pVnode->dbConn); - pVnode->dbConn = NULL; - } - - vnodeSaveMeterObjToFile(pTable); -} \ No newline at end of file diff --git a/src/vnode/detail/src/vnodeSupertableQuery.c b/src/vnode/detail/src/vnodeSupertableQuery.c deleted file mode 100644 index 36cb7ad741..0000000000 --- a/src/vnode/detail/src/vnodeSupertableQuery.c +++ /dev/null @@ -1,874 +0,0 @@ -/* - * Copyright (c) 2019 TAOS Data, Inc. - * - * This program is free software: you can use, redistribute, and/or modify - * it under the terms of the GNU Affero General Public License, version 3 - * or later ("AGPL"), as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. - * - * You should have received a copy of the GNU Affero General Public License - * along with this program. If not, see . 
- */ - -#define _DEFAULT_SOURCE -#include "mnode.h" -#include "os.h" -#include "qast.h" -#include "qextbuffer.h" -#include "tschemautil.h" -#include "tsqlfunction.h" - -typedef struct SSyntaxTreeFilterSupporter { - SSchema* pTagSchema; - int32_t numOfTags; - int32_t optr; -} SSyntaxTreeFilterSupporter; - -typedef struct SJoinSupporter { - void** val; - void** pTabObjs; - int32_t size; - - int16_t type; - int16_t colIndex; - - void** qualMeterObj; - int32_t qualSize; -} SJoinSupporter; - -typedef struct SMeterNameFilterSupporter { - SPatternCompareInfo info; - char* pattern; -} SMeterNameFilterSupporter; - -static void tansformQueryResult(tQueryResultset* pRes); -static bool tSkipListNodeFilterCallback(const void *pNode, void *param); - -static int32_t tabObjVGIDComparator(const void* pLeft, const void* pRight) { - STabObj* p1 = *(STabObj**)pLeft; - STabObj* p2 = *(STabObj**)pRight; - - int32_t ret = p1->gid.vgId - p2->gid.vgId; - if (ret == 0) { - return ret; - } else { - return ret > 0 ? 1 : -1; - } -} - -// monotonic inc in memory address -static int32_t tabObjPointerComparator(const void* pLeft, const void* pRight) { - int64_t ret = (*(STabObj**)(pLeft))->uid - (*(STabObj**)(pRight))->uid; - if (ret == 0) { - return 0; - } else { - return ret > 0 ? 
1 : -1; - } -} - -static int32_t tabObjResultComparator(const void* p1, const void* p2, void* param) { - tOrderDescriptor* pOrderDesc = (tOrderDescriptor*)param; - - STabObj* pNode1 = (STabObj*)p1; - STabObj* pNode2 = (STabObj*)p2; - - for (int32_t i = 0; i < pOrderDesc->orderIdx.numOfCols; ++i) { - int32_t colIdx = pOrderDesc->orderIdx.pData[i]; - - char* f1 = NULL; - char* f2 = NULL; - - SSchema schema = {0}; - - if (colIdx == -1) { - f1 = pNode1->meterId; - f2 = pNode2->meterId; - schema.type = TSDB_DATA_TYPE_BINARY; - schema.bytes = TSDB_TABLE_ID_LEN; - } else { - f1 = mgmtMeterGetTag(pNode1, colIdx, NULL); - f2 = mgmtMeterGetTag(pNode2, colIdx, &schema); - - SSchema* pSchema = getColumnModelSchema(pOrderDesc->pColumnModel, colIdx); - assert(schema.type == pSchema->type); - } - - int32_t ret = doCompare(f1, f2, schema.type, schema.bytes); - if (ret == 0) { - continue; - } else { - return ret; - } - } - - return 0; -} - -/** - * update the tag order index according to the tags column index. The tags column index needs to be checked one-by-one, - * since the normal columns may be passed to server for handling the group by on status column. - * - * @param pSuperTableMetaMsg - * @param tableIndex - * @param pOrderIndexInfo - * @param numOfTags - */ -static void mgmtUpdateOrderTagColIndex(SMetricMetaMsg* pMetricMetaMsg, int32_t tableIndex, SColumnOrderInfo* pOrderIndexInfo, - int32_t numOfTags) { - SMetricMetaElemMsg* pElem = (SMetricMetaElemMsg*)((char*)pSuperTableMetaMsg + pSuperTableMetaMsg->metaElem[tableIndex]); - SColIndexEx* groupColumnList = (SColIndexEx*)((char*)pSuperTableMetaMsg + pElem->groupbyTagColumnList); - - int32_t numOfGroupbyTags = 0; - for (int32_t i = 0; i < pElem->numOfGroupCols; ++i) { - if (groupColumnList[i].flag == TSDB_COL_TAG) { // ignore this column if it is not a tag column. 
- pOrderIndexInfo->pData[numOfGroupbyTags++] = groupColumnList[i].colIdx; - - assert(groupColumnList[i].colIdx < numOfTags); - } - } - - pOrderIndexInfo->numOfCols = numOfGroupbyTags; -} - -// todo merge sort function with losertree used -void mgmtReorganizeMetersInMetricMeta(SSuperTableMetaMsg* pSuperTableMetaMsg, int32_t tableIndex, tQueryResultset* pRes) { - if (pRes->num <= 0) { // no result, no need to pagination - return; - } - - SMetricMetaElemMsg* pElem = (SMetricMetaElemMsg*)((char*)pSuperTableMetaMsg + pSuperTableMetaMsg->metaElem[tableIndex]); - - STabObj* pMetric = mgmtGetTable(pElem->meterId); - SSchema* pTagSchema = (SSchema*)(pMetric->schema + pMetric->numOfColumns * sizeof(SSchema)); - - /* - * To apply the group limitation and group offset, we should sort the result - * list according to the order condition - */ - tOrderDescriptor* descriptor = - (tOrderDescriptor*)calloc(1, sizeof(tOrderDescriptor) + sizeof(int32_t) * pElem->numOfGroupCols); - descriptor->pColumnModel = createColumnModel(pTagSchema, pMetric->numOfTags, 1); - descriptor->orderIdx.numOfCols = pElem->numOfGroupCols; - - int32_t* startPos = NULL; - int32_t numOfSubset = 1; - - mgmtUpdateOrderTagColIndex(pMetricMetaMsg, tableIndex, &descriptor->orderIdx, pMetric->numOfTags); - if (descriptor->orderIdx.numOfCols > 0) { - tQSortEx(pRes->pRes, POINTER_BYTES, 0, pRes->num - 1, descriptor, tabObjResultComparator); - startPos = calculateSubGroup(pRes->pRes, pRes->num, &numOfSubset, descriptor, tabObjResultComparator); - } else { - startPos = malloc(2 * sizeof(int32_t)); - - startPos[0] = 0; - startPos[1] = (int32_t)pRes->num; - } - - /* - * sort the result according to vgid to ensure meters with the same vgid is - * continuous in the result list - */ - qsort(pRes->pRes, (size_t)pRes->num, POINTER_BYTES, tabObjVGIDComparator); - - free(descriptor->pColumnModel); - free(descriptor); - free(startPos); -} - -static void mgmtRetrieveByMeterName(tQueryResultset* pRes, char* str, STabObj* pMetric) 
{ - const char* sep = ","; - char* pToken = NULL; - - int32_t s = 4; // initial size - - pRes->pRes = malloc(sizeof(char*) * s); - pRes->num = 0; - - for (pToken = strsep(&str, sep); pToken != NULL; pToken = strsep(&str, sep)) { - STabObj* pMeterObj = mgmtGetTable(pToken); - if (pMeterObj == NULL) { - mWarn("metric:%s error in metric query expression, invalid meter id:%s", pMetric->meterId, pToken); - continue; - } - - if (pRes->num >= s) { - s += (s >> 1); // increase 50% size - pRes->pRes = realloc(pRes->pRes, sizeof(char*) * s); - } - - /* not a table created from metric, ignore */ - if (pMeterObj->tableType != TSDB_TABLE_TYPE_CHILD_TABLE) { - continue; - } - - /* - * queried meter not belongs to this metric, ignore, metric does not have - * uid, so compare according to meterid - */ - STabObj* parentMetric = mgmtGetTable(pMeterObj->pTagData); - if (strncasecmp(parentMetric->meterId, pMetric->meterId, TSDB_TABLE_ID_LEN) != 0 || - (parentMetric->uid != pMetric->uid)) { - continue; - } - - pRes->pRes[pRes->num++] = pMeterObj; - } -} - -static bool mgmtTablenameFilterCallback(tSkipListNode* pNode, void* param) { - SMeterNameFilterSupporter* pSupporter = (SMeterNameFilterSupporter*)param; - - char name[TSDB_TABLE_ID_LEN] = {0}; - - // pattern compare for meter name - STabObj* pMeterObj = (STabObj*)pNode->pData; - extractTableName(pMeterObj->meterId, name); - - return patternMatch(pSupporter->pattern, name, TSDB_TABLE_ID_LEN, &pSupporter->info) == TSDB_PATTERN_MATCH; -} - -static void mgmtRetrieveFromLikeOptr(tQueryResultset* pRes, const char* str, STabObj* pMetric) { - SPatternCompareInfo info = PATTERN_COMPARE_INFO_INITIALIZER; - SMeterNameFilterSupporter supporter = {info, (char*) str}; - - pRes->num = - tSkipListIterateList(pMetric->pSkipList, (tSkipListNode***)&pRes->pRes, mgmtTablenameFilterCallback, &supporter); -} - -static void mgmtFilterByTableNameCond(tQueryResultset* pRes, char* condStr, int32_t len, STabObj* pMetric) { - pRes->num = 0; - if (len <= 0) { - 
return; - } - - char* str = calloc(1, (size_t)len + 1); - memcpy(str, condStr, len); - - if (strncasecmp(condStr, QUERY_COND_REL_PREFIX_IN, QUERY_COND_REL_PREFIX_IN_LEN) == 0) { // handle in expression - mgmtRetrieveByMeterName(pRes, str + QUERY_COND_REL_PREFIX_IN_LEN, pMetric); - } else { // handle like expression - assert(strncasecmp(str, QUERY_COND_REL_PREFIX_LIKE, QUERY_COND_REL_PREFIX_LIKE_LEN) == 0); - mgmtRetrieveFromLikeOptr(pRes, str + QUERY_COND_REL_PREFIX_LIKE_LEN, pMetric); - - tansformQueryResult(pRes); - } - - free(str); -} - -UNUSED_FUNC static bool mgmtJoinFilterCallback(tSkipListNode* pNode, void* param) { - SJoinSupporter* pSupporter = (SJoinSupporter*)param; - - SSchema s = {0}; - char* v = mgmtTableGetTag((STabObj*)pNode->pData, pSupporter->colIndex, &s); - - for (int32_t i = 0; i < pSupporter->size; ++i) { - int32_t ret = doCompare(v, pSupporter->val[i], pSupporter->type, s.bytes); - if (ret == 0) { - pSupporter->qualMeterObj[pSupporter->qualSize++] = pSupporter->pTabObjs[i]; - - /* - * Once a value is qualified according to the join condition, it is remove from the - * candidate list, as well as its corresponding meter object. 
- * - * last one does not need to move forward - */ - if (i < pSupporter->size - 1) { - memmove(pSupporter->val[i], pSupporter->val[i + 1], pSupporter->size - (i + 1)); - } - - pSupporter->size -= 1; - - return true; - } - } - - return false; -} - -static void orderResult(SSuperTableMetaMsg* pSuperTableMetaMsg, tQueryResultset* pRes, int16_t colIndex, int32_t tableIndex) { - SMetricMetaElemMsg* pElem = (SMetricMetaElemMsg*)((char*)pSuperTableMetaMsg + pSuperTableMetaMsg->metaElem[tableIndex]); - - tOrderDescriptor* descriptor = - (tOrderDescriptor*)calloc(1, sizeof(tOrderDescriptor) + sizeof(int32_t) * 1); // only one column for join - - STabObj* pMetric = mgmtGetTable(pElem->meterId); - SSchema* pTagSchema = (SSchema*)(pMetric->schema + pMetric->numOfColumns * sizeof(SSchema)); - - descriptor->pColumnModel = createColumnModel(pTagSchema, pMetric->numOfTags, 1); - - descriptor->orderIdx.pData[0] = colIndex; - descriptor->orderIdx.numOfCols = 1; - - // sort results list - tQSortEx(pRes->pRes, POINTER_BYTES, 0, pRes->num - 1, descriptor, tabObjResultComparator); - - free(descriptor->pColumnModel); - free(descriptor); -} - -// check for duplicate join tags -static int32_t mgmtCheckForDuplicateTagValue(tQueryResultset* pRes, int32_t index, int32_t tagCol) { - SSchema s = {0}; - - for (int32_t k = 1; k < pRes[index].num; ++k) { - STabObj* pObj1 = pRes[index].pRes[k - 1]; - STabObj* pObj2 = pRes[index].pRes[k]; - - char* val1 = mgmtTableGetTag(pObj1, tagCol, &s); - char* val2 = mgmtTableGetTag(pObj2, tagCol, NULL); - - if (doCompare(val1, val2, s.type, s.bytes) == 0) { - return TSDB_CODE_DUPLICATE_TAGS; - } - } - - return TSDB_CODE_SUCCESS; -} - -int32_t mgmtDoJoin(SSuperTableMetaMsg* pSuperTableMetaMsg, tQueryResultset* pRes) { - if (pSuperTableMetaMsg->numOfMeters == 1) { - return TSDB_CODE_SUCCESS; - } - - bool allEmpty = false; - for (int32_t i = 0; i < pSuperTableMetaMsg->numOfMeters; ++i) { - if (pRes[i].num == 0) { // all results are empty if one of them is empty 
- allEmpty = true; - break; - } - } - - if (allEmpty) { - for (int32_t i = 0; i < pSuperTableMetaMsg->numOfMeters; ++i) { - pRes[i].num = 0; - tfree(pRes[i].pRes); - } - - return TSDB_CODE_SUCCESS; - } - - char* cond = (char*)pSuperTableMetaMsg + pSuperTableMetaMsg->join; - - char left[TSDB_TABLE_ID_LEN + 1] = {0}; - strcpy(left, cond); - int16_t leftTagColIndex = *(int16_t*)(cond + TSDB_TABLE_ID_LEN); - - char right[TSDB_TABLE_ID_LEN + 1] = {0}; - strcpy(right, cond + TSDB_TABLE_ID_LEN + sizeof(int16_t)); - int16_t rightTagColIndex = *(int16_t*)(cond + TSDB_TABLE_ID_LEN * 2 + sizeof(int16_t)); - - STabObj* pLeftMetric = mgmtGetTable(left); - STabObj* pRightMetric = mgmtGetTable(right); - - // decide the pRes belongs to - int32_t leftIndex = 0; - int32_t rightIndex = 0; - - for (int32_t i = 0; i < pSuperTableMetaMsg->numOfMeters; ++i) { - STabObj* pObj = (STabObj*)pRes[i].pRes[0]; - STabObj* pMetric1 = mgmtGetTable(pObj->pTagData); - if (pMetric1 == pLeftMetric) { - leftIndex = i; - } else if (pMetric1 == pRightMetric) { - rightIndex = i; - } - } - - orderResult(pSuperTableMetaMsg, &pRes[leftIndex], leftTagColIndex, leftIndex); - orderResult(pSuperTableMetaMsg, &pRes[rightIndex], rightTagColIndex, rightIndex); - - int32_t i = 0; - int32_t j = 0; - - SSchema s = {0}; - int32_t res = 0; - - // check for duplicated tag values - int32_t ret1 = mgmtCheckForDuplicateTagValue(pRes, leftIndex, leftTagColIndex); - int32_t ret2 = mgmtCheckForDuplicateTagValue(pRes, rightIndex, rightTagColIndex); - if (ret1 != TSDB_CODE_SUCCESS || ret2 != TSDB_CODE_SUCCESS) { - return ret1; - } - - while (i < pRes[leftIndex].num && j < pRes[rightIndex].num) { - STabObj* pLeftObj = pRes[leftIndex].pRes[i]; - STabObj* pRightObj = pRes[rightIndex].pRes[j]; - - char* v1 = mgmtTableGetTag(pLeftObj, leftTagColIndex, &s); - char* v2 = mgmtTableGetTag(pRightObj, rightTagColIndex, NULL); - - int32_t ret = doCompare(v1, v2, s.type, s.bytes); - if (ret == 0) { // qualified - pRes[leftIndex].pRes[res] = 
pRes[leftIndex].pRes[i++]; - pRes[rightIndex].pRes[res] = pRes[rightIndex].pRes[j++]; - - res++; - } else if (ret < 0) { - i++; - } else { - j++; - } - } - - pRes[leftIndex].num = res; - pRes[rightIndex].num = res; - - return TSDB_CODE_SUCCESS; -} - -/** - * convert the result pointer to STabObj instead of tSkipListNode - * @param pRes - */ -static void tansformQueryResult(tQueryResultset* pRes) { - if (pRes == NULL || pRes->num == 0) { - return; - } - - for (int32_t i = 0; i < pRes->num; ++i) { - pRes->pRes[i] = ((tSkipListNode*)(pRes->pRes[i]))->pData; - } -} - -static tQueryResultset* doNestedLoopIntersect(tQueryResultset* pRes1, tQueryResultset* pRes2) { - int32_t num = 0; - void** pResult = pRes1->pRes; - - for (int32_t i = 0; i < pRes1->num; ++i) { - for (int32_t j = 0; j < pRes2->num; ++j) { - if (pRes1->pRes[i] == pRes2->pRes[j]) { - pResult[num++] = pRes1->pRes[i]; - break; - } - } - } - - tQueryResultClean(pRes2); - - memset(pRes1->pRes + num, 0, sizeof(void*) * (pRes1->num - num)); - pRes1->num = num; - - return pRes1; -} - -static tQueryResultset* doSortIntersect(tQueryResultset* pRes1, tQueryResultset* pRes2) { - size_t sizePtr = sizeof(void *); - - qsort(pRes1->pRes, pRes1->num, sizePtr, tabObjPointerComparator); - qsort(pRes2->pRes, pRes2->num, sizePtr, tabObjPointerComparator); - - int32_t i = 0; - int32_t j = 0; - - int32_t num = 0; - while (i < pRes1->num && j < pRes2->num) { - if (pRes1->pRes[i] == pRes2->pRes[j]) { - j++; - pRes1->pRes[num++] = pRes1->pRes[i++]; - } else if (pRes1->pRes[i] < pRes2->pRes[j]) { - i++; - } else { - j++; - } - } - - tQueryResultClean(pRes2); - - memset(pRes1->pRes + num, 0, sizeof(void*) * (pRes1->num - num)); - pRes1->num = num; - return pRes1; -} - -static void queryResultIntersect(tQueryResultset* pFinalRes, tQueryResultset* pRes) { - const int32_t NUM_OF_RES_THRESHOLD = 20; - - // for small result, use nested loop join - if (pFinalRes->num <= NUM_OF_RES_THRESHOLD && pRes->num <= NUM_OF_RES_THRESHOLD) { - 
doNestedLoopIntersect(pFinalRes, pRes); - } else { // for larger result, sort merge is employed - doSortIntersect(pFinalRes, pRes); - } -} - -static void queryResultUnion(tQueryResultset* pFinalRes, tQueryResultset* pRes) { - if (pRes->num == 0) { - tQueryResultClean(pRes); - return; - } - - int32_t total = pFinalRes->num + pRes->num; - void* tmp = realloc(pFinalRes->pRes, total * POINTER_BYTES); - if (tmp == NULL) { - return; - } - pFinalRes->pRes = tmp; - - memcpy(&pFinalRes->pRes[pFinalRes->num], pRes->pRes, POINTER_BYTES * pRes->num); - qsort(pFinalRes->pRes, total, POINTER_BYTES, tabObjPointerComparator); - - int32_t num = 1; - for (int32_t i = 1; i < total; ++i) { - if (pFinalRes->pRes[i] != pFinalRes->pRes[i - 1]) { - pFinalRes->pRes[num++] = pFinalRes->pRes[i]; - } - } - - if (num < total) { - memset(&pFinalRes->pRes[num], 0, POINTER_BYTES * (total - num)); - } - - pFinalRes->num = num; - - tQueryResultClean(pRes); -} - -static int32_t compareIntVal(const void* pLeft, const void* pRight) { - DEFAULT_COMP(GET_INT64_VAL(pLeft), GET_INT64_VAL(pRight)); -} - -static int32_t compareIntDoubleVal(const void* pLeft, const void* pRight) { - DEFAULT_COMP(GET_INT64_VAL(pLeft), GET_DOUBLE_VAL(pRight)); -} - -static int32_t compareDoubleVal(const void* pLeft, const void* pRight) { - DEFAULT_COMP(GET_DOUBLE_VAL(pLeft), GET_DOUBLE_VAL(pRight)); -} - -static int32_t compareDoubleIntVal(const void* pLeft, const void* pRight) { - double ret = (*(double*)pLeft) - (*(int64_t*)pRight); - if (fabs(ret) < DBL_EPSILON) { - return 0; - } else { - return ret > 0 ? 1 : -1; - } -} - -static int32_t compareStrVal(const void* pLeft, const void* pRight) { - int32_t ret = strcmp(pLeft, pRight); - if (ret == 0) { - return 0; - } else { - return ret > 0 ? 1 : -1; - } -} - -static int32_t compareWStrVal(const void* pLeft, const void* pRight) { - int32_t ret = wcscmp(pLeft, pRight); - if (ret == 0) { - return 0; - } else { - return ret > 0 ? 
1 : -1; - } -} - -static int32_t compareStrPatternComp(const void* pLeft, const void* pRight) { - SPatternCompareInfo pInfo = {'%', '_'}; - - const char* pattern = pRight; - const char* str = pLeft; - - int32_t ret = patternMatch(pattern, str, strlen(str), &pInfo); - - return (ret == TSDB_PATTERN_MATCH) ? 0 : 1; -} - -static int32_t compareWStrPatternComp(const void* pLeft, const void* pRight) { - SPatternCompareInfo pInfo = {'%', '_'}; - - const wchar_t* pattern = pRight; - const wchar_t* str = pLeft; - - int32_t ret = WCSPatternMatch(pattern, str, wcslen(str), &pInfo); - - return (ret == TSDB_PATTERN_MATCH) ? 0 : 1; -} - -static __compar_fn_t getFilterComparator(int32_t type, int32_t filterType, int32_t optr) { - __compar_fn_t comparator = NULL; - - switch (type) { - case TSDB_DATA_TYPE_TINYINT: - case TSDB_DATA_TYPE_SMALLINT: - case TSDB_DATA_TYPE_INT: - case TSDB_DATA_TYPE_BIGINT: - case TSDB_DATA_TYPE_BOOL: { - if (filterType >= TSDB_DATA_TYPE_BOOL && filterType <= TSDB_DATA_TYPE_BIGINT) { - comparator = compareIntVal; - } else if (filterType >= TSDB_DATA_TYPE_FLOAT && filterType <= TSDB_DATA_TYPE_DOUBLE) { - comparator = compareIntDoubleVal; - } - break; - } - - case TSDB_DATA_TYPE_FLOAT: - case TSDB_DATA_TYPE_DOUBLE: { - if (filterType >= TSDB_DATA_TYPE_BOOL && filterType <= TSDB_DATA_TYPE_BIGINT) { - comparator = compareDoubleIntVal; - } else if (filterType >= TSDB_DATA_TYPE_FLOAT && filterType <= TSDB_DATA_TYPE_DOUBLE) { - comparator = compareDoubleVal; - } - break; - } - - case TSDB_DATA_TYPE_BINARY: { - assert(filterType == TSDB_DATA_TYPE_BINARY); - - if (optr == TSDB_RELATION_LIKE) { /* wildcard query using like operator */ - comparator = compareStrPatternComp; - } else { /* normal relational comparator */ - comparator = compareStrVal; - } - - break; - } - - case TSDB_DATA_TYPE_NCHAR: { - assert(filterType == TSDB_DATA_TYPE_NCHAR); - - if (optr == TSDB_RELATION_LIKE) { - comparator = compareWStrPatternComp; - } else { - comparator = compareWStrVal; - } 
- - break; - } - default: - comparator = compareIntVal; - break; - } - - return comparator; -} - -static void getTagColumnInfo(SSyntaxTreeFilterSupporter* pSupporter, SSchema* pSchema, int32_t* index, - int32_t* offset) { - *index = 0; - *offset = 0; - - // filter on table name(TBNAME) - if (strcasecmp(pSchema->name, TSQL_TBNAME_L) == 0) { - *index = TSDB_TBNAME_COLUMN_INDEX; - *offset = TSDB_TBNAME_COLUMN_INDEX; - return; - } - - while ((*index) < pSupporter->numOfTags) { - if (pSupporter->pTagSchema[*index].bytes == pSchema->bytes && - pSupporter->pTagSchema[*index].type == pSchema->type && - strcmp(pSupporter->pTagSchema[*index].name, pSchema->name) == 0) { - break; - } else { - (*offset) += pSupporter->pTagSchema[(*index)++].bytes; - } - } -} - -void filterPrepare(void* expr, void* param) { - tSQLBinaryExpr *pExpr = (tSQLBinaryExpr*) expr; - if (pExpr->info != NULL) { - return; - } - - int32_t i = 0, offset = 0; - pExpr->info = calloc(1, sizeof(tQueryInfo)); - - tQueryInfo* pInfo = pExpr->info; - SSyntaxTreeFilterSupporter* pSupporter = (SSyntaxTreeFilterSupporter*)param; - - tVariant* pCond = pExpr->pRight->pVal; - SSchema* pSchema = pExpr->pLeft->pSchema; - - getTagColumnInfo(pSupporter, pSchema, &i, &offset); - assert((i >= 0 && i < TSDB_MAX_TAGS) || (i == TSDB_TBNAME_COLUMN_INDEX)); - assert((offset >= 0 && offset < TSDB_MAX_TAGS_LEN) || (offset == TSDB_TBNAME_COLUMN_INDEX)); - - pInfo->sch = *pSchema; - pInfo->colIdx = i; - pInfo->optr = pExpr->nSQLBinaryOptr; - pInfo->offset = offset; - pInfo->compare = getFilterComparator(pSchema->type, pCond->nType, pInfo->optr); - - tVariantAssign(&pInfo->q, pCond); - tVariantTypeSetType(&pInfo->q, pInfo->sch.type); -} - -void tSQLListTraverseDestroyInfo(void* param) { - if (param == NULL) { - return; - } - - tQueryInfo* pInfo = (tQueryInfo*)param; - tVariantDestroy(&(pInfo->q)); - free(param); -} - -static int32_t mgmtFilterMeterByIndex(STabObj* pMetric, tQueryResultset* pRes, char* pCond, int32_t condLen) { - 
SSchema* pTagSchema = (SSchema*)(pMetric->schema + pMetric->numOfColumns * sizeof(SSchema)); - - tSQLBinaryExpr* pExpr = NULL; - tSQLBinaryExprFromString(&pExpr, pTagSchema, pMetric->numOfTags, pCond, condLen); - - // failed to build expression, no result, return immediately - if (pExpr == NULL) { - mError("metric:%s, no result returned, error in super table query expression:%s", pMetric->meterId, pCond); - tfree(pCond); - - return TSDB_CODE_OPS_NOT_SUPPORT; - } else { // query according to the binary expression - SSyntaxTreeFilterSupporter s = {.pTagSchema = pTagSchema, .numOfTags = pMetric->numOfTags}; - SBinaryFilterSupp supp = {.fp = (__result_filter_fn_t)tSkipListNodeFilterCallback, - .setupInfoFn = (__do_filter_suppl_fn_t)filterPrepare, - .pExtInfo = &s}; - -// tSQLBinaryExprTraverse(pExpr, pMetric->pSkipList, pRes, &supp); - tSQLBinaryExprDestroy(&pExpr, tSQLListTraverseDestroyInfo); - } - - tansformQueryResult(pRes); - - return TSDB_CODE_SUCCESS; -} - -int32_t mgmtRetrieveMetersFromSuperTable(SSuperTableMetaMsg* pMsg, int32_t tableIndex, tQueryResultset* pRes) { - SMetricMetaElemMsg* pElem = (SMetricMetaElemMsg*)((char*)pMsg + pMsg->metaElem[tableIndex]); - STabObj* pMetric = mgmtGetTable(pElem->meterId); - char* pCond = NULL; - char* tmpTableNameCond = NULL; - - // no table created in accordance with this metric. 
- if (pMetric->pSkipList == NULL || pMetric->pSkipList->nSize == 0) { - assert(pMetric->numOfMeters == 0); - return TSDB_CODE_SUCCESS; - } - - char* pQueryCond = (char*)pMsg + pElem->cond; - int32_t condLen = pElem->condLen; - - // transfer the unicode string to mbs binary expression - if (condLen > 0) { - pCond = calloc(1, (condLen + 1) * TSDB_NCHAR_SIZE); - - taosUcs4ToMbs(pQueryCond, condLen * TSDB_NCHAR_SIZE, pCond); - condLen = strlen(pCond) + 1; - mTrace("metric:%s len:%d, metric query condition:%s", pMetric->meterId, condLen, pCond); - } - - char* tablenameCond = (char*)pMsg + pElem->tableCond; - - if (pElem->tableCondLen > 0) { - tmpTableNameCond = calloc(1, pElem->tableCondLen + 1); - strncpy(tmpTableNameCond, tablenameCond, pElem->tableCondLen); - - mTrace("metric:%s rel:%d, len:%d, table name cond:%s", pMetric->meterId, pElem->rel, pElem->tableCondLen, - tmpTableNameCond); - } - - if (pElem->tableCondLen > 0 || condLen > 0) { - mgmtFilterByTableNameCond(pRes, tmpTableNameCond, pElem->tableCondLen, pMetric); - - bool noNextCal = (pRes->num == 0 && pElem->rel == TSDB_RELATION_AND); // no need to calculate next result - - if (!noNextCal && condLen > 0) { - tQueryResultset filterRes = {0}; - - int32_t ret = mgmtFilterMeterByIndex(pMetric, &filterRes, pCond, condLen); - if (ret != TSDB_CODE_SUCCESS) { - tfree(pCond); - tfree(tmpTableNameCond); - - return ret; - } - - // union or intersect of two results - assert(pElem->rel == TSDB_RELATION_AND || pElem->rel == TSDB_RELATION_OR); - - if (pElem->rel == TSDB_RELATION_AND) { - if (filterRes.num == 0 || pRes->num == 0) { // intersect two sets - tQueryResultClean(pRes); - } else { - queryResultIntersect(pRes, &filterRes); - } - } else { // union two sets - queryResultUnion(pRes, &filterRes); - } - - tQueryResultClean(&filterRes); - } - } else { - mTrace("metric:%s retrieve all meter, no query condition", pMetric->meterId); - pRes->num = tSkipListIterateList(pMetric->pSkipList, (tSkipListNode***)&pRes->pRes, NULL, 
NULL); - tansformQueryResult(pRes); - } - - tfree(pCond); - tfree(tmpTableNameCond); - - mTrace("metric:%s numOfRes:%d", pMetric->meterId, pRes->num); - return TSDB_CODE_SUCCESS; -} - -// todo refactor!!!!! -static char* getTagValueFromMeter(STabObj* pTable, int32_t offset, int32_t len, char* param) { - if (offset == TSDB_TBNAME_COLUMN_INDEX) { - extractTableName(pTable->meterId, param); - } else { - char* tags = pTable->pTagData + offset + TSDB_TABLE_ID_LEN; // tag start position - memcpy(param, tags, len); // make sure the value is null-terminated string - } - - return param; -} - -bool tSkipListNodeFilterCallback(const void* pNode, void* param) { - - tQueryInfo* pInfo = (tQueryInfo*)param; - STabObj* pTable = (STabObj*)(((tSkipListNode*)pNode)->pData); - - char buf[TSDB_MAX_TAGS_LEN] = {0}; - - char* val = getTagValueFromMeter(pTable, pInfo->offset, pInfo->sch.bytes, buf); - int8_t type = pInfo->sch.type; - - int32_t ret = 0; - if (pInfo->q.nType == TSDB_DATA_TYPE_BINARY || pInfo->q.nType == TSDB_DATA_TYPE_NCHAR) { - ret = pInfo->compare(val, pInfo->q.pz); - } else { - tVariant t = {0}; - tVariantCreateFromBinary(&t, val, (uint32_t) pInfo->sch.bytes, type); - - ret = pInfo->compare(&t.i64Key, &pInfo->q.i64Key); - } - - switch (pInfo->optr) { - case TSDB_RELATION_EQUAL: { - return ret == 0; - } - case TSDB_RELATION_NOT_EQUAL: { - return ret != 0; - } - case TSDB_RELATION_LARGE_EQUAL: { - return ret >= 0; - } - case TSDB_RELATION_LARGE: { - return ret > 0; - } - case TSDB_RELATION_LESS_EQUAL: { - return ret <= 0; - } - case TSDB_RELATION_LESS: { - return ret < 0; - } - case TSDB_RELATION_LIKE: { - return ret == 0; - } - - default: - assert(false); - } - return true; -} diff --git a/src/vnode/detail/src/vnodeTagMgmt.c b/src/vnode/detail/src/vnodeTagMgmt.c deleted file mode 100644 index 054a18900c..0000000000 --- a/src/vnode/detail/src/vnodeTagMgmt.c +++ /dev/null @@ -1,391 +0,0 @@ -/* - * Copyright (c) 2019 TAOS Data, Inc. 
- * - * This program is free software: you can use, redistribute, and/or modify - * it under the terms of the GNU Affero General Public License, version 3 - * or later ("AGPL"), as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. - * - * You should have received a copy of the GNU Affero General Public License - * along with this program. If not, see . - */ - -#define _DEFAULT_SOURCE -#include "os.h" - -#include "qast.h" -#include "qextbuffer.h" -#include "taosdef.h" -#include "taosmsg.h" -#include "tlog.h" -#include "tutil.h" -#include "vnodeTagMgmt.h" - -#define GET_TAG_VAL_POINTER(s, col, sc, t) ((t *)(&((s)->tags[getColumnModelOffset(sc, col)]))) -#define GET_TAG_VAL(s, col, sc, t) (*GET_TAG_VAL_POINTER(s, col, sc, t)) - -static void tTagsPrints(SMeterSidExtInfo *pMeterInfo, SColumnModel *pSchema, SColumnOrderInfo *pOrder); - -static void tSidSetDisplay(tSidSet *pSets); - -//todo merge with losertree_compar/ext_comp -int32_t doCompare(char* f1, char* f2, int32_t type, int32_t size) { - switch (type) { - case TSDB_DATA_TYPE_INT: DEFAULT_COMP(GET_INT32_VAL(f1), GET_INT32_VAL(f2)); - case TSDB_DATA_TYPE_DOUBLE: DEFAULT_COMP(GET_DOUBLE_VAL(f1), GET_DOUBLE_VAL(f2)); - case TSDB_DATA_TYPE_FLOAT: DEFAULT_COMP(GET_FLOAT_VAL(f1), GET_FLOAT_VAL(f2)); - case TSDB_DATA_TYPE_BIGINT: DEFAULT_COMP(GET_INT64_VAL(f1), GET_INT64_VAL(f2)); - case TSDB_DATA_TYPE_SMALLINT: DEFAULT_COMP(GET_INT16_VAL(f1), GET_INT16_VAL(f2)); - case TSDB_DATA_TYPE_TINYINT: - case TSDB_DATA_TYPE_BOOL: DEFAULT_COMP(GET_INT8_VAL(f1), GET_INT8_VAL(f2)); - case TSDB_DATA_TYPE_NCHAR: { - int32_t ret = wcsncmp((wchar_t*) f1, (wchar_t*) f2, size/TSDB_NCHAR_SIZE); - if (ret == 0) { - return ret; - } - return (ret < 0) ? 
-1 : 1; - } - default: { - int32_t ret = strncmp(f1, f2, (size_t)size); - if (ret == 0) { - return ret; - } - - return (ret < 0) ? -1 : 1; - } - } -} - -int32_t meterSidComparator(const void *p1, const void *p2, void *param) { - tOrderDescriptor *pOrderDesc = (tOrderDescriptor *)param; - - SMeterSidExtInfo *s1 = (SMeterSidExtInfo *)p1; - SMeterSidExtInfo *s2 = (SMeterSidExtInfo *)p2; - - for (int32_t i = 0; i < pOrderDesc->orderIdx.numOfCols; ++i) { - int32_t colIdx = pOrderDesc->orderIdx.pData[i]; - - char * f1 = NULL; - char * f2 = NULL; - int32_t type = 0; - int32_t bytes = 0; - - if (colIdx == -1) { - f1 = s1->tags; - f2 = s2->tags; - type = TSDB_DATA_TYPE_BINARY; - bytes = TSDB_METER_NAME_LEN; - } else { - f1 = GET_TAG_VAL_POINTER(s1, colIdx, pOrderDesc->pColumnModel, char); - f2 = GET_TAG_VAL_POINTER(s2, colIdx, pOrderDesc->pColumnModel, char); - SSchema *pSchema = getColumnModelSchema(pOrderDesc->pColumnModel, colIdx); - type = pSchema->type; - bytes = pSchema->bytes; - } - - int32_t ret = doCompare(f1, f2, type, bytes); - if (ret == 0) { - continue; - } else { - return ret; - } - } - - return 0; -} - -static void median(void **pMeterSids, size_t size, int32_t s1, int32_t s2, tOrderDescriptor *pOrderDesc, - __ext_compar_fn_t compareFn) { - int32_t midIdx = ((s2 - s1) >> 1) + s1; - - if (compareFn(pMeterSids[midIdx], pMeterSids[s1], pOrderDesc) == 1) { - tsDataSwap(&pMeterSids[midIdx], &pMeterSids[s1], TSDB_DATA_TYPE_BINARY, size); - } - - if (compareFn(pMeterSids[midIdx], pMeterSids[s2], pOrderDesc) == 1) { - tsDataSwap(&pMeterSids[midIdx], &pMeterSids[s1], TSDB_DATA_TYPE_BINARY, size); - tsDataSwap(&pMeterSids[midIdx], &pMeterSids[s2], TSDB_DATA_TYPE_BINARY, size); - } else if (compareFn(pMeterSids[s1], pMeterSids[s2], pOrderDesc) == 1) { - tsDataSwap(&pMeterSids[s1], &pMeterSids[s2], TSDB_DATA_TYPE_BINARY, size); - } - - assert(compareFn(pMeterSids[midIdx], pMeterSids[s1], pOrderDesc) <= 0 && - compareFn(pMeterSids[s1], pMeterSids[s2], pOrderDesc) <= 0); - 
-#ifdef _DEBUG_VIEW - tTagsPrints(pMeterSids[s1], pOrderDesc->pColumnModel, &pOrderDesc->orderIdx); - tTagsPrints(pMeterSids[midIdx], pOrderDesc->pColumnModel, &pOrderDesc->orderIdx); - tTagsPrints(pMeterSids[s2], pOrderDesc->pColumnModel, &pOrderDesc->orderIdx); -#endif -} - -static void tInsertSort(void **pMeterSids, size_t size, int32_t startPos, int32_t endPos, void *param, - __ext_compar_fn_t compareFn) { - for (int32_t i = startPos + 1; i <= endPos; ++i) { - for (int32_t j = i; j > startPos; --j) { - if (compareFn(pMeterSids[j], pMeterSids[j - 1], param) == -1) { - tsDataSwap(&pMeterSids[j], &pMeterSids[j - 1], TSDB_DATA_TYPE_BINARY, size); - } else { - break; - } - } - } -} - -void tQSortEx(void **pMeterSids, size_t size, int32_t start, int32_t end, void *param, __ext_compar_fn_t compareFn) { - tOrderDescriptor *pOrderDesc = (tOrderDescriptor *)param; - - // short array sort, incur another sort procedure instead of quick sort process - if (end - start + 1 <= 8) { - tInsertSort(pMeterSids, size, start, end, pOrderDesc, compareFn); - return; - } - - median(pMeterSids, size, start, end, pOrderDesc, compareFn); - - int32_t s = start, e = end; - int32_t endRightS = end, startLeftS = start; - - while (s < e) { - while (e > s) { - int32_t ret = compareFn(pMeterSids[e], pMeterSids[s], pOrderDesc); - if (ret < 0) { - break; - } - - /* - * move the data that equals to pivotal value to the right end of the list - */ - if (ret == 0 && e != endRightS) { - tsDataSwap(&pMeterSids[e], &pMeterSids[endRightS--], TSDB_DATA_TYPE_BINARY, size); - } - - e--; - } - - if (e != s) { - tsDataSwap(&pMeterSids[e], &pMeterSids[s], TSDB_DATA_TYPE_BINARY, size); - } - - while (s < e) { - int32_t ret = compareFn(pMeterSids[s], pMeterSids[e], pOrderDesc); - if (ret > 0) { - break; - } - - if (ret == 0 && s != startLeftS) { - tsDataSwap(&pMeterSids[s], &pMeterSids[startLeftS++], TSDB_DATA_TYPE_BINARY, size); - } - s++; - } - - if (e != s) { - tsDataSwap(&pMeterSids[s], &pMeterSids[e], 
TSDB_DATA_TYPE_BINARY, size); - } - } - - int32_t rightPartStart = e + 1; - if (endRightS != end && e < end) { - int32_t left = rightPartStart; - int32_t right = end; - - while (right > endRightS && left <= endRightS) { - tsDataSwap(&pMeterSids[left++], &pMeterSids[right--], TSDB_DATA_TYPE_BINARY, size); - } - - rightPartStart += (end - endRightS); - } - - int32_t leftPartEnd = e - 1; - if (startLeftS != end && s > start) { - int32_t left = start; - int32_t right = leftPartEnd; - - while (left < startLeftS && right >= startLeftS) { - tsDataSwap(&pMeterSids[left++], &pMeterSids[right--], TSDB_DATA_TYPE_BINARY, size); - } - - leftPartEnd -= (startLeftS - start); - } - - if (leftPartEnd > start) { - tQSortEx(pMeterSids, size, start, leftPartEnd, pOrderDesc, compareFn); - } - - if (rightPartStart < end) { - tQSortEx(pMeterSids, size, rightPartStart, end, pOrderDesc, compareFn); - } -} - -int32_t *calculateSubGroup(void **pSids, int32_t numOfMeters, int32_t *numOfSubset, tOrderDescriptor *pOrderDesc, - __ext_compar_fn_t compareFn) { - int32_t *starterPos = (int32_t *)malloc((numOfMeters + 1) * sizeof(int32_t)); // add additional buffer - starterPos[0] = 0; - - *numOfSubset = 1; - - for (int32_t i = 1; i < numOfMeters; ++i) { - int32_t ret = compareFn(pSids[i - 1], pSids[i], pOrderDesc); - if (ret != 0) { - assert(ret == -1); - starterPos[(*numOfSubset)++] = i; - } - } - - starterPos[*numOfSubset] = numOfMeters; - assert(*numOfSubset <= numOfMeters); - - return starterPos; -} - -tSidSet *tSidSetCreate(struct SMeterSidExtInfo **pMeterSidExtInfo, int32_t numOfMeters, SSchema *pSchema, - int32_t numOfTags, SColIndexEx *colList, int32_t numOfCols) { - tSidSet *pSidSet = (tSidSet *)calloc(1, sizeof(tSidSet) + numOfCols * sizeof(int16_t)); - if (pSidSet == NULL) { - return NULL; - } - - pSidSet->numOfSids = numOfMeters; - pSidSet->pSids = pMeterSidExtInfo; - pSidSet->pColumnModel = createColumnModel(pSchema, numOfTags, 1); - pSidSet->orderIdx.numOfCols = numOfCols; - - /* - * 
in case of "group by tbname,normal_col", the normal_col is ignored - */ - int32_t numOfTagCols = 0; - for(int32_t i = 0; i < numOfCols; ++i) { - if (colList[i].flag == TSDB_COL_TAG) { - pSidSet->orderIdx.pData[numOfTagCols++] = colList[i].colIdx; - } - } - - pSidSet->orderIdx.numOfCols = numOfTagCols; - - pSidSet->starterPos = NULL; - return pSidSet; -} - -void tSidSetDestroy(tSidSet **pSets) { - if ((*pSets) != NULL) { - tfree((*pSets)->starterPos); - tfree((*pSets)->pColumnModel)(*pSets)->pSids = NULL; - tfree(*pSets); - } -} - -void tTagsPrints(SMeterSidExtInfo *pMeterInfo, SColumnModel *pSchema, SColumnOrderInfo *pOrder) { - if (pSchema == NULL) { - return; - } - - printf("sid: %-5d tags(", pMeterInfo->sid); - - for (int32_t i = 0; i < pOrder->numOfCols; ++i) { - int32_t colIndex = pOrder->pData[i]; - - // it is the tbname column - if (colIndex == -1) { - printf("%s, ", pMeterInfo->tags); - continue; - } - - SSchema* s = getColumnModelSchema(pSchema, colIndex); - - switch (s->type) { - case TSDB_DATA_TYPE_INT: - printf("%d, ", GET_TAG_VAL(pMeterInfo, colIndex, pSchema, int32_t)); - break; - case TSDB_DATA_TYPE_DOUBLE: - printf("%lf, ", GET_TAG_VAL(pMeterInfo, colIndex, pSchema, double)); - break; - case TSDB_DATA_TYPE_FLOAT: - printf("%f, ", GET_TAG_VAL(pMeterInfo, colIndex, pSchema, float)); - break; - case TSDB_DATA_TYPE_BIGINT: - printf("%" PRId64 ", ", GET_TAG_VAL(pMeterInfo, colIndex, pSchema, int64_t)); - break; - case TSDB_DATA_TYPE_SMALLINT: - printf("%d, ", GET_TAG_VAL(pMeterInfo, colIndex, pSchema, int16_t)); - break; - case TSDB_DATA_TYPE_TINYINT: - printf("%d, ", GET_TAG_VAL(pMeterInfo, colIndex, pSchema, int8_t)); - break; - case TSDB_DATA_TYPE_BINARY: - printf("%s, ", GET_TAG_VAL_POINTER(pMeterInfo, colIndex, pSchema, char)); - break; - case TSDB_DATA_TYPE_NCHAR: { - char *data = GET_TAG_VAL_POINTER(pMeterInfo, colIndex, pSchema, char); - - char buffer[512] = {0}; - taosUcs4ToMbs(data, s->bytes, buffer); - printf("%s, ", buffer); - break; - } - 
case TSDB_DATA_TYPE_BOOL: - printf("%d, ", GET_TAG_VAL(pMeterInfo, colIndex, pSchema, int8_t)); - break; - - default: - assert(false); - } - } - printf(")\n"); -} - -/* - * display all the subset groups for debug purpose only - */ -static void UNUSED_FUNC tSidSetDisplay(tSidSet *pSets) { - printf("%d meters.\n", pSets->numOfSids); - for (int32_t i = 0; i < pSets->numOfSids; ++i) { - printf("%d\t", pSets->pSids[i]->sid); - } - printf("\n"); - - printf("total number of subset group is: %d\n", pSets->numOfSubSet); - for (int32_t i = 0; i < pSets->numOfSubSet; ++i) { - int32_t s = pSets->starterPos[i]; - int32_t e = pSets->starterPos[i + 1]; - - printf("the %d-th subgroup: \n", i + 1); - for (int32_t j = s; j < e; ++j) { - tTagsPrints(pSets->pSids[j], pSets->pColumnModel, &pSets->orderIdx); - } - } -} - -void tSidSetSort(tSidSet *pSets) { - pTrace("number of meters in sort: %d", pSets->numOfSids); - SColumnOrderInfo *pOrderIdx = &pSets->orderIdx; - - if (pOrderIdx->numOfCols == 0 || pSets->numOfSids <= 1 || pSets->pColumnModel == NULL) { // no group by tags clause - pSets->numOfSubSet = 1; - pSets->starterPos = (int32_t *)malloc(sizeof(int32_t) * (pSets->numOfSubSet + 1)); - pSets->starterPos[0] = 0; - pSets->starterPos[1] = pSets->numOfSids; - pTrace("all meters belong to one subgroup, no need to subgrouping ops"); -#ifdef _DEBUG_VIEW - tSidSetDisplay(pSets); -#endif - } else { - tOrderDescriptor *descriptor = - (tOrderDescriptor *)calloc(1, sizeof(tOrderDescriptor) + sizeof(int16_t) * pSets->orderIdx.numOfCols); - descriptor->pColumnModel = pSets->pColumnModel; - descriptor->orderIdx = pSets->orderIdx; - - memcpy(descriptor->orderIdx.pData, pOrderIdx->pData, sizeof(int16_t) * pSets->orderIdx.numOfCols); - - tQSortEx((void **)pSets->pSids, POINTER_BYTES, 0, pSets->numOfSids - 1, descriptor, meterSidComparator); - pSets->starterPos = - calculateSubGroup((void **)pSets->pSids, pSets->numOfSids, &pSets->numOfSubSet, descriptor, meterSidComparator); - -#ifdef _DEBUG_VIEW 
- tSidSetDisplay(pSets); -#endif - tfree(descriptor); - } -} diff --git a/src/vnode/detail/src/vnodeUtil.c b/src/vnode/detail/src/vnodeUtil.c deleted file mode 100644 index 43c24bae6a..0000000000 --- a/src/vnode/detail/src/vnodeUtil.c +++ /dev/null @@ -1,767 +0,0 @@ -/* - * Copyright (c) 2019 TAOS Data, Inc. - * - * This program is free software: you can use, redistribute, and/or modify - * it under the terms of the GNU Affero General Public License, version 3 - * or later ("AGPL"), as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. - * - * You should have received a copy of the GNU Affero General Public License - * along with this program. If not, see . - */ - -#define _DEFAULT_SOURCE -#include "os.h" - -#include "qast.h" -#include "tscUtil.h" -#include "tschemautil.h" -#include "vnode.h" -#include "vnodeDataFilterFunc.h" -#include "vnodeStatus.h" -#include "vnodeUtil.h" - -int vnodeCheckFileIntegrity(FILE* fp) { - /* - int savedSessions, savedMeterSize; - - fseek(fp, TSDB_FILE_HEADER_LEN/3, SEEK_SET); - fscanf(fp, "%d %d", &savedSessions, &savedMeterSize); - if ( (savedSessions != tsSessionsPerVnode) || (savedMeterSize != tsMeterSizeOnFile) ) { - dError("file structure is changed"); - return -1; - - } - - uint64_t checkSum = 0, savedCheckSum=0; - checkSum = taosGetCheckSum(fp, TSDB_FILE_HEADER_LEN); - - fseek(fp, TSDB_FILE_HEADER_LEN - cksumsize, SEEK_SET); - fread(&savedCheckSum, cksumsize, 1, fp); - - if ( savedCheckSum != checkSum ) { - dError("check sum is not matched:0x%x 0x%x", checkSum, savedCheckSum); - return -1; - } - */ - return 0; -} - -void vnodeCreateFileHeaderFd(int fd) { - char temp[TSDB_FILE_HEADER_LEN / 4]; - int lineLen; - - lineLen = sizeof(temp); - - // write the first line` - memset(temp, 0, lineLen); - *(int16_t*)temp = vnodeFileVersion; - sprintf(temp 
+ sizeof(int16_t), "tsdb version: %s\n", version); - /* *((int16_t *)(temp + TSDB_FILE_HEADER_LEN/8)) = vnodeFileVersion; */ - lseek(fd, 0, SEEK_SET); - twrite(fd, temp, lineLen); - - // second line - memset(temp, 0, lineLen); - twrite(fd, temp, lineLen); - - // the third/forth line is the dynamic info - memset(temp, 0, lineLen); - twrite(fd, temp, lineLen); - twrite(fd, temp, lineLen); -} - -void vnodeGetHeadFileHeaderInfo(int fd, SVnodeHeadInfo* pHeadInfo) { - lseek(fd, TSDB_FILE_HEADER_LEN / 4, SEEK_SET); - read(fd, pHeadInfo, sizeof(SVnodeHeadInfo)); -} - -void vnodeUpdateHeadFileHeader(int fd, SVnodeHeadInfo* pHeadInfo) { - lseek(fd, TSDB_FILE_HEADER_LEN / 4, SEEK_SET); - twrite(fd, pHeadInfo, sizeof(SVnodeHeadInfo)); -} - -void vnodeCreateFileHeader(FILE* fp) { - char temp[TSDB_FILE_HEADER_LEN / 4]; - int lineLen; - - lineLen = sizeof(temp); - - // write the first line - memset(temp, 0, lineLen); - *(int16_t*)temp = vnodeFileVersion; - sprintf(temp + sizeof(int16_t), "tsdb version: %s\n", version); - /* *((int16_t *)(temp + TSDB_FILE_HEADER_LEN/8)) = vnodeFileVersion; */ - fseek(fp, 0, SEEK_SET); - fwrite(temp, lineLen, 1, fp); - - // second line - memset(temp, 0, lineLen); - fwrite(temp, lineLen, 1, fp); - - // the third line is the dynamic info - memset(temp, 0, lineLen); - fwrite(temp, lineLen, 1, fp); - fwrite(temp, lineLen, 1, fp); -} - -SSqlGroupbyExpr* vnodeCreateGroupbyExpr(SQueryMeterMsg* pQueryMsg, int32_t* code) { - if (pQueryMsg->numOfGroupCols == 0) { - return NULL; - } - - // using group by tag columns - SSqlGroupbyExpr* pGroupbyExpr = - (SSqlGroupbyExpr*)malloc(sizeof(SSqlGroupbyExpr) + pQueryMsg->numOfGroupCols * sizeof(SColIndexEx)); - if (pGroupbyExpr == NULL) { - *code = TSDB_CODE_SERV_OUT_OF_MEMORY; - return NULL; - } - - SColIndexEx* pGroupbyColInfo = (SColIndexEx*)pQueryMsg->groupbyTagIds; - - pGroupbyExpr->numOfGroupCols = pQueryMsg->numOfGroupCols; - pGroupbyExpr->orderType = pQueryMsg->orderType; - pGroupbyExpr->orderIndex = 
pQueryMsg->orderByIdx; - - memcpy(pGroupbyExpr->columnInfo, pGroupbyColInfo, sizeof(SColIndexEx) * pGroupbyExpr->numOfGroupCols); - - // TODO: update the colIndexInBuf for each column in group by clause - - return pGroupbyExpr; -} - -static SSchema* toSchema(SQueryMeterMsg* pQuery, SColumnInfo* pCols, int32_t numOfCols) { - char* start = (char*)pQuery->colNameList; - char* end = start; - - SSchema* pSchema = calloc(1, sizeof(SSchema) * numOfCols); - for (int32_t i = 0; i < numOfCols; ++i) { - pSchema[i].type = pCols[i].type; - pSchema[i].bytes = pCols[i].bytes; - pSchema[i].colId = pCols[i].colId; - - end = strstr(start, ","); - memcpy(pSchema[i].name, start, end - start); - start = end + 1; - } - - return pSchema; -} - -static int32_t id_compar(const void* left, const void* right) { - DEFAULT_COMP(GET_INT16_VAL(left), GET_INT16_VAL(right)); -} - -static int32_t vnodeBuildExprFromArithmeticStr(SSqlFunctionExpr* pExpr, SQueryMeterMsg* pQueryMsg) { - SSqlBinaryExprInfo* pBinaryExprInfo = &pExpr->pBinExprInfo; - SColumnInfo* pColMsg = pQueryMsg->colList; - - tSQLBinaryExpr* pBinExpr = NULL; - SSchema* pSchema = toSchema(pQueryMsg, pColMsg, pQueryMsg->numOfCols); - - dTrace("qmsg:%p create binary expr from string:%s", pQueryMsg, pExpr->pBase.arg[0].argValue.pz); - tSQLBinaryExprFromString(&pBinExpr, pSchema, pQueryMsg->numOfCols, pExpr->pBase.arg[0].argValue.pz, - pExpr->pBase.arg[0].argBytes); - - if (pBinExpr == NULL) { - dError("qmsg:%p failed to create arithmetic expression string from:%s", pQueryMsg, pExpr->pBase.arg[0].argValue.pz); - return TSDB_CODE_APP_ERROR; - } - - pBinaryExprInfo->pBinExpr = pBinExpr; - - int32_t num = 0; - int16_t ids[TSDB_MAX_COLUMNS] = {0}; - - tSQLBinaryExprTrv(pBinExpr, &num, ids); - qsort(ids, num, sizeof(int16_t), id_compar); - - int32_t i = 0, j = 0; - - while (i < num && j < num) { - if (ids[i] == ids[j]) { - j++; - } else { - ids[++i] = ids[j++]; - } - } - assert(i <= num); - - // there may be duplicated referenced columns. 
- num = i + 1; - pBinaryExprInfo->pReqColumns = malloc(sizeof(SColIndexEx) * num); - - for (int32_t k = 0; k < num; ++k) { - SColIndexEx* pColIndex = &pBinaryExprInfo->pReqColumns[k]; - pColIndex->colId = ids[k]; - } - - pBinaryExprInfo->numOfCols = num; - free(pSchema); - - return TSDB_CODE_SUCCESS; -} - -static int32_t getColumnIndexInSource(SQueryMeterMsg* pQueryMsg, SSqlFuncExprMsg* pExprMsg) { - int32_t j = 0; - - while(j < pQueryMsg->numOfCols) { - if (pExprMsg->colInfo.colId == pQueryMsg->colList[j].colId) { - break; - } - - j += 1; - } - - return j; -} - -bool vnodeValidateExprColumnInfo(SQueryMeterMsg* pQueryMsg, SSqlFuncExprMsg* pExprMsg) { - int32_t j = getColumnIndexInSource(pQueryMsg, pExprMsg); - return j < pQueryMsg->numOfCols; -} - -SSqlFunctionExpr* vnodeCreateSqlFunctionExpr(SQueryMeterMsg* pQueryMsg, int32_t* code) { - SSqlFunctionExpr* pExprs = (SSqlFunctionExpr*)calloc(1, sizeof(SSqlFunctionExpr) * pQueryMsg->numOfOutputCols); - if (pExprs == NULL) { - tfree(pQueryMsg->pSqlFuncExprs); - - *code = TSDB_CODE_SERV_OUT_OF_MEMORY; - return NULL; - } - - bool isSuperTable = QUERY_IS_STABLE_QUERY(pQueryMsg->queryType); - int16_t tagLen = 0; - - SSchema* pTagSchema = (SSchema*)pQueryMsg->pTagSchema; - for (int32_t i = 0; i < pQueryMsg->numOfOutputCols; ++i) { - pExprs[i].pBase = *((SSqlFuncExprMsg**)pQueryMsg->pSqlFuncExprs)[i]; - pExprs[i].resBytes = 0; - - int16_t type = 0; - int16_t bytes = 0; - - SColIndexEx* pColumnIndexExInfo = &pExprs[i].pBase.colInfo; - - // tag column schema is kept in pQueryMsg->pColumnModel - if (TSDB_COL_IS_TAG(pColumnIndexExInfo->flag)) { - if (pColumnIndexExInfo->colIdx >= pQueryMsg->numOfTagsCols) { - *code = TSDB_CODE_INVALID_QUERY_MSG; - tfree(pExprs); - return NULL; - } - - type = pTagSchema[pColumnIndexExInfo->colIdx].type; - bytes = pTagSchema[pColumnIndexExInfo->colIdx].bytes; - - } else { // parse the arithmetic expression - if (pExprs[i].pBase.functionId == TSDB_FUNC_ARITHM) { - *code = 
vnodeBuildExprFromArithmeticStr(&pExprs[i], pQueryMsg); - - if (*code != TSDB_CODE_SUCCESS) { - tfree(pExprs); - return NULL; - } - - type = TSDB_DATA_TYPE_DOUBLE; - bytes = tDataTypeDesc[type].nSize; - } else { // parse the normal column - int32_t j = getColumnIndexInSource(pQueryMsg, &pExprs[i].pBase); - assert(j < pQueryMsg->numOfCols); - - SColumnInfo* pCol = &pQueryMsg->colList[j]; - type = pCol->type; - bytes = pCol->bytes; - } - } - - int32_t param = pExprs[i].pBase.arg[0].argValue.i64; - if (getResultDataInfo(type, bytes, pExprs[i].pBase.functionId, param, &pExprs[i].resType, &pExprs[i].resBytes, - &pExprs[i].interResBytes, 0, isSuperTable) != TSDB_CODE_SUCCESS) { - *code = TSDB_CODE_INVALID_QUERY_MSG; - return NULL; - } - - if (pExprs[i].pBase.functionId == TSDB_FUNC_TAG_DUMMY || pExprs[i].pBase.functionId == TSDB_FUNC_TS_DUMMY) { - tagLen += pExprs[i].resBytes; - } - assert(isValidDataType(pExprs[i].resType, pExprs[i].resBytes)); - } - - //get the correct result size for top/bottom query, according to the number of tags columns in selection clause - - // TODO refactor - for(int32_t i = 0; i < pQueryMsg->numOfOutputCols; ++i) { - pExprs[i].pBase = *((SSqlFuncExprMsg**)pQueryMsg->pSqlFuncExprs)[i]; - int16_t functId = pExprs[i].pBase.functionId; - if (functId == TSDB_FUNC_TOP || functId == TSDB_FUNC_BOTTOM) { - int32_t j = getColumnIndexInSource(pQueryMsg, &pExprs[i].pBase); - assert(j < pQueryMsg->numOfCols); - - SColumnInfo* pCol = &pQueryMsg->colList[j]; - int16_t type = pCol->type; - int16_t bytes = pCol->bytes; - - int32_t ret = getResultDataInfo(type, bytes, pExprs[i].pBase.functionId, pExprs[i].pBase.arg[0].argValue.i64, - &pExprs[i].resType, &pExprs[i].resBytes, &pExprs[i].interResBytes, tagLen, isSuperTable); - assert(ret == TSDB_CODE_SUCCESS); - } - } - - tfree(pQueryMsg->pSqlFuncExprs); - return pExprs; -} - -bool vnodeIsValidVnodeCfg(SVnodeCfg* pCfg) { - if (pCfg == NULL) return false; - - if (pCfg->maxSessions <= 0 || pCfg->cacheBlockSize <= 0 
|| pCfg->replications <= 0 || pCfg->replications > 20 || - pCfg->daysPerFile <= 0 || pCfg->daysToKeep <= 0) { - return false; - } - - return true; -} - -/** - * compare if schema of two tables are identical. - * when multi-table query is issued, the schemas of all requested tables - * should be identical. Otherwise,query process will abort. - */ -bool vnodeMeterSchemaIdentical(SColumn* pSchema1, int32_t numOfCols1, SColumn* pSchema2, int32_t numOfCols2) { - if (!VALIDNUMOFCOLS(numOfCols1) || !VALIDNUMOFCOLS(numOfCols2) || numOfCols1 != numOfCols2) { - return false; - } - - return memcmp((char*)pSchema1, (char*)pSchema2, sizeof(SColumn) * numOfCols1) == 0; -} - -void vnodeFreeFields(SQuery* pQuery) { - if (pQuery == NULL || pQuery->pFields == NULL) { - return; - } - - for (int32_t i = 0; i < pQuery->numOfBlocks; ++i) { - tfree(pQuery->pFields[i]); - } - - /* - * pQuery->pFields does not need to be released, it is allocated at the last part of pBlock - * so free(pBlock) can release this memory at the same time. 
- */ - pQuery->pFields = NULL; - pQuery->numOfBlocks = 0; -} - -void vnodeUpdateFilterColumnIndex(SQuery* pQuery) { - for (int32_t i = 0; i < pQuery->numOfFilterCols; ++i) { - for (int16_t j = 0; j < pQuery->numOfCols; ++j) { - if (pQuery->pFilterInfo[i].info.data.colId == pQuery->colList[j].data.colId) { - pQuery->pFilterInfo[i].info.colIdx = pQuery->colList[j].colIdx; - pQuery->pFilterInfo[i].info.colIdxInBuf = pQuery->colList[j].colIdxInBuf; - - // supplementary scan is also require this column - pQuery->colList[j].req[1] = 1; - break; - } - } - } - - // set the column index in buffer for arithmetic operation - if (pQuery->pSelectExpr == NULL) { - return; - } - - for (int32_t i = 0; i < pQuery->numOfOutputCols; ++i) { - SSqlBinaryExprInfo* pBinExprInfo = &pQuery->pSelectExpr[i].pBinExprInfo; - if (pBinExprInfo->pBinExpr == NULL) { - continue; - } - - for (int16_t j = 0; j < pBinExprInfo->numOfCols; ++j) { - for (int32_t k = 0; k < pQuery->numOfCols; ++k) { - if (pBinExprInfo->pReqColumns[j].colId == pQuery->colList[k].data.colId) { - pBinExprInfo->pReqColumns[j].colIdxInBuf = pQuery->colList[k].colIdxInBuf; - assert(pQuery->colList[k].colIdxInBuf == k); - break; - } - } - } - } -} - -// TODO support k<12 and k<>9 -int32_t vnodeCreateFilterInfo(void* pQInfo, SQuery* pQuery) { - for (int32_t i = 0; i < pQuery->numOfCols; ++i) { - if (pQuery->colList[i].data.numOfFilters > 0) { - pQuery->numOfFilterCols++; - } - } - - if (pQuery->numOfFilterCols == 0) { - return TSDB_CODE_SUCCESS; - } - - pQuery->pFilterInfo = calloc(1, sizeof(SSingleColumnFilterInfo) * pQuery->numOfFilterCols); - - for (int32_t i = 0, j = 0; i < pQuery->numOfCols; ++i) { - if (pQuery->colList[i].data.numOfFilters > 0) { - SSingleColumnFilterInfo* pFilterInfo = &pQuery->pFilterInfo[j]; - - memcpy(&pFilterInfo->info, &pQuery->colList[i], sizeof(SColumnInfoEx)); - pFilterInfo->info.data.filters = NULL; - - pFilterInfo->numOfFilters = pQuery->colList[i].data.numOfFilters; - pFilterInfo->pFilters = 
calloc(pFilterInfo->numOfFilters, sizeof(SColumnFilterElem)); - - for(int32_t f = 0; f < pFilterInfo->numOfFilters; ++f) { - SColumnFilterElem *pSingleColFilter = &pFilterInfo->pFilters[f]; - pSingleColFilter->filterInfo = pQuery->colList[i].data.filters[f]; - - int32_t lower = pSingleColFilter->filterInfo.lowerRelOptr; - int32_t upper = pSingleColFilter->filterInfo.upperRelOptr; - - if (lower == TSDB_RELATION_INVALID && upper == TSDB_RELATION_INVALID) { - dError("QInfo:%p invalid filter info", pQInfo); - return TSDB_CODE_INVALID_QUERY_MSG; - } - - int16_t type = pQuery->colList[i].data.type; - int16_t bytes = pQuery->colList[i].data.bytes; - - __filter_func_t *rangeFilterArray = vnodeGetRangeFilterFuncArray(type); - __filter_func_t *filterArray = vnodeGetValueFilterFuncArray(type); - - if (rangeFilterArray == NULL && filterArray == NULL) { - dError("QInfo:%p failed to get filter function, invalid data type:%d", pQInfo, type); - return TSDB_CODE_INVALID_QUERY_MSG; - } - - if ((lower == TSDB_RELATION_LARGE_EQUAL || lower == TSDB_RELATION_LARGE) && - (upper == TSDB_RELATION_LESS_EQUAL || upper == TSDB_RELATION_LESS)) { - if (lower == TSDB_RELATION_LARGE_EQUAL) { - if (upper == TSDB_RELATION_LESS_EQUAL) { - pSingleColFilter->fp = rangeFilterArray[4]; - } else { - pSingleColFilter->fp = rangeFilterArray[2]; - } - } else { - if (upper == TSDB_RELATION_LESS_EQUAL) { - pSingleColFilter->fp = rangeFilterArray[3]; - } else { - pSingleColFilter->fp = rangeFilterArray[1]; - } - } - } else { // set callback filter function - if (lower != TSDB_RELATION_INVALID) { - pSingleColFilter->fp = filterArray[lower]; - - if (upper != TSDB_RELATION_INVALID) { - dError("pQInfo:%p failed to get filter function, invalid filter condition", pQInfo, type); - return TSDB_CODE_INVALID_QUERY_MSG; - } - } else { - pSingleColFilter->fp = filterArray[upper]; - } - } - assert (pSingleColFilter->fp != NULL); - pSingleColFilter->bytes = bytes; - } - - j++; - } - } - - return TSDB_CODE_SUCCESS; -} - 
-bool vnodeDoFilterData(SQuery* pQuery, int32_t elemPos) { - for (int32_t k = 0; k < pQuery->numOfFilterCols; ++k) { - SSingleColumnFilterInfo *pFilterInfo = &pQuery->pFilterInfo[k]; - char* pElem = pFilterInfo->pData + pFilterInfo->info.data.bytes * elemPos; - - if(isNull(pElem, pFilterInfo->info.data.type)) { - return false; - } - - int32_t num = pFilterInfo->numOfFilters; - bool qualified = false; - for(int32_t j = 0; j < num; ++j) { - SColumnFilterElem* pFilterElem = &pFilterInfo->pFilters[j]; - if (pFilterElem->fp(pFilterElem, pElem, pElem)) { - qualified = true; - break; - } - } - - if (!qualified) { - return false; - } - } - - return true; -} - -bool vnodeFilterData(SQuery* pQuery, int32_t* numOfActualRead, int32_t index) { - (*numOfActualRead)++; - if (!vnodeDoFilterData(pQuery, index)) { - return false; - } - - if (pQuery->limit.offset > 0) { - pQuery->limit.offset--; // ignore this qualified row - return false; - } - - return true; -} - -bool vnodeIsProjectionQuery(SSqlFunctionExpr* pExpr, int32_t numOfOutput) { - for (int32_t i = 0; i < numOfOutput; ++i) { - if (pExpr[i].pBase.functionId != TSDB_FUNC_PRJ) { - return false; - } - } - - return true; -} - -/* - * the pTable->state may be changed by vnodeIsSafeToDeleteMeter and import/update processor, the check of - * the state will not always be correct. - * - * The import/update/deleting is actually blocked by current query processing if the check of meter state is - * passed, but later queries are denied. - * - * 1. vnodeIsSafeToDelete will wait for this complete, since it also use the vmutex to check the numOfQueries - * 2. import will check the numOfQueries again after setting state to be TSDB_METER_STATE_IMPORTING, while the - * vmutex is also used. - * 3. insert has nothing to do with the query processing. 
- */ -int32_t vnodeIncQueryRefCount(SQueryMeterMsg* pQueryMsg, SMeterSidExtInfo** pSids, SMeterObj** pMeterObjList, - int32_t* numOfIncTables) { - SVnodeObj* pVnode = &vnodeList[pQueryMsg->vnode]; - - int32_t num = 0; - int32_t index = 0; - - int32_t code = TSDB_CODE_SUCCESS; - - for (int32_t i = 0; i < pQueryMsg->numOfSids; ++i) { - SMeterObj* pTable = pVnode->meterList[pSids[i]->sid]; - - /* - * If table is missing or is in dropping status, config it from management node, and ignore it - * during query processing. The error code of TSDB_CODE_NOT_ACTIVE_TABLE will never return to client. - * The missing table needs to be removed from pSids list - */ - if (pTable == NULL || vnodeIsMeterState(pTable, TSDB_METER_STATE_DROPPING)) { - dWarn("qmsg:%p, vid:%d sid:%d, not there or will be dropped, ignore this table in query", pQueryMsg, - pQueryMsg->vnode, pSids[i]->sid); - - vnodeSendMeterCfgMsg(pQueryMsg->vnode, pSids[i]->sid); - continue; - } else if (pTable->uid != pSids[i]->uid || pTable->sid != pSids[i]->sid) { - code = TSDB_CODE_TABLE_ID_MISMATCH; - dError("qmsg:%p, vid:%d sid:%d id:%s uid:%" PRIu64 ", id mismatch. sid:%d uid:%" PRId64 " in msg", pQueryMsg, - pQueryMsg->vnode, pTable->sid, pTable->meterId, pTable->uid, pSids[i]->sid, pSids[i]->uid); - - vnodeSendMeterCfgMsg(pQueryMsg->vnode, pSids[i]->sid); - continue; - } else if (pTable->state > TSDB_METER_STATE_INSERTING) { //update or import - code = TSDB_CODE_ACTION_IN_PROGRESS; - dTrace("qmsg:%p, vid:%d sid:%d id:%s, it is in state:%s, wait!", pQueryMsg, pQueryMsg->vnode, pSids[i]->sid, - pTable->meterId, taosGetTableStatusStr(pTable->state)); - continue; - } - - /* - * vnodeIsSafeToDeleteMeter will wait for this function complete, and then it can - * check if the numOfQueries is 0 or not. 
- */ - pMeterObjList[(*numOfIncTables)++] = pTable; - atomic_fetch_add_32(&pTable->numOfQueries, 1); - - pSids[index++] = pSids[i]; - - // output for meter more than one query executed - if (pTable->numOfQueries > 1) { - dTrace("qmsg:%p, vid:%d sid:%d id:%s, inc query ref, numOfQueries:%d", pQueryMsg, pTable->vnode, pTable->sid, - pTable->meterId, pTable->numOfQueries); - num++; - } - } - - dTrace("qmsg:%p, query meters: %d, inc query ref %d, numOfQueries on %d meters are 1, queried meters:%d after " - "filter missing meters", pQueryMsg, pQueryMsg->numOfSids, *numOfIncTables, (*numOfIncTables) - num, index); - - assert(pQueryMsg->numOfSids >= (*numOfIncTables) && pQueryMsg->numOfSids >= index); - - pQueryMsg->numOfSids = index; - return code; -} - -void vnodeDecQueryRefCount(SQueryMeterMsg* pQueryMsg, SMeterObj** pMeterObjList, int32_t numOfIncTables) { - int32_t num = 0; - - for (int32_t i = 0; i < numOfIncTables; ++i) { - SMeterObj* pTable = pMeterObjList[i]; - - if (pTable != NULL) { // here, do not need to lock to perform operations - atomic_fetch_sub_32(&pTable->numOfQueries, 1); - - if (pTable->numOfQueries > 0) { - dTrace("qmsg:%p, vid:%d sid:%d id:%s dec query ref, numOfQueries:%d", pQueryMsg, pTable->vnode, pTable->sid, - pTable->meterId, pTable->numOfQueries); - num++; - } - } - } - - dTrace("qmsg:%p, dec query ref for %d meters, numOfQueries on %d meters are 0", pQueryMsg, numOfIncTables, numOfIncTables - num); -} - -void vnodeUpdateQueryColumnIndex(SQuery* pQuery, SMeterObj* pMeterObj) { - if (pQuery == NULL || pMeterObj == NULL) { - return; - } - - int32_t i = 0, j = 0; - while (i < pQuery->numOfCols && j < pMeterObj->numOfColumns) { - if (pQuery->colList[i].data.colId == pMeterObj->schema[j].colId) { - pQuery->colList[i++].colIdx = (int16_t)j++; - } else if (pQuery->colList[i].data.colId < pMeterObj->schema[j].colId) { - pQuery->colList[i++].colIdx = -1; - } else if (pQuery->colList[i].data.colId > pMeterObj->schema[j].colId) { - j++; - } - } - - 
while (i < pQuery->numOfCols) { - pQuery->colList[i++].colIdx = -1; // not such column in current meter - } - - // sql expression has not been created yet - if (pQuery->pSelectExpr == NULL) { - return; - } - - for(int32_t k = 0; k < pQuery->numOfOutputCols; ++k) { - SSqlFuncExprMsg* pSqlExprMsg = &pQuery->pSelectExpr[k].pBase; - if (pSqlExprMsg->functionId == TSDB_FUNC_ARITHM || pSqlExprMsg->colInfo.flag == TSDB_COL_TAG) { - continue; - } - - SColIndexEx* pColIndexEx = &pSqlExprMsg->colInfo; - for(int32_t f = 0; f < pQuery->numOfCols; ++f) { - if (pColIndexEx->colId == pQuery->colList[f].data.colId) { - pColIndexEx->colIdx = pQuery->colList[f].colIdx; - break; - } - } - } -} - -int32_t vnodeSetMeterState(SMeterObj* pMeterObj, int32_t state) { - return atomic_val_compare_exchange_32(&pMeterObj->state, TSDB_METER_STATE_READY, state); -} - -void vnodeClearMeterState(SMeterObj* pMeterObj, int32_t state) { - pMeterObj->state &= (~state); -} - -bool vnodeIsMeterState(SMeterObj* pMeterObj, int32_t state) { - if (state == TSDB_METER_STATE_READY) { - return pMeterObj->state == TSDB_METER_STATE_READY; - } else if (state == TSDB_METER_STATE_DROPPING) { - return pMeterObj->state >= state; - } else { - return (((pMeterObj->state) & state) == state); - } -} - -void vnodeSetMeterDeleting(SMeterObj* pMeterObj) { - if (pMeterObj == NULL) { - return; - } - - pMeterObj->state |= TSDB_METER_STATE_DROPPING; -} - -int32_t vnodeSetMeterInsertImportStateEx(SMeterObj* pObj, int32_t st) { - int32_t code = TSDB_CODE_SUCCESS; - - int32_t state = vnodeSetMeterState(pObj, st); - if (state != TSDB_METER_STATE_READY) {//return to denote import is not performed - if (vnodeIsMeterState(pObj, TSDB_METER_STATE_DROPPING)) { - dTrace("vid:%d sid:%d id:%s, meter is deleted, state:%d", pObj->vnode, pObj->sid, pObj->meterId, - pObj->state); - code = TSDB_CODE_NOT_ACTIVE_TABLE; - } else {// waiting for 300ms by default and try again - dTrace("vid:%d sid:%d id:%s, try submit again since in state:%d", 
pObj->vnode, pObj->sid, - pObj->meterId, pObj->state); - - code = TSDB_CODE_ACTION_IN_PROGRESS; - } - } - - return code; -} - -bool vnodeIsSafeToDeleteMeter(SVnodeObj* pVnode, int32_t sid) { - SMeterObj* pObj = pVnode->meterList[sid]; - - if (pObj == NULL || vnodeIsMeterState(pObj, TSDB_METER_STATE_DROPPED)) { - return true; - } - - int32_t prev = vnodeSetMeterState(pObj, TSDB_METER_STATE_DROPPING); - - /* - * if the meter is not in ready/deleting state, it must be in insert/import/update, - * set the deleting state and wait the procedure to be completed - */ - if (prev != TSDB_METER_STATE_READY && prev < TSDB_METER_STATE_DROPPING) { - vnodeSetMeterDeleting(pObj); - - dWarn("vid:%d sid:%d id:%s, can not be deleted, state:%d, wait", pObj->vnode, pObj->sid, pObj->meterId, prev); - return false; - } - - bool ready = true; - - /* - * the query will be stopped ASAP, since the state of meter is set to TSDB_METER_STATE_DROPPING, - * and new query will abort since the meter is deleted. - */ - pthread_mutex_lock(&pVnode->vmutex); - if (pObj->numOfQueries > 0) { - dWarn("vid:%d sid:%d id:%s %d queries executing on it, wait query to be finished", - pObj->vnode, pObj->sid, pObj->meterId, pObj->numOfQueries); - ready = false; - } - pthread_mutex_unlock(&pVnode->vmutex); - - return ready; -} - -void vnodeFreeColumnInfo(SColumnInfo* pColumnInfo) { - if (pColumnInfo == NULL) { - return; - } - - if (pColumnInfo->numOfFilters > 0) { - if (pColumnInfo->type == TSDB_DATA_TYPE_BINARY) { - for (int32_t i = 0; i < pColumnInfo->numOfFilters; ++i) { - tfree(pColumnInfo->filters[i].pz); - pColumnInfo->filters[i].len = 0; - } - } - - tfree(pColumnInfo->filters); - } -} -- GitLab