未验证 提交 38598c5e 编写于 作者: S Shengliang Guan 提交者: GitHub

Merge pull request #2784 from taosdata/feature/query

Feature/query
...@@ -31,8 +31,8 @@ extern "C" { ...@@ -31,8 +31,8 @@ extern "C" {
#include "tutil.h" #include "tutil.h"
#include "qExecutor.h" #include "qExecutor.h"
#include "qSqlparser.h"
#include "qTsbuf.h" #include "qTsbuf.h"
#include "qsqlparser.h"
#include "tcmdtype.h" #include "tcmdtype.h"
// forward declaration // forward declaration
......
...@@ -430,7 +430,7 @@ void tscTableMetaCallBack(void *param, TAOS_RES *res, int code) { ...@@ -430,7 +430,7 @@ void tscTableMetaCallBack(void *param, TAOS_RES *res, int code) {
pRes->code = code; pRes->code = code;
if (code != TSDB_CODE_SUCCESS) { if (code != TSDB_CODE_SUCCESS) {
tscError("%p ge tableMeta failed, code:%s", pSql, tstrerror(code)); tscError("%p get tableMeta failed, code:%s", pSql, tstrerror(code));
goto _error; goto _error;
} else { } else {
tscDebug("%p get tableMeta successfully", pSql); tscDebug("%p get tableMeta successfully", pSql);
......
...@@ -2131,6 +2131,11 @@ static STopBotInfo *getTopBotOutputInfo(SQLFunctionCtx *pCtx) { ...@@ -2131,6 +2131,11 @@ static STopBotInfo *getTopBotOutputInfo(SQLFunctionCtx *pCtx) {
} }
bool topbot_datablock_filter(SQLFunctionCtx *pCtx, int32_t functionId, const char *minval, const char *maxval) { bool topbot_datablock_filter(SQLFunctionCtx *pCtx, int32_t functionId, const char *minval, const char *maxval) {
SResultInfo *pResInfo = GET_RES_INFO(pCtx);
if (pResInfo == NULL) {
return true;
}
STopBotInfo *pTopBotInfo = getTopBotOutputInfo(pCtx); STopBotInfo *pTopBotInfo = getTopBotOutputInfo(pCtx);
// required number of results are not reached, continue load data block // required number of results are not reached, continue load data block
......
...@@ -691,9 +691,15 @@ int32_t tscLocalReducerEnvCreate(SSqlObj *pSql, tExtMemBuffer ***pMemBuffer, tOr ...@@ -691,9 +691,15 @@ int32_t tscLocalReducerEnvCreate(SSqlObj *pSql, tExtMemBuffer ***pMemBuffer, tOr
pModel = createColumnModel(pSchema, size, capacity); pModel = createColumnModel(pSchema, size, capacity);
int32_t pg = DEFAULT_PAGE_SIZE;
int32_t overhead = sizeof(tFilePage);
while((pg - overhead) < pModel->rowSize * 2) {
pg *= 2;
}
size_t numOfSubs = pTableMetaInfo->vgroupList->numOfVgroups; size_t numOfSubs = pTableMetaInfo->vgroupList->numOfVgroups;
for (int32_t i = 0; i < numOfSubs; ++i) { for (int32_t i = 0; i < numOfSubs; ++i) {
(*pMemBuffer)[i] = createExtMemBuffer(nBufferSizes, rlen, pModel); (*pMemBuffer)[i] = createExtMemBuffer(nBufferSizes, rlen, pg, pModel);
(*pMemBuffer)[i]->flushModel = MULTIPLE_APPEND_MODEL; (*pMemBuffer)[i]->flushModel = MULTIPLE_APPEND_MODEL;
} }
......
...@@ -1505,12 +1505,11 @@ static int32_t tscReissueSubquery(SRetrieveSupport *trsupport, SSqlObj *pSql, in ...@@ -1505,12 +1505,11 @@ static int32_t tscReissueSubquery(SRetrieveSupport *trsupport, SSqlObj *pSql, in
SSqlObj *pNew = tscCreateSqlObjForSubquery(trsupport->pParentSql, trsupport, pSql); SSqlObj *pNew = tscCreateSqlObjForSubquery(trsupport->pParentSql, trsupport, pSql);
// todo add to async res or not??
if (pNew == NULL) { if (pNew == NULL) {
tscError("%p sub:%p failed to create new subquery due to out of memory, abort retry, vgId:%d, orderOfSub:%d", tscError("%p sub:%p failed to create new subquery due to error:%s, abort retry, vgId:%d, orderOfSub:%d",
trsupport->pParentSql, pSql, pVgroup->vgId, trsupport->subqueryIndex); trsupport->pParentSql, pSql, tstrerror(terrno), pVgroup->vgId, trsupport->subqueryIndex);
pParentSql->res.code = TSDB_CODE_TSC_OUT_OF_MEMORY; pParentSql->res.code = terrno;
trsupport->numOfRetry = MAX_NUM_OF_SUBQUERY_RETRY; trsupport->numOfRetry = MAX_NUM_OF_SUBQUERY_RETRY;
return pParentSql->res.code; return pParentSql->res.code;
......
...@@ -148,7 +148,7 @@ void taos_init_imp() { ...@@ -148,7 +148,7 @@ void taos_init_imp() {
refreshTime = refreshTime < 10 ? 10 : refreshTime; refreshTime = refreshTime < 10 ? 10 : refreshTime;
if (tscCacheHandle == NULL) { if (tscCacheHandle == NULL) {
tscCacheHandle = taosCacheInit(TSDB_DATA_TYPE_BINARY, refreshTime, false, NULL, "client"); tscCacheHandle = taosCacheInit(TSDB_DATA_TYPE_BINARY, refreshTime, false, NULL, "tableMeta");
} }
tscDebug("client is initialized successfully"); tscDebug("client is initialized successfully");
......
...@@ -356,9 +356,9 @@ void tscPartiallyFreeSqlObj(SSqlObj* pSql) { ...@@ -356,9 +356,9 @@ void tscPartiallyFreeSqlObj(SSqlObj* pSql) {
// pSql->sqlstr will be used by tscBuildQueryStreamDesc // pSql->sqlstr will be used by tscBuildQueryStreamDesc
if (pObj->signature == pObj) { if (pObj->signature == pObj) {
pthread_mutex_lock(&pObj->mutex); //pthread_mutex_lock(&pObj->mutex);
tfree(pSql->sqlstr); tfree(pSql->sqlstr);
pthread_mutex_unlock(&pObj->mutex); //pthread_mutex_unlock(&pObj->mutex);
} }
tscFreeSqlResult(pSql); tscFreeSqlResult(pSql);
...@@ -1675,6 +1675,7 @@ SSqlObj* createSubqueryObj(SSqlObj* pSql, int16_t tableIndex, void (*fp)(), void ...@@ -1675,6 +1675,7 @@ SSqlObj* createSubqueryObj(SSqlObj* pSql, int16_t tableIndex, void (*fp)(), void
SSqlObj* pNew = (SSqlObj*)calloc(1, sizeof(SSqlObj)); SSqlObj* pNew = (SSqlObj*)calloc(1, sizeof(SSqlObj));
if (pNew == NULL) { if (pNew == NULL) {
tscError("%p new subquery failed, tableIndex:%d", pSql, tableIndex); tscError("%p new subquery failed, tableIndex:%d", pSql, tableIndex);
terrno = TSDB_CODE_TSC_OUT_OF_MEMORY;
return NULL; return NULL;
} }
...@@ -1688,6 +1689,7 @@ SSqlObj* createSubqueryObj(SSqlObj* pSql, int16_t tableIndex, void (*fp)(), void ...@@ -1688,6 +1689,7 @@ SSqlObj* createSubqueryObj(SSqlObj* pSql, int16_t tableIndex, void (*fp)(), void
tscError("%p new subquery failed, tableIndex:%d, vgroupIndex:%d", pSql, tableIndex, pTableMetaInfo->vgroupIndex); tscError("%p new subquery failed, tableIndex:%d, vgroupIndex:%d", pSql, tableIndex, pTableMetaInfo->vgroupIndex);
free(pNew); free(pNew);
terrno = TSDB_CODE_TSC_OUT_OF_MEMORY;
return NULL; return NULL;
} }
...@@ -1706,6 +1708,7 @@ SSqlObj* createSubqueryObj(SSqlObj* pSql, int16_t tableIndex, void (*fp)(), void ...@@ -1706,6 +1708,7 @@ SSqlObj* createSubqueryObj(SSqlObj* pSql, int16_t tableIndex, void (*fp)(), void
if (tscAddSubqueryInfo(pnCmd) != TSDB_CODE_SUCCESS) { if (tscAddSubqueryInfo(pnCmd) != TSDB_CODE_SUCCESS) {
tscFreeSqlObj(pNew); tscFreeSqlObj(pNew);
terrno = TSDB_CODE_TSC_OUT_OF_MEMORY;
return NULL; return NULL;
} }
...@@ -1743,6 +1746,7 @@ SSqlObj* createSubqueryObj(SSqlObj* pSql, int16_t tableIndex, void (*fp)(), void ...@@ -1743,6 +1746,7 @@ SSqlObj* createSubqueryObj(SSqlObj* pSql, int16_t tableIndex, void (*fp)(), void
if (tscAllocPayload(pnCmd, TSDB_DEFAULT_PAYLOAD_SIZE) != TSDB_CODE_SUCCESS) { if (tscAllocPayload(pnCmd, TSDB_DEFAULT_PAYLOAD_SIZE) != TSDB_CODE_SUCCESS) {
tscError("%p new subquery failed, tableIndex:%d, vgroupIndex:%d", pSql, tableIndex, pTableMetaInfo->vgroupIndex); tscError("%p new subquery failed, tableIndex:%d, vgroupIndex:%d", pSql, tableIndex, pTableMetaInfo->vgroupIndex);
tscFreeSqlObj(pNew); tscFreeSqlObj(pNew);
terrno = TSDB_CODE_TSC_OUT_OF_MEMORY;
return NULL; return NULL;
} }
...@@ -1827,8 +1831,16 @@ SSqlObj* createSubqueryObj(SSqlObj* pSql, int16_t tableIndex, void (*fp)(), void ...@@ -1827,8 +1831,16 @@ SSqlObj* createSubqueryObj(SSqlObj* pSql, int16_t tableIndex, void (*fp)(), void
} }
if (pFinalInfo->pTableMeta == NULL) { if (pFinalInfo->pTableMeta == NULL) {
tscError("%p new subquery failed for get tableMeta is NULL from cache", pSql); tscError("%p new subquery failed since no tableMeta in cache, name:%s", pSql, name);
tscFreeSqlObj(pNew); tscFreeSqlObj(pNew);
if (pPrevSql != NULL) {
assert(pPrevSql->res.code != TSDB_CODE_SUCCESS);
terrno = pPrevSql->res.code;
} else {
terrno = TSDB_CODE_TSC_APP_ERROR;
}
return NULL; return NULL;
} }
......
...@@ -49,7 +49,7 @@ static taos_qset readQset; ...@@ -49,7 +49,7 @@ static taos_qset readQset;
int32_t dnodeInitVnodeRead() { int32_t dnodeInitVnodeRead() {
readQset = taosOpenQset(); readQset = taosOpenQset();
readPool.min = 2; readPool.min = tsNumOfCores;
readPool.max = tsNumOfCores * tsNumOfThreadsPerCore; readPool.max = tsNumOfCores * tsNumOfThreadsPerCore;
if (readPool.max <= readPool.min * 2) readPool.max = 2 * readPool.min; if (readPool.max <= readPool.min * 2) readPool.max = 2 * readPool.min;
readPool.readWorker = (SReadWorker *)calloc(sizeof(SReadWorker), readPool.max); readPool.readWorker = (SReadWorker *)calloc(sizeof(SReadWorker), readPool.max);
...@@ -206,10 +206,14 @@ static void *dnodeProcessReadQueue(void *param) { ...@@ -206,10 +206,14 @@ static void *dnodeProcessReadQueue(void *param) {
taosMsg[pReadMsg->rpcMsg.msgType], type); taosMsg[pReadMsg->rpcMsg.msgType], type);
int32_t code = vnodeProcessRead(pVnode, pReadMsg); int32_t code = vnodeProcessRead(pVnode, pReadMsg);
if (type == TAOS_QTYPE_RPC) { if (type == TAOS_QTYPE_RPC && code != TSDB_CODE_QRY_NOT_READY) {
dnodeSendRpcReadRsp(pVnode, pReadMsg, code); dnodeSendRpcReadRsp(pVnode, pReadMsg, code);
} else { } else {
dnodeDispatchNonRspMsg(pVnode, pReadMsg, code); if (code == TSDB_CODE_QRY_HAS_RSP) {
dnodeSendRpcReadRsp(pVnode, pReadMsg, TSDB_CODE_SUCCESS);
} else {
dnodeDispatchNonRspMsg(pVnode, pReadMsg, code);
}
} }
taosFreeQitem(pReadMsg); taosFreeQitem(pReadMsg);
......
...@@ -28,7 +28,7 @@ typedef void* qinfo_t; ...@@ -28,7 +28,7 @@ typedef void* qinfo_t;
* @param qinfo * @param qinfo
* @return * @return
*/ */
int32_t qCreateQueryInfo(void* tsdb, int32_t vgId, SQueryTableMsg* pQueryTableMsg, void* param, qinfo_t* qinfo); int32_t qCreateQueryInfo(void* tsdb, int32_t vgId, SQueryTableMsg* pQueryTableMsg, qinfo_t* qinfo);
/** /**
...@@ -38,7 +38,10 @@ int32_t qCreateQueryInfo(void* tsdb, int32_t vgId, SQueryTableMsg* pQueryTableMs ...@@ -38,7 +38,10 @@ int32_t qCreateQueryInfo(void* tsdb, int32_t vgId, SQueryTableMsg* pQueryTableMs
* @param qinfo * @param qinfo
* @return * @return
*/ */
void qTableQuery(qinfo_t qinfo); bool qTableQuery(qinfo_t qinfo);
void* pGetRspMsg(qinfo_t qinfo);
/** /**
* Retrieve the produced results information, if current query is not paused or completed, * Retrieve the produced results information, if current query is not paused or completed,
...@@ -48,7 +51,7 @@ void qTableQuery(qinfo_t qinfo); ...@@ -48,7 +51,7 @@ void qTableQuery(qinfo_t qinfo);
* @param qinfo * @param qinfo
* @return * @return
*/ */
int32_t qRetrieveQueryResultInfo(qinfo_t qinfo); int32_t qRetrieveQueryResultInfo(qinfo_t qinfo, bool* buildRes, void* pRspContext);
/** /**
* *
...@@ -60,16 +63,9 @@ int32_t qRetrieveQueryResultInfo(qinfo_t qinfo); ...@@ -60,16 +63,9 @@ int32_t qRetrieveQueryResultInfo(qinfo_t qinfo);
* @param contLen payload length * @param contLen payload length
* @return * @return
*/ */
int32_t qDumpRetrieveResult(qinfo_t qinfo, SRetrieveTableRsp** pRsp, int32_t* contLen); int32_t qDumpRetrieveResult(qinfo_t qinfo, SRetrieveTableRsp** pRsp, int32_t* contLen, bool* continueExec);
/** void* qGetResultRetrieveMsg(qinfo_t qinfo);
* Decide if more results will be produced or not, NOTE: this function will increase the ref count of QInfo,
* so it can be only called once for each retrieve
*
* @param qinfo
* @return
*/
bool qHasMoreResultsToRetrieve(qinfo_t qinfo);
/** /**
* kill current ongoing query and free query handle automatically * kill current ongoing query and free query handle automatically
......
...@@ -216,6 +216,8 @@ TAOS_DEFINE_ERROR(TSDB_CODE_QRY_OUT_OF_MEMORY, 0, 0x0703, "query out ...@@ -216,6 +216,8 @@ TAOS_DEFINE_ERROR(TSDB_CODE_QRY_OUT_OF_MEMORY, 0, 0x0703, "query out
TAOS_DEFINE_ERROR(TSDB_CODE_QRY_APP_ERROR, 0, 0x0704, "query app error") TAOS_DEFINE_ERROR(TSDB_CODE_QRY_APP_ERROR, 0, 0x0704, "query app error")
TAOS_DEFINE_ERROR(TSDB_CODE_QRY_DUP_JOIN_KEY, 0, 0x0705, "query duplicated join key") TAOS_DEFINE_ERROR(TSDB_CODE_QRY_DUP_JOIN_KEY, 0, 0x0705, "query duplicated join key")
TAOS_DEFINE_ERROR(TSDB_CODE_QRY_EXCEED_TAGS_LIMIT, 0, 0x0706, "query tag conditon too many") TAOS_DEFINE_ERROR(TSDB_CODE_QRY_EXCEED_TAGS_LIMIT, 0, 0x0706, "query tag conditon too many")
TAOS_DEFINE_ERROR(TSDB_CODE_QRY_NOT_READY, 0, 0x0707, "query not ready")
TAOS_DEFINE_ERROR(TSDB_CODE_QRY_HAS_RSP, 0, 0x0708, "query should response")
// grant // grant
TAOS_DEFINE_ERROR(TSDB_CODE_GRANT_EXPIRED, 0, 0x0800, "grant expired") TAOS_DEFINE_ERROR(TSDB_CODE_GRANT_EXPIRED, 0, 0x0800, "grant expired")
......
...@@ -68,7 +68,7 @@ int32_t mnodeInitProfile() { ...@@ -68,7 +68,7 @@ int32_t mnodeInitProfile() {
mnodeAddWriteMsgHandle(TSDB_MSG_TYPE_CM_KILL_STREAM, mnodeProcessKillStreamMsg); mnodeAddWriteMsgHandle(TSDB_MSG_TYPE_CM_KILL_STREAM, mnodeProcessKillStreamMsg);
mnodeAddWriteMsgHandle(TSDB_MSG_TYPE_CM_KILL_CONN, mnodeProcessKillConnectionMsg); mnodeAddWriteMsgHandle(TSDB_MSG_TYPE_CM_KILL_CONN, mnodeProcessKillConnectionMsg);
tsMnodeConnCache = taosCacheInit(TSDB_DATA_TYPE_INT, CONN_CHECK_TIME, false, mnodeFreeConn, "conn"); tsMnodeConnCache = taosCacheInit(TSDB_DATA_TYPE_INT, CONN_CHECK_TIME, true, mnodeFreeConn, "conn");
return 0; return 0;
} }
...@@ -119,7 +119,7 @@ SConnObj *mnodeAccquireConn(int32_t connId, char *user, uint32_t ip, uint16_t po ...@@ -119,7 +119,7 @@ SConnObj *mnodeAccquireConn(int32_t connId, char *user, uint32_t ip, uint16_t po
return NULL; return NULL;
} }
if (pConn->ip != ip || pConn->port != port /* || strcmp(pConn->user, user) != 0 */) { if (/* pConn->ip != ip || */ pConn->port != port /* || strcmp(pConn->user, user) != 0 */) {
mError("connId:%d, incoming conn user:%s ip:%s:%u, not match exist conn user:%s ip:%s:%u", connId, user, mError("connId:%d, incoming conn user:%s ip:%s:%u, not match exist conn user:%s ip:%s:%u", connId, user,
taosIpStr(ip), port, pConn->user, taosIpStr(pConn->ip), pConn->port); taosIpStr(ip), port, pConn->user, taosIpStr(pConn->ip), pConn->port);
taosCacheRelease(tsMnodeConnCache, (void **)&pConn, false); taosCacheRelease(tsMnodeConnCache, (void **)&pConn, false);
......
...@@ -58,7 +58,7 @@ static void httpDestroyContext(void *data) { ...@@ -58,7 +58,7 @@ static void httpDestroyContext(void *data) {
} }
bool httpInitContexts() { bool httpInitContexts() {
tsHttpServer.contextCache = taosCacheInit(TSDB_DATA_TYPE_BIGINT, 2, false, httpDestroyContext, "restc"); tsHttpServer.contextCache = taosCacheInit(TSDB_DATA_TYPE_BIGINT, 2, true, httpDestroyContext, "restc");
if (tsHttpServer.contextCache == NULL) { if (tsHttpServer.contextCache == NULL) {
httpError("failed to init context cache"); httpError("failed to init context cache");
return false; return false;
......
...@@ -20,8 +20,8 @@ ...@@ -20,8 +20,8 @@
#include "hash.h" #include "hash.h"
#include "qFill.h" #include "qFill.h"
#include "qResultbuf.h" #include "qResultbuf.h"
#include "qSqlparser.h"
#include "qTsbuf.h" #include "qTsbuf.h"
#include "qsqlparser.h"
#include "query.h" #include "query.h"
#include "taosdef.h" #include "taosdef.h"
#include "tarray.h" #include "tarray.h"
...@@ -43,7 +43,7 @@ typedef struct SSqlGroupbyExpr { ...@@ -43,7 +43,7 @@ typedef struct SSqlGroupbyExpr {
typedef struct SPosInfo { typedef struct SPosInfo {
int32_t pageId; int32_t pageId;
int16_t rowId; int32_t rowId;
} SPosInfo; } SPosInfo;
typedef struct SWindowStatus { typedef struct SWindowStatus {
...@@ -177,13 +177,18 @@ typedef struct SQueryRuntimeEnv { ...@@ -177,13 +177,18 @@ typedef struct SQueryRuntimeEnv {
SDiskbasedResultBuf* pResultBuf; // query result buffer based on blocked-wised disk file SDiskbasedResultBuf* pResultBuf; // query result buffer based on blocked-wised disk file
} SQueryRuntimeEnv; } SQueryRuntimeEnv;
/**
 * Readiness states of a query result.
 *
 * NOTE(review): presumably these are the values stored in SQInfo.dataReady
 * ("denote if query result is ready or not") -- confirm against the executor.
 * Neither state is 0, so a zero-initialized field matches neither constant.
 */
enum {
QUERY_RESULT_NOT_READY = 1,
QUERY_RESULT_READY = 2,
};
typedef struct SQInfo { typedef struct SQInfo {
void* signature; void* signature;
int32_t pointsInterpo; int32_t pointsInterpo;
int32_t code; // error code to returned to client int32_t code; // error code to returned to client
sem_t dataReady; // sem_t dataReady;
void* tsdb; void* tsdb;
void* param;
int32_t vgId; int32_t vgId;
STableGroupInfo tableGroupInfo; // table id list < only includes the STable list> STableGroupInfo tableGroupInfo; // table id list < only includes the STable list>
STableGroupInfo tableqinfoGroupInfo; // this is a group array list, including SArray<STableQueryInfo*> structure STableGroupInfo tableqinfoGroupInfo; // this is a group array list, including SArray<STableQueryInfo*> structure
...@@ -200,8 +205,11 @@ typedef struct SQInfo { ...@@ -200,8 +205,11 @@ typedef struct SQInfo {
*/ */
int32_t tableIndex; int32_t tableIndex;
int32_t numOfGroupResultPages; int32_t numOfGroupResultPages;
void* pBuf; // allocated buffer for STableQueryInfo, sizeof(STableQueryInfo)*numOfTables; void* pBuf; // allocated buffer for STableQueryInfo, sizeof(STableQueryInfo)*numOfTables;
pthread_mutex_t lock; // used to synchronize the rsp/query threads
int32_t dataReady; // denote if query result is ready or not
void* rspContext; // response context
} SQInfo; } SQInfo;
#endif // TDENGINE_QUERYEXECUTOR_H #endif // TDENGINE_QUERYEXECUTOR_H
...@@ -19,7 +19,6 @@ ...@@ -19,7 +19,6 @@
extern "C" { extern "C" {
#endif #endif
#include "os.h" #include "os.h"
#include "taosmsg.h" #include "taosmsg.h"
...@@ -28,9 +27,9 @@ extern "C" { ...@@ -28,9 +27,9 @@ extern "C" {
#include "tdataformat.h" #include "tdataformat.h"
#include "talgo.h" #include "talgo.h"
#define DEFAULT_PAGE_SIZE (1024L*4) // 16k larger than the SHistoInfo #define MAX_TMPFILE_PATH_LENGTH PATH_MAX
#define MAX_TMPFILE_PATH_LENGTH PATH_MAX
#define INITIAL_ALLOCATION_BUFFER_SIZE 64 #define INITIAL_ALLOCATION_BUFFER_SIZE 64
#define DEFAULT_PAGE_SIZE (4096L) // 16k larger than the SHistoInfo
typedef enum EXT_BUFFER_FLUSH_MODEL { typedef enum EXT_BUFFER_FLUSH_MODEL {
/* /*
...@@ -126,7 +125,7 @@ typedef struct tExtMemBuffer { ...@@ -126,7 +125,7 @@ typedef struct tExtMemBuffer {
* @param pModel * @param pModel
* @return * @return
*/ */
tExtMemBuffer *createExtMemBuffer(int32_t inMemSize, int32_t elemSize, SColumnModel *pModel); tExtMemBuffer *createExtMemBuffer(int32_t inMemSize, int32_t elemSize, int32_t pagesize, SColumnModel *pModel);
/** /**
* *
......
...@@ -13,50 +13,85 @@ ...@@ -13,50 +13,85 @@
* along with this program. If not, see <http://www.gnu.org/licenses/>. * along with this program. If not, see <http://www.gnu.org/licenses/>.
*/ */
#ifndef TDENGINE_VNODEQUERYUTIL_H #ifndef TDENGINE_QRESULTBUF_H
#define TDENGINE_VNODEQUERYUTIL_H #define TDENGINE_QRESULTBUF_H
#ifdef __cplusplus #ifdef __cplusplus
extern "C" { extern "C" {
#endif #endif
#include <tlist.h>
#include "hash.h" #include "hash.h"
#include "os.h" #include "os.h"
#include "qExtbuffer.h" #include "qExtbuffer.h"
#include "tlockfree.h"
typedef struct SArray* SIDList; typedef struct SArray* SIDList;
/**
 * Offset/length pair attached to every page descriptor (see SPageInfo.info).
 *
 * PAGE_INFO_INITIALIZER sets both fields to -1.
 * NOTE(review): presumably this is where the page's flushed bytes live in the
 * backing disk file, with -1 meaning "not flushed yet"; units presumably
 * bytes -- confirm against the implementation.
 */
typedef struct SPageDiskInfo {
int32_t offset;  // start position of the page data
int32_t length;  // size of the page data
} SPageDiskInfo;
/**
 * Descriptor of a single page managed by the disk-based result buffer.
 *
 * NOTE(review): presumably one descriptor exists per allocated page id and
 * pData is only valid while the page is resident in memory -- confirm.
 */
typedef struct SPageInfo {
SListNode* pn; // points to the list node associated with this page
int32_t pageId; // id of this page
SPageDiskInfo info; // offset/length record for this page
void* pData; // page data buffer
bool used; // set when the current page is in use
} SPageInfo;
/**
 * An offset/length pair describing one free region.
 *
 * NOTE(review): presumably the element type of SDiskbasedResultBuf.pFree
 * ("free area in file"); units presumably bytes -- confirm.
 */
typedef struct SFreeListItem {
int32_t offset;  // start of the free region
int32_t len;     // size of the free region
} SFreeListItem;
/**
 * Counters kept for a result buffer (embedded as SDiskbasedResultBuf.statis).
 *
 * NOTE(review): the meanings below are read off the field names only --
 * confirm where each counter is updated.
 */
typedef struct SResultBufStatis {
int32_t flushBytes;    // presumably total bytes flushed
int32_t loadBytes;     // presumably total bytes loaded back
int32_t getPages;      // presumably number of page-get requests
int32_t releasePages;  // presumably number of page releases
int32_t flushPages;    // presumably number of pages flushed
} SResultBufStatis;
typedef struct SDiskbasedResultBuf { typedef struct SDiskbasedResultBuf {
int32_t numOfRowsPerPage; int32_t numOfRowsPerPage;
int32_t numOfPages; int32_t numOfPages;
int64_t totalBufSize; int64_t totalBufSize;
int32_t fd; // data file fd int64_t fileSize; // disk file size
FILE* file;
int32_t allocateId; // allocated page id int32_t allocateId; // allocated page id
int32_t incStep; // minimum allocated pages
void* pBuf; // mmap buffer pointer
char* path; // file path char* path; // file path
int32_t pageSize; // current used page size int32_t pageSize; // current used page size
int32_t inMemPages; // numOfPages that are allocated in memory int32_t inMemPages; // numOfPages that are allocated in memory
SHashObj* idsTable; // id hash table SHashObj* groupSet; // id hash table
SIDList list; // for each id, there is a page id list SHashObj* all;
SList* lruList;
void* iBuf; // inmemory buf
void* handle; // for debug purpose
void* emptyDummyIdList; // dummy id list void* emptyDummyIdList; // dummy id list
void* assistBuf; // assistant buffer for compress/decompress data
SArray* pFree; // free area in file
bool comp; // compressed before flushed to disk
int32_t nextPos; // next page flush position
const void* handle; // for debug purpose
SResultBufStatis statis;
} SDiskbasedResultBuf; } SDiskbasedResultBuf;
#define DEFAULT_INTERN_BUF_PAGE_SIZE (1024L) #define DEFAULT_INTERN_BUF_PAGE_SIZE (4096L)
#define DEFAULT_INMEM_BUF_PAGES 10 #define DEFAULT_INMEM_BUF_PAGES 10
#define PAGE_INFO_INITIALIZER (SPageDiskInfo){-1, -1}
/** /**
* create disk-based result buffer * create disk-based result buffer
* @param pResultBuf * @param pResultBuf
* @param size
* @param rowSize * @param rowSize
* @param pagesize
* @param inMemPages
* @param handle
* @return * @return
*/ */
int32_t createDiskbasedResultBuffer(SDiskbasedResultBuf** pResultBuf, int32_t numOfPages, int32_t rowSize, int32_t pagesize, int32_t createDiskbasedResultBuffer(SDiskbasedResultBuf** pResultBuf, int32_t rowSize, int32_t pagesize,
int32_t inMemPages, void* handle); int32_t inMemBufSize, const void* handle);
/** /**
* *
...@@ -72,7 +107,7 @@ tFilePage* getNewDataBuf(SDiskbasedResultBuf* pResultBuf, int32_t groupId, int32 ...@@ -72,7 +107,7 @@ tFilePage* getNewDataBuf(SDiskbasedResultBuf* pResultBuf, int32_t groupId, int32
* @param pResultBuf * @param pResultBuf
* @return * @return
*/ */
int32_t getNumOfRowsPerPage(SDiskbasedResultBuf* pResultBuf); size_t getNumOfRowsPerPage(const SDiskbasedResultBuf* pResultBuf);
/** /**
* *
...@@ -88,42 +123,52 @@ SIDList getDataBufPagesIdList(SDiskbasedResultBuf* pResultBuf, int32_t groupId); ...@@ -88,42 +123,52 @@ SIDList getDataBufPagesIdList(SDiskbasedResultBuf* pResultBuf, int32_t groupId);
* @param id * @param id
* @return * @return
*/ */
static FORCE_INLINE tFilePage* getResBufPage(SDiskbasedResultBuf* pResultBuf, int32_t id) { tFilePage* getResBufPage(SDiskbasedResultBuf* pResultBuf, int32_t id);
if (id < pResultBuf->inMemPages) {
return (tFilePage*) ((char*) pResultBuf->iBuf + id * pResultBuf->pageSize); /**
} else { * release the referenced buf pages
return (tFilePage*) ((char*) pResultBuf->pBuf + (id - pResultBuf->inMemPages) * pResultBuf->pageSize); * @param pResultBuf
} * @param page
} */
void releaseResBufPage(SDiskbasedResultBuf* pResultBuf, void* page);
/**
 * Release a buffer page identified by its page descriptor.
 *
 * NOTE(review): presumably the SPageInfo-based counterpart of
 * releaseResBufPage(); whether the caller may keep using the page data
 * afterwards is not visible here -- confirm against the implementation.
 *
 * @param pResultBuf  result buffer that owns the page
 * @param pi          descriptor of the page to release
 */
void releaseResBufPageInfo(SDiskbasedResultBuf* pResultBuf, SPageInfo* pi);
/** /**
* get the total buffer size in the format of disk file * get the total buffer size in the format of disk file
* @param pResultBuf * @param pResultBuf
* @return * @return
*/ */
int32_t getResBufSize(SDiskbasedResultBuf* pResultBuf); size_t getResBufSize(const SDiskbasedResultBuf* pResultBuf);
/** /**
* get the number of groups in the result buffer * get the number of groups in the result buffer
* @param pResultBuf * @param pResultBuf
* @return * @return
*/ */
int32_t getNumOfResultBufGroupId(SDiskbasedResultBuf* pResultBuf); size_t getNumOfResultBufGroupId(const SDiskbasedResultBuf* pResultBuf);
/** /**
* destroy result buffer * destroy result buffer
* @param pResultBuf * @param pResultBuf
*/ */
void destroyResultBuf(SDiskbasedResultBuf* pResultBuf, void* handle); void destroyResultBuf(SDiskbasedResultBuf* pResultBuf);
/** /**
* *
* @param pList * @param pList
* @return * @return
*/ */
int32_t getLastPageId(SIDList pList); SPageInfo* getLastPageInfo(SIDList pList);
#ifdef __cplusplus #ifdef __cplusplus
} }
#endif #endif
#endif // TDENGINE_VNODEQUERYUTIL_H #endif // TDENGINE_QRESULTBUF_H
...@@ -45,13 +45,14 @@ bool isWindowResClosed(SWindowResInfo *pWindowResInfo, int32_t slot); ...@@ -45,13 +45,14 @@ bool isWindowResClosed(SWindowResInfo *pWindowResInfo, int32_t slot);
int32_t createQueryResultInfo(SQuery *pQuery, SWindowResult *pResultRow, bool isSTableQuery, size_t interBufSize); int32_t createQueryResultInfo(SQuery *pQuery, SWindowResult *pResultRow, bool isSTableQuery, size_t interBufSize);
static FORCE_INLINE char *getPosInResultPage(SQueryRuntimeEnv *pRuntimeEnv, int32_t columnIndex, SWindowResult *pResult) { static FORCE_INLINE char *getPosInResultPage(SQueryRuntimeEnv *pRuntimeEnv, int32_t columnIndex, SWindowResult *pResult,
tFilePage* page) {
assert(pResult != NULL && pRuntimeEnv != NULL); assert(pResult != NULL && pRuntimeEnv != NULL);
SQuery *pQuery = pRuntimeEnv->pQuery; SQuery *pQuery = pRuntimeEnv->pQuery;
tFilePage *page = getResBufPage(pRuntimeEnv->pResultBuf, pResult->pos.pageId); // tFilePage *page = getResBufPage(pRuntimeEnv->pResultBuf, pResult->pos.pageId);
int32_t realRowId = pResult->pos.rowId * GET_ROW_PARAM_FOR_MULTIOUTPUT(pQuery, pRuntimeEnv->topBotQuery, pRuntimeEnv->stableQuery);
int32_t realRowId = pResult->pos.rowId * GET_ROW_PARAM_FOR_MULTIOUTPUT(pQuery, pRuntimeEnv->topBotQuery, pRuntimeEnv->stableQuery);
return ((char *)page->data) + pRuntimeEnv->offset[columnIndex] * pRuntimeEnv->numOfRowsPerPage + return ((char *)page->data) + pRuntimeEnv->offset[columnIndex] * pRuntimeEnv->numOfRowsPerPage +
pQuery->pSelectExpr[columnIndex].bytes * realRowId; pQuery->pSelectExpr[columnIndex].bytes * realRowId;
} }
......
...@@ -18,8 +18,8 @@ ...@@ -18,8 +18,8 @@
#include "exception.h" #include "exception.h"
#include "qAst.h" #include "qAst.h"
#include "qSqlparser.h"
#include "qSyntaxtreefunction.h" #include "qSyntaxtreefunction.h"
#include "qsqlparser.h"
#include "taosdef.h" #include "taosdef.h"
#include "taosmsg.h" #include "taosmsg.h"
#include "tarray.h" #include "tarray.h"
......
...@@ -221,7 +221,7 @@ void updateNumOfResult(SQueryRuntimeEnv *pRuntimeEnv, int32_t numOfRes) { ...@@ -221,7 +221,7 @@ void updateNumOfResult(SQueryRuntimeEnv *pRuntimeEnv, int32_t numOfRes) {
} }
static int32_t getGroupResultId(int32_t groupIndex) { static int32_t getGroupResultId(int32_t groupIndex) {
int32_t base = 200000; int32_t base = 20000000;
return base + (groupIndex * 10000); return base + (groupIndex * 10000);
} }
...@@ -478,10 +478,14 @@ static int32_t addNewWindowResultBuf(SWindowResult *pWindowRes, SDiskbasedResult ...@@ -478,10 +478,14 @@ static int32_t addNewWindowResultBuf(SWindowResult *pWindowRes, SDiskbasedResult
if (taosArrayGetSize(list) == 0) { if (taosArrayGetSize(list) == 0) {
pData = getNewDataBuf(pResultBuf, sid, &pageId); pData = getNewDataBuf(pResultBuf, sid, &pageId);
} else { } else {
pageId = getLastPageId(list); SPageInfo* pi = getLastPageInfo(list);
pData = getResBufPage(pResultBuf, pageId); pData = getResBufPage(pResultBuf, pi->pageId);
pageId = pi->pageId;
if (pData->num >= numOfRowsPerPage) { if (pData->num >= numOfRowsPerPage) {
// release current page first, and prepare the next one
releaseResBufPageInfo(pResultBuf, pi);
pData = getNewDataBuf(pResultBuf, sid, &pageId); pData = getNewDataBuf(pResultBuf, sid, &pageId);
if (pData != NULL) { if (pData != NULL) {
assert(pData->num == 0); // number of elements must be 0 for new allocated buffer assert(pData->num == 0); // number of elements must be 0 for new allocated buffer
...@@ -497,6 +501,8 @@ static int32_t addNewWindowResultBuf(SWindowResult *pWindowRes, SDiskbasedResult ...@@ -497,6 +501,8 @@ static int32_t addNewWindowResultBuf(SWindowResult *pWindowRes, SDiskbasedResult
if (pWindowRes->pos.pageId == -1) { // not allocated yet, allocate new buffer if (pWindowRes->pos.pageId == -1) { // not allocated yet, allocate new buffer
pWindowRes->pos.pageId = pageId; pWindowRes->pos.pageId = pageId;
pWindowRes->pos.rowId = pData->num++; pWindowRes->pos.rowId = pData->num++;
assert(pWindowRes->pos.pageId >= 0);
} }
return 0; return 0;
...@@ -1490,8 +1496,6 @@ static int32_t setupQueryRuntimeEnv(SQueryRuntimeEnv *pRuntimeEnv, int16_t order ...@@ -1490,8 +1496,6 @@ static int32_t setupQueryRuntimeEnv(SQueryRuntimeEnv *pRuntimeEnv, int16_t order
goto _clean; goto _clean;
} }
qDebug("QInfo:%p setup runtime env1", GET_QINFO_ADDR(pRuntimeEnv));
pRuntimeEnv->offset[0] = 0; pRuntimeEnv->offset[0] = 0;
for (int32_t i = 0; i < pQuery->numOfOutput; ++i) { for (int32_t i = 0; i < pQuery->numOfOutput; ++i) {
SSqlFuncMsg *pSqlFuncMsg = &pQuery->pSelectExpr[i].base; SSqlFuncMsg *pSqlFuncMsg = &pQuery->pSelectExpr[i].base;
...@@ -1536,8 +1540,6 @@ static int32_t setupQueryRuntimeEnv(SQueryRuntimeEnv *pRuntimeEnv, int16_t order ...@@ -1536,8 +1540,6 @@ static int32_t setupQueryRuntimeEnv(SQueryRuntimeEnv *pRuntimeEnv, int16_t order
} }
} }
qDebug("QInfo:%p setup runtime env2", GET_QINFO_ADDR(pRuntimeEnv));
// set the order information for top/bottom query // set the order information for top/bottom query
int32_t functionId = pCtx->functionId; int32_t functionId = pCtx->functionId;
...@@ -1558,25 +1560,19 @@ static int32_t setupQueryRuntimeEnv(SQueryRuntimeEnv *pRuntimeEnv, int16_t order ...@@ -1558,25 +1560,19 @@ static int32_t setupQueryRuntimeEnv(SQueryRuntimeEnv *pRuntimeEnv, int16_t order
} }
} }
qDebug("QInfo:%p setup runtime env3", GET_QINFO_ADDR(pRuntimeEnv));
char* buf = (char*) pRuntimeEnv->resultInfo + sizeof(SResultInfo) * pQuery->numOfOutput; char* buf = (char*) pRuntimeEnv->resultInfo + sizeof(SResultInfo) * pQuery->numOfOutput;
// set the intermediate result output buffer // set the intermediate result output buffer
setWindowResultInfo(pRuntimeEnv->resultInfo, pQuery, pRuntimeEnv->stableQuery, buf); setWindowResultInfo(pRuntimeEnv->resultInfo, pQuery, pRuntimeEnv->stableQuery, buf);
qDebug("QInfo:%p setup runtime env4", GET_QINFO_ADDR(pRuntimeEnv));
// if it is group by normal column, do not set output buffer, the output buffer is pResult // if it is group by normal column, do not set output buffer, the output buffer is pResult
if (!pRuntimeEnv->groupbyNormalCol && !pRuntimeEnv->stableQuery) { if (!pRuntimeEnv->groupbyNormalCol && !pRuntimeEnv->stableQuery) {
resetCtxOutputBuf(pRuntimeEnv); resetCtxOutputBuf(pRuntimeEnv);
} }
qDebug("QInfo:%p setup runtime env5", GET_QINFO_ADDR(pRuntimeEnv));
setCtxTagColumnInfo(pRuntimeEnv, pRuntimeEnv->pCtx); setCtxTagColumnInfo(pRuntimeEnv, pRuntimeEnv->pCtx);
qDebug("QInfo:%p init completed", GET_QINFO_ADDR(pRuntimeEnv)); qDebug("QInfo:%p init runtime completed", GET_QINFO_ADDR(pRuntimeEnv));
return TSDB_CODE_SUCCESS; return TSDB_CODE_SUCCESS;
_clean: _clean:
...@@ -1615,7 +1611,7 @@ static void teardownQueryRuntimeEnv(SQueryRuntimeEnv *pRuntimeEnv) { ...@@ -1615,7 +1611,7 @@ static void teardownQueryRuntimeEnv(SQueryRuntimeEnv *pRuntimeEnv) {
pRuntimeEnv->pFillInfo = taosDestoryFillInfo(pRuntimeEnv->pFillInfo); pRuntimeEnv->pFillInfo = taosDestoryFillInfo(pRuntimeEnv->pFillInfo);
destroyResultBuf(pRuntimeEnv->pResultBuf, pQInfo); destroyResultBuf(pRuntimeEnv->pResultBuf);
tsdbCleanupQueryHandle(pRuntimeEnv->pQueryHandle); tsdbCleanupQueryHandle(pRuntimeEnv->pQueryHandle);
tsdbCleanupQueryHandle(pRuntimeEnv->pSecQueryHandle); tsdbCleanupQueryHandle(pRuntimeEnv->pSecQueryHandle);
...@@ -2111,9 +2107,6 @@ int32_t loadDataBlockOnDemand(SQueryRuntimeEnv *pRuntimeEnv, void* pQueryHandle, ...@@ -2111,9 +2107,6 @@ int32_t loadDataBlockOnDemand(SQueryRuntimeEnv *pRuntimeEnv, void* pQueryHandle,
} }
if (!needToLoadDataBlock(pRuntimeEnv, *pStatis, pRuntimeEnv->pCtx, pBlockInfo->rows)) { if (!needToLoadDataBlock(pRuntimeEnv, *pStatis, pRuntimeEnv->pCtx, pBlockInfo->rows)) {
#if defined(_DEBUG_VIEW)
qDebug("QInfo:%p block discarded by per-filter", GET_QINFO_ADDR(pRuntimeEnv));
#endif
// current block has been discard due to filter applied // current block has been discard due to filter applied
pRuntimeEnv->summary.discardBlocks += 1; pRuntimeEnv->summary.discardBlocks += 1;
qDebug("QInfo:%p data block discard, brange:%"PRId64 "-%"PRId64", rows:%d", GET_QINFO_ADDR(pRuntimeEnv), qDebug("QInfo:%p data block discard, brange:%"PRId64 "-%"PRId64", rows:%d", GET_QINFO_ADDR(pRuntimeEnv),
...@@ -2446,6 +2439,8 @@ static void doMerge(SQueryRuntimeEnv *pRuntimeEnv, int64_t timestamp, SWindowRes ...@@ -2446,6 +2439,8 @@ static void doMerge(SQueryRuntimeEnv *pRuntimeEnv, int64_t timestamp, SWindowRes
SQuery * pQuery = pRuntimeEnv->pQuery; SQuery * pQuery = pRuntimeEnv->pQuery;
SQLFunctionCtx *pCtx = pRuntimeEnv->pCtx; SQLFunctionCtx *pCtx = pRuntimeEnv->pCtx;
tFilePage *page = getResBufPage(pRuntimeEnv->pResultBuf, pWindowRes->pos.pageId);
for (int32_t i = 0; i < pQuery->numOfOutput; ++i) { for (int32_t i = 0; i < pQuery->numOfOutput; ++i) {
int32_t functionId = pQuery->pSelectExpr[i].base.functionId; int32_t functionId = pQuery->pSelectExpr[i].base.functionId;
if (!mergeFlag) { if (!mergeFlag) {
...@@ -2458,7 +2453,7 @@ static void doMerge(SQueryRuntimeEnv *pRuntimeEnv, int64_t timestamp, SWindowRes ...@@ -2458,7 +2453,7 @@ static void doMerge(SQueryRuntimeEnv *pRuntimeEnv, int64_t timestamp, SWindowRes
pCtx[i].hasNull = true; pCtx[i].hasNull = true;
pCtx[i].nStartQueryTimestamp = timestamp; pCtx[i].nStartQueryTimestamp = timestamp;
pCtx[i].aInputElemBuf = getPosInResultPage(pRuntimeEnv, i, pWindowRes); pCtx[i].aInputElemBuf = getPosInResultPage(pRuntimeEnv, i, pWindowRes, page);
// in case of tag column, the tag information should be extracted from input buffer // in case of tag column, the tag information should be extracted from input buffer
if (functionId == TSDB_FUNC_TAG_DUMMY || functionId == TSDB_FUNC_TAG) { if (functionId == TSDB_FUNC_TAG_DUMMY || functionId == TSDB_FUNC_TAG) {
...@@ -2615,14 +2610,16 @@ int32_t tableResultComparFn(const void *pLeft, const void *pRight, void *param) ...@@ -2615,14 +2610,16 @@ int32_t tableResultComparFn(const void *pLeft, const void *pRight, void *param)
SWindowResInfo *pWindowResInfo1 = &supporter->pTableQueryInfo[left]->windowResInfo; SWindowResInfo *pWindowResInfo1 = &supporter->pTableQueryInfo[left]->windowResInfo;
SWindowResult * pWindowRes1 = getWindowResult(pWindowResInfo1, leftPos); SWindowResult * pWindowRes1 = getWindowResult(pWindowResInfo1, leftPos);
tFilePage *page1 = getResBufPage(pRuntimeEnv->pResultBuf, pWindowRes1->pos.pageId);
char *b1 = getPosInResultPage(pRuntimeEnv, PRIMARYKEY_TIMESTAMP_COL_INDEX, pWindowRes1); char *b1 = getPosInResultPage(pRuntimeEnv, PRIMARYKEY_TIMESTAMP_COL_INDEX, pWindowRes1, page1);
TSKEY leftTimestamp = GET_INT64_VAL(b1); TSKEY leftTimestamp = GET_INT64_VAL(b1);
SWindowResInfo *pWindowResInfo2 = &supporter->pTableQueryInfo[right]->windowResInfo; SWindowResInfo *pWindowResInfo2 = &supporter->pTableQueryInfo[right]->windowResInfo;
SWindowResult * pWindowRes2 = getWindowResult(pWindowResInfo2, rightPos); SWindowResult * pWindowRes2 = getWindowResult(pWindowResInfo2, rightPos);
tFilePage *page2 = getResBufPage(pRuntimeEnv->pResultBuf, pWindowRes2->pos.pageId);
char *b2 = getPosInResultPage(pRuntimeEnv, PRIMARYKEY_TIMESTAMP_COL_INDEX, pWindowRes2); char *b2 = getPosInResultPage(pRuntimeEnv, PRIMARYKEY_TIMESTAMP_COL_INDEX, pWindowRes2, page2);
TSKEY rightTimestamp = GET_INT64_VAL(b2); TSKEY rightTimestamp = GET_INT64_VAL(b2);
if (leftTimestamp == rightTimestamp) { if (leftTimestamp == rightTimestamp) {
...@@ -2685,35 +2682,26 @@ void copyResToQueryResultBuf(SQInfo *pQInfo, SQuery *pQuery) { ...@@ -2685,35 +2682,26 @@ void copyResToQueryResultBuf(SQInfo *pQInfo, SQuery *pQuery) {
int32_t id = getGroupResultId(pQInfo->groupIndex - 1); int32_t id = getGroupResultId(pQInfo->groupIndex - 1);
SIDList list = getDataBufPagesIdList(pResultBuf, pQInfo->offset + id); SIDList list = getDataBufPagesIdList(pResultBuf, pQInfo->offset + id);
int32_t total = 0;
int32_t size = taosArrayGetSize(list); int32_t size = taosArrayGetSize(list);
for (int32_t i = 0; i < size; ++i) {
int32_t* pgId = taosArrayGet(list, i);
tFilePage *pData = getResBufPage(pResultBuf, *pgId);
total += pData->num;
}
int32_t rows = total;
int32_t offset = 0; int32_t offset = 0;
for (int32_t j = 0; j < size; ++j) { for (int32_t j = 0; j < size; ++j) {
int32_t* pgId = taosArrayGet(list, j); SPageInfo* pi = *(SPageInfo**) taosArrayGet(list, j);
tFilePage *pData = getResBufPage(pResultBuf, *pgId); tFilePage *pData = getResBufPage(pResultBuf, pi->pageId);
for (int32_t i = 0; i < pQuery->numOfOutput; ++i) { for (int32_t i = 0; i < pQuery->numOfOutput; ++i) {
int32_t bytes = pRuntimeEnv->pCtx[i].outputBytes; int32_t bytes = pRuntimeEnv->pCtx[i].outputBytes;
char * pDest = pQuery->sdata[i]->data; char * pDest = pQuery->sdata[i]->data;
memcpy(pDest + offset * bytes, pData->data + pRuntimeEnv->offset[i] * pData->num, bytes * pData->num);
memcpy(pDest + offset * bytes, pData->data + pRuntimeEnv->offset[i] * pData->num,
bytes * pData->num);
} }
// rows += pData->num;
offset += pData->num; offset += pData->num;
} }
assert(pQuery->rec.rows == 0); assert(pQuery->rec.rows == 0);
pQuery->rec.rows += rows; pQuery->rec.rows += offset;
pQInfo->offset += 1; pQInfo->offset += 1;
} }
...@@ -2777,7 +2765,6 @@ int32_t mergeIntoGroupResultImpl(SQInfo *pQInfo, SArray *pGroup) { ...@@ -2777,7 +2765,6 @@ int32_t mergeIntoGroupResultImpl(SQInfo *pQInfo, SArray *pGroup) {
assert(pQInfo->numOfGroupResultPages == 0); assert(pQInfo->numOfGroupResultPages == 0);
return 0; return 0;
} else if (numOfTables == 1) { // no need to merge results since only one table in each group } else if (numOfTables == 1) { // no need to merge results since only one table in each group
} }
SCompSupporter cs = {pTableList, posList, pQInfo}; SCompSupporter cs = {pTableList, posList, pQInfo};
...@@ -2802,8 +2789,9 @@ int32_t mergeIntoGroupResultImpl(SQInfo *pQInfo, SArray *pGroup) { ...@@ -2802,8 +2789,9 @@ int32_t mergeIntoGroupResultImpl(SQInfo *pQInfo, SArray *pGroup) {
SWindowResInfo *pWindowResInfo = &pTableList[pos]->windowResInfo; SWindowResInfo *pWindowResInfo = &pTableList[pos]->windowResInfo;
SWindowResult * pWindowRes = getWindowResult(pWindowResInfo, cs.position[pos]); SWindowResult * pWindowRes = getWindowResult(pWindowResInfo, cs.position[pos]);
tFilePage *page = getResBufPage(pRuntimeEnv->pResultBuf, pWindowRes->pos.pageId);
char *b = getPosInResultPage(pRuntimeEnv, PRIMARYKEY_TIMESTAMP_COL_INDEX, pWindowRes); char *b = getPosInResultPage(pRuntimeEnv, PRIMARYKEY_TIMESTAMP_COL_INDEX, pWindowRes, page);
TSKEY ts = GET_INT64_VAL(b); TSKEY ts = GET_INT64_VAL(b);
assert(ts == pWindowRes->window.skey); assert(ts == pWindowRes->window.skey);
...@@ -3517,9 +3505,11 @@ void setWindowResOutputBuf(SQueryRuntimeEnv *pRuntimeEnv, SWindowResult *pResult ...@@ -3517,9 +3505,11 @@ void setWindowResOutputBuf(SQueryRuntimeEnv *pRuntimeEnv, SWindowResult *pResult
SQuery *pQuery = pRuntimeEnv->pQuery; SQuery *pQuery = pRuntimeEnv->pQuery;
// Note: pResult->pos[i]->num == 0, there is only fixed number of results for each group // Note: pResult->pos[i]->num == 0, there is only fixed number of results for each group
tFilePage *page = getResBufPage(pRuntimeEnv->pResultBuf, pResult->pos.pageId);
for (int32_t i = 0; i < pQuery->numOfOutput; ++i) { for (int32_t i = 0; i < pQuery->numOfOutput; ++i) {
SQLFunctionCtx *pCtx = &pRuntimeEnv->pCtx[i]; SQLFunctionCtx *pCtx = &pRuntimeEnv->pCtx[i];
pCtx->aOutputBuf = getPosInResultPage(pRuntimeEnv, i, pResult); pCtx->aOutputBuf = getPosInResultPage(pRuntimeEnv, i, pResult, page);
int32_t functionId = pQuery->pSelectExpr[i].base.functionId; int32_t functionId = pQuery->pSelectExpr[i].base.functionId;
if (functionId == TSDB_FUNC_TOP || functionId == TSDB_FUNC_BOTTOM || functionId == TSDB_FUNC_DIFF) { if (functionId == TSDB_FUNC_TOP || functionId == TSDB_FUNC_BOTTOM || functionId == TSDB_FUNC_DIFF) {
...@@ -3542,6 +3532,8 @@ void setWindowResOutputBufInitCtx(SQueryRuntimeEnv *pRuntimeEnv, SWindowResult * ...@@ -3542,6 +3532,8 @@ void setWindowResOutputBufInitCtx(SQueryRuntimeEnv *pRuntimeEnv, SWindowResult *
SQuery *pQuery = pRuntimeEnv->pQuery; SQuery *pQuery = pRuntimeEnv->pQuery;
// Note: pResult->pos[i]->num == 0, there is only fixed number of results for each group // Note: pResult->pos[i]->num == 0, there is only fixed number of results for each group
tFilePage* bufPage = getResBufPage(pRuntimeEnv->pResultBuf, pResult->pos.pageId);
for (int32_t i = 0; i < pQuery->numOfOutput; ++i) { for (int32_t i = 0; i < pQuery->numOfOutput; ++i) {
SQLFunctionCtx *pCtx = &pRuntimeEnv->pCtx[i]; SQLFunctionCtx *pCtx = &pRuntimeEnv->pCtx[i];
...@@ -3550,7 +3542,7 @@ void setWindowResOutputBufInitCtx(SQueryRuntimeEnv *pRuntimeEnv, SWindowResult * ...@@ -3550,7 +3542,7 @@ void setWindowResOutputBufInitCtx(SQueryRuntimeEnv *pRuntimeEnv, SWindowResult *
continue; continue;
} }
pCtx->aOutputBuf = getPosInResultPage(pRuntimeEnv, i, pResult); pCtx->aOutputBuf = getPosInResultPage(pRuntimeEnv, i, pResult, bufPage);
pCtx->currentStage = 0; pCtx->currentStage = 0;
int32_t functionId = pCtx->functionId; int32_t functionId = pCtx->functionId;
...@@ -3713,11 +3705,13 @@ static int32_t doCopyToSData(SQInfo *pQInfo, SWindowResInfo *pResultInfo, int32_ ...@@ -3713,11 +3705,13 @@ static int32_t doCopyToSData(SQInfo *pQInfo, SWindowResInfo *pResultInfo, int32_
pQInfo->groupIndex += 1; pQInfo->groupIndex += 1;
} }
tFilePage *page = getResBufPage(pRuntimeEnv->pResultBuf, result[i].pos.pageId);
for (int32_t j = 0; j < pQuery->numOfOutput; ++j) { for (int32_t j = 0; j < pQuery->numOfOutput; ++j) {
int32_t size = pRuntimeEnv->pCtx[j].outputBytes; int32_t size = pRuntimeEnv->pCtx[j].outputBytes;
char *out = pQuery->sdata[j]->data + numOfResult * size; char *out = pQuery->sdata[j]->data + numOfResult * size;
char *in = getPosInResultPage(pRuntimeEnv, j, &result[i]); char *in = getPosInResultPage(pRuntimeEnv, j, &result[i], page);
memcpy(out, in + oldOffset * size, size * numOfRowsToCopy); memcpy(out, in + oldOffset * size, size * numOfRowsToCopy);
} }
...@@ -4238,10 +4232,10 @@ int32_t doInitQInfo(SQInfo *pQInfo, STSBuf *pTsBuf, void *tsdb, int32_t vgId, bo ...@@ -4238,10 +4232,10 @@ int32_t doInitQInfo(SQInfo *pQInfo, STSBuf *pTsBuf, void *tsdb, int32_t vgId, bo
int32_t ps = DEFAULT_PAGE_SIZE; int32_t ps = DEFAULT_PAGE_SIZE;
int32_t rowsize = 0; int32_t rowsize = 0;
getIntermediateBufInfo(pRuntimeEnv, &ps, &rowsize); getIntermediateBufInfo(pRuntimeEnv, &ps, &rowsize);
int32_t TWOMB = 1024*1024*2;
if (isSTableQuery && !onlyQueryTags(pRuntimeEnv->pQuery)) { if (isSTableQuery && !onlyQueryTags(pRuntimeEnv->pQuery)) {
int32_t numOfPages = getInitialPageNum(pQInfo); code = createDiskbasedResultBuffer(&pRuntimeEnv->pResultBuf, rowsize, ps, TWOMB, pQInfo);
code = createDiskbasedResultBuffer(&pRuntimeEnv->pResultBuf, numOfPages, rowsize, ps, numOfPages, pQInfo);
if (code != TSDB_CODE_SUCCESS) { if (code != TSDB_CODE_SUCCESS) {
return code; return code;
} }
...@@ -4269,8 +4263,7 @@ int32_t doInitQInfo(SQInfo *pQInfo, STSBuf *pTsBuf, void *tsdb, int32_t vgId, bo ...@@ -4269,8 +4263,7 @@ int32_t doInitQInfo(SQInfo *pQInfo, STSBuf *pTsBuf, void *tsdb, int32_t vgId, bo
} else if (pRuntimeEnv->groupbyNormalCol || QUERY_IS_INTERVAL_QUERY(pQuery)) { } else if (pRuntimeEnv->groupbyNormalCol || QUERY_IS_INTERVAL_QUERY(pQuery)) {
int32_t numOfResultRows = getInitialPageNum(pQInfo); int32_t numOfResultRows = getInitialPageNum(pQInfo);
getIntermediateBufInfo(pRuntimeEnv, &ps, &rowsize); getIntermediateBufInfo(pRuntimeEnv, &ps, &rowsize);
code = createDiskbasedResultBuffer(&pRuntimeEnv->pResultBuf, rowsize, ps, TWOMB, pQInfo);
code = createDiskbasedResultBuffer(&pRuntimeEnv->pResultBuf, numOfResultRows, rowsize, ps, numOfResultRows, pQInfo);
if (code != TSDB_CODE_SUCCESS) { if (code != TSDB_CODE_SUCCESS) {
return code; return code;
} }
...@@ -5894,16 +5887,11 @@ static SQInfo *createQInfoImpl(SQueryTableMsg *pQueryMsg, SArray* pTableIdList, ...@@ -5894,16 +5887,11 @@ static SQInfo *createQInfoImpl(SQueryTableMsg *pQueryMsg, SArray* pTableIdList,
} }
pQInfo->arrTableIdInfo = taosArrayInit(tableIndex, sizeof(STableIdInfo)); pQInfo->arrTableIdInfo = taosArrayInit(tableIndex, sizeof(STableIdInfo));
pQInfo->dataReady = QUERY_RESULT_NOT_READY;
pthread_mutex_init(&pQInfo->lock, NULL);
pQuery->pos = -1; pQuery->pos = -1;
pQuery->window = pQueryMsg->window; pQuery->window = pQueryMsg->window;
if (sem_init(&pQInfo->dataReady, 0, 0) != 0) {
int32_t code = TAOS_SYSTEM_ERROR(errno);
qError("QInfo:%p init dataReady sem failed, reason:%s", pQInfo, tstrerror(code));
goto _cleanup;
}
colIdCheck(pQuery); colIdCheck(pQuery);
qDebug("qmsg:%p QInfo:%p created", pQueryMsg, pQInfo); qDebug("qmsg:%p QInfo:%p created", pQueryMsg, pQInfo);
...@@ -5945,7 +5933,7 @@ static bool isValidQInfo(void *param) { ...@@ -5945,7 +5933,7 @@ static bool isValidQInfo(void *param) {
return (sig == (uint64_t)pQInfo); return (sig == (uint64_t)pQInfo);
} }
static int32_t initQInfo(SQueryTableMsg *pQueryMsg, void *tsdb, int32_t vgId, SQInfo *pQInfo, bool isSTable, void* param) { static int32_t initQInfo(SQueryTableMsg *pQueryMsg, void *tsdb, int32_t vgId, SQInfo *pQInfo, bool isSTable) {
int32_t code = TSDB_CODE_SUCCESS; int32_t code = TSDB_CODE_SUCCESS;
SQuery *pQuery = pQInfo->runtimeEnv.pQuery; SQuery *pQuery = pQInfo->runtimeEnv.pQuery;
...@@ -5965,18 +5953,12 @@ static int32_t initQInfo(SQueryTableMsg *pQueryMsg, void *tsdb, int32_t vgId, SQ ...@@ -5965,18 +5953,12 @@ static int32_t initQInfo(SQueryTableMsg *pQueryMsg, void *tsdb, int32_t vgId, SQ
pQuery->window.ekey, pQuery->order.order); pQuery->window.ekey, pQuery->order.order);
setQueryStatus(pQuery, QUERY_COMPLETED); setQueryStatus(pQuery, QUERY_COMPLETED);
pQInfo->tableqinfoGroupInfo.numOfTables = 0; pQInfo->tableqinfoGroupInfo.numOfTables = 0;
sem_post(&pQInfo->dataReady);
return TSDB_CODE_SUCCESS; return TSDB_CODE_SUCCESS;
} }
pQInfo->param = param;
if (pQInfo->tableqinfoGroupInfo.numOfTables == 0) { if (pQInfo->tableqinfoGroupInfo.numOfTables == 0) {
qDebug("QInfo:%p no table qualified for tag filter, abort query", pQInfo); qDebug("QInfo:%p no table qualified for tag filter, abort query", pQInfo);
setQueryStatus(pQuery, QUERY_COMPLETED); setQueryStatus(pQuery, QUERY_COMPLETED);
sem_post(&pQInfo->dataReady);
return TSDB_CODE_SUCCESS; return TSDB_CODE_SUCCESS;
} }
...@@ -6018,7 +6000,6 @@ static void freeQInfo(SQInfo *pQInfo) { ...@@ -6018,7 +6000,6 @@ static void freeQInfo(SQInfo *pQInfo) {
tfree(pQuery->sdata[col]); tfree(pQuery->sdata[col]);
} }
sem_destroy(&(pQInfo->dataReady));
teardownQueryRuntimeEnv(&pQInfo->runtimeEnv); teardownQueryRuntimeEnv(&pQInfo->runtimeEnv);
for (int32_t i = 0; i < pQuery->numOfFilterCols; ++i) { for (int32_t i = 0; i < pQuery->numOfFilterCols; ++i) {
...@@ -6170,7 +6151,7 @@ typedef struct SQueryMgmt { ...@@ -6170,7 +6151,7 @@ typedef struct SQueryMgmt {
pthread_mutex_t lock; pthread_mutex_t lock;
} SQueryMgmt; } SQueryMgmt;
int32_t qCreateQueryInfo(void* tsdb, int32_t vgId, SQueryTableMsg* pQueryMsg, void* param, qinfo_t* pQInfo) { int32_t qCreateQueryInfo(void* tsdb, int32_t vgId, SQueryTableMsg* pQueryMsg, qinfo_t* pQInfo) {
assert(pQueryMsg != NULL && tsdb != NULL); assert(pQueryMsg != NULL && tsdb != NULL);
int32_t code = TSDB_CODE_SUCCESS; int32_t code = TSDB_CODE_SUCCESS;
...@@ -6266,7 +6247,7 @@ int32_t qCreateQueryInfo(void* tsdb, int32_t vgId, SQueryTableMsg* pQueryMsg, vo ...@@ -6266,7 +6247,7 @@ int32_t qCreateQueryInfo(void* tsdb, int32_t vgId, SQueryTableMsg* pQueryMsg, vo
goto _over; goto _over;
} }
code = initQInfo(pQueryMsg, tsdb, vgId, *pQInfo, isSTableQuery, param); code = initQInfo(pQueryMsg, tsdb, vgId, *pQInfo, isSTableQuery);
_over: _over:
free(tagCond); free(tagCond);
...@@ -6306,24 +6287,32 @@ void qDestroyQueryInfo(qinfo_t qHandle) { ...@@ -6306,24 +6287,32 @@ void qDestroyQueryInfo(qinfo_t qHandle) {
freeQInfo(pQInfo); freeQInfo(pQInfo);
} }
void qTableQuery(qinfo_t qinfo) { static void setQueryResultReady(SQInfo* pQInfo) {
pthread_mutex_lock(&pQInfo->lock);
pQInfo->dataReady = QUERY_RESULT_READY;
pthread_mutex_unlock(&pQInfo->lock);
}
bool qTableQuery(qinfo_t qinfo) {
SQInfo *pQInfo = (SQInfo *)qinfo; SQInfo *pQInfo = (SQInfo *)qinfo;
if (pQInfo == NULL || pQInfo->signature != pQInfo) { if (pQInfo == NULL || pQInfo->signature != pQInfo) {
qDebug("QInfo:%p has been freed, no need to execute", pQInfo); qDebug("QInfo:%p has been freed, no need to execute", pQInfo);
return; return false;
} }
if (IS_QUERY_KILLED(pQInfo)) { if (IS_QUERY_KILLED(pQInfo)) {
qDebug("QInfo:%p it is already killed, abort", pQInfo); qDebug("QInfo:%p it is already killed, abort", pQInfo);
sem_post(&pQInfo->dataReady); setQueryResultReady(pQInfo);
return; return false;
} }
if (pQInfo->tableqinfoGroupInfo.numOfTables == 0) { if (pQInfo->tableqinfoGroupInfo.numOfTables == 0) {
setQueryStatus(pQInfo->runtimeEnv.pQuery, QUERY_COMPLETED);
setQueryResultReady(pQInfo);
qDebug("QInfo:%p no table exists for query, abort", pQInfo); qDebug("QInfo:%p no table exists for query, abort", pQInfo);
sem_post(&pQInfo->dataReady); return false;
return;
} }
// error occurs, record the error code and return to client // error occurs, record the error code and return to client
...@@ -6331,8 +6320,9 @@ void qTableQuery(qinfo_t qinfo) { ...@@ -6331,8 +6320,9 @@ void qTableQuery(qinfo_t qinfo) {
if (ret != TSDB_CODE_SUCCESS) { if (ret != TSDB_CODE_SUCCESS) {
pQInfo->code = ret; pQInfo->code = ret;
qDebug("QInfo:%p query abort due to error/cancel occurs, code:%s", pQInfo, tstrerror(pQInfo->code)); qDebug("QInfo:%p query abort due to error/cancel occurs, code:%s", pQInfo, tstrerror(pQInfo->code));
sem_post(&pQInfo->dataReady);
return; setQueryResultReady(pQInfo);
return false;
} }
qDebug("QInfo:%p query task is launched", pQInfo); qDebug("QInfo:%p query task is launched", pQInfo);
...@@ -6357,10 +6347,20 @@ void qTableQuery(qinfo_t qinfo) { ...@@ -6357,10 +6347,20 @@ void qTableQuery(qinfo_t qinfo) {
pQInfo, pQuery->rec.rows, pQuery->rec.total + pQuery->rec.rows); pQInfo, pQuery->rec.rows, pQuery->rec.total + pQuery->rec.rows);
} }
sem_post(&pQInfo->dataReady); bool buildRes = false;
pthread_mutex_lock(&pQInfo->lock);
pQInfo->dataReady = QUERY_RESULT_READY;
if (pQInfo->rspContext != NULL) {
buildRes = true;
}
pthread_mutex_unlock(&pQInfo->lock);
return buildRes;
} }
int32_t qRetrieveQueryResultInfo(qinfo_t qinfo) { int32_t qRetrieveQueryResultInfo(qinfo_t qinfo, bool* buildRes, void* pRspContext) {
SQInfo *pQInfo = (SQInfo *)qinfo; SQInfo *pQInfo = (SQInfo *)qinfo;
if (pQInfo == NULL || !isValidQInfo(pQInfo)) { if (pQInfo == NULL || !isValidQInfo(pQInfo)) {
...@@ -6373,11 +6373,21 @@ int32_t qRetrieveQueryResultInfo(qinfo_t qinfo) { ...@@ -6373,11 +6373,21 @@ int32_t qRetrieveQueryResultInfo(qinfo_t qinfo) {
return pQInfo->code; return pQInfo->code;
} }
sem_wait(&pQInfo->dataReady); int32_t code = TSDB_CODE_SUCCESS;
qDebug("QInfo:%p retrieve result info, rowsize:%d, rows:%"PRId64", code:%d", pQInfo, pQuery->rowSize, pQuery->rec.rows, pthread_mutex_lock(&pQInfo->lock);
pQInfo->code); if (pQInfo->dataReady == QUERY_RESULT_READY) {
*buildRes = true;
qDebug("QInfo:%p retrieve result info, rowsize:%d, rows:%"PRId64", code:%d", pQInfo, pQuery->rowSize, pQuery->rec.rows,
pQInfo->code);
} else {
*buildRes = false;
qDebug("QInfo:%p retrieve req set query return result after paused", pQInfo);
pQInfo->rspContext = pRspContext;
}
return pQInfo->code; code = pQInfo->code;
pthread_mutex_unlock(&pQInfo->lock);
return code;
} }
bool qHasMoreResultsToRetrieve(qinfo_t qinfo) { bool qHasMoreResultsToRetrieve(qinfo_t qinfo) {
...@@ -6389,6 +6399,7 @@ bool qHasMoreResultsToRetrieve(qinfo_t qinfo) { ...@@ -6389,6 +6399,7 @@ bool qHasMoreResultsToRetrieve(qinfo_t qinfo) {
} }
SQuery *pQuery = pQInfo->runtimeEnv.pQuery; SQuery *pQuery = pQInfo->runtimeEnv.pQuery;
bool ret = false; bool ret = false;
if (Q_STATUS_EQUAL(pQuery->status, QUERY_OVER)) { if (Q_STATUS_EQUAL(pQuery->status, QUERY_OVER)) {
ret = false; ret = false;
...@@ -6407,7 +6418,7 @@ bool qHasMoreResultsToRetrieve(qinfo_t qinfo) { ...@@ -6407,7 +6418,7 @@ bool qHasMoreResultsToRetrieve(qinfo_t qinfo) {
return ret; return ret;
} }
int32_t qDumpRetrieveResult(qinfo_t qinfo, SRetrieveTableRsp **pRsp, int32_t *contLen) { int32_t qDumpRetrieveResult(qinfo_t qinfo, SRetrieveTableRsp **pRsp, int32_t *contLen, bool* continueExec) {
SQInfo *pQInfo = (SQInfo *)qinfo; SQInfo *pQInfo = (SQInfo *)qinfo;
if (pQInfo == NULL || !isValidQInfo(pQInfo)) { if (pQInfo == NULL || !isValidQInfo(pQInfo)) {
...@@ -6417,8 +6428,10 @@ int32_t qDumpRetrieveResult(qinfo_t qinfo, SRetrieveTableRsp **pRsp, int32_t *co ...@@ -6417,8 +6428,10 @@ int32_t qDumpRetrieveResult(qinfo_t qinfo, SRetrieveTableRsp **pRsp, int32_t *co
SQueryRuntimeEnv* pRuntimeEnv = &pQInfo->runtimeEnv; SQueryRuntimeEnv* pRuntimeEnv = &pQInfo->runtimeEnv;
SQuery *pQuery = pQInfo->runtimeEnv.pQuery; SQuery *pQuery = pQInfo->runtimeEnv.pQuery;
size_t size = getResultSize(pQInfo, &pQuery->rec.rows); size_t size = getResultSize(pQInfo, &pQuery->rec.rows);
size += sizeof(int32_t); size += sizeof(int32_t);
size += sizeof(STableIdInfo) * taosArrayGetSize(pQInfo->arrTableIdInfo); size += sizeof(STableIdInfo) * taosArrayGetSize(pQInfo->arrTableIdInfo);
*contLen = size + sizeof(SRetrieveTableRsp); *contLen = size + sizeof(SRetrieveTableRsp);
// todo proper handle failed to allocate memory, // todo proper handle failed to allocate memory,
...@@ -6427,6 +6440,7 @@ int32_t qDumpRetrieveResult(qinfo_t qinfo, SRetrieveTableRsp **pRsp, int32_t *co ...@@ -6427,6 +6440,7 @@ int32_t qDumpRetrieveResult(qinfo_t qinfo, SRetrieveTableRsp **pRsp, int32_t *co
if (*pRsp == NULL) { if (*pRsp == NULL) {
return TSDB_CODE_QRY_OUT_OF_MEMORY; return TSDB_CODE_QRY_OUT_OF_MEMORY;
} }
(*pRsp)->numOfRows = htonl(pQuery->rec.rows); (*pRsp)->numOfRows = htonl(pQuery->rec.rows);
int32_t code = pQInfo->code; int32_t code = pQInfo->code;
...@@ -6434,8 +6448,8 @@ int32_t qDumpRetrieveResult(qinfo_t qinfo, SRetrieveTableRsp **pRsp, int32_t *co ...@@ -6434,8 +6448,8 @@ int32_t qDumpRetrieveResult(qinfo_t qinfo, SRetrieveTableRsp **pRsp, int32_t *co
(*pRsp)->offset = htobe64(pQuery->limit.offset); (*pRsp)->offset = htobe64(pQuery->limit.offset);
(*pRsp)->useconds = htobe64(pRuntimeEnv->summary.elapsedTime); (*pRsp)->useconds = htobe64(pRuntimeEnv->summary.elapsedTime);
} else { } else {
(*pRsp)->offset = 0;
(*pRsp)->useconds = 0; (*pRsp)->useconds = 0;
(*pRsp)->offset = 0;
} }
(*pRsp)->precision = htons(pQuery->precision); (*pRsp)->precision = htons(pQuery->precision);
...@@ -6446,10 +6460,20 @@ int32_t qDumpRetrieveResult(qinfo_t qinfo, SRetrieveTableRsp **pRsp, int32_t *co ...@@ -6446,10 +6460,20 @@ int32_t qDumpRetrieveResult(qinfo_t qinfo, SRetrieveTableRsp **pRsp, int32_t *co
code = pQInfo->code; code = pQInfo->code;
} }
pQInfo->rspContext = NULL;
pQInfo->dataReady = QUERY_RESULT_NOT_READY;
if (IS_QUERY_KILLED(pQInfo) || Q_STATUS_EQUAL(pQuery->status, QUERY_OVER)) { if (IS_QUERY_KILLED(pQInfo) || Q_STATUS_EQUAL(pQuery->status, QUERY_OVER)) {
(*pRsp)->completed = 1; // notify no more result to client (*pRsp)->completed = 1; // notify no more result to client
} }
if (qHasMoreResultsToRetrieve(pQInfo)) {
*continueExec = true;
} else { // failed to dump result, free qhandle immediately
*continueExec = false;
qKillQuery(pQInfo);
}
return code; return code;
} }
...@@ -6460,7 +6484,7 @@ int32_t qKillQuery(qinfo_t qinfo) { ...@@ -6460,7 +6484,7 @@ int32_t qKillQuery(qinfo_t qinfo) {
return TSDB_CODE_QRY_INVALID_QHANDLE; return TSDB_CODE_QRY_INVALID_QHANDLE;
} }
sem_post(&pQInfo->dataReady); // sem_post(&pQInfo->dataReady);
setQueryKilled(pQInfo); setQueryKilled(pQInfo);
return TSDB_CODE_SUCCESS; return TSDB_CODE_SUCCESS;
} }
...@@ -6607,6 +6631,13 @@ static void buildTagQueryResult(SQInfo* pQInfo) { ...@@ -6607,6 +6631,13 @@ static void buildTagQueryResult(SQInfo* pQInfo) {
setQueryStatus(pQuery, QUERY_COMPLETED); setQueryStatus(pQuery, QUERY_COMPLETED);
} }
/*
 * Return the response context (rspContext) currently stored in the query
 * handle. The handle must not be NULL; this is enforced with an assert.
 */
void* qGetResultRetrieveMsg(qinfo_t qinfo) {
  assert((SQInfo*) qinfo != NULL);
  return ((SQInfo*) qinfo)->rspContext;
}
void freeqinfoFn(void *qhandle) { void freeqinfoFn(void *qhandle) {
void** handle = qhandle; void** handle = qhandle;
if (handle == NULL || *handle == NULL) { if (handle == NULL || *handle == NULL) {
...@@ -6618,19 +6649,21 @@ void freeqinfoFn(void *qhandle) { ...@@ -6618,19 +6649,21 @@ void freeqinfoFn(void *qhandle) {
} }
void* qOpenQueryMgmt(int32_t vgId) { void* qOpenQueryMgmt(int32_t vgId) {
const int32_t REFRESH_HANDLE_INTERVAL = 2; // every 2 seconds, refresh handle pool const int32_t REFRESH_HANDLE_INTERVAL = 30; // every 30 seconds, refresh handle pool
char cacheName[128] = {0}; char cacheName[128] = {0};
sprintf(cacheName, "qhandle_%d", vgId); sprintf(cacheName, "qhandle_%d", vgId);
SQueryMgmt* pQueryHandle = calloc(1, sizeof(SQueryMgmt)); SQueryMgmt* pQueryMgmt = calloc(1, sizeof(SQueryMgmt));
pQueryMgmt->qinfoPool = taosCacheInit(TSDB_DATA_TYPE_BIGINT, REFRESH_HANDLE_INTERVAL, true, freeqinfoFn, cacheName);
pQueryMgmt->closed = false;
pQueryMgmt->vgId = vgId;
pQueryHandle->qinfoPool = taosCacheInit(TSDB_DATA_TYPE_BIGINT, REFRESH_HANDLE_INTERVAL, true, freeqinfoFn, cacheName); pthread_mutex_init(&pQueryMgmt->lock, NULL);
pQueryHandle->closed = false;
pthread_mutex_init(&pQueryHandle->lock, NULL);
qDebug("vgId:%d, open querymgmt success", vgId); qDebug("vgId:%d, open querymgmt success", vgId);
return pQueryHandle; return pQueryMgmt;
} }
static void queryMgmtKillQueryFn(void* handle) { static void queryMgmtKillQueryFn(void* handle) {
...@@ -6670,7 +6703,7 @@ void qCleanupQueryMgmt(void* pQMgmt) { ...@@ -6670,7 +6703,7 @@ void qCleanupQueryMgmt(void* pQMgmt) {
pthread_mutex_destroy(&pQueryMgmt->lock); pthread_mutex_destroy(&pQueryMgmt->lock);
tfree(pQueryMgmt); tfree(pQueryMgmt);
qDebug("vgId:%d querymgmt cleanup completed", vgId); qDebug("vgId:%d queryMgmt cleanup completed", vgId);
} }
void** qRegisterQInfo(void* pMgmt, uint64_t qInfo) { void** qRegisterQInfo(void* pMgmt, uint64_t qInfo) {
...@@ -6727,3 +6760,4 @@ void** qReleaseQInfo(void* pMgmt, void* pQInfo, bool needFree) { ...@@ -6727,3 +6760,4 @@ void** qReleaseQInfo(void* pMgmt, void* pQInfo, bool needFree) {
return 0; return 0;
} }
...@@ -28,10 +28,10 @@ ...@@ -28,10 +28,10 @@
/* /*
* SColumnModel is deeply copy * SColumnModel is deeply copy
*/ */
tExtMemBuffer* createExtMemBuffer(int32_t inMemSize, int32_t elemSize, SColumnModel *pModel) { tExtMemBuffer* createExtMemBuffer(int32_t inMemSize, int32_t elemSize, int32_t pagesize, SColumnModel *pModel) {
tExtMemBuffer* pMemBuffer = (tExtMemBuffer *)calloc(1, sizeof(tExtMemBuffer)); tExtMemBuffer* pMemBuffer = (tExtMemBuffer *)calloc(1, sizeof(tExtMemBuffer));
pMemBuffer->pageSize = DEFAULT_PAGE_SIZE; pMemBuffer->pageSize = pagesize;
pMemBuffer->inMemCapacity = ALIGN8(inMemSize) / pMemBuffer->pageSize; pMemBuffer->inMemCapacity = ALIGN8(inMemSize) / pMemBuffer->pageSize;
pMemBuffer->nElemSize = elemSize; pMemBuffer->nElemSize = elemSize;
......
...@@ -14,7 +14,7 @@ ...@@ -14,7 +14,7 @@
*/ */
#include "os.h" #include "os.h"
#include "qsqlparser.h" #include "qSqlparser.h"
#include "queryLog.h" #include "queryLog.h"
#include "taosdef.h" #include "taosdef.h"
#include "taosmsg.h" #include "taosmsg.h"
......
...@@ -535,7 +535,7 @@ void tMemBucketPut(tMemBucket *pBucket, void *data, int32_t numOfRows) { ...@@ -535,7 +535,7 @@ void tMemBucketPut(tMemBucket *pBucket, void *data, int32_t numOfRows) {
if (pSeg->pBuffer[slotIdx] == NULL) { if (pSeg->pBuffer[slotIdx] == NULL) {
pSeg->pBuffer[slotIdx] = createExtMemBuffer(pBucket->numOfTotalPages * pBucket->pageSize, pBucket->nElemSize, pSeg->pBuffer[slotIdx] = createExtMemBuffer(pBucket->numOfTotalPages * pBucket->pageSize, pBucket->nElemSize,
pBucket->pOrderDesc->pColumnModel); pBucket->pageSize, pBucket->pOrderDesc->pColumnModel);
pSeg->pBuffer[slotIdx]->flushModel = SINGLE_APPEND_MODEL; pSeg->pBuffer[slotIdx]->flushModel = SINGLE_APPEND_MODEL;
pBucket->pOrderDesc->pColumnModel->capacity = pSeg->pBuffer[slotIdx]->numOfElemsPerPage; pBucket->pOrderDesc->pColumnModel->capacity = pSeg->pBuffer[slotIdx]->numOfElemsPerPage;
} }
......
#include "qResultbuf.h" #include "qResultbuf.h"
#include "stddef.h"
#include "tscompression.h"
#include "hash.h" #include "hash.h"
#include "qExtbuffer.h" #include "qExtbuffer.h"
#include "queryLog.h" #include "queryLog.h"
#include "taoserror.h" #include "taoserror.h"
int32_t createDiskbasedResultBuffer(SDiskbasedResultBuf** pResultBuf, int32_t numOfPages, int32_t rowSize, #define GET_DATA_PAYLOAD(_p) ((_p)->pData + POINTER_BYTES)
int32_t pagesize, int32_t inMemPages, void* handle) {
/*
 * Allocate and initialise a disk-based result buffer.
 *
 * pResultBuf    out: receives the new buffer (NULL when allocation fails)
 * rowSize       size of one row, used to derive the number of rows per page
 * pagesize      size of one page in bytes
 * inMemBufSize  in-memory budget in bytes; asserted to be at least two pages
 * handle        opaque owner pointer, stored in the buffer and used in logs
 *
 * Returns TSDB_CODE_SUCCESS, or TSDB_CODE_COM_OUT_OF_MEMORY when the buffer
 * structure cannot be allocated.
 */
int32_t createDiskbasedResultBuffer(SDiskbasedResultBuf** pResultBuf, int32_t rowSize, int32_t pagesize,
                                    int32_t inMemBufSize, const void* handle) {
  SDiskbasedResultBuf* pBuf = calloc(1, sizeof(SDiskbasedResultBuf));
  *pResultBuf = pBuf;

  if (pBuf == NULL) {
    return TSDB_CODE_COM_OUT_OF_MEMORY;
  }

  // paging parameters; no page exists yet
  pBuf->pageSize     = pagesize;
  pBuf->numOfPages   = 0;
  pBuf->totalBufSize = 0;
  pBuf->inMemPages   = inMemBufSize / pagesize;  // maximum allowed pages, it is a soft limit.
  pBuf->allocateId   = -1;

  // backing file state; no file has been opened at this point
  pBuf->comp     = true;
  pBuf->file     = NULL;
  pBuf->handle   = handle;
  pBuf->fileSize = 0;

  // at least two pages must fit in memory
  assert(inMemBufSize >= pagesize * 2);

  pBuf->numOfRowsPerPage = (pagesize - sizeof(tFilePage)) / rowSize;
  pBuf->lruList          = tdListNew(POINTER_BYTES);

  // lookup tables and the scratch buffer (page size plus two extra bytes)
  pBuf->groupSet  = taosHashInit(10, taosGetDefaultHashFunction(TSDB_DATA_TYPE_INT), false);
  pBuf->assistBuf = malloc(pBuf->pageSize + 2);
  pBuf->all       = taosHashInit(10, taosGetDefaultHashFunction(TSDB_DATA_TYPE_INT), false);

  // path of the temporary file that will back the buffer
  char tmpPath[PATH_MAX] = {0};
  getTmpfilePath("qbuf", tmpPath);
  pBuf->path = strdup(tmpPath);

  pBuf->emptyDummyIdList = taosArrayInit(1, sizeof(int32_t));

  qDebug("QInfo:%p create resBuf for output, page size:%d, inmem buf pages:%d, file:%s", handle, pBuf->pageSize,
         pBuf->inMemPages, pBuf->path);

  return TSDB_CODE_SUCCESS;
}
/*
 * Create (or truncate) the file at pResultBuf->path, open it for binary
 * read/write and store the stream in pResultBuf->file.
 *
 * Returns TSDB_CODE_SUCCESS, or the system error derived from errno when
 * fopen fails.
 */
static int32_t createDiskFile(SDiskbasedResultBuf* pResultBuf) {
  pResultBuf->file = fopen(pResultBuf->path, "wb+");
  if (pResultBuf->file == NULL) {
    // capture errno right away: the logging call below may overwrite it
    int err = errno;
    qError("failed to create tmp file: %s on disk. %s", pResultBuf->path, strerror(err));
    return TAOS_SYSTEM_ERROR(err);
  }

  return TSDB_CODE_SUCCESS;
}
int32_t ret = ftruncate(pResultBuf->fd, NUM_OF_PAGES_ON_DISK(pResultBuf) * pResultBuf->pageSize); static char* doCompressData(void* data, int32_t srcSize, int32_t *dst, SDiskbasedResultBuf* pResultBuf) { // do nothing
if (ret != TSDB_CODE_SUCCESS) { if (!pResultBuf->comp) {
qError("failed to create tmp file: %s on disk. %s", pResultBuf->path, strerror(errno)); *dst = srcSize;
return TAOS_SYSTEM_ERROR(errno); return data;
} }
pResultBuf->pBuf = mmap(NULL, FILE_SIZE_ON_DISK(pResultBuf), PROT_READ | PROT_WRITE, MAP_SHARED, pResultBuf->fd, 0); *dst = tsCompressString(data, srcSize, 1, pResultBuf->assistBuf, srcSize, ONE_STAGE_COMP, NULL, 0);
if (pResultBuf->pBuf == MAP_FAILED) {
qError("QInfo:%p failed to map temp file: %s. %s", pResultBuf->handle, pResultBuf->path, strerror(errno)); memcpy(data, pResultBuf->assistBuf, *dst);
return TAOS_SYSTEM_ERROR(errno); return data;
}
static char* doDecompressData(void* data, int32_t srcSize, int32_t *dst, SDiskbasedResultBuf* pResultBuf) { // do nothing
if (!pResultBuf->comp) {
*dst = srcSize;
return data;
} }
pResultBuf->totalBufSize = pResultBuf->numOfPages * pResultBuf->pageSize; *dst = tsDecompressString(data, srcSize, 1, pResultBuf->assistBuf, pResultBuf->pageSize, ONE_STAGE_COMP, NULL, 0);
return TSDB_CODE_SUCCESS;
memcpy(data, pResultBuf->assistBuf, *dst);
return data;
} }
static int32_t extendDiskFileSize(SDiskbasedResultBuf* pResultBuf, int32_t incNumOfPages) { static int32_t allocatePositionInFile(SDiskbasedResultBuf* pResultBuf, size_t size) {
assert(pResultBuf->numOfPages * pResultBuf->pageSize == pResultBuf->totalBufSize); if (pResultBuf->pFree == NULL) {
int32_t ret = TSDB_CODE_SUCCESS; return pResultBuf->nextPos;
} else {
int32_t offset = -1;
size_t num = taosArrayGetSize(pResultBuf->pFree);
for(int32_t i = 0; i < num; ++i) {
SFreeListItem* pi = taosArrayGet(pResultBuf->pFree, i);
if (pi->len >= size) {
offset = pi->offset;
pi->offset += size;
pi->len -= size;
return offset;
}
}
// no available recycle space, allocate new area in file
return pResultBuf->nextPos;
}
}
static char* doFlushPageToDisk(SDiskbasedResultBuf* pResultBuf, SPageInfo* pg) {
assert(!pg->used && pg->pData != NULL);
int32_t size = -1;
char* t = doCompressData(GET_DATA_PAYLOAD(pg), pResultBuf->pageSize, &size, pResultBuf);
// this page is flushed to disk for the first time
if (pg->info.offset == -1) {
pg->info.offset = allocatePositionInFile(pResultBuf, size);
pResultBuf->nextPos += size;
if (pResultBuf->pBuf == NULL) { fseek(pResultBuf->file, pg->info.offset, SEEK_SET);
assert(pResultBuf->fd == FD_INITIALIZER); /*int32_t ret =*/ fwrite(t, 1, size, pResultBuf->file);
if ((ret = createDiskResidesBuf(pResultBuf)) != TSDB_CODE_SUCCESS) { if (pResultBuf->fileSize < pg->info.offset + pg->info.length) {
return ret; pResultBuf->fileSize = pg->info.offset + pg->info.length;
} }
} else { } else {
ret = munmap(pResultBuf->pBuf, FILE_SIZE_ON_DISK(pResultBuf)); // length becomes greater, current space is not enough, allocate new place, otherwise, do nothing
pResultBuf->numOfPages += incNumOfPages; if (pg->info.length < size) {
// 1. add current space to free list
/* taosArrayPush(pResultBuf->pFree, &pg->info);
* disk-based output buffer is exhausted, try to extend the disk-based buffer, the available disk space may
* be insufficient // 2. allocate new position, and update the info
*/ pg->info.offset = allocatePositionInFile(pResultBuf, size);
ret = ftruncate(pResultBuf->fd, NUM_OF_PAGES_ON_DISK(pResultBuf) * pResultBuf->pageSize); pResultBuf->nextPos += size;
if (ret != TSDB_CODE_SUCCESS) {
// dError("QInfo:%p failed to create intermediate result output file:%s. %s", pQInfo, pSupporter->extBufFile,
// strerror(errno));
return TSDB_CODE_QRY_NO_DISKSPACE;
} }
pResultBuf->totalBufSize = pResultBuf->numOfPages * pResultBuf->pageSize; //3. write to disk.
pResultBuf->pBuf = mmap(NULL, FILE_SIZE_ON_DISK(pResultBuf), PROT_READ | PROT_WRITE, MAP_SHARED, pResultBuf->fd, 0); fseek(pResultBuf->file, pg->info.offset, SEEK_SET);
fwrite(t, size, 1, pResultBuf->file);
if (pResultBuf->pBuf == MAP_FAILED) { if (pResultBuf->fileSize < pg->info.offset + pg->info.length) {
// dError("QInfo:%p failed to map temp file: %s. %s", pQInfo, pSupporter->extBufFile, strerror(errno)); pResultBuf->fileSize = pg->info.offset + pg->info.length;
return TSDB_CODE_QRY_OUT_OF_MEMORY;
} }
} }
return TSDB_CODE_SUCCESS; char* ret = pg->pData;
memset(ret, 0, pResultBuf->pageSize);
pg->pData = NULL;
pg->info.length = size;
pResultBuf->statis.flushBytes += pg->info.length;
return ret;
} }
#define NO_AVAILABLE_PAGES(_b) ((_b)->allocateId == (_b)->numOfPages - 1) static char* flushPageToDisk(SDiskbasedResultBuf* pResultBuf, SPageInfo* pg) {
int32_t ret = TSDB_CODE_SUCCESS;
assert(pResultBuf->numOfPages * pResultBuf->pageSize == pResultBuf->totalBufSize && pResultBuf->numOfPages >= pResultBuf->inMemPages);
static FORCE_INLINE int32_t getGroupIndex(SDiskbasedResultBuf* pResultBuf, int32_t groupId) { if (pResultBuf->file == NULL) {
assert(pResultBuf != NULL); if ((ret = createDiskFile(pResultBuf)) != TSDB_CODE_SUCCESS) {
terrno = ret;
return NULL;
}
}
return doFlushPageToDisk(pResultBuf, pg);
}
// load file block data in disk
static char* loadPageFromDisk(SDiskbasedResultBuf* pResultBuf, SPageInfo* pg) {
int32_t ret = fseek(pResultBuf->file, pg->info.offset, SEEK_SET);
ret = fread(GET_DATA_PAYLOAD(pg), 1, pg->info.length, pResultBuf->file);
if (ret != pg->info.length) {
terrno = errno;
return NULL;
}
pResultBuf->statis.loadBytes += pg->info.length;
char* p = taosHashGet(pResultBuf->idsTable, (const char*)&groupId, sizeof(int32_t)); int32_t fullSize = 0;
doDecompressData(GET_DATA_PAYLOAD(pg), pg->info.length, &fullSize, pResultBuf);
return GET_DATA_PAYLOAD(pg);
}
#define NO_AVAILABLE_PAGES(_b) ((_b)->numOfPages >= (_b)->inMemPages)
static SIDList addNewGroup(SDiskbasedResultBuf* pResultBuf, int32_t groupId) {
assert(taosHashGet(pResultBuf->groupSet, (const char*) &groupId, sizeof(int32_t)) == NULL);
SArray* pa = taosArrayInit(1, POINTER_BYTES);
int32_t ret = taosHashPut(pResultBuf->groupSet, (const char*)&groupId, sizeof(int32_t), &pa, POINTER_BYTES);
assert(ret == 0);
return pa;
}
static SPageInfo* registerPage(SDiskbasedResultBuf* pResultBuf, int32_t groupId, int32_t pageId) {
SIDList list = NULL;
char** p = taosHashGet(pResultBuf->groupSet, (const char*)&groupId, sizeof(int32_t));
if (p == NULL) { // it is a new group id if (p == NULL) { // it is a new group id
return -1; list = addNewGroup(pResultBuf, groupId);
} else {
list = (SIDList) (*p);
} }
int32_t slot = GET_INT32_VAL(p); pResultBuf->numOfPages += 1;
assert(slot >= 0 && slot < taosHashGetSize(pResultBuf->idsTable));
return slot; SPageInfo* ppi = malloc(sizeof(SPageInfo));//{ .info = PAGE_INFO_INITIALIZER, .pageId = pageId, .pn = NULL};
ppi->info = PAGE_INFO_INITIALIZER;
ppi->pageId = pageId;
ppi->pData = NULL;
ppi->pn = NULL;
ppi->used = true;
return *(SPageInfo**) taosArrayPush(list, &ppi);
} }
static int32_t addNewGroupId(SDiskbasedResultBuf* pResultBuf, int32_t groupId) { static SListNode* getEldestUnrefedPage(SDiskbasedResultBuf* pResultBuf) {
int32_t num = getNumOfResultBufGroupId(pResultBuf); // the num is the newest allocated group id slot SListIter iter = {0};
taosHashPut(pResultBuf->idsTable, (const char*)&groupId, sizeof(int32_t), &num, sizeof(int32_t)); tdListInitIter(pResultBuf->lruList, &iter, TD_LIST_BACKWARD);
SListNode* pn = NULL;
while((pn = tdListNext(&iter)) != NULL) {
assert(pn != NULL);
SPageInfo* pageInfo = *(SPageInfo**) pn->data;
assert(pageInfo->pageId >= 0 && pageInfo->pn == pn);
SArray* pa = taosArrayInit(1, sizeof(int32_t)); if (!pageInfo->used) {
taosArrayPush(pResultBuf->list, &pa); break;
}
}
assert(taosArrayGetSize(pResultBuf->list) == taosHashGetSize(pResultBuf->idsTable)); return pn;
return num;
} }
static void registerPageId(SDiskbasedResultBuf* pResultBuf, int32_t groupId, int32_t pageId) { static char* evicOneDataPage(SDiskbasedResultBuf* pResultBuf) {
int32_t slot = getGroupIndex(pResultBuf, groupId); char* bufPage = NULL;
if (slot < 0) { SListNode* pn = getEldestUnrefedPage(pResultBuf);
slot = addNewGroupId(pResultBuf, groupId);
// all pages are referenced by user, try to allocate new space
if (pn == NULL) {
int32_t prev = pResultBuf->inMemPages;
pResultBuf->inMemPages = pResultBuf->inMemPages * 1.5;
qWarn("%p in memory buf page not sufficient, expand from %d to %d, page size:%d", pResultBuf, prev,
pResultBuf->inMemPages, pResultBuf->pageSize);
} else {
pResultBuf->statis.flushPages += 1;
tdListPopNode(pResultBuf->lruList, pn);
SPageInfo* d = *(SPageInfo**) pn->data;
assert(d->pn == pn);
d->pn = NULL;
tfree(pn);
bufPage = flushPageToDisk(pResultBuf, d);
} }
SIDList pList = taosArrayGetP(pResultBuf->list, slot); return bufPage;
taosArrayPush(pList, &pageId); }
static void lruListPushFront(SList *pList, SPageInfo* pi) {
tdListPrepend(pList, &pi);
SListNode* front = tdListGetHead(pList);
pi->pn = front;
}
static void lruListMoveToFront(SList *pList, SPageInfo* pi) {
tdListPopNode(pList, pi->pn);
tdListPrependNode(pList, pi->pn);
} }
tFilePage* getNewDataBuf(SDiskbasedResultBuf* pResultBuf, int32_t groupId, int32_t* pageId) { tFilePage* getNewDataBuf(SDiskbasedResultBuf* pResultBuf, int32_t groupId, int32_t* pageId) {
pResultBuf->statis.getPages += 1;
char* availablePage = NULL;
if (NO_AVAILABLE_PAGES(pResultBuf)) { if (NO_AVAILABLE_PAGES(pResultBuf)) {
if (extendDiskFileSize(pResultBuf, pResultBuf->incStep) != TSDB_CODE_SUCCESS) { availablePage = evicOneDataPage(pResultBuf);
return NULL;
}
} }
// register new id in this group // register new id in this group
*pageId = (++pResultBuf->allocateId); *pageId = (++pResultBuf->allocateId);
registerPageId(pResultBuf, groupId, *pageId);
// clear memory for the new page // register page id info
tFilePage* page = getResBufPage(pResultBuf, *pageId); SPageInfo* pi = registerPage(pResultBuf, groupId, *pageId);
memset(page, 0, pResultBuf->pageSize);
// add to LRU list
return page; assert(listNEles(pResultBuf->lruList) < pResultBuf->inMemPages && pResultBuf->inMemPages > 0);
lruListPushFront(pResultBuf->lruList, pi);
// add to hash map
taosHashPut(pResultBuf->all, pageId, sizeof(int32_t), &pi, POINTER_BYTES);
// allocate buf
if (availablePage == NULL) {
pi->pData = calloc(1, pResultBuf->pageSize + POINTER_BYTES);
} else {
pi->pData = availablePage;
}
pResultBuf->totalBufSize += pResultBuf->pageSize;
((void**)pi->pData)[0] = pi;
pi->used = true;
return GET_DATA_PAYLOAD(pi);
}
tFilePage* getResBufPage(SDiskbasedResultBuf* pResultBuf, int32_t id) {
assert(pResultBuf != NULL && id >= 0);
pResultBuf->statis.getPages += 1;
SPageInfo** pi = taosHashGet(pResultBuf->all, &id, sizeof(int32_t));
assert(pi != NULL && *pi != NULL);
if ((*pi)->pData != NULL) { // it is in memory
// no need to update the LRU list if only one page exists
if (pResultBuf->numOfPages == 1) {
(*pi)->used = true;
return GET_DATA_PAYLOAD(*pi);
}
SPageInfo** pInfo = (SPageInfo**) ((*pi)->pn->data);
assert(*pInfo == *pi);
lruListMoveToFront(pResultBuf->lruList, (*pi));
(*pi)->used = true;
return GET_DATA_PAYLOAD(*pi);
} else { // not in memory
assert((*pi)->pData == NULL && (*pi)->pn == NULL && (*pi)->info.length >= 0 && (*pi)->info.offset >= 0);
char* availablePage = NULL;
if (NO_AVAILABLE_PAGES(pResultBuf)) {
availablePage = evicOneDataPage(pResultBuf);
}
if (availablePage == NULL) {
(*pi)->pData = calloc(1, pResultBuf->pageSize + POINTER_BYTES);
} else {
(*pi)->pData = availablePage;
}
((void**)((*pi)->pData))[0] = (*pi);
lruListPushFront(pResultBuf->lruList, *pi);
loadPageFromDisk(pResultBuf, *pi);
return GET_DATA_PAYLOAD(*pi);
}
} }
int32_t getNumOfRowsPerPage(SDiskbasedResultBuf* pResultBuf) { return pResultBuf->numOfRowsPerPage; } void releaseResBufPage(SDiskbasedResultBuf* pResultBuf, void* page) {
assert(pResultBuf != NULL && page != NULL);
char* p = (char*) page - POINTER_BYTES;
SPageInfo* ppi = ((SPageInfo**) p)[0];
releaseResBufPageInfo(pResultBuf, ppi);
}
void releaseResBufPageInfo(SDiskbasedResultBuf* pResultBuf, SPageInfo* pi) {
assert(pi->pData != NULL && pi->used);
pi->used = false;
pResultBuf->statis.releasePages += 1;
}
size_t getNumOfRowsPerPage(const SDiskbasedResultBuf* pResultBuf) { return pResultBuf->numOfRowsPerPage; }
size_t getNumOfResultBufGroupId(const SDiskbasedResultBuf* pResultBuf) { return taosHashGetSize(pResultBuf->groupSet); }
size_t getResBufSize(const SDiskbasedResultBuf* pResultBuf) { return pResultBuf->totalBufSize; }
SIDList getDataBufPagesIdList(SDiskbasedResultBuf* pResultBuf, int32_t groupId) { SIDList getDataBufPagesIdList(SDiskbasedResultBuf* pResultBuf, int32_t groupId) {
int32_t slot = getGroupIndex(pResultBuf, groupId); assert(pResultBuf != NULL);
if (slot < 0) {
char** p = taosHashGet(pResultBuf->groupSet, (const char*)&groupId, sizeof(int32_t));
if (p == NULL) { // it is a new group id
return pResultBuf->emptyDummyIdList; return pResultBuf->emptyDummyIdList;
} else { } else {
return taosArrayGetP(pResultBuf->list, slot); return (SArray*) (*p);
} }
} }
void destroyResultBuf(SDiskbasedResultBuf* pResultBuf, void* handle) { void destroyResultBuf(SDiskbasedResultBuf* pResultBuf) {
if (pResultBuf == NULL) { if (pResultBuf == NULL) {
return; return;
} }
if (FD_VALID(pResultBuf->fd)) { if (pResultBuf->file != NULL) {
qDebug("QInfo:%p disk-based output buffer closed, total:%" PRId64 " bytes, file created:%s, file size:%d", handle, qDebug("QInfo:%p disk-based output buffer closed, total:%" PRId64 " bytes, file size:%"PRId64" bytes",
pResultBuf->totalBufSize, pResultBuf->path, FILE_SIZE_ON_DISK(pResultBuf)); pResultBuf->handle, pResultBuf->totalBufSize, pResultBuf->fileSize);
close(pResultBuf->fd); fclose(pResultBuf->file);
munmap(pResultBuf->pBuf, FILE_SIZE_ON_DISK(pResultBuf));
pResultBuf->pBuf = NULL;
} else { } else {
qDebug("QInfo:%p disk-based output buffer closed, total:%" PRId64 " bytes, no file created", handle, qDebug("QInfo:%p disk-based output buffer closed, total:%" PRId64 " bytes, no file created", pResultBuf->handle,
pResultBuf->totalBufSize); pResultBuf->totalBufSize);
} }
unlink(pResultBuf->path); unlink(pResultBuf->path);
tfree(pResultBuf->path); tfree(pResultBuf->path);
size_t size = taosArrayGetSize(pResultBuf->list); SHashMutableIterator* iter = taosHashCreateIter(pResultBuf->groupSet);
for (int32_t i = 0; i < size; ++i) { while(taosHashIterNext(iter)) {
SArray* pa = taosArrayGetP(pResultBuf->list, i); SArray** p = (SArray**) taosHashIterGet(iter);
taosArrayDestroy(pa); size_t n = taosArrayGetSize(*p);
for(int32_t i = 0; i < n; ++i) {
SPageInfo* pi = taosArrayGetP(*p, i);
tfree(pi->pData);
tfree(pi);
}
taosArrayDestroy(*p);
} }
taosArrayDestroy(pResultBuf->list); taosHashDestroyIter(iter);
tdListFree(pResultBuf->lruList);
taosArrayDestroy(pResultBuf->emptyDummyIdList); taosArrayDestroy(pResultBuf->emptyDummyIdList);
taosHashCleanup(pResultBuf->idsTable); taosHashCleanup(pResultBuf->groupSet);
taosHashCleanup(pResultBuf->all);
tfree(pResultBuf->iBuf); tfree(pResultBuf->assistBuf);
tfree(pResultBuf); tfree(pResultBuf);
} }
int32_t getLastPageId(SIDList pList) { SPageInfo* getLastPageInfo(SIDList pList) {
size_t size = taosArrayGetSize(pList); size_t size = taosArrayGetSize(pList);
return *(int32_t*) taosArrayGet(pList, size - 1); return (SPageInfo*) taosArrayGetP(pList, size - 1);
} }
...@@ -236,11 +236,13 @@ void clearTimeWindowResBuf(SQueryRuntimeEnv *pRuntimeEnv, SWindowResult *pWindow ...@@ -236,11 +236,13 @@ void clearTimeWindowResBuf(SQueryRuntimeEnv *pRuntimeEnv, SWindowResult *pWindow
if (pWindowRes == NULL) { if (pWindowRes == NULL) {
return; return;
} }
tFilePage *page = getResBufPage(pRuntimeEnv->pResultBuf, pWindowRes->pos.pageId);
for (int32_t i = 0; i < pRuntimeEnv->pQuery->numOfOutput; ++i) { for (int32_t i = 0; i < pRuntimeEnv->pQuery->numOfOutput; ++i) {
SResultInfo *pResultInfo = &pWindowRes->resultInfo[i]; SResultInfo *pResultInfo = &pWindowRes->resultInfo[i];
char * s = getPosInResultPage(pRuntimeEnv, i, pWindowRes); char * s = getPosInResultPage(pRuntimeEnv, i, pWindowRes, page);
size_t size = pRuntimeEnv->pQuery->pSelectExpr[i].bytes; size_t size = pRuntimeEnv->pQuery->pSelectExpr[i].bytes;
memset(s, 0, size); memset(s, 0, size);
...@@ -277,8 +279,11 @@ void copyTimeWindowResBuf(SQueryRuntimeEnv *pRuntimeEnv, SWindowResult *dst, con ...@@ -277,8 +279,11 @@ void copyTimeWindowResBuf(SQueryRuntimeEnv *pRuntimeEnv, SWindowResult *dst, con
memcpy(pDst->interResultBuf, pSrc->interResultBuf, pDst->bufLen); memcpy(pDst->interResultBuf, pSrc->interResultBuf, pDst->bufLen);
// copy the output buffer data from src to dst, the position info keep unchanged // copy the output buffer data from src to dst, the position info keep unchanged
char * dstBuf = getPosInResultPage(pRuntimeEnv, i, dst); tFilePage *dstpage = getResBufPage(pRuntimeEnv->pResultBuf, dst->pos.pageId);
char * srcBuf = getPosInResultPage(pRuntimeEnv, i, (SWindowResult *)src); char * dstBuf = getPosInResultPage(pRuntimeEnv, i, dst, dstpage);
tFilePage *srcpage = getResBufPage(pRuntimeEnv->pResultBuf, src->pos.pageId);
char * srcBuf = getPosInResultPage(pRuntimeEnv, i, (SWindowResult *)src, srcpage);
size_t s = pRuntimeEnv->pQuery->pSelectExpr[i].bytes; size_t s = pRuntimeEnv->pQuery->pSelectExpr[i].bytes;
memcpy(dstBuf, srcBuf, s); memcpy(dstBuf, srcBuf, s);
......
...@@ -30,7 +30,7 @@ ...@@ -30,7 +30,7 @@
#include <string.h> #include <string.h>
#include <assert.h> #include <assert.h>
#include <stdbool.h> #include <stdbool.h>
#include "qsqlparser.h" #include "qSqlparser.h"
#include "tcmdtype.h" #include "tcmdtype.h"
#include "tstoken.h" #include "tstoken.h"
#include "ttokendef.h" #include "ttokendef.h"
......
...@@ -18,17 +18,144 @@ void simpleTest() { ...@@ -18,17 +18,144 @@ void simpleTest() {
tFilePage* pBufPage = getNewDataBuf(pResultBuf, groupId, &pageId); tFilePage* pBufPage = getNewDataBuf(pResultBuf, groupId, &pageId);
ASSERT_TRUE(pBufPage != NULL); ASSERT_TRUE(pBufPage != NULL);
ASSERT_EQ(getNumOfRowsPerPage(pResultBuf), (16384L - sizeof(int64_t))/64); ASSERT_EQ(getResBufSize(pResultBuf), 1024);
ASSERT_EQ(getResBufSize(pResultBuf), 1000*16384L);
SIDList list = getDataBufPagesIdList(pResultBuf, groupId); SIDList list = getDataBufPagesIdList(pResultBuf, groupId);
ASSERT_EQ(taosArrayGetSize(list), 1); ASSERT_EQ(taosArrayGetSize(list), 1);
ASSERT_EQ(getNumOfResultBufGroupId(pResultBuf), 1); ASSERT_EQ(getNumOfResultBufGroupId(pResultBuf), 1);
destroyResultBuf(pResultBuf, NULL); releaseResBufPage(pResultBuf, pBufPage);
tFilePage* pBufPage1 = getNewDataBuf(pResultBuf, groupId, &pageId);
tFilePage* t = getResBufPage(pResultBuf, pageId);
ASSERT_TRUE(t == pBufPage1);
tFilePage* pBufPage2 = getNewDataBuf(pResultBuf, groupId, &pageId);
tFilePage* t1 = getResBufPage(pResultBuf, pageId);
ASSERT_TRUE(t1 == pBufPage2);
tFilePage* pBufPage3 = getNewDataBuf(pResultBuf, groupId, &pageId);
tFilePage* t2 = getResBufPage(pResultBuf, pageId);
ASSERT_TRUE(t2 == pBufPage3);
tFilePage* pBufPage4 = getNewDataBuf(pResultBuf, groupId, &pageId);
tFilePage* t3 = getResBufPage(pResultBuf, pageId);
ASSERT_TRUE(t3 == pBufPage4);
tFilePage* pBufPage5 = getNewDataBuf(pResultBuf, groupId, &pageId);
tFilePage* t4 = getResBufPage(pResultBuf, pageId);
ASSERT_TRUE(t4 == pBufPage5);
destroyResultBuf(pResultBuf);
}
void writeDownTest() {
SDiskbasedResultBuf* pResultBuf = NULL;
int32_t ret = createDiskbasedResultBuffer(&pResultBuf, 1000, 64, 1024, 4, NULL);
int32_t pageId = 0;
int32_t writePageId = 0;
int32_t groupId = 0;
int32_t nx = 12345;
tFilePage* pBufPage = getNewDataBuf(pResultBuf, groupId, &pageId);
ASSERT_TRUE(pBufPage != NULL);
*(int32_t*)(pBufPage->data) = nx;
writePageId = pageId;
releaseResBufPage(pResultBuf, pBufPage);
tFilePage* pBufPage1 = getNewDataBuf(pResultBuf, groupId, &pageId);
tFilePage* t1 = getResBufPage(pResultBuf, pageId);
ASSERT_TRUE(t1 == pBufPage1);
ASSERT_TRUE(pageId == 1);
tFilePage* pBufPage2 = getNewDataBuf(pResultBuf, groupId, &pageId);
tFilePage* t2 = getResBufPage(pResultBuf, pageId);
ASSERT_TRUE(t2 == pBufPage2);
ASSERT_TRUE(pageId == 2);
tFilePage* pBufPage3 = getNewDataBuf(pResultBuf, groupId, &pageId);
tFilePage* t3 = getResBufPage(pResultBuf, pageId);
ASSERT_TRUE(t3 == pBufPage3);
ASSERT_TRUE(pageId == 3);
tFilePage* pBufPage4 = getNewDataBuf(pResultBuf, groupId, &pageId);
tFilePage* t4 = getResBufPage(pResultBuf, pageId);
ASSERT_TRUE(t4 == pBufPage4);
ASSERT_TRUE(pageId == 4);
releaseResBufPage(pResultBuf, t4);
// flush the written page to disk, and read it out again
tFilePage* pBufPagex = getResBufPage(pResultBuf, writePageId);
ASSERT_EQ(*(int32_t*)pBufPagex->data, nx);
SArray* pa = getDataBufPagesIdList(pResultBuf, groupId);
ASSERT_EQ(taosArrayGetSize(pa), 5);
destroyResultBuf(pResultBuf);
}
void recyclePageTest() {
SDiskbasedResultBuf* pResultBuf = NULL;
int32_t ret = createDiskbasedResultBuffer(&pResultBuf, 1000, 64, 1024, 4, NULL);
int32_t pageId = 0;
int32_t writePageId = 0;
int32_t groupId = 0;
int32_t nx = 12345;
tFilePage* pBufPage = getNewDataBuf(pResultBuf, groupId, &pageId);
ASSERT_TRUE(pBufPage != NULL);
releaseResBufPage(pResultBuf, pBufPage);
tFilePage* pBufPage1 = getNewDataBuf(pResultBuf, groupId, &pageId);
tFilePage* t1 = getResBufPage(pResultBuf, pageId);
ASSERT_TRUE(t1 == pBufPage1);
ASSERT_TRUE(pageId == 1);
tFilePage* pBufPage2 = getNewDataBuf(pResultBuf, groupId, &pageId);
tFilePage* t2 = getResBufPage(pResultBuf, pageId);
ASSERT_TRUE(t2 == pBufPage2);
ASSERT_TRUE(pageId == 2);
tFilePage* pBufPage3 = getNewDataBuf(pResultBuf, groupId, &pageId);
tFilePage* t3 = getResBufPage(pResultBuf, pageId);
ASSERT_TRUE(t3 == pBufPage3);
ASSERT_TRUE(pageId == 3);
tFilePage* pBufPage4 = getNewDataBuf(pResultBuf, groupId, &pageId);
tFilePage* t4 = getResBufPage(pResultBuf, pageId);
ASSERT_TRUE(t4 == pBufPage4);
ASSERT_TRUE(pageId == 4);
releaseResBufPage(pResultBuf, t4);
releaseResBufPage(pResultBuf, t4);
tFilePage* pBufPage5 = getNewDataBuf(pResultBuf, groupId, &pageId);
tFilePage* t5 = getResBufPage(pResultBuf, pageId);
ASSERT_TRUE(t5 == pBufPage5);
ASSERT_TRUE(pageId == 5);
// flush the written page to disk, and read it out again
tFilePage* pBufPagex = getResBufPage(pResultBuf, writePageId);
*(int32_t*)(pBufPagex->data) = nx;
writePageId = pageId; // update the data
releaseResBufPage(pResultBuf, pBufPagex);
tFilePage* pBufPagex1 = getResBufPage(pResultBuf, 1);
SArray* pa = getDataBufPagesIdList(pResultBuf, groupId);
ASSERT_EQ(taosArrayGetSize(pa), 6);
destroyResultBuf(pResultBuf);
} }
} // namespace } // namespace
TEST(testCase, resultBufferTest) { TEST(testCase, resultBufferTest) {
srand(time(NULL));
simpleTest(); simpleTest();
writeDownTest();
recyclePageTest();
} }
...@@ -210,7 +210,7 @@ TsdbQueryHandleT* tsdbQueryTables(TSDB_REPO_T* tsdb, STsdbQueryCond* pCond, STab ...@@ -210,7 +210,7 @@ TsdbQueryHandleT* tsdbQueryTables(TSDB_REPO_T* tsdb, STsdbQueryCond* pCond, STab
if (pQueryHandle->pColumns == NULL) { if (pQueryHandle->pColumns == NULL) {
goto out_of_memory; goto out_of_memory;
} }
for (int32_t i = 0; i < numOfCols; ++i) { for (int32_t i = 0; i < numOfCols; ++i) {
SColumnInfoData colInfo = {{0}, 0}; SColumnInfoData colInfo = {{0}, 0};
...@@ -222,29 +222,29 @@ TsdbQueryHandleT* tsdbQueryTables(TSDB_REPO_T* tsdb, STsdbQueryCond* pCond, STab ...@@ -222,29 +222,29 @@ TsdbQueryHandleT* tsdbQueryTables(TSDB_REPO_T* tsdb, STsdbQueryCond* pCond, STab
taosArrayPush(pQueryHandle->pColumns, &colInfo); taosArrayPush(pQueryHandle->pColumns, &colInfo);
pQueryHandle->statis[i].colId = colInfo.info.colId; pQueryHandle->statis[i].colId = colInfo.info.colId;
} }
pQueryHandle->pTableCheckInfo = taosArrayInit(groupList->numOfTables, sizeof(STableCheckInfo)); pQueryHandle->pTableCheckInfo = taosArrayInit(groupList->numOfTables, sizeof(STableCheckInfo));
if (pQueryHandle->pTableCheckInfo == NULL) { if (pQueryHandle->pTableCheckInfo == NULL) {
goto out_of_memory; goto out_of_memory;
} }
STsdbMeta* pMeta = tsdbGetMeta(tsdb); STsdbMeta* pMeta = tsdbGetMeta(tsdb);
assert(pMeta != NULL); assert(pMeta != NULL);
for (int32_t i = 0; i < sizeOfGroup; ++i) { for (int32_t i = 0; i < sizeOfGroup; ++i) {
SArray* group = *(SArray**) taosArrayGet(groupList->pGroupList, i); SArray* group = *(SArray**) taosArrayGet(groupList->pGroupList, i);
size_t gsize = taosArrayGetSize(group); size_t gsize = taosArrayGetSize(group);
assert(gsize > 0); assert(gsize > 0);
for (int32_t j = 0; j < gsize; ++j) { for (int32_t j = 0; j < gsize; ++j) {
STable* pTable = (STable*) taosArrayGetP(group, j); STable* pTable = (STable*) taosArrayGetP(group, j);
STableCheckInfo info = { STableCheckInfo info = {
.lastKey = pQueryHandle->window.skey, .lastKey = pQueryHandle->window.skey,
.tableId = pTable->tableId, .tableId = pTable->tableId,
.pTableObj = pTable, .pTableObj = pTable,
}; };
assert(info.pTableObj != NULL && (info.pTableObj->type == TSDB_NORMAL_TABLE || assert(info.pTableObj != NULL && (info.pTableObj->type == TSDB_NORMAL_TABLE ||
info.pTableObj->type == TSDB_CHILD_TABLE || info.pTableObj->type == TSDB_STREAM_TABLE)); info.pTableObj->type == TSDB_CHILD_TABLE || info.pTableObj->type == TSDB_STREAM_TABLE));
...@@ -280,17 +280,17 @@ TsdbQueryHandleT tsdbQueryLastRow(TSDB_REPO_T *tsdb, STsdbQueryCond *pCond, STab ...@@ -280,17 +280,17 @@ TsdbQueryHandleT tsdbQueryLastRow(TSDB_REPO_T *tsdb, STsdbQueryCond *pCond, STab
SArray* tsdbGetQueriedTableList(TsdbQueryHandleT *pHandle) { SArray* tsdbGetQueriedTableList(TsdbQueryHandleT *pHandle) {
assert(pHandle != NULL); assert(pHandle != NULL);
STsdbQueryHandle *pQueryHandle = (STsdbQueryHandle*) pHandle; STsdbQueryHandle *pQueryHandle = (STsdbQueryHandle*) pHandle;
size_t size = taosArrayGetSize(pQueryHandle->pTableCheckInfo); size_t size = taosArrayGetSize(pQueryHandle->pTableCheckInfo);
SArray* res = taosArrayInit(size, POINTER_BYTES); SArray* res = taosArrayInit(size, POINTER_BYTES);
for(int32_t i = 0; i < size; ++i) { for(int32_t i = 0; i < size; ++i) {
STableCheckInfo* pCheckInfo = taosArrayGet(pQueryHandle->pTableCheckInfo, i); STableCheckInfo* pCheckInfo = taosArrayGet(pQueryHandle->pTableCheckInfo, i);
taosArrayPush(res, &pCheckInfo->pTableObj); taosArrayPush(res, &pCheckInfo->pTableObj);
} }
return res; return res;
} }
...@@ -306,11 +306,11 @@ TsdbQueryHandleT tsdbQueryRowsInExternalWindow(TSDB_REPO_T *tsdb, STsdbQueryCond ...@@ -306,11 +306,11 @@ TsdbQueryHandleT tsdbQueryRowsInExternalWindow(TSDB_REPO_T *tsdb, STsdbQueryCond
static bool initTableMemIterator(STsdbQueryHandle* pHandle, STableCheckInfo* pCheckInfo) { static bool initTableMemIterator(STsdbQueryHandle* pHandle, STableCheckInfo* pCheckInfo) {
STable* pTable = pCheckInfo->pTableObj; STable* pTable = pCheckInfo->pTableObj;
assert(pTable != NULL); assert(pTable != NULL);
if (pCheckInfo->initBuf) { if (pCheckInfo->initBuf) {
return true; return true;
} }
pCheckInfo->initBuf = true; pCheckInfo->initBuf = true;
int32_t order = pHandle->order; int32_t order = pHandle->order;
...@@ -318,7 +318,7 @@ static bool initTableMemIterator(STsdbQueryHandle* pHandle, STableCheckInfo* pCh ...@@ -318,7 +318,7 @@ static bool initTableMemIterator(STsdbQueryHandle* pHandle, STableCheckInfo* pCh
if (pHandle->mem == NULL && pHandle->imem == NULL) { if (pHandle->mem == NULL && pHandle->imem == NULL) {
return false; return false;
} }
assert(pCheckInfo->iter == NULL && pCheckInfo->iiter == NULL); assert(pCheckInfo->iter == NULL && pCheckInfo->iiter == NULL);
// TODO: add uid check // TODO: add uid check
...@@ -338,17 +338,17 @@ static bool initTableMemIterator(STsdbQueryHandle* pHandle, STableCheckInfo* pCh ...@@ -338,17 +338,17 @@ static bool initTableMemIterator(STsdbQueryHandle* pHandle, STableCheckInfo* pCh
if (pCheckInfo->iter == NULL && pCheckInfo->iiter == NULL) { if (pCheckInfo->iter == NULL && pCheckInfo->iiter == NULL) {
return false; return false;
} }
bool memEmpty = (pCheckInfo->iter == NULL) || (pCheckInfo->iter != NULL && !tSkipListIterNext(pCheckInfo->iter)); bool memEmpty = (pCheckInfo->iter == NULL) || (pCheckInfo->iter != NULL && !tSkipListIterNext(pCheckInfo->iter));
bool imemEmpty = (pCheckInfo->iiter == NULL) || (pCheckInfo->iiter != NULL && !tSkipListIterNext(pCheckInfo->iiter)); bool imemEmpty = (pCheckInfo->iiter == NULL) || (pCheckInfo->iiter != NULL && !tSkipListIterNext(pCheckInfo->iiter));
if (memEmpty && imemEmpty) { // buffer is empty if (memEmpty && imemEmpty) { // buffer is empty
return false; return false;
} }
if (!memEmpty) { if (!memEmpty) {
SSkipListNode* node = tSkipListIterGet(pCheckInfo->iter); SSkipListNode* node = tSkipListIterGet(pCheckInfo->iter);
assert(node != NULL); assert(node != NULL);
SDataRow row = SL_GET_NODE_DATA(node); SDataRow row = SL_GET_NODE_DATA(node);
TSKEY key = dataRowKey(row); // first timestamp in buffer TSKEY key = dataRowKey(row); // first timestamp in buffer
tsdbDebug("%p uid:%" PRId64", tid:%d check data in mem from skey:%" PRId64 ", order:%d, %p", pHandle, tsdbDebug("%p uid:%" PRId64", tid:%d check data in mem from skey:%" PRId64 ", order:%d, %p", pHandle,
...@@ -357,11 +357,11 @@ static bool initTableMemIterator(STsdbQueryHandle* pHandle, STableCheckInfo* pCh ...@@ -357,11 +357,11 @@ static bool initTableMemIterator(STsdbQueryHandle* pHandle, STableCheckInfo* pCh
tsdbDebug("%p uid:%"PRId64", tid:%d no data in mem, %p", pHandle, pCheckInfo->tableId.uid, pCheckInfo->tableId.tid, tsdbDebug("%p uid:%"PRId64", tid:%d no data in mem, %p", pHandle, pCheckInfo->tableId.uid, pCheckInfo->tableId.tid,
pHandle->qinfo); pHandle->qinfo);
} }
if (!imemEmpty) { if (!imemEmpty) {
SSkipListNode* node = tSkipListIterGet(pCheckInfo->iiter); SSkipListNode* node = tSkipListIterGet(pCheckInfo->iiter);
assert(node != NULL); assert(node != NULL);
SDataRow row = SL_GET_NODE_DATA(node); SDataRow row = SL_GET_NODE_DATA(node);
TSKEY key = dataRowKey(row); // first timestamp in buffer TSKEY key = dataRowKey(row); // first timestamp in buffer
tsdbDebug("%p uid:%" PRId64", tid:%d check data in imem from skey:%" PRId64 ", order:%d, %p", pHandle, tsdbDebug("%p uid:%" PRId64", tid:%d check data in imem from skey:%" PRId64 ", order:%d, %p", pHandle,
...@@ -370,7 +370,7 @@ static bool initTableMemIterator(STsdbQueryHandle* pHandle, STableCheckInfo* pCh ...@@ -370,7 +370,7 @@ static bool initTableMemIterator(STsdbQueryHandle* pHandle, STableCheckInfo* pCh
tsdbDebug("%p uid:%"PRId64", tid:%d no data in imem, %p", pHandle, pCheckInfo->tableId.uid, pCheckInfo->tableId.tid, tsdbDebug("%p uid:%"PRId64", tid:%d no data in imem, %p", pHandle, pCheckInfo->tableId.uid, pCheckInfo->tableId.tid,
pHandle->qinfo); pHandle->qinfo);
} }
return true; return true;
} }
...@@ -473,7 +473,7 @@ static bool hasMoreDataInCache(STsdbQueryHandle* pHandle) { ...@@ -473,7 +473,7 @@ static bool hasMoreDataInCache(STsdbQueryHandle* pHandle) {
size_t size = taosArrayGetSize(pHandle->pTableCheckInfo); size_t size = taosArrayGetSize(pHandle->pTableCheckInfo);
assert(pHandle->activeIndex < size && pHandle->activeIndex >= 0 && size >= 1); assert(pHandle->activeIndex < size && pHandle->activeIndex >= 0 && size >= 1);
pHandle->cur.fid = -1; pHandle->cur.fid = -1;
STableCheckInfo* pCheckInfo = taosArrayGet(pHandle->pTableCheckInfo, pHandle->activeIndex); STableCheckInfo* pCheckInfo = taosArrayGet(pHandle->pTableCheckInfo, pHandle->activeIndex);
STable* pTable = pCheckInfo->pTableObj; STable* pTable = pCheckInfo->pTableObj;
...@@ -491,17 +491,17 @@ static bool hasMoreDataInCache(STsdbQueryHandle* pHandle) { ...@@ -491,17 +491,17 @@ static bool hasMoreDataInCache(STsdbQueryHandle* pHandle) {
pCheckInfo->lastKey = dataRowKey(row); // first timestamp in buffer pCheckInfo->lastKey = dataRowKey(row); // first timestamp in buffer
tsdbDebug("%p uid:%" PRId64", tid:%d check data in buffer from skey:%" PRId64 ", order:%d, %p", pHandle, tsdbDebug("%p uid:%" PRId64", tid:%d check data in buffer from skey:%" PRId64 ", order:%d, %p", pHandle,
pCheckInfo->tableId.uid, pCheckInfo->tableId.tid, pCheckInfo->lastKey, pHandle->order, pHandle->qinfo); pCheckInfo->tableId.uid, pCheckInfo->tableId.tid, pCheckInfo->lastKey, pHandle->order, pHandle->qinfo);
// all data in mem are checked already. // all data in mem are checked already.
if ((pCheckInfo->lastKey > pHandle->window.ekey && ASCENDING_TRAVERSE(pHandle->order)) || if ((pCheckInfo->lastKey > pHandle->window.ekey && ASCENDING_TRAVERSE(pHandle->order)) ||
(pCheckInfo->lastKey < pHandle->window.ekey && !ASCENDING_TRAVERSE(pHandle->order))) { (pCheckInfo->lastKey < pHandle->window.ekey && !ASCENDING_TRAVERSE(pHandle->order))) {
return false; return false;
} }
int32_t step = ASCENDING_TRAVERSE(pHandle->order)? 1:-1; int32_t step = ASCENDING_TRAVERSE(pHandle->order)? 1:-1;
STimeWindow* win = &pHandle->cur.win; STimeWindow* win = &pHandle->cur.win;
pHandle->cur.rows = tsdbReadRowsFromCache(pCheckInfo, pHandle->window.ekey, pHandle->outputCapacity, win, pHandle); pHandle->cur.rows = tsdbReadRowsFromCache(pCheckInfo, pHandle->window.ekey, pHandle->outputCapacity, win, pHandle);
// update the last key value // update the last key value
pCheckInfo->lastKey = win->ekey + step; pCheckInfo->lastKey = win->ekey + step;
pHandle->cur.lastKey = win->ekey + step; pHandle->cur.lastKey = win->ekey + step;
...@@ -510,7 +510,7 @@ static bool hasMoreDataInCache(STsdbQueryHandle* pHandle) { ...@@ -510,7 +510,7 @@ static bool hasMoreDataInCache(STsdbQueryHandle* pHandle) {
if (!ASCENDING_TRAVERSE(pHandle->order)) { if (!ASCENDING_TRAVERSE(pHandle->order)) {
SWAP(win->skey, win->ekey, TSKEY); SWAP(win->skey, win->ekey, TSKEY);
} }
return true; return true;
} }
...@@ -519,31 +519,31 @@ static int32_t getFileIdFromKey(TSKEY key, int32_t daysPerFile, int32_t precisio ...@@ -519,31 +519,31 @@ static int32_t getFileIdFromKey(TSKEY key, int32_t daysPerFile, int32_t precisio
if (key == TSKEY_INITIAL_VAL) { if (key == TSKEY_INITIAL_VAL) {
return INT32_MIN; return INT32_MIN;
} }
int64_t fid = (int64_t)(key / (daysPerFile * tsMsPerDay[precision])); // set the starting fileId int64_t fid = (int64_t)(key / (daysPerFile * tsMsPerDay[precision])); // set the starting fileId
if (fid < 0L && llabs(fid) > INT32_MAX) { // data value overflow for INT32 if (fid < 0L && llabs(fid) > INT32_MAX) { // data value overflow for INT32
fid = INT32_MIN; fid = INT32_MIN;
} }
if (fid > 0L && fid > INT32_MAX) { if (fid > 0L && fid > INT32_MAX) {
fid = INT32_MAX; fid = INT32_MAX;
} }
return fid; return fid;
} }
static int32_t binarySearchForBlock(SCompBlock* pBlock, int32_t numOfBlocks, TSKEY skey, int32_t order) { static int32_t binarySearchForBlock(SCompBlock* pBlock, int32_t numOfBlocks, TSKEY skey, int32_t order) {
int32_t firstSlot = 0; int32_t firstSlot = 0;
int32_t lastSlot = numOfBlocks - 1; int32_t lastSlot = numOfBlocks - 1;
int32_t midSlot = firstSlot; int32_t midSlot = firstSlot;
while (1) { while (1) {
numOfBlocks = lastSlot - firstSlot + 1; numOfBlocks = lastSlot - firstSlot + 1;
midSlot = (firstSlot + (numOfBlocks >> 1)); midSlot = (firstSlot + (numOfBlocks >> 1));
if (numOfBlocks == 1) break; if (numOfBlocks == 1) break;
if (skey > pBlock[midSlot].keyLast) { if (skey > pBlock[midSlot].keyLast) {
if (numOfBlocks == 2) break; if (numOfBlocks == 2) break;
if ((order == TSDB_ORDER_DESC) && (skey < pBlock[midSlot + 1].keyFirst)) break; if ((order == TSDB_ORDER_DESC) && (skey < pBlock[midSlot + 1].keyFirst)) break;
...@@ -555,7 +555,7 @@ static int32_t binarySearchForBlock(SCompBlock* pBlock, int32_t numOfBlocks, TSK ...@@ -555,7 +555,7 @@ static int32_t binarySearchForBlock(SCompBlock* pBlock, int32_t numOfBlocks, TSK
break; // got the slot break; // got the slot
} }
} }
return midSlot; return midSlot;
} }
...@@ -644,7 +644,7 @@ static int32_t getFileCompInfo(STsdbQueryHandle* pQueryHandle, int32_t* numOfBlo ...@@ -644,7 +644,7 @@ static int32_t getFileCompInfo(STsdbQueryHandle* pQueryHandle, int32_t* numOfBlo
.uid = (_checkInfo)->tableId.uid}) .uid = (_checkInfo)->tableId.uid})
static bool doLoadFileDataBlock(STsdbQueryHandle* pQueryHandle, SCompBlock* pBlock, STableCheckInfo* pCheckInfo) { static bool doLoadFileDataBlock(STsdbQueryHandle* pQueryHandle, SCompBlock* pBlock, STableCheckInfo* pCheckInfo, int32_t slotIndex) {
STsdbRepo *pRepo = pQueryHandle->pTsdb; STsdbRepo *pRepo = pQueryHandle->pTsdb;
bool blockLoaded = false; bool blockLoaded = false;
int64_t st = taosGetTimestampUs(); int64_t st = taosGetTimestampUs();
...@@ -678,8 +678,9 @@ static bool doLoadFileDataBlock(STsdbQueryHandle* pQueryHandle, SCompBlock* pBlo ...@@ -678,8 +678,9 @@ static bool doLoadFileDataBlock(STsdbQueryHandle* pQueryHandle, SCompBlock* pBlo
int64_t elapsedTime = (taosGetTimestampUs() - st); int64_t elapsedTime = (taosGetTimestampUs() - st);
pQueryHandle->cost.blockLoadTime += elapsedTime; pQueryHandle->cost.blockLoadTime += elapsedTime;
tsdbDebug("%p load file block into buffer, elapsed time:%"PRId64 " us", pQueryHandle, elapsedTime);
tsdbDebug("%p load file block into buffer, index:%d, brange:%"PRId64"-%"PRId64" , rows:%d, elapsed time:%"PRId64 " us, %p",
pQueryHandle, slotIndex, pBlock->keyFirst, pBlock->keyLast, pBlock->numOfRows, elapsedTime, pQueryHandle->qinfo);
return blockLoaded; return blockLoaded;
} }
...@@ -692,18 +693,17 @@ static void handleDataMergeIfNeeded(STsdbQueryHandle* pQueryHandle, SCompBlock* ...@@ -692,18 +693,17 @@ static void handleDataMergeIfNeeded(STsdbQueryHandle* pQueryHandle, SCompBlock*
TSKEY key = (row != NULL)? dataRowKey(row):TSKEY_INITIAL_VAL; TSKEY key = (row != NULL)? dataRowKey(row):TSKEY_INITIAL_VAL;
cur->pos = ASCENDING_TRAVERSE(pQueryHandle->order)? 0:(binfo.rows-1); cur->pos = ASCENDING_TRAVERSE(pQueryHandle->order)? 0:(binfo.rows-1);
if ((ASCENDING_TRAVERSE(pQueryHandle->order) && (key != TSKEY_INITIAL_VAL && key <= binfo.window.ekey)) || if ((ASCENDING_TRAVERSE(pQueryHandle->order) && (key != TSKEY_INITIAL_VAL && key <= binfo.window.ekey)) ||
(!ASCENDING_TRAVERSE(pQueryHandle->order) && (key != TSKEY_INITIAL_VAL && key >= binfo.window.skey))) { (!ASCENDING_TRAVERSE(pQueryHandle->order) && (key != TSKEY_INITIAL_VAL && key >= binfo.window.skey))) {
if ((ASCENDING_TRAVERSE(pQueryHandle->order) && (key != TSKEY_INITIAL_VAL && key < binfo.window.skey)) || if ((ASCENDING_TRAVERSE(pQueryHandle->order) && (key != TSKEY_INITIAL_VAL && key < binfo.window.skey)) ||
(!ASCENDING_TRAVERSE(pQueryHandle->order) && (key != TSKEY_INITIAL_VAL && key > binfo.window.ekey))) { (!ASCENDING_TRAVERSE(pQueryHandle->order) && (key != TSKEY_INITIAL_VAL && key > binfo.window.ekey))) {
// do not load file block into buffer // do not load file block into buffer
int32_t step = ASCENDING_TRAVERSE(pQueryHandle->order) ? 1 : -1; int32_t step = ASCENDING_TRAVERSE(pQueryHandle->order) ? 1 : -1;
cur->rows = tsdbReadRowsFromCache(pCheckInfo, binfo.window.skey - step, cur->rows = tsdbReadRowsFromCache(pCheckInfo, binfo.window.skey - step, pQueryHandle->outputCapacity, &cur->win, pQueryHandle);
pQueryHandle->outputCapacity, &cur->win, pQueryHandle);
pQueryHandle->realNumOfRows = cur->rows; pQueryHandle->realNumOfRows = cur->rows;
// update the last key value // update the last key value
...@@ -711,13 +711,13 @@ static void handleDataMergeIfNeeded(STsdbQueryHandle* pQueryHandle, SCompBlock* ...@@ -711,13 +711,13 @@ static void handleDataMergeIfNeeded(STsdbQueryHandle* pQueryHandle, SCompBlock*
if (!ASCENDING_TRAVERSE(pQueryHandle->order)) { if (!ASCENDING_TRAVERSE(pQueryHandle->order)) {
SWAP(cur->win.skey, cur->win.ekey, TSKEY); SWAP(cur->win.skey, cur->win.ekey, TSKEY);
} }
cur->mixBlock = true; cur->mixBlock = true;
cur->blockCompleted = false; cur->blockCompleted = false;
return; return;
} }
doLoadFileDataBlock(pQueryHandle, pBlock, pCheckInfo); doLoadFileDataBlock(pQueryHandle, pBlock, pCheckInfo, cur->slot);
doMergeTwoLevelData(pQueryHandle, pCheckInfo, pBlock); doMergeTwoLevelData(pQueryHandle, pCheckInfo, pBlock);
} else { } else {
/* /*
...@@ -742,39 +742,41 @@ static bool loadFileDataBlock(STsdbQueryHandle* pQueryHandle, SCompBlock* pBlock ...@@ -742,39 +742,41 @@ static bool loadFileDataBlock(STsdbQueryHandle* pQueryHandle, SCompBlock* pBlock
SQueryFilePos* cur = &pQueryHandle->cur; SQueryFilePos* cur = &pQueryHandle->cur;
if (ASCENDING_TRAVERSE(pQueryHandle->order)) { if (ASCENDING_TRAVERSE(pQueryHandle->order)) {
// query ended in current block // query ended in/started from current block
if (pQueryHandle->window.ekey < pBlock->keyLast || pCheckInfo->lastKey > pBlock->keyFirst) { if (pQueryHandle->window.ekey < pBlock->keyLast || pCheckInfo->lastKey > pBlock->keyFirst) {
if (!doLoadFileDataBlock(pQueryHandle, pBlock, pCheckInfo)) { if (!doLoadFileDataBlock(pQueryHandle, pBlock, pCheckInfo, cur->slot)) {
return false; return false;
} }
SDataCols* pTSCol = pQueryHandle->rhelper.pDataCols[0]; SDataCols* pTSCol = pQueryHandle->rhelper.pDataCols[0];
assert(pTSCol->cols->type == TSDB_DATA_TYPE_TIMESTAMP && pTSCol->numOfRows == pBlock->numOfRows); assert(pTSCol->cols->type == TSDB_DATA_TYPE_TIMESTAMP && pTSCol->numOfRows == pBlock->numOfRows);
if (pCheckInfo->lastKey > pBlock->keyFirst) { if (pCheckInfo->lastKey > pBlock->keyFirst) {
cur->pos = cur->pos =
binarySearchForKey(pTSCol->cols[0].pData, pBlock->numOfRows, pCheckInfo->lastKey, pQueryHandle->order); binarySearchForKey(pTSCol->cols[0].pData, pBlock->numOfRows, pCheckInfo->lastKey, pQueryHandle->order);
} else { } else {
cur->pos = 0; cur->pos = 0;
} }
assert(pCheckInfo->lastKey <= pBlock->keyLast);
doMergeTwoLevelData(pQueryHandle, pCheckInfo, pBlock); doMergeTwoLevelData(pQueryHandle, pCheckInfo, pBlock);
} else { // the whole block is loaded in to buffer } else { // the whole block is loaded in to buffer
handleDataMergeIfNeeded(pQueryHandle, pBlock, pCheckInfo); handleDataMergeIfNeeded(pQueryHandle, pBlock, pCheckInfo);
} }
} else { //desc order, query ended in current block } else { //desc order, query ended in current block
if (pQueryHandle->window.ekey > pBlock->keyFirst || pCheckInfo->lastKey < pBlock->keyLast) { if (pQueryHandle->window.ekey > pBlock->keyFirst || pCheckInfo->lastKey < pBlock->keyLast) {
if (!doLoadFileDataBlock(pQueryHandle, pBlock, pCheckInfo)) { if (!doLoadFileDataBlock(pQueryHandle, pBlock, pCheckInfo, cur->slot)) {
return false; return false;
} }
SDataCols* pTSCol = pQueryHandle->rhelper.pDataCols[0]; SDataCols* pTSCol = pQueryHandle->rhelper.pDataCols[0];
if (pCheckInfo->lastKey < pBlock->keyLast) { if (pCheckInfo->lastKey < pBlock->keyLast) {
cur->pos = binarySearchForKey(pTSCol->cols[0].pData, pBlock->numOfRows, pCheckInfo->lastKey, pQueryHandle->order); cur->pos = binarySearchForKey(pTSCol->cols[0].pData, pBlock->numOfRows, pCheckInfo->lastKey, pQueryHandle->order);
} else { } else {
cur->pos = pBlock->numOfRows - 1; cur->pos = pBlock->numOfRows - 1;
} }
assert(pCheckInfo->lastKey >= pBlock->keyFirst);
doMergeTwoLevelData(pQueryHandle, pCheckInfo, pBlock); doMergeTwoLevelData(pQueryHandle, pCheckInfo, pBlock);
} else { } else {
handleDataMergeIfNeeded(pQueryHandle, pBlock, pCheckInfo); handleDataMergeIfNeeded(pQueryHandle, pBlock, pCheckInfo);
...@@ -790,7 +792,7 @@ static int doBinarySearchKey(char* pValue, int num, TSKEY key, int order) { ...@@ -790,7 +792,7 @@ static int doBinarySearchKey(char* pValue, int num, TSKEY key, int order) {
TSKEY* keyList; TSKEY* keyList;
assert(order == TSDB_ORDER_ASC || order == TSDB_ORDER_DESC); assert(order == TSDB_ORDER_ASC || order == TSDB_ORDER_DESC);
if (num <= 0) return -1; if (num <= 0) return -1;
keyList = (TSKEY*)pValue; keyList = (TSKEY*)pValue;
...@@ -849,13 +851,19 @@ static int doBinarySearchKey(char* pValue, int num, TSKEY key, int order) { ...@@ -849,13 +851,19 @@ static int doBinarySearchKey(char* pValue, int num, TSKEY key, int order) {
static int32_t copyDataFromFileBlock(STsdbQueryHandle* pQueryHandle, int32_t capacity, int32_t numOfRows, int32_t start, int32_t end) { static int32_t copyDataFromFileBlock(STsdbQueryHandle* pQueryHandle, int32_t capacity, int32_t numOfRows, int32_t start, int32_t end) {
char* pData = NULL; char* pData = NULL;
int32_t step = ASCENDING_TRAVERSE(pQueryHandle->order)? 1 : -1; int32_t step = ASCENDING_TRAVERSE(pQueryHandle->order)? 1 : -1;
SDataCols* pCols = pQueryHandle->rhelper.pDataCols[0]; SDataCols* pCols = pQueryHandle->rhelper.pDataCols[0];
TSKEY* tsArray = pCols->cols[0].pData; TSKEY* tsArray = pCols->cols[0].pData;
int32_t num = end - start + 1; int32_t num = end - start + 1;
assert(num >= 0);
if (num == 0) {
return numOfRows;
}
int32_t requiredNumOfCols = taosArrayGetSize(pQueryHandle->pColumns); int32_t requiredNumOfCols = taosArrayGetSize(pQueryHandle->pColumns);
//data in buffer has greater timestamp, copy data in file block //data in buffer has greater timestamp, copy data in file block
int32_t i = 0, j = 0; int32_t i = 0, j = 0;
while(i < requiredNumOfCols && j < pCols->numOfCols) { while(i < requiredNumOfCols && j < pCols->numOfCols) {
...@@ -928,7 +936,7 @@ static int32_t copyDataFromFileBlock(STsdbQueryHandle* pQueryHandle, int32_t cap ...@@ -928,7 +936,7 @@ static int32_t copyDataFromFileBlock(STsdbQueryHandle* pQueryHandle, int32_t cap
i++; i++;
} }
pQueryHandle->cur.win.ekey = tsArray[end]; pQueryHandle->cur.win.ekey = tsArray[end];
pQueryHandle->cur.lastKey = tsArray[end] + step; pQueryHandle->cur.lastKey = tsArray[end] + step;
...@@ -995,24 +1003,99 @@ static void copyOneRowFromMem(STsdbQueryHandle* pQueryHandle, int32_t capacity, ...@@ -995,24 +1003,99 @@ static void copyOneRowFromMem(STsdbQueryHandle* pQueryHandle, int32_t capacity,
} }
} }
/*
 * Shift the rows collected by a descending-order scan to the head of each output column.
 *
 * Nothing is moved when there are no rows, when the traversal is ascending, or when the
 * rows already occupy the whole output buffer (numOfRows >= outputCapacity).
 *
 * pQueryHandle  query handle supplying order, outputCapacity and the pColumns array
 * numOfRows     number of rows currently stored in the output columns
 * numOfCols     number of pColumns entries to process
 */
static void moveDataToFront(STsdbQueryHandle* pQueryHandle, int32_t numOfRows, int32_t numOfCols) {
  if (numOfRows == 0 || ASCENDING_TRAVERSE(pQueryHandle->order) || numOfRows >= pQueryHandle->outputCapacity) {
    return;
  }

  // the rows start after this many unused row slots at the head of every column
  int32_t numOfEmptyRows = pQueryHandle->outputCapacity - numOfRows;

  int32_t col = 0;
  while (col < numOfCols) {
    SColumnInfoData* pCol = taosArrayGet(pQueryHandle->pColumns, col);

    // source and destination ranges may overlap, hence memmove
    memmove(pCol->pData, pCol->pData + numOfEmptyRows * pCol->info.bytes, numOfRows * pCol->info.bytes);
    col++;
  }
}
/*
 * Work out which positions [*start, *end] may be copied next, clamped so that the number of
 * copied rows plus numOfExisted does not exceed pQueryHandle->outputCapacity.
 *
 * Ascending traversal : rows run from startPos up to endPos; *start is startPos and *end is
 *                       endPos or the clamped upper position.
 * Descending traversal: rows run from startPos down to endPos; *end is startPos and *start is
 *                       endPos or the clamped lower position, so *start <= *end on return.
 *
 * pQueryHandle  query handle supplying order and outputCapacity
 * startPos      position where the traversal begins
 * endPos        last position in traversal direction
 * numOfExisted  rows already counted against the output capacity
 * start, end    output: inclusive position range
 */
static void getQualifiedRowsPos(STsdbQueryHandle* pQueryHandle, int32_t startPos, int32_t endPos,
                                int32_t numOfExisted, int32_t *start, int32_t *end) {
  *start = -1;

  if (ASCENDING_TRAVERSE(pQueryHandle->order)) {
    int32_t numOfCandidates = endPos - startPos + 1;

    *end = (numOfCandidates + numOfExisted > pQueryHandle->outputCapacity)
               ? (pQueryHandle->outputCapacity - numOfExisted) + startPos - 1
               : endPos;
    *start = startPos;
    return;
  }

  // descending traversal: positions shrink from startPos towards endPos
  int32_t numOfCandidates = (startPos - endPos) + 1;
  int32_t lowestPos = (numOfCandidates + numOfExisted > pQueryHandle->outputCapacity)
                          ? startPos + 1 - (pQueryHandle->outputCapacity - numOfExisted)
                          : endPos;

  *start = lowestPos;
  *end = startPos;
}
/*
 * Record the outcome of a merge step in the query handle and the table check info.
 *
 * pQueryHandle  query handle; realNumOfRows, cur.rows and cur.pos are overwritten
 * pCheckInfo    table check info; lastKey is set to the cursor's lastKey
 * numOfRows     row count stored into realNumOfRows and cur.rows
 * endPos        position stored into cur.pos
 */
static void updateInfoAfterMerge(STsdbQueryHandle* pQueryHandle, STableCheckInfo* pCheckInfo, int32_t numOfRows, int32_t endPos) {
  // row count is kept both on the handle and on the cursor
  pQueryHandle->realNumOfRows = numOfRows;
  pQueryHandle->cur.rows = numOfRows;
  pQueryHandle->cur.pos = endPos;

  // hand the cursor's last key over to the table check info
  pCheckInfo->lastKey = pQueryHandle->cur.lastKey;
}
/*
 * Sanity-check the time window of the block held by the cursor, or reset the cursor window
 * when the block has no rows.
 *
 * With rows: asserts that cur->win lies inside the query window (taking the traversal order
 * into account) and that cur->win matches the first and last value of column 0.
 * Without rows: cur->win becomes the query window and cur->lastKey is set one step past the
 * query window's ekey in traversal direction.
 */
static void doCheckGeneratedBlockRange(STsdbQueryHandle* pQueryHandle) {
  SQueryFilePos* cur = &pQueryHandle->cur;

  // empty result: nothing to verify, just reset the cursor range
  if (!(cur->rows > 0)) {
    int32_t step = ASCENDING_TRAVERSE(pQueryHandle->order)? 1:-1;

    cur->win = pQueryHandle->window;
    cur->lastKey = pQueryHandle->window.ekey + step;
    return;
  }

  // the bounds checked below treat window.ekey as the lower bound for a descending query
  if (!ASCENDING_TRAVERSE(pQueryHandle->order)) {
    assert(cur->win.skey >= pQueryHandle->window.ekey && cur->win.ekey <= pQueryHandle->window.skey);
  } else {
    assert(cur->win.skey >= pQueryHandle->window.skey && cur->win.ekey <= pQueryHandle->window.ekey);
  }

  // column 0 is read as a TSKEY array; its first/last entries must equal the reported window
  SColumnInfoData* pColInfoData = taosArrayGet(pQueryHandle->pColumns, 0);
  assert(cur->win.skey == ((TSKEY*)pColInfoData->pData)[0] && cur->win.ekey == ((TSKEY*)pColInfoData->pData)[cur->rows-1]);
}
// only return the qualified data to client in terms of query time window, data rows in the same block but do not // only return the qualified data to client in terms of query time window, data rows in the same block but do not
// be included in the query time window will be discarded // be included in the query time window will be discarded
static void doMergeTwoLevelData(STsdbQueryHandle* pQueryHandle, STableCheckInfo* pCheckInfo, SCompBlock* pBlock) { static void doMergeTwoLevelData(STsdbQueryHandle* pQueryHandle, STableCheckInfo* pCheckInfo, SCompBlock* pBlock) {
SQueryFilePos* cur = &pQueryHandle->cur; SQueryFilePos* cur = &pQueryHandle->cur;
SDataBlockInfo blockInfo = GET_FILE_DATA_BLOCK_INFO(pCheckInfo, pBlock); SDataBlockInfo blockInfo = GET_FILE_DATA_BLOCK_INFO(pCheckInfo, pBlock);
initTableMemIterator(pQueryHandle, pCheckInfo); initTableMemIterator(pQueryHandle, pCheckInfo);
SDataCols* pCols = pQueryHandle->rhelper.pDataCols[0]; SDataCols* pCols = pQueryHandle->rhelper.pDataCols[0];
assert(pCols->cols[0].type == TSDB_DATA_TYPE_TIMESTAMP && pCols->cols[0].colId == PRIMARYKEY_TIMESTAMP_COL_INDEX &&
cur->pos >= 0 && cur->pos < pBlock->numOfRows);
TSKEY* tsArray = pCols->cols[0].pData;
// for search the endPos, so the order needs to reverse // for search the endPos, so the order needs to reverse
int32_t order = (pQueryHandle->order == TSDB_ORDER_ASC)? TSDB_ORDER_DESC:TSDB_ORDER_ASC; int32_t order = (pQueryHandle->order == TSDB_ORDER_ASC)? TSDB_ORDER_DESC:TSDB_ORDER_ASC;
int32_t step = ASCENDING_TRAVERSE(pQueryHandle->order)? 1:-1; int32_t step = ASCENDING_TRAVERSE(pQueryHandle->order)? 1:-1;
int32_t numOfCols = taosArrayGetSize(pQueryHandle->pColumns); int32_t numOfCols = QH_GET_NUM_OF_COLS(pQueryHandle);
STable* pTable = pCheckInfo->pTableObj; STable* pTable = pCheckInfo->pTableObj;
int32_t endPos = cur->pos; int32_t endPos = cur->pos;
if (ASCENDING_TRAVERSE(pQueryHandle->order) && pQueryHandle->window.ekey > blockInfo.window.ekey) { if (ASCENDING_TRAVERSE(pQueryHandle->order) && pQueryHandle->window.ekey > blockInfo.window.ekey) {
endPos = blockInfo.rows - 1; endPos = blockInfo.rows - 1;
cur->mixBlock = (cur->pos != 0); cur->mixBlock = (cur->pos != 0);
...@@ -1024,48 +1107,36 @@ static void doMergeTwoLevelData(STsdbQueryHandle* pQueryHandle, STableCheckInfo* ...@@ -1024,48 +1107,36 @@ static void doMergeTwoLevelData(STsdbQueryHandle* pQueryHandle, STableCheckInfo*
endPos = doBinarySearchKey(pCols->cols[0].pData, pCols->numOfRows, pQueryHandle->window.ekey, order); endPos = doBinarySearchKey(pCols->cols[0].pData, pCols->numOfRows, pQueryHandle->window.ekey, order);
cur->mixBlock = true; cur->mixBlock = true;
} }
// compared with the data from in-memory buffer, to generate the correct timestamp array list // compared with the data from in-memory buffer, to generate the correct timestamp array list
int32_t pos = cur->pos;
assert(pCols->cols[0].type == TSDB_DATA_TYPE_TIMESTAMP && pCols->cols[0].colId == 0);
TSKEY* tsArray = pCols->cols[0].pData;
int32_t numOfRows = 0; int32_t numOfRows = 0;
pQueryHandle->cur.win = TSWINDOW_INITIALIZER; int32_t pos = cur->pos;
cur->win = TSWINDOW_INITIALIZER;
// no data in buffer, load data from file directly // no data in buffer, load data from file directly
if (pCheckInfo->iiter == NULL && pCheckInfo->iter == NULL) { if (pCheckInfo->iiter == NULL && pCheckInfo->iter == NULL) {
int32_t start = cur->pos; int32_t start = cur->pos;
int32_t end = endPos; int32_t end = endPos;
if (!ASCENDING_TRAVERSE(pQueryHandle->order)) { if (!ASCENDING_TRAVERSE(pQueryHandle->order)) {
end = cur->pos; SWAP(start, end, int32_t);
start = endPos; }
}
cur->win.skey = tsArray[start];
cur->win.ekey = tsArray[end];
// todo opt in case of no data in buffer
numOfRows = copyDataFromFileBlock(pQueryHandle, pQueryHandle->outputCapacity, numOfRows, start, end); numOfRows = copyDataFromFileBlock(pQueryHandle, pQueryHandle->outputCapacity, numOfRows, start, end);
// if the buffer is not full in case of descending order query, move the data in the front of the buffer
if (!ASCENDING_TRAVERSE(pQueryHandle->order) && numOfRows < pQueryHandle->outputCapacity) {
int32_t emptySize = pQueryHandle->outputCapacity - numOfRows;
for(int32_t i = 0; i < numOfCols; ++i) { // the time window should always be right order: skey <= ekey
SColumnInfoData* pColInfo = taosArrayGet(pQueryHandle->pColumns, i); cur->win = (STimeWindow) {.skey = tsArray[start], .ekey = tsArray[end]};
memmove(pColInfo->pData, pColInfo->pData + emptySize * pColInfo->info.bytes, numOfRows * pColInfo->info.bytes); cur->lastKey = tsArray[endPos];
}
}
pos += (end - start + 1) * step; pos += (end - start + 1) * step;
cur->blockCompleted = (((pos >= endPos || cur->lastKey > pQueryHandle->window.ekey) && ASCENDING_TRAVERSE(pQueryHandle->order)) ||
((pos <= endPos || cur->lastKey < pQueryHandle->window.ekey) && !ASCENDING_TRAVERSE(pQueryHandle->order))); cur->blockCompleted =
(((pos >= endPos || cur->lastKey > pQueryHandle->window.ekey) && ASCENDING_TRAVERSE(pQueryHandle->order)) ||
pCheckInfo->lastKey = cur->lastKey; ((pos <= endPos || cur->lastKey < pQueryHandle->window.ekey) && !ASCENDING_TRAVERSE(pQueryHandle->order)));
pQueryHandle->realNumOfRows = numOfRows;
cur->rows = numOfRows; // if the buffer is not full in case of descending order query, move the data in the front of the buffer
moveDataToFront(pQueryHandle, numOfRows, numOfCols);
updateInfoAfterMerge(pQueryHandle, pCheckInfo, numOfRows, pos);
doCheckGeneratedBlockRange(pQueryHandle);
return; return;
} else if (pCheckInfo->iter != NULL || pCheckInfo->iiter != NULL) { } else if (pCheckInfo->iter != NULL || pCheckInfo->iiter != NULL) {
SSkipListNode* node = NULL; SSkipListNode* node = NULL;
...@@ -1111,30 +1182,18 @@ static void doMergeTwoLevelData(STsdbQueryHandle* pQueryHandle, STableCheckInfo* ...@@ -1111,30 +1182,18 @@ static void doMergeTwoLevelData(STsdbQueryHandle* pQueryHandle, STableCheckInfo*
if (tsArray[end] == key) { // the value of key in cache equals to the end timestamp value, ignore it if (tsArray[end] == key) { // the value of key in cache equals to the end timestamp value, ignore it
moveToNextRowInMem(pCheckInfo); moveToNextRowInMem(pCheckInfo);
} }
int32_t start = -1;
if (ASCENDING_TRAVERSE(pQueryHandle->order)) {
int32_t remain = end - pos + 1;
if (remain + numOfRows > pQueryHandle->outputCapacity) {
end = (pQueryHandle->outputCapacity - numOfRows) + pos - 1;
}
start = pos; int32_t qstart = 0, qend = 0;
} else { getQualifiedRowsPos(pQueryHandle, pos, end, numOfRows, &qstart, &qend);
int32_t remain = (pos - end) + 1;
if (remain + numOfRows > pQueryHandle->outputCapacity) {
end = pos + 1 - (pQueryHandle->outputCapacity - numOfRows);
}
start = end; numOfRows = copyDataFromFileBlock(pQueryHandle, pQueryHandle->outputCapacity, numOfRows, qstart, qend);
end = pos; pos += (qend - qstart + 1) * step;
}
numOfRows = copyDataFromFileBlock(pQueryHandle, pQueryHandle->outputCapacity, numOfRows, start, end); cur->win.ekey = ASCENDING_TRAVERSE(pQueryHandle->order)? tsArray[qend]:tsArray[qstart];
pos += (end - start + 1) * step; cur->lastKey = cur->win.ekey + step;
} }
} while (numOfRows < pQueryHandle->outputCapacity); } while (numOfRows < pQueryHandle->outputCapacity);
if (numOfRows < pQueryHandle->outputCapacity) { if (numOfRows < pQueryHandle->outputCapacity) {
/** /**
* if cache is empty, load remain file block data. In contrast, if there are remain data in cache, do NOT * if cache is empty, load remain file block data. In contrast, if there are remain data in cache, do NOT
...@@ -1148,54 +1207,29 @@ static void doMergeTwoLevelData(STsdbQueryHandle* pQueryHandle, STableCheckInfo* ...@@ -1148,54 +1207,29 @@ static void doMergeTwoLevelData(STsdbQueryHandle* pQueryHandle, STableCheckInfo*
cur->win.skey = tsArray[pos]; cur->win.skey = tsArray[pos];
} }
int32_t start = -1; int32_t start = -1, end = -1;
int32_t end = -1; getQualifiedRowsPos(pQueryHandle, pos, endPos, numOfRows, &start, &end);
// all remain data are qualified, but check the remain capacity in the first place.
if (ASCENDING_TRAVERSE(pQueryHandle->order)) {
int32_t remain = endPos - pos + 1;
if (remain + numOfRows > pQueryHandle->outputCapacity) {
endPos = (pQueryHandle->outputCapacity - numOfRows) + pos - 1;
}
start = pos;
end = endPos;
} else {
int32_t remain = pos + 1;
if (remain + numOfRows > pQueryHandle->outputCapacity) {
endPos = pos + 1 - (pQueryHandle->outputCapacity - numOfRows);
}
start = endPos;
end = pos;
}
numOfRows = copyDataFromFileBlock(pQueryHandle, pQueryHandle->outputCapacity, numOfRows, start, end); numOfRows = copyDataFromFileBlock(pQueryHandle, pQueryHandle->outputCapacity, numOfRows, start, end);
pos += (end - start + 1) * step; pos += (end - start + 1) * step;
cur->win.ekey = ASCENDING_TRAVERSE(pQueryHandle->order)? tsArray[end]:tsArray[start];
cur->lastKey = cur->win.ekey + step;
} }
} }
} }
cur->blockCompleted = (((pos >= endPos || cur->lastKey > pQueryHandle->window.ekey) && ASCENDING_TRAVERSE(pQueryHandle->order)) || cur->blockCompleted =
((pos <= endPos || cur->lastKey < pQueryHandle->window.ekey) && !ASCENDING_TRAVERSE(pQueryHandle->order))); (((pos >= endPos || cur->lastKey > pQueryHandle->window.ekey) && ASCENDING_TRAVERSE(pQueryHandle->order)) ||
((pos <= endPos || cur->lastKey < pQueryHandle->window.ekey) && !ASCENDING_TRAVERSE(pQueryHandle->order)));
if (!ASCENDING_TRAVERSE(pQueryHandle->order)) { if (!ASCENDING_TRAVERSE(pQueryHandle->order)) {
SWAP(cur->win.skey, cur->win.ekey, TSKEY); SWAP(cur->win.skey, cur->win.ekey, TSKEY);
// if the buffer is not full in case of descending order query, move the data in the front of the buffer
if (numOfRows < pQueryHandle->outputCapacity) {
int32_t emptySize = pQueryHandle->outputCapacity - numOfRows;
for(int32_t i = 0; i < numOfCols; ++i) {
SColumnInfoData* pColInfo = taosArrayGet(pQueryHandle->pColumns, i);
memmove(pColInfo->pData, pColInfo->pData + emptySize * pColInfo->info.bytes, numOfRows * pColInfo->info.bytes);
}
}
} }
pCheckInfo->lastKey = cur->lastKey; moveDataToFront(pQueryHandle, numOfRows, numOfCols);
pQueryHandle->realNumOfRows = numOfRows; updateInfoAfterMerge(pQueryHandle, pCheckInfo, numOfRows, pos);
cur->rows = numOfRows; doCheckGeneratedBlockRange(pQueryHandle);
cur->pos = pos;
tsdbDebug("%p uid:%" PRIu64",tid:%d data block created, brange:%"PRIu64"-%"PRIu64" rows:%d, %p", pQueryHandle, pCheckInfo->tableId.uid, pCheckInfo->tableId.tid, cur->win.skey, tsdbDebug("%p uid:%" PRIu64",tid:%d data block created, brange:%"PRIu64"-%"PRIu64" rows:%d, %p", pQueryHandle, pCheckInfo->tableId.uid, pCheckInfo->tableId.tid, cur->win.skey,
cur->win.ekey, cur->rows, pQueryHandle->qinfo); cur->win.ekey, cur->rows, pQueryHandle->qinfo);
...@@ -1332,16 +1366,16 @@ static int32_t createDataBlocksInfo(STsdbQueryHandle* pQueryHandle, int32_t numO ...@@ -1332,16 +1366,16 @@ static int32_t createDataBlocksInfo(STsdbQueryHandle* pQueryHandle, int32_t numO
cleanBlockOrderSupporter(&sup, 0); cleanBlockOrderSupporter(&sup, 0);
return TSDB_CODE_TDB_OUT_OF_MEMORY; return TSDB_CODE_TDB_OUT_OF_MEMORY;
} }
int32_t cnt = 0; int32_t cnt = 0;
int32_t numOfQualTables = 0; int32_t numOfQualTables = 0;
for (int32_t j = 0; j < numOfTables; ++j) { for (int32_t j = 0; j < numOfTables; ++j) {
STableCheckInfo* pTableCheck = (STableCheckInfo*)taosArrayGet(pQueryHandle->pTableCheckInfo, j); STableCheckInfo* pTableCheck = (STableCheckInfo*)taosArrayGet(pQueryHandle->pTableCheckInfo, j);
if (pTableCheck->numOfBlocks <= 0) { if (pTableCheck->numOfBlocks <= 0) {
continue; continue;
} }
SCompBlock* pBlock = pTableCheck->pCompInfo->blocks; SCompBlock* pBlock = pTableCheck->pCompInfo->blocks;
sup.numOfBlocksPerTable[numOfQualTables] = pTableCheck->numOfBlocks; sup.numOfBlocksPerTable[numOfQualTables] = pTableCheck->numOfBlocks;
...@@ -1437,37 +1471,39 @@ static int32_t getDataBlocksInFilesImpl(STsdbQueryHandle* pQueryHandle, bool* ex ...@@ -1437,37 +1471,39 @@ static int32_t getDataBlocksInFilesImpl(STsdbQueryHandle* pQueryHandle, bool* ex
// current file are not overlapped with query time window, ignore remain files // current file are not overlapped with query time window, ignore remain files
if ((ASCENDING_TRAVERSE(pQueryHandle->order) && win.skey > pQueryHandle->window.ekey) || if ((ASCENDING_TRAVERSE(pQueryHandle->order) && win.skey > pQueryHandle->window.ekey) ||
(!ASCENDING_TRAVERSE(pQueryHandle->order) && win.ekey < pQueryHandle->window.ekey)) { (!ASCENDING_TRAVERSE(pQueryHandle->order) && win.ekey < pQueryHandle->window.ekey)) {
tsdbDebug("%p remain files are not qualified for qrange:%"PRId64"-%"PRId64", ignore, %p", pQueryHandle, pQueryHandle->window.skey, pQueryHandle->window.ekey, pQueryHandle->qinfo) tsdbDebug("%p remain files are not qualified for qrange:%" PRId64 "-%" PRId64 ", ignore, %p", pQueryHandle,
pQueryHandle->window.skey, pQueryHandle->window.ekey, pQueryHandle->qinfo);
pQueryHandle->pFileGroup = NULL; pQueryHandle->pFileGroup = NULL;
assert(pQueryHandle->numOfBlocks == 0);
break; break;
} }
if ((code = getFileCompInfo(pQueryHandle, &numOfBlocks)) != TSDB_CODE_SUCCESS) { if ((code = getFileCompInfo(pQueryHandle, &numOfBlocks)) != TSDB_CODE_SUCCESS) {
break; break;
} }
tsdbDebug("%p %d blocks found in file for %d table(s), fid:%d, %p", pQueryHandle, numOfBlocks, tsdbDebug("%p %d blocks found in file for %d table(s), fid:%d, %p", pQueryHandle, numOfBlocks, numOfTables,
numOfTables, pQueryHandle->pFileGroup->fileId, pQueryHandle->qinfo); pQueryHandle->pFileGroup->fileId, pQueryHandle->qinfo);
assert(numOfBlocks >= 0); assert(numOfBlocks >= 0);
if (numOfBlocks == 0) { if (numOfBlocks == 0) {
continue; continue;
} }
// todo return error code to query engine // todo return error code to query engine
if (createDataBlocksInfo(pQueryHandle, numOfBlocks, &pQueryHandle->numOfBlocks) != TSDB_CODE_SUCCESS) { if ((code = createDataBlocksInfo(pQueryHandle, numOfBlocks, &pQueryHandle->numOfBlocks)) != TSDB_CODE_SUCCESS) {
break; break;
} }
assert(numOfBlocks >= pQueryHandle->numOfBlocks); assert(numOfBlocks >= pQueryHandle->numOfBlocks);
if (pQueryHandle->numOfBlocks > 0) { if (pQueryHandle->numOfBlocks > 0) {
break; break;
} }
} }
// no data in file anymore // no data in file anymore
if (pQueryHandle->numOfBlocks <= 0) { if (pQueryHandle->numOfBlocks <= 0 || code != TSDB_CODE_SUCCESS) {
if (code == TSDB_CODE_SUCCESS) { if (code == TSDB_CODE_SUCCESS) {
assert(pQueryHandle->pFileGroup == NULL); assert(pQueryHandle->pFileGroup == NULL);
} }
...@@ -1476,10 +1512,11 @@ static int32_t getDataBlocksInFilesImpl(STsdbQueryHandle* pQueryHandle, bool* ex ...@@ -1476,10 +1512,11 @@ static int32_t getDataBlocksInFilesImpl(STsdbQueryHandle* pQueryHandle, bool* ex
*exists = false; *exists = false;
return code; return code;
} }
assert(pQueryHandle->pFileGroup != NULL && pQueryHandle->numOfBlocks > 0);
cur->slot = ASCENDING_TRAVERSE(pQueryHandle->order)? 0:pQueryHandle->numOfBlocks-1; cur->slot = ASCENDING_TRAVERSE(pQueryHandle->order)? 0:pQueryHandle->numOfBlocks-1;
cur->fid = pQueryHandle->pFileGroup->fileId; cur->fid = pQueryHandle->pFileGroup->fileId;
STableBlockInfo* pBlockInfo = &pQueryHandle->pDataBlockInfo[cur->slot]; STableBlockInfo* pBlockInfo = &pQueryHandle->pDataBlockInfo[cur->slot];
*exists = loadFileDataBlock(pQueryHandle, pBlockInfo->compBlock, pBlockInfo->pTableCheckInfo); *exists = loadFileDataBlock(pQueryHandle, pBlockInfo->compBlock, pBlockInfo->pTableCheckInfo);
...@@ -1495,7 +1532,7 @@ static int32_t getDataBlocksInFiles(STsdbQueryHandle* pQueryHandle, bool* exists ...@@ -1495,7 +1532,7 @@ static int32_t getDataBlocksInFiles(STsdbQueryHandle* pQueryHandle, bool* exists
pQueryHandle->locateStart = true; pQueryHandle->locateStart = true;
STsdbCfg* pCfg = &pQueryHandle->pTsdb->config; STsdbCfg* pCfg = &pQueryHandle->pTsdb->config;
int32_t fid = getFileIdFromKey(pQueryHandle->window.skey, pCfg->daysPerFile, pCfg->precision); int32_t fid = getFileIdFromKey(pQueryHandle->window.skey, pCfg->daysPerFile, pCfg->precision);
tsdbInitFileGroupIter(pFileHandle, &pQueryHandle->fileIter, pQueryHandle->order); tsdbInitFileGroupIter(pFileHandle, &pQueryHandle->fileIter, pQueryHandle->order);
tsdbSeekFileGroupIter(&pQueryHandle->fileIter, fid); tsdbSeekFileGroupIter(&pQueryHandle->fileIter, fid);
...@@ -1504,7 +1541,7 @@ static int32_t getDataBlocksInFiles(STsdbQueryHandle* pQueryHandle, bool* exists ...@@ -1504,7 +1541,7 @@ static int32_t getDataBlocksInFiles(STsdbQueryHandle* pQueryHandle, bool* exists
// check if current file block is all consumed // check if current file block is all consumed
STableBlockInfo* pBlockInfo = &pQueryHandle->pDataBlockInfo[cur->slot]; STableBlockInfo* pBlockInfo = &pQueryHandle->pDataBlockInfo[cur->slot];
STableCheckInfo* pCheckInfo = pBlockInfo->pTableCheckInfo; STableCheckInfo* pCheckInfo = pBlockInfo->pTableCheckInfo;
// current block is done, try next // current block is done, try next
if (!cur->mixBlock || cur->blockCompleted) { if (!cur->mixBlock || cur->blockCompleted) {
if ((cur->slot == pQueryHandle->numOfBlocks - 1 && ASCENDING_TRAVERSE(pQueryHandle->order)) || if ((cur->slot == pQueryHandle->numOfBlocks - 1 && ASCENDING_TRAVERSE(pQueryHandle->order)) ||
...@@ -1515,10 +1552,10 @@ static int32_t getDataBlocksInFiles(STsdbQueryHandle* pQueryHandle, bool* exists ...@@ -1515,10 +1552,10 @@ static int32_t getDataBlocksInFiles(STsdbQueryHandle* pQueryHandle, bool* exists
// next block of the same file // next block of the same file
int32_t step = ASCENDING_TRAVERSE(pQueryHandle->order) ? 1 : -1; int32_t step = ASCENDING_TRAVERSE(pQueryHandle->order) ? 1 : -1;
cur->slot += step; cur->slot += step;
cur->mixBlock = false; cur->mixBlock = false;
cur->blockCompleted = false; cur->blockCompleted = false;
STableBlockInfo* pNext = &pQueryHandle->pDataBlockInfo[cur->slot]; STableBlockInfo* pNext = &pQueryHandle->pDataBlockInfo[cur->slot];
*exists = loadFileDataBlock(pQueryHandle, pNext->compBlock, pNext->pTableCheckInfo); *exists = loadFileDataBlock(pQueryHandle, pNext->compBlock, pNext->pTableCheckInfo);
...@@ -1540,10 +1577,10 @@ static bool doHasDataInBuffer(STsdbQueryHandle* pQueryHandle) { ...@@ -1540,10 +1577,10 @@ static bool doHasDataInBuffer(STsdbQueryHandle* pQueryHandle) {
if (hasMoreDataInCache(pQueryHandle)) { if (hasMoreDataInCache(pQueryHandle)) {
return true; return true;
} }
pQueryHandle->activeIndex += 1; pQueryHandle->activeIndex += 1;
} }
return false; return false;
} }
...@@ -1561,14 +1598,14 @@ bool tsdbNextDataBlock(TsdbQueryHandleT* pHandle) { ...@@ -1561,14 +1598,14 @@ bool tsdbNextDataBlock(TsdbQueryHandleT* pHandle) {
if (pQueryHandle->type == TSDB_QUERY_TYPE_EXTERNAL) { if (pQueryHandle->type == TSDB_QUERY_TYPE_EXTERNAL) {
pQueryHandle->type = TSDB_QUERY_TYPE_ALL; pQueryHandle->type = TSDB_QUERY_TYPE_ALL;
pQueryHandle->order = TSDB_ORDER_DESC; pQueryHandle->order = TSDB_ORDER_DESC;
if (!tsdbNextDataBlock(pHandle)) { if (!tsdbNextDataBlock(pHandle)) {
return false; return false;
} }
/*SDataBlockInfo* pBlockInfo =*/ tsdbRetrieveDataBlockInfo(pHandle, &blockInfo); /*SDataBlockInfo* pBlockInfo =*/ tsdbRetrieveDataBlockInfo(pHandle, &blockInfo);
/*SArray *pDataBlock = */tsdbRetrieveDataBlock(pHandle, pQueryHandle->defaultLoadColumn); /*SArray *pDataBlock = */tsdbRetrieveDataBlock(pHandle, pQueryHandle->defaultLoadColumn);
if (pQueryHandle->cur.win.ekey == pQueryHandle->window.skey) { if (pQueryHandle->cur.win.ekey == pQueryHandle->window.skey) {
// data already retrieve, discard other data rows and return // data already retrieve, discard other data rows and return
int32_t numOfCols = QH_GET_NUM_OF_COLS(pQueryHandle); int32_t numOfCols = QH_GET_NUM_OF_COLS(pQueryHandle);
...@@ -1576,7 +1613,7 @@ bool tsdbNextDataBlock(TsdbQueryHandleT* pHandle) { ...@@ -1576,7 +1613,7 @@ bool tsdbNextDataBlock(TsdbQueryHandleT* pHandle) {
SColumnInfoData* pCol = taosArrayGet(pQueryHandle->pColumns, i); SColumnInfoData* pCol = taosArrayGet(pQueryHandle->pColumns, i);
memcpy(pCol->pData, pCol->pData + pCol->info.bytes * (pQueryHandle->cur.rows-1), pCol->info.bytes); memcpy(pCol->pData, pCol->pData + pCol->info.bytes * (pQueryHandle->cur.rows-1), pCol->info.bytes);
} }
pQueryHandle->cur.win = (STimeWindow){pQueryHandle->window.skey, pQueryHandle->window.skey}; pQueryHandle->cur.win = (STimeWindow){pQueryHandle->window.skey, pQueryHandle->window.skey};
pQueryHandle->window = pQueryHandle->cur.win; pQueryHandle->window = pQueryHandle->cur.win;
pQueryHandle->cur.rows = 1; pQueryHandle->cur.rows = 1;
...@@ -1593,7 +1630,7 @@ bool tsdbNextDataBlock(TsdbQueryHandleT* pHandle) { ...@@ -1593,7 +1630,7 @@ bool tsdbNextDataBlock(TsdbQueryHandleT* pHandle) {
pSecQueryHandle->checkFiles = true; pSecQueryHandle->checkFiles = true;
pSecQueryHandle->activeIndex = 0; pSecQueryHandle->activeIndex = 0;
pSecQueryHandle->outputCapacity = ((STsdbRepo*)pSecQueryHandle->pTsdb)->config.maxRowsPerFileBlock; pSecQueryHandle->outputCapacity = ((STsdbRepo*)pSecQueryHandle->pTsdb)->config.maxRowsPerFileBlock;
if (tsdbInitReadHelper(&pSecQueryHandle->rhelper, (STsdbRepo*) pSecQueryHandle->pTsdb) != 0) { if (tsdbInitReadHelper(&pSecQueryHandle->rhelper, (STsdbRepo*) pSecQueryHandle->pTsdb) != 0) {
free(pSecQueryHandle); free(pSecQueryHandle);
return false; return false;
...@@ -1603,24 +1640,24 @@ bool tsdbNextDataBlock(TsdbQueryHandleT* pHandle) { ...@@ -1603,24 +1640,24 @@ bool tsdbNextDataBlock(TsdbQueryHandleT* pHandle) {
// allocate buffer in order to load data blocks from file // allocate buffer in order to load data blocks from file
int32_t numOfCols = QH_GET_NUM_OF_COLS(pQueryHandle); int32_t numOfCols = QH_GET_NUM_OF_COLS(pQueryHandle);
pSecQueryHandle->statis = calloc(numOfCols, sizeof(SDataStatis)); pSecQueryHandle->statis = calloc(numOfCols, sizeof(SDataStatis));
pSecQueryHandle->pColumns = taosArrayInit(numOfCols, sizeof(SColumnInfoData)); pSecQueryHandle->pColumns = taosArrayInit(numOfCols, sizeof(SColumnInfoData));
for (int32_t i = 0; i < numOfCols; ++i) { for (int32_t i = 0; i < numOfCols; ++i) {
SColumnInfoData colInfo = {{0}, 0}; SColumnInfoData colInfo = {{0}, 0};
SColumnInfoData* pCol = taosArrayGet(pQueryHandle->pColumns, i); SColumnInfoData* pCol = taosArrayGet(pQueryHandle->pColumns, i);
colInfo.info = pCol->info; colInfo.info = pCol->info;
colInfo.pData = calloc(1, EXTRA_BYTES + pQueryHandle->outputCapacity * pCol->info.bytes); colInfo.pData = calloc(1, EXTRA_BYTES + pQueryHandle->outputCapacity * pCol->info.bytes);
taosArrayPush(pSecQueryHandle->pColumns, &colInfo); taosArrayPush(pSecQueryHandle->pColumns, &colInfo);
} }
size_t si = taosArrayGetSize(pQueryHandle->pTableCheckInfo); size_t si = taosArrayGetSize(pQueryHandle->pTableCheckInfo);
pSecQueryHandle->pTableCheckInfo = taosArrayInit(si, sizeof(STableCheckInfo)); pSecQueryHandle->pTableCheckInfo = taosArrayInit(si, sizeof(STableCheckInfo));
STsdbMeta* pMeta = tsdbGetMeta(pQueryHandle->pTsdb); STsdbMeta* pMeta = tsdbGetMeta(pQueryHandle->pTsdb);
assert(pMeta != NULL); assert(pMeta != NULL);
for (int32_t j = 0; j < si; ++j) { for (int32_t j = 0; j < si; ++j) {
STableCheckInfo* pCheckInfo = (STableCheckInfo*) taosArrayGet(pQueryHandle->pTableCheckInfo, j); STableCheckInfo* pCheckInfo = (STableCheckInfo*) taosArrayGet(pQueryHandle->pTableCheckInfo, j);
STableCheckInfo info = { STableCheckInfo info = {
...@@ -1628,10 +1665,10 @@ bool tsdbNextDataBlock(TsdbQueryHandleT* pHandle) { ...@@ -1628,10 +1665,10 @@ bool tsdbNextDataBlock(TsdbQueryHandleT* pHandle) {
.tableId = pCheckInfo->tableId, .tableId = pCheckInfo->tableId,
.pTableObj = pCheckInfo->pTableObj, .pTableObj = pCheckInfo->pTableObj,
}; };
taosArrayPush(pSecQueryHandle->pTableCheckInfo, &info); taosArrayPush(pSecQueryHandle->pTableCheckInfo, &info);
} }
tsdbInitDataBlockLoadInfo(&pSecQueryHandle->dataBlockLoadInfo); tsdbInitDataBlockLoadInfo(&pSecQueryHandle->dataBlockLoadInfo);
tsdbInitCompBlockLoadInfo(&pSecQueryHandle->compBlockLoadInfo); tsdbInitCompBlockLoadInfo(&pSecQueryHandle->compBlockLoadInfo);
pSecQueryHandle->defaultLoadColumn = taosArrayClone(pQueryHandle->defaultLoadColumn); pSecQueryHandle->defaultLoadColumn = taosArrayClone(pQueryHandle->defaultLoadColumn);
...@@ -1641,17 +1678,17 @@ bool tsdbNextDataBlock(TsdbQueryHandleT* pHandle) { ...@@ -1641,17 +1678,17 @@ bool tsdbNextDataBlock(TsdbQueryHandleT* pHandle) {
tsdbRetrieveDataBlockInfo((void*) pSecQueryHandle, &blockInfo); tsdbRetrieveDataBlockInfo((void*) pSecQueryHandle, &blockInfo);
tsdbRetrieveDataBlock((void*) pSecQueryHandle, pSecQueryHandle->defaultLoadColumn); tsdbRetrieveDataBlock((void*) pSecQueryHandle, pSecQueryHandle->defaultLoadColumn);
for (int32_t i = 0; i < numOfCols; ++i) { for (int32_t i = 0; i < numOfCols; ++i) {
SColumnInfoData* pCol = taosArrayGet(pQueryHandle->pColumns, i); SColumnInfoData* pCol = taosArrayGet(pQueryHandle->pColumns, i);
memcpy(pCol->pData, pCol->pData + pCol->info.bytes * (pQueryHandle->cur.rows-1), pCol->info.bytes); memcpy(pCol->pData, pCol->pData + pCol->info.bytes * (pQueryHandle->cur.rows-1), pCol->info.bytes);
SColumnInfoData* pCol1 = taosArrayGet(pSecQueryHandle->pColumns, i); SColumnInfoData* pCol1 = taosArrayGet(pSecQueryHandle->pColumns, i);
assert(pCol->info.colId == pCol1->info.colId); assert(pCol->info.colId == pCol1->info.colId);
memcpy(pCol->pData + pCol->info.bytes, pCol1->pData, pCol1->info.bytes); memcpy(pCol->pData + pCol->info.bytes, pCol1->pData, pCol1->info.bytes);
} }
SColumnInfoData* pTSCol = taosArrayGet(pQueryHandle->pColumns, 0); SColumnInfoData* pTSCol = taosArrayGet(pQueryHandle->pColumns, 0);
// it is ascending order // it is ascending order
...@@ -1675,7 +1712,7 @@ bool tsdbNextDataBlock(TsdbQueryHandleT* pHandle) { ...@@ -1675,7 +1712,7 @@ bool tsdbNextDataBlock(TsdbQueryHandleT* pHandle) {
pQueryHandle->checkFiles = false; pQueryHandle->checkFiles = false;
return true; return true;
} }
if (pQueryHandle->checkFiles) { if (pQueryHandle->checkFiles) {
bool exists = true; bool exists = true;
int32_t code = getDataBlocksInFiles(pQueryHandle, &exists); int32_t code = getDataBlocksInFiles(pQueryHandle, &exists);
...@@ -1688,11 +1725,11 @@ bool tsdbNextDataBlock(TsdbQueryHandleT* pHandle) { ...@@ -1688,11 +1725,11 @@ bool tsdbNextDataBlock(TsdbQueryHandleT* pHandle) {
pQueryHandle->cost.checkForNextTime += elapsedTime; pQueryHandle->cost.checkForNextTime += elapsedTime;
return exists; return exists;
} }
pQueryHandle->activeIndex = 0; pQueryHandle->activeIndex = 0;
pQueryHandle->checkFiles = false; pQueryHandle->checkFiles = false;
} }
// TODO: opt by consider the scan order // TODO: opt by consider the scan order
bool ret = doHasDataInBuffer(pQueryHandle); bool ret = doHasDataInBuffer(pQueryHandle);
terrno = TSDB_CODE_SUCCESS; terrno = TSDB_CODE_SUCCESS;
...@@ -1705,15 +1742,15 @@ bool tsdbNextDataBlock(TsdbQueryHandleT* pHandle) { ...@@ -1705,15 +1742,15 @@ bool tsdbNextDataBlock(TsdbQueryHandleT* pHandle) {
void changeQueryHandleForLastrowQuery(TsdbQueryHandleT pqHandle) { void changeQueryHandleForLastrowQuery(TsdbQueryHandleT pqHandle) {
STsdbQueryHandle* pQueryHandle = (STsdbQueryHandle*) pqHandle; STsdbQueryHandle* pQueryHandle = (STsdbQueryHandle*) pqHandle;
assert(!ASCENDING_TRAVERSE(pQueryHandle->order)); assert(!ASCENDING_TRAVERSE(pQueryHandle->order));
// starts from the buffer in case of descending timestamp order check data blocks // starts from the buffer in case of descending timestamp order check data blocks
// todo consider the query time window, current last_row does not apply the query time window // todo consider the query time window, current last_row does not apply the query time window
size_t numOfTables = taosArrayGetSize(pQueryHandle->pTableCheckInfo); size_t numOfTables = taosArrayGetSize(pQueryHandle->pTableCheckInfo);
TSKEY key = TSKEY_INITIAL_VAL; TSKEY key = TSKEY_INITIAL_VAL;
int32_t index = -1; int32_t index = -1;
for(int32_t i = 0; i < numOfTables; ++i) { for(int32_t i = 0; i < numOfTables; ++i) {
STableCheckInfo* pCheckInfo = taosArrayGet(pQueryHandle->pTableCheckInfo, i); STableCheckInfo* pCheckInfo = taosArrayGet(pQueryHandle->pTableCheckInfo, i);
if (pCheckInfo->pTableObj->lastKey > key) { if (pCheckInfo->pTableObj->lastKey > key) {
...@@ -1721,36 +1758,36 @@ void changeQueryHandleForLastrowQuery(TsdbQueryHandleT pqHandle) { ...@@ -1721,36 +1758,36 @@ void changeQueryHandleForLastrowQuery(TsdbQueryHandleT pqHandle) {
index = i; index = i;
} }
} }
if (index == -1) { if (index == -1) {
// todo add failure test cases // todo add failure test cases
return; return;
} }
// erase all other elements in array list // erase all other elements in array list
size_t size = taosArrayGetSize(pQueryHandle->pTableCheckInfo); size_t size = taosArrayGetSize(pQueryHandle->pTableCheckInfo);
for (int32_t i = 0; i < size; ++i) { for (int32_t i = 0; i < size; ++i) {
if (i == index) { if (i == index) {
continue; continue;
} }
STableCheckInfo* pTableCheckInfo = taosArrayGet(pQueryHandle->pTableCheckInfo, i); STableCheckInfo* pTableCheckInfo = taosArrayGet(pQueryHandle->pTableCheckInfo, i);
tSkipListDestroyIter(pTableCheckInfo->iter); tSkipListDestroyIter(pTableCheckInfo->iter);
if (pTableCheckInfo->pDataCols != NULL) { if (pTableCheckInfo->pDataCols != NULL) {
tfree(pTableCheckInfo->pDataCols->buf); tfree(pTableCheckInfo->pDataCols->buf);
} }
tfree(pTableCheckInfo->pDataCols); tfree(pTableCheckInfo->pDataCols);
tfree(pTableCheckInfo->pCompInfo); tfree(pTableCheckInfo->pCompInfo);
} }
STableCheckInfo info = *(STableCheckInfo*) taosArrayGet(pQueryHandle->pTableCheckInfo, index); STableCheckInfo info = *(STableCheckInfo*) taosArrayGet(pQueryHandle->pTableCheckInfo, index);
taosArrayClear(pQueryHandle->pTableCheckInfo); taosArrayClear(pQueryHandle->pTableCheckInfo);
info.lastKey = key; info.lastKey = key;
taosArrayPush(pQueryHandle->pTableCheckInfo, &info); taosArrayPush(pQueryHandle->pTableCheckInfo, &info);
// update the query time window according to the chosen last timestamp // update the query time window according to the chosen last timestamp
pQueryHandle->window = (STimeWindow) {key, key}; pQueryHandle->window = (STimeWindow) {key, key};
} }
...@@ -1759,13 +1796,13 @@ static void changeQueryHandleForInterpQuery(TsdbQueryHandleT pHandle) { ...@@ -1759,13 +1796,13 @@ static void changeQueryHandleForInterpQuery(TsdbQueryHandleT pHandle) {
// filter the queried time stamp in the first place // filter the queried time stamp in the first place
STsdbQueryHandle* pQueryHandle = (STsdbQueryHandle*) pHandle; STsdbQueryHandle* pQueryHandle = (STsdbQueryHandle*) pHandle;
pQueryHandle->order = TSDB_ORDER_DESC; pQueryHandle->order = TSDB_ORDER_DESC;
assert(pQueryHandle->window.skey == pQueryHandle->window.ekey); assert(pQueryHandle->window.skey == pQueryHandle->window.ekey);
// starts from the buffer in case of descending timestamp order check data blocks // starts from the buffer in case of descending timestamp order check data blocks
// todo consider the query time window, current last_row does not apply the query time window // todo consider the query time window, current last_row does not apply the query time window
size_t numOfTables = taosArrayGetSize(pQueryHandle->pTableCheckInfo); size_t numOfTables = taosArrayGetSize(pQueryHandle->pTableCheckInfo);
int32_t i = 0; int32_t i = 0;
while(i < numOfTables) { while(i < numOfTables) {
STableCheckInfo* pCheckInfo = taosArrayGet(pQueryHandle->pTableCheckInfo, i); STableCheckInfo* pCheckInfo = taosArrayGet(pQueryHandle->pTableCheckInfo, i);
...@@ -1773,21 +1810,21 @@ static void changeQueryHandleForInterpQuery(TsdbQueryHandleT pHandle) { ...@@ -1773,21 +1810,21 @@ static void changeQueryHandleForInterpQuery(TsdbQueryHandleT pHandle) {
pCheckInfo->pTableObj->lastKey != TSKEY_INITIAL_VAL) { pCheckInfo->pTableObj->lastKey != TSKEY_INITIAL_VAL) {
break; break;
} }
i++; i++;
} }
// there are no data in all the tables // there are no data in all the tables
if (i == numOfTables) { if (i == numOfTables) {
return; return;
} }
STableCheckInfo info = *(STableCheckInfo*) taosArrayGet(pQueryHandle->pTableCheckInfo, i); STableCheckInfo info = *(STableCheckInfo*) taosArrayGet(pQueryHandle->pTableCheckInfo, i);
taosArrayClear(pQueryHandle->pTableCheckInfo); taosArrayClear(pQueryHandle->pTableCheckInfo);
info.lastKey = pQueryHandle->window.skey; info.lastKey = pQueryHandle->window.skey;
taosArrayPush(pQueryHandle->pTableCheckInfo, &info); taosArrayPush(pQueryHandle->pTableCheckInfo, &info);
// update the query time window according to the chosen last timestamp // update the query time window according to the chosen last timestamp
pQueryHandle->window = (STimeWindow) {info.lastKey, TSKEY_INITIAL_VAL}; pQueryHandle->window = (STimeWindow) {info.lastKey, TSKEY_INITIAL_VAL};
} }
...@@ -1811,7 +1848,7 @@ static int tsdbReadRowsFromCache(STableCheckInfo* pCheckInfo, TSKEY maxKey, int ...@@ -1811,7 +1848,7 @@ static int tsdbReadRowsFromCache(STableCheckInfo* pCheckInfo, TSKEY maxKey, int
if ((key > maxKey && ASCENDING_TRAVERSE(pQueryHandle->order)) || (key < maxKey && !ASCENDING_TRAVERSE(pQueryHandle->order))) { if ((key > maxKey && ASCENDING_TRAVERSE(pQueryHandle->order)) || (key < maxKey && !ASCENDING_TRAVERSE(pQueryHandle->order))) {
tsdbDebug("%p key:%"PRIu64" beyond qrange:%"PRId64" - %"PRId64", no more data in buffer", pQueryHandle, key, pQueryHandle->window.skey, tsdbDebug("%p key:%"PRIu64" beyond qrange:%"PRId64" - %"PRId64", no more data in buffer", pQueryHandle, key, pQueryHandle->window.skey,
pQueryHandle->window.ekey); pQueryHandle->window.ekey);
break; break;
} }
...@@ -1826,24 +1863,24 @@ static int tsdbReadRowsFromCache(STableCheckInfo* pCheckInfo, TSKEY maxKey, int ...@@ -1826,24 +1863,24 @@ static int tsdbReadRowsFromCache(STableCheckInfo* pCheckInfo, TSKEY maxKey, int
moveToNextRowInMem(pCheckInfo); moveToNextRowInMem(pCheckInfo);
break; break;
} }
} while(moveToNextRowInMem(pCheckInfo)); } while(moveToNextRowInMem(pCheckInfo));
assert(numOfRows <= maxRowsToRead); assert(numOfRows <= maxRowsToRead);
// if the buffer is not full in case of descending order query, move the data in the front of the buffer // if the buffer is not full in case of descending order query, move the data in the front of the buffer
if (!ASCENDING_TRAVERSE(pQueryHandle->order) && numOfRows < maxRowsToRead) { if (!ASCENDING_TRAVERSE(pQueryHandle->order) && numOfRows < maxRowsToRead) {
int32_t emptySize = maxRowsToRead - numOfRows; int32_t emptySize = maxRowsToRead - numOfRows;
for(int32_t i = 0; i < numOfCols; ++i) { for(int32_t i = 0; i < numOfCols; ++i) {
SColumnInfoData* pColInfo = taosArrayGet(pQueryHandle->pColumns, i); SColumnInfoData* pColInfo = taosArrayGet(pQueryHandle->pColumns, i);
memmove(pColInfo->pData, pColInfo->pData + emptySize * pColInfo->info.bytes, numOfRows * pColInfo->info.bytes); memmove(pColInfo->pData, pColInfo->pData + emptySize * pColInfo->info.bytes, numOfRows * pColInfo->info.bytes);
} }
} }
int64_t elapsedTime = taosGetTimestampUs() - st; int64_t elapsedTime = taosGetTimestampUs() - st;
tsdbDebug("%p build data block from cache completed, elapsed time:%"PRId64" us, numOfRows:%d, numOfCols:%d", pQueryHandle, tsdbDebug("%p build data block from cache completed, elapsed time:%"PRId64" us, numOfRows:%d, numOfCols:%d, %p", pQueryHandle,
elapsedTime, numOfRows, numOfCols); elapsedTime, numOfRows, numOfCols, pQueryHandle->qinfo);
return numOfRows; return numOfRows;
} }
...@@ -1852,7 +1889,7 @@ void tsdbRetrieveDataBlockInfo(TsdbQueryHandleT* pQueryHandle, SDataBlockInfo* p ...@@ -1852,7 +1889,7 @@ void tsdbRetrieveDataBlockInfo(TsdbQueryHandleT* pQueryHandle, SDataBlockInfo* p
STsdbQueryHandle* pHandle = (STsdbQueryHandle*)pQueryHandle; STsdbQueryHandle* pHandle = (STsdbQueryHandle*)pQueryHandle;
SQueryFilePos* cur = &pHandle->cur; SQueryFilePos* cur = &pHandle->cur;
STable* pTable = NULL; STable* pTable = NULL;
// there are data in file // there are data in file
if (pHandle->cur.fid >= 0) { if (pHandle->cur.fid >= 0) {
STableBlockInfo* pBlockInfo = &pHandle->pDataBlockInfo[cur->slot]; STableBlockInfo* pBlockInfo = &pHandle->pDataBlockInfo[cur->slot];
...@@ -1874,13 +1911,13 @@ void tsdbRetrieveDataBlockInfo(TsdbQueryHandleT* pQueryHandle, SDataBlockInfo* p ...@@ -1874,13 +1911,13 @@ void tsdbRetrieveDataBlockInfo(TsdbQueryHandleT* pQueryHandle, SDataBlockInfo* p
*/ */
int32_t tsdbRetrieveDataBlockStatisInfo(TsdbQueryHandleT* pQueryHandle, SDataStatis** pBlockStatis) { int32_t tsdbRetrieveDataBlockStatisInfo(TsdbQueryHandleT* pQueryHandle, SDataStatis** pBlockStatis) {
STsdbQueryHandle* pHandle = (STsdbQueryHandle*) pQueryHandle; STsdbQueryHandle* pHandle = (STsdbQueryHandle*) pQueryHandle;
SQueryFilePos* c = &pHandle->cur; SQueryFilePos* c = &pHandle->cur;
if (c->mixBlock) { if (c->mixBlock) {
*pBlockStatis = NULL; *pBlockStatis = NULL;
return TSDB_CODE_SUCCESS; return TSDB_CODE_SUCCESS;
} }
STableBlockInfo* pBlockInfo = &pHandle->pDataBlockInfo[c->slot]; STableBlockInfo* pBlockInfo = &pHandle->pDataBlockInfo[c->slot];
assert((c->slot >= 0 && c->slot < pHandle->numOfBlocks) || ((c->slot == pHandle->numOfBlocks) && (c->slot == 0))); assert((c->slot >= 0 && c->slot < pHandle->numOfBlocks) || ((c->slot == pHandle->numOfBlocks) && (c->slot == 0)));
...@@ -1900,7 +1937,7 @@ int32_t tsdbRetrieveDataBlockStatisInfo(TsdbQueryHandleT* pQueryHandle, SDataSta ...@@ -1900,7 +1937,7 @@ int32_t tsdbRetrieveDataBlockStatisInfo(TsdbQueryHandleT* pQueryHandle, SDataSta
for(int32_t i = 0; i < numOfCols; ++i) { for(int32_t i = 0; i < numOfCols; ++i) {
pHandle->statis[i].colId = colIds[i]; pHandle->statis[i].colId = colIds[i];
} }
tsdbGetDataStatis(&pHandle->rhelper, pHandle->statis, numOfCols); tsdbGetDataStatis(&pHandle->rhelper, pHandle->statis, numOfCols);
// always load the first primary timestamp column data // always load the first primary timestamp column data
...@@ -1949,31 +1986,31 @@ SArray* tsdbRetrieveDataBlock(TsdbQueryHandleT* pQueryHandle, SArray* pIdList) { ...@@ -1949,31 +1986,31 @@ SArray* tsdbRetrieveDataBlock(TsdbQueryHandleT* pQueryHandle, SArray* pIdList) {
} else { } else {
SDataBlockInfo binfo = GET_FILE_DATA_BLOCK_INFO(pCheckInfo, pBlockInfo->compBlock); SDataBlockInfo binfo = GET_FILE_DATA_BLOCK_INFO(pCheckInfo, pBlockInfo->compBlock);
assert(pHandle->realNumOfRows <= binfo.rows); assert(pHandle->realNumOfRows <= binfo.rows);
// data block has been loaded, todo extract method // data block has been loaded, todo extract method
SDataBlockLoadInfo* pBlockLoadInfo = &pHandle->dataBlockLoadInfo; SDataBlockLoadInfo* pBlockLoadInfo = &pHandle->dataBlockLoadInfo;
if (pBlockLoadInfo->slot == pHandle->cur.slot && pBlockLoadInfo->fileGroup->fileId == pHandle->cur.fid && if (pBlockLoadInfo->slot == pHandle->cur.slot && pBlockLoadInfo->fileGroup->fileId == pHandle->cur.fid &&
pBlockLoadInfo->tid == pCheckInfo->pTableObj->tableId.tid) { pBlockLoadInfo->tid == pCheckInfo->pTableObj->tableId.tid) {
return pHandle->pColumns; return pHandle->pColumns;
} else { // only load the file block } else { // only load the file block
SCompBlock* pBlock = pBlockInfo->compBlock; SCompBlock* pBlock = pBlockInfo->compBlock;
doLoadFileDataBlock(pHandle, pBlock, pCheckInfo); doLoadFileDataBlock(pHandle, pBlock, pCheckInfo, pHandle->cur.slot);
// todo refactor // todo refactor
int32_t numOfRows = copyDataFromFileBlock(pHandle, pHandle->outputCapacity, 0, 0, pBlock->numOfRows - 1); int32_t numOfRows = copyDataFromFileBlock(pHandle, pHandle->outputCapacity, 0, 0, pBlock->numOfRows - 1);
// if the buffer is not full in case of descending order query, move the data in the front of the buffer // if the buffer is not full in case of descending order query, move the data in the front of the buffer
if (!ASCENDING_TRAVERSE(pHandle->order) && numOfRows < pHandle->outputCapacity) { if (!ASCENDING_TRAVERSE(pHandle->order) && numOfRows < pHandle->outputCapacity) {
int32_t emptySize = pHandle->outputCapacity - numOfRows; int32_t emptySize = pHandle->outputCapacity - numOfRows;
int32_t reqNumOfCols = taosArrayGetSize(pHandle->pColumns); int32_t reqNumOfCols = taosArrayGetSize(pHandle->pColumns);
for(int32_t i = 0; i < reqNumOfCols; ++i) { for(int32_t i = 0; i < reqNumOfCols; ++i) {
SColumnInfoData* pColInfo = taosArrayGet(pHandle->pColumns, i); SColumnInfoData* pColInfo = taosArrayGet(pHandle->pColumns, i);
memmove(pColInfo->pData, pColInfo->pData + emptySize * pColInfo->info.bytes, numOfRows * pColInfo->info.bytes); memmove(pColInfo->pData, pColInfo->pData + emptySize * pColInfo->info.bytes, numOfRows * pColInfo->info.bytes);
} }
} }
return pHandle->pColumns; return pHandle->pColumns;
} }
} }
...@@ -1984,11 +2021,11 @@ static int32_t getAllTableList(STable* pSuperTable, SArray* list) { ...@@ -1984,11 +2021,11 @@ static int32_t getAllTableList(STable* pSuperTable, SArray* list) {
SSkipListIterator* iter = tSkipListCreateIter(pSuperTable->pIndex); SSkipListIterator* iter = tSkipListCreateIter(pSuperTable->pIndex);
while (tSkipListIterNext(iter)) { while (tSkipListIterNext(iter)) {
SSkipListNode* pNode = tSkipListIterGet(iter); SSkipListNode* pNode = tSkipListIterGet(iter);
STable** pTable = (STable**) SL_GET_NODE_DATA((SSkipListNode*) pNode); STable** pTable = (STable**) SL_GET_NODE_DATA((SSkipListNode*) pNode);
taosArrayPush(list, pTable); taosArrayPush(list, pTable);
} }
tSkipListDestroyIter(iter); tSkipListDestroyIter(iter);
return TSDB_CODE_SUCCESS; return TSDB_CODE_SUCCESS;
} }
...@@ -1998,12 +2035,12 @@ static void destroyHelper(void* param) { ...@@ -1998,12 +2035,12 @@ static void destroyHelper(void* param) {
return; return;
} }
tQueryInfo* pInfo = (tQueryInfo*)param; tQueryInfo* pInfo = (tQueryInfo*)param;
if (pInfo->optr != TSDB_RELATION_IN) { if (pInfo->optr != TSDB_RELATION_IN) {
tfree(pInfo->q); tfree(pInfo->q);
} }
// tVariantDestroy(&(pInfo->q)); // tVariantDestroy(&(pInfo->q));
free(param); free(param);
} }
...@@ -2015,7 +2052,7 @@ void filterPrepare(void* expr, void* param) { ...@@ -2015,7 +2052,7 @@ void filterPrepare(void* expr, void* param) {
} }
pExpr->_node.info = calloc(1, sizeof(tQueryInfo)); pExpr->_node.info = calloc(1, sizeof(tQueryInfo));
STSchema* pTSSchema = (STSchema*) param; STSchema* pTSSchema = (STSchema*) param;
tQueryInfo* pInfo = pExpr->_node.info; tQueryInfo* pInfo = pExpr->_node.info;
tVariant* pCond = pExpr->_node.pRight->pVal; tVariant* pCond = pExpr->_node.pRight->pVal;
...@@ -2025,7 +2062,7 @@ void filterPrepare(void* expr, void* param) { ...@@ -2025,7 +2062,7 @@ void filterPrepare(void* expr, void* param) {
pInfo->optr = pExpr->_node.optr; pInfo->optr = pExpr->_node.optr;
pInfo->compare = getComparFunc(pSchema->type, pInfo->optr); pInfo->compare = getComparFunc(pSchema->type, pInfo->optr);
pInfo->param = pTSSchema; pInfo->param = pTSSchema;
if (pInfo->optr == TSDB_RELATION_IN) { if (pInfo->optr == TSDB_RELATION_IN) {
pInfo->q = (char*) pCond->arr; pInfo->q = (char*) pCond->arr;
} else { } else {
...@@ -2038,25 +2075,24 @@ typedef struct STableGroupSupporter { ...@@ -2038,25 +2075,24 @@ typedef struct STableGroupSupporter {
int32_t numOfCols; int32_t numOfCols;
SColIndex* pCols; SColIndex* pCols;
STSchema* pTagSchema; STSchema* pTagSchema;
// void* tsdbMeta;
} STableGroupSupporter; } STableGroupSupporter;
int32_t tableGroupComparFn(const void *p1, const void *p2, const void *param) { int32_t tableGroupComparFn(const void *p1, const void *p2, const void *param) {
STableGroupSupporter* pTableGroupSupp = (STableGroupSupporter*) param; STableGroupSupporter* pTableGroupSupp = (STableGroupSupporter*) param;
STable* pTable1 = *(STable**) p1; STable* pTable1 = *(STable**) p1;
STable* pTable2 = *(STable**) p2; STable* pTable2 = *(STable**) p2;
for (int32_t i = 0; i < pTableGroupSupp->numOfCols; ++i) { for (int32_t i = 0; i < pTableGroupSupp->numOfCols; ++i) {
SColIndex* pColIndex = &pTableGroupSupp->pCols[i]; SColIndex* pColIndex = &pTableGroupSupp->pCols[i];
int32_t colIndex = pColIndex->colIndex; int32_t colIndex = pColIndex->colIndex;
assert(colIndex >= TSDB_TBNAME_COLUMN_INDEX); assert(colIndex >= TSDB_TBNAME_COLUMN_INDEX);
char * f1 = NULL; char * f1 = NULL;
char * f2 = NULL; char * f2 = NULL;
int32_t type = 0; int32_t type = 0;
int32_t bytes = 0; int32_t bytes = 0;
if (colIndex == TSDB_TBNAME_COLUMN_INDEX) { if (colIndex == TSDB_TBNAME_COLUMN_INDEX) {
f1 = (char*) TABLE_NAME(pTable1); f1 = (char*) TABLE_NAME(pTable1);
f2 = (char*) TABLE_NAME(pTable2); f2 = (char*) TABLE_NAME(pTable2);
...@@ -2090,14 +2126,14 @@ int32_t tableGroupComparFn(const void *p1, const void *p2, const void *param) { ...@@ -2090,14 +2126,14 @@ int32_t tableGroupComparFn(const void *p1, const void *p2, const void *param) {
return ret; return ret;
} }
} }
return 0; return 0;
} }
void createTableGroupImpl(SArray* pGroups, SArray* pTableList, size_t numOfTables, STableGroupSupporter* pSupp, void createTableGroupImpl(SArray* pGroups, SArray* pTableList, size_t numOfTables, STableGroupSupporter* pSupp,
__ext_compar_fn_t compareFn) { __ext_compar_fn_t compareFn) {
STable* pTable = taosArrayGetP(pTableList, 0); STable* pTable = taosArrayGetP(pTableList, 0);
SArray* g = taosArrayInit(16, POINTER_BYTES); SArray* g = taosArrayInit(16, POINTER_BYTES);
taosArrayPush(g, &pTable); taosArrayPush(g, &pTable);
tsdbRefTable(pTable); tsdbRefTable(pTable);
...@@ -2105,10 +2141,10 @@ void createTableGroupImpl(SArray* pGroups, SArray* pTableList, size_t numOfTable ...@@ -2105,10 +2141,10 @@ void createTableGroupImpl(SArray* pGroups, SArray* pTableList, size_t numOfTable
for (int32_t i = 1; i < numOfTables; ++i) { for (int32_t i = 1; i < numOfTables; ++i) {
STable** prev = taosArrayGet(pTableList, i - 1); STable** prev = taosArrayGet(pTableList, i - 1);
STable** p = taosArrayGet(pTableList, i); STable** p = taosArrayGet(pTableList, i);
int32_t ret = compareFn(prev, p, pSupp); int32_t ret = compareFn(prev, p, pSupp);
assert(ret == 0 || ret == -1); assert(ret == 0 || ret == -1);
tsdbRefTable(*p); tsdbRefTable(*p);
assert((*p)->type == TSDB_CHILD_TABLE); assert((*p)->type == TSDB_CHILD_TABLE);
...@@ -2120,20 +2156,20 @@ void createTableGroupImpl(SArray* pGroups, SArray* pTableList, size_t numOfTable ...@@ -2120,20 +2156,20 @@ void createTableGroupImpl(SArray* pGroups, SArray* pTableList, size_t numOfTable
taosArrayPush(g, p); taosArrayPush(g, p);
} }
} }
taosArrayPush(pGroups, &g); taosArrayPush(pGroups, &g);
} }
SArray* createTableGroup(SArray* pTableList, STSchema* pTagSchema, SColIndex* pCols, int32_t numOfOrderCols) { SArray* createTableGroup(SArray* pTableList, STSchema* pTagSchema, SColIndex* pCols, int32_t numOfOrderCols) {
assert(pTableList != NULL); assert(pTableList != NULL);
SArray* pTableGroup = taosArrayInit(1, POINTER_BYTES); SArray* pTableGroup = taosArrayInit(1, POINTER_BYTES);
size_t size = taosArrayGetSize(pTableList); size_t size = taosArrayGetSize(pTableList);
if (size == 0) { if (size == 0) {
tsdbDebug("no qualified tables"); tsdbDebug("no qualified tables");
return pTableGroup; return pTableGroup;
} }
if (numOfOrderCols == 0 || size == 1) { // no group by tags clause or only one table if (numOfOrderCols == 0 || size == 1) { // no group by tags clause or only one table
SArray* sa = taosArrayInit(size, POINTER_BYTES); SArray* sa = taosArrayInit(size, POINTER_BYTES);
for(int32_t i = 0; i < size; ++i) { for(int32_t i = 0; i < size; ++i) {
...@@ -2143,7 +2179,7 @@ SArray* createTableGroup(SArray* pTableList, STSchema* pTagSchema, SColIndex* pC ...@@ -2143,7 +2179,7 @@ SArray* createTableGroup(SArray* pTableList, STSchema* pTagSchema, SColIndex* pC
tsdbRefTable(*pTable); tsdbRefTable(*pTable);
taosArrayPush(sa, pTable); taosArrayPush(sa, pTable);
} }
taosArrayPush(pTableGroup, &sa); taosArrayPush(pTableGroup, &sa);
tsdbDebug("all %zu tables belong to one group", size); tsdbDebug("all %zu tables belong to one group", size);
} else { } else {
...@@ -2151,18 +2187,18 @@ SArray* createTableGroup(SArray* pTableList, STSchema* pTagSchema, SColIndex* pC ...@@ -2151,18 +2187,18 @@ SArray* createTableGroup(SArray* pTableList, STSchema* pTagSchema, SColIndex* pC
pSupp->numOfCols = numOfOrderCols; pSupp->numOfCols = numOfOrderCols;
pSupp->pTagSchema = pTagSchema; pSupp->pTagSchema = pTagSchema;
pSupp->pCols = pCols; pSupp->pCols = pCols;
taosqsort(pTableList->pData, size, POINTER_BYTES, pSupp, tableGroupComparFn); taosqsort(pTableList->pData, size, POINTER_BYTES, pSupp, tableGroupComparFn);
createTableGroupImpl(pTableGroup, pTableList, size, pSupp, tableGroupComparFn); createTableGroupImpl(pTableGroup, pTableList, size, pSupp, tableGroupComparFn);
tfree(pSupp); tfree(pSupp);
} }
return pTableGroup; return pTableGroup;
} }
bool indexedNodeFilterFp(const void* pNode, void* param) { bool indexedNodeFilterFp(const void* pNode, void* param) {
tQueryInfo* pInfo = (tQueryInfo*) param; tQueryInfo* pInfo = (tQueryInfo*) param;
STable* pTable = *(STable**)(SL_GET_NODE_DATA((SSkipListNode*)pNode)); STable* pTable = *(STable**)(SL_GET_NODE_DATA((SSkipListNode*)pNode));
char* val = NULL; char* val = NULL;
...@@ -2172,7 +2208,7 @@ bool indexedNodeFilterFp(const void* pNode, void* param) { ...@@ -2172,7 +2208,7 @@ bool indexedNodeFilterFp(const void* pNode, void* param) {
} else { } else {
val = tdGetKVRowValOfCol(pTable->tagVal, pInfo->sch.colId); val = tdGetKVRowValOfCol(pTable->tagVal, pInfo->sch.colId);
} }
int32_t ret = 0; int32_t ret = 0;
if (val == NULL) { //the val is possible to be null, so check it out carefully if (val == NULL) { //the val is possible to be null, so check it out carefully
ret = -1; // val is missing in table tags value pairs ret = -1; // val is missing in table tags value pairs
...@@ -2209,7 +2245,7 @@ bool indexedNodeFilterFp(const void* pNode, void* param) { ...@@ -2209,7 +2245,7 @@ bool indexedNodeFilterFp(const void* pNode, void* param) {
default: default:
assert(false); assert(false);
} }
return true; return true;
} }
...@@ -2239,7 +2275,7 @@ int32_t tsdbQuerySTableByTagCond(TSDB_REPO_T* tsdb, uint64_t uid, const char* pT ...@@ -2239,7 +2275,7 @@ int32_t tsdbQuerySTableByTagCond(TSDB_REPO_T* tsdb, uint64_t uid, const char* pT
goto _error; goto _error;
} }
if (pTable->type != TSDB_SUPER_TABLE) { if (pTable->type != TSDB_SUPER_TABLE) {
tsdbError("%p query normal tag not allowed, uid:%" PRIu64 ", tid:%d, name:%s", tsdb, uid, pTable->tableId.tid, tsdbError("%p query normal tag not allowed, uid:%" PRIu64 ", tid:%d, name:%s", tsdb, uid, pTable->tableId.tid,
pTable->name->data); pTable->name->data);
...@@ -2252,7 +2288,7 @@ int32_t tsdbQuerySTableByTagCond(TSDB_REPO_T* tsdb, uint64_t uid, const char* pT ...@@ -2252,7 +2288,7 @@ int32_t tsdbQuerySTableByTagCond(TSDB_REPO_T* tsdb, uint64_t uid, const char* pT
//NOTE: not add ref count for super table //NOTE: not add ref count for super table
SArray* res = taosArrayInit(8, POINTER_BYTES); SArray* res = taosArrayInit(8, POINTER_BYTES);
STSchema* pTagSchema = tsdbGetTableTagSchema(pTable); STSchema* pTagSchema = tsdbGetTableTagSchema(pTable);
// no tags and tbname condition, all child tables of this stable are involved // no tags and tbname condition, all child tables of this stable are involved
if (tbnameCond == NULL && (pTagCond == NULL || len == 0)) { if (tbnameCond == NULL && (pTagCond == NULL || len == 0)) {
int32_t ret = getAllTableList(pTable, res); int32_t ret = getAllTableList(pTable, res);
...@@ -2263,7 +2299,7 @@ int32_t tsdbQuerySTableByTagCond(TSDB_REPO_T* tsdb, uint64_t uid, const char* pT ...@@ -2263,7 +2299,7 @@ int32_t tsdbQuerySTableByTagCond(TSDB_REPO_T* tsdb, uint64_t uid, const char* pT
pGroupInfo->numOfTables = taosArrayGetSize(res); pGroupInfo->numOfTables = taosArrayGetSize(res);
pGroupInfo->pGroupList = createTableGroup(res, pTagSchema, pColIndex, numOfCols); pGroupInfo->pGroupList = createTableGroup(res, pTagSchema, pColIndex, numOfCols);
tsdbDebug("%p no table name/tag condition, all tables belong to one group, numOfTables:%zu", tsdb, pGroupInfo->numOfTables); tsdbDebug("%p no table name/tag condition, all tables belong to one group, numOfTables:%zu", tsdb, pGroupInfo->numOfTables);
taosArrayDestroy(res); taosArrayDestroy(res);
...@@ -2299,7 +2335,7 @@ int32_t tsdbQuerySTableByTagCond(TSDB_REPO_T* tsdb, uint64_t uid, const char* pT ...@@ -2299,7 +2335,7 @@ int32_t tsdbQuerySTableByTagCond(TSDB_REPO_T* tsdb, uint64_t uid, const char* pT
} CATCH( code ) { } CATCH( code ) {
CLEANUP_EXECUTE(); CLEANUP_EXECUTE();
terrno = code; terrno = code;
goto _error; goto _error;
// TODO: more error handling // TODO: more error handling
} END_TRY } END_TRY
...@@ -2335,12 +2371,12 @@ int32_t tsdbGetOneTableGroup(TSDB_REPO_T* tsdb, uint64_t uid, STableGroupInfo* p ...@@ -2335,12 +2371,12 @@ int32_t tsdbGetOneTableGroup(TSDB_REPO_T* tsdb, uint64_t uid, STableGroupInfo* p
pGroupInfo->numOfTables = 1; pGroupInfo->numOfTables = 1;
pGroupInfo->pGroupList = taosArrayInit(1, POINTER_BYTES); pGroupInfo->pGroupList = taosArrayInit(1, POINTER_BYTES);
SArray* group = taosArrayInit(1, POINTER_BYTES); SArray* group = taosArrayInit(1, POINTER_BYTES);
taosArrayPush(group, &pTable); taosArrayPush(group, &pTable);
taosArrayPush(pGroupInfo->pGroupList, &group); taosArrayPush(pGroupInfo->pGroupList, &group);
return TSDB_CODE_SUCCESS; return TSDB_CODE_SUCCESS;
_error: _error:
...@@ -2428,8 +2464,8 @@ void tsdbCleanupQueryHandle(TsdbQueryHandleT queryHandle) { ...@@ -2428,8 +2464,8 @@ void tsdbCleanupQueryHandle(TsdbQueryHandleT queryHandle) {
tsdbDestroyHelper(&pQueryHandle->rhelper); tsdbDestroyHelper(&pQueryHandle->rhelper);
SIOCostSummary* pCost = &pQueryHandle->cost; SIOCostSummary* pCost = &pQueryHandle->cost;
tsdbDebug(":io-cost summary: statis-info:%"PRId64"us, datablock:%" PRId64"us, check data:%"PRId64"us, %p", tsdbDebug("%p :io-cost summary: statis-info:%"PRId64"us, datablock:%" PRId64"us, check data:%"PRId64"us, %p",
pCost->statisInfoLoadTime, pCost->blockLoadTime, pCost->checkForNextTime, pQueryHandle->qinfo); pQueryHandle, pCost->statisInfoLoadTime, pCost->blockLoadTime, pCost->checkForNextTime, pQueryHandle->qinfo);
tfree(pQueryHandle); tfree(pQueryHandle);
} }
......
...@@ -68,8 +68,6 @@ typedef struct { ...@@ -68,8 +68,6 @@ typedef struct {
int64_t refreshTime; int64_t refreshTime;
STrashElem * pTrash; STrashElem * pTrash;
char* name; char* name;
// void * tmrCtrl;
// void * pTimer;
SCacheStatis statistics; SCacheStatis statistics;
SHashObj * pHashTable; SHashObj * pHashTable;
__cache_free_fn_t freeFp; __cache_free_fn_t freeFp;
......
...@@ -55,6 +55,8 @@ int tdListPrepend(SList *list, void *data); ...@@ -55,6 +55,8 @@ int tdListPrepend(SList *list, void *data);
int tdListAppend(SList *list, void *data); int tdListAppend(SList *list, void *data);
SListNode *tdListPopHead(SList *list); SListNode *tdListPopHead(SList *list);
SListNode *tdListPopTail(SList *list); SListNode *tdListPopTail(SList *list);
SListNode *tdListGetHead(SList *list);
SListNode *tsListGetTail(SList *list);
SListNode *tdListPopNode(SList *list, SListNode *node); SListNode *tdListPopNode(SList *list, SListNode *node);
void tdListMove(SList *src, SList *dst); void tdListMove(SList *src, SList *dst);
void tdListDiscard(SList *list); void tdListDiscard(SList *list);
......
...@@ -343,7 +343,7 @@ void* taosCacheUpdateExpireTimeByName(SCacheObj *pCacheObj, void *key, size_t ke ...@@ -343,7 +343,7 @@ void* taosCacheUpdateExpireTimeByName(SCacheObj *pCacheObj, void *key, size_t ke
SCacheDataNode **ptNode = (SCacheDataNode **)taosHashGet(pCacheObj->pHashTable, key, keyLen); SCacheDataNode **ptNode = (SCacheDataNode **)taosHashGet(pCacheObj->pHashTable, key, keyLen);
if (ptNode != NULL) { if (ptNode != NULL) {
T_REF_INC(*ptNode); T_REF_INC(*ptNode);
(*ptNode)->expireTime = taosGetTimestampMs() + (*ptNode)->lifespan; (*ptNode)->expireTime = expireTime; // taosGetTimestampMs() + (*ptNode)->lifespan;
} }
__cache_unlock(pCacheObj); __cache_unlock(pCacheObj);
...@@ -381,7 +381,7 @@ void *taosCacheAcquireByData(SCacheObj *pCacheObj, void *data) { ...@@ -381,7 +381,7 @@ void *taosCacheAcquireByData(SCacheObj *pCacheObj, void *data) {
} }
void *taosCacheTransfer(SCacheObj *pCacheObj, void **data) { void *taosCacheTransfer(SCacheObj *pCacheObj, void **data) {
if (pCacheObj == NULL || data == NULL) return NULL; if (pCacheObj == NULL || data == NULL || (*data) == NULL) return NULL;
size_t offset = offsetof(SCacheDataNode, data); size_t offset = offsetof(SCacheDataNode, data);
SCacheDataNode *ptNode = (SCacheDataNode *)((char *)(*data) - offset); SCacheDataNode *ptNode = (SCacheDataNode *)((char *)(*data) - offset);
...@@ -419,7 +419,7 @@ void taosCacheRelease(SCacheObj *pCacheObj, void **data, bool _remove) { ...@@ -419,7 +419,7 @@ void taosCacheRelease(SCacheObj *pCacheObj, void **data, bool _remove) {
// note: extend lifespan before dec ref count // note: extend lifespan before dec ref count
bool inTrashCan = pNode->inTrashCan; bool inTrashCan = pNode->inTrashCan;
if (pCacheObj->extendLifespan && (!inTrashCan)) { if (pCacheObj->extendLifespan && (!inTrashCan) && (!_remove)) {
atomic_store_64(&pNode->expireTime, pNode->lifespan + taosGetTimestampMs()); atomic_store_64(&pNode->expireTime, pNode->lifespan + taosGetTimestampMs());
uDebug("cache:%s data:%p extend life time to %"PRId64 " before release", pCacheObj->name, pNode->data, pNode->expireTime); uDebug("cache:%s data:%p extend life time to %"PRId64 " before release", pCacheObj->name, pNode->data, pNode->expireTime);
} }
...@@ -457,8 +457,9 @@ void taosCacheRelease(SCacheObj *pCacheObj, void **data, bool _remove) { ...@@ -457,8 +457,9 @@ void taosCacheRelease(SCacheObj *pCacheObj, void **data, bool _remove) {
} else { } else {
// NOTE: once refcount is decrease, pNode may be freed by other thread immediately. // NOTE: once refcount is decrease, pNode may be freed by other thread immediately.
int32_t ref = T_REF_DEC(pNode); int32_t ref = T_REF_DEC(pNode);
uDebug("cache:%s, key:%p, %p is released, refcnt:%d, in trashcan:%d", pCacheObj->name, pNode->key, pNode->data, ref,
inTrashCan); uDebug("cache:%s, key:%p, %p released, refcnt:%d, data in trancan:%d", pCacheObj->name, pNode->key, pNode->data,
ref, inTrashCan);
} }
} }
...@@ -572,6 +573,7 @@ void taosRemoveFromTrashCan(SCacheObj *pCacheObj, STrashElem *pElem) { ...@@ -572,6 +573,7 @@ void taosRemoveFromTrashCan(SCacheObj *pCacheObj, STrashElem *pElem) {
free(pElem); free(pElem);
} }
// TODO add another lock when scanning trashcan
void taosTrashCanEmpty(SCacheObj *pCacheObj, bool force) { void taosTrashCanEmpty(SCacheObj *pCacheObj, bool force) {
__cache_wr_lock(pCacheObj); __cache_wr_lock(pCacheObj);
...@@ -643,6 +645,7 @@ static void doCacheRefresh(SCacheObj* pCacheObj, int64_t time, __cache_free_fn_t ...@@ -643,6 +645,7 @@ static void doCacheRefresh(SCacheObj* pCacheObj, int64_t time, __cache_free_fn_t
__cache_wr_lock(pCacheObj); __cache_wr_lock(pCacheObj);
while (taosHashIterNext(pIter)) { while (taosHashIterNext(pIter)) {
SCacheDataNode *pNode = *(SCacheDataNode **)taosHashIterGet(pIter); SCacheDataNode *pNode = *(SCacheDataNode **)taosHashIterGet(pIter);
if (pNode->expireTime < time && T_REF_VAL_GET(pNode) <= 0) { if (pNode->expireTime < time && T_REF_VAL_GET(pNode) <= 0) {
taosCacheReleaseNode(pCacheObj, pNode); taosCacheReleaseNode(pCacheObj, pNode);
continue; continue;
...@@ -674,6 +677,7 @@ void* taosCacheTimedRefresh(void *handle) { ...@@ -674,6 +677,7 @@ void* taosCacheTimedRefresh(void *handle) {
// check if current cache object will be deleted every 500ms. // check if current cache object will be deleted every 500ms.
if (pCacheObj->deleting) { if (pCacheObj->deleting) {
uDebug("%s refresh threads quit", pCacheObj->name);
break; break;
} }
......
#include "taosdef.h" #include "taosdef.h"
#include "tcompare.h" #include "tcompare.h"
#include <tarray.h> #include "tarray.h"
#include "tutil.h" #include "tutil.h"
int32_t compareInt32Val(const void *pLeft, const void *pRight) { int32_t compareInt32Val(const void *pLeft, const void *pRight) {
......
...@@ -76,6 +76,7 @@ int tdListPrepend(SList *list, void *data) { ...@@ -76,6 +76,7 @@ int tdListPrepend(SList *list, void *data) {
SListNode *node = (SListNode *)malloc(sizeof(SListNode) + list->eleSize); SListNode *node = (SListNode *)malloc(sizeof(SListNode) + list->eleSize);
if (node == NULL) return -1; if (node == NULL) return -1;
node->next = node->prev = NULL;
memcpy((void *)(node->data), data, list->eleSize); memcpy((void *)(node->data), data, list->eleSize);
tdListPrependNode(list, node); tdListPrependNode(list, node);
...@@ -121,6 +122,22 @@ SListNode *tdListPopTail(SList *list) { ...@@ -121,6 +122,22 @@ SListNode *tdListPopTail(SList *list) {
return node; return node;
} }
/*
 * Peek at the first node of a list without unlinking it.
 *
 * Returns NULL when `list` is NULL or reports zero elements; otherwise
 * returns the list's head pointer. The list itself is not modified.
 */
SListNode *tdListGetHead(SList *list) {
  return (list != NULL && list->numOfEles != 0) ? list->head : NULL;
}
/*
 * Peek at the last node of a list without unlinking it.
 *
 * Returns the list's tail pointer, or NULL when `list` is NULL or reports
 * zero elements. The list itself is not modified.
 *
 * NOTE(review): the name uses a `ts` prefix while the sibling accessor is
 * `tdListGetHead`; the name is kept as-is because it is the declared
 * public symbol.
 */
SListNode *tsListGetTail(SList *list) {
  if (list != NULL && list->numOfEles != 0) {
    return list->tail;
  }

  return NULL;
}
SListNode *tdListPopNode(SList *list, SListNode *node) { SListNode *tdListPopNode(SList *list, SListNode *node) {
if (list->head == node) { if (list->head == node) {
list->head = node->next; list->head = node->next;
......
...@@ -71,11 +71,45 @@ static void vnodePutItemIntoReadQueue(SVnodeObj *pVnode, void *qhandle) { ...@@ -71,11 +71,45 @@ static void vnodePutItemIntoReadQueue(SVnodeObj *pVnode, void *qhandle) {
pRead->rpcMsg.msgType = TSDB_MSG_TYPE_QUERY; pRead->rpcMsg.msgType = TSDB_MSG_TYPE_QUERY;
pRead->pCont = qhandle; pRead->pCont = qhandle;
pRead->contLen = 0; pRead->contLen = 0;
pRead->rpcMsg.handle = NULL;
atomic_add_fetch_32(&pVnode->refCount, 1); atomic_add_fetch_32(&pVnode->refCount, 1);
taosWriteQitem(pVnode->rqueue, TAOS_QTYPE_QUERY, pRead); taosWriteQitem(pVnode->rqueue, TAOS_QTYPE_QUERY, pRead);
} }
/*
 * Dump the currently available result of a query into `pRet` and decide
 * whether the query handle must be kept alive.
 *
 * pRet        response holder; on success rsp/len are filled in by
 *             qDumpRetrieveResult, and qhandle is set when the query is
 *             re-queued
 * pVnode      vnode passed to vnodePutItemIntoReadQueue when the query
 *             has to continue executing
 * handle      query handle to dump the result from
 * freeHandle  out: false when the query was put back on the read queue,
 *             true in every other case
 *
 * Returns the code produced by qDumpRetrieveResult.
 *
 * On failure a zero-filled SRetrieveTableRsp is attached to pRet->rsp.
 * The allocation is checked before it is cleared, so an out-of-memory
 * condition no longer passes a NULL pointer to memset; in that case
 * pRet->rsp is left NULL. pRet->len is not modified on the failure path.
 */
static int32_t vnodeDumpQueryResult(SRspRet *pRet, void* pVnode, void* handle, bool* freeHandle) {
  bool continueExec = false;

  int32_t code = qDumpRetrieveResult(handle, (SRetrieveTableRsp **)&pRet->rsp, &pRet->len, &continueExec);
  if (code != TSDB_CODE_SUCCESS) {
    // NOTE(review): assumes rpcMallocCont reports failure by returning NULL - confirm.
    pRet->rsp = (SRetrieveTableRsp *)rpcMallocCont(sizeof(SRetrieveTableRsp));
    if (pRet->rsp != NULL) {
      memset(pRet->rsp, 0, sizeof(SRetrieveTableRsp));
    }

    *freeHandle = true;
    return code;
  }

  if (continueExec) {
    // more work is pending: hand the query back to the read queue and keep the handle
    vDebug("QInfo:%p add to query task queue for exec", handle);
    vnodePutItemIntoReadQueue(pVnode, handle);
    pRet->qhandle = handle;
    *freeHandle = false;
  } else {
    vDebug("QInfo:%p exec completed", handle);
    *freeHandle = true;
  }

  return code;
}
/*
 * Fill `pRet` with an empty retrieve response: a zero-filled
 * SRetrieveTableRsp whose `completed` flag is set, with pRet->len equal
 * to the size of that structure.
 *
 * If the response buffer cannot be allocated, pRet->rsp is set to NULL
 * and pRet->len to 0 instead of writing through a NULL pointer.
 */
static void vnodeBuildNoResultQueryRsp(SRspRet* pRet) {
  // NOTE(review): assumes rpcMallocCont reports failure by returning NULL - confirm.
  SRetrieveTableRsp* pRsp = (SRetrieveTableRsp *)rpcMallocCont(sizeof(SRetrieveTableRsp));
  if (pRsp == NULL) {
    pRet->rsp = NULL;
    pRet->len = 0;
    return;
  }

  memset(pRsp, 0, sizeof(SRetrieveTableRsp));
  pRsp->completed = true;

  pRet->rsp = pRsp;
  pRet->len = sizeof(SRetrieveTableRsp);
}
static int32_t vnodeProcessQueryMsg(SVnodeObj *pVnode, SReadMsg *pReadMsg) { static int32_t vnodeProcessQueryMsg(SVnodeObj *pVnode, SReadMsg *pReadMsg) {
void *pCont = pReadMsg->pCont; void *pCont = pReadMsg->pCont;
int32_t contLen = pReadMsg->contLen; int32_t contLen = pReadMsg->contLen;
...@@ -98,6 +132,7 @@ static int32_t vnodeProcessQueryMsg(SVnodeObj *pVnode, SReadMsg *pReadMsg) { ...@@ -98,6 +132,7 @@ static int32_t vnodeProcessQueryMsg(SVnodeObj *pVnode, SReadMsg *pReadMsg) {
vWarn("QInfo:%p invalid qhandle, no matched query handle, conn:%p", (void*) killQueryMsg->qhandle, pReadMsg->rpcMsg.handle); vWarn("QInfo:%p invalid qhandle, no matched query handle, conn:%p", (void*) killQueryMsg->qhandle, pReadMsg->rpcMsg.handle);
} else { } else {
assert(*qhandle == (void*) killQueryMsg->qhandle); assert(*qhandle == (void*) killQueryMsg->qhandle);
qKillQuery(*qhandle); qKillQuery(*qhandle);
qReleaseQInfo(pVnode->qMgmt, (void**) &qhandle, true); qReleaseQInfo(pVnode->qMgmt, (void**) &qhandle, true);
} }
...@@ -110,7 +145,7 @@ static int32_t vnodeProcessQueryMsg(SVnodeObj *pVnode, SReadMsg *pReadMsg) { ...@@ -110,7 +145,7 @@ static int32_t vnodeProcessQueryMsg(SVnodeObj *pVnode, SReadMsg *pReadMsg) {
if (contLen != 0) { if (contLen != 0) {
qinfo_t pQInfo = NULL; qinfo_t pQInfo = NULL;
code = qCreateQueryInfo(pVnode->tsdb, pVnode->vgId, pQueryTableMsg, pVnode, &pQInfo); code = qCreateQueryInfo(pVnode->tsdb, pVnode->vgId, pQueryTableMsg, &pQInfo);
SQueryTableRsp *pRsp = (SQueryTableRsp *) rpcMallocCont(sizeof(SQueryTableRsp)); SQueryTableRsp *pRsp = (SQueryTableRsp *) rpcMallocCont(sizeof(SQueryTableRsp));
pRsp->code = code; pRsp->code = code;
...@@ -133,7 +168,6 @@ static int32_t vnodeProcessQueryMsg(SVnodeObj *pVnode, SReadMsg *pReadMsg) { ...@@ -133,7 +168,6 @@ static int32_t vnodeProcessQueryMsg(SVnodeObj *pVnode, SReadMsg *pReadMsg) {
pRsp->qhandle = htobe64((uint64_t) pQInfo); pRsp->qhandle = htobe64((uint64_t) pQInfo);
} }
pQInfo = NULL;
if (handle != NULL && vnodeNotifyCurrentQhandle(pReadMsg->rpcMsg.handle, *handle, pVnode->vgId) != TSDB_CODE_SUCCESS) { if (handle != NULL && vnodeNotifyCurrentQhandle(pReadMsg->rpcMsg.handle, *handle, pVnode->vgId) != TSDB_CODE_SUCCESS) {
vError("vgId:%d, QInfo:%p, query discarded since link is broken, %p", pVnode->vgId, *handle, pReadMsg->rpcMsg.handle); vError("vgId:%d, QInfo:%p, query discarded since link is broken, %p", pVnode->vgId, *handle, pReadMsg->rpcMsg.handle);
pRsp->code = TSDB_CODE_RPC_NETWORK_UNAVAIL; pRsp->code = TSDB_CODE_RPC_NETWORK_UNAVAIL;
...@@ -153,16 +187,34 @@ static int32_t vnodeProcessQueryMsg(SVnodeObj *pVnode, SReadMsg *pReadMsg) { ...@@ -153,16 +187,34 @@ static int32_t vnodeProcessQueryMsg(SVnodeObj *pVnode, SReadMsg *pReadMsg) {
} else { } else {
assert(pCont != NULL); assert(pCont != NULL);
handle = qAcquireQInfo(pVnode->qMgmt, (uint64_t) pCont); handle = qAcquireQInfo(pVnode->qMgmt, (uint64_t) pCont);
if (handle == NULL) { if (handle == NULL) {
vWarn("QInfo:%p invalid qhandle in continuing exec query, conn:%p", (void*) pCont, pReadMsg->rpcMsg.handle); vWarn("QInfo:%p invalid qhandle in continuing exec query, conn:%p", (void*) pCont, pReadMsg->rpcMsg.handle);
code = TSDB_CODE_QRY_INVALID_QHANDLE; code = TSDB_CODE_QRY_INVALID_QHANDLE;
} else { } else {
vDebug("vgId:%d, QInfo:%p, dnode continue exec query", pVnode->vgId, (void*) pCont); vDebug("vgId:%d, QInfo:%p, dnode continue exec query", pVnode->vgId, (void*) pCont);
code = TSDB_CODE_VND_ACTION_IN_PROGRESS;
qTableQuery(*handle); // do execute query bool freehandle = false;
bool buildRes = qTableQuery(*handle); // do execute query
// build query rsp
if (buildRes) {
// update the connection info according to the retrieve connection
pReadMsg->rpcMsg.handle = qGetResultRetrieveMsg(*handle);
assert(pReadMsg->rpcMsg.handle != NULL);
vDebug("vgId:%d, QInfo:%p, start to build result rsp after query paused, %p", pVnode->vgId, *handle, pReadMsg->rpcMsg.handle);
code = vnodeDumpQueryResult(&pReadMsg->rspRet, pVnode, *handle, &freehandle);
// todo test the error code case
if (code == TSDB_CODE_SUCCESS) {
code = TSDB_CODE_QRY_HAS_RSP;
}
}
qReleaseQInfo(pVnode->qMgmt, (void**) &handle, freehandle);
} }
qReleaseQInfo(pVnode->qMgmt, (void**) &handle, false);
} }
return code; return code;
...@@ -176,7 +228,7 @@ static int32_t vnodeProcessFetchMsg(SVnodeObj *pVnode, SReadMsg *pReadMsg) { ...@@ -176,7 +228,7 @@ static int32_t vnodeProcessFetchMsg(SVnodeObj *pVnode, SReadMsg *pReadMsg) {
pRetrieve->qhandle = htobe64(pRetrieve->qhandle); pRetrieve->qhandle = htobe64(pRetrieve->qhandle);
pRetrieve->free = htons(pRetrieve->free); pRetrieve->free = htons(pRetrieve->free);
vDebug("vgId:%d, QInfo:%p, retrieve msg is disposed", pVnode->vgId, (void*) pRetrieve->qhandle); vDebug("vgId:%d, QInfo:%p, retrieve msg is disposed, free:%d, conn:%p", pVnode->vgId, (void*) pRetrieve->qhandle, pRetrieve->free, pReadMsg->rpcMsg.handle);
memset(pRet, 0, sizeof(SRspRet)); memset(pRet, 0, sizeof(SRspRet));
...@@ -185,16 +237,8 @@ static int32_t vnodeProcessFetchMsg(SVnodeObj *pVnode, SReadMsg *pReadMsg) { ...@@ -185,16 +237,8 @@ static int32_t vnodeProcessFetchMsg(SVnodeObj *pVnode, SReadMsg *pReadMsg) {
if (handle == NULL || (*handle) != (void*) pRetrieve->qhandle) { if (handle == NULL || (*handle) != (void*) pRetrieve->qhandle) {
code = TSDB_CODE_QRY_INVALID_QHANDLE; code = TSDB_CODE_QRY_INVALID_QHANDLE;
vDebug("vgId:%d, invalid qhandle in fetch result, QInfo:%p", pVnode->vgId, (void*) pRetrieve->qhandle); vDebug("vgId:%d, invalid qhandle in fetch result, QInfo:%p", pVnode->vgId, (void*) pRetrieve->qhandle);
pRet->rsp = (SRetrieveTableRsp *)rpcMallocCont(sizeof(SRetrieveTableRsp)); vnodeBuildNoResultQueryRsp(pRet);
pRet->len = sizeof(SRetrieveTableRsp);
memset(pRet->rsp, 0, sizeof(SRetrieveTableRsp));
SRetrieveTableRsp* pRsp = pRet->rsp;
pRsp->numOfRows = 0;
pRsp->useconds = 0;
pRsp->completed = true;
return code; return code;
} }
...@@ -203,35 +247,25 @@ static int32_t vnodeProcessFetchMsg(SVnodeObj *pVnode, SReadMsg *pReadMsg) { ...@@ -203,35 +247,25 @@ static int32_t vnodeProcessFetchMsg(SVnodeObj *pVnode, SReadMsg *pReadMsg) {
qKillQuery(*handle); qKillQuery(*handle);
qReleaseQInfo(pVnode->qMgmt, (void**) &handle, true); qReleaseQInfo(pVnode->qMgmt, (void**) &handle, true);
pRet->rsp = (SRetrieveTableRsp *)rpcMallocCont(sizeof(SRetrieveTableRsp)); vnodeBuildNoResultQueryRsp(pRet);
pRet->len = sizeof(SRetrieveTableRsp);
memset(pRet->rsp, 0, sizeof(SRetrieveTableRsp));
SRetrieveTableRsp* pRsp = pRet->rsp;
pRsp->numOfRows = 0;
pRsp->completed = true;
pRsp->useconds = 0;
return code; return code;
} }
bool freeHandle = true; bool freeHandle = true;
code = qRetrieveQueryResultInfo(*handle); bool buildRes = false;
code = qRetrieveQueryResultInfo(*handle, &buildRes, pReadMsg->rpcMsg.handle);
if (code != TSDB_CODE_SUCCESS) { if (code != TSDB_CODE_SUCCESS) {
//TODO handle malloc failure //TODO handle malloc failure
pRet->rsp = (SRetrieveTableRsp *)rpcMallocCont(sizeof(SRetrieveTableRsp)); pRet->rsp = (SRetrieveTableRsp *)rpcMallocCont(sizeof(SRetrieveTableRsp));
memset(pRet->rsp, 0, sizeof(SRetrieveTableRsp)); memset(pRet->rsp, 0, sizeof(SRetrieveTableRsp));
} else { // if failed to dump result, free qhandle immediately } else { // result is not ready, return immediately
if ((code = qDumpRetrieveResult(*handle, (SRetrieveTableRsp **)&pRet->rsp, &pRet->len)) == TSDB_CODE_SUCCESS) { if (!buildRes) {
if (qHasMoreResultsToRetrieve(*handle)) { qReleaseQInfo(pVnode->qMgmt, (void**) &handle, false);
vnodePutItemIntoReadQueue(pVnode, *handle); return TSDB_CODE_QRY_NOT_READY;
pRet->qhandle = *handle;
freeHandle = false;
} else {
qKillQuery(*handle);
freeHandle = true;
}
} }
code = vnodeDumpQueryResult(pRet, pVnode, *handle, &freeHandle);
} }
qReleaseQInfo(pVnode->qMgmt, (void**) &handle, freeHandle); qReleaseQInfo(pVnode->qMgmt, (void**) &handle, freeHandle);
......
...@@ -93,6 +93,8 @@ run general/parser/groupby.sim ...@@ -93,6 +93,8 @@ run general/parser/groupby.sim
sleep 2000 sleep 2000
run general/parser/tags_filter.sim run general/parser/tags_filter.sim
sleep 2000 sleep 2000
run general/parser/topbot.sim
sleep 2000
run general/parser/union.sim run general/parser/union.sim
sleep 2000 sleep 2000
run general/parser/sliding.sim run general/parser/sliding.sim
......
# topbot.sim: populates one super table with ten child tables, then checks
# the row count reported after a top() query on the super table and after
# a last() query on a child table whose c2 column was written as NULL.
# Stop any running dnodes, deploy dnode1 with walLevel set to 0 and start it.
system sh/stop_dnodes.sh
system sh/deploy.sh -n dnode1 -i 1
system sh/cfg.sh -n dnode1 -c walLevel -v 0
system sh/exec.sh -n dnode1 -s start
sleep 3000
sql connect
# Name prefixes and sizing: 10 child tables, 1000 rows per table.
$dbPrefix = tb_db
$tbPrefix = tb_tb
$stbPrefix = tb_stb
$tbNum = 10
$rowNum = 1000
$totalNum = $tbNum * $rowNum
# NOTE(review): $totalNum, $loops and $log are not referenced again in this script.
$loops = 200000
$log = 10000
# Timestamp of the first row and the step added per row.
$ts0 = 1537146000000
$delta = 600000
print ========== topbot.sim
$i = 0
$db = $dbPrefix . $i
$stb = $stbPrefix . $i
# NOTE(review): "-x step1" presumably continues at step1 when the drop fails
# (e.g. the database does not exist yet) - confirm against the sim runner.
sql drop database $db -x step1
step1:
sql create database $db cache 16 maxtables 200
print ====== create tables
sql use $db
sql create table $stb (ts timestamp, c1 int, c2 bigint, c3 float, c4 double, c5 smallint, c6 tinyint, c7 bool, c8 binary(10), c9 nchar(10)) tags(t1 int)
$i = 0
$ts = $ts0
$halfNum = $tbNum / 2
# Each pass creates two child tables: $tb tagged with $i and $tb1 tagged
# with $i + $halfNum, then fills both with $rowNum rows.
while $i < $halfNum
$tbId = $i + $halfNum
$tb = $tbPrefix . $i
$tb1 = $tbPrefix . $tbId
sql create table $tb using $stb tags( $i )
sql create table $tb1 using $stb tags( $tbId )
$x = 0
while $x < $rowNum
$xs = $x * $delta
$ts = $ts0 + $xs
# $c = $x - ($x / 10) * 10; presumably $x modulo 10 if the division truncates - confirm.
$c = $x / 10
$c = $c * 10
$c = $x - $c
# Build the quoted literals 'binary<c>' and 'nchar<c>' for columns c8 and c9.
$binary = 'binary . $c
$binary = $binary . '
$nchar = 'nchar . $c
$nchar = $nchar . '
# $tb receives a value in every column; $tb1 receives NULL for c2 and c4.
sql insert into $tb values ( $ts , $c , $c , $c , $c , $c , $c , true, $binary , $nchar )
sql insert into $tb1 values ( $ts , $c , NULL , $c , NULL , $c , $c , true, $binary , $nchar )
$x = $x + 1
endw
$i = $i + 1
endw
print ====== tables created
sql use $db
##### select from table
print ====== select top/bot from table and check num of rows returned
# The script fails (return -1) unless $row equals 100 after top(c1, 100) on the super table.
sql select top(c1, 100) from tb_stb0
if $row != 100 then
return -1
endi
# tb_tb9 was populated with NULL in c2; the script fails unless $row equals 1 after last(c2).
sql select last(c2) from tb_tb9
if $row != 1 then
return -1
endi
system sh/exec.sh -n dnode1 -s stop -x SIGINT
\ No newline at end of file
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册