diff --git a/src/client/inc/tsclient.h b/src/client/inc/tsclient.h index 17840df4a4062002b54f4aea5293427a5a7a1c3b..7efcd54cfda619d24e3457f1834ff77a8139828d 100644 --- a/src/client/inc/tsclient.h +++ b/src/client/inc/tsclient.h @@ -31,8 +31,8 @@ extern "C" { #include "tutil.h" #include "qExecutor.h" +#include "qSqlparser.h" #include "qTsbuf.h" -#include "qsqlparser.h" #include "tcmdtype.h" // forward declaration diff --git a/src/client/src/tscAsync.c b/src/client/src/tscAsync.c index 9dd33e03cb21d8dfc807a8d8e1f91042621c7633..85cff4ba17859fe0ca48ce1e5b9a280765e0c79a 100644 --- a/src/client/src/tscAsync.c +++ b/src/client/src/tscAsync.c @@ -430,7 +430,7 @@ void tscTableMetaCallBack(void *param, TAOS_RES *res, int code) { pRes->code = code; if (code != TSDB_CODE_SUCCESS) { - tscError("%p ge tableMeta failed, code:%s", pSql, tstrerror(code)); + tscError("%p get tableMeta failed, code:%s", pSql, tstrerror(code)); goto _error; } else { tscDebug("%p get tableMeta successfully", pSql); diff --git a/src/client/src/tscFunctionImpl.c b/src/client/src/tscFunctionImpl.c index 72ccd5adc6ee835e9e25f9b9b8b2511cad797910..17f6c97ea16223d692d4b8d06c00856ab477d09b 100644 --- a/src/client/src/tscFunctionImpl.c +++ b/src/client/src/tscFunctionImpl.c @@ -2131,6 +2131,11 @@ static STopBotInfo *getTopBotOutputInfo(SQLFunctionCtx *pCtx) { } bool topbot_datablock_filter(SQLFunctionCtx *pCtx, int32_t functionId, const char *minval, const char *maxval) { + SResultInfo *pResInfo = GET_RES_INFO(pCtx); + if (pResInfo == NULL) { + return true; + } + STopBotInfo *pTopBotInfo = getTopBotOutputInfo(pCtx); // required number of results are not reached, continue load data block diff --git a/src/client/src/tscLocalMerge.c b/src/client/src/tscLocalMerge.c index 80fc82d90b1856753c4c88268c650147fed9e124..bf76b8cbe8a9fe31ef139db1a8b6f266a4b13c3b 100644 --- a/src/client/src/tscLocalMerge.c +++ b/src/client/src/tscLocalMerge.c @@ -691,9 +691,15 @@ int32_t tscLocalReducerEnvCreate(SSqlObj *pSql, tExtMemBuffer ***pMemBuffer, tOr pModel = createColumnModel(pSchema, size, capacity); + int32_t pg = DEFAULT_PAGE_SIZE; + int32_t overhead = sizeof(tFilePage); + while((pg - overhead) < pModel->rowSize * 2) { + pg *= 2; + } + size_t numOfSubs = pTableMetaInfo->vgroupList->numOfVgroups; for (int32_t i = 0; i < numOfSubs; ++i) { - (*pMemBuffer)[i] = createExtMemBuffer(nBufferSizes, rlen, pModel); + (*pMemBuffer)[i] = createExtMemBuffer(nBufferSizes, rlen, pg, pModel); (*pMemBuffer)[i]->flushModel = MULTIPLE_APPEND_MODEL; } diff --git a/src/client/src/tscSubquery.c b/src/client/src/tscSubquery.c index 5d26d09fae1dcc6c947375004703a9e6621ec629..95d65f4aff325acdf05a4ec30c867e60e53b8fb4 100644 --- a/src/client/src/tscSubquery.c +++ b/src/client/src/tscSubquery.c @@ -1505,12 +1505,11 @@ static int32_t tscReissueSubquery(SRetrieveSupport *trsupport, SSqlObj *pSql, in SSqlObj *pNew = tscCreateSqlObjForSubquery(trsupport->pParentSql, trsupport, pSql); - // todo add to async res or not?? if (pNew == NULL) { - tscError("%p sub:%p failed to create new subquery due to out of memory, abort retry, vgId:%d, orderOfSub:%d", - trsupport->pParentSql, pSql, pVgroup->vgId, trsupport->subqueryIndex); + tscError("%p sub:%p failed to create new subquery due to error:%s, abort retry, vgId:%d, orderOfSub:%d", + trsupport->pParentSql, pSql, tstrerror(terrno), pVgroup->vgId, trsupport->subqueryIndex); - pParentSql->res.code = TSDB_CODE_TSC_OUT_OF_MEMORY; + pParentSql->res.code = terrno; trsupport->numOfRetry = MAX_NUM_OF_SUBQUERY_RETRY; return pParentSql->res.code; diff --git a/src/client/src/tscSystem.c b/src/client/src/tscSystem.c index bf1b67426089a91244f6e4d05863a6dfa5c90ee9..42356108b18dcbe6c78a619fd6f100ea493600c5 100644 --- a/src/client/src/tscSystem.c +++ b/src/client/src/tscSystem.c @@ -148,7 +148,7 @@ void taos_init_imp() { refreshTime = refreshTime < 10 ? 10 : refreshTime; if (tscCacheHandle == NULL) { - tscCacheHandle = taosCacheInit(TSDB_DATA_TYPE_BINARY, refreshTime, false, NULL, "client"); + tscCacheHandle = taosCacheInit(TSDB_DATA_TYPE_BINARY, refreshTime, false, NULL, "tableMeta"); } tscDebug("client is initialized successfully"); diff --git a/src/client/src/tscUtil.c b/src/client/src/tscUtil.c index 17adc0c03d804280c43df0326a955cd29fd1aa60..e7fa2a84a9d23ca2e94a25679955c8536f6d483d 100644 --- a/src/client/src/tscUtil.c +++ b/src/client/src/tscUtil.c @@ -356,9 +356,9 @@ void tscPartiallyFreeSqlObj(SSqlObj* pSql) { // pSql->sqlstr will be used by tscBuildQueryStreamDesc if (pObj->signature == pObj) { - pthread_mutex_lock(&pObj->mutex); + //pthread_mutex_lock(&pObj->mutex); tfree(pSql->sqlstr); - pthread_mutex_unlock(&pObj->mutex); + //pthread_mutex_unlock(&pObj->mutex); } tscFreeSqlResult(pSql); @@ -1675,6 +1675,7 @@ SSqlObj* createSubqueryObj(SSqlObj* pSql, int16_t tableIndex, void (*fp)(), void SSqlObj* pNew = (SSqlObj*)calloc(1, sizeof(SSqlObj)); if (pNew == NULL) { tscError("%p new subquery failed, tableIndex:%d", pSql, tableIndex); + terrno = TSDB_CODE_TSC_OUT_OF_MEMORY; return NULL; } @@ -1688,6 +1689,7 @@ SSqlObj* createSubqueryObj(SSqlObj* pSql, int16_t tableIndex, void (*fp)(), void tscError("%p new subquery failed, tableIndex:%d, vgroupIndex:%d", pSql, tableIndex, pTableMetaInfo->vgroupIndex); free(pNew); + terrno = TSDB_CODE_TSC_OUT_OF_MEMORY; return NULL; } @@ -1706,6 +1708,7 @@ SSqlObj* createSubqueryObj(SSqlObj* pSql, int16_t tableIndex, void (*fp)(), void if (tscAddSubqueryInfo(pnCmd) != TSDB_CODE_SUCCESS) { tscFreeSqlObj(pNew); + terrno = TSDB_CODE_TSC_OUT_OF_MEMORY; return NULL; } @@ -1743,6 +1746,7 @@ SSqlObj* createSubqueryObj(SSqlObj* pSql, int16_t tableIndex, void (*fp)(), void if (tscAllocPayload(pnCmd, TSDB_DEFAULT_PAYLOAD_SIZE) != TSDB_CODE_SUCCESS) { tscError("%p new subquery failed, tableIndex:%d, vgroupIndex:%d", pSql, tableIndex, pTableMetaInfo->vgroupIndex); tscFreeSqlObj(pNew); + terrno = TSDB_CODE_TSC_OUT_OF_MEMORY; return NULL; } @@ -1827,8 +1831,16 @@ SSqlObj* createSubqueryObj(SSqlObj* pSql, int16_t tableIndex, void (*fp)(), void } if (pFinalInfo->pTableMeta == NULL) { - tscError("%p new subquery failed for get tableMeta is NULL from cache", pSql); + tscError("%p new subquery failed since no tableMeta in cache, name:%s", pSql, name); tscFreeSqlObj(pNew); + + if (pPrevSql != NULL) { + assert(pPrevSql->res.code != TSDB_CODE_SUCCESS); + terrno = pPrevSql->res.code; + } else { + terrno = TSDB_CODE_TSC_APP_ERROR; + } + return NULL; } diff --git a/src/dnode/src/dnodeVRead.c b/src/dnode/src/dnodeVRead.c index bbea1a5e0bb52a56cfbf5a23dd29a17e094586a9..cb53bb5e60b1d4f0316c776644e7582af0579ebe 100644 --- a/src/dnode/src/dnodeVRead.c +++ b/src/dnode/src/dnodeVRead.c @@ -49,7 +49,7 @@ static taos_qset readQset; int32_t dnodeInitVnodeRead() { readQset = taosOpenQset(); - readPool.min = 2; + readPool.min = tsNumOfCores; readPool.max = tsNumOfCores * tsNumOfThreadsPerCore; if (readPool.max <= readPool.min * 2) readPool.max = 2 * readPool.min; readPool.readWorker = (SReadWorker *)calloc(sizeof(SReadWorker), readPool.max); @@ -206,10 +206,14 @@ static void *dnodeProcessReadQueue(void *param) { taosMsg[pReadMsg->rpcMsg.msgType], type); int32_t code = vnodeProcessRead(pVnode, pReadMsg); - if (type == TAOS_QTYPE_RPC) { + if (type == TAOS_QTYPE_RPC && code != TSDB_CODE_QRY_NOT_READY) { dnodeSendRpcReadRsp(pVnode, pReadMsg, code); } else { - dnodeDispatchNonRspMsg(pVnode, pReadMsg, code); + if (code == TSDB_CODE_QRY_HAS_RSP) { + dnodeSendRpcReadRsp(pVnode, pReadMsg, TSDB_CODE_SUCCESS); + } else { + dnodeDispatchNonRspMsg(pVnode, pReadMsg, code); + } } taosFreeQitem(pReadMsg); diff --git a/src/inc/query.h b/src/inc/query.h index d201b649f9de6aa55e0b9f1e0aa8c9aff11a6092..ccff05bc1be0bca05e83b9ad89a4ded7f018867a 100644 --- a/src/inc/query.h +++ b/src/inc/query.h @@ -28,7 +28,7 @@ typedef void* qinfo_t; * @param qinfo * @return */ -int32_t qCreateQueryInfo(void* tsdb, int32_t vgId, SQueryTableMsg* pQueryTableMsg, void* param, qinfo_t* qinfo); +int32_t qCreateQueryInfo(void* tsdb, int32_t vgId, SQueryTableMsg* pQueryTableMsg, qinfo_t* qinfo); /** @@ -38,7 +38,10 @@ int32_t qCreateQueryInfo(void* tsdb, int32_t vgId, SQueryTableMsg* pQueryTableMs * @param qinfo * @return */ -void qTableQuery(qinfo_t qinfo); +bool qTableQuery(qinfo_t qinfo); + +void* pGetRspMsg(qinfo_t qinfo); + /** * Retrieve the produced results information, if current query is not paused or completed, @@ -48,7 +51,7 @@ void qTableQuery(qinfo_t qinfo); * @param qinfo * @return */ -int32_t qRetrieveQueryResultInfo(qinfo_t qinfo); +int32_t qRetrieveQueryResultInfo(qinfo_t qinfo, bool* buildRes, void* pRspContext); /** * @@ -60,16 +63,9 @@ int32_t qRetrieveQueryResultInfo(qinfo_t qinfo); * @param contLen payload length * @return */ -int32_t qDumpRetrieveResult(qinfo_t qinfo, SRetrieveTableRsp** pRsp, int32_t* contLen); +int32_t qDumpRetrieveResult(qinfo_t qinfo, SRetrieveTableRsp** pRsp, int32_t* contLen, bool* continueExec); -/** - * Decide if more results will be produced or not, NOTE: this function will increase the ref count of QInfo, - * so it can be only called once for each retrieve - * - * @param qinfo - * @return - */ -bool qHasMoreResultsToRetrieve(qinfo_t qinfo); +void* qGetResultRetrieveMsg(qinfo_t qinfo); /** * kill current ongoing query and free query handle automatically diff --git a/src/inc/taoserror.h b/src/inc/taoserror.h index 1f03f1fe097930cf2fff8b30a0bfbb81515e2fb2..bb3a6ee78e197f1bddbb661cd9d7522b79d54a58 100644 --- a/src/inc/taoserror.h +++ b/src/inc/taoserror.h @@ -216,6 +216,8 @@ TAOS_DEFINE_ERROR(TSDB_CODE_QRY_OUT_OF_MEMORY, 0, 0x0703, "query out TAOS_DEFINE_ERROR(TSDB_CODE_QRY_APP_ERROR, 0, 0x0704, "query app error") TAOS_DEFINE_ERROR(TSDB_CODE_QRY_DUP_JOIN_KEY, 0, 0x0705, "query duplicated join key") TAOS_DEFINE_ERROR(TSDB_CODE_QRY_EXCEED_TAGS_LIMIT, 0, 0x0706, "query tag conditon too many") +TAOS_DEFINE_ERROR(TSDB_CODE_QRY_NOT_READY, 0, 0x0707, "query not ready") +TAOS_DEFINE_ERROR(TSDB_CODE_QRY_HAS_RSP, 0, 0x0708, "query should response") // grant TAOS_DEFINE_ERROR(TSDB_CODE_GRANT_EXPIRED, 0, 0x0800, "grant expired") diff --git a/src/mnode/src/mnodeProfile.c b/src/mnode/src/mnodeProfile.c index 9121f31131bc0cafa85830409bf9d40390da6790..30a292f522cb0fa380a30b2c990b2858f9242adf 100644 --- a/src/mnode/src/mnodeProfile.c +++ b/src/mnode/src/mnodeProfile.c @@ -68,7 +68,7 @@ int32_t mnodeInitProfile() { mnodeAddWriteMsgHandle(TSDB_MSG_TYPE_CM_KILL_STREAM, mnodeProcessKillStreamMsg); mnodeAddWriteMsgHandle(TSDB_MSG_TYPE_CM_KILL_CONN, mnodeProcessKillConnectionMsg); - tsMnodeConnCache = taosCacheInit(TSDB_DATA_TYPE_INT, CONN_CHECK_TIME, false, mnodeFreeConn, "conn"); + tsMnodeConnCache = taosCacheInit(TSDB_DATA_TYPE_INT, CONN_CHECK_TIME, true, mnodeFreeConn, "conn"); return 0; } @@ -119,7 +119,7 @@ SConnObj *mnodeAccquireConn(int32_t connId, char *user, uint32_t ip, uint16_t po return NULL; } - if (pConn->ip != ip || pConn->port != port /* || strcmp(pConn->user, user) != 0 */) { + if (/* pConn->ip != ip || */ pConn->port != port /* || strcmp(pConn->user, user) != 0 */) { mError("connId:%d, incoming conn user:%s ip:%s:%u, not match exist conn user:%s ip:%s:%u", connId, user, taosIpStr(ip), port, pConn->user, taosIpStr(pConn->ip), pConn->port); taosCacheRelease(tsMnodeConnCache, (void **)&pConn, false); diff --git a/src/plugins/http/src/httpContext.c b/src/plugins/http/src/httpContext.c index 225977abae698ccfeb12c36562d5588a3f166027..ca65f6560816935ae3e453d97093402b90d166e9 100644 --- a/src/plugins/http/src/httpContext.c +++ b/src/plugins/http/src/httpContext.c @@ -58,7 +58,7 @@ static void httpDestroyContext(void *data) { } bool httpInitContexts() { - tsHttpServer.contextCache = taosCacheInit(TSDB_DATA_TYPE_BIGINT, 2, false, httpDestroyContext, "restc"); + tsHttpServer.contextCache = taosCacheInit(TSDB_DATA_TYPE_BIGINT, 2, true, httpDestroyContext, "restc"); if (tsHttpServer.contextCache == NULL) { httpError("failed to init context cache"); return false; diff --git a/src/query/inc/qExecutor.h b/src/query/inc/qExecutor.h index 42dfce42a6f893af32c5175bc52a094658c9a3ab..bd2e0a4470a4c067a4f1da1af9642351e6391d9a 100644 --- a/src/query/inc/qExecutor.h +++ b/src/query/inc/qExecutor.h @@ -20,8 +20,8 @@ #include "hash.h" #include "qFill.h" #include "qResultbuf.h" +#include "qSqlparser.h" #include "qTsbuf.h" -#include "qsqlparser.h" #include "query.h" #include "taosdef.h" #include "tarray.h" @@ -43,7 +43,7 @@ typedef struct SSqlGroupbyExpr { typedef struct SPosInfo { int32_t pageId; - int16_t rowId; + int32_t rowId; } SPosInfo; typedef struct SWindowStatus { @@ -177,13 +177,18 @@ typedef struct SQueryRuntimeEnv { SDiskbasedResultBuf* pResultBuf; // query result buffer based on blocked-wised disk file } SQueryRuntimeEnv; +enum { + QUERY_RESULT_NOT_READY = 1, + QUERY_RESULT_READY = 2, +}; + typedef struct SQInfo { void* signature; int32_t pointsInterpo; int32_t code; // error code to returned to client - sem_t dataReady; +// sem_t dataReady; + void* tsdb; - void* param; int32_t vgId; STableGroupInfo tableGroupInfo; // table id list < only includes the STable list> STableGroupInfo tableqinfoGroupInfo; // this is a group array list, including SArray structure @@ -200,8 +205,11 @@ typedef struct SQInfo { */ int32_t tableIndex; int32_t numOfGroupResultPages; - void* pBuf; // allocated buffer for STableQueryInfo, sizeof(STableQueryInfo)*numOfTables; + void* pBuf; // allocated buffer for STableQueryInfo, sizeof(STableQueryInfo)*numOfTables; + pthread_mutex_t lock; // used to synchronize the rsp/query threads + int32_t dataReady; // denote if query result is ready or not + void* rspContext; // response context } SQInfo; #endif // TDENGINE_QUERYEXECUTOR_H diff --git a/src/query/inc/qExtbuffer.h b/src/query/inc/qExtbuffer.h index b57c48933f0878ca86384219b1c7f66446db28eb..36fc0c9820a8338973e718c473e7619da1990ad8 100644 --- a/src/query/inc/qExtbuffer.h +++ b/src/query/inc/qExtbuffer.h @@ -19,7 +19,6 @@ extern "C" { #endif - #include "os.h" #include "taosmsg.h" @@ -28,9 +27,9 @@ extern "C" { #include "tdataformat.h" #include "talgo.h" -#define DEFAULT_PAGE_SIZE (1024L*4) // 16k larger than the SHistoInfo -#define MAX_TMPFILE_PATH_LENGTH PATH_MAX +#define MAX_TMPFILE_PATH_LENGTH PATH_MAX #define INITIAL_ALLOCATION_BUFFER_SIZE 64 +#define DEFAULT_PAGE_SIZE (4096L) // 16k larger than the SHistoInfo typedef enum EXT_BUFFER_FLUSH_MODEL { /* @@ -126,7 +125,7 @@ typedef struct tExtMemBuffer { * @param pModel * @return */ -tExtMemBuffer *createExtMemBuffer(int32_t inMemSize, int32_t elemSize, SColumnModel *pModel); +tExtMemBuffer *createExtMemBuffer(int32_t inMemSize, int32_t elemSize, int32_t pagesize, SColumnModel *pModel); /** * diff --git a/src/query/inc/qResultbuf.h b/src/query/inc/qResultbuf.h index 8c8afb0957c862042e1da99e211ed06e091dee4a..5303251d981f9e12bb92088b65cf6231d6e45f88 100644 --- a/src/query/inc/qResultbuf.h +++ b/src/query/inc/qResultbuf.h @@ -13,50 +13,85 @@ * along with this program. If not, see . */ -#ifndef TDENGINE_VNODEQUERYUTIL_H -#define TDENGINE_VNODEQUERYUTIL_H +#ifndef TDENGINE_QRESULTBUF_H +#define TDENGINE_QRESULTBUF_H #ifdef __cplusplus extern "C" { #endif +#include #include "hash.h" #include "os.h" #include "qExtbuffer.h" +#include "tlockfree.h" typedef struct SArray* SIDList; +typedef struct SPageDiskInfo { + int32_t offset; + int32_t length; +} SPageDiskInfo; + +typedef struct SPageInfo { + SListNode* pn; // point to list node + int32_t pageId; + SPageDiskInfo info; + void* pData; + bool used; // set current page is in used +} SPageInfo; + +typedef struct SFreeListItem { + int32_t offset; + int32_t len; +} SFreeListItem; + +typedef struct SResultBufStatis { + int32_t flushBytes; + int32_t loadBytes; + int32_t getPages; + int32_t releasePages; + int32_t flushPages; +} SResultBufStatis; + typedef struct SDiskbasedResultBuf { int32_t numOfRowsPerPage; int32_t numOfPages; int64_t totalBufSize; - int32_t fd; // data file fd + int64_t fileSize; // disk file size + FILE* file; int32_t allocateId; // allocated page id - int32_t incStep; // minimum allocated pages - void* pBuf; // mmap buffer pointer char* path; // file path int32_t pageSize; // current used page size int32_t inMemPages; // numOfPages that are allocated in memory - SHashObj* idsTable; // id hash table - SIDList list; // for each id, there is a page id list - - void* iBuf; // inmemory buf - void* handle; // for debug purpose + SHashObj* groupSet; // id hash table + SHashObj* all; + SList* lruList; void* emptyDummyIdList; // dummy id list + void* assistBuf; // assistant buffer for compress/decompress data + SArray* pFree; // free area in file + bool comp; // compressed before flushed to disk + int32_t nextPos; // next page flush position + + const void* handle; // for debug purpose + SResultBufStatis statis; } SDiskbasedResultBuf; -#define DEFAULT_INTERN_BUF_PAGE_SIZE (1024L) +#define DEFAULT_INTERN_BUF_PAGE_SIZE (4096L) #define DEFAULT_INMEM_BUF_PAGES 10 +#define PAGE_INFO_INITIALIZER (SPageDiskInfo){-1, -1} /** * create disk-based result buffer * @param pResultBuf - * @param size * @param rowSize + * @param pagesize + * @param inMemPages + * @param handle * @return */ -int32_t createDiskbasedResultBuffer(SDiskbasedResultBuf** pResultBuf, int32_t numOfPages, int32_t rowSize, int32_t pagesize, - int32_t inMemPages, void* handle); +int32_t createDiskbasedResultBuffer(SDiskbasedResultBuf** pResultBuf, int32_t rowSize, int32_t pagesize, + int32_t inMemBufSize, const void* handle); /** * @@ -72,7 +107,7 @@ tFilePage* getNewDataBuf(SDiskbasedResultBuf* pResultBuf, int32_t groupId, int32 * @param pResultBuf * @return */ -int32_t getNumOfRowsPerPage(SDiskbasedResultBuf* pResultBuf); +size_t getNumOfRowsPerPage(const SDiskbasedResultBuf* pResultBuf); /** * @@ -88,42 +123,52 @@ SIDList getDataBufPagesIdList(SDiskbasedResultBuf* pResultBuf, int32_t groupId); * @param id * @return */ -static FORCE_INLINE tFilePage* getResBufPage(SDiskbasedResultBuf* pResultBuf, int32_t id) { - if (id < pResultBuf->inMemPages) { - return (tFilePage*) ((char*) pResultBuf->iBuf + id * pResultBuf->pageSize); - } else { - return (tFilePage*) ((char*) pResultBuf->pBuf + (id - pResultBuf->inMemPages) * pResultBuf->pageSize); - } -} +tFilePage* getResBufPage(SDiskbasedResultBuf* pResultBuf, int32_t id); + +/** + * release the referenced buf pages + * @param pResultBuf + * @param page + */ +void releaseResBufPage(SDiskbasedResultBuf* pResultBuf, void* page); + +/** + * + * @param pResultBuf + * @param pi + */ +void releaseResBufPageInfo(SDiskbasedResultBuf* pResultBuf, SPageInfo* pi); + + /** * get the total buffer size in the format of disk file * @param pResultBuf * @return */ -int32_t getResBufSize(SDiskbasedResultBuf* pResultBuf); +size_t getResBufSize(const SDiskbasedResultBuf* pResultBuf); /** * get the number of groups in the result buffer * @param pResultBuf * @return */ -int32_t getNumOfResultBufGroupId(SDiskbasedResultBuf* pResultBuf); +size_t getNumOfResultBufGroupId(const SDiskbasedResultBuf* pResultBuf); /** * destroy result buffer * @param pResultBuf */ -void destroyResultBuf(SDiskbasedResultBuf* pResultBuf, void* handle); +void destroyResultBuf(SDiskbasedResultBuf* pResultBuf); /** * * @param pList * @return */ -int32_t getLastPageId(SIDList pList); +SPageInfo* getLastPageInfo(SIDList pList); #ifdef __cplusplus } #endif -#endif // TDENGINE_VNODEQUERYUTIL_H +#endif // TDENGINE_QRESULTBUF_H diff --git a/src/query/inc/qsqlparser.h b/src/query/inc/qSqlparser.h similarity index 100% rename from src/query/inc/qsqlparser.h rename to src/query/inc/qSqlparser.h diff --git a/src/query/inc/qUtil.h b/src/query/inc/qUtil.h index 7119cb75fe9f6604e42a96406ff631840477d5f7..ed7c7e8845317ac754758b8553e3f65ba8da4544 100644 --- a/src/query/inc/qUtil.h +++ b/src/query/inc/qUtil.h @@ -45,13 +45,14 @@ bool isWindowResClosed(SWindowResInfo *pWindowResInfo, int32_t slot); int32_t createQueryResultInfo(SQuery *pQuery, SWindowResult *pResultRow, bool isSTableQuery, size_t interBufSize); -static FORCE_INLINE char *getPosInResultPage(SQueryRuntimeEnv *pRuntimeEnv, int32_t columnIndex, SWindowResult *pResult) { +static FORCE_INLINE char *getPosInResultPage(SQueryRuntimeEnv *pRuntimeEnv, int32_t columnIndex, SWindowResult *pResult, + tFilePage* page) { assert(pResult != NULL && pRuntimeEnv != NULL); SQuery *pQuery = pRuntimeEnv->pQuery; - tFilePage *page = getResBufPage(pRuntimeEnv->pResultBuf, pResult->pos.pageId); - int32_t realRowId = pResult->pos.rowId * GET_ROW_PARAM_FOR_MULTIOUTPUT(pQuery, pRuntimeEnv->topBotQuery, pRuntimeEnv->stableQuery); +// tFilePage *page = getResBufPage(pRuntimeEnv->pResultBuf, pResult->pos.pageId); + int32_t realRowId = pResult->pos.rowId * GET_ROW_PARAM_FOR_MULTIOUTPUT(pQuery, pRuntimeEnv->topBotQuery, pRuntimeEnv->stableQuery); return ((char *)page->data) + pRuntimeEnv->offset[columnIndex] * pRuntimeEnv->numOfRowsPerPage + pQuery->pSelectExpr[columnIndex].bytes * realRowId; } diff --git a/src/query/src/qAst.c b/src/query/src/qAst.c index c2578c15c0536ea222f376e833a8fe11fa269229..e3c0c1dbb0bc1e5856094c058619fa2b5cca149f 100644 --- a/src/query/src/qAst.c +++ b/src/query/src/qAst.c @@ -18,8 +18,8 @@ #include "exception.h" #include "qAst.h" +#include "qSqlparser.h" #include "qSyntaxtreefunction.h" -#include "qsqlparser.h" #include "taosdef.h" #include "taosmsg.h" #include "tarray.h" diff --git a/src/query/src/qExecutor.c b/src/query/src/qExecutor.c index 8afc2fd87a988d32f4311af876bb57e627eecf30..ea0312b875567cf120256da548217849237123d1 100644 --- a/src/query/src/qExecutor.c +++ b/src/query/src/qExecutor.c @@ -221,7 +221,7 @@ void updateNumOfResult(SQueryRuntimeEnv *pRuntimeEnv, int32_t numOfRes) { } static int32_t getGroupResultId(int32_t groupIndex) { - int32_t base = 200000; + int32_t base = 20000000; return base + (groupIndex * 10000); } @@ -478,10 +478,14 @@ static int32_t addNewWindowResultBuf(SWindowResult *pWindowRes, SDiskbasedResult if (taosArrayGetSize(list) == 0) { pData = getNewDataBuf(pResultBuf, sid, &pageId); } else { - pageId = getLastPageId(list); - pData = getResBufPage(pResultBuf, pageId); + SPageInfo* pi = getLastPageInfo(list); + pData = getResBufPage(pResultBuf, pi->pageId); + pageId = pi->pageId; if (pData->num >= numOfRowsPerPage) { + // release current page first, and prepare the next one + releaseResBufPageInfo(pResultBuf, pi); + pData = getNewDataBuf(pResultBuf, sid, &pageId); if (pData != NULL) { assert(pData->num == 0); // number of elements must be 0 for new allocated buffer @@ -497,6 +501,8 @@ static int32_t addNewWindowResultBuf(SWindowResult *pWindowRes, SDiskbasedResult if (pWindowRes->pos.pageId == -1) { // not allocated yet, allocate new buffer pWindowRes->pos.pageId = pageId; pWindowRes->pos.rowId = pData->num++; + + assert(pWindowRes->pos.pageId >= 0); } return 0; @@ -1490,8 +1496,6 @@ static int32_t setupQueryRuntimeEnv(SQueryRuntimeEnv *pRuntimeEnv, int16_t order goto _clean; } - qDebug("QInfo:%p setup runtime env1", GET_QINFO_ADDR(pRuntimeEnv)); - pRuntimeEnv->offset[0] = 0; for (int32_t i = 0; i < pQuery->numOfOutput; ++i) { SSqlFuncMsg *pSqlFuncMsg = &pQuery->pSelectExpr[i].base; @@ -1536,8 +1540,6 @@ static int32_t setupQueryRuntimeEnv(SQueryRuntimeEnv *pRuntimeEnv, int16_t order } } - qDebug("QInfo:%p setup runtime env2", GET_QINFO_ADDR(pRuntimeEnv)); - // set the order information for top/bottom query int32_t functionId = pCtx->functionId; @@ -1558,25 +1560,19 @@ static int32_t setupQueryRuntimeEnv(SQueryRuntimeEnv *pRuntimeEnv, int16_t order } } - qDebug("QInfo:%p setup runtime env3", GET_QINFO_ADDR(pRuntimeEnv)); - char* buf = (char*) pRuntimeEnv->resultInfo + sizeof(SResultInfo) * pQuery->numOfOutput; // set the intermediate result output buffer setWindowResultInfo(pRuntimeEnv->resultInfo, pQuery, pRuntimeEnv->stableQuery, buf); - qDebug("QInfo:%p setup runtime env4", GET_QINFO_ADDR(pRuntimeEnv)); - // if it is group by normal column, do not set output buffer, the output buffer is pResult if (!pRuntimeEnv->groupbyNormalCol && !pRuntimeEnv->stableQuery) { resetCtxOutputBuf(pRuntimeEnv); } - qDebug("QInfo:%p setup runtime env5", GET_QINFO_ADDR(pRuntimeEnv)); - setCtxTagColumnInfo(pRuntimeEnv, pRuntimeEnv->pCtx); - qDebug("QInfo:%p init completed", GET_QINFO_ADDR(pRuntimeEnv)); + qDebug("QInfo:%p init runtime completed", GET_QINFO_ADDR(pRuntimeEnv)); return TSDB_CODE_SUCCESS; _clean: @@ -1615,7 +1611,7 @@ static void teardownQueryRuntimeEnv(SQueryRuntimeEnv *pRuntimeEnv) { pRuntimeEnv->pFillInfo = taosDestoryFillInfo(pRuntimeEnv->pFillInfo); - destroyResultBuf(pRuntimeEnv->pResultBuf, pQInfo); + destroyResultBuf(pRuntimeEnv->pResultBuf); tsdbCleanupQueryHandle(pRuntimeEnv->pQueryHandle); tsdbCleanupQueryHandle(pRuntimeEnv->pSecQueryHandle); @@ -2111,9 +2107,6 @@ int32_t loadDataBlockOnDemand(SQueryRuntimeEnv *pRuntimeEnv, void* pQueryHandle, } if (!needToLoadDataBlock(pRuntimeEnv, *pStatis, pRuntimeEnv->pCtx, pBlockInfo->rows)) { -#if defined(_DEBUG_VIEW) - qDebug("QInfo:%p block discarded by per-filter", GET_QINFO_ADDR(pRuntimeEnv)); -#endif // current block has been discard due to filter applied pRuntimeEnv->summary.discardBlocks += 1; qDebug("QInfo:%p data block discard, brange:%"PRId64 "-%"PRId64", rows:%d", GET_QINFO_ADDR(pRuntimeEnv), @@ -2446,6 +2439,8 @@ static void doMerge(SQueryRuntimeEnv *pRuntimeEnv, int64_t timestamp, SWindowRes SQuery * pQuery = pRuntimeEnv->pQuery; SQLFunctionCtx *pCtx = pRuntimeEnv->pCtx; + tFilePage *page = getResBufPage(pRuntimeEnv->pResultBuf, pWindowRes->pos.pageId); + for (int32_t i = 0; i < pQuery->numOfOutput; ++i) { int32_t functionId = pQuery->pSelectExpr[i].base.functionId; if (!mergeFlag) { @@ -2458,7 +2453,7 @@ static void doMerge(SQueryRuntimeEnv *pRuntimeEnv, int64_t timestamp, SWindowRes pCtx[i].hasNull = true; pCtx[i].nStartQueryTimestamp = timestamp; - pCtx[i].aInputElemBuf = getPosInResultPage(pRuntimeEnv, i, pWindowRes); + pCtx[i].aInputElemBuf = getPosInResultPage(pRuntimeEnv, i, pWindowRes, page); // in case of tag column, the tag information should be extracted from input buffer if (functionId == TSDB_FUNC_TAG_DUMMY || functionId == TSDB_FUNC_TAG) { @@ -2615,14 +2610,16 @@ int32_t tableResultComparFn(const void *pLeft, const void *pRight, void *param) SWindowResInfo *pWindowResInfo1 = &supporter->pTableQueryInfo[left]->windowResInfo; SWindowResult * pWindowRes1 = getWindowResult(pWindowResInfo1, leftPos); + tFilePage *page1 = getResBufPage(pRuntimeEnv->pResultBuf, pWindowRes1->pos.pageId); - char *b1 = getPosInResultPage(pRuntimeEnv, PRIMARYKEY_TIMESTAMP_COL_INDEX, pWindowRes1); + char *b1 = getPosInResultPage(pRuntimeEnv, PRIMARYKEY_TIMESTAMP_COL_INDEX, pWindowRes1, page1); TSKEY leftTimestamp = GET_INT64_VAL(b1); SWindowResInfo *pWindowResInfo2 = &supporter->pTableQueryInfo[right]->windowResInfo; SWindowResult * pWindowRes2 = getWindowResult(pWindowResInfo2, rightPos); + tFilePage *page2 = getResBufPage(pRuntimeEnv->pResultBuf, pWindowRes2->pos.pageId); - char *b2 = getPosInResultPage(pRuntimeEnv, PRIMARYKEY_TIMESTAMP_COL_INDEX, pWindowRes2); + char *b2 = getPosInResultPage(pRuntimeEnv, PRIMARYKEY_TIMESTAMP_COL_INDEX, pWindowRes2, page2); TSKEY rightTimestamp = GET_INT64_VAL(b2); if (leftTimestamp == rightTimestamp) { @@ -2685,35 +2682,26 @@ void copyResToQueryResultBuf(SQInfo *pQInfo, SQuery *pQuery) { int32_t id = getGroupResultId(pQInfo->groupIndex - 1); SIDList list = getDataBufPagesIdList(pResultBuf, pQInfo->offset + id); - int32_t total = 0; int32_t size = taosArrayGetSize(list); - for (int32_t i = 0; i < size; ++i) { - int32_t* pgId = taosArrayGet(list, i); - tFilePage *pData = getResBufPage(pResultBuf, *pgId); - total += pData->num; - } - - int32_t rows = total; int32_t offset = 0; for (int32_t j = 0; j < size; ++j) { - int32_t* pgId = taosArrayGet(list, j); - tFilePage *pData = getResBufPage(pResultBuf, *pgId); + SPageInfo* pi = *(SPageInfo**) taosArrayGet(list, j); + tFilePage *pData = getResBufPage(pResultBuf, pi->pageId); for (int32_t i = 0; i < pQuery->numOfOutput; ++i) { int32_t bytes = pRuntimeEnv->pCtx[i].outputBytes; char * pDest = pQuery->sdata[i]->data; - - memcpy(pDest + offset * bytes, pData->data + pRuntimeEnv->offset[i] * pData->num, - bytes * pData->num); + memcpy(pDest + offset * bytes, pData->data + pRuntimeEnv->offset[i] * pData->num, bytes * pData->num); } +// rows += pData->num; offset += pData->num; } assert(pQuery->rec.rows == 0); - pQuery->rec.rows += rows; + pQuery->rec.rows += offset; pQInfo->offset += 1; } @@ -2777,7 +2765,6 @@ int32_t mergeIntoGroupResultImpl(SQInfo *pQInfo, SArray *pGroup) { assert(pQInfo->numOfGroupResultPages == 0); return 0; } else if (numOfTables == 1) { // no need to merge results since only one table in each group - } SCompSupporter cs = {pTableList, posList, pQInfo}; @@ -2802,8 +2789,9 @@ int32_t mergeIntoGroupResultImpl(SQInfo *pQInfo, SArray *pGroup) { SWindowResInfo *pWindowResInfo = &pTableList[pos]->windowResInfo; SWindowResult * pWindowRes = getWindowResult(pWindowResInfo, cs.position[pos]); + tFilePage *page = getResBufPage(pRuntimeEnv->pResultBuf, pWindowRes->pos.pageId); - char *b = getPosInResultPage(pRuntimeEnv, PRIMARYKEY_TIMESTAMP_COL_INDEX, pWindowRes); + char *b = getPosInResultPage(pRuntimeEnv, PRIMARYKEY_TIMESTAMP_COL_INDEX, pWindowRes, page); TSKEY ts = GET_INT64_VAL(b); assert(ts == pWindowRes->window.skey); @@ -3517,9 +3505,11 @@ void setWindowResOutputBuf(SQueryRuntimeEnv *pRuntimeEnv, SWindowResult *pResult SQuery *pQuery = pRuntimeEnv->pQuery; // Note: pResult->pos[i]->num == 0, there is only fixed number of results for each group + tFilePage *page = getResBufPage(pRuntimeEnv->pResultBuf, pResult->pos.pageId); + for (int32_t i = 0; i < pQuery->numOfOutput; ++i) { SQLFunctionCtx *pCtx = &pRuntimeEnv->pCtx[i]; - pCtx->aOutputBuf = getPosInResultPage(pRuntimeEnv, i, pResult); + pCtx->aOutputBuf = getPosInResultPage(pRuntimeEnv, i, pResult, page); int32_t functionId = pQuery->pSelectExpr[i].base.functionId; if (functionId == TSDB_FUNC_TOP || functionId == TSDB_FUNC_BOTTOM || functionId == TSDB_FUNC_DIFF) { @@ -3542,6 +3532,8 @@ void setWindowResOutputBufInitCtx(SQueryRuntimeEnv *pRuntimeEnv, SWindowResult * SQuery *pQuery = pRuntimeEnv->pQuery; // Note: pResult->pos[i]->num == 0, there is only fixed number of results for each group + tFilePage* bufPage = getResBufPage(pRuntimeEnv->pResultBuf, pResult->pos.pageId); + for (int32_t i = 0; i < pQuery->numOfOutput; ++i) { SQLFunctionCtx *pCtx = &pRuntimeEnv->pCtx[i]; @@ -3550,7 +3542,7 @@ void setWindowResOutputBufInitCtx(SQueryRuntimeEnv *pRuntimeEnv, SWindowResult * continue; } - pCtx->aOutputBuf = getPosInResultPage(pRuntimeEnv, i, pResult); + pCtx->aOutputBuf = getPosInResultPage(pRuntimeEnv, i, pResult, bufPage); pCtx->currentStage = 0; int32_t functionId = pCtx->functionId; @@ -3713,11 +3705,13 @@ static int32_t doCopyToSData(SQInfo *pQInfo, SWindowResInfo *pResultInfo, int32_ pQInfo->groupIndex += 1; } + tFilePage *page = getResBufPage(pRuntimeEnv->pResultBuf, result[i].pos.pageId); + for (int32_t j = 0; j < pQuery->numOfOutput; ++j) { int32_t size = pRuntimeEnv->pCtx[j].outputBytes; char *out = pQuery->sdata[j]->data + numOfResult * size; - char *in = getPosInResultPage(pRuntimeEnv, j, &result[i]); + char *in = getPosInResultPage(pRuntimeEnv, j, &result[i], page); memcpy(out, in + oldOffset * size, size * numOfRowsToCopy); } @@ -4238,10 +4232,10 @@ int32_t doInitQInfo(SQInfo *pQInfo, STSBuf *pTsBuf, void *tsdb, int32_t vgId, bo int32_t ps = DEFAULT_PAGE_SIZE; int32_t rowsize = 0; getIntermediateBufInfo(pRuntimeEnv, &ps, &rowsize); + int32_t TWOMB = 1024*1024*2; if (isSTableQuery && !onlyQueryTags(pRuntimeEnv->pQuery)) { - int32_t numOfPages = getInitialPageNum(pQInfo); - code = createDiskbasedResultBuffer(&pRuntimeEnv->pResultBuf, numOfPages, rowsize, ps, numOfPages, pQInfo); + code = createDiskbasedResultBuffer(&pRuntimeEnv->pResultBuf, rowsize, ps, TWOMB, pQInfo); if (code != TSDB_CODE_SUCCESS) { return code; } @@ -4269,8 +4263,7 @@ int32_t doInitQInfo(SQInfo *pQInfo, STSBuf *pTsBuf, void *tsdb, int32_t vgId, bo } else if (pRuntimeEnv->groupbyNormalCol || QUERY_IS_INTERVAL_QUERY(pQuery)) { int32_t numOfResultRows = getInitialPageNum(pQInfo); getIntermediateBufInfo(pRuntimeEnv, &ps, &rowsize); - - code = createDiskbasedResultBuffer(&pRuntimeEnv->pResultBuf, numOfResultRows, rowsize, ps, numOfResultRows, pQInfo); + code = createDiskbasedResultBuffer(&pRuntimeEnv->pResultBuf, rowsize, ps, TWOMB, pQInfo); if (code != TSDB_CODE_SUCCESS) { return code; } @@ -5894,16 +5887,11 @@ static SQInfo *createQInfoImpl(SQueryTableMsg *pQueryMsg, SArray* pTableIdList, } pQInfo->arrTableIdInfo = taosArrayInit(tableIndex, sizeof(STableIdInfo)); + pQInfo->dataReady = QUERY_RESULT_NOT_READY; + pthread_mutex_init(&pQInfo->lock, NULL); pQuery->pos = -1; pQuery->window = pQueryMsg->window; - - if (sem_init(&pQInfo->dataReady, 0, 0) != 0) { - int32_t code = TAOS_SYSTEM_ERROR(errno); - qError("QInfo:%p init dataReady sem failed, reason:%s", pQInfo, tstrerror(code)); - goto _cleanup; - } - colIdCheck(pQuery); qDebug("qmsg:%p QInfo:%p created", pQueryMsg, pQInfo); @@ -5945,7 +5933,7 @@ static bool isValidQInfo(void *param) { return (sig == (uint64_t)pQInfo); } -static int32_t initQInfo(SQueryTableMsg *pQueryMsg, void *tsdb, int32_t vgId, SQInfo *pQInfo, bool isSTable, void* param) { +static int32_t initQInfo(SQueryTableMsg *pQueryMsg, void *tsdb, int32_t vgId, SQInfo *pQInfo, bool isSTable) { int32_t code = TSDB_CODE_SUCCESS; SQuery *pQuery = pQInfo->runtimeEnv.pQuery; @@ -5965,18 +5953,12 @@ static int32_t initQInfo(SQueryTableMsg *pQueryMsg, void *tsdb, int32_t vgId, SQ pQuery->window.ekey, pQuery->order.order); setQueryStatus(pQuery, QUERY_COMPLETED); pQInfo->tableqinfoGroupInfo.numOfTables = 0; - - sem_post(&pQInfo->dataReady); return TSDB_CODE_SUCCESS; } - pQInfo->param = param; - if (pQInfo->tableqinfoGroupInfo.numOfTables == 0) { qDebug("QInfo:%p no table qualified for tag filter, abort query", pQInfo); setQueryStatus(pQuery, QUERY_COMPLETED); - - sem_post(&pQInfo->dataReady); return TSDB_CODE_SUCCESS; } @@ -6018,7 +6000,6 @@ static void freeQInfo(SQInfo *pQInfo) { tfree(pQuery->sdata[col]); } - sem_destroy(&(pQInfo->dataReady)); teardownQueryRuntimeEnv(&pQInfo->runtimeEnv); for (int32_t i = 0; i < pQuery->numOfFilterCols; ++i) { @@ -6170,7 +6151,7 @@ typedef struct SQueryMgmt { pthread_mutex_t lock; } SQueryMgmt; -int32_t qCreateQueryInfo(void* tsdb, int32_t vgId, SQueryTableMsg* pQueryMsg, void* param, qinfo_t* pQInfo) { +int32_t qCreateQueryInfo(void* tsdb, int32_t vgId, SQueryTableMsg* pQueryMsg, qinfo_t* pQInfo) { assert(pQueryMsg != NULL && tsdb != NULL); int32_t code = TSDB_CODE_SUCCESS; @@ -6266,7 +6247,7 @@ int32_t qCreateQueryInfo(void* tsdb, int32_t vgId, SQueryTableMsg* pQueryMsg, vo goto _over; } - code = initQInfo(pQueryMsg, tsdb, vgId, *pQInfo, isSTableQuery, param); + code = initQInfo(pQueryMsg, tsdb, vgId, *pQInfo, isSTableQuery); _over: free(tagCond); @@ -6306,24 +6287,32 @@ void qDestroyQueryInfo(qinfo_t qHandle) { freeQInfo(pQInfo); } -void qTableQuery(qinfo_t qinfo) { +static void setQueryResultReady(SQInfo* pQInfo) { + pthread_mutex_lock(&pQInfo->lock); + pQInfo->dataReady = QUERY_RESULT_READY; + pthread_mutex_unlock(&pQInfo->lock); +} + +bool qTableQuery(qinfo_t qinfo) { SQInfo *pQInfo = (SQInfo *)qinfo; if (pQInfo == NULL || pQInfo->signature != pQInfo) { qDebug("QInfo:%p has been freed, no need to execute", pQInfo); - return; + return false; } if (IS_QUERY_KILLED(pQInfo)) { qDebug("QInfo:%p it is already killed, abort", pQInfo); - sem_post(&pQInfo->dataReady); - return; + setQueryResultReady(pQInfo); + return false; } if (pQInfo->tableqinfoGroupInfo.numOfTables == 0) { + setQueryStatus(pQInfo->runtimeEnv.pQuery, QUERY_COMPLETED); + setQueryResultReady(pQInfo); + qDebug("QInfo:%p no table exists for query, abort", pQInfo); - sem_post(&pQInfo->dataReady); - return; + return false; } // error occurs, record the error code and return to client @@ -6331,8 +6320,9 @@ void qTableQuery(qinfo_t qinfo) { if (ret != TSDB_CODE_SUCCESS) { pQInfo->code = ret; qDebug("QInfo:%p query abort due to error/cancel occurs, code:%s", pQInfo, tstrerror(pQInfo->code)); - sem_post(&pQInfo->dataReady); - return; + + setQueryResultReady(pQInfo); + return false; } qDebug("QInfo:%p query task is launched", pQInfo); @@ -6357,10 +6347,20 @@ void qTableQuery(qinfo_t qinfo) { pQInfo, pQuery->rec.rows, pQuery->rec.total + pQuery->rec.rows); } - sem_post(&pQInfo->dataReady); + bool buildRes = false; + pthread_mutex_lock(&pQInfo->lock); + pQInfo->dataReady = QUERY_RESULT_READY; + + if (pQInfo->rspContext != NULL) { + buildRes = true; + } + + + pthread_mutex_unlock(&pQInfo->lock); + return buildRes; } -int32_t qRetrieveQueryResultInfo(qinfo_t qinfo) { +int32_t qRetrieveQueryResultInfo(qinfo_t qinfo, bool* buildRes, void* pRspContext) { SQInfo *pQInfo = (SQInfo *)qinfo; if (pQInfo == NULL || !isValidQInfo(pQInfo)) { @@ -6373,11 +6373,21 @@ int32_t qRetrieveQueryResultInfo(qinfo_t qinfo) { return pQInfo->code; } - sem_wait(&pQInfo->dataReady); - qDebug("QInfo:%p retrieve result info, rowsize:%d, rows:%"PRId64", code:%d", pQInfo, pQuery->rowSize, pQuery->rec.rows, - pQInfo->code); + int32_t code = TSDB_CODE_SUCCESS; + pthread_mutex_lock(&pQInfo->lock); + if (pQInfo->dataReady == QUERY_RESULT_READY) { + *buildRes = true; + qDebug("QInfo:%p retrieve result info, rowsize:%d, rows:%"PRId64", code:%d", pQInfo, pQuery->rowSize, pQuery->rec.rows, + pQInfo->code); + } else { + *buildRes = false; + qDebug("QInfo:%p retrieve req set query return result after paused", pQInfo); + pQInfo->rspContext = pRspContext; + } - return pQInfo->code; + code = pQInfo->code; + pthread_mutex_unlock(&pQInfo->lock); + return code; } bool qHasMoreResultsToRetrieve(qinfo_t qinfo) { @@ -6389,6 +6399,7 @@ bool qHasMoreResultsToRetrieve(qinfo_t qinfo) { } SQuery *pQuery = pQInfo->runtimeEnv.pQuery; + bool ret = false; if (Q_STATUS_EQUAL(pQuery->status, QUERY_OVER)) { ret = false; @@ -6407,7 +6418,7 @@ bool qHasMoreResultsToRetrieve(qinfo_t qinfo) { return ret; } -int32_t qDumpRetrieveResult(qinfo_t qinfo, SRetrieveTableRsp **pRsp, int32_t *contLen) { +int32_t qDumpRetrieveResult(qinfo_t qinfo, SRetrieveTableRsp **pRsp, int32_t *contLen, bool* continueExec) { SQInfo *pQInfo = (SQInfo *)qinfo; if (pQInfo == NULL || !isValidQInfo(pQInfo)) { @@ -6417,8 +6428,10 @@ int32_t qDumpRetrieveResult(qinfo_t qinfo, SRetrieveTableRsp **pRsp, int32_t *co SQueryRuntimeEnv* pRuntimeEnv = &pQInfo->runtimeEnv; SQuery *pQuery = pQInfo->runtimeEnv.pQuery; size_t size = getResultSize(pQInfo, &pQuery->rec.rows); + size += sizeof(int32_t); size += sizeof(STableIdInfo) * taosArrayGetSize(pQInfo->arrTableIdInfo); + *contLen = size + sizeof(SRetrieveTableRsp); // todo proper handle failed to allocate memory, @@ -6427,6 +6440,7 @@ int32_t qDumpRetrieveResult(qinfo_t qinfo, SRetrieveTableRsp **pRsp, int32_t *co if (*pRsp == NULL) { return TSDB_CODE_QRY_OUT_OF_MEMORY; } + (*pRsp)->numOfRows = htonl(pQuery->rec.rows); int32_t code = pQInfo->code; @@ -6434,8 +6448,8 @@ int32_t qDumpRetrieveResult(qinfo_t qinfo, SRetrieveTableRsp **pRsp, int32_t *co (*pRsp)->offset = htobe64(pQuery->limit.offset); (*pRsp)->useconds = htobe64(pRuntimeEnv->summary.elapsedTime); } else { - (*pRsp)->offset = 0; (*pRsp)->useconds = 0; + (*pRsp)->offset = 0; } (*pRsp)->precision = htons(pQuery->precision); @@ -6446,10 +6460,20 @@ int32_t qDumpRetrieveResult(qinfo_t qinfo, SRetrieveTableRsp **pRsp, int32_t *co code = pQInfo->code; } + pQInfo->rspContext = NULL; + pQInfo->dataReady = QUERY_RESULT_NOT_READY; + if (IS_QUERY_KILLED(pQInfo) || Q_STATUS_EQUAL(pQuery->status, QUERY_OVER)) { (*pRsp)->completed = 1; // notify no more result to client } + if (qHasMoreResultsToRetrieve(pQInfo)) { + *continueExec = true; + } else { // failed to dump result, free qhandle immediately + *continueExec = false; + qKillQuery(pQInfo); + } + return code; } @@ -6460,7 +6484,7 @@ int32_t qKillQuery(qinfo_t qinfo) { return TSDB_CODE_QRY_INVALID_QHANDLE; } - sem_post(&pQInfo->dataReady); +// sem_post(&pQInfo->dataReady); setQueryKilled(pQInfo); return TSDB_CODE_SUCCESS; } @@ -6607,6 +6631,13 @@ static void buildTagQueryResult(SQInfo* pQInfo) { setQueryStatus(pQuery, QUERY_COMPLETED); } +void* qGetResultRetrieveMsg(qinfo_t qinfo) { + SQInfo* pQInfo = (SQInfo*) qinfo; + assert(pQInfo != NULL); + + return pQInfo->rspContext; +} + void freeqinfoFn(void *qhandle) { void** handle = qhandle; if (handle == NULL || *handle == NULL) { @@ -6618,19 +6649,21 @@ void freeqinfoFn(void *qhandle) { } void* qOpenQueryMgmt(int32_t vgId) { - const int32_t REFRESH_HANDLE_INTERVAL = 2; // every 2 seconds, refresh handle pool + const int32_t REFRESH_HANDLE_INTERVAL = 30; // every 30 seconds, refresh handle pool char cacheName[128] = {0}; sprintf(cacheName, "qhandle_%d", vgId); - SQueryMgmt* pQueryHandle = calloc(1, sizeof(SQueryMgmt)); + SQueryMgmt* pQueryMgmt = calloc(1, sizeof(SQueryMgmt)); + + pQueryMgmt->qinfoPool = taosCacheInit(TSDB_DATA_TYPE_BIGINT, REFRESH_HANDLE_INTERVAL, true, freeqinfoFn, cacheName); + pQueryMgmt->closed = false; + pQueryMgmt->vgId = vgId; - pQueryHandle->qinfoPool = taosCacheInit(TSDB_DATA_TYPE_BIGINT, REFRESH_HANDLE_INTERVAL, true, freeqinfoFn, cacheName); - pQueryHandle->closed = false; - pthread_mutex_init(&pQueryHandle->lock, NULL); + pthread_mutex_init(&pQueryMgmt->lock, NULL); qDebug("vgId:%d, open querymgmt success", vgId); - return pQueryHandle; + return pQueryMgmt; } static void queryMgmtKillQueryFn(void* handle) { @@ -6670,7 +6703,7 @@ void qCleanupQueryMgmt(void* pQMgmt) { pthread_mutex_destroy(&pQueryMgmt->lock); tfree(pQueryMgmt); - qDebug("vgId:%d querymgmt cleanup completed", vgId); + qDebug("vgId:%d queryMgmt cleanup completed", vgId); } void** qRegisterQInfo(void* pMgmt, uint64_t qInfo) { @@ -6727,3 +6760,4 @@ void** qReleaseQInfo(void* pMgmt, void* pQInfo, bool needFree) { return 0; } + diff --git a/src/query/src/qExtbuffer.c b/src/query/src/qExtbuffer.c index 69c5f0e24fe6361d41953c35fce1380b97d4e752..fb57f71199d8688fa8503790dab5551e7105ca3a 100644 --- a/src/query/src/qExtbuffer.c +++ b/src/query/src/qExtbuffer.c @@ -28,10 +28,10 @@ /* * SColumnModel is deeply copy */ -tExtMemBuffer* createExtMemBuffer(int32_t inMemSize, int32_t elemSize, SColumnModel *pModel) { +tExtMemBuffer* createExtMemBuffer(int32_t inMemSize, int32_t elemSize, int32_t pagesize, SColumnModel *pModel) { tExtMemBuffer* pMemBuffer = (tExtMemBuffer *)calloc(1, sizeof(tExtMemBuffer)); - pMemBuffer->pageSize = DEFAULT_PAGE_SIZE; + pMemBuffer->pageSize = pagesize; pMemBuffer->inMemCapacity = ALIGN8(inMemSize) / pMemBuffer->pageSize; pMemBuffer->nElemSize = elemSize; diff --git a/src/query/src/qParserImpl.c b/src/query/src/qParserImpl.c index ecc11f8f4d76769d0d52b03f44813349aed29adc..1e58dbbe0b75ee5ed2ddc0627b1590e314456b94 100644 --- a/src/query/src/qParserImpl.c +++ b/src/query/src/qParserImpl.c @@ -14,7 +14,7 @@ */ #include "os.h" -#include "qsqlparser.h" +#include "qSqlparser.h" #include "queryLog.h" #include "taosdef.h" #include "taosmsg.h" diff --git a/src/query/src/qPercentile.c b/src/query/src/qPercentile.c index c4490a01e79408ec2bf09527049082f9c05566c8..85e45e46b3eee0beef2babc893234aa0a5a0818f 100644 --- a/src/query/src/qPercentile.c +++ b/src/query/src/qPercentile.c @@ -535,7 +535,7 @@ void tMemBucketPut(tMemBucket *pBucket, void *data, int32_t numOfRows) { if (pSeg->pBuffer[slotIdx] == NULL) { pSeg->pBuffer[slotIdx] = createExtMemBuffer(pBucket->numOfTotalPages * pBucket->pageSize, pBucket->nElemSize, - pBucket->pOrderDesc->pColumnModel); + pBucket->pageSize, pBucket->pOrderDesc->pColumnModel); pSeg->pBuffer[slotIdx]->flushModel = SINGLE_APPEND_MODEL; pBucket->pOrderDesc->pColumnModel->capacity = pSeg->pBuffer[slotIdx]->numOfElemsPerPage; } diff --git a/src/query/src/qResultbuf.c b/src/query/src/qResultbuf.c index a3d3386a878cf69c9dae7ea089f36c28df57ed4f..93ccad8e27b8ec3fc16af70f3015f429c70a8af2 100644 --- a/src/query/src/qResultbuf.c +++ b/src/query/src/qResultbuf.c @@ -1,221 +1,439 @@ #include "qResultbuf.h" +#include "stddef.h" +#include "tscompression.h" #include "hash.h" #include "qExtbuffer.h" #include "queryLog.h" #include "taoserror.h" -int32_t createDiskbasedResultBuffer(SDiskbasedResultBuf** pResultBuf, int32_t numOfPages, int32_t rowSize, - int32_t pagesize, int32_t inMemPages, void* handle) { +#define GET_DATA_PAYLOAD(_p) ((_p)->pData + POINTER_BYTES) +int32_t createDiskbasedResultBuffer(SDiskbasedResultBuf** pResultBuf, int32_t rowSize, int32_t pagesize, + int32_t inMemBufSize, const void* handle) { *pResultBuf = calloc(1, sizeof(SDiskbasedResultBuf)); + SDiskbasedResultBuf* pResBuf = *pResultBuf; if (pResBuf == NULL) { return TSDB_CODE_COM_OUT_OF_MEMORY; } - pResBuf->pageSize = pagesize; - pResBuf->numOfPages = inMemPages; // all pages are in buffer in the first place - pResBuf->inMemPages = inMemPages; - assert(inMemPages <= numOfPages); - - pResBuf->numOfRowsPerPage = (pagesize - sizeof(tFilePage)) / rowSize; + pResBuf->pageSize = pagesize; + pResBuf->numOfPages = 0; // all pages are in buffer in the first place + pResBuf->totalBufSize = 0; + pResBuf->inMemPages = inMemBufSize/pagesize; // maximum allowed pages, it is a soft limit. + pResBuf->allocateId = -1; + pResBuf->comp = true; + pResBuf->file = NULL; + pResBuf->handle = handle; + pResBuf->fileSize = 0; - pResBuf->totalBufSize = pResBuf->numOfPages * pagesize; - pResBuf->incStep = 4; - pResBuf->allocateId = -1; + // at least more than 2 pages must be in memory + assert(inMemBufSize >= pagesize * 2); - pResBuf->iBuf = calloc(pResBuf->inMemPages, pResBuf->pageSize); + pResBuf->numOfRowsPerPage = (pagesize - sizeof(tFilePage)) / rowSize; + pResBuf->lruList = tdListNew(POINTER_BYTES); // init id hash table - pResBuf->idsTable = taosHashInit(numOfPages, taosGetDefaultHashFunction(TSDB_DATA_TYPE_INT), false); - pResBuf->list = taosArrayInit(numOfPages, POINTER_BYTES); + pResBuf->groupSet = taosHashInit(10, taosGetDefaultHashFunction(TSDB_DATA_TYPE_INT), false); + pResBuf->assistBuf = malloc(pResBuf->pageSize + 2); // EXTRA BYTES + pResBuf->all = taosHashInit(10, taosGetDefaultHashFunction(TSDB_DATA_TYPE_INT), false); char path[PATH_MAX] = {0}; - getTmpfilePath("tsdb_qbuf", path); + getTmpfilePath("qbuf", path); pResBuf->path = strdup(path); - pResBuf->fd = FD_INITIALIZER; - pResBuf->pBuf = NULL; pResBuf->emptyDummyIdList = taosArrayInit(1, sizeof(int32_t)); - qDebug("QInfo:%p create resBuf for output, page size:%d, initial pages:%d, %" PRId64 "bytes", handle, - pResBuf->pageSize, pResBuf->numOfPages, pResBuf->totalBufSize); - + qDebug("QInfo:%p create resBuf for output, page size:%d, inmem buf pages:%d, file:%s", handle, pResBuf->pageSize, + pResBuf->inMemPages, pResBuf->path); + return TSDB_CODE_SUCCESS; } -int32_t getNumOfResultBufGroupId(SDiskbasedResultBuf* pResultBuf) { return taosHashGetSize(pResultBuf->idsTable); } - -int32_t getResBufSize(SDiskbasedResultBuf* pResultBuf) { return pResultBuf->totalBufSize; } - -#define NUM_OF_PAGES_ON_DISK(_r) ((_r)->numOfPages - (_r)->inMemPages) -#define FILE_SIZE_ON_DISK(_r) (NUM_OF_PAGES_ON_DISK(_r) * (_r)->pageSize) - -static int32_t createDiskResidesBuf(SDiskbasedResultBuf* pResultBuf) { - pResultBuf->fd = open(pResultBuf->path, O_CREAT | O_RDWR | O_TRUNC, 0666); - if (!FD_VALID(pResultBuf->fd)) { +static int32_t createDiskFile(SDiskbasedResultBuf* pResultBuf) { + pResultBuf->file = fopen(pResultBuf->path, "wb+"); + if (pResultBuf->file == NULL) { qError("failed to create tmp file: %s on disk. %s", pResultBuf->path, strerror(errno)); return TAOS_SYSTEM_ERROR(errno); } - assert(pResultBuf->numOfPages == pResultBuf->inMemPages); - pResultBuf->numOfPages += pResultBuf->incStep; + return TSDB_CODE_SUCCESS; +} - int32_t ret = ftruncate(pResultBuf->fd, NUM_OF_PAGES_ON_DISK(pResultBuf) * pResultBuf->pageSize); - if (ret != TSDB_CODE_SUCCESS) { - qError("failed to create tmp file: %s on disk. %s", pResultBuf->path, strerror(errno)); - return TAOS_SYSTEM_ERROR(errno); +static char* doCompressData(void* data, int32_t srcSize, int32_t *dst, SDiskbasedResultBuf* pResultBuf) { // do nothing + if (!pResultBuf->comp) { + *dst = srcSize; + return data; } - pResultBuf->pBuf = mmap(NULL, FILE_SIZE_ON_DISK(pResultBuf), PROT_READ | PROT_WRITE, MAP_SHARED, pResultBuf->fd, 0); - if (pResultBuf->pBuf == MAP_FAILED) { - qError("QInfo:%p failed to map temp file: %s. %s", pResultBuf->handle, pResultBuf->path, strerror(errno)); - return TAOS_SYSTEM_ERROR(errno); + *dst = tsCompressString(data, srcSize, 1, pResultBuf->assistBuf, srcSize, ONE_STAGE_COMP, NULL, 0); + + memcpy(data, pResultBuf->assistBuf, *dst); + return data; +} + +static char* doDecompressData(void* data, int32_t srcSize, int32_t *dst, SDiskbasedResultBuf* pResultBuf) { // do nothing + if (!pResultBuf->comp) { + *dst = srcSize; + return data; } - pResultBuf->totalBufSize = pResultBuf->numOfPages * pResultBuf->pageSize; - return TSDB_CODE_SUCCESS; + *dst = tsDecompressString(data, srcSize, 1, pResultBuf->assistBuf, pResultBuf->pageSize, ONE_STAGE_COMP, NULL, 0); + + memcpy(data, pResultBuf->assistBuf, *dst); + return data; } -static int32_t extendDiskFileSize(SDiskbasedResultBuf* pResultBuf, int32_t incNumOfPages) { - assert(pResultBuf->numOfPages * pResultBuf->pageSize == pResultBuf->totalBufSize); - int32_t ret = TSDB_CODE_SUCCESS; +static int32_t allocatePositionInFile(SDiskbasedResultBuf* pResultBuf, size_t size) { + if (pResultBuf->pFree == NULL) { + return pResultBuf->nextPos; + } else { + int32_t offset = -1; + + size_t num = taosArrayGetSize(pResultBuf->pFree); + for(int32_t i = 0; i < num; ++i) { + SFreeListItem* pi = taosArrayGet(pResultBuf->pFree, i); + if (pi->len >= size) { + offset = pi->offset; + pi->offset += size; + pi->len -= size; + + return offset; + } + } + + // no available recycle space, allocate new area in file + return pResultBuf->nextPos; + } +} + +static char* doFlushPageToDisk(SDiskbasedResultBuf* pResultBuf, SPageInfo* pg) { + assert(!pg->used && pg->pData != NULL); + + int32_t size = -1; + char* t = doCompressData(GET_DATA_PAYLOAD(pg), pResultBuf->pageSize, &size, pResultBuf); + + // this page is flushed to disk for the first time + if (pg->info.offset == -1) { + pg->info.offset = allocatePositionInFile(pResultBuf, size); + pResultBuf->nextPos += size; - if (pResultBuf->pBuf == NULL) { - assert(pResultBuf->fd == FD_INITIALIZER); + fseek(pResultBuf->file, pg->info.offset, SEEK_SET); + /*int32_t ret =*/ fwrite(t, 1, size, pResultBuf->file); - if ((ret = createDiskResidesBuf(pResultBuf)) != TSDB_CODE_SUCCESS) { - return ret; + if (pResultBuf->fileSize < pg->info.offset + pg->info.length) { + pResultBuf->fileSize = pg->info.offset + pg->info.length; } } else { - ret = munmap(pResultBuf->pBuf, FILE_SIZE_ON_DISK(pResultBuf)); - pResultBuf->numOfPages += incNumOfPages; - - /* - * disk-based output buffer is exhausted, try to extend the disk-based buffer, the available disk space may - * be insufficient - */ - ret = ftruncate(pResultBuf->fd, NUM_OF_PAGES_ON_DISK(pResultBuf) * pResultBuf->pageSize); - if (ret != TSDB_CODE_SUCCESS) { - // dError("QInfo:%p failed to create intermediate result output file:%s. %s", pQInfo, pSupporter->extBufFile, - // strerror(errno)); - return TSDB_CODE_QRY_NO_DISKSPACE; + // length becomes greater, current space is not enough, allocate new place, otherwise, do nothing + if (pg->info.length < size) { + // 1. add current space to free list + taosArrayPush(pResultBuf->pFree, &pg->info); + + // 2. allocate new position, and update the info + pg->info.offset = allocatePositionInFile(pResultBuf, size); + pResultBuf->nextPos += size; } - pResultBuf->totalBufSize = pResultBuf->numOfPages * pResultBuf->pageSize; - pResultBuf->pBuf = mmap(NULL, FILE_SIZE_ON_DISK(pResultBuf), PROT_READ | PROT_WRITE, MAP_SHARED, pResultBuf->fd, 0); + //3. write to disk. + fseek(pResultBuf->file, pg->info.offset, SEEK_SET); + fwrite(t, size, 1, pResultBuf->file); - if (pResultBuf->pBuf == MAP_FAILED) { - // dError("QInfo:%p failed to map temp file: %s. %s", pQInfo, pSupporter->extBufFile, strerror(errno)); - return TSDB_CODE_QRY_OUT_OF_MEMORY; + if (pResultBuf->fileSize < pg->info.offset + pg->info.length) { + pResultBuf->fileSize = pg->info.offset + pg->info.length; } } - return TSDB_CODE_SUCCESS; + char* ret = pg->pData; + memset(ret, 0, pResultBuf->pageSize); + + pg->pData = NULL; + pg->info.length = size; + + pResultBuf->statis.flushBytes += pg->info.length; + + return ret; } -#define NO_AVAILABLE_PAGES(_b) ((_b)->allocateId == (_b)->numOfPages - 1) +static char* flushPageToDisk(SDiskbasedResultBuf* pResultBuf, SPageInfo* pg) { + int32_t ret = TSDB_CODE_SUCCESS; + assert(pResultBuf->numOfPages * pResultBuf->pageSize == pResultBuf->totalBufSize && pResultBuf->numOfPages >= pResultBuf->inMemPages); -static FORCE_INLINE int32_t getGroupIndex(SDiskbasedResultBuf* pResultBuf, int32_t groupId) { - assert(pResultBuf != NULL); + if (pResultBuf->file == NULL) { + if ((ret = createDiskFile(pResultBuf)) != TSDB_CODE_SUCCESS) { + terrno = ret; + return NULL; + } + } + + return doFlushPageToDisk(pResultBuf, pg); +} + +// load file block data in disk +static char* loadPageFromDisk(SDiskbasedResultBuf* pResultBuf, SPageInfo* pg) { + int32_t ret = fseek(pResultBuf->file, pg->info.offset, SEEK_SET); + ret = fread(GET_DATA_PAYLOAD(pg), 1, pg->info.length, pResultBuf->file); + if (ret != pg->info.length) { + terrno = errno; + return NULL; + } + + pResultBuf->statis.loadBytes += pg->info.length; - char* p = taosHashGet(pResultBuf->idsTable, (const char*)&groupId, sizeof(int32_t)); + int32_t fullSize = 0; + doDecompressData(GET_DATA_PAYLOAD(pg), pg->info.length, &fullSize, pResultBuf); + + return GET_DATA_PAYLOAD(pg); +} + +#define NO_AVAILABLE_PAGES(_b) ((_b)->numOfPages >= (_b)->inMemPages) + +static SIDList addNewGroup(SDiskbasedResultBuf* pResultBuf, int32_t groupId) { + assert(taosHashGet(pResultBuf->groupSet, (const char*) &groupId, sizeof(int32_t)) == NULL); + + SArray* pa = taosArrayInit(1, POINTER_BYTES); + int32_t ret = taosHashPut(pResultBuf->groupSet, (const char*)&groupId, sizeof(int32_t), &pa, POINTER_BYTES); + assert(ret == 0); + + return pa; +} + +static SPageInfo* registerPage(SDiskbasedResultBuf* pResultBuf, int32_t groupId, int32_t pageId) { + SIDList list = NULL; + + char** p = taosHashGet(pResultBuf->groupSet, (const char*)&groupId, sizeof(int32_t)); if (p == NULL) { // it is a new group id - return -1; + list = addNewGroup(pResultBuf, groupId); + } else { + list = (SIDList) (*p); } - int32_t slot = GET_INT32_VAL(p); - assert(slot >= 0 && slot < taosHashGetSize(pResultBuf->idsTable)); + pResultBuf->numOfPages += 1; - return slot; + SPageInfo* ppi = malloc(sizeof(SPageInfo));//{ .info = PAGE_INFO_INITIALIZER, .pageId = pageId, .pn = NULL}; + ppi->info = PAGE_INFO_INITIALIZER; + ppi->pageId = pageId; + ppi->pData = NULL; + ppi->pn = NULL; + ppi->used = true; + + return *(SPageInfo**) taosArrayPush(list, &ppi); } -static int32_t addNewGroupId(SDiskbasedResultBuf* pResultBuf, int32_t groupId) { - int32_t num = getNumOfResultBufGroupId(pResultBuf); // the num is the newest allocated group id slot - taosHashPut(pResultBuf->idsTable, (const char*)&groupId, sizeof(int32_t), &num, sizeof(int32_t)); +static SListNode* getEldestUnrefedPage(SDiskbasedResultBuf* pResultBuf) { + SListIter iter = {0}; + tdListInitIter(pResultBuf->lruList, &iter, TD_LIST_BACKWARD); + + SListNode* pn = NULL; + while((pn = tdListNext(&iter)) != NULL) { + assert(pn != NULL); + + SPageInfo* pageInfo = *(SPageInfo**) pn->data; + assert(pageInfo->pageId >= 0 && pageInfo->pn == pn); - SArray* pa = taosArrayInit(1, sizeof(int32_t)); - taosArrayPush(pResultBuf->list, &pa); + if (!pageInfo->used) { + break; + } + } - assert(taosArrayGetSize(pResultBuf->list) == taosHashGetSize(pResultBuf->idsTable)); - return num; + return pn; } -static void registerPageId(SDiskbasedResultBuf* pResultBuf, int32_t groupId, int32_t pageId) { - int32_t slot = getGroupIndex(pResultBuf, groupId); - if (slot < 0) { - slot = addNewGroupId(pResultBuf, groupId); +static char* evicOneDataPage(SDiskbasedResultBuf* pResultBuf) { + char* bufPage = NULL; + SListNode* pn = getEldestUnrefedPage(pResultBuf); + + // all pages are referenced by user, try to allocate new space + if (pn == NULL) { + int32_t prev = pResultBuf->inMemPages; + pResultBuf->inMemPages = pResultBuf->inMemPages * 1.5; + + qWarn("%p in memory buf page not sufficient, expand from %d to %d, page size:%d", pResultBuf, prev, + pResultBuf->inMemPages, pResultBuf->pageSize); + } else { + pResultBuf->statis.flushPages += 1; + tdListPopNode(pResultBuf->lruList, pn); + + SPageInfo* d = *(SPageInfo**) pn->data; + assert(d->pn == pn); + + d->pn = NULL; + tfree(pn); + + bufPage = flushPageToDisk(pResultBuf, d); } - SIDList pList = taosArrayGetP(pResultBuf->list, slot); - taosArrayPush(pList, &pageId); + return bufPage; +} + +static void lruListPushFront(SList *pList, SPageInfo* pi) { + tdListPrepend(pList, &pi); + SListNode* front = tdListGetHead(pList); + pi->pn = front; +} + +static void lruListMoveToFront(SList *pList, SPageInfo* pi) { + tdListPopNode(pList, pi->pn); + tdListPrependNode(pList, pi->pn); } tFilePage* getNewDataBuf(SDiskbasedResultBuf* pResultBuf, int32_t groupId, int32_t* pageId) { + pResultBuf->statis.getPages += 1; + + char* availablePage = NULL; if (NO_AVAILABLE_PAGES(pResultBuf)) { - if (extendDiskFileSize(pResultBuf, pResultBuf->incStep) != TSDB_CODE_SUCCESS) { - return NULL; - } + availablePage = evicOneDataPage(pResultBuf); } // register new id in this group *pageId = (++pResultBuf->allocateId); - registerPageId(pResultBuf, groupId, *pageId); - // clear memory for the new page - tFilePage* page = getResBufPage(pResultBuf, *pageId); - memset(page, 0, pResultBuf->pageSize); - - return page; + // register page id info + SPageInfo* pi = registerPage(pResultBuf, groupId, *pageId); + + // add to LRU list + assert(listNEles(pResultBuf->lruList) < pResultBuf->inMemPages && pResultBuf->inMemPages > 0); + + lruListPushFront(pResultBuf->lruList, pi); + + // add to hash map + taosHashPut(pResultBuf->all, pageId, sizeof(int32_t), &pi, POINTER_BYTES); + + // allocate buf + if (availablePage == NULL) { + pi->pData = calloc(1, pResultBuf->pageSize + POINTER_BYTES); + } else { + pi->pData = availablePage; + } + + pResultBuf->totalBufSize += pResultBuf->pageSize; + + ((void**)pi->pData)[0] = pi; + pi->used = true; + + return GET_DATA_PAYLOAD(pi); +} + +tFilePage* getResBufPage(SDiskbasedResultBuf* pResultBuf, int32_t id) { + assert(pResultBuf != NULL && id >= 0); + pResultBuf->statis.getPages += 1; + + SPageInfo** pi = taosHashGet(pResultBuf->all, &id, sizeof(int32_t)); + assert(pi != NULL && *pi != NULL); + + if ((*pi)->pData != NULL) { // it is in memory + // no need to update the LRU list if only one page exists + if (pResultBuf->numOfPages == 1) { + (*pi)->used = true; + return GET_DATA_PAYLOAD(*pi); + } + + SPageInfo** pInfo = (SPageInfo**) ((*pi)->pn->data); + assert(*pInfo == *pi); + + lruListMoveToFront(pResultBuf->lruList, (*pi)); + (*pi)->used = true; + + return GET_DATA_PAYLOAD(*pi); + + } else { // not in memory + assert((*pi)->pData == NULL && (*pi)->pn == NULL && (*pi)->info.length >= 0 && (*pi)->info.offset >= 0); + + char* availablePage = NULL; + if (NO_AVAILABLE_PAGES(pResultBuf)) { + availablePage = evicOneDataPage(pResultBuf); + } + + if (availablePage == NULL) { + (*pi)->pData = calloc(1, pResultBuf->pageSize + POINTER_BYTES); + } else { + (*pi)->pData = availablePage; + } + + ((void**)((*pi)->pData))[0] = (*pi); + + lruListPushFront(pResultBuf->lruList, *pi); + loadPageFromDisk(pResultBuf, *pi); + return GET_DATA_PAYLOAD(*pi); + } } -int32_t getNumOfRowsPerPage(SDiskbasedResultBuf* pResultBuf) { return pResultBuf->numOfRowsPerPage; } +void releaseResBufPage(SDiskbasedResultBuf* pResultBuf, void* page) { + assert(pResultBuf != NULL && page != NULL); + char* p = (char*) page - POINTER_BYTES; + + SPageInfo* ppi = ((SPageInfo**) p)[0]; + releaseResBufPageInfo(pResultBuf, ppi); +} + +void releaseResBufPageInfo(SDiskbasedResultBuf* pResultBuf, SPageInfo* pi) { + assert(pi->pData != NULL && pi->used); + + pi->used = false; + pResultBuf->statis.releasePages += 1; +} + +size_t getNumOfRowsPerPage(const SDiskbasedResultBuf* pResultBuf) { return pResultBuf->numOfRowsPerPage; } + +size_t getNumOfResultBufGroupId(const SDiskbasedResultBuf* pResultBuf) { return taosHashGetSize(pResultBuf->groupSet); } + +size_t getResBufSize(const SDiskbasedResultBuf* pResultBuf) { return pResultBuf->totalBufSize; } SIDList getDataBufPagesIdList(SDiskbasedResultBuf* pResultBuf, int32_t groupId) { - int32_t slot = getGroupIndex(pResultBuf, groupId); - if (slot < 0) { + assert(pResultBuf != NULL); + + char** p = taosHashGet(pResultBuf->groupSet, (const char*)&groupId, sizeof(int32_t)); + if (p == NULL) { // it is a new group id return pResultBuf->emptyDummyIdList; } else { - return taosArrayGetP(pResultBuf->list, slot); + return (SArray*) (*p); } } -void destroyResultBuf(SDiskbasedResultBuf* pResultBuf, void* handle) { +void destroyResultBuf(SDiskbasedResultBuf* pResultBuf) { if (pResultBuf == NULL) { return; } - if (FD_VALID(pResultBuf->fd)) { - qDebug("QInfo:%p disk-based output buffer closed, total:%" PRId64 " bytes, file created:%s, file size:%d", handle, - pResultBuf->totalBufSize, pResultBuf->path, FILE_SIZE_ON_DISK(pResultBuf)); + if (pResultBuf->file != NULL) { + qDebug("QInfo:%p disk-based output buffer closed, total:%" PRId64 " bytes, file size:%"PRId64" bytes", + pResultBuf->handle, pResultBuf->totalBufSize, pResultBuf->fileSize); - close(pResultBuf->fd); - munmap(pResultBuf->pBuf, FILE_SIZE_ON_DISK(pResultBuf)); - pResultBuf->pBuf = NULL; + fclose(pResultBuf->file); } else { - qDebug("QInfo:%p disk-based output buffer closed, total:%" PRId64 " bytes, no file created", handle, + qDebug("QInfo:%p disk-based output buffer closed, total:%" PRId64 " bytes, no file created", pResultBuf->handle, pResultBuf->totalBufSize); } unlink(pResultBuf->path); tfree(pResultBuf->path); - size_t size = taosArrayGetSize(pResultBuf->list); - for (int32_t i = 0; i < size; ++i) { - SArray* pa = taosArrayGetP(pResultBuf->list, i); - taosArrayDestroy(pa); + SHashMutableIterator* iter = taosHashCreateIter(pResultBuf->groupSet); + while(taosHashIterNext(iter)) { + SArray** p = (SArray**) taosHashIterGet(iter); + size_t n = taosArrayGetSize(*p); + for(int32_t i = 0; i < n; ++i) { + SPageInfo* pi = taosArrayGetP(*p, i); + tfree(pi->pData); + tfree(pi); + } + + taosArrayDestroy(*p); } - taosArrayDestroy(pResultBuf->list); + taosHashDestroyIter(iter); + + tdListFree(pResultBuf->lruList); taosArrayDestroy(pResultBuf->emptyDummyIdList); - taosHashCleanup(pResultBuf->idsTable); + taosHashCleanup(pResultBuf->groupSet); + taosHashCleanup(pResultBuf->all); - tfree(pResultBuf->iBuf); + tfree(pResultBuf->assistBuf); tfree(pResultBuf); } -int32_t getLastPageId(SIDList pList) { +SPageInfo* getLastPageInfo(SIDList pList) { size_t size = taosArrayGetSize(pList); - return *(int32_t*) taosArrayGet(pList, size - 1); + return (SPageInfo*) taosArrayGetP(pList, size - 1); } diff --git a/src/query/src/qUtil.c b/src/query/src/qUtil.c index 7b03dd9d941668119b93d3fb19dd1be3af9f3289..c9143b3a5323fa2419e3a543cc3d4d94d404cffd 100644 --- a/src/query/src/qUtil.c +++ b/src/query/src/qUtil.c @@ -236,11 +236,13 @@ void clearTimeWindowResBuf(SQueryRuntimeEnv *pRuntimeEnv, SWindowResult *pWindow if (pWindowRes == NULL) { return; } - + + tFilePage *page = getResBufPage(pRuntimeEnv->pResultBuf, pWindowRes->pos.pageId); + for (int32_t i = 0; i < pRuntimeEnv->pQuery->numOfOutput; ++i) { SResultInfo *pResultInfo = &pWindowRes->resultInfo[i]; - char * s = getPosInResultPage(pRuntimeEnv, i, pWindowRes); + char * s = getPosInResultPage(pRuntimeEnv, i, pWindowRes, page); size_t size = pRuntimeEnv->pQuery->pSelectExpr[i].bytes; memset(s, 0, size); @@ -277,8 +279,11 @@ void copyTimeWindowResBuf(SQueryRuntimeEnv *pRuntimeEnv, SWindowResult *dst, con memcpy(pDst->interResultBuf, pSrc->interResultBuf, pDst->bufLen); // copy the output buffer data from src to dst, the position info keep unchanged - char * dstBuf = getPosInResultPage(pRuntimeEnv, i, dst); - char * srcBuf = getPosInResultPage(pRuntimeEnv, i, (SWindowResult *)src); + tFilePage *dstpage = getResBufPage(pRuntimeEnv->pResultBuf, dst->pos.pageId); + char * dstBuf = getPosInResultPage(pRuntimeEnv, i, dst, dstpage); + + tFilePage *srcpage = getResBufPage(pRuntimeEnv->pResultBuf, src->pos.pageId); + char * srcBuf = getPosInResultPage(pRuntimeEnv, i, (SWindowResult *)src, srcpage); size_t s = pRuntimeEnv->pQuery->pSelectExpr[i].bytes; memcpy(dstBuf, srcBuf, s); diff --git a/src/query/src/sql.c b/src/query/src/sql.c index ac9952bb97d9c333a5e4209318cf1b26d3fe4b9c..307d5203b34161c024a8dd259d610a7170098438 100644 --- a/src/query/src/sql.c +++ b/src/query/src/sql.c @@ -30,7 +30,7 @@ #include #include #include -#include "qsqlparser.h" +#include "qSqlparser.h" #include "tcmdtype.h" #include "tstoken.h" #include "ttokendef.h" diff --git a/src/query/tests/resultBufferTest.cpp b/src/query/tests/resultBufferTest.cpp index 63ed89ab9fe1de2222c6f5ea8ae05b331437f76d..3b74bf1b643d127ab10591242182ccbf414b0c3d 100644 --- a/src/query/tests/resultBufferTest.cpp +++ b/src/query/tests/resultBufferTest.cpp @@ -18,17 +18,144 @@ void simpleTest() { tFilePage* pBufPage = getNewDataBuf(pResultBuf, groupId, &pageId); ASSERT_TRUE(pBufPage != NULL); - ASSERT_EQ(getNumOfRowsPerPage(pResultBuf), (16384L - sizeof(int64_t))/64); - ASSERT_EQ(getResBufSize(pResultBuf), 1000*16384L); + ASSERT_EQ(getResBufSize(pResultBuf), 1024); SIDList list = getDataBufPagesIdList(pResultBuf, groupId); ASSERT_EQ(taosArrayGetSize(list), 1); ASSERT_EQ(getNumOfResultBufGroupId(pResultBuf), 1); - - destroyResultBuf(pResultBuf, NULL); + + releaseResBufPage(pResultBuf, pBufPage); + + tFilePage* pBufPage1 = getNewDataBuf(pResultBuf, groupId, &pageId); + + tFilePage* t = getResBufPage(pResultBuf, pageId); + ASSERT_TRUE(t == pBufPage1); + + tFilePage* pBufPage2 = getNewDataBuf(pResultBuf, groupId, &pageId); + tFilePage* t1 = getResBufPage(pResultBuf, pageId); + ASSERT_TRUE(t1 == pBufPage2); + + tFilePage* pBufPage3 = getNewDataBuf(pResultBuf, groupId, &pageId); + tFilePage* t2 = getResBufPage(pResultBuf, pageId); + ASSERT_TRUE(t2 == pBufPage3); + + tFilePage* pBufPage4 = getNewDataBuf(pResultBuf, groupId, &pageId); + tFilePage* t3 = getResBufPage(pResultBuf, pageId); + ASSERT_TRUE(t3 == pBufPage4); + + tFilePage* pBufPage5 = getNewDataBuf(pResultBuf, groupId, &pageId); + tFilePage* t4 = getResBufPage(pResultBuf, pageId); + ASSERT_TRUE(t4 == pBufPage5); + + destroyResultBuf(pResultBuf); +} + +void writeDownTest() { + SDiskbasedResultBuf* pResultBuf = NULL; + int32_t ret = createDiskbasedResultBuffer(&pResultBuf, 1000, 64, 1024, 4, NULL); + + int32_t pageId = 0; + int32_t writePageId = 0; + int32_t groupId = 0; + int32_t nx = 12345; + + tFilePage* pBufPage = getNewDataBuf(pResultBuf, groupId, &pageId); + ASSERT_TRUE(pBufPage != NULL); + + *(int32_t*)(pBufPage->data) = nx; + writePageId = pageId; + releaseResBufPage(pResultBuf, pBufPage); + + tFilePage* pBufPage1 = getNewDataBuf(pResultBuf, groupId, &pageId); + tFilePage* t1 = getResBufPage(pResultBuf, pageId); + ASSERT_TRUE(t1 == pBufPage1); + ASSERT_TRUE(pageId == 1); + + tFilePage* pBufPage2 = getNewDataBuf(pResultBuf, groupId, &pageId); + tFilePage* t2 = getResBufPage(pResultBuf, pageId); + ASSERT_TRUE(t2 == pBufPage2); + ASSERT_TRUE(pageId == 2); + + tFilePage* pBufPage3 = getNewDataBuf(pResultBuf, groupId, &pageId); + tFilePage* t3 = getResBufPage(pResultBuf, pageId); + ASSERT_TRUE(t3 == pBufPage3); + ASSERT_TRUE(pageId == 3); + + tFilePage* pBufPage4 = getNewDataBuf(pResultBuf, groupId, &pageId); + tFilePage* t4 = getResBufPage(pResultBuf, pageId); + ASSERT_TRUE(t4 == pBufPage4); + ASSERT_TRUE(pageId == 4); + releaseResBufPage(pResultBuf, t4); + + // flush the written page to disk, and read it out again + tFilePage* pBufPagex = getResBufPage(pResultBuf, writePageId); + ASSERT_EQ(*(int32_t*)pBufPagex->data, nx); + + SArray* pa = getDataBufPagesIdList(pResultBuf, groupId); + ASSERT_EQ(taosArrayGetSize(pa), 5); + + destroyResultBuf(pResultBuf); +} + +void recyclePageTest() { + SDiskbasedResultBuf* pResultBuf = NULL; + int32_t ret = createDiskbasedResultBuffer(&pResultBuf, 1000, 64, 1024, 4, NULL); + + int32_t pageId = 0; + int32_t writePageId = 0; + int32_t groupId = 0; + int32_t nx = 12345; + + tFilePage* pBufPage = getNewDataBuf(pResultBuf, groupId, &pageId); + ASSERT_TRUE(pBufPage != NULL); + releaseResBufPage(pResultBuf, pBufPage); + + tFilePage* pBufPage1 = getNewDataBuf(pResultBuf, groupId, &pageId); + tFilePage* t1 = getResBufPage(pResultBuf, pageId); + ASSERT_TRUE(t1 == pBufPage1); + ASSERT_TRUE(pageId == 1); + + tFilePage* pBufPage2 = getNewDataBuf(pResultBuf, groupId, &pageId); + tFilePage* t2 = getResBufPage(pResultBuf, pageId); + ASSERT_TRUE(t2 == pBufPage2); + ASSERT_TRUE(pageId == 2); + + tFilePage* pBufPage3 = getNewDataBuf(pResultBuf, groupId, &pageId); + tFilePage* t3 = getResBufPage(pResultBuf, pageId); + ASSERT_TRUE(t3 == pBufPage3); + ASSERT_TRUE(pageId == 3); + + tFilePage* pBufPage4 = getNewDataBuf(pResultBuf, groupId, &pageId); + tFilePage* t4 = getResBufPage(pResultBuf, pageId); + ASSERT_TRUE(t4 == pBufPage4); + ASSERT_TRUE(pageId == 4); + releaseResBufPage(pResultBuf, t4); + releaseResBufPage(pResultBuf, t4); + + tFilePage* pBufPage5 = getNewDataBuf(pResultBuf, groupId, &pageId); + tFilePage* t5 = getResBufPage(pResultBuf, pageId); + ASSERT_TRUE(t5 == pBufPage5); + ASSERT_TRUE(pageId == 5); + + // flush the written page to disk, and read it out again + tFilePage* pBufPagex = getResBufPage(pResultBuf, writePageId); + *(int32_t*)(pBufPagex->data) = nx; + writePageId = pageId; // update the data + releaseResBufPage(pResultBuf, pBufPagex); + + tFilePage* pBufPagex1 = getResBufPage(pResultBuf, 1); + + SArray* pa = getDataBufPagesIdList(pResultBuf, groupId); + ASSERT_EQ(taosArrayGetSize(pa), 6); + + destroyResultBuf(pResultBuf); } } // namespace + TEST(testCase, resultBufferTest) { + srand(time(NULL)); simpleTest(); + writeDownTest(); + recyclePageTest(); } diff --git a/src/tsdb/src/tsdbRead.c b/src/tsdb/src/tsdbRead.c index c0f6c82b99728b02d276c1b3328e806212756b67..a3907e471996b373a72293fbb16b909ac5bbdbf0 100644 --- a/src/tsdb/src/tsdbRead.c +++ b/src/tsdb/src/tsdbRead.c @@ -210,7 +210,7 @@ TsdbQueryHandleT* tsdbQueryTables(TSDB_REPO_T* tsdb, STsdbQueryCond* pCond, STab if (pQueryHandle->pColumns == NULL) { goto out_of_memory; } - + for (int32_t i = 0; i < numOfCols; ++i) { SColumnInfoData colInfo = {{0}, 0}; @@ -222,29 +222,29 @@ TsdbQueryHandleT* tsdbQueryTables(TSDB_REPO_T* tsdb, STsdbQueryCond* pCond, STab taosArrayPush(pQueryHandle->pColumns, &colInfo); pQueryHandle->statis[i].colId = colInfo.info.colId; } - + pQueryHandle->pTableCheckInfo = taosArrayInit(groupList->numOfTables, sizeof(STableCheckInfo)); if (pQueryHandle->pTableCheckInfo == NULL) { goto out_of_memory; } STsdbMeta* pMeta = tsdbGetMeta(tsdb); assert(pMeta != NULL); - + for (int32_t i = 0; i < sizeOfGroup; ++i) { SArray* group = *(SArray**) taosArrayGet(groupList->pGroupList, i); - + size_t gsize = taosArrayGetSize(group); assert(gsize > 0); - + for (int32_t j = 0; j < gsize; ++j) { STable* pTable = (STable*) taosArrayGetP(group, j); - + STableCheckInfo info = { .lastKey = pQueryHandle->window.skey, .tableId = pTable->tableId, .pTableObj = pTable, }; - + assert(info.pTableObj != NULL && (info.pTableObj->type == TSDB_NORMAL_TABLE || info.pTableObj->type == TSDB_CHILD_TABLE || info.pTableObj->type == TSDB_STREAM_TABLE)); @@ -280,17 +280,17 @@ TsdbQueryHandleT tsdbQueryLastRow(TSDB_REPO_T *tsdb, STsdbQueryCond *pCond, STab SArray* tsdbGetQueriedTableList(TsdbQueryHandleT *pHandle) { assert(pHandle != NULL); - + STsdbQueryHandle *pQueryHandle = (STsdbQueryHandle*) pHandle; - + size_t size = taosArrayGetSize(pQueryHandle->pTableCheckInfo); SArray* res = taosArrayInit(size, POINTER_BYTES); - + for(int32_t i = 0; i < size; ++i) { STableCheckInfo* pCheckInfo = taosArrayGet(pQueryHandle->pTableCheckInfo, i); taosArrayPush(res, &pCheckInfo->pTableObj); } - + return res; } @@ -306,11 +306,11 @@ TsdbQueryHandleT tsdbQueryRowsInExternalWindow(TSDB_REPO_T *tsdb, STsdbQueryCond static bool initTableMemIterator(STsdbQueryHandle* pHandle, STableCheckInfo* pCheckInfo) { STable* pTable = pCheckInfo->pTableObj; assert(pTable != NULL); - + if (pCheckInfo->initBuf) { return true; } - + pCheckInfo->initBuf = true; int32_t order = pHandle->order; @@ -318,7 +318,7 @@ static bool initTableMemIterator(STsdbQueryHandle* pHandle, STableCheckInfo* pCh if (pHandle->mem == NULL && pHandle->imem == NULL) { return false; } - + assert(pCheckInfo->iter == NULL && pCheckInfo->iiter == NULL); // TODO: add uid check @@ -338,17 +338,17 @@ static bool initTableMemIterator(STsdbQueryHandle* pHandle, STableCheckInfo* pCh if (pCheckInfo->iter == NULL && pCheckInfo->iiter == NULL) { return false; } - + bool memEmpty = (pCheckInfo->iter == NULL) || (pCheckInfo->iter != NULL && !tSkipListIterNext(pCheckInfo->iter)); bool imemEmpty = (pCheckInfo->iiter == NULL) || (pCheckInfo->iiter != NULL && !tSkipListIterNext(pCheckInfo->iiter)); if (memEmpty && imemEmpty) { // buffer is empty return false; } - + if (!memEmpty) { SSkipListNode* node = tSkipListIterGet(pCheckInfo->iter); assert(node != NULL); - + SDataRow row = SL_GET_NODE_DATA(node); TSKEY key = dataRowKey(row); // first timestamp in buffer tsdbDebug("%p uid:%" PRId64", tid:%d check data in mem from skey:%" PRId64 ", order:%d, %p", pHandle, @@ -357,11 +357,11 @@ static bool initTableMemIterator(STsdbQueryHandle* pHandle, STableCheckInfo* pCh tsdbDebug("%p uid:%"PRId64", tid:%d no data in mem, %p", pHandle, pCheckInfo->tableId.uid, pCheckInfo->tableId.tid, pHandle->qinfo); } - + if (!imemEmpty) { SSkipListNode* node = tSkipListIterGet(pCheckInfo->iiter); assert(node != NULL); - + SDataRow row = SL_GET_NODE_DATA(node); TSKEY key = dataRowKey(row); // first timestamp in buffer tsdbDebug("%p uid:%" PRId64", tid:%d check data in imem from skey:%" PRId64 ", order:%d, %p", pHandle, @@ -370,7 +370,7 @@ static bool initTableMemIterator(STsdbQueryHandle* pHandle, STableCheckInfo* pCh tsdbDebug("%p uid:%"PRId64", tid:%d no data in imem, %p", pHandle, pCheckInfo->tableId.uid, pCheckInfo->tableId.tid, pHandle->qinfo); } - + return true; } @@ -473,7 +473,7 @@ static bool hasMoreDataInCache(STsdbQueryHandle* pHandle) { size_t size = taosArrayGetSize(pHandle->pTableCheckInfo); assert(pHandle->activeIndex < size && pHandle->activeIndex >= 0 && size >= 1); pHandle->cur.fid = -1; - + STableCheckInfo* pCheckInfo = taosArrayGet(pHandle->pTableCheckInfo, pHandle->activeIndex); STable* pTable = pCheckInfo->pTableObj; @@ -491,17 +491,17 @@ static bool hasMoreDataInCache(STsdbQueryHandle* pHandle) { pCheckInfo->lastKey = dataRowKey(row); // first timestamp in buffer tsdbDebug("%p uid:%" PRId64", tid:%d check data in buffer from skey:%" PRId64 ", order:%d, %p", pHandle, pCheckInfo->tableId.uid, pCheckInfo->tableId.tid, pCheckInfo->lastKey, pHandle->order, pHandle->qinfo); - + // all data in mem are checked already. if ((pCheckInfo->lastKey > pHandle->window.ekey && ASCENDING_TRAVERSE(pHandle->order)) || (pCheckInfo->lastKey < pHandle->window.ekey && !ASCENDING_TRAVERSE(pHandle->order))) { return false; } - + int32_t step = ASCENDING_TRAVERSE(pHandle->order)? 1:-1; STimeWindow* win = &pHandle->cur.win; pHandle->cur.rows = tsdbReadRowsFromCache(pCheckInfo, pHandle->window.ekey, pHandle->outputCapacity, win, pHandle); - + // update the last key value pCheckInfo->lastKey = win->ekey + step; pHandle->cur.lastKey = win->ekey + step; @@ -510,7 +510,7 @@ static bool hasMoreDataInCache(STsdbQueryHandle* pHandle) { if (!ASCENDING_TRAVERSE(pHandle->order)) { SWAP(win->skey, win->ekey, TSKEY); } - + return true; } @@ -519,31 +519,31 @@ static int32_t getFileIdFromKey(TSKEY key, int32_t daysPerFile, int32_t precisio if (key == TSKEY_INITIAL_VAL) { return INT32_MIN; } - + int64_t fid = (int64_t)(key / (daysPerFile * tsMsPerDay[precision])); // set the starting fileId if (fid < 0L && llabs(fid) > INT32_MAX) { // data value overflow for INT32 fid = INT32_MIN; } - + if (fid > 0L && fid > INT32_MAX) { fid = INT32_MAX; } - + return fid; } static int32_t binarySearchForBlock(SCompBlock* pBlock, int32_t numOfBlocks, TSKEY skey, int32_t order) { int32_t firstSlot = 0; int32_t lastSlot = numOfBlocks - 1; - + int32_t midSlot = firstSlot; - + while (1) { numOfBlocks = lastSlot - firstSlot + 1; midSlot = (firstSlot + (numOfBlocks >> 1)); - + if (numOfBlocks == 1) break; - + if (skey > pBlock[midSlot].keyLast) { if (numOfBlocks == 2) break; if ((order == TSDB_ORDER_DESC) && (skey < pBlock[midSlot + 1].keyFirst)) break; @@ -555,7 +555,7 @@ static int32_t binarySearchForBlock(SCompBlock* pBlock, int32_t numOfBlocks, TSK break; // got the slot } } - + return midSlot; } @@ -644,7 +644,7 @@ static int32_t getFileCompInfo(STsdbQueryHandle* pQueryHandle, int32_t* numOfBlo .uid = (_checkInfo)->tableId.uid}) -static bool doLoadFileDataBlock(STsdbQueryHandle* pQueryHandle, SCompBlock* pBlock, STableCheckInfo* pCheckInfo) { +static bool doLoadFileDataBlock(STsdbQueryHandle* pQueryHandle, SCompBlock* pBlock, STableCheckInfo* pCheckInfo, int32_t slotIndex) { STsdbRepo *pRepo = pQueryHandle->pTsdb; bool blockLoaded = false; int64_t st = taosGetTimestampUs(); @@ -678,8 +678,9 @@ static bool doLoadFileDataBlock(STsdbQueryHandle* pQueryHandle, SCompBlock* pBlo int64_t elapsedTime = (taosGetTimestampUs() - st); pQueryHandle->cost.blockLoadTime += elapsedTime; - tsdbDebug("%p load file block into buffer, elapsed time:%"PRId64 " us", pQueryHandle, elapsedTime); + tsdbDebug("%p load file block into buffer, index:%d, brange:%"PRId64"-%"PRId64" , rows:%d, elapsed time:%"PRId64 " us, %p", + pQueryHandle, slotIndex, pBlock->keyFirst, pBlock->keyLast, pBlock->numOfRows, elapsedTime, pQueryHandle->qinfo); return blockLoaded; } @@ -692,18 +693,17 @@ static void handleDataMergeIfNeeded(STsdbQueryHandle* pQueryHandle, SCompBlock* TSKEY key = (row != NULL)? dataRowKey(row):TSKEY_INITIAL_VAL; cur->pos = ASCENDING_TRAVERSE(pQueryHandle->order)? 0:(binfo.rows-1); - + if ((ASCENDING_TRAVERSE(pQueryHandle->order) && (key != TSKEY_INITIAL_VAL && key <= binfo.window.ekey)) || (!ASCENDING_TRAVERSE(pQueryHandle->order) && (key != TSKEY_INITIAL_VAL && key >= binfo.window.skey))) { - + if ((ASCENDING_TRAVERSE(pQueryHandle->order) && (key != TSKEY_INITIAL_VAL && key < binfo.window.skey)) || (!ASCENDING_TRAVERSE(pQueryHandle->order) && (key != TSKEY_INITIAL_VAL && key > binfo.window.ekey))) { // do not load file block into buffer int32_t step = ASCENDING_TRAVERSE(pQueryHandle->order) ? 1 : -1; - cur->rows = tsdbReadRowsFromCache(pCheckInfo, binfo.window.skey - step, - pQueryHandle->outputCapacity, &cur->win, pQueryHandle); + cur->rows = tsdbReadRowsFromCache(pCheckInfo, binfo.window.skey - step, pQueryHandle->outputCapacity, &cur->win, pQueryHandle); pQueryHandle->realNumOfRows = cur->rows; // update the last key value @@ -711,13 +711,13 @@ static void handleDataMergeIfNeeded(STsdbQueryHandle* pQueryHandle, SCompBlock* if (!ASCENDING_TRAVERSE(pQueryHandle->order)) { SWAP(cur->win.skey, cur->win.ekey, TSKEY); } - + cur->mixBlock = true; cur->blockCompleted = false; return; } - - doLoadFileDataBlock(pQueryHandle, pBlock, pCheckInfo); + + doLoadFileDataBlock(pQueryHandle, pBlock, pCheckInfo, cur->slot); doMergeTwoLevelData(pQueryHandle, pCheckInfo, pBlock); } else { /* @@ -742,39 +742,41 @@ static bool loadFileDataBlock(STsdbQueryHandle* pQueryHandle, SCompBlock* pBlock SQueryFilePos* cur = &pQueryHandle->cur; if (ASCENDING_TRAVERSE(pQueryHandle->order)) { - // query ended in current block + // query ended in/started from current block if (pQueryHandle->window.ekey < pBlock->keyLast || pCheckInfo->lastKey > pBlock->keyFirst) { - if (!doLoadFileDataBlock(pQueryHandle, pBlock, pCheckInfo)) { + if (!doLoadFileDataBlock(pQueryHandle, pBlock, pCheckInfo, cur->slot)) { return false; } SDataCols* pTSCol = pQueryHandle->rhelper.pDataCols[0]; assert(pTSCol->cols->type == TSDB_DATA_TYPE_TIMESTAMP && pTSCol->numOfRows == pBlock->numOfRows); - + if (pCheckInfo->lastKey > pBlock->keyFirst) { cur->pos = binarySearchForKey(pTSCol->cols[0].pData, pBlock->numOfRows, pCheckInfo->lastKey, pQueryHandle->order); } else { cur->pos = 0; } - + + assert(pCheckInfo->lastKey <= pBlock->keyLast); doMergeTwoLevelData(pQueryHandle, pCheckInfo, pBlock); } else { // the whole block is loaded in to buffer handleDataMergeIfNeeded(pQueryHandle, pBlock, pCheckInfo); } } else { //desc order, query ended in current block if (pQueryHandle->window.ekey > pBlock->keyFirst || pCheckInfo->lastKey < pBlock->keyLast) { - if (!doLoadFileDataBlock(pQueryHandle, pBlock, pCheckInfo)) { + if (!doLoadFileDataBlock(pQueryHandle, pBlock, pCheckInfo, cur->slot)) { return false; } - + SDataCols* pTSCol = pQueryHandle->rhelper.pDataCols[0]; if (pCheckInfo->lastKey < pBlock->keyLast) { cur->pos = binarySearchForKey(pTSCol->cols[0].pData, pBlock->numOfRows, pCheckInfo->lastKey, pQueryHandle->order); } else { cur->pos = pBlock->numOfRows - 1; } - + + assert(pCheckInfo->lastKey >= pBlock->keyFirst); doMergeTwoLevelData(pQueryHandle, pCheckInfo, pBlock); } else { handleDataMergeIfNeeded(pQueryHandle, pBlock, pCheckInfo); @@ -790,7 +792,7 @@ static int doBinarySearchKey(char* pValue, int num, TSKEY key, int order) { TSKEY* keyList; assert(order == TSDB_ORDER_ASC || order == TSDB_ORDER_DESC); - + if (num <= 0) return -1; keyList = (TSKEY*)pValue; @@ -849,13 +851,19 @@ static int doBinarySearchKey(char* pValue, int num, TSKEY key, int order) { static int32_t copyDataFromFileBlock(STsdbQueryHandle* pQueryHandle, int32_t capacity, int32_t numOfRows, int32_t start, int32_t end) { char* pData = NULL; int32_t step = ASCENDING_TRAVERSE(pQueryHandle->order)? 1 : -1; - + SDataCols* pCols = pQueryHandle->rhelper.pDataCols[0]; TSKEY* tsArray = pCols->cols[0].pData; - + int32_t num = end - start + 1; + assert(num >= 0); + + if (num == 0) { + return numOfRows; + } + int32_t requiredNumOfCols = taosArrayGetSize(pQueryHandle->pColumns); - + //data in buffer has greater timestamp, copy data in file block int32_t i = 0, j = 0; while(i < requiredNumOfCols && j < pCols->numOfCols) { @@ -928,7 +936,7 @@ static int32_t copyDataFromFileBlock(STsdbQueryHandle* pQueryHandle, int32_t cap i++; } - + pQueryHandle->cur.win.ekey = tsArray[end]; pQueryHandle->cur.lastKey = tsArray[end] + step; @@ -995,24 +1003,99 @@ static void copyOneRowFromMem(STsdbQueryHandle* pQueryHandle, int32_t capacity, } } +static void moveDataToFront(STsdbQueryHandle* pQueryHandle, int32_t numOfRows, int32_t numOfCols) { + if (numOfRows == 0 || ASCENDING_TRAVERSE(pQueryHandle->order)) { + return; + } + + // if the buffer is not full in case of descending order query, move the data in the front of the buffer + if (numOfRows < pQueryHandle->outputCapacity) { + int32_t emptySize = pQueryHandle->outputCapacity - numOfRows; + for(int32_t i = 0; i < numOfCols; ++i) { + SColumnInfoData* pColInfo = taosArrayGet(pQueryHandle->pColumns, i); + memmove(pColInfo->pData, pColInfo->pData + emptySize * pColInfo->info.bytes, numOfRows * pColInfo->info.bytes); + } + } +} + +static void getQualifiedRowsPos(STsdbQueryHandle* pQueryHandle, int32_t startPos, int32_t endPos, + int32_t numOfExisted, int32_t *start, int32_t *end) { + *start = -1; + + if (ASCENDING_TRAVERSE(pQueryHandle->order)) { + int32_t remain = endPos - startPos + 1; + if (remain + numOfExisted > pQueryHandle->outputCapacity) { + *end = (pQueryHandle->outputCapacity - numOfExisted) + startPos - 1; + } else { + *end = endPos; + } + + *start = startPos; + } else { + int32_t remain = (startPos - endPos) + 1; + if (remain + numOfExisted > pQueryHandle->outputCapacity) { + *end = startPos + 1 - (pQueryHandle->outputCapacity - numOfExisted); + } else { + *end = endPos; + } + + *start = *end; + *end = startPos; + } +} + +static void updateInfoAfterMerge(STsdbQueryHandle* pQueryHandle, STableCheckInfo* pCheckInfo, int32_t numOfRows, int32_t endPos) { + SQueryFilePos* cur = &pQueryHandle->cur; + + pCheckInfo->lastKey = cur->lastKey; + pQueryHandle->realNumOfRows = numOfRows; + cur->rows = numOfRows; + cur->pos = endPos; +} + +static void doCheckGeneratedBlockRange(STsdbQueryHandle* pQueryHandle) { + SQueryFilePos* cur = &pQueryHandle->cur; + + if (cur->rows > 0) { + if (ASCENDING_TRAVERSE(pQueryHandle->order)) { + assert(cur->win.skey >= pQueryHandle->window.skey && cur->win.ekey <= pQueryHandle->window.ekey); + } else { + assert(cur->win.skey >= pQueryHandle->window.ekey && cur->win.ekey <= pQueryHandle->window.skey); + } + + SColumnInfoData* pColInfoData = taosArrayGet(pQueryHandle->pColumns, 0); + assert(cur->win.skey == ((TSKEY*)pColInfoData->pData)[0] && cur->win.ekey == ((TSKEY*)pColInfoData->pData)[cur->rows-1]); + } else { + cur->win = pQueryHandle->window; + + int32_t step = ASCENDING_TRAVERSE(pQueryHandle->order)? 1:-1; + cur->lastKey = pQueryHandle->window.ekey + step; + } +} + // only return the qualified data to client in terms of query time window, data rows in the same block but do not // be included in the query time window will be discarded static void doMergeTwoLevelData(STsdbQueryHandle* pQueryHandle, STableCheckInfo* pCheckInfo, SCompBlock* pBlock) { SQueryFilePos* cur = &pQueryHandle->cur; SDataBlockInfo blockInfo = GET_FILE_DATA_BLOCK_INFO(pCheckInfo, pBlock); - + initTableMemIterator(pQueryHandle, pCheckInfo); + SDataCols* pCols = pQueryHandle->rhelper.pDataCols[0]; + assert(pCols->cols[0].type == TSDB_DATA_TYPE_TIMESTAMP && pCols->cols[0].colId == PRIMARYKEY_TIMESTAMP_COL_INDEX && + cur->pos >= 0 && cur->pos < pBlock->numOfRows); + + TSKEY* tsArray = pCols->cols[0].pData; // for search the endPos, so the order needs to reverse int32_t order = (pQueryHandle->order == TSDB_ORDER_ASC)? TSDB_ORDER_DESC:TSDB_ORDER_ASC; int32_t step = ASCENDING_TRAVERSE(pQueryHandle->order)? 1:-1; - int32_t numOfCols = taosArrayGetSize(pQueryHandle->pColumns); + int32_t numOfCols = QH_GET_NUM_OF_COLS(pQueryHandle); STable* pTable = pCheckInfo->pTableObj; - int32_t endPos = cur->pos; + if (ASCENDING_TRAVERSE(pQueryHandle->order) && pQueryHandle->window.ekey > blockInfo.window.ekey) { endPos = blockInfo.rows - 1; cur->mixBlock = (cur->pos != 0); @@ -1024,48 +1107,36 @@ static void doMergeTwoLevelData(STsdbQueryHandle* pQueryHandle, STableCheckInfo* endPos = doBinarySearchKey(pCols->cols[0].pData, pCols->numOfRows, pQueryHandle->window.ekey, order); cur->mixBlock = true; } - + // compared with the data from in-memory buffer, to generate the correct timestamp array list - int32_t pos = cur->pos; - - assert(pCols->cols[0].type == TSDB_DATA_TYPE_TIMESTAMP && pCols->cols[0].colId == 0); - TSKEY* tsArray = pCols->cols[0].pData; - int32_t numOfRows = 0; - pQueryHandle->cur.win = TSWINDOW_INITIALIZER; + int32_t pos = cur->pos; + cur->win = TSWINDOW_INITIALIZER; // no data in buffer, load data from file directly if (pCheckInfo->iiter == NULL && pCheckInfo->iter == NULL) { int32_t start = cur->pos; int32_t end = endPos; + if (!ASCENDING_TRAVERSE(pQueryHandle->order)) { - end = cur->pos; - start = endPos; - } - - cur->win.skey = tsArray[start]; - cur->win.ekey = tsArray[end]; - - // todo opt in case of no data in buffer + SWAP(start, end, int32_t); + } + numOfRows = copyDataFromFileBlock(pQueryHandle, pQueryHandle->outputCapacity, numOfRows, start, end); - - // if the buffer is not full in case of descending order query, move the data in the front of the buffer - if (!ASCENDING_TRAVERSE(pQueryHandle->order) && numOfRows < pQueryHandle->outputCapacity) { - int32_t emptySize = pQueryHandle->outputCapacity - numOfRows; - for(int32_t i = 0; i < numOfCols; ++i) { - SColumnInfoData* pColInfo = taosArrayGet(pQueryHandle->pColumns, i); - memmove(pColInfo->pData, pColInfo->pData + emptySize * pColInfo->info.bytes, numOfRows * pColInfo->info.bytes); - } - } - + // the time window should always be right order: skey <= ekey + cur->win = (STimeWindow) {.skey = tsArray[start], .ekey = tsArray[end]}; + cur->lastKey = tsArray[endPos]; pos += (end - start + 1) * step; - cur->blockCompleted = (((pos >= endPos || cur->lastKey > pQueryHandle->window.ekey) && ASCENDING_TRAVERSE(pQueryHandle->order)) || - ((pos <= endPos || cur->lastKey < pQueryHandle->window.ekey) && !ASCENDING_TRAVERSE(pQueryHandle->order))); - - pCheckInfo->lastKey = cur->lastKey; - pQueryHandle->realNumOfRows = numOfRows; - cur->rows = numOfRows; + + cur->blockCompleted = + (((pos >= endPos || cur->lastKey > pQueryHandle->window.ekey) && ASCENDING_TRAVERSE(pQueryHandle->order)) || + ((pos <= endPos || cur->lastKey < pQueryHandle->window.ekey) && !ASCENDING_TRAVERSE(pQueryHandle->order))); + + // if the buffer is not full in case of descending order query, move the data in the front of the buffer + moveDataToFront(pQueryHandle, numOfRows, numOfCols); + updateInfoAfterMerge(pQueryHandle, pCheckInfo, numOfRows, pos); + doCheckGeneratedBlockRange(pQueryHandle); return; } else if (pCheckInfo->iter != NULL || pCheckInfo->iiter != NULL) { SSkipListNode* node = NULL; @@ -1111,30 +1182,18 @@ static void doMergeTwoLevelData(STsdbQueryHandle* pQueryHandle, STableCheckInfo* if (tsArray[end] == key) { // the value of key in cache equals to the end timestamp value, ignore it moveToNextRowInMem(pCheckInfo); } - - int32_t start = -1; - if (ASCENDING_TRAVERSE(pQueryHandle->order)) { - int32_t remain = end - pos + 1; - if (remain + numOfRows > pQueryHandle->outputCapacity) { - end = (pQueryHandle->outputCapacity - numOfRows) + pos - 1; - } - start = pos; - } else { - int32_t remain = (pos - end) + 1; - if (remain + numOfRows > pQueryHandle->outputCapacity) { - end = pos + 1 - (pQueryHandle->outputCapacity - numOfRows); - } + int32_t qstart = 0, qend = 0; + getQualifiedRowsPos(pQueryHandle, pos, end, numOfRows, &qstart, &qend); - start = end; - end = pos; - } + numOfRows = copyDataFromFileBlock(pQueryHandle, pQueryHandle->outputCapacity, numOfRows, qstart, qend); + pos += (qend - qstart + 1) * step; - numOfRows = copyDataFromFileBlock(pQueryHandle, pQueryHandle->outputCapacity, numOfRows, start, end); - pos += (end - start + 1) * step; + cur->win.ekey = ASCENDING_TRAVERSE(pQueryHandle->order)? tsArray[qend]:tsArray[qstart]; + cur->lastKey = cur->win.ekey + step; } } while (numOfRows < pQueryHandle->outputCapacity); - + if (numOfRows < pQueryHandle->outputCapacity) { /** * if cache is empty, load remain file block data. In contrast, if there are remain data in cache, do NOT @@ -1148,54 +1207,29 @@ static void doMergeTwoLevelData(STsdbQueryHandle* pQueryHandle, STableCheckInfo* cur->win.skey = tsArray[pos]; } - int32_t start = -1; - int32_t end = -1; - - // all remain data are qualified, but check the remain capacity in the first place. - if (ASCENDING_TRAVERSE(pQueryHandle->order)) { - int32_t remain = endPos - pos + 1; - if (remain + numOfRows > pQueryHandle->outputCapacity) { - endPos = (pQueryHandle->outputCapacity - numOfRows) + pos - 1; - } - - start = pos; - end = endPos; - } else { - int32_t remain = pos + 1; - if (remain + numOfRows > pQueryHandle->outputCapacity) { - endPos = pos + 1 - (pQueryHandle->outputCapacity - numOfRows); - } - - start = endPos; - end = pos; - } + int32_t start = -1, end = -1; + getQualifiedRowsPos(pQueryHandle, pos, endPos, numOfRows, &start, &end); numOfRows = copyDataFromFileBlock(pQueryHandle, pQueryHandle->outputCapacity, numOfRows, start, end); pos += (end - start + 1) * step; + + cur->win.ekey = ASCENDING_TRAVERSE(pQueryHandle->order)? tsArray[end]:tsArray[start]; + cur->lastKey = cur->win.ekey + step; } } } - - cur->blockCompleted = (((pos >= endPos || cur->lastKey > pQueryHandle->window.ekey) && ASCENDING_TRAVERSE(pQueryHandle->order)) || - ((pos <= endPos || cur->lastKey < pQueryHandle->window.ekey) && !ASCENDING_TRAVERSE(pQueryHandle->order))); + + cur->blockCompleted = + (((pos >= endPos || cur->lastKey > pQueryHandle->window.ekey) && ASCENDING_TRAVERSE(pQueryHandle->order)) || + ((pos <= endPos || cur->lastKey < pQueryHandle->window.ekey) && !ASCENDING_TRAVERSE(pQueryHandle->order))); if (!ASCENDING_TRAVERSE(pQueryHandle->order)) { SWAP(cur->win.skey, cur->win.ekey, TSKEY); - - // if the buffer is not full in case of descending order query, move the data in the front of the buffer - if (numOfRows < pQueryHandle->outputCapacity) { - int32_t emptySize = pQueryHandle->outputCapacity - numOfRows; - for(int32_t i = 0; i < numOfCols; ++i) { - SColumnInfoData* pColInfo = taosArrayGet(pQueryHandle->pColumns, i); - memmove(pColInfo->pData, pColInfo->pData + emptySize * pColInfo->info.bytes, numOfRows * pColInfo->info.bytes); - } - } } - - pCheckInfo->lastKey = cur->lastKey; - pQueryHandle->realNumOfRows = numOfRows; - cur->rows = numOfRows; - cur->pos = pos; + + moveDataToFront(pQueryHandle, numOfRows, numOfCols); + updateInfoAfterMerge(pQueryHandle, pCheckInfo, numOfRows, pos); + doCheckGeneratedBlockRange(pQueryHandle); tsdbDebug("%p uid:%" PRIu64",tid:%d data block created, brange:%"PRIu64"-%"PRIu64" rows:%d, %p", pQueryHandle, pCheckInfo->tableId.uid, pCheckInfo->tableId.tid, cur->win.skey, cur->win.ekey, cur->rows, pQueryHandle->qinfo); @@ -1332,16 +1366,16 @@ static int32_t createDataBlocksInfo(STsdbQueryHandle* pQueryHandle, int32_t numO cleanBlockOrderSupporter(&sup, 0); return TSDB_CODE_TDB_OUT_OF_MEMORY; } - + int32_t cnt = 0; int32_t numOfQualTables = 0; - + for (int32_t j = 0; j < numOfTables; ++j) { STableCheckInfo* pTableCheck = (STableCheckInfo*)taosArrayGet(pQueryHandle->pTableCheckInfo, j); if (pTableCheck->numOfBlocks <= 0) { continue; } - + SCompBlock* pBlock = pTableCheck->pCompInfo->blocks; sup.numOfBlocksPerTable[numOfQualTables] = pTableCheck->numOfBlocks; @@ -1437,37 +1471,39 @@ static int32_t getDataBlocksInFilesImpl(STsdbQueryHandle* pQueryHandle, bool* ex // current file are not overlapped with query time window, ignore remain files if ((ASCENDING_TRAVERSE(pQueryHandle->order) && win.skey > pQueryHandle->window.ekey) || - (!ASCENDING_TRAVERSE(pQueryHandle->order) && win.ekey < pQueryHandle->window.ekey)) { - tsdbDebug("%p remain files are not qualified for qrange:%"PRId64"-%"PRId64", ignore, %p", pQueryHandle, pQueryHandle->window.skey, pQueryHandle->window.ekey, pQueryHandle->qinfo) + (!ASCENDING_TRAVERSE(pQueryHandle->order) && win.ekey < pQueryHandle->window.ekey)) { + tsdbDebug("%p remain files are not qualified for qrange:%" PRId64 "-%" PRId64 ", ignore, %p", pQueryHandle, + pQueryHandle->window.skey, pQueryHandle->window.ekey, pQueryHandle->qinfo); pQueryHandle->pFileGroup = NULL; + assert(pQueryHandle->numOfBlocks == 0); break; } if ((code = getFileCompInfo(pQueryHandle, &numOfBlocks)) != TSDB_CODE_SUCCESS) { break; } - - tsdbDebug("%p %d blocks found in file for %d table(s), fid:%d, %p", pQueryHandle, numOfBlocks, - numOfTables, pQueryHandle->pFileGroup->fileId, pQueryHandle->qinfo); - + + tsdbDebug("%p %d blocks found in file for %d table(s), fid:%d, %p", pQueryHandle, numOfBlocks, numOfTables, + pQueryHandle->pFileGroup->fileId, pQueryHandle->qinfo); + assert(numOfBlocks >= 0); if (numOfBlocks == 0) { continue; } - + // todo return error code to query engine - if (createDataBlocksInfo(pQueryHandle, numOfBlocks, &pQueryHandle->numOfBlocks) != TSDB_CODE_SUCCESS) { + if ((code = createDataBlocksInfo(pQueryHandle, numOfBlocks, &pQueryHandle->numOfBlocks)) != TSDB_CODE_SUCCESS) { break; } - + assert(numOfBlocks >= pQueryHandle->numOfBlocks); if (pQueryHandle->numOfBlocks > 0) { break; } } - + // no data in file anymore - if (pQueryHandle->numOfBlocks <= 0) { + if (pQueryHandle->numOfBlocks <= 0 || code != TSDB_CODE_SUCCESS) { if (code == TSDB_CODE_SUCCESS) { assert(pQueryHandle->pFileGroup == NULL); } @@ -1476,10 +1512,11 @@ static int32_t getDataBlocksInFilesImpl(STsdbQueryHandle* pQueryHandle, bool* ex *exists = false; return code; } - + + assert(pQueryHandle->pFileGroup != NULL && pQueryHandle->numOfBlocks > 0); cur->slot = ASCENDING_TRAVERSE(pQueryHandle->order)? 0:pQueryHandle->numOfBlocks-1; cur->fid = pQueryHandle->pFileGroup->fileId; - + STableBlockInfo* pBlockInfo = &pQueryHandle->pDataBlockInfo[cur->slot]; *exists = loadFileDataBlock(pQueryHandle, pBlockInfo->compBlock, pBlockInfo->pTableCheckInfo); @@ -1495,7 +1532,7 @@ static int32_t getDataBlocksInFiles(STsdbQueryHandle* pQueryHandle, bool* exists pQueryHandle->locateStart = true; STsdbCfg* pCfg = &pQueryHandle->pTsdb->config; int32_t fid = getFileIdFromKey(pQueryHandle->window.skey, pCfg->daysPerFile, pCfg->precision); - + tsdbInitFileGroupIter(pFileHandle, &pQueryHandle->fileIter, pQueryHandle->order); tsdbSeekFileGroupIter(&pQueryHandle->fileIter, fid); @@ -1504,7 +1541,7 @@ static int32_t getDataBlocksInFiles(STsdbQueryHandle* pQueryHandle, bool* exists // check if current file block is all consumed STableBlockInfo* pBlockInfo = &pQueryHandle->pDataBlockInfo[cur->slot]; STableCheckInfo* pCheckInfo = pBlockInfo->pTableCheckInfo; - + // current block is done, try next if (!cur->mixBlock || cur->blockCompleted) { if ((cur->slot == pQueryHandle->numOfBlocks - 1 && ASCENDING_TRAVERSE(pQueryHandle->order)) || @@ -1515,10 +1552,10 @@ static int32_t getDataBlocksInFiles(STsdbQueryHandle* pQueryHandle, bool* exists // next block of the same file int32_t step = ASCENDING_TRAVERSE(pQueryHandle->order) ? 1 : -1; cur->slot += step; - + cur->mixBlock = false; cur->blockCompleted = false; - + STableBlockInfo* pNext = &pQueryHandle->pDataBlockInfo[cur->slot]; *exists = loadFileDataBlock(pQueryHandle, pNext->compBlock, pNext->pTableCheckInfo); @@ -1540,10 +1577,10 @@ static bool doHasDataInBuffer(STsdbQueryHandle* pQueryHandle) { if (hasMoreDataInCache(pQueryHandle)) { return true; } - + pQueryHandle->activeIndex += 1; } - + return false; } @@ -1561,14 +1598,14 @@ bool tsdbNextDataBlock(TsdbQueryHandleT* pHandle) { if (pQueryHandle->type == TSDB_QUERY_TYPE_EXTERNAL) { pQueryHandle->type = TSDB_QUERY_TYPE_ALL; pQueryHandle->order = TSDB_ORDER_DESC; - + if (!tsdbNextDataBlock(pHandle)) { return false; } - + /*SDataBlockInfo* pBlockInfo =*/ tsdbRetrieveDataBlockInfo(pHandle, &blockInfo); /*SArray *pDataBlock = */tsdbRetrieveDataBlock(pHandle, pQueryHandle->defaultLoadColumn); - + if (pQueryHandle->cur.win.ekey == pQueryHandle->window.skey) { // data already retrieve, discard other data rows and return int32_t numOfCols = QH_GET_NUM_OF_COLS(pQueryHandle); @@ -1576,7 +1613,7 @@ bool tsdbNextDataBlock(TsdbQueryHandleT* pHandle) { SColumnInfoData* pCol = taosArrayGet(pQueryHandle->pColumns, i); memcpy(pCol->pData, pCol->pData + pCol->info.bytes * (pQueryHandle->cur.rows-1), pCol->info.bytes); } - + pQueryHandle->cur.win = (STimeWindow){pQueryHandle->window.skey, pQueryHandle->window.skey}; pQueryHandle->window = pQueryHandle->cur.win; pQueryHandle->cur.rows = 1; @@ -1593,7 +1630,7 @@ bool tsdbNextDataBlock(TsdbQueryHandleT* pHandle) { pSecQueryHandle->checkFiles = true; pSecQueryHandle->activeIndex = 0; pSecQueryHandle->outputCapacity = ((STsdbRepo*)pSecQueryHandle->pTsdb)->config.maxRowsPerFileBlock; - + if (tsdbInitReadHelper(&pSecQueryHandle->rhelper, (STsdbRepo*) pSecQueryHandle->pTsdb) != 0) { free(pSecQueryHandle); return false; @@ -1603,24 +1640,24 @@ bool tsdbNextDataBlock(TsdbQueryHandleT* pHandle) { // allocate buffer in order to load data blocks from file int32_t numOfCols = QH_GET_NUM_OF_COLS(pQueryHandle); - + pSecQueryHandle->statis = calloc(numOfCols, sizeof(SDataStatis)); pSecQueryHandle->pColumns = taosArrayInit(numOfCols, sizeof(SColumnInfoData)); for (int32_t i = 0; i < numOfCols; ++i) { SColumnInfoData colInfo = {{0}, 0}; SColumnInfoData* pCol = taosArrayGet(pQueryHandle->pColumns, i); - + colInfo.info = pCol->info; colInfo.pData = calloc(1, EXTRA_BYTES + pQueryHandle->outputCapacity * pCol->info.bytes); taosArrayPush(pSecQueryHandle->pColumns, &colInfo); } - + size_t si = taosArrayGetSize(pQueryHandle->pTableCheckInfo); pSecQueryHandle->pTableCheckInfo = taosArrayInit(si, sizeof(STableCheckInfo)); STsdbMeta* pMeta = tsdbGetMeta(pQueryHandle->pTsdb); assert(pMeta != NULL); - + for (int32_t j = 0; j < si; ++j) { STableCheckInfo* pCheckInfo = (STableCheckInfo*) taosArrayGet(pQueryHandle->pTableCheckInfo, j); STableCheckInfo info = { @@ -1628,10 +1665,10 @@ bool tsdbNextDataBlock(TsdbQueryHandleT* pHandle) { .tableId = pCheckInfo->tableId, .pTableObj = pCheckInfo->pTableObj, }; - + taosArrayPush(pSecQueryHandle->pTableCheckInfo, &info); } - + tsdbInitDataBlockLoadInfo(&pSecQueryHandle->dataBlockLoadInfo); tsdbInitCompBlockLoadInfo(&pSecQueryHandle->compBlockLoadInfo); pSecQueryHandle->defaultLoadColumn = taosArrayClone(pQueryHandle->defaultLoadColumn); @@ -1641,17 +1678,17 @@ bool tsdbNextDataBlock(TsdbQueryHandleT* pHandle) { tsdbRetrieveDataBlockInfo((void*) pSecQueryHandle, &blockInfo); tsdbRetrieveDataBlock((void*) pSecQueryHandle, pSecQueryHandle->defaultLoadColumn); - + for (int32_t i = 0; i < numOfCols; ++i) { SColumnInfoData* pCol = taosArrayGet(pQueryHandle->pColumns, i); memcpy(pCol->pData, pCol->pData + pCol->info.bytes * (pQueryHandle->cur.rows-1), pCol->info.bytes); - + SColumnInfoData* pCol1 = taosArrayGet(pSecQueryHandle->pColumns, i); assert(pCol->info.colId == pCol1->info.colId); - + memcpy(pCol->pData + pCol->info.bytes, pCol1->pData, pCol1->info.bytes); } - + SColumnInfoData* pTSCol = taosArrayGet(pQueryHandle->pColumns, 0); // it is ascending order @@ -1675,7 +1712,7 @@ bool tsdbNextDataBlock(TsdbQueryHandleT* pHandle) { pQueryHandle->checkFiles = false; return true; } - + if (pQueryHandle->checkFiles) { bool exists = true; int32_t code = getDataBlocksInFiles(pQueryHandle, &exists); @@ -1688,11 +1725,11 @@ bool tsdbNextDataBlock(TsdbQueryHandleT* pHandle) { pQueryHandle->cost.checkForNextTime += elapsedTime; return exists; } - + pQueryHandle->activeIndex = 0; pQueryHandle->checkFiles = false; } - + // TODO: opt by consider the scan order bool ret = doHasDataInBuffer(pQueryHandle); terrno = TSDB_CODE_SUCCESS; @@ -1705,15 +1742,15 @@ bool tsdbNextDataBlock(TsdbQueryHandleT* pHandle) { void changeQueryHandleForLastrowQuery(TsdbQueryHandleT pqHandle) { STsdbQueryHandle* pQueryHandle = (STsdbQueryHandle*) pqHandle; assert(!ASCENDING_TRAVERSE(pQueryHandle->order)); - + // starts from the buffer in case of descending timestamp order check data blocks - + // todo consider the query time window, current last_row does not apply the query time window size_t numOfTables = taosArrayGetSize(pQueryHandle->pTableCheckInfo); - + TSKEY key = TSKEY_INITIAL_VAL; int32_t index = -1; - + for(int32_t i = 0; i < numOfTables; ++i) { STableCheckInfo* pCheckInfo = taosArrayGet(pQueryHandle->pTableCheckInfo, i); if (pCheckInfo->pTableObj->lastKey > key) { @@ -1721,36 +1758,36 @@ void changeQueryHandleForLastrowQuery(TsdbQueryHandleT pqHandle) { index = i; } } - + if (index == -1) { // todo add failure test cases return; } - + // erase all other elements in array list size_t size = taosArrayGetSize(pQueryHandle->pTableCheckInfo); for (int32_t i = 0; i < size; ++i) { if (i == index) { continue; } - + STableCheckInfo* pTableCheckInfo = taosArrayGet(pQueryHandle->pTableCheckInfo, i); tSkipListDestroyIter(pTableCheckInfo->iter); - + if (pTableCheckInfo->pDataCols != NULL) { tfree(pTableCheckInfo->pDataCols->buf); } - + tfree(pTableCheckInfo->pDataCols); tfree(pTableCheckInfo->pCompInfo); } - + STableCheckInfo info = *(STableCheckInfo*) taosArrayGet(pQueryHandle->pTableCheckInfo, index); taosArrayClear(pQueryHandle->pTableCheckInfo); - + info.lastKey = key; taosArrayPush(pQueryHandle->pTableCheckInfo, &info); - + // update the query time window according to the chosen last timestamp pQueryHandle->window = (STimeWindow) {key, key}; } @@ -1759,13 +1796,13 @@ static void changeQueryHandleForInterpQuery(TsdbQueryHandleT pHandle) { // filter the queried time stamp in the first place STsdbQueryHandle* pQueryHandle = (STsdbQueryHandle*) pHandle; pQueryHandle->order = TSDB_ORDER_DESC; - + assert(pQueryHandle->window.skey == pQueryHandle->window.ekey); - + // starts from the buffer in case of descending timestamp order check data blocks // todo consider the query time window, current last_row does not apply the query time window size_t numOfTables = taosArrayGetSize(pQueryHandle->pTableCheckInfo); - + int32_t i = 0; while(i < numOfTables) { STableCheckInfo* pCheckInfo = taosArrayGet(pQueryHandle->pTableCheckInfo, i); @@ -1773,21 +1810,21 @@ static void changeQueryHandleForInterpQuery(TsdbQueryHandleT pHandle) { pCheckInfo->pTableObj->lastKey != TSKEY_INITIAL_VAL) { break; } - + i++; } - + // there are no data in all the tables if (i == numOfTables) { return; } - + STableCheckInfo info = *(STableCheckInfo*) taosArrayGet(pQueryHandle->pTableCheckInfo, i); taosArrayClear(pQueryHandle->pTableCheckInfo); - + info.lastKey = pQueryHandle->window.skey; taosArrayPush(pQueryHandle->pTableCheckInfo, &info); - + // update the query time window according to the chosen last timestamp pQueryHandle->window = (STimeWindow) {info.lastKey, TSKEY_INITIAL_VAL}; } @@ -1811,7 +1848,7 @@ static int tsdbReadRowsFromCache(STableCheckInfo* pCheckInfo, TSKEY maxKey, int if ((key > maxKey && ASCENDING_TRAVERSE(pQueryHandle->order)) || (key < maxKey && !ASCENDING_TRAVERSE(pQueryHandle->order))) { tsdbDebug("%p key:%"PRIu64" beyond qrange:%"PRId64" - %"PRId64", no more data in buffer", pQueryHandle, key, pQueryHandle->window.skey, pQueryHandle->window.ekey); - + break; } @@ -1826,24 +1863,24 @@ static int tsdbReadRowsFromCache(STableCheckInfo* pCheckInfo, TSKEY maxKey, int moveToNextRowInMem(pCheckInfo); break; } - + } while(moveToNextRowInMem(pCheckInfo)); assert(numOfRows <= maxRowsToRead); - + // if the buffer is not full in case of descending order query, move the data in the front of the buffer if (!ASCENDING_TRAVERSE(pQueryHandle->order) && numOfRows < maxRowsToRead) { int32_t emptySize = maxRowsToRead - numOfRows; - + for(int32_t i = 0; i < numOfCols; ++i) { SColumnInfoData* pColInfo = taosArrayGet(pQueryHandle->pColumns, i); memmove(pColInfo->pData, pColInfo->pData + emptySize * pColInfo->info.bytes, numOfRows * pColInfo->info.bytes); } } - + int64_t elapsedTime = taosGetTimestampUs() - st; - tsdbDebug("%p build data block from cache completed, elapsed time:%"PRId64" us, numOfRows:%d, numOfCols:%d", pQueryHandle, - elapsedTime, numOfRows, numOfCols); + tsdbDebug("%p build data block from cache completed, elapsed time:%"PRId64" us, numOfRows:%d, numOfCols:%d, %p", pQueryHandle, + elapsedTime, numOfRows, numOfCols, pQueryHandle->qinfo); return numOfRows; } @@ -1852,7 +1889,7 @@ void tsdbRetrieveDataBlockInfo(TsdbQueryHandleT* pQueryHandle, SDataBlockInfo* p STsdbQueryHandle* pHandle = (STsdbQueryHandle*)pQueryHandle; SQueryFilePos* cur = &pHandle->cur; STable* pTable = NULL; - + // there are data in file if (pHandle->cur.fid >= 0) { STableBlockInfo* pBlockInfo = &pHandle->pDataBlockInfo[cur->slot]; @@ -1874,13 +1911,13 @@ void tsdbRetrieveDataBlockInfo(TsdbQueryHandleT* pQueryHandle, SDataBlockInfo* p */ int32_t tsdbRetrieveDataBlockStatisInfo(TsdbQueryHandleT* pQueryHandle, SDataStatis** pBlockStatis) { STsdbQueryHandle* pHandle = (STsdbQueryHandle*) pQueryHandle; - + SQueryFilePos* c = &pHandle->cur; if (c->mixBlock) { *pBlockStatis = NULL; return TSDB_CODE_SUCCESS; } - + STableBlockInfo* pBlockInfo = &pHandle->pDataBlockInfo[c->slot]; assert((c->slot >= 0 && c->slot < pHandle->numOfBlocks) || ((c->slot == pHandle->numOfBlocks) && (c->slot == 0))); @@ -1900,7 +1937,7 @@ int32_t tsdbRetrieveDataBlockStatisInfo(TsdbQueryHandleT* pQueryHandle, SDataSta for(int32_t i = 0; i < numOfCols; ++i) { pHandle->statis[i].colId = colIds[i]; } - + tsdbGetDataStatis(&pHandle->rhelper, pHandle->statis, numOfCols); // always load the first primary timestamp column data @@ -1949,31 +1986,31 @@ SArray* tsdbRetrieveDataBlock(TsdbQueryHandleT* pQueryHandle, SArray* pIdList) { } else { SDataBlockInfo binfo = GET_FILE_DATA_BLOCK_INFO(pCheckInfo, pBlockInfo->compBlock); assert(pHandle->realNumOfRows <= binfo.rows); - + // data block has been loaded, todo extract method SDataBlockLoadInfo* pBlockLoadInfo = &pHandle->dataBlockLoadInfo; - + if (pBlockLoadInfo->slot == pHandle->cur.slot && pBlockLoadInfo->fileGroup->fileId == pHandle->cur.fid && pBlockLoadInfo->tid == pCheckInfo->pTableObj->tableId.tid) { return pHandle->pColumns; } else { // only load the file block SCompBlock* pBlock = pBlockInfo->compBlock; - doLoadFileDataBlock(pHandle, pBlock, pCheckInfo); - + doLoadFileDataBlock(pHandle, pBlock, pCheckInfo, pHandle->cur.slot); + // todo refactor int32_t numOfRows = copyDataFromFileBlock(pHandle, pHandle->outputCapacity, 0, 0, pBlock->numOfRows - 1); - + // if the buffer is not full in case of descending order query, move the data in the front of the buffer if (!ASCENDING_TRAVERSE(pHandle->order) && numOfRows < pHandle->outputCapacity) { int32_t emptySize = pHandle->outputCapacity - numOfRows; int32_t reqNumOfCols = taosArrayGetSize(pHandle->pColumns); - + for(int32_t i = 0; i < reqNumOfCols; ++i) { SColumnInfoData* pColInfo = taosArrayGet(pHandle->pColumns, i); memmove(pColInfo->pData, pColInfo->pData + emptySize * pColInfo->info.bytes, numOfRows * pColInfo->info.bytes); } } - + return pHandle->pColumns; } } @@ -1984,11 +2021,11 @@ static int32_t getAllTableList(STable* pSuperTable, SArray* list) { SSkipListIterator* iter = tSkipListCreateIter(pSuperTable->pIndex); while (tSkipListIterNext(iter)) { SSkipListNode* pNode = tSkipListIterGet(iter); - + STable** pTable = (STable**) SL_GET_NODE_DATA((SSkipListNode*) pNode); taosArrayPush(list, pTable); } - + tSkipListDestroyIter(iter); return TSDB_CODE_SUCCESS; } @@ -1998,12 +2035,12 @@ static void destroyHelper(void* param) { return; } - + tQueryInfo* pInfo = (tQueryInfo*)param; if (pInfo->optr != TSDB_RELATION_IN) { tfree(pInfo->q); } - + // tVariantDestroy(&(pInfo->q)); free(param); } @@ -2015,7 +2052,7 @@ void filterPrepare(void* expr, void* param) { } pExpr->_node.info = calloc(1, sizeof(tQueryInfo)); - + STSchema* pTSSchema = (STSchema*) param; tQueryInfo* pInfo = pExpr->_node.info; tVariant* pCond = pExpr->_node.pRight->pVal; @@ -2025,7 +2062,7 @@ void filterPrepare(void* expr, void* param) { pInfo->optr = pExpr->_node.optr; pInfo->compare = getComparFunc(pSchema->type, pInfo->optr); pInfo->param = pTSSchema; - + if (pInfo->optr == TSDB_RELATION_IN) { pInfo->q = (char*) pCond->arr; } else { @@ -2038,25 +2075,24 @@ typedef struct STableGroupSupporter { int32_t numOfCols; SColIndex* pCols; STSchema* pTagSchema; -// void* tsdbMeta; } STableGroupSupporter; int32_t tableGroupComparFn(const void *p1, const void *p2, const void *param) { STableGroupSupporter* pTableGroupSupp = (STableGroupSupporter*) param; STable* pTable1 = *(STable**) p1; STable* pTable2 = *(STable**) p2; - + for (int32_t i = 0; i < pTableGroupSupp->numOfCols; ++i) { SColIndex* pColIndex = &pTableGroupSupp->pCols[i]; int32_t colIndex = pColIndex->colIndex; - + assert(colIndex >= TSDB_TBNAME_COLUMN_INDEX); - + char * f1 = NULL; char * f2 = NULL; int32_t type = 0; int32_t bytes = 0; - + if (colIndex == TSDB_TBNAME_COLUMN_INDEX) { f1 = (char*) TABLE_NAME(pTable1); f2 = (char*) TABLE_NAME(pTable2); @@ -2090,14 +2126,14 @@ int32_t tableGroupComparFn(const void *p1, const void *p2, const void *param) { return ret; } } - + return 0; } void createTableGroupImpl(SArray* pGroups, SArray* pTableList, size_t numOfTables, STableGroupSupporter* pSupp, __ext_compar_fn_t compareFn) { STable* pTable = taosArrayGetP(pTableList, 0); - + SArray* g = taosArrayInit(16, POINTER_BYTES); taosArrayPush(g, &pTable); tsdbRefTable(pTable); @@ -2105,10 +2141,10 @@ void createTableGroupImpl(SArray* pGroups, SArray* pTableList, size_t numOfTable for (int32_t i = 1; i < numOfTables; ++i) { STable** prev = taosArrayGet(pTableList, i - 1); STable** p = taosArrayGet(pTableList, i); - + int32_t ret = compareFn(prev, p, pSupp); assert(ret == 0 || ret == -1); - + tsdbRefTable(*p); assert((*p)->type == TSDB_CHILD_TABLE); @@ -2120,20 +2156,20 @@ void createTableGroupImpl(SArray* pGroups, SArray* pTableList, size_t numOfTable taosArrayPush(g, p); } } - + taosArrayPush(pGroups, &g); } SArray* createTableGroup(SArray* pTableList, STSchema* pTagSchema, SColIndex* pCols, int32_t numOfOrderCols) { assert(pTableList != NULL); SArray* pTableGroup = taosArrayInit(1, POINTER_BYTES); - + size_t size = taosArrayGetSize(pTableList); if (size == 0) { tsdbDebug("no qualified tables"); return pTableGroup; } - + if (numOfOrderCols == 0 || size == 1) { // no group by tags clause or only one table SArray* sa = taosArrayInit(size, POINTER_BYTES); for(int32_t i = 0; i < size; ++i) { @@ -2143,7 +2179,7 @@ SArray* createTableGroup(SArray* pTableList, STSchema* pTagSchema, SColIndex* pC tsdbRefTable(*pTable); taosArrayPush(sa, pTable); } - + taosArrayPush(pTableGroup, &sa); tsdbDebug("all %zu tables belong to one group", size); } else { @@ -2151,18 +2187,18 @@ SArray* createTableGroup(SArray* pTableList, STSchema* pTagSchema, SColIndex* pC pSupp->numOfCols = numOfOrderCols; pSupp->pTagSchema = pTagSchema; pSupp->pCols = pCols; - + taosqsort(pTableList->pData, size, POINTER_BYTES, pSupp, tableGroupComparFn); createTableGroupImpl(pTableGroup, pTableList, size, pSupp, tableGroupComparFn); tfree(pSupp); } - + return pTableGroup; } bool indexedNodeFilterFp(const void* pNode, void* param) { tQueryInfo* pInfo = (tQueryInfo*) param; - + STable* pTable = *(STable**)(SL_GET_NODE_DATA((SSkipListNode*)pNode)); char* val = NULL; @@ -2172,7 +2208,7 @@ bool indexedNodeFilterFp(const void* pNode, void* param) { } else { val = tdGetKVRowValOfCol(pTable->tagVal, pInfo->sch.colId); } - + int32_t ret = 0; if (val == NULL) { //the val is possible to be null, so check it out carefully ret = -1; // val is missing in table tags value pairs @@ -2209,7 +2245,7 @@ bool indexedNodeFilterFp(const void* pNode, void* param) { default: assert(false); } - + return true; } @@ -2239,7 +2275,7 @@ int32_t tsdbQuerySTableByTagCond(TSDB_REPO_T* tsdb, uint64_t uid, const char* pT goto _error; } - + if (pTable->type != TSDB_SUPER_TABLE) { tsdbError("%p query normal tag not allowed, uid:%" PRIu64 ", tid:%d, name:%s", tsdb, uid, pTable->tableId.tid, pTable->name->data); @@ -2252,7 +2288,7 @@ int32_t tsdbQuerySTableByTagCond(TSDB_REPO_T* tsdb, uint64_t uid, const char* pT //NOTE: not add ref count for super table SArray* res = taosArrayInit(8, POINTER_BYTES); STSchema* pTagSchema = tsdbGetTableTagSchema(pTable); - + // no tags and tbname condition, all child tables of this stable are involved if (tbnameCond == NULL && (pTagCond == NULL || len == 0)) { int32_t ret = getAllTableList(pTable, res); @@ -2263,7 +2299,7 @@ int32_t tsdbQuerySTableByTagCond(TSDB_REPO_T* tsdb, uint64_t uid, const char* pT pGroupInfo->numOfTables = taosArrayGetSize(res); pGroupInfo->pGroupList = createTableGroup(res, pTagSchema, pColIndex, numOfCols); - + tsdbDebug("%p no table name/tag condition, all tables belong to one group, numOfTables:%zu", tsdb, pGroupInfo->numOfTables); taosArrayDestroy(res); @@ -2299,7 +2335,7 @@ int32_t tsdbQuerySTableByTagCond(TSDB_REPO_T* tsdb, uint64_t uid, const char* pT } CATCH( code ) { CLEANUP_EXECUTE(); terrno = code; - goto _error; + goto _error; // TODO: more error handling } END_TRY @@ -2335,12 +2371,12 @@ int32_t tsdbGetOneTableGroup(TSDB_REPO_T* tsdb, uint64_t uid, STableGroupInfo* p pGroupInfo->numOfTables = 1; pGroupInfo->pGroupList = taosArrayInit(1, POINTER_BYTES); - + SArray* group = taosArrayInit(1, POINTER_BYTES); - + taosArrayPush(group, &pTable); taosArrayPush(pGroupInfo->pGroupList, &group); - + return TSDB_CODE_SUCCESS; _error: @@ -2428,8 +2464,8 @@ void tsdbCleanupQueryHandle(TsdbQueryHandleT queryHandle) { tsdbDestroyHelper(&pQueryHandle->rhelper); SIOCostSummary* pCost = &pQueryHandle->cost; - tsdbDebug(":io-cost summary: statis-info:%"PRId64"us, datablock:%" PRId64"us, check data:%"PRId64"us, %p", - pCost->statisInfoLoadTime, pCost->blockLoadTime, pCost->checkForNextTime, pQueryHandle->qinfo); + tsdbDebug("%p :io-cost summary: statis-info:%"PRId64"us, datablock:%" PRId64"us, check data:%"PRId64"us, %p", + pQueryHandle, pCost->statisInfoLoadTime, pCost->blockLoadTime, pCost->checkForNextTime, pQueryHandle->qinfo); tfree(pQueryHandle); } diff --git a/src/util/inc/tcache.h b/src/util/inc/tcache.h index 5a3545fd8f20169f7dc5a7a57f2994db40337ef2..1e2aeae394a4c49c104fe24de4a4a643884bdc37 100644 --- a/src/util/inc/tcache.h +++ b/src/util/inc/tcache.h @@ -68,8 +68,6 @@ typedef struct { int64_t refreshTime; STrashElem * pTrash; char* name; -// void * tmrCtrl; -// void * pTimer; SCacheStatis statistics; SHashObj * pHashTable; __cache_free_fn_t freeFp; diff --git a/src/util/inc/tlist.h b/src/util/inc/tlist.h index a4ed9311e24bf5098f9cca8c05edd0998b8d6253..e8380294da47de1c9eb65059263ac820f3d816be 100644 --- a/src/util/inc/tlist.h +++ b/src/util/inc/tlist.h @@ -55,6 +55,8 @@ int tdListPrepend(SList *list, void *data); int tdListAppend(SList *list, void *data); SListNode *tdListPopHead(SList *list); SListNode *tdListPopTail(SList *list); +SListNode *tdListGetHead(SList *list); +SListNode *tsListGetTail(SList *list); SListNode *tdListPopNode(SList *list, SListNode *node); void tdListMove(SList *src, SList *dst); void tdListDiscard(SList *list); diff --git a/src/util/src/tcache.c b/src/util/src/tcache.c index 3d2df5f2146b3aa3e46b715d7c761a70239e7b74..dc9128d4a95d210e82aec4c01ef679540c9945d8 100644 --- a/src/util/src/tcache.c +++ b/src/util/src/tcache.c @@ -343,7 +343,7 @@ void* taosCacheUpdateExpireTimeByName(SCacheObj *pCacheObj, void *key, size_t ke SCacheDataNode **ptNode = (SCacheDataNode **)taosHashGet(pCacheObj->pHashTable, key, keyLen); if (ptNode != NULL) { T_REF_INC(*ptNode); - (*ptNode)->expireTime = taosGetTimestampMs() + (*ptNode)->lifespan; + (*ptNode)->expireTime = expireTime; // taosGetTimestampMs() + (*ptNode)->lifespan; } __cache_unlock(pCacheObj); @@ -381,7 +381,7 @@ void *taosCacheAcquireByData(SCacheObj *pCacheObj, void *data) { } void *taosCacheTransfer(SCacheObj *pCacheObj, void **data) { - if (pCacheObj == NULL || data == NULL) return NULL; + if (pCacheObj == NULL || data == NULL || (*data) == NULL) return NULL; size_t offset = offsetof(SCacheDataNode, data); SCacheDataNode *ptNode = (SCacheDataNode *)((char *)(*data) - offset); @@ -419,7 +419,7 @@ void taosCacheRelease(SCacheObj *pCacheObj, void **data, bool _remove) { // note: extend lifespan before dec ref count bool inTrashCan = pNode->inTrashCan; - if (pCacheObj->extendLifespan && (!inTrashCan)) { + if (pCacheObj->extendLifespan && (!inTrashCan) && (!_remove)) { atomic_store_64(&pNode->expireTime, pNode->lifespan + taosGetTimestampMs()); uDebug("cache:%s data:%p extend life time to %"PRId64 " before release", pCacheObj->name, pNode->data, pNode->expireTime); } @@ -457,8 +457,9 @@ void taosCacheRelease(SCacheObj *pCacheObj, void **data, bool _remove) { } else { // NOTE: once refcount is decrease, pNode may be freed by other thread immediately. int32_t ref = T_REF_DEC(pNode); - uDebug("cache:%s, key:%p, %p is released, refcnt:%d, in trashcan:%d", pCacheObj->name, pNode->key, pNode->data, ref, - inTrashCan); + + uDebug("cache:%s, key:%p, %p released, refcnt:%d, data in trancan:%d", pCacheObj->name, pNode->key, pNode->data, + ref, inTrashCan); } } @@ -572,6 +573,7 @@ void taosRemoveFromTrashCan(SCacheObj *pCacheObj, STrashElem *pElem) { free(pElem); } +// TODO add another lock when scanning trashcan void taosTrashCanEmpty(SCacheObj *pCacheObj, bool force) { __cache_wr_lock(pCacheObj); @@ -643,6 +645,7 @@ static void doCacheRefresh(SCacheObj* pCacheObj, int64_t time, __cache_free_fn_t __cache_wr_lock(pCacheObj); while (taosHashIterNext(pIter)) { SCacheDataNode *pNode = *(SCacheDataNode **)taosHashIterGet(pIter); + if (pNode->expireTime < time && T_REF_VAL_GET(pNode) <= 0) { taosCacheReleaseNode(pCacheObj, pNode); continue; @@ -674,6 +677,7 @@ void* taosCacheTimedRefresh(void *handle) { // check if current cache object will be deleted every 500ms. if (pCacheObj->deleting) { + uDebug("%s refresh threads quit", pCacheObj->name); break; } diff --git a/src/util/src/tcompare.c b/src/util/src/tcompare.c index 889d38ff207315ed489f7877e3ee9f566a5cde41..95645882547110cb36fdc16eafe8cc7d1fe09bcf 100644 --- a/src/util/src/tcompare.c +++ b/src/util/src/tcompare.c @@ -1,6 +1,6 @@ #include "taosdef.h" #include "tcompare.h" -#include +#include "tarray.h" #include "tutil.h" int32_t compareInt32Val(const void *pLeft, const void *pRight) { diff --git a/src/util/src/tlist.c b/src/util/src/tlist.c index f402c2307e68cee6100a28e53dce1935af44fb3f..8c2ad83de117aaf528565a711a2aa3732984e0a9 100644 --- a/src/util/src/tlist.c +++ b/src/util/src/tlist.c @@ -76,6 +76,7 @@ int tdListPrepend(SList *list, void *data) { SListNode *node = (SListNode *)malloc(sizeof(SListNode) + list->eleSize); if (node == NULL) return -1; + node->next = node->prev = NULL; memcpy((void *)(node->data), data, list->eleSize); tdListPrependNode(list, node); @@ -121,6 +122,22 @@ SListNode *tdListPopTail(SList *list) { return node; } +SListNode *tdListGetHead(SList *list) { + if (list == NULL || list->numOfEles == 0) { + return NULL; + } + + return list->head; +} + +SListNode *tsListGetTail(SList *list) { + if (list == NULL || list->numOfEles == 0) { + return NULL; + } + + return list->tail; +} + SListNode *tdListPopNode(SList *list, SListNode *node) { if (list->head == node) { list->head = node->next; diff --git a/src/vnode/src/vnodeRead.c b/src/vnode/src/vnodeRead.c index 973df7c5a10f2ba84a77718602c8ffddd4b14134..066770e1bb9de5d463a1abd2b4078c3d41f9c574 100644 --- a/src/vnode/src/vnodeRead.c +++ b/src/vnode/src/vnodeRead.c @@ -71,11 +71,45 @@ static void vnodePutItemIntoReadQueue(SVnodeObj *pVnode, void *qhandle) { pRead->rpcMsg.msgType = TSDB_MSG_TYPE_QUERY; pRead->pCont = qhandle; pRead->contLen = 0; + pRead->rpcMsg.handle = NULL; atomic_add_fetch_32(&pVnode->refCount, 1); taosWriteQitem(pVnode->rqueue, TAOS_QTYPE_QUERY, pRead); } +static int32_t vnodeDumpQueryResult(SRspRet *pRet, void* pVnode, void* handle, bool* freeHandle) { + bool continueExec = false; + + int32_t code = TSDB_CODE_SUCCESS; + if ((code = qDumpRetrieveResult(handle, (SRetrieveTableRsp **)&pRet->rsp, &pRet->len, &continueExec)) == TSDB_CODE_SUCCESS) { + if (continueExec) { + vDebug("QInfo:%p add to query task queue for exec", handle); + vnodePutItemIntoReadQueue(pVnode, handle); + pRet->qhandle = handle; + *freeHandle = false; + } else { + vDebug("QInfo:%p exec completed", handle); + *freeHandle = true; + } + } else { + pRet->rsp = (SRetrieveTableRsp *)rpcMallocCont(sizeof(SRetrieveTableRsp)); + memset(pRet->rsp, 0, sizeof(SRetrieveTableRsp)); + *freeHandle = true; + } + + return code; +} + +static void vnodeBuildNoResultQueryRsp(SRspRet* pRet) { + pRet->rsp = (SRetrieveTableRsp *)rpcMallocCont(sizeof(SRetrieveTableRsp)); + pRet->len = sizeof(SRetrieveTableRsp); + + memset(pRet->rsp, 0, sizeof(SRetrieveTableRsp)); + SRetrieveTableRsp* pRsp = pRet->rsp; + + pRsp->completed = true; +} + static int32_t vnodeProcessQueryMsg(SVnodeObj *pVnode, SReadMsg *pReadMsg) { void *pCont = pReadMsg->pCont; int32_t contLen = pReadMsg->contLen; @@ -98,6 +132,7 @@ static int32_t vnodeProcessQueryMsg(SVnodeObj *pVnode, SReadMsg *pReadMsg) { vWarn("QInfo:%p invalid qhandle, no matched query handle, conn:%p", (void*) killQueryMsg->qhandle, pReadMsg->rpcMsg.handle); } else { assert(*qhandle == (void*) killQueryMsg->qhandle); + qKillQuery(*qhandle); qReleaseQInfo(pVnode->qMgmt, (void**) &qhandle, true); } @@ -110,7 +145,7 @@ static int32_t vnodeProcessQueryMsg(SVnodeObj *pVnode, SReadMsg *pReadMsg) { if (contLen != 0) { qinfo_t pQInfo = NULL; - code = qCreateQueryInfo(pVnode->tsdb, pVnode->vgId, pQueryTableMsg, pVnode, &pQInfo); + code = qCreateQueryInfo(pVnode->tsdb, pVnode->vgId, pQueryTableMsg, &pQInfo); SQueryTableRsp *pRsp = (SQueryTableRsp *) rpcMallocCont(sizeof(SQueryTableRsp)); pRsp->code = code; @@ -133,7 +168,6 @@ static int32_t vnodeProcessQueryMsg(SVnodeObj *pVnode, SReadMsg *pReadMsg) { pRsp->qhandle = htobe64((uint64_t) pQInfo); } - pQInfo = NULL; if (handle != NULL && vnodeNotifyCurrentQhandle(pReadMsg->rpcMsg.handle, *handle, pVnode->vgId) != TSDB_CODE_SUCCESS) { vError("vgId:%d, QInfo:%p, query discarded since link is broken, %p", pVnode->vgId, *handle, pReadMsg->rpcMsg.handle); pRsp->code = TSDB_CODE_RPC_NETWORK_UNAVAIL; @@ -153,16 +187,34 @@ static int32_t vnodeProcessQueryMsg(SVnodeObj *pVnode, SReadMsg *pReadMsg) { } else { assert(pCont != NULL); + handle = qAcquireQInfo(pVnode->qMgmt, (uint64_t) pCont); if (handle == NULL) { vWarn("QInfo:%p invalid qhandle in continuing exec query, conn:%p", (void*) pCont, pReadMsg->rpcMsg.handle); code = TSDB_CODE_QRY_INVALID_QHANDLE; } else { vDebug("vgId:%d, QInfo:%p, dnode continue exec query", pVnode->vgId, (void*) pCont); - code = TSDB_CODE_VND_ACTION_IN_PROGRESS; - qTableQuery(*handle); // do execute query + + bool freehandle = false; + bool buildRes = qTableQuery(*handle); // do execute query + + // build query rsp + if (buildRes) { + // update the connection info according to the retrieve connection + pReadMsg->rpcMsg.handle = qGetResultRetrieveMsg(*handle); + assert(pReadMsg->rpcMsg.handle != NULL); + + vDebug("vgId:%d, QInfo:%p, start to build result rsp after query paused, %p", pVnode->vgId, *handle, pReadMsg->rpcMsg.handle); + code = vnodeDumpQueryResult(&pReadMsg->rspRet, pVnode, *handle, &freehandle); + + // todo test the error code case + if (code == TSDB_CODE_SUCCESS) { + code = TSDB_CODE_QRY_HAS_RSP; + } + } + + qReleaseQInfo(pVnode->qMgmt, (void**) &handle, freehandle); } - qReleaseQInfo(pVnode->qMgmt, (void**) &handle, false); } return code; @@ -176,7 +228,7 @@ static int32_t vnodeProcessFetchMsg(SVnodeObj *pVnode, SReadMsg *pReadMsg) { pRetrieve->qhandle = htobe64(pRetrieve->qhandle); pRetrieve->free = htons(pRetrieve->free); - vDebug("vgId:%d, QInfo:%p, retrieve msg is disposed", pVnode->vgId, (void*) pRetrieve->qhandle); + vDebug("vgId:%d, QInfo:%p, retrieve msg is disposed, free:%d, conn:%p", pVnode->vgId, (void*) pRetrieve->qhandle, pRetrieve->free, pReadMsg->rpcMsg.handle); memset(pRet, 0, sizeof(SRspRet)); @@ -185,16 +237,8 @@ static int32_t vnodeProcessFetchMsg(SVnodeObj *pVnode, SReadMsg *pReadMsg) { if (handle == NULL || (*handle) != (void*) pRetrieve->qhandle) { code = TSDB_CODE_QRY_INVALID_QHANDLE; vDebug("vgId:%d, invalid qhandle in fetch result, QInfo:%p", pVnode->vgId, (void*) pRetrieve->qhandle); - - pRet->rsp = (SRetrieveTableRsp *)rpcMallocCont(sizeof(SRetrieveTableRsp)); - pRet->len = sizeof(SRetrieveTableRsp); - - memset(pRet->rsp, 0, sizeof(SRetrieveTableRsp)); - SRetrieveTableRsp* pRsp = pRet->rsp; - pRsp->numOfRows = 0; - pRsp->useconds = 0; - pRsp->completed = true; - + + vnodeBuildNoResultQueryRsp(pRet); return code; } @@ -203,35 +247,25 @@ static int32_t vnodeProcessFetchMsg(SVnodeObj *pVnode, SReadMsg *pReadMsg) { qKillQuery(*handle); qReleaseQInfo(pVnode->qMgmt, (void**) &handle, true); - pRet->rsp = (SRetrieveTableRsp *)rpcMallocCont(sizeof(SRetrieveTableRsp)); - pRet->len = sizeof(SRetrieveTableRsp); - - memset(pRet->rsp, 0, sizeof(SRetrieveTableRsp)); - SRetrieveTableRsp* pRsp = pRet->rsp; - pRsp->numOfRows = 0; - pRsp->completed = true; - pRsp->useconds = 0; - + vnodeBuildNoResultQueryRsp(pRet); return code; } bool freeHandle = true; - code = qRetrieveQueryResultInfo(*handle); + bool buildRes = false; + + code = qRetrieveQueryResultInfo(*handle, &buildRes, pReadMsg->rpcMsg.handle); if (code != TSDB_CODE_SUCCESS) { //TODO handle malloc failure pRet->rsp = (SRetrieveTableRsp *)rpcMallocCont(sizeof(SRetrieveTableRsp)); memset(pRet->rsp, 0, sizeof(SRetrieveTableRsp)); - } else { // if failed to dump result, free qhandle immediately - if ((code = qDumpRetrieveResult(*handle, (SRetrieveTableRsp **)&pRet->rsp, &pRet->len)) == TSDB_CODE_SUCCESS) { - if (qHasMoreResultsToRetrieve(*handle)) { - vnodePutItemIntoReadQueue(pVnode, *handle); - pRet->qhandle = *handle; - freeHandle = false; - } else { - qKillQuery(*handle); - freeHandle = true; - } + } else { // result is not ready, return immediately + if (!buildRes) { + qReleaseQInfo(pVnode->qMgmt, (void**) &handle, false); + return TSDB_CODE_QRY_NOT_READY; } + + code = vnodeDumpQueryResult(pRet, pVnode, *handle, &freeHandle); } qReleaseQInfo(pVnode->qMgmt, (void**) &handle, freeHandle); diff --git a/tests/script/general/parser/testSuite.sim b/tests/script/general/parser/testSuite.sim index c6981d29022dca56198d6655379472c905984285..1e73893793a9f011f6753e5745226e664ca253f2 100644 --- a/tests/script/general/parser/testSuite.sim +++ b/tests/script/general/parser/testSuite.sim @@ -93,6 +93,8 @@ run general/parser/groupby.sim sleep 2000 run general/parser/tags_filter.sim sleep 2000 +run general/parser/topbot.sim +sleep 2000 run general/parser/union.sim sleep 2000 run general/parser/sliding.sim diff --git a/tests/script/general/parser/topbot.sim b/tests/script/general/parser/topbot.sim new file mode 100644 index 0000000000000000000000000000000000000000..a0c46dbc65bac39b33cd54f0c61ac3ba96bf5736 --- /dev/null +++ b/tests/script/general/parser/topbot.sim @@ -0,0 +1,74 @@ +system sh/stop_dnodes.sh + +system sh/deploy.sh -n dnode1 -i 1 +system sh/cfg.sh -n dnode1 -c walLevel -v 0 +system sh/exec.sh -n dnode1 -s start +sleep 3000 +sql connect + +$dbPrefix = tb_db +$tbPrefix = tb_tb +$stbPrefix = tb_stb +$tbNum = 10 +$rowNum = 1000 +$totalNum = $tbNum * $rowNum +$loops = 200000 +$log = 10000 +$ts0 = 1537146000000 +$delta = 600000 +print ========== topbot.sim +$i = 0 +$db = $dbPrefix . $i +$stb = $stbPrefix . $i + +sql drop database $db -x step1 +step1: +sql create database $db cache 16 maxtables 200 +print ====== create tables +sql use $db +sql create table $stb (ts timestamp, c1 int, c2 bigint, c3 float, c4 double, c5 smallint, c6 tinyint, c7 bool, c8 binary(10), c9 nchar(10)) tags(t1 int) + +$i = 0 +$ts = $ts0 +$halfNum = $tbNum / 2 +while $i < $halfNum + $tbId = $i + $halfNum + $tb = $tbPrefix . $i + $tb1 = $tbPrefix . $tbId + sql create table $tb using $stb tags( $i ) + sql create table $tb1 using $stb tags( $tbId ) + + $x = 0 + while $x < $rowNum + $xs = $x * $delta + $ts = $ts0 + $xs + $c = $x / 10 + $c = $c * 10 + $c = $x - $c + $binary = 'binary . $c + $binary = $binary . ' + $nchar = 'nchar . $c + $nchar = $nchar . ' + sql insert into $tb values ( $ts , $c , $c , $c , $c , $c , $c , true, $binary , $nchar ) + sql insert into $tb1 values ( $ts , $c , NULL , $c , NULL , $c , $c , true, $binary , $nchar ) + $x = $x + 1 + endw + + $i = $i + 1 +endw +print ====== tables created + +sql use $db +##### select from table +print ====== select top/bot from table and check num of rows returned +sql select top(c1, 100) from tb_stb0 +if $row != 100 then + return -1 +endi + +sql select last(c2) from tb_tb9 +if $row != 1 then + return -1 +endi + +system sh/exec.sh -n dnode1 -s stop -x SIGINT \ No newline at end of file