未验证 提交 38598c5e 编写于 作者: S Shengliang Guan 提交者: GitHub

Merge pull request #2784 from taosdata/feature/query

Feature/query
......@@ -31,8 +31,8 @@ extern "C" {
#include "tutil.h"
#include "qExecutor.h"
#include "qSqlparser.h"
#include "qTsbuf.h"
#include "qsqlparser.h"
#include "tcmdtype.h"
// forward declaration
......
......@@ -430,7 +430,7 @@ void tscTableMetaCallBack(void *param, TAOS_RES *res, int code) {
pRes->code = code;
if (code != TSDB_CODE_SUCCESS) {
tscError("%p ge tableMeta failed, code:%s", pSql, tstrerror(code));
tscError("%p get tableMeta failed, code:%s", pSql, tstrerror(code));
goto _error;
} else {
tscDebug("%p get tableMeta successfully", pSql);
......
......@@ -2131,6 +2131,11 @@ static STopBotInfo *getTopBotOutputInfo(SQLFunctionCtx *pCtx) {
}
bool topbot_datablock_filter(SQLFunctionCtx *pCtx, int32_t functionId, const char *minval, const char *maxval) {
SResultInfo *pResInfo = GET_RES_INFO(pCtx);
if (pResInfo == NULL) {
return true;
}
STopBotInfo *pTopBotInfo = getTopBotOutputInfo(pCtx);
// required number of results are not reached, continue load data block
......
......@@ -691,9 +691,15 @@ int32_t tscLocalReducerEnvCreate(SSqlObj *pSql, tExtMemBuffer ***pMemBuffer, tOr
pModel = createColumnModel(pSchema, size, capacity);
int32_t pg = DEFAULT_PAGE_SIZE;
int32_t overhead = sizeof(tFilePage);
while((pg - overhead) < pModel->rowSize * 2) {
pg *= 2;
}
size_t numOfSubs = pTableMetaInfo->vgroupList->numOfVgroups;
for (int32_t i = 0; i < numOfSubs; ++i) {
(*pMemBuffer)[i] = createExtMemBuffer(nBufferSizes, rlen, pModel);
(*pMemBuffer)[i] = createExtMemBuffer(nBufferSizes, rlen, pg, pModel);
(*pMemBuffer)[i]->flushModel = MULTIPLE_APPEND_MODEL;
}
......
......@@ -1505,12 +1505,11 @@ static int32_t tscReissueSubquery(SRetrieveSupport *trsupport, SSqlObj *pSql, in
SSqlObj *pNew = tscCreateSqlObjForSubquery(trsupport->pParentSql, trsupport, pSql);
// todo add to async res or not??
if (pNew == NULL) {
tscError("%p sub:%p failed to create new subquery due to out of memory, abort retry, vgId:%d, orderOfSub:%d",
trsupport->pParentSql, pSql, pVgroup->vgId, trsupport->subqueryIndex);
tscError("%p sub:%p failed to create new subquery due to error:%s, abort retry, vgId:%d, orderOfSub:%d",
trsupport->pParentSql, pSql, tstrerror(terrno), pVgroup->vgId, trsupport->subqueryIndex);
pParentSql->res.code = TSDB_CODE_TSC_OUT_OF_MEMORY;
pParentSql->res.code = terrno;
trsupport->numOfRetry = MAX_NUM_OF_SUBQUERY_RETRY;
return pParentSql->res.code;
......
......@@ -148,7 +148,7 @@ void taos_init_imp() {
refreshTime = refreshTime < 10 ? 10 : refreshTime;
if (tscCacheHandle == NULL) {
tscCacheHandle = taosCacheInit(TSDB_DATA_TYPE_BINARY, refreshTime, false, NULL, "client");
tscCacheHandle = taosCacheInit(TSDB_DATA_TYPE_BINARY, refreshTime, false, NULL, "tableMeta");
}
tscDebug("client is initialized successfully");
......
......@@ -356,9 +356,9 @@ void tscPartiallyFreeSqlObj(SSqlObj* pSql) {
// pSql->sqlstr will be used by tscBuildQueryStreamDesc
if (pObj->signature == pObj) {
pthread_mutex_lock(&pObj->mutex);
//pthread_mutex_lock(&pObj->mutex);
tfree(pSql->sqlstr);
pthread_mutex_unlock(&pObj->mutex);
//pthread_mutex_unlock(&pObj->mutex);
}
tscFreeSqlResult(pSql);
......@@ -1675,6 +1675,7 @@ SSqlObj* createSubqueryObj(SSqlObj* pSql, int16_t tableIndex, void (*fp)(), void
SSqlObj* pNew = (SSqlObj*)calloc(1, sizeof(SSqlObj));
if (pNew == NULL) {
tscError("%p new subquery failed, tableIndex:%d", pSql, tableIndex);
terrno = TSDB_CODE_TSC_OUT_OF_MEMORY;
return NULL;
}
......@@ -1688,6 +1689,7 @@ SSqlObj* createSubqueryObj(SSqlObj* pSql, int16_t tableIndex, void (*fp)(), void
tscError("%p new subquery failed, tableIndex:%d, vgroupIndex:%d", pSql, tableIndex, pTableMetaInfo->vgroupIndex);
free(pNew);
terrno = TSDB_CODE_TSC_OUT_OF_MEMORY;
return NULL;
}
......@@ -1706,6 +1708,7 @@ SSqlObj* createSubqueryObj(SSqlObj* pSql, int16_t tableIndex, void (*fp)(), void
if (tscAddSubqueryInfo(pnCmd) != TSDB_CODE_SUCCESS) {
tscFreeSqlObj(pNew);
terrno = TSDB_CODE_TSC_OUT_OF_MEMORY;
return NULL;
}
......@@ -1743,6 +1746,7 @@ SSqlObj* createSubqueryObj(SSqlObj* pSql, int16_t tableIndex, void (*fp)(), void
if (tscAllocPayload(pnCmd, TSDB_DEFAULT_PAYLOAD_SIZE) != TSDB_CODE_SUCCESS) {
tscError("%p new subquery failed, tableIndex:%d, vgroupIndex:%d", pSql, tableIndex, pTableMetaInfo->vgroupIndex);
tscFreeSqlObj(pNew);
terrno = TSDB_CODE_TSC_OUT_OF_MEMORY;
return NULL;
}
......@@ -1827,8 +1831,16 @@ SSqlObj* createSubqueryObj(SSqlObj* pSql, int16_t tableIndex, void (*fp)(), void
}
if (pFinalInfo->pTableMeta == NULL) {
tscError("%p new subquery failed for get tableMeta is NULL from cache", pSql);
tscError("%p new subquery failed since no tableMeta in cache, name:%s", pSql, name);
tscFreeSqlObj(pNew);
if (pPrevSql != NULL) {
assert(pPrevSql->res.code != TSDB_CODE_SUCCESS);
terrno = pPrevSql->res.code;
} else {
terrno = TSDB_CODE_TSC_APP_ERROR;
}
return NULL;
}
......
......@@ -49,7 +49,7 @@ static taos_qset readQset;
int32_t dnodeInitVnodeRead() {
readQset = taosOpenQset();
readPool.min = 2;
readPool.min = tsNumOfCores;
readPool.max = tsNumOfCores * tsNumOfThreadsPerCore;
if (readPool.max <= readPool.min * 2) readPool.max = 2 * readPool.min;
readPool.readWorker = (SReadWorker *)calloc(sizeof(SReadWorker), readPool.max);
......@@ -206,10 +206,14 @@ static void *dnodeProcessReadQueue(void *param) {
taosMsg[pReadMsg->rpcMsg.msgType], type);
int32_t code = vnodeProcessRead(pVnode, pReadMsg);
if (type == TAOS_QTYPE_RPC) {
if (type == TAOS_QTYPE_RPC && code != TSDB_CODE_QRY_NOT_READY) {
dnodeSendRpcReadRsp(pVnode, pReadMsg, code);
} else {
dnodeDispatchNonRspMsg(pVnode, pReadMsg, code);
if (code == TSDB_CODE_QRY_HAS_RSP) {
dnodeSendRpcReadRsp(pVnode, pReadMsg, TSDB_CODE_SUCCESS);
} else {
dnodeDispatchNonRspMsg(pVnode, pReadMsg, code);
}
}
taosFreeQitem(pReadMsg);
......
......@@ -28,7 +28,7 @@ typedef void* qinfo_t;
* @param qinfo
* @return
*/
int32_t qCreateQueryInfo(void* tsdb, int32_t vgId, SQueryTableMsg* pQueryTableMsg, void* param, qinfo_t* qinfo);
int32_t qCreateQueryInfo(void* tsdb, int32_t vgId, SQueryTableMsg* pQueryTableMsg, qinfo_t* qinfo);
/**
......@@ -38,7 +38,10 @@ int32_t qCreateQueryInfo(void* tsdb, int32_t vgId, SQueryTableMsg* pQueryTableMs
* @param qinfo
* @return
*/
void qTableQuery(qinfo_t qinfo);
bool qTableQuery(qinfo_t qinfo);
void* pGetRspMsg(qinfo_t qinfo);
/**
* Retrieve the produced results information, if current query is not paused or completed,
......@@ -48,7 +51,7 @@ void qTableQuery(qinfo_t qinfo);
* @param qinfo
* @return
*/
int32_t qRetrieveQueryResultInfo(qinfo_t qinfo);
int32_t qRetrieveQueryResultInfo(qinfo_t qinfo, bool* buildRes, void* pRspContext);
/**
*
......@@ -60,16 +63,9 @@ int32_t qRetrieveQueryResultInfo(qinfo_t qinfo);
* @param contLen payload length
* @return
*/
int32_t qDumpRetrieveResult(qinfo_t qinfo, SRetrieveTableRsp** pRsp, int32_t* contLen);
int32_t qDumpRetrieveResult(qinfo_t qinfo, SRetrieveTableRsp** pRsp, int32_t* contLen, bool* continueExec);
/**
* Decide if more results will be produced or not, NOTE: this function will increase the ref count of QInfo,
* so it can be only called once for each retrieve
*
* @param qinfo
* @return
*/
bool qHasMoreResultsToRetrieve(qinfo_t qinfo);
void* qGetResultRetrieveMsg(qinfo_t qinfo);
/**
* kill current ongoing query and free query handle automatically
......
......@@ -216,6 +216,8 @@ TAOS_DEFINE_ERROR(TSDB_CODE_QRY_OUT_OF_MEMORY, 0, 0x0703, "query out
TAOS_DEFINE_ERROR(TSDB_CODE_QRY_APP_ERROR, 0, 0x0704, "query app error")
TAOS_DEFINE_ERROR(TSDB_CODE_QRY_DUP_JOIN_KEY, 0, 0x0705, "query duplicated join key")
TAOS_DEFINE_ERROR(TSDB_CODE_QRY_EXCEED_TAGS_LIMIT, 0, 0x0706, "query tag conditon too many")
TAOS_DEFINE_ERROR(TSDB_CODE_QRY_NOT_READY, 0, 0x0707, "query not ready")
TAOS_DEFINE_ERROR(TSDB_CODE_QRY_HAS_RSP, 0, 0x0708, "query should response")
// grant
TAOS_DEFINE_ERROR(TSDB_CODE_GRANT_EXPIRED, 0, 0x0800, "grant expired")
......
......@@ -68,7 +68,7 @@ int32_t mnodeInitProfile() {
mnodeAddWriteMsgHandle(TSDB_MSG_TYPE_CM_KILL_STREAM, mnodeProcessKillStreamMsg);
mnodeAddWriteMsgHandle(TSDB_MSG_TYPE_CM_KILL_CONN, mnodeProcessKillConnectionMsg);
tsMnodeConnCache = taosCacheInit(TSDB_DATA_TYPE_INT, CONN_CHECK_TIME, false, mnodeFreeConn, "conn");
tsMnodeConnCache = taosCacheInit(TSDB_DATA_TYPE_INT, CONN_CHECK_TIME, true, mnodeFreeConn, "conn");
return 0;
}
......@@ -119,7 +119,7 @@ SConnObj *mnodeAccquireConn(int32_t connId, char *user, uint32_t ip, uint16_t po
return NULL;
}
if (pConn->ip != ip || pConn->port != port /* || strcmp(pConn->user, user) != 0 */) {
if (/* pConn->ip != ip || */ pConn->port != port /* || strcmp(pConn->user, user) != 0 */) {
mError("connId:%d, incoming conn user:%s ip:%s:%u, not match exist conn user:%s ip:%s:%u", connId, user,
taosIpStr(ip), port, pConn->user, taosIpStr(pConn->ip), pConn->port);
taosCacheRelease(tsMnodeConnCache, (void **)&pConn, false);
......
......@@ -58,7 +58,7 @@ static void httpDestroyContext(void *data) {
}
bool httpInitContexts() {
tsHttpServer.contextCache = taosCacheInit(TSDB_DATA_TYPE_BIGINT, 2, false, httpDestroyContext, "restc");
tsHttpServer.contextCache = taosCacheInit(TSDB_DATA_TYPE_BIGINT, 2, true, httpDestroyContext, "restc");
if (tsHttpServer.contextCache == NULL) {
httpError("failed to init context cache");
return false;
......
......@@ -20,8 +20,8 @@
#include "hash.h"
#include "qFill.h"
#include "qResultbuf.h"
#include "qSqlparser.h"
#include "qTsbuf.h"
#include "qsqlparser.h"
#include "query.h"
#include "taosdef.h"
#include "tarray.h"
......@@ -43,7 +43,7 @@ typedef struct SSqlGroupbyExpr {
typedef struct SPosInfo {
int32_t pageId;
int16_t rowId;
int32_t rowId;
} SPosInfo;
typedef struct SWindowStatus {
......@@ -177,13 +177,18 @@ typedef struct SQueryRuntimeEnv {
SDiskbasedResultBuf* pResultBuf; // query result buffer based on blocked-wised disk file
} SQueryRuntimeEnv;
enum {
QUERY_RESULT_NOT_READY = 1,
QUERY_RESULT_READY = 2,
};
typedef struct SQInfo {
void* signature;
int32_t pointsInterpo;
int32_t code; // error code to returned to client
sem_t dataReady;
// sem_t dataReady;
void* tsdb;
void* param;
int32_t vgId;
STableGroupInfo tableGroupInfo; // table id list < only includes the STable list>
STableGroupInfo tableqinfoGroupInfo; // this is a group array list, including SArray<STableQueryInfo*> structure
......@@ -200,8 +205,11 @@ typedef struct SQInfo {
*/
int32_t tableIndex;
int32_t numOfGroupResultPages;
void* pBuf; // allocated buffer for STableQueryInfo, sizeof(STableQueryInfo)*numOfTables;
void* pBuf; // allocated buffer for STableQueryInfo, sizeof(STableQueryInfo)*numOfTables;
pthread_mutex_t lock; // used to synchronize the rsp/query threads
int32_t dataReady; // denote if query result is ready or not
void* rspContext; // response context
} SQInfo;
#endif // TDENGINE_QUERYEXECUTOR_H
......@@ -19,7 +19,6 @@
extern "C" {
#endif
#include "os.h"
#include "taosmsg.h"
......@@ -28,9 +27,9 @@ extern "C" {
#include "tdataformat.h"
#include "talgo.h"
#define DEFAULT_PAGE_SIZE (1024L*4) // 16k larger than the SHistoInfo
#define MAX_TMPFILE_PATH_LENGTH PATH_MAX
#define MAX_TMPFILE_PATH_LENGTH PATH_MAX
#define INITIAL_ALLOCATION_BUFFER_SIZE 64
#define DEFAULT_PAGE_SIZE (4096L) // 16k larger than the SHistoInfo
typedef enum EXT_BUFFER_FLUSH_MODEL {
/*
......@@ -126,7 +125,7 @@ typedef struct tExtMemBuffer {
* @param pModel
* @return
*/
tExtMemBuffer *createExtMemBuffer(int32_t inMemSize, int32_t elemSize, SColumnModel *pModel);
tExtMemBuffer *createExtMemBuffer(int32_t inMemSize, int32_t elemSize, int32_t pagesize, SColumnModel *pModel);
/**
*
......
......@@ -13,50 +13,85 @@
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#ifndef TDENGINE_VNODEQUERYUTIL_H
#define TDENGINE_VNODEQUERYUTIL_H
#ifndef TDENGINE_QRESULTBUF_H
#define TDENGINE_QRESULTBUF_H
#ifdef __cplusplus
extern "C" {
#endif
#include <tlist.h>
#include "hash.h"
#include "os.h"
#include "qExtbuffer.h"
#include "tlockfree.h"
typedef struct SArray* SIDList;
typedef struct SPageDiskInfo {
int32_t offset;
int32_t length;
} SPageDiskInfo;
typedef struct SPageInfo {
SListNode* pn; // point to list node
int32_t pageId;
SPageDiskInfo info;
void* pData;
bool used; // set current page is in used
} SPageInfo;
typedef struct SFreeListItem {
int32_t offset;
int32_t len;
} SFreeListItem;
typedef struct SResultBufStatis {
int32_t flushBytes;
int32_t loadBytes;
int32_t getPages;
int32_t releasePages;
int32_t flushPages;
} SResultBufStatis;
typedef struct SDiskbasedResultBuf {
int32_t numOfRowsPerPage;
int32_t numOfPages;
int64_t totalBufSize;
int32_t fd; // data file fd
int64_t fileSize; // disk file size
FILE* file;
int32_t allocateId; // allocated page id
int32_t incStep; // minimum allocated pages
void* pBuf; // mmap buffer pointer
char* path; // file path
int32_t pageSize; // current used page size
int32_t inMemPages; // numOfPages that are allocated in memory
SHashObj* idsTable; // id hash table
SIDList list; // for each id, there is a page id list
void* iBuf; // inmemory buf
void* handle; // for debug purpose
SHashObj* groupSet; // id hash table
SHashObj* all;
SList* lruList;
void* emptyDummyIdList; // dummy id list
void* assistBuf; // assistant buffer for compress/decompress data
SArray* pFree; // free area in file
bool comp; // compressed before flushed to disk
int32_t nextPos; // next page flush position
const void* handle; // for debug purpose
SResultBufStatis statis;
} SDiskbasedResultBuf;
#define DEFAULT_INTERN_BUF_PAGE_SIZE (1024L)
#define DEFAULT_INTERN_BUF_PAGE_SIZE (4096L)
#define DEFAULT_INMEM_BUF_PAGES 10
#define PAGE_INFO_INITIALIZER (SPageDiskInfo){-1, -1}
/**
* create disk-based result buffer
* @param pResultBuf
* @param size
* @param rowSize
* @param pagesize
* @param inMemPages
* @param handle
* @return
*/
int32_t createDiskbasedResultBuffer(SDiskbasedResultBuf** pResultBuf, int32_t numOfPages, int32_t rowSize, int32_t pagesize,
int32_t inMemPages, void* handle);
int32_t createDiskbasedResultBuffer(SDiskbasedResultBuf** pResultBuf, int32_t rowSize, int32_t pagesize,
int32_t inMemBufSize, const void* handle);
/**
*
......@@ -72,7 +107,7 @@ tFilePage* getNewDataBuf(SDiskbasedResultBuf* pResultBuf, int32_t groupId, int32
* @param pResultBuf
* @return
*/
int32_t getNumOfRowsPerPage(SDiskbasedResultBuf* pResultBuf);
size_t getNumOfRowsPerPage(const SDiskbasedResultBuf* pResultBuf);
/**
*
......@@ -88,42 +123,52 @@ SIDList getDataBufPagesIdList(SDiskbasedResultBuf* pResultBuf, int32_t groupId);
* @param id
* @return
*/
static FORCE_INLINE tFilePage* getResBufPage(SDiskbasedResultBuf* pResultBuf, int32_t id) {
if (id < pResultBuf->inMemPages) {
return (tFilePage*) ((char*) pResultBuf->iBuf + id * pResultBuf->pageSize);
} else {
return (tFilePage*) ((char*) pResultBuf->pBuf + (id - pResultBuf->inMemPages) * pResultBuf->pageSize);
}
}
tFilePage* getResBufPage(SDiskbasedResultBuf* pResultBuf, int32_t id);
/**
* release the referenced buf pages
* @param pResultBuf
* @param page
*/
void releaseResBufPage(SDiskbasedResultBuf* pResultBuf, void* page);
/**
*
* @param pResultBuf
* @param pi
*/
void releaseResBufPageInfo(SDiskbasedResultBuf* pResultBuf, SPageInfo* pi);
/**
* get the total buffer size in the format of disk file
* @param pResultBuf
* @return
*/
int32_t getResBufSize(SDiskbasedResultBuf* pResultBuf);
size_t getResBufSize(const SDiskbasedResultBuf* pResultBuf);
/**
* get the number of groups in the result buffer
* @param pResultBuf
* @return
*/
int32_t getNumOfResultBufGroupId(SDiskbasedResultBuf* pResultBuf);
size_t getNumOfResultBufGroupId(const SDiskbasedResultBuf* pResultBuf);
/**
* destroy result buffer
* @param pResultBuf
*/
void destroyResultBuf(SDiskbasedResultBuf* pResultBuf, void* handle);
void destroyResultBuf(SDiskbasedResultBuf* pResultBuf);
/**
*
* @param pList
* @return
*/
int32_t getLastPageId(SIDList pList);
SPageInfo* getLastPageInfo(SIDList pList);
#ifdef __cplusplus
}
#endif
#endif // TDENGINE_VNODEQUERYUTIL_H
#endif // TDENGINE_QRESULTBUF_H
......@@ -45,13 +45,14 @@ bool isWindowResClosed(SWindowResInfo *pWindowResInfo, int32_t slot);
int32_t createQueryResultInfo(SQuery *pQuery, SWindowResult *pResultRow, bool isSTableQuery, size_t interBufSize);
static FORCE_INLINE char *getPosInResultPage(SQueryRuntimeEnv *pRuntimeEnv, int32_t columnIndex, SWindowResult *pResult) {
static FORCE_INLINE char *getPosInResultPage(SQueryRuntimeEnv *pRuntimeEnv, int32_t columnIndex, SWindowResult *pResult,
tFilePage* page) {
assert(pResult != NULL && pRuntimeEnv != NULL);
SQuery *pQuery = pRuntimeEnv->pQuery;
tFilePage *page = getResBufPage(pRuntimeEnv->pResultBuf, pResult->pos.pageId);
int32_t realRowId = pResult->pos.rowId * GET_ROW_PARAM_FOR_MULTIOUTPUT(pQuery, pRuntimeEnv->topBotQuery, pRuntimeEnv->stableQuery);
// tFilePage *page = getResBufPage(pRuntimeEnv->pResultBuf, pResult->pos.pageId);
int32_t realRowId = pResult->pos.rowId * GET_ROW_PARAM_FOR_MULTIOUTPUT(pQuery, pRuntimeEnv->topBotQuery, pRuntimeEnv->stableQuery);
return ((char *)page->data) + pRuntimeEnv->offset[columnIndex] * pRuntimeEnv->numOfRowsPerPage +
pQuery->pSelectExpr[columnIndex].bytes * realRowId;
}
......
......@@ -18,8 +18,8 @@
#include "exception.h"
#include "qAst.h"
#include "qSqlparser.h"
#include "qSyntaxtreefunction.h"
#include "qsqlparser.h"
#include "taosdef.h"
#include "taosmsg.h"
#include "tarray.h"
......
......@@ -221,7 +221,7 @@ void updateNumOfResult(SQueryRuntimeEnv *pRuntimeEnv, int32_t numOfRes) {
}
static int32_t getGroupResultId(int32_t groupIndex) {
int32_t base = 200000;
int32_t base = 20000000;
return base + (groupIndex * 10000);
}
......@@ -478,10 +478,14 @@ static int32_t addNewWindowResultBuf(SWindowResult *pWindowRes, SDiskbasedResult
if (taosArrayGetSize(list) == 0) {
pData = getNewDataBuf(pResultBuf, sid, &pageId);
} else {
pageId = getLastPageId(list);
pData = getResBufPage(pResultBuf, pageId);
SPageInfo* pi = getLastPageInfo(list);
pData = getResBufPage(pResultBuf, pi->pageId);
pageId = pi->pageId;
if (pData->num >= numOfRowsPerPage) {
// release current page first, and prepare the next one
releaseResBufPageInfo(pResultBuf, pi);
pData = getNewDataBuf(pResultBuf, sid, &pageId);
if (pData != NULL) {
assert(pData->num == 0); // number of elements must be 0 for new allocated buffer
......@@ -497,6 +501,8 @@ static int32_t addNewWindowResultBuf(SWindowResult *pWindowRes, SDiskbasedResult
if (pWindowRes->pos.pageId == -1) { // not allocated yet, allocate new buffer
pWindowRes->pos.pageId = pageId;
pWindowRes->pos.rowId = pData->num++;
assert(pWindowRes->pos.pageId >= 0);
}
return 0;
......@@ -1490,8 +1496,6 @@ static int32_t setupQueryRuntimeEnv(SQueryRuntimeEnv *pRuntimeEnv, int16_t order
goto _clean;
}
qDebug("QInfo:%p setup runtime env1", GET_QINFO_ADDR(pRuntimeEnv));
pRuntimeEnv->offset[0] = 0;
for (int32_t i = 0; i < pQuery->numOfOutput; ++i) {
SSqlFuncMsg *pSqlFuncMsg = &pQuery->pSelectExpr[i].base;
......@@ -1536,8 +1540,6 @@ static int32_t setupQueryRuntimeEnv(SQueryRuntimeEnv *pRuntimeEnv, int16_t order
}
}
qDebug("QInfo:%p setup runtime env2", GET_QINFO_ADDR(pRuntimeEnv));
// set the order information for top/bottom query
int32_t functionId = pCtx->functionId;
......@@ -1558,25 +1560,19 @@ static int32_t setupQueryRuntimeEnv(SQueryRuntimeEnv *pRuntimeEnv, int16_t order
}
}
qDebug("QInfo:%p setup runtime env3", GET_QINFO_ADDR(pRuntimeEnv));
char* buf = (char*) pRuntimeEnv->resultInfo + sizeof(SResultInfo) * pQuery->numOfOutput;
// set the intermediate result output buffer
setWindowResultInfo(pRuntimeEnv->resultInfo, pQuery, pRuntimeEnv->stableQuery, buf);
qDebug("QInfo:%p setup runtime env4", GET_QINFO_ADDR(pRuntimeEnv));
// if it is group by normal column, do not set output buffer, the output buffer is pResult
if (!pRuntimeEnv->groupbyNormalCol && !pRuntimeEnv->stableQuery) {
resetCtxOutputBuf(pRuntimeEnv);
}
qDebug("QInfo:%p setup runtime env5", GET_QINFO_ADDR(pRuntimeEnv));
setCtxTagColumnInfo(pRuntimeEnv, pRuntimeEnv->pCtx);
qDebug("QInfo:%p init completed", GET_QINFO_ADDR(pRuntimeEnv));
qDebug("QInfo:%p init runtime completed", GET_QINFO_ADDR(pRuntimeEnv));
return TSDB_CODE_SUCCESS;
_clean:
......@@ -1615,7 +1611,7 @@ static void teardownQueryRuntimeEnv(SQueryRuntimeEnv *pRuntimeEnv) {
pRuntimeEnv->pFillInfo = taosDestoryFillInfo(pRuntimeEnv->pFillInfo);
destroyResultBuf(pRuntimeEnv->pResultBuf, pQInfo);
destroyResultBuf(pRuntimeEnv->pResultBuf);
tsdbCleanupQueryHandle(pRuntimeEnv->pQueryHandle);
tsdbCleanupQueryHandle(pRuntimeEnv->pSecQueryHandle);
......@@ -2111,9 +2107,6 @@ int32_t loadDataBlockOnDemand(SQueryRuntimeEnv *pRuntimeEnv, void* pQueryHandle,
}
if (!needToLoadDataBlock(pRuntimeEnv, *pStatis, pRuntimeEnv->pCtx, pBlockInfo->rows)) {
#if defined(_DEBUG_VIEW)
qDebug("QInfo:%p block discarded by per-filter", GET_QINFO_ADDR(pRuntimeEnv));
#endif
// current block has been discard due to filter applied
pRuntimeEnv->summary.discardBlocks += 1;
qDebug("QInfo:%p data block discard, brange:%"PRId64 "-%"PRId64", rows:%d", GET_QINFO_ADDR(pRuntimeEnv),
......@@ -2446,6 +2439,8 @@ static void doMerge(SQueryRuntimeEnv *pRuntimeEnv, int64_t timestamp, SWindowRes
SQuery * pQuery = pRuntimeEnv->pQuery;
SQLFunctionCtx *pCtx = pRuntimeEnv->pCtx;
tFilePage *page = getResBufPage(pRuntimeEnv->pResultBuf, pWindowRes->pos.pageId);
for (int32_t i = 0; i < pQuery->numOfOutput; ++i) {
int32_t functionId = pQuery->pSelectExpr[i].base.functionId;
if (!mergeFlag) {
......@@ -2458,7 +2453,7 @@ static void doMerge(SQueryRuntimeEnv *pRuntimeEnv, int64_t timestamp, SWindowRes
pCtx[i].hasNull = true;
pCtx[i].nStartQueryTimestamp = timestamp;
pCtx[i].aInputElemBuf = getPosInResultPage(pRuntimeEnv, i, pWindowRes);
pCtx[i].aInputElemBuf = getPosInResultPage(pRuntimeEnv, i, pWindowRes, page);
// in case of tag column, the tag information should be extracted from input buffer
if (functionId == TSDB_FUNC_TAG_DUMMY || functionId == TSDB_FUNC_TAG) {
......@@ -2615,14 +2610,16 @@ int32_t tableResultComparFn(const void *pLeft, const void *pRight, void *param)
SWindowResInfo *pWindowResInfo1 = &supporter->pTableQueryInfo[left]->windowResInfo;
SWindowResult * pWindowRes1 = getWindowResult(pWindowResInfo1, leftPos);
tFilePage *page1 = getResBufPage(pRuntimeEnv->pResultBuf, pWindowRes1->pos.pageId);
char *b1 = getPosInResultPage(pRuntimeEnv, PRIMARYKEY_TIMESTAMP_COL_INDEX, pWindowRes1);
char *b1 = getPosInResultPage(pRuntimeEnv, PRIMARYKEY_TIMESTAMP_COL_INDEX, pWindowRes1, page1);
TSKEY leftTimestamp = GET_INT64_VAL(b1);
SWindowResInfo *pWindowResInfo2 = &supporter->pTableQueryInfo[right]->windowResInfo;
SWindowResult * pWindowRes2 = getWindowResult(pWindowResInfo2, rightPos);
tFilePage *page2 = getResBufPage(pRuntimeEnv->pResultBuf, pWindowRes2->pos.pageId);
char *b2 = getPosInResultPage(pRuntimeEnv, PRIMARYKEY_TIMESTAMP_COL_INDEX, pWindowRes2);
char *b2 = getPosInResultPage(pRuntimeEnv, PRIMARYKEY_TIMESTAMP_COL_INDEX, pWindowRes2, page2);
TSKEY rightTimestamp = GET_INT64_VAL(b2);
if (leftTimestamp == rightTimestamp) {
......@@ -2685,35 +2682,26 @@ void copyResToQueryResultBuf(SQInfo *pQInfo, SQuery *pQuery) {
int32_t id = getGroupResultId(pQInfo->groupIndex - 1);
SIDList list = getDataBufPagesIdList(pResultBuf, pQInfo->offset + id);
int32_t total = 0;
int32_t size = taosArrayGetSize(list);
for (int32_t i = 0; i < size; ++i) {
int32_t* pgId = taosArrayGet(list, i);
tFilePage *pData = getResBufPage(pResultBuf, *pgId);
total += pData->num;
}
int32_t rows = total;
int32_t offset = 0;
for (int32_t j = 0; j < size; ++j) {
int32_t* pgId = taosArrayGet(list, j);
tFilePage *pData = getResBufPage(pResultBuf, *pgId);
SPageInfo* pi = *(SPageInfo**) taosArrayGet(list, j);
tFilePage *pData = getResBufPage(pResultBuf, pi->pageId);
for (int32_t i = 0; i < pQuery->numOfOutput; ++i) {
int32_t bytes = pRuntimeEnv->pCtx[i].outputBytes;
char * pDest = pQuery->sdata[i]->data;
memcpy(pDest + offset * bytes, pData->data + pRuntimeEnv->offset[i] * pData->num,
bytes * pData->num);
memcpy(pDest + offset * bytes, pData->data + pRuntimeEnv->offset[i] * pData->num, bytes * pData->num);
}
// rows += pData->num;
offset += pData->num;
}
assert(pQuery->rec.rows == 0);
pQuery->rec.rows += rows;
pQuery->rec.rows += offset;
pQInfo->offset += 1;
}
......@@ -2777,7 +2765,6 @@ int32_t mergeIntoGroupResultImpl(SQInfo *pQInfo, SArray *pGroup) {
assert(pQInfo->numOfGroupResultPages == 0);
return 0;
} else if (numOfTables == 1) { // no need to merge results since only one table in each group
}
SCompSupporter cs = {pTableList, posList, pQInfo};
......@@ -2802,8 +2789,9 @@ int32_t mergeIntoGroupResultImpl(SQInfo *pQInfo, SArray *pGroup) {
SWindowResInfo *pWindowResInfo = &pTableList[pos]->windowResInfo;
SWindowResult * pWindowRes = getWindowResult(pWindowResInfo, cs.position[pos]);
tFilePage *page = getResBufPage(pRuntimeEnv->pResultBuf, pWindowRes->pos.pageId);
char *b = getPosInResultPage(pRuntimeEnv, PRIMARYKEY_TIMESTAMP_COL_INDEX, pWindowRes);
char *b = getPosInResultPage(pRuntimeEnv, PRIMARYKEY_TIMESTAMP_COL_INDEX, pWindowRes, page);
TSKEY ts = GET_INT64_VAL(b);
assert(ts == pWindowRes->window.skey);
......@@ -3517,9 +3505,11 @@ void setWindowResOutputBuf(SQueryRuntimeEnv *pRuntimeEnv, SWindowResult *pResult
SQuery *pQuery = pRuntimeEnv->pQuery;
// Note: pResult->pos[i]->num == 0, there is only fixed number of results for each group
tFilePage *page = getResBufPage(pRuntimeEnv->pResultBuf, pResult->pos.pageId);
for (int32_t i = 0; i < pQuery->numOfOutput; ++i) {
SQLFunctionCtx *pCtx = &pRuntimeEnv->pCtx[i];
pCtx->aOutputBuf = getPosInResultPage(pRuntimeEnv, i, pResult);
pCtx->aOutputBuf = getPosInResultPage(pRuntimeEnv, i, pResult, page);
int32_t functionId = pQuery->pSelectExpr[i].base.functionId;
if (functionId == TSDB_FUNC_TOP || functionId == TSDB_FUNC_BOTTOM || functionId == TSDB_FUNC_DIFF) {
......@@ -3542,6 +3532,8 @@ void setWindowResOutputBufInitCtx(SQueryRuntimeEnv *pRuntimeEnv, SWindowResult *
SQuery *pQuery = pRuntimeEnv->pQuery;
// Note: pResult->pos[i]->num == 0, there is only fixed number of results for each group
tFilePage* bufPage = getResBufPage(pRuntimeEnv->pResultBuf, pResult->pos.pageId);
for (int32_t i = 0; i < pQuery->numOfOutput; ++i) {
SQLFunctionCtx *pCtx = &pRuntimeEnv->pCtx[i];
......@@ -3550,7 +3542,7 @@ void setWindowResOutputBufInitCtx(SQueryRuntimeEnv *pRuntimeEnv, SWindowResult *
continue;
}
pCtx->aOutputBuf = getPosInResultPage(pRuntimeEnv, i, pResult);
pCtx->aOutputBuf = getPosInResultPage(pRuntimeEnv, i, pResult, bufPage);
pCtx->currentStage = 0;
int32_t functionId = pCtx->functionId;
......@@ -3713,11 +3705,13 @@ static int32_t doCopyToSData(SQInfo *pQInfo, SWindowResInfo *pResultInfo, int32_
pQInfo->groupIndex += 1;
}
tFilePage *page = getResBufPage(pRuntimeEnv->pResultBuf, result[i].pos.pageId);
for (int32_t j = 0; j < pQuery->numOfOutput; ++j) {
int32_t size = pRuntimeEnv->pCtx[j].outputBytes;
char *out = pQuery->sdata[j]->data + numOfResult * size;
char *in = getPosInResultPage(pRuntimeEnv, j, &result[i]);
char *in = getPosInResultPage(pRuntimeEnv, j, &result[i], page);
memcpy(out, in + oldOffset * size, size * numOfRowsToCopy);
}
......@@ -4238,10 +4232,10 @@ int32_t doInitQInfo(SQInfo *pQInfo, STSBuf *pTsBuf, void *tsdb, int32_t vgId, bo
int32_t ps = DEFAULT_PAGE_SIZE;
int32_t rowsize = 0;
getIntermediateBufInfo(pRuntimeEnv, &ps, &rowsize);
int32_t TWOMB = 1024*1024*2;
if (isSTableQuery && !onlyQueryTags(pRuntimeEnv->pQuery)) {
int32_t numOfPages = getInitialPageNum(pQInfo);
code = createDiskbasedResultBuffer(&pRuntimeEnv->pResultBuf, numOfPages, rowsize, ps, numOfPages, pQInfo);
code = createDiskbasedResultBuffer(&pRuntimeEnv->pResultBuf, rowsize, ps, TWOMB, pQInfo);
if (code != TSDB_CODE_SUCCESS) {
return code;
}
......@@ -4269,8 +4263,7 @@ int32_t doInitQInfo(SQInfo *pQInfo, STSBuf *pTsBuf, void *tsdb, int32_t vgId, bo
} else if (pRuntimeEnv->groupbyNormalCol || QUERY_IS_INTERVAL_QUERY(pQuery)) {
int32_t numOfResultRows = getInitialPageNum(pQInfo);
getIntermediateBufInfo(pRuntimeEnv, &ps, &rowsize);
code = createDiskbasedResultBuffer(&pRuntimeEnv->pResultBuf, numOfResultRows, rowsize, ps, numOfResultRows, pQInfo);
code = createDiskbasedResultBuffer(&pRuntimeEnv->pResultBuf, rowsize, ps, TWOMB, pQInfo);
if (code != TSDB_CODE_SUCCESS) {
return code;
}
......@@ -5894,16 +5887,11 @@ static SQInfo *createQInfoImpl(SQueryTableMsg *pQueryMsg, SArray* pTableIdList,
}
pQInfo->arrTableIdInfo = taosArrayInit(tableIndex, sizeof(STableIdInfo));
pQInfo->dataReady = QUERY_RESULT_NOT_READY;
pthread_mutex_init(&pQInfo->lock, NULL);
pQuery->pos = -1;
pQuery->window = pQueryMsg->window;
if (sem_init(&pQInfo->dataReady, 0, 0) != 0) {
int32_t code = TAOS_SYSTEM_ERROR(errno);
qError("QInfo:%p init dataReady sem failed, reason:%s", pQInfo, tstrerror(code));
goto _cleanup;
}
colIdCheck(pQuery);
qDebug("qmsg:%p QInfo:%p created", pQueryMsg, pQInfo);
......@@ -5945,7 +5933,7 @@ static bool isValidQInfo(void *param) {
return (sig == (uint64_t)pQInfo);
}
static int32_t initQInfo(SQueryTableMsg *pQueryMsg, void *tsdb, int32_t vgId, SQInfo *pQInfo, bool isSTable, void* param) {
static int32_t initQInfo(SQueryTableMsg *pQueryMsg, void *tsdb, int32_t vgId, SQInfo *pQInfo, bool isSTable) {
int32_t code = TSDB_CODE_SUCCESS;
SQuery *pQuery = pQInfo->runtimeEnv.pQuery;
......@@ -5965,18 +5953,12 @@ static int32_t initQInfo(SQueryTableMsg *pQueryMsg, void *tsdb, int32_t vgId, SQ
pQuery->window.ekey, pQuery->order.order);
setQueryStatus(pQuery, QUERY_COMPLETED);
pQInfo->tableqinfoGroupInfo.numOfTables = 0;
sem_post(&pQInfo->dataReady);
return TSDB_CODE_SUCCESS;
}
pQInfo->param = param;
if (pQInfo->tableqinfoGroupInfo.numOfTables == 0) {
qDebug("QInfo:%p no table qualified for tag filter, abort query", pQInfo);
setQueryStatus(pQuery, QUERY_COMPLETED);
sem_post(&pQInfo->dataReady);
return TSDB_CODE_SUCCESS;
}
......@@ -6018,7 +6000,6 @@ static void freeQInfo(SQInfo *pQInfo) {
tfree(pQuery->sdata[col]);
}
sem_destroy(&(pQInfo->dataReady));
teardownQueryRuntimeEnv(&pQInfo->runtimeEnv);
for (int32_t i = 0; i < pQuery->numOfFilterCols; ++i) {
......@@ -6170,7 +6151,7 @@ typedef struct SQueryMgmt {
pthread_mutex_t lock;
} SQueryMgmt;
int32_t qCreateQueryInfo(void* tsdb, int32_t vgId, SQueryTableMsg* pQueryMsg, void* param, qinfo_t* pQInfo) {
int32_t qCreateQueryInfo(void* tsdb, int32_t vgId, SQueryTableMsg* pQueryMsg, qinfo_t* pQInfo) {
assert(pQueryMsg != NULL && tsdb != NULL);
int32_t code = TSDB_CODE_SUCCESS;
......@@ -6266,7 +6247,7 @@ int32_t qCreateQueryInfo(void* tsdb, int32_t vgId, SQueryTableMsg* pQueryMsg, vo
goto _over;
}
code = initQInfo(pQueryMsg, tsdb, vgId, *pQInfo, isSTableQuery, param);
code = initQInfo(pQueryMsg, tsdb, vgId, *pQInfo, isSTableQuery);
_over:
free(tagCond);
......@@ -6306,24 +6287,32 @@ void qDestroyQueryInfo(qinfo_t qHandle) {
freeQInfo(pQInfo);
}
void qTableQuery(qinfo_t qinfo) {
static void setQueryResultReady(SQInfo* pQInfo) {
pthread_mutex_lock(&pQInfo->lock);
pQInfo->dataReady = QUERY_RESULT_READY;
pthread_mutex_unlock(&pQInfo->lock);
}
bool qTableQuery(qinfo_t qinfo) {
SQInfo *pQInfo = (SQInfo *)qinfo;
if (pQInfo == NULL || pQInfo->signature != pQInfo) {
qDebug("QInfo:%p has been freed, no need to execute", pQInfo);
return;
return false;
}
if (IS_QUERY_KILLED(pQInfo)) {
qDebug("QInfo:%p it is already killed, abort", pQInfo);
sem_post(&pQInfo->dataReady);
return;
setQueryResultReady(pQInfo);
return false;
}
if (pQInfo->tableqinfoGroupInfo.numOfTables == 0) {
setQueryStatus(pQInfo->runtimeEnv.pQuery, QUERY_COMPLETED);
setQueryResultReady(pQInfo);
qDebug("QInfo:%p no table exists for query, abort", pQInfo);
sem_post(&pQInfo->dataReady);
return;
return false;
}
// error occurs, record the error code and return to client
......@@ -6331,8 +6320,9 @@ void qTableQuery(qinfo_t qinfo) {
if (ret != TSDB_CODE_SUCCESS) {
pQInfo->code = ret;
qDebug("QInfo:%p query abort due to error/cancel occurs, code:%s", pQInfo, tstrerror(pQInfo->code));
sem_post(&pQInfo->dataReady);
return;
setQueryResultReady(pQInfo);
return false;
}
qDebug("QInfo:%p query task is launched", pQInfo);
......@@ -6357,10 +6347,20 @@ void qTableQuery(qinfo_t qinfo) {
pQInfo, pQuery->rec.rows, pQuery->rec.total + pQuery->rec.rows);
}
sem_post(&pQInfo->dataReady);
bool buildRes = false;
pthread_mutex_lock(&pQInfo->lock);
pQInfo->dataReady = QUERY_RESULT_READY;
if (pQInfo->rspContext != NULL) {
buildRes = true;
}
pthread_mutex_unlock(&pQInfo->lock);
return buildRes;
}
int32_t qRetrieveQueryResultInfo(qinfo_t qinfo) {
int32_t qRetrieveQueryResultInfo(qinfo_t qinfo, bool* buildRes, void* pRspContext) {
SQInfo *pQInfo = (SQInfo *)qinfo;
if (pQInfo == NULL || !isValidQInfo(pQInfo)) {
......@@ -6373,11 +6373,21 @@ int32_t qRetrieveQueryResultInfo(qinfo_t qinfo) {
return pQInfo->code;
}
sem_wait(&pQInfo->dataReady);
qDebug("QInfo:%p retrieve result info, rowsize:%d, rows:%"PRId64", code:%d", pQInfo, pQuery->rowSize, pQuery->rec.rows,
pQInfo->code);
int32_t code = TSDB_CODE_SUCCESS;
pthread_mutex_lock(&pQInfo->lock);
if (pQInfo->dataReady == QUERY_RESULT_READY) {
*buildRes = true;
qDebug("QInfo:%p retrieve result info, rowsize:%d, rows:%"PRId64", code:%d", pQInfo, pQuery->rowSize, pQuery->rec.rows,
pQInfo->code);
} else {
*buildRes = false;
qDebug("QInfo:%p retrieve req set query return result after paused", pQInfo);
pQInfo->rspContext = pRspContext;
}
return pQInfo->code;
code = pQInfo->code;
pthread_mutex_unlock(&pQInfo->lock);
return code;
}
bool qHasMoreResultsToRetrieve(qinfo_t qinfo) {
......@@ -6389,6 +6399,7 @@ bool qHasMoreResultsToRetrieve(qinfo_t qinfo) {
}
SQuery *pQuery = pQInfo->runtimeEnv.pQuery;
bool ret = false;
if (Q_STATUS_EQUAL(pQuery->status, QUERY_OVER)) {
ret = false;
......@@ -6407,7 +6418,7 @@ bool qHasMoreResultsToRetrieve(qinfo_t qinfo) {
return ret;
}
int32_t qDumpRetrieveResult(qinfo_t qinfo, SRetrieveTableRsp **pRsp, int32_t *contLen) {
int32_t qDumpRetrieveResult(qinfo_t qinfo, SRetrieveTableRsp **pRsp, int32_t *contLen, bool* continueExec) {
SQInfo *pQInfo = (SQInfo *)qinfo;
if (pQInfo == NULL || !isValidQInfo(pQInfo)) {
......@@ -6417,8 +6428,10 @@ int32_t qDumpRetrieveResult(qinfo_t qinfo, SRetrieveTableRsp **pRsp, int32_t *co
SQueryRuntimeEnv* pRuntimeEnv = &pQInfo->runtimeEnv;
SQuery *pQuery = pQInfo->runtimeEnv.pQuery;
size_t size = getResultSize(pQInfo, &pQuery->rec.rows);
size += sizeof(int32_t);
size += sizeof(STableIdInfo) * taosArrayGetSize(pQInfo->arrTableIdInfo);
*contLen = size + sizeof(SRetrieveTableRsp);
// todo proper handle failed to allocate memory,
......@@ -6427,6 +6440,7 @@ int32_t qDumpRetrieveResult(qinfo_t qinfo, SRetrieveTableRsp **pRsp, int32_t *co
if (*pRsp == NULL) {
return TSDB_CODE_QRY_OUT_OF_MEMORY;
}
(*pRsp)->numOfRows = htonl(pQuery->rec.rows);
int32_t code = pQInfo->code;
......@@ -6434,8 +6448,8 @@ int32_t qDumpRetrieveResult(qinfo_t qinfo, SRetrieveTableRsp **pRsp, int32_t *co
(*pRsp)->offset = htobe64(pQuery->limit.offset);
(*pRsp)->useconds = htobe64(pRuntimeEnv->summary.elapsedTime);
} else {
(*pRsp)->offset = 0;
(*pRsp)->useconds = 0;
(*pRsp)->offset = 0;
}
(*pRsp)->precision = htons(pQuery->precision);
......@@ -6446,10 +6460,20 @@ int32_t qDumpRetrieveResult(qinfo_t qinfo, SRetrieveTableRsp **pRsp, int32_t *co
code = pQInfo->code;
}
pQInfo->rspContext = NULL;
pQInfo->dataReady = QUERY_RESULT_NOT_READY;
if (IS_QUERY_KILLED(pQInfo) || Q_STATUS_EQUAL(pQuery->status, QUERY_OVER)) {
(*pRsp)->completed = 1; // notify no more result to client
}
if (qHasMoreResultsToRetrieve(pQInfo)) {
*continueExec = true;
} else { // failed to dump result, free qhandle immediately
*continueExec = false;
qKillQuery(pQInfo);
}
return code;
}
......@@ -6460,7 +6484,7 @@ int32_t qKillQuery(qinfo_t qinfo) {
return TSDB_CODE_QRY_INVALID_QHANDLE;
}
sem_post(&pQInfo->dataReady);
// sem_post(&pQInfo->dataReady);
setQueryKilled(pQInfo);
return TSDB_CODE_SUCCESS;
}
......@@ -6607,6 +6631,13 @@ static void buildTagQueryResult(SQInfo* pQInfo) {
setQueryStatus(pQuery, QUERY_COMPLETED);
}
void* qGetResultRetrieveMsg(qinfo_t qinfo) {
SQInfo* pQInfo = (SQInfo*) qinfo;
assert(pQInfo != NULL);
return pQInfo->rspContext;
}
void freeqinfoFn(void *qhandle) {
void** handle = qhandle;
if (handle == NULL || *handle == NULL) {
......@@ -6618,19 +6649,21 @@ void freeqinfoFn(void *qhandle) {
}
void* qOpenQueryMgmt(int32_t vgId) {
const int32_t REFRESH_HANDLE_INTERVAL = 2; // every 2 seconds, refresh handle pool
const int32_t REFRESH_HANDLE_INTERVAL = 30; // every 30 seconds, refresh handle pool
char cacheName[128] = {0};
sprintf(cacheName, "qhandle_%d", vgId);
SQueryMgmt* pQueryHandle = calloc(1, sizeof(SQueryMgmt));
SQueryMgmt* pQueryMgmt = calloc(1, sizeof(SQueryMgmt));
pQueryMgmt->qinfoPool = taosCacheInit(TSDB_DATA_TYPE_BIGINT, REFRESH_HANDLE_INTERVAL, true, freeqinfoFn, cacheName);
pQueryMgmt->closed = false;
pQueryMgmt->vgId = vgId;
pQueryHandle->qinfoPool = taosCacheInit(TSDB_DATA_TYPE_BIGINT, REFRESH_HANDLE_INTERVAL, true, freeqinfoFn, cacheName);
pQueryHandle->closed = false;
pthread_mutex_init(&pQueryHandle->lock, NULL);
pthread_mutex_init(&pQueryMgmt->lock, NULL);
qDebug("vgId:%d, open querymgmt success", vgId);
return pQueryHandle;
return pQueryMgmt;
}
static void queryMgmtKillQueryFn(void* handle) {
......@@ -6670,7 +6703,7 @@ void qCleanupQueryMgmt(void* pQMgmt) {
pthread_mutex_destroy(&pQueryMgmt->lock);
tfree(pQueryMgmt);
qDebug("vgId:%d querymgmt cleanup completed", vgId);
qDebug("vgId:%d queryMgmt cleanup completed", vgId);
}
void** qRegisterQInfo(void* pMgmt, uint64_t qInfo) {
......@@ -6727,3 +6760,4 @@ void** qReleaseQInfo(void* pMgmt, void* pQInfo, bool needFree) {
return 0;
}
......@@ -28,10 +28,10 @@
/*
* SColumnModel is deeply copy
*/
tExtMemBuffer* createExtMemBuffer(int32_t inMemSize, int32_t elemSize, SColumnModel *pModel) {
tExtMemBuffer* createExtMemBuffer(int32_t inMemSize, int32_t elemSize, int32_t pagesize, SColumnModel *pModel) {
tExtMemBuffer* pMemBuffer = (tExtMemBuffer *)calloc(1, sizeof(tExtMemBuffer));
pMemBuffer->pageSize = DEFAULT_PAGE_SIZE;
pMemBuffer->pageSize = pagesize;
pMemBuffer->inMemCapacity = ALIGN8(inMemSize) / pMemBuffer->pageSize;
pMemBuffer->nElemSize = elemSize;
......
......@@ -14,7 +14,7 @@
*/
#include "os.h"
#include "qsqlparser.h"
#include "qSqlparser.h"
#include "queryLog.h"
#include "taosdef.h"
#include "taosmsg.h"
......
......@@ -535,7 +535,7 @@ void tMemBucketPut(tMemBucket *pBucket, void *data, int32_t numOfRows) {
if (pSeg->pBuffer[slotIdx] == NULL) {
pSeg->pBuffer[slotIdx] = createExtMemBuffer(pBucket->numOfTotalPages * pBucket->pageSize, pBucket->nElemSize,
pBucket->pOrderDesc->pColumnModel);
pBucket->pageSize, pBucket->pOrderDesc->pColumnModel);
pSeg->pBuffer[slotIdx]->flushModel = SINGLE_APPEND_MODEL;
pBucket->pOrderDesc->pColumnModel->capacity = pSeg->pBuffer[slotIdx]->numOfElemsPerPage;
}
......
#include "qResultbuf.h"
#include "stddef.h"
#include "tscompression.h"
#include "hash.h"
#include "qExtbuffer.h"
#include "queryLog.h"
#include "taoserror.h"
int32_t createDiskbasedResultBuffer(SDiskbasedResultBuf** pResultBuf, int32_t numOfPages, int32_t rowSize,
int32_t pagesize, int32_t inMemPages, void* handle) {
#define GET_DATA_PAYLOAD(_p) ((_p)->pData + POINTER_BYTES)
int32_t createDiskbasedResultBuffer(SDiskbasedResultBuf** pResultBuf, int32_t rowSize, int32_t pagesize,
int32_t inMemBufSize, const void* handle) {
*pResultBuf = calloc(1, sizeof(SDiskbasedResultBuf));
SDiskbasedResultBuf* pResBuf = *pResultBuf;
if (pResBuf == NULL) {
return TSDB_CODE_COM_OUT_OF_MEMORY;
}
pResBuf->pageSize = pagesize;
pResBuf->numOfPages = inMemPages; // all pages are in buffer in the first place
pResBuf->inMemPages = inMemPages;
assert(inMemPages <= numOfPages);
pResBuf->numOfRowsPerPage = (pagesize - sizeof(tFilePage)) / rowSize;
pResBuf->pageSize = pagesize;
pResBuf->numOfPages = 0; // all pages are in buffer in the first place
pResBuf->totalBufSize = 0;
pResBuf->inMemPages = inMemBufSize/pagesize; // maximum allowed pages, it is a soft limit.
pResBuf->allocateId = -1;
pResBuf->comp = true;
pResBuf->file = NULL;
pResBuf->handle = handle;
pResBuf->fileSize = 0;
pResBuf->totalBufSize = pResBuf->numOfPages * pagesize;
pResBuf->incStep = 4;
pResBuf->allocateId = -1;
// at least more than 2 pages must be in memory
assert(inMemBufSize >= pagesize * 2);
pResBuf->iBuf = calloc(pResBuf->inMemPages, pResBuf->pageSize);
pResBuf->numOfRowsPerPage = (pagesize - sizeof(tFilePage)) / rowSize;
pResBuf->lruList = tdListNew(POINTER_BYTES);
// init id hash table
pResBuf->idsTable = taosHashInit(numOfPages, taosGetDefaultHashFunction(TSDB_DATA_TYPE_INT), false);
pResBuf->list = taosArrayInit(numOfPages, POINTER_BYTES);
pResBuf->groupSet = taosHashInit(10, taosGetDefaultHashFunction(TSDB_DATA_TYPE_INT), false);
pResBuf->assistBuf = malloc(pResBuf->pageSize + 2); // EXTRA BYTES
pResBuf->all = taosHashInit(10, taosGetDefaultHashFunction(TSDB_DATA_TYPE_INT), false);
char path[PATH_MAX] = {0};
getTmpfilePath("tsdb_qbuf", path);
getTmpfilePath("qbuf", path);
pResBuf->path = strdup(path);
pResBuf->fd = FD_INITIALIZER;
pResBuf->pBuf = NULL;
pResBuf->emptyDummyIdList = taosArrayInit(1, sizeof(int32_t));
qDebug("QInfo:%p create resBuf for output, page size:%d, initial pages:%d, %" PRId64 "bytes", handle,
pResBuf->pageSize, pResBuf->numOfPages, pResBuf->totalBufSize);
qDebug("QInfo:%p create resBuf for output, page size:%d, inmem buf pages:%d, file:%s", handle, pResBuf->pageSize,
pResBuf->inMemPages, pResBuf->path);
return TSDB_CODE_SUCCESS;
}
int32_t getNumOfResultBufGroupId(SDiskbasedResultBuf* pResultBuf) { return taosHashGetSize(pResultBuf->idsTable); }
int32_t getResBufSize(SDiskbasedResultBuf* pResultBuf) { return pResultBuf->totalBufSize; }
#define NUM_OF_PAGES_ON_DISK(_r) ((_r)->numOfPages - (_r)->inMemPages)
#define FILE_SIZE_ON_DISK(_r) (NUM_OF_PAGES_ON_DISK(_r) * (_r)->pageSize)
static int32_t createDiskResidesBuf(SDiskbasedResultBuf* pResultBuf) {
pResultBuf->fd = open(pResultBuf->path, O_CREAT | O_RDWR | O_TRUNC, 0666);
if (!FD_VALID(pResultBuf->fd)) {
static int32_t createDiskFile(SDiskbasedResultBuf* pResultBuf) {
pResultBuf->file = fopen(pResultBuf->path, "wb+");
if (pResultBuf->file == NULL) {
qError("failed to create tmp file: %s on disk. %s", pResultBuf->path, strerror(errno));
return TAOS_SYSTEM_ERROR(errno);
}
assert(pResultBuf->numOfPages == pResultBuf->inMemPages);
pResultBuf->numOfPages += pResultBuf->incStep;
return TSDB_CODE_SUCCESS;
}
int32_t ret = ftruncate(pResultBuf->fd, NUM_OF_PAGES_ON_DISK(pResultBuf) * pResultBuf->pageSize);
if (ret != TSDB_CODE_SUCCESS) {
qError("failed to create tmp file: %s on disk. %s", pResultBuf->path, strerror(errno));
return TAOS_SYSTEM_ERROR(errno);
static char* doCompressData(void* data, int32_t srcSize, int32_t *dst, SDiskbasedResultBuf* pResultBuf) { // do nothing
if (!pResultBuf->comp) {
*dst = srcSize;
return data;
}
pResultBuf->pBuf = mmap(NULL, FILE_SIZE_ON_DISK(pResultBuf), PROT_READ | PROT_WRITE, MAP_SHARED, pResultBuf->fd, 0);
if (pResultBuf->pBuf == MAP_FAILED) {
qError("QInfo:%p failed to map temp file: %s. %s", pResultBuf->handle, pResultBuf->path, strerror(errno));
return TAOS_SYSTEM_ERROR(errno);
*dst = tsCompressString(data, srcSize, 1, pResultBuf->assistBuf, srcSize, ONE_STAGE_COMP, NULL, 0);
memcpy(data, pResultBuf->assistBuf, *dst);
return data;
}
static char* doDecompressData(void* data, int32_t srcSize, int32_t *dst, SDiskbasedResultBuf* pResultBuf) { // do nothing
if (!pResultBuf->comp) {
*dst = srcSize;
return data;
}
pResultBuf->totalBufSize = pResultBuf->numOfPages * pResultBuf->pageSize;
return TSDB_CODE_SUCCESS;
*dst = tsDecompressString(data, srcSize, 1, pResultBuf->assistBuf, pResultBuf->pageSize, ONE_STAGE_COMP, NULL, 0);
memcpy(data, pResultBuf->assistBuf, *dst);
return data;
}
static int32_t extendDiskFileSize(SDiskbasedResultBuf* pResultBuf, int32_t incNumOfPages) {
assert(pResultBuf->numOfPages * pResultBuf->pageSize == pResultBuf->totalBufSize);
int32_t ret = TSDB_CODE_SUCCESS;
static int32_t allocatePositionInFile(SDiskbasedResultBuf* pResultBuf, size_t size) {
if (pResultBuf->pFree == NULL) {
return pResultBuf->nextPos;
} else {
int32_t offset = -1;
size_t num = taosArrayGetSize(pResultBuf->pFree);
for(int32_t i = 0; i < num; ++i) {
SFreeListItem* pi = taosArrayGet(pResultBuf->pFree, i);
if (pi->len >= size) {
offset = pi->offset;
pi->offset += size;
pi->len -= size;
return offset;
}
}
// no available recycle space, allocate new area in file
return pResultBuf->nextPos;
}
}
static char* doFlushPageToDisk(SDiskbasedResultBuf* pResultBuf, SPageInfo* pg) {
assert(!pg->used && pg->pData != NULL);
int32_t size = -1;
char* t = doCompressData(GET_DATA_PAYLOAD(pg), pResultBuf->pageSize, &size, pResultBuf);
// this page is flushed to disk for the first time
if (pg->info.offset == -1) {
pg->info.offset = allocatePositionInFile(pResultBuf, size);
pResultBuf->nextPos += size;
if (pResultBuf->pBuf == NULL) {
assert(pResultBuf->fd == FD_INITIALIZER);
fseek(pResultBuf->file, pg->info.offset, SEEK_SET);
/*int32_t ret =*/ fwrite(t, 1, size, pResultBuf->file);
if ((ret = createDiskResidesBuf(pResultBuf)) != TSDB_CODE_SUCCESS) {
return ret;
if (pResultBuf->fileSize < pg->info.offset + pg->info.length) {
pResultBuf->fileSize = pg->info.offset + pg->info.length;
}
} else {
ret = munmap(pResultBuf->pBuf, FILE_SIZE_ON_DISK(pResultBuf));
pResultBuf->numOfPages += incNumOfPages;
/*
* disk-based output buffer is exhausted, try to extend the disk-based buffer, the available disk space may
* be insufficient
*/
ret = ftruncate(pResultBuf->fd, NUM_OF_PAGES_ON_DISK(pResultBuf) * pResultBuf->pageSize);
if (ret != TSDB_CODE_SUCCESS) {
// dError("QInfo:%p failed to create intermediate result output file:%s. %s", pQInfo, pSupporter->extBufFile,
// strerror(errno));
return TSDB_CODE_QRY_NO_DISKSPACE;
// length becomes greater, current space is not enough, allocate new place, otherwise, do nothing
if (pg->info.length < size) {
// 1. add current space to free list
taosArrayPush(pResultBuf->pFree, &pg->info);
// 2. allocate new position, and update the info
pg->info.offset = allocatePositionInFile(pResultBuf, size);
pResultBuf->nextPos += size;
}
pResultBuf->totalBufSize = pResultBuf->numOfPages * pResultBuf->pageSize;
pResultBuf->pBuf = mmap(NULL, FILE_SIZE_ON_DISK(pResultBuf), PROT_READ | PROT_WRITE, MAP_SHARED, pResultBuf->fd, 0);
//3. write to disk.
fseek(pResultBuf->file, pg->info.offset, SEEK_SET);
fwrite(t, size, 1, pResultBuf->file);
if (pResultBuf->pBuf == MAP_FAILED) {
// dError("QInfo:%p failed to map temp file: %s. %s", pQInfo, pSupporter->extBufFile, strerror(errno));
return TSDB_CODE_QRY_OUT_OF_MEMORY;
if (pResultBuf->fileSize < pg->info.offset + pg->info.length) {
pResultBuf->fileSize = pg->info.offset + pg->info.length;
}
}
return TSDB_CODE_SUCCESS;
char* ret = pg->pData;
memset(ret, 0, pResultBuf->pageSize);
pg->pData = NULL;
pg->info.length = size;
pResultBuf->statis.flushBytes += pg->info.length;
return ret;
}
#define NO_AVAILABLE_PAGES(_b) ((_b)->allocateId == (_b)->numOfPages - 1)
static char* flushPageToDisk(SDiskbasedResultBuf* pResultBuf, SPageInfo* pg) {
int32_t ret = TSDB_CODE_SUCCESS;
assert(pResultBuf->numOfPages * pResultBuf->pageSize == pResultBuf->totalBufSize && pResultBuf->numOfPages >= pResultBuf->inMemPages);
static FORCE_INLINE int32_t getGroupIndex(SDiskbasedResultBuf* pResultBuf, int32_t groupId) {
assert(pResultBuf != NULL);
if (pResultBuf->file == NULL) {
if ((ret = createDiskFile(pResultBuf)) != TSDB_CODE_SUCCESS) {
terrno = ret;
return NULL;
}
}
return doFlushPageToDisk(pResultBuf, pg);
}
// load file block data in disk
static char* loadPageFromDisk(SDiskbasedResultBuf* pResultBuf, SPageInfo* pg) {
int32_t ret = fseek(pResultBuf->file, pg->info.offset, SEEK_SET);
ret = fread(GET_DATA_PAYLOAD(pg), 1, pg->info.length, pResultBuf->file);
if (ret != pg->info.length) {
terrno = errno;
return NULL;
}
pResultBuf->statis.loadBytes += pg->info.length;
char* p = taosHashGet(pResultBuf->idsTable, (const char*)&groupId, sizeof(int32_t));
int32_t fullSize = 0;
doDecompressData(GET_DATA_PAYLOAD(pg), pg->info.length, &fullSize, pResultBuf);
return GET_DATA_PAYLOAD(pg);
}
#define NO_AVAILABLE_PAGES(_b) ((_b)->numOfPages >= (_b)->inMemPages)
static SIDList addNewGroup(SDiskbasedResultBuf* pResultBuf, int32_t groupId) {
assert(taosHashGet(pResultBuf->groupSet, (const char*) &groupId, sizeof(int32_t)) == NULL);
SArray* pa = taosArrayInit(1, POINTER_BYTES);
int32_t ret = taosHashPut(pResultBuf->groupSet, (const char*)&groupId, sizeof(int32_t), &pa, POINTER_BYTES);
assert(ret == 0);
return pa;
}
static SPageInfo* registerPage(SDiskbasedResultBuf* pResultBuf, int32_t groupId, int32_t pageId) {
SIDList list = NULL;
char** p = taosHashGet(pResultBuf->groupSet, (const char*)&groupId, sizeof(int32_t));
if (p == NULL) { // it is a new group id
return -1;
list = addNewGroup(pResultBuf, groupId);
} else {
list = (SIDList) (*p);
}
int32_t slot = GET_INT32_VAL(p);
assert(slot >= 0 && slot < taosHashGetSize(pResultBuf->idsTable));
pResultBuf->numOfPages += 1;
return slot;
SPageInfo* ppi = malloc(sizeof(SPageInfo));//{ .info = PAGE_INFO_INITIALIZER, .pageId = pageId, .pn = NULL};
ppi->info = PAGE_INFO_INITIALIZER;
ppi->pageId = pageId;
ppi->pData = NULL;
ppi->pn = NULL;
ppi->used = true;
return *(SPageInfo**) taosArrayPush(list, &ppi);
}
static int32_t addNewGroupId(SDiskbasedResultBuf* pResultBuf, int32_t groupId) {
int32_t num = getNumOfResultBufGroupId(pResultBuf); // the num is the newest allocated group id slot
taosHashPut(pResultBuf->idsTable, (const char*)&groupId, sizeof(int32_t), &num, sizeof(int32_t));
static SListNode* getEldestUnrefedPage(SDiskbasedResultBuf* pResultBuf) {
SListIter iter = {0};
tdListInitIter(pResultBuf->lruList, &iter, TD_LIST_BACKWARD);
SListNode* pn = NULL;
while((pn = tdListNext(&iter)) != NULL) {
assert(pn != NULL);
SPageInfo* pageInfo = *(SPageInfo**) pn->data;
assert(pageInfo->pageId >= 0 && pageInfo->pn == pn);
SArray* pa = taosArrayInit(1, sizeof(int32_t));
taosArrayPush(pResultBuf->list, &pa);
if (!pageInfo->used) {
break;
}
}
assert(taosArrayGetSize(pResultBuf->list) == taosHashGetSize(pResultBuf->idsTable));
return num;
return pn;
}
static void registerPageId(SDiskbasedResultBuf* pResultBuf, int32_t groupId, int32_t pageId) {
int32_t slot = getGroupIndex(pResultBuf, groupId);
if (slot < 0) {
slot = addNewGroupId(pResultBuf, groupId);
static char* evicOneDataPage(SDiskbasedResultBuf* pResultBuf) {
char* bufPage = NULL;
SListNode* pn = getEldestUnrefedPage(pResultBuf);
// all pages are referenced by user, try to allocate new space
if (pn == NULL) {
int32_t prev = pResultBuf->inMemPages;
pResultBuf->inMemPages = pResultBuf->inMemPages * 1.5;
qWarn("%p in memory buf page not sufficient, expand from %d to %d, page size:%d", pResultBuf, prev,
pResultBuf->inMemPages, pResultBuf->pageSize);
} else {
pResultBuf->statis.flushPages += 1;
tdListPopNode(pResultBuf->lruList, pn);
SPageInfo* d = *(SPageInfo**) pn->data;
assert(d->pn == pn);
d->pn = NULL;
tfree(pn);
bufPage = flushPageToDisk(pResultBuf, d);
}
SIDList pList = taosArrayGetP(pResultBuf->list, slot);
taosArrayPush(pList, &pageId);
return bufPage;
}
static void lruListPushFront(SList *pList, SPageInfo* pi) {
tdListPrepend(pList, &pi);
SListNode* front = tdListGetHead(pList);
pi->pn = front;
}
static void lruListMoveToFront(SList *pList, SPageInfo* pi) {
tdListPopNode(pList, pi->pn);
tdListPrependNode(pList, pi->pn);
}
tFilePage* getNewDataBuf(SDiskbasedResultBuf* pResultBuf, int32_t groupId, int32_t* pageId) {
pResultBuf->statis.getPages += 1;
char* availablePage = NULL;
if (NO_AVAILABLE_PAGES(pResultBuf)) {
if (extendDiskFileSize(pResultBuf, pResultBuf->incStep) != TSDB_CODE_SUCCESS) {
return NULL;
}
availablePage = evicOneDataPage(pResultBuf);
}
// register new id in this group
*pageId = (++pResultBuf->allocateId);
registerPageId(pResultBuf, groupId, *pageId);
// clear memory for the new page
tFilePage* page = getResBufPage(pResultBuf, *pageId);
memset(page, 0, pResultBuf->pageSize);
return page;
// register page id info
SPageInfo* pi = registerPage(pResultBuf, groupId, *pageId);
// add to LRU list
assert(listNEles(pResultBuf->lruList) < pResultBuf->inMemPages && pResultBuf->inMemPages > 0);
lruListPushFront(pResultBuf->lruList, pi);
// add to hash map
taosHashPut(pResultBuf->all, pageId, sizeof(int32_t), &pi, POINTER_BYTES);
// allocate buf
if (availablePage == NULL) {
pi->pData = calloc(1, pResultBuf->pageSize + POINTER_BYTES);
} else {
pi->pData = availablePage;
}
pResultBuf->totalBufSize += pResultBuf->pageSize;
((void**)pi->pData)[0] = pi;
pi->used = true;
return GET_DATA_PAYLOAD(pi);
}
tFilePage* getResBufPage(SDiskbasedResultBuf* pResultBuf, int32_t id) {
assert(pResultBuf != NULL && id >= 0);
pResultBuf->statis.getPages += 1;
SPageInfo** pi = taosHashGet(pResultBuf->all, &id, sizeof(int32_t));
assert(pi != NULL && *pi != NULL);
if ((*pi)->pData != NULL) { // it is in memory
// no need to update the LRU list if only one page exists
if (pResultBuf->numOfPages == 1) {
(*pi)->used = true;
return GET_DATA_PAYLOAD(*pi);
}
SPageInfo** pInfo = (SPageInfo**) ((*pi)->pn->data);
assert(*pInfo == *pi);
lruListMoveToFront(pResultBuf->lruList, (*pi));
(*pi)->used = true;
return GET_DATA_PAYLOAD(*pi);
} else { // not in memory
assert((*pi)->pData == NULL && (*pi)->pn == NULL && (*pi)->info.length >= 0 && (*pi)->info.offset >= 0);
char* availablePage = NULL;
if (NO_AVAILABLE_PAGES(pResultBuf)) {
availablePage = evicOneDataPage(pResultBuf);
}
if (availablePage == NULL) {
(*pi)->pData = calloc(1, pResultBuf->pageSize + POINTER_BYTES);
} else {
(*pi)->pData = availablePage;
}
((void**)((*pi)->pData))[0] = (*pi);
lruListPushFront(pResultBuf->lruList, *pi);
loadPageFromDisk(pResultBuf, *pi);
return GET_DATA_PAYLOAD(*pi);
}
}
int32_t getNumOfRowsPerPage(SDiskbasedResultBuf* pResultBuf) { return pResultBuf->numOfRowsPerPage; }
void releaseResBufPage(SDiskbasedResultBuf* pResultBuf, void* page) {
assert(pResultBuf != NULL && page != NULL);
char* p = (char*) page - POINTER_BYTES;
SPageInfo* ppi = ((SPageInfo**) p)[0];
releaseResBufPageInfo(pResultBuf, ppi);
}
void releaseResBufPageInfo(SDiskbasedResultBuf* pResultBuf, SPageInfo* pi) {
assert(pi->pData != NULL && pi->used);
pi->used = false;
pResultBuf->statis.releasePages += 1;
}
size_t getNumOfRowsPerPage(const SDiskbasedResultBuf* pResultBuf) { return pResultBuf->numOfRowsPerPage; }
size_t getNumOfResultBufGroupId(const SDiskbasedResultBuf* pResultBuf) { return taosHashGetSize(pResultBuf->groupSet); }
size_t getResBufSize(const SDiskbasedResultBuf* pResultBuf) { return pResultBuf->totalBufSize; }
SIDList getDataBufPagesIdList(SDiskbasedResultBuf* pResultBuf, int32_t groupId) {
int32_t slot = getGroupIndex(pResultBuf, groupId);
if (slot < 0) {
assert(pResultBuf != NULL);
char** p = taosHashGet(pResultBuf->groupSet, (const char*)&groupId, sizeof(int32_t));
if (p == NULL) { // it is a new group id
return pResultBuf->emptyDummyIdList;
} else {
return taosArrayGetP(pResultBuf->list, slot);
return (SArray*) (*p);
}
}
void destroyResultBuf(SDiskbasedResultBuf* pResultBuf, void* handle) {
void destroyResultBuf(SDiskbasedResultBuf* pResultBuf) {
if (pResultBuf == NULL) {
return;
}
if (FD_VALID(pResultBuf->fd)) {
qDebug("QInfo:%p disk-based output buffer closed, total:%" PRId64 " bytes, file created:%s, file size:%d", handle,
pResultBuf->totalBufSize, pResultBuf->path, FILE_SIZE_ON_DISK(pResultBuf));
if (pResultBuf->file != NULL) {
qDebug("QInfo:%p disk-based output buffer closed, total:%" PRId64 " bytes, file size:%"PRId64" bytes",
pResultBuf->handle, pResultBuf->totalBufSize, pResultBuf->fileSize);
close(pResultBuf->fd);
munmap(pResultBuf->pBuf, FILE_SIZE_ON_DISK(pResultBuf));
pResultBuf->pBuf = NULL;
fclose(pResultBuf->file);
} else {
qDebug("QInfo:%p disk-based output buffer closed, total:%" PRId64 " bytes, no file created", handle,
qDebug("QInfo:%p disk-based output buffer closed, total:%" PRId64 " bytes, no file created", pResultBuf->handle,
pResultBuf->totalBufSize);
}
unlink(pResultBuf->path);
tfree(pResultBuf->path);
size_t size = taosArrayGetSize(pResultBuf->list);
for (int32_t i = 0; i < size; ++i) {
SArray* pa = taosArrayGetP(pResultBuf->list, i);
taosArrayDestroy(pa);
SHashMutableIterator* iter = taosHashCreateIter(pResultBuf->groupSet);
while(taosHashIterNext(iter)) {
SArray** p = (SArray**) taosHashIterGet(iter);
size_t n = taosArrayGetSize(*p);
for(int32_t i = 0; i < n; ++i) {
SPageInfo* pi = taosArrayGetP(*p, i);
tfree(pi->pData);
tfree(pi);
}
taosArrayDestroy(*p);
}
taosArrayDestroy(pResultBuf->list);
taosHashDestroyIter(iter);
tdListFree(pResultBuf->lruList);
taosArrayDestroy(pResultBuf->emptyDummyIdList);
taosHashCleanup(pResultBuf->idsTable);
taosHashCleanup(pResultBuf->groupSet);
taosHashCleanup(pResultBuf->all);
tfree(pResultBuf->iBuf);
tfree(pResultBuf->assistBuf);
tfree(pResultBuf);
}
int32_t getLastPageId(SIDList pList) {
SPageInfo* getLastPageInfo(SIDList pList) {
size_t size = taosArrayGetSize(pList);
return *(int32_t*) taosArrayGet(pList, size - 1);
return (SPageInfo*) taosArrayGetP(pList, size - 1);
}
......@@ -236,11 +236,13 @@ void clearTimeWindowResBuf(SQueryRuntimeEnv *pRuntimeEnv, SWindowResult *pWindow
if (pWindowRes == NULL) {
return;
}
tFilePage *page = getResBufPage(pRuntimeEnv->pResultBuf, pWindowRes->pos.pageId);
for (int32_t i = 0; i < pRuntimeEnv->pQuery->numOfOutput; ++i) {
SResultInfo *pResultInfo = &pWindowRes->resultInfo[i];
char * s = getPosInResultPage(pRuntimeEnv, i, pWindowRes);
char * s = getPosInResultPage(pRuntimeEnv, i, pWindowRes, page);
size_t size = pRuntimeEnv->pQuery->pSelectExpr[i].bytes;
memset(s, 0, size);
......@@ -277,8 +279,11 @@ void copyTimeWindowResBuf(SQueryRuntimeEnv *pRuntimeEnv, SWindowResult *dst, con
memcpy(pDst->interResultBuf, pSrc->interResultBuf, pDst->bufLen);
// copy the output buffer data from src to dst, the position info keep unchanged
char * dstBuf = getPosInResultPage(pRuntimeEnv, i, dst);
char * srcBuf = getPosInResultPage(pRuntimeEnv, i, (SWindowResult *)src);
tFilePage *dstpage = getResBufPage(pRuntimeEnv->pResultBuf, dst->pos.pageId);
char * dstBuf = getPosInResultPage(pRuntimeEnv, i, dst, dstpage);
tFilePage *srcpage = getResBufPage(pRuntimeEnv->pResultBuf, src->pos.pageId);
char * srcBuf = getPosInResultPage(pRuntimeEnv, i, (SWindowResult *)src, srcpage);
size_t s = pRuntimeEnv->pQuery->pSelectExpr[i].bytes;
memcpy(dstBuf, srcBuf, s);
......
......@@ -30,7 +30,7 @@
#include <string.h>
#include <assert.h>
#include <stdbool.h>
#include "qsqlparser.h"
#include "qSqlparser.h"
#include "tcmdtype.h"
#include "tstoken.h"
#include "ttokendef.h"
......
......@@ -18,17 +18,144 @@ void simpleTest() {
tFilePage* pBufPage = getNewDataBuf(pResultBuf, groupId, &pageId);
ASSERT_TRUE(pBufPage != NULL);
ASSERT_EQ(getNumOfRowsPerPage(pResultBuf), (16384L - sizeof(int64_t))/64);
ASSERT_EQ(getResBufSize(pResultBuf), 1000*16384L);
ASSERT_EQ(getResBufSize(pResultBuf), 1024);
SIDList list = getDataBufPagesIdList(pResultBuf, groupId);
ASSERT_EQ(taosArrayGetSize(list), 1);
ASSERT_EQ(getNumOfResultBufGroupId(pResultBuf), 1);
destroyResultBuf(pResultBuf, NULL);
releaseResBufPage(pResultBuf, pBufPage);
tFilePage* pBufPage1 = getNewDataBuf(pResultBuf, groupId, &pageId);
tFilePage* t = getResBufPage(pResultBuf, pageId);
ASSERT_TRUE(t == pBufPage1);
tFilePage* pBufPage2 = getNewDataBuf(pResultBuf, groupId, &pageId);
tFilePage* t1 = getResBufPage(pResultBuf, pageId);
ASSERT_TRUE(t1 == pBufPage2);
tFilePage* pBufPage3 = getNewDataBuf(pResultBuf, groupId, &pageId);
tFilePage* t2 = getResBufPage(pResultBuf, pageId);
ASSERT_TRUE(t2 == pBufPage3);
tFilePage* pBufPage4 = getNewDataBuf(pResultBuf, groupId, &pageId);
tFilePage* t3 = getResBufPage(pResultBuf, pageId);
ASSERT_TRUE(t3 == pBufPage4);
tFilePage* pBufPage5 = getNewDataBuf(pResultBuf, groupId, &pageId);
tFilePage* t4 = getResBufPage(pResultBuf, pageId);
ASSERT_TRUE(t4 == pBufPage5);
destroyResultBuf(pResultBuf);
}
void writeDownTest() {
SDiskbasedResultBuf* pResultBuf = NULL;
int32_t ret = createDiskbasedResultBuffer(&pResultBuf, 1000, 64, 1024, 4, NULL);
int32_t pageId = 0;
int32_t writePageId = 0;
int32_t groupId = 0;
int32_t nx = 12345;
tFilePage* pBufPage = getNewDataBuf(pResultBuf, groupId, &pageId);
ASSERT_TRUE(pBufPage != NULL);
*(int32_t*)(pBufPage->data) = nx;
writePageId = pageId;
releaseResBufPage(pResultBuf, pBufPage);
tFilePage* pBufPage1 = getNewDataBuf(pResultBuf, groupId, &pageId);
tFilePage* t1 = getResBufPage(pResultBuf, pageId);
ASSERT_TRUE(t1 == pBufPage1);
ASSERT_TRUE(pageId == 1);
tFilePage* pBufPage2 = getNewDataBuf(pResultBuf, groupId, &pageId);
tFilePage* t2 = getResBufPage(pResultBuf, pageId);
ASSERT_TRUE(t2 == pBufPage2);
ASSERT_TRUE(pageId == 2);
tFilePage* pBufPage3 = getNewDataBuf(pResultBuf, groupId, &pageId);
tFilePage* t3 = getResBufPage(pResultBuf, pageId);
ASSERT_TRUE(t3 == pBufPage3);
ASSERT_TRUE(pageId == 3);
tFilePage* pBufPage4 = getNewDataBuf(pResultBuf, groupId, &pageId);
tFilePage* t4 = getResBufPage(pResultBuf, pageId);
ASSERT_TRUE(t4 == pBufPage4);
ASSERT_TRUE(pageId == 4);
releaseResBufPage(pResultBuf, t4);
// flush the written page to disk, and read it out again
tFilePage* pBufPagex = getResBufPage(pResultBuf, writePageId);
ASSERT_EQ(*(int32_t*)pBufPagex->data, nx);
SArray* pa = getDataBufPagesIdList(pResultBuf, groupId);
ASSERT_EQ(taosArrayGetSize(pa), 5);
destroyResultBuf(pResultBuf);
}
void recyclePageTest() {
SDiskbasedResultBuf* pResultBuf = NULL;
int32_t ret = createDiskbasedResultBuffer(&pResultBuf, 1000, 64, 1024, 4, NULL);
int32_t pageId = 0;
int32_t writePageId = 0;
int32_t groupId = 0;
int32_t nx = 12345;
tFilePage* pBufPage = getNewDataBuf(pResultBuf, groupId, &pageId);
ASSERT_TRUE(pBufPage != NULL);
releaseResBufPage(pResultBuf, pBufPage);
tFilePage* pBufPage1 = getNewDataBuf(pResultBuf, groupId, &pageId);
tFilePage* t1 = getResBufPage(pResultBuf, pageId);
ASSERT_TRUE(t1 == pBufPage1);
ASSERT_TRUE(pageId == 1);
tFilePage* pBufPage2 = getNewDataBuf(pResultBuf, groupId, &pageId);
tFilePage* t2 = getResBufPage(pResultBuf, pageId);
ASSERT_TRUE(t2 == pBufPage2);
ASSERT_TRUE(pageId == 2);
tFilePage* pBufPage3 = getNewDataBuf(pResultBuf, groupId, &pageId);
tFilePage* t3 = getResBufPage(pResultBuf, pageId);
ASSERT_TRUE(t3 == pBufPage3);
ASSERT_TRUE(pageId == 3);
tFilePage* pBufPage4 = getNewDataBuf(pResultBuf, groupId, &pageId);
tFilePage* t4 = getResBufPage(pResultBuf, pageId);
ASSERT_TRUE(t4 == pBufPage4);
ASSERT_TRUE(pageId == 4);
releaseResBufPage(pResultBuf, t4);
releaseResBufPage(pResultBuf, t4);
tFilePage* pBufPage5 = getNewDataBuf(pResultBuf, groupId, &pageId);
tFilePage* t5 = getResBufPage(pResultBuf, pageId);
ASSERT_TRUE(t5 == pBufPage5);
ASSERT_TRUE(pageId == 5);
// flush the written page to disk, and read it out again
tFilePage* pBufPagex = getResBufPage(pResultBuf, writePageId);
*(int32_t*)(pBufPagex->data) = nx;
writePageId = pageId; // update the data
releaseResBufPage(pResultBuf, pBufPagex);
tFilePage* pBufPagex1 = getResBufPage(pResultBuf, 1);
SArray* pa = getDataBufPagesIdList(pResultBuf, groupId);
ASSERT_EQ(taosArrayGetSize(pa), 6);
destroyResultBuf(pResultBuf);
}
} // namespace
TEST(testCase, resultBufferTest) {
srand(time(NULL));
simpleTest();
writeDownTest();
recyclePageTest();
}
......@@ -210,7 +210,7 @@ TsdbQueryHandleT* tsdbQueryTables(TSDB_REPO_T* tsdb, STsdbQueryCond* pCond, STab
if (pQueryHandle->pColumns == NULL) {
goto out_of_memory;
}
for (int32_t i = 0; i < numOfCols; ++i) {
SColumnInfoData colInfo = {{0}, 0};
......@@ -222,29 +222,29 @@ TsdbQueryHandleT* tsdbQueryTables(TSDB_REPO_T* tsdb, STsdbQueryCond* pCond, STab
taosArrayPush(pQueryHandle->pColumns, &colInfo);
pQueryHandle->statis[i].colId = colInfo.info.colId;
}
pQueryHandle->pTableCheckInfo = taosArrayInit(groupList->numOfTables, sizeof(STableCheckInfo));
if (pQueryHandle->pTableCheckInfo == NULL) {
goto out_of_memory;
}
STsdbMeta* pMeta = tsdbGetMeta(tsdb);
assert(pMeta != NULL);
for (int32_t i = 0; i < sizeOfGroup; ++i) {
SArray* group = *(SArray**) taosArrayGet(groupList->pGroupList, i);
size_t gsize = taosArrayGetSize(group);
assert(gsize > 0);
for (int32_t j = 0; j < gsize; ++j) {
STable* pTable = (STable*) taosArrayGetP(group, j);
STableCheckInfo info = {
.lastKey = pQueryHandle->window.skey,
.tableId = pTable->tableId,
.pTableObj = pTable,
};
assert(info.pTableObj != NULL && (info.pTableObj->type == TSDB_NORMAL_TABLE ||
info.pTableObj->type == TSDB_CHILD_TABLE || info.pTableObj->type == TSDB_STREAM_TABLE));
......@@ -280,17 +280,17 @@ TsdbQueryHandleT tsdbQueryLastRow(TSDB_REPO_T *tsdb, STsdbQueryCond *pCond, STab
SArray* tsdbGetQueriedTableList(TsdbQueryHandleT *pHandle) {
assert(pHandle != NULL);
STsdbQueryHandle *pQueryHandle = (STsdbQueryHandle*) pHandle;
size_t size = taosArrayGetSize(pQueryHandle->pTableCheckInfo);
SArray* res = taosArrayInit(size, POINTER_BYTES);
for(int32_t i = 0; i < size; ++i) {
STableCheckInfo* pCheckInfo = taosArrayGet(pQueryHandle->pTableCheckInfo, i);
taosArrayPush(res, &pCheckInfo->pTableObj);
}
return res;
}
......@@ -306,11 +306,11 @@ TsdbQueryHandleT tsdbQueryRowsInExternalWindow(TSDB_REPO_T *tsdb, STsdbQueryCond
static bool initTableMemIterator(STsdbQueryHandle* pHandle, STableCheckInfo* pCheckInfo) {
STable* pTable = pCheckInfo->pTableObj;
assert(pTable != NULL);
if (pCheckInfo->initBuf) {
return true;
}
pCheckInfo->initBuf = true;
int32_t order = pHandle->order;
......@@ -318,7 +318,7 @@ static bool initTableMemIterator(STsdbQueryHandle* pHandle, STableCheckInfo* pCh
if (pHandle->mem == NULL && pHandle->imem == NULL) {
return false;
}
assert(pCheckInfo->iter == NULL && pCheckInfo->iiter == NULL);
// TODO: add uid check
......@@ -338,17 +338,17 @@ static bool initTableMemIterator(STsdbQueryHandle* pHandle, STableCheckInfo* pCh
if (pCheckInfo->iter == NULL && pCheckInfo->iiter == NULL) {
return false;
}
bool memEmpty = (pCheckInfo->iter == NULL) || (pCheckInfo->iter != NULL && !tSkipListIterNext(pCheckInfo->iter));
bool imemEmpty = (pCheckInfo->iiter == NULL) || (pCheckInfo->iiter != NULL && !tSkipListIterNext(pCheckInfo->iiter));
if (memEmpty && imemEmpty) { // buffer is empty
return false;
}
if (!memEmpty) {
SSkipListNode* node = tSkipListIterGet(pCheckInfo->iter);
assert(node != NULL);
SDataRow row = SL_GET_NODE_DATA(node);
TSKEY key = dataRowKey(row); // first timestamp in buffer
tsdbDebug("%p uid:%" PRId64", tid:%d check data in mem from skey:%" PRId64 ", order:%d, %p", pHandle,
......@@ -357,11 +357,11 @@ static bool initTableMemIterator(STsdbQueryHandle* pHandle, STableCheckInfo* pCh
tsdbDebug("%p uid:%"PRId64", tid:%d no data in mem, %p", pHandle, pCheckInfo->tableId.uid, pCheckInfo->tableId.tid,
pHandle->qinfo);
}
if (!imemEmpty) {
SSkipListNode* node = tSkipListIterGet(pCheckInfo->iiter);
assert(node != NULL);
SDataRow row = SL_GET_NODE_DATA(node);
TSKEY key = dataRowKey(row); // first timestamp in buffer
tsdbDebug("%p uid:%" PRId64", tid:%d check data in imem from skey:%" PRId64 ", order:%d, %p", pHandle,
......@@ -370,7 +370,7 @@ static bool initTableMemIterator(STsdbQueryHandle* pHandle, STableCheckInfo* pCh
tsdbDebug("%p uid:%"PRId64", tid:%d no data in imem, %p", pHandle, pCheckInfo->tableId.uid, pCheckInfo->tableId.tid,
pHandle->qinfo);
}
return true;
}
......@@ -473,7 +473,7 @@ static bool hasMoreDataInCache(STsdbQueryHandle* pHandle) {
size_t size = taosArrayGetSize(pHandle->pTableCheckInfo);
assert(pHandle->activeIndex < size && pHandle->activeIndex >= 0 && size >= 1);
pHandle->cur.fid = -1;
STableCheckInfo* pCheckInfo = taosArrayGet(pHandle->pTableCheckInfo, pHandle->activeIndex);
STable* pTable = pCheckInfo->pTableObj;
......@@ -491,17 +491,17 @@ static bool hasMoreDataInCache(STsdbQueryHandle* pHandle) {
pCheckInfo->lastKey = dataRowKey(row); // first timestamp in buffer
tsdbDebug("%p uid:%" PRId64", tid:%d check data in buffer from skey:%" PRId64 ", order:%d, %p", pHandle,
pCheckInfo->tableId.uid, pCheckInfo->tableId.tid, pCheckInfo->lastKey, pHandle->order, pHandle->qinfo);
// all data in mem are checked already.
if ((pCheckInfo->lastKey > pHandle->window.ekey && ASCENDING_TRAVERSE(pHandle->order)) ||
(pCheckInfo->lastKey < pHandle->window.ekey && !ASCENDING_TRAVERSE(pHandle->order))) {
return false;
}
int32_t step = ASCENDING_TRAVERSE(pHandle->order)? 1:-1;
STimeWindow* win = &pHandle->cur.win;
pHandle->cur.rows = tsdbReadRowsFromCache(pCheckInfo, pHandle->window.ekey, pHandle->outputCapacity, win, pHandle);
// update the last key value
pCheckInfo->lastKey = win->ekey + step;
pHandle->cur.lastKey = win->ekey + step;
......@@ -510,7 +510,7 @@ static bool hasMoreDataInCache(STsdbQueryHandle* pHandle) {
if (!ASCENDING_TRAVERSE(pHandle->order)) {
SWAP(win->skey, win->ekey, TSKEY);
}
return true;
}
......@@ -519,31 +519,31 @@ static int32_t getFileIdFromKey(TSKEY key, int32_t daysPerFile, int32_t precisio
if (key == TSKEY_INITIAL_VAL) {
return INT32_MIN;
}
int64_t fid = (int64_t)(key / (daysPerFile * tsMsPerDay[precision])); // set the starting fileId
if (fid < 0L && llabs(fid) > INT32_MAX) { // data value overflow for INT32
fid = INT32_MIN;
}
if (fid > 0L && fid > INT32_MAX) {
fid = INT32_MAX;
}
return fid;
}
static int32_t binarySearchForBlock(SCompBlock* pBlock, int32_t numOfBlocks, TSKEY skey, int32_t order) {
int32_t firstSlot = 0;
int32_t lastSlot = numOfBlocks - 1;
int32_t midSlot = firstSlot;
while (1) {
numOfBlocks = lastSlot - firstSlot + 1;
midSlot = (firstSlot + (numOfBlocks >> 1));
if (numOfBlocks == 1) break;
if (skey > pBlock[midSlot].keyLast) {
if (numOfBlocks == 2) break;
if ((order == TSDB_ORDER_DESC) && (skey < pBlock[midSlot + 1].keyFirst)) break;
......@@ -555,7 +555,7 @@ static int32_t binarySearchForBlock(SCompBlock* pBlock, int32_t numOfBlocks, TSK
break; // got the slot
}
}
return midSlot;
}
......@@ -644,7 +644,7 @@ static int32_t getFileCompInfo(STsdbQueryHandle* pQueryHandle, int32_t* numOfBlo
.uid = (_checkInfo)->tableId.uid})
static bool doLoadFileDataBlock(STsdbQueryHandle* pQueryHandle, SCompBlock* pBlock, STableCheckInfo* pCheckInfo) {
static bool doLoadFileDataBlock(STsdbQueryHandle* pQueryHandle, SCompBlock* pBlock, STableCheckInfo* pCheckInfo, int32_t slotIndex) {
STsdbRepo *pRepo = pQueryHandle->pTsdb;
bool blockLoaded = false;
int64_t st = taosGetTimestampUs();
......@@ -678,8 +678,9 @@ static bool doLoadFileDataBlock(STsdbQueryHandle* pQueryHandle, SCompBlock* pBlo
int64_t elapsedTime = (taosGetTimestampUs() - st);
pQueryHandle->cost.blockLoadTime += elapsedTime;
tsdbDebug("%p load file block into buffer, elapsed time:%"PRId64 " us", pQueryHandle, elapsedTime);
tsdbDebug("%p load file block into buffer, index:%d, brange:%"PRId64"-%"PRId64" , rows:%d, elapsed time:%"PRId64 " us, %p",
pQueryHandle, slotIndex, pBlock->keyFirst, pBlock->keyLast, pBlock->numOfRows, elapsedTime, pQueryHandle->qinfo);
return blockLoaded;
}
......@@ -692,18 +693,17 @@ static void handleDataMergeIfNeeded(STsdbQueryHandle* pQueryHandle, SCompBlock*
TSKEY key = (row != NULL)? dataRowKey(row):TSKEY_INITIAL_VAL;
cur->pos = ASCENDING_TRAVERSE(pQueryHandle->order)? 0:(binfo.rows-1);
if ((ASCENDING_TRAVERSE(pQueryHandle->order) && (key != TSKEY_INITIAL_VAL && key <= binfo.window.ekey)) ||
(!ASCENDING_TRAVERSE(pQueryHandle->order) && (key != TSKEY_INITIAL_VAL && key >= binfo.window.skey))) {
if ((ASCENDING_TRAVERSE(pQueryHandle->order) && (key != TSKEY_INITIAL_VAL && key < binfo.window.skey)) ||
(!ASCENDING_TRAVERSE(pQueryHandle->order) && (key != TSKEY_INITIAL_VAL && key > binfo.window.ekey))) {
// do not load file block into buffer
int32_t step = ASCENDING_TRAVERSE(pQueryHandle->order) ? 1 : -1;
cur->rows = tsdbReadRowsFromCache(pCheckInfo, binfo.window.skey - step,
pQueryHandle->outputCapacity, &cur->win, pQueryHandle);
cur->rows = tsdbReadRowsFromCache(pCheckInfo, binfo.window.skey - step, pQueryHandle->outputCapacity, &cur->win, pQueryHandle);
pQueryHandle->realNumOfRows = cur->rows;
// update the last key value
......@@ -711,13 +711,13 @@ static void handleDataMergeIfNeeded(STsdbQueryHandle* pQueryHandle, SCompBlock*
if (!ASCENDING_TRAVERSE(pQueryHandle->order)) {
SWAP(cur->win.skey, cur->win.ekey, TSKEY);
}
cur->mixBlock = true;
cur->blockCompleted = false;
return;
}
doLoadFileDataBlock(pQueryHandle, pBlock, pCheckInfo);
doLoadFileDataBlock(pQueryHandle, pBlock, pCheckInfo, cur->slot);
doMergeTwoLevelData(pQueryHandle, pCheckInfo, pBlock);
} else {
/*
......@@ -742,39 +742,41 @@ static bool loadFileDataBlock(STsdbQueryHandle* pQueryHandle, SCompBlock* pBlock
SQueryFilePos* cur = &pQueryHandle->cur;
if (ASCENDING_TRAVERSE(pQueryHandle->order)) {
// query ended in current block
// query ended in/started from current block
if (pQueryHandle->window.ekey < pBlock->keyLast || pCheckInfo->lastKey > pBlock->keyFirst) {
if (!doLoadFileDataBlock(pQueryHandle, pBlock, pCheckInfo)) {
if (!doLoadFileDataBlock(pQueryHandle, pBlock, pCheckInfo, cur->slot)) {
return false;
}
SDataCols* pTSCol = pQueryHandle->rhelper.pDataCols[0];
assert(pTSCol->cols->type == TSDB_DATA_TYPE_TIMESTAMP && pTSCol->numOfRows == pBlock->numOfRows);
if (pCheckInfo->lastKey > pBlock->keyFirst) {
cur->pos =
binarySearchForKey(pTSCol->cols[0].pData, pBlock->numOfRows, pCheckInfo->lastKey, pQueryHandle->order);
} else {
cur->pos = 0;
}
assert(pCheckInfo->lastKey <= pBlock->keyLast);
doMergeTwoLevelData(pQueryHandle, pCheckInfo, pBlock);
} else { // the whole block is loaded in to buffer
handleDataMergeIfNeeded(pQueryHandle, pBlock, pCheckInfo);
}
} else { //desc order, query ended in current block
if (pQueryHandle->window.ekey > pBlock->keyFirst || pCheckInfo->lastKey < pBlock->keyLast) {
if (!doLoadFileDataBlock(pQueryHandle, pBlock, pCheckInfo)) {
if (!doLoadFileDataBlock(pQueryHandle, pBlock, pCheckInfo, cur->slot)) {
return false;
}
SDataCols* pTSCol = pQueryHandle->rhelper.pDataCols[0];
if (pCheckInfo->lastKey < pBlock->keyLast) {
cur->pos = binarySearchForKey(pTSCol->cols[0].pData, pBlock->numOfRows, pCheckInfo->lastKey, pQueryHandle->order);
} else {
cur->pos = pBlock->numOfRows - 1;
}
assert(pCheckInfo->lastKey >= pBlock->keyFirst);
doMergeTwoLevelData(pQueryHandle, pCheckInfo, pBlock);
} else {
handleDataMergeIfNeeded(pQueryHandle, pBlock, pCheckInfo);
......@@ -790,7 +792,7 @@ static int doBinarySearchKey(char* pValue, int num, TSKEY key, int order) {
TSKEY* keyList;
assert(order == TSDB_ORDER_ASC || order == TSDB_ORDER_DESC);
if (num <= 0) return -1;
keyList = (TSKEY*)pValue;
......@@ -849,13 +851,19 @@ static int doBinarySearchKey(char* pValue, int num, TSKEY key, int order) {
static int32_t copyDataFromFileBlock(STsdbQueryHandle* pQueryHandle, int32_t capacity, int32_t numOfRows, int32_t start, int32_t end) {
char* pData = NULL;
int32_t step = ASCENDING_TRAVERSE(pQueryHandle->order)? 1 : -1;
SDataCols* pCols = pQueryHandle->rhelper.pDataCols[0];
TSKEY* tsArray = pCols->cols[0].pData;
int32_t num = end - start + 1;
assert(num >= 0);
if (num == 0) {
return numOfRows;
}
int32_t requiredNumOfCols = taosArrayGetSize(pQueryHandle->pColumns);
//data in buffer has greater timestamp, copy data in file block
int32_t i = 0, j = 0;
while(i < requiredNumOfCols && j < pCols->numOfCols) {
......@@ -928,7 +936,7 @@ static int32_t copyDataFromFileBlock(STsdbQueryHandle* pQueryHandle, int32_t cap
i++;
}
pQueryHandle->cur.win.ekey = tsArray[end];
pQueryHandle->cur.lastKey = tsArray[end] + step;
......@@ -995,24 +1003,99 @@ static void copyOneRowFromMem(STsdbQueryHandle* pQueryHandle, int32_t capacity,
}
}
static void moveDataToFront(STsdbQueryHandle* pQueryHandle, int32_t numOfRows, int32_t numOfCols) {
if (numOfRows == 0 || ASCENDING_TRAVERSE(pQueryHandle->order)) {
return;
}
// if the buffer is not full in case of descending order query, move the data in the front of the buffer
if (numOfRows < pQueryHandle->outputCapacity) {
int32_t emptySize = pQueryHandle->outputCapacity - numOfRows;
for(int32_t i = 0; i < numOfCols; ++i) {
SColumnInfoData* pColInfo = taosArrayGet(pQueryHandle->pColumns, i);
memmove(pColInfo->pData, pColInfo->pData + emptySize * pColInfo->info.bytes, numOfRows * pColInfo->info.bytes);
}
}
}
static void getQualifiedRowsPos(STsdbQueryHandle* pQueryHandle, int32_t startPos, int32_t endPos,
int32_t numOfExisted, int32_t *start, int32_t *end) {
*start = -1;
if (ASCENDING_TRAVERSE(pQueryHandle->order)) {
int32_t remain = endPos - startPos + 1;
if (remain + numOfExisted > pQueryHandle->outputCapacity) {
*end = (pQueryHandle->outputCapacity - numOfExisted) + startPos - 1;
} else {
*end = endPos;
}
*start = startPos;
} else {
int32_t remain = (startPos - endPos) + 1;
if (remain + numOfExisted > pQueryHandle->outputCapacity) {
*end = startPos + 1 - (pQueryHandle->outputCapacity - numOfExisted);
} else {
*end = endPos;
}
*start = *end;
*end = startPos;
}
}
static void updateInfoAfterMerge(STsdbQueryHandle* pQueryHandle, STableCheckInfo* pCheckInfo, int32_t numOfRows, int32_t endPos) {
SQueryFilePos* cur = &pQueryHandle->cur;
pCheckInfo->lastKey = cur->lastKey;
pQueryHandle->realNumOfRows = numOfRows;
cur->rows = numOfRows;
cur->pos = endPos;
}
static void doCheckGeneratedBlockRange(STsdbQueryHandle* pQueryHandle) {
SQueryFilePos* cur = &pQueryHandle->cur;
if (cur->rows > 0) {
if (ASCENDING_TRAVERSE(pQueryHandle->order)) {
assert(cur->win.skey >= pQueryHandle->window.skey && cur->win.ekey <= pQueryHandle->window.ekey);
} else {
assert(cur->win.skey >= pQueryHandle->window.ekey && cur->win.ekey <= pQueryHandle->window.skey);
}
SColumnInfoData* pColInfoData = taosArrayGet(pQueryHandle->pColumns, 0);
assert(cur->win.skey == ((TSKEY*)pColInfoData->pData)[0] && cur->win.ekey == ((TSKEY*)pColInfoData->pData)[cur->rows-1]);
} else {
cur->win = pQueryHandle->window;
int32_t step = ASCENDING_TRAVERSE(pQueryHandle->order)? 1:-1;
cur->lastKey = pQueryHandle->window.ekey + step;
}
}
// only return the qualified data to client in terms of query time window, data rows in the same block but do not
// be included in the query time window will be discarded
static void doMergeTwoLevelData(STsdbQueryHandle* pQueryHandle, STableCheckInfo* pCheckInfo, SCompBlock* pBlock) {
SQueryFilePos* cur = &pQueryHandle->cur;
SDataBlockInfo blockInfo = GET_FILE_DATA_BLOCK_INFO(pCheckInfo, pBlock);
initTableMemIterator(pQueryHandle, pCheckInfo);
SDataCols* pCols = pQueryHandle->rhelper.pDataCols[0];
assert(pCols->cols[0].type == TSDB_DATA_TYPE_TIMESTAMP && pCols->cols[0].colId == PRIMARYKEY_TIMESTAMP_COL_INDEX &&
cur->pos >= 0 && cur->pos < pBlock->numOfRows);
TSKEY* tsArray = pCols->cols[0].pData;
// for search the endPos, so the order needs to reverse
int32_t order = (pQueryHandle->order == TSDB_ORDER_ASC)? TSDB_ORDER_DESC:TSDB_ORDER_ASC;
int32_t step = ASCENDING_TRAVERSE(pQueryHandle->order)? 1:-1;
int32_t numOfCols = taosArrayGetSize(pQueryHandle->pColumns);
int32_t numOfCols = QH_GET_NUM_OF_COLS(pQueryHandle);
STable* pTable = pCheckInfo->pTableObj;
int32_t endPos = cur->pos;
if (ASCENDING_TRAVERSE(pQueryHandle->order) && pQueryHandle->window.ekey > blockInfo.window.ekey) {
endPos = blockInfo.rows - 1;
cur->mixBlock = (cur->pos != 0);
......@@ -1024,48 +1107,36 @@ static void doMergeTwoLevelData(STsdbQueryHandle* pQueryHandle, STableCheckInfo*
endPos = doBinarySearchKey(pCols->cols[0].pData, pCols->numOfRows, pQueryHandle->window.ekey, order);
cur->mixBlock = true;
}
// compared with the data from in-memory buffer, to generate the correct timestamp array list
int32_t pos = cur->pos;
assert(pCols->cols[0].type == TSDB_DATA_TYPE_TIMESTAMP && pCols->cols[0].colId == 0);
TSKEY* tsArray = pCols->cols[0].pData;
int32_t numOfRows = 0;
pQueryHandle->cur.win = TSWINDOW_INITIALIZER;
int32_t pos = cur->pos;
cur->win = TSWINDOW_INITIALIZER;
// no data in buffer, load data from file directly
if (pCheckInfo->iiter == NULL && pCheckInfo->iter == NULL) {
int32_t start = cur->pos;
int32_t end = endPos;
if (!ASCENDING_TRAVERSE(pQueryHandle->order)) {
end = cur->pos;
start = endPos;
}
cur->win.skey = tsArray[start];
cur->win.ekey = tsArray[end];
// todo opt in case of no data in buffer
SWAP(start, end, int32_t);
}
numOfRows = copyDataFromFileBlock(pQueryHandle, pQueryHandle->outputCapacity, numOfRows, start, end);
// if the buffer is not full in case of descending order query, move the data in the front of the buffer
if (!ASCENDING_TRAVERSE(pQueryHandle->order) && numOfRows < pQueryHandle->outputCapacity) {
int32_t emptySize = pQueryHandle->outputCapacity - numOfRows;
for(int32_t i = 0; i < numOfCols; ++i) {
SColumnInfoData* pColInfo = taosArrayGet(pQueryHandle->pColumns, i);
memmove(pColInfo->pData, pColInfo->pData + emptySize * pColInfo->info.bytes, numOfRows * pColInfo->info.bytes);
}
}
// the time window should always be right order: skey <= ekey
cur->win = (STimeWindow) {.skey = tsArray[start], .ekey = tsArray[end]};
cur->lastKey = tsArray[endPos];
pos += (end - start + 1) * step;
cur->blockCompleted = (((pos >= endPos || cur->lastKey > pQueryHandle->window.ekey) && ASCENDING_TRAVERSE(pQueryHandle->order)) ||
((pos <= endPos || cur->lastKey < pQueryHandle->window.ekey) && !ASCENDING_TRAVERSE(pQueryHandle->order)));
pCheckInfo->lastKey = cur->lastKey;
pQueryHandle->realNumOfRows = numOfRows;
cur->rows = numOfRows;
cur->blockCompleted =
(((pos >= endPos || cur->lastKey > pQueryHandle->window.ekey) && ASCENDING_TRAVERSE(pQueryHandle->order)) ||
((pos <= endPos || cur->lastKey < pQueryHandle->window.ekey) && !ASCENDING_TRAVERSE(pQueryHandle->order)));
// if the buffer is not full in case of descending order query, move the data in the front of the buffer
moveDataToFront(pQueryHandle, numOfRows, numOfCols);
updateInfoAfterMerge(pQueryHandle, pCheckInfo, numOfRows, pos);
doCheckGeneratedBlockRange(pQueryHandle);
return;
} else if (pCheckInfo->iter != NULL || pCheckInfo->iiter != NULL) {
SSkipListNode* node = NULL;
......@@ -1111,30 +1182,18 @@ static void doMergeTwoLevelData(STsdbQueryHandle* pQueryHandle, STableCheckInfo*
if (tsArray[end] == key) { // the value of key in cache equals to the end timestamp value, ignore it
moveToNextRowInMem(pCheckInfo);
}
int32_t start = -1;
if (ASCENDING_TRAVERSE(pQueryHandle->order)) {
int32_t remain = end - pos + 1;
if (remain + numOfRows > pQueryHandle->outputCapacity) {
end = (pQueryHandle->outputCapacity - numOfRows) + pos - 1;
}
start = pos;
} else {
int32_t remain = (pos - end) + 1;
if (remain + numOfRows > pQueryHandle->outputCapacity) {
end = pos + 1 - (pQueryHandle->outputCapacity - numOfRows);
}
int32_t qstart = 0, qend = 0;
getQualifiedRowsPos(pQueryHandle, pos, end, numOfRows, &qstart, &qend);
start = end;
end = pos;
}
numOfRows = copyDataFromFileBlock(pQueryHandle, pQueryHandle->outputCapacity, numOfRows, qstart, qend);
pos += (qend - qstart + 1) * step;
numOfRows = copyDataFromFileBlock(pQueryHandle, pQueryHandle->outputCapacity, numOfRows, start, end);
pos += (end - start + 1) * step;
cur->win.ekey = ASCENDING_TRAVERSE(pQueryHandle->order)? tsArray[qend]:tsArray[qstart];
cur->lastKey = cur->win.ekey + step;
}
} while (numOfRows < pQueryHandle->outputCapacity);
if (numOfRows < pQueryHandle->outputCapacity) {
/**
* if cache is empty, load remain file block data. In contrast, if there are remain data in cache, do NOT
......@@ -1148,54 +1207,29 @@ static void doMergeTwoLevelData(STsdbQueryHandle* pQueryHandle, STableCheckInfo*
cur->win.skey = tsArray[pos];
}
int32_t start = -1;
int32_t end = -1;
// all remain data are qualified, but check the remain capacity in the first place.
if (ASCENDING_TRAVERSE(pQueryHandle->order)) {
int32_t remain = endPos - pos + 1;
if (remain + numOfRows > pQueryHandle->outputCapacity) {
endPos = (pQueryHandle->outputCapacity - numOfRows) + pos - 1;
}
start = pos;
end = endPos;
} else {
int32_t remain = pos + 1;
if (remain + numOfRows > pQueryHandle->outputCapacity) {
endPos = pos + 1 - (pQueryHandle->outputCapacity - numOfRows);
}
start = endPos;
end = pos;
}
int32_t start = -1, end = -1;
getQualifiedRowsPos(pQueryHandle, pos, endPos, numOfRows, &start, &end);
numOfRows = copyDataFromFileBlock(pQueryHandle, pQueryHandle->outputCapacity, numOfRows, start, end);
pos += (end - start + 1) * step;
cur->win.ekey = ASCENDING_TRAVERSE(pQueryHandle->order)? tsArray[end]:tsArray[start];
cur->lastKey = cur->win.ekey + step;
}
}
}
cur->blockCompleted = (((pos >= endPos || cur->lastKey > pQueryHandle->window.ekey) && ASCENDING_TRAVERSE(pQueryHandle->order)) ||
((pos <= endPos || cur->lastKey < pQueryHandle->window.ekey) && !ASCENDING_TRAVERSE(pQueryHandle->order)));
cur->blockCompleted =
(((pos >= endPos || cur->lastKey > pQueryHandle->window.ekey) && ASCENDING_TRAVERSE(pQueryHandle->order)) ||
((pos <= endPos || cur->lastKey < pQueryHandle->window.ekey) && !ASCENDING_TRAVERSE(pQueryHandle->order)));
if (!ASCENDING_TRAVERSE(pQueryHandle->order)) {
SWAP(cur->win.skey, cur->win.ekey, TSKEY);
// if the buffer is not full in case of descending order query, move the data in the front of the buffer
if (numOfRows < pQueryHandle->outputCapacity) {
int32_t emptySize = pQueryHandle->outputCapacity - numOfRows;
for(int32_t i = 0; i < numOfCols; ++i) {
SColumnInfoData* pColInfo = taosArrayGet(pQueryHandle->pColumns, i);
memmove(pColInfo->pData, pColInfo->pData + emptySize * pColInfo->info.bytes, numOfRows * pColInfo->info.bytes);
}
}
}
pCheckInfo->lastKey = cur->lastKey;
pQueryHandle->realNumOfRows = numOfRows;
cur->rows = numOfRows;
cur->pos = pos;
moveDataToFront(pQueryHandle, numOfRows, numOfCols);
updateInfoAfterMerge(pQueryHandle, pCheckInfo, numOfRows, pos);
doCheckGeneratedBlockRange(pQueryHandle);
tsdbDebug("%p uid:%" PRIu64",tid:%d data block created, brange:%"PRIu64"-%"PRIu64" rows:%d, %p", pQueryHandle, pCheckInfo->tableId.uid, pCheckInfo->tableId.tid, cur->win.skey,
cur->win.ekey, cur->rows, pQueryHandle->qinfo);
......@@ -1332,16 +1366,16 @@ static int32_t createDataBlocksInfo(STsdbQueryHandle* pQueryHandle, int32_t numO
cleanBlockOrderSupporter(&sup, 0);
return TSDB_CODE_TDB_OUT_OF_MEMORY;
}
int32_t cnt = 0;
int32_t numOfQualTables = 0;
for (int32_t j = 0; j < numOfTables; ++j) {
STableCheckInfo* pTableCheck = (STableCheckInfo*)taosArrayGet(pQueryHandle->pTableCheckInfo, j);
if (pTableCheck->numOfBlocks <= 0) {
continue;
}
SCompBlock* pBlock = pTableCheck->pCompInfo->blocks;
sup.numOfBlocksPerTable[numOfQualTables] = pTableCheck->numOfBlocks;
......@@ -1437,37 +1471,39 @@ static int32_t getDataBlocksInFilesImpl(STsdbQueryHandle* pQueryHandle, bool* ex
// current file are not overlapped with query time window, ignore remain files
if ((ASCENDING_TRAVERSE(pQueryHandle->order) && win.skey > pQueryHandle->window.ekey) ||
(!ASCENDING_TRAVERSE(pQueryHandle->order) && win.ekey < pQueryHandle->window.ekey)) {
tsdbDebug("%p remain files are not qualified for qrange:%"PRId64"-%"PRId64", ignore, %p", pQueryHandle, pQueryHandle->window.skey, pQueryHandle->window.ekey, pQueryHandle->qinfo)
(!ASCENDING_TRAVERSE(pQueryHandle->order) && win.ekey < pQueryHandle->window.ekey)) {
tsdbDebug("%p remain files are not qualified for qrange:%" PRId64 "-%" PRId64 ", ignore, %p", pQueryHandle,
pQueryHandle->window.skey, pQueryHandle->window.ekey, pQueryHandle->qinfo);
pQueryHandle->pFileGroup = NULL;
assert(pQueryHandle->numOfBlocks == 0);
break;
}
if ((code = getFileCompInfo(pQueryHandle, &numOfBlocks)) != TSDB_CODE_SUCCESS) {
break;
}
tsdbDebug("%p %d blocks found in file for %d table(s), fid:%d, %p", pQueryHandle, numOfBlocks,
numOfTables, pQueryHandle->pFileGroup->fileId, pQueryHandle->qinfo);
tsdbDebug("%p %d blocks found in file for %d table(s), fid:%d, %p", pQueryHandle, numOfBlocks, numOfTables,
pQueryHandle->pFileGroup->fileId, pQueryHandle->qinfo);
assert(numOfBlocks >= 0);
if (numOfBlocks == 0) {
continue;
}
// todo return error code to query engine
if (createDataBlocksInfo(pQueryHandle, numOfBlocks, &pQueryHandle->numOfBlocks) != TSDB_CODE_SUCCESS) {
if ((code = createDataBlocksInfo(pQueryHandle, numOfBlocks, &pQueryHandle->numOfBlocks)) != TSDB_CODE_SUCCESS) {
break;
}
assert(numOfBlocks >= pQueryHandle->numOfBlocks);
if (pQueryHandle->numOfBlocks > 0) {
break;
}
}
// no data in file anymore
if (pQueryHandle->numOfBlocks <= 0) {
if (pQueryHandle->numOfBlocks <= 0 || code != TSDB_CODE_SUCCESS) {
if (code == TSDB_CODE_SUCCESS) {
assert(pQueryHandle->pFileGroup == NULL);
}
......@@ -1476,10 +1512,11 @@ static int32_t getDataBlocksInFilesImpl(STsdbQueryHandle* pQueryHandle, bool* ex
*exists = false;
return code;
}
assert(pQueryHandle->pFileGroup != NULL && pQueryHandle->numOfBlocks > 0);
cur->slot = ASCENDING_TRAVERSE(pQueryHandle->order)? 0:pQueryHandle->numOfBlocks-1;
cur->fid = pQueryHandle->pFileGroup->fileId;
STableBlockInfo* pBlockInfo = &pQueryHandle->pDataBlockInfo[cur->slot];
*exists = loadFileDataBlock(pQueryHandle, pBlockInfo->compBlock, pBlockInfo->pTableCheckInfo);
......@@ -1495,7 +1532,7 @@ static int32_t getDataBlocksInFiles(STsdbQueryHandle* pQueryHandle, bool* exists
pQueryHandle->locateStart = true;
STsdbCfg* pCfg = &pQueryHandle->pTsdb->config;
int32_t fid = getFileIdFromKey(pQueryHandle->window.skey, pCfg->daysPerFile, pCfg->precision);
tsdbInitFileGroupIter(pFileHandle, &pQueryHandle->fileIter, pQueryHandle->order);
tsdbSeekFileGroupIter(&pQueryHandle->fileIter, fid);
......@@ -1504,7 +1541,7 @@ static int32_t getDataBlocksInFiles(STsdbQueryHandle* pQueryHandle, bool* exists
// check if current file block is all consumed
STableBlockInfo* pBlockInfo = &pQueryHandle->pDataBlockInfo[cur->slot];
STableCheckInfo* pCheckInfo = pBlockInfo->pTableCheckInfo;
// current block is done, try next
if (!cur->mixBlock || cur->blockCompleted) {
if ((cur->slot == pQueryHandle->numOfBlocks - 1 && ASCENDING_TRAVERSE(pQueryHandle->order)) ||
......@@ -1515,10 +1552,10 @@ static int32_t getDataBlocksInFiles(STsdbQueryHandle* pQueryHandle, bool* exists
// next block of the same file
int32_t step = ASCENDING_TRAVERSE(pQueryHandle->order) ? 1 : -1;
cur->slot += step;
cur->mixBlock = false;
cur->blockCompleted = false;
STableBlockInfo* pNext = &pQueryHandle->pDataBlockInfo[cur->slot];
*exists = loadFileDataBlock(pQueryHandle, pNext->compBlock, pNext->pTableCheckInfo);
......@@ -1540,10 +1577,10 @@ static bool doHasDataInBuffer(STsdbQueryHandle* pQueryHandle) {
if (hasMoreDataInCache(pQueryHandle)) {
return true;
}
pQueryHandle->activeIndex += 1;
}
return false;
}
......@@ -1561,14 +1598,14 @@ bool tsdbNextDataBlock(TsdbQueryHandleT* pHandle) {
if (pQueryHandle->type == TSDB_QUERY_TYPE_EXTERNAL) {
pQueryHandle->type = TSDB_QUERY_TYPE_ALL;
pQueryHandle->order = TSDB_ORDER_DESC;
if (!tsdbNextDataBlock(pHandle)) {
return false;
}
/*SDataBlockInfo* pBlockInfo =*/ tsdbRetrieveDataBlockInfo(pHandle, &blockInfo);
/*SArray *pDataBlock = */tsdbRetrieveDataBlock(pHandle, pQueryHandle->defaultLoadColumn);
if (pQueryHandle->cur.win.ekey == pQueryHandle->window.skey) {
// data already retrieve, discard other data rows and return
int32_t numOfCols = QH_GET_NUM_OF_COLS(pQueryHandle);
......@@ -1576,7 +1613,7 @@ bool tsdbNextDataBlock(TsdbQueryHandleT* pHandle) {
SColumnInfoData* pCol = taosArrayGet(pQueryHandle->pColumns, i);
memcpy(pCol->pData, pCol->pData + pCol->info.bytes * (pQueryHandle->cur.rows-1), pCol->info.bytes);
}
pQueryHandle->cur.win = (STimeWindow){pQueryHandle->window.skey, pQueryHandle->window.skey};
pQueryHandle->window = pQueryHandle->cur.win;
pQueryHandle->cur.rows = 1;
......@@ -1593,7 +1630,7 @@ bool tsdbNextDataBlock(TsdbQueryHandleT* pHandle) {
pSecQueryHandle->checkFiles = true;
pSecQueryHandle->activeIndex = 0;
pSecQueryHandle->outputCapacity = ((STsdbRepo*)pSecQueryHandle->pTsdb)->config.maxRowsPerFileBlock;
if (tsdbInitReadHelper(&pSecQueryHandle->rhelper, (STsdbRepo*) pSecQueryHandle->pTsdb) != 0) {
free(pSecQueryHandle);
return false;
......@@ -1603,24 +1640,24 @@ bool tsdbNextDataBlock(TsdbQueryHandleT* pHandle) {
// allocate buffer in order to load data blocks from file
int32_t numOfCols = QH_GET_NUM_OF_COLS(pQueryHandle);
pSecQueryHandle->statis = calloc(numOfCols, sizeof(SDataStatis));
pSecQueryHandle->pColumns = taosArrayInit(numOfCols, sizeof(SColumnInfoData));
for (int32_t i = 0; i < numOfCols; ++i) {
SColumnInfoData colInfo = {{0}, 0};
SColumnInfoData* pCol = taosArrayGet(pQueryHandle->pColumns, i);
colInfo.info = pCol->info;
colInfo.pData = calloc(1, EXTRA_BYTES + pQueryHandle->outputCapacity * pCol->info.bytes);
taosArrayPush(pSecQueryHandle->pColumns, &colInfo);
}
size_t si = taosArrayGetSize(pQueryHandle->pTableCheckInfo);
pSecQueryHandle->pTableCheckInfo = taosArrayInit(si, sizeof(STableCheckInfo));
STsdbMeta* pMeta = tsdbGetMeta(pQueryHandle->pTsdb);
assert(pMeta != NULL);
for (int32_t j = 0; j < si; ++j) {
STableCheckInfo* pCheckInfo = (STableCheckInfo*) taosArrayGet(pQueryHandle->pTableCheckInfo, j);
STableCheckInfo info = {
......@@ -1628,10 +1665,10 @@ bool tsdbNextDataBlock(TsdbQueryHandleT* pHandle) {
.tableId = pCheckInfo->tableId,
.pTableObj = pCheckInfo->pTableObj,
};
taosArrayPush(pSecQueryHandle->pTableCheckInfo, &info);
}
tsdbInitDataBlockLoadInfo(&pSecQueryHandle->dataBlockLoadInfo);
tsdbInitCompBlockLoadInfo(&pSecQueryHandle->compBlockLoadInfo);
pSecQueryHandle->defaultLoadColumn = taosArrayClone(pQueryHandle->defaultLoadColumn);
......@@ -1641,17 +1678,17 @@ bool tsdbNextDataBlock(TsdbQueryHandleT* pHandle) {
tsdbRetrieveDataBlockInfo((void*) pSecQueryHandle, &blockInfo);
tsdbRetrieveDataBlock((void*) pSecQueryHandle, pSecQueryHandle->defaultLoadColumn);
for (int32_t i = 0; i < numOfCols; ++i) {
SColumnInfoData* pCol = taosArrayGet(pQueryHandle->pColumns, i);
memcpy(pCol->pData, pCol->pData + pCol->info.bytes * (pQueryHandle->cur.rows-1), pCol->info.bytes);
SColumnInfoData* pCol1 = taosArrayGet(pSecQueryHandle->pColumns, i);
assert(pCol->info.colId == pCol1->info.colId);
memcpy(pCol->pData + pCol->info.bytes, pCol1->pData, pCol1->info.bytes);
}
SColumnInfoData* pTSCol = taosArrayGet(pQueryHandle->pColumns, 0);
// it is ascending order
......@@ -1675,7 +1712,7 @@ bool tsdbNextDataBlock(TsdbQueryHandleT* pHandle) {
pQueryHandle->checkFiles = false;
return true;
}
if (pQueryHandle->checkFiles) {
bool exists = true;
int32_t code = getDataBlocksInFiles(pQueryHandle, &exists);
......@@ -1688,11 +1725,11 @@ bool tsdbNextDataBlock(TsdbQueryHandleT* pHandle) {
pQueryHandle->cost.checkForNextTime += elapsedTime;
return exists;
}
pQueryHandle->activeIndex = 0;
pQueryHandle->checkFiles = false;
}
// TODO: opt by consider the scan order
bool ret = doHasDataInBuffer(pQueryHandle);
terrno = TSDB_CODE_SUCCESS;
......@@ -1705,15 +1742,15 @@ bool tsdbNextDataBlock(TsdbQueryHandleT* pHandle) {
void changeQueryHandleForLastrowQuery(TsdbQueryHandleT pqHandle) {
STsdbQueryHandle* pQueryHandle = (STsdbQueryHandle*) pqHandle;
assert(!ASCENDING_TRAVERSE(pQueryHandle->order));
// starts from the buffer in case of descending timestamp order check data blocks
// todo consider the query time window, current last_row does not apply the query time window
size_t numOfTables = taosArrayGetSize(pQueryHandle->pTableCheckInfo);
TSKEY key = TSKEY_INITIAL_VAL;
int32_t index = -1;
for(int32_t i = 0; i < numOfTables; ++i) {
STableCheckInfo* pCheckInfo = taosArrayGet(pQueryHandle->pTableCheckInfo, i);
if (pCheckInfo->pTableObj->lastKey > key) {
......@@ -1721,36 +1758,36 @@ void changeQueryHandleForLastrowQuery(TsdbQueryHandleT pqHandle) {
index = i;
}
}
if (index == -1) {
// todo add failure test cases
return;
}
// erase all other elements in array list
size_t size = taosArrayGetSize(pQueryHandle->pTableCheckInfo);
for (int32_t i = 0; i < size; ++i) {
if (i == index) {
continue;
}
STableCheckInfo* pTableCheckInfo = taosArrayGet(pQueryHandle->pTableCheckInfo, i);
tSkipListDestroyIter(pTableCheckInfo->iter);
if (pTableCheckInfo->pDataCols != NULL) {
tfree(pTableCheckInfo->pDataCols->buf);
}
tfree(pTableCheckInfo->pDataCols);
tfree(pTableCheckInfo->pCompInfo);
}
STableCheckInfo info = *(STableCheckInfo*) taosArrayGet(pQueryHandle->pTableCheckInfo, index);
taosArrayClear(pQueryHandle->pTableCheckInfo);
info.lastKey = key;
taosArrayPush(pQueryHandle->pTableCheckInfo, &info);
// update the query time window according to the chosen last timestamp
pQueryHandle->window = (STimeWindow) {key, key};
}
......@@ -1759,13 +1796,13 @@ static void changeQueryHandleForInterpQuery(TsdbQueryHandleT pHandle) {
// filter the queried time stamp in the first place
STsdbQueryHandle* pQueryHandle = (STsdbQueryHandle*) pHandle;
pQueryHandle->order = TSDB_ORDER_DESC;
assert(pQueryHandle->window.skey == pQueryHandle->window.ekey);
// starts from the buffer in case of descending timestamp order check data blocks
// todo consider the query time window, current last_row does not apply the query time window
size_t numOfTables = taosArrayGetSize(pQueryHandle->pTableCheckInfo);
int32_t i = 0;
while(i < numOfTables) {
STableCheckInfo* pCheckInfo = taosArrayGet(pQueryHandle->pTableCheckInfo, i);
......@@ -1773,21 +1810,21 @@ static void changeQueryHandleForInterpQuery(TsdbQueryHandleT pHandle) {
pCheckInfo->pTableObj->lastKey != TSKEY_INITIAL_VAL) {
break;
}
i++;
}
// there are no data in all the tables
if (i == numOfTables) {
return;
}
STableCheckInfo info = *(STableCheckInfo*) taosArrayGet(pQueryHandle->pTableCheckInfo, i);
taosArrayClear(pQueryHandle->pTableCheckInfo);
info.lastKey = pQueryHandle->window.skey;
taosArrayPush(pQueryHandle->pTableCheckInfo, &info);
// update the query time window according to the chosen last timestamp
pQueryHandle->window = (STimeWindow) {info.lastKey, TSKEY_INITIAL_VAL};
}
......@@ -1811,7 +1848,7 @@ static int tsdbReadRowsFromCache(STableCheckInfo* pCheckInfo, TSKEY maxKey, int
if ((key > maxKey && ASCENDING_TRAVERSE(pQueryHandle->order)) || (key < maxKey && !ASCENDING_TRAVERSE(pQueryHandle->order))) {
tsdbDebug("%p key:%"PRIu64" beyond qrange:%"PRId64" - %"PRId64", no more data in buffer", pQueryHandle, key, pQueryHandle->window.skey,
pQueryHandle->window.ekey);
break;
}
......@@ -1826,24 +1863,24 @@ static int tsdbReadRowsFromCache(STableCheckInfo* pCheckInfo, TSKEY maxKey, int
moveToNextRowInMem(pCheckInfo);
break;
}
} while(moveToNextRowInMem(pCheckInfo));
assert(numOfRows <= maxRowsToRead);
// if the buffer is not full in case of descending order query, move the data in the front of the buffer
if (!ASCENDING_TRAVERSE(pQueryHandle->order) && numOfRows < maxRowsToRead) {
int32_t emptySize = maxRowsToRead - numOfRows;
for(int32_t i = 0; i < numOfCols; ++i) {
SColumnInfoData* pColInfo = taosArrayGet(pQueryHandle->pColumns, i);
memmove(pColInfo->pData, pColInfo->pData + emptySize * pColInfo->info.bytes, numOfRows * pColInfo->info.bytes);
}
}
int64_t elapsedTime = taosGetTimestampUs() - st;
tsdbDebug("%p build data block from cache completed, elapsed time:%"PRId64" us, numOfRows:%d, numOfCols:%d", pQueryHandle,
elapsedTime, numOfRows, numOfCols);
tsdbDebug("%p build data block from cache completed, elapsed time:%"PRId64" us, numOfRows:%d, numOfCols:%d, %p", pQueryHandle,
elapsedTime, numOfRows, numOfCols, pQueryHandle->qinfo);
return numOfRows;
}
......@@ -1852,7 +1889,7 @@ void tsdbRetrieveDataBlockInfo(TsdbQueryHandleT* pQueryHandle, SDataBlockInfo* p
STsdbQueryHandle* pHandle = (STsdbQueryHandle*)pQueryHandle;
SQueryFilePos* cur = &pHandle->cur;
STable* pTable = NULL;
// there are data in file
if (pHandle->cur.fid >= 0) {
STableBlockInfo* pBlockInfo = &pHandle->pDataBlockInfo[cur->slot];
......@@ -1874,13 +1911,13 @@ void tsdbRetrieveDataBlockInfo(TsdbQueryHandleT* pQueryHandle, SDataBlockInfo* p
*/
int32_t tsdbRetrieveDataBlockStatisInfo(TsdbQueryHandleT* pQueryHandle, SDataStatis** pBlockStatis) {
STsdbQueryHandle* pHandle = (STsdbQueryHandle*) pQueryHandle;
SQueryFilePos* c = &pHandle->cur;
if (c->mixBlock) {
*pBlockStatis = NULL;
return TSDB_CODE_SUCCESS;
}
STableBlockInfo* pBlockInfo = &pHandle->pDataBlockInfo[c->slot];
assert((c->slot >= 0 && c->slot < pHandle->numOfBlocks) || ((c->slot == pHandle->numOfBlocks) && (c->slot == 0)));
......@@ -1900,7 +1937,7 @@ int32_t tsdbRetrieveDataBlockStatisInfo(TsdbQueryHandleT* pQueryHandle, SDataSta
for(int32_t i = 0; i < numOfCols; ++i) {
pHandle->statis[i].colId = colIds[i];
}
tsdbGetDataStatis(&pHandle->rhelper, pHandle->statis, numOfCols);
// always load the first primary timestamp column data
......@@ -1949,31 +1986,31 @@ SArray* tsdbRetrieveDataBlock(TsdbQueryHandleT* pQueryHandle, SArray* pIdList) {
} else {
SDataBlockInfo binfo = GET_FILE_DATA_BLOCK_INFO(pCheckInfo, pBlockInfo->compBlock);
assert(pHandle->realNumOfRows <= binfo.rows);
// data block has been loaded, todo extract method
SDataBlockLoadInfo* pBlockLoadInfo = &pHandle->dataBlockLoadInfo;
if (pBlockLoadInfo->slot == pHandle->cur.slot && pBlockLoadInfo->fileGroup->fileId == pHandle->cur.fid &&
pBlockLoadInfo->tid == pCheckInfo->pTableObj->tableId.tid) {
return pHandle->pColumns;
} else { // only load the file block
SCompBlock* pBlock = pBlockInfo->compBlock;
doLoadFileDataBlock(pHandle, pBlock, pCheckInfo);
doLoadFileDataBlock(pHandle, pBlock, pCheckInfo, pHandle->cur.slot);
// todo refactor
int32_t numOfRows = copyDataFromFileBlock(pHandle, pHandle->outputCapacity, 0, 0, pBlock->numOfRows - 1);
// if the buffer is not full in case of descending order query, move the data in the front of the buffer
if (!ASCENDING_TRAVERSE(pHandle->order) && numOfRows < pHandle->outputCapacity) {
int32_t emptySize = pHandle->outputCapacity - numOfRows;
int32_t reqNumOfCols = taosArrayGetSize(pHandle->pColumns);
for(int32_t i = 0; i < reqNumOfCols; ++i) {
SColumnInfoData* pColInfo = taosArrayGet(pHandle->pColumns, i);
memmove(pColInfo->pData, pColInfo->pData + emptySize * pColInfo->info.bytes, numOfRows * pColInfo->info.bytes);
}
}
return pHandle->pColumns;
}
}
......@@ -1984,11 +2021,11 @@ static int32_t getAllTableList(STable* pSuperTable, SArray* list) {
SSkipListIterator* iter = tSkipListCreateIter(pSuperTable->pIndex);
while (tSkipListIterNext(iter)) {
SSkipListNode* pNode = tSkipListIterGet(iter);
STable** pTable = (STable**) SL_GET_NODE_DATA((SSkipListNode*) pNode);
taosArrayPush(list, pTable);
}
tSkipListDestroyIter(iter);
return TSDB_CODE_SUCCESS;
}
......@@ -1998,12 +2035,12 @@ static void destroyHelper(void* param) {
return;
}
tQueryInfo* pInfo = (tQueryInfo*)param;
if (pInfo->optr != TSDB_RELATION_IN) {
tfree(pInfo->q);
}
// tVariantDestroy(&(pInfo->q));
free(param);
}
......@@ -2015,7 +2052,7 @@ void filterPrepare(void* expr, void* param) {
}
pExpr->_node.info = calloc(1, sizeof(tQueryInfo));
STSchema* pTSSchema = (STSchema*) param;
tQueryInfo* pInfo = pExpr->_node.info;
tVariant* pCond = pExpr->_node.pRight->pVal;
......@@ -2025,7 +2062,7 @@ void filterPrepare(void* expr, void* param) {
pInfo->optr = pExpr->_node.optr;
pInfo->compare = getComparFunc(pSchema->type, pInfo->optr);
pInfo->param = pTSSchema;
if (pInfo->optr == TSDB_RELATION_IN) {
pInfo->q = (char*) pCond->arr;
} else {
......@@ -2038,25 +2075,24 @@ typedef struct STableGroupSupporter {
int32_t numOfCols;
SColIndex* pCols;
STSchema* pTagSchema;
// void* tsdbMeta;
} STableGroupSupporter;
int32_t tableGroupComparFn(const void *p1, const void *p2, const void *param) {
STableGroupSupporter* pTableGroupSupp = (STableGroupSupporter*) param;
STable* pTable1 = *(STable**) p1;
STable* pTable2 = *(STable**) p2;
for (int32_t i = 0; i < pTableGroupSupp->numOfCols; ++i) {
SColIndex* pColIndex = &pTableGroupSupp->pCols[i];
int32_t colIndex = pColIndex->colIndex;
assert(colIndex >= TSDB_TBNAME_COLUMN_INDEX);
char * f1 = NULL;
char * f2 = NULL;
int32_t type = 0;
int32_t bytes = 0;
if (colIndex == TSDB_TBNAME_COLUMN_INDEX) {
f1 = (char*) TABLE_NAME(pTable1);
f2 = (char*) TABLE_NAME(pTable2);
......@@ -2090,14 +2126,14 @@ int32_t tableGroupComparFn(const void *p1, const void *p2, const void *param) {
return ret;
}
}
return 0;
}
void createTableGroupImpl(SArray* pGroups, SArray* pTableList, size_t numOfTables, STableGroupSupporter* pSupp,
__ext_compar_fn_t compareFn) {
STable* pTable = taosArrayGetP(pTableList, 0);
SArray* g = taosArrayInit(16, POINTER_BYTES);
taosArrayPush(g, &pTable);
tsdbRefTable(pTable);
......@@ -2105,10 +2141,10 @@ void createTableGroupImpl(SArray* pGroups, SArray* pTableList, size_t numOfTable
for (int32_t i = 1; i < numOfTables; ++i) {
STable** prev = taosArrayGet(pTableList, i - 1);
STable** p = taosArrayGet(pTableList, i);
int32_t ret = compareFn(prev, p, pSupp);
assert(ret == 0 || ret == -1);
tsdbRefTable(*p);
assert((*p)->type == TSDB_CHILD_TABLE);
......@@ -2120,20 +2156,20 @@ void createTableGroupImpl(SArray* pGroups, SArray* pTableList, size_t numOfTable
taosArrayPush(g, p);
}
}
taosArrayPush(pGroups, &g);
}
SArray* createTableGroup(SArray* pTableList, STSchema* pTagSchema, SColIndex* pCols, int32_t numOfOrderCols) {
assert(pTableList != NULL);
SArray* pTableGroup = taosArrayInit(1, POINTER_BYTES);
size_t size = taosArrayGetSize(pTableList);
if (size == 0) {
tsdbDebug("no qualified tables");
return pTableGroup;
}
if (numOfOrderCols == 0 || size == 1) { // no group by tags clause or only one table
SArray* sa = taosArrayInit(size, POINTER_BYTES);
for(int32_t i = 0; i < size; ++i) {
......@@ -2143,7 +2179,7 @@ SArray* createTableGroup(SArray* pTableList, STSchema* pTagSchema, SColIndex* pC
tsdbRefTable(*pTable);
taosArrayPush(sa, pTable);
}
taosArrayPush(pTableGroup, &sa);
tsdbDebug("all %zu tables belong to one group", size);
} else {
......@@ -2151,18 +2187,18 @@ SArray* createTableGroup(SArray* pTableList, STSchema* pTagSchema, SColIndex* pC
pSupp->numOfCols = numOfOrderCols;
pSupp->pTagSchema = pTagSchema;
pSupp->pCols = pCols;
taosqsort(pTableList->pData, size, POINTER_BYTES, pSupp, tableGroupComparFn);
createTableGroupImpl(pTableGroup, pTableList, size, pSupp, tableGroupComparFn);
tfree(pSupp);
}
return pTableGroup;
}
bool indexedNodeFilterFp(const void* pNode, void* param) {
tQueryInfo* pInfo = (tQueryInfo*) param;
STable* pTable = *(STable**)(SL_GET_NODE_DATA((SSkipListNode*)pNode));
char* val = NULL;
......@@ -2172,7 +2208,7 @@ bool indexedNodeFilterFp(const void* pNode, void* param) {
} else {
val = tdGetKVRowValOfCol(pTable->tagVal, pInfo->sch.colId);
}
int32_t ret = 0;
if (val == NULL) { //the val is possible to be null, so check it out carefully
ret = -1; // val is missing in table tags value pairs
......@@ -2209,7 +2245,7 @@ bool indexedNodeFilterFp(const void* pNode, void* param) {
default:
assert(false);
}
return true;
}
......@@ -2239,7 +2275,7 @@ int32_t tsdbQuerySTableByTagCond(TSDB_REPO_T* tsdb, uint64_t uid, const char* pT
goto _error;
}
if (pTable->type != TSDB_SUPER_TABLE) {
tsdbError("%p query normal tag not allowed, uid:%" PRIu64 ", tid:%d, name:%s", tsdb, uid, pTable->tableId.tid,
pTable->name->data);
......@@ -2252,7 +2288,7 @@ int32_t tsdbQuerySTableByTagCond(TSDB_REPO_T* tsdb, uint64_t uid, const char* pT
//NOTE: not add ref count for super table
SArray* res = taosArrayInit(8, POINTER_BYTES);
STSchema* pTagSchema = tsdbGetTableTagSchema(pTable);
// no tags and tbname condition, all child tables of this stable are involved
if (tbnameCond == NULL && (pTagCond == NULL || len == 0)) {
int32_t ret = getAllTableList(pTable, res);
......@@ -2263,7 +2299,7 @@ int32_t tsdbQuerySTableByTagCond(TSDB_REPO_T* tsdb, uint64_t uid, const char* pT
pGroupInfo->numOfTables = taosArrayGetSize(res);
pGroupInfo->pGroupList = createTableGroup(res, pTagSchema, pColIndex, numOfCols);
tsdbDebug("%p no table name/tag condition, all tables belong to one group, numOfTables:%zu", tsdb, pGroupInfo->numOfTables);
taosArrayDestroy(res);
......@@ -2299,7 +2335,7 @@ int32_t tsdbQuerySTableByTagCond(TSDB_REPO_T* tsdb, uint64_t uid, const char* pT
} CATCH( code ) {
CLEANUP_EXECUTE();
terrno = code;
goto _error;
goto _error;
// TODO: more error handling
} END_TRY
......@@ -2335,12 +2371,12 @@ int32_t tsdbGetOneTableGroup(TSDB_REPO_T* tsdb, uint64_t uid, STableGroupInfo* p
pGroupInfo->numOfTables = 1;
pGroupInfo->pGroupList = taosArrayInit(1, POINTER_BYTES);
SArray* group = taosArrayInit(1, POINTER_BYTES);
taosArrayPush(group, &pTable);
taosArrayPush(pGroupInfo->pGroupList, &group);
return TSDB_CODE_SUCCESS;
_error:
......@@ -2428,8 +2464,8 @@ void tsdbCleanupQueryHandle(TsdbQueryHandleT queryHandle) {
tsdbDestroyHelper(&pQueryHandle->rhelper);
SIOCostSummary* pCost = &pQueryHandle->cost;
tsdbDebug(":io-cost summary: statis-info:%"PRId64"us, datablock:%" PRId64"us, check data:%"PRId64"us, %p",
pCost->statisInfoLoadTime, pCost->blockLoadTime, pCost->checkForNextTime, pQueryHandle->qinfo);
tsdbDebug("%p :io-cost summary: statis-info:%"PRId64"us, datablock:%" PRId64"us, check data:%"PRId64"us, %p",
pQueryHandle, pCost->statisInfoLoadTime, pCost->blockLoadTime, pCost->checkForNextTime, pQueryHandle->qinfo);
tfree(pQueryHandle);
}
......
......@@ -68,8 +68,6 @@ typedef struct {
int64_t refreshTime;
STrashElem * pTrash;
char* name;
// void * tmrCtrl;
// void * pTimer;
SCacheStatis statistics;
SHashObj * pHashTable;
__cache_free_fn_t freeFp;
......
......@@ -55,6 +55,8 @@ int tdListPrepend(SList *list, void *data);
int tdListAppend(SList *list, void *data);
SListNode *tdListPopHead(SList *list);
SListNode *tdListPopTail(SList *list);
SListNode *tdListGetHead(SList *list);
SListNode *tsListGetTail(SList *list);
SListNode *tdListPopNode(SList *list, SListNode *node);
void tdListMove(SList *src, SList *dst);
void tdListDiscard(SList *list);
......
......@@ -343,7 +343,7 @@ void* taosCacheUpdateExpireTimeByName(SCacheObj *pCacheObj, void *key, size_t ke
SCacheDataNode **ptNode = (SCacheDataNode **)taosHashGet(pCacheObj->pHashTable, key, keyLen);
if (ptNode != NULL) {
T_REF_INC(*ptNode);
(*ptNode)->expireTime = taosGetTimestampMs() + (*ptNode)->lifespan;
(*ptNode)->expireTime = expireTime; // taosGetTimestampMs() + (*ptNode)->lifespan;
}
__cache_unlock(pCacheObj);
......@@ -381,7 +381,7 @@ void *taosCacheAcquireByData(SCacheObj *pCacheObj, void *data) {
}
void *taosCacheTransfer(SCacheObj *pCacheObj, void **data) {
if (pCacheObj == NULL || data == NULL) return NULL;
if (pCacheObj == NULL || data == NULL || (*data) == NULL) return NULL;
size_t offset = offsetof(SCacheDataNode, data);
SCacheDataNode *ptNode = (SCacheDataNode *)((char *)(*data) - offset);
......@@ -419,7 +419,7 @@ void taosCacheRelease(SCacheObj *pCacheObj, void **data, bool _remove) {
// note: extend lifespan before dec ref count
bool inTrashCan = pNode->inTrashCan;
if (pCacheObj->extendLifespan && (!inTrashCan)) {
if (pCacheObj->extendLifespan && (!inTrashCan) && (!_remove)) {
atomic_store_64(&pNode->expireTime, pNode->lifespan + taosGetTimestampMs());
uDebug("cache:%s data:%p extend life time to %"PRId64 " before release", pCacheObj->name, pNode->data, pNode->expireTime);
}
......@@ -457,8 +457,9 @@ void taosCacheRelease(SCacheObj *pCacheObj, void **data, bool _remove) {
} else {
// NOTE: once refcount is decrease, pNode may be freed by other thread immediately.
int32_t ref = T_REF_DEC(pNode);
uDebug("cache:%s, key:%p, %p is released, refcnt:%d, in trashcan:%d", pCacheObj->name, pNode->key, pNode->data, ref,
inTrashCan);
uDebug("cache:%s, key:%p, %p released, refcnt:%d, data in trancan:%d", pCacheObj->name, pNode->key, pNode->data,
ref, inTrashCan);
}
}
......@@ -572,6 +573,7 @@ void taosRemoveFromTrashCan(SCacheObj *pCacheObj, STrashElem *pElem) {
free(pElem);
}
// TODO add another lock when scanning trashcan
void taosTrashCanEmpty(SCacheObj *pCacheObj, bool force) {
__cache_wr_lock(pCacheObj);
......@@ -643,6 +645,7 @@ static void doCacheRefresh(SCacheObj* pCacheObj, int64_t time, __cache_free_fn_t
__cache_wr_lock(pCacheObj);
while (taosHashIterNext(pIter)) {
SCacheDataNode *pNode = *(SCacheDataNode **)taosHashIterGet(pIter);
if (pNode->expireTime < time && T_REF_VAL_GET(pNode) <= 0) {
taosCacheReleaseNode(pCacheObj, pNode);
continue;
......@@ -674,6 +677,7 @@ void* taosCacheTimedRefresh(void *handle) {
// check if current cache object will be deleted every 500ms.
if (pCacheObj->deleting) {
uDebug("%s refresh threads quit", pCacheObj->name);
break;
}
......
#include "taosdef.h"
#include "tcompare.h"
#include <tarray.h>
#include "tarray.h"
#include "tutil.h"
int32_t compareInt32Val(const void *pLeft, const void *pRight) {
......
......@@ -76,6 +76,7 @@ int tdListPrepend(SList *list, void *data) {
SListNode *node = (SListNode *)malloc(sizeof(SListNode) + list->eleSize);
if (node == NULL) return -1;
node->next = node->prev = NULL;
memcpy((void *)(node->data), data, list->eleSize);
tdListPrependNode(list, node);
......@@ -121,6 +122,22 @@ SListNode *tdListPopTail(SList *list) {
return node;
}
SListNode *tdListGetHead(SList *list) {
if (list == NULL || list->numOfEles == 0) {
return NULL;
}
return list->head;
}
SListNode *tsListGetTail(SList *list) {
if (list == NULL || list->numOfEles == 0) {
return NULL;
}
return list->tail;
}
SListNode *tdListPopNode(SList *list, SListNode *node) {
if (list->head == node) {
list->head = node->next;
......
......@@ -71,11 +71,45 @@ static void vnodePutItemIntoReadQueue(SVnodeObj *pVnode, void *qhandle) {
pRead->rpcMsg.msgType = TSDB_MSG_TYPE_QUERY;
pRead->pCont = qhandle;
pRead->contLen = 0;
pRead->rpcMsg.handle = NULL;
atomic_add_fetch_32(&pVnode->refCount, 1);
taosWriteQitem(pVnode->rqueue, TAOS_QTYPE_QUERY, pRead);
}
static int32_t vnodeDumpQueryResult(SRspRet *pRet, void* pVnode, void* handle, bool* freeHandle) {
bool continueExec = false;
int32_t code = TSDB_CODE_SUCCESS;
if ((code = qDumpRetrieveResult(handle, (SRetrieveTableRsp **)&pRet->rsp, &pRet->len, &continueExec)) == TSDB_CODE_SUCCESS) {
if (continueExec) {
vDebug("QInfo:%p add to query task queue for exec", handle);
vnodePutItemIntoReadQueue(pVnode, handle);
pRet->qhandle = handle;
*freeHandle = false;
} else {
vDebug("QInfo:%p exec completed", handle);
*freeHandle = true;
}
} else {
pRet->rsp = (SRetrieveTableRsp *)rpcMallocCont(sizeof(SRetrieveTableRsp));
memset(pRet->rsp, 0, sizeof(SRetrieveTableRsp));
*freeHandle = true;
}
return code;
}
static void vnodeBuildNoResultQueryRsp(SRspRet* pRet) {
pRet->rsp = (SRetrieveTableRsp *)rpcMallocCont(sizeof(SRetrieveTableRsp));
pRet->len = sizeof(SRetrieveTableRsp);
memset(pRet->rsp, 0, sizeof(SRetrieveTableRsp));
SRetrieveTableRsp* pRsp = pRet->rsp;
pRsp->completed = true;
}
static int32_t vnodeProcessQueryMsg(SVnodeObj *pVnode, SReadMsg *pReadMsg) {
void *pCont = pReadMsg->pCont;
int32_t contLen = pReadMsg->contLen;
......@@ -98,6 +132,7 @@ static int32_t vnodeProcessQueryMsg(SVnodeObj *pVnode, SReadMsg *pReadMsg) {
vWarn("QInfo:%p invalid qhandle, no matched query handle, conn:%p", (void*) killQueryMsg->qhandle, pReadMsg->rpcMsg.handle);
} else {
assert(*qhandle == (void*) killQueryMsg->qhandle);
qKillQuery(*qhandle);
qReleaseQInfo(pVnode->qMgmt, (void**) &qhandle, true);
}
......@@ -110,7 +145,7 @@ static int32_t vnodeProcessQueryMsg(SVnodeObj *pVnode, SReadMsg *pReadMsg) {
if (contLen != 0) {
qinfo_t pQInfo = NULL;
code = qCreateQueryInfo(pVnode->tsdb, pVnode->vgId, pQueryTableMsg, pVnode, &pQInfo);
code = qCreateQueryInfo(pVnode->tsdb, pVnode->vgId, pQueryTableMsg, &pQInfo);
SQueryTableRsp *pRsp = (SQueryTableRsp *) rpcMallocCont(sizeof(SQueryTableRsp));
pRsp->code = code;
......@@ -133,7 +168,6 @@ static int32_t vnodeProcessQueryMsg(SVnodeObj *pVnode, SReadMsg *pReadMsg) {
pRsp->qhandle = htobe64((uint64_t) pQInfo);
}
pQInfo = NULL;
if (handle != NULL && vnodeNotifyCurrentQhandle(pReadMsg->rpcMsg.handle, *handle, pVnode->vgId) != TSDB_CODE_SUCCESS) {
vError("vgId:%d, QInfo:%p, query discarded since link is broken, %p", pVnode->vgId, *handle, pReadMsg->rpcMsg.handle);
pRsp->code = TSDB_CODE_RPC_NETWORK_UNAVAIL;
......@@ -153,16 +187,34 @@ static int32_t vnodeProcessQueryMsg(SVnodeObj *pVnode, SReadMsg *pReadMsg) {
} else {
assert(pCont != NULL);
handle = qAcquireQInfo(pVnode->qMgmt, (uint64_t) pCont);
if (handle == NULL) {
vWarn("QInfo:%p invalid qhandle in continuing exec query, conn:%p", (void*) pCont, pReadMsg->rpcMsg.handle);
code = TSDB_CODE_QRY_INVALID_QHANDLE;
} else {
vDebug("vgId:%d, QInfo:%p, dnode continue exec query", pVnode->vgId, (void*) pCont);
code = TSDB_CODE_VND_ACTION_IN_PROGRESS;
qTableQuery(*handle); // do execute query
bool freehandle = false;
bool buildRes = qTableQuery(*handle); // do execute query
// build query rsp
if (buildRes) {
// update the connection info according to the retrieve connection
pReadMsg->rpcMsg.handle = qGetResultRetrieveMsg(*handle);
assert(pReadMsg->rpcMsg.handle != NULL);
vDebug("vgId:%d, QInfo:%p, start to build result rsp after query paused, %p", pVnode->vgId, *handle, pReadMsg->rpcMsg.handle);
code = vnodeDumpQueryResult(&pReadMsg->rspRet, pVnode, *handle, &freehandle);
// todo test the error code case
if (code == TSDB_CODE_SUCCESS) {
code = TSDB_CODE_QRY_HAS_RSP;
}
}
qReleaseQInfo(pVnode->qMgmt, (void**) &handle, freehandle);
}
qReleaseQInfo(pVnode->qMgmt, (void**) &handle, false);
}
return code;
......@@ -176,7 +228,7 @@ static int32_t vnodeProcessFetchMsg(SVnodeObj *pVnode, SReadMsg *pReadMsg) {
pRetrieve->qhandle = htobe64(pRetrieve->qhandle);
pRetrieve->free = htons(pRetrieve->free);
vDebug("vgId:%d, QInfo:%p, retrieve msg is disposed", pVnode->vgId, (void*) pRetrieve->qhandle);
vDebug("vgId:%d, QInfo:%p, retrieve msg is disposed, free:%d, conn:%p", pVnode->vgId, (void*) pRetrieve->qhandle, pRetrieve->free, pReadMsg->rpcMsg.handle);
memset(pRet, 0, sizeof(SRspRet));
......@@ -185,16 +237,8 @@ static int32_t vnodeProcessFetchMsg(SVnodeObj *pVnode, SReadMsg *pReadMsg) {
if (handle == NULL || (*handle) != (void*) pRetrieve->qhandle) {
code = TSDB_CODE_QRY_INVALID_QHANDLE;
vDebug("vgId:%d, invalid qhandle in fetch result, QInfo:%p", pVnode->vgId, (void*) pRetrieve->qhandle);
pRet->rsp = (SRetrieveTableRsp *)rpcMallocCont(sizeof(SRetrieveTableRsp));
pRet->len = sizeof(SRetrieveTableRsp);
memset(pRet->rsp, 0, sizeof(SRetrieveTableRsp));
SRetrieveTableRsp* pRsp = pRet->rsp;
pRsp->numOfRows = 0;
pRsp->useconds = 0;
pRsp->completed = true;
vnodeBuildNoResultQueryRsp(pRet);
return code;
}
......@@ -203,35 +247,25 @@ static int32_t vnodeProcessFetchMsg(SVnodeObj *pVnode, SReadMsg *pReadMsg) {
qKillQuery(*handle);
qReleaseQInfo(pVnode->qMgmt, (void**) &handle, true);
pRet->rsp = (SRetrieveTableRsp *)rpcMallocCont(sizeof(SRetrieveTableRsp));
pRet->len = sizeof(SRetrieveTableRsp);
memset(pRet->rsp, 0, sizeof(SRetrieveTableRsp));
SRetrieveTableRsp* pRsp = pRet->rsp;
pRsp->numOfRows = 0;
pRsp->completed = true;
pRsp->useconds = 0;
vnodeBuildNoResultQueryRsp(pRet);
return code;
}
bool freeHandle = true;
code = qRetrieveQueryResultInfo(*handle);
bool buildRes = false;
code = qRetrieveQueryResultInfo(*handle, &buildRes, pReadMsg->rpcMsg.handle);
if (code != TSDB_CODE_SUCCESS) {
//TODO handle malloc failure
pRet->rsp = (SRetrieveTableRsp *)rpcMallocCont(sizeof(SRetrieveTableRsp));
memset(pRet->rsp, 0, sizeof(SRetrieveTableRsp));
} else { // if failed to dump result, free qhandle immediately
if ((code = qDumpRetrieveResult(*handle, (SRetrieveTableRsp **)&pRet->rsp, &pRet->len)) == TSDB_CODE_SUCCESS) {
if (qHasMoreResultsToRetrieve(*handle)) {
vnodePutItemIntoReadQueue(pVnode, *handle);
pRet->qhandle = *handle;
freeHandle = false;
} else {
qKillQuery(*handle);
freeHandle = true;
}
} else { // result is not ready, return immediately
if (!buildRes) {
qReleaseQInfo(pVnode->qMgmt, (void**) &handle, false);
return TSDB_CODE_QRY_NOT_READY;
}
code = vnodeDumpQueryResult(pRet, pVnode, *handle, &freeHandle);
}
qReleaseQInfo(pVnode->qMgmt, (void**) &handle, freeHandle);
......
......@@ -93,6 +93,8 @@ run general/parser/groupby.sim
sleep 2000
run general/parser/tags_filter.sim
sleep 2000
run general/parser/topbot.sim
sleep 2000
run general/parser/union.sim
sleep 2000
run general/parser/sliding.sim
......
system sh/stop_dnodes.sh
system sh/deploy.sh -n dnode1 -i 1
system sh/cfg.sh -n dnode1 -c walLevel -v 0
system sh/exec.sh -n dnode1 -s start
sleep 3000
sql connect
$dbPrefix = tb_db
$tbPrefix = tb_tb
$stbPrefix = tb_stb
$tbNum = 10
$rowNum = 1000
$totalNum = $tbNum * $rowNum
$loops = 200000
$log = 10000
$ts0 = 1537146000000
$delta = 600000
print ========== topbot.sim
$i = 0
$db = $dbPrefix . $i
$stb = $stbPrefix . $i
sql drop database $db -x step1
step1:
sql create database $db cache 16 maxtables 200
print ====== create tables
sql use $db
sql create table $stb (ts timestamp, c1 int, c2 bigint, c3 float, c4 double, c5 smallint, c6 tinyint, c7 bool, c8 binary(10), c9 nchar(10)) tags(t1 int)
$i = 0
$ts = $ts0
$halfNum = $tbNum / 2
while $i < $halfNum
$tbId = $i + $halfNum
$tb = $tbPrefix . $i
$tb1 = $tbPrefix . $tbId
sql create table $tb using $stb tags( $i )
sql create table $tb1 using $stb tags( $tbId )
$x = 0
while $x < $rowNum
$xs = $x * $delta
$ts = $ts0 + $xs
$c = $x / 10
$c = $c * 10
$c = $x - $c
$binary = 'binary . $c
$binary = $binary . '
$nchar = 'nchar . $c
$nchar = $nchar . '
sql insert into $tb values ( $ts , $c , $c , $c , $c , $c , $c , true, $binary , $nchar )
sql insert into $tb1 values ( $ts , $c , NULL , $c , NULL , $c , $c , true, $binary , $nchar )
$x = $x + 1
endw
$i = $i + 1
endw
print ====== tables created
sql use $db
##### select from table
print ====== select top/bot from table and check num of rows returned
sql select top(c1, 100) from tb_stb0
if $row != 100 then
return -1
endi
sql select last(c2) from tb_tb9
if $row != 1 then
return -1
endi
system sh/exec.sh -n dnode1 -s stop -x SIGINT
\ No newline at end of file
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册