diff --git a/src/client/inc/tscSecondaryMerge.h b/src/client/inc/tscSecondaryMerge.h index bcfe14fcb79e79b6d3965dfd5970421819f27b23..08d995c9f3d789a82f5b8fa1331d8653a017181b 100644 --- a/src/client/inc/tscSecondaryMerge.h +++ b/src/client/inc/tscSecondaryMerge.h @@ -68,7 +68,7 @@ typedef struct SLocalReducer { bool hasPrevRow; // cannot be released bool hasUnprocessedRow; tOrderDescriptor * pDesc; - tColModel * resColModel; + SColumnModel * resColModel; tExtMemBuffer ** pExtMemBuffer; // disk-based buffer SInterpolationInfo interpolationInfo; // interpolation support structure char * pFinalRes; // result data after interpo @@ -90,21 +90,21 @@ typedef struct SSubqueryState { } SSubqueryState; typedef struct SRetrieveSupport { - tExtMemBuffer ** pExtMemBuffer; // for build loser tree + tExtMemBuffer ** pExtMemBuffer; // for build loser tree tOrderDescriptor *pOrderDescriptor; - tColModel * pFinalColModel; // colModel for final result + SColumnModel * pFinalColModel; // colModel for final result SSubqueryState * pState; int32_t subqueryIndex; // index of current vnode in vnode list SSqlObj * pParentSqlObj; - tFilePage * localBuffer; // temp buffer, there is a buffer for each vnode to - uint32_t numOfRetry; // record the number of retry times + tFilePage * localBuffer; // temp buffer, there is a buffer for each vnode to + uint32_t numOfRetry; // record the number of retry times pthread_mutex_t queryMutex; } SRetrieveSupport; int32_t tscLocalReducerEnvCreate(SSqlObj *pSql, tExtMemBuffer ***pMemBuffer, tOrderDescriptor **pDesc, - tColModel **pFinalModel, uint32_t nBufferSize); + SColumnModel **pFinalModel, uint32_t nBufferSize); -void tscLocalReducerEnvDestroy(tExtMemBuffer **pMemBuffer, tOrderDescriptor *pDesc, tColModel *pFinalModel, +void tscLocalReducerEnvDestroy(tExtMemBuffer **pMemBuffer, tOrderDescriptor *pDesc, SColumnModel *pFinalModel, int32_t numOfVnodes); int32_t saveToBuffer(tExtMemBuffer *pMemoryBuf, tOrderDescriptor *pDesc, tFilePage *pPage, void *data, @@ 
-116,7 +116,7 @@ int32_t tscFlushTmpBuffer(tExtMemBuffer *pMemoryBuf, tOrderDescriptor *pDesc, tF * create local reducer to launch the second-stage reduce process at client site */ void tscCreateLocalReducer(tExtMemBuffer **pMemBuffer, int32_t numOfBuffer, tOrderDescriptor *pDesc, - tColModel *finalModel, SSqlCmd *pSqlCmd, SSqlRes *pRes); + SColumnModel *finalModel, SSqlCmd *pSqlCmd, SSqlRes *pRes); void tscDestroyLocalReducer(SSqlObj *pSql); diff --git a/src/client/src/tscFunctionImpl.c b/src/client/src/tscFunctionImpl.c index 248b197d5240e2ee0c680e46f4c542ccce21446e..ed7d4dfc602fde29538183d8f691a24f5d63ec56 100644 --- a/src/client/src/tscFunctionImpl.c +++ b/src/client/src/tscFunctionImpl.c @@ -27,6 +27,7 @@ #include "ttime.h" #include "ttypes.h" #include "tutil.h" +#include "tpercentile.h" #define GET_INPUT_CHAR(x) (((char *)((x)->aInputElemBuf)) + ((x)->startOffset) * ((x)->inputBytes)) #define GET_INPUT_CHAR_INDEX(x, y) (GET_INPUT_CHAR(x) + (y) * (x)->inputBytes) @@ -220,7 +221,7 @@ int32_t getResultDataInfo(int32_t dataType, int32_t dataBytes, int32_t functionI } else if (functionId == TSDB_FUNC_TWA) { *type = TSDB_DATA_TYPE_DOUBLE; *bytes = sizeof(STwaInfo); - *intermediateResBytes = sizeof(STwaInfo); + *intermediateResBytes = *bytes; return TSDB_CODE_SUCCESS; } } @@ -271,7 +272,6 @@ int32_t getResultDataInfo(int32_t dataType, int32_t dataBytes, int32_t functionI } else if (functionId == TSDB_FUNC_PERCT) { *type = (int16_t)TSDB_DATA_TYPE_DOUBLE; *bytes = (int16_t)sizeof(double); - //*intermediateResBytes = POINTER_BYTES; *intermediateResBytes = (int16_t)sizeof(double); } else if (functionId == TSDB_FUNC_LEASTSQR) { *type = TSDB_DATA_TYPE_BINARY; @@ -1878,7 +1878,8 @@ static void do_top_function_add(STopBotInfo *pInfo, int32_t maxLen, void *pData, tVariantCreateFromBinary(&val, pData, tDataTypeDesc[type].nSize, type); tValuePair **pList = pInfo->res; - + assert(pList != NULL); + if (pInfo->num < maxLen) { if (pInfo->num == 0 || ((type >= 
TSDB_DATA_TYPE_TINYINT && type <= TSDB_DATA_TYPE_BIGINT) && @@ -2416,7 +2417,7 @@ static bool percentile_function_setup(SQLFunctionCtx *pCtx) { SResultInfo *pResInfo = GET_RES_INFO(pCtx); SSchema field[1] = {{pCtx->inputType, "dummyCol", 0, pCtx->inputBytes}}; - tColModel *pModel = tColModelCreate(field, 1, 1000); + SColumnModel *pModel = createColumnModel(field, 1, 1000); int32_t orderIdx = 0; // tOrderDesc object diff --git a/src/client/src/tscParseInsert.c b/src/client/src/tscParseInsert.c index 6ef28a55f60fc847dc36de26856682d2fb6b65c8..b0d5a58bcf0c679f6dc35ce2f5f1eab650e2950d 100644 --- a/src/client/src/tscParseInsert.c +++ b/src/client/src/tscParseInsert.c @@ -321,7 +321,7 @@ int32_t tsParseOneColumnData(SSchema *pSchema, SSQLToken *pToken, char *payload, if (pToken->type == TK_NULL) { *(uint32_t *)payload = TSDB_DATA_NCHAR_NULL; } else { - // if the converted output len is over than pSchema->bytes, return error: 'Argument list too long' + // if the converted output len is over than pColumnModel->bytes, return error: 'Argument list too long' if (!taosMbsToUcs4(pToken->z, pToken->n, payload, pSchema->bytes)) { char buf[512] = {0}; snprintf(buf, 512, "%s", strerror(errno)); diff --git a/src/client/src/tscSQLParser.c b/src/client/src/tscSQLParser.c index ef5ec5808b66b3a100fd72714d7e0972eb574298..da7e22fe1e42d0dc9b761b9bd13567440d659005 100644 --- a/src/client/src/tscSQLParser.c +++ b/src/client/src/tscSQLParser.c @@ -72,7 +72,7 @@ static void setColumnOffsetValueInResultset(SQueryInfo* pQueryInfo); static int32_t parseGroupbyClause(SQueryInfo* pQueryInfo, tVariantList* pList, SSqlCmd* pCmd); static int32_t parseIntervalClause(SQueryInfo* pQueryInfo, SQuerySQL* pQuerySql); -static int32_t setSlidingClause(SQueryInfo* pQueryInfo, SQuerySQL* pQuerySql); +static int32_t parseSlidingClause(SQueryInfo* pQueryInfo, SQuerySQL* pQuerySql); static int32_t addProjectionExprAndResultField(SQueryInfo* pQueryInfo, tSQLExprItem* pItem); @@ -657,14 +657,14 @@ int32_t 
parseIntervalClause(SQueryInfo* pQueryInfo, SQuerySQL* pQuerySql) { return ret; } - if (setSlidingClause(pQueryInfo, pQuerySql) != TSDB_CODE_SUCCESS) { + if (parseSlidingClause(pQueryInfo, pQuerySql) != TSDB_CODE_SUCCESS) { return TSDB_CODE_INVALID_SQL; } return TSDB_CODE_SUCCESS; } -int32_t setSlidingClause(SQueryInfo* pQueryInfo, SQuerySQL* pQuerySql) { +int32_t parseSlidingClause(SQueryInfo* pQueryInfo, SQuerySQL* pQuerySql) { const char* msg0 = "sliding value too small"; const char* msg1 = "sliding value no larger than the interval value"; @@ -685,7 +685,7 @@ int32_t setSlidingClause(SQueryInfo* pQueryInfo, SQuerySQL* pQuerySql) { return invalidSqlErrMsg(pQueryInfo->msg, msg1); } } else { - pSliding->n = pQueryInfo->nAggTimeInterval; + pQueryInfo->nSlidingTime = -1; } return TSDB_CODE_SUCCESS; @@ -5453,8 +5453,6 @@ int32_t doCheckForQuery(SSqlObj* pSql, SQuerySQL* pQuerySql, int32_t index) { const char* msg0 = "invalid table name"; const char* msg1 = "table name too long"; const char* msg2 = "point interpolation query needs timestamp"; - const char* msg3 = "sliding value too small"; - const char* msg4 = "sliding value no larger than the interval value"; const char* msg5 = "fill only available for interval query"; const char* msg6 = "start(end) time of query range required or time range too large"; const char* msg7 = "illegal number of tables in from clause"; @@ -5587,30 +5585,6 @@ int32_t doCheckForQuery(SSqlObj* pSql, SQuerySQL* pQuerySql, int32_t index) { if (!hasTimestampForPointInterpQuery(pQueryInfo)) { return invalidSqlErrMsg(pQueryInfo->msg, msg2); } - -// // set sliding value, the query time range needs to be decide in the first place -// SSQLToken* pSliding = &pQuerySql->sliding; -// if (pSliding->n != 0) { -// if (!tscEmbedded && pCmd->inStream == 0 && hasDefaultQueryTimeRange(pQueryInfo)) { // sliding only allowed in stream -// const char* msg = "time range expected for sliding window query"; -// return invalidSqlErrMsg(tscGetErrorMsgPayload(pCmd), 
msg); -// } -// -// getTimestampInUsFromStr(pSliding->z, pSliding->n, &pQueryInfo->nSlidingTime); -// if (pMeterMetaInfo->pMeterMeta->precision == TSDB_TIME_PRECISION_MILLI) { -// pQueryInfo->nSlidingTime /= 1000; -// } -// -// if (pQueryInfo->nSlidingTime < tsMinSlidingTime) { -// return invalidSqlErrMsg(pQueryInfo->msg, msg3); -// } -// -// if (pQueryInfo->nSlidingTime > pQueryInfo->nAggTimeInterval) { -// return invalidSqlErrMsg(pQueryInfo->msg, msg4); -// } -// } else { -// pQueryInfo->nSlidingTime = -1; -// } // in case of join query, time range is required. if (QUERY_IS_JOIN_QUERY(pQueryInfo->type)) { diff --git a/src/client/src/tscSecondaryMerge.c b/src/client/src/tscSecondaryMerge.c index 30f1dfad773304f98b76df152cb90085bf95c55f..ca57030539a451d4967ace21fc688a1e44ffea76 100644 --- a/src/client/src/tscSecondaryMerge.c +++ b/src/client/src/tscSecondaryMerge.c @@ -62,16 +62,19 @@ static void tscInitSqlContext(SSqlCmd *pCmd, SSqlRes *pRes, SLocalReducer *pRedu for (int32_t i = 0; i < pQueryInfo->fieldsInfo.numOfOutputCols; ++i) { SQLFunctionCtx *pCtx = &pReducer->pCtx[i]; - pCtx->aOutputBuf = pReducer->pResultBuf->data + tscFieldInfoGetOffset(pQueryInfo, i) * pReducer->resColModel->maxCapacity; + pCtx->aOutputBuf = pReducer->pResultBuf->data + tscFieldInfoGetOffset(pQueryInfo, i) * pReducer->resColModel->capacity; pCtx->order = pQueryInfo->order.order; pCtx->functionId = pQueryInfo->exprsInfo.pExprs[i].functionId; // input buffer hold only one point data - pCtx->aInputElemBuf = pReducer->pTempBuffer->data + pDesc->pSchema->colOffset[i]; + int16_t offset = getColumnModelOffset(pDesc->pColumnModel, i); + SSchema* pSchema = getColumnModelSchema(pDesc->pColumnModel, i); + + pCtx->aInputElemBuf = pReducer->pTempBuffer->data + offset; // input data format comes from pModel - pCtx->inputType = pDesc->pSchema->pFields[i].type; - pCtx->inputBytes = pDesc->pSchema->pFields[i].bytes; + pCtx->inputType = pSchema->type; + pCtx->inputBytes = pSchema->bytes; TAOS_FIELD 
*pField = tscFieldInfoGetField(pQueryInfo, i); // output data format yet comes from pCmd. @@ -132,11 +135,11 @@ static void tscInitSqlContext(SSqlCmd *pCmd, SSqlRes *pRes, SLocalReducer *pRedu * todo release allocated memory process with async process */ void tscCreateLocalReducer(tExtMemBuffer **pMemBuffer, int32_t numOfBuffer, tOrderDescriptor *pDesc, - tColModel *finalmodel, SSqlCmd *pCmd, SSqlRes *pRes) { + SColumnModel *finalmodel, SSqlCmd *pCmd, SSqlRes *pRes) { // offset of cmd in SSqlObj structure char *pSqlObjAddr = (char *)pCmd - offsetof(SSqlObj, cmd); - if (pMemBuffer == NULL || pDesc->pSchema == NULL) { + if (pMemBuffer == NULL || pDesc->pColumnModel == NULL) { tscLocalReducerEnvDestroy(pMemBuffer, pDesc, finalmodel, numOfBuffer); tscError("%p no local buffer or intermediate result format model", pSqlObjAddr); @@ -162,9 +165,9 @@ void tscCreateLocalReducer(tExtMemBuffer **pMemBuffer, int32_t numOfBuffer, tOrd return; } - if (pDesc->pSchema->maxCapacity >= pMemBuffer[0]->nPageSize) { - tscError("%p Invalid value of buffer capacity %d and page size %d ", pSqlObjAddr, pDesc->pSchema->maxCapacity, - pMemBuffer[0]->nPageSize); + if (pDesc->pColumnModel->capacity >= pMemBuffer[0]->pageSize) { + tscError("%p Invalid value of buffer capacity %d and page size %d ", pSqlObjAddr, pDesc->pColumnModel->capacity, + pMemBuffer[0]->pageSize); tscLocalReducerEnvDestroy(pMemBuffer, pDesc, finalmodel, numOfBuffer); pRes->code = TSDB_CODE_APP_ERROR; @@ -196,7 +199,7 @@ void tscCreateLocalReducer(tExtMemBuffer **pMemBuffer, int32_t numOfBuffer, tOrd int32_t numOfFlushoutInFile = pMemBuffer[i]->fileMeta.flushoutData.nLength; for (int32_t j = 0; j < numOfFlushoutInFile; ++j) { - SLocalDataSource *pDS = (SLocalDataSource *)malloc(sizeof(SLocalDataSource) + pMemBuffer[0]->nPageSize); + SLocalDataSource *pDS = (SLocalDataSource *)malloc(sizeof(SLocalDataSource) + pMemBuffer[0]->pageSize); if (pDS == NULL) { tscError("%p failed to create merge structure", pSqlObjAddr); 
pRes->code = TSDB_CODE_CLI_OUT_OF_MEMORY; @@ -219,7 +222,7 @@ void tscCreateLocalReducer(tExtMemBuffer **pMemBuffer, int32_t numOfBuffer, tOrd tscGetSrcColumnInfo(colInfo, pQueryInfo); - tColModelDisplayEx(pDesc->pSchema, pDS->filePage.data, pDS->filePage.numOfElems, pMemBuffer[0]->numOfElemsPerPage, + tColModelDisplayEx(pDesc->pColumnModel, pDS->filePage.data, pDS->filePage.numOfElems, pMemBuffer[0]->numOfElemsPerPage, colInfo); #endif if (pDS->filePage.numOfElems == 0) { // no data in this flush @@ -259,7 +262,7 @@ void tscCreateLocalReducer(tExtMemBuffer **pMemBuffer, int32_t numOfBuffer, tOrd tscRestoreSQLFunctionForMetricQuery(pQueryInfo); tscFieldInfoCalOffset(pQueryInfo); - if (pReducer->rowSize > pMemBuffer[0]->nPageSize) { + if (pReducer->rowSize > pMemBuffer[0]->pageSize) { assert(false); // todo fixed row size is larger than the minimum page size; } @@ -274,15 +277,15 @@ void tscCreateLocalReducer(tExtMemBuffer **pMemBuffer, int32_t numOfBuffer, tOrd pReducer->discardData = (tFilePage *)calloc(1, pReducer->rowSize + sizeof(tFilePage)); pReducer->discard = false; - pReducer->nResultBufSize = pMemBuffer[0]->nPageSize * 16; + pReducer->nResultBufSize = pMemBuffer[0]->pageSize * 16; pReducer->pResultBuf = (tFilePage *)calloc(1, pReducer->nResultBufSize + sizeof(tFilePage)); int32_t finalRowLength = tscGetResRowLength(pQueryInfo); pReducer->resColModel = finalmodel; - pReducer->resColModel->maxCapacity = pReducer->nResultBufSize / finalRowLength; + pReducer->resColModel->capacity = pReducer->nResultBufSize / finalRowLength; assert(finalRowLength <= pReducer->rowSize); - pReducer->pFinalRes = calloc(1, pReducer->rowSize * pReducer->resColModel->maxCapacity); + pReducer->pFinalRes = calloc(1, pReducer->rowSize * pReducer->resColModel->capacity); pReducer->pBufForInterpo = calloc(1, pReducer->nResultBufSize); if (pReducer->pTempBuffer == NULL|| pReducer->discardData == NULL || pReducer->pResultBuf == NULL || @@ -304,8 +307,8 @@ void 
tscCreateLocalReducer(tExtMemBuffer **pMemBuffer, int32_t numOfBuffer, tOrd tscCreateResPointerInfo(pRes, pQueryInfo); tscInitSqlContext(pCmd, pRes, pReducer, pDesc); - // we change the maxCapacity of schema to denote that there is only one row in temp buffer - pReducer->pDesc->pSchema->maxCapacity = 1; + // we change the capacity of schema to denote that there is only one row in temp buffer + pReducer->pDesc->pColumnModel->capacity = 1; //restore the limitation value at the last stage if (tscOrderedProjectionQueryOnSTable(pQueryInfo, 0)) { @@ -333,7 +336,8 @@ void tscCreateLocalReducer(tExtMemBuffer **pMemBuffer, int32_t numOfBuffer, tOrd if (pQueryInfo->groupbyExpr.numOfGroupCols > 0) { pInterpoInfo->pTags[0] = (char *)pInterpoInfo->pTags + POINTER_BYTES * pQueryInfo->groupbyExpr.numOfGroupCols; for (int32_t i = 1; i < pQueryInfo->groupbyExpr.numOfGroupCols; ++i) { - pInterpoInfo->pTags[i] = pReducer->resColModel->pFields[startIndex + i - 1].bytes + pInterpoInfo->pTags[i - 1]; + SSchema* pSchema = getColumnModelSchema(pReducer->resColModel, startIndex + i - 1); + pInterpoInfo->pTags[i] = pSchema->bytes + pInterpoInfo->pTags[i - 1]; } } else { assert(pInterpoInfo->pTags == NULL); @@ -346,16 +350,16 @@ static int32_t tscFlushTmpBufferImpl(tExtMemBuffer *pMemoryBuf, tOrderDescriptor return 0; } - assert(pPage->numOfElems <= pDesc->pSchema->maxCapacity); + assert(pPage->numOfElems <= pDesc->pColumnModel->capacity); // sort before flush to disk, the data must be consecutively put on tFilePage. 
- if (pDesc->orderIdx.numOfOrderedCols > 0) { + if (pDesc->orderIdx.numOfCols > 0) { tColDataQSort(pDesc, pPage->numOfElems, 0, pPage->numOfElems - 1, pPage->data, orderType); } #ifdef _DEBUG_VIEW printf("%" PRIu64 " rows data flushed to disk after been sorted:\n", pPage->numOfElems); - tColModelDisplay(pDesc->pSchema, pPage->data, pPage->numOfElems, pPage->numOfElems); + tColModelDisplay(pDesc->pColumnModel, pPage->data, pPage->numOfElems, pPage->numOfElems); #endif // write to cache after being sorted @@ -383,18 +387,19 @@ int32_t tscFlushTmpBuffer(tExtMemBuffer *pMemoryBuf, tOrderDescriptor *pDesc, tF int32_t saveToBuffer(tExtMemBuffer *pMemoryBuf, tOrderDescriptor *pDesc, tFilePage *pPage, void *data, int32_t numOfRows, int32_t orderType) { - if (pPage->numOfElems + numOfRows <= pDesc->pSchema->maxCapacity) { - tColModelAppend(pDesc->pSchema, pPage, data, 0, numOfRows, numOfRows); + SColumnModel *pModel = pDesc->pColumnModel; + + if (pPage->numOfElems + numOfRows <= pModel->capacity) { + tColModelAppend(pModel, pPage, data, 0, numOfRows, numOfRows); return 0; } - tColModel *pModel = pDesc->pSchema; - - int32_t numOfRemainEntries = pDesc->pSchema->maxCapacity - pPage->numOfElems; + // current buffer is overflow, flush data to extensive buffer + int32_t numOfRemainEntries = pModel->capacity - pPage->numOfElems; tColModelAppend(pModel, pPage, data, 0, numOfRemainEntries, numOfRows); - /* current buffer is full, need to flushed to disk */ - assert(pPage->numOfElems == pDesc->pSchema->maxCapacity); + // current buffer is full, need to flushed to disk + assert(pPage->numOfElems == pModel->capacity); int32_t ret = tscFlushTmpBuffer(pMemoryBuf, pDesc, pPage, orderType); if (ret != 0) { return -1; @@ -404,15 +409,15 @@ int32_t saveToBuffer(tExtMemBuffer *pMemoryBuf, tOrderDescriptor *pDesc, tFilePa while (remain > 0) { int32_t numOfWriteElems = 0; - if (remain > pModel->maxCapacity) { - numOfWriteElems = pModel->maxCapacity; + if (remain > pModel->capacity) { + 
numOfWriteElems = pModel->capacity; } else { numOfWriteElems = remain; } tColModelAppend(pModel, pPage, data, numOfRows - remain, numOfWriteElems, numOfRows); - if (pPage->numOfElems == pModel->maxCapacity) { + if (pPage->numOfElems == pModel->capacity) { int32_t ret = tscFlushTmpBuffer(pMemoryBuf, pDesc, pPage, orderType); if (ret != 0) { return -1; @@ -508,7 +513,7 @@ void tscDestroyLocalReducer(SSqlObj *pSql) { tscTrace("%p free local reducer finished", pSql); } -static int32_t createOrderDescriptor(tOrderDescriptor **pOrderDesc, SSqlCmd *pCmd, tColModel *pModel) { +static int32_t createOrderDescriptor(tOrderDescriptor **pOrderDesc, SSqlCmd *pCmd, SColumnModel *pModel) { int32_t numOfGroupByCols = 0; SQueryInfo* pQueryInfo = tscGetQueryInfoDetail(pCmd, pCmd->clauseIndex); @@ -567,7 +572,7 @@ bool isSameGroup(SSqlCmd *pCmd, SLocalReducer *pReducer, char *pPrev, tFilePage } tOrderDescriptor *pOrderDesc = pReducer->pDesc; - int32_t numOfCols = pOrderDesc->orderIdx.numOfOrderedCols; + int32_t numOfCols = pOrderDesc->orderIdx.numOfCols; // no group by columns, all data belongs to one group if (numOfCols <= 0) { @@ -577,25 +582,25 @@ bool isSameGroup(SSqlCmd *pCmd, SLocalReducer *pReducer, char *pPrev, tFilePage if (pOrderDesc->orderIdx.pData[numOfCols - 1] == PRIMARYKEY_TIMESTAMP_COL_INDEX) { //<= 0 // super table interval query assert(pQueryInfo->nAggTimeInterval > 0); - pOrderDesc->orderIdx.numOfOrderedCols -= 1; + pOrderDesc->orderIdx.numOfCols -= 1; } else { // simple group by query assert(pQueryInfo->nAggTimeInterval == 0); } // only one row exists int32_t ret = compare_a(pOrderDesc, 1, 0, pPrev, 1, 0, tmpBuffer->data); - pOrderDesc->orderIdx.numOfOrderedCols = numOfCols; + pOrderDesc->orderIdx.numOfCols = numOfCols; return (ret == 0); } int32_t tscLocalReducerEnvCreate(SSqlObj *pSql, tExtMemBuffer ***pMemBuffer, tOrderDescriptor **pOrderDesc, - tColModel **pFinalModel, uint32_t nBufferSizes) { + SColumnModel **pFinalModel, uint32_t nBufferSizes) { SSqlCmd *pCmd 
= &pSql->cmd; SSqlRes *pRes = &pSql->res; SSchema * pSchema = NULL; - tColModel *pModel = NULL; + SColumnModel *pModel = NULL; *pFinalModel = NULL; SQueryInfo* pQueryInfo = tscGetQueryInfoDetail(pCmd, pCmd->clauseIndex); @@ -630,14 +635,10 @@ int32_t tscLocalReducerEnvCreate(SSqlObj *pSql, tExtMemBuffer ***pMemBuffer, tOr capacity = nBufferSizes / rlen; } - pModel = tColModelCreate(pSchema, pQueryInfo->fieldsInfo.numOfOutputCols, capacity); + pModel = createColumnModel(pSchema, pQueryInfo->fieldsInfo.numOfOutputCols, capacity); for (int32_t i = 0; i < pMeterMetaInfo->pMetricMeta->numOfVnodes; ++i) { - char tmpPath[512] = {0}; - getTmpfilePath("tv_bf_db", tmpPath); - tscTrace("%p create [%d](%d) tmp file for subquery:%s", pSql, pMeterMetaInfo->pMetricMeta->numOfVnodes, i, tmpPath); - - tExtMemBufferCreate(&(*pMemBuffer)[i], nBufferSizes, rlen, tmpPath, pModel); + (*pMemBuffer)[i] = createExtMemBuffer(nBufferSizes, rlen, pModel); (*pMemBuffer)[i]->flushModel = MULTIPLE_APPEND_MODEL; } @@ -655,7 +656,7 @@ int32_t tscLocalReducerEnvCreate(SSqlObj *pSql, tExtMemBuffer ***pMemBuffer, tOr strcpy(pSchema[i].name, pField->name); } - *pFinalModel = tColModelCreate(pSchema, pQueryInfo->fieldsInfo.numOfOutputCols, capacity); + *pFinalModel = createColumnModel(pSchema, pQueryInfo->fieldsInfo.numOfOutputCols, capacity); tfree(pSchema); return TSDB_CODE_SUCCESS; @@ -667,12 +668,12 @@ int32_t tscLocalReducerEnvCreate(SSqlObj *pSql, tExtMemBuffer ***pMemBuffer, tOr * @param pFinalModel * @param numOfVnodes */ -void tscLocalReducerEnvDestroy(tExtMemBuffer **pMemBuffer, tOrderDescriptor *pDesc, tColModel *pFinalModel, +void tscLocalReducerEnvDestroy(tExtMemBuffer **pMemBuffer, tOrderDescriptor *pDesc, SColumnModel *pFinalModel, int32_t numOfVnodes) { - tColModelDestroy(pFinalModel); + destroyColumnModel(pFinalModel); tOrderDescDestroy(pDesc); for (int32_t i = 0; i < numOfVnodes; ++i) { - tExtMemBufferDestroy(&pMemBuffer[i]); + pMemBuffer[i] = destoryExtMemBuffer(pMemBuffer[i]); } 
tfree(pMemBuffer); @@ -697,8 +698,8 @@ int32_t loadNewDataFromDiskFor(SLocalReducer *pLocalReducer, SLocalDataSource *p #if defined(_DEBUG_VIEW) printf("new page load to buffer\n"); - tColModelDisplay(pOneInterDataSrc->pMemBuffer->pColModel, pOneInterDataSrc->filePage.data, - pOneInterDataSrc->filePage.numOfElems, pOneInterDataSrc->pMemBuffer->pColModel->maxCapacity); + tColModelDisplay(pOneInterDataSrc->pMemBuffer->pColumnModel, pOneInterDataSrc->filePage.data, + pOneInterDataSrc->filePage.numOfElems, pOneInterDataSrc->pMemBuffer->pColumnModel->capacity); #endif *needAdjustLoserTree = true; } else { @@ -759,7 +760,7 @@ void savePrevRecordAndSetupInterpoInfo(SLocalReducer *pLocalReducer, SQueryInfo* pLocalReducer->discard = true; pLocalReducer->discardData->numOfElems = 0; - tColModel *pModel = pLocalReducer->pDesc->pSchema; + SColumnModel *pModel = pLocalReducer->pDesc->pColumnModel; tColModelAppend(pModel, pLocalReducer->discardData, pLocalReducer->prevRowOfInput, 0, 1, 1); } @@ -782,11 +783,12 @@ static void reversedCopyResultToDstBuf(SQueryInfo* pQueryInfo, SSqlRes *pRes, tF } static void reversedCopyFromInterpolationToDstBuf(SQueryInfo* pQueryInfo, SSqlRes *pRes, tFilePage **pResPages, SLocalReducer *pLocalReducer) { + assert(0); for (int32_t i = 0; i < pQueryInfo->exprsInfo.numOfExprs; ++i) { TAOS_FIELD *pField = tscFieldInfoGetField(pQueryInfo, i); int32_t offset = tscFieldInfoGetOffset(pQueryInfo, i); - assert(offset == pLocalReducer->resColModel->colOffset[i]); + assert(offset == getColumnModelOffset(pLocalReducer->resColModel, i)); char *src = pResPages[i]->data + (pRes->numOfRows - 1) * pField->bytes; char *dst = pRes->data + pRes->numOfRows * offset; @@ -880,7 +882,7 @@ static void doInterpolateResult(SSqlObj *pSql, SLocalReducer *pLocalReducer, boo tFilePage **pResPages = malloc(POINTER_BYTES * pQueryInfo->fieldsInfo.numOfOutputCols); for (int32_t i = 0; i < pQueryInfo->fieldsInfo.numOfOutputCols; ++i) { TAOS_FIELD *pField = 
tscFieldInfoGetField(pQueryInfo, i); - pResPages[i] = calloc(1, sizeof(tFilePage) + pField->bytes * pLocalReducer->resColModel->maxCapacity); + pResPages[i] = calloc(1, sizeof(tFilePage) + pField->bytes * pLocalReducer->resColModel->capacity); } char ** srcData = (char **)malloc((POINTER_BYTES + sizeof(int32_t)) * pQueryInfo->fieldsInfo.numOfOutputCols); @@ -899,11 +901,11 @@ static void doInterpolateResult(SSqlObj *pSql, SLocalReducer *pLocalReducer, boo TSKEY etime = taosGetRevisedEndKey(actualETime, pQueryInfo->order.order, pQueryInfo->nAggTimeInterval, pQueryInfo->intervalTimeUnit, precision); int32_t nrows = taosGetNumOfResultWithInterpo(pInterpoInfo, pPrimaryKeys, remains, pQueryInfo->nAggTimeInterval, etime, - pLocalReducer->resColModel->maxCapacity); + pLocalReducer->resColModel->capacity); int32_t newRows = taosDoInterpoResult(pInterpoInfo, pQueryInfo->interpoType, pResPages, remains, nrows, pQueryInfo->nAggTimeInterval, pPrimaryKeys, pLocalReducer->resColModel, srcData, - pQueryInfo->defaultVal, functions, pLocalReducer->resColModel->maxCapacity); + pQueryInfo->defaultVal, functions, pLocalReducer->resColModel->capacity); assert(newRows <= nrows); if (pQueryInfo->limit.offset < newRows) { @@ -960,11 +962,10 @@ static void doInterpolateResult(SSqlObj *pSql, SLocalReducer *pLocalReducer, boo if (pQueryInfo->order.order == TSQL_SO_ASC) { for (int32_t i = 0; i < pQueryInfo->fieldsInfo.numOfOutputCols; ++i) { TAOS_FIELD *pField = tscFieldInfoGetField(pQueryInfo, i); - - memcpy(pRes->data + pLocalReducer->resColModel->colOffset[i] * pRes->numOfRows, pResPages[i]->data, - pField->bytes * pRes->numOfRows); + int16_t offset = getColumnModelOffset(pLocalReducer->resColModel, i); + memcpy(pRes->data + offset * pRes->numOfRows, pResPages[i]->data, pField->bytes * pRes->numOfRows); } - } else { + } else {//todo bug?? 
reversedCopyFromInterpolationToDstBuf(pQueryInfo, pRes, pResPages, pLocalReducer); } } @@ -979,13 +980,15 @@ static void doInterpolateResult(SSqlObj *pSql, SLocalReducer *pLocalReducer, boo } static void savePreviousRow(SLocalReducer *pLocalReducer, tFilePage *tmpBuffer) { - tColModel *pColModel = pLocalReducer->pDesc->pSchema; - assert(pColModel->maxCapacity == 1 && tmpBuffer->numOfElems == 1); + SColumnModel *pColumnModel = pLocalReducer->pDesc->pColumnModel; + assert(pColumnModel->capacity == 1 && tmpBuffer->numOfElems == 1); // copy to previous temp buffer - for (int32_t i = 0; i < pLocalReducer->pDesc->pSchema->numOfCols; ++i) { - memcpy(pLocalReducer->prevRowOfInput + pColModel->colOffset[i], tmpBuffer->data + pColModel->colOffset[i], - pColModel->pFields[i].bytes); + for (int32_t i = 0; i < pColumnModel->numOfCols; ++i) { + SSchema* pSchema = getColumnModelSchema(pColumnModel, i); + int16_t offset = getColumnModelOffset(pColumnModel, i); + + memcpy(pLocalReducer->prevRowOfInput + offset, tmpBuffer->data + offset, pSchema->bytes); } tmpBuffer->numOfElems = 0; @@ -1127,7 +1130,7 @@ bool needToMerge(SQueryInfo* pQueryInfo, SLocalReducer *pLocalReducer, tFilePage ret = 1; // disable merge procedure } else { tOrderDescriptor *pDesc = pLocalReducer->pDesc; - if (pDesc->orderIdx.numOfOrderedCols > 0) { + if (pDesc->orderIdx.numOfCols > 0) { if (pDesc->tsOrder == TSQL_SO_ASC) { // asc // todo refactor comparator ret = compare_a(pLocalReducer->pDesc, 1, 0, pLocalReducer->prevRowOfInput, 1, 0, tmpBuffer->data); @@ -1177,7 +1180,7 @@ bool doGenerateFinalResults(SSqlObj *pSql, SLocalReducer *pLocalReducer, bool no SQueryInfo* pQueryInfo = tscGetQueryInfoDetail(pCmd, pCmd->clauseIndex); tFilePage *pResBuf = pLocalReducer->pResultBuf; - tColModel *pModel = pLocalReducer->resColModel; + SColumnModel *pModel = pLocalReducer->resColModel; pRes->code = TSDB_CODE_SUCCESS; @@ -1192,7 +1195,7 @@ bool doGenerateFinalResults(SSqlObj *pSql, SLocalReducer *pLocalReducer, bool no 
return false; } - tColModelCompact(pModel, pResBuf, pModel->maxCapacity); + tColModelCompact(pModel, pResBuf, pModel->capacity); memcpy(pLocalReducer->pBufForInterpo, pResBuf->data, pLocalReducer->nResultBufSize); #ifdef _DEBUG_VIEW @@ -1204,9 +1207,11 @@ bool doGenerateFinalResults(SSqlObj *pSql, SLocalReducer *pLocalReducer, bool no int32_t startIndex = pQueryInfo->fieldsInfo.numOfOutputCols - pQueryInfo->groupbyExpr.numOfGroupCols; for (int32_t i = 0; i < pQueryInfo->groupbyExpr.numOfGroupCols; ++i) { + int16_t offset = getColumnModelOffset(pModel, startIndex + i); + SSchema* pSchema = getColumnModelSchema(pModel, startIndex + i); + memcpy(pInterpoInfo->pTags[i], - pLocalReducer->pBufForInterpo + pModel->colOffset[startIndex + i] * pResBuf->numOfElems, - pModel->pFields[startIndex + i].bytes); + pLocalReducer->pBufForInterpo + offset * pResBuf->numOfElems, pSchema->bytes); } taosInterpoSetStartInfo(&pLocalReducer->interpolationInfo, pResBuf->numOfElems, pQueryInfo->interpoType); @@ -1218,7 +1223,7 @@ bool doGenerateFinalResults(SSqlObj *pSql, SLocalReducer *pLocalReducer, bool no void resetOutputBuf(SQueryInfo* pQueryInfo, SLocalReducer *pLocalReducer) { // reset output buffer to the beginning for (int32_t i = 0; i < pQueryInfo->fieldsInfo.numOfOutputCols; ++i) { pLocalReducer->pCtx[i].aOutputBuf = - pLocalReducer->pResultBuf->data + tscFieldInfoGetOffset(pQueryInfo, i) * pLocalReducer->resColModel->maxCapacity; + pLocalReducer->pResultBuf->data + tscFieldInfoGetOffset(pQueryInfo, i) * pLocalReducer->resColModel->capacity; } memset(pLocalReducer->pResultBuf, 0, pLocalReducer->nResultBufSize + sizeof(tFilePage)); @@ -1270,7 +1275,7 @@ static bool doInterpolationForCurrentGroup(SSqlObj *pSql) { int32_t remain = taosNumOfRemainPoints(pInterpoInfo); TSKEY ekey = taosGetRevisedEndKey(etime, pQueryInfo->order.order, pQueryInfo->nAggTimeInterval, pQueryInfo->intervalTimeUnit, p); int32_t rows = taosGetNumOfResultWithInterpo(pInterpoInfo, (TSKEY 
*)pLocalReducer->pBufForInterpo, remain, - pQueryInfo->nAggTimeInterval, ekey, pLocalReducer->resColModel->maxCapacity); + pQueryInfo->nAggTimeInterval, ekey, pLocalReducer->resColModel->capacity); if (rows > 0) { // do interpo doInterpolateResult(pSql, pLocalReducer, false); } @@ -1302,7 +1307,7 @@ static bool doHandleLastRemainData(SSqlObj *pSql) { etime = taosGetRevisedEndKey(etime, pQueryInfo->order.order, pQueryInfo->nAggTimeInterval, pQueryInfo->intervalTimeUnit, precision); int32_t rows = taosGetNumOfResultWithInterpo(pInterpoInfo, NULL, 0, pQueryInfo->nAggTimeInterval, etime, - pLocalReducer->resColModel->maxCapacity); + pLocalReducer->resColModel->capacity); if (rows > 0) { // do interpo doInterpolateResult(pSql, pLocalReducer, true); } @@ -1391,7 +1396,7 @@ int32_t tscDoLocalreduce(SSqlObj *pSql) { // clear buffer handleUnprocessedRow(pCmd, pLocalReducer, tmpBuffer); - tColModel *pModel = pLocalReducer->pDesc->pSchema; + SColumnModel *pModel = pLocalReducer->pDesc->pColumnModel; while (1) { if (isAllSourcesCompleted(pLocalReducer)) { @@ -1408,14 +1413,14 @@ int32_t tscDoLocalreduce(SSqlObj *pSql) { SLocalDataSource *pOneDataSrc = pLocalReducer->pLocalDataSrc[pTree->pNode[0].index]; tColModelAppend(pModel, tmpBuffer, pOneDataSrc->filePage.data, pOneDataSrc->rowIdx, 1, - pOneDataSrc->pMemBuffer->pColModel->maxCapacity); + pOneDataSrc->pMemBuffer->pColumnModel->capacity); #if defined(_DEBUG_VIEW) printf("chosen row:\t"); SSrcColumnInfo colInfo[256] = {0}; tscGetSrcColumnInfo(colInfo, pQueryInfo); - tColModelDisplayEx(pModel, tmpBuffer->data, tmpBuffer->numOfElems, pModel->maxCapacity, colInfo); + tColModelDisplayEx(pModel, tmpBuffer->data, tmpBuffer->numOfElems, pModel->capacity, colInfo); #endif if (pLocalReducer->discard) { @@ -1470,7 +1475,7 @@ int32_t tscDoLocalreduce(SSqlObj *pSql) { * continue to process results instead of return results. 
*/ if ((!sameGroup && pResBuf->numOfElems > 0) || - (pResBuf->numOfElems == pLocalReducer->resColModel->maxCapacity)) { + (pResBuf->numOfElems == pLocalReducer->resColModel->capacity)) { // does not belong to the same group bool notSkipped = doGenerateFinalResults(pSql, pLocalReducer, !sameGroup); diff --git a/src/client/src/tscServer.c b/src/client/src/tscServer.c index 677f728472382fe35990a4aea2c8f6e2eb4a8505..ea064093cd15c23211f81e8ca1a4663e25fd862a 100644 --- a/src/client/src/tscServer.c +++ b/src/client/src/tscServer.c @@ -901,7 +901,7 @@ int tscLaunchSTableSubqueries(SSqlObj *pSql) { tExtMemBuffer ** pMemoryBuf = NULL; tOrderDescriptor *pDesc = NULL; - tColModel * pModel = NULL; + SColumnModel * pModel = NULL; pRes->qhandle = 1; // hack the qhandle check @@ -1181,7 +1181,7 @@ void tscRetrieveFromVnodeCallBack(void *param, TAOS_RES *tres, int numOfRows) { tscTrace("%p sub:%p retrieve numOfRows:%d totalNumOfRows:%d from ip:%u,vid:%d,orderOfSub:%d", pPObj, pSql, pRes->numOfRows, pState->numOfRetrievedRows, pSvd->ip, pSvd->vnode, idx); - if (num > tsMaxNumOfOrderedResults) { + if (num > tsMaxNumOfOrderedResults && tscIsProjectionQueryOnSTable(pQueryInfo, 0)) { tscError("%p sub:%p num of OrderedRes is too many, max allowed:%" PRId64 " , current:%" PRId64, pPObj, pSql, tsMaxNumOfOrderedResults, num); tscAbortFurtherRetryRetrieval(trsupport, tres, TSDB_CODE_SORTED_RES_TOO_MANY); @@ -1194,7 +1194,7 @@ void tscRetrieveFromVnodeCallBack(void *param, TAOS_RES *tres, int numOfRows) { SSrcColumnInfo colInfo[256] = {0}; tscGetSrcColumnInfo(colInfo, pQueryInfo); - tColModelDisplayEx(pDesc->pSchema, pRes->data, pRes->numOfRows, pRes->numOfRows, colInfo); + tColModelDisplayEx(pDesc->pColumnModel, pRes->data, pRes->numOfRows, pRes->numOfRows, colInfo); #endif if (tsTotalTmpDirGB != 0 && tsAvailTmpDirGB < tsMinimalTmpDirGB) { tscError("%p sub:%p client disk space remain %.3f GB, need at least %.3f GB, stop query", pPObj, pSql, @@ -1202,6 +1202,7 @@ void 
tscRetrieveFromVnodeCallBack(void *param, TAOS_RES *tres, int numOfRows) { tscAbortFurtherRetryRetrieval(trsupport, tres, TSDB_CODE_CLI_NO_DISKSPACE); return; } + int32_t ret = saveToBuffer(trsupport->pExtMemBuffer[idx], pDesc, trsupport->localBuffer, pRes->data, pRes->numOfRows, pQueryInfo->groupbyExpr.orderType); if (ret < 0) { @@ -1214,17 +1215,17 @@ void tscRetrieveFromVnodeCallBack(void *param, TAOS_RES *tres, int numOfRows) { } else { // all data has been retrieved to client /* data in from current vnode is stored in cache and disk */ - uint32_t numOfRowsFromVnode = trsupport->pExtMemBuffer[idx]->numOfAllElems + trsupport->localBuffer->numOfElems; + uint32_t numOfRowsFromVnode = trsupport->pExtMemBuffer[idx]->numOfTotalElems + trsupport->localBuffer->numOfElems; tscTrace("%p sub:%p all data retrieved from ip:%u,vid:%d, numOfRows:%d, orderOfSub:%d", pPObj, pSql, pSvd->ip, pSvd->vnode, numOfRowsFromVnode, idx); - tColModelCompact(pDesc->pSchema, trsupport->localBuffer, pDesc->pSchema->maxCapacity); + tColModelCompact(pDesc->pColumnModel, trsupport->localBuffer, pDesc->pColumnModel->capacity); #ifdef _DEBUG_VIEW printf("%" PRIu64 " rows data flushed to disk:\n", trsupport->localBuffer->numOfElems); SSrcColumnInfo colInfo[256] = {0}; tscGetSrcColumnInfo(colInfo, pQueryInfo); - tColModelDisplayEx(pDesc->pSchema, trsupport->localBuffer->data, trsupport->localBuffer->numOfElems, + tColModelDisplayEx(pDesc->pColumnModel, trsupport->localBuffer->data, trsupport->localBuffer->numOfElems, trsupport->localBuffer->numOfElems, colInfo); #endif @@ -1256,7 +1257,7 @@ void tscRetrieveFromVnodeCallBack(void *param, TAOS_RES *tres, int numOfRows) { } // all sub-queries are returned, start to local merge process - pDesc->pSchema->maxCapacity = trsupport->pExtMemBuffer[idx]->numOfElemsPerPage; + pDesc->pColumnModel->capacity = trsupport->pExtMemBuffer[idx]->numOfElemsPerPage; tscTrace("%p retrieve from %d vnodes completed.final NumOfRows:%d,start to build loser tree", pPObj, 
pState->numOfTotal, pState->numOfRetrievedRows); @@ -1516,7 +1517,7 @@ void tscUpdateVnodeInQueryMsg(SSqlObj *pSql, char *buf) { char * pStart = buf + tsRpcHeadSize; SQueryMeterMsg *pQueryMsg = (SQueryMeterMsg *)pStart; - if (UTIL_METER_IS_NOMRAL_METER(pMeterMetaInfo)) { // pSchema == NULL, query on meter + if (UTIL_METER_IS_NOMRAL_METER(pMeterMetaInfo)) { // pColumnModel == NULL, query on meter SMeterMeta *pMeterMeta = pMeterMetaInfo->pMeterMeta; pQueryMsg->vnode = htons(pMeterMeta->vpeerDesc[pSql->index].vnode); } else { // query on metric diff --git a/src/client/src/tscStream.c b/src/client/src/tscStream.c index 9fc9706dd9fb87179fabcc484f7ae56afb231914..1b5b55352ebca20ec8d4496b76072bba32139568 100644 --- a/src/client/src/tscStream.c +++ b/src/client/src/tscStream.c @@ -396,7 +396,9 @@ static void tscSetSlidingWindowInfo(SSqlObj *pSql, SSqlStream *pStream) { int64_t minSlidingTime = (pStream->precision == TSDB_TIME_PRECISION_MICRO) ? tsMinSlidingTime * 1000L : tsMinSlidingTime; - if (pQueryInfo->nSlidingTime < minSlidingTime) { + if (pQueryInfo->nSlidingTime == -1) { + pQueryInfo->nSlidingTime = pQueryInfo->nAggTimeInterval; + } else if (pQueryInfo->nSlidingTime < minSlidingTime) { tscWarn("%p stream:%p, original sliding value:%" PRId64 " too small, reset to:%" PRId64 "", pSql, pStream, pQueryInfo->nSlidingTime, minSlidingTime); diff --git a/src/client/src/tscUtil.c b/src/client/src/tscUtil.c index e9395d7dde46e478ae5058e6842579eb33079b57..22027ab54a28bee78bba67ffa753136b7d99d0fe 100644 --- a/src/client/src/tscUtil.c +++ b/src/client/src/tscUtil.c @@ -676,7 +676,7 @@ int32_t tscGetDataBlockFromList(void* pHashList, SDataBlockList* pDataBlockList, STableDataBlocks** dataBlocks) { *dataBlocks = NULL; - STableDataBlocks** t1 = (STableDataBlocks**)taosGetDataFromHash(pHashList, (const char*)&id, sizeof(id)); + STableDataBlocks** t1 = (STableDataBlocks**)taosGetDataFromHashTable(pHashList, (const char*)&id, sizeof(id)); if (t1 != NULL) { *dataBlocks = *t1; } diff 
--git a/src/inc/hash.h b/src/inc/hash.h index 54a43fb6ebc3f692c642e1270a948016b4244194..14c73fb37015042f2be0dd31be89ba59374ce098 100644 --- a/src/inc/hash.h +++ b/src/inc/hash.h @@ -43,10 +43,10 @@ typedef struct SHashEntry { typedef struct HashObj { SHashEntry **hashList; - uint32_t capacity; - int size; - _hash_fn_t hashFp; - bool multithreadSafe; // enable lock + uint32_t capacity; // number of slots + int size; // number of elements in hash table + _hash_fn_t hashFp; // hash function + bool multithreadSafe; // enable lock or not #if defined LINUX pthread_rwlock_t lock; @@ -57,11 +57,13 @@ typedef struct HashObj { } HashObj; void *taosInitHashTable(uint32_t capacity, _hash_fn_t fn, bool multithreadSafe); +void taosDeleteFromHashTable(HashObj *pObj, const char *key, uint32_t keyLen); int32_t taosAddToHashTable(HashObj *pObj, const char *key, uint32_t keyLen, void *data, uint32_t size); -void taosDeleteFromHashTable(HashObj *pObj, const char *key, uint32_t keyLen); +int32_t taosNumElemsInHashTable(HashObj *pObj); + +char *taosGetDataFromHashTable(HashObj *pObj, const char *key, uint32_t keyLen); -char *taosGetDataFromHash(HashObj *pObj, const char *key, uint32_t keyLen); void taosCleanUpHashTable(void *handle); diff --git a/src/inc/textbuffer.h b/src/inc/textbuffer.h index c7de20bd746d5889cc9d2c6407743ecace557bae..b46b98ed382e207d77abdff8a8bd2f41408f5fd5 100644 --- a/src/inc/textbuffer.h +++ b/src/inc/textbuffer.h @@ -19,20 +19,14 @@ extern "C" { #endif -#include -#include -#include - -#include "tutil.h" +#include "os.h" #include "taosmsg.h" +#include "tutil.h" -#define DEFAULT_PAGE_SIZE 16384 // 16k larger than the SHistoInfo -#define MIN_BUFFER_SIZE (1 << 19) -#define MAX_TMPFILE_PATH_LENGTH PATH_MAX -#define INITIAL_ALLOCATION_BUFFER_SIZE 64 - -// forward declare -struct tTagSchema; +#define DEFAULT_PAGE_SIZE 16384 // 16k larger than the SHistoInfo +#define MIN_BUFFER_SIZE (1 << 19) +#define MAX_TMPFILE_PATH_LENGTH PATH_MAX +#define 
INITIAL_ALLOCATION_BUFFER_SIZE 64 typedef enum EXT_BUFFER_FLUSH_MODEL { /* @@ -61,12 +55,12 @@ typedef struct tFlushoutData { tFlushoutInfo *pFlushoutInfo; } tFlushoutData; -typedef struct tFileMeta { +typedef struct SFileInfo { uint32_t nFileSize; // in pages - uint32_t nPageSize; + uint32_t pageSize; uint32_t numOfElemsInFile; tFlushoutData flushoutData; -} tFileMeta; +} SFileInfo; typedef struct tFilePage { uint64_t numOfElems; @@ -78,65 +72,73 @@ typedef struct tFilePagesItem { tFilePage item; } tFilePagesItem; -typedef struct tColModel { - int32_t maxCapacity; - int32_t numOfCols; - int16_t * colOffset; - struct SSchema *pFields; -} tColModel; +typedef struct SSchemaEx { + struct SSchema field; + int16_t offset; +} SSchemaEx; -typedef struct tOrderIdx { - int32_t numOfOrderedCols; +typedef struct SColumnModel { + int32_t capacity; + int32_t numOfCols; + int16_t rowSize; + SSchemaEx *pFields; +} SColumnModel; + +typedef struct SColumnOrderInfo { + int32_t numOfCols; int16_t pData[]; -} tOrderIdx; +} SColumnOrderInfo; typedef struct tOrderDescriptor { - union { - struct tTagSchema *pTagSchema; - tColModel * pSchema; - }; - int32_t tsOrder; // timestamp order type if exists - tOrderIdx orderIdx; + SColumnModel * pColumnModel; + int32_t tsOrder; // timestamp order type if exists + SColumnOrderInfo orderIdx; } tOrderDescriptor; typedef struct tExtMemBuffer { - int32_t nMaxSizeInPages; - + int32_t inMemCapacity; int32_t nElemSize; - int32_t nPageSize; - - int32_t numOfAllElems; + int32_t pageSize; + int32_t numOfTotalElems; int32_t numOfElemsInBuffer; int32_t numOfElemsPerPage; + int16_t numOfInMemPages; - int16_t numOfPagesInMem; tFilePagesItem *pHead; tFilePagesItem *pTail; - tFileMeta fileMeta; - - char dataFilePath[MAX_TMPFILE_PATH_LENGTH]; - FILE *dataFile; - - tColModel *pColModel; + char * path; + FILE * file; + SFileInfo fileMeta; + SColumnModel * pColumnModel; EXT_BUFFER_FLUSH_MODEL flushModel; } tExtMemBuffer; +/** + * + * @param fileNamePattern + * @param 
dstPath + */ void getTmpfilePath(const char *fileNamePattern, char *dstPath); -/* - * create ext-memory buffer +/** + * + * @param inMemSize + * @param elemSize + * @param pModel + * @return */ -void tExtMemBufferCreate(tExtMemBuffer **pMemBuffer, int32_t numOfBufferSize, int32_t elemSize, - const char *tmpDataFilePath, tColModel *pModel); +tExtMemBuffer *createExtMemBuffer(int32_t inMemSize, int32_t elemSize, SColumnModel *pModel); -/* - * destroy ext-memory buffer +/** + * + * @param pMemBuffer + * @return */ -void tExtMemBufferDestroy(tExtMemBuffer **pMemBuffer); +void *destoryExtMemBuffer(tExtMemBuffer *pMemBuffer); -/* +/** * @param pMemBuffer * @param data input data pointer * @param numOfRows number of rows in data @@ -145,12 +147,15 @@ void tExtMemBufferDestroy(tExtMemBuffer **pMemBuffer); */ int16_t tExtMemBufferPut(tExtMemBuffer *pMemBuffer, void *data, int32_t numOfRows); -/* - * flush all data into disk and release all in-memory buffer +/** + * + * @param pMemBuffer + * @return */ bool tExtMemBufferFlush(tExtMemBuffer *pMemBuffer); -/* +/** + * * remove all data that has been put into buffer, including in buffer or * ext-buffer(disk) */ @@ -163,11 +168,44 @@ void tExtMemBufferClear(tExtMemBuffer *pMemBuffer); */ bool tExtMemBufferLoadData(tExtMemBuffer *pMemBuffer, tFilePage *pFilePage, int32_t flushIdx, int32_t pageIdx); +/** + * + * @param pMemBuffer + * @return + */ bool tExtMemBufferIsAllDataInMem(tExtMemBuffer *pMemBuffer); -tColModel *tColModelCreate(SSchema *field, int32_t numOfCols, int32_t maxCapacity); +/** + * + * @param fields + * @param numOfCols + * @param blockCapacity + * @return + */ +SColumnModel *createColumnModel(SSchema *fields, int32_t numOfCols, int32_t blockCapacity); + +/** + * + * @param pSrc + * @return + */ +SColumnModel *cloneColumnModel(SColumnModel *pSrc); + +/** + * + * @param pModel + */ +void destroyColumnModel(SColumnModel *pModel); + +/* + * compress data into consecutive block without hole in data + */ +void 
tColModelCompact(SColumnModel *pModel, tFilePage *inputBuffer, int32_t maxElemsCapacity); + +void tColModelErase(SColumnModel *pModel, tFilePage *inputBuffer, int32_t maxCapacity, int32_t s, int32_t e); +SSchema *getColumnModelSchema(SColumnModel *pColumnModel, int32_t index); -void tColModelDestroy(tColModel *pModel); +int16_t getColumnModelOffset(SColumnModel *pColumnModel, int32_t index); typedef struct SSrcColumnInfo { int32_t functionId; @@ -177,68 +215,18 @@ typedef struct SSrcColumnInfo { /* * display data in column format model for debug purpose only */ -void tColModelDisplay(tColModel *pModel, void *pData, int32_t numOfRows, int32_t maxCount); +void tColModelDisplay(SColumnModel *pModel, void *pData, int32_t numOfRows, int32_t maxCount); -void tColModelDisplayEx(tColModel *pModel, void *pData, int32_t numOfRows, int32_t maxCount, SSrcColumnInfo *pInfo); +void tColModelDisplayEx(SColumnModel *pModel, void *pData, int32_t numOfRows, int32_t maxCount, SSrcColumnInfo *pInfo); -/* - * compress data into consecutive block without hole in data - */ -void tColModelCompact(tColModel *pModel, tFilePage *inputBuffer, int32_t maxElemsCapacity); - -void tColModelErase(tColModel *pModel, tFilePage *inputBuffer, int32_t maxCapacity, int32_t s, int32_t e); - -tOrderDescriptor *tOrderDesCreate(int32_t *orderColIdx, int32_t numOfOrderCols, tColModel *pModel, int32_t tsOrderType); +tOrderDescriptor *tOrderDesCreate(const int32_t *orderColIdx, int32_t numOfOrderCols, SColumnModel *pModel, + int32_t tsOrderType); void tOrderDescDestroy(tOrderDescriptor *pDesc); -void tColModelAppend(tColModel *dstModel, tFilePage *dstPage, void *srcData, int32_t srcStartRows, +void tColModelAppend(SColumnModel *dstModel, tFilePage *dstPage, void *srcData, int32_t srcStartRows, int32_t numOfRowsToWrite, int32_t srcCapacity); -/////////////////////////////////////////////////////////////////////////////////////////////////////// -typedef struct MinMaxEntry { - union { - double dMinVal; - int32_t 
iMinVal; - int64_t i64MinVal; - }; - union { - double dMaxVal; - int32_t iMaxVal; - int64_t i64MaxVal; - }; -} MinMaxEntry; - -typedef struct tMemBucketSegment { - int32_t numOfSlots; - MinMaxEntry * pBoundingEntries; - tExtMemBuffer **pBuffer; -} tMemBucketSegment; - -typedef struct tMemBucket { - int16_t numOfSegs; - int16_t nTotalSlots; - int16_t nSlotsOfSeg; - int16_t dataType; - - int16_t nElemSize; - int32_t numOfElems; - - int32_t nTotalBufferSize; - int32_t maxElemsCapacity; - - int16_t nPageSize; - int16_t numOfTotalPages; - int16_t numOfAvailPages; /* remain available buffer pages */ - - tMemBucketSegment *pSegs; - tOrderDescriptor * pOrderDesc; - - MinMaxEntry nRange; - - void (*HashFunc)(struct tMemBucket *pBucket, void *value, int16_t *segIdx, int16_t *slotIdx); -} tMemBucket; - typedef int (*__col_compar_fn_t)(tOrderDescriptor *, int32_t numOfRows, int32_t idx1, int32_t idx2, char *data); void tColDataQSort(tOrderDescriptor *, int32_t numOfRows, int32_t start, int32_t end, char *data, int32_t orderType); @@ -253,19 +241,6 @@ int32_t compare_a(tOrderDescriptor *, int32_t numOfRow1, int32_t s1, char *data1 int32_t compare_d(tOrderDescriptor *, int32_t numOfRow1, int32_t s1, char *data1, int32_t numOfRow2, int32_t s2, char *data2); -tMemBucket* tMemBucketCreate(int32_t totalSlots, int32_t nBufferSize, int16_t nElemSize, - int16_t dataType, tOrderDescriptor *pDesc); - -void tMemBucketDestroy(tMemBucket *pBucket); - -void tMemBucketPut(tMemBucket *pBucket, void *data, int32_t numOfRows); - -double getPercentile(tMemBucket *pMemBucket, double percent); - -void tBucketIntHash(tMemBucket *pBucket, void *value, int16_t *segIdx, int16_t *slotIdx); - -void tBucketDoubleHash(tMemBucket *pBucket, void *value, int16_t *segIdx, int16_t *slotIdx); - #ifdef __cplusplus } #endif diff --git a/src/inc/thistogram.h b/src/inc/thistogram.h index 7e5b1ccac6c9f1c882e9690398b6526340cf9fde..bb058449e806c8270dbf141ca3a81103f63c6e5c 100644 --- a/src/inc/thistogram.h +++ 
b/src/inc/thistogram.h @@ -20,8 +20,6 @@ extern "C" { #endif -#include "tskiplist.h" - #define USE_ARRAYLIST #define MAX_HISTOGRAM_BIN 500 diff --git a/src/inc/tinterpolation.h b/src/inc/tinterpolation.h index 22b4ceb4f06799e826c2711c7317b1ebd3197e9d..f4b327bcbec82b2b9ca8e2f5c92b044700240dbc 100644 --- a/src/inc/tinterpolation.h +++ b/src/inc/tinterpolation.h @@ -78,7 +78,7 @@ int32_t taosNumOfRemainPoints(SInterpolationInfo *pInterpoInfo); */ int32_t taosDoInterpoResult(SInterpolationInfo *pInterpoInfo, int16_t interpoType, tFilePage **data, int32_t numOfRawDataInRows, int32_t outputRows, int64_t nInterval, - const int64_t *pPrimaryKeyArray, tColModel *pModel, char **srcData, int64_t *defaultVal, + const int64_t *pPrimaryKeyArray, SColumnModel *pModel, char **srcData, int64_t *defaultVal, const int32_t *functionIDs, int32_t bufSize); int taosDoLinearInterpolation(int32_t type, SPoint *point1, SPoint *point2, SPoint *point); diff --git a/src/inc/tpercentile.h b/src/inc/tpercentile.h new file mode 100644 index 0000000000000000000000000000000000000000..b9cf50e0bbf24357b729f8bc39996f589d6c18fc --- /dev/null +++ b/src/inc/tpercentile.h @@ -0,0 +1,77 @@ +/* + * Copyright (c) 2019 TAOS Data, Inc. + * + * This program is free software: you can use, redistribute, and/or modify + * it under the terms of the GNU Affero General Public License, version 3 + * or later ("AGPL"), as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. + * + * You should have received a copy of the GNU Affero General Public License + * along with this program. If not, see . 
+ */ + +#ifndef TDENGINE_TPERCENTILE_H +#define TDENGINE_TPERCENTILE_H + +#include "textbuffer.h" + +typedef struct MinMaxEntry { + union { + double dMinVal; + int32_t iMinVal; + int64_t i64MinVal; + }; + union { + double dMaxVal; + int32_t iMaxVal; + int64_t i64MaxVal; + }; +} MinMaxEntry; + +typedef struct tMemBucketSegment { + int32_t numOfSlots; + MinMaxEntry * pBoundingEntries; + tExtMemBuffer **pBuffer; +} tMemBucketSegment; + +typedef struct tMemBucket { + int16_t numOfSegs; + int16_t nTotalSlots; + int16_t nSlotsOfSeg; + int16_t dataType; + + int16_t nElemSize; + int32_t numOfElems; + + int32_t nTotalBufferSize; + int32_t maxElemsCapacity; + + int16_t pageSize; + int16_t numOfTotalPages; + int16_t numOfAvailPages; /* remain available buffer pages */ + + tMemBucketSegment *pSegs; + tOrderDescriptor * pOrderDesc; + + MinMaxEntry nRange; + + void (*HashFunc)(struct tMemBucket *pBucket, void *value, int16_t *segIdx, int16_t *slotIdx); +} tMemBucket; + +tMemBucket *tMemBucketCreate(int32_t totalSlots, int32_t nBufferSize, int16_t nElemSize, int16_t dataType, + tOrderDescriptor *pDesc); + +void tMemBucketDestroy(tMemBucket *pBucket); + +void tMemBucketPut(tMemBucket *pBucket, void *data, int32_t numOfRows); + +double getPercentile(tMemBucket *pMemBucket, double percent); + +void tBucketIntHash(tMemBucket *pBucket, void *value, int16_t *segIdx, int16_t *slotIdx); + +void tBucketDoubleHash(tMemBucket *pBucket, void *value, int16_t *segIdx, int16_t *slotIdx); + +#endif // TDENGINE_TPERCENTILE_H diff --git a/src/inc/tresultBuf.h b/src/inc/tresultBuf.h new file mode 100644 index 0000000000000000000000000000000000000000..a464479af27a7e8515f4260c0ea6a73aed780933 --- /dev/null +++ b/src/inc/tresultBuf.h @@ -0,0 +1,104 @@ +#ifndef TDENGINE_VNODEQUERYUTIL_H +#define TDENGINE_VNODEQUERYUTIL_H + +#ifdef __cplusplus +extern "C" { +#endif + +#include "os.h" +#include "textbuffer.h" + +typedef struct SIDList { + uint32_t alloc; + int32_t size; + int32_t* pData; +} SIDList; + 
+typedef struct SQueryResultBuf { + int32_t numOfRowsPerPage; + int32_t numOfPages; + int64_t totalBufSize; + int32_t fd; // data file fd + int32_t allocateId; // allocated page id + int32_t incStep; // minimum allocated pages + char* pBuf; // mmap buffer pointer + char* path; // file path + + uint32_t numOfAllocGroupIds; // number of allocated id list + void* idsTable; // id hash table + SIDList* list; // for each id, there is a page id list +} SQueryResultBuf; + +/** + * create disk-based result buffer + * @param pResultBuf + * @param size + * @param rowSize + * @return + */ +int32_t createResultBuf(SQueryResultBuf** pResultBuf, int32_t size, int32_t rowSize); + +/** + * + * @param pResultBuf + * @param groupId + * @param pageId + * @return + */ +tFilePage* getNewDataBuf(SQueryResultBuf* pResultBuf, int32_t groupId, int32_t* pageId); + +/** + * + * @param pResultBuf + * @return + */ +int32_t getNumOfRowsPerPage(SQueryResultBuf* pResultBuf); + +/** + * + * @param pResultBuf + * @param groupId + * @return + */ +SIDList getDataBufPagesIdList(SQueryResultBuf* pResultBuf, int32_t groupId); + +/** + * get the specified buffer page by id + * @param pResultBuf + * @param id + * @return + */ +tFilePage* getResultBufferPageById(SQueryResultBuf* pResultBuf, int32_t id); + +/** + * get the total buffer size in the format of disk file + * @param pResultBuf + * @return + */ +int32_t getResBufSize(SQueryResultBuf* pResultBuf); + +/** + * get the number of groups in the result buffer + * @param pResultBuf + * @return + */ +int32_t getNumOfResultBufGroupId(SQueryResultBuf* pResultBuf); + +/** + * destroy result buffer + * @param pResultBuf + */ +void destroyResultBuf(SQueryResultBuf* pResultBuf); + +/** + * + * @param pList + * @return + */ +int32_t getLastPageId(SIDList *pList); + +#ifdef __cplusplus +} +#endif + +#endif // TDENGINE_VNODEQUERYUTIL_H diff --git a/src/system/detail/inc/vnodeQueryImpl.h b/src/system/detail/inc/vnodeQueryImpl.h index 
dc86f924aa1655291cb9ee97c2b23d8386ae46a7..40b65aa16375a81bb4a46f61967b2576d5af6189 100644 --- a/src/system/detail/inc/vnodeQueryImpl.h +++ b/src/system/detail/inc/vnodeQueryImpl.h @@ -13,8 +13,8 @@ * along with this program. If not, see . */ -#ifndef TDENGINE_VNODEQUERYUTIL_H -#define TDENGINE_VNODEQUERYUTIL_H +#ifndef TDENGINE_VNODEQUERYIMPL_H +#define TDENGINE_VNODEQUERYIMPL_H #ifdef __cplusplus extern "C" { @@ -120,7 +120,7 @@ typedef enum { typedef int (*__block_search_fn_t)(char* data, int num, int64_t key, int order); static FORCE_INLINE SMeterObj* getMeterObj(void* hashHandle, int32_t sid) { - return *(SMeterObj**)taosGetDataFromHash(hashHandle, (const char*) &sid, sizeof(sid)); + return *(SMeterObj**)taosGetDataFromHashTable(hashHandle, (const char*) &sid, sizeof(sid)); } bool isQueryKilled(SQuery* pQuery); @@ -209,7 +209,7 @@ int32_t vnodeGetHeaderFile(SQueryRuntimeEnv *pRuntimeEnv, int32_t fileIndex); * @param ekey * @return */ -SMeterQueryInfo* createMeterQueryInfo(SQuery* pQuery, TSKEY skey, TSKEY ekey); +SMeterQueryInfo* createMeterQueryInfo(SQuery* pQuery, int32_t sid, TSKEY skey, TSKEY ekey); /** * Destroy meter query info @@ -224,7 +224,7 @@ void destroyMeterQueryInfo(SMeterQueryInfo *pMeterQueryInfo, int32_t numOfCols); * @param skey * @param ekey */ -void changeMeterQueryInfoForSuppleQuery(SMeterQueryInfo *pMeterQueryInfo, TSKEY skey, TSKEY ekey); +void changeMeterQueryInfoForSuppleQuery(SQueryResultBuf* pResultBuf, SMeterQueryInfo *pMeterQueryInfo, TSKEY skey, TSKEY ekey); /** * add the new allocated disk page to meter query info @@ -276,11 +276,11 @@ void displayInterResult(SData** pdata, SQuery* pQuery, int32_t numOfRows); void vnodePrintQueryStatistics(SMeterQuerySupportObj* pSupporter); -void clearGroupResultBuf(SOutputRes* pOneOutputRes, int32_t nOutputCols); -void copyGroupResultBuf(SOutputRes* dst, const SOutputRes* src, int32_t nOutputCols); +void clearGroupResultBuf(SQueryRuntimeEnv *pRuntimeEnv, SOutputRes *pOneOutputRes); +void 
copyGroupResultBuf(SQueryRuntimeEnv *pRuntimeEnv, SOutputRes* dst, const SOutputRes* src); -void resetSlidingWindowInfo(SSlidingWindowInfo* pSlidingWindowInfo, int32_t numOfCols); -void clearCompletedSlidingWindows(SSlidingWindowInfo* pSlidingWindowInfo, int32_t numOfCols); +void resetSlidingWindowInfo(SQueryRuntimeEnv *pRuntimeEnv, SSlidingWindowInfo* pSlidingWindowInfo); +void clearCompletedSlidingWindows(SQueryRuntimeEnv* pRuntimeEnv); int32_t numOfClosedSlidingWindow(SSlidingWindowInfo* pSlidingWindowInfo); void closeSlidingWindow(SSlidingWindowInfo* pSlidingWindowInfo, int32_t slot); void closeAllSlidingWindow(SSlidingWindowInfo* pSlidingWindowInfo); @@ -289,4 +289,4 @@ void closeAllSlidingWindow(SSlidingWindowInfo* pSlidingWindowInfo); } #endif -#endif // TDENGINE_VNODEQUERYUTIL_H +#endif // TDENGINE_VNODEQUERYIMPL_H diff --git a/src/system/detail/inc/vnodeRead.h b/src/system/detail/inc/vnodeRead.h index ee88e5e36681a2a9d22faa5f757fa3c4f1f57a59..bda53cd3d86161b172a97b03b81f19dc3438f6b5 100644 --- a/src/system/detail/inc/vnodeRead.h +++ b/src/system/detail/inc/vnodeRead.h @@ -21,6 +21,7 @@ extern "C" { #endif #include "os.h" +#include "tresultBuf.h" #include "tinterpolation.h" #include "vnodeTagMgmt.h" @@ -84,10 +85,15 @@ typedef struct SQueryCostSummary { int64_t tmpBufferInDisk; // size of buffer for intermediate result } SQueryCostSummary; +typedef struct SPosInfo { + int64_t pageId; + int32_t rowId; +} SPosInfo; + typedef struct SOutputRes { uint16_t numOfRows; int32_t nAlloc; - tFilePage** result; + SPosInfo pos; SResultInfo* resultInfo; } SOutputRes; @@ -159,7 +165,7 @@ typedef struct SQueryRuntimeEnv { SInterpolationInfo interpoInfo; SData** pInterpoBuf; - SSlidingWindowInfo swindowResInfo; + SSlidingWindowInfo swindowResInfo; STSBuf* pTSBuf; STSCursor cur; @@ -174,16 +180,17 @@ typedef struct SQueryRuntimeEnv { * So we keep a copy of the support structure as well as the cache block data itself. 
*/ SCacheBlock cacheBlock; + + SQueryResultBuf* pResultBuf; + bool stableQuery; // is super table query or not } SQueryRuntimeEnv; -/* intermediate result during multimeter query involves interval */ +/* intermediate pos during multimeter query involves interval */ typedef struct SMeterQueryInfo { int64_t lastKey; int64_t skey; int64_t ekey; int32_t numOfRes; - uint32_t numOfPages; - uint32_t numOfAlloc; int32_t reverseIndex; // reversed output indicator, start from (numOfRes-1) int16_t reverseFillRes; // denote if reverse fill the results in supplementary scan required or not int16_t queryRangeSet; // denote if the query range is set, only available for interval query @@ -191,7 +198,7 @@ typedef struct SMeterQueryInfo { int64_t tag; STSCursor cur; SResultInfo* resultInfo; - uint32_t* pageList; + int32_t sid; // for retrieve the page id list } SMeterQueryInfo; typedef struct SMeterDataInfo { @@ -235,16 +242,8 @@ typedef struct SMeterQuerySupportObj { */ int32_t meterIdx; - int32_t meterOutputFd; - int32_t lastPageId; - int32_t numOfPages; int32_t numOfGroupResultPages; int32_t groupResultSize; - - char* meterOutputMMapBuf; - int64_t bufSize; - char extBufFile[256]; // external file name - SMeterDataInfo* pMeterDataInfo; TSKEY* tsList; diff --git a/src/system/detail/inc/vnodeTagMgmt.h b/src/system/detail/inc/vnodeTagMgmt.h index 320ef5645395e08578d206b4f9db89809b56dafb..b801d1c5412d68ff52fc5e9c0ad33b8b23a6a469 100644 --- a/src/system/detail/inc/vnodeTagMgmt.h +++ b/src/system/detail/inc/vnodeTagMgmt.h @@ -32,21 +32,14 @@ extern "C" { * Note: * 1. we implement a quick sort algorithm, may remove it later. 
*/ - -typedef struct tTagSchema { - struct SSchema *pSchema; - int32_t numOfCols; - int32_t colOffset[]; -} tTagSchema; - typedef struct tSidSet { int32_t numOfSids; int32_t numOfSubSet; SMeterSidExtInfo **pSids; int32_t * starterPos; // position of each subgroup, generated according to - tTagSchema *pTagSchema; - tOrderIdx orderIdx; + SColumnModel *pColumnModel; + SColumnOrderInfo orderIdx; } tSidSet; typedef int32_t (*__ext_compar_fn_t)(const void *p1, const void *p2, void *param); @@ -54,8 +47,6 @@ typedef int32_t (*__ext_compar_fn_t)(const void *p1, const void *p2, void *param tSidSet *tSidSetCreate(struct SMeterSidExtInfo **pMeterSidExtInfo, int32_t numOfMeters, SSchema *pSchema, int32_t numOfTags, SColIndexEx *colList, int32_t numOfOrderCols); -tTagSchema *tCreateTagSchema(SSchema *pSchema, int32_t numOfTagCols); - int32_t *calculateSubGroup(void **pSids, int32_t numOfMeters, int32_t *numOfSubset, tOrderDescriptor *pOrderDesc, __ext_compar_fn_t compareFn); diff --git a/src/system/detail/src/mgmtDnodeInt.c b/src/system/detail/src/mgmtDnodeInt.c index a1dae7738f8f0f32b6c22666e62aff89e13f2c16..1a6d0c9c09f6db94cb885387ee917024c2847cd2 100644 --- a/src/system/detail/src/mgmtDnodeInt.c +++ b/src/system/detail/src/mgmtDnodeInt.c @@ -210,7 +210,7 @@ char *mgmtBuildCreateMeterIe(STabObj *pMeter, char *pMsg, int vnode) { for (int i = 0; i < pMeter->numOfColumns; ++i) { pCreateMeter->schema[i].type = pSchema[i].type; - /* strcpy(pCreateMeter->schema[i].name, pSchema[i].name); */ + /* strcpy(pCreateMeter->schema[i].name, pColumnModel[i].name); */ pCreateMeter->schema[i].bytes = htons(pSchema[i].bytes); pCreateMeter->schema[i].colId = htons(pSchema[i].colId); } diff --git a/src/system/detail/src/mgmtSupertableQuery.c b/src/system/detail/src/mgmtSupertableQuery.c index 1b7ae66e6d5b01dd15e0dc26d91a03e37a2e29ab..347b54595eea5ab786d622e68c2befd122f76e07 100644 --- a/src/system/detail/src/mgmtSupertableQuery.c +++ b/src/system/detail/src/mgmtSupertableQuery.c @@ -70,7 +70,7 @@ 
static int32_t tabObjResultComparator(const void* p1, const void* p2, void* para STabObj* pNode1 = (STabObj*)p1; STabObj* pNode2 = (STabObj*)p2; - for (int32_t i = 0; i < pOrderDesc->orderIdx.numOfOrderedCols; ++i) { + for (int32_t i = 0; i < pOrderDesc->orderIdx.numOfCols; ++i) { int32_t colIdx = pOrderDesc->orderIdx.pData[i]; char* f1 = NULL; @@ -86,7 +86,9 @@ static int32_t tabObjResultComparator(const void* p1, const void* p2, void* para } else { f1 = mgmtMeterGetTag(pNode1, colIdx, NULL); f2 = mgmtMeterGetTag(pNode2, colIdx, &schema); - assert(schema.type == pOrderDesc->pTagSchema->pSchema[colIdx].type); + + SSchema* pSchema = getColumnModelSchema(pOrderDesc->pColumnModel, colIdx); + assert(schema.type == pSchema->type); } int32_t ret = doCompare(f1, f2, schema.type, schema.bytes); @@ -109,7 +111,7 @@ static int32_t tabObjResultComparator(const void* p1, const void* p2, void* para * @param pOrderIndexInfo * @param numOfTags */ -static void mgmtUpdateOrderTagColIndex(SMetricMetaMsg* pMetricMetaMsg, int32_t tableIndex, tOrderIdx* pOrderIndexInfo, +static void mgmtUpdateOrderTagColIndex(SMetricMetaMsg* pMetricMetaMsg, int32_t tableIndex, SColumnOrderInfo* pOrderIndexInfo, int32_t numOfTags) { SMetricMetaElemMsg* pElem = (SMetricMetaElemMsg*)((char*)pMetricMetaMsg + pMetricMetaMsg->metaElem[tableIndex]); SColIndexEx* groupColumnList = (SColIndexEx*)((char*)pMetricMetaMsg + pElem->groupbyTagColumnList); @@ -123,7 +125,7 @@ static void mgmtUpdateOrderTagColIndex(SMetricMetaMsg* pMetricMetaMsg, int32_t t } } - pOrderIndexInfo->numOfOrderedCols = numOfGroupbyTags; + pOrderIndexInfo->numOfCols = numOfGroupbyTags; } // todo merge sort function with losertree used @@ -143,14 +145,14 @@ void mgmtReorganizeMetersInMetricMeta(SMetricMetaMsg* pMetricMetaMsg, int32_t ta */ tOrderDescriptor* descriptor = (tOrderDescriptor*)calloc(1, sizeof(tOrderDescriptor) + sizeof(int32_t) * pElem->numOfGroupCols); - descriptor->pTagSchema = tCreateTagSchema(pTagSchema, pMetric->numOfTags); 
- descriptor->orderIdx.numOfOrderedCols = pElem->numOfGroupCols; + descriptor->pColumnModel = createColumnModel(pTagSchema, pMetric->numOfTags, 1); + descriptor->orderIdx.numOfCols = pElem->numOfGroupCols; int32_t* startPos = NULL; int32_t numOfSubset = 1; mgmtUpdateOrderTagColIndex(pMetricMetaMsg, tableIndex, &descriptor->orderIdx, pMetric->numOfTags); - if (descriptor->orderIdx.numOfOrderedCols > 0) { + if (descriptor->orderIdx.numOfCols > 0) { tQSortEx(pRes->pRes, POINTER_BYTES, 0, pRes->num - 1, descriptor, tabObjResultComparator); startPos = calculateSubGroup(pRes->pRes, pRes->num, &numOfSubset, descriptor, tabObjResultComparator); } else { @@ -166,7 +168,7 @@ void mgmtReorganizeMetersInMetricMeta(SMetricMetaMsg* pMetricMetaMsg, int32_t ta */ qsort(pRes->pRes, (size_t)pRes->num, POINTER_BYTES, tabObjVGIDComparator); - free(descriptor->pTagSchema); + free(descriptor->pColumnModel); free(descriptor); free(startPos); } @@ -291,15 +293,15 @@ static void orderResult(SMetricMetaMsg* pMetricMetaMsg, tQueryResultset* pRes, i STabObj* pMetric = mgmtGetMeter(pElem->meterId); SSchema* pTagSchema = (SSchema*)(pMetric->schema + pMetric->numOfColumns * sizeof(SSchema)); - descriptor->pTagSchema = tCreateTagSchema(pTagSchema, pMetric->numOfTags); + descriptor->pColumnModel = createColumnModel(pTagSchema, pMetric->numOfTags, 1); descriptor->orderIdx.pData[0] = colIndex; - descriptor->orderIdx.numOfOrderedCols = 1; + descriptor->orderIdx.numOfCols = 1; // sort results list tQSortEx(pRes->pRes, POINTER_BYTES, 0, pRes->num - 1, descriptor, tabObjResultComparator); - free(descriptor->pTagSchema); + free(descriptor->pColumnModel); free(descriptor); } diff --git a/src/system/detail/src/vnodeFile.c b/src/system/detail/src/vnodeFile.c index 9c53d47507b18582333aa64abc9f39fb1cab5407..8ac2f212112f38b84dd8998e0b7353e091a46d57 100644 --- a/src/system/detail/src/vnodeFile.c +++ b/src/system/detail/src/vnodeFile.c @@ -127,7 +127,7 @@ int vnodeCreateHeadDataFile(int vnode, int fileId, char 
*headName, char *dataNam if (symlink(dDataName, dataName) != 0) return -1; if (symlink(dLastName, lastName) != 0) return -1; - dPrint("vid:%d, fileId:%d, empty header file:%s dataFile:%s lastFile:%s on disk:%s is created ", + dPrint("vid:%d, fileId:%d, empty header file:%s file:%s lastFile:%s on disk:%s is created ", vnode, fileId, headName, dataName, lastName, path); return 0; diff --git a/src/system/detail/src/vnodeQueryImpl.c b/src/system/detail/src/vnodeQueryImpl.c index 81fce50bd6f950c2f50fc60efb4ce251481855bb..33fb3fe760de8b6dc27079d8da231b4291fc2c7e 100644 --- a/src/system/detail/src/vnodeQueryImpl.c +++ b/src/system/detail/src/vnodeQueryImpl.c @@ -56,7 +56,7 @@ static void vnodeInitLoadCompBlockInfo(SLoadCompBlockInfo *pCompBlockLoadInfo static int32_t moveToNextBlock(SQueryRuntimeEnv *pRuntimeEnv, int32_t step, __block_search_fn_t searchFn, bool loadData); static int32_t doMergeMetersResultsToGroupRes(SMeterQuerySupportObj *pSupporter, SQuery *pQuery, - SQueryRuntimeEnv *pRuntimeEnv, SMeterDataInfo *pMeterHeadDataInfo, + SQueryRuntimeEnv *pRuntimeEnv, SMeterDataInfo *pMeterDataInfo, int32_t start, int32_t end); static TSKEY getTimestampInCacheBlock(SQueryRuntimeEnv *pRuntimeEnv, SCacheBlock *pBlock, int32_t index); @@ -68,9 +68,6 @@ static int32_t getNextDataFileCompInfo(SQueryRuntimeEnv *pRuntimeEnv, SMeterObj static void setGroupOutputBuffer(SQueryRuntimeEnv *pRuntimeEnv, SOutputRes *pResult); static void getAlignedIntervalQueryRange(SQueryRuntimeEnv *pRuntimeEnv, TSKEY keyInData, TSKEY skey, TSKEY ekey); -static void doApplyIntervalQueryOnBlock(SMeterQuerySupportObj *pSupporter, SMeterQueryInfo *pMeterQueryInfo, - SBlockInfo *pBlockInfo, int64_t *pPrimaryCol, SField *pFields, - __block_search_fn_t searchFn); static int32_t saveResult(SMeterQuerySupportObj *pSupporter, SMeterQueryInfo *pMeterQueryInfo, int32_t numOfResult); static void applyIntervalQueryOnBlock(SMeterQuerySupportObj *pSupporter, SMeterDataInfo *pMeterDataInfo, @@ -589,7 +586,8 @@ static 
void setExecParams(SQuery *pQuery, SQLFunctionCtx *pCtx, int64_t StartQue char *primaryColumnData, int32_t size, int32_t functionId, SField *pField, bool hasNull, int32_t blockStatus, void *param, int32_t scanFlag); -void createGroupResultBuf(SQuery *pQuery, SOutputRes *pOneResult, bool isMetricQuery); +void createQueryResultBuf(SQueryRuntimeEnv *pRuntimeEnv, SOutputRes *pResultRow, bool isSTableQuery, SPosInfo *posInfo); + static void destroyGroupResultBuf(SOutputRes *pOneOutputRes, int32_t nOutputCols); static int32_t binarySearchForBlockImpl(SCompBlock *pBlock, int32_t numOfBlocks, TSKEY skey, int32_t order) { @@ -1576,7 +1574,7 @@ static SOutputRes *doSetSlidingWindowFromKey(SSlidingWindowInfo *pSlidingWindowI SWindowStatus **pStatus) { int32_t p = -1; - int32_t *p1 = (int32_t *)taosGetDataFromHash(pSlidingWindowInfo->hashList, pData, bytes); + int32_t *p1 = (int32_t *)taosGetDataFromHashTable(pSlidingWindowInfo->hashList, pData, bytes); if (p1 != NULL) { p = *p1; @@ -1614,7 +1612,7 @@ static SOutputRes *doSetSlidingWindowFromKey(SSlidingWindowInfo *pSlidingWindowI return &pSlidingWindowInfo->pResult[p]; } -static int32_t initSlidingWindowInfo(SSlidingWindowInfo *pSlidingWindowInfo, int32_t threshold, int16_t type, +static int32_t initSlidingWindowInfo(SSlidingWindowInfo *pSlidingWindowInfo, int32_t threshold, int16_t type, int32_t rowSizes, SOutputRes *pRes) { pSlidingWindowInfo->capacity = threshold; pSlidingWindowInfo->threshold = threshold; @@ -1627,8 +1625,18 @@ static int32_t initSlidingWindowInfo(SSlidingWindowInfo *pSlidingWindowInfo, int pSlidingWindowInfo->curIndex = -1; pSlidingWindowInfo->size = 0; pSlidingWindowInfo->pResult = pRes; - pSlidingWindowInfo->pStatus = calloc(threshold, sizeof(SWindowStatus)); +// createResultBuf(&pSlidingWindowInfo->pResultBuf, 10, rowSizes); + + pSlidingWindowInfo->pStatus = calloc(threshold, sizeof(SWindowStatus)); +// pSlidingWindowInfo->pResultInfo = calloc(threshold, POINTER_BYTES); + +// for(int32_t i = 0; i < 
threshold; ++i) { +// pSlidingWindowInfo->pResultInfo[i] = calloc((size_t)numOfOutput, sizeof(SResultInfo)); + + +// } + if (pSlidingWindowInfo->pStatus == NULL || pSlidingWindowInfo->hashList == NULL) { return -1; } @@ -1643,17 +1651,19 @@ static void destroySlidingWindowInfo(SSlidingWindowInfo *pSlidingWindowInfo) { } taosCleanUpHashTable(pSlidingWindowInfo->hashList); +// destroyResultBuf(pSlidingWindowInfo->pResultBuf); + tfree(pSlidingWindowInfo->pStatus); } -void resetSlidingWindowInfo(SSlidingWindowInfo *pSlidingWindowInfo, int32_t numOfCols) { +void resetSlidingWindowInfo(SQueryRuntimeEnv *pRuntimeEnv, SSlidingWindowInfo *pSlidingWindowInfo) { if (pSlidingWindowInfo == NULL || pSlidingWindowInfo->capacity == 0) { return; } for (int32_t i = 0; i < pSlidingWindowInfo->size; ++i) { SOutputRes *pOneRes = &pSlidingWindowInfo->pResult[i]; - clearGroupResultBuf(pOneRes, numOfCols); + clearGroupResultBuf(pRuntimeEnv, pOneRes); } memset(pSlidingWindowInfo->pStatus, 0, sizeof(SWindowStatus) * pSlidingWindowInfo->capacity); @@ -1669,7 +1679,8 @@ void resetSlidingWindowInfo(SSlidingWindowInfo *pSlidingWindowInfo, int32_t numO pSlidingWindowInfo->prevSKey = 0; } -void clearCompletedSlidingWindows(SSlidingWindowInfo *pSlidingWindowInfo, int32_t numOfCols) { +void clearCompletedSlidingWindows(SQueryRuntimeEnv* pRuntimeEnv) { + SSlidingWindowInfo* pSlidingWindowInfo = &pRuntimeEnv->swindowResInfo; if (pSlidingWindowInfo == NULL || pSlidingWindowInfo->capacity == 0 || pSlidingWindowInfo->size == 0) { return; } @@ -1689,24 +1700,25 @@ void clearCompletedSlidingWindows(SSlidingWindowInfo *pSlidingWindowInfo, int32_ } int32_t remain = pSlidingWindowInfo->size - i; + //clear remain list memmove(pSlidingWindowInfo->pStatus, &pSlidingWindowInfo->pStatus[i], remain * sizeof(SWindowStatus)); memset(&pSlidingWindowInfo->pStatus[remain], 0, (pSlidingWindowInfo->capacity - remain) * sizeof(SWindowStatus)); for(int32_t k = 0; k < remain; ++k) { - 
copyGroupResultBuf(&pSlidingWindowInfo->pResult[k], &pSlidingWindowInfo->pResult[i + k], numOfCols); + copyGroupResultBuf(pRuntimeEnv, &pSlidingWindowInfo->pResult[k], &pSlidingWindowInfo->pResult[i + k]); } for(int32_t k = remain; k < pSlidingWindowInfo->size; ++k) { SOutputRes *pOneRes = &pSlidingWindowInfo->pResult[k]; - clearGroupResultBuf(pOneRes, numOfCols); + clearGroupResultBuf(pRuntimeEnv, pOneRes); } pSlidingWindowInfo->size = remain; for(int32_t k = 0; k < pSlidingWindowInfo->size; ++k) { SWindowStatus* pStatus = &pSlidingWindowInfo->pStatus[k]; - int32_t *p = (int32_t*) taosGetDataFromHash(pSlidingWindowInfo->hashList, (const char*)&pStatus->window.skey, TSDB_KEYSIZE); + int32_t *p = (int32_t*) taosGetDataFromHashTable(pSlidingWindowInfo->hashList, (const char*)&pStatus->window.skey, TSDB_KEYSIZE); int32_t v = *p; v = (v - i); @@ -1720,14 +1732,12 @@ void clearCompletedSlidingWindows(SSlidingWindowInfo *pSlidingWindowInfo, int32_ } int32_t numOfClosedSlidingWindow(SSlidingWindowInfo *pSlidingWindowInfo) { - for (int32_t i = 0; i < pSlidingWindowInfo->size; ++i) { - SWindowStatus *pStatus = &pSlidingWindowInfo->pStatus[i]; - if (pStatus->closed == false) { - return i; - } + int32_t i = 0; + while(i < pSlidingWindowInfo->size && pSlidingWindowInfo->pStatus[i].closed) { + ++i; } - return 0; + return i; } void closeSlidingWindow(SSlidingWindowInfo* pSlidingWindowInfo, int32_t slot) { @@ -2469,20 +2479,21 @@ void setExecParams(SQuery *pQuery, SQLFunctionCtx *pCtx, int64_t startQueryTimes } // set the output buffer for the selectivity + tag query -static void setCtxTagColumnInfo(SQuery *pQuery, SQueryRuntimeEnv *pRuntimeEnv) { +static void setCtxTagColumnInfo(SQuery *pQuery, SQLFunctionCtx *pCtx) { if (isSelectivityWithTagsQuery(pQuery)) { int32_t num = 0; - SQLFunctionCtx *pCtx = NULL; + SQLFunctionCtx *p = NULL; + int16_t tagLen = 0; SQLFunctionCtx **pTagCtx = calloc(pQuery->numOfOutputCols, POINTER_BYTES); for (int32_t i = 0; i < pQuery->numOfOutputCols; 
++i) { SSqlFuncExprMsg *pSqlFuncMsg = &pQuery->pSelectExpr[i].pBase; if (pSqlFuncMsg->functionId == TSDB_FUNC_TAG_DUMMY || pSqlFuncMsg->functionId == TSDB_FUNC_TS_DUMMY) { - tagLen += pRuntimeEnv->pCtx[i].outputBytes; - pTagCtx[num++] = &pRuntimeEnv->pCtx[i]; + tagLen += pCtx[i].outputBytes; + pTagCtx[num++] = &pCtx[i]; } else if ((aAggs[pSqlFuncMsg->functionId].nStatus & TSDB_FUNCSTATE_SELECTIVITY) != 0) { - pCtx = &pRuntimeEnv->pCtx[i]; + p = &pCtx[i]; } else if (pSqlFuncMsg->functionId == TSDB_FUNC_TS || pSqlFuncMsg->functionId == TSDB_FUNC_TAG) { // tag function may be the group by tag column // ts may be the required primary timestamp column @@ -2492,14 +2503,14 @@ static void setCtxTagColumnInfo(SQuery *pQuery, SQueryRuntimeEnv *pRuntimeEnv) { } } - pCtx->tagInfo.pTagCtxList = pTagCtx; - pCtx->tagInfo.numOfTagCols = num; - pCtx->tagInfo.tagsLen = tagLen; + p->tagInfo.pTagCtxList = pTagCtx; + p->tagInfo.numOfTagCols = num; + p->tagInfo.tagsLen = tagLen; } } static int32_t setupQueryRuntimeEnv(SMeterObj *pMeterObj, SQuery *pQuery, SQueryRuntimeEnv *pRuntimeEnv, - SSchema *pTagsSchema, int16_t order, bool isMetricQuery) { + SColumnModel *pTagsSchema, int16_t order, bool isSTableQuery) { dTrace("QInfo:%p setup runtime env", GET_QINFO_ADDR(pQuery)); pRuntimeEnv->pMeterObj = pMeterObj; @@ -2520,8 +2531,10 @@ static int32_t setupQueryRuntimeEnv(SMeterObj *pMeterObj, SQuery *pQuery, SQuery SQLFunctionCtx *pCtx = &pRuntimeEnv->pCtx[i]; if (TSDB_COL_IS_TAG(pSqlFuncMsg->colInfo.flag)) { // process tag column info - pCtx->inputType = pTagsSchema[pColIndexEx->colIdx].type; - pCtx->inputBytes = pTagsSchema[pColIndexEx->colIdx].bytes; + SSchema* pSchema = getColumnModelSchema(pTagsSchema, pColIndexEx->colIdx); + + pCtx->inputType = pSchema->type; + pCtx->inputBytes = pSchema->bytes; } else { pCtx->inputType = GET_COLUMN_TYPE(pQuery, i); pCtx->inputBytes = GET_COLUMN_BYTES(pQuery, i); @@ -2567,15 +2580,15 @@ static int32_t setupQueryRuntimeEnv(SMeterObj *pMeterObj, SQuery 
*pQuery, SQuery // set the intermediate result output buffer SResultInfo *pResInfo = &pRuntimeEnv->resultInfo[i]; - setResultInfoBuf(pResInfo, pQuery->pSelectExpr[i].interResBytes, isMetricQuery); + setResultInfoBuf(pResInfo, pQuery->pSelectExpr[i].interResBytes, isSTableQuery); } // if it is group by normal column, do not set output buffer, the output buffer is pResult - if (!isGroupbyNormalCol(pQuery->pGroupbyExpr) && !isMetricQuery) { + if (!isGroupbyNormalCol(pQuery->pGroupbyExpr) && !isSTableQuery) { resetCtxOutputBuf(pRuntimeEnv); } - setCtxTagColumnInfo(pQuery, pRuntimeEnv); + setCtxTagColumnInfo(pQuery, pRuntimeEnv->pCtx); // for loading block data in memory assert(vnodeList[pMeterObj->vnode].cfg.rowsInFileBlock == pMeterObj->pointsPerFileBlock); @@ -2651,6 +2664,7 @@ static int64_t getOldestKey(int32_t numOfFiles, int64_t fileId, SVnodeCfg *pCfg) } bool isQueryKilled(SQuery *pQuery) { + return false; SQInfo *pQInfo = (SQInfo *)GET_QINFO_ADDR(pQuery); /* @@ -4120,21 +4134,29 @@ static void allocMemForInterpo(SMeterQuerySupportObj *pSupporter, SQuery *pQuery } } -static int32_t allocateOutputBufForGroup(SMeterQuerySupportObj *pSupporter, SQuery *pQuery, bool isMetricQuery) { - int32_t slot = 0; +static int32_t createQueryOutputBuffer(SMeterQuerySupportObj *pSupporter, SQuery *pQuery, bool isSTableQuery) { + SQueryRuntimeEnv* pRuntimeEnv = &pSupporter->runtimeEnv; + + int32_t numOfRows = 0; if (isGroupbyNormalCol(pQuery->pGroupbyExpr) || (pQuery->nAggTimeInterval > 0 && pQuery->slidingTime > 0)) { - slot = 10000; + numOfRows = 10000; } else { - slot = pSupporter->pSidSet->numOfSubSet; + numOfRows = pSupporter->pSidSet->numOfSubSet; } - pSupporter->pResult = calloc(1, sizeof(SOutputRes) * slot); + createResultBuf(&pRuntimeEnv->pResultBuf, 100, pQuery->rowSize); + + // total number of initial results + pSupporter->pResult = calloc(numOfRows, sizeof(SOutputRes)); if (pSupporter->pResult == NULL) { return TSDB_CODE_SERV_OUT_OF_MEMORY; } - for (int32_t k = 0; k < 
slot; ++k) { + int32_t pageId = -1; + tFilePage* page = NULL; + + for (int32_t k = 0; k < numOfRows; ++k) { SOutputRes *pOneRes = &pSupporter->pResult[k]; pOneRes->nAlloc = 1; @@ -4142,14 +4164,23 @@ static int32_t allocateOutputBufForGroup(SMeterQuerySupportObj *pSupporter, SQue * for single table top/bottom query, the output for group by normal column, the output rows is * equals to the maximum rows, instead of 1. */ - if (!isMetricQuery && isTopBottomQuery(pQuery)) { + if (!isSTableQuery && isTopBottomQuery(pQuery)) { assert(pQuery->numOfOutputCols > 1); SSqlFunctionExpr *pExpr = &pQuery->pSelectExpr[1]; pOneRes->nAlloc = pExpr->pBase.arg[0].argValue.i64; } - createGroupResultBuf(pQuery, pOneRes, isMetricQuery); + if (page == NULL || page->numOfElems >= pRuntimeEnv->numOfRowsPerPage) { + page = getNewDataBuf(pRuntimeEnv->pResultBuf, 0, &pageId); + } + + assert(pageId >= 0); + + SPosInfo posInfo = {.pageId = pageId, .rowId = page->numOfElems}; + + createQueryResultBuf(pRuntimeEnv, pOneRes, isSTableQuery, &posInfo); + page->numOfElems += 1; // next row is available } return TSDB_CODE_SUCCESS; @@ -4211,6 +4242,32 @@ _error_clean: return TSDB_CODE_SERV_OUT_OF_MEMORY; } +static int32_t getRowParamForMultiRowsOutput(SQuery* pQuery, bool isSTableQuery) { + int32_t rowparam = 1; + + if (isTopBottomQuery(pQuery) && (!isSTableQuery)) { + rowparam = pQuery->pSelectExpr[1].pBase.arg->argValue.i64; + } + + return rowparam; +} + +static int32_t getNumOfRowsInResultPage(SQuery* pQuery, bool isSTableQuery) { + int32_t rowSize = pQuery->rowSize * getRowParamForMultiRowsOutput(pQuery, isSTableQuery); + return (DEFAULT_INTERN_BUF_SIZE - sizeof(tFilePage)) / rowSize; +} + +static char* getPosInResultPage(SQueryRuntimeEnv* pRuntimeEnv, int32_t columnIndex, SOutputRes* pResult) { + SQuery* pQuery = pRuntimeEnv->pQuery; + tFilePage* page = getResultBufferPageById(pRuntimeEnv->pResultBuf, pResult->pos.pageId); + + int32_t numOfRows = getNumOfRowsInResultPage(pQuery, 
pRuntimeEnv->stableQuery); + int32_t realRowId = pResult->pos.rowId * getRowParamForMultiRowsOutput(pQuery, pRuntimeEnv->stableQuery); + + return ((char*)page->data) + pRuntimeEnv->offset[columnIndex] * numOfRows + + pQuery->pSelectExpr[columnIndex].resBytes * realRowId; +} + int32_t vnodeQuerySingleMeterPrepare(SQInfo *pQInfo, SMeterObj *pMeterObj, SMeterQuerySupportObj *pSupporter, void *param) { SQuery *pQuery = &pQInfo->query; @@ -4280,9 +4337,10 @@ int32_t vnodeQuerySingleMeterPrepare(SQInfo *pQInfo, SMeterObj *pMeterObj, SMete } vnodeRecordAllFiles(pQInfo, pMeterObj->vnode); - + + pRuntimeEnv->numOfRowsPerPage = getNumOfRowsInResultPage(pQuery, false); if (isGroupbyNormalCol(pQuery->pGroupbyExpr) || (pQuery->nAggTimeInterval > 0 && pQuery->slidingTime > 0)) { - if ((code = allocateOutputBufForGroup(pSupporter, pQuery, false)) != TSDB_CODE_SUCCESS) { + if ((code = createQueryOutputBuffer(pSupporter, pQuery, false)) != TSDB_CODE_SUCCESS) { return code; } @@ -4293,8 +4351,7 @@ int32_t vnodeQuerySingleMeterPrepare(SQInfo *pQInfo, SMeterObj *pMeterObj, SMete type = TSDB_DATA_TYPE_TIMESTAMP; } - // todo bug! 
- initSlidingWindowInfo(&pRuntimeEnv->swindowResInfo, 3, type, pSupporter->pResult); + initSlidingWindowInfo(&pRuntimeEnv->swindowResInfo, 3, type, pQuery->rowSize, pSupporter->pResult); } pSupporter->rawSKey = pQuery->skey; @@ -4404,6 +4461,8 @@ int32_t vnodeQuerySingleMeterPrepare(SQInfo *pQInfo, SMeterObj *pMeterObj, SMete // the pQuery->skey is changed during normalizedFirstQueryRange, so set the newest lastkey value pQuery->lastKey = pQuery->skey; + pRuntimeEnv->stableQuery = false; + return TSDB_CODE_SUCCESS; } @@ -4437,15 +4496,6 @@ void vnodeQueryFreeQInfoEx(SQInfo *pQInfo) { } } - if (FD_VALID(pSupporter->meterOutputFd)) { - assert(pSupporter->meterOutputMMapBuf != NULL); - dTrace("QInfo:%p disk-based output buffer during query:%" PRId64 " bytes", pQInfo, pSupporter->bufSize); - munmap(pSupporter->meterOutputMMapBuf, pSupporter->bufSize); - tclose(pSupporter->meterOutputFd); - - unlink(pSupporter->extBufFile); - } - tSidSetDestroy(&pSupporter->pSidSet); if (pSupporter->pMeterDataInfo != NULL) { @@ -4498,12 +4548,7 @@ int32_t vnodeMultiMeterQueryPrepare(SQInfo *pQInfo, SQuery *pQuery, void *param) pQuery->lastKey = pQuery->skey; // create runtime environment - SSchema *pTagSchema = NULL; - - tTagSchema *pTagSchemaInfo = pSupporter->pSidSet->pTagSchema; - if (pTagSchemaInfo != NULL) { - pTagSchema = pTagSchemaInfo->pSchema; - } + SColumnModel *pTagSchemaInfo = pSupporter->pSidSet->pColumnModel; // get one queried meter SMeterObj *pMeter = getMeterObj(pSupporter->pMetersHashTable, pSupporter->pSidSet->pSids[0]->sid); @@ -4517,7 +4562,7 @@ int32_t vnodeMultiMeterQueryPrepare(SQInfo *pQInfo, SQuery *pQuery, void *param) tsBufSetTraverseOrder(pRuntimeEnv->pTSBuf, order); } - int32_t ret = setupQueryRuntimeEnv(pMeter, pQuery, &pSupporter->runtimeEnv, pTagSchema, TSQL_SO_ASC, true); + int32_t ret = setupQueryRuntimeEnv(pMeter, pQuery, &pSupporter->runtimeEnv, pTagSchemaInfo, TSQL_SO_ASC, true); if (ret != TSDB_CODE_SUCCESS) { return ret; } @@ -4530,45 +4575,25 @@ 
int32_t vnodeMultiMeterQueryPrepare(SQInfo *pQInfo, SQuery *pQuery, void *param) tSidSetSort(pSupporter->pSidSet); vnodeRecordAllFiles(pQInfo, pMeter->vnode); - if ((ret = allocateOutputBufForGroup(pSupporter, pQuery, true)) != TSDB_CODE_SUCCESS) { + if ((ret = createQueryOutputBuffer(pSupporter, pQuery, true)) != TSDB_CODE_SUCCESS) { return ret; } if (isGroupbyNormalCol(pQuery->pGroupbyExpr)) { // group by columns not tags; int16_t type = getGroupbyColumnType(pQuery, pQuery->pGroupbyExpr); - initSlidingWindowInfo(&pRuntimeEnv->swindowResInfo, 10039, type, pSupporter->pResult); + initSlidingWindowInfo(&pRuntimeEnv->swindowResInfo, 4096, type, pQuery->rowSize, pSupporter->pResult); } if (pQuery->nAggTimeInterval != 0) { - getTmpfilePath("tb_metric_mmap", pSupporter->extBufFile); - pSupporter->meterOutputFd = open(pSupporter->extBufFile, O_CREAT | O_RDWR, 0666); - - if (!FD_VALID(pSupporter->meterOutputFd)) { - dError("QInfo:%p failed to create file: %s on disk. %s", pQInfo, pSupporter->extBufFile, strerror(errno)); - return TSDB_CODE_SERV_OUT_OF_MEMORY; - } - - pSupporter->numOfPages = pSupporter->numOfMeters; - - ret = ftruncate(pSupporter->meterOutputFd, pSupporter->numOfPages * DEFAULT_INTERN_BUF_SIZE); + // one page for each table at least + ret = createResultBuf(&pRuntimeEnv->pResultBuf, pSupporter->numOfMeters, pQuery->rowSize); if (ret != TSDB_CODE_SUCCESS) { - dError("QInfo:%p failed to create intermediate result output file:%s. 
%s", pQInfo, pSupporter->extBufFile, - strerror(errno)); - return TSDB_CODE_SERV_NO_DISKSPACE; - } - - pRuntimeEnv->numOfRowsPerPage = (DEFAULT_INTERN_BUF_SIZE - sizeof(tFilePage)) / pQuery->rowSize; - pSupporter->lastPageId = -1; - pSupporter->bufSize = pSupporter->numOfPages * DEFAULT_INTERN_BUF_SIZE; - - pSupporter->meterOutputMMapBuf = - mmap(NULL, pSupporter->bufSize, PROT_READ | PROT_WRITE, MAP_SHARED, pSupporter->meterOutputFd, 0); - if (pSupporter->meterOutputMMapBuf == MAP_FAILED) { - dError("QInfo:%p failed to map temp file: %s. %s", pQInfo, pSupporter->extBufFile, strerror(errno)); - return TSDB_CODE_SERV_OUT_OF_MEMORY; + return ret; } } - + + pRuntimeEnv->numOfRowsPerPage = getNumOfRowsInResultPage(pQuery, true); + // metric query do not invoke interpolation, it will be done at the second-stage merge if (!isPointInterpoQuery(pQuery)) { pQuery->interpoType = TSDB_INTERPO_NONE; @@ -4577,7 +4602,8 @@ int32_t vnodeMultiMeterQueryPrepare(SQInfo *pQInfo, SQuery *pQuery, void *param) TSKEY revisedStime = taosGetIntervalStartTimestamp(pSupporter->rawSKey, pQuery->nAggTimeInterval, pQuery->intervalTimeUnit, pQuery->precision); taosInitInterpoInfo(&pRuntimeEnv->interpoInfo, pQuery->order.order, revisedStime, 0, 0); - + pRuntimeEnv->stableQuery = true; + return TSDB_CODE_SUCCESS; } @@ -5061,14 +5087,14 @@ void queryOnBlock(SMeterQuerySupportObj *pSupporter, int64_t *primaryKeys, int32 * set tag value in SQLFunctionCtx * e.g.,tag information into input buffer */ -static void doSetTagValueInParam(tTagSchema *pTagSchema, int32_t tagColIdx, SMeterSidExtInfo *pMeterSidInfo, +static void doSetTagValueInParam(SColumnModel *pTagSchema, int32_t tagColIdx, SMeterSidExtInfo *pMeterSidInfo, tVariant *param) { assert(tagColIdx >= 0); - int32_t *fieldValueOffset = pTagSchema->colOffset; - - void * pStr = (char *)pMeterSidInfo->tags + fieldValueOffset[tagColIdx]; - SSchema *pCol = &pTagSchema->pSchema[tagColIdx]; + int16_t offset = getColumnModelOffset(pTagSchema, tagColIdx); + 
+ void * pStr = (char *)pMeterSidInfo->tags + offset; + SSchema *pCol = getColumnModelSchema(pTagSchema, tagColIdx); tVariantDestroy(param); @@ -5081,7 +5107,7 @@ static void doSetTagValueInParam(tTagSchema *pTagSchema, int32_t tagColIdx, SMet void vnodeSetTagValueInParam(tSidSet *pSidSet, SQueryRuntimeEnv *pRuntimeEnv, SMeterSidExtInfo *pMeterSidInfo) { SQuery * pQuery = pRuntimeEnv->pQuery; - tTagSchema *pTagSchema = pSidSet->pTagSchema; + SColumnModel *pTagSchema = pSidSet->pColumnModel; SSqlFuncExprMsg *pFuncMsg = &pQuery->pSelectExpr[0].pBase; if (pQuery->numOfOutputCols == 1 && pFuncMsg->functionId == TSDB_FUNC_TS_COMP) { @@ -5259,19 +5285,9 @@ void UNUSED_FUNC displayInterResult(SData **pdata, SQuery *pQuery, int32_t numOf } } -static tFilePage *getFilePage(SMeterQuerySupportObj *pSupporter, int32_t pageId) { - assert(pageId <= pSupporter->lastPageId && pageId >= 0); - return (tFilePage *)(pSupporter->meterOutputMMapBuf + DEFAULT_INTERN_BUF_SIZE * pageId); -} - -static tFilePage *getMeterDataPage(SMeterQuerySupportObj *pSupporter, SMeterDataInfo *pMeterDataInfo, int32_t pageId) { - SMeterQueryInfo *pMeterQueryInfo = pMeterDataInfo->pMeterQInfo; - if (pageId >= pMeterQueryInfo->numOfPages) { - return NULL; - } - - int32_t realId = pMeterQueryInfo->pageList[pageId]; - return getFilePage(pSupporter, realId); +static tFilePage *getMeterDataPage(SQueryResultBuf* pResultBuf, SMeterQueryInfo *pMeterQueryInfo, int32_t index) { + SIDList pList = getDataBufPagesIdList(pResultBuf, pMeterQueryInfo->sid); + return getResultBufferPageById(pResultBuf, pList.pData[index]); } typedef struct Position { @@ -5287,7 +5303,9 @@ typedef struct SCompSupporter { int64_t getCurrentTimestamp(SCompSupporter *pSupportor, int32_t meterIdx) { Position * pPos = &pSupportor->pPosition[meterIdx]; - tFilePage *pPage = getMeterDataPage(pSupportor->pSupporter, pSupportor->pMeterDataInfo[meterIdx], pPos->pageIdx); + tFilePage *pPage = 
getMeterDataPage(pSupportor->pSupporter->runtimeEnv.pResultBuf, + pSupportor->pMeterDataInfo[meterIdx]->pMeterQInfo, pPos->pageIdx); + return *(int64_t *)(pPage->data + TSDB_KEYSIZE * pPos->rowIdx); } @@ -5295,10 +5313,11 @@ int32_t meterResultComparator(const void *pLeft, const void *pRight, void *param int32_t left = *(int32_t *)pLeft; int32_t right = *(int32_t *)pRight; - SCompSupporter *supportor = (SCompSupporter *)param; - - Position leftPos = supportor->pPosition[left]; - Position rightPos = supportor->pPosition[right]; + SCompSupporter *supporter = (SCompSupporter *)param; + SQueryResultBuf* pResultBuf = supporter->pSupporter->runtimeEnv.pResultBuf; + + Position leftPos = supporter->pPosition[left]; + Position rightPos = supporter->pPosition[right]; /* left source is exhausted */ if (leftPos.pageIdx == -1 && leftPos.rowIdx == -1) { @@ -5310,10 +5329,10 @@ int32_t meterResultComparator(const void *pLeft, const void *pRight, void *param return -1; } - tFilePage *pPageLeft = getMeterDataPage(supportor->pSupporter, supportor->pMeterDataInfo[left], leftPos.pageIdx); + tFilePage *pPageLeft = getMeterDataPage(pResultBuf, supporter->pMeterDataInfo[left]->pMeterQInfo, leftPos.pageIdx); int64_t leftTimestamp = *(int64_t *)(pPageLeft->data + TSDB_KEYSIZE * leftPos.rowIdx); - tFilePage *pPageRight = getMeterDataPage(supportor->pSupporter, supportor->pMeterDataInfo[right], rightPos.pageIdx); + tFilePage *pPageRight = getMeterDataPage(pResultBuf, supporter->pMeterDataInfo[right]->pMeterQInfo, rightPos.pageIdx); int64_t rightTimestamp = *(int64_t *)(pPageRight->data + TSDB_KEYSIZE * rightPos.rowIdx); if (leftTimestamp == rightTimestamp) { @@ -5373,23 +5392,40 @@ void copyResToQueryResultBuf(SMeterQuerySupportObj *pSupporter, SQuery *pQuery) } SQueryRuntimeEnv *pRuntimeEnv = &pSupporter->runtimeEnv; - char * pStart = pSupporter->meterOutputMMapBuf + DEFAULT_INTERN_BUF_SIZE * (pSupporter->lastPageId + 1) + - pSupporter->groupResultSize * pSupporter->offset; - - uint64_t 
numOfElem = ((tFilePage *)pStart)->numOfElems; - assert(numOfElem <= pQuery->pointsToRead); - - for (int32_t i = 0; i < pQuery->numOfOutputCols; ++i) { - memcpy(pQuery->sdata[i], pStart, pRuntimeEnv->pCtx[i].outputBytes * numOfElem + sizeof(tFilePage)); - pStart += pRuntimeEnv->pCtx[i].outputBytes * pQuery->pointsToRead + sizeof(tFilePage); + SQueryResultBuf* pResultBuf = pRuntimeEnv->pResultBuf; + + SIDList list = getDataBufPagesIdList(pResultBuf, 200000 + pSupporter->offset + (pSupporter->subgroupIdx - 1)* 10000); + + int32_t total = 0; + for(int32_t i = 0; i < list.size; ++i) { + tFilePage* pData = getResultBufferPageById(pResultBuf, list.pData[i]); + total += pData->numOfElems; + } + + pQuery->sdata[0]->len = total; + + int32_t offset = 0; + for(int32_t num = 0; num < list.size; ++num) { + tFilePage* pData = getResultBufferPageById(pResultBuf, list.pData[num]); + + for (int32_t i = 0; i < pQuery->numOfOutputCols; ++i) { + int32_t bytes = pRuntimeEnv->pCtx[i].outputBytes; + char* pDest = pQuery->sdata[i]->data; + + memcpy(pDest + offset*bytes, pData->data + pRuntimeEnv->offset[i] * pData->numOfElems, bytes * pData->numOfElems); + } + + offset += pData->numOfElems; } - pQuery->pointsRead += numOfElem; + assert(pQuery->pointsRead == 0); + + pQuery->pointsRead += pQuery->sdata[0]->len; pSupporter->offset += 1; } int32_t doMergeMetersResultsToGroupRes(SMeterQuerySupportObj *pSupporter, SQuery *pQuery, SQueryRuntimeEnv *pRuntimeEnv, - SMeterDataInfo *pMeterHeadDataInfo, int32_t start, int32_t end) { + SMeterDataInfo *pMeterDataInfo, int32_t start, int32_t end) { // calculate the maximum required space if (pSupporter->groupResultSize == 0) { for (int32_t i = 0; i < pQuery->numOfOutputCols; ++i) { @@ -5403,8 +5439,11 @@ int32_t doMergeMetersResultsToGroupRes(SMeterQuerySupportObj *pSupporter, SQuery int32_t numOfMeters = 0; for (int32_t i = start; i < end; ++i) { - if (pMeterHeadDataInfo[i].pMeterQInfo->numOfPages > 0 && pMeterHeadDataInfo[i].pMeterQInfo->numOfRes > 0) 
{ - pValidMeter[numOfMeters] = &pMeterHeadDataInfo[i]; + int32_t sid = pMeterDataInfo[i].pMeterQInfo->sid; + SIDList list = getDataBufPagesIdList(pRuntimeEnv->pResultBuf, sid); + + if (list.size > 0 && pMeterDataInfo[i].pMeterQInfo->numOfRes > 0) { + pValidMeter[numOfMeters] = &pMeterDataInfo[i]; // set the merge start position: page:0, index:0 posArray[numOfMeters].pageIdx = 0; posArray[numOfMeters++].rowIdx = 0; @@ -5433,10 +5472,11 @@ int32_t doMergeMetersResultsToGroupRes(SMeterQuerySupportObj *pSupporter, SQuery while (1) { int32_t pos = pTree->pNode[0].index; Position * position = &cs.pPosition[pos]; - tFilePage *pPage = getMeterDataPage(cs.pSupporter, pValidMeter[pos], position->pageIdx); + SQueryResultBuf* pResultBuf = cs.pSupporter->runtimeEnv.pResultBuf; + tFilePage *pPage = getMeterDataPage(pResultBuf, pValidMeter[pos]->pMeterQInfo, position->pageIdx); int64_t ts = getCurrentTimestamp(&cs, pos); - if (ts == lastTimestamp) { // merge with the last one + if (ts == lastTimestamp) {// merge with the last one doMerge(pRuntimeEnv, ts, pPage, position->rowIdx, true); } else { // copy data to disk buffer @@ -5448,7 +5488,7 @@ int32_t doMergeMetersResultsToGroupRes(SMeterQuerySupportObj *pSupporter, SQuery resetMergeResultBuf(pQuery, pCtx); } - pPage = getMeterDataPage(cs.pSupporter, pValidMeter[pos], position->pageIdx); + pPage = getMeterDataPage(pResultBuf, pValidMeter[pos]->pMeterQInfo, position->pageIdx); if (pPage->numOfElems <= 0) { // current source data page is empty // do nothing } else { @@ -5464,17 +5504,19 @@ int32_t doMergeMetersResultsToGroupRes(SMeterQuerySupportObj *pSupporter, SQuery cs.pPosition[pos].pageIdx += 1; // try next page // check if current page is empty or not. 
if it is empty, ignore it and try next - if (cs.pPosition[pos].pageIdx <= cs.pMeterDataInfo[pos]->pMeterQInfo->numOfPages - 1) { - tFilePage *newPage = getMeterDataPage(cs.pSupporter, pValidMeter[pos], position->pageIdx); + SIDList list = getDataBufPagesIdList(pRuntimeEnv->pResultBuf, cs.pMeterDataInfo[pos]->pMeterQInfo->sid); + if (cs.pPosition[pos].pageIdx <= list.size - 1) { + tFilePage *newPage = getMeterDataPage(pResultBuf, pValidMeter[pos]->pMeterQInfo, position->pageIdx); + + // if current source data page is null, it must be the last page of source output page if (newPage->numOfElems <= 0) { - // if current source data page is null, it must be the last page of source output page cs.pPosition[pos].pageIdx += 1; - assert(cs.pPosition[pos].pageIdx >= cs.pMeterDataInfo[pos]->pMeterQInfo->numOfPages - 1); + assert(cs.pPosition[pos].pageIdx >= list.size - 1); } } // the following code must be executed if current source pages are exhausted - if (cs.pPosition[pos].pageIdx >= cs.pMeterDataInfo[pos]->pMeterQInfo->numOfPages) { + if (cs.pPosition[pos].pageIdx >= list.size) { cs.pPosition[pos].pageIdx = -1; cs.pPosition[pos].rowIdx = -1; @@ -5492,8 +5534,8 @@ int32_t doMergeMetersResultsToGroupRes(SMeterQuerySupportObj *pSupporter, SQuery if (buffer[0]->numOfElems != 0) { // there are data in buffer if (flushFromResultBuf(pSupporter, pQuery, pRuntimeEnv) != TSDB_CODE_SUCCESS) { - dError("QInfo:%p failed to flush data into temp file, abort query", GET_QINFO_ADDR(pQuery), - pSupporter->extBufFile); +// dError("QInfo:%p failed to flush data into temp file, abort query", GET_QINFO_ADDR(pQuery), +// pSupporter->extBufFile); tfree(pTree); tfree(pValidMeter); tfree(posArray); @@ -5518,70 +5560,39 @@ int32_t doMergeMetersResultsToGroupRes(SMeterQuerySupportObj *pSupporter, SQuery return pSupporter->numOfGroupResultPages; } -static int32_t extendDiskBuf(const SQuery *pQuery, SMeterQuerySupportObj *pSupporter, int32_t numOfPages) { - assert(pSupporter->numOfPages * 
DEFAULT_INTERN_BUF_SIZE == pSupporter->bufSize); - - SQInfo *pQInfo = (SQInfo *)GET_QINFO_ADDR(pQuery); - - int32_t ret = munmap(pSupporter->meterOutputMMapBuf, pSupporter->bufSize); - pSupporter->numOfPages = numOfPages; - - /* - * disk-based output buffer is exhausted, try to extend the disk-based buffer, the available disk space may - * be insufficient - */ - ret = ftruncate(pSupporter->meterOutputFd, pSupporter->numOfPages * DEFAULT_INTERN_BUF_SIZE); - if (ret != 0) { - dError("QInfo:%p failed to create intermediate result output file:%s. %s", pQInfo, pSupporter->extBufFile, - strerror(errno)); - pQInfo->code = -TSDB_CODE_SERV_NO_DISKSPACE; - pQInfo->killed = 1; - - return pQInfo->code; - } - - pSupporter->bufSize = pSupporter->numOfPages * DEFAULT_INTERN_BUF_SIZE; - pSupporter->meterOutputMMapBuf = - mmap(NULL, pSupporter->bufSize, PROT_READ | PROT_WRITE, MAP_SHARED, pSupporter->meterOutputFd, 0); - - if (pSupporter->meterOutputMMapBuf == MAP_FAILED) { - dError("QInfo:%p failed to map temp file: %s. 
%s", pQInfo, pSupporter->extBufFile, strerror(errno)); - pQInfo->code = -TSDB_CODE_SERV_OUT_OF_MEMORY; - pQInfo->killed = 1; - - return pQInfo->code; - } - - return TSDB_CODE_SUCCESS; -} - int32_t flushFromResultBuf(SMeterQuerySupportObj *pSupporter, const SQuery *pQuery, const SQueryRuntimeEnv *pRuntimeEnv) { - int32_t numOfMeterResultBufPages = pSupporter->lastPageId + 1; - int64_t dstSize = numOfMeterResultBufPages * DEFAULT_INTERN_BUF_SIZE + - pSupporter->groupResultSize * (pSupporter->numOfGroupResultPages + 1); - - int32_t requiredPages = pSupporter->numOfPages; - if (requiredPages * DEFAULT_INTERN_BUF_SIZE < dstSize) { - while (requiredPages * DEFAULT_INTERN_BUF_SIZE < dstSize) { - requiredPages += pSupporter->numOfMeters; + SQueryResultBuf* pResultBuf = pRuntimeEnv->pResultBuf; + int32_t capacity = (DEFAULT_INTERN_BUF_SIZE - sizeof(tFilePage))/ pQuery->rowSize; + + // the base value for group result, since the maximum number of table for each vnode will not exceed 100,000. + int32_t base = 200000; + int32_t pageId = -1; + + int32_t remain = pQuery->sdata[0]->len; + int32_t offset = 0; + + while(remain > 0) { + int32_t r = remain; + if (r > capacity) { + r = capacity; } - - if (extendDiskBuf(pQuery, pSupporter, requiredPages) != TSDB_CODE_SUCCESS) { - return -1; + + tFilePage* buf = getNewDataBuf(pResultBuf, base + pSupporter->subgroupIdx*10000 + pSupporter->numOfGroupResultPages, &pageId); + + //pagewise copy to dest buffer + for (int32_t i = 0; i < pQuery->numOfOutputCols; ++i) { + int32_t bytes = pRuntimeEnv->pCtx[i].outputBytes; + buf->numOfElems = r; + + memcpy(buf->data + pRuntimeEnv->offset[i] * buf->numOfElems, ((char*)pQuery->sdata[i]->data) + offset * bytes, + buf->numOfElems * bytes); } + + offset += r; + remain -= r; } - - char *lastPosition = pSupporter->meterOutputMMapBuf + DEFAULT_INTERN_BUF_SIZE * numOfMeterResultBufPages + - pSupporter->groupResultSize * pSupporter->numOfGroupResultPages; - - for (int32_t i = 0; i < pQuery->numOfOutputCols; 
++i) { - int32_t size = pRuntimeEnv->pCtx[i].outputBytes * pQuery->sdata[0]->len + sizeof(tFilePage); - memcpy(lastPosition, pQuery->sdata[i], size); - - lastPosition += pRuntimeEnv->pCtx[i].outputBytes * pQuery->pointsToRead + sizeof(tFilePage); - } - + pSupporter->numOfGroupResultPages += 1; return TSDB_CODE_SUCCESS; } @@ -5691,49 +5702,62 @@ void enableFunctForMasterScan(SQueryRuntimeEnv *pRuntimeEnv, int32_t order) { pQuery->order.order = (pQuery->order.order ^ 1); } -void createGroupResultBuf(SQuery *pQuery, SOutputRes *pOneResult, bool isMetricQuery) { - int32_t numOfOutput = pQuery->numOfOutputCols; - - pOneResult->resultInfo = calloc((size_t)numOfOutput, sizeof(SResultInfo)); - - pOneResult->result = malloc(POINTER_BYTES * numOfOutput); - for (int32_t i = 0; i < numOfOutput; ++i) { - size_t size = pQuery->pSelectExpr[i].interResBytes; - SResultInfo *pResInfo = &pOneResult->resultInfo[i]; - - pOneResult->result[i] = malloc(sizeof(tFilePage) + size * pOneResult->nAlloc); - pOneResult->result[i]->numOfElems = 0; +void createQueryResultBuf(SQueryRuntimeEnv *pRuntimeEnv, SOutputRes *pResultRow, bool isSTableQuery, SPosInfo *posInfo) { + SQuery* pQuery = pRuntimeEnv->pQuery; + + int32_t numOfCols = pQuery->numOfOutputCols; - setResultInfoBuf(pResInfo, (int32_t)size, isMetricQuery); + pResultRow->resultInfo = calloc((size_t)numOfCols, sizeof(SResultInfo)); + pResultRow->pos = *posInfo;//page->data + (pRuntimeEnv->offset[i] * pRuntimeEnv->numOfRowsPerPage) + page->numOfElems*s1; + + for (int32_t i = 0; i < numOfCols; ++i) { + SResultInfo *pResultInfo = &pResultRow->resultInfo[i]; + size_t size = pQuery->pSelectExpr[i].interResBytes; + setResultInfoBuf(pResultInfo, (int32_t)size, isSTableQuery); } } -void clearGroupResultBuf(SOutputRes *pOneOutputRes, int32_t nOutputCols) { +void clearGroupResultBuf(SQueryRuntimeEnv *pRuntimeEnv, SOutputRes *pOneOutputRes) { if (pOneOutputRes == NULL) { return; } - for (int32_t i = 0; i < nOutputCols; ++i) { - SResultInfo *pResInfo 
= &pOneOutputRes->resultInfo[i]; - int32_t size = sizeof(tFilePage) + pResInfo->bufLen * pOneOutputRes->nAlloc; + for (int32_t i = 0; i < pRuntimeEnv->pQuery->numOfOutputCols; ++i) { + SResultInfo *pResultInfo = &pOneOutputRes->resultInfo[i]; +// int32_t size = sizeof(tFilePage) + pResultInfo->bufLen * pOneOutputRes->nAlloc; - memset(pOneOutputRes->result[i], 0, (size_t)size); - resetResultInfo(pResInfo); +// memset(pOneOutputRes->pos[i], 0, (size_t)size); + char* s = getPosInResultPage(pRuntimeEnv, i, pOneOutputRes); + size_t size = pRuntimeEnv->pQuery->pSelectExpr[i].resBytes; + memset(s, 0, size); + + resetResultInfo(pResultInfo); } } -void copyGroupResultBuf(SOutputRes* dst, const SOutputRes* src, int32_t nOutputCols) { +void copyGroupResultBuf(SQueryRuntimeEnv *pRuntimeEnv, SOutputRes* dst, const SOutputRes* src) { + dst->numOfRows = src->numOfRows; + dst->nAlloc = src->nAlloc; + + int32_t nOutputCols = pRuntimeEnv->pQuery->numOfOutputCols; + for(int32_t i = 0; i < nOutputCols; ++i) { SResultInfo *pDst = &dst->resultInfo[i]; SResultInfo *pSrc = &src->resultInfo[i]; char* buf = pDst->interResultBuf; memcpy(pDst, pSrc, sizeof(SResultInfo)); - pDst->interResultBuf = buf; + pDst->interResultBuf = buf; // restore the allocated buffer + + // copy the result info struct memcpy(pDst->interResultBuf, pSrc->interResultBuf, pDst->bufLen); - - int32_t size = sizeof(tFilePage) + pSrc->bufLen * src->nAlloc; - memcpy(dst->result[i], src->result[i], size); + + // copy the output buffer data from src to dst, the position info keep unchanged + char* dstBuf = getPosInResultPage(pRuntimeEnv, i, dst); + char* srcBuf = getPosInResultPage(pRuntimeEnv, i, src); + size_t s = pRuntimeEnv->pQuery->pSelectExpr[i].resBytes; + + memcpy(dstBuf, srcBuf, s); } } @@ -5743,12 +5767,12 @@ void destroyGroupResultBuf(SOutputRes *pOneOutputRes, int32_t nOutputCols) { } for (int32_t i = 0; i < nOutputCols; ++i) { - free(pOneOutputRes->result[i]); +// free(pOneOutputRes->pos[i]); 
free(pOneOutputRes->resultInfo[i].interResultBuf); } free(pOneOutputRes->resultInfo); - free(pOneOutputRes->result); +// free(pOneOutputRes->result); } void resetCtxOutputBuf(SQueryRuntimeEnv *pRuntimeEnv) { @@ -5812,6 +5836,7 @@ void initCtxOutputBuf(SQueryRuntimeEnv *pRuntimeEnv) { for (int32_t j = 0; j < pQuery->numOfOutputCols; ++j) { int32_t functionId = pQuery->pSelectExpr[j].pBase.functionId; pRuntimeEnv->pCtx[j].currentStage = 0; + aAggs[functionId].init(&pRuntimeEnv->pCtx[j]); } } @@ -6153,7 +6178,7 @@ void forwardIntervalQueryRange(SMeterQuerySupportObj *pSupporter, SQueryRuntimeE (!QUERY_IS_ASC_QUERY(pQuery) && pQuery->lastKey <= pQuery->ekey)) { setQueryStatus(pQuery, QUERY_COMPLETED); } else { - TSKEY nextTimestamp = loadRequiredBlockIntoMem(pRuntimeEnv, &pRuntimeEnv->nextPos); + /*TSKEY nextTimestamp =*/ loadRequiredBlockIntoMem(pRuntimeEnv, &pRuntimeEnv->nextPos); } return; @@ -6306,7 +6331,7 @@ int32_t vnodeFilterQualifiedMeters(SQInfo *pQInfo, int32_t vid, tSidSet *pSidSet pOneMeterDataInfo->offsetInHeaderFile = (uint64_t)compHeader->compInfoOffset; if (pOneMeterDataInfo->pMeterQInfo == NULL) { - pOneMeterDataInfo->pMeterQInfo = createMeterQueryInfo(pQuery, pSupporter->rawSKey, pSupporter->rawEKey); + pOneMeterDataInfo->pMeterQInfo = createMeterQueryInfo(pQuery, pMeterObj->sid, pSupporter->rawSKey, pSupporter->rawEKey); } (*pReqMeterDataInfo)[*numOfMeters] = pOneMeterDataInfo; @@ -6325,18 +6350,18 @@ int32_t vnodeFilterQualifiedMeters(SQInfo *pQInfo, int32_t vid, tSidSet *pSidSet return TSDB_CODE_SUCCESS; } -SMeterQueryInfo *createMeterQueryInfo(SQuery *pQuery, TSKEY skey, TSKEY ekey) { +SMeterQueryInfo *createMeterQueryInfo(SQuery *pQuery, int32_t sid, TSKEY skey, TSKEY ekey) { SMeterQueryInfo *pMeterQueryInfo = calloc(1, sizeof(SMeterQueryInfo)); pMeterQueryInfo->skey = skey; pMeterQueryInfo->ekey = ekey; pMeterQueryInfo->lastKey = skey; - pMeterQueryInfo->numOfPages = 0; - pMeterQueryInfo->numOfAlloc = INIT_ALLOCATE_DISK_PAGES; - 
pMeterQueryInfo->pageList = calloc(pMeterQueryInfo->numOfAlloc, sizeof(uint32_t)); +// pMeterQueryInfo->numOfPages = 0; +// pMeterQueryInfo->numOfAlloc = INIT_ALLOCATE_DISK_PAGES; +// pMeterQueryInfo->pageList = calloc(pMeterQueryInfo->numOfAlloc, sizeof(uint32_t)); pMeterQueryInfo->lastResRows = 0; - + pMeterQueryInfo->sid = sid; pMeterQueryInfo->cur.vnodeIndex = -1; pMeterQueryInfo->resultInfo = calloc((size_t)pQuery->numOfOutputCols, sizeof(SResultInfo)); @@ -6353,7 +6378,7 @@ void destroyMeterQueryInfo(SMeterQueryInfo *pMeterQueryInfo, int32_t numOfCols) return; } - free(pMeterQueryInfo->pageList); +// free(pMeterQueryInfo->pageList); for (int32_t i = 0; i < numOfCols; ++i) { tfree(pMeterQueryInfo->resultInfo[i].interResultBuf); } @@ -6362,7 +6387,8 @@ void destroyMeterQueryInfo(SMeterQueryInfo *pMeterQueryInfo, int32_t numOfCols) free(pMeterQueryInfo); } -void changeMeterQueryInfoForSuppleQuery(SMeterQueryInfo *pMeterQueryInfo, TSKEY skey, TSKEY ekey) { +void changeMeterQueryInfoForSuppleQuery(SQueryResultBuf* pResultBuf, SMeterQueryInfo *pMeterQueryInfo, TSKEY skey, + TSKEY ekey) { if (pMeterQueryInfo == NULL) { return; } @@ -6376,7 +6402,9 @@ void changeMeterQueryInfoForSuppleQuery(SMeterQueryInfo *pMeterQueryInfo, TSKEY pMeterQueryInfo->cur.vnodeIndex = -1; // previous does not generate any results - if (pMeterQueryInfo->numOfPages == 0) { + SIDList list = getDataBufPagesIdList(pResultBuf, pMeterQueryInfo->sid); + + if (list.size == 0) { pMeterQueryInfo->reverseFillRes = 0; } else { pMeterQueryInfo->reverseIndex = pMeterQueryInfo->numOfRes; @@ -6384,35 +6412,6 @@ void changeMeterQueryInfoForSuppleQuery(SMeterQueryInfo *pMeterQueryInfo, TSKEY } } -static tFilePage *allocNewPage(SQuery *pQuery, SMeterQuerySupportObj *pSupporter, uint32_t *pageId) { - if (pSupporter->lastPageId == pSupporter->numOfPages - 1) { - if (extendDiskBuf(pQuery, pSupporter, pSupporter->numOfPages + pSupporter->numOfMeters) != TSDB_CODE_SUCCESS) { - return NULL; - } - } - - *pageId = 
(++pSupporter->lastPageId); - return getFilePage(pSupporter, *pageId); -} - -tFilePage *addDataPageForMeterQueryInfo(SQuery *pQuery, SMeterQueryInfo *pMeterQueryInfo, - SMeterQuerySupportObj *pSupporter) { - uint32_t pageId = 0; - - tFilePage *pPage = allocNewPage(pQuery, pSupporter, &pageId); - if (pPage == NULL) { // failed to allocate disk-based buffer for intermediate results - return NULL; - } - - if (pMeterQueryInfo->numOfPages >= pMeterQueryInfo->numOfAlloc) { - pMeterQueryInfo->numOfAlloc = pMeterQueryInfo->numOfAlloc << 1; - pMeterQueryInfo->pageList = realloc(pMeterQueryInfo->pageList, sizeof(uint32_t) * pMeterQueryInfo->numOfAlloc); - } - - pMeterQueryInfo->pageList[pMeterQueryInfo->numOfPages++] = pageId; - return pPage; -} - void saveIntervalQueryRange(SQueryRuntimeEnv *pRuntimeEnv, SMeterQueryInfo *pMeterQueryInfo) { SQuery *pQuery = pRuntimeEnv->pQuery; @@ -6824,25 +6823,23 @@ void setExecutionContext(SMeterQuerySupportObj *pSupporter, SOutputRes *outputRe static void setGroupOutputBuffer(SQueryRuntimeEnv *pRuntimeEnv, SOutputRes *pResult) { SQuery *pQuery = pRuntimeEnv->pQuery; - - // Note: pResult->result[i]->numOfElems == 0, there is only fixed number of results for each group + + // Note: pResult->pos[i]->numOfElems == 0, there is only fixed number of results for each group for (int32_t i = 0; i < pQuery->numOfOutputCols; ++i) { - assert(pResult->result[i]->numOfElems == 0 || pResult->result[i]->numOfElems == 1); - SQLFunctionCtx *pCtx = &pRuntimeEnv->pCtx[i]; - pCtx->aOutputBuf = pResult->result[i]->data + pCtx->outputBytes * pResult->result[i]->numOfElems; - + pCtx->aOutputBuf = getPosInResultPage(pRuntimeEnv, i, pResult); + int32_t functionId = pQuery->pSelectExpr[i].pBase.functionId; if (functionId == TSDB_FUNC_TOP || functionId == TSDB_FUNC_BOTTOM || functionId == TSDB_FUNC_DIFF) { pCtx->ptsOutputBuf = pRuntimeEnv->pCtx[0].aOutputBuf; } - + /* * set the output buffer information and intermediate buffer * not all queries require the 
interResultBuf, such as COUNT */ pCtx->resultInfo = &pResult->resultInfo[i]; - + // set super table query flag SResultInfo *pResInfo = GET_RES_INFO(pCtx); if (!isGroupbyNormalCol(pQuery->pGroupbyExpr)) { @@ -6867,12 +6864,16 @@ void setCtxOutputPointerForSupplementScan(SMeterQuerySupportObj *pSupporter, SMe tFilePage *pData = NULL; int32_t i = 0; + SQueryResultBuf* pResultBuf = pRuntimeEnv->pResultBuf; + // find the position for this output result - for (; i < pMeterQueryInfo->numOfPages; ++i) { - pData = getFilePage(pSupporter, pMeterQueryInfo->pageList[i]); + SIDList list = getDataBufPagesIdList(pResultBuf, pMeterQueryInfo->sid); + for (; i < list.size; ++i) { + pData = getResultBufferPageById(pResultBuf, list.pData[i]); if (index <= pData->numOfElems) { break; } + index -= pData->numOfElems; } @@ -6934,17 +6935,18 @@ int32_t setOutputBufferForIntervalQuery(SMeterQuerySupportObj *pSupporter, SMete SQueryRuntimeEnv *pRuntimeEnv = &pSupporter->runtimeEnv; tFilePage * pData = NULL; - SQuery *pQuery = pRuntimeEnv->pQuery; + SQueryResultBuf* pResultBuf = pRuntimeEnv->pResultBuf; // in the first scan, new space needed for results - if (pMeterQueryInfo->numOfPages == 0) { - pData = addDataPageForMeterQueryInfo(pQuery, pMeterQueryInfo, pSupporter); + SIDList list = getDataBufPagesIdList(pResultBuf, pMeterQueryInfo->sid); + int32_t pageId = -1; + if (list.size == 0) { + pData = getNewDataBuf(pResultBuf, pMeterQueryInfo->sid, &pageId); } else { - int32_t lastPageId = pMeterQueryInfo->pageList[pMeterQueryInfo->numOfPages - 1]; - pData = getFilePage(pSupporter, lastPageId); + pData = getResultBufferPageById(pResultBuf, getLastPageId(&list)); if (pData->numOfElems >= pRuntimeEnv->numOfRowsPerPage) { - pData = addDataPageForMeterQueryInfo(pRuntimeEnv->pQuery, pMeterQueryInfo, pSupporter); + pData = getNewDataBuf(pResultBuf, pMeterQueryInfo->sid, &pageId); if (pData != NULL) { assert(pData->numOfElems == 0); // number of elements must be 0 for new allocated buffer } @@ 
-7012,171 +7014,171 @@ int32_t setIntervalQueryExecutionContext(SMeterQuerySupportObj *pSupporter, int3 return 0; } -static void doApplyIntervalQueryOnBlock(SMeterQuerySupportObj *pSupporter, SMeterQueryInfo *pMeterQueryInfo, - SBlockInfo *pBlockInfo, int64_t *pPrimaryCol, SField *pFields, - __block_search_fn_t searchFn) { - SQueryRuntimeEnv *pRuntimeEnv = &pSupporter->runtimeEnv; - SQuery * pQuery = pRuntimeEnv->pQuery; - int32_t factor = GET_FORWARD_DIRECTION_FACTOR(pQuery->order.order); - - int64_t nextKey = -1; - bool queryCompleted = false; - - while (1) { - int32_t numOfRes = 0; - int32_t steps = applyFunctionsOnBlock(pRuntimeEnv, pBlockInfo, pPrimaryCol, pFields, searchFn, &numOfRes); - assert(steps > 0); - - // NOTE: in case of stable query, only ONE(or ZERO) row of result generated for each query range - if (pMeterQueryInfo->lastResRows == 0) { - pMeterQueryInfo->lastResRows = numOfRes; - } else { - assert(pMeterQueryInfo->lastResRows == 1); - } - - int32_t pos = pQuery->pos + steps * factor; - - // query does not reach the end of current block - if ((pos < pBlockInfo->size && QUERY_IS_ASC_QUERY(pQuery)) || (pos >= 0 && !QUERY_IS_ASC_QUERY(pQuery))) { - nextKey = pPrimaryCol[pos]; - } else { - assert((pQuery->lastKey > pBlockInfo->keyLast && QUERY_IS_ASC_QUERY(pQuery)) || - (pQuery->lastKey < pBlockInfo->keyFirst && !QUERY_IS_ASC_QUERY(pQuery))); - } - - // all data satisfy current query are checked, query completed - if (QUERY_IS_ASC_QUERY(pQuery)) { - queryCompleted = (nextKey > pQuery->ekey || pQuery->ekey <= pBlockInfo->keyLast); - } else { - queryCompleted = (nextKey < pQuery->ekey || pQuery->ekey >= pBlockInfo->keyFirst); - } - - /* - * 1. there may be more date that satisfy current query interval, other than - * current block, we need to try next data blocks - * 2. 
query completed, since reaches the upper bound of the main query range - */ - if (QUERY_IS_ASC_QUERY(pQuery)) { - if (pQuery->lastKey > pBlockInfo->keyLast || pQuery->lastKey > pSupporter->rawEKey || - nextKey > pSupporter->rawEKey) { - /* - * current interval query is completed, set query result flag closed and - * try next data block if pQuery->ekey == pSupporter->rawEKey, whole query is completed - */ - if (pQuery->lastKey > pBlockInfo->keyLast) { - assert(pQuery->ekey >= pBlockInfo->keyLast); - } - - if (pQuery->lastKey > pSupporter->rawEKey || nextKey > pSupporter->rawEKey) { - /* whole query completed, save result and abort */ - assert(queryCompleted); - saveResult(pSupporter, pMeterQueryInfo, pMeterQueryInfo->lastResRows); - - // save the pQuery->lastKey for retrieve data in cache, actually, there will be no qualified data in cache. - saveIntervalQueryRange(pRuntimeEnv, pMeterQueryInfo); - } else if (pQuery->ekey == pBlockInfo->keyLast) { - /* current interval query is completed, set the next query range on other data blocks if exist */ - int64_t prevEKey = pQuery->ekey; - - getAlignedIntervalQueryRange(pRuntimeEnv, pQuery->lastKey, pSupporter->rawSKey, pSupporter->rawEKey); - saveIntervalQueryRange(pRuntimeEnv, pMeterQueryInfo); - - assert(queryCompleted && prevEKey < pQuery->skey); - if (pMeterQueryInfo->lastResRows > 0) { - saveResult(pSupporter, pMeterQueryInfo, pMeterQueryInfo->lastResRows); - } - } else { - /* - * Data that satisfy current query range may locate in current block and blocks that are directly right - * next to current block. Therefore, we need to keep the query range(interval) unchanged until reaching - * the direct next data block, while only forwards the pQuery->lastKey. - * - * With the information of the directly next data block, whether locates in cache or disk, - * current interval query being completed or not can be decided. 
- */ - saveIntervalQueryRange(pRuntimeEnv, pMeterQueryInfo); - assert(pQuery->lastKey > pBlockInfo->keyLast && pQuery->lastKey <= pQuery->ekey); - - /* - * if current block is the last block of current file, we still close the result flag, and - * merge with other meters in the same group - */ - if (queryCompleted) { - saveResult(pSupporter, pMeterQueryInfo, pMeterQueryInfo->lastResRows); - } - } - - break; - } - } else { - if (pQuery->lastKey < pBlockInfo->keyFirst || pQuery->lastKey < pSupporter->rawEKey || - nextKey < pSupporter->rawEKey) { - if (pQuery->lastKey < pBlockInfo->keyFirst) { - assert(pQuery->ekey <= pBlockInfo->keyFirst); - } - - if (pQuery->lastKey < pSupporter->rawEKey || (nextKey < pSupporter->rawEKey && nextKey != -1)) { - /* whole query completed, save result and abort */ - assert(queryCompleted); - saveResult(pSupporter, pMeterQueryInfo, pMeterQueryInfo->lastResRows); - - /* - * save the pQuery->lastKey for retrieve data in cache, actually, - * there will be no qualified data in cache. - */ - saveIntervalQueryRange(pRuntimeEnv, pMeterQueryInfo); - } else if (pQuery->ekey == pBlockInfo->keyFirst) { - // current interval query is completed, set the next query range on other data blocks if exist - int64_t prevEKey = pQuery->ekey; - - getAlignedIntervalQueryRange(pRuntimeEnv, pQuery->lastKey, pSupporter->rawSKey, pSupporter->rawEKey); - saveIntervalQueryRange(pRuntimeEnv, pMeterQueryInfo); - - assert(queryCompleted && prevEKey > pQuery->skey); - if (pMeterQueryInfo->lastResRows > 0) { - saveResult(pSupporter, pMeterQueryInfo, pMeterQueryInfo->lastResRows); - } - } else { - /* - * Data that satisfy current query range may locate in current block and blocks that are - * directly right next to current block. Therefore, we need to keep the query range(interval) - * unchanged until reaching the direct next data block, while only forwards the pQuery->lastKey. 
- * - * With the information of the directly next data block, whether locates in cache or disk, - * current interval query being completed or not can be decided. - */ - saveIntervalQueryRange(pRuntimeEnv, pMeterQueryInfo); - assert(pQuery->lastKey < pBlockInfo->keyFirst && pQuery->lastKey >= pQuery->ekey); - - /* - * if current block is the last block of current file, we still close the result - * flag, and merge with other meters in the same group - */ - if (queryCompleted) { - saveResult(pSupporter, pMeterQueryInfo, pMeterQueryInfo->lastResRows); - } - } - - break; - } - } - - assert(queryCompleted); - saveResult(pSupporter, pMeterQueryInfo, pMeterQueryInfo->lastResRows); - - assert((nextKey >= pQuery->lastKey && QUERY_IS_ASC_QUERY(pQuery)) || - (nextKey <= pQuery->lastKey && !QUERY_IS_ASC_QUERY(pQuery))); - - /* still in the same block to query */ - getAlignedIntervalQueryRange(pRuntimeEnv, nextKey, pSupporter->rawSKey, pSupporter->rawEKey); - saveIntervalQueryRange(pRuntimeEnv, pMeterQueryInfo); - - int32_t newPos = searchFn((char *)pPrimaryCol, pBlockInfo->size, pQuery->skey, pQuery->order.order); - assert(newPos == pQuery->pos + steps * factor); - - pQuery->pos = newPos; - } -} +//static void doApplyIntervalQueryOnBlock(SMeterQuerySupportObj *pSupporter, SMeterQueryInfo *pMeterQueryInfo, +// SBlockInfo *pBlockInfo, int64_t *pPrimaryCol, SField *pFields, +// __block_search_fn_t searchFn) { +// SQueryRuntimeEnv *pRuntimeEnv = &pSupporter->runtimeEnv; +// SQuery * pQuery = pRuntimeEnv->pQuery; +// int32_t factor = GET_FORWARD_DIRECTION_FACTOR(pQuery->order.order); +// +// int64_t nextKey = -1; +// bool queryCompleted = false; +// +// while (1) { +// int32_t numOfRes = 0; +// int32_t steps = applyFunctionsOnBlock(pRuntimeEnv, pBlockInfo, pPrimaryCol, pFields, searchFn, &numOfRes); +// assert(steps > 0); +// +// // NOTE: in case of stable query, only ONE(or ZERO) row of pos generated for each query range +// if (pMeterQueryInfo->lastResRows == 0) { +// 
pMeterQueryInfo->lastResRows = numOfRes; +// } else { +// assert(pMeterQueryInfo->lastResRows == 1); +// } +// +// int32_t pos = pQuery->pos + steps * factor; +// +// // query does not reach the end of current block +// if ((pos < pBlockInfo->size && QUERY_IS_ASC_QUERY(pQuery)) || (pos >= 0 && !QUERY_IS_ASC_QUERY(pQuery))) { +// nextKey = pPrimaryCol[pos]; +// } else { +// assert((pQuery->lastKey > pBlockInfo->keyLast && QUERY_IS_ASC_QUERY(pQuery)) || +// (pQuery->lastKey < pBlockInfo->keyFirst && !QUERY_IS_ASC_QUERY(pQuery))); +// } +// +// // all data satisfy current query are checked, query completed +// if (QUERY_IS_ASC_QUERY(pQuery)) { +// queryCompleted = (nextKey > pQuery->ekey || pQuery->ekey <= pBlockInfo->keyLast); +// } else { +// queryCompleted = (nextKey < pQuery->ekey || pQuery->ekey >= pBlockInfo->keyFirst); +// } +// +// /* +// * 1. there may be more date that satisfy current query interval, other than +// * current block, we need to try next data blocks +// * 2. query completed, since reaches the upper bound of the main query range +// */ +// if (QUERY_IS_ASC_QUERY(pQuery)) { +// if (pQuery->lastKey > pBlockInfo->keyLast || pQuery->lastKey > pSupporter->rawEKey || +// nextKey > pSupporter->rawEKey) { +// /* +// * current interval query is completed, set query pos flag closed and +// * try next data block if pQuery->ekey == pSupporter->rawEKey, whole query is completed +// */ +// if (pQuery->lastKey > pBlockInfo->keyLast) { +// assert(pQuery->ekey >= pBlockInfo->keyLast); +// } +// +// if (pQuery->lastKey > pSupporter->rawEKey || nextKey > pSupporter->rawEKey) { +// /* whole query completed, save pos and abort */ +// assert(queryCompleted); +// saveResult(pSupporter, pMeterQueryInfo, pMeterQueryInfo->lastResRows); +// +// // save the pQuery->lastKey for retrieve data in cache, actually, there will be no qualified data in cache. 
+// saveIntervalQueryRange(pRuntimeEnv, pMeterQueryInfo); +// } else if (pQuery->ekey == pBlockInfo->keyLast) { +// /* current interval query is completed, set the next query range on other data blocks if exist */ +// int64_t prevEKey = pQuery->ekey; +// +// getAlignedIntervalQueryRange(pRuntimeEnv, pQuery->lastKey, pSupporter->rawSKey, pSupporter->rawEKey); +// saveIntervalQueryRange(pRuntimeEnv, pMeterQueryInfo); +// +// assert(queryCompleted && prevEKey < pQuery->skey); +// if (pMeterQueryInfo->lastResRows > 0) { +// saveResult(pSupporter, pMeterQueryInfo, pMeterQueryInfo->lastResRows); +// } +// } else { +// /* +// * Data that satisfy current query range may locate in current block and blocks that are directly right +// * next to current block. Therefore, we need to keep the query range(interval) unchanged until reaching +// * the direct next data block, while only forwards the pQuery->lastKey. +// * +// * With the information of the directly next data block, whether locates in cache or disk, +// * current interval query being completed or not can be decided. 
+// */ +// saveIntervalQueryRange(pRuntimeEnv, pMeterQueryInfo); +// assert(pQuery->lastKey > pBlockInfo->keyLast && pQuery->lastKey <= pQuery->ekey); +// +// /* +// * if current block is the last block of current file, we still close the pos flag, and +// * merge with other meters in the same group +// */ +// if (queryCompleted) { +// saveResult(pSupporter, pMeterQueryInfo, pMeterQueryInfo->lastResRows); +// } +// } +// +// break; +// } +// } else { +// if (pQuery->lastKey < pBlockInfo->keyFirst || pQuery->lastKey < pSupporter->rawEKey || +// nextKey < pSupporter->rawEKey) { +// if (pQuery->lastKey < pBlockInfo->keyFirst) { +// assert(pQuery->ekey <= pBlockInfo->keyFirst); +// } +// +// if (pQuery->lastKey < pSupporter->rawEKey || (nextKey < pSupporter->rawEKey && nextKey != -1)) { +// /* whole query completed, save pos and abort */ +// assert(queryCompleted); +// saveResult(pSupporter, pMeterQueryInfo, pMeterQueryInfo->lastResRows); +// +// /* +// * save the pQuery->lastKey for retrieve data in cache, actually, +// * there will be no qualified data in cache. +// */ +// saveIntervalQueryRange(pRuntimeEnv, pMeterQueryInfo); +// } else if (pQuery->ekey == pBlockInfo->keyFirst) { +// // current interval query is completed, set the next query range on other data blocks if exist +// int64_t prevEKey = pQuery->ekey; +// +// getAlignedIntervalQueryRange(pRuntimeEnv, pQuery->lastKey, pSupporter->rawSKey, pSupporter->rawEKey); +// saveIntervalQueryRange(pRuntimeEnv, pMeterQueryInfo); +// +// assert(queryCompleted && prevEKey > pQuery->skey); +// if (pMeterQueryInfo->lastResRows > 0) { +// saveResult(pSupporter, pMeterQueryInfo, pMeterQueryInfo->lastResRows); +// } +// } else { +// /* +// * Data that satisfy current query range may locate in current block and blocks that are +// * directly right next to current block. Therefore, we need to keep the query range(interval) +// * unchanged until reaching the direct next data block, while only forwards the pQuery->lastKey. 
+// * +// * With the information of the directly next data block, whether locates in cache or disk, +// * current interval query being completed or not can be decided. +// */ +// saveIntervalQueryRange(pRuntimeEnv, pMeterQueryInfo); +// assert(pQuery->lastKey < pBlockInfo->keyFirst && pQuery->lastKey >= pQuery->ekey); +// +// /* +// * if current block is the last block of current file, we still close the pos +// * flag, and merge with other meters in the same group +// */ +// if (queryCompleted) { +// saveResult(pSupporter, pMeterQueryInfo, pMeterQueryInfo->lastResRows); +// } +// } +// +// break; +// } +// } +// +// assert(queryCompleted); +// saveResult(pSupporter, pMeterQueryInfo, pMeterQueryInfo->lastResRows); +// +// assert((nextKey >= pQuery->lastKey && QUERY_IS_ASC_QUERY(pQuery)) || +// (nextKey <= pQuery->lastKey && !QUERY_IS_ASC_QUERY(pQuery))); +// +// /* still in the same block to query */ +// getAlignedIntervalQueryRange(pRuntimeEnv, nextKey, pSupporter->rawSKey, pSupporter->rawEKey); +// saveIntervalQueryRange(pRuntimeEnv, pMeterQueryInfo); +// +// int32_t newPos = searchFn((char *)pPrimaryCol, pBlockInfo->size, pQuery->skey, pQuery->order.order); +// assert(newPos == pQuery->pos + steps * factor); +// +// pQuery->pos = newPos; +// } +//} static void doApplyIntervalQueryOnBlock_rv(SMeterQuerySupportObj *pSupporter, SMeterQueryInfo *pMeterQueryInfo, SBlockInfo *pBlockInfo, int64_t *pPrimaryCol, SField *pFields, @@ -7244,7 +7246,10 @@ static void doApplyIntervalQueryOnBlock_rv(SMeterQuerySupportObj *pSupporter, SM assert(completed); - if (pQuery->ekey == pSupporter->rawEKey) { + // while the interval time window is less than the time range gap between two points, nextKey may be greater than + // pSupporter->rawEKey + if (pQuery->ekey == pSupporter->rawEKey || (nextKey > pSupporter->rawEKey && QUERY_IS_ASC_QUERY(pQuery)) || + (nextKey < pSupporter->rawEKey && !QUERY_IS_ASC_QUERY(pQuery))) { /* whole query completed, save result and abort */ 
saveResult(pSupporter, pMeterQueryInfo, pMeterQueryInfo->lastResRows); @@ -7493,8 +7498,12 @@ bool onDemandLoadDatablock(SQuery *pQuery, int16_t queryRangeSet) { static void validateResultBuf(SMeterQuerySupportObj *pSupporter, SMeterQueryInfo *pMeterQueryInfo) { SQueryRuntimeEnv *pRuntimeEnv = &pSupporter->runtimeEnv; SQuery * pQuery = pSupporter->runtimeEnv.pQuery; + SQueryResultBuf* pResultBuf = pRuntimeEnv->pResultBuf; - tFilePage *newOutput = getFilePage(pSupporter, pMeterQueryInfo->pageList[pMeterQueryInfo->numOfPages - 1]); + SIDList list = getDataBufPagesIdList(pResultBuf, pMeterQueryInfo->sid); + int32_t id = getLastPageId(&list); + + tFilePage* newOutput = getResultBufferPageById(pResultBuf, id); for (int32_t i = 0; i < pQuery->numOfOutputCols; ++i) { assert(pRuntimeEnv->pCtx[i].aOutputBuf - newOutput->data < DEFAULT_INTERN_BUF_SIZE); } @@ -7547,12 +7556,14 @@ int32_t saveResult(SMeterQuerySupportObj *pSupporter, SMeterQueryInfo *pMeterQue pMeterQueryInfo->reverseIndex -= 1; setCtxOutputPointerForSupplementScan(pSupporter, pMeterQueryInfo); } else { - int32_t pageId = pMeterQueryInfo->pageList[pMeterQueryInfo->numOfPages - 1]; - tFilePage *pData = getFilePage(pSupporter, pageId); - + SIDList list = getDataBufPagesIdList(pRuntimeEnv->pResultBuf, pMeterQueryInfo->sid); + + int32_t pageId = getLastPageId(&list); + tFilePage* pData = getResultBufferPageById(pRuntimeEnv->pResultBuf, pageId); + // in handling records occuring around '1970-01-01', the aligned start timestamp may be 0. 
TSKEY ts = *(TSKEY *)getOutputResPos(pRuntimeEnv, pData, pData->numOfElems, 0); - + SMeterObj *pMeterObj = pRuntimeEnv->pMeterObj; qTrace("QInfo:%p vid:%d sid:%d id:%s, save results, ts:%" PRId64 ", total:%d", GET_QINFO_ADDR(pQuery), pMeterObj->vnode, pMeterObj->sid, pMeterObj->meterId, ts, pMeterQueryInfo->numOfRes + 1); @@ -7580,7 +7591,7 @@ int32_t saveResult(SMeterQuerySupportObj *pSupporter, SMeterQueryInfo *pMeterQue sc[1].bytes = 8; UNUSED(sc); - tColModel *cm = tColModelCreate(sc, pQuery->numOfOutputCols, pRuntimeEnv->numOfRowsPerPage); + SColumnModel *cm = createColumnModel(sc, pQuery->numOfOutputCols, pRuntimeEnv->numOfRowsPerPage); // if (outputPage->numOfElems + numOfResult >= pRuntimeEnv->numOfRowsPerPage) tColModelDisplay(cm, outputPage->data, outputPage->numOfElems, pRuntimeEnv->numOfRowsPerPage); @@ -7590,7 +7601,7 @@ int32_t saveResult(SMeterQuerySupportObj *pSupporter, SMeterQueryInfo *pMeterQue return TSDB_CODE_SUCCESS; } -static int32_t getSubsetNumber(SMeterQuerySupportObj *pSupporter) { +static int32_t getNumOfSubset(SMeterQuerySupportObj *pSupporter) { SQuery *pQuery = pSupporter->runtimeEnv.pQuery; int32_t totalSubset = 0; @@ -7613,7 +7624,7 @@ static int32_t doCopyFromGroupBuf(SMeterQuerySupportObj *pSupporter, SOutputRes dTrace("QInfo:%p start to copy data to dest buf", GET_QINFO_ADDR(pSupporter->runtimeEnv.pQuery)); - int32_t totalSubset = getSubsetNumber(pSupporter); + int32_t totalSubset = getNumOfSubset(pSupporter); if (orderType == TSQL_SO_ASC) { startIdx = pSupporter->subgroupIdx; @@ -7631,8 +7642,6 @@ static int32_t doCopyFromGroupBuf(SMeterQuerySupportObj *pSupporter, SOutputRes assert(result[i].numOfRows >= 0 && pSupporter->offset <= 1); - tFilePage **srcBuf = result[i].result; - int32_t numOfRowsToCopy = result[i].numOfRows - pSupporter->offset; int32_t oldOffset = pSupporter->offset; @@ -7648,8 +7657,8 @@ static int32_t doCopyFromGroupBuf(SMeterQuerySupportObj *pSupporter, SOutputRes for (int32_t j = 0; j < 
pQuery->numOfOutputCols; ++j) { int32_t elemSize = pRuntimeEnv->pCtx[j].outputBytes; char * outputBuf = pQuery->sdata[j]->data + numOfResult * elemSize; - - memcpy(outputBuf, srcBuf[j]->data + oldOffset * elemSize, elemSize * numOfRowsToCopy); + char* p = getPosInResultPage(pRuntimeEnv, j, &result[i]); + memcpy(outputBuf, p + oldOffset * elemSize, elemSize * numOfRowsToCopy); } numOfResult += numOfRowsToCopy; @@ -7717,7 +7726,7 @@ static void applyIntervalQueryOnBlock(SMeterQuerySupportObj *pSupporter, SMeterD saveIntervalQueryRange(pRuntimeEnv, pMeterQueryInfo); } else { - doApplyIntervalQueryOnBlock(pSupporter, pMeterQueryInfo, pBlockInfo, pPrimaryKey, pFields, searchFn); + doApplyIntervalQueryOnBlock_rv(pSupporter, pMeterQueryInfo, pBlockInfo, pPrimaryKey, pFields, searchFn); } } @@ -7802,7 +7811,7 @@ static int32_t resultInterpolate(SQInfo *pQInfo, tFilePage **data, tFilePage **p pSchema[i].type = pQuery->pSelectExpr[i].resType; } - tColModel *pModel = tColModelCreate(pSchema, pQuery->numOfOutputCols, pQuery->pointsToRead); + SColumnModel *pModel = createColumnModel(pSchema, pQuery->numOfOutputCols, pQuery->pointsToRead); char * srcData[TSDB_MAX_COLUMNS] = {0}; int32_t functions[TSDB_MAX_COLUMNS] = {0}; @@ -7816,7 +7825,7 @@ static int32_t resultInterpolate(SQInfo *pQInfo, tFilePage **data, tFilePage **p pQuery->nAggTimeInterval, (int64_t *)pDataSrc[0]->data, pModel, srcData, pQuery->defaultVal, functions, pRuntimeEnv->pMeterObj->pointsPerFileBlock); - tColModelDestroy(pModel); + destroyColumnModel(pModel); free(pSchema); return numOfRes; @@ -7906,18 +7915,10 @@ int32_t vnodeQueryResultInterpolate(SQInfo *pQInfo, tFilePage **pDst, tFilePage ret -= pQuery->limit.offset; // todo !!!!there exactly number of interpo is not valid. 
// todo refactor move to the beginning of buffer - // if (QUERY_IS_ASC_QUERY(pQuery)) { for (int32_t i = 0; i < pQuery->numOfOutputCols; ++i) { memmove(pDst[i]->data, pDst[i]->data + pQuery->pSelectExpr[i].resBytes * pQuery->limit.offset, ret * pQuery->pSelectExpr[i].resBytes); } - // } else { - // for (int32_t i = 0; i < pQuery->numOfOutputCols; ++i) { - // memmove(pDst[i]->data + (pQuery->pointsToRead - ret) * pQuery->pSelectExpr[i].resBytes, - // pDst[i]->data + (pQuery->pointsToRead - ret - pQuery->limit.offset) * - // pQuery->pSelectExpr[i].resBytes, ret * pQuery->pSelectExpr[i].resBytes); - // } - // } pQuery->limit.offset = 0; return ret; } else { @@ -7938,7 +7939,11 @@ void vnodePrintQueryStatistics(SMeterQuerySupportObj *pSupporter) { SQInfo *pQInfo = (SQInfo *)GET_QINFO_ADDR(pQuery); SQueryCostSummary *pSummary = &pRuntimeEnv->summary; - pSummary->tmpBufferInDisk = pSupporter->bufSize; + if (pRuntimeEnv->pResultBuf == NULL) { + pSummary->tmpBufferInDisk = 0; + } else { + pSummary->tmpBufferInDisk = getResBufSize(pRuntimeEnv->pResultBuf); + } dTrace("QInfo:%p statis: comp blocks:%d, size:%d Bytes, elapsed time:%.2f ms", pQInfo, pSummary->readCompInfo, pSummary->totalCompInfoSize, pSummary->loadCompInfoUs / 1000.0); diff --git a/src/system/detail/src/vnodeQueryProcess.c b/src/system/detail/src/vnodeQueryProcess.c index 6fe8b2fa775b57799b150f57ba3f0eb71e7ab3f6..1b04806f7c4e68f8a6c3f98df82f5941414f700f 100644 --- a/src/system/detail/src/vnodeQueryProcess.c +++ b/src/system/detail/src/vnodeQueryProcess.c @@ -132,7 +132,7 @@ static void queryOnMultiDataCache(SQInfo *pQInfo, SMeterDataInfo *pMeterInfo) { pRuntimeEnv->pMeterObj = pMeterObj; if (pMeterInfo[k].pMeterQInfo == NULL) { - pMeterInfo[k].pMeterQInfo = createMeterQueryInfo(pQuery, pSupporter->rawSKey, pSupporter->rawEKey); + pMeterInfo[k].pMeterQInfo = createMeterQueryInfo(pQuery, pMeterObj->sid, pSupporter->rawSKey, pSupporter->rawEKey); } if (pMeterInfo[k].pMeterObj == NULL) { // no data in disk for 
this meter, set its pointer @@ -683,7 +683,7 @@ static void vnodeSTableSeqProcessor(SQInfo *pQInfo) { } resetCtxOutputBuf(pRuntimeEnv); - resetSlidingWindowInfo(&pRuntimeEnv->swindowResInfo, pQuery->numOfOutputCols); + resetSlidingWindowInfo(pRuntimeEnv, &pRuntimeEnv->swindowResInfo); while (pSupporter->meterIdx < pSupporter->numOfMeters) { int32_t k = pSupporter->meterIdx; @@ -858,7 +858,9 @@ static void doOrderedScan(SQInfo *pQInfo) { static void setupMeterQueryInfoForSupplementQuery(SMeterQuerySupportObj *pSupporter) { for (int32_t i = 0; i < pSupporter->numOfMeters; ++i) { SMeterQueryInfo *pMeterQueryInfo = pSupporter->pMeterDataInfo[i].pMeterQInfo; - changeMeterQueryInfoForSuppleQuery(pMeterQueryInfo, pSupporter->rawSKey, pSupporter->rawEKey); + SQueryResultBuf* pResultBuf = pSupporter->runtimeEnv.pResultBuf; + + changeMeterQueryInfoForSuppleQuery(pResultBuf, pMeterQueryInfo, pSupporter->rawSKey, pSupporter->rawEKey); } } @@ -1086,7 +1088,7 @@ static void vnodeSingleMeterIntervalMainLooper(SMeterQuerySupportObj *pSupporter (pQuery->skey >= pQuery->ekey && !QUERY_IS_ASC_QUERY(pQuery))); initCtxOutputBuf(pRuntimeEnv); - clearCompletedSlidingWindows(&pRuntimeEnv->swindowResInfo, pQuery->numOfOutputCols); + clearCompletedSlidingWindows(pRuntimeEnv); vnodeScanAllData(pRuntimeEnv); if (isQueryKilled(pQuery)) { diff --git a/src/system/detail/src/vnodeShell.c b/src/system/detail/src/vnodeShell.c index 69d502c61828154b6ea985399e41e5039a6f9805..6e43e7074243ba8480c4781139c14c50e311eeb4 100644 --- a/src/system/detail/src/vnodeShell.c +++ b/src/system/detail/src/vnodeShell.c @@ -442,22 +442,22 @@ void vnodeExecuteRetrieveReq(SSchedMsg *pSched) { if (code == TSDB_CODE_SUCCESS) { size = vnodeGetResultSize((void *)(pRetrieve->qhandle), &numOfRows); + + // buffer size for progress information, including meter count, + // and for each meter, including 'uid' and 'TSKEY'. 
+ int progressSize = 0; + if (pQInfo->pMeterQuerySupporter != NULL) + progressSize = pQInfo->pMeterQuerySupporter->numOfMeters * (sizeof(int64_t) + sizeof(TSKEY)) + sizeof(int32_t); + else if (pQInfo->pObj != NULL) + progressSize = sizeof(int64_t) + sizeof(TSKEY) + sizeof(int32_t); + + pStart = taosBuildRspMsgWithSize(pObj->thandle, TSDB_MSG_TYPE_RETRIEVE_RSP, progressSize + size + 100); + if (pStart == NULL) { + taosSendSimpleRsp(pObj->thandle, TSDB_MSG_TYPE_RETRIEVE_RSP, TSDB_CODE_SERV_OUT_OF_MEMORY); + goto _exit; + } } - // buffer size for progress information, including meter count, - // and for each meter, including 'uid' and 'TSKEY'. - int progressSize = 0; - if (pQInfo->pMeterQuerySupporter != NULL) - progressSize = pQInfo->pMeterQuerySupporter->numOfMeters * (sizeof(int64_t) + sizeof(TSKEY)) + sizeof(int32_t); - else if (pQInfo->pObj != NULL) - progressSize = sizeof(int64_t) + sizeof(TSKEY) + sizeof(int32_t); - - pStart = taosBuildRspMsgWithSize(pObj->thandle, TSDB_MSG_TYPE_RETRIEVE_RSP, progressSize + size + 100); - if (pStart == NULL) { - taosSendSimpleRsp(pObj->thandle, TSDB_MSG_TYPE_RETRIEVE_RSP, TSDB_CODE_SERV_OUT_OF_MEMORY); - goto _exit; - } - pMsg = pStart; *pMsg = code; @@ -485,26 +485,28 @@ void vnodeExecuteRetrieveReq(SSchedMsg *pSched) { // write the progress information of each meter to response // this is required by subscriptions - if (pQInfo->pMeterQuerySupporter != NULL && pQInfo->pMeterQuerySupporter->pMeterSidExtInfo != NULL) { - *((int32_t*)pMsg) = htonl(pQInfo->pMeterQuerySupporter->numOfMeters); - pMsg += sizeof(int32_t); - for (int32_t i = 0; i < pQInfo->pMeterQuerySupporter->numOfMeters; i++) { - *((int64_t*)pMsg) = htobe64(pQInfo->pMeterQuerySupporter->pMeterSidExtInfo[i]->uid); + if (numOfRows > 0 && code == TSDB_CODE_SUCCESS) { + if (pQInfo->pMeterQuerySupporter != NULL && pQInfo->pMeterQuerySupporter->pMeterSidExtInfo != NULL) { + *((int32_t *)pMsg) = htonl(pQInfo->pMeterQuerySupporter->numOfMeters); + pMsg += sizeof(int32_t); + 
for (int32_t i = 0; i < pQInfo->pMeterQuerySupporter->numOfMeters; i++) { + *((int64_t *)pMsg) = htobe64(pQInfo->pMeterQuerySupporter->pMeterSidExtInfo[i]->uid); + pMsg += sizeof(int64_t); + *((TSKEY *)pMsg) = htobe64(pQInfo->pMeterQuerySupporter->pMeterSidExtInfo[i]->key); + pMsg += sizeof(TSKEY); + } + } else if (pQInfo->pObj != NULL) { + *((int32_t *)pMsg) = htonl(1); + pMsg += sizeof(int32_t); + *((int64_t *)pMsg) = htobe64(pQInfo->pObj->uid); pMsg += sizeof(int64_t); - *((TSKEY*)pMsg) = htobe64(pQInfo->pMeterQuerySupporter->pMeterSidExtInfo[i]->key); + if (pQInfo->pointsRead > 0) { + *((TSKEY *)pMsg) = htobe64(pQInfo->query.lastKey + 1); + } else { + *((TSKEY *)pMsg) = htobe64(pQInfo->query.lastKey); + } pMsg += sizeof(TSKEY); } - } else if (pQInfo->pObj != NULL) { - *((int32_t*)pMsg) = htonl(1); - pMsg += sizeof(int32_t); - *((int64_t*)pMsg) = htobe64(pQInfo->pObj->uid); - pMsg += sizeof(int64_t); - if (pQInfo->pointsRead > 0) { - *((TSKEY*)pMsg) = htobe64(pQInfo->query.lastKey + 1); - } else { - *((TSKEY*)pMsg) = htobe64(pQInfo->query.lastKey); - } - pMsg += sizeof(TSKEY); } msgLen = pMsg - pStart; diff --git a/src/system/detail/src/vnodeTagMgmt.c b/src/system/detail/src/vnodeTagMgmt.c index cea4f75f83fb200f1bae2473691a329ce91ccae5..5585813ec377571f8415ba6b949bb158285b1d3e 100644 --- a/src/system/detail/src/vnodeTagMgmt.c +++ b/src/system/detail/src/vnodeTagMgmt.c @@ -24,10 +24,10 @@ #include "tast.h" #include "vnodeTagMgmt.h" -#define GET_TAG_VAL_POINTER(s, col, sc, t) ((t *)(&((s)->tags[(sc)->colOffset[(col)]]))) +#define GET_TAG_VAL_POINTER(s, col, sc, t) ((t *)(&((s)->tags[getColumnModelOffset(sc, col)]))) #define GET_TAG_VAL(s, col, sc, t) (*GET_TAG_VAL_POINTER(s, col, sc, t)) -static void tTagsPrints(SMeterSidExtInfo *pMeterInfo, tTagSchema *pSchema, tOrderIdx *pOrder); +static void tTagsPrints(SMeterSidExtInfo *pMeterInfo, SColumnModel *pSchema, SColumnOrderInfo *pOrder); static void tSidSetDisplay(tSidSet *pSets); @@ -65,7 +65,7 @@ int32_t 
meterSidComparator(const void *p1, const void *p2, void *param) { SMeterSidExtInfo *s1 = (SMeterSidExtInfo *)p1; SMeterSidExtInfo *s2 = (SMeterSidExtInfo *)p2; - for (int32_t i = 0; i < pOrderDesc->orderIdx.numOfOrderedCols; ++i) { + for (int32_t i = 0; i < pOrderDesc->orderIdx.numOfCols; ++i) { int32_t colIdx = pOrderDesc->orderIdx.pData[i]; char * f1 = NULL; @@ -79,9 +79,9 @@ int32_t meterSidComparator(const void *p1, const void *p2, void *param) { type = TSDB_DATA_TYPE_BINARY; bytes = TSDB_METER_NAME_LEN; } else { - f1 = GET_TAG_VAL_POINTER(s1, colIdx, pOrderDesc->pTagSchema, char); - f2 = GET_TAG_VAL_POINTER(s2, colIdx, pOrderDesc->pTagSchema, char); - SSchema *pSchema = &pOrderDesc->pTagSchema->pSchema[colIdx]; + f1 = GET_TAG_VAL_POINTER(s1, colIdx, pOrderDesc->pColumnModel, char); + f2 = GET_TAG_VAL_POINTER(s2, colIdx, pOrderDesc->pColumnModel, char); + SSchema *pSchema = getColumnModelSchema(pOrderDesc->pColumnModel, colIdx); type = pSchema->type; bytes = pSchema->bytes; } @@ -116,9 +116,9 @@ static void median(void **pMeterSids, size_t size, int32_t s1, int32_t s2, tOrde compareFn(pMeterSids[s1], pMeterSids[s2], pOrderDesc) <= 0); #ifdef _DEBUG_VIEW - tTagsPrints(pMeterSids[s1], pOrderDesc->pTagSchema, &pOrderDesc->orderIdx); - tTagsPrints(pMeterSids[midIdx], pOrderDesc->pTagSchema, &pOrderDesc->orderIdx); - tTagsPrints(pMeterSids[s2], pOrderDesc->pTagSchema, &pOrderDesc->orderIdx); + tTagsPrints(pMeterSids[s1], pOrderDesc->pColumnModel, &pOrderDesc->orderIdx); + tTagsPrints(pMeterSids[midIdx], pOrderDesc->pColumnModel, &pOrderDesc->orderIdx); + tTagsPrints(pMeterSids[s2], pOrderDesc->pColumnModel, &pOrderDesc->orderIdx); #endif } @@ -241,25 +241,6 @@ int32_t *calculateSubGroup(void **pSids, int32_t numOfMeters, int32_t *numOfSubs return starterPos; } -tTagSchema *tCreateTagSchema(SSchema *pSchema, int32_t numOfTagCols) { - if (numOfTagCols == 0 || pSchema == NULL) { - return NULL; - } - - tTagSchema *pTagSchema = - (tTagSchema *)calloc(1, 
sizeof(tTagSchema) + numOfTagCols * sizeof(int32_t) + sizeof(SSchema) * numOfTagCols); - - pTagSchema->colOffset[0] = 0; - pTagSchema->numOfCols = numOfTagCols; - for (int32_t i = 1; i < numOfTagCols; ++i) { - pTagSchema->colOffset[i] = (pTagSchema->colOffset[i - 1] + pSchema[i - 1].bytes); - } - - pTagSchema->pSchema = (SSchema *)&(pTagSchema->colOffset[numOfTagCols]); - memcpy(pTagSchema->pSchema, pSchema, sizeof(SSchema) * numOfTagCols); - return pTagSchema; -} - tSidSet *tSidSetCreate(struct SMeterSidExtInfo **pMeterSidExtInfo, int32_t numOfMeters, SSchema *pSchema, int32_t numOfTags, SColIndexEx *colList, int32_t numOfCols) { tSidSet *pSidSet = (tSidSet *)calloc(1, sizeof(tSidSet) + numOfCols * sizeof(int16_t)); @@ -269,8 +250,8 @@ tSidSet *tSidSetCreate(struct SMeterSidExtInfo **pMeterSidExtInfo, int32_t numOf pSidSet->numOfSids = numOfMeters; pSidSet->pSids = pMeterSidExtInfo; - pSidSet->pTagSchema = tCreateTagSchema(pSchema, numOfTags); - pSidSet->orderIdx.numOfOrderedCols = numOfCols; + pSidSet->pColumnModel = createColumnModel(pSchema, numOfTags, 1); + pSidSet->orderIdx.numOfCols = numOfCols; /* * in case of "group by tbname,normal_col", the normal_col is ignored @@ -282,7 +263,7 @@ tSidSet *tSidSetCreate(struct SMeterSidExtInfo **pMeterSidExtInfo, int32_t numOf } } - pSidSet->orderIdx.numOfOrderedCols = numOfTagCols; + pSidSet->orderIdx.numOfCols = numOfTagCols; pSidSet->starterPos = NULL; return pSidSet; @@ -291,19 +272,19 @@ tSidSet *tSidSetCreate(struct SMeterSidExtInfo **pMeterSidExtInfo, int32_t numOf void tSidSetDestroy(tSidSet **pSets) { if ((*pSets) != NULL) { tfree((*pSets)->starterPos); - tfree((*pSets)->pTagSchema)(*pSets)->pSids = NULL; + tfree((*pSets)->pColumnModel)(*pSets)->pSids = NULL; tfree(*pSets); } } -void tTagsPrints(SMeterSidExtInfo *pMeterInfo, tTagSchema *pSchema, tOrderIdx *pOrder) { +void tTagsPrints(SMeterSidExtInfo *pMeterInfo, SColumnModel *pSchema, SColumnOrderInfo *pOrder) { if (pSchema == NULL) { return; } printf("sid: 
%-5d tags(", pMeterInfo->sid); - for (int32_t i = 0; i < pOrder->numOfOrderedCols; ++i) { + for (int32_t i = 0; i < pOrder->numOfCols; ++i) { int32_t colIndex = pOrder->pData[i]; // it is the tbname column @@ -312,7 +293,9 @@ void tTagsPrints(SMeterSidExtInfo *pMeterInfo, tTagSchema *pSchema, tOrderIdx *p continue; } - switch (pSchema->pSchema[colIndex].type) { + SSchema* s = getColumnModelSchema(pSchema, colIndex); + + switch (s->type) { case TSDB_DATA_TYPE_INT: printf("%d, ", GET_TAG_VAL(pMeterInfo, colIndex, pSchema, int32_t)); break; @@ -336,9 +319,9 @@ void tTagsPrints(SMeterSidExtInfo *pMeterInfo, tTagSchema *pSchema, tOrderIdx *p break; case TSDB_DATA_TYPE_NCHAR: { char *data = GET_TAG_VAL_POINTER(pMeterInfo, colIndex, pSchema, char); + char buffer[512] = {0}; - - taosUcs4ToMbs(data, pSchema->pSchema[colIndex].bytes, buffer); + taosUcs4ToMbs(data, s->bytes, buffer); printf("%s, ", buffer); break; } @@ -370,16 +353,16 @@ static void UNUSED_FUNC tSidSetDisplay(tSidSet *pSets) { printf("the %d-th subgroup: \n", i + 1); for (int32_t j = s; j < e; ++j) { - tTagsPrints(pSets->pSids[j], pSets->pTagSchema, &pSets->orderIdx); + tTagsPrints(pSets->pSids[j], pSets->pColumnModel, &pSets->orderIdx); } } } void tSidSetSort(tSidSet *pSets) { pTrace("number of meters in sort: %d", pSets->numOfSids); - tOrderIdx *pOrderIdx = &pSets->orderIdx; + SColumnOrderInfo *pOrderIdx = &pSets->orderIdx; - if (pOrderIdx->numOfOrderedCols == 0 || pSets->numOfSids <= 1 || pSets->pTagSchema == NULL) { // no group by tags clause + if (pOrderIdx->numOfCols == 0 || pSets->numOfSids <= 1 || pSets->pColumnModel == NULL) { // no group by tags clause pSets->numOfSubSet = 1; pSets->starterPos = (int32_t *)malloc(sizeof(int32_t) * (pSets->numOfSubSet + 1)); pSets->starterPos[0] = 0; @@ -390,11 +373,11 @@ void tSidSetSort(tSidSet *pSets) { #endif } else { tOrderDescriptor *descriptor = - (tOrderDescriptor *)calloc(1, sizeof(tOrderDescriptor) + sizeof(int16_t) * pSets->orderIdx.numOfOrderedCols); - 
descriptor->pTagSchema = pSets->pTagSchema; + (tOrderDescriptor *)calloc(1, sizeof(tOrderDescriptor) + sizeof(int16_t) * pSets->orderIdx.numOfCols); + descriptor->pColumnModel = pSets->pColumnModel; descriptor->orderIdx = pSets->orderIdx; - memcpy(descriptor->orderIdx.pData, pOrderIdx->pData, sizeof(int16_t) * pSets->orderIdx.numOfOrderedCols); + memcpy(descriptor->orderIdx.pData, pOrderIdx->pData, sizeof(int16_t) * pSets->orderIdx.numOfCols); tQSortEx((void **)pSets->pSids, POINTER_BYTES, 0, pSets->numOfSids - 1, descriptor, meterSidComparator); pSets->starterPos = diff --git a/src/system/detail/src/vnodeUtil.c b/src/system/detail/src/vnodeUtil.c index c9d7ca0cf42d099c3e30906992084841950c6ef1..6f25d3a8b12c5cf730b8cbfadfaaa9e2837c5a63 100644 --- a/src/system/detail/src/vnodeUtil.c +++ b/src/system/detail/src/vnodeUtil.c @@ -247,7 +247,7 @@ SSqlFunctionExpr* vnodeCreateSqlFunctionExpr(SQueryMeterMsg* pQueryMsg, int32_t* SColIndexEx* pColumnIndexExInfo = &pExprs[i].pBase.colInfo; - // tag column schema is kept in pQueryMsg->pTagSchema + // tag column schema is kept in pQueryMsg->pColumnModel if (TSDB_COL_IS_TAG(pColumnIndexExInfo->flag)) { if (pColumnIndexExInfo->colIdx >= pQueryMsg->numOfTagsCols) { *code = TSDB_CODE_INVALID_QUERY_MSG; diff --git a/src/util/src/hash.c b/src/util/src/hash.c index 506829368812325d4c77492ea9411d9952944034..99643c92cc68d2964db90fdbf259b37c174bb5f9 100644 --- a/src/util/src/hash.c +++ b/src/util/src/hash.c @@ -346,6 +346,14 @@ static void doAddToHashTable(HashObj *pObj, SHashNode *pNode) { // pTrace("key:%s %p add to hash table", key, pNode); } +int32_t taosNumElemsInHashTable(HashObj *pObj) { + if (pObj == NULL) { + return 0; + } + + return pObj->size; +} + /** * add data node into hash table * @param pObj hash object @@ -392,7 +400,7 @@ int32_t taosAddToHashTable(HashObj *pObj, const char *key, uint32_t keyLen, void return 0; } -char *taosGetDataFromHash(HashObj *pObj, const char *key, uint32_t keyLen) { +char 
*taosGetDataFromHashTable(HashObj *pObj, const char *key, uint32_t keyLen) { if (pObj->multithreadSafe) { __rd_lock(&pObj->lock); } diff --git a/src/util/src/textbuffer.c b/src/util/src/textbuffer.c index e1c571f4c28a3b40ba7f7d2cdd33be5e4d965946..860de6782be97ce83032cf60d3d2f303af18c795 100644 --- a/src/util/src/textbuffer.c +++ b/src/util/src/textbuffer.c @@ -23,7 +23,7 @@ #include "tutil.h" #define COLMODEL_GET_VAL(data, schema, allrow, rowId, colId) \ - (data + (schema)->colOffset[colId] * (allrow) + (rowId) * (schema)->pFields[colId].bytes) + (data + (schema)->pFields[colId].offset * (allrow) + (rowId) * (schema)->pFields[colId].field.bytes) int32_t tmpFileSerialNum = 0; @@ -49,46 +49,50 @@ void getTmpfilePath(const char *fileNamePrefix, char *dstPath) { } /* - * tColModel is deeply copy + * SColumnModel is deeply copy */ -void tExtMemBufferCreate(tExtMemBuffer **pMemBuffer, int32_t nBufferSize, int32_t elemSize, const char *tmpDataFilePath, - tColModel *pModel) { - (*pMemBuffer) = (tExtMemBuffer *)calloc(1, sizeof(tExtMemBuffer)); +tExtMemBuffer* createExtMemBuffer(int32_t inMemSize, int32_t elemSize, SColumnModel *pModel) { + tExtMemBuffer* pMemBuffer = (tExtMemBuffer *)calloc(1, sizeof(tExtMemBuffer)); - (*pMemBuffer)->nPageSize = DEFAULT_PAGE_SIZE; - (*pMemBuffer)->nMaxSizeInPages = ALIGN8(nBufferSize) / (*pMemBuffer)->nPageSize; - (*pMemBuffer)->nElemSize = elemSize; + pMemBuffer->pageSize = DEFAULT_PAGE_SIZE; + pMemBuffer->inMemCapacity = ALIGN8(inMemSize) / pMemBuffer->pageSize; + pMemBuffer->nElemSize = elemSize; - (*pMemBuffer)->numOfElemsPerPage = ((*pMemBuffer)->nPageSize - sizeof(tFilePage)) / (*pMemBuffer)->nElemSize; + pMemBuffer->numOfElemsPerPage = (pMemBuffer->pageSize - sizeof(tFilePage)) / pMemBuffer->nElemSize; + + char name[MAX_TMPFILE_PATH_LENGTH] = {0}; + getTmpfilePath("extbuf", name); + + pMemBuffer->path = strdup(name); + pTrace("create tmp file:%s", pMemBuffer->path); + + SFileInfo *pFMeta = &pMemBuffer->fileMeta; - 
strcpy((*pMemBuffer)->dataFilePath, tmpDataFilePath); - - tFileMeta *pFMeta = &(*pMemBuffer)->fileMeta; - - pFMeta->numOfElemsInFile = 0; - pFMeta->nFileSize = 0; - pFMeta->nPageSize = DEFAULT_PAGE_SIZE; + pFMeta->pageSize = DEFAULT_PAGE_SIZE; pFMeta->flushoutData.nAllocSize = 4; pFMeta->flushoutData.nLength = 0; pFMeta->flushoutData.pFlushoutInfo = (tFlushoutInfo *)calloc(4, sizeof(tFlushoutInfo)); - (*pMemBuffer)->pColModel = tColModelCreate(pModel->pFields, pModel->numOfCols, (*pMemBuffer)->numOfElemsPerPage); + pMemBuffer->pColumnModel = cloneColumnModel(pModel); + pMemBuffer->pColumnModel->capacity = pMemBuffer->numOfElemsPerPage; + + return pMemBuffer; } -void tExtMemBufferDestroy(tExtMemBuffer **pMemBuffer) { - if ((*pMemBuffer) == NULL) { - return; +void* destoryExtMemBuffer(tExtMemBuffer *pMemBuffer) { + if (pMemBuffer == NULL) { + return NULL; } // release flush out info link - tFileMeta *pFileMeta = &(*pMemBuffer)->fileMeta; + SFileInfo *pFileMeta = &pMemBuffer->fileMeta; if (pFileMeta->flushoutData.nAllocSize != 0 && pFileMeta->flushoutData.pFlushoutInfo != NULL) { tfree(pFileMeta->flushoutData.pFlushoutInfo); } // release all in-memory buffer pages - tFilePagesItem *pFilePages = (*pMemBuffer)->pHead; + tFilePagesItem *pFilePages = pMemBuffer->pHead; while (pFilePages != NULL) { tFilePagesItem *pTmp = pFilePages; pFilePages = pFilePages->pNext; @@ -96,24 +100,27 @@ void tExtMemBufferDestroy(tExtMemBuffer **pMemBuffer) { } // close temp file - if ((*pMemBuffer)->dataFile != 0) { - if (fclose((*pMemBuffer)->dataFile) != 0) { - pError("failed to close file:%s, reason:%s", (*pMemBuffer)->dataFilePath, strerror(errno)); + if (pMemBuffer->file != 0) { + if (fclose(pMemBuffer->file) != 0) { + pError("failed to close file:%s, reason:%s", pMemBuffer->path, strerror(errno)); } - pTrace("remove temp file:%s for external buffer", (*pMemBuffer)->dataFilePath); - unlink((*pMemBuffer)->dataFilePath); + pTrace("remove temp file:%s for external buffer", 
pMemBuffer->path); + unlink(pMemBuffer->path); } - tColModelDestroy((*pMemBuffer)->pColModel); + destroyColumnModel(pMemBuffer->pColumnModel); - tfree(*pMemBuffer); + tfree(pMemBuffer->path); + tfree(pMemBuffer); + + return NULL; } /* * alloc more memory for flush out info entries. */ -static bool allocFlushoutInfoEntries(tFileMeta *pFileMeta) { +static bool allocFlushoutInfoEntries(SFileInfo *pFileMeta) { pFileMeta->flushoutData.nAllocSize = pFileMeta->flushoutData.nAllocSize << 1; tFlushoutInfo *tmp = (tFlushoutInfo *)realloc(pFileMeta->flushoutData.pFlushoutInfo, @@ -127,12 +134,12 @@ static bool allocFlushoutInfoEntries(tFileMeta *pFileMeta) { return true; } -bool tExtMemBufferAlloc(tExtMemBuffer *pMemBuffer) { - if (pMemBuffer->numOfPagesInMem > 0 && pMemBuffer->numOfPagesInMem == pMemBuffer->nMaxSizeInPages) { - /* - * the in-mem buffer is full. - * To flush data to disk to accommodate more data - */ +static bool tExtMemBufferAlloc(tExtMemBuffer *pMemBuffer) { + /* + * the in-mem buffer is full. + * To flush data to disk to accommodate more data + */ + if (pMemBuffer->numOfInMemPages > 0 && pMemBuffer->numOfInMemPages == pMemBuffer->inMemCapacity) { if (!tExtMemBufferFlush(pMemBuffer)) { return false; } @@ -140,12 +147,12 @@ bool tExtMemBufferAlloc(tExtMemBuffer *pMemBuffer) { /* * We do not recycle the file page structure. And in flush data operations, all - * filepage that are full of data are destroyed after data being flushed to disk. + * file page that are full of data are destroyed after data being flushed to disk. * * The memory buffer pages may be recycle in order to avoid unnecessary memory * allocation later. 
*/ - tFilePagesItem *item = (tFilePagesItem *)calloc(1, pMemBuffer->nPageSize + sizeof(tFilePagesItem)); + tFilePagesItem *item = (tFilePagesItem *)calloc(1, pMemBuffer->pageSize + sizeof(tFilePagesItem)); if (item == NULL) { return false; } @@ -161,8 +168,7 @@ bool tExtMemBufferAlloc(tExtMemBuffer *pMemBuffer) { pMemBuffer->pHead = item; } - pMemBuffer->numOfPagesInMem += 1; - + pMemBuffer->numOfInMemPages += 1; return true; } @@ -171,7 +177,7 @@ bool tExtMemBufferAlloc(tExtMemBuffer *pMemBuffer) { */ int16_t tExtMemBufferPut(tExtMemBuffer *pMemBuffer, void *data, int32_t numOfRows) { if (numOfRows == 0) { - return pMemBuffer->numOfPagesInMem; + return pMemBuffer->numOfInMemPages; } tFilePagesItem *pLast = pMemBuffer->pTail; @@ -183,24 +189,23 @@ int16_t tExtMemBufferPut(tExtMemBuffer *pMemBuffer, void *data, int32_t numOfRow pLast = pMemBuffer->pTail; } - if (pLast->item.numOfElems + numOfRows <= pMemBuffer->numOfElemsPerPage) { - // enough space for records - tColModelAppend(pMemBuffer->pColModel, &pLast->item, data, 0, numOfRows, numOfRows); + if (pLast->item.numOfElems + numOfRows <= pMemBuffer->numOfElemsPerPage) { // enough space for records + tColModelAppend(pMemBuffer->pColumnModel, &pLast->item, data, 0, numOfRows, numOfRows); + pMemBuffer->numOfElemsInBuffer += numOfRows; - pMemBuffer->numOfAllElems += numOfRows; + pMemBuffer->numOfTotalElems += numOfRows; } else { int32_t numOfRemainEntries = pMemBuffer->numOfElemsPerPage - pLast->item.numOfElems; - tColModelAppend(pMemBuffer->pColModel, &pLast->item, data, 0, numOfRemainEntries, numOfRows); + tColModelAppend(pMemBuffer->pColumnModel, &pLast->item, data, 0, numOfRemainEntries, numOfRows); pMemBuffer->numOfElemsInBuffer += numOfRemainEntries; - pMemBuffer->numOfAllElems += numOfRemainEntries; + pMemBuffer->numOfTotalElems += numOfRemainEntries; int32_t hasWritten = numOfRemainEntries; int32_t remain = numOfRows - numOfRemainEntries; while (remain > 0) { - if (!tExtMemBufferAlloc(pMemBuffer)) { - // 
failed to allocate memory buffer + if (!tExtMemBufferAlloc(pMemBuffer)) { // failed to allocate memory buffer return -1; } @@ -211,10 +216,10 @@ int16_t tExtMemBufferPut(tExtMemBuffer *pMemBuffer, void *data, int32_t numOfRow numOfWriteElems = remain; } - pMemBuffer->numOfAllElems += numOfWriteElems; + pMemBuffer->numOfTotalElems += numOfWriteElems; pLast = pMemBuffer->pTail; - tColModelAppend(pMemBuffer->pColModel, &pLast->item, data, hasWritten, numOfWriteElems, numOfRows); + tColModelAppend(pMemBuffer->pColumnModel, &pLast->item, data, hasWritten, numOfWriteElems, numOfRows); remain -= numOfWriteElems; pMemBuffer->numOfElemsInBuffer += numOfWriteElems; @@ -222,11 +227,11 @@ int16_t tExtMemBufferPut(tExtMemBuffer *pMemBuffer, void *data, int32_t numOfRow } } - return pMemBuffer->numOfPagesInMem; + return pMemBuffer->numOfInMemPages; } static bool tExtMemBufferUpdateFlushoutInfo(tExtMemBuffer *pMemBuffer) { - tFileMeta *pFileMeta = &pMemBuffer->fileMeta; + SFileInfo *pFileMeta = &pMemBuffer->fileMeta; if (pMemBuffer->flushModel == MULTIPLE_APPEND_MODEL) { if (pFileMeta->flushoutData.nLength == pFileMeta->flushoutData.nAllocSize && !allocFlushoutInfoEntries(pFileMeta)) { @@ -243,46 +248,47 @@ static bool tExtMemBufferUpdateFlushoutInfo(tExtMemBuffer *pMemBuffer) { } // only the page still in buffer is flushed out to disk - pFlushoutInfo->numOfPages = pMemBuffer->numOfPagesInMem; + pFlushoutInfo->numOfPages = pMemBuffer->numOfInMemPages; pFileMeta->flushoutData.nLength += 1; } else { - // always update the first flushout array in single_flush_model + // always update the first flush out array in single_flush_model pFileMeta->flushoutData.nLength = 1; tFlushoutInfo *pFlushoutInfo = &pFileMeta->flushoutData.pFlushoutInfo[0]; - pFlushoutInfo->numOfPages += pMemBuffer->numOfPagesInMem; + pFlushoutInfo->numOfPages += pMemBuffer->numOfInMemPages; } return true; } static void tExtMemBufferClearFlushoutInfo(tExtMemBuffer *pMemBuffer) { - tFileMeta *pFileMeta = 
&pMemBuffer->fileMeta; + SFileInfo *pFileMeta = &pMemBuffer->fileMeta; pFileMeta->flushoutData.nLength = 0; memset(pFileMeta->flushoutData.pFlushoutInfo, 0, sizeof(tFlushoutInfo) * pFileMeta->flushoutData.nAllocSize); } bool tExtMemBufferFlush(tExtMemBuffer *pMemBuffer) { - if (pMemBuffer->numOfAllElems == 0) { + if (pMemBuffer->numOfTotalElems == 0) { return true; } - if (pMemBuffer->dataFile == NULL) { - if ((pMemBuffer->dataFile = fopen(pMemBuffer->dataFilePath, "wb+")) == NULL) { + if (pMemBuffer->file == NULL) { + if ((pMemBuffer->file = fopen(pMemBuffer->path, "wb+")) == NULL) { return false; } } + /* all data has been flushed to disk, ignore flush operation */ if (pMemBuffer->numOfElemsInBuffer == 0) { - /* all data has been flushed to disk, ignore flush operation */ return true; } - bool ret = true; + bool ret = true; + tFilePagesItem *first = pMemBuffer->pHead; while (first != NULL) { - size_t retVal = fwrite((char *)&(first->item), pMemBuffer->nPageSize, 1, pMemBuffer->dataFile); + size_t retVal = fwrite((char *)&(first->item), pMemBuffer->pageSize, 1, pMemBuffer->file); if (retVal <= 0) { // failed to write to buffer, may be not enough space ret = false; } @@ -296,12 +302,12 @@ bool tExtMemBufferFlush(tExtMemBuffer *pMemBuffer) { tfree(ptmp); // release all data in memory buffer } - fflush(pMemBuffer->dataFile); // flush to disk + fflush(pMemBuffer->file); // flush to disk tExtMemBufferUpdateFlushoutInfo(pMemBuffer); pMemBuffer->numOfElemsInBuffer = 0; - pMemBuffer->numOfPagesInMem = 0; + pMemBuffer->numOfInMemPages = 0; pMemBuffer->pHead = NULL; pMemBuffer->pTail = NULL; @@ -309,11 +315,11 @@ bool tExtMemBufferFlush(tExtMemBuffer *pMemBuffer) { } void tExtMemBufferClear(tExtMemBuffer *pMemBuffer) { - if (pMemBuffer == NULL || pMemBuffer->numOfAllElems == 0) return; + if (pMemBuffer == NULL || pMemBuffer->numOfTotalElems == 0) { + return; + } - /* - * release all data in memory buffer - */ + //release all data in memory buffer tFilePagesItem *first = 
pMemBuffer->pHead; while (first != NULL) { tFilePagesItem *ptmp = first; @@ -325,15 +331,16 @@ void tExtMemBufferClear(tExtMemBuffer *pMemBuffer) { pMemBuffer->fileMeta.nFileSize = 0; pMemBuffer->numOfElemsInBuffer = 0; - pMemBuffer->numOfPagesInMem = 0; + pMemBuffer->numOfInMemPages = 0; + pMemBuffer->pHead = NULL; pMemBuffer->pTail = NULL; tExtMemBufferClearFlushoutInfo(pMemBuffer); - if (pMemBuffer->dataFile != NULL) { - // reset the write pointer to the header - fseek(pMemBuffer->dataFile, 0, SEEK_SET); + // reset the write pointer to the header + if (pMemBuffer->file != NULL) { + fseek(pMemBuffer->file, 0, SEEK_SET); } } @@ -347,8 +354,8 @@ bool tExtMemBufferLoadData(tExtMemBuffer *pMemBuffer, tFilePage *pFilePage, int3 return false; } - size_t ret = fseek(pMemBuffer->dataFile, (pInfo->startPageId + pageIdx) * pMemBuffer->nPageSize, SEEK_SET); - ret = fread(pFilePage, pMemBuffer->nPageSize, 1, pMemBuffer->dataFile); + size_t ret = fseek(pMemBuffer->file, (pInfo->startPageId + pageIdx) * pMemBuffer->pageSize, SEEK_SET); + ret = fread(pFilePage, pMemBuffer->pageSize, 1, pMemBuffer->file); return (ret > 0); } @@ -356,474 +363,11 @@ bool tExtMemBufferLoadData(tExtMemBuffer *pMemBuffer, tFilePage *pFilePage, int3 bool tExtMemBufferIsAllDataInMem(tExtMemBuffer *pMemBuffer) { return (pMemBuffer->fileMeta.nFileSize == 0); } ////////////////////////////////////////////////////////////////////////////////////////////////////////////// -// TODO safty check in result -void tBucketBigIntHash(tMemBucket *pBucket, void *value, int16_t *segIdx, int16_t *slotIdx) { - int64_t v = *(int64_t *)value; - - if (pBucket->nRange.i64MaxVal == INT64_MIN) { - if (v >= 0) { - *segIdx = ((v >> (64 - 9)) >> 6) + 8; - *slotIdx = (v >> (64 - 9)) & 0x3F; - } else { // v<0 - *segIdx = ((-v) >> (64 - 9)) >> 6; - *slotIdx = ((-v) >> (64 - 9)) & 0x3F; - *segIdx = 7 - (*segIdx); - } - } else { - // todo hash for bigint and float and double - int64_t span = pBucket->nRange.i64MaxVal - 
pBucket->nRange.i64MinVal; - if (span < pBucket->nTotalSlots) { - int32_t delta = (int32_t)(v - pBucket->nRange.i64MinVal); - *segIdx = delta / pBucket->nSlotsOfSeg; - *slotIdx = delta % pBucket->nSlotsOfSeg; - } else { - double x = (double)span / pBucket->nTotalSlots; - double posx = (v - pBucket->nRange.i64MinVal) / x; - if (v == pBucket->nRange.i64MaxVal) { - posx -= 1; - } - - *segIdx = ((int32_t)posx) / pBucket->nSlotsOfSeg; - *slotIdx = ((int32_t)posx) % pBucket->nSlotsOfSeg; - } - } -} - -// todo refactor to more generic -void tBucketIntHash(tMemBucket *pBucket, void *value, int16_t *segIdx, int16_t *slotIdx) { - int32_t v = *(int32_t *)value; - - if (pBucket->nRange.iMaxVal == INT32_MIN) { - /* - * taking negative integer into consideration, - * there is only half of pBucket->segs available for non-negative integer - */ - // int32_t numOfSlots = pBucket->nTotalSlots>>1; - // int32_t bits = bitsOfNumber(numOfSlots)-1; - - if (v >= 0) { - *segIdx = ((v >> (32 - 9)) >> 6) + 8; - *slotIdx = (v >> (32 - 9)) & 0x3F; - } else { // v<0 - *segIdx = ((-v) >> (32 - 9)) >> 6; - *slotIdx = ((-v) >> (32 - 9)) & 0x3F; - *segIdx = 7 - (*segIdx); - } - } else { - // divide a range of [iMinVal, iMaxVal] into 1024 buckets - int32_t span = pBucket->nRange.iMaxVal - pBucket->nRange.iMinVal; - if (span < pBucket->nTotalSlots) { - int32_t delta = v - pBucket->nRange.iMinVal; - *segIdx = delta / pBucket->nSlotsOfSeg; - *slotIdx = delta % pBucket->nSlotsOfSeg; - } else { - double x = (double)span / pBucket->nTotalSlots; - double posx = (v - pBucket->nRange.iMinVal) / x; - if (v == pBucket->nRange.iMaxVal) { - posx -= 1; - } - *segIdx = ((int32_t)posx) / pBucket->nSlotsOfSeg; - *slotIdx = ((int32_t)posx) % pBucket->nSlotsOfSeg; - } - } -} - -void tBucketDoubleHash(tMemBucket *pBucket, void *value, int16_t *segIdx, int16_t *slotIdx) { - //double v = *(double *)value; - double v = GET_DOUBLE_VAL(value); - - if (pBucket->nRange.dMinVal == DBL_MAX) { - /* - * taking negative integer 
into consideration, - * there is only half of pBucket->segs available for non-negative integer - */ - double x = DBL_MAX / (pBucket->nTotalSlots >> 1); - double posx = (v + DBL_MAX) / x; - *segIdx = ((int32_t)posx) / pBucket->nSlotsOfSeg; - *slotIdx = ((int32_t)posx) % pBucket->nSlotsOfSeg; - } else { - // divide a range of [dMinVal, dMaxVal] into 1024 buckets - double span = pBucket->nRange.dMaxVal - pBucket->nRange.dMinVal; - if (span < pBucket->nTotalSlots) { - int32_t delta = (int32_t)(v - pBucket->nRange.dMinVal); - *segIdx = delta / pBucket->nSlotsOfSeg; - *slotIdx = delta % pBucket->nSlotsOfSeg; - } else { - double x = span / pBucket->nTotalSlots; - double posx = (v - pBucket->nRange.dMinVal) / x; - if (v == pBucket->nRange.dMaxVal) { - posx -= 1; - } - *segIdx = ((int32_t)posx) / pBucket->nSlotsOfSeg; - *slotIdx = ((int32_t)posx) % pBucket->nSlotsOfSeg; - } - - if (*segIdx < 0 || *segIdx > 16 || *slotIdx < 0 || *slotIdx > 64) { - pError("error in hash process. segment is: %d, slot id is: %d\n", *segIdx, *slotIdx); - } - } -} - -tMemBucket* tMemBucketCreate(int32_t totalSlots, int32_t nBufferSize, int16_t nElemSize, int16_t dataType, tOrderDescriptor *pDesc) { - tMemBucket* pBucket = (tMemBucket *)malloc(sizeof(tMemBucket)); - - pBucket->nTotalSlots = totalSlots; - pBucket->nSlotsOfSeg = 1 << 6; // 64 Segments, 16 slots each seg. 
- pBucket->dataType = dataType; - pBucket->nElemSize = nElemSize; - pBucket->nPageSize = DEFAULT_PAGE_SIZE; - - pBucket->numOfElems = 0; - pBucket->numOfSegs = pBucket->nTotalSlots / pBucket->nSlotsOfSeg; - - pBucket->nTotalBufferSize = nBufferSize; - - pBucket->maxElemsCapacity = pBucket->nTotalBufferSize / pBucket->nElemSize; - - pBucket->numOfTotalPages = pBucket->nTotalBufferSize / pBucket->nPageSize; - pBucket->numOfAvailPages = pBucket->numOfTotalPages; - - pBucket->pOrderDesc = pDesc; - - switch (pBucket->dataType) { - case TSDB_DATA_TYPE_INT: - case TSDB_DATA_TYPE_SMALLINT: - case TSDB_DATA_TYPE_TINYINT: { - pBucket->nRange.iMinVal = INT32_MAX; - pBucket->nRange.iMaxVal = INT32_MIN; - pBucket->HashFunc = tBucketIntHash; - break; - }; - case TSDB_DATA_TYPE_DOUBLE: - case TSDB_DATA_TYPE_FLOAT: { - pBucket->nRange.dMinVal = DBL_MAX; - pBucket->nRange.dMaxVal = -DBL_MAX; - pBucket->HashFunc = tBucketDoubleHash; - break; - }; - case TSDB_DATA_TYPE_BIGINT: { - pBucket->nRange.i64MinVal = INT64_MAX; - pBucket->nRange.i64MaxVal = INT64_MIN; - pBucket->HashFunc = tBucketBigIntHash; - break; - }; - default: { - pError("MemBucket:%p,not support data type %d,failed", *pBucket, pBucket->dataType); - tfree(pBucket); - return NULL; - } - } - - if (pDesc->pSchema->numOfCols != 1 || pDesc->pSchema->colOffset[0] != 0) { - pError("MemBucket:%p,only consecutive data is allowed,invalid numOfCols:%d or offset:%d", - pBucket, pDesc->pSchema->numOfCols, pDesc->pSchema->colOffset[0]); - tfree(pBucket); - return NULL; - } - - if (pDesc->pSchema->pFields[0].type != dataType) { - pError("MemBucket:%p,data type is not consistent,%d in schema, %d in param", pBucket, - pDesc->pSchema->pFields[0].type, dataType); - tfree(pBucket); - return NULL; - } - - if (pBucket->numOfTotalPages < pBucket->nTotalSlots) { - pWarn("MemBucket:%p,total buffer pages %d are not enough for all slots", pBucket, pBucket->numOfTotalPages); - } - - pBucket->pSegs = (tMemBucketSegment *)malloc(pBucket->numOfSegs * 
sizeof(tMemBucketSegment)); - - for (int32_t i = 0; i < pBucket->numOfSegs; ++i) { - pBucket->pSegs[i].numOfSlots = pBucket->nSlotsOfSeg; - pBucket->pSegs[i].pBuffer = NULL; - pBucket->pSegs[i].pBoundingEntries = NULL; - } - - pTrace("MemBucket:%p,created,buffer size:%d,elem size:%d", pBucket, pBucket->numOfTotalPages * DEFAULT_PAGE_SIZE, - pBucket->nElemSize); - - return pBucket; -} - -void tMemBucketDestroy(tMemBucket *pBucket) { - if (pBucket == NULL) { - return; - } - - if (pBucket->pSegs) { - for (int32_t i = 0; i < pBucket->numOfSegs; ++i) { - tMemBucketSegment *pSeg = &(pBucket->pSegs[i]); - tfree(pSeg->pBoundingEntries); - - if (pSeg->pBuffer == NULL || pSeg->numOfSlots == 0) { - continue; - } - - for (int32_t j = 0; j < pSeg->numOfSlots; ++j) { - if (pSeg->pBuffer[j] != NULL) { - tExtMemBufferDestroy(&pSeg->pBuffer[j]); - } - } - tfree(pSeg->pBuffer); - } - } - - tfree(pBucket->pSegs); - tfree(pBucket); -} - -/* - * find the slots which accounts for largest proportion of total in-memory buffer - */ -static void tBucketGetMaxMemSlot(tMemBucket *pBucket, int16_t *segIdx, int16_t *slotIdx) { - *segIdx = -1; - *slotIdx = -1; - - int32_t val = 0; - for (int32_t k = 0; k < pBucket->numOfSegs; ++k) { - tMemBucketSegment *pSeg = &pBucket->pSegs[k]; - for (int32_t i = 0; i < pSeg->numOfSlots; ++i) { - if (pSeg->pBuffer == NULL || pSeg->pBuffer[i] == NULL) { - continue; - } - - if (val < pSeg->pBuffer[i]->numOfPagesInMem) { - val = pSeg->pBuffer[i]->numOfPagesInMem; - *segIdx = k; - *slotIdx = i; - } - } - } -} - -static void resetBoundingBox(tMemBucketSegment *pSeg, int32_t type) { - switch (type) { - case TSDB_DATA_TYPE_BIGINT: { - for (int32_t i = 0; i < pSeg->numOfSlots; ++i) { - pSeg->pBoundingEntries[i].i64MaxVal = INT64_MIN; - pSeg->pBoundingEntries[i].i64MinVal = INT64_MAX; - } - break; - }; - case TSDB_DATA_TYPE_INT: - case TSDB_DATA_TYPE_SMALLINT: - case TSDB_DATA_TYPE_TINYINT: { - for (int32_t i = 0; i < pSeg->numOfSlots; ++i) { - 
pSeg->pBoundingEntries[i].iMaxVal = INT32_MIN; - pSeg->pBoundingEntries[i].iMinVal = INT32_MAX; - } - break; - }; - case TSDB_DATA_TYPE_DOUBLE: - case TSDB_DATA_TYPE_FLOAT: { - for (int32_t i = 0; i < pSeg->numOfSlots; ++i) { - pSeg->pBoundingEntries[i].dMaxVal = -DBL_MAX; - pSeg->pBoundingEntries[i].dMinVal = DBL_MAX; - } - break; - } - } -} - -void tMemBucketUpdateBoundingBox(MinMaxEntry *r, char *data, int32_t dataType) { - switch (dataType) { - case TSDB_DATA_TYPE_INT: { - int32_t val = *(int32_t *)data; - if (r->iMinVal > val) { - r->iMinVal = val; - } - - if (r->iMaxVal < val) { - r->iMaxVal = val; - } - break; - }; - case TSDB_DATA_TYPE_BIGINT: { - int64_t val = *(int64_t *)data; - if (r->i64MinVal > val) { - r->i64MinVal = val; - } - - if (r->i64MaxVal < val) { - r->i64MaxVal = val; - } - break; - }; - case TSDB_DATA_TYPE_SMALLINT: { - int32_t val = *(int16_t *)data; - if (r->iMinVal > val) { - r->iMinVal = val; - } - - if (r->iMaxVal < val) { - r->iMaxVal = val; - } - break; - }; - case TSDB_DATA_TYPE_TINYINT: { - int32_t val = *(int8_t *)data; - if (r->iMinVal > val) { - r->iMinVal = val; - } - - if (r->iMaxVal < val) { - r->iMaxVal = val; - } - - break; - }; - case TSDB_DATA_TYPE_DOUBLE: { - //double val = *(double *)data; - double val = GET_DOUBLE_VAL(data); - if (r->dMinVal > val) { - r->dMinVal = val; - } - - if (r->dMaxVal < val) { - r->dMaxVal = val; - } - break; - }; - case TSDB_DATA_TYPE_FLOAT: { - //double val = *(float *)data; - double val = GET_FLOAT_VAL(data); - - if (r->dMinVal > val) { - r->dMinVal = val; - } - - if (r->dMaxVal < val) { - r->dMaxVal = val; - } - break; - }; - default: { assert(false); } - } -} - -/* - * in memory bucket, we only accept the simple data consecutive put in a row/column - * no column-model in this case. 
- */ -void tMemBucketPut(tMemBucket *pBucket, void *data, int32_t numOfRows) { - pBucket->numOfElems += numOfRows; - int16_t segIdx = 0, slotIdx = 0; - - for (int32_t i = 0; i < numOfRows; ++i) { - char *d = (char *)data + i * tDataTypeDesc[pBucket->dataType].nSize; - - switch (pBucket->dataType) { - case TSDB_DATA_TYPE_SMALLINT: { - int32_t val = *(int16_t *)d; - (pBucket->HashFunc)(pBucket, &val, &segIdx, &slotIdx); - break; - } - case TSDB_DATA_TYPE_TINYINT: { - int32_t val = *(int8_t *)d; - (pBucket->HashFunc)(pBucket, &val, &segIdx, &slotIdx); - break; - } - case TSDB_DATA_TYPE_INT: { - int32_t val = *(int32_t *)d; - (pBucket->HashFunc)(pBucket, &val, &segIdx, &slotIdx); - break; - } - case TSDB_DATA_TYPE_BIGINT: { - int64_t val = *(int64_t *)d; - (pBucket->HashFunc)(pBucket, &val, &segIdx, &slotIdx); - break; - } - case TSDB_DATA_TYPE_DOUBLE: { - //double val = *(double *)d; - double val = GET_DOUBLE_VAL(d); - (pBucket->HashFunc)(pBucket, &val, &segIdx, &slotIdx); - break; - } - case TSDB_DATA_TYPE_FLOAT: { - //double val = *(float *)d; - double val = GET_FLOAT_VAL(d); - (pBucket->HashFunc)(pBucket, &val, &segIdx, &slotIdx); - break; - } - } - - tMemBucketSegment *pSeg = &pBucket->pSegs[segIdx]; - if (pSeg->pBoundingEntries == NULL) { - pSeg->pBoundingEntries = (MinMaxEntry *)malloc(sizeof(MinMaxEntry) * pBucket->nSlotsOfSeg); - resetBoundingBox(pSeg, pBucket->dataType); - } - - if (pSeg->pBuffer == NULL) { - pSeg->pBuffer = (tExtMemBuffer **)calloc(pBucket->nSlotsOfSeg, sizeof(void *)); - } - - if (pSeg->pBuffer[slotIdx] == NULL) { - char name[MAX_TMPFILE_PATH_LENGTH] = {0}; - getTmpfilePath("tb_ex_bk_%lld_%lld_%d_%d", name); - - tExtMemBufferCreate(&pSeg->pBuffer[slotIdx], pBucket->numOfTotalPages * pBucket->nPageSize, pBucket->nElemSize, - name, pBucket->pOrderDesc->pSchema); - pSeg->pBuffer[slotIdx]->flushModel = SINGLE_APPEND_MODEL; - pBucket->pOrderDesc->pSchema->maxCapacity = pSeg->pBuffer[slotIdx]->numOfElemsPerPage; - } - - 
tMemBucketUpdateBoundingBox(&pSeg->pBoundingEntries[slotIdx], d, pBucket->dataType); - - // ensure available memory pages to allocate - int16_t cseg = 0, cslot = 0; - if (pBucket->numOfAvailPages == 0) { - pTrace("MemBucket:%p,max avail size:%d, no avail memory pages,", pBucket, pBucket->numOfTotalPages); - - tBucketGetMaxMemSlot(pBucket, &cseg, &cslot); - if (cseg == -1 || cslot == -1) { - pError("MemBucket:%p,failed to find appropriated avail buffer", pBucket); - return; - } - - if (cseg != segIdx || cslot != slotIdx) { - pBucket->numOfAvailPages += pBucket->pSegs[cseg].pBuffer[cslot]->numOfPagesInMem; - - int32_t avail = pBucket->pSegs[cseg].pBuffer[cslot]->numOfPagesInMem; - UNUSED(avail); - tExtMemBufferFlush(pBucket->pSegs[cseg].pBuffer[cslot]); - - pTrace("MemBucket:%p,seg:%d,slot:%d flushed to disk,new avail pages:%d", pBucket, cseg, cslot, - pBucket->numOfAvailPages); - } else { - pTrace("MemBucket:%p,failed to choose slot to flush to disk seg:%d,slot:%d", - pBucket, cseg, cslot); - } - } - int16_t consumedPgs = pSeg->pBuffer[slotIdx]->numOfPagesInMem; - - int16_t newPgs = tExtMemBufferPut(pSeg->pBuffer[slotIdx], d, 1); - /* - * trigger 1. page re-allocation, to reduce the available pages - * 2. page flushout, to increase the available pages - */ - pBucket->numOfAvailPages += (consumedPgs - newPgs); - } -} - -void releaseBucket(tMemBucket *pMemBucket, int32_t segIdx, int32_t slotIdx) { - if (segIdx < 0 || segIdx > pMemBucket->numOfSegs || slotIdx < 0) { - return; - } - - tMemBucketSegment *pSeg = &pMemBucket->pSegs[segIdx]; - if (slotIdx < 0 || slotIdx >= pSeg->numOfSlots || pSeg->pBuffer[slotIdx] == NULL) { - return; - } - - tExtMemBufferDestroy(&pSeg->pBuffer[slotIdx]); -} - static FORCE_INLINE int32_t primaryKeyComparator(int64_t f1, int64_t f2, int32_t colIdx, int32_t tsOrder) { if (f1 == f2) { return 0; } - + if (colIdx == 0 && tsOrder == TSQL_SO_DESC) { // primary column desc order return (f1 < f2) ? 
1 : -1; } else { // asc @@ -831,7 +375,6 @@ static FORCE_INLINE int32_t primaryKeyComparator(int64_t f1, int64_t f2, int32_t } } -// todo refactor static FORCE_INLINE int32_t columnValueAscendingComparator(char *f1, char *f2, int32_t type, int32_t bytes) { switch (type) { case TSDB_DATA_TYPE_INT: { @@ -902,7 +445,7 @@ static FORCE_INLINE int32_t columnValueAscendingComparator(char *f1, char *f2, i return (ret < 0) ? -1 : 1; }; } - + return 0; } @@ -910,14 +453,14 @@ int32_t compare_a(tOrderDescriptor *pDescriptor, int32_t numOfRows1, int32_t s1, int32_t s2, char *data2) { assert(numOfRows1 == numOfRows2); - int32_t cmpCnt = pDescriptor->orderIdx.numOfOrderedCols; + int32_t cmpCnt = pDescriptor->orderIdx.numOfCols; for (int32_t i = 0; i < cmpCnt; ++i) { int32_t colIdx = pDescriptor->orderIdx.pData[i]; - char *f1 = COLMODEL_GET_VAL(data1, pDescriptor->pSchema, numOfRows1, s1, colIdx); - char *f2 = COLMODEL_GET_VAL(data2, pDescriptor->pSchema, numOfRows2, s2, colIdx); + char *f1 = COLMODEL_GET_VAL(data1, pDescriptor->pColumnModel, numOfRows1, s1, colIdx); + char *f2 = COLMODEL_GET_VAL(data2, pDescriptor->pColumnModel, numOfRows2, s2, colIdx); - if (pDescriptor->pSchema->pFields[colIdx].type == TSDB_DATA_TYPE_TIMESTAMP) { + if (pDescriptor->pColumnModel->pFields[colIdx].field.type == TSDB_DATA_TYPE_TIMESTAMP) { int32_t ret = primaryKeyComparator(*(int64_t *)f1, *(int64_t *)f2, colIdx, pDescriptor->tsOrder); if (ret == 0) { continue; @@ -925,7 +468,7 @@ int32_t compare_a(tOrderDescriptor *pDescriptor, int32_t numOfRows1, int32_t s1, return ret; } } else { - SSchema *pSchema = &pDescriptor->pSchema->pFields[colIdx]; + SSchema *pSchema = &pDescriptor->pColumnModel->pFields[colIdx]; int32_t ret = columnValueAscendingComparator(f1, f2, pSchema->type, pSchema->bytes); if (ret == 0) { continue; @@ -942,14 +485,14 @@ int32_t compare_d(tOrderDescriptor *pDescriptor, int32_t numOfRows1, int32_t s1, int32_t s2, char *data2) { assert(numOfRows1 == numOfRows2); - int32_t cmpCnt = 
pDescriptor->orderIdx.numOfOrderedCols; + int32_t cmpCnt = pDescriptor->orderIdx.numOfCols; for (int32_t i = 0; i < cmpCnt; ++i) { int32_t colIdx = pDescriptor->orderIdx.pData[i]; - char *f1 = COLMODEL_GET_VAL(data1, pDescriptor->pSchema, numOfRows1, s1, colIdx); - char *f2 = COLMODEL_GET_VAL(data2, pDescriptor->pSchema, numOfRows2, s2, colIdx); + char *f1 = COLMODEL_GET_VAL(data1, pDescriptor->pColumnModel, numOfRows1, s1, colIdx); + char *f2 = COLMODEL_GET_VAL(data2, pDescriptor->pColumnModel, numOfRows2, s2, colIdx); - if (pDescriptor->pSchema->pFields[colIdx].type == TSDB_DATA_TYPE_TIMESTAMP) { + if (pDescriptor->pColumnModel->pFields[colIdx].field.type == TSDB_DATA_TYPE_TIMESTAMP) { int32_t ret = primaryKeyComparator(*(int64_t *)f1, *(int64_t *)f2, colIdx, pDescriptor->tsOrder); if (ret == 0) { continue; @@ -957,7 +500,7 @@ int32_t compare_d(tOrderDescriptor *pDescriptor, int32_t numOfRows1, int32_t s1, return ret; } } else { - SSchema *pSchema = &pDescriptor->pSchema->pFields[colIdx]; + SSchema *pSchema = &pDescriptor->pColumnModel->pFields[colIdx]; int32_t ret = columnValueAscendingComparator(f1, f2, pSchema->type, pSchema->bytes); if (ret == 0) { continue; @@ -979,12 +522,13 @@ FORCE_INLINE int32_t compare_sd(tOrderDescriptor *pDescriptor, int32_t numOfRows return compare_d(pDescriptor, numOfRows, idx1, data, numOfRows, idx2, data); } -static void swap(tOrderDescriptor *pDescriptor, int32_t count, int32_t s1, char *data1, int32_t s2) { - for (int32_t i = 0; i < pDescriptor->pSchema->numOfCols; ++i) { - void *first = COLMODEL_GET_VAL(data1, pDescriptor->pSchema, count, s1, i); - void *second = COLMODEL_GET_VAL(data1, pDescriptor->pSchema, count, s2, i); +static void swap(SColumnModel *pColumnModel, int32_t count, int32_t s1, char *data1, int32_t s2) { + for (int32_t i = 0; i < pColumnModel->numOfCols; ++i) { + void *first = COLMODEL_GET_VAL(data1, pColumnModel, count, s1, i); + void *second = COLMODEL_GET_VAL(data1, pColumnModel, count, s2, i); - 
tsDataSwap(first, second, pDescriptor->pSchema->pFields[i].type, pDescriptor->pSchema->pFields[i].bytes); + SSchema* pSchema = &pColumnModel->pFields[i].field; + tsDataSwap(first, second, pSchema->type, pSchema->bytes); } } @@ -993,7 +537,7 @@ static void tColDataInsertSort(tOrderDescriptor *pDescriptor, int32_t numOfRows, for (int32_t i = start + 1; i <= end; ++i) { for (int32_t j = i; j > start; --j) { if (compareFn(pDescriptor, numOfRows, j, j - 1, data) == -1) { - swap(pDescriptor, numOfRows, j - 1, data, j); + swap(pDescriptor->pColumnModel, numOfRows, j - 1, data, j); } else { break; } @@ -1035,33 +579,33 @@ static void median(tOrderDescriptor *pDescriptor, int32_t numOfRows, int32_t sta #if defined(_DEBUG_VIEW) int32_t f = pDescriptor->orderIdx.pData[0]; - char *midx = COLMODEL_GET_VAL(data, pDescriptor->pSchema, numOfRows, midIdx, f); - char *startx = COLMODEL_GET_VAL(data, pDescriptor->pSchema, numOfRows, start, f); - char *endx = COLMODEL_GET_VAL(data, pDescriptor->pSchema, numOfRows, end, f); + char *midx = COLMODEL_GET_VAL(data, pDescriptor->pColumnModel, numOfRows, midIdx, f); + char *startx = COLMODEL_GET_VAL(data, pDescriptor->pColumnModel, numOfRows, start, f); + char *endx = COLMODEL_GET_VAL(data, pDescriptor->pColumnModel, numOfRows, end, f); int32_t colIdx = pDescriptor->orderIdx.pData[0]; - tSortDataPrint(pDescriptor->pSchema->pFields[colIdx].type, "before", startx, midx, endx); + tSortDataPrint(pDescriptor->pColumnModel->pFields[colIdx].field.type, "before", startx, midx, endx); #endif if (compareFn(pDescriptor, numOfRows, midIdx, start, data) == 1) { - swap(pDescriptor, numOfRows, start, data, midIdx); + swap(pDescriptor->pColumnModel, numOfRows, start, data, midIdx); } if (compareFn(pDescriptor, numOfRows, midIdx, end, data) == 1) { - swap(pDescriptor, numOfRows, midIdx, data, start); - swap(pDescriptor, numOfRows, midIdx, data, end); + swap(pDescriptor->pColumnModel, numOfRows, midIdx, data, start); + swap(pDescriptor->pColumnModel, 
numOfRows, midIdx, data, end); } else if (compareFn(pDescriptor, numOfRows, start, end, data) == 1) { - swap(pDescriptor, numOfRows, start, data, end); + swap(pDescriptor->pColumnModel, numOfRows, start, data, end); } assert(compareFn(pDescriptor, numOfRows, midIdx, start, data) <= 0 && compareFn(pDescriptor, numOfRows, start, end, data) <= 0); #if defined(_DEBUG_VIEW) - midx = COLMODEL_GET_VAL(data, pDescriptor->pSchema, numOfRows, midIdx, f); - startx = COLMODEL_GET_VAL(data, pDescriptor->pSchema, numOfRows, start, f); - endx = COLMODEL_GET_VAL(data, pDescriptor->pSchema, numOfRows, end, f); - tSortDataPrint(pDescriptor->pSchema->pFields[colIdx].type, "after", startx, midx, endx); + midx = COLMODEL_GET_VAL(data, pDescriptor->pColumnModel, numOfRows, midIdx, f); + startx = COLMODEL_GET_VAL(data, pDescriptor->pColumnModel, numOfRows, start, f); + endx = COLMODEL_GET_VAL(data, pDescriptor->pColumnModel, numOfRows, end, f); + tSortDataPrint(pDescriptor->pColumnModel->pFields[colIdx].field.type, "after", startx, midx, endx); #endif } @@ -1069,9 +613,9 @@ static UNUSED_FUNC void tRowModelDisplay(tOrderDescriptor *pDescriptor, int32_t int32_t colIdx = pDescriptor->orderIdx.pData[0]; for (int32_t i = 0; i < len; ++i) { - char *startx = COLMODEL_GET_VAL(d, pDescriptor->pSchema, numOfRows, i, colIdx); + char *startx = COLMODEL_GET_VAL(d, pDescriptor->pColumnModel, numOfRows, i, colIdx); - switch (pDescriptor->pSchema->pFields[colIdx].type) { + switch (pDescriptor->pColumnModel->pFields[colIdx].field.type) { case TSDB_DATA_TYPE_DOUBLE: printf("%lf\t", *(double *)startx); break; @@ -1115,15 +659,15 @@ void tColDataQSort(tOrderDescriptor *pDescriptor, int32_t numOfRows, int32_t sta } #ifdef _DEBUG_VIEW - printf("before sort:\n"); - tRowModelDisplay(pDescriptor, numOfRows, data, end - start + 1); +// printf("before sort:\n"); +// tRowModelDisplay(pDescriptor, numOfRows, data, end - start + 1); #endif int32_t s = start, e = end; median(pDescriptor, numOfRows, start, end, data, 
compareFn); #ifdef _DEBUG_VIEW - printf("%s called: %d\n", __FUNCTION__, qsort_call++); +// printf("%s called: %d\n", __FUNCTION__, qsort_call++); #endif UNUSED(qsort_call); @@ -1139,17 +683,17 @@ void tColDataQSort(tOrderDescriptor *pDescriptor, int32_t numOfRows, int32_t sta } if (ret == 0 && e != end_same) { - swap(pDescriptor, numOfRows, e, data, end_same--); + swap(pDescriptor->pColumnModel, numOfRows, e, data, end_same--); } e--; } if (e != s) { - swap(pDescriptor, numOfRows, s, data, e); + swap(pDescriptor->pColumnModel, numOfRows, s, data, e); } #ifdef _DEBUG_VIEW - tRowModelDisplay(pDescriptor, numOfRows, data, end - start + 1); +// tRowModelDisplay(pDescriptor, numOfRows, data, end - start + 1); #endif while (s < e) { @@ -1159,16 +703,16 @@ void tColDataQSort(tOrderDescriptor *pDescriptor, int32_t numOfRows, int32_t sta } if (ret == 0 && s != start_same) { - swap(pDescriptor, numOfRows, s, data, start_same++); + swap(pDescriptor->pColumnModel, numOfRows, s, data, start_same++); } s++; } if (s != e) { - swap(pDescriptor, numOfRows, s, data, e); + swap(pDescriptor->pColumnModel, numOfRows, s, data, e); } #ifdef _DEBUG_VIEW - tRowModelDisplay(pDescriptor, numOfRows, data, end - start + 1); +// tRowModelDisplay(pDescriptor, numOfRows, data, end - start + 1); #endif } @@ -1178,14 +722,14 @@ void tColDataQSort(tOrderDescriptor *pDescriptor, int32_t numOfRows, int32_t sta int32_t right = end; while (right > end_same && left <= end_same) { - swap(pDescriptor, numOfRows, left++, data, right--); + swap(pDescriptor->pColumnModel, numOfRows, left++, data, right--); } // (pivotal+1) + steps of number that are identical pivotal rightx += (end - end_same); #ifdef _DEBUG_VIEW - tRowModelDisplay(pDescriptor, numOfRows, data, end - start + 1); +// tRowModelDisplay(pDescriptor, numOfRows, data, end - start + 1); #endif } @@ -1195,14 +739,14 @@ void tColDataQSort(tOrderDescriptor *pDescriptor, int32_t numOfRows, int32_t sta int32_t right = e - 1; while (left < start_same && 
right >= start_same) { - swap(pDescriptor, numOfRows, left++, data, right--); + swap(pDescriptor->pColumnModel, numOfRows, left++, data, right--); } // (pivotal-1) - steps of number that are identical pivotal leftx -= (start_same - start); #ifdef _DEBUG_VIEW - tRowModelDisplay(pDescriptor, numOfRows, data, end - start + 1); +// tRowModelDisplay(pDescriptor, numOfRows, data, end - start + 1); #endif } @@ -1215,143 +759,50 @@ void tColDataQSort(tOrderDescriptor *pDescriptor, int32_t numOfRows, int32_t sta } } -tExtMemBuffer *releaseBucketsExceptFor(tMemBucket *pMemBucket, int16_t segIdx, int16_t slotIdx) { - tExtMemBuffer *pBuffer = NULL; - - for (int32_t i = 0; i < pMemBucket->numOfSegs; ++i) { - tMemBucketSegment *pSeg = &pMemBucket->pSegs[i]; - - for (int32_t j = 0; j < pSeg->numOfSlots; ++j) { - if (i == segIdx && j == slotIdx) { - pBuffer = pSeg->pBuffer[j]; - } else { - if (pSeg->pBuffer && pSeg->pBuffer[j]) { - tExtMemBufferDestroy(&pSeg->pBuffer[j]); - } - } - } - } - - return pBuffer; -} - -static tFilePage *loadIntoBucketFromDisk(tMemBucket *pMemBucket, int32_t segIdx, int32_t slotIdx, - tOrderDescriptor *pDesc) { - // release all data in other slots - tExtMemBuffer *pMemBuffer = pMemBucket->pSegs[segIdx].pBuffer[slotIdx]; - tFilePage * buffer = (tFilePage *)calloc(1, pMemBuffer->nElemSize * pMemBuffer->numOfAllElems + sizeof(tFilePage)); - int32_t oldCapacity = pDesc->pSchema->maxCapacity; - pDesc->pSchema->maxCapacity = pMemBuffer->numOfAllElems; - - if (!tExtMemBufferIsAllDataInMem(pMemBuffer)) { - pMemBuffer = releaseBucketsExceptFor(pMemBucket, segIdx, slotIdx); - assert(pMemBuffer->numOfAllElems > 0); - - // load data in disk to memory - tFilePage *pPage = (tFilePage *)calloc(1, pMemBuffer->nPageSize); - - for (int32_t i = 0; i < pMemBuffer->fileMeta.flushoutData.nLength; ++i) { - tFlushoutInfo *pFlushInfo = &pMemBuffer->fileMeta.flushoutData.pFlushoutInfo[i]; - - int32_t ret = fseek(pMemBuffer->dataFile, pFlushInfo->startPageId * 
pMemBuffer->nPageSize, SEEK_SET); - UNUSED(ret); - - for (uint32_t j = 0; j < pFlushInfo->numOfPages; ++j) { - ret = fread(pPage, pMemBuffer->nPageSize, 1, pMemBuffer->dataFile); - UNUSED(ret); - assert(pPage->numOfElems > 0); - - tColModelAppend(pDesc->pSchema, buffer, pPage->data, 0, pPage->numOfElems, pPage->numOfElems); - printf("id: %d count: %" PRIu64 "\n", j, buffer->numOfElems); - } - } - tfree(pPage); - - assert(buffer->numOfElems == pMemBuffer->fileMeta.numOfElemsInFile); +/* + * deep copy of sschema + */ +SColumnModel *createColumnModel(SSchema *fields, int32_t numOfCols, int32_t blockCapacity) { + SColumnModel *pColumnModel = (SColumnModel *)calloc(1, sizeof(SColumnModel) + numOfCols * sizeof(SSchemaEx)); + if (pColumnModel == NULL) { + return NULL; } - // load data in pMemBuffer to buffer - tFilePagesItem *pListItem = pMemBuffer->pHead; - while (pListItem != NULL) { - tColModelAppend(pDesc->pSchema, buffer, pListItem->item.data, 0, pListItem->item.numOfElems, - pListItem->item.numOfElems); - pListItem = pListItem->pNext; + pColumnModel->pFields = (SSchemaEx *)(&pColumnModel[1]); + + for(int32_t i = 0; i < numOfCols; ++i) { + SSchemaEx* pSchemaEx = &pColumnModel->pFields[i]; + pSchemaEx->field = fields[i]; + pSchemaEx->offset = pColumnModel->rowSize; + + pColumnModel->rowSize += pSchemaEx->field.bytes; } - tColDataQSort(pDesc, buffer->numOfElems, 0, buffer->numOfElems - 1, buffer->data, TSQL_SO_ASC); - - pDesc->pSchema->maxCapacity = oldCapacity; // restore value - return buffer; -} + pColumnModel->numOfCols = numOfCols; + pColumnModel->capacity = blockCapacity; -double findOnlyResult(tMemBucket *pMemBucket) { - assert(pMemBucket->numOfElems == 1); - - for (int32_t i = 0; i < pMemBucket->numOfSegs; ++i) { - tMemBucketSegment *pSeg = &pMemBucket->pSegs[i]; - if (pSeg->pBuffer) { - for (int32_t j = 0; j < pSeg->numOfSlots; ++j) { - tExtMemBuffer *pBuffer = pSeg->pBuffer[j]; - if (pBuffer) { - assert(pBuffer->numOfAllElems == 1); - tFilePage *pPage = 
&pBuffer->pHead->item; - if (pBuffer->numOfElemsInBuffer == 1) { - switch (pMemBucket->dataType) { - case TSDB_DATA_TYPE_INT: - return *(int32_t *)pPage->data; - case TSDB_DATA_TYPE_SMALLINT: - return *(int16_t *)pPage->data; - case TSDB_DATA_TYPE_TINYINT: - return *(int8_t *)pPage->data; - case TSDB_DATA_TYPE_BIGINT: - return (double)(*(int64_t *)pPage->data); - case TSDB_DATA_TYPE_DOUBLE: { - double dv = GET_DOUBLE_VAL(pPage->data); - //return *(double *)pPage->data; - return dv; - } - case TSDB_DATA_TYPE_FLOAT: { - float fv = GET_FLOAT_VAL(pPage->data); - //return *(float *)pPage->data; - return fv; - } - default: - return 0; - } - } - } - } - } - } - return 0; + return pColumnModel; } -/* - * deep copy of sschema - */ -tColModel *tColModelCreate(SSchema *field, int32_t numOfCols, int32_t maxCapacity) { - tColModel *pSchema = - (tColModel *)calloc(1, sizeof(tColModel) + numOfCols * sizeof(SSchema) + numOfCols * sizeof(int16_t)); - if (pSchema == NULL) { +SColumnModel *cloneColumnModel(SColumnModel *pSrc) { + if (pSrc == NULL) { return NULL; } - - pSchema->pFields = (SSchema *)(&pSchema[1]); - memcpy(pSchema->pFields, field, sizeof(SSchema) * numOfCols); - - pSchema->colOffset = (int16_t *)(&pSchema->pFields[numOfCols]); - pSchema->colOffset[0] = 0; - for (int32_t i = 1; i < numOfCols; ++i) { - pSchema->colOffset[i] = pSchema->colOffset[i - 1] + pSchema->pFields[i - 1].bytes; + + SColumnModel *pColumnModel = (SColumnModel *)calloc(1, sizeof(SColumnModel) + pSrc->numOfCols * sizeof(SSchemaEx)); + if (pColumnModel == NULL) { + return NULL; } - - pSchema->numOfCols = numOfCols; - pSchema->maxCapacity = maxCapacity; - - return pSchema; + + *pColumnModel = *pSrc; + + pColumnModel->pFields = (SSchemaEx*) (&pColumnModel[1]); + memcpy(pColumnModel->pFields, pSrc->pFields, pSrc->numOfCols * sizeof(SSchemaEx)); + + return pColumnModel; } -void tColModelDestroy(tColModel *pModel) { +void destroyColumnModel(SColumnModel *pModel) { if (pModel == NULL) { return; } @@ -1444,12 
+895,12 @@ static void printBinaryDataEx(char *data, int32_t len, SSrcColumnInfo *param) { } } -void tColModelDisplay(tColModel *pModel, void *pData, int32_t numOfRows, int32_t totalCapacity) { +void tColModelDisplay(SColumnModel *pModel, void *pData, int32_t numOfRows, int32_t totalCapacity) { for (int32_t i = 0; i < numOfRows; ++i) { for (int32_t j = 0; j < pModel->numOfCols; ++j) { char *val = COLMODEL_GET_VAL((char *)pData, pModel, totalCapacity, i, j); - int type = pModel->pFields[j].type; + int type = pModel->pFields[j].field.type; printf("type:%d ", type); switch (type) { @@ -1461,11 +912,11 @@ void tColModelDisplay(tColModel *pModel, void *pData, int32_t numOfRows, int32_t break; case TSDB_DATA_TYPE_NCHAR: { char buf[4096] = {0}; - taosUcs4ToMbs(val, pModel->pFields[j].bytes, buf); + taosUcs4ToMbs(val, pModel->pFields[j].field.bytes, buf); printf("%s\t", buf); } case TSDB_DATA_TYPE_BINARY: { - printBinaryData(val, pModel->pFields[j].bytes); + printBinaryData(val, pModel->pFields[j].field.bytes); break; } case TSDB_DATA_TYPE_DOUBLE: @@ -1495,15 +946,15 @@ void tColModelDisplay(tColModel *pModel, void *pData, int32_t numOfRows, int32_t printf("\n"); } -void tColModelDisplayEx(tColModel *pModel, void *pData, int32_t numOfRows, int32_t totalCapacity, +void tColModelDisplayEx(SColumnModel *pModel, void *pData, int32_t numOfRows, int32_t totalCapacity, SSrcColumnInfo *param) { for (int32_t i = 0; i < numOfRows; ++i) { for (int32_t j = 0; j < pModel->numOfCols; ++j) { char *val = COLMODEL_GET_VAL((char *)pData, pModel, totalCapacity, i, j); - printf("type:%d\t", pModel->pFields[j].type); + printf("type:%d\t", pModel->pFields[j].field.type); - switch (pModel->pFields[j].type) { + switch (pModel->pFields[j].field.type) { case TSDB_DATA_TYPE_BIGINT: printf("%" PRId64 "\t", *(int64_t *)val); break; @@ -1512,11 +963,11 @@ void tColModelDisplayEx(tColModel *pModel, void *pData, int32_t numOfRows, int32 break; case TSDB_DATA_TYPE_NCHAR: { char buf[128] = {0}; - 
taosUcs4ToMbs(val, pModel->pFields[j].bytes, buf); + taosUcs4ToMbs(val, pModel->pFields[j].field.bytes, buf); printf("%s\t", buf); } case TSDB_DATA_TYPE_BINARY: { - printBinaryDataEx(val, pModel->pFields[j].bytes, ¶m[j]); + printBinaryDataEx(val, pModel->pFields[j].field.bytes, ¶m[j]); break; } case TSDB_DATA_TYPE_DOUBLE: @@ -1547,20 +998,31 @@ void tColModelDisplayEx(tColModel *pModel, void *pData, int32_t numOfRows, int32 } //////////////////////////////////////////////////////////////////////////////////////////// -void tColModelCompact(tColModel *pModel, tFilePage *inputBuffer, int32_t maxElemsCapacity) { +void tColModelCompact(SColumnModel *pModel, tFilePage *inputBuffer, int32_t maxElemsCapacity) { if (inputBuffer->numOfElems == 0 || maxElemsCapacity == inputBuffer->numOfElems) { return; } /* start from the second column */ for (int32_t i = 1; i < pModel->numOfCols; ++i) { - memmove(inputBuffer->data + pModel->colOffset[i] * inputBuffer->numOfElems, - inputBuffer->data + pModel->colOffset[i] * maxElemsCapacity, - pModel->pFields[i].bytes * inputBuffer->numOfElems); + SSchemaEx* pSchemaEx = &pModel->pFields[i]; + memmove(inputBuffer->data + pSchemaEx->offset * inputBuffer->numOfElems, + inputBuffer->data + pSchemaEx->offset * maxElemsCapacity, + pSchemaEx->field.bytes * inputBuffer->numOfElems); } } -void tColModelErase(tColModel *pModel, tFilePage *inputBuffer, int32_t maxCapacity, int32_t s, int32_t e) { +SSchema* getColumnModelSchema(SColumnModel *pColumnModel, int32_t index) { + assert(pColumnModel != NULL && index >= 0 && index < pColumnModel->numOfCols); + return &pColumnModel->pFields[index].field; +} + +int16_t getColumnModelOffset(SColumnModel *pColumnModel, int32_t index) { + assert(pColumnModel != NULL && index >= 0 && index < pColumnModel->numOfCols); + return pColumnModel->pFields[index].offset; +} + +void tColModelErase(SColumnModel *pModel, tFilePage *inputBuffer, int32_t blockCapacity, int32_t s, int32_t e) { if (inputBuffer->numOfElems == 0 || 
(e - s + 1) <= 0) { return; } @@ -1571,10 +1033,13 @@ void tColModelErase(tColModel *pModel, tFilePage *inputBuffer, int32_t maxCapaci /* start from the second column */ for (int32_t i = 0; i < pModel->numOfCols; ++i) { - char *startPos = inputBuffer->data + pModel->colOffset[i] * maxCapacity + s * pModel->pFields[i].bytes; - char *endPos = startPos + pModel->pFields[i].bytes * removed; + int16_t offset = getColumnModelOffset(pModel, i); + SSchema* pSchema = getColumnModelSchema(pModel, i); + + char *startPos = inputBuffer->data + offset * blockCapacity + s * pSchema->bytes; + char *endPos = startPos + pSchema->bytes * removed; - memmove(startPos, endPos, pModel->pFields[i].bytes * secPart); + memmove(startPos, endPos, pSchema->bytes * secPart); } inputBuffer->numOfElems = remain; @@ -1587,31 +1052,31 @@ void tColModelErase(tColModel *pModel, tFilePage *inputBuffer, int32_t maxCapaci * data in srcData must has the same schema as data in dstPage, that can be * described by dstModel */ -void tColModelAppend(tColModel *dstModel, tFilePage *dstPage, void *srcData, int32_t start, int32_t numOfRows, +void tColModelAppend(SColumnModel *dstModel, tFilePage *dstPage, void *srcData, int32_t start, int32_t numOfRows, int32_t srcCapacity) { - assert(dstPage->numOfElems + numOfRows <= dstModel->maxCapacity); + assert(dstPage->numOfElems + numOfRows <= dstModel->capacity); for (int32_t col = 0; col < dstModel->numOfCols; ++col) { - char *dst = COLMODEL_GET_VAL(dstPage->data, dstModel, dstModel->maxCapacity, dstPage->numOfElems, col); + char *dst = COLMODEL_GET_VAL(dstPage->data, dstModel, dstModel->capacity, dstPage->numOfElems, col); char *src = COLMODEL_GET_VAL((char *)srcData, dstModel, srcCapacity, start, col); - memmove(dst, src, dstModel->pFields[col].bytes * numOfRows); + memmove(dst, src, dstModel->pFields[col].field.bytes * numOfRows); } dstPage->numOfElems += numOfRows; } -tOrderDescriptor *tOrderDesCreate(int32_t *orderColIdx, int32_t numOfOrderCols, tColModel 
*pModel, +tOrderDescriptor *tOrderDesCreate(const int32_t *orderColIdx, int32_t numOfOrderCols, SColumnModel *pModel, int32_t tsOrderType) { tOrderDescriptor *desc = (tOrderDescriptor *)calloc(1, sizeof(tOrderDescriptor) + sizeof(int32_t) * numOfOrderCols); if (desc == NULL) { return NULL; } - desc->pSchema = pModel; + desc->pColumnModel = pModel; desc->tsOrder = tsOrderType; - desc->orderIdx.numOfOrderedCols = numOfOrderCols; + desc->orderIdx.numOfCols = numOfOrderCols; for (int32_t i = 0; i < numOfOrderCols; ++i) { desc->orderIdx.pData[i] = orderColIdx[i]; } @@ -1624,390 +1089,6 @@ void tOrderDescDestroy(tOrderDescriptor *pDesc) { return; } - tColModelDestroy(pDesc->pSchema); + destroyColumnModel(pDesc->pColumnModel); tfree(pDesc); } - -//////////////////////////////////////////////////////////////////////////////////////////// -static void findMaxMinValue(tMemBucket *pMemBucket, double *maxVal, double *minVal) { - *minVal = DBL_MAX; - *maxVal = -DBL_MAX; - - for (int32_t i = 0; i < pMemBucket->numOfSegs; ++i) { - tMemBucketSegment *pSeg = &pMemBucket->pSegs[i]; - if (pSeg->pBuffer == NULL) { - continue; - } - switch (pMemBucket->dataType) { - case TSDB_DATA_TYPE_INT: - case TSDB_DATA_TYPE_SMALLINT: - case TSDB_DATA_TYPE_TINYINT: { - for (int32_t j = 0; j < pSeg->numOfSlots; ++j) { - double minv = pSeg->pBoundingEntries[j].iMinVal; - double maxv = pSeg->pBoundingEntries[j].iMaxVal; - - if (*minVal > minv) { - *minVal = minv; - } - if (*maxVal < maxv) { - *maxVal = maxv; - } - } - break; - } - case TSDB_DATA_TYPE_DOUBLE: - case TSDB_DATA_TYPE_FLOAT: { - for (int32_t j = 0; j < pSeg->numOfSlots; ++j) { - double minv = pSeg->pBoundingEntries[j].dMinVal; - double maxv = pSeg->pBoundingEntries[j].dMaxVal; - - if (*minVal > minv) { - *minVal = minv; - } - if (*maxVal < maxv) { - *maxVal = maxv; - } - } - break; - } - case TSDB_DATA_TYPE_BIGINT: { - for (int32_t j = 0; j < pSeg->numOfSlots; ++j) { - double minv = (double)pSeg->pBoundingEntries[j].i64MinVal; - double 
maxv = (double)pSeg->pBoundingEntries[j].i64MaxVal; - - if (*minVal > minv) { - *minVal = minv; - } - if (*maxVal < maxv) { - *maxVal = maxv; - } - } - break; - } - } - } -} - -static MinMaxEntry getMinMaxEntryOfNearestSlotInNextSegment(tMemBucket *pMemBucket, int32_t segIdx) { - int32_t i = segIdx + 1; - while (i < pMemBucket->numOfSegs && pMemBucket->pSegs[i].numOfSlots == 0) ++i; - - tMemBucketSegment *pSeg = &pMemBucket->pSegs[i]; - assert(pMemBucket->numOfSegs > i && pMemBucket->pSegs[i].pBuffer != NULL); - - i = 0; - while (i < pMemBucket->nSlotsOfSeg && pSeg->pBuffer[i] == NULL) ++i; - - assert(i < pMemBucket->nSlotsOfSeg); - return pSeg->pBoundingEntries[i]; -} - -/* - * - * now, we need to find the minimum value of the next slot for - * interpolating the percentile value - * j is the last slot of current segment, we need to get the first - * slot of the next segment. - */ -static MinMaxEntry getMinMaxEntryOfNextSlotWithData(tMemBucket *pMemBucket, int32_t segIdx, int32_t slotIdx) { - tMemBucketSegment *pSeg = &pMemBucket->pSegs[segIdx]; - - MinMaxEntry next; - if (slotIdx == pSeg->numOfSlots - 1) { // find next segment with data - return getMinMaxEntryOfNearestSlotInNextSegment(pMemBucket, segIdx); - } else { - int32_t j = slotIdx + 1; - for (; j < pMemBucket->nSlotsOfSeg && pMemBucket->pSegs[segIdx].pBuffer[j] == 0; ++j) { - }; - - if (j == pMemBucket->nSlotsOfSeg) { // current slot has no available - // slot,try next segment - return getMinMaxEntryOfNearestSlotInNextSegment(pMemBucket, segIdx); - } else { - next = pSeg->pBoundingEntries[slotIdx + 1]; - assert(pSeg->pBuffer[slotIdx + 1] != NULL); - } - } - - return next; -} - -bool isIdenticalData(tMemBucket *pMemBucket, int32_t segIdx, int32_t slotIdx); -char *getFirstElemOfMemBuffer(tMemBucketSegment *pSeg, int32_t slotIdx, tFilePage *pPage); - -double getPercentileImpl(tMemBucket *pMemBucket, int32_t count, double fraction) { - int32_t num = 0; - - for (int32_t i = 0; i < pMemBucket->numOfSegs; ++i) { 
- tMemBucketSegment *pSeg = &pMemBucket->pSegs[i]; - for (int32_t j = 0; j < pSeg->numOfSlots; ++j) { - if (pSeg->pBuffer == NULL || pSeg->pBuffer[j] == NULL) { - continue; - } - // required value in current slot - if (num < (count + 1) && num + pSeg->pBuffer[j]->numOfAllElems >= (count + 1)) { - if (pSeg->pBuffer[j]->numOfAllElems + num == (count + 1)) { - /* - * now, we need to find the minimum value of the next slot for interpolating the percentile value - * j is the last slot of current segment, we need to get the first slot of the next segment. - * - */ - MinMaxEntry next = getMinMaxEntryOfNextSlotWithData(pMemBucket, i, j); - - double maxOfThisSlot = 0; - double minOfNextSlot = 0; - switch (pMemBucket->dataType) { - case TSDB_DATA_TYPE_INT: - case TSDB_DATA_TYPE_SMALLINT: - case TSDB_DATA_TYPE_TINYINT: { - maxOfThisSlot = pSeg->pBoundingEntries[j].iMaxVal; - minOfNextSlot = next.iMinVal; - break; - }; - case TSDB_DATA_TYPE_FLOAT: - case TSDB_DATA_TYPE_DOUBLE: { - maxOfThisSlot = pSeg->pBoundingEntries[j].dMaxVal; - minOfNextSlot = next.dMinVal; - break; - }; - case TSDB_DATA_TYPE_BIGINT: { - maxOfThisSlot = (double)pSeg->pBoundingEntries[j].i64MaxVal; - minOfNextSlot = (double)next.i64MinVal; - break; - } - }; - - assert(minOfNextSlot > maxOfThisSlot); - - double val = (1 - fraction) * maxOfThisSlot + fraction * minOfNextSlot; - return val; - } - if (pSeg->pBuffer[j]->numOfAllElems <= pMemBucket->maxElemsCapacity) { - // data in buffer and file are merged together to be processed. 
- tFilePage *buffer = loadIntoBucketFromDisk(pMemBucket, i, j, pMemBucket->pOrderDesc); - int32_t currentIdx = count - num; - - char * thisVal = buffer->data + pMemBucket->nElemSize * currentIdx; - char * nextVal = thisVal + pMemBucket->nElemSize; - double td, nd; - switch (pMemBucket->dataType) { - case TSDB_DATA_TYPE_SMALLINT: { - td = *(int16_t *)thisVal; - nd = *(int16_t *)nextVal; - break; - } - case TSDB_DATA_TYPE_TINYINT: { - td = *(int8_t *)thisVal; - nd = *(int8_t *)nextVal; - break; - } - case TSDB_DATA_TYPE_INT: { - td = *(int32_t *)thisVal; - nd = *(int32_t *)nextVal; - break; - }; - case TSDB_DATA_TYPE_FLOAT: { - //td = *(float *)thisVal; - //nd = *(float *)nextVal; - td = GET_FLOAT_VAL(thisVal); - nd = GET_FLOAT_VAL(nextVal); - break; - } - case TSDB_DATA_TYPE_DOUBLE: { - //td = *(double *)thisVal; - td = GET_DOUBLE_VAL(thisVal); - //nd = *(double *)nextVal; - nd = GET_DOUBLE_VAL(nextVal); - break; - } - case TSDB_DATA_TYPE_BIGINT: { - td = (double)*(int64_t *)thisVal; - nd = (double)*(int64_t *)nextVal; - break; - } - } - double val = (1 - fraction) * td + fraction * nd; - tfree(buffer); - - return val; - } else { // incur a second round bucket split - if (isIdenticalData(pMemBucket, i, j)) { - tExtMemBuffer *pMemBuffer = pSeg->pBuffer[j]; - - tFilePage *pPage = (tFilePage *)malloc(pMemBuffer->nPageSize); - - char *thisVal = getFirstElemOfMemBuffer(pSeg, j, pPage); - - double finalResult = 0.0; - - switch (pMemBucket->dataType) { - case TSDB_DATA_TYPE_SMALLINT: { - finalResult = *(int16_t *)thisVal; - break; - } - case TSDB_DATA_TYPE_TINYINT: { - finalResult = *(int8_t *)thisVal; - break; - } - case TSDB_DATA_TYPE_INT: { - finalResult = *(int32_t *)thisVal; - break; - }; - case TSDB_DATA_TYPE_FLOAT: { - //finalResult = *(float *)thisVal; - finalResult = GET_FLOAT_VAL(thisVal); - break; - } - case TSDB_DATA_TYPE_DOUBLE: { - //finalResult = *(double *)thisVal; - finalResult = GET_DOUBLE_VAL(thisVal); - break; - } - case TSDB_DATA_TYPE_BIGINT: { - 
finalResult = (double)(*(int64_t *)thisVal); - break; - } - } - - free(pPage); - return finalResult; - } - - pTrace("MemBucket:%p,start second round bucketing", pMemBucket); - - if (pSeg->pBuffer[j]->numOfElemsInBuffer != 0) { - pTrace("MemBucket:%p,flush %d pages to disk, clear status", pMemBucket, pSeg->pBuffer[j]->numOfPagesInMem); - - pMemBucket->numOfAvailPages += pSeg->pBuffer[j]->numOfPagesInMem; - tExtMemBufferFlush(pSeg->pBuffer[j]); - } - - tExtMemBuffer *pMemBuffer = pSeg->pBuffer[j]; - pSeg->pBuffer[j] = NULL; - - // release all - for (int32_t tt = 0; tt < pMemBucket->numOfSegs; ++tt) { - tMemBucketSegment *pSeg = &pMemBucket->pSegs[tt]; - for (int32_t ttx = 0; ttx < pSeg->numOfSlots; ++ttx) { - if (pSeg->pBuffer && pSeg->pBuffer[ttx]) { - tExtMemBufferDestroy(&pSeg->pBuffer[ttx]); - } - } - } - - pMemBucket->nRange.i64MaxVal = pSeg->pBoundingEntries->i64MaxVal; - pMemBucket->nRange.i64MinVal = pSeg->pBoundingEntries->i64MinVal; - pMemBucket->numOfElems = 0; - - for (int32_t tt = 0; tt < pMemBucket->numOfSegs; ++tt) { - tMemBucketSegment *pSeg = &pMemBucket->pSegs[tt]; - for (int32_t ttx = 0; ttx < pSeg->numOfSlots; ++ttx) { - if (pSeg->pBoundingEntries) { - resetBoundingBox(pSeg, pMemBucket->dataType); - } - } - } - - tFilePage *pPage = (tFilePage *)malloc(pMemBuffer->nPageSize); - - tFlushoutInfo *pFlushInfo = &pMemBuffer->fileMeta.flushoutData.pFlushoutInfo[0]; - assert(pFlushInfo->numOfPages == pMemBuffer->fileMeta.nFileSize); - - int32_t ret = fseek(pMemBuffer->dataFile, pFlushInfo->startPageId * pMemBuffer->nPageSize, SEEK_SET); - UNUSED(ret); - - for (uint32_t jx = 0; jx < pFlushInfo->numOfPages; ++jx) { - ret = fread(pPage, pMemBuffer->nPageSize, 1, pMemBuffer->dataFile); - UNUSED(ret); - tMemBucketPut(pMemBucket, pPage->data, pPage->numOfElems); - } - - fclose(pMemBuffer->dataFile); - if (unlink(pMemBuffer->dataFilePath) != 0) { - pError("MemBucket:%p,remove tmp file %s failed", pMemBucket, pMemBuffer->dataFilePath); - } - tfree(pMemBuffer); - 
tfree(pPage); - - return getPercentileImpl(pMemBucket, count - num, fraction); - } - } else { - num += pSeg->pBuffer[j]->numOfAllElems; - } - } - } - return 0; -} - -double getPercentile(tMemBucket *pMemBucket, double percent) { - if (pMemBucket->numOfElems == 0) { - return 0.0; - } - - if (pMemBucket->numOfElems == 1) { // return the only element - return findOnlyResult(pMemBucket); - } - - percent = fabs(percent); - - // validate the parameters - if (fabs(percent - 100.0) < DBL_EPSILON || (percent < DBL_EPSILON)) { - double minx = 0, maxx = 0; - /* - * find the min/max value, no need to scan all data in bucket - */ - findMaxMinValue(pMemBucket, &maxx, &minx); - - return fabs(percent - 100) < DBL_EPSILON ? maxx : minx; - } - - double percentVal = (percent * (pMemBucket->numOfElems - 1)) / ((double)100.0); - int32_t orderIdx = (int32_t)percentVal; - - // do put data by using buckets - return getPercentileImpl(pMemBucket, orderIdx, percentVal - orderIdx); -} - -/* - * check if data in one slot are all identical - * only need to compare with the bounding box - */ -bool isIdenticalData(tMemBucket *pMemBucket, int32_t segIdx, int32_t slotIdx) { - tMemBucketSegment *pSeg = &pMemBucket->pSegs[segIdx]; - - if (pMemBucket->dataType == TSDB_DATA_TYPE_INT || pMemBucket->dataType == TSDB_DATA_TYPE_BIGINT || - pMemBucket->dataType == TSDB_DATA_TYPE_SMALLINT || pMemBucket->dataType == TSDB_DATA_TYPE_TINYINT) { - return pSeg->pBoundingEntries[slotIdx].i64MinVal == pSeg->pBoundingEntries[slotIdx].i64MaxVal; - } - - if (pMemBucket->dataType == TSDB_DATA_TYPE_FLOAT || pMemBucket->dataType == TSDB_DATA_TYPE_DOUBLE) { - return fabs(pSeg->pBoundingEntries[slotIdx].dMaxVal - pSeg->pBoundingEntries[slotIdx].dMinVal) < DBL_EPSILON; - } - - return false; -} - -/* - * get the first element of one slot into memory. 
- * if no data of current slot in memory, load it from disk - */ -char *getFirstElemOfMemBuffer(tMemBucketSegment *pSeg, int32_t slotIdx, tFilePage *pPage) { - tExtMemBuffer *pMemBuffer = pSeg->pBuffer[slotIdx]; - char * thisVal = NULL; - - if (pSeg->pBuffer[slotIdx]->numOfElemsInBuffer != 0) { - thisVal = pSeg->pBuffer[slotIdx]->pHead->item.data; - } else { - /* - * no data in memory, load one page into memory - */ - tFlushoutInfo *pFlushInfo = &pMemBuffer->fileMeta.flushoutData.pFlushoutInfo[0]; - assert(pFlushInfo->numOfPages == pMemBuffer->fileMeta.nFileSize); - - fseek(pMemBuffer->dataFile, pFlushInfo->startPageId * pMemBuffer->nPageSize, SEEK_SET); - size_t ret = fread(pPage, pMemBuffer->nPageSize, 1, pMemBuffer->dataFile); - UNUSED(ret); - thisVal = pPage->data; - } - return thisVal; -} diff --git a/src/util/src/tinterpolation.c b/src/util/src/tinterpolation.c index 5df07a5c430947ecbac516cedc79238424d70c17..82cc52cd42ef2a4c8c40d61d40aa6e956b96a1be 100644 --- a/src/util/src/tinterpolation.c +++ b/src/util/src/tinterpolation.c @@ -205,16 +205,18 @@ static char* getPos(char* data, int32_t bytes, int32_t order, int32_t capacity, // } } -static void setTagsValueInInterpolation(tFilePage** data, char** pTags, tColModel* pModel, int32_t order, int32_t start, +static void setTagsValueInInterpolation(tFilePage** data, char** pTags, SColumnModel* pModel, int32_t order, int32_t start, int32_t capacity, int32_t num) { for (int32_t j = 0, i = start; i < pModel->numOfCols; ++i, ++j) { - char* val1 = getPos(data[i]->data, pModel->pFields[i].bytes, order, capacity, num); - assignVal(val1, pTags[j], pModel->pFields[i].bytes, pModel->pFields[i].type); + SSchema* pSchema = getColumnModelSchema(pModel, i); + + char* val1 = getPos(data[i]->data, pSchema->bytes, order, capacity, num); + assignVal(val1, pTags[j], pSchema->bytes, pSchema->type); } } static void doInterpoResultImpl(SInterpolationInfo* pInterpoInfo, int16_t interpoType, tFilePage** data, - tColModel* pModel, int32_t* 
num, char** srcData, int64_t nInterval, int64_t* defaultVal, + SColumnModel* pModel, int32_t* num, char** srcData, int64_t nInterval, int64_t* defaultVal, int64_t currentTimestamp, int32_t capacity, int32_t numOfTags, char** pTags, bool outOfBound) { char** prevValues = &pInterpoInfo->prevValues; @@ -234,18 +236,23 @@ static void doInterpoResultImpl(SInterpolationInfo* pInterpoInfo, int16_t interp char* pInterpolationData = INTERPOL_IS_ASC_INTERPOL(pInterpoInfo) ? *prevValues : *nextValues; if (pInterpolationData != NULL) { for (int32_t i = 1; i < numOfValCols; ++i) { - char* val1 = getPos(data[i]->data, pModel->pFields[i].bytes, pInterpoInfo->order, capacity, *num); + SSchema* pSchema = getColumnModelSchema(pModel, i); + int16_t offset = getColumnModelOffset(pModel, i); + + char* val1 = getPos(data[i]->data, pSchema->bytes, pInterpoInfo->order, capacity, *num); - if (isNull(pInterpolationData + pModel->colOffset[i], pModel->pFields[i].type)) { - setNull(val1, pModel->pFields[i].type, pModel->pFields[i].bytes); + if (isNull(pInterpolationData + offset, pSchema->type)) { + setNull(val1, pSchema->type, pSchema->bytes); } else { - assignVal(val1, pInterpolationData + pModel->colOffset[i], pModel->pFields[i].bytes, pModel->pFields[i].type); + assignVal(val1, pInterpolationData + offset, pSchema->bytes, pSchema->type); } } } else { /* no prev value yet, set the value for null */ for (int32_t i = 1; i < numOfValCols; ++i) { - char* val1 = getPos(data[i]->data, pModel->pFields[i].bytes, pInterpoInfo->order, capacity, *num); - setNull(val1, pModel->pFields[i].type, pModel->pFields[i].bytes); + SSchema* pSchema = getColumnModelSchema(pModel, i); + + char* val1 = getPos(data[i]->data, pSchema->bytes, pInterpoInfo->order, capacity, *num); + setNull(val1, pSchema->type, pSchema->bytes); } } @@ -254,34 +261,41 @@ static void doInterpoResultImpl(SInterpolationInfo* pInterpoInfo, int16_t interp // TODO : linear interpolation supports NULL value if (*prevValues != NULL && 
!outOfBound) { for (int32_t i = 1; i < numOfValCols; ++i) { - int32_t type = pModel->pFields[i].type; - char* val1 = getPos(data[i]->data, pModel->pFields[i].bytes, pInterpoInfo->order, capacity, *num); + SSchema* pSchema = getColumnModelSchema(pModel, i); + int16_t offset = getColumnModelOffset(pModel, i); + + int16_t type = pSchema->type; + char* val1 = getPos(data[i]->data, pSchema->bytes, pInterpoInfo->order, capacity, *num); if (type == TSDB_DATA_TYPE_BINARY || type == TSDB_DATA_TYPE_NCHAR || type == TSDB_DATA_TYPE_BOOL) { - setNull(val1, pModel->pFields[i].type, pModel->pFields[i].bytes); + setNull(val1, type, pSchema->bytes); continue; } - point1 = (SPoint){.key = *(TSKEY*)(*prevValues), .val = *prevValues + pModel->colOffset[i]}; - point2 = (SPoint){.key = currentTimestamp, .val = srcData[i] + pInterpoInfo->rowIdx * pModel->pFields[i].bytes}; + point1 = (SPoint){.key = *(TSKEY*)(*prevValues), .val = *prevValues + offset}; + point2 = (SPoint){.key = currentTimestamp, .val = srcData[i] + pInterpoInfo->rowIdx * pSchema->bytes}; point = (SPoint){.key = pInterpoInfo->startTimestamp, .val = val1}; - taosDoLinearInterpolation(pModel->pFields[i].type, &point1, &point2, &point); + taosDoLinearInterpolation(type, &point1, &point2, &point); } setTagsValueInInterpolation(data, pTags, pModel, pInterpoInfo->order, numOfValCols, capacity, *num); } else { for (int32_t i = 1; i < numOfValCols; ++i) { - char* val1 = getPos(data[i]->data, pModel->pFields[i].bytes, pInterpoInfo->order, capacity, *num); - setNull(val1, pModel->pFields[i].type, pModel->pFields[i].bytes); + SSchema* pSchema = getColumnModelSchema(pModel, i); + + char* val1 = getPos(data[i]->data, pSchema->bytes, pInterpoInfo->order, capacity, *num); + setNull(val1, pSchema->type, pSchema->bytes); } setTagsValueInInterpolation(data, pTags, pModel, pInterpoInfo->order, numOfValCols, capacity, *num); } } else { /* default value interpolation */ for (int32_t i = 1; i < numOfValCols; ++i) { - char* val1 = 
getPos(data[i]->data, pModel->pFields[i].bytes, pInterpoInfo->order, capacity, *num); - assignVal(val1, (char*)&defaultVal[i], pModel->pFields[i].bytes, pModel->pFields[i].type); + SSchema* pSchema = getColumnModelSchema(pModel, i); + + char* val1 = getPos(data[i]->data, pSchema->bytes, pInterpoInfo->order, capacity, *num); + assignVal(val1, (char*)&defaultVal[i], pSchema->bytes, pSchema->type); } setTagsValueInInterpolation(data, pTags, pModel, pInterpoInfo->order, numOfValCols, capacity, *num); @@ -295,7 +309,7 @@ static void doInterpoResultImpl(SInterpolationInfo* pInterpoInfo, int16_t interp int32_t taosDoInterpoResult(SInterpolationInfo* pInterpoInfo, int16_t interpoType, tFilePage** data, int32_t numOfRawDataInRows, int32_t outputRows, int64_t nInterval, - const int64_t* pPrimaryKeyArray, tColModel* pModel, char** srcData, int64_t* defaultVal, + const int64_t* pPrimaryKeyArray, SColumnModel* pModel, char** srcData, int64_t* defaultVal, const int32_t* functionIDs, int32_t bufSize) { int32_t num = 0; pInterpoInfo->numOfCurrentInterpo = 0; @@ -328,17 +342,21 @@ int32_t taosDoInterpoResult(SInterpolationInfo* pInterpoInfo, int16_t interpoTyp (pInterpoInfo->startTimestamp > currentTimestamp && !INTERPOL_IS_ASC_INTERPOL(pInterpoInfo))) { /* set the next value for interpolation */ if (*nextValues == NULL) { - *nextValues = - calloc(1, pModel->colOffset[pModel->numOfCols - 1] + pModel->pFields[pModel->numOfCols - 1].bytes); + *nextValues = calloc(1, pModel->rowSize); for (int i = 1; i < pModel->numOfCols; i++) { - setNull(*nextValues + pModel->colOffset[i], pModel->pFields[i].type, pModel->pFields[i].bytes); + int16_t offset = getColumnModelOffset(pModel, i); + SSchema* pSchema = getColumnModelSchema(pModel, i); + + setNull(*nextValues + offset, pSchema->type, pSchema->bytes); } } int32_t offset = pInterpoInfo->rowIdx; for (int32_t tlen = 0, i = 0; i < pModel->numOfCols - numOfTags; ++i) { - memcpy(*nextValues + tlen, srcData[i] + offset * pModel->pFields[i].bytes, 
pModel->pFields[i].bytes); - tlen += pModel->pFields[i].bytes; + SSchema* pSchema = getColumnModelSchema(pModel, i); + + memcpy(*nextValues + tlen, srcData[i] + offset * pSchema->bytes, pSchema->bytes); + tlen += pSchema->bytes; } } @@ -358,37 +376,41 @@ int32_t taosDoInterpoResult(SInterpolationInfo* pInterpoInfo, int16_t interpoTyp if (pInterpoInfo->startTimestamp == currentTimestamp) { if (*prevValues == NULL) { - *prevValues = - calloc(1, pModel->colOffset[pModel->numOfCols - 1] + pModel->pFields[pModel->numOfCols - 1].bytes); + *prevValues = calloc(1, pModel->rowSize); for (int i = 1; i < pModel->numOfCols; i++) { - setNull(*prevValues + pModel->colOffset[i], pModel->pFields[i].type, pModel->pFields[i].bytes); + int16_t offset = getColumnModelOffset(pModel, i); + SSchema* pSchema = getColumnModelSchema(pModel, i); + + setNull(*prevValues + offset, pSchema->type, pSchema->bytes); } } // assign rows to dst buffer int32_t i = 0; for (int32_t tlen = 0; i < pModel->numOfCols - numOfTags; ++i) { - char* val1 = getPos(data[i]->data, pModel->pFields[i].bytes, pInterpoInfo->order, bufSize, num); + int16_t offset = getColumnModelOffset(pModel, i); + SSchema* pSchema = getColumnModelSchema(pModel, i); + + char* val1 = getPos(data[i]->data, pSchema->bytes, pInterpoInfo->order, bufSize, num); if (i == 0 || (functionIDs[i] != TSDB_FUNC_COUNT && - !isNull(srcData[i] + pInterpoInfo->rowIdx * pModel->pFields[i].bytes, pModel->pFields[i].type)) || + !isNull(srcData[i] + pInterpoInfo->rowIdx * pSchema->bytes, pSchema->type)) || (functionIDs[i] == TSDB_FUNC_COUNT && - *(int64_t*)(srcData[i] + pInterpoInfo->rowIdx * pModel->pFields[i].bytes) != 0)) { - assignVal(val1, srcData[i] + pInterpoInfo->rowIdx * pModel->pFields[i].bytes, pModel->pFields[i].bytes, - pModel->pFields[i].type); - memcpy(*prevValues + tlen, srcData[i] + pInterpoInfo->rowIdx * pModel->pFields[i].bytes, - pModel->pFields[i].bytes); + *(int64_t*)(srcData[i] + pInterpoInfo->rowIdx * pSchema->bytes) != 0)) { + + 
assignVal(val1, srcData[i] + pInterpoInfo->rowIdx * pSchema->bytes, pSchema->bytes, pSchema->type); + memcpy(*prevValues + tlen, srcData[i] + pInterpoInfo->rowIdx * pSchema->bytes, pSchema->bytes); } else { // i > 0 and isNULL, do interpolation if (interpoType == TSDB_INTERPO_PREV) { - assignVal(val1, *prevValues + pModel->colOffset[i], pModel->pFields[i].bytes, pModel->pFields[i].type); + assignVal(val1, *prevValues + offset, pSchema->bytes, pSchema->type); } else if (interpoType == TSDB_INTERPO_LINEAR) { // TODO: } else { - assignVal(val1, (char*)&defaultVal[i], pModel->pFields[i].bytes, pModel->pFields[i].type); + assignVal(val1, (char*)&defaultVal[i], pSchema->bytes, pSchema->type); } } - tlen += pModel->pFields[i].bytes; + tlen += pSchema->bytes; } /* set the tag value for final result */ diff --git a/src/util/src/tpercentile.c b/src/util/src/tpercentile.c new file mode 100644 index 0000000000000000000000000000000000000000..b3c09033b4d48f5d00d2ad3deafea6d29e1be3ec --- /dev/null +++ b/src/util/src/tpercentile.c @@ -0,0 +1,976 @@ +/* + * Copyright (c) 2019 TAOS Data, Inc. + * + * This program is free software: you can use, redistribute, and/or modify + * it under the terms of the GNU Affero General Public License, version 3 + * or later ("AGPL"), as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. + * + * You should have received a copy of the GNU Affero General Public License + * along with this program. If not, see . 
+ */ + +#include "os.h" + +#include "taosmsg.h" +#include "tsdb.h" +#include "tlog.h" +#include "ttypes.h" +#include "tpercentile.h" + +tExtMemBuffer *releaseBucketsExceptFor(tMemBucket *pMemBucket, int16_t segIdx, int16_t slotIdx) { + tExtMemBuffer *pBuffer = NULL; + + for (int32_t i = 0; i < pMemBucket->numOfSegs; ++i) { + tMemBucketSegment *pSeg = &pMemBucket->pSegs[i]; + + for (int32_t j = 0; j < pSeg->numOfSlots; ++j) { + if (i == segIdx && j == slotIdx) { + pBuffer = pSeg->pBuffer[j]; + } else { + if (pSeg->pBuffer && pSeg->pBuffer[j]) { + pSeg->pBuffer[j] = destoryExtMemBuffer(pSeg->pBuffer[j]); + } + } + } + } + + return pBuffer; +} + +static tFilePage *loadIntoBucketFromDisk(tMemBucket *pMemBucket, int32_t segIdx, int32_t slotIdx, + tOrderDescriptor *pDesc) { + // release all data in other slots + tExtMemBuffer *pMemBuffer = pMemBucket->pSegs[segIdx].pBuffer[slotIdx]; + tFilePage * buffer = (tFilePage *)calloc(1, pMemBuffer->nElemSize * pMemBuffer->numOfTotalElems + sizeof(tFilePage)); + int32_t oldCapacity = pDesc->pColumnModel->capacity; + pDesc->pColumnModel->capacity = pMemBuffer->numOfTotalElems; + + if (!tExtMemBufferIsAllDataInMem(pMemBuffer)) { + pMemBuffer = releaseBucketsExceptFor(pMemBucket, segIdx, slotIdx); + assert(pMemBuffer->numOfTotalElems > 0); + + // load data in disk to memory + tFilePage *pPage = (tFilePage *)calloc(1, pMemBuffer->pageSize); + + for (int32_t i = 0; i < pMemBuffer->fileMeta.flushoutData.nLength; ++i) { + tFlushoutInfo *pFlushInfo = &pMemBuffer->fileMeta.flushoutData.pFlushoutInfo[i]; + + int32_t ret = fseek(pMemBuffer->file, pFlushInfo->startPageId * pMemBuffer->pageSize, SEEK_SET); + UNUSED(ret); + + for (uint32_t j = 0; j < pFlushInfo->numOfPages; ++j) { + ret = fread(pPage, pMemBuffer->pageSize, 1, pMemBuffer->file); + UNUSED(ret); + assert(pPage->numOfElems > 0); + + tColModelAppend(pDesc->pColumnModel, buffer, pPage->data, 0, pPage->numOfElems, pPage->numOfElems); + printf("id: %d count: %" PRIu64 "\n", j, 
buffer->numOfElems); + } + } + tfree(pPage); + + assert(buffer->numOfElems == pMemBuffer->fileMeta.numOfElemsInFile); + } + + // load data in pMemBuffer to buffer + tFilePagesItem *pListItem = pMemBuffer->pHead; + while (pListItem != NULL) { + tColModelAppend(pDesc->pColumnModel, buffer, pListItem->item.data, 0, pListItem->item.numOfElems, + pListItem->item.numOfElems); + pListItem = pListItem->pNext; + } + + tColDataQSort(pDesc, buffer->numOfElems, 0, buffer->numOfElems - 1, buffer->data, TSQL_SO_ASC); + + pDesc->pColumnModel->capacity = oldCapacity; // restore value + return buffer; +} + +double findOnlyResult(tMemBucket *pMemBucket) { + assert(pMemBucket->numOfElems == 1); + + for (int32_t i = 0; i < pMemBucket->numOfSegs; ++i) { + tMemBucketSegment *pSeg = &pMemBucket->pSegs[i]; + if (pSeg->pBuffer) { + for (int32_t j = 0; j < pSeg->numOfSlots; ++j) { + tExtMemBuffer *pBuffer = pSeg->pBuffer[j]; + if (pBuffer) { + assert(pBuffer->numOfTotalElems == 1); + tFilePage *pPage = &pBuffer->pHead->item; + if (pBuffer->numOfElemsInBuffer == 1) { + switch (pMemBucket->dataType) { + case TSDB_DATA_TYPE_INT: + return *(int32_t *)pPage->data; + case TSDB_DATA_TYPE_SMALLINT: + return *(int16_t *)pPage->data; + case TSDB_DATA_TYPE_TINYINT: + return *(int8_t *)pPage->data; + case TSDB_DATA_TYPE_BIGINT: + return (double)(*(int64_t *)pPage->data); + case TSDB_DATA_TYPE_DOUBLE: { + double dv = GET_DOUBLE_VAL(pPage->data); + //return *(double *)pPage->data; + return dv; + } + case TSDB_DATA_TYPE_FLOAT: { + float fv = GET_FLOAT_VAL(pPage->data); + //return *(float *)pPage->data; + return fv; + } + default: + return 0; + } + } + } + } + } + } + return 0; +} + +void tBucketBigIntHash(tMemBucket *pBucket, void *value, int16_t *segIdx, int16_t *slotIdx) { + int64_t v = *(int64_t *)value; + + if (pBucket->nRange.i64MaxVal == INT64_MIN) { + if (v >= 0) { + *segIdx = ((v >> (64 - 9)) >> 6) + 8; + *slotIdx = (v >> (64 - 9)) & 0x3F; + } else { // v<0 + *segIdx = ((-v) >> (64 - 9)) >> 6; + 
*slotIdx = ((-v) >> (64 - 9)) & 0x3F; + *segIdx = 7 - (*segIdx); + } + } else { + // todo hash for bigint and float and double + int64_t span = pBucket->nRange.i64MaxVal - pBucket->nRange.i64MinVal; + if (span < pBucket->nTotalSlots) { + int32_t delta = (int32_t)(v - pBucket->nRange.i64MinVal); + *segIdx = delta / pBucket->nSlotsOfSeg; + *slotIdx = delta % pBucket->nSlotsOfSeg; + } else { + double x = (double)span / pBucket->nTotalSlots; + double posx = (v - pBucket->nRange.i64MinVal) / x; + if (v == pBucket->nRange.i64MaxVal) { + posx -= 1; + } + + *segIdx = ((int32_t)posx) / pBucket->nSlotsOfSeg; + *slotIdx = ((int32_t)posx) % pBucket->nSlotsOfSeg; + } + } +} + +// todo refactor to more generic +void tBucketIntHash(tMemBucket *pBucket, void *value, int16_t *segIdx, int16_t *slotIdx) { + int32_t v = *(int32_t *)value; + + if (pBucket->nRange.iMaxVal == INT32_MIN) { + /* + * taking negative integer into consideration, + * there is only half of pBucket->segs available for non-negative integer + */ + // int32_t numOfSlots = pBucket->nTotalSlots>>1; + // int32_t bits = bitsOfNumber(numOfSlots)-1; + + if (v >= 0) { + *segIdx = ((v >> (32 - 9)) >> 6) + 8; + *slotIdx = (v >> (32 - 9)) & 0x3F; + } else { // v<0 + *segIdx = ((-v) >> (32 - 9)) >> 6; + *slotIdx = ((-v) >> (32 - 9)) & 0x3F; + *segIdx = 7 - (*segIdx); + } + } else { + // divide a range of [iMinVal, iMaxVal] into 1024 buckets + int32_t span = pBucket->nRange.iMaxVal - pBucket->nRange.iMinVal; + if (span < pBucket->nTotalSlots) { + int32_t delta = v - pBucket->nRange.iMinVal; + *segIdx = delta / pBucket->nSlotsOfSeg; + *slotIdx = delta % pBucket->nSlotsOfSeg; + } else { + double x = (double)span / pBucket->nTotalSlots; + double posx = (v - pBucket->nRange.iMinVal) / x; + if (v == pBucket->nRange.iMaxVal) { + posx -= 1; + } + *segIdx = ((int32_t)posx) / pBucket->nSlotsOfSeg; + *slotIdx = ((int32_t)posx) % pBucket->nSlotsOfSeg; + } + } +} + +void tBucketDoubleHash(tMemBucket *pBucket, void *value, int16_t *segIdx, 
int16_t *slotIdx) { + // double v = *(double *)value; + double v = GET_DOUBLE_VAL(value); + + if (pBucket->nRange.dMinVal == DBL_MAX) { + /* + * taking negative integer into consideration, + * there is only half of pBucket->segs available for non-negative integer + */ + double x = DBL_MAX / (pBucket->nTotalSlots >> 1); + double posx = (v + DBL_MAX) / x; + *segIdx = ((int32_t)posx) / pBucket->nSlotsOfSeg; + *slotIdx = ((int32_t)posx) % pBucket->nSlotsOfSeg; + } else { + // divide a range of [dMinVal, dMaxVal] into 1024 buckets + double span = pBucket->nRange.dMaxVal - pBucket->nRange.dMinVal; + if (span < pBucket->nTotalSlots) { + int32_t delta = (int32_t)(v - pBucket->nRange.dMinVal); + *segIdx = delta / pBucket->nSlotsOfSeg; + *slotIdx = delta % pBucket->nSlotsOfSeg; + } else { + double x = span / pBucket->nTotalSlots; + double posx = (v - pBucket->nRange.dMinVal) / x; + if (v == pBucket->nRange.dMaxVal) { + posx -= 1; + } + *segIdx = ((int32_t)posx) / pBucket->nSlotsOfSeg; + *slotIdx = ((int32_t)posx) % pBucket->nSlotsOfSeg; + } + + if (*segIdx < 0 || *segIdx > 16 || *slotIdx < 0 || *slotIdx > 64) { + pError("error in hash process. segment is: %d, slot id is: %d\n", *segIdx, *slotIdx); + } + } +} + +tMemBucket *tMemBucketCreate(int32_t totalSlots, int32_t nBufferSize, int16_t nElemSize, int16_t dataType, + tOrderDescriptor *pDesc) { + tMemBucket *pBucket = (tMemBucket *)malloc(sizeof(tMemBucket)); + + pBucket->nTotalSlots = totalSlots; + pBucket->nSlotsOfSeg = 1 << 6; // 64 Segments, 16 slots each seg. 
+ pBucket->dataType = dataType; + pBucket->nElemSize = nElemSize; + pBucket->pageSize = DEFAULT_PAGE_SIZE; + + pBucket->numOfElems = 0; + pBucket->numOfSegs = pBucket->nTotalSlots / pBucket->nSlotsOfSeg; + + pBucket->nTotalBufferSize = nBufferSize; + + pBucket->maxElemsCapacity = pBucket->nTotalBufferSize / pBucket->nElemSize; + + pBucket->numOfTotalPages = pBucket->nTotalBufferSize / pBucket->pageSize; + pBucket->numOfAvailPages = pBucket->numOfTotalPages; + + pBucket->pOrderDesc = pDesc; + + switch (pBucket->dataType) { + case TSDB_DATA_TYPE_INT: + case TSDB_DATA_TYPE_SMALLINT: + case TSDB_DATA_TYPE_TINYINT: { + pBucket->nRange.iMinVal = INT32_MAX; + pBucket->nRange.iMaxVal = INT32_MIN; + pBucket->HashFunc = tBucketIntHash; + break; + }; + case TSDB_DATA_TYPE_DOUBLE: + case TSDB_DATA_TYPE_FLOAT: { + pBucket->nRange.dMinVal = DBL_MAX; + pBucket->nRange.dMaxVal = -DBL_MAX; + pBucket->HashFunc = tBucketDoubleHash; + break; + }; + case TSDB_DATA_TYPE_BIGINT: { + pBucket->nRange.i64MinVal = INT64_MAX; + pBucket->nRange.i64MaxVal = INT64_MIN; + pBucket->HashFunc = tBucketBigIntHash; + break; + }; + default: { + pError("MemBucket:%p,not support data type %d,failed", *pBucket, pBucket->dataType); + tfree(pBucket); + return NULL; + } + } + + int32_t numOfCols = pDesc->pColumnModel->numOfCols; + if (numOfCols != 1) { + pError("MemBucket:%p,only consecutive data is allowed,invalid numOfCols:%d", pBucket, numOfCols); + tfree(pBucket); + return NULL; + } + + SSchema* pSchema = getColumnModelSchema(pDesc->pColumnModel, 0); + if (pSchema->type != dataType) { + pError("MemBucket:%p,data type is not consistent,%d in schema, %d in param", pBucket, pSchema->type, dataType); + tfree(pBucket); + return NULL; + } + + if (pBucket->numOfTotalPages < pBucket->nTotalSlots) { + pWarn("MemBucket:%p,total buffer pages %d are not enough for all slots", pBucket, pBucket->numOfTotalPages); + } + + pBucket->pSegs = (tMemBucketSegment *)malloc(pBucket->numOfSegs * sizeof(tMemBucketSegment)); + + 
for (int32_t i = 0; i < pBucket->numOfSegs; ++i) { + pBucket->pSegs[i].numOfSlots = pBucket->nSlotsOfSeg; + pBucket->pSegs[i].pBuffer = NULL; + pBucket->pSegs[i].pBoundingEntries = NULL; + } + + pTrace("MemBucket:%p,created,buffer size:%d,elem size:%d", pBucket, pBucket->numOfTotalPages * DEFAULT_PAGE_SIZE, + pBucket->nElemSize); + + return pBucket; +} + +void tMemBucketDestroy(tMemBucket *pBucket) { + if (pBucket == NULL) { + return; + } + + if (pBucket->pSegs) { + for (int32_t i = 0; i < pBucket->numOfSegs; ++i) { + tMemBucketSegment *pSeg = &(pBucket->pSegs[i]); + tfree(pSeg->pBoundingEntries); + + if (pSeg->pBuffer == NULL || pSeg->numOfSlots == 0) { + continue; + } + + for (int32_t j = 0; j < pSeg->numOfSlots; ++j) { + if (pSeg->pBuffer[j] != NULL) { + pSeg->pBuffer[j] = destoryExtMemBuffer(pSeg->pBuffer[j]); + } + } + tfree(pSeg->pBuffer); + } + } + + tfree(pBucket->pSegs); + tfree(pBucket); +} + +/* + * find the slots which accounts for largest proportion of total in-memory buffer + */ +static void tBucketGetMaxMemSlot(tMemBucket *pBucket, int16_t *segIdx, int16_t *slotIdx) { + *segIdx = -1; + *slotIdx = -1; + + int32_t val = 0; + for (int32_t k = 0; k < pBucket->numOfSegs; ++k) { + tMemBucketSegment *pSeg = &pBucket->pSegs[k]; + for (int32_t i = 0; i < pSeg->numOfSlots; ++i) { + if (pSeg->pBuffer == NULL || pSeg->pBuffer[i] == NULL) { + continue; + } + + if (val < pSeg->pBuffer[i]->numOfInMemPages) { + val = pSeg->pBuffer[i]->numOfInMemPages; + *segIdx = k; + *slotIdx = i; + } + } + } +} + +static void resetBoundingBox(tMemBucketSegment *pSeg, int32_t type) { + switch (type) { + case TSDB_DATA_TYPE_BIGINT: { + for (int32_t i = 0; i < pSeg->numOfSlots; ++i) { + pSeg->pBoundingEntries[i].i64MaxVal = INT64_MIN; + pSeg->pBoundingEntries[i].i64MinVal = INT64_MAX; + } + break; + }; + case TSDB_DATA_TYPE_INT: + case TSDB_DATA_TYPE_SMALLINT: + case TSDB_DATA_TYPE_TINYINT: { + for (int32_t i = 0; i < pSeg->numOfSlots; ++i) { + pSeg->pBoundingEntries[i].iMaxVal = 
INT32_MIN; + pSeg->pBoundingEntries[i].iMinVal = INT32_MAX; + } + break; + }; + case TSDB_DATA_TYPE_DOUBLE: + case TSDB_DATA_TYPE_FLOAT: { + for (int32_t i = 0; i < pSeg->numOfSlots; ++i) { + pSeg->pBoundingEntries[i].dMaxVal = -DBL_MAX; + pSeg->pBoundingEntries[i].dMinVal = DBL_MAX; + } + break; + } + } +} + +void tMemBucketUpdateBoundingBox(MinMaxEntry *r, char *data, int32_t dataType) { + switch (dataType) { + case TSDB_DATA_TYPE_INT: { + int32_t val = *(int32_t *)data; + if (r->iMinVal > val) { + r->iMinVal = val; + } + + if (r->iMaxVal < val) { + r->iMaxVal = val; + } + break; + }; + case TSDB_DATA_TYPE_BIGINT: { + int64_t val = *(int64_t *)data; + if (r->i64MinVal > val) { + r->i64MinVal = val; + } + + if (r->i64MaxVal < val) { + r->i64MaxVal = val; + } + break; + }; + case TSDB_DATA_TYPE_SMALLINT: { + int32_t val = *(int16_t *)data; + if (r->iMinVal > val) { + r->iMinVal = val; + } + + if (r->iMaxVal < val) { + r->iMaxVal = val; + } + break; + }; + case TSDB_DATA_TYPE_TINYINT: { + int32_t val = *(int8_t *)data; + if (r->iMinVal > val) { + r->iMinVal = val; + } + + if (r->iMaxVal < val) { + r->iMaxVal = val; + } + + break; + }; + case TSDB_DATA_TYPE_DOUBLE: { + // double val = *(double *)data; + double val = GET_DOUBLE_VAL(data); + if (r->dMinVal > val) { + r->dMinVal = val; + } + + if (r->dMaxVal < val) { + r->dMaxVal = val; + } + break; + }; + case TSDB_DATA_TYPE_FLOAT: { + // double val = *(float *)data; + double val = GET_FLOAT_VAL(data); + + if (r->dMinVal > val) { + r->dMinVal = val; + } + + if (r->dMaxVal < val) { + r->dMaxVal = val; + } + break; + }; + default: { assert(false); } + } +} + +/* + * in memory bucket, we only accept the simple data consecutive put in a row/column + * no column-model in this case. 
+ */ +void tMemBucketPut(tMemBucket *pBucket, void *data, int32_t numOfRows) { + pBucket->numOfElems += numOfRows; + int16_t segIdx = 0, slotIdx = 0; + + for (int32_t i = 0; i < numOfRows; ++i) { + char *d = (char *)data + i * tDataTypeDesc[pBucket->dataType].nSize; + + switch (pBucket->dataType) { + case TSDB_DATA_TYPE_SMALLINT: { + int32_t val = *(int16_t *)d; + (pBucket->HashFunc)(pBucket, &val, &segIdx, &slotIdx); + break; + } + case TSDB_DATA_TYPE_TINYINT: { + int32_t val = *(int8_t *)d; + (pBucket->HashFunc)(pBucket, &val, &segIdx, &slotIdx); + break; + } + case TSDB_DATA_TYPE_INT: { + int32_t val = *(int32_t *)d; + (pBucket->HashFunc)(pBucket, &val, &segIdx, &slotIdx); + break; + } + case TSDB_DATA_TYPE_BIGINT: { + int64_t val = *(int64_t *)d; + (pBucket->HashFunc)(pBucket, &val, &segIdx, &slotIdx); + break; + } + case TSDB_DATA_TYPE_DOUBLE: { + // double val = *(double *)d; + double val = GET_DOUBLE_VAL(d); + (pBucket->HashFunc)(pBucket, &val, &segIdx, &slotIdx); + break; + } + case TSDB_DATA_TYPE_FLOAT: { + // double val = *(float *)d; + double val = GET_FLOAT_VAL(d); + (pBucket->HashFunc)(pBucket, &val, &segIdx, &slotIdx); + break; + } + } + + tMemBucketSegment *pSeg = &pBucket->pSegs[segIdx]; + if (pSeg->pBoundingEntries == NULL) { + pSeg->pBoundingEntries = (MinMaxEntry *)malloc(sizeof(MinMaxEntry) * pBucket->nSlotsOfSeg); + resetBoundingBox(pSeg, pBucket->dataType); + } + + if (pSeg->pBuffer == NULL) { + pSeg->pBuffer = (tExtMemBuffer **)calloc(pBucket->nSlotsOfSeg, sizeof(void *)); + } + + if (pSeg->pBuffer[slotIdx] == NULL) { + pSeg->pBuffer[slotIdx] = createExtMemBuffer(pBucket->numOfTotalPages * pBucket->pageSize, pBucket->nElemSize, + pBucket->pOrderDesc->pColumnModel); + pSeg->pBuffer[slotIdx]->flushModel = SINGLE_APPEND_MODEL; + pBucket->pOrderDesc->pColumnModel->capacity = pSeg->pBuffer[slotIdx]->numOfElemsPerPage; + } + + tMemBucketUpdateBoundingBox(&pSeg->pBoundingEntries[slotIdx], d, pBucket->dataType); + + // ensure available memory pages 
to allocate + int16_t cseg = 0, cslot = 0; + if (pBucket->numOfAvailPages == 0) { + pTrace("MemBucket:%p,max avail size:%d, no avail memory pages,", pBucket, pBucket->numOfTotalPages); + + tBucketGetMaxMemSlot(pBucket, &cseg, &cslot); + if (cseg == -1 || cslot == -1) { + pError("MemBucket:%p,failed to find appropriated avail buffer", pBucket); + return; + } + + if (cseg != segIdx || cslot != slotIdx) { + pBucket->numOfAvailPages += pBucket->pSegs[cseg].pBuffer[cslot]->numOfInMemPages; + + int32_t avail = pBucket->pSegs[cseg].pBuffer[cslot]->numOfInMemPages; + UNUSED(avail); + tExtMemBufferFlush(pBucket->pSegs[cseg].pBuffer[cslot]); + + pTrace("MemBucket:%p,seg:%d,slot:%d flushed to disk,new avail pages:%d", pBucket, cseg, cslot, + pBucket->numOfAvailPages); + } else { + pTrace("MemBucket:%p,failed to choose slot to flush to disk seg:%d,slot:%d", pBucket, cseg, cslot); + } + } + int16_t consumedPgs = pSeg->pBuffer[slotIdx]->numOfInMemPages; + + int16_t newPgs = tExtMemBufferPut(pSeg->pBuffer[slotIdx], d, 1); + /* + * trigger 1. page re-allocation, to reduce the available pages + * 2. 
page flushout, to increase the available pages
+     */
+    pBucket->numOfAvailPages += (consumedPgs - newPgs);
+  }
+}
+
+void releaseBucket(tMemBucket *pMemBucket, int32_t segIdx, int32_t slotIdx) {
+  if (segIdx < 0 || segIdx >= pMemBucket->numOfSegs || slotIdx < 0) {  // was >: segIdx == numOfSegs read out of bounds
+    return;
+  }
+
+  tMemBucketSegment *pSeg = &pMemBucket->pSegs[segIdx];
+  if (slotIdx < 0 || slotIdx >= pSeg->numOfSlots || pSeg->pBuffer[slotIdx] == NULL) {
+    return;
+  }
+
+  pSeg->pBuffer[slotIdx] = destoryExtMemBuffer(pSeg->pBuffer[slotIdx]);
+}
+
+////////////////////////////////////////////////////////////////////////////////////////////
+static void findMaxMinValue(tMemBucket *pMemBucket, double *maxVal, double *minVal) {
+  *minVal = DBL_MAX;
+  *maxVal = -DBL_MAX;
+
+  for (int32_t i = 0; i < pMemBucket->numOfSegs; ++i) {
+    tMemBucketSegment *pSeg = &pMemBucket->pSegs[i];
+    if (pSeg->pBuffer == NULL) {
+      continue;
+    }
+    switch (pMemBucket->dataType) {
+      case TSDB_DATA_TYPE_INT:
+      case TSDB_DATA_TYPE_SMALLINT:
+      case TSDB_DATA_TYPE_TINYINT: {
+        for (int32_t j = 0; j < pSeg->numOfSlots; ++j) {
+          double minv = pSeg->pBoundingEntries[j].iMinVal;
+          double maxv = pSeg->pBoundingEntries[j].iMaxVal;
+
+          if (*minVal > minv) {
+            *minVal = minv;
+          }
+          if (*maxVal < maxv) {
+            *maxVal = maxv;
+          }
+        }
+        break;
+      }
+      case TSDB_DATA_TYPE_DOUBLE:
+      case TSDB_DATA_TYPE_FLOAT: {
+        for (int32_t j = 0; j < pSeg->numOfSlots; ++j) {
+          double minv = pSeg->pBoundingEntries[j].dMinVal;
+          double maxv = pSeg->pBoundingEntries[j].dMaxVal;
+
+          if (*minVal > minv) {
+            *minVal = minv;
+          }
+          if (*maxVal < maxv) {
+            *maxVal = maxv;
+          }
+        }
+        break;
+      }
+      case TSDB_DATA_TYPE_BIGINT: {
+        for (int32_t j = 0; j < pSeg->numOfSlots; ++j) {
+          double minv = (double)pSeg->pBoundingEntries[j].i64MinVal;
+          double maxv = (double)pSeg->pBoundingEntries[j].i64MaxVal;
+
+          if (*minVal > minv) {
+            *minVal = minv;
+          }
+          if (*maxVal < maxv) {
+            *maxVal = maxv;
+          }
+        }
+        break;
+      }
+    }
+  }
+}
+
+static MinMaxEntry 
getMinMaxEntryOfNearestSlotInNextSegment(tMemBucket *pMemBucket, int32_t segIdx) { + int32_t i = segIdx + 1; + while (i < pMemBucket->numOfSegs && pMemBucket->pSegs[i].numOfSlots == 0) ++i; + + tMemBucketSegment *pSeg = &pMemBucket->pSegs[i]; + assert(pMemBucket->numOfSegs > i && pMemBucket->pSegs[i].pBuffer != NULL); + + i = 0; + while (i < pMemBucket->nSlotsOfSeg && pSeg->pBuffer[i] == NULL) ++i; + + assert(i < pMemBucket->nSlotsOfSeg); + return pSeg->pBoundingEntries[i]; +} + +/* + * + * now, we need to find the minimum value of the next slot for + * interpolating the percentile value + * j is the last slot of current segment, we need to get the first + * slot of the next segment. + */ +static MinMaxEntry getMinMaxEntryOfNextSlotWithData(tMemBucket *pMemBucket, int32_t segIdx, int32_t slotIdx) { + tMemBucketSegment *pSeg = &pMemBucket->pSegs[segIdx]; + + MinMaxEntry next; + if (slotIdx == pSeg->numOfSlots - 1) { // find next segment with data + return getMinMaxEntryOfNearestSlotInNextSegment(pMemBucket, segIdx); + } else { + int32_t j = slotIdx + 1; + for (; j < pMemBucket->nSlotsOfSeg && pMemBucket->pSegs[segIdx].pBuffer[j] == 0; ++j) { + }; + + if (j == pMemBucket->nSlotsOfSeg) { // current slot has no available + // slot,try next segment + return getMinMaxEntryOfNearestSlotInNextSegment(pMemBucket, segIdx); + } else { + next = pSeg->pBoundingEntries[slotIdx + 1]; + assert(pSeg->pBuffer[slotIdx + 1] != NULL); + } + } + + return next; +} + +bool isIdenticalData(tMemBucket *pMemBucket, int32_t segIdx, int32_t slotIdx); +char *getFirstElemOfMemBuffer(tMemBucketSegment *pSeg, int32_t slotIdx, tFilePage *pPage); + +double getPercentileImpl(tMemBucket *pMemBucket, int32_t count, double fraction) { + int32_t num = 0; + + for (int32_t i = 0; i < pMemBucket->numOfSegs; ++i) { + tMemBucketSegment *pSeg = &pMemBucket->pSegs[i]; + for (int32_t j = 0; j < pSeg->numOfSlots; ++j) { + if (pSeg->pBuffer == NULL || pSeg->pBuffer[j] == NULL) { + continue; + } + // required value 
in current slot + if (num < (count + 1) && num + pSeg->pBuffer[j]->numOfTotalElems >= (count + 1)) { + if (pSeg->pBuffer[j]->numOfTotalElems + num == (count + 1)) { + /* + * now, we need to find the minimum value of the next slot for interpolating the percentile value + * j is the last slot of current segment, we need to get the first slot of the next segment. + * + */ + MinMaxEntry next = getMinMaxEntryOfNextSlotWithData(pMemBucket, i, j); + + double maxOfThisSlot = 0; + double minOfNextSlot = 0; + switch (pMemBucket->dataType) { + case TSDB_DATA_TYPE_INT: + case TSDB_DATA_TYPE_SMALLINT: + case TSDB_DATA_TYPE_TINYINT: { + maxOfThisSlot = pSeg->pBoundingEntries[j].iMaxVal; + minOfNextSlot = next.iMinVal; + break; + }; + case TSDB_DATA_TYPE_FLOAT: + case TSDB_DATA_TYPE_DOUBLE: { + maxOfThisSlot = pSeg->pBoundingEntries[j].dMaxVal; + minOfNextSlot = next.dMinVal; + break; + }; + case TSDB_DATA_TYPE_BIGINT: { + maxOfThisSlot = (double)pSeg->pBoundingEntries[j].i64MaxVal; + minOfNextSlot = (double)next.i64MinVal; + break; + } + }; + + assert(minOfNextSlot > maxOfThisSlot); + + double val = (1 - fraction) * maxOfThisSlot + fraction * minOfNextSlot; + return val; + } + if (pSeg->pBuffer[j]->numOfTotalElems <= pMemBucket->maxElemsCapacity) { + // data in buffer and file are merged together to be processed. 
+ tFilePage *buffer = loadIntoBucketFromDisk(pMemBucket, i, j, pMemBucket->pOrderDesc); + int32_t currentIdx = count - num; + + char * thisVal = buffer->data + pMemBucket->nElemSize * currentIdx; + char * nextVal = thisVal + pMemBucket->nElemSize; + double td, nd; + switch (pMemBucket->dataType) { + case TSDB_DATA_TYPE_SMALLINT: { + td = *(int16_t *)thisVal; + nd = *(int16_t *)nextVal; + break; + } + case TSDB_DATA_TYPE_TINYINT: { + td = *(int8_t *)thisVal; + nd = *(int8_t *)nextVal; + break; + } + case TSDB_DATA_TYPE_INT: { + td = *(int32_t *)thisVal; + nd = *(int32_t *)nextVal; + break; + }; + case TSDB_DATA_TYPE_FLOAT: { + // td = *(float *)thisVal; + // nd = *(float *)nextVal; + td = GET_FLOAT_VAL(thisVal); + nd = GET_FLOAT_VAL(nextVal); + break; + } + case TSDB_DATA_TYPE_DOUBLE: { + // td = *(double *)thisVal; + td = GET_DOUBLE_VAL(thisVal); + // nd = *(double *)nextVal; + nd = GET_DOUBLE_VAL(nextVal); + break; + } + case TSDB_DATA_TYPE_BIGINT: { + td = (double)*(int64_t *)thisVal; + nd = (double)*(int64_t *)nextVal; + break; + } + } + double val = (1 - fraction) * td + fraction * nd; + tfree(buffer); + + return val; + } else { // incur a second round bucket split + if (isIdenticalData(pMemBucket, i, j)) { + tExtMemBuffer *pMemBuffer = pSeg->pBuffer[j]; + + tFilePage *pPage = (tFilePage *)malloc(pMemBuffer->pageSize); + + char *thisVal = getFirstElemOfMemBuffer(pSeg, j, pPage); + + double finalResult = 0.0; + + switch (pMemBucket->dataType) { + case TSDB_DATA_TYPE_SMALLINT: { + finalResult = *(int16_t *)thisVal; + break; + } + case TSDB_DATA_TYPE_TINYINT: { + finalResult = *(int8_t *)thisVal; + break; + } + case TSDB_DATA_TYPE_INT: { + finalResult = *(int32_t *)thisVal; + break; + }; + case TSDB_DATA_TYPE_FLOAT: { + // finalResult = *(float *)thisVal; + finalResult = GET_FLOAT_VAL(thisVal); + break; + } + case TSDB_DATA_TYPE_DOUBLE: { + // finalResult = *(double *)thisVal; + finalResult = GET_DOUBLE_VAL(thisVal); + break; + } + case TSDB_DATA_TYPE_BIGINT: { + 
finalResult = (double)(*(int64_t *)thisVal); + break; + } + } + + free(pPage); + return finalResult; + } + + pTrace("MemBucket:%p,start second round bucketing", pMemBucket); + + if (pSeg->pBuffer[j]->numOfElemsInBuffer != 0) { + pTrace("MemBucket:%p,flush %d pages to disk, clear status", pMemBucket, pSeg->pBuffer[j]->numOfInMemPages); + + pMemBucket->numOfAvailPages += pSeg->pBuffer[j]->numOfInMemPages; + tExtMemBufferFlush(pSeg->pBuffer[j]); + } + + tExtMemBuffer *pMemBuffer = pSeg->pBuffer[j]; + pSeg->pBuffer[j] = NULL; + + // release all + for (int32_t tt = 0; tt < pMemBucket->numOfSegs; ++tt) { + tMemBucketSegment *pSeg = &pMemBucket->pSegs[tt]; + for (int32_t ttx = 0; ttx < pSeg->numOfSlots; ++ttx) { + if (pSeg->pBuffer && pSeg->pBuffer[ttx]) { + pSeg->pBuffer[ttx] = destoryExtMemBuffer(pSeg->pBuffer[ttx]); + } + } + } + + pMemBucket->nRange.i64MaxVal = pSeg->pBoundingEntries->i64MaxVal; + pMemBucket->nRange.i64MinVal = pSeg->pBoundingEntries->i64MinVal; + pMemBucket->numOfElems = 0; + + for (int32_t tt = 0; tt < pMemBucket->numOfSegs; ++tt) { + tMemBucketSegment *pSeg = &pMemBucket->pSegs[tt]; + for (int32_t ttx = 0; ttx < pSeg->numOfSlots; ++ttx) { + if (pSeg->pBoundingEntries) { + resetBoundingBox(pSeg, pMemBucket->dataType); + } + } + } + + tFilePage *pPage = (tFilePage *)malloc(pMemBuffer->pageSize); + + tFlushoutInfo *pFlushInfo = &pMemBuffer->fileMeta.flushoutData.pFlushoutInfo[0]; + assert(pFlushInfo->numOfPages == pMemBuffer->fileMeta.nFileSize); + + int32_t ret = fseek(pMemBuffer->file, pFlushInfo->startPageId * pMemBuffer->pageSize, SEEK_SET); + UNUSED(ret); + + for (uint32_t jx = 0; jx < pFlushInfo->numOfPages; ++jx) { + ret = fread(pPage, pMemBuffer->pageSize, 1, pMemBuffer->file); + UNUSED(ret); + tMemBucketPut(pMemBucket, pPage->data, pPage->numOfElems); + } + + fclose(pMemBuffer->file); + if (unlink(pMemBuffer->path) != 0) { + pError("MemBucket:%p, remove tmp file %s failed", pMemBucket, pMemBuffer->path); + } + tfree(pMemBuffer); + 
tfree(pPage); + + return getPercentileImpl(pMemBucket, count - num, fraction); + } + } else { + num += pSeg->pBuffer[j]->numOfTotalElems; + } + } + } + return 0; +} + +double getPercentile(tMemBucket *pMemBucket, double percent) { + if (pMemBucket->numOfElems == 0) { + return 0.0; + } + + if (pMemBucket->numOfElems == 1) { // return the only element + return findOnlyResult(pMemBucket); + } + + percent = fabs(percent); + + // validate the parameters + if (fabs(percent - 100.0) < DBL_EPSILON || (percent < DBL_EPSILON)) { + double minx = 0, maxx = 0; + /* + * find the min/max value, no need to scan all data in bucket + */ + findMaxMinValue(pMemBucket, &maxx, &minx); + + return fabs(percent - 100) < DBL_EPSILON ? maxx : minx; + } + + double percentVal = (percent * (pMemBucket->numOfElems - 1)) / ((double)100.0); + int32_t orderIdx = (int32_t)percentVal; + + // do put data by using buckets + return getPercentileImpl(pMemBucket, orderIdx, percentVal - orderIdx); +} + +/* + * check if data in one slot are all identical + * only need to compare with the bounding box + */ +bool isIdenticalData(tMemBucket *pMemBucket, int32_t segIdx, int32_t slotIdx) { + tMemBucketSegment *pSeg = &pMemBucket->pSegs[segIdx]; + + if (pMemBucket->dataType == TSDB_DATA_TYPE_INT || pMemBucket->dataType == TSDB_DATA_TYPE_BIGINT || + pMemBucket->dataType == TSDB_DATA_TYPE_SMALLINT || pMemBucket->dataType == TSDB_DATA_TYPE_TINYINT) { + return pSeg->pBoundingEntries[slotIdx].i64MinVal == pSeg->pBoundingEntries[slotIdx].i64MaxVal; + } + + if (pMemBucket->dataType == TSDB_DATA_TYPE_FLOAT || pMemBucket->dataType == TSDB_DATA_TYPE_DOUBLE) { + return fabs(pSeg->pBoundingEntries[slotIdx].dMaxVal - pSeg->pBoundingEntries[slotIdx].dMinVal) < DBL_EPSILON; + } + + return false; +} + +/* + * get the first element of one slot into memory. 
+ * if no data of current slot in memory, load it from disk + */ +char *getFirstElemOfMemBuffer(tMemBucketSegment *pSeg, int32_t slotIdx, tFilePage *pPage) { + tExtMemBuffer *pMemBuffer = pSeg->pBuffer[slotIdx]; + char * thisVal = NULL; + + if (pSeg->pBuffer[slotIdx]->numOfElemsInBuffer != 0) { + thisVal = pSeg->pBuffer[slotIdx]->pHead->item.data; + } else { + /* + * no data in memory, load one page into memory + */ + tFlushoutInfo *pFlushInfo = &pMemBuffer->fileMeta.flushoutData.pFlushoutInfo[0]; + assert(pFlushInfo->numOfPages == pMemBuffer->fileMeta.nFileSize); + + fseek(pMemBuffer->file, pFlushInfo->startPageId * pMemBuffer->pageSize, SEEK_SET); + size_t ret = fread(pPage, pMemBuffer->pageSize, 1, pMemBuffer->file); + UNUSED(ret); + thisVal = pPage->data; + } + return thisVal; +} diff --git a/src/util/src/tresultBuf.c b/src/util/src/tresultBuf.c new file mode 100644 index 0000000000000000000000000000000000000000..31218670acc0a95c865de27ea945d1ed5ee19e29 --- /dev/null +++ b/src/util/src/tresultBuf.c @@ -0,0 +1,225 @@ +#include "hash.h" +#include "taoserror.h" +#include "textbuffer.h" +#include "tlog.h" +#include "tsqlfunction.h" +#include "tresultBuf.h" + +#define DEFAULT_INTERN_BUF_SIZE 16384L + +int32_t createResultBuf(SQueryResultBuf** pResultBuf, int32_t size, int32_t rowSize) { + SQueryResultBuf* pResBuf = calloc(1, sizeof(SQueryResultBuf)); + pResBuf->numOfRowsPerPage = (DEFAULT_INTERN_BUF_SIZE - sizeof(tFilePage)) / rowSize; + pResBuf->numOfPages = size; + + pResBuf->totalBufSize = pResBuf->numOfPages * DEFAULT_INTERN_BUF_SIZE; + pResBuf->incStep = 4; + + // init id hash table + pResBuf->idsTable = taosInitHashTable(size, taosGetDefaultHashFunction(TSDB_DATA_TYPE_INT), false); + pResBuf->list = calloc(size, sizeof(SIDList)); + pResBuf->numOfAllocGroupIds = size; + + char path[4096] = {0}; + getTmpfilePath("tsdb_q_buf", path); + pResBuf->path = strdup(path); + + pResBuf->fd = open(pResBuf->path, O_CREAT | O_RDWR, 0666); + + memset(path, 0, tListLen(path)); 
+
+  if (!FD_VALID(pResBuf->fd)) {
+    pError("failed to create tmp file: %s on disk. %s", pResBuf->path, strerror(errno));
+    return TSDB_CODE_CLI_NO_DISKSPACE;
+  }
+
+  int32_t ret = ftruncate(pResBuf->fd, pResBuf->numOfPages * DEFAULT_INTERN_BUF_SIZE);
+  if (ret != TSDB_CODE_SUCCESS) {
+    pError("failed to create tmp file: %s on disk. %s", pResBuf->path, strerror(errno));
+    return TSDB_CODE_CLI_NO_DISKSPACE;
+  }
+
+  pResBuf->pBuf = mmap(NULL, pResBuf->totalBufSize, PROT_READ | PROT_WRITE, MAP_SHARED, pResBuf->fd, 0);
+  if (pResBuf->pBuf == MAP_FAILED) {
+    pError("failed to map temp file: %s. %s", pResBuf->path, strerror(errno));  // was "QInfo:%p ...": 3 specifiers, 2 args (UB)
+    return TSDB_CODE_CLI_OUT_OF_MEMORY; // todo change error code
+  }
+
+  pTrace("create tmp file for output result, %s, %" PRId64 " bytes", pResBuf->path, pResBuf->totalBufSize);  // was " PRId64 ": missing %
+  *pResultBuf = pResBuf;
+  return TSDB_CODE_SUCCESS;
+}
+
+tFilePage* getResultBufferPageById(SQueryResultBuf* pResultBuf, int32_t id) {
+  assert(id < pResultBuf->numOfPages && id >= 0);
+
+  return (tFilePage*)(pResultBuf->pBuf + DEFAULT_INTERN_BUF_SIZE * id);
+}
+
+int32_t getNumOfResultBufGroupId(SQueryResultBuf* pResultBuf) { return taosNumElemsInHashTable(pResultBuf->idsTable); }
+
+int32_t getResBufSize(SQueryResultBuf* pResultBuf) { return pResultBuf->totalBufSize; }
+
+static int32_t extendDiskFileSize(SQueryResultBuf* pResultBuf, int32_t numOfPages) {
+  assert(pResultBuf->numOfPages * DEFAULT_INTERN_BUF_SIZE == pResultBuf->totalBufSize);
+
+  int32_t ret = munmap(pResultBuf->pBuf, pResultBuf->totalBufSize);
+  pResultBuf->numOfPages += numOfPages;
+
+  /*
+   * disk-based output buffer is exhausted, try to extend the disk-based buffer, the available disk space may
+   * be insufficient
+   */
+  ret = ftruncate(pResultBuf->fd, pResultBuf->numOfPages * DEFAULT_INTERN_BUF_SIZE);
+  if (ret != 0) {
+    // dError("QInfo:%p failed to create intermediate result output file:%s. 
%s", pQInfo, pSupporter->extBufFile, + // strerror(errno)); + return -TSDB_CODE_SERV_NO_DISKSPACE; + } + + pResultBuf->totalBufSize = pResultBuf->numOfPages * DEFAULT_INTERN_BUF_SIZE; + pResultBuf->pBuf = mmap(NULL, pResultBuf->totalBufSize, PROT_READ | PROT_WRITE, MAP_SHARED, pResultBuf->fd, 0); + + if (pResultBuf->pBuf == MAP_FAILED) { + // dError("QInfo:%p failed to map temp file: %s. %s", pQInfo, pSupporter->extBufFile, strerror(errno)); + return -TSDB_CODE_SERV_OUT_OF_MEMORY; + } + + return TSDB_CODE_SUCCESS; +} + +static bool noMoreAvailablePages(SQueryResultBuf* pResultBuf) { + return (pResultBuf->allocateId == pResultBuf->numOfPages - 1); +} + +static int32_t getGroupIndex(SQueryResultBuf* pResultBuf, int32_t groupId) { + assert(pResultBuf != NULL); + + char* p = taosGetDataFromHashTable(pResultBuf->idsTable, (const char*)&groupId, sizeof(int32_t)); + if (p == NULL) { // it is a new group id + return -1; + } + + int32_t slot = GET_INT32_VAL(p); + assert(slot >= 0 && slot < pResultBuf->numOfAllocGroupIds); + + return slot; +} + +static int32_t addNewGroupId(SQueryResultBuf* pResultBuf, int32_t groupId) { + int32_t num = getNumOfResultBufGroupId(pResultBuf); // the num is the newest allocated group id slot + + if (pResultBuf->numOfAllocGroupIds <= num) { + size_t n = pResultBuf->numOfAllocGroupIds << 1u; + + SIDList* p = (SIDList*)realloc(pResultBuf->list, sizeof(SIDList) * n); + assert(p != NULL); + + memset(&p[pResultBuf->numOfAllocGroupIds], 0, sizeof(SIDList) * pResultBuf->numOfAllocGroupIds); + + pResultBuf->list = p; + pResultBuf->numOfAllocGroupIds = n; + } + + taosAddToHashTable(pResultBuf->idsTable, (const char*)&groupId, sizeof(int32_t), &num, sizeof(int32_t)); + return num; +} + +static int32_t doRegisterId(SIDList* pList, int32_t id) { + if (pList->size >= pList->alloc) { + int32_t s = 0; + if (pList->alloc == 0) { + s = 4; + assert(pList->pData == NULL); + } else { + s = pList->alloc << 1u; + } + + int32_t* c = realloc(pList->pData, s * 
sizeof(int32_t)); + assert(c); + + memset(&c[pList->alloc], 0, sizeof(int32_t) * pList->alloc); + + pList->pData = c; + pList->alloc = s; + } + + pList->pData[pList->size++] = id; + return 0; +} + +static void registerPageId(SQueryResultBuf* pResultBuf, int32_t groupId, int32_t pageId) { + int32_t slot = getGroupIndex(pResultBuf, groupId); + if (slot < 0) { + slot = addNewGroupId(pResultBuf, groupId); + } + + SIDList* pList = &pResultBuf->list[slot]; + doRegisterId(pList, pageId); +} + +tFilePage* getNewDataBuf(SQueryResultBuf* pResultBuf, int32_t groupId, int32_t* pageId) { + if (noMoreAvailablePages(pResultBuf)) { + if (extendDiskFileSize(pResultBuf, pResultBuf->incStep) != TSDB_CODE_SUCCESS) { + return NULL; + } + } + + // register new id in this group + *pageId = (pResultBuf->allocateId++); + registerPageId(pResultBuf, groupId, *pageId); + + tFilePage* page = getResultBufferPageById(pResultBuf, *pageId); + + // clear memory for the new page + memset(page, 0, DEFAULT_INTERN_BUF_SIZE); + + return page; +} + +int32_t getNumOfRowsPerPage(SQueryResultBuf* pResultBuf) { return pResultBuf->numOfRowsPerPage; } + +SIDList getDataBufPagesIdList(SQueryResultBuf* pResultBuf, int32_t groupId) { + SIDList list = {0}; + int32_t slot = getGroupIndex(pResultBuf, groupId); + if (slot < 0) { + return list; + } else { + return pResultBuf->list[slot]; + } +} + +void destroyResultBuf(SQueryResultBuf* pResultBuf) { + if (pResultBuf == NULL) { + return; + } + + if (FD_VALID(pResultBuf->fd)) { + close(pResultBuf->fd); + } + + pTrace("disk-based output buffer closed, %" PRId64 " bytes, file:%s", pResultBuf->totalBufSize, pResultBuf->path); + munmap(pResultBuf->pBuf, pResultBuf->totalBufSize); + unlink(pResultBuf->path); + + tfree(pResultBuf->path); + + for (int32_t i = 0; i < pResultBuf->numOfAllocGroupIds; ++i) { + SIDList* pList = &pResultBuf->list[i]; + tfree(pList->pData); + } + + tfree(pResultBuf->list); + taosCleanUpHashTable(pResultBuf->idsTable); + + tfree(pResultBuf); +} + 
+int32_t getLastPageId(SIDList *pList) { + if (pList == NULL && pList->size <= 0) { + return -1; + } + + return pList->pData[pList->size - 1]; +} + diff --git a/src/util/src/ttokenizer.c b/src/util/src/ttokenizer.c index a1e7a6828c87e846fd1600a33a06d8b171b4523e..d4f3bd6879dae6b9e8573a9230f39fe3405b5927 100644 --- a/src/util/src/ttokenizer.c +++ b/src/util/src/ttokenizer.c @@ -274,7 +274,7 @@ int tSQLKeywordCode(const char* z, int n) { } } - SKeyword** pKey = (SKeyword**)taosGetDataFromHash(KeywordHashTable, key, n); + SKeyword** pKey = (SKeyword**)taosGetDataFromHashTable(KeywordHashTable, key, n); if (pKey != NULL) { return (*pKey)->type; } else {