From 1de610a48d9b75fb21541b40ecfb0bf68eeab185 Mon Sep 17 00:00:00 2001 From: hjxilinx Date: Wed, 5 Feb 2020 17:31:58 +0800 Subject: [PATCH] refactor extbuffer model. --- src/client/inc/tscSecondaryMerge.h | 10 +- src/client/src/tscFunctionImpl.c | 3 +- src/client/src/tscParseInsert.c | 2 +- src/client/src/tscSQLParser.c | 26 - src/client/src/tscSecondaryMerge.c | 150 +-- src/client/src/tscServer.c | 14 +- src/inc/textbuffer.h | 220 ++-- src/inc/thistogram.h | 2 - src/inc/tinterpolation.h | 2 +- src/inc/tpercentile.h | 77 ++ src/system/detail/inc/vnodeTagMgmt.h | 18 +- src/system/detail/src/mgmtDnodeInt.c | 2 +- src/system/detail/src/mgmtSupertableQuery.c | 24 +- src/system/detail/src/vnodeFile.c | 2 +- src/system/detail/src/vnodeQueryImpl.c | 54 +- src/system/detail/src/vnodeShell.c | 64 +- src/system/detail/src/vnodeTagMgmt.c | 86 +- src/system/detail/src/vnodeUtil.c | 2 +- src/util/src/textbuffer.c | 1279 +++---------------- src/util/src/tinterpolation.c | 100 +- src/util/src/tpercentile.c | 976 ++++++++++++++ 21 files changed, 1618 insertions(+), 1495 deletions(-) create mode 100644 src/inc/tpercentile.h create mode 100644 src/util/src/tpercentile.c diff --git a/src/client/inc/tscSecondaryMerge.h b/src/client/inc/tscSecondaryMerge.h index bcfe14fcb7..d423b32356 100644 --- a/src/client/inc/tscSecondaryMerge.h +++ b/src/client/inc/tscSecondaryMerge.h @@ -68,7 +68,7 @@ typedef struct SLocalReducer { bool hasPrevRow; // cannot be released bool hasUnprocessedRow; tOrderDescriptor * pDesc; - tColModel * resColModel; + SColumnModel * resColModel; tExtMemBuffer ** pExtMemBuffer; // disk-based buffer SInterpolationInfo interpolationInfo; // interpolation support structure char * pFinalRes; // result data after interpo @@ -92,7 +92,7 @@ typedef struct SSubqueryState { typedef struct SRetrieveSupport { tExtMemBuffer ** pExtMemBuffer; // for build loser tree tOrderDescriptor *pOrderDescriptor; - tColModel * pFinalColModel; // colModel for final result + SColumnModel * 
pFinalColModel; // colModel for final result SSubqueryState * pState; int32_t subqueryIndex; // index of current vnode in vnode list SSqlObj * pParentSqlObj; @@ -102,9 +102,9 @@ typedef struct SRetrieveSupport { } SRetrieveSupport; int32_t tscLocalReducerEnvCreate(SSqlObj *pSql, tExtMemBuffer ***pMemBuffer, tOrderDescriptor **pDesc, - tColModel **pFinalModel, uint32_t nBufferSize); + SColumnModel **pFinalModel, uint32_t nBufferSize); -void tscLocalReducerEnvDestroy(tExtMemBuffer **pMemBuffer, tOrderDescriptor *pDesc, tColModel *pFinalModel, +void tscLocalReducerEnvDestroy(tExtMemBuffer **pMemBuffer, tOrderDescriptor *pDesc, SColumnModel *pFinalModel, int32_t numOfVnodes); int32_t saveToBuffer(tExtMemBuffer *pMemoryBuf, tOrderDescriptor *pDesc, tFilePage *pPage, void *data, @@ -116,7 +116,7 @@ int32_t tscFlushTmpBuffer(tExtMemBuffer *pMemoryBuf, tOrderDescriptor *pDesc, tF * create local reducer to launch the second-stage reduce process at client site */ void tscCreateLocalReducer(tExtMemBuffer **pMemBuffer, int32_t numOfBuffer, tOrderDescriptor *pDesc, - tColModel *finalModel, SSqlCmd *pSqlCmd, SSqlRes *pRes); + SColumnModel *finalModel, SSqlCmd *pSqlCmd, SSqlRes *pRes); void tscDestroyLocalReducer(SSqlObj *pSql); diff --git a/src/client/src/tscFunctionImpl.c b/src/client/src/tscFunctionImpl.c index e02e0b3ceb..f8a8d0d6a7 100644 --- a/src/client/src/tscFunctionImpl.c +++ b/src/client/src/tscFunctionImpl.c @@ -27,6 +27,7 @@ #include "ttime.h" #include "ttypes.h" #include "tutil.h" +#include "tpercentile.h" #define GET_INPUT_CHAR(x) (((char *)((x)->aInputElemBuf)) + ((x)->startOffset) * ((x)->inputBytes)) #define GET_INPUT_CHAR_INDEX(x, y) (GET_INPUT_CHAR(x) + (y) * (x)->inputBytes) @@ -2416,7 +2417,7 @@ static bool percentile_function_setup(SQLFunctionCtx *pCtx) { SResultInfo *pResInfo = GET_RES_INFO(pCtx); SSchema field[1] = {{pCtx->inputType, "dummyCol", 0, pCtx->inputBytes}}; - tColModel *pModel = tColModelCreate(field, 1, 1000); + SColumnModel *pModel = 
createColumnModel(field, 1, 1000); int32_t orderIdx = 0; // tOrderDesc object diff --git a/src/client/src/tscParseInsert.c b/src/client/src/tscParseInsert.c index 6ef28a55f6..b0d5a58bcf 100644 --- a/src/client/src/tscParseInsert.c +++ b/src/client/src/tscParseInsert.c @@ -321,7 +321,7 @@ int32_t tsParseOneColumnData(SSchema *pSchema, SSQLToken *pToken, char *payload, if (pToken->type == TK_NULL) { *(uint32_t *)payload = TSDB_DATA_NCHAR_NULL; } else { - // if the converted output len is over than pSchema->bytes, return error: 'Argument list too long' + // if the converted output len is over than pColumnModel->bytes, return error: 'Argument list too long' if (!taosMbsToUcs4(pToken->z, pToken->n, payload, pSchema->bytes)) { char buf[512] = {0}; snprintf(buf, 512, "%s", strerror(errno)); diff --git a/src/client/src/tscSQLParser.c b/src/client/src/tscSQLParser.c index ef5ec5808b..83e41c36a9 100644 --- a/src/client/src/tscSQLParser.c +++ b/src/client/src/tscSQLParser.c @@ -5453,8 +5453,6 @@ int32_t doCheckForQuery(SSqlObj* pSql, SQuerySQL* pQuerySql, int32_t index) { const char* msg0 = "invalid table name"; const char* msg1 = "table name too long"; const char* msg2 = "point interpolation query needs timestamp"; - const char* msg3 = "sliding value too small"; - const char* msg4 = "sliding value no larger than the interval value"; const char* msg5 = "fill only available for interval query"; const char* msg6 = "start(end) time of query range required or time range too large"; const char* msg7 = "illegal number of tables in from clause"; @@ -5587,30 +5585,6 @@ int32_t doCheckForQuery(SSqlObj* pSql, SQuerySQL* pQuerySql, int32_t index) { if (!hasTimestampForPointInterpQuery(pQueryInfo)) { return invalidSqlErrMsg(pQueryInfo->msg, msg2); } - -// // set sliding value, the query time range needs to be decide in the first place -// SSQLToken* pSliding = &pQuerySql->sliding; -// if (pSliding->n != 0) { -// if (!tscEmbedded && pCmd->inStream == 0 && 
hasDefaultQueryTimeRange(pQueryInfo)) { // sliding only allowed in stream -// const char* msg = "time range expected for sliding window query"; -// return invalidSqlErrMsg(tscGetErrorMsgPayload(pCmd), msg); -// } -// -// getTimestampInUsFromStr(pSliding->z, pSliding->n, &pQueryInfo->nSlidingTime); -// if (pMeterMetaInfo->pMeterMeta->precision == TSDB_TIME_PRECISION_MILLI) { -// pQueryInfo->nSlidingTime /= 1000; -// } -// -// if (pQueryInfo->nSlidingTime < tsMinSlidingTime) { -// return invalidSqlErrMsg(pQueryInfo->msg, msg3); -// } -// -// if (pQueryInfo->nSlidingTime > pQueryInfo->nAggTimeInterval) { -// return invalidSqlErrMsg(pQueryInfo->msg, msg4); -// } -// } else { -// pQueryInfo->nSlidingTime = -1; -// } // in case of join query, time range is required. if (QUERY_IS_JOIN_QUERY(pQueryInfo->type)) { diff --git a/src/client/src/tscSecondaryMerge.c b/src/client/src/tscSecondaryMerge.c index 30f1dfad77..73d7f28951 100644 --- a/src/client/src/tscSecondaryMerge.c +++ b/src/client/src/tscSecondaryMerge.c @@ -62,16 +62,19 @@ static void tscInitSqlContext(SSqlCmd *pCmd, SSqlRes *pRes, SLocalReducer *pRedu for (int32_t i = 0; i < pQueryInfo->fieldsInfo.numOfOutputCols; ++i) { SQLFunctionCtx *pCtx = &pReducer->pCtx[i]; - pCtx->aOutputBuf = pReducer->pResultBuf->data + tscFieldInfoGetOffset(pQueryInfo, i) * pReducer->resColModel->maxCapacity; + pCtx->aOutputBuf = pReducer->pResultBuf->data + tscFieldInfoGetOffset(pQueryInfo, i) * pReducer->resColModel->capacity; pCtx->order = pQueryInfo->order.order; pCtx->functionId = pQueryInfo->exprsInfo.pExprs[i].functionId; // input buffer hold only one point data - pCtx->aInputElemBuf = pReducer->pTempBuffer->data + pDesc->pSchema->colOffset[i]; + int16_t offset = getColumnModelOffset(pDesc->pColumnModel, i); + SSchema* pSchema = getColumnModelSchema(pDesc->pColumnModel, i); + + pCtx->aInputElemBuf = pReducer->pTempBuffer->data + offset; // input data format comes from pModel - pCtx->inputType = pDesc->pSchema->pFields[i].type; - 
pCtx->inputBytes = pDesc->pSchema->pFields[i].bytes; + pCtx->inputType = pSchema->type; + pCtx->inputBytes = pSchema->bytes; TAOS_FIELD *pField = tscFieldInfoGetField(pQueryInfo, i); // output data format yet comes from pCmd. @@ -132,11 +135,11 @@ static void tscInitSqlContext(SSqlCmd *pCmd, SSqlRes *pRes, SLocalReducer *pRedu * todo release allocated memory process with async process */ void tscCreateLocalReducer(tExtMemBuffer **pMemBuffer, int32_t numOfBuffer, tOrderDescriptor *pDesc, - tColModel *finalmodel, SSqlCmd *pCmd, SSqlRes *pRes) { + SColumnModel *finalmodel, SSqlCmd *pCmd, SSqlRes *pRes) { // offset of cmd in SSqlObj structure char *pSqlObjAddr = (char *)pCmd - offsetof(SSqlObj, cmd); - if (pMemBuffer == NULL || pDesc->pSchema == NULL) { + if (pMemBuffer == NULL || pDesc->pColumnModel == NULL) { tscLocalReducerEnvDestroy(pMemBuffer, pDesc, finalmodel, numOfBuffer); tscError("%p no local buffer or intermediate result format model", pSqlObjAddr); @@ -162,9 +165,9 @@ void tscCreateLocalReducer(tExtMemBuffer **pMemBuffer, int32_t numOfBuffer, tOrd return; } - if (pDesc->pSchema->maxCapacity >= pMemBuffer[0]->nPageSize) { - tscError("%p Invalid value of buffer capacity %d and page size %d ", pSqlObjAddr, pDesc->pSchema->maxCapacity, - pMemBuffer[0]->nPageSize); + if (pDesc->pColumnModel->capacity >= pMemBuffer[0]->pageSize) { + tscError("%p Invalid value of buffer capacity %d and page size %d ", pSqlObjAddr, pDesc->pColumnModel->capacity, + pMemBuffer[0]->pageSize); tscLocalReducerEnvDestroy(pMemBuffer, pDesc, finalmodel, numOfBuffer); pRes->code = TSDB_CODE_APP_ERROR; @@ -196,7 +199,7 @@ void tscCreateLocalReducer(tExtMemBuffer **pMemBuffer, int32_t numOfBuffer, tOrd int32_t numOfFlushoutInFile = pMemBuffer[i]->fileMeta.flushoutData.nLength; for (int32_t j = 0; j < numOfFlushoutInFile; ++j) { - SLocalDataSource *pDS = (SLocalDataSource *)malloc(sizeof(SLocalDataSource) + pMemBuffer[0]->nPageSize); + SLocalDataSource *pDS = (SLocalDataSource 
*)malloc(sizeof(SLocalDataSource) + pMemBuffer[0]->pageSize); if (pDS == NULL) { tscError("%p failed to create merge structure", pSqlObjAddr); pRes->code = TSDB_CODE_CLI_OUT_OF_MEMORY; @@ -219,7 +222,7 @@ void tscCreateLocalReducer(tExtMemBuffer **pMemBuffer, int32_t numOfBuffer, tOrd tscGetSrcColumnInfo(colInfo, pQueryInfo); - tColModelDisplayEx(pDesc->pSchema, pDS->filePage.data, pDS->filePage.numOfElems, pMemBuffer[0]->numOfElemsPerPage, + tColModelDisplayEx(pDesc->pColumnModel, pDS->filePage.data, pDS->filePage.numOfElems, pMemBuffer[0]->numOfElemsPerPage, colInfo); #endif if (pDS->filePage.numOfElems == 0) { // no data in this flush @@ -259,7 +262,7 @@ void tscCreateLocalReducer(tExtMemBuffer **pMemBuffer, int32_t numOfBuffer, tOrd tscRestoreSQLFunctionForMetricQuery(pQueryInfo); tscFieldInfoCalOffset(pQueryInfo); - if (pReducer->rowSize > pMemBuffer[0]->nPageSize) { + if (pReducer->rowSize > pMemBuffer[0]->pageSize) { assert(false); // todo fixed row size is larger than the minimum page size; } @@ -274,15 +277,15 @@ void tscCreateLocalReducer(tExtMemBuffer **pMemBuffer, int32_t numOfBuffer, tOrd pReducer->discardData = (tFilePage *)calloc(1, pReducer->rowSize + sizeof(tFilePage)); pReducer->discard = false; - pReducer->nResultBufSize = pMemBuffer[0]->nPageSize * 16; + pReducer->nResultBufSize = pMemBuffer[0]->pageSize * 16; pReducer->pResultBuf = (tFilePage *)calloc(1, pReducer->nResultBufSize + sizeof(tFilePage)); int32_t finalRowLength = tscGetResRowLength(pQueryInfo); pReducer->resColModel = finalmodel; - pReducer->resColModel->maxCapacity = pReducer->nResultBufSize / finalRowLength; + pReducer->resColModel->capacity = pReducer->nResultBufSize / finalRowLength; assert(finalRowLength <= pReducer->rowSize); - pReducer->pFinalRes = calloc(1, pReducer->rowSize * pReducer->resColModel->maxCapacity); + pReducer->pFinalRes = calloc(1, pReducer->rowSize * pReducer->resColModel->capacity); pReducer->pBufForInterpo = calloc(1, pReducer->nResultBufSize); if 
(pReducer->pTempBuffer == NULL|| pReducer->discardData == NULL || pReducer->pResultBuf == NULL || @@ -304,8 +307,8 @@ void tscCreateLocalReducer(tExtMemBuffer **pMemBuffer, int32_t numOfBuffer, tOrd tscCreateResPointerInfo(pRes, pQueryInfo); tscInitSqlContext(pCmd, pRes, pReducer, pDesc); - // we change the maxCapacity of schema to denote that there is only one row in temp buffer - pReducer->pDesc->pSchema->maxCapacity = 1; + // we change the capacity of schema to denote that there is only one row in temp buffer + pReducer->pDesc->pColumnModel->capacity = 1; //restore the limitation value at the last stage if (tscOrderedProjectionQueryOnSTable(pQueryInfo, 0)) { @@ -333,7 +336,8 @@ void tscCreateLocalReducer(tExtMemBuffer **pMemBuffer, int32_t numOfBuffer, tOrd if (pQueryInfo->groupbyExpr.numOfGroupCols > 0) { pInterpoInfo->pTags[0] = (char *)pInterpoInfo->pTags + POINTER_BYTES * pQueryInfo->groupbyExpr.numOfGroupCols; for (int32_t i = 1; i < pQueryInfo->groupbyExpr.numOfGroupCols; ++i) { - pInterpoInfo->pTags[i] = pReducer->resColModel->pFields[startIndex + i - 1].bytes + pInterpoInfo->pTags[i - 1]; + SSchema* pSchema = getColumnModelSchema(pReducer->resColModel, startIndex + i - 1); + pInterpoInfo->pTags[i] = pSchema->bytes + pInterpoInfo->pTags[i - 1]; } } else { assert(pInterpoInfo->pTags == NULL); @@ -346,16 +350,16 @@ static int32_t tscFlushTmpBufferImpl(tExtMemBuffer *pMemoryBuf, tOrderDescriptor return 0; } - assert(pPage->numOfElems <= pDesc->pSchema->maxCapacity); + assert(pPage->numOfElems <= pDesc->pColumnModel->capacity); // sort before flush to disk, the data must be consecutively put on tFilePage. 
- if (pDesc->orderIdx.numOfOrderedCols > 0) { + if (pDesc->orderIdx.numOfCols > 0) { tColDataQSort(pDesc, pPage->numOfElems, 0, pPage->numOfElems - 1, pPage->data, orderType); } #ifdef _DEBUG_VIEW printf("%" PRIu64 " rows data flushed to disk after been sorted:\n", pPage->numOfElems); - tColModelDisplay(pDesc->pSchema, pPage->data, pPage->numOfElems, pPage->numOfElems); + tColModelDisplay(pDesc->pColumnModel, pPage->data, pPage->numOfElems, pPage->numOfElems); #endif // write to cache after being sorted @@ -383,18 +387,18 @@ int32_t tscFlushTmpBuffer(tExtMemBuffer *pMemoryBuf, tOrderDescriptor *pDesc, tF int32_t saveToBuffer(tExtMemBuffer *pMemoryBuf, tOrderDescriptor *pDesc, tFilePage *pPage, void *data, int32_t numOfRows, int32_t orderType) { - if (pPage->numOfElems + numOfRows <= pDesc->pSchema->maxCapacity) { - tColModelAppend(pDesc->pSchema, pPage, data, 0, numOfRows, numOfRows); + if (pPage->numOfElems + numOfRows <= pDesc->pColumnModel->capacity) { + tColModelAppend(pDesc->pColumnModel, pPage, data, 0, numOfRows, numOfRows); return 0; } - tColModel *pModel = pDesc->pSchema; + SColumnModel *pModel = pDesc->pColumnModel; - int32_t numOfRemainEntries = pDesc->pSchema->maxCapacity - pPage->numOfElems; + int32_t numOfRemainEntries = pDesc->pColumnModel->capacity - pPage->numOfElems; tColModelAppend(pModel, pPage, data, 0, numOfRemainEntries, numOfRows); /* current buffer is full, need to flushed to disk */ - assert(pPage->numOfElems == pDesc->pSchema->maxCapacity); + assert(pPage->numOfElems == pDesc->pColumnModel->capacity); int32_t ret = tscFlushTmpBuffer(pMemoryBuf, pDesc, pPage, orderType); if (ret != 0) { return -1; @@ -404,15 +408,15 @@ int32_t saveToBuffer(tExtMemBuffer *pMemoryBuf, tOrderDescriptor *pDesc, tFilePa while (remain > 0) { int32_t numOfWriteElems = 0; - if (remain > pModel->maxCapacity) { - numOfWriteElems = pModel->maxCapacity; + if (remain > pModel->capacity) { + numOfWriteElems = pModel->capacity; } else { numOfWriteElems = remain; } 
tColModelAppend(pModel, pPage, data, numOfRows - remain, numOfWriteElems, numOfRows); - if (pPage->numOfElems == pModel->maxCapacity) { + if (pPage->numOfElems == pModel->capacity) { int32_t ret = tscFlushTmpBuffer(pMemoryBuf, pDesc, pPage, orderType); if (ret != 0) { return -1; @@ -508,7 +512,7 @@ void tscDestroyLocalReducer(SSqlObj *pSql) { tscTrace("%p free local reducer finished", pSql); } -static int32_t createOrderDescriptor(tOrderDescriptor **pOrderDesc, SSqlCmd *pCmd, tColModel *pModel) { +static int32_t createOrderDescriptor(tOrderDescriptor **pOrderDesc, SSqlCmd *pCmd, SColumnModel *pModel) { int32_t numOfGroupByCols = 0; SQueryInfo* pQueryInfo = tscGetQueryInfoDetail(pCmd, pCmd->clauseIndex); @@ -567,7 +571,7 @@ bool isSameGroup(SSqlCmd *pCmd, SLocalReducer *pReducer, char *pPrev, tFilePage } tOrderDescriptor *pOrderDesc = pReducer->pDesc; - int32_t numOfCols = pOrderDesc->orderIdx.numOfOrderedCols; + int32_t numOfCols = pOrderDesc->orderIdx.numOfCols; // no group by columns, all data belongs to one group if (numOfCols <= 0) { @@ -577,25 +581,25 @@ bool isSameGroup(SSqlCmd *pCmd, SLocalReducer *pReducer, char *pPrev, tFilePage if (pOrderDesc->orderIdx.pData[numOfCols - 1] == PRIMARYKEY_TIMESTAMP_COL_INDEX) { //<= 0 // super table interval query assert(pQueryInfo->nAggTimeInterval > 0); - pOrderDesc->orderIdx.numOfOrderedCols -= 1; + pOrderDesc->orderIdx.numOfCols -= 1; } else { // simple group by query assert(pQueryInfo->nAggTimeInterval == 0); } // only one row exists int32_t ret = compare_a(pOrderDesc, 1, 0, pPrev, 1, 0, tmpBuffer->data); - pOrderDesc->orderIdx.numOfOrderedCols = numOfCols; + pOrderDesc->orderIdx.numOfCols = numOfCols; return (ret == 0); } int32_t tscLocalReducerEnvCreate(SSqlObj *pSql, tExtMemBuffer ***pMemBuffer, tOrderDescriptor **pOrderDesc, - tColModel **pFinalModel, uint32_t nBufferSizes) { + SColumnModel **pFinalModel, uint32_t nBufferSizes) { SSqlCmd *pCmd = &pSql->cmd; SSqlRes *pRes = &pSql->res; SSchema * pSchema = NULL; - 
tColModel *pModel = NULL; + SColumnModel *pModel = NULL; *pFinalModel = NULL; SQueryInfo* pQueryInfo = tscGetQueryInfoDetail(pCmd, pCmd->clauseIndex); @@ -630,14 +634,10 @@ int32_t tscLocalReducerEnvCreate(SSqlObj *pSql, tExtMemBuffer ***pMemBuffer, tOr capacity = nBufferSizes / rlen; } - pModel = tColModelCreate(pSchema, pQueryInfo->fieldsInfo.numOfOutputCols, capacity); + pModel = createColumnModel(pSchema, pQueryInfo->fieldsInfo.numOfOutputCols, capacity); for (int32_t i = 0; i < pMeterMetaInfo->pMetricMeta->numOfVnodes; ++i) { - char tmpPath[512] = {0}; - getTmpfilePath("tv_bf_db", tmpPath); - tscTrace("%p create [%d](%d) tmp file for subquery:%s", pSql, pMeterMetaInfo->pMetricMeta->numOfVnodes, i, tmpPath); - - tExtMemBufferCreate(&(*pMemBuffer)[i], nBufferSizes, rlen, tmpPath, pModel); + (*pMemBuffer)[i] = createExtMemBuffer(nBufferSizes, rlen, pModel); (*pMemBuffer)[i]->flushModel = MULTIPLE_APPEND_MODEL; } @@ -655,7 +655,7 @@ int32_t tscLocalReducerEnvCreate(SSqlObj *pSql, tExtMemBuffer ***pMemBuffer, tOr strcpy(pSchema[i].name, pField->name); } - *pFinalModel = tColModelCreate(pSchema, pQueryInfo->fieldsInfo.numOfOutputCols, capacity); + *pFinalModel = createColumnModel(pSchema, pQueryInfo->fieldsInfo.numOfOutputCols, capacity); tfree(pSchema); return TSDB_CODE_SUCCESS; @@ -667,12 +667,12 @@ int32_t tscLocalReducerEnvCreate(SSqlObj *pSql, tExtMemBuffer ***pMemBuffer, tOr * @param pFinalModel * @param numOfVnodes */ -void tscLocalReducerEnvDestroy(tExtMemBuffer **pMemBuffer, tOrderDescriptor *pDesc, tColModel *pFinalModel, +void tscLocalReducerEnvDestroy(tExtMemBuffer **pMemBuffer, tOrderDescriptor *pDesc, SColumnModel *pFinalModel, int32_t numOfVnodes) { - tColModelDestroy(pFinalModel); + destroyColumnModel(pFinalModel); tOrderDescDestroy(pDesc); for (int32_t i = 0; i < numOfVnodes; ++i) { - tExtMemBufferDestroy(&pMemBuffer[i]); + pMemBuffer[i] = destoryExtMemBuffer(pMemBuffer[i]); } tfree(pMemBuffer); @@ -697,8 +697,8 @@ int32_t 
loadNewDataFromDiskFor(SLocalReducer *pLocalReducer, SLocalDataSource *p #if defined(_DEBUG_VIEW) printf("new page load to buffer\n"); - tColModelDisplay(pOneInterDataSrc->pMemBuffer->pColModel, pOneInterDataSrc->filePage.data, - pOneInterDataSrc->filePage.numOfElems, pOneInterDataSrc->pMemBuffer->pColModel->maxCapacity); + tColModelDisplay(pOneInterDataSrc->pMemBuffer->pColumnModel, pOneInterDataSrc->filePage.data, + pOneInterDataSrc->filePage.numOfElems, pOneInterDataSrc->pMemBuffer->pColumnModel->capacity); #endif *needAdjustLoserTree = true; } else { @@ -759,7 +759,7 @@ void savePrevRecordAndSetupInterpoInfo(SLocalReducer *pLocalReducer, SQueryInfo* pLocalReducer->discard = true; pLocalReducer->discardData->numOfElems = 0; - tColModel *pModel = pLocalReducer->pDesc->pSchema; + SColumnModel *pModel = pLocalReducer->pDesc->pColumnModel; tColModelAppend(pModel, pLocalReducer->discardData, pLocalReducer->prevRowOfInput, 0, 1, 1); } @@ -782,11 +782,12 @@ static void reversedCopyResultToDstBuf(SQueryInfo* pQueryInfo, SSqlRes *pRes, tF } static void reversedCopyFromInterpolationToDstBuf(SQueryInfo* pQueryInfo, SSqlRes *pRes, tFilePage **pResPages, SLocalReducer *pLocalReducer) { + assert(0); for (int32_t i = 0; i < pQueryInfo->exprsInfo.numOfExprs; ++i) { TAOS_FIELD *pField = tscFieldInfoGetField(pQueryInfo, i); int32_t offset = tscFieldInfoGetOffset(pQueryInfo, i); - assert(offset == pLocalReducer->resColModel->colOffset[i]); + assert(offset == getColumnModelOffset(pLocalReducer->resColModel, i)); char *src = pResPages[i]->data + (pRes->numOfRows - 1) * pField->bytes; char *dst = pRes->data + pRes->numOfRows * offset; @@ -880,7 +881,7 @@ static void doInterpolateResult(SSqlObj *pSql, SLocalReducer *pLocalReducer, boo tFilePage **pResPages = malloc(POINTER_BYTES * pQueryInfo->fieldsInfo.numOfOutputCols); for (int32_t i = 0; i < pQueryInfo->fieldsInfo.numOfOutputCols; ++i) { TAOS_FIELD *pField = tscFieldInfoGetField(pQueryInfo, i); - pResPages[i] = calloc(1, 
sizeof(tFilePage) + pField->bytes * pLocalReducer->resColModel->maxCapacity); + pResPages[i] = calloc(1, sizeof(tFilePage) + pField->bytes * pLocalReducer->resColModel->capacity); } char ** srcData = (char **)malloc((POINTER_BYTES + sizeof(int32_t)) * pQueryInfo->fieldsInfo.numOfOutputCols); @@ -899,11 +900,11 @@ static void doInterpolateResult(SSqlObj *pSql, SLocalReducer *pLocalReducer, boo TSKEY etime = taosGetRevisedEndKey(actualETime, pQueryInfo->order.order, pQueryInfo->nAggTimeInterval, pQueryInfo->intervalTimeUnit, precision); int32_t nrows = taosGetNumOfResultWithInterpo(pInterpoInfo, pPrimaryKeys, remains, pQueryInfo->nAggTimeInterval, etime, - pLocalReducer->resColModel->maxCapacity); + pLocalReducer->resColModel->capacity); int32_t newRows = taosDoInterpoResult(pInterpoInfo, pQueryInfo->interpoType, pResPages, remains, nrows, pQueryInfo->nAggTimeInterval, pPrimaryKeys, pLocalReducer->resColModel, srcData, - pQueryInfo->defaultVal, functions, pLocalReducer->resColModel->maxCapacity); + pQueryInfo->defaultVal, functions, pLocalReducer->resColModel->capacity); assert(newRows <= nrows); if (pQueryInfo->limit.offset < newRows) { @@ -960,11 +961,10 @@ static void doInterpolateResult(SSqlObj *pSql, SLocalReducer *pLocalReducer, boo if (pQueryInfo->order.order == TSQL_SO_ASC) { for (int32_t i = 0; i < pQueryInfo->fieldsInfo.numOfOutputCols; ++i) { TAOS_FIELD *pField = tscFieldInfoGetField(pQueryInfo, i); - - memcpy(pRes->data + pLocalReducer->resColModel->colOffset[i] * pRes->numOfRows, pResPages[i]->data, - pField->bytes * pRes->numOfRows); + int16_t offset = getColumnModelOffset(pLocalReducer->resColModel, i); + memcpy(pRes->data + offset * pRes->numOfRows, pResPages[i]->data, pField->bytes * pRes->numOfRows); } - } else { + } else {//todo bug?? 
reversedCopyFromInterpolationToDstBuf(pQueryInfo, pRes, pResPages, pLocalReducer); } } @@ -979,13 +979,15 @@ static void doInterpolateResult(SSqlObj *pSql, SLocalReducer *pLocalReducer, boo } static void savePreviousRow(SLocalReducer *pLocalReducer, tFilePage *tmpBuffer) { - tColModel *pColModel = pLocalReducer->pDesc->pSchema; - assert(pColModel->maxCapacity == 1 && tmpBuffer->numOfElems == 1); + SColumnModel *pColumnModel = pLocalReducer->pDesc->pColumnModel; + assert(pColumnModel->capacity == 1 && tmpBuffer->numOfElems == 1); // copy to previous temp buffer - for (int32_t i = 0; i < pLocalReducer->pDesc->pSchema->numOfCols; ++i) { - memcpy(pLocalReducer->prevRowOfInput + pColModel->colOffset[i], tmpBuffer->data + pColModel->colOffset[i], - pColModel->pFields[i].bytes); + for (int32_t i = 0; i < pColumnModel->numOfCols; ++i) { + SSchema* pSchema = getColumnModelSchema(pColumnModel, i); + int16_t offset = getColumnModelOffset(pColumnModel, i); + + memcpy(pLocalReducer->prevRowOfInput + offset, tmpBuffer->data + offset, pSchema->bytes); } tmpBuffer->numOfElems = 0; @@ -1127,7 +1129,7 @@ bool needToMerge(SQueryInfo* pQueryInfo, SLocalReducer *pLocalReducer, tFilePage ret = 1; // disable merge procedure } else { tOrderDescriptor *pDesc = pLocalReducer->pDesc; - if (pDesc->orderIdx.numOfOrderedCols > 0) { + if (pDesc->orderIdx.numOfCols > 0) { if (pDesc->tsOrder == TSQL_SO_ASC) { // asc // todo refactor comparator ret = compare_a(pLocalReducer->pDesc, 1, 0, pLocalReducer->prevRowOfInput, 1, 0, tmpBuffer->data); @@ -1177,7 +1179,7 @@ bool doGenerateFinalResults(SSqlObj *pSql, SLocalReducer *pLocalReducer, bool no SQueryInfo* pQueryInfo = tscGetQueryInfoDetail(pCmd, pCmd->clauseIndex); tFilePage *pResBuf = pLocalReducer->pResultBuf; - tColModel *pModel = pLocalReducer->resColModel; + SColumnModel *pModel = pLocalReducer->resColModel; pRes->code = TSDB_CODE_SUCCESS; @@ -1192,7 +1194,7 @@ bool doGenerateFinalResults(SSqlObj *pSql, SLocalReducer *pLocalReducer, bool no 
return false; } - tColModelCompact(pModel, pResBuf, pModel->maxCapacity); + tColModelCompact(pModel, pResBuf, pModel->capacity); memcpy(pLocalReducer->pBufForInterpo, pResBuf->data, pLocalReducer->nResultBufSize); #ifdef _DEBUG_VIEW @@ -1204,9 +1206,11 @@ bool doGenerateFinalResults(SSqlObj *pSql, SLocalReducer *pLocalReducer, bool no int32_t startIndex = pQueryInfo->fieldsInfo.numOfOutputCols - pQueryInfo->groupbyExpr.numOfGroupCols; for (int32_t i = 0; i < pQueryInfo->groupbyExpr.numOfGroupCols; ++i) { + int16_t offset = getColumnModelOffset(pModel, startIndex + i); + SSchema* pSchema = getColumnModelSchema(pModel, startIndex + i); + memcpy(pInterpoInfo->pTags[i], - pLocalReducer->pBufForInterpo + pModel->colOffset[startIndex + i] * pResBuf->numOfElems, - pModel->pFields[startIndex + i].bytes); + pLocalReducer->pBufForInterpo + offset * pResBuf->numOfElems, pSchema->bytes); } taosInterpoSetStartInfo(&pLocalReducer->interpolationInfo, pResBuf->numOfElems, pQueryInfo->interpoType); @@ -1218,7 +1222,7 @@ bool doGenerateFinalResults(SSqlObj *pSql, SLocalReducer *pLocalReducer, bool no void resetOutputBuf(SQueryInfo* pQueryInfo, SLocalReducer *pLocalReducer) { // reset output buffer to the beginning for (int32_t i = 0; i < pQueryInfo->fieldsInfo.numOfOutputCols; ++i) { pLocalReducer->pCtx[i].aOutputBuf = - pLocalReducer->pResultBuf->data + tscFieldInfoGetOffset(pQueryInfo, i) * pLocalReducer->resColModel->maxCapacity; + pLocalReducer->pResultBuf->data + tscFieldInfoGetOffset(pQueryInfo, i) * pLocalReducer->resColModel->capacity; } memset(pLocalReducer->pResultBuf, 0, pLocalReducer->nResultBufSize + sizeof(tFilePage)); @@ -1270,7 +1274,7 @@ static bool doInterpolationForCurrentGroup(SSqlObj *pSql) { int32_t remain = taosNumOfRemainPoints(pInterpoInfo); TSKEY ekey = taosGetRevisedEndKey(etime, pQueryInfo->order.order, pQueryInfo->nAggTimeInterval, pQueryInfo->intervalTimeUnit, p); int32_t rows = taosGetNumOfResultWithInterpo(pInterpoInfo, (TSKEY 
*)pLocalReducer->pBufForInterpo, remain, - pQueryInfo->nAggTimeInterval, ekey, pLocalReducer->resColModel->maxCapacity); + pQueryInfo->nAggTimeInterval, ekey, pLocalReducer->resColModel->capacity); if (rows > 0) { // do interpo doInterpolateResult(pSql, pLocalReducer, false); } @@ -1302,7 +1306,7 @@ static bool doHandleLastRemainData(SSqlObj *pSql) { etime = taosGetRevisedEndKey(etime, pQueryInfo->order.order, pQueryInfo->nAggTimeInterval, pQueryInfo->intervalTimeUnit, precision); int32_t rows = taosGetNumOfResultWithInterpo(pInterpoInfo, NULL, 0, pQueryInfo->nAggTimeInterval, etime, - pLocalReducer->resColModel->maxCapacity); + pLocalReducer->resColModel->capacity); if (rows > 0) { // do interpo doInterpolateResult(pSql, pLocalReducer, true); } @@ -1391,7 +1395,7 @@ int32_t tscDoLocalreduce(SSqlObj *pSql) { // clear buffer handleUnprocessedRow(pCmd, pLocalReducer, tmpBuffer); - tColModel *pModel = pLocalReducer->pDesc->pSchema; + SColumnModel *pModel = pLocalReducer->pDesc->pColumnModel; while (1) { if (isAllSourcesCompleted(pLocalReducer)) { @@ -1408,14 +1412,14 @@ int32_t tscDoLocalreduce(SSqlObj *pSql) { SLocalDataSource *pOneDataSrc = pLocalReducer->pLocalDataSrc[pTree->pNode[0].index]; tColModelAppend(pModel, tmpBuffer, pOneDataSrc->filePage.data, pOneDataSrc->rowIdx, 1, - pOneDataSrc->pMemBuffer->pColModel->maxCapacity); + pOneDataSrc->pMemBuffer->pColumnModel->capacity); #if defined(_DEBUG_VIEW) printf("chosen row:\t"); SSrcColumnInfo colInfo[256] = {0}; tscGetSrcColumnInfo(colInfo, pQueryInfo); - tColModelDisplayEx(pModel, tmpBuffer->data, tmpBuffer->numOfElems, pModel->maxCapacity, colInfo); + tColModelDisplayEx(pModel, tmpBuffer->data, tmpBuffer->numOfElems, pModel->capacity, colInfo); #endif if (pLocalReducer->discard) { @@ -1470,7 +1474,7 @@ int32_t tscDoLocalreduce(SSqlObj *pSql) { * continue to process results instead of return results. 
*/ if ((!sameGroup && pResBuf->numOfElems > 0) || - (pResBuf->numOfElems == pLocalReducer->resColModel->maxCapacity)) { + (pResBuf->numOfElems == pLocalReducer->resColModel->capacity)) { // does not belong to the same group bool notSkipped = doGenerateFinalResults(pSql, pLocalReducer, !sameGroup); diff --git a/src/client/src/tscServer.c b/src/client/src/tscServer.c index 677f728472..41c3a87295 100644 --- a/src/client/src/tscServer.c +++ b/src/client/src/tscServer.c @@ -901,7 +901,7 @@ int tscLaunchSTableSubqueries(SSqlObj *pSql) { tExtMemBuffer ** pMemoryBuf = NULL; tOrderDescriptor *pDesc = NULL; - tColModel * pModel = NULL; + SColumnModel * pModel = NULL; pRes->qhandle = 1; // hack the qhandle check @@ -1194,7 +1194,7 @@ void tscRetrieveFromVnodeCallBack(void *param, TAOS_RES *tres, int numOfRows) { SSrcColumnInfo colInfo[256] = {0}; tscGetSrcColumnInfo(colInfo, pQueryInfo); - tColModelDisplayEx(pDesc->pSchema, pRes->data, pRes->numOfRows, pRes->numOfRows, colInfo); + tColModelDisplayEx(pDesc->pColumnModel, pRes->data, pRes->numOfRows, pRes->numOfRows, colInfo); #endif if (tsTotalTmpDirGB != 0 && tsAvailTmpDirGB < tsMinimalTmpDirGB) { tscError("%p sub:%p client disk space remain %.3f GB, need at least %.3f GB, stop query", pPObj, pSql, @@ -1214,17 +1214,17 @@ void tscRetrieveFromVnodeCallBack(void *param, TAOS_RES *tres, int numOfRows) { } else { // all data has been retrieved to client /* data in from current vnode is stored in cache and disk */ - uint32_t numOfRowsFromVnode = trsupport->pExtMemBuffer[idx]->numOfAllElems + trsupport->localBuffer->numOfElems; + uint32_t numOfRowsFromVnode = trsupport->pExtMemBuffer[idx]->numOfTotalElems + trsupport->localBuffer->numOfElems; tscTrace("%p sub:%p all data retrieved from ip:%u,vid:%d, numOfRows:%d, orderOfSub:%d", pPObj, pSql, pSvd->ip, pSvd->vnode, numOfRowsFromVnode, idx); - tColModelCompact(pDesc->pSchema, trsupport->localBuffer, pDesc->pSchema->maxCapacity); + tColModelCompact(pDesc->pColumnModel, 
trsupport->localBuffer, pDesc->pColumnModel->capacity); #ifdef _DEBUG_VIEW printf("%" PRIu64 " rows data flushed to disk:\n", trsupport->localBuffer->numOfElems); SSrcColumnInfo colInfo[256] = {0}; tscGetSrcColumnInfo(colInfo, pQueryInfo); - tColModelDisplayEx(pDesc->pSchema, trsupport->localBuffer->data, trsupport->localBuffer->numOfElems, + tColModelDisplayEx(pDesc->pColumnModel, trsupport->localBuffer->data, trsupport->localBuffer->numOfElems, trsupport->localBuffer->numOfElems, colInfo); #endif @@ -1256,7 +1256,7 @@ void tscRetrieveFromVnodeCallBack(void *param, TAOS_RES *tres, int numOfRows) { } // all sub-queries are returned, start to local merge process - pDesc->pSchema->maxCapacity = trsupport->pExtMemBuffer[idx]->numOfElemsPerPage; + pDesc->pColumnModel->capacity = trsupport->pExtMemBuffer[idx]->numOfElemsPerPage; tscTrace("%p retrieve from %d vnodes completed.final NumOfRows:%d,start to build loser tree", pPObj, pState->numOfTotal, pState->numOfRetrievedRows); @@ -1516,7 +1516,7 @@ void tscUpdateVnodeInQueryMsg(SSqlObj *pSql, char *buf) { char * pStart = buf + tsRpcHeadSize; SQueryMeterMsg *pQueryMsg = (SQueryMeterMsg *)pStart; - if (UTIL_METER_IS_NOMRAL_METER(pMeterMetaInfo)) { // pSchema == NULL, query on meter + if (UTIL_METER_IS_NOMRAL_METER(pMeterMetaInfo)) { // pColumnModel == NULL, query on meter SMeterMeta *pMeterMeta = pMeterMetaInfo->pMeterMeta; pQueryMsg->vnode = htons(pMeterMeta->vpeerDesc[pSql->index].vnode); } else { // query on metric diff --git a/src/inc/textbuffer.h b/src/inc/textbuffer.h index c7de20bd74..4f52c19cfa 100644 --- a/src/inc/textbuffer.h +++ b/src/inc/textbuffer.h @@ -19,19 +19,16 @@ extern "C" { #endif -#include -#include -#include - -#include "tutil.h" +#include "os.h" #include "taosmsg.h" +#include "tutil.h" -#define DEFAULT_PAGE_SIZE 16384 // 16k larger than the SHistoInfo -#define MIN_BUFFER_SIZE (1 << 19) -#define MAX_TMPFILE_PATH_LENGTH PATH_MAX -#define INITIAL_ALLOCATION_BUFFER_SIZE 64 +#define DEFAULT_PAGE_SIZE 
16384 // 16k larger than the SHistoInfo +#define MIN_BUFFER_SIZE (1 << 19) +#define MAX_TMPFILE_PATH_LENGTH PATH_MAX +#define INITIAL_ALLOCATION_BUFFER_SIZE 64 -// forward declare +// forward declaration struct tTagSchema; typedef enum EXT_BUFFER_FLUSH_MODEL { @@ -61,12 +58,12 @@ typedef struct tFlushoutData { tFlushoutInfo *pFlushoutInfo; } tFlushoutData; -typedef struct tFileMeta { +typedef struct SFileInfo { uint32_t nFileSize; // in pages - uint32_t nPageSize; + uint32_t pageSize; uint32_t numOfElemsInFile; tFlushoutData flushoutData; -} tFileMeta; +} SFileInfo; typedef struct tFilePage { uint64_t numOfElems; @@ -78,65 +75,73 @@ typedef struct tFilePagesItem { tFilePage item; } tFilePagesItem; -typedef struct tColModel { - int32_t maxCapacity; - int32_t numOfCols; - int16_t * colOffset; - struct SSchema *pFields; -} tColModel; +typedef struct SSchemaEx { + struct SSchema field; + int16_t offset; +} SSchemaEx; + +typedef struct SColumnModel { + int32_t capacity; + int32_t numOfCols; + int16_t rowSize; + SSchemaEx *pFields; +} SColumnModel; -typedef struct tOrderIdx { - int32_t numOfOrderedCols; +typedef struct SColumnOrderInfo { + int32_t numOfCols; int16_t pData[]; -} tOrderIdx; +} SColumnOrderInfo; typedef struct tOrderDescriptor { - union { - struct tTagSchema *pTagSchema; - tColModel * pSchema; - }; - int32_t tsOrder; // timestamp order type if exists - tOrderIdx orderIdx; + SColumnModel * pColumnModel; + int32_t tsOrder; // timestamp order type if exists + SColumnOrderInfo orderIdx; } tOrderDescriptor; typedef struct tExtMemBuffer { - int32_t nMaxSizeInPages; - + int32_t inMemCapacity; int32_t nElemSize; - int32_t nPageSize; - - int32_t numOfAllElems; + int32_t pageSize; + int32_t numOfTotalElems; int32_t numOfElemsInBuffer; int32_t numOfElemsPerPage; + int16_t numOfInMemPages; - int16_t numOfPagesInMem; tFilePagesItem *pHead; tFilePagesItem *pTail; - tFileMeta fileMeta; - - char dataFilePath[MAX_TMPFILE_PATH_LENGTH]; - FILE *dataFile; - - tColModel 
*pColModel; + char * path; + FILE * file; + SFileInfo fileMeta; + SColumnModel * pColumnModel; EXT_BUFFER_FLUSH_MODEL flushModel; } tExtMemBuffer; +/** + * + * @param fileNamePattern + * @param dstPath + */ void getTmpfilePath(const char *fileNamePattern, char *dstPath); -/* - * create ext-memory buffer +/** + * + * @param inMemSize + * @param elemSize + * @param pModel + * @return */ -void tExtMemBufferCreate(tExtMemBuffer **pMemBuffer, int32_t numOfBufferSize, int32_t elemSize, - const char *tmpDataFilePath, tColModel *pModel); +tExtMemBuffer *createExtMemBuffer(int32_t inMemSize, int32_t elemSize, SColumnModel *pModel); -/* - * destroy ext-memory buffer +/** + * + * @param pMemBuffer + * @return */ -void tExtMemBufferDestroy(tExtMemBuffer **pMemBuffer); +void *destoryExtMemBuffer(tExtMemBuffer *pMemBuffer); -/* +/** * @param pMemBuffer * @param data input data pointer * @param numOfRows number of rows in data @@ -145,12 +150,15 @@ void tExtMemBufferDestroy(tExtMemBuffer **pMemBuffer); */ int16_t tExtMemBufferPut(tExtMemBuffer *pMemBuffer, void *data, int32_t numOfRows); -/* - * flush all data into disk and release all in-memory buffer +/** + * + * @param pMemBuffer + * @return */ bool tExtMemBufferFlush(tExtMemBuffer *pMemBuffer); -/* +/** + * * remove all data that has been put into buffer, including in buffer or * ext-buffer(disk) */ @@ -163,11 +171,44 @@ void tExtMemBufferClear(tExtMemBuffer *pMemBuffer); */ bool tExtMemBufferLoadData(tExtMemBuffer *pMemBuffer, tFilePage *pFilePage, int32_t flushIdx, int32_t pageIdx); +/** + * + * @param pMemBuffer + * @return + */ bool tExtMemBufferIsAllDataInMem(tExtMemBuffer *pMemBuffer); -tColModel *tColModelCreate(SSchema *field, int32_t numOfCols, int32_t maxCapacity); +/** + * + * @param fields + * @param numOfCols + * @param blockCapacity + * @return + */ +SColumnModel *createColumnModel(SSchema *fields, int32_t numOfCols, int32_t blockCapacity); + +/** + * + * @param pSrc + * @return + */ +SColumnModel 
*cloneColumnModel(SColumnModel *pSrc); + +/** + * + * @param pModel + */ +void destroyColumnModel(SColumnModel *pModel); + +/* + * compress data into consecutive block without hole in data + */ +void tColModelCompact(SColumnModel *pModel, tFilePage *inputBuffer, int32_t maxElemsCapacity); + +void tColModelErase(SColumnModel *pModel, tFilePage *inputBuffer, int32_t maxCapacity, int32_t s, int32_t e); +SSchema *getColumnModelSchema(SColumnModel *pColumnModel, int32_t index); -void tColModelDestroy(tColModel *pModel); +int16_t getColumnModelOffset(SColumnModel *pColumnModel, int32_t index); typedef struct SSrcColumnInfo { int32_t functionId; @@ -177,68 +218,18 @@ typedef struct SSrcColumnInfo { /* * display data in column format model for debug purpose only */ -void tColModelDisplay(tColModel *pModel, void *pData, int32_t numOfRows, int32_t maxCount); +void tColModelDisplay(SColumnModel *pModel, void *pData, int32_t numOfRows, int32_t maxCount); -void tColModelDisplayEx(tColModel *pModel, void *pData, int32_t numOfRows, int32_t maxCount, SSrcColumnInfo *pInfo); +void tColModelDisplayEx(SColumnModel *pModel, void *pData, int32_t numOfRows, int32_t maxCount, SSrcColumnInfo *pInfo); -/* - * compress data into consecutive block without hole in data - */ -void tColModelCompact(tColModel *pModel, tFilePage *inputBuffer, int32_t maxElemsCapacity); - -void tColModelErase(tColModel *pModel, tFilePage *inputBuffer, int32_t maxCapacity, int32_t s, int32_t e); - -tOrderDescriptor *tOrderDesCreate(int32_t *orderColIdx, int32_t numOfOrderCols, tColModel *pModel, int32_t tsOrderType); +tOrderDescriptor *tOrderDesCreate(const int32_t *orderColIdx, int32_t numOfOrderCols, SColumnModel *pModel, + int32_t tsOrderType); void tOrderDescDestroy(tOrderDescriptor *pDesc); -void tColModelAppend(tColModel *dstModel, tFilePage *dstPage, void *srcData, int32_t srcStartRows, +void tColModelAppend(SColumnModel *dstModel, tFilePage *dstPage, void *srcData, int32_t srcStartRows, int32_t 
numOfRowsToWrite, int32_t srcCapacity); -/////////////////////////////////////////////////////////////////////////////////////////////////////// -typedef struct MinMaxEntry { - union { - double dMinVal; - int32_t iMinVal; - int64_t i64MinVal; - }; - union { - double dMaxVal; - int32_t iMaxVal; - int64_t i64MaxVal; - }; -} MinMaxEntry; - -typedef struct tMemBucketSegment { - int32_t numOfSlots; - MinMaxEntry * pBoundingEntries; - tExtMemBuffer **pBuffer; -} tMemBucketSegment; - -typedef struct tMemBucket { - int16_t numOfSegs; - int16_t nTotalSlots; - int16_t nSlotsOfSeg; - int16_t dataType; - - int16_t nElemSize; - int32_t numOfElems; - - int32_t nTotalBufferSize; - int32_t maxElemsCapacity; - - int16_t nPageSize; - int16_t numOfTotalPages; - int16_t numOfAvailPages; /* remain available buffer pages */ - - tMemBucketSegment *pSegs; - tOrderDescriptor * pOrderDesc; - - MinMaxEntry nRange; - - void (*HashFunc)(struct tMemBucket *pBucket, void *value, int16_t *segIdx, int16_t *slotIdx); -} tMemBucket; - typedef int (*__col_compar_fn_t)(tOrderDescriptor *, int32_t numOfRows, int32_t idx1, int32_t idx2, char *data); void tColDataQSort(tOrderDescriptor *, int32_t numOfRows, int32_t start, int32_t end, char *data, int32_t orderType); @@ -253,19 +244,6 @@ int32_t compare_a(tOrderDescriptor *, int32_t numOfRow1, int32_t s1, char *data1 int32_t compare_d(tOrderDescriptor *, int32_t numOfRow1, int32_t s1, char *data1, int32_t numOfRow2, int32_t s2, char *data2); -tMemBucket* tMemBucketCreate(int32_t totalSlots, int32_t nBufferSize, int16_t nElemSize, - int16_t dataType, tOrderDescriptor *pDesc); - -void tMemBucketDestroy(tMemBucket *pBucket); - -void tMemBucketPut(tMemBucket *pBucket, void *data, int32_t numOfRows); - -double getPercentile(tMemBucket *pMemBucket, double percent); - -void tBucketIntHash(tMemBucket *pBucket, void *value, int16_t *segIdx, int16_t *slotIdx); - -void tBucketDoubleHash(tMemBucket *pBucket, void *value, int16_t *segIdx, int16_t *slotIdx); - #ifdef 
__cplusplus } #endif diff --git a/src/inc/thistogram.h b/src/inc/thistogram.h index 7e5b1ccac6..bb058449e8 100644 --- a/src/inc/thistogram.h +++ b/src/inc/thistogram.h @@ -20,8 +20,6 @@ extern "C" { #endif -#include "tskiplist.h" - #define USE_ARRAYLIST #define MAX_HISTOGRAM_BIN 500 diff --git a/src/inc/tinterpolation.h b/src/inc/tinterpolation.h index 22b4ceb4f0..f4b327bcbe 100644 --- a/src/inc/tinterpolation.h +++ b/src/inc/tinterpolation.h @@ -78,7 +78,7 @@ int32_t taosNumOfRemainPoints(SInterpolationInfo *pInterpoInfo); */ int32_t taosDoInterpoResult(SInterpolationInfo *pInterpoInfo, int16_t interpoType, tFilePage **data, int32_t numOfRawDataInRows, int32_t outputRows, int64_t nInterval, - const int64_t *pPrimaryKeyArray, tColModel *pModel, char **srcData, int64_t *defaultVal, + const int64_t *pPrimaryKeyArray, SColumnModel *pModel, char **srcData, int64_t *defaultVal, const int32_t *functionIDs, int32_t bufSize); int taosDoLinearInterpolation(int32_t type, SPoint *point1, SPoint *point2, SPoint *point); diff --git a/src/inc/tpercentile.h b/src/inc/tpercentile.h new file mode 100644 index 0000000000..b9cf50e0bb --- /dev/null +++ b/src/inc/tpercentile.h @@ -0,0 +1,77 @@ +/* + * Copyright (c) 2019 TAOS Data, Inc. + * + * This program is free software: you can use, redistribute, and/or modify + * it under the terms of the GNU Affero General Public License, version 3 + * or later ("AGPL"), as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. + * + * You should have received a copy of the GNU Affero General Public License + * along with this program. If not, see . 
+ */ + +#ifndef TDENGINE_TPERCENTILE_H +#define TDENGINE_TPERCENTILE_H + +#include "textbuffer.h" + +typedef struct MinMaxEntry { + union { + double dMinVal; + int32_t iMinVal; + int64_t i64MinVal; + }; + union { + double dMaxVal; + int32_t iMaxVal; + int64_t i64MaxVal; + }; +} MinMaxEntry; + +typedef struct tMemBucketSegment { + int32_t numOfSlots; + MinMaxEntry * pBoundingEntries; + tExtMemBuffer **pBuffer; +} tMemBucketSegment; + +typedef struct tMemBucket { + int16_t numOfSegs; + int16_t nTotalSlots; + int16_t nSlotsOfSeg; + int16_t dataType; + + int16_t nElemSize; + int32_t numOfElems; + + int32_t nTotalBufferSize; + int32_t maxElemsCapacity; + + int16_t pageSize; + int16_t numOfTotalPages; + int16_t numOfAvailPages; /* remain available buffer pages */ + + tMemBucketSegment *pSegs; + tOrderDescriptor * pOrderDesc; + + MinMaxEntry nRange; + + void (*HashFunc)(struct tMemBucket *pBucket, void *value, int16_t *segIdx, int16_t *slotIdx); +} tMemBucket; + +tMemBucket *tMemBucketCreate(int32_t totalSlots, int32_t nBufferSize, int16_t nElemSize, int16_t dataType, + tOrderDescriptor *pDesc); + +void tMemBucketDestroy(tMemBucket *pBucket); + +void tMemBucketPut(tMemBucket *pBucket, void *data, int32_t numOfRows); + +double getPercentile(tMemBucket *pMemBucket, double percent); + +void tBucketIntHash(tMemBucket *pBucket, void *value, int16_t *segIdx, int16_t *slotIdx); + +void tBucketDoubleHash(tMemBucket *pBucket, void *value, int16_t *segIdx, int16_t *slotIdx); + +#endif // TDENGINE_TPERCENTILE_H diff --git a/src/system/detail/inc/vnodeTagMgmt.h b/src/system/detail/inc/vnodeTagMgmt.h index 320ef56453..02d9779d15 100644 --- a/src/system/detail/inc/vnodeTagMgmt.h +++ b/src/system/detail/inc/vnodeTagMgmt.h @@ -33,11 +33,13 @@ extern "C" { * 1. we implement a quick sort algorithm, may remove it later. 
*/ -typedef struct tTagSchema { - struct SSchema *pSchema; - int32_t numOfCols; - int32_t colOffset[]; -} tTagSchema; +//typedef struct tTagSchema { +// struct SSchema *pSchema; +// int32_t numOfCols; +// int32_t colOffset[]; +//} tTagSchema; + +typedef SColumnModel tTagSchema; typedef struct tSidSet { int32_t numOfSids; @@ -45,8 +47,8 @@ typedef struct tSidSet { SMeterSidExtInfo **pSids; int32_t * starterPos; // position of each subgroup, generated according to - tTagSchema *pTagSchema; - tOrderIdx orderIdx; + SColumnModel *pColumnModel; + SColumnOrderInfo orderIdx; } tSidSet; typedef int32_t (*__ext_compar_fn_t)(const void *p1, const void *p2, void *param); @@ -54,7 +56,7 @@ typedef int32_t (*__ext_compar_fn_t)(const void *p1, const void *p2, void *param tSidSet *tSidSetCreate(struct SMeterSidExtInfo **pMeterSidExtInfo, int32_t numOfMeters, SSchema *pSchema, int32_t numOfTags, SColIndexEx *colList, int32_t numOfOrderCols); -tTagSchema *tCreateTagSchema(SSchema *pSchema, int32_t numOfTagCols); +//tTagSchema *tCreateTagSchema(SSchema *pSchema, int32_t numOfTagCols); int32_t *calculateSubGroup(void **pSids, int32_t numOfMeters, int32_t *numOfSubset, tOrderDescriptor *pOrderDesc, __ext_compar_fn_t compareFn); diff --git a/src/system/detail/src/mgmtDnodeInt.c b/src/system/detail/src/mgmtDnodeInt.c index a1dae7738f..1a6d0c9c09 100644 --- a/src/system/detail/src/mgmtDnodeInt.c +++ b/src/system/detail/src/mgmtDnodeInt.c @@ -210,7 +210,7 @@ char *mgmtBuildCreateMeterIe(STabObj *pMeter, char *pMsg, int vnode) { for (int i = 0; i < pMeter->numOfColumns; ++i) { pCreateMeter->schema[i].type = pSchema[i].type; - /* strcpy(pCreateMeter->schema[i].name, pSchema[i].name); */ + /* strcpy(pCreateMeter->schema[i].name, pColumnModel[i].name); */ pCreateMeter->schema[i].bytes = htons(pSchema[i].bytes); pCreateMeter->schema[i].colId = htons(pSchema[i].colId); } diff --git a/src/system/detail/src/mgmtSupertableQuery.c b/src/system/detail/src/mgmtSupertableQuery.c index 
1b7ae66e6d..347b54595e 100644 --- a/src/system/detail/src/mgmtSupertableQuery.c +++ b/src/system/detail/src/mgmtSupertableQuery.c @@ -70,7 +70,7 @@ static int32_t tabObjResultComparator(const void* p1, const void* p2, void* para STabObj* pNode1 = (STabObj*)p1; STabObj* pNode2 = (STabObj*)p2; - for (int32_t i = 0; i < pOrderDesc->orderIdx.numOfOrderedCols; ++i) { + for (int32_t i = 0; i < pOrderDesc->orderIdx.numOfCols; ++i) { int32_t colIdx = pOrderDesc->orderIdx.pData[i]; char* f1 = NULL; @@ -86,7 +86,9 @@ static int32_t tabObjResultComparator(const void* p1, const void* p2, void* para } else { f1 = mgmtMeterGetTag(pNode1, colIdx, NULL); f2 = mgmtMeterGetTag(pNode2, colIdx, &schema); - assert(schema.type == pOrderDesc->pTagSchema->pSchema[colIdx].type); + + SSchema* pSchema = getColumnModelSchema(pOrderDesc->pColumnModel, colIdx); + assert(schema.type == pSchema->type); } int32_t ret = doCompare(f1, f2, schema.type, schema.bytes); @@ -109,7 +111,7 @@ static int32_t tabObjResultComparator(const void* p1, const void* p2, void* para * @param pOrderIndexInfo * @param numOfTags */ -static void mgmtUpdateOrderTagColIndex(SMetricMetaMsg* pMetricMetaMsg, int32_t tableIndex, tOrderIdx* pOrderIndexInfo, +static void mgmtUpdateOrderTagColIndex(SMetricMetaMsg* pMetricMetaMsg, int32_t tableIndex, SColumnOrderInfo* pOrderIndexInfo, int32_t numOfTags) { SMetricMetaElemMsg* pElem = (SMetricMetaElemMsg*)((char*)pMetricMetaMsg + pMetricMetaMsg->metaElem[tableIndex]); SColIndexEx* groupColumnList = (SColIndexEx*)((char*)pMetricMetaMsg + pElem->groupbyTagColumnList); @@ -123,7 +125,7 @@ static void mgmtUpdateOrderTagColIndex(SMetricMetaMsg* pMetricMetaMsg, int32_t t } } - pOrderIndexInfo->numOfOrderedCols = numOfGroupbyTags; + pOrderIndexInfo->numOfCols = numOfGroupbyTags; } // todo merge sort function with losertree used @@ -143,14 +145,14 @@ void mgmtReorganizeMetersInMetricMeta(SMetricMetaMsg* pMetricMetaMsg, int32_t ta */ tOrderDescriptor* descriptor = 
(tOrderDescriptor*)calloc(1, sizeof(tOrderDescriptor) + sizeof(int32_t) * pElem->numOfGroupCols); - descriptor->pTagSchema = tCreateTagSchema(pTagSchema, pMetric->numOfTags); - descriptor->orderIdx.numOfOrderedCols = pElem->numOfGroupCols; + descriptor->pColumnModel = createColumnModel(pTagSchema, pMetric->numOfTags, 1); + descriptor->orderIdx.numOfCols = pElem->numOfGroupCols; int32_t* startPos = NULL; int32_t numOfSubset = 1; mgmtUpdateOrderTagColIndex(pMetricMetaMsg, tableIndex, &descriptor->orderIdx, pMetric->numOfTags); - if (descriptor->orderIdx.numOfOrderedCols > 0) { + if (descriptor->orderIdx.numOfCols > 0) { tQSortEx(pRes->pRes, POINTER_BYTES, 0, pRes->num - 1, descriptor, tabObjResultComparator); startPos = calculateSubGroup(pRes->pRes, pRes->num, &numOfSubset, descriptor, tabObjResultComparator); } else { @@ -166,7 +168,7 @@ void mgmtReorganizeMetersInMetricMeta(SMetricMetaMsg* pMetricMetaMsg, int32_t ta */ qsort(pRes->pRes, (size_t)pRes->num, POINTER_BYTES, tabObjVGIDComparator); - free(descriptor->pTagSchema); + free(descriptor->pColumnModel); free(descriptor); free(startPos); } @@ -291,15 +293,15 @@ static void orderResult(SMetricMetaMsg* pMetricMetaMsg, tQueryResultset* pRes, i STabObj* pMetric = mgmtGetMeter(pElem->meterId); SSchema* pTagSchema = (SSchema*)(pMetric->schema + pMetric->numOfColumns * sizeof(SSchema)); - descriptor->pTagSchema = tCreateTagSchema(pTagSchema, pMetric->numOfTags); + descriptor->pColumnModel = createColumnModel(pTagSchema, pMetric->numOfTags, 1); descriptor->orderIdx.pData[0] = colIndex; - descriptor->orderIdx.numOfOrderedCols = 1; + descriptor->orderIdx.numOfCols = 1; // sort results list tQSortEx(pRes->pRes, POINTER_BYTES, 0, pRes->num - 1, descriptor, tabObjResultComparator); - free(descriptor->pTagSchema); + free(descriptor->pColumnModel); free(descriptor); } diff --git a/src/system/detail/src/vnodeFile.c b/src/system/detail/src/vnodeFile.c index 9c53d47507..8ac2f21211 100644 --- a/src/system/detail/src/vnodeFile.c 
+++ b/src/system/detail/src/vnodeFile.c @@ -127,7 +127,7 @@ int vnodeCreateHeadDataFile(int vnode, int fileId, char *headName, char *dataNam if (symlink(dDataName, dataName) != 0) return -1; if (symlink(dLastName, lastName) != 0) return -1; - dPrint("vid:%d, fileId:%d, empty header file:%s dataFile:%s lastFile:%s on disk:%s is created ", + dPrint("vid:%d, fileId:%d, empty header file:%s file:%s lastFile:%s on disk:%s is created ", vnode, fileId, headName, dataName, lastName, path); return 0; diff --git a/src/system/detail/src/vnodeQueryImpl.c b/src/system/detail/src/vnodeQueryImpl.c index 81fce50bd6..bf055a8b63 100644 --- a/src/system/detail/src/vnodeQueryImpl.c +++ b/src/system/detail/src/vnodeQueryImpl.c @@ -589,7 +589,7 @@ static void setExecParams(SQuery *pQuery, SQLFunctionCtx *pCtx, int64_t StartQue char *primaryColumnData, int32_t size, int32_t functionId, SField *pField, bool hasNull, int32_t blockStatus, void *param, int32_t scanFlag); -void createGroupResultBuf(SQuery *pQuery, SOutputRes *pOneResult, bool isMetricQuery); +void createGroupResultBuf(SQuery *pQuery, SOutputRes *pOneResult, bool isSTableQuery); static void destroyGroupResultBuf(SOutputRes *pOneOutputRes, int32_t nOutputCols); static int32_t binarySearchForBlockImpl(SCompBlock *pBlock, int32_t numOfBlocks, TSKEY skey, int32_t order) { @@ -2499,7 +2499,7 @@ static void setCtxTagColumnInfo(SQuery *pQuery, SQueryRuntimeEnv *pRuntimeEnv) { } static int32_t setupQueryRuntimeEnv(SMeterObj *pMeterObj, SQuery *pQuery, SQueryRuntimeEnv *pRuntimeEnv, - SSchema *pTagsSchema, int16_t order, bool isMetricQuery) { + tTagSchema *pTagsSchema, int16_t order, bool isSTableQuery) { dTrace("QInfo:%p setup runtime env", GET_QINFO_ADDR(pQuery)); pRuntimeEnv->pMeterObj = pMeterObj; @@ -2520,8 +2520,10 @@ static int32_t setupQueryRuntimeEnv(SMeterObj *pMeterObj, SQuery *pQuery, SQuery SQLFunctionCtx *pCtx = &pRuntimeEnv->pCtx[i]; if (TSDB_COL_IS_TAG(pSqlFuncMsg->colInfo.flag)) { // process tag column info - 
pCtx->inputType = pTagsSchema[pColIndexEx->colIdx].type; - pCtx->inputBytes = pTagsSchema[pColIndexEx->colIdx].bytes; + SSchema* pSchema = getColumnModelSchema(pTagsSchema, pColIndexEx->colIdx); + + pCtx->inputType = pSchema->type; + pCtx->inputBytes = pSchema->bytes; } else { pCtx->inputType = GET_COLUMN_TYPE(pQuery, i); pCtx->inputBytes = GET_COLUMN_BYTES(pQuery, i); @@ -2567,11 +2569,11 @@ static int32_t setupQueryRuntimeEnv(SMeterObj *pMeterObj, SQuery *pQuery, SQuery // set the intermediate result output buffer SResultInfo *pResInfo = &pRuntimeEnv->resultInfo[i]; - setResultInfoBuf(pResInfo, pQuery->pSelectExpr[i].interResBytes, isMetricQuery); + setResultInfoBuf(pResInfo, pQuery->pSelectExpr[i].interResBytes, isSTableQuery); } // if it is group by normal column, do not set output buffer, the output buffer is pResult - if (!isGroupbyNormalCol(pQuery->pGroupbyExpr) && !isMetricQuery) { + if (!isGroupbyNormalCol(pQuery->pGroupbyExpr) && !isSTableQuery) { resetCtxOutputBuf(pRuntimeEnv); } @@ -4120,7 +4122,7 @@ static void allocMemForInterpo(SMeterQuerySupportObj *pSupporter, SQuery *pQuery } } -static int32_t allocateOutputBufForGroup(SMeterQuerySupportObj *pSupporter, SQuery *pQuery, bool isMetricQuery) { +static int32_t allocateOutputBufForGroup(SMeterQuerySupportObj *pSupporter, SQuery *pQuery, bool isSTableQuery) { int32_t slot = 0; if (isGroupbyNormalCol(pQuery->pGroupbyExpr) || (pQuery->nAggTimeInterval > 0 && pQuery->slidingTime > 0)) { @@ -4142,14 +4144,14 @@ static int32_t allocateOutputBufForGroup(SMeterQuerySupportObj *pSupporter, SQue * for single table top/bottom query, the output for group by normal column, the output rows is * equals to the maximum rows, instead of 1. 
*/ - if (!isMetricQuery && isTopBottomQuery(pQuery)) { + if (!isSTableQuery && isTopBottomQuery(pQuery)) { assert(pQuery->numOfOutputCols > 1); SSqlFunctionExpr *pExpr = &pQuery->pSelectExpr[1]; pOneRes->nAlloc = pExpr->pBase.arg[0].argValue.i64; } - createGroupResultBuf(pQuery, pOneRes, isMetricQuery); + createGroupResultBuf(pQuery, pOneRes, isSTableQuery); } return TSDB_CODE_SUCCESS; @@ -4498,12 +4500,12 @@ int32_t vnodeMultiMeterQueryPrepare(SQInfo *pQInfo, SQuery *pQuery, void *param) pQuery->lastKey = pQuery->skey; // create runtime environment - SSchema *pTagSchema = NULL; +// SSchema *pColumnModel = NULL; - tTagSchema *pTagSchemaInfo = pSupporter->pSidSet->pTagSchema; - if (pTagSchemaInfo != NULL) { - pTagSchema = pTagSchemaInfo->pSchema; - } + tTagSchema *pTagSchemaInfo = pSupporter->pSidSet->pColumnModel; +// if (pTagSchemaInfo != NULL) { +// pColumnModel = pTagSchemaInfo->pSchema; +// } // get one queried meter SMeterObj *pMeter = getMeterObj(pSupporter->pMetersHashTable, pSupporter->pSidSet->pSids[0]->sid); @@ -4517,7 +4519,7 @@ int32_t vnodeMultiMeterQueryPrepare(SQInfo *pQInfo, SQuery *pQuery, void *param) tsBufSetTraverseOrder(pRuntimeEnv->pTSBuf, order); } - int32_t ret = setupQueryRuntimeEnv(pMeter, pQuery, &pSupporter->runtimeEnv, pTagSchema, TSQL_SO_ASC, true); + int32_t ret = setupQueryRuntimeEnv(pMeter, pQuery, &pSupporter->runtimeEnv, pTagSchemaInfo, TSQL_SO_ASC, true); if (ret != TSDB_CODE_SUCCESS) { return ret; } @@ -5065,10 +5067,10 @@ static void doSetTagValueInParam(tTagSchema *pTagSchema, int32_t tagColIdx, SMet tVariant *param) { assert(tagColIdx >= 0); - int32_t *fieldValueOffset = pTagSchema->colOffset; - - void * pStr = (char *)pMeterSidInfo->tags + fieldValueOffset[tagColIdx]; - SSchema *pCol = &pTagSchema->pSchema[tagColIdx]; + int16_t offset = getColumnModelOffset(pTagSchema, tagColIdx); + + void * pStr = (char *)pMeterSidInfo->tags + offset; + SSchema *pCol = getColumnModelSchema(pTagSchema, tagColIdx); tVariantDestroy(param); @@ 
-5081,7 +5083,7 @@ static void doSetTagValueInParam(tTagSchema *pTagSchema, int32_t tagColIdx, SMet void vnodeSetTagValueInParam(tSidSet *pSidSet, SQueryRuntimeEnv *pRuntimeEnv, SMeterSidExtInfo *pMeterSidInfo) { SQuery * pQuery = pRuntimeEnv->pQuery; - tTagSchema *pTagSchema = pSidSet->pTagSchema; + tTagSchema *pTagSchema = pSidSet->pColumnModel; SSqlFuncExprMsg *pFuncMsg = &pQuery->pSelectExpr[0].pBase; if (pQuery->numOfOutputCols == 1 && pFuncMsg->functionId == TSDB_FUNC_TS_COMP) { @@ -5691,7 +5693,7 @@ void enableFunctForMasterScan(SQueryRuntimeEnv *pRuntimeEnv, int32_t order) { pQuery->order.order = (pQuery->order.order ^ 1); } -void createGroupResultBuf(SQuery *pQuery, SOutputRes *pOneResult, bool isMetricQuery) { +void createGroupResultBuf(SQuery *pQuery, SOutputRes *pOneResult, bool isSTableQuery) { int32_t numOfOutput = pQuery->numOfOutputCols; pOneResult->resultInfo = calloc((size_t)numOfOutput, sizeof(SResultInfo)); @@ -5704,7 +5706,7 @@ void createGroupResultBuf(SQuery *pQuery, SOutputRes *pOneResult, bool isMetricQ pOneResult->result[i] = malloc(sizeof(tFilePage) + size * pOneResult->nAlloc); pOneResult->result[i]->numOfElems = 0; - setResultInfoBuf(pResInfo, (int32_t)size, isMetricQuery); + setResultInfoBuf(pResInfo, (int32_t)size, isSTableQuery); } } @@ -7580,7 +7582,7 @@ int32_t saveResult(SMeterQuerySupportObj *pSupporter, SMeterQueryInfo *pMeterQue sc[1].bytes = 8; UNUSED(sc); - tColModel *cm = tColModelCreate(sc, pQuery->numOfOutputCols, pRuntimeEnv->numOfRowsPerPage); + SColumnModel *cm = createColumnModel(sc, pQuery->numOfOutputCols, pRuntimeEnv->numOfRowsPerPage); // if (outputPage->numOfElems + numOfResult >= pRuntimeEnv->numOfRowsPerPage) tColModelDisplay(cm, outputPage->data, outputPage->numOfElems, pRuntimeEnv->numOfRowsPerPage); @@ -7717,7 +7719,7 @@ static void applyIntervalQueryOnBlock(SMeterQuerySupportObj *pSupporter, SMeterD saveIntervalQueryRange(pRuntimeEnv, pMeterQueryInfo); } else { - doApplyIntervalQueryOnBlock(pSupporter, 
pMeterQueryInfo, pBlockInfo, pPrimaryKey, pFields, searchFn); + doApplyIntervalQueryOnBlock_rv(pSupporter, pMeterQueryInfo, pBlockInfo, pPrimaryKey, pFields, searchFn); } } @@ -7802,7 +7804,7 @@ static int32_t resultInterpolate(SQInfo *pQInfo, tFilePage **data, tFilePage **p pSchema[i].type = pQuery->pSelectExpr[i].resType; } - tColModel *pModel = tColModelCreate(pSchema, pQuery->numOfOutputCols, pQuery->pointsToRead); + SColumnModel *pModel = createColumnModel(pSchema, pQuery->numOfOutputCols, pQuery->pointsToRead); char * srcData[TSDB_MAX_COLUMNS] = {0}; int32_t functions[TSDB_MAX_COLUMNS] = {0}; @@ -7816,7 +7818,7 @@ static int32_t resultInterpolate(SQInfo *pQInfo, tFilePage **data, tFilePage **p pQuery->nAggTimeInterval, (int64_t *)pDataSrc[0]->data, pModel, srcData, pQuery->defaultVal, functions, pRuntimeEnv->pMeterObj->pointsPerFileBlock); - tColModelDestroy(pModel); + destroyColumnModel(pModel); free(pSchema); return numOfRes; diff --git a/src/system/detail/src/vnodeShell.c b/src/system/detail/src/vnodeShell.c index 69d502c618..6e43e70742 100644 --- a/src/system/detail/src/vnodeShell.c +++ b/src/system/detail/src/vnodeShell.c @@ -442,22 +442,22 @@ void vnodeExecuteRetrieveReq(SSchedMsg *pSched) { if (code == TSDB_CODE_SUCCESS) { size = vnodeGetResultSize((void *)(pRetrieve->qhandle), &numOfRows); + + // buffer size for progress information, including meter count, + // and for each meter, including 'uid' and 'TSKEY'. 
+ int progressSize = 0; + if (pQInfo->pMeterQuerySupporter != NULL) + progressSize = pQInfo->pMeterQuerySupporter->numOfMeters * (sizeof(int64_t) + sizeof(TSKEY)) + sizeof(int32_t); + else if (pQInfo->pObj != NULL) + progressSize = sizeof(int64_t) + sizeof(TSKEY) + sizeof(int32_t); + + pStart = taosBuildRspMsgWithSize(pObj->thandle, TSDB_MSG_TYPE_RETRIEVE_RSP, progressSize + size + 100); + if (pStart == NULL) { + taosSendSimpleRsp(pObj->thandle, TSDB_MSG_TYPE_RETRIEVE_RSP, TSDB_CODE_SERV_OUT_OF_MEMORY); + goto _exit; + } } - // buffer size for progress information, including meter count, - // and for each meter, including 'uid' and 'TSKEY'. - int progressSize = 0; - if (pQInfo->pMeterQuerySupporter != NULL) - progressSize = pQInfo->pMeterQuerySupporter->numOfMeters * (sizeof(int64_t) + sizeof(TSKEY)) + sizeof(int32_t); - else if (pQInfo->pObj != NULL) - progressSize = sizeof(int64_t) + sizeof(TSKEY) + sizeof(int32_t); - - pStart = taosBuildRspMsgWithSize(pObj->thandle, TSDB_MSG_TYPE_RETRIEVE_RSP, progressSize + size + 100); - if (pStart == NULL) { - taosSendSimpleRsp(pObj->thandle, TSDB_MSG_TYPE_RETRIEVE_RSP, TSDB_CODE_SERV_OUT_OF_MEMORY); - goto _exit; - } - pMsg = pStart; *pMsg = code; @@ -485,26 +485,28 @@ void vnodeExecuteRetrieveReq(SSchedMsg *pSched) { // write the progress information of each meter to response // this is required by subscriptions - if (pQInfo->pMeterQuerySupporter != NULL && pQInfo->pMeterQuerySupporter->pMeterSidExtInfo != NULL) { - *((int32_t*)pMsg) = htonl(pQInfo->pMeterQuerySupporter->numOfMeters); - pMsg += sizeof(int32_t); - for (int32_t i = 0; i < pQInfo->pMeterQuerySupporter->numOfMeters; i++) { - *((int64_t*)pMsg) = htobe64(pQInfo->pMeterQuerySupporter->pMeterSidExtInfo[i]->uid); + if (numOfRows > 0 && code == TSDB_CODE_SUCCESS) { + if (pQInfo->pMeterQuerySupporter != NULL && pQInfo->pMeterQuerySupporter->pMeterSidExtInfo != NULL) { + *((int32_t *)pMsg) = htonl(pQInfo->pMeterQuerySupporter->numOfMeters); + pMsg += sizeof(int32_t); + 
for (int32_t i = 0; i < pQInfo->pMeterQuerySupporter->numOfMeters; i++) { + *((int64_t *)pMsg) = htobe64(pQInfo->pMeterQuerySupporter->pMeterSidExtInfo[i]->uid); + pMsg += sizeof(int64_t); + *((TSKEY *)pMsg) = htobe64(pQInfo->pMeterQuerySupporter->pMeterSidExtInfo[i]->key); + pMsg += sizeof(TSKEY); + } + } else if (pQInfo->pObj != NULL) { + *((int32_t *)pMsg) = htonl(1); + pMsg += sizeof(int32_t); + *((int64_t *)pMsg) = htobe64(pQInfo->pObj->uid); pMsg += sizeof(int64_t); - *((TSKEY*)pMsg) = htobe64(pQInfo->pMeterQuerySupporter->pMeterSidExtInfo[i]->key); + if (pQInfo->pointsRead > 0) { + *((TSKEY *)pMsg) = htobe64(pQInfo->query.lastKey + 1); + } else { + *((TSKEY *)pMsg) = htobe64(pQInfo->query.lastKey); + } pMsg += sizeof(TSKEY); } - } else if (pQInfo->pObj != NULL) { - *((int32_t*)pMsg) = htonl(1); - pMsg += sizeof(int32_t); - *((int64_t*)pMsg) = htobe64(pQInfo->pObj->uid); - pMsg += sizeof(int64_t); - if (pQInfo->pointsRead > 0) { - *((TSKEY*)pMsg) = htobe64(pQInfo->query.lastKey + 1); - } else { - *((TSKEY*)pMsg) = htobe64(pQInfo->query.lastKey); - } - pMsg += sizeof(TSKEY); } msgLen = pMsg - pStart; diff --git a/src/system/detail/src/vnodeTagMgmt.c b/src/system/detail/src/vnodeTagMgmt.c index cea4f75f83..58d1be677a 100644 --- a/src/system/detail/src/vnodeTagMgmt.c +++ b/src/system/detail/src/vnodeTagMgmt.c @@ -24,10 +24,10 @@ #include "tast.h" #include "vnodeTagMgmt.h" -#define GET_TAG_VAL_POINTER(s, col, sc, t) ((t *)(&((s)->tags[(sc)->colOffset[(col)]]))) +#define GET_TAG_VAL_POINTER(s, col, sc, t) ((t *)(&((s)->tags[getColumnModelOffset(sc, col)]))) #define GET_TAG_VAL(s, col, sc, t) (*GET_TAG_VAL_POINTER(s, col, sc, t)) -static void tTagsPrints(SMeterSidExtInfo *pMeterInfo, tTagSchema *pSchema, tOrderIdx *pOrder); +static void tTagsPrints(SMeterSidExtInfo *pMeterInfo, tTagSchema *pSchema, SColumnOrderInfo *pOrder); static void tSidSetDisplay(tSidSet *pSets); @@ -65,7 +65,7 @@ int32_t meterSidComparator(const void *p1, const void *p2, void *param) { 
SMeterSidExtInfo *s1 = (SMeterSidExtInfo *)p1; SMeterSidExtInfo *s2 = (SMeterSidExtInfo *)p2; - for (int32_t i = 0; i < pOrderDesc->orderIdx.numOfOrderedCols; ++i) { + for (int32_t i = 0; i < pOrderDesc->orderIdx.numOfCols; ++i) { int32_t colIdx = pOrderDesc->orderIdx.pData[i]; char * f1 = NULL; @@ -79,9 +79,9 @@ int32_t meterSidComparator(const void *p1, const void *p2, void *param) { type = TSDB_DATA_TYPE_BINARY; bytes = TSDB_METER_NAME_LEN; } else { - f1 = GET_TAG_VAL_POINTER(s1, colIdx, pOrderDesc->pTagSchema, char); - f2 = GET_TAG_VAL_POINTER(s2, colIdx, pOrderDesc->pTagSchema, char); - SSchema *pSchema = &pOrderDesc->pTagSchema->pSchema[colIdx]; + f1 = GET_TAG_VAL_POINTER(s1, colIdx, pOrderDesc->pColumnModel, char); + f2 = GET_TAG_VAL_POINTER(s2, colIdx, pOrderDesc->pColumnModel, char); + SSchema *pSchema = getColumnModelSchema(pOrderDesc->pColumnModel, colIdx); type = pSchema->type; bytes = pSchema->bytes; } @@ -116,9 +116,9 @@ static void median(void **pMeterSids, size_t size, int32_t s1, int32_t s2, tOrde compareFn(pMeterSids[s1], pMeterSids[s2], pOrderDesc) <= 0); #ifdef _DEBUG_VIEW - tTagsPrints(pMeterSids[s1], pOrderDesc->pTagSchema, &pOrderDesc->orderIdx); - tTagsPrints(pMeterSids[midIdx], pOrderDesc->pTagSchema, &pOrderDesc->orderIdx); - tTagsPrints(pMeterSids[s2], pOrderDesc->pTagSchema, &pOrderDesc->orderIdx); + tTagsPrints(pMeterSids[s1], pOrderDesc->pColumnModel, &pOrderDesc->orderIdx); + tTagsPrints(pMeterSids[midIdx], pOrderDesc->pColumnModel, &pOrderDesc->orderIdx); + tTagsPrints(pMeterSids[s2], pOrderDesc->pColumnModel, &pOrderDesc->orderIdx); #endif } @@ -241,24 +241,24 @@ int32_t *calculateSubGroup(void **pSids, int32_t numOfMeters, int32_t *numOfSubs return starterPos; } -tTagSchema *tCreateTagSchema(SSchema *pSchema, int32_t numOfTagCols) { - if (numOfTagCols == 0 || pSchema == NULL) { - return NULL; - } - - tTagSchema *pTagSchema = - (tTagSchema *)calloc(1, sizeof(tTagSchema) + numOfTagCols * sizeof(int32_t) + sizeof(SSchema) * 
numOfTagCols); - - pTagSchema->colOffset[0] = 0; - pTagSchema->numOfCols = numOfTagCols; - for (int32_t i = 1; i < numOfTagCols; ++i) { - pTagSchema->colOffset[i] = (pTagSchema->colOffset[i - 1] + pSchema[i - 1].bytes); - } - - pTagSchema->pSchema = (SSchema *)&(pTagSchema->colOffset[numOfTagCols]); - memcpy(pTagSchema->pSchema, pSchema, sizeof(SSchema) * numOfTagCols); - return pTagSchema; -} +//tTagSchema *tCreateTagSchema(SSchema *pSchema, int32_t numOfTagCols) { +// if (numOfTagCols == 0 || pSchema == NULL) { +// return NULL; +// } +// +// tTagSchema *pColumnModel = +// (tTagSchema *)calloc(1, sizeof(tTagSchema) + numOfTagCols * sizeof(int32_t) + sizeof(SSchema) * numOfTagCols); +// +// pColumnModel->colOffset[0] = 0; +// pColumnModel->numOfCols = numOfTagCols; +// for (int32_t i = 1; i < numOfTagCols; ++i) { +// pColumnModel->colOffset[i] = (pColumnModel->colOffset[i - 1] + pSchema[i - 1].bytes); +// } +// +// pColumnModel->pSchema = (SSchema *)&(pColumnModel->colOffset[numOfTagCols]); +// memcpy(pColumnModel->pSchema, pSchema, sizeof(SSchema) * numOfTagCols); +// return pColumnModel; +//} tSidSet *tSidSetCreate(struct SMeterSidExtInfo **pMeterSidExtInfo, int32_t numOfMeters, SSchema *pSchema, int32_t numOfTags, SColIndexEx *colList, int32_t numOfCols) { @@ -269,8 +269,8 @@ tSidSet *tSidSetCreate(struct SMeterSidExtInfo **pMeterSidExtInfo, int32_t numOf pSidSet->numOfSids = numOfMeters; pSidSet->pSids = pMeterSidExtInfo; - pSidSet->pTagSchema = tCreateTagSchema(pSchema, numOfTags); - pSidSet->orderIdx.numOfOrderedCols = numOfCols; + pSidSet->pColumnModel = createColumnModel(pSchema, numOfTags, 1); + pSidSet->orderIdx.numOfCols = numOfCols; /* * in case of "group by tbname,normal_col", the normal_col is ignored @@ -282,7 +282,7 @@ tSidSet *tSidSetCreate(struct SMeterSidExtInfo **pMeterSidExtInfo, int32_t numOf } } - pSidSet->orderIdx.numOfOrderedCols = numOfTagCols; + pSidSet->orderIdx.numOfCols = numOfTagCols; pSidSet->starterPos = NULL; return pSidSet; @@ 
-291,19 +291,19 @@ tSidSet *tSidSetCreate(struct SMeterSidExtInfo **pMeterSidExtInfo, int32_t numOf void tSidSetDestroy(tSidSet **pSets) { if ((*pSets) != NULL) { tfree((*pSets)->starterPos); - tfree((*pSets)->pTagSchema)(*pSets)->pSids = NULL; + tfree((*pSets)->pColumnModel)(*pSets)->pSids = NULL; tfree(*pSets); } } -void tTagsPrints(SMeterSidExtInfo *pMeterInfo, tTagSchema *pSchema, tOrderIdx *pOrder) { +void tTagsPrints(SMeterSidExtInfo *pMeterInfo, tTagSchema *pSchema, SColumnOrderInfo *pOrder) { if (pSchema == NULL) { return; } printf("sid: %-5d tags(", pMeterInfo->sid); - for (int32_t i = 0; i < pOrder->numOfOrderedCols; ++i) { + for (int32_t i = 0; i < pOrder->numOfCols; ++i) { int32_t colIndex = pOrder->pData[i]; // it is the tbname column @@ -312,7 +312,9 @@ void tTagsPrints(SMeterSidExtInfo *pMeterInfo, tTagSchema *pSchema, tOrderIdx *p continue; } - switch (pSchema->pSchema[colIndex].type) { + SSchema* s = getColumnModelSchema(pSchema, colIndex); + + switch (s->type) { case TSDB_DATA_TYPE_INT: printf("%d, ", GET_TAG_VAL(pMeterInfo, colIndex, pSchema, int32_t)); break; @@ -336,9 +338,9 @@ void tTagsPrints(SMeterSidExtInfo *pMeterInfo, tTagSchema *pSchema, tOrderIdx *p break; case TSDB_DATA_TYPE_NCHAR: { char *data = GET_TAG_VAL_POINTER(pMeterInfo, colIndex, pSchema, char); + char buffer[512] = {0}; - - taosUcs4ToMbs(data, pSchema->pSchema[colIndex].bytes, buffer); + taosUcs4ToMbs(data, s->bytes, buffer); printf("%s, ", buffer); break; } @@ -370,16 +372,16 @@ static void UNUSED_FUNC tSidSetDisplay(tSidSet *pSets) { printf("the %d-th subgroup: \n", i + 1); for (int32_t j = s; j < e; ++j) { - tTagsPrints(pSets->pSids[j], pSets->pTagSchema, &pSets->orderIdx); + tTagsPrints(pSets->pSids[j], pSets->pColumnModel, &pSets->orderIdx); } } } void tSidSetSort(tSidSet *pSets) { pTrace("number of meters in sort: %d", pSets->numOfSids); - tOrderIdx *pOrderIdx = &pSets->orderIdx; + SColumnOrderInfo *pOrderIdx = &pSets->orderIdx; - if (pOrderIdx->numOfOrderedCols == 0 || 
pSets->numOfSids <= 1 || pSets->pTagSchema == NULL) { // no group by tags clause + if (pOrderIdx->numOfCols == 0 || pSets->numOfSids <= 1 || pSets->pColumnModel == NULL) { // no group by tags clause pSets->numOfSubSet = 1; pSets->starterPos = (int32_t *)malloc(sizeof(int32_t) * (pSets->numOfSubSet + 1)); pSets->starterPos[0] = 0; @@ -390,11 +392,11 @@ void tSidSetSort(tSidSet *pSets) { #endif } else { tOrderDescriptor *descriptor = - (tOrderDescriptor *)calloc(1, sizeof(tOrderDescriptor) + sizeof(int16_t) * pSets->orderIdx.numOfOrderedCols); - descriptor->pTagSchema = pSets->pTagSchema; + (tOrderDescriptor *)calloc(1, sizeof(tOrderDescriptor) + sizeof(int16_t) * pSets->orderIdx.numOfCols); + descriptor->pColumnModel = pSets->pColumnModel; descriptor->orderIdx = pSets->orderIdx; - memcpy(descriptor->orderIdx.pData, pOrderIdx->pData, sizeof(int16_t) * pSets->orderIdx.numOfOrderedCols); + memcpy(descriptor->orderIdx.pData, pOrderIdx->pData, sizeof(int16_t) * pSets->orderIdx.numOfCols); tQSortEx((void **)pSets->pSids, POINTER_BYTES, 0, pSets->numOfSids - 1, descriptor, meterSidComparator); pSets->starterPos = diff --git a/src/system/detail/src/vnodeUtil.c b/src/system/detail/src/vnodeUtil.c index c9d7ca0cf4..6f25d3a8b1 100644 --- a/src/system/detail/src/vnodeUtil.c +++ b/src/system/detail/src/vnodeUtil.c @@ -247,7 +247,7 @@ SSqlFunctionExpr* vnodeCreateSqlFunctionExpr(SQueryMeterMsg* pQueryMsg, int32_t* SColIndexEx* pColumnIndexExInfo = &pExprs[i].pBase.colInfo; - // tag column schema is kept in pQueryMsg->pTagSchema + // tag column schema is kept in pQueryMsg->pColumnModel if (TSDB_COL_IS_TAG(pColumnIndexExInfo->flag)) { if (pColumnIndexExInfo->colIdx >= pQueryMsg->numOfTagsCols) { *code = TSDB_CODE_INVALID_QUERY_MSG; diff --git a/src/util/src/textbuffer.c b/src/util/src/textbuffer.c index e1c571f4c2..53c2736761 100644 --- a/src/util/src/textbuffer.c +++ b/src/util/src/textbuffer.c @@ -23,7 +23,7 @@ #include "tutil.h" #define COLMODEL_GET_VAL(data, schema, allrow, 
rowId, colId) \ - (data + (schema)->colOffset[colId] * (allrow) + (rowId) * (schema)->pFields[colId].bytes) + (data + (schema)->pFields[colId].offset * (allrow) + (rowId) * (schema)->pFields[colId].field.bytes) int32_t tmpFileSerialNum = 0; @@ -49,46 +49,50 @@ void getTmpfilePath(const char *fileNamePrefix, char *dstPath) { } /* - * tColModel is deeply copy + * SColumnModel is deeply copy */ -void tExtMemBufferCreate(tExtMemBuffer **pMemBuffer, int32_t nBufferSize, int32_t elemSize, const char *tmpDataFilePath, - tColModel *pModel) { - (*pMemBuffer) = (tExtMemBuffer *)calloc(1, sizeof(tExtMemBuffer)); +tExtMemBuffer* createExtMemBuffer(int32_t inMemSize, int32_t elemSize, SColumnModel *pModel) { + tExtMemBuffer* pMemBuffer = (tExtMemBuffer *)calloc(1, sizeof(tExtMemBuffer)); - (*pMemBuffer)->nPageSize = DEFAULT_PAGE_SIZE; - (*pMemBuffer)->nMaxSizeInPages = ALIGN8(nBufferSize) / (*pMemBuffer)->nPageSize; - (*pMemBuffer)->nElemSize = elemSize; + pMemBuffer->pageSize = DEFAULT_PAGE_SIZE; + pMemBuffer->inMemCapacity = ALIGN8(inMemSize) / pMemBuffer->pageSize; + pMemBuffer->nElemSize = elemSize; - (*pMemBuffer)->numOfElemsPerPage = ((*pMemBuffer)->nPageSize - sizeof(tFilePage)) / (*pMemBuffer)->nElemSize; + pMemBuffer->numOfElemsPerPage = (pMemBuffer->pageSize - sizeof(tFilePage)) / pMemBuffer->nElemSize; + + char name[MAX_TMPFILE_PATH_LENGTH] = {0}; + getTmpfilePath("extbuf", name); + + pMemBuffer->path = strdup(name); + pTrace("create tmp file:%s", pMemBuffer->path); + + SFileInfo *pFMeta = &pMemBuffer->fileMeta; - strcpy((*pMemBuffer)->dataFilePath, tmpDataFilePath); - - tFileMeta *pFMeta = &(*pMemBuffer)->fileMeta; - - pFMeta->numOfElemsInFile = 0; - pFMeta->nFileSize = 0; - pFMeta->nPageSize = DEFAULT_PAGE_SIZE; + pFMeta->pageSize = DEFAULT_PAGE_SIZE; pFMeta->flushoutData.nAllocSize = 4; pFMeta->flushoutData.nLength = 0; pFMeta->flushoutData.pFlushoutInfo = (tFlushoutInfo *)calloc(4, sizeof(tFlushoutInfo)); - (*pMemBuffer)->pColModel = 
tColModelCreate(pModel->pFields, pModel->numOfCols, (*pMemBuffer)->numOfElemsPerPage); + pMemBuffer->pColumnModel = cloneColumnModel(pModel); + pMemBuffer->pColumnModel->capacity = pMemBuffer->numOfElemsPerPage; + + return pMemBuffer; } -void tExtMemBufferDestroy(tExtMemBuffer **pMemBuffer) { - if ((*pMemBuffer) == NULL) { - return; +void* destoryExtMemBuffer(tExtMemBuffer *pMemBuffer) { + if (pMemBuffer == NULL) { + return NULL; } // release flush out info link - tFileMeta *pFileMeta = &(*pMemBuffer)->fileMeta; + SFileInfo *pFileMeta = &pMemBuffer->fileMeta; if (pFileMeta->flushoutData.nAllocSize != 0 && pFileMeta->flushoutData.pFlushoutInfo != NULL) { tfree(pFileMeta->flushoutData.pFlushoutInfo); } // release all in-memory buffer pages - tFilePagesItem *pFilePages = (*pMemBuffer)->pHead; + tFilePagesItem *pFilePages = pMemBuffer->pHead; while (pFilePages != NULL) { tFilePagesItem *pTmp = pFilePages; pFilePages = pFilePages->pNext; @@ -96,24 +100,27 @@ void tExtMemBufferDestroy(tExtMemBuffer **pMemBuffer) { } // close temp file - if ((*pMemBuffer)->dataFile != 0) { - if (fclose((*pMemBuffer)->dataFile) != 0) { - pError("failed to close file:%s, reason:%s", (*pMemBuffer)->dataFilePath, strerror(errno)); + if (pMemBuffer->file != 0) { + if (fclose(pMemBuffer->file) != 0) { + pError("failed to close file:%s, reason:%s", pMemBuffer->path, strerror(errno)); } - pTrace("remove temp file:%s for external buffer", (*pMemBuffer)->dataFilePath); - unlink((*pMemBuffer)->dataFilePath); + pTrace("remove temp file:%s for external buffer", pMemBuffer->path); + unlink(pMemBuffer->path); } - tColModelDestroy((*pMemBuffer)->pColModel); + destroyColumnModel(pMemBuffer->pColumnModel); - tfree(*pMemBuffer); + tfree(pMemBuffer->path); + tfree(pMemBuffer); + + return NULL; } /* * alloc more memory for flush out info entries. 
*/ -static bool allocFlushoutInfoEntries(tFileMeta *pFileMeta) { +static bool allocFlushoutInfoEntries(SFileInfo *pFileMeta) { pFileMeta->flushoutData.nAllocSize = pFileMeta->flushoutData.nAllocSize << 1; tFlushoutInfo *tmp = (tFlushoutInfo *)realloc(pFileMeta->flushoutData.pFlushoutInfo, @@ -127,8 +134,8 @@ static bool allocFlushoutInfoEntries(tFileMeta *pFileMeta) { return true; } -bool tExtMemBufferAlloc(tExtMemBuffer *pMemBuffer) { - if (pMemBuffer->numOfPagesInMem > 0 && pMemBuffer->numOfPagesInMem == pMemBuffer->nMaxSizeInPages) { +static bool tExtMemBufferAlloc(tExtMemBuffer *pMemBuffer) { + if (pMemBuffer->numOfInMemPages > 0 && pMemBuffer->numOfInMemPages == pMemBuffer->inMemCapacity) { /* * the in-mem buffer is full. * To flush data to disk to accommodate more data @@ -145,7 +152,7 @@ bool tExtMemBufferAlloc(tExtMemBuffer *pMemBuffer) { * The memory buffer pages may be recycle in order to avoid unnecessary memory * allocation later. */ - tFilePagesItem *item = (tFilePagesItem *)calloc(1, pMemBuffer->nPageSize + sizeof(tFilePagesItem)); + tFilePagesItem *item = (tFilePagesItem *)calloc(1, pMemBuffer->pageSize + sizeof(tFilePagesItem)); if (item == NULL) { return false; } @@ -161,8 +168,7 @@ bool tExtMemBufferAlloc(tExtMemBuffer *pMemBuffer) { pMemBuffer->pHead = item; } - pMemBuffer->numOfPagesInMem += 1; - + pMemBuffer->numOfInMemPages += 1; return true; } @@ -171,7 +177,7 @@ bool tExtMemBufferAlloc(tExtMemBuffer *pMemBuffer) { */ int16_t tExtMemBufferPut(tExtMemBuffer *pMemBuffer, void *data, int32_t numOfRows) { if (numOfRows == 0) { - return pMemBuffer->numOfPagesInMem; + return pMemBuffer->numOfInMemPages; } tFilePagesItem *pLast = pMemBuffer->pTail; @@ -185,15 +191,15 @@ int16_t tExtMemBufferPut(tExtMemBuffer *pMemBuffer, void *data, int32_t numOfRow if (pLast->item.numOfElems + numOfRows <= pMemBuffer->numOfElemsPerPage) { // enough space for records - tColModelAppend(pMemBuffer->pColModel, &pLast->item, data, 0, numOfRows, numOfRows); + 
tColModelAppend(pMemBuffer->pColumnModel, &pLast->item, data, 0, numOfRows, numOfRows); pMemBuffer->numOfElemsInBuffer += numOfRows; - pMemBuffer->numOfAllElems += numOfRows; + pMemBuffer->numOfTotalElems += numOfRows; } else { int32_t numOfRemainEntries = pMemBuffer->numOfElemsPerPage - pLast->item.numOfElems; - tColModelAppend(pMemBuffer->pColModel, &pLast->item, data, 0, numOfRemainEntries, numOfRows); + tColModelAppend(pMemBuffer->pColumnModel, &pLast->item, data, 0, numOfRemainEntries, numOfRows); pMemBuffer->numOfElemsInBuffer += numOfRemainEntries; - pMemBuffer->numOfAllElems += numOfRemainEntries; + pMemBuffer->numOfTotalElems += numOfRemainEntries; int32_t hasWritten = numOfRemainEntries; int32_t remain = numOfRows - numOfRemainEntries; @@ -211,10 +217,10 @@ int16_t tExtMemBufferPut(tExtMemBuffer *pMemBuffer, void *data, int32_t numOfRow numOfWriteElems = remain; } - pMemBuffer->numOfAllElems += numOfWriteElems; + pMemBuffer->numOfTotalElems += numOfWriteElems; pLast = pMemBuffer->pTail; - tColModelAppend(pMemBuffer->pColModel, &pLast->item, data, hasWritten, numOfWriteElems, numOfRows); + tColModelAppend(pMemBuffer->pColumnModel, &pLast->item, data, hasWritten, numOfWriteElems, numOfRows); remain -= numOfWriteElems; pMemBuffer->numOfElemsInBuffer += numOfWriteElems; @@ -222,11 +228,11 @@ int16_t tExtMemBufferPut(tExtMemBuffer *pMemBuffer, void *data, int32_t numOfRow } } - return pMemBuffer->numOfPagesInMem; + return pMemBuffer->numOfInMemPages; } static bool tExtMemBufferUpdateFlushoutInfo(tExtMemBuffer *pMemBuffer) { - tFileMeta *pFileMeta = &pMemBuffer->fileMeta; + SFileInfo *pFileMeta = &pMemBuffer->fileMeta; if (pMemBuffer->flushModel == MULTIPLE_APPEND_MODEL) { if (pFileMeta->flushoutData.nLength == pFileMeta->flushoutData.nAllocSize && !allocFlushoutInfoEntries(pFileMeta)) { @@ -243,46 +249,47 @@ static bool tExtMemBufferUpdateFlushoutInfo(tExtMemBuffer *pMemBuffer) { } // only the page still in buffer is flushed out to disk - 
pFlushoutInfo->numOfPages = pMemBuffer->numOfPagesInMem; + pFlushoutInfo->numOfPages = pMemBuffer->numOfInMemPages; pFileMeta->flushoutData.nLength += 1; } else { // always update the first flushout array in single_flush_model pFileMeta->flushoutData.nLength = 1; tFlushoutInfo *pFlushoutInfo = &pFileMeta->flushoutData.pFlushoutInfo[0]; - pFlushoutInfo->numOfPages += pMemBuffer->numOfPagesInMem; + pFlushoutInfo->numOfPages += pMemBuffer->numOfInMemPages; } return true; } static void tExtMemBufferClearFlushoutInfo(tExtMemBuffer *pMemBuffer) { - tFileMeta *pFileMeta = &pMemBuffer->fileMeta; + SFileInfo *pFileMeta = &pMemBuffer->fileMeta; pFileMeta->flushoutData.nLength = 0; memset(pFileMeta->flushoutData.pFlushoutInfo, 0, sizeof(tFlushoutInfo) * pFileMeta->flushoutData.nAllocSize); } bool tExtMemBufferFlush(tExtMemBuffer *pMemBuffer) { - if (pMemBuffer->numOfAllElems == 0) { + if (pMemBuffer->numOfTotalElems == 0) { return true; } - if (pMemBuffer->dataFile == NULL) { - if ((pMemBuffer->dataFile = fopen(pMemBuffer->dataFilePath, "wb+")) == NULL) { + if (pMemBuffer->file == NULL) { + if ((pMemBuffer->file = fopen(pMemBuffer->path, "wb+")) == NULL) { return false; } } + /* all data has been flushed to disk, ignore flush operation */ if (pMemBuffer->numOfElemsInBuffer == 0) { - /* all data has been flushed to disk, ignore flush operation */ return true; } - bool ret = true; + bool ret = true; + tFilePagesItem *first = pMemBuffer->pHead; while (first != NULL) { - size_t retVal = fwrite((char *)&(first->item), pMemBuffer->nPageSize, 1, pMemBuffer->dataFile); + size_t retVal = fwrite((char *)&(first->item), pMemBuffer->pageSize, 1, pMemBuffer->file); if (retVal <= 0) { // failed to write to buffer, may be not enough space ret = false; } @@ -296,12 +303,12 @@ bool tExtMemBufferFlush(tExtMemBuffer *pMemBuffer) { tfree(ptmp); // release all data in memory buffer } - fflush(pMemBuffer->dataFile); // flush to disk + fflush(pMemBuffer->file); // flush to disk 
tExtMemBufferUpdateFlushoutInfo(pMemBuffer); pMemBuffer->numOfElemsInBuffer = 0; - pMemBuffer->numOfPagesInMem = 0; + pMemBuffer->numOfInMemPages = 0; pMemBuffer->pHead = NULL; pMemBuffer->pTail = NULL; @@ -309,7 +316,9 @@ bool tExtMemBufferFlush(tExtMemBuffer *pMemBuffer) { } void tExtMemBufferClear(tExtMemBuffer *pMemBuffer) { - if (pMemBuffer == NULL || pMemBuffer->numOfAllElems == 0) return; + if (pMemBuffer == NULL || pMemBuffer->numOfTotalElems == 0) { + return; + } /* * release all data in memory buffer @@ -325,15 +334,15 @@ void tExtMemBufferClear(tExtMemBuffer *pMemBuffer) { pMemBuffer->fileMeta.nFileSize = 0; pMemBuffer->numOfElemsInBuffer = 0; - pMemBuffer->numOfPagesInMem = 0; + pMemBuffer->numOfInMemPages = 0; pMemBuffer->pHead = NULL; pMemBuffer->pTail = NULL; tExtMemBufferClearFlushoutInfo(pMemBuffer); - if (pMemBuffer->dataFile != NULL) { - // reset the write pointer to the header - fseek(pMemBuffer->dataFile, 0, SEEK_SET); + // reset the write pointer to the header + if (pMemBuffer->file != NULL) { + fseek(pMemBuffer->file, 0, SEEK_SET); } } @@ -347,8 +356,8 @@ bool tExtMemBufferLoadData(tExtMemBuffer *pMemBuffer, tFilePage *pFilePage, int3 return false; } - size_t ret = fseek(pMemBuffer->dataFile, (pInfo->startPageId + pageIdx) * pMemBuffer->nPageSize, SEEK_SET); - ret = fread(pFilePage, pMemBuffer->nPageSize, 1, pMemBuffer->dataFile); + size_t ret = fseek(pMemBuffer->file, (pInfo->startPageId + pageIdx) * pMemBuffer->pageSize, SEEK_SET); + ret = fread(pFilePage, pMemBuffer->pageSize, 1, pMemBuffer->file); return (ret > 0); } @@ -356,474 +365,11 @@ bool tExtMemBufferLoadData(tExtMemBuffer *pMemBuffer, tFilePage *pFilePage, int3 bool tExtMemBufferIsAllDataInMem(tExtMemBuffer *pMemBuffer) { return (pMemBuffer->fileMeta.nFileSize == 0); } ////////////////////////////////////////////////////////////////////////////////////////////////////////////// -// TODO safty check in result -void tBucketBigIntHash(tMemBucket *pBucket, void *value, int16_t 
*segIdx, int16_t *slotIdx) { - int64_t v = *(int64_t *)value; - - if (pBucket->nRange.i64MaxVal == INT64_MIN) { - if (v >= 0) { - *segIdx = ((v >> (64 - 9)) >> 6) + 8; - *slotIdx = (v >> (64 - 9)) & 0x3F; - } else { // v<0 - *segIdx = ((-v) >> (64 - 9)) >> 6; - *slotIdx = ((-v) >> (64 - 9)) & 0x3F; - *segIdx = 7 - (*segIdx); - } - } else { - // todo hash for bigint and float and double - int64_t span = pBucket->nRange.i64MaxVal - pBucket->nRange.i64MinVal; - if (span < pBucket->nTotalSlots) { - int32_t delta = (int32_t)(v - pBucket->nRange.i64MinVal); - *segIdx = delta / pBucket->nSlotsOfSeg; - *slotIdx = delta % pBucket->nSlotsOfSeg; - } else { - double x = (double)span / pBucket->nTotalSlots; - double posx = (v - pBucket->nRange.i64MinVal) / x; - if (v == pBucket->nRange.i64MaxVal) { - posx -= 1; - } - - *segIdx = ((int32_t)posx) / pBucket->nSlotsOfSeg; - *slotIdx = ((int32_t)posx) % pBucket->nSlotsOfSeg; - } - } -} - -// todo refactor to more generic -void tBucketIntHash(tMemBucket *pBucket, void *value, int16_t *segIdx, int16_t *slotIdx) { - int32_t v = *(int32_t *)value; - - if (pBucket->nRange.iMaxVal == INT32_MIN) { - /* - * taking negative integer into consideration, - * there is only half of pBucket->segs available for non-negative integer - */ - // int32_t numOfSlots = pBucket->nTotalSlots>>1; - // int32_t bits = bitsOfNumber(numOfSlots)-1; - - if (v >= 0) { - *segIdx = ((v >> (32 - 9)) >> 6) + 8; - *slotIdx = (v >> (32 - 9)) & 0x3F; - } else { // v<0 - *segIdx = ((-v) >> (32 - 9)) >> 6; - *slotIdx = ((-v) >> (32 - 9)) & 0x3F; - *segIdx = 7 - (*segIdx); - } - } else { - // divide a range of [iMinVal, iMaxVal] into 1024 buckets - int32_t span = pBucket->nRange.iMaxVal - pBucket->nRange.iMinVal; - if (span < pBucket->nTotalSlots) { - int32_t delta = v - pBucket->nRange.iMinVal; - *segIdx = delta / pBucket->nSlotsOfSeg; - *slotIdx = delta % pBucket->nSlotsOfSeg; - } else { - double x = (double)span / pBucket->nTotalSlots; - double posx = (v - 
pBucket->nRange.iMinVal) / x; - if (v == pBucket->nRange.iMaxVal) { - posx -= 1; - } - *segIdx = ((int32_t)posx) / pBucket->nSlotsOfSeg; - *slotIdx = ((int32_t)posx) % pBucket->nSlotsOfSeg; - } - } -} - -void tBucketDoubleHash(tMemBucket *pBucket, void *value, int16_t *segIdx, int16_t *slotIdx) { - //double v = *(double *)value; - double v = GET_DOUBLE_VAL(value); - - if (pBucket->nRange.dMinVal == DBL_MAX) { - /* - * taking negative integer into consideration, - * there is only half of pBucket->segs available for non-negative integer - */ - double x = DBL_MAX / (pBucket->nTotalSlots >> 1); - double posx = (v + DBL_MAX) / x; - *segIdx = ((int32_t)posx) / pBucket->nSlotsOfSeg; - *slotIdx = ((int32_t)posx) % pBucket->nSlotsOfSeg; - } else { - // divide a range of [dMinVal, dMaxVal] into 1024 buckets - double span = pBucket->nRange.dMaxVal - pBucket->nRange.dMinVal; - if (span < pBucket->nTotalSlots) { - int32_t delta = (int32_t)(v - pBucket->nRange.dMinVal); - *segIdx = delta / pBucket->nSlotsOfSeg; - *slotIdx = delta % pBucket->nSlotsOfSeg; - } else { - double x = span / pBucket->nTotalSlots; - double posx = (v - pBucket->nRange.dMinVal) / x; - if (v == pBucket->nRange.dMaxVal) { - posx -= 1; - } - *segIdx = ((int32_t)posx) / pBucket->nSlotsOfSeg; - *slotIdx = ((int32_t)posx) % pBucket->nSlotsOfSeg; - } - - if (*segIdx < 0 || *segIdx > 16 || *slotIdx < 0 || *slotIdx > 64) { - pError("error in hash process. segment is: %d, slot id is: %d\n", *segIdx, *slotIdx); - } - } -} - -tMemBucket* tMemBucketCreate(int32_t totalSlots, int32_t nBufferSize, int16_t nElemSize, int16_t dataType, tOrderDescriptor *pDesc) { - tMemBucket* pBucket = (tMemBucket *)malloc(sizeof(tMemBucket)); - - pBucket->nTotalSlots = totalSlots; - pBucket->nSlotsOfSeg = 1 << 6; // 64 Segments, 16 slots each seg. 
- pBucket->dataType = dataType; - pBucket->nElemSize = nElemSize; - pBucket->nPageSize = DEFAULT_PAGE_SIZE; - - pBucket->numOfElems = 0; - pBucket->numOfSegs = pBucket->nTotalSlots / pBucket->nSlotsOfSeg; - - pBucket->nTotalBufferSize = nBufferSize; - - pBucket->maxElemsCapacity = pBucket->nTotalBufferSize / pBucket->nElemSize; - - pBucket->numOfTotalPages = pBucket->nTotalBufferSize / pBucket->nPageSize; - pBucket->numOfAvailPages = pBucket->numOfTotalPages; - - pBucket->pOrderDesc = pDesc; - - switch (pBucket->dataType) { - case TSDB_DATA_TYPE_INT: - case TSDB_DATA_TYPE_SMALLINT: - case TSDB_DATA_TYPE_TINYINT: { - pBucket->nRange.iMinVal = INT32_MAX; - pBucket->nRange.iMaxVal = INT32_MIN; - pBucket->HashFunc = tBucketIntHash; - break; - }; - case TSDB_DATA_TYPE_DOUBLE: - case TSDB_DATA_TYPE_FLOAT: { - pBucket->nRange.dMinVal = DBL_MAX; - pBucket->nRange.dMaxVal = -DBL_MAX; - pBucket->HashFunc = tBucketDoubleHash; - break; - }; - case TSDB_DATA_TYPE_BIGINT: { - pBucket->nRange.i64MinVal = INT64_MAX; - pBucket->nRange.i64MaxVal = INT64_MIN; - pBucket->HashFunc = tBucketBigIntHash; - break; - }; - default: { - pError("MemBucket:%p,not support data type %d,failed", *pBucket, pBucket->dataType); - tfree(pBucket); - return NULL; - } - } - - if (pDesc->pSchema->numOfCols != 1 || pDesc->pSchema->colOffset[0] != 0) { - pError("MemBucket:%p,only consecutive data is allowed,invalid numOfCols:%d or offset:%d", - pBucket, pDesc->pSchema->numOfCols, pDesc->pSchema->colOffset[0]); - tfree(pBucket); - return NULL; - } - - if (pDesc->pSchema->pFields[0].type != dataType) { - pError("MemBucket:%p,data type is not consistent,%d in schema, %d in param", pBucket, - pDesc->pSchema->pFields[0].type, dataType); - tfree(pBucket); - return NULL; - } - - if (pBucket->numOfTotalPages < pBucket->nTotalSlots) { - pWarn("MemBucket:%p,total buffer pages %d are not enough for all slots", pBucket, pBucket->numOfTotalPages); - } - - pBucket->pSegs = (tMemBucketSegment *)malloc(pBucket->numOfSegs * 
sizeof(tMemBucketSegment)); - - for (int32_t i = 0; i < pBucket->numOfSegs; ++i) { - pBucket->pSegs[i].numOfSlots = pBucket->nSlotsOfSeg; - pBucket->pSegs[i].pBuffer = NULL; - pBucket->pSegs[i].pBoundingEntries = NULL; - } - - pTrace("MemBucket:%p,created,buffer size:%d,elem size:%d", pBucket, pBucket->numOfTotalPages * DEFAULT_PAGE_SIZE, - pBucket->nElemSize); - - return pBucket; -} - -void tMemBucketDestroy(tMemBucket *pBucket) { - if (pBucket == NULL) { - return; - } - - if (pBucket->pSegs) { - for (int32_t i = 0; i < pBucket->numOfSegs; ++i) { - tMemBucketSegment *pSeg = &(pBucket->pSegs[i]); - tfree(pSeg->pBoundingEntries); - - if (pSeg->pBuffer == NULL || pSeg->numOfSlots == 0) { - continue; - } - - for (int32_t j = 0; j < pSeg->numOfSlots; ++j) { - if (pSeg->pBuffer[j] != NULL) { - tExtMemBufferDestroy(&pSeg->pBuffer[j]); - } - } - tfree(pSeg->pBuffer); - } - } - - tfree(pBucket->pSegs); - tfree(pBucket); -} - -/* - * find the slots which accounts for largest proportion of total in-memory buffer - */ -static void tBucketGetMaxMemSlot(tMemBucket *pBucket, int16_t *segIdx, int16_t *slotIdx) { - *segIdx = -1; - *slotIdx = -1; - - int32_t val = 0; - for (int32_t k = 0; k < pBucket->numOfSegs; ++k) { - tMemBucketSegment *pSeg = &pBucket->pSegs[k]; - for (int32_t i = 0; i < pSeg->numOfSlots; ++i) { - if (pSeg->pBuffer == NULL || pSeg->pBuffer[i] == NULL) { - continue; - } - - if (val < pSeg->pBuffer[i]->numOfPagesInMem) { - val = pSeg->pBuffer[i]->numOfPagesInMem; - *segIdx = k; - *slotIdx = i; - } - } - } -} - -static void resetBoundingBox(tMemBucketSegment *pSeg, int32_t type) { - switch (type) { - case TSDB_DATA_TYPE_BIGINT: { - for (int32_t i = 0; i < pSeg->numOfSlots; ++i) { - pSeg->pBoundingEntries[i].i64MaxVal = INT64_MIN; - pSeg->pBoundingEntries[i].i64MinVal = INT64_MAX; - } - break; - }; - case TSDB_DATA_TYPE_INT: - case TSDB_DATA_TYPE_SMALLINT: - case TSDB_DATA_TYPE_TINYINT: { - for (int32_t i = 0; i < pSeg->numOfSlots; ++i) { - 
pSeg->pBoundingEntries[i].iMaxVal = INT32_MIN; - pSeg->pBoundingEntries[i].iMinVal = INT32_MAX; - } - break; - }; - case TSDB_DATA_TYPE_DOUBLE: - case TSDB_DATA_TYPE_FLOAT: { - for (int32_t i = 0; i < pSeg->numOfSlots; ++i) { - pSeg->pBoundingEntries[i].dMaxVal = -DBL_MAX; - pSeg->pBoundingEntries[i].dMinVal = DBL_MAX; - } - break; - } - } -} - -void tMemBucketUpdateBoundingBox(MinMaxEntry *r, char *data, int32_t dataType) { - switch (dataType) { - case TSDB_DATA_TYPE_INT: { - int32_t val = *(int32_t *)data; - if (r->iMinVal > val) { - r->iMinVal = val; - } - - if (r->iMaxVal < val) { - r->iMaxVal = val; - } - break; - }; - case TSDB_DATA_TYPE_BIGINT: { - int64_t val = *(int64_t *)data; - if (r->i64MinVal > val) { - r->i64MinVal = val; - } - - if (r->i64MaxVal < val) { - r->i64MaxVal = val; - } - break; - }; - case TSDB_DATA_TYPE_SMALLINT: { - int32_t val = *(int16_t *)data; - if (r->iMinVal > val) { - r->iMinVal = val; - } - - if (r->iMaxVal < val) { - r->iMaxVal = val; - } - break; - }; - case TSDB_DATA_TYPE_TINYINT: { - int32_t val = *(int8_t *)data; - if (r->iMinVal > val) { - r->iMinVal = val; - } - - if (r->iMaxVal < val) { - r->iMaxVal = val; - } - - break; - }; - case TSDB_DATA_TYPE_DOUBLE: { - //double val = *(double *)data; - double val = GET_DOUBLE_VAL(data); - if (r->dMinVal > val) { - r->dMinVal = val; - } - - if (r->dMaxVal < val) { - r->dMaxVal = val; - } - break; - }; - case TSDB_DATA_TYPE_FLOAT: { - //double val = *(float *)data; - double val = GET_FLOAT_VAL(data); - - if (r->dMinVal > val) { - r->dMinVal = val; - } - - if (r->dMaxVal < val) { - r->dMaxVal = val; - } - break; - }; - default: { assert(false); } - } -} - -/* - * in memory bucket, we only accept the simple data consecutive put in a row/column - * no column-model in this case. 
- */ -void tMemBucketPut(tMemBucket *pBucket, void *data, int32_t numOfRows) { - pBucket->numOfElems += numOfRows; - int16_t segIdx = 0, slotIdx = 0; - - for (int32_t i = 0; i < numOfRows; ++i) { - char *d = (char *)data + i * tDataTypeDesc[pBucket->dataType].nSize; - - switch (pBucket->dataType) { - case TSDB_DATA_TYPE_SMALLINT: { - int32_t val = *(int16_t *)d; - (pBucket->HashFunc)(pBucket, &val, &segIdx, &slotIdx); - break; - } - case TSDB_DATA_TYPE_TINYINT: { - int32_t val = *(int8_t *)d; - (pBucket->HashFunc)(pBucket, &val, &segIdx, &slotIdx); - break; - } - case TSDB_DATA_TYPE_INT: { - int32_t val = *(int32_t *)d; - (pBucket->HashFunc)(pBucket, &val, &segIdx, &slotIdx); - break; - } - case TSDB_DATA_TYPE_BIGINT: { - int64_t val = *(int64_t *)d; - (pBucket->HashFunc)(pBucket, &val, &segIdx, &slotIdx); - break; - } - case TSDB_DATA_TYPE_DOUBLE: { - //double val = *(double *)d; - double val = GET_DOUBLE_VAL(d); - (pBucket->HashFunc)(pBucket, &val, &segIdx, &slotIdx); - break; - } - case TSDB_DATA_TYPE_FLOAT: { - //double val = *(float *)d; - double val = GET_FLOAT_VAL(d); - (pBucket->HashFunc)(pBucket, &val, &segIdx, &slotIdx); - break; - } - } - - tMemBucketSegment *pSeg = &pBucket->pSegs[segIdx]; - if (pSeg->pBoundingEntries == NULL) { - pSeg->pBoundingEntries = (MinMaxEntry *)malloc(sizeof(MinMaxEntry) * pBucket->nSlotsOfSeg); - resetBoundingBox(pSeg, pBucket->dataType); - } - - if (pSeg->pBuffer == NULL) { - pSeg->pBuffer = (tExtMemBuffer **)calloc(pBucket->nSlotsOfSeg, sizeof(void *)); - } - - if (pSeg->pBuffer[slotIdx] == NULL) { - char name[MAX_TMPFILE_PATH_LENGTH] = {0}; - getTmpfilePath("tb_ex_bk_%lld_%lld_%d_%d", name); - - tExtMemBufferCreate(&pSeg->pBuffer[slotIdx], pBucket->numOfTotalPages * pBucket->nPageSize, pBucket->nElemSize, - name, pBucket->pOrderDesc->pSchema); - pSeg->pBuffer[slotIdx]->flushModel = SINGLE_APPEND_MODEL; - pBucket->pOrderDesc->pSchema->maxCapacity = pSeg->pBuffer[slotIdx]->numOfElemsPerPage; - } - - 
tMemBucketUpdateBoundingBox(&pSeg->pBoundingEntries[slotIdx], d, pBucket->dataType); - - // ensure available memory pages to allocate - int16_t cseg = 0, cslot = 0; - if (pBucket->numOfAvailPages == 0) { - pTrace("MemBucket:%p,max avail size:%d, no avail memory pages,", pBucket, pBucket->numOfTotalPages); - - tBucketGetMaxMemSlot(pBucket, &cseg, &cslot); - if (cseg == -1 || cslot == -1) { - pError("MemBucket:%p,failed to find appropriated avail buffer", pBucket); - return; - } - - if (cseg != segIdx || cslot != slotIdx) { - pBucket->numOfAvailPages += pBucket->pSegs[cseg].pBuffer[cslot]->numOfPagesInMem; - - int32_t avail = pBucket->pSegs[cseg].pBuffer[cslot]->numOfPagesInMem; - UNUSED(avail); - tExtMemBufferFlush(pBucket->pSegs[cseg].pBuffer[cslot]); - - pTrace("MemBucket:%p,seg:%d,slot:%d flushed to disk,new avail pages:%d", pBucket, cseg, cslot, - pBucket->numOfAvailPages); - } else { - pTrace("MemBucket:%p,failed to choose slot to flush to disk seg:%d,slot:%d", - pBucket, cseg, cslot); - } - } - int16_t consumedPgs = pSeg->pBuffer[slotIdx]->numOfPagesInMem; - - int16_t newPgs = tExtMemBufferPut(pSeg->pBuffer[slotIdx], d, 1); - /* - * trigger 1. page re-allocation, to reduce the available pages - * 2. page flushout, to increase the available pages - */ - pBucket->numOfAvailPages += (consumedPgs - newPgs); - } -} - -void releaseBucket(tMemBucket *pMemBucket, int32_t segIdx, int32_t slotIdx) { - if (segIdx < 0 || segIdx > pMemBucket->numOfSegs || slotIdx < 0) { - return; - } - - tMemBucketSegment *pSeg = &pMemBucket->pSegs[segIdx]; - if (slotIdx < 0 || slotIdx >= pSeg->numOfSlots || pSeg->pBuffer[slotIdx] == NULL) { - return; - } - - tExtMemBufferDestroy(&pSeg->pBuffer[slotIdx]); -} - static FORCE_INLINE int32_t primaryKeyComparator(int64_t f1, int64_t f2, int32_t colIdx, int32_t tsOrder) { if (f1 == f2) { return 0; } - + if (colIdx == 0 && tsOrder == TSQL_SO_DESC) { // primary column desc order return (f1 < f2) ? 
1 : -1; } else { // asc @@ -831,7 +377,6 @@ static FORCE_INLINE int32_t primaryKeyComparator(int64_t f1, int64_t f2, int32_t } } -// todo refactor static FORCE_INLINE int32_t columnValueAscendingComparator(char *f1, char *f2, int32_t type, int32_t bytes) { switch (type) { case TSDB_DATA_TYPE_INT: { @@ -902,7 +447,7 @@ static FORCE_INLINE int32_t columnValueAscendingComparator(char *f1, char *f2, i return (ret < 0) ? -1 : 1; }; } - + return 0; } @@ -910,14 +455,14 @@ int32_t compare_a(tOrderDescriptor *pDescriptor, int32_t numOfRows1, int32_t s1, int32_t s2, char *data2) { assert(numOfRows1 == numOfRows2); - int32_t cmpCnt = pDescriptor->orderIdx.numOfOrderedCols; + int32_t cmpCnt = pDescriptor->orderIdx.numOfCols; for (int32_t i = 0; i < cmpCnt; ++i) { int32_t colIdx = pDescriptor->orderIdx.pData[i]; - char *f1 = COLMODEL_GET_VAL(data1, pDescriptor->pSchema, numOfRows1, s1, colIdx); - char *f2 = COLMODEL_GET_VAL(data2, pDescriptor->pSchema, numOfRows2, s2, colIdx); + char *f1 = COLMODEL_GET_VAL(data1, pDescriptor->pColumnModel, numOfRows1, s1, colIdx); + char *f2 = COLMODEL_GET_VAL(data2, pDescriptor->pColumnModel, numOfRows2, s2, colIdx); - if (pDescriptor->pSchema->pFields[colIdx].type == TSDB_DATA_TYPE_TIMESTAMP) { + if (pDescriptor->pColumnModel->pFields[colIdx].field.type == TSDB_DATA_TYPE_TIMESTAMP) { int32_t ret = primaryKeyComparator(*(int64_t *)f1, *(int64_t *)f2, colIdx, pDescriptor->tsOrder); if (ret == 0) { continue; @@ -925,7 +470,7 @@ int32_t compare_a(tOrderDescriptor *pDescriptor, int32_t numOfRows1, int32_t s1, return ret; } } else { - SSchema *pSchema = &pDescriptor->pSchema->pFields[colIdx]; + SSchema *pSchema = &pDescriptor->pColumnModel->pFields[colIdx]; int32_t ret = columnValueAscendingComparator(f1, f2, pSchema->type, pSchema->bytes); if (ret == 0) { continue; @@ -942,14 +487,14 @@ int32_t compare_d(tOrderDescriptor *pDescriptor, int32_t numOfRows1, int32_t s1, int32_t s2, char *data2) { assert(numOfRows1 == numOfRows2); - int32_t cmpCnt = 
pDescriptor->orderIdx.numOfOrderedCols; + int32_t cmpCnt = pDescriptor->orderIdx.numOfCols; for (int32_t i = 0; i < cmpCnt; ++i) { int32_t colIdx = pDescriptor->orderIdx.pData[i]; - char *f1 = COLMODEL_GET_VAL(data1, pDescriptor->pSchema, numOfRows1, s1, colIdx); - char *f2 = COLMODEL_GET_VAL(data2, pDescriptor->pSchema, numOfRows2, s2, colIdx); + char *f1 = COLMODEL_GET_VAL(data1, pDescriptor->pColumnModel, numOfRows1, s1, colIdx); + char *f2 = COLMODEL_GET_VAL(data2, pDescriptor->pColumnModel, numOfRows2, s2, colIdx); - if (pDescriptor->pSchema->pFields[colIdx].type == TSDB_DATA_TYPE_TIMESTAMP) { + if (pDescriptor->pColumnModel->pFields[colIdx].field.type == TSDB_DATA_TYPE_TIMESTAMP) { int32_t ret = primaryKeyComparator(*(int64_t *)f1, *(int64_t *)f2, colIdx, pDescriptor->tsOrder); if (ret == 0) { continue; @@ -957,7 +502,7 @@ int32_t compare_d(tOrderDescriptor *pDescriptor, int32_t numOfRows1, int32_t s1, return ret; } } else { - SSchema *pSchema = &pDescriptor->pSchema->pFields[colIdx]; + SSchema *pSchema = &pDescriptor->pColumnModel->pFields[colIdx].field; int32_t ret = columnValueAscendingComparator(f1, f2, pSchema->type, pSchema->bytes); if (ret == 0) { continue; @@ -979,12 +524,13 @@ FORCE_INLINE int32_t compare_sd(tOrderDescriptor *pDescriptor, int32_t numOfRows return compare_d(pDescriptor, numOfRows, idx1, data, numOfRows, idx2, data); } -static void swap(tOrderDescriptor *pDescriptor, int32_t count, int32_t s1, char *data1, int32_t s2) { - for (int32_t i = 0; i < pDescriptor->pSchema->numOfCols; ++i) { - void *first = COLMODEL_GET_VAL(data1, pDescriptor->pSchema, count, s1, i); - void *second = COLMODEL_GET_VAL(data1, pDescriptor->pSchema, count, s2, i); +static void swap(SColumnModel *pColumnModel, int32_t count, int32_t s1, char *data1, int32_t s2) { + for (int32_t i = 0; i < pColumnModel->numOfCols; ++i) { + void *first = COLMODEL_GET_VAL(data1, pColumnModel, count, s1, i); + void *second = COLMODEL_GET_VAL(data1, pColumnModel, count, s2, i); -
tsDataSwap(first, second, pDescriptor->pSchema->pFields[i].type, pDescriptor->pSchema->pFields[i].bytes); + SSchema* pSchema = &pColumnModel->pFields[i].field; + tsDataSwap(first, second, pSchema->type, pSchema->bytes); } } @@ -993,7 +539,7 @@ static void tColDataInsertSort(tOrderDescriptor *pDescriptor, int32_t numOfRows, for (int32_t i = start + 1; i <= end; ++i) { for (int32_t j = i; j > start; --j) { if (compareFn(pDescriptor, numOfRows, j, j - 1, data) == -1) { - swap(pDescriptor, numOfRows, j - 1, data, j); + swap(pDescriptor->pColumnModel, numOfRows, j - 1, data, j); } else { break; } @@ -1035,33 +581,33 @@ static void median(tOrderDescriptor *pDescriptor, int32_t numOfRows, int32_t sta #if defined(_DEBUG_VIEW) int32_t f = pDescriptor->orderIdx.pData[0]; - char *midx = COLMODEL_GET_VAL(data, pDescriptor->pSchema, numOfRows, midIdx, f); - char *startx = COLMODEL_GET_VAL(data, pDescriptor->pSchema, numOfRows, start, f); - char *endx = COLMODEL_GET_VAL(data, pDescriptor->pSchema, numOfRows, end, f); + char *midx = COLMODEL_GET_VAL(data, pDescriptor->pColumnModel, numOfRows, midIdx, f); + char *startx = COLMODEL_GET_VAL(data, pDescriptor->pColumnModel, numOfRows, start, f); + char *endx = COLMODEL_GET_VAL(data, pDescriptor->pColumnModel, numOfRows, end, f); int32_t colIdx = pDescriptor->orderIdx.pData[0]; - tSortDataPrint(pDescriptor->pSchema->pFields[colIdx].type, "before", startx, midx, endx); + tSortDataPrint(pDescriptor->pColumnModel->pFields[colIdx].field.type, "before", startx, midx, endx); #endif if (compareFn(pDescriptor, numOfRows, midIdx, start, data) == 1) { - swap(pDescriptor, numOfRows, start, data, midIdx); + swap(pDescriptor->pColumnModel, numOfRows, start, data, midIdx); } if (compareFn(pDescriptor, numOfRows, midIdx, end, data) == 1) { - swap(pDescriptor, numOfRows, midIdx, data, start); - swap(pDescriptor, numOfRows, midIdx, data, end); + swap(pDescriptor->pColumnModel, numOfRows, midIdx, data, start); + swap(pDescriptor->pColumnModel, numOfRows,
midIdx, data, end); } else if (compareFn(pDescriptor, numOfRows, start, end, data) == 1) { - swap(pDescriptor, numOfRows, start, data, end); + swap(pDescriptor->pColumnModel, numOfRows, start, data, end); } assert(compareFn(pDescriptor, numOfRows, midIdx, start, data) <= 0 && compareFn(pDescriptor, numOfRows, start, end, data) <= 0); #if defined(_DEBUG_VIEW) - midx = COLMODEL_GET_VAL(data, pDescriptor->pSchema, numOfRows, midIdx, f); - startx = COLMODEL_GET_VAL(data, pDescriptor->pSchema, numOfRows, start, f); - endx = COLMODEL_GET_VAL(data, pDescriptor->pSchema, numOfRows, end, f); - tSortDataPrint(pDescriptor->pSchema->pFields[colIdx].type, "after", startx, midx, endx); + midx = COLMODEL_GET_VAL(data, pDescriptor->pColumnModel, numOfRows, midIdx, f); + startx = COLMODEL_GET_VAL(data, pDescriptor->pColumnModel, numOfRows, start, f); + endx = COLMODEL_GET_VAL(data, pDescriptor->pColumnModel, numOfRows, end, f); + tSortDataPrint(pDescriptor->pColumnModel->pFields[colIdx].field.type, "after", startx, midx, endx); #endif } @@ -1069,9 +615,9 @@ static UNUSED_FUNC void tRowModelDisplay(tOrderDescriptor *pDescriptor, int32_t int32_t colIdx = pDescriptor->orderIdx.pData[0]; for (int32_t i = 0; i < len; ++i) { - char *startx = COLMODEL_GET_VAL(d, pDescriptor->pSchema, numOfRows, i, colIdx); + char *startx = COLMODEL_GET_VAL(d, pDescriptor->pColumnModel, numOfRows, i, colIdx); - switch (pDescriptor->pSchema->pFields[colIdx].type) { + switch (pDescriptor->pColumnModel->pFields[colIdx].field.type) { case TSDB_DATA_TYPE_DOUBLE: printf("%lf\t", *(double *)startx); break; @@ -1139,13 +685,13 @@ void tColDataQSort(tOrderDescriptor *pDescriptor, int32_t numOfRows, int32_t sta } if (ret == 0 && e != end_same) { - swap(pDescriptor, numOfRows, e, data, end_same--); + swap(pDescriptor->pColumnModel, numOfRows, e, data, end_same--); } e--; } if (e != s) { - swap(pDescriptor, numOfRows, s, data, e); + swap(pDescriptor->pColumnModel, numOfRows, s, data, e); } #ifdef _DEBUG_VIEW @@ -1159,13
+705,13 @@ void tColDataQSort(tOrderDescriptor *pDescriptor, int32_t numOfRows, int32_t sta } if (ret == 0 && s != start_same) { - swap(pDescriptor, numOfRows, s, data, start_same++); + swap(pDescriptor->pColumnModel, numOfRows, s, data, start_same++); } s++; } if (s != e) { - swap(pDescriptor, numOfRows, s, data, e); + swap(pDescriptor->pColumnModel, numOfRows, s, data, e); } #ifdef _DEBUG_VIEW tRowModelDisplay(pDescriptor, numOfRows, data, end - start + 1); @@ -1178,7 +724,7 @@ void tColDataQSort(tOrderDescriptor *pDescriptor, int32_t numOfRows, int32_t sta int32_t right = end; while (right > end_same && left <= end_same) { - swap(pDescriptor, numOfRows, left++, data, right--); + swap(pDescriptor->pColumnModel, numOfRows, left++, data, right--); } // (pivotal+1) + steps of number that are identical pivotal @@ -1195,7 +741,7 @@ void tColDataQSort(tOrderDescriptor *pDescriptor, int32_t numOfRows, int32_t sta int32_t right = e - 1; while (left < start_same && right >= start_same) { - swap(pDescriptor, numOfRows, left++, data, right--); + swap(pDescriptor->pColumnModel, numOfRows, left++, data, right--); } // (pivotal-1) - steps of number that are identical pivotal @@ -1215,143 +761,50 @@ void tColDataQSort(tOrderDescriptor *pDescriptor, int32_t numOfRows, int32_t sta } } -tExtMemBuffer *releaseBucketsExceptFor(tMemBucket *pMemBucket, int16_t segIdx, int16_t slotIdx) { - tExtMemBuffer *pBuffer = NULL; - - for (int32_t i = 0; i < pMemBucket->numOfSegs; ++i) { - tMemBucketSegment *pSeg = &pMemBucket->pSegs[i]; - - for (int32_t j = 0; j < pSeg->numOfSlots; ++j) { - if (i == segIdx && j == slotIdx) { - pBuffer = pSeg->pBuffer[j]; - } else { - if (pSeg->pBuffer && pSeg->pBuffer[j]) { - tExtMemBufferDestroy(&pSeg->pBuffer[j]); - } - } - } - } - - return pBuffer; -} - -static tFilePage *loadIntoBucketFromDisk(tMemBucket *pMemBucket, int32_t segIdx, int32_t slotIdx, - tOrderDescriptor *pDesc) { - // release all data in other slots - tExtMemBuffer *pMemBuffer = 
pMemBucket->pSegs[segIdx].pBuffer[slotIdx]; - tFilePage * buffer = (tFilePage *)calloc(1, pMemBuffer->nElemSize * pMemBuffer->numOfAllElems + sizeof(tFilePage)); - int32_t oldCapacity = pDesc->pSchema->maxCapacity; - pDesc->pSchema->maxCapacity = pMemBuffer->numOfAllElems; - - if (!tExtMemBufferIsAllDataInMem(pMemBuffer)) { - pMemBuffer = releaseBucketsExceptFor(pMemBucket, segIdx, slotIdx); - assert(pMemBuffer->numOfAllElems > 0); - - // load data in disk to memory - tFilePage *pPage = (tFilePage *)calloc(1, pMemBuffer->nPageSize); - - for (int32_t i = 0; i < pMemBuffer->fileMeta.flushoutData.nLength; ++i) { - tFlushoutInfo *pFlushInfo = &pMemBuffer->fileMeta.flushoutData.pFlushoutInfo[i]; - - int32_t ret = fseek(pMemBuffer->dataFile, pFlushInfo->startPageId * pMemBuffer->nPageSize, SEEK_SET); - UNUSED(ret); - - for (uint32_t j = 0; j < pFlushInfo->numOfPages; ++j) { - ret = fread(pPage, pMemBuffer->nPageSize, 1, pMemBuffer->dataFile); - UNUSED(ret); - assert(pPage->numOfElems > 0); - - tColModelAppend(pDesc->pSchema, buffer, pPage->data, 0, pPage->numOfElems, pPage->numOfElems); - printf("id: %d count: %" PRIu64 "\n", j, buffer->numOfElems); - } - } - tfree(pPage); - - assert(buffer->numOfElems == pMemBuffer->fileMeta.numOfElemsInFile); +/* + * deep copy of sschema + */ +SColumnModel *createColumnModel(SSchema *fields, int32_t numOfCols, int32_t blockCapacity) { + SColumnModel *pColumnModel = (SColumnModel *)calloc(1, sizeof(SColumnModel) + numOfCols * sizeof(SSchemaEx)); + if (pColumnModel == NULL) { + return NULL; } - // load data in pMemBuffer to buffer - tFilePagesItem *pListItem = pMemBuffer->pHead; - while (pListItem != NULL) { - tColModelAppend(pDesc->pSchema, buffer, pListItem->item.data, 0, pListItem->item.numOfElems, - pListItem->item.numOfElems); - pListItem = pListItem->pNext; + pColumnModel->pFields = (SSchemaEx *)(&pColumnModel[1]); + + for(int32_t i = 0; i < numOfCols; ++i) { + SSchemaEx* pSchemaEx = &pColumnModel->pFields[i]; + pSchemaEx->field = 
fields[i]; + pSchemaEx->offset = pColumnModel->rowSize; + + pColumnModel->rowSize += pSchemaEx->field.bytes; } - tColDataQSort(pDesc, buffer->numOfElems, 0, buffer->numOfElems - 1, buffer->data, TSQL_SO_ASC); + pColumnModel->numOfCols = numOfCols; + pColumnModel->capacity = blockCapacity; - pDesc->pSchema->maxCapacity = oldCapacity; // restore value - return buffer; + return pColumnModel; } -double findOnlyResult(tMemBucket *pMemBucket) { - assert(pMemBucket->numOfElems == 1); - - for (int32_t i = 0; i < pMemBucket->numOfSegs; ++i) { - tMemBucketSegment *pSeg = &pMemBucket->pSegs[i]; - if (pSeg->pBuffer) { - for (int32_t j = 0; j < pSeg->numOfSlots; ++j) { - tExtMemBuffer *pBuffer = pSeg->pBuffer[j]; - if (pBuffer) { - assert(pBuffer->numOfAllElems == 1); - tFilePage *pPage = &pBuffer->pHead->item; - if (pBuffer->numOfElemsInBuffer == 1) { - switch (pMemBucket->dataType) { - case TSDB_DATA_TYPE_INT: - return *(int32_t *)pPage->data; - case TSDB_DATA_TYPE_SMALLINT: - return *(int16_t *)pPage->data; - case TSDB_DATA_TYPE_TINYINT: - return *(int8_t *)pPage->data; - case TSDB_DATA_TYPE_BIGINT: - return (double)(*(int64_t *)pPage->data); - case TSDB_DATA_TYPE_DOUBLE: { - double dv = GET_DOUBLE_VAL(pPage->data); - //return *(double *)pPage->data; - return dv; - } - case TSDB_DATA_TYPE_FLOAT: { - float fv = GET_FLOAT_VAL(pPage->data); - //return *(float *)pPage->data; - return fv; - } - default: - return 0; - } - } - } - } - } - } - return 0; -} - -/* - * deep copy of sschema - */ -tColModel *tColModelCreate(SSchema *field, int32_t numOfCols, int32_t maxCapacity) { - tColModel *pSchema = - (tColModel *)calloc(1, sizeof(tColModel) + numOfCols * sizeof(SSchema) + numOfCols * sizeof(int16_t)); - if (pSchema == NULL) { +SColumnModel *cloneColumnModel(SColumnModel *pSrc) { + if (pSrc == NULL) { return NULL; } - - pSchema->pFields = (SSchema *)(&pSchema[1]); - memcpy(pSchema->pFields, field, sizeof(SSchema) * numOfCols); - - pSchema->colOffset = (int16_t 
*)(&pSchema->pFields[numOfCols]); - pSchema->colOffset[0] = 0; - for (int32_t i = 1; i < numOfCols; ++i) { - pSchema->colOffset[i] = pSchema->colOffset[i - 1] + pSchema->pFields[i - 1].bytes; + + SColumnModel *pColumnModel = (SColumnModel *)calloc(1, sizeof(SColumnModel) + pSrc->numOfCols * sizeof(SSchemaEx)); + if (pColumnModel == NULL) { + return NULL; } - - pSchema->numOfCols = numOfCols; - pSchema->maxCapacity = maxCapacity; - - return pSchema; + + *pColumnModel = *pSrc; + + pColumnModel->pFields = (SSchemaEx*) (&pColumnModel[1]); + memcpy(pColumnModel->pFields, pSrc->pFields, pSrc->numOfCols * sizeof(SSchemaEx)); + + return pColumnModel; } -void tColModelDestroy(tColModel *pModel) { +void destroyColumnModel(SColumnModel *pModel) { if (pModel == NULL) { return; } @@ -1444,12 +897,12 @@ static void printBinaryDataEx(char *data, int32_t len, SSrcColumnInfo *param) { } } -void tColModelDisplay(tColModel *pModel, void *pData, int32_t numOfRows, int32_t totalCapacity) { +void tColModelDisplay(SColumnModel *pModel, void *pData, int32_t numOfRows, int32_t totalCapacity) { for (int32_t i = 0; i < numOfRows; ++i) { for (int32_t j = 0; j < pModel->numOfCols; ++j) { char *val = COLMODEL_GET_VAL((char *)pData, pModel, totalCapacity, i, j); - int type = pModel->pFields[j].type; + int type = pModel->pFields[j].field.type; printf("type:%d ", type); switch (type) { @@ -1461,11 +914,11 @@ void tColModelDisplay(tColModel *pModel, void *pData, int32_t numOfRows, int32_t break; case TSDB_DATA_TYPE_NCHAR: { char buf[4096] = {0}; - taosUcs4ToMbs(val, pModel->pFields[j].bytes, buf); + taosUcs4ToMbs(val, pModel->pFields[j].field.bytes, buf); printf("%s\t", buf); } case TSDB_DATA_TYPE_BINARY: { - printBinaryData(val, pModel->pFields[j].bytes); + printBinaryData(val, pModel->pFields[j].field.bytes); break; } case TSDB_DATA_TYPE_DOUBLE: @@ -1495,15 +948,15 @@ void tColModelDisplay(tColModel *pModel, void *pData, int32_t numOfRows, int32_t printf("\n"); } -void 
tColModelDisplayEx(tColModel *pModel, void *pData, int32_t numOfRows, int32_t totalCapacity, +void tColModelDisplayEx(SColumnModel *pModel, void *pData, int32_t numOfRows, int32_t totalCapacity, SSrcColumnInfo *param) { for (int32_t i = 0; i < numOfRows; ++i) { for (int32_t j = 0; j < pModel->numOfCols; ++j) { char *val = COLMODEL_GET_VAL((char *)pData, pModel, totalCapacity, i, j); - printf("type:%d\t", pModel->pFields[j].type); + printf("type:%d\t", pModel->pFields[j].field.type); - switch (pModel->pFields[j].type) { + switch (pModel->pFields[j].field.type) { case TSDB_DATA_TYPE_BIGINT: printf("%" PRId64 "\t", *(int64_t *)val); break; @@ -1512,11 +965,11 @@ void tColModelDisplayEx(tColModel *pModel, void *pData, int32_t numOfRows, int32 break; case TSDB_DATA_TYPE_NCHAR: { char buf[128] = {0}; - taosUcs4ToMbs(val, pModel->pFields[j].bytes, buf); + taosUcs4ToMbs(val, pModel->pFields[j].field.bytes, buf); printf("%s\t", buf); } case TSDB_DATA_TYPE_BINARY: { - printBinaryDataEx(val, pModel->pFields[j].bytes, &param[j]); + printBinaryDataEx(val, pModel->pFields[j].field.bytes, &param[j]); break; } case TSDB_DATA_TYPE_DOUBLE: @@ -1547,20 +1000,31 @@ void tColModelDisplayEx(tColModel *pModel, void *pData, int32_t numOfRows, int32 } //////////////////////////////////////////////////////////////////////////////////////////// -void tColModelCompact(tColModel *pModel, tFilePage *inputBuffer, int32_t maxElemsCapacity) { +void tColModelCompact(SColumnModel *pModel, tFilePage *inputBuffer, int32_t maxElemsCapacity) { if (inputBuffer->numOfElems == 0 || maxElemsCapacity == inputBuffer->numOfElems) { return; } /* start from the second column */ for (int32_t i = 1; i < pModel->numOfCols; ++i) { - memmove(inputBuffer->data + pModel->colOffset[i] * inputBuffer->numOfElems, - inputBuffer->data + pModel->colOffset[i] * maxElemsCapacity, - pModel->pFields[i].bytes * inputBuffer->numOfElems); + SSchemaEx* pSchemaEx = &pModel->pFields[i]; + memmove(inputBuffer->data + pSchemaEx->offset *
inputBuffer->numOfElems, + inputBuffer->data + pSchemaEx->offset * maxElemsCapacity, + pSchemaEx->field.bytes * inputBuffer->numOfElems); } } -void tColModelErase(tColModel *pModel, tFilePage *inputBuffer, int32_t maxCapacity, int32_t s, int32_t e) { +SSchema* getColumnModelSchema(SColumnModel *pColumnModel, int32_t index) { + assert(pColumnModel != NULL && index >= 0 && index < pColumnModel->numOfCols); + return &pColumnModel->pFields[index].field; +} + +int16_t getColumnModelOffset(SColumnModel *pColumnModel, int32_t index) { + assert(pColumnModel != NULL && index >= 0 && index < pColumnModel->numOfCols); + return pColumnModel->pFields[index].offset; +} + +void tColModelErase(SColumnModel *pModel, tFilePage *inputBuffer, int32_t blockCapacity, int32_t s, int32_t e) { if (inputBuffer->numOfElems == 0 || (e - s + 1) <= 0) { return; } @@ -1571,10 +1035,13 @@ void tColModelErase(tColModel *pModel, tFilePage *inputBuffer, int32_t maxCapaci /* start from the second column */ for (int32_t i = 0; i < pModel->numOfCols; ++i) { - char *startPos = inputBuffer->data + pModel->colOffset[i] * maxCapacity + s * pModel->pFields[i].bytes; - char *endPos = startPos + pModel->pFields[i].bytes * removed; + int16_t offset = getColumnModelOffset(pModel, i); + SSchema* pSchema = getColumnModelSchema(pModel, i); + + char *startPos = inputBuffer->data + offset * blockCapacity + s * pSchema->bytes; + char *endPos = startPos + pSchema->bytes * removed; - memmove(startPos, endPos, pModel->pFields[i].bytes * secPart); + memmove(startPos, endPos, pSchema->bytes * secPart); } inputBuffer->numOfElems = remain; @@ -1587,31 +1054,31 @@ void tColModelErase(tColModel *pModel, tFilePage *inputBuffer, int32_t maxCapaci * data in srcData must has the same schema as data in dstPage, that can be * described by dstModel */ -void tColModelAppend(tColModel *dstModel, tFilePage *dstPage, void *srcData, int32_t start, int32_t numOfRows, +void tColModelAppend(SColumnModel *dstModel, tFilePage *dstPage, void 
*srcData, int32_t start, int32_t numOfRows, int32_t srcCapacity) { - assert(dstPage->numOfElems + numOfRows <= dstModel->maxCapacity); + assert(dstPage->numOfElems + numOfRows <= dstModel->capacity); for (int32_t col = 0; col < dstModel->numOfCols; ++col) { - char *dst = COLMODEL_GET_VAL(dstPage->data, dstModel, dstModel->maxCapacity, dstPage->numOfElems, col); + char *dst = COLMODEL_GET_VAL(dstPage->data, dstModel, dstModel->capacity, dstPage->numOfElems, col); char *src = COLMODEL_GET_VAL((char *)srcData, dstModel, srcCapacity, start, col); - memmove(dst, src, dstModel->pFields[col].bytes * numOfRows); + memmove(dst, src, dstModel->pFields[col].field.bytes * numOfRows); } dstPage->numOfElems += numOfRows; } -tOrderDescriptor *tOrderDesCreate(int32_t *orderColIdx, int32_t numOfOrderCols, tColModel *pModel, +tOrderDescriptor *tOrderDesCreate(const int32_t *orderColIdx, int32_t numOfOrderCols, SColumnModel *pModel, int32_t tsOrderType) { tOrderDescriptor *desc = (tOrderDescriptor *)calloc(1, sizeof(tOrderDescriptor) + sizeof(int32_t) * numOfOrderCols); if (desc == NULL) { return NULL; } - desc->pSchema = pModel; + desc->pColumnModel = pModel; desc->tsOrder = tsOrderType; - desc->orderIdx.numOfOrderedCols = numOfOrderCols; + desc->orderIdx.numOfCols = numOfOrderCols; for (int32_t i = 0; i < numOfOrderCols; ++i) { desc->orderIdx.pData[i] = orderColIdx[i]; } @@ -1624,390 +1091,6 @@ void tOrderDescDestroy(tOrderDescriptor *pDesc) { return; } - tColModelDestroy(pDesc->pSchema); + destroyColumnModel(pDesc->pColumnModel); tfree(pDesc); } - -//////////////////////////////////////////////////////////////////////////////////////////// -static void findMaxMinValue(tMemBucket *pMemBucket, double *maxVal, double *minVal) { - *minVal = DBL_MAX; - *maxVal = -DBL_MAX; - - for (int32_t i = 0; i < pMemBucket->numOfSegs; ++i) { - tMemBucketSegment *pSeg = &pMemBucket->pSegs[i]; - if (pSeg->pBuffer == NULL) { - continue; - } - switch (pMemBucket->dataType) { - case TSDB_DATA_TYPE_INT: 
- case TSDB_DATA_TYPE_SMALLINT: - case TSDB_DATA_TYPE_TINYINT: { - for (int32_t j = 0; j < pSeg->numOfSlots; ++j) { - double minv = pSeg->pBoundingEntries[j].iMinVal; - double maxv = pSeg->pBoundingEntries[j].iMaxVal; - - if (*minVal > minv) { - *minVal = minv; - } - if (*maxVal < maxv) { - *maxVal = maxv; - } - } - break; - } - case TSDB_DATA_TYPE_DOUBLE: - case TSDB_DATA_TYPE_FLOAT: { - for (int32_t j = 0; j < pSeg->numOfSlots; ++j) { - double minv = pSeg->pBoundingEntries[j].dMinVal; - double maxv = pSeg->pBoundingEntries[j].dMaxVal; - - if (*minVal > minv) { - *minVal = minv; - } - if (*maxVal < maxv) { - *maxVal = maxv; - } - } - break; - } - case TSDB_DATA_TYPE_BIGINT: { - for (int32_t j = 0; j < pSeg->numOfSlots; ++j) { - double minv = (double)pSeg->pBoundingEntries[j].i64MinVal; - double maxv = (double)pSeg->pBoundingEntries[j].i64MaxVal; - - if (*minVal > minv) { - *minVal = minv; - } - if (*maxVal < maxv) { - *maxVal = maxv; - } - } - break; - } - } - } -} - -static MinMaxEntry getMinMaxEntryOfNearestSlotInNextSegment(tMemBucket *pMemBucket, int32_t segIdx) { - int32_t i = segIdx + 1; - while (i < pMemBucket->numOfSegs && pMemBucket->pSegs[i].numOfSlots == 0) ++i; - - tMemBucketSegment *pSeg = &pMemBucket->pSegs[i]; - assert(pMemBucket->numOfSegs > i && pMemBucket->pSegs[i].pBuffer != NULL); - - i = 0; - while (i < pMemBucket->nSlotsOfSeg && pSeg->pBuffer[i] == NULL) ++i; - - assert(i < pMemBucket->nSlotsOfSeg); - return pSeg->pBoundingEntries[i]; -} - -/* - * - * now, we need to find the minimum value of the next slot for - * interpolating the percentile value - * j is the last slot of current segment, we need to get the first - * slot of the next segment. 
- */ -static MinMaxEntry getMinMaxEntryOfNextSlotWithData(tMemBucket *pMemBucket, int32_t segIdx, int32_t slotIdx) { - tMemBucketSegment *pSeg = &pMemBucket->pSegs[segIdx]; - - MinMaxEntry next; - if (slotIdx == pSeg->numOfSlots - 1) { // find next segment with data - return getMinMaxEntryOfNearestSlotInNextSegment(pMemBucket, segIdx); - } else { - int32_t j = slotIdx + 1; - for (; j < pMemBucket->nSlotsOfSeg && pMemBucket->pSegs[segIdx].pBuffer[j] == 0; ++j) { - }; - - if (j == pMemBucket->nSlotsOfSeg) { // current slot has no available - // slot,try next segment - return getMinMaxEntryOfNearestSlotInNextSegment(pMemBucket, segIdx); - } else { - next = pSeg->pBoundingEntries[slotIdx + 1]; - assert(pSeg->pBuffer[slotIdx + 1] != NULL); - } - } - - return next; -} - -bool isIdenticalData(tMemBucket *pMemBucket, int32_t segIdx, int32_t slotIdx); -char *getFirstElemOfMemBuffer(tMemBucketSegment *pSeg, int32_t slotIdx, tFilePage *pPage); - -double getPercentileImpl(tMemBucket *pMemBucket, int32_t count, double fraction) { - int32_t num = 0; - - for (int32_t i = 0; i < pMemBucket->numOfSegs; ++i) { - tMemBucketSegment *pSeg = &pMemBucket->pSegs[i]; - for (int32_t j = 0; j < pSeg->numOfSlots; ++j) { - if (pSeg->pBuffer == NULL || pSeg->pBuffer[j] == NULL) { - continue; - } - // required value in current slot - if (num < (count + 1) && num + pSeg->pBuffer[j]->numOfAllElems >= (count + 1)) { - if (pSeg->pBuffer[j]->numOfAllElems + num == (count + 1)) { - /* - * now, we need to find the minimum value of the next slot for interpolating the percentile value - * j is the last slot of current segment, we need to get the first slot of the next segment. 
- * - */ - MinMaxEntry next = getMinMaxEntryOfNextSlotWithData(pMemBucket, i, j); - - double maxOfThisSlot = 0; - double minOfNextSlot = 0; - switch (pMemBucket->dataType) { - case TSDB_DATA_TYPE_INT: - case TSDB_DATA_TYPE_SMALLINT: - case TSDB_DATA_TYPE_TINYINT: { - maxOfThisSlot = pSeg->pBoundingEntries[j].iMaxVal; - minOfNextSlot = next.iMinVal; - break; - }; - case TSDB_DATA_TYPE_FLOAT: - case TSDB_DATA_TYPE_DOUBLE: { - maxOfThisSlot = pSeg->pBoundingEntries[j].dMaxVal; - minOfNextSlot = next.dMinVal; - break; - }; - case TSDB_DATA_TYPE_BIGINT: { - maxOfThisSlot = (double)pSeg->pBoundingEntries[j].i64MaxVal; - minOfNextSlot = (double)next.i64MinVal; - break; - } - }; - - assert(minOfNextSlot > maxOfThisSlot); - - double val = (1 - fraction) * maxOfThisSlot + fraction * minOfNextSlot; - return val; - } - if (pSeg->pBuffer[j]->numOfAllElems <= pMemBucket->maxElemsCapacity) { - // data in buffer and file are merged together to be processed. - tFilePage *buffer = loadIntoBucketFromDisk(pMemBucket, i, j, pMemBucket->pOrderDesc); - int32_t currentIdx = count - num; - - char * thisVal = buffer->data + pMemBucket->nElemSize * currentIdx; - char * nextVal = thisVal + pMemBucket->nElemSize; - double td, nd; - switch (pMemBucket->dataType) { - case TSDB_DATA_TYPE_SMALLINT: { - td = *(int16_t *)thisVal; - nd = *(int16_t *)nextVal; - break; - } - case TSDB_DATA_TYPE_TINYINT: { - td = *(int8_t *)thisVal; - nd = *(int8_t *)nextVal; - break; - } - case TSDB_DATA_TYPE_INT: { - td = *(int32_t *)thisVal; - nd = *(int32_t *)nextVal; - break; - }; - case TSDB_DATA_TYPE_FLOAT: { - //td = *(float *)thisVal; - //nd = *(float *)nextVal; - td = GET_FLOAT_VAL(thisVal); - nd = GET_FLOAT_VAL(nextVal); - break; - } - case TSDB_DATA_TYPE_DOUBLE: { - //td = *(double *)thisVal; - td = GET_DOUBLE_VAL(thisVal); - //nd = *(double *)nextVal; - nd = GET_DOUBLE_VAL(nextVal); - break; - } - case TSDB_DATA_TYPE_BIGINT: { - td = (double)*(int64_t *)thisVal; - nd = (double)*(int64_t *)nextVal; - break; 
- } - } - double val = (1 - fraction) * td + fraction * nd; - tfree(buffer); - - return val; - } else { // incur a second round bucket split - if (isIdenticalData(pMemBucket, i, j)) { - tExtMemBuffer *pMemBuffer = pSeg->pBuffer[j]; - - tFilePage *pPage = (tFilePage *)malloc(pMemBuffer->nPageSize); - - char *thisVal = getFirstElemOfMemBuffer(pSeg, j, pPage); - - double finalResult = 0.0; - - switch (pMemBucket->dataType) { - case TSDB_DATA_TYPE_SMALLINT: { - finalResult = *(int16_t *)thisVal; - break; - } - case TSDB_DATA_TYPE_TINYINT: { - finalResult = *(int8_t *)thisVal; - break; - } - case TSDB_DATA_TYPE_INT: { - finalResult = *(int32_t *)thisVal; - break; - }; - case TSDB_DATA_TYPE_FLOAT: { - //finalResult = *(float *)thisVal; - finalResult = GET_FLOAT_VAL(thisVal); - break; - } - case TSDB_DATA_TYPE_DOUBLE: { - //finalResult = *(double *)thisVal; - finalResult = GET_DOUBLE_VAL(thisVal); - break; - } - case TSDB_DATA_TYPE_BIGINT: { - finalResult = (double)(*(int64_t *)thisVal); - break; - } - } - - free(pPage); - return finalResult; - } - - pTrace("MemBucket:%p,start second round bucketing", pMemBucket); - - if (pSeg->pBuffer[j]->numOfElemsInBuffer != 0) { - pTrace("MemBucket:%p,flush %d pages to disk, clear status", pMemBucket, pSeg->pBuffer[j]->numOfPagesInMem); - - pMemBucket->numOfAvailPages += pSeg->pBuffer[j]->numOfPagesInMem; - tExtMemBufferFlush(pSeg->pBuffer[j]); - } - - tExtMemBuffer *pMemBuffer = pSeg->pBuffer[j]; - pSeg->pBuffer[j] = NULL; - - // release all - for (int32_t tt = 0; tt < pMemBucket->numOfSegs; ++tt) { - tMemBucketSegment *pSeg = &pMemBucket->pSegs[tt]; - for (int32_t ttx = 0; ttx < pSeg->numOfSlots; ++ttx) { - if (pSeg->pBuffer && pSeg->pBuffer[ttx]) { - tExtMemBufferDestroy(&pSeg->pBuffer[ttx]); - } - } - } - - pMemBucket->nRange.i64MaxVal = pSeg->pBoundingEntries->i64MaxVal; - pMemBucket->nRange.i64MinVal = pSeg->pBoundingEntries->i64MinVal; - pMemBucket->numOfElems = 0; - - for (int32_t tt = 0; tt < pMemBucket->numOfSegs; ++tt) { - 
tMemBucketSegment *pSeg = &pMemBucket->pSegs[tt]; - for (int32_t ttx = 0; ttx < pSeg->numOfSlots; ++ttx) { - if (pSeg->pBoundingEntries) { - resetBoundingBox(pSeg, pMemBucket->dataType); - } - } - } - - tFilePage *pPage = (tFilePage *)malloc(pMemBuffer->nPageSize); - - tFlushoutInfo *pFlushInfo = &pMemBuffer->fileMeta.flushoutData.pFlushoutInfo[0]; - assert(pFlushInfo->numOfPages == pMemBuffer->fileMeta.nFileSize); - - int32_t ret = fseek(pMemBuffer->dataFile, pFlushInfo->startPageId * pMemBuffer->nPageSize, SEEK_SET); - UNUSED(ret); - - for (uint32_t jx = 0; jx < pFlushInfo->numOfPages; ++jx) { - ret = fread(pPage, pMemBuffer->nPageSize, 1, pMemBuffer->dataFile); - UNUSED(ret); - tMemBucketPut(pMemBucket, pPage->data, pPage->numOfElems); - } - - fclose(pMemBuffer->dataFile); - if (unlink(pMemBuffer->dataFilePath) != 0) { - pError("MemBucket:%p,remove tmp file %s failed", pMemBucket, pMemBuffer->dataFilePath); - } - tfree(pMemBuffer); - tfree(pPage); - - return getPercentileImpl(pMemBucket, count - num, fraction); - } - } else { - num += pSeg->pBuffer[j]->numOfAllElems; - } - } - } - return 0; -} - -double getPercentile(tMemBucket *pMemBucket, double percent) { - if (pMemBucket->numOfElems == 0) { - return 0.0; - } - - if (pMemBucket->numOfElems == 1) { // return the only element - return findOnlyResult(pMemBucket); - } - - percent = fabs(percent); - - // validate the parameters - if (fabs(percent - 100.0) < DBL_EPSILON || (percent < DBL_EPSILON)) { - double minx = 0, maxx = 0; - /* - * find the min/max value, no need to scan all data in bucket - */ - findMaxMinValue(pMemBucket, &maxx, &minx); - - return fabs(percent - 100) < DBL_EPSILON ? 
maxx : minx; - } - - double percentVal = (percent * (pMemBucket->numOfElems - 1)) / ((double)100.0); - int32_t orderIdx = (int32_t)percentVal; - - // do put data by using buckets - return getPercentileImpl(pMemBucket, orderIdx, percentVal - orderIdx); -} - -/* - * check if data in one slot are all identical - * only need to compare with the bounding box - */ -bool isIdenticalData(tMemBucket *pMemBucket, int32_t segIdx, int32_t slotIdx) { - tMemBucketSegment *pSeg = &pMemBucket->pSegs[segIdx]; - - if (pMemBucket->dataType == TSDB_DATA_TYPE_INT || pMemBucket->dataType == TSDB_DATA_TYPE_BIGINT || - pMemBucket->dataType == TSDB_DATA_TYPE_SMALLINT || pMemBucket->dataType == TSDB_DATA_TYPE_TINYINT) { - return pSeg->pBoundingEntries[slotIdx].i64MinVal == pSeg->pBoundingEntries[slotIdx].i64MaxVal; - } - - if (pMemBucket->dataType == TSDB_DATA_TYPE_FLOAT || pMemBucket->dataType == TSDB_DATA_TYPE_DOUBLE) { - return fabs(pSeg->pBoundingEntries[slotIdx].dMaxVal - pSeg->pBoundingEntries[slotIdx].dMinVal) < DBL_EPSILON; - } - - return false; -} - -/* - * get the first element of one slot into memory. 
- * if no data of current slot in memory, load it from disk - */ -char *getFirstElemOfMemBuffer(tMemBucketSegment *pSeg, int32_t slotIdx, tFilePage *pPage) { - tExtMemBuffer *pMemBuffer = pSeg->pBuffer[slotIdx]; - char * thisVal = NULL; - - if (pSeg->pBuffer[slotIdx]->numOfElemsInBuffer != 0) { - thisVal = pSeg->pBuffer[slotIdx]->pHead->item.data; - } else { - /* - * no data in memory, load one page into memory - */ - tFlushoutInfo *pFlushInfo = &pMemBuffer->fileMeta.flushoutData.pFlushoutInfo[0]; - assert(pFlushInfo->numOfPages == pMemBuffer->fileMeta.nFileSize); - - fseek(pMemBuffer->dataFile, pFlushInfo->startPageId * pMemBuffer->nPageSize, SEEK_SET); - size_t ret = fread(pPage, pMemBuffer->nPageSize, 1, pMemBuffer->dataFile); - UNUSED(ret); - thisVal = pPage->data; - } - return thisVal; -} diff --git a/src/util/src/tinterpolation.c b/src/util/src/tinterpolation.c index 5df07a5c43..82cc52cd42 100644 --- a/src/util/src/tinterpolation.c +++ b/src/util/src/tinterpolation.c @@ -205,16 +205,18 @@ static char* getPos(char* data, int32_t bytes, int32_t order, int32_t capacity, // } } -static void setTagsValueInInterpolation(tFilePage** data, char** pTags, tColModel* pModel, int32_t order, int32_t start, +static void setTagsValueInInterpolation(tFilePage** data, char** pTags, SColumnModel* pModel, int32_t order, int32_t start, int32_t capacity, int32_t num) { for (int32_t j = 0, i = start; i < pModel->numOfCols; ++i, ++j) { - char* val1 = getPos(data[i]->data, pModel->pFields[i].bytes, order, capacity, num); - assignVal(val1, pTags[j], pModel->pFields[i].bytes, pModel->pFields[i].type); + SSchema* pSchema = getColumnModelSchema(pModel, i); + + char* val1 = getPos(data[i]->data, pSchema->bytes, order, capacity, num); + assignVal(val1, pTags[j], pSchema->bytes, pSchema->type); } } static void doInterpoResultImpl(SInterpolationInfo* pInterpoInfo, int16_t interpoType, tFilePage** data, - tColModel* pModel, int32_t* num, char** srcData, int64_t nInterval, int64_t* 
defaultVal, + SColumnModel* pModel, int32_t* num, char** srcData, int64_t nInterval, int64_t* defaultVal, int64_t currentTimestamp, int32_t capacity, int32_t numOfTags, char** pTags, bool outOfBound) { char** prevValues = &pInterpoInfo->prevValues; @@ -234,18 +236,23 @@ static void doInterpoResultImpl(SInterpolationInfo* pInterpoInfo, int16_t interp char* pInterpolationData = INTERPOL_IS_ASC_INTERPOL(pInterpoInfo) ? *prevValues : *nextValues; if (pInterpolationData != NULL) { for (int32_t i = 1; i < numOfValCols; ++i) { - char* val1 = getPos(data[i]->data, pModel->pFields[i].bytes, pInterpoInfo->order, capacity, *num); + SSchema* pSchema = getColumnModelSchema(pModel, i); + int16_t offset = getColumnModelOffset(pModel, i); + + char* val1 = getPos(data[i]->data, pSchema->bytes, pInterpoInfo->order, capacity, *num); - if (isNull(pInterpolationData + pModel->colOffset[i], pModel->pFields[i].type)) { - setNull(val1, pModel->pFields[i].type, pModel->pFields[i].bytes); + if (isNull(pInterpolationData + offset, pSchema->type)) { + setNull(val1, pSchema->type, pSchema->bytes); } else { - assignVal(val1, pInterpolationData + pModel->colOffset[i], pModel->pFields[i].bytes, pModel->pFields[i].type); + assignVal(val1, pInterpolationData + offset, pSchema->bytes, pSchema->type); } } } else { /* no prev value yet, set the value for null */ for (int32_t i = 1; i < numOfValCols; ++i) { - char* val1 = getPos(data[i]->data, pModel->pFields[i].bytes, pInterpoInfo->order, capacity, *num); - setNull(val1, pModel->pFields[i].type, pModel->pFields[i].bytes); + SSchema* pSchema = getColumnModelSchema(pModel, i); + + char* val1 = getPos(data[i]->data, pSchema->bytes, pInterpoInfo->order, capacity, *num); + setNull(val1, pSchema->type, pSchema->bytes); } } @@ -254,34 +261,41 @@ static void doInterpoResultImpl(SInterpolationInfo* pInterpoInfo, int16_t interp // TODO : linear interpolation supports NULL value if (*prevValues != NULL && !outOfBound) { for (int32_t i = 1; i < numOfValCols; ++i) 
{ - int32_t type = pModel->pFields[i].type; - char* val1 = getPos(data[i]->data, pModel->pFields[i].bytes, pInterpoInfo->order, capacity, *num); + SSchema* pSchema = getColumnModelSchema(pModel, i); + int16_t offset = getColumnModelOffset(pModel, i); + + int16_t type = pSchema->type; + char* val1 = getPos(data[i]->data, pSchema->bytes, pInterpoInfo->order, capacity, *num); if (type == TSDB_DATA_TYPE_BINARY || type == TSDB_DATA_TYPE_NCHAR || type == TSDB_DATA_TYPE_BOOL) { - setNull(val1, pModel->pFields[i].type, pModel->pFields[i].bytes); + setNull(val1, type, pSchema->bytes); continue; } - point1 = (SPoint){.key = *(TSKEY*)(*prevValues), .val = *prevValues + pModel->colOffset[i]}; - point2 = (SPoint){.key = currentTimestamp, .val = srcData[i] + pInterpoInfo->rowIdx * pModel->pFields[i].bytes}; + point1 = (SPoint){.key = *(TSKEY*)(*prevValues), .val = *prevValues + offset}; + point2 = (SPoint){.key = currentTimestamp, .val = srcData[i] + pInterpoInfo->rowIdx * pSchema->bytes}; point = (SPoint){.key = pInterpoInfo->startTimestamp, .val = val1}; - taosDoLinearInterpolation(pModel->pFields[i].type, &point1, &point2, &point); + taosDoLinearInterpolation(type, &point1, &point2, &point); } setTagsValueInInterpolation(data, pTags, pModel, pInterpoInfo->order, numOfValCols, capacity, *num); } else { for (int32_t i = 1; i < numOfValCols; ++i) { - char* val1 = getPos(data[i]->data, pModel->pFields[i].bytes, pInterpoInfo->order, capacity, *num); - setNull(val1, pModel->pFields[i].type, pModel->pFields[i].bytes); + SSchema* pSchema = getColumnModelSchema(pModel, i); + + char* val1 = getPos(data[i]->data, pSchema->bytes, pInterpoInfo->order, capacity, *num); + setNull(val1, pSchema->type, pSchema->bytes); } setTagsValueInInterpolation(data, pTags, pModel, pInterpoInfo->order, numOfValCols, capacity, *num); } } else { /* default value interpolation */ for (int32_t i = 1; i < numOfValCols; ++i) { - char* val1 = getPos(data[i]->data, pModel->pFields[i].bytes, pInterpoInfo->order, 
capacity, *num); - assignVal(val1, (char*)&defaultVal[i], pModel->pFields[i].bytes, pModel->pFields[i].type); + SSchema* pSchema = getColumnModelSchema(pModel, i); + + char* val1 = getPos(data[i]->data, pSchema->bytes, pInterpoInfo->order, capacity, *num); + assignVal(val1, (char*)&defaultVal[i], pSchema->bytes, pSchema->type); } setTagsValueInInterpolation(data, pTags, pModel, pInterpoInfo->order, numOfValCols, capacity, *num); @@ -295,7 +309,7 @@ static void doInterpoResultImpl(SInterpolationInfo* pInterpoInfo, int16_t interp int32_t taosDoInterpoResult(SInterpolationInfo* pInterpoInfo, int16_t interpoType, tFilePage** data, int32_t numOfRawDataInRows, int32_t outputRows, int64_t nInterval, - const int64_t* pPrimaryKeyArray, tColModel* pModel, char** srcData, int64_t* defaultVal, + const int64_t* pPrimaryKeyArray, SColumnModel* pModel, char** srcData, int64_t* defaultVal, const int32_t* functionIDs, int32_t bufSize) { int32_t num = 0; pInterpoInfo->numOfCurrentInterpo = 0; @@ -328,17 +342,21 @@ int32_t taosDoInterpoResult(SInterpolationInfo* pInterpoInfo, int16_t interpoTyp (pInterpoInfo->startTimestamp > currentTimestamp && !INTERPOL_IS_ASC_INTERPOL(pInterpoInfo))) { /* set the next value for interpolation */ if (*nextValues == NULL) { - *nextValues = - calloc(1, pModel->colOffset[pModel->numOfCols - 1] + pModel->pFields[pModel->numOfCols - 1].bytes); + *nextValues = calloc(1, pModel->rowSize); for (int i = 1; i < pModel->numOfCols; i++) { - setNull(*nextValues + pModel->colOffset[i], pModel->pFields[i].type, pModel->pFields[i].bytes); + int16_t offset = getColumnModelOffset(pModel, i); + SSchema* pSchema = getColumnModelSchema(pModel, i); + + setNull(*nextValues + offset, pSchema->type, pSchema->bytes); } } int32_t offset = pInterpoInfo->rowIdx; for (int32_t tlen = 0, i = 0; i < pModel->numOfCols - numOfTags; ++i) { - memcpy(*nextValues + tlen, srcData[i] + offset * pModel->pFields[i].bytes, pModel->pFields[i].bytes); - tlen += pModel->pFields[i].bytes; + 
SSchema* pSchema = getColumnModelSchema(pModel, i); + + memcpy(*nextValues + tlen, srcData[i] + offset * pSchema->bytes, pSchema->bytes); + tlen += pSchema->bytes; } } @@ -358,37 +376,41 @@ int32_t taosDoInterpoResult(SInterpolationInfo* pInterpoInfo, int16_t interpoTyp if (pInterpoInfo->startTimestamp == currentTimestamp) { if (*prevValues == NULL) { - *prevValues = - calloc(1, pModel->colOffset[pModel->numOfCols - 1] + pModel->pFields[pModel->numOfCols - 1].bytes); + *prevValues = calloc(1, pModel->rowSize); for (int i = 1; i < pModel->numOfCols; i++) { - setNull(*prevValues + pModel->colOffset[i], pModel->pFields[i].type, pModel->pFields[i].bytes); + int16_t offset = getColumnModelOffset(pModel, i); + SSchema* pSchema = getColumnModelSchema(pModel, i); + + setNull(*prevValues + offset, pSchema->type, pSchema->bytes); } } // assign rows to dst buffer int32_t i = 0; for (int32_t tlen = 0; i < pModel->numOfCols - numOfTags; ++i) { - char* val1 = getPos(data[i]->data, pModel->pFields[i].bytes, pInterpoInfo->order, bufSize, num); + int16_t offset = getColumnModelOffset(pModel, i); + SSchema* pSchema = getColumnModelSchema(pModel, i); + + char* val1 = getPos(data[i]->data, pSchema->bytes, pInterpoInfo->order, bufSize, num); if (i == 0 || (functionIDs[i] != TSDB_FUNC_COUNT && - !isNull(srcData[i] + pInterpoInfo->rowIdx * pModel->pFields[i].bytes, pModel->pFields[i].type)) || + !isNull(srcData[i] + pInterpoInfo->rowIdx * pSchema->bytes, pSchema->type)) || (functionIDs[i] == TSDB_FUNC_COUNT && - *(int64_t*)(srcData[i] + pInterpoInfo->rowIdx * pModel->pFields[i].bytes) != 0)) { - assignVal(val1, srcData[i] + pInterpoInfo->rowIdx * pModel->pFields[i].bytes, pModel->pFields[i].bytes, - pModel->pFields[i].type); - memcpy(*prevValues + tlen, srcData[i] + pInterpoInfo->rowIdx * pModel->pFields[i].bytes, - pModel->pFields[i].bytes); + *(int64_t*)(srcData[i] + pInterpoInfo->rowIdx * pSchema->bytes) != 0)) { + + assignVal(val1, srcData[i] + pInterpoInfo->rowIdx * pSchema->bytes, 
pSchema->bytes, pSchema->type); + memcpy(*prevValues + tlen, srcData[i] + pInterpoInfo->rowIdx * pSchema->bytes, pSchema->bytes); } else { // i > 0 and isNULL, do interpolation if (interpoType == TSDB_INTERPO_PREV) { - assignVal(val1, *prevValues + pModel->colOffset[i], pModel->pFields[i].bytes, pModel->pFields[i].type); + assignVal(val1, *prevValues + offset, pSchema->bytes, pSchema->type); } else if (interpoType == TSDB_INTERPO_LINEAR) { // TODO: } else { - assignVal(val1, (char*)&defaultVal[i], pModel->pFields[i].bytes, pModel->pFields[i].type); + assignVal(val1, (char*)&defaultVal[i], pSchema->bytes, pSchema->type); } } - tlen += pModel->pFields[i].bytes; + tlen += pSchema->bytes; } /* set the tag value for final result */ diff --git a/src/util/src/tpercentile.c b/src/util/src/tpercentile.c new file mode 100644 index 0000000000..b3c09033b4 --- /dev/null +++ b/src/util/src/tpercentile.c @@ -0,0 +1,976 @@ +/* + * Copyright (c) 2019 TAOS Data, Inc. + * + * This program is free software: you can use, redistribute, and/or modify + * it under the terms of the GNU Affero General Public License, version 3 + * or later ("AGPL"), as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. + * + * You should have received a copy of the GNU Affero General Public License + * along with this program. If not, see . 
+ */ + +#include "os.h" + +#include "taosmsg.h" +#include "tsdb.h" +#include "tlog.h" +#include "ttypes.h" +#include "tpercentile.h" + +tExtMemBuffer *releaseBucketsExceptFor(tMemBucket *pMemBucket, int16_t segIdx, int16_t slotIdx) { + tExtMemBuffer *pBuffer = NULL; + + for (int32_t i = 0; i < pMemBucket->numOfSegs; ++i) { + tMemBucketSegment *pSeg = &pMemBucket->pSegs[i]; + + for (int32_t j = 0; j < pSeg->numOfSlots; ++j) { + if (i == segIdx && j == slotIdx) { + pBuffer = pSeg->pBuffer[j]; + } else { + if (pSeg->pBuffer && pSeg->pBuffer[j]) { + pSeg->pBuffer[j] = destoryExtMemBuffer(pSeg->pBuffer[j]); + } + } + } + } + + return pBuffer; +} + +static tFilePage *loadIntoBucketFromDisk(tMemBucket *pMemBucket, int32_t segIdx, int32_t slotIdx, + tOrderDescriptor *pDesc) { + // release all data in other slots + tExtMemBuffer *pMemBuffer = pMemBucket->pSegs[segIdx].pBuffer[slotIdx]; + tFilePage * buffer = (tFilePage *)calloc(1, pMemBuffer->nElemSize * pMemBuffer->numOfTotalElems + sizeof(tFilePage)); + int32_t oldCapacity = pDesc->pColumnModel->capacity; + pDesc->pColumnModel->capacity = pMemBuffer->numOfTotalElems; + + if (!tExtMemBufferIsAllDataInMem(pMemBuffer)) { + pMemBuffer = releaseBucketsExceptFor(pMemBucket, segIdx, slotIdx); + assert(pMemBuffer->numOfTotalElems > 0); + + // load data in disk to memory + tFilePage *pPage = (tFilePage *)calloc(1, pMemBuffer->pageSize); + + for (int32_t i = 0; i < pMemBuffer->fileMeta.flushoutData.nLength; ++i) { + tFlushoutInfo *pFlushInfo = &pMemBuffer->fileMeta.flushoutData.pFlushoutInfo[i]; + + int32_t ret = fseek(pMemBuffer->file, pFlushInfo->startPageId * pMemBuffer->pageSize, SEEK_SET); + UNUSED(ret); + + for (uint32_t j = 0; j < pFlushInfo->numOfPages; ++j) { + ret = fread(pPage, pMemBuffer->pageSize, 1, pMemBuffer->file); + UNUSED(ret); + assert(pPage->numOfElems > 0); + + tColModelAppend(pDesc->pColumnModel, buffer, pPage->data, 0, pPage->numOfElems, pPage->numOfElems); + printf("id: %d count: %" PRIu64 "\n", j, 
buffer->numOfElems); + } + } + tfree(pPage); + + assert(buffer->numOfElems == pMemBuffer->fileMeta.numOfElemsInFile); + } + + // load data in pMemBuffer to buffer + tFilePagesItem *pListItem = pMemBuffer->pHead; + while (pListItem != NULL) { + tColModelAppend(pDesc->pColumnModel, buffer, pListItem->item.data, 0, pListItem->item.numOfElems, + pListItem->item.numOfElems); + pListItem = pListItem->pNext; + } + + tColDataQSort(pDesc, buffer->numOfElems, 0, buffer->numOfElems - 1, buffer->data, TSQL_SO_ASC); + + pDesc->pColumnModel->capacity = oldCapacity; // restore value + return buffer; +} + +double findOnlyResult(tMemBucket *pMemBucket) { + assert(pMemBucket->numOfElems == 1); + + for (int32_t i = 0; i < pMemBucket->numOfSegs; ++i) { + tMemBucketSegment *pSeg = &pMemBucket->pSegs[i]; + if (pSeg->pBuffer) { + for (int32_t j = 0; j < pSeg->numOfSlots; ++j) { + tExtMemBuffer *pBuffer = pSeg->pBuffer[j]; + if (pBuffer) { + assert(pBuffer->numOfTotalElems == 1); + tFilePage *pPage = &pBuffer->pHead->item; + if (pBuffer->numOfElemsInBuffer == 1) { + switch (pMemBucket->dataType) { + case TSDB_DATA_TYPE_INT: + return *(int32_t *)pPage->data; + case TSDB_DATA_TYPE_SMALLINT: + return *(int16_t *)pPage->data; + case TSDB_DATA_TYPE_TINYINT: + return *(int8_t *)pPage->data; + case TSDB_DATA_TYPE_BIGINT: + return (double)(*(int64_t *)pPage->data); + case TSDB_DATA_TYPE_DOUBLE: { + double dv = GET_DOUBLE_VAL(pPage->data); + //return *(double *)pPage->data; + return dv; + } + case TSDB_DATA_TYPE_FLOAT: { + float fv = GET_FLOAT_VAL(pPage->data); + //return *(float *)pPage->data; + return fv; + } + default: + return 0; + } + } + } + } + } + } + return 0; +} + +void tBucketBigIntHash(tMemBucket *pBucket, void *value, int16_t *segIdx, int16_t *slotIdx) { + int64_t v = *(int64_t *)value; + + if (pBucket->nRange.i64MaxVal == INT64_MIN) { + if (v >= 0) { + *segIdx = ((v >> (64 - 9)) >> 6) + 8; + *slotIdx = (v >> (64 - 9)) & 0x3F; + } else { // v<0 + *segIdx = ((-v) >> (64 - 9)) >> 6; + 
*slotIdx = ((-v) >> (64 - 9)) & 0x3F; + *segIdx = 7 - (*segIdx); + } + } else { + // todo hash for bigint and float and double + int64_t span = pBucket->nRange.i64MaxVal - pBucket->nRange.i64MinVal; + if (span < pBucket->nTotalSlots) { + int32_t delta = (int32_t)(v - pBucket->nRange.i64MinVal); + *segIdx = delta / pBucket->nSlotsOfSeg; + *slotIdx = delta % pBucket->nSlotsOfSeg; + } else { + double x = (double)span / pBucket->nTotalSlots; + double posx = (v - pBucket->nRange.i64MinVal) / x; + if (v == pBucket->nRange.i64MaxVal) { + posx -= 1; + } + + *segIdx = ((int32_t)posx) / pBucket->nSlotsOfSeg; + *slotIdx = ((int32_t)posx) % pBucket->nSlotsOfSeg; + } + } +} + +// todo refactor to more generic +void tBucketIntHash(tMemBucket *pBucket, void *value, int16_t *segIdx, int16_t *slotIdx) { + int32_t v = *(int32_t *)value; + + if (pBucket->nRange.iMaxVal == INT32_MIN) { + /* + * taking negative integer into consideration, + * there is only half of pBucket->segs available for non-negative integer + */ + // int32_t numOfSlots = pBucket->nTotalSlots>>1; + // int32_t bits = bitsOfNumber(numOfSlots)-1; + + if (v >= 0) { + *segIdx = ((v >> (32 - 9)) >> 6) + 8; + *slotIdx = (v >> (32 - 9)) & 0x3F; + } else { // v<0 + *segIdx = ((-v) >> (32 - 9)) >> 6; + *slotIdx = ((-v) >> (32 - 9)) & 0x3F; + *segIdx = 7 - (*segIdx); + } + } else { + // divide a range of [iMinVal, iMaxVal] into 1024 buckets + int32_t span = pBucket->nRange.iMaxVal - pBucket->nRange.iMinVal; + if (span < pBucket->nTotalSlots) { + int32_t delta = v - pBucket->nRange.iMinVal; + *segIdx = delta / pBucket->nSlotsOfSeg; + *slotIdx = delta % pBucket->nSlotsOfSeg; + } else { + double x = (double)span / pBucket->nTotalSlots; + double posx = (v - pBucket->nRange.iMinVal) / x; + if (v == pBucket->nRange.iMaxVal) { + posx -= 1; + } + *segIdx = ((int32_t)posx) / pBucket->nSlotsOfSeg; + *slotIdx = ((int32_t)posx) % pBucket->nSlotsOfSeg; + } + } +} + +void tBucketDoubleHash(tMemBucket *pBucket, void *value, int16_t *segIdx, 
int16_t *slotIdx) { + // double v = *(double *)value; + double v = GET_DOUBLE_VAL(value); + + if (pBucket->nRange.dMinVal == DBL_MAX) { + /* + * taking negative integer into consideration, + * there is only half of pBucket->segs available for non-negative integer + */ + double x = DBL_MAX / (pBucket->nTotalSlots >> 1); + double posx = (v + DBL_MAX) / x; + *segIdx = ((int32_t)posx) / pBucket->nSlotsOfSeg; + *slotIdx = ((int32_t)posx) % pBucket->nSlotsOfSeg; + } else { + // divide a range of [dMinVal, dMaxVal] into 1024 buckets + double span = pBucket->nRange.dMaxVal - pBucket->nRange.dMinVal; + if (span < pBucket->nTotalSlots) { + int32_t delta = (int32_t)(v - pBucket->nRange.dMinVal); + *segIdx = delta / pBucket->nSlotsOfSeg; + *slotIdx = delta % pBucket->nSlotsOfSeg; + } else { + double x = span / pBucket->nTotalSlots; + double posx = (v - pBucket->nRange.dMinVal) / x; + if (v == pBucket->nRange.dMaxVal) { + posx -= 1; + } + *segIdx = ((int32_t)posx) / pBucket->nSlotsOfSeg; + *slotIdx = ((int32_t)posx) % pBucket->nSlotsOfSeg; + } + + if (*segIdx < 0 || *segIdx > 16 || *slotIdx < 0 || *slotIdx > 64) { + pError("error in hash process. segment is: %d, slot id is: %d\n", *segIdx, *slotIdx); + } + } +} + +tMemBucket *tMemBucketCreate(int32_t totalSlots, int32_t nBufferSize, int16_t nElemSize, int16_t dataType, + tOrderDescriptor *pDesc) { + tMemBucket *pBucket = (tMemBucket *)malloc(sizeof(tMemBucket)); + + pBucket->nTotalSlots = totalSlots; + pBucket->nSlotsOfSeg = 1 << 6; // 64 Segments, 16 slots each seg. 
+ pBucket->dataType = dataType; + pBucket->nElemSize = nElemSize; + pBucket->pageSize = DEFAULT_PAGE_SIZE; + + pBucket->numOfElems = 0; + pBucket->numOfSegs = pBucket->nTotalSlots / pBucket->nSlotsOfSeg; + + pBucket->nTotalBufferSize = nBufferSize; + + pBucket->maxElemsCapacity = pBucket->nTotalBufferSize / pBucket->nElemSize; + + pBucket->numOfTotalPages = pBucket->nTotalBufferSize / pBucket->pageSize; + pBucket->numOfAvailPages = pBucket->numOfTotalPages; + + pBucket->pOrderDesc = pDesc; + + switch (pBucket->dataType) { + case TSDB_DATA_TYPE_INT: + case TSDB_DATA_TYPE_SMALLINT: + case TSDB_DATA_TYPE_TINYINT: { + pBucket->nRange.iMinVal = INT32_MAX; + pBucket->nRange.iMaxVal = INT32_MIN; + pBucket->HashFunc = tBucketIntHash; + break; + }; + case TSDB_DATA_TYPE_DOUBLE: + case TSDB_DATA_TYPE_FLOAT: { + pBucket->nRange.dMinVal = DBL_MAX; + pBucket->nRange.dMaxVal = -DBL_MAX; + pBucket->HashFunc = tBucketDoubleHash; + break; + }; + case TSDB_DATA_TYPE_BIGINT: { + pBucket->nRange.i64MinVal = INT64_MAX; + pBucket->nRange.i64MaxVal = INT64_MIN; + pBucket->HashFunc = tBucketBigIntHash; + break; + }; + default: { + pError("MemBucket:%p,not support data type %d,failed", *pBucket, pBucket->dataType); + tfree(pBucket); + return NULL; + } + } + + int32_t numOfCols = pDesc->pColumnModel->numOfCols; + if (numOfCols != 1) { + pError("MemBucket:%p,only consecutive data is allowed,invalid numOfCols:%d", pBucket, numOfCols); + tfree(pBucket); + return NULL; + } + + SSchema* pSchema = getColumnModelSchema(pDesc->pColumnModel, 0); + if (pSchema->type != dataType) { + pError("MemBucket:%p,data type is not consistent,%d in schema, %d in param", pBucket, pSchema->type, dataType); + tfree(pBucket); + return NULL; + } + + if (pBucket->numOfTotalPages < pBucket->nTotalSlots) { + pWarn("MemBucket:%p,total buffer pages %d are not enough for all slots", pBucket, pBucket->numOfTotalPages); + } + + pBucket->pSegs = (tMemBucketSegment *)malloc(pBucket->numOfSegs * sizeof(tMemBucketSegment)); + + 
for (int32_t i = 0; i < pBucket->numOfSegs; ++i) { + pBucket->pSegs[i].numOfSlots = pBucket->nSlotsOfSeg; + pBucket->pSegs[i].pBuffer = NULL; + pBucket->pSegs[i].pBoundingEntries = NULL; + } + + pTrace("MemBucket:%p,created,buffer size:%d,elem size:%d", pBucket, pBucket->numOfTotalPages * DEFAULT_PAGE_SIZE, + pBucket->nElemSize); + + return pBucket; +} + +void tMemBucketDestroy(tMemBucket *pBucket) { + if (pBucket == NULL) { + return; + } + + if (pBucket->pSegs) { + for (int32_t i = 0; i < pBucket->numOfSegs; ++i) { + tMemBucketSegment *pSeg = &(pBucket->pSegs[i]); + tfree(pSeg->pBoundingEntries); + + if (pSeg->pBuffer == NULL || pSeg->numOfSlots == 0) { + continue; + } + + for (int32_t j = 0; j < pSeg->numOfSlots; ++j) { + if (pSeg->pBuffer[j] != NULL) { + pSeg->pBuffer[j] = destoryExtMemBuffer(pSeg->pBuffer[j]); + } + } + tfree(pSeg->pBuffer); + } + } + + tfree(pBucket->pSegs); + tfree(pBucket); +} + +/* + * find the slots which accounts for largest proportion of total in-memory buffer + */ +static void tBucketGetMaxMemSlot(tMemBucket *pBucket, int16_t *segIdx, int16_t *slotIdx) { + *segIdx = -1; + *slotIdx = -1; + + int32_t val = 0; + for (int32_t k = 0; k < pBucket->numOfSegs; ++k) { + tMemBucketSegment *pSeg = &pBucket->pSegs[k]; + for (int32_t i = 0; i < pSeg->numOfSlots; ++i) { + if (pSeg->pBuffer == NULL || pSeg->pBuffer[i] == NULL) { + continue; + } + + if (val < pSeg->pBuffer[i]->numOfInMemPages) { + val = pSeg->pBuffer[i]->numOfInMemPages; + *segIdx = k; + *slotIdx = i; + } + } + } +} + +static void resetBoundingBox(tMemBucketSegment *pSeg, int32_t type) { + switch (type) { + case TSDB_DATA_TYPE_BIGINT: { + for (int32_t i = 0; i < pSeg->numOfSlots; ++i) { + pSeg->pBoundingEntries[i].i64MaxVal = INT64_MIN; + pSeg->pBoundingEntries[i].i64MinVal = INT64_MAX; + } + break; + }; + case TSDB_DATA_TYPE_INT: + case TSDB_DATA_TYPE_SMALLINT: + case TSDB_DATA_TYPE_TINYINT: { + for (int32_t i = 0; i < pSeg->numOfSlots; ++i) { + pSeg->pBoundingEntries[i].iMaxVal = 
INT32_MIN; + pSeg->pBoundingEntries[i].iMinVal = INT32_MAX; + } + break; + }; + case TSDB_DATA_TYPE_DOUBLE: + case TSDB_DATA_TYPE_FLOAT: { + for (int32_t i = 0; i < pSeg->numOfSlots; ++i) { + pSeg->pBoundingEntries[i].dMaxVal = -DBL_MAX; + pSeg->pBoundingEntries[i].dMinVal = DBL_MAX; + } + break; + } + } +} + +void tMemBucketUpdateBoundingBox(MinMaxEntry *r, char *data, int32_t dataType) { + switch (dataType) { + case TSDB_DATA_TYPE_INT: { + int32_t val = *(int32_t *)data; + if (r->iMinVal > val) { + r->iMinVal = val; + } + + if (r->iMaxVal < val) { + r->iMaxVal = val; + } + break; + }; + case TSDB_DATA_TYPE_BIGINT: { + int64_t val = *(int64_t *)data; + if (r->i64MinVal > val) { + r->i64MinVal = val; + } + + if (r->i64MaxVal < val) { + r->i64MaxVal = val; + } + break; + }; + case TSDB_DATA_TYPE_SMALLINT: { + int32_t val = *(int16_t *)data; + if (r->iMinVal > val) { + r->iMinVal = val; + } + + if (r->iMaxVal < val) { + r->iMaxVal = val; + } + break; + }; + case TSDB_DATA_TYPE_TINYINT: { + int32_t val = *(int8_t *)data; + if (r->iMinVal > val) { + r->iMinVal = val; + } + + if (r->iMaxVal < val) { + r->iMaxVal = val; + } + + break; + }; + case TSDB_DATA_TYPE_DOUBLE: { + // double val = *(double *)data; + double val = GET_DOUBLE_VAL(data); + if (r->dMinVal > val) { + r->dMinVal = val; + } + + if (r->dMaxVal < val) { + r->dMaxVal = val; + } + break; + }; + case TSDB_DATA_TYPE_FLOAT: { + // double val = *(float *)data; + double val = GET_FLOAT_VAL(data); + + if (r->dMinVal > val) { + r->dMinVal = val; + } + + if (r->dMaxVal < val) { + r->dMaxVal = val; + } + break; + }; + default: { assert(false); } + } +} + +/* + * in memory bucket, we only accept the simple data consecutive put in a row/column + * no column-model in this case. 
+ */ +void tMemBucketPut(tMemBucket *pBucket, void *data, int32_t numOfRows) { + pBucket->numOfElems += numOfRows; + int16_t segIdx = 0, slotIdx = 0; + + for (int32_t i = 0; i < numOfRows; ++i) { + char *d = (char *)data + i * tDataTypeDesc[pBucket->dataType].nSize; + + switch (pBucket->dataType) { + case TSDB_DATA_TYPE_SMALLINT: { + int32_t val = *(int16_t *)d; + (pBucket->HashFunc)(pBucket, &val, &segIdx, &slotIdx); + break; + } + case TSDB_DATA_TYPE_TINYINT: { + int32_t val = *(int8_t *)d; + (pBucket->HashFunc)(pBucket, &val, &segIdx, &slotIdx); + break; + } + case TSDB_DATA_TYPE_INT: { + int32_t val = *(int32_t *)d; + (pBucket->HashFunc)(pBucket, &val, &segIdx, &slotIdx); + break; + } + case TSDB_DATA_TYPE_BIGINT: { + int64_t val = *(int64_t *)d; + (pBucket->HashFunc)(pBucket, &val, &segIdx, &slotIdx); + break; + } + case TSDB_DATA_TYPE_DOUBLE: { + // double val = *(double *)d; + double val = GET_DOUBLE_VAL(d); + (pBucket->HashFunc)(pBucket, &val, &segIdx, &slotIdx); + break; + } + case TSDB_DATA_TYPE_FLOAT: { + // double val = *(float *)d; + double val = GET_FLOAT_VAL(d); + (pBucket->HashFunc)(pBucket, &val, &segIdx, &slotIdx); + break; + } + } + + tMemBucketSegment *pSeg = &pBucket->pSegs[segIdx]; + if (pSeg->pBoundingEntries == NULL) { + pSeg->pBoundingEntries = (MinMaxEntry *)malloc(sizeof(MinMaxEntry) * pBucket->nSlotsOfSeg); + resetBoundingBox(pSeg, pBucket->dataType); + } + + if (pSeg->pBuffer == NULL) { + pSeg->pBuffer = (tExtMemBuffer **)calloc(pBucket->nSlotsOfSeg, sizeof(void *)); + } + + if (pSeg->pBuffer[slotIdx] == NULL) { + pSeg->pBuffer[slotIdx] = createExtMemBuffer(pBucket->numOfTotalPages * pBucket->pageSize, pBucket->nElemSize, + pBucket->pOrderDesc->pColumnModel); + pSeg->pBuffer[slotIdx]->flushModel = SINGLE_APPEND_MODEL; + pBucket->pOrderDesc->pColumnModel->capacity = pSeg->pBuffer[slotIdx]->numOfElemsPerPage; + } + + tMemBucketUpdateBoundingBox(&pSeg->pBoundingEntries[slotIdx], d, pBucket->dataType); + + // ensure available memory pages 
to allocate + int16_t cseg = 0, cslot = 0; + if (pBucket->numOfAvailPages == 0) { + pTrace("MemBucket:%p,max avail size:%d, no avail memory pages,", pBucket, pBucket->numOfTotalPages); + + tBucketGetMaxMemSlot(pBucket, &cseg, &cslot); + if (cseg == -1 || cslot == -1) { + pError("MemBucket:%p,failed to find appropriated avail buffer", pBucket); + return; + } + + if (cseg != segIdx || cslot != slotIdx) { + pBucket->numOfAvailPages += pBucket->pSegs[cseg].pBuffer[cslot]->numOfInMemPages; + + int32_t avail = pBucket->pSegs[cseg].pBuffer[cslot]->numOfInMemPages; + UNUSED(avail); + tExtMemBufferFlush(pBucket->pSegs[cseg].pBuffer[cslot]); + + pTrace("MemBucket:%p,seg:%d,slot:%d flushed to disk,new avail pages:%d", pBucket, cseg, cslot, + pBucket->numOfAvailPages); + } else { + pTrace("MemBucket:%p,failed to choose slot to flush to disk seg:%d,slot:%d", pBucket, cseg, cslot); + } + } + int16_t consumedPgs = pSeg->pBuffer[slotIdx]->numOfInMemPages; + + int16_t newPgs = tExtMemBufferPut(pSeg->pBuffer[slotIdx], d, 1); + /* + * trigger 1. page re-allocation, to reduce the available pages + * 2. 
page flushout, to increase the available pages + */ + pBucket->numOfAvailPages += (consumedPgs - newPgs); + } +} + +void releaseBucket(tMemBucket *pMemBucket, int32_t segIdx, int32_t slotIdx) { + if (segIdx < 0 || segIdx > pMemBucket->numOfSegs || slotIdx < 0) { + return; + } + + tMemBucketSegment *pSeg = &pMemBucket->pSegs[segIdx]; + if (slotIdx < 0 || slotIdx >= pSeg->numOfSlots || pSeg->pBuffer[slotIdx] == NULL) { + return; + } + + pSeg->pBuffer[slotIdx] = destoryExtMemBuffer(pSeg->pBuffer[slotIdx]); +} + +//////////////////////////////////////////////////////////////////////////////////////////// +static void findMaxMinValue(tMemBucket *pMemBucket, double *maxVal, double *minVal) { + *minVal = DBL_MAX; + *maxVal = -DBL_MAX; + + for (int32_t i = 0; i < pMemBucket->numOfSegs; ++i) { + tMemBucketSegment *pSeg = &pMemBucket->pSegs[i]; + if (pSeg->pBuffer == NULL) { + continue; + } + switch (pMemBucket->dataType) { + case TSDB_DATA_TYPE_INT: + case TSDB_DATA_TYPE_SMALLINT: + case TSDB_DATA_TYPE_TINYINT: { + for (int32_t j = 0; j < pSeg->numOfSlots; ++j) { + double minv = pSeg->pBoundingEntries[j].iMinVal; + double maxv = pSeg->pBoundingEntries[j].iMaxVal; + + if (*minVal > minv) { + *minVal = minv; + } + if (*maxVal < maxv) { + *maxVal = maxv; + } + } + break; + } + case TSDB_DATA_TYPE_DOUBLE: + case TSDB_DATA_TYPE_FLOAT: { + for (int32_t j = 0; j < pSeg->numOfSlots; ++j) { + double minv = pSeg->pBoundingEntries[j].dMinVal; + double maxv = pSeg->pBoundingEntries[j].dMaxVal; + + if (*minVal > minv) { + *minVal = minv; + } + if (*maxVal < maxv) { + *maxVal = maxv; + } + } + break; + } + case TSDB_DATA_TYPE_BIGINT: { + for (int32_t j = 0; j < pSeg->numOfSlots; ++j) { + double minv = (double)pSeg->pBoundingEntries[j].i64MinVal; + double maxv = (double)pSeg->pBoundingEntries[j].i64MaxVal; + + if (*minVal > minv) { + *minVal = minv; + } + if (*maxVal < maxv) { + *maxVal = maxv; + } + } + break; + } + } + } +} + +static MinMaxEntry 
getMinMaxEntryOfNearestSlotInNextSegment(tMemBucket *pMemBucket, int32_t segIdx) { + int32_t i = segIdx + 1; + while (i < pMemBucket->numOfSegs && pMemBucket->pSegs[i].numOfSlots == 0) ++i; + + tMemBucketSegment *pSeg = &pMemBucket->pSegs[i]; + assert(pMemBucket->numOfSegs > i && pMemBucket->pSegs[i].pBuffer != NULL); + + i = 0; + while (i < pMemBucket->nSlotsOfSeg && pSeg->pBuffer[i] == NULL) ++i; + + assert(i < pMemBucket->nSlotsOfSeg); + return pSeg->pBoundingEntries[i]; +} + +/* + * + * now, we need to find the minimum value of the next slot for + * interpolating the percentile value + * j is the last slot of current segment, we need to get the first + * slot of the next segment. + */ +static MinMaxEntry getMinMaxEntryOfNextSlotWithData(tMemBucket *pMemBucket, int32_t segIdx, int32_t slotIdx) { + tMemBucketSegment *pSeg = &pMemBucket->pSegs[segIdx]; + + MinMaxEntry next; + if (slotIdx == pSeg->numOfSlots - 1) { // find next segment with data + return getMinMaxEntryOfNearestSlotInNextSegment(pMemBucket, segIdx); + } else { + int32_t j = slotIdx + 1; + for (; j < pMemBucket->nSlotsOfSeg && pMemBucket->pSegs[segIdx].pBuffer[j] == 0; ++j) { + }; + + if (j == pMemBucket->nSlotsOfSeg) { // current slot has no available + // slot,try next segment + return getMinMaxEntryOfNearestSlotInNextSegment(pMemBucket, segIdx); + } else { + next = pSeg->pBoundingEntries[slotIdx + 1]; + assert(pSeg->pBuffer[slotIdx + 1] != NULL); + } + } + + return next; +} + +bool isIdenticalData(tMemBucket *pMemBucket, int32_t segIdx, int32_t slotIdx); +char *getFirstElemOfMemBuffer(tMemBucketSegment *pSeg, int32_t slotIdx, tFilePage *pPage); + +double getPercentileImpl(tMemBucket *pMemBucket, int32_t count, double fraction) { + int32_t num = 0; + + for (int32_t i = 0; i < pMemBucket->numOfSegs; ++i) { + tMemBucketSegment *pSeg = &pMemBucket->pSegs[i]; + for (int32_t j = 0; j < pSeg->numOfSlots; ++j) { + if (pSeg->pBuffer == NULL || pSeg->pBuffer[j] == NULL) { + continue; + } + // required value 
in current slot + if (num < (count + 1) && num + pSeg->pBuffer[j]->numOfTotalElems >= (count + 1)) { + if (pSeg->pBuffer[j]->numOfTotalElems + num == (count + 1)) { + /* + * now, we need to find the minimum value of the next slot for interpolating the percentile value + * j is the last slot of current segment, we need to get the first slot of the next segment. + * + */ + MinMaxEntry next = getMinMaxEntryOfNextSlotWithData(pMemBucket, i, j); + + double maxOfThisSlot = 0; + double minOfNextSlot = 0; + switch (pMemBucket->dataType) { + case TSDB_DATA_TYPE_INT: + case TSDB_DATA_TYPE_SMALLINT: + case TSDB_DATA_TYPE_TINYINT: { + maxOfThisSlot = pSeg->pBoundingEntries[j].iMaxVal; + minOfNextSlot = next.iMinVal; + break; + }; + case TSDB_DATA_TYPE_FLOAT: + case TSDB_DATA_TYPE_DOUBLE: { + maxOfThisSlot = pSeg->pBoundingEntries[j].dMaxVal; + minOfNextSlot = next.dMinVal; + break; + }; + case TSDB_DATA_TYPE_BIGINT: { + maxOfThisSlot = (double)pSeg->pBoundingEntries[j].i64MaxVal; + minOfNextSlot = (double)next.i64MinVal; + break; + } + }; + + assert(minOfNextSlot > maxOfThisSlot); + + double val = (1 - fraction) * maxOfThisSlot + fraction * minOfNextSlot; + return val; + } + if (pSeg->pBuffer[j]->numOfTotalElems <= pMemBucket->maxElemsCapacity) { + // data in buffer and file are merged together to be processed. 
+ tFilePage *buffer = loadIntoBucketFromDisk(pMemBucket, i, j, pMemBucket->pOrderDesc); + int32_t currentIdx = count - num; + + char * thisVal = buffer->data + pMemBucket->nElemSize * currentIdx; + char * nextVal = thisVal + pMemBucket->nElemSize; + double td, nd; + switch (pMemBucket->dataType) { + case TSDB_DATA_TYPE_SMALLINT: { + td = *(int16_t *)thisVal; + nd = *(int16_t *)nextVal; + break; + } + case TSDB_DATA_TYPE_TINYINT: { + td = *(int8_t *)thisVal; + nd = *(int8_t *)nextVal; + break; + } + case TSDB_DATA_TYPE_INT: { + td = *(int32_t *)thisVal; + nd = *(int32_t *)nextVal; + break; + }; + case TSDB_DATA_TYPE_FLOAT: { + // td = *(float *)thisVal; + // nd = *(float *)nextVal; + td = GET_FLOAT_VAL(thisVal); + nd = GET_FLOAT_VAL(nextVal); + break; + } + case TSDB_DATA_TYPE_DOUBLE: { + // td = *(double *)thisVal; + td = GET_DOUBLE_VAL(thisVal); + // nd = *(double *)nextVal; + nd = GET_DOUBLE_VAL(nextVal); + break; + } + case TSDB_DATA_TYPE_BIGINT: { + td = (double)*(int64_t *)thisVal; + nd = (double)*(int64_t *)nextVal; + break; + } + } + double val = (1 - fraction) * td + fraction * nd; + tfree(buffer); + + return val; + } else { // incur a second round bucket split + if (isIdenticalData(pMemBucket, i, j)) { + tExtMemBuffer *pMemBuffer = pSeg->pBuffer[j]; + + tFilePage *pPage = (tFilePage *)malloc(pMemBuffer->pageSize); + + char *thisVal = getFirstElemOfMemBuffer(pSeg, j, pPage); + + double finalResult = 0.0; + + switch (pMemBucket->dataType) { + case TSDB_DATA_TYPE_SMALLINT: { + finalResult = *(int16_t *)thisVal; + break; + } + case TSDB_DATA_TYPE_TINYINT: { + finalResult = *(int8_t *)thisVal; + break; + } + case TSDB_DATA_TYPE_INT: { + finalResult = *(int32_t *)thisVal; + break; + }; + case TSDB_DATA_TYPE_FLOAT: { + // finalResult = *(float *)thisVal; + finalResult = GET_FLOAT_VAL(thisVal); + break; + } + case TSDB_DATA_TYPE_DOUBLE: { + // finalResult = *(double *)thisVal; + finalResult = GET_DOUBLE_VAL(thisVal); + break; + } + case TSDB_DATA_TYPE_BIGINT: { + 
finalResult = (double)(*(int64_t *)thisVal); + break; + } + } + + free(pPage); + return finalResult; + } + + pTrace("MemBucket:%p,start second round bucketing", pMemBucket); + + if (pSeg->pBuffer[j]->numOfElemsInBuffer != 0) { + pTrace("MemBucket:%p,flush %d pages to disk, clear status", pMemBucket, pSeg->pBuffer[j]->numOfInMemPages); + + pMemBucket->numOfAvailPages += pSeg->pBuffer[j]->numOfInMemPages; + tExtMemBufferFlush(pSeg->pBuffer[j]); + } + + tExtMemBuffer *pMemBuffer = pSeg->pBuffer[j]; + pSeg->pBuffer[j] = NULL; + + // release all + for (int32_t tt = 0; tt < pMemBucket->numOfSegs; ++tt) { + tMemBucketSegment *pSeg = &pMemBucket->pSegs[tt]; + for (int32_t ttx = 0; ttx < pSeg->numOfSlots; ++ttx) { + if (pSeg->pBuffer && pSeg->pBuffer[ttx]) { + pSeg->pBuffer[ttx] = destoryExtMemBuffer(pSeg->pBuffer[ttx]); + } + } + } + + pMemBucket->nRange.i64MaxVal = pSeg->pBoundingEntries->i64MaxVal; + pMemBucket->nRange.i64MinVal = pSeg->pBoundingEntries->i64MinVal; + pMemBucket->numOfElems = 0; + + for (int32_t tt = 0; tt < pMemBucket->numOfSegs; ++tt) { + tMemBucketSegment *pSeg = &pMemBucket->pSegs[tt]; + for (int32_t ttx = 0; ttx < pSeg->numOfSlots; ++ttx) { + if (pSeg->pBoundingEntries) { + resetBoundingBox(pSeg, pMemBucket->dataType); + } + } + } + + tFilePage *pPage = (tFilePage *)malloc(pMemBuffer->pageSize); + + tFlushoutInfo *pFlushInfo = &pMemBuffer->fileMeta.flushoutData.pFlushoutInfo[0]; + assert(pFlushInfo->numOfPages == pMemBuffer->fileMeta.nFileSize); + + int32_t ret = fseek(pMemBuffer->file, pFlushInfo->startPageId * pMemBuffer->pageSize, SEEK_SET); + UNUSED(ret); + + for (uint32_t jx = 0; jx < pFlushInfo->numOfPages; ++jx) { + ret = fread(pPage, pMemBuffer->pageSize, 1, pMemBuffer->file); + UNUSED(ret); + tMemBucketPut(pMemBucket, pPage->data, pPage->numOfElems); + } + + fclose(pMemBuffer->file); + if (unlink(pMemBuffer->path) != 0) { + pError("MemBucket:%p, remove tmp file %s failed", pMemBucket, pMemBuffer->path); + } + tfree(pMemBuffer); + 
tfree(pPage); + + return getPercentileImpl(pMemBucket, count - num, fraction); + } + } else { + num += pSeg->pBuffer[j]->numOfTotalElems; + } + } + } + return 0; +} + +double getPercentile(tMemBucket *pMemBucket, double percent) { + if (pMemBucket->numOfElems == 0) { + return 0.0; + } + + if (pMemBucket->numOfElems == 1) { // return the only element + return findOnlyResult(pMemBucket); + } + + percent = fabs(percent); + + // validate the parameters + if (fabs(percent - 100.0) < DBL_EPSILON || (percent < DBL_EPSILON)) { + double minx = 0, maxx = 0; + /* + * find the min/max value, no need to scan all data in bucket + */ + findMaxMinValue(pMemBucket, &maxx, &minx); + + return fabs(percent - 100) < DBL_EPSILON ? maxx : minx; + } + + double percentVal = (percent * (pMemBucket->numOfElems - 1)) / ((double)100.0); + int32_t orderIdx = (int32_t)percentVal; + + // do put data by using buckets + return getPercentileImpl(pMemBucket, orderIdx, percentVal - orderIdx); +} + +/* + * check if data in one slot are all identical + * only need to compare with the bounding box + */ +bool isIdenticalData(tMemBucket *pMemBucket, int32_t segIdx, int32_t slotIdx) { + tMemBucketSegment *pSeg = &pMemBucket->pSegs[segIdx]; + + if (pMemBucket->dataType == TSDB_DATA_TYPE_INT || pMemBucket->dataType == TSDB_DATA_TYPE_BIGINT || + pMemBucket->dataType == TSDB_DATA_TYPE_SMALLINT || pMemBucket->dataType == TSDB_DATA_TYPE_TINYINT) { + return pSeg->pBoundingEntries[slotIdx].i64MinVal == pSeg->pBoundingEntries[slotIdx].i64MaxVal; + } + + if (pMemBucket->dataType == TSDB_DATA_TYPE_FLOAT || pMemBucket->dataType == TSDB_DATA_TYPE_DOUBLE) { + return fabs(pSeg->pBoundingEntries[slotIdx].dMaxVal - pSeg->pBoundingEntries[slotIdx].dMinVal) < DBL_EPSILON; + } + + return false; +} + +/* + * get the first element of one slot into memory. 
+ * if no data of current slot in memory, load it from disk + */ +char *getFirstElemOfMemBuffer(tMemBucketSegment *pSeg, int32_t slotIdx, tFilePage *pPage) { + tExtMemBuffer *pMemBuffer = pSeg->pBuffer[slotIdx]; + char * thisVal = NULL; + + if (pSeg->pBuffer[slotIdx]->numOfElemsInBuffer != 0) { + thisVal = pSeg->pBuffer[slotIdx]->pHead->item.data; + } else { + /* + * no data in memory, load one page into memory + */ + tFlushoutInfo *pFlushInfo = &pMemBuffer->fileMeta.flushoutData.pFlushoutInfo[0]; + assert(pFlushInfo->numOfPages == pMemBuffer->fileMeta.nFileSize); + + fseek(pMemBuffer->file, pFlushInfo->startPageId * pMemBuffer->pageSize, SEEK_SET); + size_t ret = fread(pPage, pMemBuffer->pageSize, 1, pMemBuffer->file); + UNUSED(ret); + thisVal = pPage->data; + } + return thisVal; +} -- GitLab