From 4e90982c22be6fe41c15f2ea04104735d8b29108 Mon Sep 17 00:00:00 2001 From: Haojun Liao Date: Fri, 6 May 2022 17:23:20 +0800 Subject: [PATCH] fix(query): sort according to the generated column data in order by operator. --- source/common/src/tdatablock.c | 15 ++++--- source/libs/executor/inc/executorimpl.h | 5 +-- source/libs/executor/inc/tsort.h | 15 +++++-- source/libs/executor/src/executorimpl.c | 37 +++++----------- source/libs/executor/src/sortoperator.c | 59 +++++++++++++++++-------- source/libs/executor/src/tsort.c | 40 ++++++++--------- source/libs/scalar/src/sclvector.c | 6 +-- 7 files changed, 94 insertions(+), 83 deletions(-) diff --git a/source/common/src/tdatablock.c b/source/common/src/tdatablock.c index dcea167e81..4946c9690b 100644 --- a/source/common/src/tdatablock.c +++ b/source/common/src/tdatablock.c @@ -363,9 +363,9 @@ int32_t blockDataMerge(SSDataBlock* pDest, const SSDataBlock* pSrc, SArray* pInd for (int32_t i = 0; i < pDest->info.numOfCols; ++i) { int32_t mapIndex = i; - if (pIndexMap) { - mapIndex = *(int32_t*)taosArrayGet(pIndexMap, i); - } +// if (pIndexMap) { +// mapIndex = *(int32_t*)taosArrayGet(pIndexMap, i); +// } SColumnInfoData* pCol2 = taosArrayGet(pDest->pDataBlock, i); SColumnInfoData* pCol1 = taosArrayGet(pSrc->pDataBlock, mapIndex); @@ -491,9 +491,14 @@ SSDataBlock* blockDataExtractBlock(SSDataBlock* pBlock, int32_t startIndex, int3 SColumnInfoData* pDstCol = taosArrayGet(pDst->pDataBlock, i); for (int32_t j = startIndex; j < (startIndex + rowCount); ++j) { - bool isNull = colDataIsNull(pColData, pBlock->info.rows, j, pBlock->pBlockAgg[i]); - char* p = colDataGetData(pColData, j); + bool isNull = false; + if (pBlock->pBlockAgg == NULL) { + isNull = colDataIsNull_s(pColData, pBlock->info.rows); + } else { + isNull = colDataIsNull(pColData, pBlock->info.rows, j, pBlock->pBlockAgg[i]); + } + char* p = colDataGetData(pColData, j); colDataAppend(pDstCol, j - startIndex, p, isNull); } } diff --git a/source/libs/executor/inc/executorimpl.h b/source/libs/executor/inc/executorimpl.h index eefa38d802..3b5d0c209f 100644 --- a/source/libs/executor/inc/executorimpl.h +++ b/source/libs/executor/inc/executorimpl.h @@ -578,9 +578,8 @@ typedef struct SSortOperatorInfo { uint32_t sortBufSize; // max buffer size for in-memory sort SArray* pSortInfo; SSortHandle* pSortHandle; - SArray* inputSlotMap; // for index map from table scan output + SArray* pColMatchInfo; // for index map from table scan output int32_t bufPageSize; -// int32_t numOfRowsInRes; // TODO extact struct int64_t startTs; // sort start time @@ -645,7 +644,7 @@ void cleanupAggSup(SAggSupporter* pAggSup); void destroyBasicOperatorInfo(void* param, int32_t numOfOutput); void appendOneRowToDataBlock(SSDataBlock* pBlock, STupleHandle* pTupleHandle); -SSDataBlock* getSortedBlockData(SSortHandle* pHandle, SSDataBlock* pDataBlock, int32_t capacity); +SSDataBlock* getSortedBlockData(SSortHandle* pHandle, SSDataBlock* pDataBlock, int32_t capacity, SArray* pColMatchInfo); SSDataBlock* loadNextDataBlock(void* param); void setResultRowInitCtx(SResultRow* pResult, SqlFunctionCtx* pCtx, int32_t numOfOutput, int32_t* rowCellInfoOffset); diff --git a/source/libs/executor/inc/tsort.h b/source/libs/executor/inc/tsort.h index 2072707b30..d74628a72f 100644 --- a/source/libs/executor/inc/tsort.h +++ b/source/libs/executor/inc/tsort.h @@ -117,18 +117,25 @@ STupleHandle* tsortNextTuple(SSortHandle* pHandle); /** * * @param pHandle - * @param colIndex + * @param colId * @return */ -bool tsortIsNullVal(STupleHandle* pVHandle, int32_t colIndex); +bool tsortIsNullVal(STupleHandle* pVHandle, int32_t colId); /** * * @param pHandle - * @param colIndex + * @param colId * @return */ -void* tsortGetValue(STupleHandle* pVHandle, int32_t colIndex); +void* tsortGetValue(STupleHandle* pVHandle, int32_t colId); + +/** + * + * @param pSortHandle + * @return + */ +SSDataBlock* tsortGetSortedDataBlock(const SSortHandle* pSortHandle); #ifdef __cplusplus } diff --git a/source/libs/executor/src/executorimpl.c b/source/libs/executor/src/executorimpl.c index 9aa251e1b6..943d4b2783 100644 --- a/source/libs/executor/src/executorimpl.c +++ b/source/libs/executor/src/executorimpl.c @@ -3520,7 +3520,7 @@ static SSDataBlock* doSortedMerge(SOperatorInfo* pOperator) { SExecTaskInfo* pTaskInfo = pOperator->pTaskInfo; SSortedMergeOperatorInfo* pInfo = pOperator->info; if (pOperator->status == OP_RES_TO_RETURN) { - return getSortedBlockData(pInfo->pSortHandle, pInfo->binfo.pRes, pOperator->resultInfo.capacity); + return getSortedBlockData(pInfo->pSortHandle, pInfo->binfo.pRes, pOperator->resultInfo.capacity, NULL); } int32_t numOfBufPage = pInfo->sortBufSize / pInfo->bufPageSize; @@ -4701,7 +4701,7 @@ static SArray* extractTableIdList(const STableGroupInfo* pTableGroupInfo); static SArray* extractColumnInfo(SNodeList* pNodeList); static SArray* extractColMatchInfo(SNodeList* pNodeList, SDataBlockDescNode* pOutputNodeList, int32_t* numOfOutputCols); -static SArray* createSortInfo(SNodeList* pNodeList, SNodeList* pNodeListTarget); +static SArray* createSortInfo(SNodeList* pNodeList); static SArray* createIndexMap(SNodeList* pNodeList); static SArray* extractPartitionColInfo(SNodeList* pNodeList); static int32_t initQueryTableDataCond(SQueryTableDataCond* pCond, const STableScanPhysiNode* pTableScanNode); @@ -4870,16 +4870,16 @@ SOperatorInfo* createOperatorTree(SPhysiNode* pPhyNode, SExecTaskInfo* pTaskInfo SSortPhysiNode* pSortPhyNode = (SSortPhysiNode*)pPhyNode; SSDataBlock* pResBlock = createResDataBlock(pPhyNode->pOutputDataBlockDesc); - SArray* info = createSortInfo(pSortPhyNode->pSortKeys, pSortPhyNode->pTargets); - SArray* slotMap = createIndexMap(pSortPhyNode->pTargets); + SArray* info = createSortInfo(pSortPhyNode->pSortKeys); int32_t numOfCols = 0; - SExprInfo* pExprInfo = NULL; - if (pSortPhyNode->pExprs != NULL) { - pExprInfo = createExprInfo(pSortPhyNode->pExprs, NULL, &numOfCols); - } + SExprInfo* pExprInfo = createExprInfo(pSortPhyNode->pExprs, NULL, &numOfCols); + + int32_t numOfOutputCols = 0; + SArray* pColList = + extractColMatchInfo(pSortPhyNode->pTargets, pSortPhyNode->node.pOutputDataBlockDesc, &numOfOutputCols); - pOptr = createSortOperatorInfo(ops[0], pResBlock, info, pExprInfo, numOfCols, slotMap, pTaskInfo); + pOptr = createSortOperatorInfo(ops[0], pResBlock, info, pExprInfo, numOfCols, pColList, pTaskInfo); } else if (QUERY_NODE_PHYSICAL_PLAN_SESSION_WINDOW == type) { SSessionWinodwPhysiNode* pSessionNode = (SSessionWinodwPhysiNode*)pPhyNode; @@ -5037,7 +5037,7 @@ SArray* extractPartitionColInfo(SNodeList* pNodeList) { return pList; } -SArray* createSortInfo(SNodeList* pNodeList, SNodeList* pNodeListTarget) { +SArray* createSortInfo(SNodeList* pNodeList) { size_t numOfCols = LIST_LENGTH(pNodeList); SArray* pList = taosArrayInit(numOfCols, sizeof(SBlockOrderInfo)); if (pList == NULL) { @@ -5052,22 +5052,7 @@ SArray* createSortInfo(SNodeList* pNodeList, SNodeList* pNodeListTarget) { bi.nullFirst = (pSortKey->nullOrder == NULL_ORDER_FIRST); SColumnNode* pColNode = (SColumnNode*)pSortKey->pExpr; - - bool found = false; - for (int32_t j = 0; j < LIST_LENGTH(pNodeListTarget); ++j) { - STargetNode* pTarget = (STargetNode*)nodesListGetNode(pNodeListTarget, j); - - SColumnNode* pColNodeT = (SColumnNode*)pTarget->pExpr; - if (pColNode->slotId == pColNodeT->slotId) { // to find slotId in PhysiSort OutputDataBlockDesc - bi.slotId = pTarget->slotId; - found = true; - break; - } - } - - if (!found) { - qError("sort slot id does not found"); - } + bi.slotId = pColNode->slotId; taosArrayPush(pList, &bi); } diff --git a/source/libs/executor/src/sortoperator.c b/source/libs/executor/src/sortoperator.c index 0f973b0cf0..619651f11f 100644 --- a/source/libs/executor/src/sortoperator.c +++ b/source/libs/executor/src/sortoperator.c @@ -5,7 +5,7 @@ static SSDataBlock* doSort(SOperatorInfo* pOperator); static void destroyOrderOperatorInfo(void* param, int32_t numOfOutput); SOperatorInfo* createSortOperatorInfo(SOperatorInfo* downstream, SSDataBlock* pResBlock, SArray* pSortInfo, SExprInfo* pExprInfo, int32_t numOfCols, - SArray* pIndexMap, SExecTaskInfo* pTaskInfo) { + SArray* pColMatchColInfo, SExecTaskInfo* pTaskInfo) { SSortOperatorInfo* pInfo = taosMemoryCalloc(1, sizeof(SSortOperatorInfo)); SOperatorInfo* pOperator = taosMemoryCalloc(1, sizeof(SOperatorInfo)); int32_t rowSize = pResBlock->info.rowSize; @@ -20,17 +20,19 @@ SOperatorInfo* createSortOperatorInfo(SOperatorInfo* downstream, SSDataBlock* pR pInfo->binfo.pRes = pResBlock; initResultSizeInfo(pOperator, 1024); - pInfo->bufPageSize = rowSize < 1024 ? 1024 * 2 : rowSize * 2; // there are headers, so pageSize = rowSize + header - pInfo->sortBufSize = pInfo->bufPageSize * 16; // TODO dynamic set the available sort buffer pInfo->pSortInfo = pSortInfo; - pInfo->inputSlotMap = pIndexMap; + pInfo->pColMatchInfo= pColMatchColInfo; pOperator->name = "SortOperator"; pOperator->operatorType = QUERY_NODE_PHYSICAL_PLAN_SORT; pOperator->blocking = true; pOperator->status = OP_NOT_OPENED; pOperator->info = pInfo; + // lazy evaluation for the following parameter since the input datablock is not known till now. +// pInfo->bufPageSize = rowSize < 1024 ? 1024 * 2 : rowSize * 2; // there are headers, so pageSize = rowSize + header +// pInfo->sortBufSize = pInfo->bufPageSize * 16; // TODO dynamic set the available sort buffer + pOperator->pTaskInfo = pTaskInfo; pOperator->fpSet = createOperatorFpSet(operatorDummyOpenFn, doSort, NULL, NULL, destroyOrderOperatorInfo, NULL, NULL, NULL); @@ -45,14 +47,12 @@ SOperatorInfo* createSortOperatorInfo(SOperatorInfo* downstream, SSDataBlock* pR return NULL; } -// TODO merge aggregate super table void appendOneRowToDataBlock(SSDataBlock* pBlock, STupleHandle* pTupleHandle) { for (int32_t i = 0; i < pBlock->info.numOfCols; ++i) { SColumnInfoData* pColInfo = taosArrayGet(pBlock->pDataBlock, i); - bool isNull = tsortIsNullVal(pTupleHandle, i); if (isNull) { - colDataAppend(pColInfo, pBlock->info.rows, NULL, true); + colDataAppendNULL(pColInfo, pBlock->info.rows); } else { char* pData = tsortGetValue(pTupleHandle, i); colDataAppend(pColInfo, pBlock->info.rows, pData, false); @@ -62,11 +62,12 @@ void appendOneRowToDataBlock(SSDataBlock* pBlock, STupleHandle* pTupleHandle) { pBlock->info.rows += 1; } -SSDataBlock* getSortedBlockData(SSortHandle* pHandle, SSDataBlock* pDataBlock, int32_t capacity) { +SSDataBlock* getSortedBlockData(SSortHandle* pHandle, SSDataBlock* pDataBlock, int32_t capacity, SArray* pColMatchInfo) { blockDataCleanup(pDataBlock); - blockDataEnsureCapacity(pDataBlock, capacity); + ASSERT(taosArrayGetSize(pColMatchInfo) == pDataBlock->info.numOfCols); - blockDataEnsureCapacity(pDataBlock, capacity); + SSDataBlock* p = tsortGetSortedDataBlock(pHandle); + blockDataEnsureCapacity(p, capacity); while (1) { STupleHandle* pTupleHandle = tsortNextTuple(pHandle); @@ -74,12 +75,32 @@ SSDataBlock* getSortedBlockData(SSortHandle* pHandle, SSDataBlock* pDataBlock, i break; } - appendOneRowToDataBlock(pDataBlock, pTupleHandle); - if (pDataBlock->info.rows >= capacity) { + appendOneRowToDataBlock(p, pTupleHandle); + if (p->info.rows >= capacity) { return pDataBlock; } } + if (p->info.rows > 0) { + int32_t numOfCols = taosArrayGetSize(pColMatchInfo); + for(int32_t i = 0; i < numOfCols; ++i) { + SColMatchInfo* pmInfo = taosArrayGet(pColMatchInfo, i); + + for(int32_t j = 0; j < p->info.numOfCols; ++j) { + SColumnInfoData* pSrc = taosArrayGet(p->pDataBlock, j); + if (pSrc->info.colId == pmInfo->colId) { + SColumnInfoData* pDst = taosArrayGet(pDataBlock->pDataBlock, pmInfo->targetSlotId); + colDataAssign(pDst, pSrc, p->info.rows); + break; + } + } + } + + pDataBlock->info.rows = p->info.rows; + pDataBlock->info.capacity = p->info.rows; + } + + blockDataDestroy(p); return (pDataBlock->info.rows > 0) ? pDataBlock : NULL; } @@ -106,16 +127,16 @@ SSDataBlock* doSort(SOperatorInfo* pOperator) { SSortOperatorInfo* pInfo = pOperator->info; if (pOperator->status == OP_RES_TO_RETURN) { - return getSortedBlockData(pInfo->pSortHandle, pInfo->binfo.pRes, pOperator->resultInfo.capacity); + return getSortedBlockData(pInfo->pSortHandle, pInfo->binfo.pRes, pOperator->resultInfo.capacity, pInfo->pColMatchInfo); } - int32_t numOfBufPage = pInfo->sortBufSize / pInfo->bufPageSize; - pInfo->pSortHandle = tsortCreateSortHandle(pInfo->pSortInfo, pInfo->inputSlotMap, SORT_SINGLESOURCE_SORT, - pInfo->bufPageSize, numOfBufPage, pInfo->binfo.pRes, pTaskInfo->id.str); +// pInfo->binfo.pRes is not equalled to the input datablock. +// int32_t numOfBufPage = pInfo->sortBufSize / pInfo->bufPageSize; + pInfo->pSortHandle = tsortCreateSortHandle(pInfo->pSortInfo, pInfo->pColMatchInfo, SORT_SINGLESOURCE_SORT, + -1, -1, NULL, pTaskInfo->id.str); tsortSetFetchRawDataFp(pInfo->pSortHandle, loadNextDataBlock, applyScalarFunction, pOperator); - SSortSource* ps = taosMemoryCalloc(1, sizeof(SSortSource)); ps->param = pOperator->pDownstream[0]; tsortAddSource(pInfo->pSortHandle, ps); @@ -127,7 +148,7 @@ SSDataBlock* doSort(SOperatorInfo* pOperator) { } pOperator->status = OP_RES_TO_RETURN; - return getSortedBlockData(pInfo->pSortHandle, pInfo->binfo.pRes, pOperator->resultInfo.capacity); + return getSortedBlockData(pInfo->pSortHandle, pInfo->binfo.pRes, pOperator->resultInfo.capacity, pInfo->pColMatchInfo); } void destroyOrderOperatorInfo(void* param, int32_t numOfOutput) { @@ -135,5 +156,5 @@ void destroyOrderOperatorInfo(void* param, int32_t numOfOutput) { pInfo->binfo.pRes = blockDataDestroy(pInfo->binfo.pRes); taosArrayDestroy(pInfo->pSortInfo); - taosArrayDestroy(pInfo->inputSlotMap); + taosArrayDestroy(pInfo->pColMatchInfo); } diff --git a/source/libs/executor/src/tsort.c b/source/libs/executor/src/tsort.c index 50aa4cfc01..040ee8c7f5 100644 --- a/source/libs/executor/src/tsort.c +++ b/source/libs/executor/src/tsort.c @@ -64,25 +64,8 @@ struct SSortHandle { static int32_t msortComparFn(const void *pLeft, const void *pRight, void *param); -static SSDataBlock* createDataBlock_rv(SSchema* pSchema, int32_t numOfCols) { - SSDataBlock* pBlock = taosMemoryCalloc(1, sizeof(SSDataBlock)); - pBlock->pDataBlock = taosArrayInit(numOfCols, sizeof(SColumnInfoData)); - pBlock->info.numOfCols = numOfCols; - - for(int32_t i = 0; i < numOfCols; ++i) { - SColumnInfoData colInfo = {0}; - - colInfo.info.type = pSchema[i].type; - colInfo.info.bytes = pSchema[i].bytes; - colInfo.info.colId = pSchema[i].colId; - taosArrayPush(pBlock->pDataBlock, &colInfo); - - if (IS_VAR_DATA_TYPE(colInfo.info.type)) { - pBlock->info.hasVarCol = true; - } - } - - return pBlock; +SSDataBlock* tsortGetSortedDataBlock(const SSortHandle* pSortHandle) { + return createOneDataBlock(pSortHandle->pDataBlock, false); } /** @@ -98,7 +81,10 @@ SSortHandle* tsortCreateSortHandle(SArray* pSortInfo, SArray* pIndexMap, int32_t pSortHandle->numOfPages = numOfPages; pSortHandle->pSortInfo = pSortInfo; pSortHandle->pIndexMap = pIndexMap; - pSortHandle->pDataBlock = createOneDataBlock(pBlock, false); + + if (pBlock != NULL) { + pSortHandle->pDataBlock = createOneDataBlock(pBlock, false); + } pSortHandle->pOrderedSource = taosArrayInit(4, POINTER_BYTES); pSortHandle->cmpParam.orderInfo = pSortInfo; @@ -530,6 +516,17 @@ static int32_t createInitialSortedMultiSources(SSortHandle* pHandle) { if (pHandle->pDataBlock == NULL) { pHandle->pDataBlock = createOneDataBlock(pBlock, false); + + // calculate the buffer pages according to the total available buffers. + int32_t rowSize = blockDataGetRowSize(pBlock); + if (rowSize * 4 > 4096) { + pHandle->pageSize = rowSize * 4; + } else { + pHandle->pageSize = 4096; + } + // todo!! + pHandle->numOfPages = 1024; + sortBufSize = pHandle->numOfPages * pHandle->pageSize; } // perform the scalar function calculation before apply the sort @@ -538,7 +535,6 @@ static int32_t createInitialSortedMultiSources(SSortHandle* pHandle) { } // todo relocate the columns - int32_t code = blockDataMerge(pHandle->pDataBlock, pBlock, pHandle->pIndexMap); if (code != 0) { return code; @@ -689,7 +685,7 @@ STupleHandle* tsortNextTuple(SSortHandle* pHandle) { bool tsortIsNullVal(STupleHandle* pVHandle, int32_t colIndex) { SColumnInfoData* pColInfoSrc = taosArrayGet(pVHandle->pBlock->pDataBlock, colIndex); - return colDataIsNull(pColInfoSrc, 0, pVHandle->rowIndex, NULL); + return colDataIsNull_s(pColInfoSrc, pVHandle->rowIndex); } void* tsortGetValue(STupleHandle* pVHandle, int32_t colIndex) { diff --git a/source/libs/scalar/src/sclvector.c b/source/libs/scalar/src/sclvector.c index 84aa5559d0..1e37533f2c 100644 --- a/source/libs/scalar/src/sclvector.c +++ b/source/libs/scalar/src/sclvector.c @@ -1023,8 +1023,7 @@ static void vectorMathMultiplyHelper(SColumnInfoData* pLeftCol, SColumnInfoData* colDataAppendNULL(pOutputCol, i); continue; // TODO set null or ignore } - *output = getVectorDoubleValueFnLeft(LEFT_COL, i) - * getVectorDoubleValueFnRight(RIGHT_COL, 0); + *output = getVectorDoubleValueFnLeft(LEFT_COL, i) * getVectorDoubleValueFnRight(RIGHT_COL, 0); } } } @@ -1050,8 +1049,7 @@ void vectorMathMultiply(SScalarParam* pLeft, SScalarParam* pRight, SScalarParam colDataAppendNULL(pOutputCol, i); continue; // TODO set null or ignore } - *output = getVectorDoubleValueFnLeft(LEFT_COL, i) - * getVectorDoubleValueFnRight(RIGHT_COL, i); + *output = getVectorDoubleValueFnLeft(LEFT_COL, i) * getVectorDoubleValueFnRight(RIGHT_COL, i); } } else if (pLeft->numOfRows == 1) { vectorMathMultiplyHelper(pRightCol, pLeftCol, pOutputCol, pRight->numOfRows, step, i); -- GitLab