未验证 提交 c5c5b5aa 编写于 作者: H Haojun Liao 提交者: GitHub

Merge pull request #17662 from taosdata/feature/3_liaohj

enh(query): enable the limit clause to be push down
......@@ -321,12 +321,11 @@ typedef struct {
typedef struct STableScanInfo {
STsdbReader* dataReader;
SReadHandle readHandle;
SLimitInfo limitInfo;
SFileBlockLoadRecorder readRecorder;
SScanInfo scanInfo;
int32_t scanTimes;
SNode* pFilterNode; // filter info, which is push down by optimizer
SSDataBlock* pResBlock;
SColMatchInfo matchInfo;
SExprSupp pseudoSup;
......@@ -337,7 +336,6 @@ typedef struct STableScanInfo {
int32_t currentGroupId;
int32_t currentTable;
int8_t scanMode;
int8_t noTable;
SAggOptrPushDownInfo pdInfo;
int8_t assignBlockUid;
} STableScanInfo;
......@@ -357,7 +355,7 @@ typedef struct STableMergeScanInfo {
SSDataBlock* pSortInputBlock;
int64_t startTs; // sort start time
SArray* sortSourceParams;
SLimitInfo limitInfo;
SFileBlockLoadRecorder readRecorder;
int64_t numOfRows;
SScanInfo scanInfo;
......@@ -374,6 +372,7 @@ typedef struct STableMergeScanInfo {
SQueryTableDataCond cond;
int32_t scanFlag; // table scan flag to denote if it is a repeat/reverse/main scan
int32_t dataBlockLoadFlag;
// if the upstream is an interval operator, the interval info is also kept here to get the time
// window to check if current data block needs to be loaded.
SInterval interval;
......@@ -903,6 +902,7 @@ void doBuildResultDatablock(SOperatorInfo* pOperator, SOptrBasicInfo* pbInfo, SG
int32_t handleLimitOffset(SOperatorInfo* pOperator, SLimitInfo* pLimitInfo, SSDataBlock* pBlock, bool holdDataInBuf);
bool hasLimitOffsetInfo(SLimitInfo* pLimitInfo);
void initLimitInfo(const SNode* pLimit, const SNode* pSLimit, SLimitInfo* pLimitInfo);
void applyLimitOffset(SLimitInfo* pLimitInfo, SSDataBlock* pBlock, SExecTaskInfo* pTaskInfo, SOperatorInfo* pOperator);
void doApplyFunctions(SExecTaskInfo* taskInfo, SqlFunctionCtx* pCtx, SColumnInfoData* pTimeWindowData, int32_t offset,
int32_t forwardStep, int32_t numOfTotal, int32_t numOfOutput);
......
......@@ -210,8 +210,6 @@ SSDataBlock* doProjectOperation(SOperatorInfo* pOperator) {
pOperator->status = OP_OPENED;
}
qDebug("enter project");
if (pOperator->status == OP_EXEC_DONE) {
if (pTaskInfo->execModel == OPTR_EXEC_MODEL_QUEUE) {
pOperator->status = OP_OPENED;
......
......@@ -355,6 +355,34 @@ static void doSetTagColumnData(STableScanInfo* pTableScanInfo, SSDataBlock* pBlo
}
}
// todo handle the slimit info
void applyLimitOffset(SLimitInfo* pLimitInfo, SSDataBlock* pBlock, SExecTaskInfo* pTaskInfo, SOperatorInfo* pOperator) {
SLimit* pLimit = &pLimitInfo->limit;
if (pLimit->offset > 0 && pLimitInfo->remainOffset > 0) {
if (pLimitInfo->remainOffset >= pBlock->info.rows) {
pLimitInfo->remainOffset -= pBlock->info.rows;
pBlock->info.rows = 0;
qDebug("current block ignore due to offset, current:%"PRId64", %s", pLimitInfo->remainOffset, GET_TASKID(pTaskInfo));
} else {
blockDataTrimFirstNRows(pBlock, pLimitInfo->remainOffset);
pLimitInfo->remainOffset = 0;
}
}
if (pLimit->limit != -1 && pLimit->limit <= (pLimitInfo->numOfOutputRows + pBlock->info.rows)) {
// limit the output rows
int32_t overflowRows = pLimitInfo->numOfOutputRows + pBlock->info.rows - pLimit->limit;
int32_t keep = pBlock->info.rows - overflowRows;
blockDataKeepFirstNRows(pBlock, keep);
qDebug("output limit %"PRId64" has reached, %s", pLimit->limit, GET_TASKID(pTaskInfo));
// setTaskStatus(pTaskInfo, TASK_COMPLETED);
pOperator->status = OP_EXEC_DONE;
}
}
static int32_t loadDataBlock(SOperatorInfo* pOperator, STableScanInfo* pTableScanInfo, SSDataBlock* pBlock,
uint32_t* status) {
SExecTaskInfo* pTaskInfo = pOperator->pTaskInfo;
......@@ -364,6 +392,7 @@ static int32_t loadDataBlock(SOperatorInfo* pOperator, STableScanInfo* pTableSca
pCost->totalBlocks += 1;
pCost->totalRows += pBlock->info.rows;
bool loadSMA = false;
*status = pInfo->dataBlockLoadFlag;
......@@ -379,6 +408,7 @@ static int32_t loadDataBlock(SOperatorInfo* pOperator, STableScanInfo* pTableSca
qDebug("%s data block filter out, brange:%" PRId64 "-%" PRId64 ", rows:%d", GET_TASKID(pTaskInfo),
pBlockInfo->window.skey, pBlockInfo->window.ekey, pBlockInfo->rows);
pCost->filterOutBlocks += 1;
pCost->totalRows += pBlock->info.rows;
return TSDB_CODE_SUCCESS;
} else if (*status == FUNC_DATA_REQUIRED_NOT_LOAD) {
qDebug("%s data block skipped, brange:%" PRId64 "-%" PRId64 ", rows:%d", GET_TASKID(pTaskInfo),
......@@ -446,6 +476,9 @@ static int32_t loadDataBlock(SOperatorInfo* pOperator, STableScanInfo* pTableSca
relocateColumnData(pBlock, pTableScanInfo->matchInfo.pList, pCols, true);
doSetTagColumnData(pTableScanInfo, pBlock, pTaskInfo);
// restore the previous value
pCost->totalRows -= pBlock->info.rows;
if (pTableScanInfo->pFilterNode != NULL) {
int64_t st = taosGetTimestampUs();
doFilter(pTableScanInfo->pFilterNode, pBlock, &pTableScanInfo->matchInfo, pOperator->exprSupp.pFilterInfo);
......@@ -462,6 +495,10 @@ static int32_t loadDataBlock(SOperatorInfo* pOperator, STableScanInfo* pTableSca
}
}
applyLimitOffset(&pInfo->limitInfo, pBlock, pTaskInfo, pOperator);
pCost->totalRows += pBlock->info.rows;
pInfo->limitInfo.numOfOutputRows = pCost->totalRows;
return TSDB_CODE_SUCCESS;
}
......@@ -691,10 +728,6 @@ static SSDataBlock* doTableScan(SOperatorInfo* pOperator) {
// if scan table by table
if (pInfo->scanMode == TABLE_SCAN__TABLE_ORDER) {
if (pInfo->noTable) {
return NULL;
}
int32_t numOfTables = taosArrayGetSize(pTaskInfo->tableqinfoList.pTableList);
while (1) {
......@@ -727,7 +760,6 @@ static SSDataBlock* doTableScan(SOperatorInfo* pOperator) {
}
SArray* tableList = taosArrayGetP(pTaskInfo->tableqinfoList.pGroupList, pInfo->currentGroupId);
tsdbReaderClose(pInfo->dataReader);
int32_t code = tsdbReaderOpen(pInfo->readHandle.vnode, &pInfo->cond, tableList, (STsdbReader**)&pInfo->dataReader,
......@@ -749,9 +781,6 @@ static SSDataBlock* doTableScan(SOperatorInfo* pOperator) {
return NULL;
}
SArray* tableList = taosArrayGetP(pTaskInfo->tableqinfoList.pGroupList, pInfo->currentGroupId);
// tsdbSetTableList(pInfo->dataReader, tableList);
tsdbReaderReset(pInfo->dataReader, &pInfo->cond);
pInfo->scanTimes = 0;
......@@ -798,9 +827,15 @@ SOperatorInfo* createTableScanOperatorInfo(STableScanPhysiNode* pTableScanNode,
}
SDataBlockDescNode* pDescNode = pTableScanNode->scan.node.pOutputDataBlockDesc;
int32_t numOfCols = 0;
int32_t code = extractColMatchInfo(pTableScanNode->scan.pScanCols, pDescNode, &numOfCols, COL_MATCH_FROM_COL_ID,
&pInfo->matchInfo);
if (code != TSDB_CODE_SUCCESS) {
goto _error;
}
initLimitInfo(pTableScanNode->scan.node.pLimit, pTableScanNode->scan.node.pSlimit, &pInfo->limitInfo);
code = initQueryTableDataCond(&pInfo->cond, pTableScanNode);
if (code != TSDB_CODE_SUCCESS) {
......@@ -825,6 +860,9 @@ SOperatorInfo* createTableScanOperatorInfo(STableScanPhysiNode* pTableScanNode,
if (pInfo->pFilterNode != NULL) {
code = filterInitFromNode((SNode*)pInfo->pFilterNode, &pOperator->exprSupp.pFilterInfo, 0);
if (code != TSDB_CODE_SUCCESS) {
goto _error;
}
}
pInfo->scanFlag = MAIN_SCAN;
......@@ -847,10 +885,12 @@ SOperatorInfo* createTableScanOperatorInfo(STableScanPhysiNode* pTableScanNode,
return pOperator;
_error:
taosMemoryFreeClear(pInfo);
taosMemoryFreeClear(pOperator);
if (pInfo != NULL) {
destroyTableScanOperatorInfo(pInfo);
}
pTaskInfo->code = TSDB_CODE_QRY_OUT_OF_MEMORY;
taosMemoryFreeClear(pOperator);
pTaskInfo->code = code;
return NULL;
}
......@@ -4439,6 +4479,9 @@ SSDataBlock* getSortedTableMergeScanBlockData(SSortHandle* pHandle, SSDataBlock*
}
qDebug("%s get sorted row blocks, rows:%d", GET_TASKID(pTaskInfo), pResBlock->info.rows);
applyLimitOffset(&pInfo->limitInfo, pResBlock, pTaskInfo, pOperator);
pInfo->limitInfo.numOfOutputRows += pResBlock->info.rows;
return (pResBlock->info.rows > 0) ? pResBlock : NULL;
}
......@@ -4454,6 +4497,7 @@ SSDataBlock* doTableMergeScan(SOperatorInfo* pOperator) {
if (code != TSDB_CODE_SUCCESS) {
T_LONG_JMP(pTaskInfo->env, code);
}
size_t tableListSize = taosArrayGetSize(pInfo->tableListInfo->pTableList);
if (!pInfo->hasGroupId) {
pInfo->hasGroupId = true;
......@@ -4466,6 +4510,7 @@ SSDataBlock* doTableMergeScan(SOperatorInfo* pOperator) {
pInfo->groupId = ((STableKeyInfo*)taosArrayGet(pInfo->tableListInfo->pTableList, pInfo->tableStartIndex))->groupId;
startGroupTableMergeScan(pOperator);
}
SSDataBlock* pBlock = NULL;
while (pInfo->tableStartIndex < tableListSize) {
pBlock = getSortedTableMergeScanBlockData(pInfo->pSortHandle, pInfo->pResBlock, pOperator->resultInfo.capacity,
......@@ -4553,6 +4598,7 @@ SOperatorInfo* createTableMergeScanOperatorInfo(STableScanPhysiNode* pTableScanN
if (pInfo == NULL || pOperator == NULL) {
goto _error;
}
if (pTableScanNode->pGroupTags) {
taosArraySort(pTableListInfo->pTableList, compareTableKeyInfoByGid);
}
......@@ -4591,6 +4637,7 @@ SOperatorInfo* createTableMergeScanOperatorInfo(STableScanPhysiNode* pTableScanN
pInfo->pSortInfo = generateSortByTsInfo(pInfo->matchInfo.pList, pInfo->cond.order);
pInfo->pSortInputBlock = createOneDataBlock(pInfo->pResBlock, false);
initLimitInfo(pTableScanNode->scan.node.pLimit, pTableScanNode->scan.node.pSlimit, &pInfo->limitInfo);
int32_t rowSize = pInfo->pResBlock->info.rowSize;
pInfo->bufPageSize = getProperSortPageSize(rowSize);
......
......@@ -531,7 +531,7 @@ typedef struct SMultiwayMergeOperatorInfo {
SOptrBasicInfo binfo;
int32_t bufPageSize;
uint32_t sortBufSize; // max buffer size for in-memory sort
SLimitInfo limitInfo;
SArray* pSortInfo;
SSortHandle* pSortHandle;
SColMatchInfo matchInfo;
......@@ -592,6 +592,7 @@ SSDataBlock* getMultiwaySortedBlockData(SSortHandle* pHandle, SSDataBlock* pData
blockDataEnsureCapacity(p, capacity);
_retry:
while (1) {
STupleHandle* pTupleHandle = NULL;
if (pInfo->groupSort) {
......@@ -626,14 +627,22 @@ SSDataBlock* getMultiwaySortedBlockData(SSortHandle* pHandle, SSDataBlock* pData
} else {
appendOneRowToDataBlock(p, pTupleHandle);
}
if (p->info.rows >= capacity) {
break;
}
}
if (pInfo->groupSort) {
pInfo->hasGroupId = false;
}
if (p->info.rows > 0) { // todo extract method
applyLimitOffset(&pInfo->limitInfo, p, pTaskInfo, pOperator);
if (p->info.rows == 0) {
goto _retry;
}
blockDataEnsureCapacity(pDataBlock, p->info.rows);
int32_t numOfCols = taosArrayGetSize(pColMatchInfo);
for (int32_t i = 0; i < numOfCols; ++i) {
......@@ -650,9 +659,9 @@ SSDataBlock* getMultiwaySortedBlockData(SSortHandle* pHandle, SSDataBlock* pData
}
blockDataDestroy(p);
qDebug("%s get sorted block, groupId:0x%" PRIx64 " rows:%d", GET_TASKID(pTaskInfo), pDataBlock->info.groupId,
pDataBlock->info.rows);
return (pDataBlock->info.rows > 0) ? pDataBlock : NULL;
}
......@@ -717,6 +726,7 @@ SOperatorInfo* createMultiwayMergeOperatorInfo(SOperatorInfo** downStreams, size
goto _error;
}
initLimitInfo(pMergePhyNode->node.pLimit, pMergePhyNode->node.pSlimit, &pInfo->limitInfo);
pInfo->binfo.pRes = createResDataBlock(pDescNode);
int32_t rowSize = pInfo->binfo.pRes->info.rowSize;
ASSERT(rowSize < 100 * 1024 * 1024);
......@@ -725,6 +735,10 @@ SOperatorInfo* createMultiwayMergeOperatorInfo(SOperatorInfo** downStreams, size
int32_t numOfOutputCols = 0;
code = extractColMatchInfo(pMergePhyNode->pTargets, pDescNode, &numOfOutputCols, COL_MATCH_FROM_SLOT_ID, &pInfo->matchInfo);
if (code != TSDB_CODE_SUCCESS) {
goto _error;
}
SPhysiNode* pChildNode = (SPhysiNode*)nodesListGetNode(pPhyNode->pChildren, 0);
SSDataBlock* pInputBlock = createResDataBlock(pChildNode->pOutputDataBlockDesc);
initResultSizeInfo(&pOperator->resultInfo, 1024);
......
......@@ -2498,7 +2498,7 @@ static const SOptimizeRule optimizeRuleSet[] = {
{.pName = "RewriteUnique", .optimizeFunc = rewriteUniqueOptimize},
{.pName = "LastRowScan", .optimizeFunc = lastRowScanOptimize},
{.pName = "TagScan", .optimizeFunc = tagScanOptimize},
// {.pName = "PushDownLimit", .optimizeFunc = pushDownLimitOptimize}
{.pName = "PushDownLimit", .optimizeFunc = pushDownLimitOptimize}
};
// clang-format on
......
......@@ -97,6 +97,7 @@ static int32_t splCreateExchangeNode(SSplitContext* pCxt, SLogicNode* pChild, SE
if (NULL == pExchange->node.pLimit) {
return TSDB_CODE_OUT_OF_MEMORY;
}
((SLimitNode*)pChild->pLimit)->limit += ((SLimitNode*)pChild->pLimit)->offset;
((SLimitNode*)pChild->pLimit)->offset = 0;
}
......@@ -470,6 +471,12 @@ static int32_t stbSplCreateMergeNode(SSplitContext* pCxt, SLogicSubplan* pSubpla
if (NULL == pMerge->node.pTargets || NULL == pMerge->pInputs) {
code = TSDB_CODE_OUT_OF_MEMORY;
}
if (TSDB_CODE_SUCCESS == code && NULL != pSplitNode->pLimit) {
pMerge->node.pLimit = nodesCloneNode(pSplitNode->pLimit);
if (NULL == pMerge->node.pLimit) {
code = TSDB_CODE_OUT_OF_MEMORY;
}
}
if (TSDB_CODE_SUCCESS == code) {
if (NULL == pSubplan) {
code = nodesListMakeAppend(&pSplitNode->pChildren, (SNode*)pMerge);
......@@ -934,6 +941,7 @@ static int32_t stbSplSplitScanNodeWithoutPartTags(SSplitContext* pCxt, SStableSp
if (NULL == pSplitNode->pLimit) {
return TSDB_CODE_OUT_OF_MEMORY;
}
((SLimitNode*)pInfo->pSplitNode->pLimit)->limit += ((SLimitNode*)pInfo->pSplitNode->pLimit)->offset;
((SLimitNode*)pInfo->pSplitNode->pLimit)->offset = 0;
}
}
......@@ -1021,6 +1029,10 @@ static int32_t stbSplSplitMergeScanNode(SSplitContext* pCxt, SLogicSubplan* pSub
SNodeList* pMergeKeys = NULL;
int32_t code = stbSplCreateMergeScanNode(pScan, &pMergeScan, &pMergeKeys);
if (TSDB_CODE_SUCCESS == code) {
if (NULL != pMergeScan->pLimit) {
((SLimitNode*)pMergeScan->pLimit)->limit += ((SLimitNode*)pMergeScan->pLimit)->offset;
((SLimitNode*)pMergeScan->pLimit)->offset = 0;
}
code = stbSplCreateMergeNode(pCxt, pSubplan, (SLogicNode*)pScan, pMergeKeys, pMergeScan, groupSort);
}
if (TSDB_CODE_SUCCESS == code) {
......
......@@ -39,9 +39,9 @@ endi
if $data01 != 1 then
return -1
endi
if $data41 != 5 then
return -1
endi
#if $data41 != 5 then
# return -1
#endi
sql select * from $stb order by ts desc limit 5
if $rows != 5 then
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册