executorimpl.c 148.6 KB
Newer Older
1 2 3 4 5 6 7 8 9 10 11 12 13 14
/*
 * Copyright (c) 2019 TAOS Data, Inc. <jhtao@taosdata.com>
 *
 * This program is free software: you can use, redistribute, and/or modify
 * it under the terms of the GNU Affero General Public License, version 3
 * or later ("AGPL"), as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.
 *
 * You should have received a copy of the GNU Affero General Public License
 * along with this program. If not, see <http://www.gnu.org/licenses/>.
 */
15

H
Haojun Liao 已提交
16 17
#include "filter.h"
#include "function.h"
18 19
#include "functionMgt.h"
#include "os.h"
H
Haojun Liao 已提交
20
#include "querynodes.h"
21
#include "tfill.h"
dengyihao's avatar
dengyihao 已提交
22
#include "tname.h"
X
Xiaoyu Wang 已提交
23
#include "tref.h"
24

H
Haojun Liao 已提交
25
#include "tdatablock.h"
26
#include "tglobal.h"
H
Haojun Liao 已提交
27
#include "tmsg.h"
H
Haojun Liao 已提交
28
#include "tsort.h"
29
#include "ttime.h"
H
Haojun Liao 已提交
30

31
#include "executorimpl.h"
dengyihao's avatar
dengyihao 已提交
32
#include "index.h"
33
#include "query.h"
34 35
#include "tcompare.h"
#include "tcompression.h"
H
Haojun Liao 已提交
36
#include "thash.h"
37
#include "ttypes.h"
dengyihao's avatar
dengyihao 已提交
38
#include "vnode.h"
39

H
Haojun Liao 已提交
40
#define IS_MAIN_SCAN(runtime)          ((runtime)->scanFlag == MAIN_SCAN)
41 42 43 44 45 46
#define SET_REVERSE_SCAN_FLAG(runtime) ((runtime)->scanFlag = REVERSE_SCAN)

#define GET_FORWARD_DIRECTION_FACTOR(ord) (((ord) == TSDB_ORDER_ASC) ? QUERY_ASC_FORWARD_STEP : QUERY_DESC_FORWARD_STEP)

#if 0
static UNUSED_FUNC void *u_malloc (size_t __size) {
wafwerar's avatar
wafwerar 已提交
47
  uint32_t v = taosRand();
48 49 50 51

  if (v % 1000 <= 0) {
    return NULL;
  } else {
wafwerar's avatar
wafwerar 已提交
52
    return taosMemoryMalloc(__size);
53 54 55 56
  }
}

static UNUSED_FUNC void* u_calloc(size_t num, size_t __size) {
wafwerar's avatar
wafwerar 已提交
57
  uint32_t v = taosRand();
58 59 60
  if (v % 1000 <= 0) {
    return NULL;
  } else {
wafwerar's avatar
wafwerar 已提交
61
    return taosMemoryCalloc(num, __size);
62 63 64 65
  }
}

static UNUSED_FUNC void* u_realloc(void* p, size_t __size) {
wafwerar's avatar
wafwerar 已提交
66
  uint32_t v = taosRand();
67 68 69
  if (v % 5 <= 1) {
    return NULL;
  } else {
wafwerar's avatar
wafwerar 已提交
70
    return taosMemoryRealloc(p, __size);
71 72 73 74 75 76 77 78
  }
}

#define calloc  u_calloc
#define malloc  u_malloc
#define realloc u_realloc
#endif

X
Xiaoyu Wang 已提交
79
#define CLEAR_QUERY_STATUS(q, st)   ((q)->status &= (~(st)))
80 81
#define QUERY_IS_INTERVAL_QUERY(_q) ((_q)->interval.interval > 0)

L
Liu Jicong 已提交
82 83
int32_t getMaximumIdleDurationSec() { return tsShellActivityTimer * 2; }

84
static void setBlockSMAInfo(SqlFunctionCtx* pCtx, SExprInfo* pExpr, SSDataBlock* pBlock);
85

X
Xiaoyu Wang 已提交
86
static void releaseQueryBuf(size_t numOfTables);
87

88 89
static void destroyFillOperatorInfo(void* param);
static void destroyProjectOperatorInfo(void* param);
H
Haojun Liao 已提交
90
static void destroySortOperatorInfo(void* param);
91
static void destroyAggOperatorInfo(void* param);
X
Xiaoyu Wang 已提交
92

93 94
static void destroyIntervalOperatorInfo(void* param);
static void destroyExchangeOperatorInfo(void* param);
H
Haojun Liao 已提交
95

96 97
static void destroyOperatorInfo(SOperatorInfo* pOperator);

98
void doSetOperatorCompleted(SOperatorInfo* pOperator) {
99
  pOperator->status = OP_EXEC_DONE;
H
Haojun Liao 已提交
100
  ASSERT(pOperator->pTaskInfo != NULL);
101

102
  pOperator->cost.totalCost = (taosGetTimestampUs() - pOperator->pTaskInfo->cost.start * 1000) / 1000.0;
H
Haojun Liao 已提交
103
  setTaskStatus(pOperator->pTaskInfo, TASK_COMPLETED);
104
}
105

H
Haojun Liao 已提交
106
int32_t operatorDummyOpenFn(SOperatorInfo* pOperator) {
107
  OPTR_SET_OPENED(pOperator);
108
  pOperator->cost.openCost = 0;
H
Haojun Liao 已提交
109
  return TSDB_CODE_SUCCESS;
H
Haojun Liao 已提交
110 111
}

112
SOperatorFpSet createOperatorFpSet(__optr_open_fn_t openFn, __optr_fn_t nextFn, __optr_fn_t streamFn,
113
                                   __optr_fn_t cleanup, __optr_close_fn_t closeFn, __optr_explain_fn_t explain) {
114 115 116 117 118 119 120 121 122 123 124
  SOperatorFpSet fpSet = {
      ._openFn = openFn,
      .getNextFn = nextFn,
      .cleanupFn = cleanup,
      .closeFn = closeFn,
      .getExplainFn = explain,
  };

  return fpSet;
}

125 126
static int32_t doCopyToSDataBlock(SExecTaskInfo* pTaskInfo, SSDataBlock* pBlock, SExprSupp* pSup, SDiskbasedBuf* pBuf,
                                  SGroupResInfo* pGroupResInfo);
H
Haojun Liao 已提交
127

128
static void initCtxOutputBuffer(SqlFunctionCtx* pCtx, int32_t size);
129
static void doSetTableGroupOutputBuf(SOperatorInfo* pOperator, int32_t numOfOutput, uint64_t groupId);
130

131
#if 0
L
Liu Jicong 已提交
132 133
static bool chkResultRowFromKey(STaskRuntimeEnv* pRuntimeEnv, SResultRowInfo* pResultRowInfo, char* pData,
                                int16_t bytes, bool masterscan, uint64_t uid) {
134 135 136
  bool existed = false;
  SET_RES_WINDOW_KEY(pRuntimeEnv->keyBuf, pData, bytes, uid);

L
Liu Jicong 已提交
137 138
  SResultRow** p1 =
      (SResultRow**)taosHashGet(pRuntimeEnv->pResultRowHashTable, pRuntimeEnv->keyBuf, GET_RES_WINDOW_KEY_LEN(bytes));
139 140 141 142 143 144 145 146 147 148 149

  // in case of repeat scan/reverse scan, no new time window added.
  if (QUERY_IS_INTERVAL_QUERY(pRuntimeEnv->pQueryAttr)) {
    if (!masterscan) {  // the *p1 may be NULL in case of sliding+offset exists.
      return p1 != NULL;
    }

    if (p1 != NULL) {
      if (pResultRowInfo->size == 0) {
        existed = false;
      } else if (pResultRowInfo->size == 1) {
dengyihao's avatar
dengyihao 已提交
150
        //        existed = (pResultRowInfo->pResult[0] == (*p1));
151 152
      } else {  // check if current pResultRowInfo contains the existed pResultRow
        SET_RES_EXT_WINDOW_KEY(pRuntimeEnv->keyBuf, pData, bytes, uid, pResultRowInfo);
L
Liu Jicong 已提交
153 154
        int64_t* index =
            taosHashGet(pRuntimeEnv->pResultRowListSet, pRuntimeEnv->keyBuf, GET_RES_EXT_WINDOW_KEY_LEN(bytes));
155 156 157 158 159 160 161 162 163 164 165 166 167
        if (index != NULL) {
          existed = true;
        } else {
          existed = false;
        }
      }
    }

    return existed;
  }

  return p1 != NULL;
}
168
#endif
169

170
SResultRow* getNewResultRow(SDiskbasedBuf* pResultBuf, int32_t* currentPageId, int32_t interBufSize) {
L
Liu Jicong 已提交
171
  SFilePage* pData = NULL;
172 173 174

  // in the first scan, new space needed for results
  int32_t pageId = -1;
175
  if (*currentPageId == -1) {
176
    pData = getNewBufPage(pResultBuf, &pageId);
177 178
    pData->num = sizeof(SFilePage);
  } else {
179 180
    pData = getBufPage(pResultBuf, *currentPageId);
    pageId = *currentPageId;
181

wmmhello's avatar
wmmhello 已提交
182
    if (pData->num + interBufSize > getBufPageSize(pResultBuf)) {
183
      // release current page first, and prepare the next one
184
      releaseBufPage(pResultBuf, pData);
185

186
      pData = getNewBufPage(pResultBuf, &pageId);
187 188 189 190 191 192 193 194 195 196
      if (pData != NULL) {
        pData->num = sizeof(SFilePage);
      }
    }
  }

  if (pData == NULL) {
    return NULL;
  }

197 198
  setBufPageDirty(pData, true);

199 200 201 202
  // set the number of rows in current disk page
  SResultRow* pResultRow = (SResultRow*)((char*)pData + pData->num);
  pResultRow->pageId = pageId;
  pResultRow->offset = (int32_t)pData->num;
203
  *currentPageId = pageId;
204

wmmhello's avatar
wmmhello 已提交
205
  pData->num += interBufSize;
206 207 208
  return pResultRow;
}

209 210 211 212 213 214 215
/**
 * the struct of key in hash table
 * +----------+---------------+
 * | group id |   key data    |
 * | 8 bytes  | actual length |
 * +----------+---------------+
 */
216 217 218
SResultRow* doSetResultOutBufByKey(SDiskbasedBuf* pResultBuf, SResultRowInfo* pResultRowInfo, char* pData,
                                   int16_t bytes, bool masterscan, uint64_t groupId, SExecTaskInfo* pTaskInfo,
                                   bool isIntervalQuery, SAggSupporter* pSup) {
219
  SET_RES_WINDOW_KEY(pSup->keyBuf, pData, bytes, groupId);
H
Haojun Liao 已提交
220

dengyihao's avatar
dengyihao 已提交
221
  SResultRowPosition* p1 =
222
      (SResultRowPosition*)tSimpleHashGet(pSup->pResultRowHashTable, pSup->keyBuf, GET_RES_WINDOW_KEY_LEN(bytes));
H
Haojun Liao 已提交
223

224 225
  SResultRow* pResult = NULL;

H
Haojun Liao 已提交
226 227
  // in case of repeat scan/reverse scan, no new time window added.
  if (isIntervalQuery) {
228
    if (masterscan && p1 != NULL) {  // the *p1 may be NULL in case of sliding+offset exists.
229
      pResult = getResultRowByPos(pResultBuf, p1, true);
230
      ASSERT(pResult->pageId == p1->pageId && pResult->offset == p1->offset);
H
Haojun Liao 已提交
231 232
    }
  } else {
dengyihao's avatar
dengyihao 已提交
233 234
    // In case of group by column query, the required SResultRow object must be existInCurrentResusltRowInfo in the
    // pResultRowInfo object.
H
Haojun Liao 已提交
235
    if (p1 != NULL) {
236
      // todo
237
      pResult = getResultRowByPos(pResultBuf, p1, true);
238
      ASSERT(pResult->pageId == p1->pageId && pResult->offset == p1->offset);
H
Haojun Liao 已提交
239 240 241
    }
  }

L
Liu Jicong 已提交
242
  // 1. close current opened time window
243
  if (pResultRowInfo->cur.pageId != -1 && ((pResult == NULL) || (pResult->pageId != pResultRowInfo->cur.pageId))) {
244
    SResultRowPosition pos = pResultRowInfo->cur;
X
Xiaoyu Wang 已提交
245
    SFilePage*         pPage = getBufPage(pResultBuf, pos.pageId);
246 247 248 249 250
    releaseBufPage(pResultBuf, pPage);
  }

  // allocate a new buffer page
  if (pResult == NULL) {
H
Haojun Liao 已提交
251
    ASSERT(pSup->resultRowSize > 0);
252
    pResult = getNewResultRow(pResultBuf, &pSup->currentPageId, pSup->resultRowSize);
253

254 255
    // add a new result set for a new group
    SResultRowPosition pos = {.pageId = pResult->pageId, .offset = pResult->offset};
256
    tSimpleHashPut(pSup->pResultRowHashTable, pSup->keyBuf, GET_RES_WINDOW_KEY_LEN(bytes), &pos,
L
Liu Jicong 已提交
257
                   sizeof(SResultRowPosition));
H
Haojun Liao 已提交
258 259
  }

260 261 262
  // 2. set the new time window to be the new active time window
  pResultRowInfo->cur = (SResultRowPosition){.pageId = pResult->pageId, .offset = pResult->offset};

H
Haojun Liao 已提交
263
  // too many time window in query
264
  if (pTaskInfo->execModel == OPTR_EXEC_MODEL_BATCH &&
265
      tSimpleHashGetSize(pSup->pResultRowHashTable) > MAX_INTERVAL_TIME_WINDOW) {
266
    T_LONG_JMP(pTaskInfo->env, TSDB_CODE_QRY_TOO_MANY_TIMEWINDOW);
H
Haojun Liao 已提交
267 268
  }

H
Haojun Liao 已提交
269
  return pResult;
H
Haojun Liao 已提交
270 271
}

272
// a new buffer page for each table. Needs to opt this design
L
Liu Jicong 已提交
273
static int32_t addNewWindowResultBuf(SResultRow* pWindowRes, SDiskbasedBuf* pResultBuf, int32_t tid, uint32_t size) {
274 275 276 277
  if (pWindowRes->pageId != -1) {
    return 0;
  }

L
Liu Jicong 已提交
278
  SFilePage* pData = NULL;
279 280 281

  // in the first scan, new space needed for results
  int32_t pageId = -1;
282
  SIDList list = getDataBufPagesIdList(pResultBuf);
283 284

  if (taosArrayGetSize(list) == 0) {
285
    pData = getNewBufPage(pResultBuf, &pageId);
286
    pData->num = sizeof(SFilePage);
287 288
  } else {
    SPageInfo* pi = getLastPageInfo(list);
289
    pData = getBufPage(pResultBuf, getPageId(pi));
290
    pageId = getPageId(pi);
291

292
    if (pData->num + size > getBufPageSize(pResultBuf)) {
293
      // release current page first, and prepare the next one
294
      releaseBufPageInfo(pResultBuf, pi);
295

296
      pData = getNewBufPage(pResultBuf, &pageId);
297
      if (pData != NULL) {
298
        pData->num = sizeof(SFilePage);
299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318
      }
    }
  }

  if (pData == NULL) {
    return -1;
  }

  // set the number of rows in current disk page
  if (pWindowRes->pageId == -1) {  // not allocated yet, allocate new buffer
    pWindowRes->pageId = pageId;
    pWindowRes->offset = (int32_t)pData->num;

    pData->num += size;
    assert(pWindowRes->pageId >= 0);
  }

  return 0;
}

319
//  query_range_start, query_range_end, window_duration, window_start, window_end
320
void initExecTimeWindowInfo(SColumnInfoData* pColData, STimeWindow* pQueryWindow) {
321 322 323
  pColData->info.type = TSDB_DATA_TYPE_TIMESTAMP;
  pColData->info.bytes = sizeof(int64_t);

H
Haojun Liao 已提交
324
  colInfoDataEnsureCapacity(pColData, 5, false);
325 326 327 328 329 330 331 332 333
  colDataAppendInt64(pColData, 0, &pQueryWindow->skey);
  colDataAppendInt64(pColData, 1, &pQueryWindow->ekey);

  int64_t interval = 0;
  colDataAppendInt64(pColData, 2, &interval);  // this value may be variable in case of 'n' and 'y'.
  colDataAppendInt64(pColData, 3, &pQueryWindow->skey);
  colDataAppendInt64(pColData, 4, &pQueryWindow->ekey);
}

L
Liu Jicong 已提交
334
void cleanupExecTimeWindowInfo(SColumnInfoData* pColData) { colDataDestroy(pColData); }
H
Haojun Liao 已提交
335

336 337 338 339 340 341 342 343 344 345 346 347 348 349
typedef struct {
  bool    hasAgg;
  int32_t numOfRows;
  int32_t startOffset;
} SFunctionCtxStatus;

static void functionCtxSave(SqlFunctionCtx* pCtx, SFunctionCtxStatus* pStatus) {
  pStatus->hasAgg = pCtx->input.colDataAggIsSet;
  pStatus->numOfRows = pCtx->input.numOfRows;
  pStatus->startOffset = pCtx->input.startRowIndex;
}

static void functionCtxRestore(SqlFunctionCtx* pCtx, SFunctionCtxStatus* pStatus) {
  pCtx->input.colDataAggIsSet = pStatus->hasAgg;
H
Haojun Liao 已提交
350
  pCtx->input.numOfRows = pStatus->numOfRows;
351 352 353 354 355
  pCtx->input.startRowIndex = pStatus->startOffset;
}

void doApplyFunctions(SExecTaskInfo* taskInfo, SqlFunctionCtx* pCtx, SColumnInfoData* pTimeWindowData, int32_t offset,
                      int32_t forwardStep, int32_t numOfTotal, int32_t numOfOutput) {
356
  for (int32_t k = 0; k < numOfOutput; ++k) {
H
Haojun Liao 已提交
357
    // keep it temporarily
358 359
    SFunctionCtxStatus status = {0};
    functionCtxSave(&pCtx[k], &status);
360

361
    pCtx[k].input.startRowIndex = offset;
362
    pCtx[k].input.numOfRows = forwardStep;
363 364 365

    // not a whole block involved in query processing, statistics data can not be used
    // NOTE: the original value of isSet have been changed here
366 367
    if (pCtx[k].input.colDataAggIsSet && forwardStep < numOfTotal) {
      pCtx[k].input.colDataAggIsSet = false;
368 369
    }

370 371
    if (fmIsWindowPseudoColumnFunc(pCtx[k].functionId)) {
      SResultRowEntryInfo* pEntryInfo = GET_RES_INFO(&pCtx[k]);
372 373

      char* p = GET_ROWCELL_INTERBUF(pEntryInfo);
374

375
      SColumnInfoData idata = {0};
dengyihao's avatar
dengyihao 已提交
376
      idata.info.type = TSDB_DATA_TYPE_BIGINT;
377
      idata.info.bytes = tDataTypes[TSDB_DATA_TYPE_BIGINT].bytes;
dengyihao's avatar
dengyihao 已提交
378
      idata.pData = p;
379 380 381 382

      SScalarParam out = {.columnData = &idata};
      SScalarParam tw = {.numOfRows = 5, .columnData = pTimeWindowData};
      pCtx[k].sfp.process(&tw, 1, &out);
383
      pEntryInfo->numOfRes = 1;
384 385 386 387 388 389 390 391
    } else {
      int32_t code = TSDB_CODE_SUCCESS;
      if (functionNeedToExecute(&pCtx[k]) && pCtx[k].fpSet.process != NULL) {
        code = pCtx[k].fpSet.process(&pCtx[k]);

        if (code != TSDB_CODE_SUCCESS) {
          qError("%s apply functions error, code: %s", GET_TASKID(taskInfo), tstrerror(code));
          taskInfo->code = code;
392
          T_LONG_JMP(taskInfo->env, code);
393
        }
394
      }
395

396
      // restore it
397
      functionCtxRestore(&pCtx[k], &status);
398
    }
399 400 401
  }
}

402 403
static int32_t doSetInputDataBlock(SExprSupp* pExprSup, SSDataBlock* pBlock, int32_t order, int32_t scanFlag,
                                   bool createDummyCol);
404

405 406 407
static void doSetInputDataBlockInfo(SExprSupp* pExprSup, SSDataBlock* pBlock, int32_t order) {
  SqlFunctionCtx* pCtx = pExprSup->pCtx;
  for (int32_t i = 0; i < pExprSup->numOfExprs; ++i) {
408
    pCtx[i].order = order;
409
    pCtx[i].input.numOfRows = pBlock->info.rows;
410
    setBlockSMAInfo(&pCtx[i], &pExprSup->pExprInfo[i], pBlock);
411
    pCtx[i].pSrcBlock = pBlock;
412 413 414
  }
}

415
void setInputDataBlock(SExprSupp* pExprSup, SSDataBlock* pBlock, int32_t order, int32_t scanFlag, bool createDummyCol) {
416
  if (pBlock->pBlockAgg != NULL) {
417
    doSetInputDataBlockInfo(pExprSup, pBlock, order);
418
  } else {
419
    doSetInputDataBlock(pExprSup, pBlock, order, scanFlag, createDummyCol);
H
Haojun Liao 已提交
420
  }
421 422
}

L
Liu Jicong 已提交
423 424
static int32_t doCreateConstantValColumnInfo(SInputColumnInfoData* pInput, SFunctParam* pFuncParam, int32_t paramIndex,
                                             int32_t numOfRows) {
425 426 427 428 429 430 431 432
  SColumnInfoData* pColInfo = NULL;
  if (pInput->pData[paramIndex] == NULL) {
    pColInfo = taosMemoryCalloc(1, sizeof(SColumnInfoData));
    if (pColInfo == NULL) {
      return TSDB_CODE_OUT_OF_MEMORY;
    }

    // Set the correct column info (data type and bytes)
433 434
    pColInfo->info.type = pFuncParam->param.nType;
    pColInfo->info.bytes = pFuncParam->param.nLen;
435 436

    pInput->pData[paramIndex] = pColInfo;
437 438
  } else {
    pColInfo = pInput->pData[paramIndex];
439 440
  }

H
Haojun Liao 已提交
441
  colInfoDataEnsureCapacity(pColInfo, numOfRows, false);
442

443
  int8_t type = pFuncParam->param.nType;
444 445
  if (type == TSDB_DATA_TYPE_BIGINT || type == TSDB_DATA_TYPE_UBIGINT) {
    int64_t v = pFuncParam->param.i;
dengyihao's avatar
dengyihao 已提交
446
    for (int32_t i = 0; i < numOfRows; ++i) {
447 448 449 450
      colDataAppendInt64(pColInfo, i, &v);
    }
  } else if (type == TSDB_DATA_TYPE_DOUBLE) {
    double v = pFuncParam->param.d;
dengyihao's avatar
dengyihao 已提交
451
    for (int32_t i = 0; i < numOfRows; ++i) {
452 453
      colDataAppendDouble(pColInfo, i, &v);
    }
454
  } else if (type == TSDB_DATA_TYPE_VARCHAR) {
L
Liu Jicong 已提交
455
    char* tmp = taosMemoryMalloc(pFuncParam->param.nLen + VARSTR_HEADER_SIZE);
456
    STR_WITH_SIZE_TO_VARSTR(tmp, pFuncParam->param.pz, pFuncParam->param.nLen);
L
Liu Jicong 已提交
457
    for (int32_t i = 0; i < numOfRows; ++i) {
458 459
      colDataAppend(pColInfo, i, tmp, false);
    }
H
Haojun Liao 已提交
460
    taosMemoryFree(tmp);
461 462 463 464 465
  }

  return TSDB_CODE_SUCCESS;
}

466 467
static int32_t doSetInputDataBlock(SExprSupp* pExprSup, SSDataBlock* pBlock, int32_t order, int32_t scanFlag,
                                   bool createDummyCol) {
468
  int32_t         code = TSDB_CODE_SUCCESS;
469
  SqlFunctionCtx* pCtx = pExprSup->pCtx;
470

471
  for (int32_t i = 0; i < pExprSup->numOfExprs; ++i) {
L
Liu Jicong 已提交
472
    pCtx[i].order = order;
473 474
    pCtx[i].input.numOfRows = pBlock->info.rows;

L
Liu Jicong 已提交
475
    pCtx[i].pSrcBlock = pBlock;
X
Xiaoyu Wang 已提交
476
    pCtx[i].scanFlag = scanFlag;
H
Haojun Liao 已提交
477

478
    SInputColumnInfoData* pInput = &pCtx[i].input;
479
    pInput->uid = pBlock->info.uid;
C
Cary Xu 已提交
480
    pInput->colDataAggIsSet = false;
481

482
    SExprInfo* pOneExpr = &pExprSup->pExprInfo[i];
483
    for (int32_t j = 0; j < pOneExpr->base.numOfParams; ++j) {
dengyihao's avatar
dengyihao 已提交
484
      SFunctParam* pFuncParam = &pOneExpr->base.pParam[j];
G
Ganlin Zhao 已提交
485 486
      if (pFuncParam->type == FUNC_PARAM_TYPE_COLUMN) {
        int32_t slotId = pFuncParam->pCol->slotId;
dengyihao's avatar
dengyihao 已提交
487
        pInput->pData[j] = taosArrayGet(pBlock->pDataBlock, slotId);
488 489 490
        pInput->totalRows = pBlock->info.rows;
        pInput->numOfRows = pBlock->info.rows;
        pInput->startRowIndex = 0;
491

492
        // NOTE: the last parameter is the primary timestamp column
H
Haojun Liao 已提交
493
        // todo: refactor this
494
        if (fmIsImplicitTsFunc(pCtx[i].functionId) && (j == pOneExpr->base.numOfParams - 1)) {
L
Liu Jicong 已提交
495
          pInput->pPTS = pInput->pData[j];  // in case of merge function, this is not always the ts column data.
496
          //          ASSERT(pInput->pPTS->info.type == TSDB_DATA_TYPE_TIMESTAMP);
497
        }
498 499
        ASSERT(pInput->pData[j] != NULL);
      } else if (pFuncParam->type == FUNC_PARAM_TYPE_VALUE) {
500 501 502
        // todo avoid case: top(k, 12), 12 is the value parameter.
        // sum(11), 11 is also the value parameter.
        if (createDummyCol && pOneExpr->base.numOfParams == 1) {
503 504 505 506
          pInput->totalRows = pBlock->info.rows;
          pInput->numOfRows = pBlock->info.rows;
          pInput->startRowIndex = 0;

507
          code = doCreateConstantValColumnInfo(pInput, pFuncParam, j, pBlock->info.rows);
508 509 510
          if (code != TSDB_CODE_SUCCESS) {
            return code;
          }
511
        }
G
Ganlin Zhao 已提交
512 513
      }
    }
H
Haojun Liao 已提交
514
  }
515 516

  return code;
H
Haojun Liao 已提交
517 518
}

519
static int32_t doAggregateImpl(SOperatorInfo* pOperator, SqlFunctionCtx* pCtx) {
520
  for (int32_t k = 0; k < pOperator->exprSupp.numOfExprs; ++k) {
H
Haojun Liao 已提交
521
    if (functionNeedToExecute(&pCtx[k])) {
522
      // todo add a dummy funtion to avoid process check
523 524 525
      if (pCtx[k].fpSet.process == NULL) {
        continue;
      }
H
Haojun Liao 已提交
526

527 528 529 530
      int32_t code = pCtx[k].fpSet.process(&pCtx[k]);
      if (code != TSDB_CODE_SUCCESS) {
        qError("%s aggregate function error happens, code: %s", GET_TASKID(pOperator->pTaskInfo), tstrerror(code));
        return code;
531
      }
532 533
    }
  }
534 535

  return TSDB_CODE_SUCCESS;
536 537
}

H
Haojun Liao 已提交
538
static void setPseudoOutputColInfo(SSDataBlock* pResult, SqlFunctionCtx* pCtx, SArray* pPseudoList) {
dengyihao's avatar
dengyihao 已提交
539
  size_t num = (pPseudoList != NULL) ? taosArrayGetSize(pPseudoList) : 0;
H
Haojun Liao 已提交
540 541 542 543 544
  for (int32_t i = 0; i < num; ++i) {
    pCtx[i].pOutput = taosArrayGet(pResult->pDataBlock, i);
  }
}

545
int32_t projectApplyFunctions(SExprInfo* pExpr, SSDataBlock* pResult, SSDataBlock* pSrcBlock, SqlFunctionCtx* pCtx,
X
Xiaoyu Wang 已提交
546
                              int32_t numOfOutput, SArray* pPseudoList) {
H
Haojun Liao 已提交
547
  setPseudoOutputColInfo(pResult, pCtx, pPseudoList);
548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567

  if (pSrcBlock == NULL) {
    for (int32_t k = 0; k < numOfOutput; ++k) {
      int32_t outputSlotId = pExpr[k].base.resSchema.slotId;

      ASSERT(pExpr[k].pExpr->nodeType == QUERY_NODE_VALUE);
      SColumnInfoData* pColInfoData = taosArrayGet(pResult->pDataBlock, outputSlotId);

      int32_t type = pExpr[k].base.pParam[0].param.nType;
      if (TSDB_DATA_TYPE_NULL == type) {
        colDataAppendNNULL(pColInfoData, 0, 1);
      } else {
        colDataAppend(pColInfoData, 0, taosVariantGet(&pExpr[k].base.pParam[0].param, type), false);
      }
    }

    pResult->info.rows = 1;
    return TSDB_CODE_SUCCESS;
  }

L
Liu Jicong 已提交
568 569 570 571
  if (pResult != pSrcBlock) {
    pResult->info.groupId = pSrcBlock->info.groupId;
    memcpy(pResult->info.parTbName, pSrcBlock->info.parTbName, TSDB_TABLE_NAME_LEN);
  }
H
Haojun Liao 已提交
572

573 574
  // if the source equals to the destination, it is to create a new column as the result of scalar
  // function or some operators.
575
  bool createNewColModel = (pResult == pSrcBlock);
576 577 578
  if (createNewColModel) {
    blockDataEnsureCapacity(pResult, pResult->info.rows);
  }
579

580 581
  int32_t numOfRows = 0;

582
  for (int32_t k = 0; k < numOfOutput; ++k) {
583 584
    int32_t               outputSlotId = pExpr[k].base.resSchema.slotId;
    SqlFunctionCtx*       pfCtx = &pCtx[k];
585
    SInputColumnInfoData* pInputData = &pfCtx->input;
586

L
Liu Jicong 已提交
587
    if (pExpr[k].pExpr->nodeType == QUERY_NODE_COLUMN) {  // it is a project query
588
      SColumnInfoData* pColInfoData = taosArrayGet(pResult->pDataBlock, outputSlotId);
589
      if (pResult->info.rows > 0 && !createNewColModel) {
590
        colDataMergeCol(pColInfoData, pResult->info.rows, (int32_t*)&pResult->info.capacity, pInputData->pData[0],
591
                        pInputData->numOfRows);
592
      } else {
593
        colDataAssign(pColInfoData, pInputData->pData[0], pInputData->numOfRows, &pResult->info);
594
      }
595

596
      numOfRows = pInputData->numOfRows;
597
    } else if (pExpr[k].pExpr->nodeType == QUERY_NODE_VALUE) {
598
      SColumnInfoData* pColInfoData = taosArrayGet(pResult->pDataBlock, outputSlotId);
599

dengyihao's avatar
dengyihao 已提交
600
      int32_t offset = createNewColModel ? 0 : pResult->info.rows;
601 602 603 604 605 606 607 608

      int32_t type = pExpr[k].base.pParam[0].param.nType;
      if (TSDB_DATA_TYPE_NULL == type) {
        colDataAppendNNULL(pColInfoData, offset, pSrcBlock->info.rows);
      } else {
        for (int32_t i = 0; i < pSrcBlock->info.rows; ++i) {
          colDataAppend(pColInfoData, i + offset, taosVariantGet(&pExpr[k].base.pParam[0].param, type), false);
        }
609
      }
610 611

      numOfRows = pSrcBlock->info.rows;
H
Haojun Liao 已提交
612
    } else if (pExpr[k].pExpr->nodeType == QUERY_NODE_OPERATOR) {
613 614 615
      SArray* pBlockList = taosArrayInit(4, POINTER_BYTES);
      taosArrayPush(pBlockList, &pSrcBlock);

616
      SColumnInfoData* pResColData = taosArrayGet(pResult->pDataBlock, outputSlotId);
617
      SColumnInfoData  idata = {.info = pResColData->info, .hasNull = true};
618

619
      SScalarParam dest = {.columnData = &idata};
X
Xiaoyu Wang 已提交
620
      int32_t      code = scalarCalculate(pExpr[k].pExpr->_optrRoot.pRootNode, pBlockList, &dest);
621 622 623 624
      if (code != TSDB_CODE_SUCCESS) {
        taosArrayDestroy(pBlockList);
        return code;
      }
625

dengyihao's avatar
dengyihao 已提交
626
      int32_t startOffset = createNewColModel ? 0 : pResult->info.rows;
627
      ASSERT(pResult->info.capacity > 0);
628

629
      colDataMergeCol(pResColData, startOffset, (int32_t*)&pResult->info.capacity, &idata, dest.numOfRows);
D
dapan1121 已提交
630
      colDataDestroy(&idata);
L
Liu Jicong 已提交
631

632
      numOfRows = dest.numOfRows;
633 634
      taosArrayDestroy(pBlockList);
    } else if (pExpr[k].pExpr->nodeType == QUERY_NODE_FUNCTION) {
635 636
      // _rowts/_c0, not tbname column
      if (fmIsPseudoColumnFunc(pfCtx->functionId) && (!fmIsScanPseudoColumnFunc(pfCtx->functionId))) {
H
Haojun Liao 已提交
637
        // do nothing
638
      } else if (fmIsIndefiniteRowsFunc(pfCtx->functionId)) {
639 640
        SResultRowEntryInfo* pResInfo = GET_RES_INFO(pfCtx);
        pfCtx->fpSet.init(pfCtx, pResInfo);
641 642 643 644 645 646 647 648 649 650

        pfCtx->pOutput = taosArrayGet(pResult->pDataBlock, outputSlotId);
        pfCtx->offset = createNewColModel ? 0 : pResult->info.rows;  // set the start offset

        // set the timestamp(_rowts) output buffer
        if (taosArrayGetSize(pPseudoList) > 0) {
          int32_t* outputColIndex = taosArrayGet(pPseudoList, 0);
          pfCtx->pTsOutput = (SColumnInfoData*)pCtx[*outputColIndex].pOutput;
        }

651 652 653 654 655
        // link pDstBlock to set selectivity value
        if (pfCtx->subsidiaries.num > 0) {
          pfCtx->pDstBlock = pResult;
        }

656
        numOfRows = pfCtx->fpSet.process(pfCtx);
H
Haojun Liao 已提交
657
      } else if (fmIsAggFunc(pfCtx->functionId)) {
G
Ganlin Zhao 已提交
658
        // selective value output should be set during corresponding function execution
659 660 661
        if (fmIsSelectValueFunc(pfCtx->functionId)) {
          continue;
        }
662 663
        // _group_key function for "partition by tbname" + csum(col_name) query
        SColumnInfoData* pOutput = taosArrayGet(pResult->pDataBlock, outputSlotId);
664
        int32_t          slotId = pfCtx->param[0].pCol->slotId;
665 666 667

        // todo handle the json tag
        SColumnInfoData* pInput = taosArrayGet(pSrcBlock->pDataBlock, slotId);
668
        for (int32_t f = 0; f < pSrcBlock->info.rows; ++f) {
669 670 671 672 673 674 675 676 677
          bool isNull = colDataIsNull_s(pInput, f);
          if (isNull) {
            colDataAppendNULL(pOutput, pResult->info.rows + f);
          } else {
            char* data = colDataGetData(pInput, f);
            colDataAppend(pOutput, pResult->info.rows + f, data, isNull);
          }
        }

H
Haojun Liao 已提交
678 679 680
      } else {
        SArray* pBlockList = taosArrayInit(4, POINTER_BYTES);
        taosArrayPush(pBlockList, &pSrcBlock);
G
Ganlin Zhao 已提交
681

682
        SColumnInfoData* pResColData = taosArrayGet(pResult->pDataBlock, outputSlotId);
683
        SColumnInfoData  idata = {.info = pResColData->info, .hasNull = true};
H
Haojun Liao 已提交
684

685
        SScalarParam dest = {.columnData = &idata};
X
Xiaoyu Wang 已提交
686
        int32_t      code = scalarCalculate((SNode*)pExpr[k].pExpr->_function.pFunctNode, pBlockList, &dest);
687 688 689 690
        if (code != TSDB_CODE_SUCCESS) {
          taosArrayDestroy(pBlockList);
          return code;
        }
691

dengyihao's avatar
dengyihao 已提交
692
        int32_t startOffset = createNewColModel ? 0 : pResult->info.rows;
693
        ASSERT(pResult->info.capacity > 0);
694
        colDataMergeCol(pResColData, startOffset, (int32_t*)&pResult->info.capacity, &idata, dest.numOfRows);
D
dapan1121 已提交
695
        colDataDestroy(&idata);
696 697

        numOfRows = dest.numOfRows;
H
Haojun Liao 已提交
698 699
        taosArrayDestroy(pBlockList);
      }
700
    } else {
701
      return TSDB_CODE_OPS_NOT_SUPPORT;
702 703
    }
  }
704

705 706 707
  if (!createNewColModel) {
    pResult->info.rows += numOfRows;
  }
708 709

  return TSDB_CODE_SUCCESS;
710 711
}

5
54liuyao 已提交
712
bool functionNeedToExecute(SqlFunctionCtx* pCtx) {
713
  struct SResultRowEntryInfo* pResInfo = GET_RES_INFO(pCtx);
714

715 716 717 718 719
  // in case of timestamp column, always generated results.
  int32_t functionId = pCtx->functionId;
  if (functionId == -1) {
    return false;
  }
720

721 722
  if (pCtx->scanFlag == REPEAT_SCAN) {
    return fmIsRepeatScanFunc(pCtx->functionId);
723 724
  }

725 726
  if (isRowEntryCompleted(pResInfo)) {
    return false;
727 728
  }

729 730 731
  return true;
}

732 733 734 735 736 737 738
static int32_t doCreateConstantValColumnAggInfo(SInputColumnInfoData* pInput, SFunctParam* pFuncParam, int32_t type,
                                                int32_t paramIndex, int32_t numOfRows) {
  if (pInput->pData[paramIndex] == NULL) {
    pInput->pData[paramIndex] = taosMemoryCalloc(1, sizeof(SColumnInfoData));
    if (pInput->pData[paramIndex] == NULL) {
      return TSDB_CODE_OUT_OF_MEMORY;
    }
739

740 741 742
    // Set the correct column info (data type and bytes)
    pInput->pData[paramIndex]->info.type = type;
    pInput->pData[paramIndex]->info.bytes = tDataTypes[type].bytes;
743
  }
H
Haojun Liao 已提交
744

745 746 747 748 749 750
  SColumnDataAgg* da = NULL;
  if (pInput->pColumnDataAgg[paramIndex] == NULL) {
    da = taosMemoryCalloc(1, sizeof(SColumnDataAgg));
    pInput->pColumnDataAgg[paramIndex] = da;
    if (da == NULL) {
      return TSDB_CODE_OUT_OF_MEMORY;
751 752
    }
  } else {
753
    da = pInput->pColumnDataAgg[paramIndex];
754 755
  }

756
  ASSERT(!IS_VAR_DATA_TYPE(type));
757

758 759
  if (type == TSDB_DATA_TYPE_BIGINT) {
    int64_t v = pFuncParam->param.i;
760
    *da = (SColumnDataAgg){.numOfNull = 0, .min = v, .max = v, .sum = v * numOfRows};
761 762
  } else if (type == TSDB_DATA_TYPE_DOUBLE) {
    double v = pFuncParam->param.d;
763
    *da = (SColumnDataAgg){.numOfNull = 0};
764

765 766 767 768 769 770
    *(double*)&da->min = v;
    *(double*)&da->max = v;
    *(double*)&da->sum = v * numOfRows;
  } else if (type == TSDB_DATA_TYPE_BOOL) {  // todo validate this data type
    bool v = pFuncParam->param.i;

771
    *da = (SColumnDataAgg){.numOfNull = 0};
772 773 774 775 776
    *(bool*)&da->min = 0;
    *(bool*)&da->max = v;
    *(bool*)&da->sum = v * numOfRows;
  } else if (type == TSDB_DATA_TYPE_TIMESTAMP) {
    // do nothing
777
  } else {
778
    ASSERT(0);
779 780
  }

781 782
  return TSDB_CODE_SUCCESS;
}
783

784
void setBlockSMAInfo(SqlFunctionCtx* pCtx, SExprInfo* pExprInfo, SSDataBlock* pBlock) {
785 786 787 788 789 790 791 792 793
  int32_t numOfRows = pBlock->info.rows;

  SInputColumnInfoData* pInput = &pCtx->input;
  pInput->numOfRows = numOfRows;
  pInput->totalRows = numOfRows;

  if (pBlock->pBlockAgg != NULL) {
    pInput->colDataAggIsSet = true;

794 795
    for (int32_t j = 0; j < pExprInfo->base.numOfParams; ++j) {
      SFunctParam* pFuncParam = &pExprInfo->base.pParam[j];
796

797 798
      if (pFuncParam->type == FUNC_PARAM_TYPE_COLUMN) {
        int32_t slotId = pFuncParam->pCol->slotId;
799 800 801 802
        pInput->pColumnDataAgg[j] = pBlock->pBlockAgg[slotId];
        if (pInput->pColumnDataAgg[j] == NULL) {
          pInput->colDataAggIsSet = false;
        }
803 804 805 806

        // Here we set the column info data since the data type for each column data is required, but
        // the data in the corresponding SColumnInfoData will not be used.
        pInput->pData[j] = taosArrayGet(pBlock->pDataBlock, slotId);
807 808
      } else if (pFuncParam->type == FUNC_PARAM_TYPE_VALUE) {
        doCreateConstantValColumnAggInfo(pInput, pFuncParam, pFuncParam->param.nType, j, pBlock->info.rows);
809 810
      }
    }
811
  } else {
812
    pInput->colDataAggIsSet = false;
813 814 815
  }
}

L
Liu Jicong 已提交
816
bool isTaskKilled(SExecTaskInfo* pTaskInfo) {
817 818
  // query has been executed more than tsShellActivityTimer, and the retrieve has not arrived
  // abort current query execution.
L
Liu Jicong 已提交
819 820
  if (pTaskInfo->owner != 0 &&
      ((taosGetTimestampSec() - pTaskInfo->cost.start / 1000) > 10 * getMaximumIdleDurationSec())
821
      /*(!needBuildResAfterQueryComplete(pTaskInfo))*/) {
822
    assert(pTaskInfo->cost.start != 0);
L
Liu Jicong 已提交
823 824 825
    //    qDebug("QInfo:%" PRIu64 " retrieve not arrive beyond %d ms, abort current query execution, start:%" PRId64
    //           ", current:%d", pQInfo->qId, 1, pQInfo->startExecTs, taosGetTimestampSec());
    //    return true;
826 827 828 829 830
  }

  return false;
}

L
Liu Jicong 已提交
831
void setTaskKilled(SExecTaskInfo* pTaskInfo) { pTaskInfo->code = TSDB_CODE_TSC_QUERY_CANCELLED; }
832 833

/////////////////////////////////////////////////////////////////////////////////////////////
834
STimeWindow getAlignQueryTimeWindow(SInterval* pInterval, int32_t precision, int64_t key) {
L
Liu Jicong 已提交
835
  STimeWindow win = {0};
836
  win.skey = taosTimeTruncate(key, pInterval, precision);
837 838

  /*
H
Haojun Liao 已提交
839
   * if the realSkey > INT64_MAX - pInterval->interval, the query duration between
840 841
   * realSkey and realEkey must be less than one interval.Therefore, no need to adjust the query ranges.
   */
842 843 844
  win.ekey = taosTimeAdd(win.skey, pInterval->interval, pInterval->intervalUnit, precision) - 1;
  if (win.ekey < win.skey) {
    win.ekey = INT64_MAX;
845
  }
846 847

  return win;
848 849
}

L
Liu Jicong 已提交
850 851
int32_t loadDataBlockOnDemand(SExecTaskInfo* pTaskInfo, STableScanInfo* pTableScanInfo, SSDataBlock* pBlock,
                              uint32_t* status) {
852
  *status = BLK_DATA_NOT_LOAD;
853

H
Haojun Liao 已提交
854
  pBlock->pDataBlock = NULL;
L
Liu Jicong 已提交
855
  pBlock->pBlockAgg = NULL;
H
Haojun Liao 已提交
856

L
Liu Jicong 已提交
857 858
  //  int64_t groupId = pRuntimeEnv->current->groupIndex;
  //  bool    ascQuery = QUERY_IS_ASC_QUERY(pQueryAttr);
859

H
Haojun Liao 已提交
860
  STaskCostInfo* pCost = &pTaskInfo->cost;
861

862 863
//  pCost->totalBlocks += 1;
//  pCost->totalRows += pBlock->info.rows;
H
Haojun Liao 已提交
864
#if 0
865 866 867
  // Calculate all time windows that are overlapping or contain current data block.
  // If current data block is contained by all possible time window, do not load current data block.
  if (/*pQueryAttr->pFilters || */pQueryAttr->groupbyColumn || pQueryAttr->sw.gap > 0 ||
H
Haojun Liao 已提交
868
      (QUERY_IS_INTERVAL_QUERY(pQueryAttr) && overlapWithTimeWindow(pTaskInfo, &pBlock->info))) {
869
    (*status) = BLK_DATA_DATA_LOAD;
870 871 872
  }

  // check if this data block is required to load
873
  if ((*status) != BLK_DATA_DATA_LOAD) {
874 875 876 877 878 879 880
    bool needFilter = true;

    // the pCtx[i] result is belonged to previous time window since the outputBuf has not been set yet,
    // the filter result may be incorrect. So in case of interval query, we need to set the correct time output buffer
    if (QUERY_IS_INTERVAL_QUERY(pQueryAttr)) {
      SResultRow* pResult = NULL;

H
Haojun Liao 已提交
881
      bool  masterScan = IS_MAIN_SCAN(pRuntimeEnv);
882 883 884 885 886 887
      TSKEY k = ascQuery? pBlock->info.window.skey : pBlock->info.window.ekey;

      STimeWindow win = getActiveTimeWindow(pTableScanInfo->pResultRowInfo, k, pQueryAttr);
      if (pQueryAttr->pointInterpQuery) {
        needFilter = chkWindowOutputBufByKey(pRuntimeEnv, pTableScanInfo->pResultRowInfo, &win, masterScan, &pResult, groupId,
                                    pTableScanInfo->pCtx, pTableScanInfo->numOfOutput,
888
                                    pTableScanInfo->rowEntryInfoOffset);
889 890 891
      } else {
        if (setResultOutputBufByKey(pRuntimeEnv, pTableScanInfo->pResultRowInfo, pBlock->info.uid, &win, masterScan, &pResult, groupId,
                                    pTableScanInfo->pCtx, pTableScanInfo->numOfOutput,
892
                                    pTableScanInfo->rowEntryInfoOffset) != TSDB_CODE_SUCCESS) {
893
          T_LONG_JMP(pRuntimeEnv->env, TSDB_CODE_QRY_OUT_OF_MEMORY);
894 895 896 897
        }
      }
    } else if (pQueryAttr->stableQuery && (!pQueryAttr->tsCompQuery) && (!pQueryAttr->diffQuery)) { // stable aggregate, not interval aggregate or normal column aggregate
      doSetTableGroupOutputBuf(pRuntimeEnv, pTableScanInfo->pResultRowInfo, pTableScanInfo->pCtx,
898
                               pTableScanInfo->rowEntryInfoOffset, pTableScanInfo->numOfOutput,
899 900 901 902 903 904
                               pRuntimeEnv->current->groupIndex);
    }

    if (needFilter) {
      (*status) = doFilterByBlockTimeWindow(pTableScanInfo, pBlock);
    } else {
905
      (*status) = BLK_DATA_DATA_LOAD;
906 907 908 909
    }
  }

  SDataBlockInfo* pBlockInfo = &pBlock->info;
H
Haojun Liao 已提交
910
//  *status = updateBlockLoadStatus(pRuntimeEnv->pQueryAttr, *status);
911

912
  if ((*status) == BLK_DATA_NOT_LOAD || (*status) == BLK_DATA_FILTEROUT) {
913 914
    //qDebug("QInfo:0x%"PRIx64" data block discard, brange:%" PRId64 "-%" PRId64 ", rows:%d", pQInfo->qId, pBlockInfo->window.skey,
//           pBlockInfo->window.ekey, pBlockInfo->rows);
915
    pCost->skipBlocks += 1;
916
  } else if ((*status) == BLK_DATA_SMA_LOAD) {
917 918
    // this function never returns error?
    pCost->loadBlockStatis += 1;
919
//    tsdbRetrieveDatablockSMA(pTableScanInfo->pTsdbReadHandle, &pBlock->pBlockAgg);
920 921

    if (pBlock->pBlockAgg == NULL) {  // data block statistics does not exist, load data block
922
//      pBlock->pDataBlock = tsdbRetrieveDataBlock(pTableScanInfo->pTsdbReadHandle, NULL);
923 924 925
      pCost->totalCheckedRows += pBlock->info.rows;
    }
  } else {
926
    assert((*status) == BLK_DATA_DATA_LOAD);
927 928 929

    // load the data block statistics to perform further filter
    pCost->loadBlockStatis += 1;
930
//    tsdbRetrieveDatablockSMA(pTableScanInfo->pTsdbReadHandle, &pBlock->pBlockAgg);
931 932 933 934 935 936

    if (pQueryAttr->topBotQuery && pBlock->pBlockAgg != NULL) {
      { // set previous window
        if (QUERY_IS_INTERVAL_QUERY(pQueryAttr)) {
          SResultRow* pResult = NULL;

H
Haojun Liao 已提交
937
          bool  masterScan = IS_MAIN_SCAN(pRuntimeEnv);
938 939 940 941 942
          TSKEY k = ascQuery? pBlock->info.window.skey : pBlock->info.window.ekey;

          STimeWindow win = getActiveTimeWindow(pTableScanInfo->pResultRowInfo, k, pQueryAttr);
          if (setResultOutputBufByKey(pRuntimeEnv, pTableScanInfo->pResultRowInfo, pBlock->info.uid, &win, masterScan, &pResult, groupId,
                                      pTableScanInfo->pCtx, pTableScanInfo->numOfOutput,
943
                                      pTableScanInfo->rowEntryInfoOffset) != TSDB_CODE_SUCCESS) {
944
            T_LONG_JMP(pRuntimeEnv->env, TSDB_CODE_QRY_OUT_OF_MEMORY);
945 946 947 948 949 950 951 952 953 954
          }
        }
      }
      bool load = false;
      for (int32_t i = 0; i < pQueryAttr->numOfOutput; ++i) {
        int32_t functionId = pTableScanInfo->pCtx[i].functionId;
        if (functionId == FUNCTION_TOP || functionId == FUNCTION_BOTTOM) {
//          load = topbot_datablock_filter(&pTableScanInfo->pCtx[i], (char*)&(pBlock->pBlockAgg[i].min),
//                                         (char*)&(pBlock->pBlockAgg[i].max));
          if (!load) { // current block has been discard due to filter applied
955
            pCost->skipBlocks += 1;
956 957
            //qDebug("QInfo:0x%"PRIx64" data block discard, brange:%" PRId64 "-%" PRId64 ", rows:%d", pQInfo->qId,
//                   pBlockInfo->window.skey, pBlockInfo->window.ekey, pBlockInfo->rows);
958
            (*status) = BLK_DATA_FILTEROUT;
959 960 961 962 963 964 965
            return TSDB_CODE_SUCCESS;
          }
        }
      }
    }

    // current block has been discard due to filter applied
H
Haojun Liao 已提交
966
//    if (!doFilterByBlockSMA(pRuntimeEnv, pBlock->pBlockAgg, pTableScanInfo->pCtx, pBlockInfo->rows)) {
967
//      pCost->skipBlocks += 1;
968 969
//      qDebug("QInfo:0x%"PRIx64" data block discard, brange:%" PRId64 "-%" PRId64 ", rows:%d", pQInfo->qId, pBlockInfo->window.skey,
//             pBlockInfo->window.ekey, pBlockInfo->rows);
970
//      (*status) = BLK_DATA_FILTEROUT;
971 972 973 974 975
//      return TSDB_CODE_SUCCESS;
//    }

    pCost->totalCheckedRows += pBlockInfo->rows;
    pCost->loadBlocks += 1;
976
//    pBlock->pDataBlock = tsdbRetrieveDataBlock(pTableScanInfo->pTsdbReadHandle, NULL);
977 978 979 980 981
//    if (pBlock->pDataBlock == NULL) {
//      return terrno;
//    }

//    if (pQueryAttr->pFilters != NULL) {
982
//      filterSetColFieldData(pQueryAttr->pFilters, taosArrayGetSize(pBlock->pDataBlock), pBlock->pDataBlock);
983
//    }
984

985 986 987 988
//    if (pQueryAttr->pFilters != NULL || pRuntimeEnv->pTsBuf != NULL) {
//      filterColRowsInDataBlock(pRuntimeEnv, pBlock, ascQuery);
//    }
  }
H
Haojun Liao 已提交
989
#endif
990 991 992
  return TSDB_CODE_SUCCESS;
}

L
Liu Jicong 已提交
993
static void updateTableQueryInfoForReverseScan(STableQueryInfo* pTableQueryInfo) {
994 995 996 997 998
  if (pTableQueryInfo == NULL) {
    return;
  }
}

L
Liu Jicong 已提交
999
void setTaskStatus(SExecTaskInfo* pTaskInfo, int8_t status) {
1000
  if (status == TASK_NOT_COMPLETED) {
H
Haojun Liao 已提交
1001
    pTaskInfo->status = status;
1002 1003
  } else {
    // QUERY_NOT_COMPLETED is not compatible with any other status, so clear its position first
1004
    CLEAR_QUERY_STATUS(pTaskInfo, TASK_NOT_COMPLETED);
H
Haojun Liao 已提交
1005
    pTaskInfo->status |= status;
1006 1007 1008
  }
}

1009
void setResultRowInitCtx(SResultRow* pResult, SqlFunctionCtx* pCtx, int32_t numOfOutput, int32_t* rowEntryInfoOffset) {
5
54liuyao 已提交
1010
  bool init = false;
1011
  for (int32_t i = 0; i < numOfOutput; ++i) {
1012
    pCtx[i].resultInfo = getResultEntryInfo(pResult, i, rowEntryInfoOffset);
5
54liuyao 已提交
1013 1014 1015
    if (init) {
      continue;
    }
1016 1017 1018 1019 1020

    struct SResultRowEntryInfo* pResInfo = pCtx[i].resultInfo;
    if (isRowEntryCompleted(pResInfo) && isRowEntryInitialized(pResInfo)) {
      continue;
    }
1021 1022 1023 1024 1025

    if (fmIsWindowPseudoColumnFunc(pCtx[i].functionId)) {
      continue;
    }

1026 1027 1028 1029 1030 1031
    if (!pResInfo->initialized) {
      if (pCtx[i].functionId != -1) {
        pCtx[i].fpSet.init(&pCtx[i], pResInfo);
      } else {
        pResInfo->initialized = true;
      }
5
54liuyao 已提交
1032 1033
    } else {
      init = true;
1034 1035 1036 1037
    }
  }
}

1038 1039
static void extractQualifiedTupleByFilterResult(SSDataBlock* pBlock, const SColumnInfoData* p, bool keep,
                                                int32_t status);
1040

H
Haojun Liao 已提交
1041 1042
void doFilter(SSDataBlock* pBlock, SFilterInfo* pFilterInfo, SColMatchInfo* pColMatchInfo) {
  if (pFilterInfo == NULL || pBlock->info.rows == 0) {
S
shenglian zhou 已提交
1043 1044
    return;
  }
1045

1046
  SFilterColumnParam param1 = {.numOfCols = taosArrayGetSize(pBlock->pDataBlock), .pDataBlock = pBlock->pDataBlock};
H
Haojun Liao 已提交
1047
  int32_t code = filterSetDataFromSlotId(pFilterInfo, &param1);
1048

1049
  SColumnInfoData* p = NULL;
1050
  int32_t          status = 0;
H
Haojun Liao 已提交
1051

1052
  // todo the keep seems never to be True??
H
Haojun Liao 已提交
1053
  bool keep = filterExecute(pFilterInfo, pBlock, &p, NULL, param1.numOfCols, &status);
1054
  extractQualifiedTupleByFilterResult(pBlock, p, keep, status);
H
Haojun Liao 已提交
1055

1056
  if (pColMatchInfo != NULL) {
H
Haojun Liao 已提交
1057 1058
    size_t  size = taosArrayGetSize(pColMatchInfo->pList);
    for (int32_t i = 0; i < size; ++i) {
H
Haojun Liao 已提交
1059
      SColMatchItem* pInfo = taosArrayGet(pColMatchInfo->pList, i);
1060
      if (pInfo->colId == PRIMARYKEY_TIMESTAMP_COL_ID) {
H
Haojun Liao 已提交
1061
        SColumnInfoData* pColData = taosArrayGet(pBlock->pDataBlock, pInfo->dstSlotId);
1062
        if (pColData->info.type == TSDB_DATA_TYPE_TIMESTAMP) {
H
Haojun Liao 已提交
1063
          blockDataUpdateTsWindow(pBlock, pInfo->dstSlotId);
1064 1065 1066 1067 1068 1069
          break;
        }
      }
    }
  }

1070 1071
  colDataDestroy(p);
  taosMemoryFree(p);
1072 1073
}

1074
void extractQualifiedTupleByFilterResult(SSDataBlock* pBlock, const SColumnInfoData* p, bool keep, int32_t status) {
1075 1076 1077 1078
  if (keep) {
    return;
  }

H
Haojun Liao 已提交
1079 1080 1081
  int32_t totalRows = pBlock->info.rows;

  if (status == FILTER_RESULT_ALL_QUALIFIED) {
1082
    // here nothing needs to be done
H
Haojun Liao 已提交
1083
  } else if (status == FILTER_RESULT_NONE_QUALIFIED) {
1084
    pBlock->info.rows = 0;
H
Haojun Liao 已提交
1085
  } else {
1086
    SSDataBlock* px = createOneDataBlock(pBlock, true);
1087

1088 1089
    size_t numOfCols = taosArrayGetSize(pBlock->pDataBlock);
    for (int32_t i = 0; i < numOfCols; ++i) {
1090 1091
      SColumnInfoData* pSrc = taosArrayGet(px->pDataBlock, i);
      SColumnInfoData* pDst = taosArrayGet(pBlock->pDataBlock, i);
1092
      // it is a reserved column for scalar function, and no data in this column yet.
1093
      if (pDst->pData == NULL || pSrc->pData == NULL) {
1094 1095 1096
        continue;
      }

1097 1098
      colInfoDataCleanup(pDst, pBlock->info.rows);

1099
      int32_t numOfRows = 0;
1100
      for (int32_t j = 0; j < totalRows; ++j) {
1101
        if (((int8_t*)p->pData)[j] == 0) {
D
dapan1121 已提交
1102 1103
          continue;
        }
1104

D
dapan1121 已提交
1105
        if (colDataIsNull_s(pSrc, j)) {
1106
          colDataAppendNULL(pDst, numOfRows);
D
dapan1121 已提交
1107
        } else {
1108
          colDataAppend(pDst, numOfRows, colDataGetData(pSrc, j), false);
D
dapan1121 已提交
1109
        }
1110
        numOfRows += 1;
H
Haojun Liao 已提交
1111
      }
1112

1113
      // todo this value can be assigned directly
1114 1115 1116 1117 1118
      if (pBlock->info.rows == totalRows) {
        pBlock->info.rows = numOfRows;
      } else {
        ASSERT(pBlock->info.rows == numOfRows);
      }
1119
    }
1120

dengyihao's avatar
dengyihao 已提交
1121
    blockDataDestroy(px);  // fix memory leak
1122 1123 1124
  }
}

1125
void doSetTableGroupOutputBuf(SOperatorInfo* pOperator, int32_t numOfOutput, uint64_t groupId) {
1126
  // for simple group by query without interval, all the tables belong to one group result.
1127 1128 1129
  SExecTaskInfo*    pTaskInfo = pOperator->pTaskInfo;
  SAggOperatorInfo* pAggInfo = pOperator->info;

1130
  SResultRowInfo* pResultRowInfo = &pAggInfo->binfo.resultRowInfo;
1131 1132
  SqlFunctionCtx* pCtx = pOperator->exprSupp.pCtx;
  int32_t*        rowEntryInfoOffset = pOperator->exprSupp.rowEntryInfoOffset;
1133

1134
  SResultRow* pResultRow = doSetResultOutBufByKey(pAggInfo->aggSup.pResultBuf, pResultRowInfo, (char*)&groupId,
L
Liu Jicong 已提交
1135
                                                  sizeof(groupId), true, groupId, pTaskInfo, false, &pAggInfo->aggSup);
L
Liu Jicong 已提交
1136
  assert(pResultRow != NULL);
1137 1138 1139 1140 1141 1142

  /*
   * not assign result buffer yet, add new result buffer
   * all group belong to one result set, and each group result has different group id so set the id to be one
   */
  if (pResultRow->pageId == -1) {
dengyihao's avatar
dengyihao 已提交
1143 1144
    int32_t ret =
        addNewWindowResultBuf(pResultRow, pAggInfo->aggSup.pResultBuf, groupId, pAggInfo->binfo.pRes->info.rowSize);
1145 1146 1147 1148 1149
    if (ret != TSDB_CODE_SUCCESS) {
      return;
    }
  }

1150
  setResultRowInitCtx(pResultRow, pCtx, numOfOutput, rowEntryInfoOffset);
1151 1152
}

1153 1154 1155
static void setExecutionContext(SOperatorInfo* pOperator, int32_t numOfOutput, uint64_t groupId) {
  SAggOperatorInfo* pAggInfo = pOperator->info;
  if (pAggInfo->groupId != UINT64_MAX && pAggInfo->groupId == groupId) {
1156 1157
    return;
  }
1158 1159

  doSetTableGroupOutputBuf(pOperator, numOfOutput, groupId);
1160 1161

  // record the current active group id
H
Haojun Liao 已提交
1162
  pAggInfo->groupId = groupId;
1163 1164
}

dengyihao's avatar
dengyihao 已提交
1165 1166
static void doUpdateNumOfRows(SqlFunctionCtx* pCtx, SResultRow* pRow, int32_t numOfExprs,
                              const int32_t* rowCellOffset) {
1167
  bool returnNotNull = false;
1168
  for (int32_t j = 0; j < numOfExprs; ++j) {
1169
    struct SResultRowEntryInfo* pResInfo = getResultEntryInfo(pRow, j, rowCellOffset);
1170 1171 1172 1173 1174 1175 1176
    if (!isRowEntryInitialized(pResInfo)) {
      continue;
    }

    if (pRow->numOfRows < pResInfo->numOfRes) {
      pRow->numOfRows = pResInfo->numOfRes;
    }
1177

1178
    if (fmIsNotNullOutputFunc(pCtx[j].functionId)) {
1179 1180
      returnNotNull = true;
    }
1181
  }
S
shenglian zhou 已提交
1182 1183
  // if all expr skips all blocks, e.g. all null inputs for max function, output one row in final result.
  //  except for first/last, which require not null output, output no rows
1184
  if (pRow->numOfRows == 0 && !returnNotNull) {
1185
    pRow->numOfRows = 1;
1186 1187 1188
  }
}

1189 1190
static void doCopyResultToDataBlock(SExprInfo* pExprInfo, int32_t numOfExprs, SResultRow* pRow, SqlFunctionCtx* pCtx,
                                    SSDataBlock* pBlock, const int32_t* rowEntryOffset, SExecTaskInfo* pTaskInfo) {
1191 1192 1193
  for (int32_t j = 0; j < numOfExprs; ++j) {
    int32_t slotId = pExprInfo[j].base.resSchema.slotId;

1194
    pCtx[j].resultInfo = getResultEntryInfo(pRow, j, rowEntryOffset);
1195
    if (pCtx[j].fpSet.finalize) {
1196
      if (strcmp(pCtx[j].pExpr->pExpr->_function.functionName, "_group_key") == 0) {
1197 1198
        // for groupkey along with functions that output multiple lines(e.g. Histogram)
        // need to match groupkey result for each output row of that function.
1199 1200 1201 1202 1203
        if (pCtx[j].resultInfo->numOfRes != 0) {
          pCtx[j].resultInfo->numOfRes = pRow->numOfRows;
        }
      }

1204 1205 1206
      int32_t code = pCtx[j].fpSet.finalize(&pCtx[j], pBlock);
      if (TAOS_FAILED(code)) {
        qError("%s build result data block error, code %s", GET_TASKID(pTaskInfo), tstrerror(code));
1207
        T_LONG_JMP(pTaskInfo->env, code);
1208 1209
      }
    } else if (strcmp(pCtx[j].pExpr->pExpr->_function.functionName, "_select_value") == 0) {
1210
      // do nothing
1211
    } else {
1212 1213
      // expand the result into multiple rows. E.g., _wstart, top(k, 20)
      // the _wstart needs to copy to 20 following rows, since the results of top-k expands to 20 different rows.
1214 1215 1216 1217 1218 1219 1220
      SColumnInfoData* pColInfoData = taosArrayGet(pBlock->pDataBlock, slotId);
      char*            in = GET_ROWCELL_INTERBUF(pCtx[j].resultInfo);
      for (int32_t k = 0; k < pRow->numOfRows; ++k) {
        colDataAppend(pColInfoData, pBlock->info.rows + k, in, pCtx[j].resultInfo->isNullRes);
      }
    }
  }
1221 1222
}

1223 1224 1225
// todo refactor. SResultRow has direct pointer in miainfo
int32_t finalizeResultRows(SDiskbasedBuf* pBuf, SResultRowPosition* resultRowPosition, SExprSupp* pSup,
                           SSDataBlock* pBlock, SExecTaskInfo* pTaskInfo) {
1226 1227 1228 1229 1230 1231 1232 1233 1234 1235 1236 1237 1238 1239 1240 1241 1242 1243 1244 1245 1246 1247 1248 1249 1250 1251
  SFilePage*  page = getBufPage(pBuf, resultRowPosition->pageId);
  SResultRow* pRow = (SResultRow*)((char*)page + resultRowPosition->offset);

  SqlFunctionCtx* pCtx = pSup->pCtx;
  SExprInfo*      pExprInfo = pSup->pExprInfo;
  const int32_t*  rowEntryOffset = pSup->rowEntryInfoOffset;

  doUpdateNumOfRows(pCtx, pRow, pSup->numOfExprs, rowEntryOffset);
  if (pRow->numOfRows == 0) {
    releaseBufPage(pBuf, page);
    return 0;
  }

  int32_t size = pBlock->info.capacity;
  while (pBlock->info.rows + pRow->numOfRows > size) {
    size = size * 1.25;
  }

  int32_t code = blockDataEnsureCapacity(pBlock, size);
  if (TAOS_FAILED(code)) {
    releaseBufPage(pBuf, page);
    qError("%s ensure result data capacity failed, code %s", GET_TASKID(pTaskInfo), tstrerror(code));
    T_LONG_JMP(pTaskInfo->env, code);
  }

  doCopyResultToDataBlock(pExprInfo, pSup->numOfExprs, pRow, pCtx, pBlock, rowEntryOffset, pTaskInfo);
1252 1253

  releaseBufPage(pBuf, page);
1254
  pBlock->info.rows += pRow->numOfRows;
1255 1256 1257
  return 0;
}

1258 1259 1260 1261 1262 1263 1264
int32_t doCopyToSDataBlock(SExecTaskInfo* pTaskInfo, SSDataBlock* pBlock, SExprSupp* pSup, SDiskbasedBuf* pBuf,
                           SGroupResInfo* pGroupResInfo) {
  SExprInfo*      pExprInfo = pSup->pExprInfo;
  int32_t         numOfExprs = pSup->numOfExprs;
  int32_t*        rowEntryOffset = pSup->rowEntryInfoOffset;
  SqlFunctionCtx* pCtx = pSup->pCtx;

1265
  int32_t numOfRows = getNumOfTotalRes(pGroupResInfo);
1266

1267
  for (int32_t i = pGroupResInfo->index; i < numOfRows; i += 1) {
L
Liu Jicong 已提交
1268 1269
    SResKeyPos* pPos = taosArrayGetP(pGroupResInfo->pRows, i);
    SFilePage*  page = getBufPage(pBuf, pPos->pos.pageId);
1270

1271
    SResultRow* pRow = (SResultRow*)((char*)page + pPos->pos.offset);
1272

H
Haojun Liao 已提交
1273
    doUpdateNumOfRows(pCtx, pRow, numOfExprs, rowEntryOffset);
1274 1275

    // no results, continue to check the next one
1276 1277
    if (pRow->numOfRows == 0) {
      pGroupResInfo->index += 1;
1278
      releaseBufPage(pBuf, page);
1279 1280 1281
      continue;
    }

1282 1283 1284 1285 1286
    if (pBlock->info.groupId == 0) {
      pBlock->info.groupId = pPos->groupId;
    } else {
      // current value belongs to different group, it can't be packed into one datablock
      if (pBlock->info.groupId != pPos->groupId) {
1287
        releaseBufPage(pBuf, page);
1288 1289 1290 1291
        break;
      }
    }

1292
    if (pBlock->info.rows + pRow->numOfRows > pBlock->info.capacity) {
1293
      ASSERT(pBlock->info.rows > 0);
1294
      releaseBufPage(pBuf, page);
1295 1296 1297 1298
      break;
    }

    pGroupResInfo->index += 1;
1299
    doCopyResultToDataBlock(pExprInfo, numOfExprs, pRow, pCtx, pBlock, rowEntryOffset, pTaskInfo);
1300

1301
    releaseBufPage(pBuf, page);
1302
    pBlock->info.rows += pRow->numOfRows;
1303 1304
  }

X
Xiaoyu Wang 已提交
1305 1306
  qDebug("%s result generated, rows:%d, groupId:%" PRIu64, GET_TASKID(pTaskInfo), pBlock->info.rows,
         pBlock->info.groupId);
1307

1308
  blockDataUpdateTsWindow(pBlock, 0);
1309 1310 1311
  return 0;
}

1312 1313 1314 1315 1316 1317 1318 1319 1320 1321 1322 1323 1324 1325 1326 1327 1328 1329 1330 1331 1332 1333 1334 1335 1336 1337 1338 1339 1340 1341 1342 1343 1344 1345 1346 1347 1348 1349 1350 1351
void doBuildStreamResBlock(SOperatorInfo* pOperator, SOptrBasicInfo* pbInfo, SGroupResInfo* pGroupResInfo,
                           SDiskbasedBuf* pBuf) {
  SExecTaskInfo* pTaskInfo = pOperator->pTaskInfo;
  SSDataBlock*   pBlock = pbInfo->pRes;

  // set output datablock version
  pBlock->info.version = pTaskInfo->version;

  blockDataCleanup(pBlock);
  if (!hasRemainResults(pGroupResInfo)) {
    return;
  }

  // clear the existed group id
  pBlock->info.groupId = 0;
  ASSERT(!pbInfo->mergeResultBlock);
  doCopyToSDataBlock(pTaskInfo, pBlock, &pOperator->exprSupp, pBuf, pGroupResInfo);
  if (pOperator->operatorType == QUERY_NODE_PHYSICAL_PLAN_STREAM_STATE) {
    SStreamStateAggOperatorInfo* pInfo = pOperator->info;

    char* tbname = taosHashGet(pInfo->pGroupIdTbNameMap, &pBlock->info.groupId, sizeof(int64_t));
    if (tbname != NULL) {
      memcpy(pBlock->info.parTbName, tbname, TSDB_TABLE_NAME_LEN);
    } else {
      pBlock->info.parTbName[0] = 0;
    }
  } else if (pOperator->operatorType == QUERY_NODE_PHYSICAL_PLAN_STREAM_SESSION ||
             pOperator->operatorType == QUERY_NODE_PHYSICAL_PLAN_STREAM_SEMI_SESSION ||
             pOperator->operatorType == QUERY_NODE_PHYSICAL_PLAN_STREAM_FINAL_SESSION) {
    SStreamSessionAggOperatorInfo* pInfo = pOperator->info;

    char* tbname = taosHashGet(pInfo->pGroupIdTbNameMap, &pBlock->info.groupId, sizeof(int64_t));
    if (tbname != NULL) {
      memcpy(pBlock->info.parTbName, tbname, TSDB_TABLE_NAME_LEN);
    } else {
      pBlock->info.parTbName[0] = 0;
    }
  }
}

X
Xiaoyu Wang 已提交
1352 1353
void doBuildResultDatablock(SOperatorInfo* pOperator, SOptrBasicInfo* pbInfo, SGroupResInfo* pGroupResInfo,
                            SDiskbasedBuf* pBuf) {
1354
  SExecTaskInfo* pTaskInfo = pOperator->pTaskInfo;
1355
  SSDataBlock*   pBlock = pbInfo->pRes;
1356

1357 1358 1359
  // set output datablock version
  pBlock->info.version = pTaskInfo->version;

1360
  blockDataCleanup(pBlock);
1361
  if (!hasRemainResults(pGroupResInfo)) {
1362 1363 1364
    return;
  }

1365 1366
  // clear the existed group id
  pBlock->info.groupId = 0;
1367 1368 1369
  if (!pbInfo->mergeResultBlock) {
    doCopyToSDataBlock(pTaskInfo, pBlock, &pOperator->exprSupp, pBuf, pGroupResInfo);
  } else {
dengyihao's avatar
dengyihao 已提交
1370
    while (hasRemainResults(pGroupResInfo)) {
1371 1372 1373
      doCopyToSDataBlock(pTaskInfo, pBlock, &pOperator->exprSupp, pBuf, pGroupResInfo);
      if (pBlock->info.rows >= pOperator->resultInfo.threshold) {
        break;
1374 1375
      }

1376 1377
      // clearing group id to continue to merge data that belong to different groups
      pBlock->info.groupId = 0;
1378
    }
1379 1380 1381

    // clear the group id info in SSDataBlock, since the client does not need it
    pBlock->info.groupId = 0;
1382 1383 1384
  }
}

L
Liu Jicong 已提交
1385 1386
void queryCostStatis(SExecTaskInfo* pTaskInfo) {
  STaskCostInfo* pSummary = &pTaskInfo->cost;
1387

1388 1389
  SFileBlockLoadRecorder* pRecorder = pSummary->pRecoder;
  if (pSummary->pRecoder != NULL) {
1390
    qDebug(
H
Haojun Liao 已提交
1391 1392 1393 1394 1395
        "%s :cost summary: elapsed time:%.2f ms, extract tableList:%.2f ms, createGroupIdMap:%.2f ms, total blocks:%d, "
        "load block SMA:%d, load data block:%d, total rows:%" PRId64 ", check rows:%" PRId64,
        GET_TASKID(pTaskInfo), pSummary->elapsedTime / 1000.0, pSummary->extractListTime, pSummary->groupIdMapTime,
        pRecorder->totalBlocks, pRecorder->loadBlockStatis, pRecorder->loadBlocks, pRecorder->totalRows,
        pRecorder->totalCheckedRows);
1396
  }
1397 1398
}

L
Liu Jicong 已提交
1399 1400 1401
// static void updateOffsetVal(STaskRuntimeEnv *pRuntimeEnv, SDataBlockInfo *pBlockInfo) {
//   STaskAttr *pQueryAttr = pRuntimeEnv->pQueryAttr;
//   STableQueryInfo* pTableQueryInfo = pRuntimeEnv->current;
1402
//
L
Liu Jicong 已提交
1403
//   int32_t step = GET_FORWARD_DIRECTION_FACTOR(pQueryAttr->order.order);
1404
//
L
Liu Jicong 已提交
1405 1406 1407 1408
//   if (pQueryAttr->limit.offset == pBlockInfo->rows) {  // current block will ignore completed
//     pTableQueryInfo->lastKey = QUERY_IS_ASC_QUERY(pQueryAttr) ? pBlockInfo->window.ekey + step :
//     pBlockInfo->window.skey + step; pQueryAttr->limit.offset = 0; return;
//   }
1409
//
L
Liu Jicong 已提交
1410 1411 1412 1413 1414
//   if (QUERY_IS_ASC_QUERY(pQueryAttr)) {
//     pQueryAttr->pos = (int32_t)pQueryAttr->limit.offset;
//   } else {
//     pQueryAttr->pos = pBlockInfo->rows - (int32_t)pQueryAttr->limit.offset - 1;
//   }
1415
//
L
Liu Jicong 已提交
1416
//   assert(pQueryAttr->pos >= 0 && pQueryAttr->pos <= pBlockInfo->rows - 1);
1417
//
L
Liu Jicong 已提交
1418 1419
//   SArray *         pDataBlock = tsdbRetrieveDataBlock(pRuntimeEnv->pTsdbReadHandle, NULL);
//   SColumnInfoData *pColInfoData = taosArrayGet(pDataBlock, 0);
1420
//
L
Liu Jicong 已提交
1421 1422
//   // update the pQueryAttr->limit.offset value, and pQueryAttr->pos value
//   TSKEY *keys = (TSKEY *) pColInfoData->pData;
1423
//
L
Liu Jicong 已提交
1424 1425 1426
//   // update the offset value
//   pTableQueryInfo->lastKey = keys[pQueryAttr->pos];
//   pQueryAttr->limit.offset = 0;
1427
//
L
Liu Jicong 已提交
1428
//   int32_t numOfRes = tableApplyFunctionsOnBlock(pRuntimeEnv, pBlockInfo, NULL, binarySearchForKey, pDataBlock);
1429
//
L
Liu Jicong 已提交
1430 1431 1432 1433
//   //qDebug("QInfo:0x%"PRIx64" check data block, brange:%" PRId64 "-%" PRId64 ", numBlocksOfStep:%d, numOfRes:%d,
//   lastKey:%"PRId64, GET_TASKID(pRuntimeEnv),
//          pBlockInfo->window.skey, pBlockInfo->window.ekey, pBlockInfo->rows, numOfRes, pQuery->current->lastKey);
// }
1434

L
Liu Jicong 已提交
1435 1436
// void skipBlocks(STaskRuntimeEnv *pRuntimeEnv) {
//   STaskAttr *pQueryAttr = pRuntimeEnv->pQueryAttr;
1437
//
L
Liu Jicong 已提交
1438 1439 1440
//   if (pQueryAttr->limit.offset <= 0 || pQueryAttr->numOfFilterCols > 0) {
//     return;
//   }
1441
//
L
Liu Jicong 已提交
1442 1443
//   pQueryAttr->pos = 0;
//   int32_t step = GET_FORWARD_DIRECTION_FACTOR(pQueryAttr->order.order);
1444
//
L
Liu Jicong 已提交
1445 1446
//   STableQueryInfo* pTableQueryInfo = pRuntimeEnv->current;
//   TsdbQueryHandleT pTsdbReadHandle = pRuntimeEnv->pTsdbReadHandle;
1447
//
L
Liu Jicong 已提交
1448 1449 1450
//   SDataBlockInfo blockInfo = SDATA_BLOCK_INITIALIZER;
//   while (tsdbNextDataBlock(pTsdbReadHandle)) {
//     if (isTaskKilled(pRuntimeEnv->qinfo)) {
1451
//       T_LONG_JMP(pRuntimeEnv->env, TSDB_CODE_TSC_QUERY_CANCELLED);
L
Liu Jicong 已提交
1452
//     }
1453
//
L
Liu Jicong 已提交
1454
//     tsdbRetrieveDataBlockInfo(pTsdbReadHandle, &blockInfo);
1455
//
L
Liu Jicong 已提交
1456 1457 1458 1459
//     if (pQueryAttr->limit.offset > blockInfo.rows) {
//       pQueryAttr->limit.offset -= blockInfo.rows;
//       pTableQueryInfo->lastKey = (QUERY_IS_ASC_QUERY(pQueryAttr)) ? blockInfo.window.ekey : blockInfo.window.skey;
//       pTableQueryInfo->lastKey += step;
1460
//
L
Liu Jicong 已提交
1461 1462 1463 1464 1465 1466 1467
//       //qDebug("QInfo:0x%"PRIx64" skip rows:%d, offset:%" PRId64, GET_TASKID(pRuntimeEnv), blockInfo.rows,
//              pQuery->limit.offset);
//     } else {  // find the appropriated start position in current block
//       updateOffsetVal(pRuntimeEnv, &blockInfo);
//       break;
//     }
//   }
1468
//
L
Liu Jicong 已提交
1469
//   if (terrno != TSDB_CODE_SUCCESS) {
1470
//     T_LONG_JMP(pRuntimeEnv->env, terrno);
L
Liu Jicong 已提交
1471 1472 1473 1474 1475 1476 1477
//   }
// }

// static TSKEY doSkipIntervalProcess(STaskRuntimeEnv* pRuntimeEnv, STimeWindow* win, SDataBlockInfo* pBlockInfo,
// STableQueryInfo* pTableQueryInfo) {
//   STaskAttr *pQueryAttr = pRuntimeEnv->pQueryAttr;
//   SResultRowInfo *pWindowResInfo = &pRuntimeEnv->resultRowInfo;
1478
//
L
Liu Jicong 已提交
1479 1480 1481
//   assert(pQueryAttr->limit.offset == 0);
//   STimeWindow tw = *win;
//   getNextTimeWindow(pQueryAttr, &tw);
1482
//
L
Liu Jicong 已提交
1483 1484
//   if ((tw.skey <= pBlockInfo->window.ekey && QUERY_IS_ASC_QUERY(pQueryAttr)) ||
//       (tw.ekey >= pBlockInfo->window.skey && !QUERY_IS_ASC_QUERY(pQueryAttr))) {
1485
//
L
Liu Jicong 已提交
1486 1487 1488 1489
//     // load the data block and check data remaining in current data block
//     // TODO optimize performance
//     SArray *         pDataBlock = tsdbRetrieveDataBlock(pRuntimeEnv->pTsdbReadHandle, NULL);
//     SColumnInfoData *pColInfoData = taosArrayGet(pDataBlock, 0);
1490
//
L
Liu Jicong 已提交
1491 1492 1493 1494
//     tw = *win;
//     int32_t startPos =
//         getNextQualifiedWindow(pQueryAttr, &tw, pBlockInfo, pColInfoData->pData, binarySearchForKey, -1);
//     assert(startPos >= 0);
1495
//
L
Liu Jicong 已提交
1496 1497
//     // set the abort info
//     pQueryAttr->pos = startPos;
1498
//
L
Liu Jicong 已提交
1499 1500 1501 1502
//     // reset the query start timestamp
//     pTableQueryInfo->win.skey = ((TSKEY *)pColInfoData->pData)[startPos];
//     pQueryAttr->window.skey = pTableQueryInfo->win.skey;
//     TSKEY key = pTableQueryInfo->win.skey;
1503
//
L
Liu Jicong 已提交
1504 1505
//     pWindowResInfo->prevSKey = tw.skey;
//     int32_t index = pRuntimeEnv->resultRowInfo.curIndex;
1506
//
L
Liu Jicong 已提交
1507 1508
//     int32_t numOfRes = tableApplyFunctionsOnBlock(pRuntimeEnv, pBlockInfo, NULL, binarySearchForKey, pDataBlock);
//     pRuntimeEnv->resultRowInfo.curIndex = index;  // restore the window index
1509
//
L
Liu Jicong 已提交
1510 1511 1512 1513
//     //qDebug("QInfo:0x%"PRIx64" check data block, brange:%" PRId64 "-%" PRId64 ", numOfRows:%d, numOfRes:%d,
//     lastKey:%" PRId64,
//            GET_TASKID(pRuntimeEnv), pBlockInfo->window.skey, pBlockInfo->window.ekey, pBlockInfo->rows, numOfRes,
//            pQueryAttr->current->lastKey);
1514
//
L
Liu Jicong 已提交
1515 1516 1517 1518 1519
//     return key;
//   } else {  // do nothing
//     pQueryAttr->window.skey      = tw.skey;
//     pWindowResInfo->prevSKey = tw.skey;
//     pTableQueryInfo->lastKey = tw.skey;
1520
//
L
Liu Jicong 已提交
1521 1522
//     return tw.skey;
//   }
1523
//
L
Liu Jicong 已提交
1524 1525 1526 1527 1528 1529 1530 1531 1532 1533
//   return true;
// }

// static bool skipTimeInterval(STaskRuntimeEnv *pRuntimeEnv, TSKEY* start) {
//   STaskAttr *pQueryAttr = pRuntimeEnv->pQueryAttr;
//   if (QUERY_IS_ASC_QUERY(pQueryAttr)) {
//     assert(*start <= pRuntimeEnv->current->lastKey);
//   } else {
//     assert(*start >= pRuntimeEnv->current->lastKey);
//   }
1534
//
L
Liu Jicong 已提交
1535 1536 1537 1538 1539
//   // if queried with value filter, do NOT forward query start position
//   if (pQueryAttr->limit.offset <= 0 || pQueryAttr->numOfFilterCols > 0 || pRuntimeEnv->pTsBuf != NULL ||
//   pRuntimeEnv->pFillInfo != NULL) {
//     return true;
//   }
1540
//
L
Liu Jicong 已提交
1541 1542 1543 1544 1545 1546 1547
//   /*
//    * 1. for interval without interpolation query we forward pQueryAttr->interval.interval at a time for
//    *    pQueryAttr->limit.offset times. Since hole exists, pQueryAttr->interval.interval*pQueryAttr->limit.offset
//    value is
//    *    not valid. otherwise, we only forward pQueryAttr->limit.offset number of points
//    */
//   assert(pRuntimeEnv->resultRowInfo.prevSKey == TSKEY_INITIAL_VAL);
1548
//
L
Liu Jicong 已提交
1549 1550
//   STimeWindow w = TSWINDOW_INITIALIZER;
//   bool ascQuery = QUERY_IS_ASC_QUERY(pQueryAttr);
1551
//
L
Liu Jicong 已提交
1552 1553
//   SResultRowInfo *pWindowResInfo = &pRuntimeEnv->resultRowInfo;
//   STableQueryInfo *pTableQueryInfo = pRuntimeEnv->current;
1554
//
L
Liu Jicong 已提交
1555 1556 1557
//   SDataBlockInfo blockInfo = SDATA_BLOCK_INITIALIZER;
//   while (tsdbNextDataBlock(pRuntimeEnv->pTsdbReadHandle)) {
//     tsdbRetrieveDataBlockInfo(pRuntimeEnv->pTsdbReadHandle, &blockInfo);
1558
//
L
Liu Jicong 已提交
1559 1560 1561 1562 1563 1564 1565 1566 1567
//     if (QUERY_IS_ASC_QUERY(pQueryAttr)) {
//       if (pWindowResInfo->prevSKey == TSKEY_INITIAL_VAL) {
//         getAlignQueryTimeWindow(pQueryAttr, blockInfo.window.skey, blockInfo.window.skey, pQueryAttr->window.ekey,
//         &w); pWindowResInfo->prevSKey = w.skey;
//       }
//     } else {
//       getAlignQueryTimeWindow(pQueryAttr, blockInfo.window.ekey, pQueryAttr->window.ekey, blockInfo.window.ekey, &w);
//       pWindowResInfo->prevSKey = w.skey;
//     }
1568
//
L
Liu Jicong 已提交
1569 1570
//     // the first time window
//     STimeWindow win = getActiveTimeWindow(pWindowResInfo, pWindowResInfo->prevSKey, pQueryAttr);
1571
//
L
Liu Jicong 已提交
1572 1573
//     while (pQueryAttr->limit.offset > 0) {
//       STimeWindow tw = win;
1574
//
L
Liu Jicong 已提交
1575 1576 1577
//       if ((win.ekey <= blockInfo.window.ekey && ascQuery) || (win.ekey >= blockInfo.window.skey && !ascQuery)) {
//         pQueryAttr->limit.offset -= 1;
//         pWindowResInfo->prevSKey = win.skey;
1578
//
L
Liu Jicong 已提交
1579 1580 1581 1582 1583 1584
//         // current time window is aligned with blockInfo.window.ekey
//         // restart it from next data block by set prevSKey to be TSKEY_INITIAL_VAL;
//         if ((win.ekey == blockInfo.window.ekey && ascQuery) || (win.ekey == blockInfo.window.skey && !ascQuery)) {
//           pWindowResInfo->prevSKey = TSKEY_INITIAL_VAL;
//         }
//       }
1585
//
L
Liu Jicong 已提交
1586 1587 1588 1589
//       if (pQueryAttr->limit.offset == 0) {
//         *start = doSkipIntervalProcess(pRuntimeEnv, &win, &blockInfo, pTableQueryInfo);
//         return true;
//       }
1590
//
L
Liu Jicong 已提交
1591 1592
//       // current window does not ended in current data block, try next data block
//       getNextTimeWindow(pQueryAttr, &tw);
1593
//
L
Liu Jicong 已提交
1594 1595 1596 1597 1598 1599 1600 1601 1602
//       /*
//        * If the next time window still starts from current data block,
//        * load the primary timestamp column first, and then find the start position for the next queried time window.
//        * Note that only the primary timestamp column is required.
//        * TODO: Optimize for this cases. All data blocks are not needed to be loaded, only if the first actually
//        required
//        * time window resides in current data block.
//        */
//       if ((tw.skey <= blockInfo.window.ekey && ascQuery) || (tw.ekey >= blockInfo.window.skey && !ascQuery)) {
1603
//
L
Liu Jicong 已提交
1604 1605
//         SArray *pDataBlock = tsdbRetrieveDataBlock(pRuntimeEnv->pTsdbReadHandle, NULL);
//         SColumnInfoData *pColInfoData = taosArrayGet(pDataBlock, 0);
1606
//
L
Liu Jicong 已提交
1607 1608 1609
//         if ((win.ekey > blockInfo.window.ekey && ascQuery) || (win.ekey < blockInfo.window.skey && !ascQuery)) {
//           pQueryAttr->limit.offset -= 1;
//         }
1610
//
L
Liu Jicong 已提交
1611 1612 1613 1614 1615 1616 1617 1618
//         if (pQueryAttr->limit.offset == 0) {
//           *start = doSkipIntervalProcess(pRuntimeEnv, &win, &blockInfo, pTableQueryInfo);
//           return true;
//         } else {
//           tw = win;
//           int32_t startPos =
//               getNextQualifiedWindow(pQueryAttr, &tw, &blockInfo, pColInfoData->pData, binarySearchForKey, -1);
//           assert(startPos >= 0);
1619
//
L
Liu Jicong 已提交
1620 1621 1622 1623 1624 1625 1626 1627 1628 1629 1630
//           // set the abort info
//           pQueryAttr->pos = startPos;
//           pTableQueryInfo->lastKey = ((TSKEY *)pColInfoData->pData)[startPos];
//           pWindowResInfo->prevSKey = tw.skey;
//           win = tw;
//         }
//       } else {
//         break;  // offset is not 0, and next time window begins or ends in the next block.
//       }
//     }
//   }
1631
//
L
Liu Jicong 已提交
1632 1633
//   // check for error
//   if (terrno != TSDB_CODE_SUCCESS) {
1634
//     T_LONG_JMP(pRuntimeEnv->env, terrno);
L
Liu Jicong 已提交
1635
//   }
1636
//
L
Liu Jicong 已提交
1637 1638
//   return true;
// }
1639

1640
int32_t appendDownstream(SOperatorInfo* p, SOperatorInfo** pDownstream, int32_t num) {
H
Haojun Liao 已提交
1641
  if (p->pDownstream == NULL) {
H
Haojun Liao 已提交
1642
    assert(p->numOfDownstream == 0);
1643 1644
  }

wafwerar's avatar
wafwerar 已提交
1645
  p->pDownstream = taosMemoryCalloc(1, num * POINTER_BYTES);
1646 1647 1648 1649 1650 1651 1652
  if (p->pDownstream == NULL) {
    return TSDB_CODE_OUT_OF_MEMORY;
  }

  memcpy(p->pDownstream, pDownstream, num * POINTER_BYTES);
  p->numOfDownstream = num;
  return TSDB_CODE_SUCCESS;
1653 1654
}

1655 1656 1657 1658
typedef struct SFetchRspHandleWrapper {
  uint32_t exchangeId;
  int32_t  sourceIndex;
} SFetchRspHandleWrapper;
1659

D
dapan1121 已提交
1660
int32_t loadRemoteDataCallback(void* param, SDataBuf* pMsg, int32_t code) {
X
Xiaoyu Wang 已提交
1661
  SFetchRspHandleWrapper* pWrapper = (SFetchRspHandleWrapper*)param;
1662 1663 1664 1665

  SExchangeInfo* pExchangeInfo = taosAcquireRef(exchangeObjRefPool, pWrapper->exchangeId);
  if (pExchangeInfo == NULL) {
    qWarn("failed to acquire exchange operator, since it may have been released");
1666
    taosMemoryFree(pMsg->pData);
1667 1668 1669
    return TSDB_CODE_SUCCESS;
  }

X
Xiaoyu Wang 已提交
1670
  int32_t          index = pWrapper->sourceIndex;
1671
  SSourceDataInfo* pSourceDataInfo = taosArrayGet(pExchangeInfo->pSourceDataInfo, index);
1672

H
Haojun Liao 已提交
1673 1674
  if (code == TSDB_CODE_SUCCESS) {
    pSourceDataInfo->pRsp = pMsg->pData;
1675

H
Haojun Liao 已提交
1676 1677
    SRetrieveTableRsp* pRsp = pSourceDataInfo->pRsp;
    pRsp->numOfRows = htonl(pRsp->numOfRows);
dengyihao's avatar
dengyihao 已提交
1678
    pRsp->compLen = htonl(pRsp->compLen);
1679
    pRsp->numOfCols = htonl(pRsp->numOfCols);
dengyihao's avatar
dengyihao 已提交
1680
    pRsp->useconds = htobe64(pRsp->useconds);
1681
    pRsp->numOfBlocks = htonl(pRsp->numOfBlocks);
1682

1683
    ASSERT(pRsp != NULL);
H
Haojun Liao 已提交
1684 1685
    qDebug("%s fetch rsp received, index:%d, blocks:%d, rows:%d", pSourceDataInfo->taskId, index, pRsp->numOfBlocks,
           pRsp->numOfRows);
H
Haojun Liao 已提交
1686
  } else {
1687
    taosMemoryFree(pMsg->pData);
H
Haojun Liao 已提交
1688
    pSourceDataInfo->code = code;
1689
    qDebug("%s fetch rsp received, index:%d, error:%s", pSourceDataInfo->taskId, index, tstrerror(code));
H
Haojun Liao 已提交
1690
  }
H
Haojun Liao 已提交
1691

H
Haojun Liao 已提交
1692
  pSourceDataInfo->status = EX_SOURCE_DATA_READY;
1693 1694 1695 1696

  tsem_post(&pExchangeInfo->ready);
  taosReleaseRef(exchangeObjRefPool, pWrapper->exchangeId);

wmmhello's avatar
wmmhello 已提交
1697
  return TSDB_CODE_SUCCESS;
H
Haojun Liao 已提交
1698 1699
}

D
dapan1121 已提交
1700
void qProcessRspMsg(void* parent, SRpcMsg* pMsg, SEpSet* pEpSet) {
S
Shengliang Guan 已提交
1701 1702
  SMsgSendInfo* pSendInfo = (SMsgSendInfo*)pMsg->info.ahandle;
  assert(pMsg->info.ahandle != NULL);
H
Haojun Liao 已提交
1703 1704 1705 1706

  SDataBuf buf = {.len = pMsg->contLen, .pData = NULL};

  if (pMsg->contLen > 0) {
wafwerar's avatar
wafwerar 已提交
1707
    buf.pData = taosMemoryCalloc(1, pMsg->contLen);
H
Haojun Liao 已提交
1708 1709 1710 1711 1712 1713 1714 1715 1716 1717 1718
    if (buf.pData == NULL) {
      terrno = TSDB_CODE_OUT_OF_MEMORY;
      pMsg->code = TSDB_CODE_OUT_OF_MEMORY;
    } else {
      memcpy(buf.pData, pMsg->pCont, pMsg->contLen);
    }
  }

  pSendInfo->fp(pSendInfo->param, &buf, pMsg->code);
  rpcFreeCont(pMsg->pCont);
  destroySendMsgInfo(pSendInfo);
1719 1720
}

L
Liu Jicong 已提交
1721
static int32_t doSendFetchDataRequest(SExchangeInfo* pExchangeInfo, SExecTaskInfo* pTaskInfo, int32_t sourceIndex) {
1722
  size_t totalSources = taosArrayGetSize(pExchangeInfo->pSources);
1723

L
Liu Jicong 已提交
1724 1725
  SDownstreamSourceNode* pSource = taosArrayGet(pExchangeInfo->pSources, sourceIndex);
  SSourceDataInfo*       pDataInfo = taosArrayGet(pExchangeInfo->pSourceDataInfo, sourceIndex);
1726

1727 1728
  ASSERT(pDataInfo->status == EX_SOURCE_DATA_NOT_READY);

1729
  SFetchRspHandleWrapper* pWrapper = taosMemoryCalloc(1, sizeof(SFetchRspHandleWrapper));
X
Xiaoyu Wang 已提交
1730
  pWrapper->exchangeId = pExchangeInfo->self;
1731 1732
  pWrapper->sourceIndex = sourceIndex;

D
dapan1121 已提交
1733 1734
  if (pSource->localExec) {
    SDataBuf pBuf = {0};
1735 1736 1737
    int32_t  code =
        (*pTaskInfo->localFetch.fp)(pTaskInfo->localFetch.handle, pSource->schedId, pTaskInfo->id.queryId,
                                    pSource->taskId, 0, pSource->execId, &pBuf.pData, pTaskInfo->localFetch.explainRes);
D
dapan1121 已提交
1738
    loadRemoteDataCallback(pWrapper, &pBuf, code);
D
dapan1121 已提交
1739
    taosMemoryFree(pWrapper);
D
dapan1121 已提交
1740
  } else {
D
dapan1121 已提交
1741 1742 1743
    SResFetchReq* pMsg = taosMemoryCalloc(1, sizeof(SResFetchReq));
    if (NULL == pMsg) {
      pTaskInfo->code = TSDB_CODE_QRY_OUT_OF_MEMORY;
H
Haojun Liao 已提交
1744
      taosMemoryFree(pWrapper);
D
dapan1121 已提交
1745 1746 1747
      return pTaskInfo->code;
    }

D
dapan1121 已提交
1748
    qDebug("%s build fetch msg and send to vgId:%d, ep:%s, taskId:0x%" PRIx64 ", execId:%d, %d/%" PRIzu,
1749 1750
           GET_TASKID(pTaskInfo), pSource->addr.nodeId, pSource->addr.epSet.eps[0].fqdn, pSource->taskId,
           pSource->execId, sourceIndex, totalSources);
D
dapan1121 已提交
1751 1752 1753 1754 1755 1756 1757 1758 1759 1760 1761

    pMsg->header.vgId = htonl(pSource->addr.nodeId);
    pMsg->sId = htobe64(pSource->schedId);
    pMsg->taskId = htobe64(pSource->taskId);
    pMsg->queryId = htobe64(pTaskInfo->id.queryId);
    pMsg->execId = htonl(pSource->execId);

    // send the fetch remote task result reques
    SMsgSendInfo* pMsgSendInfo = taosMemoryCalloc(1, sizeof(SMsgSendInfo));
    if (NULL == pMsgSendInfo) {
      taosMemoryFreeClear(pMsg);
H
Haojun Liao 已提交
1762
      taosMemoryFree(pWrapper);
D
dapan1121 已提交
1763 1764 1765 1766 1767 1768 1769 1770 1771 1772 1773
      qError("%s prepare message %d failed", GET_TASKID(pTaskInfo), (int32_t)sizeof(SMsgSendInfo));
      pTaskInfo->code = TSDB_CODE_QRY_OUT_OF_MEMORY;
      return pTaskInfo->code;
    }

    pMsgSendInfo->param = pWrapper;
    pMsgSendInfo->paramFreeFp = taosMemoryFree;
    pMsgSendInfo->msgInfo.pData = pMsg;
    pMsgSendInfo->msgInfo.len = sizeof(SResFetchReq);
    pMsgSendInfo->msgType = pSource->fetchMsgType;
    pMsgSendInfo->fp = loadRemoteDataCallback;
1774

D
dapan1121 已提交
1775
    int64_t transporterId = 0;
1776 1777
    int32_t code =
        asyncSendMsgToServer(pExchangeInfo->pTransporter, &pSource->addr.epSet, &transporterId, pMsgSendInfo);
D
dapan1121 已提交
1778
  }
1779

1780 1781 1782
  return TSDB_CODE_SUCCESS;
}

1783 1784 1785 1786 1787 1788 1789 1790
void updateLoadRemoteInfo(SLoadRemoteDataInfo* pInfo, int32_t numOfRows, int32_t dataLen, int64_t startTs,
                          SOperatorInfo* pOperator) {
  pInfo->totalRows += numOfRows;
  pInfo->totalSize += dataLen;
  pInfo->totalElapsed += (taosGetTimestampUs() - startTs);
  pOperator->resultInfo.totalRows += numOfRows;
}

H
Haojun Liao 已提交
1791
int32_t extractDataBlockFromFetchRsp(SSDataBlock* pRes, char* pData, SArray* pColList, char** pNextStart) {
H
Haojun Liao 已提交
1792
  if (pColList == NULL) {  // data from other sources
1793
    blockDataCleanup(pRes);
dengyihao's avatar
dengyihao 已提交
1794
    *pNextStart = (char*)blockDecode(pRes, pData);
H
Haojun Liao 已提交
1795
  } else {  // extract data according to pColList
1796 1797 1798 1799 1800
    char* pStart = pData;

    int32_t numOfCols = htonl(*(int32_t*)pStart);
    pStart += sizeof(int32_t);

1801
    // todo refactor:extract method
1802
    SSysTableSchema* pSchema = (SSysTableSchema*)pStart;
dengyihao's avatar
dengyihao 已提交
1803
    for (int32_t i = 0; i < numOfCols; ++i) {
1804 1805 1806 1807 1808 1809 1810
      SSysTableSchema* p = (SSysTableSchema*)pStart;

      p->colId = htons(p->colId);
      p->bytes = htonl(p->bytes);
      pStart += sizeof(SSysTableSchema);
    }

1811
    SSDataBlock* pBlock = createDataBlock();
dengyihao's avatar
dengyihao 已提交
1812
    for (int32_t i = 0; i < numOfCols; ++i) {
1813 1814
      SColumnInfoData idata = createColumnInfoData(pSchema[i].type, pSchema[i].bytes, pSchema[i].colId);
      blockDataAppendColInfo(pBlock, &idata);
1815 1816
    }

1817
    blockDecode(pBlock, pStart);
1818
    blockDataEnsureCapacity(pRes, pBlock->info.rows);
1819

H
Haojun Liao 已提交
1820
    // data from mnode
1821
    pRes->info.rows = pBlock->info.rows;
1822 1823
    relocateColumnData(pRes, pColList, pBlock->pDataBlock, false);
    blockDataDestroy(pBlock);
1824
  }
1825

1826 1827
  // todo move this to time window aggregator, since the primary timestamp may not be known by exchange operator.
  blockDataUpdateTsWindow(pRes, 0);
1828 1829
  return TSDB_CODE_SUCCESS;
}
1830

L
Liu Jicong 已提交
1831 1832
static void* setAllSourcesCompleted(SOperatorInfo* pOperator, int64_t startTs) {
  SExchangeInfo* pExchangeInfo = pOperator->info;
1833
  SExecTaskInfo* pTaskInfo = pOperator->pTaskInfo;
H
Haojun Liao 已提交
1834

1835
  int64_t              el = taosGetTimestampUs() - startTs;
H
Haojun Liao 已提交
1836
  SLoadRemoteDataInfo* pLoadInfo = &pExchangeInfo->loadInfo;
1837

H
Haojun Liao 已提交
1838
  pLoadInfo->totalElapsed += el;
H
Haojun Liao 已提交
1839

1840
  size_t totalSources = taosArrayGetSize(pExchangeInfo->pSources);
L
Liu Jicong 已提交
1841 1842 1843
  qDebug("%s all %" PRIzu " sources are exhausted, total rows: %" PRIu64 " bytes:%" PRIu64 ", elapsed:%.2f ms",
         GET_TASKID(pTaskInfo), totalSources, pLoadInfo->totalRows, pLoadInfo->totalSize,
         pLoadInfo->totalElapsed / 1000.0);
1844 1845 1846 1847 1848

  doSetOperatorCompleted(pOperator);
  return NULL;
}

1849 1850
static void concurrentlyLoadRemoteDataImpl(SOperatorInfo* pOperator, SExchangeInfo* pExchangeInfo,
                                           SExecTaskInfo* pTaskInfo) {
1851 1852 1853 1854
  int32_t code = 0;
  int64_t startTs = taosGetTimestampUs();
  size_t  totalSources = taosArrayGetSize(pExchangeInfo->pSources);

H
Haojun Liao 已提交
1855 1856 1857 1858 1859 1860 1861 1862 1863 1864 1865 1866 1867
  int32_t completed = 0;
  for (int32_t k = 0; k < totalSources; ++k) {
    SSourceDataInfo* p = taosArrayGet(pExchangeInfo->pSourceDataInfo, k);
    if (p->status == EX_SOURCE_DATA_EXHAUSTED) {
      completed += 1;
    }
  }

  if (completed == totalSources) {
    setAllSourcesCompleted(pOperator, startTs);
    return;
  }

1868
  while (1) {
1869
//    printf("1\n");
1870
    tsem_wait(&pExchangeInfo->ready);
1871
//    printf("2\n");
1872

1873 1874
    for (int32_t i = 0; i < totalSources; ++i) {
      SSourceDataInfo* pDataInfo = taosArrayGet(pExchangeInfo->pSourceDataInfo, i);
1875
      if (pDataInfo->status == EX_SOURCE_DATA_EXHAUSTED) {
1876
//        printf("========:%d is completed\n", i);
H
Haojun Liao 已提交
1877 1878
        continue;
      }
1879

H
Haojun Liao 已提交
1880
//      printf("index:%d -  status:%d\n", i, pDataInfo->status);
1881
      if (pDataInfo->status != EX_SOURCE_DATA_READY) {
1882
//        printf("-----------%d, status:%d, continue\n", i, pDataInfo->status);
1883 1884 1885
        continue;
      }

1886 1887 1888 1889 1890
      if (pDataInfo->code != TSDB_CODE_SUCCESS) {
        code = pDataInfo->code;
        goto _error;
      }

L
Liu Jicong 已提交
1891
      SRetrieveTableRsp*     pRsp = pDataInfo->pRsp;
X
Xiaoyu Wang 已提交
1892
      SDownstreamSourceNode* pSource = taosArrayGet(pExchangeInfo->pSources, i);
1893

1894
      // todo
H
Haojun Liao 已提交
1895
      SLoadRemoteDataInfo* pLoadInfo = &pExchangeInfo->loadInfo;
1896
      if (pRsp->numOfRows == 0) {
1897
        pDataInfo->status = EX_SOURCE_DATA_EXHAUSTED;
H
Haojun Liao 已提交
1898
//        printf("%d completed, try next\n", i);
1899

1900
        qDebug("%s vgId:%d, taskId:0x%" PRIx64 " execId:%d index:%d completed, rowsOfSource:%" PRIu64
1901
               ", totalRows:%" PRIu64 ", completed:%d try next %d/%" PRIzu,
D
dapan1121 已提交
1902
               GET_TASKID(pTaskInfo), pSource->addr.nodeId, pSource->taskId, pSource->execId, i, pDataInfo->totalRows,
H
Haojun Liao 已提交
1903
               pExchangeInfo->loadInfo.totalRows, completed, i + 1, totalSources);
D
dapan1121 已提交
1904
        taosMemoryFreeClear(pDataInfo->pRsp);
1905

H
Haojun Liao 已提交
1906 1907 1908 1909 1910 1911
//        if (completed == totalSources) {
//          return;
//        } else {
//          break;
//        }
         break;
1912
      }
H
Haojun Liao 已提交
1913

1914
      SRetrieveTableRsp* pRetrieveRsp = pDataInfo->pRsp;
dengyihao's avatar
dengyihao 已提交
1915 1916 1917
      int32_t            index = 0;
      char*              pStart = pRetrieveRsp->data;
      while (index++ < pRetrieveRsp->numOfBlocks) {
H
Haojun Liao 已提交
1918
        printf("results, numOfBLock: %d\n", pRetrieveRsp->numOfBlocks);
1919
        SSDataBlock* pb = createOneDataBlock(pExchangeInfo->pDummyBlock, false);
H
Haojun Liao 已提交
1920
        code = extractDataBlockFromFetchRsp(pb, pStart, NULL, &pStart);
1921 1922 1923 1924 1925 1926
        if (code != 0) {
          taosMemoryFreeClear(pDataInfo->pRsp);
          goto _error;
        }

        taosArrayPush(pExchangeInfo->pResultBlockList, &pb);
1927 1928
      }

1929
      updateLoadRemoteInfo(pLoadInfo, pRetrieveRsp->numOfRows, pRetrieveRsp->compLen, startTs, pOperator);
1930

H
Haojun Liao 已提交
1931
//      int32_t completed = 0;
1932
      if (pRsp->completed == 1) {
1933 1934
        pDataInfo->status = EX_SOURCE_DATA_EXHAUSTED;

H
Haojun Liao 已提交
1935 1936 1937 1938 1939 1940
//        for (int32_t k = 0; k < totalSources; ++k) {
//          SSourceDataInfo* p = taosArrayGet(pExchangeInfo->pSourceDataInfo, k);
//          if (p->status == EX_SOURCE_DATA_EXHAUSTED) {
//            completed += 1;
//          }
//        }
1941

dengyihao's avatar
dengyihao 已提交
1942
        qDebug("%s fetch msg rsp from vgId:%d, taskId:0x%" PRIx64
1943 1944
               " execId:%d index:%d completed, blocks:%d, numOfRows:%d, rowsOfSource:%" PRIu64 ", totalRows:%" PRIu64
               ", total:%.2f Kb, completed:%d try next %d/%" PRIzu,
H
Haojun Liao 已提交
1945 1946
               GET_TASKID(pTaskInfo), pSource->addr.nodeId, pSource->taskId, pSource->execId, i, pRsp->numOfBlocks,
               pRsp->numOfRows, pDataInfo->totalRows, pLoadInfo->totalRows, pLoadInfo->totalSize / 1024.0,
1947
               completed, i + 1, totalSources);
1948
      } else {
dengyihao's avatar
dengyihao 已提交
1949
        qDebug("%s fetch msg rsp from vgId:%d, taskId:0x%" PRIx64
1950
               " execId:%d blocks:%d, numOfRows:%d, totalRows:%" PRIu64 ", total:%.2f Kb",
dengyihao's avatar
dengyihao 已提交
1951 1952
               GET_TASKID(pTaskInfo), pSource->addr.nodeId, pSource->taskId, pSource->execId, pRsp->numOfBlocks,
               pRsp->numOfRows, pLoadInfo->totalRows, pLoadInfo->totalSize / 1024.0);
1953 1954
      }

1955 1956
      taosMemoryFreeClear(pDataInfo->pRsp);

1957 1958
      if (pDataInfo->status != EX_SOURCE_DATA_EXHAUSTED) {
        pDataInfo->status = EX_SOURCE_DATA_NOT_READY;
1959 1960
        code = doSendFetchDataRequest(pExchangeInfo, pTaskInfo, i);
        if (code != TSDB_CODE_SUCCESS) {
1961
          taosMemoryFreeClear(pDataInfo->pRsp);
1962 1963 1964 1965
          goto _error;
        }
      }

H
Haojun Liao 已提交
1966 1967 1968
//      if (completed == totalSources) {
//        setAllSourcesCompleted(pOperator, startTs);
//      }
1969

1970
      return;
1971 1972
    }

1973 1974 1975 1976 1977 1978 1979 1980
    int32_t completed = 0;
    for (int32_t k = 0; k < totalSources; ++k) {
      SSourceDataInfo* p = taosArrayGet(pExchangeInfo->pSourceDataInfo, k);
      if (p->status == EX_SOURCE_DATA_EXHAUSTED) {
        completed += 1;
      }
    }

1981
    if (completed == totalSources) {
1982
      return;
1983 1984 1985
    }
  }

1986
_error:
1987 1988 1989
  pTaskInfo->code = code;
}

L
Liu Jicong 已提交
1990 1991 1992
static int32_t prepareConcurrentlyLoad(SOperatorInfo* pOperator) {
  SExchangeInfo* pExchangeInfo = pOperator->info;
  SExecTaskInfo* pTaskInfo = pOperator->pTaskInfo;
1993

L
Liu Jicong 已提交
1994
  size_t  totalSources = taosArrayGetSize(pExchangeInfo->pSources);
1995 1996 1997
  int64_t startTs = taosGetTimestampUs();

  // Asynchronously send all fetch requests to all sources.
L
Liu Jicong 已提交
1998
  for (int32_t i = 0; i < totalSources; ++i) {
1999 2000
    int32_t code = doSendFetchDataRequest(pExchangeInfo, pTaskInfo, i);
    if (code != TSDB_CODE_SUCCESS) {
H
Haojun Liao 已提交
2001 2002
      pTaskInfo->code = code;
      return code;
2003 2004 2005 2006
    }
  }

  int64_t endTs = taosGetTimestampUs();
2007
  qDebug("%s send all fetch requests to %" PRIzu " sources completed, elapsed:%.2fms", GET_TASKID(pTaskInfo),
X
Xiaoyu Wang 已提交
2008
         totalSources, (endTs - startTs) / 1000.0);
2009

2010
  pOperator->status = OP_RES_TO_RETURN;
H
Haojun Liao 已提交
2011
  pOperator->cost.openCost = taosGetTimestampUs() - startTs;
2012

2013
  tsem_wait(&pExchangeInfo->ready);
2014
  tsem_post(&pExchangeInfo->ready);
H
Haojun Liao 已提交
2015
  return TSDB_CODE_SUCCESS;
2016 2017
}

2018
static int32_t seqLoadRemoteData(SOperatorInfo* pOperator) {
L
Liu Jicong 已提交
2019 2020
  SExchangeInfo* pExchangeInfo = pOperator->info;
  SExecTaskInfo* pTaskInfo = pOperator->pTaskInfo;
2021

L
Liu Jicong 已提交
2022
  size_t  totalSources = taosArrayGetSize(pExchangeInfo->pSources);
2023
  int64_t startTs = taosGetTimestampUs();
2024

L
Liu Jicong 已提交
2025
  while (1) {
2026
    if (pExchangeInfo->current >= totalSources) {
2027 2028
      setAllSourcesCompleted(pOperator, startTs);
      return TSDB_CODE_SUCCESS;
2029
    }
2030

2031 2032 2033
    doSendFetchDataRequest(pExchangeInfo, pTaskInfo, pExchangeInfo->current);
    tsem_wait(&pExchangeInfo->ready);

dengyihao's avatar
dengyihao 已提交
2034
    SSourceDataInfo*       pDataInfo = taosArrayGet(pExchangeInfo->pSourceDataInfo, pExchangeInfo->current);
X
Xiaoyu Wang 已提交
2035
    SDownstreamSourceNode* pSource = taosArrayGet(pExchangeInfo->pSources, pExchangeInfo->current);
2036

H
Haojun Liao 已提交
2037
    if (pDataInfo->code != TSDB_CODE_SUCCESS) {
2038 2039
      qError("%s vgId:%d, taskID:0x%" PRIx64 " execId:%d error happens, code:%s", GET_TASKID(pTaskInfo),
             pSource->addr.nodeId, pSource->taskId, pSource->execId, tstrerror(pDataInfo->code));
H
Haojun Liao 已提交
2040
      pOperator->pTaskInfo->code = pDataInfo->code;
2041
      return pOperator->pTaskInfo->code;
H
Haojun Liao 已提交
2042 2043
    }

L
Liu Jicong 已提交
2044
    SRetrieveTableRsp*   pRsp = pDataInfo->pRsp;
H
Haojun Liao 已提交
2045
    SLoadRemoteDataInfo* pLoadInfo = &pExchangeInfo->loadInfo;
2046
    if (pRsp->numOfRows == 0) {
2047
      qDebug("%s vgId:%d, taskID:0x%" PRIx64 " execId:%d %d of total completed, rowsOfSource:%" PRIu64
2048
             ", totalRows:%" PRIu64 " try next",
D
dapan1121 已提交
2049
             GET_TASKID(pTaskInfo), pSource->addr.nodeId, pSource->taskId, pSource->execId, pExchangeInfo->current + 1,
H
Haojun Liao 已提交
2050
             pDataInfo->totalRows, pLoadInfo->totalRows);
H
Haojun Liao 已提交
2051

2052
      pDataInfo->status = EX_SOURCE_DATA_EXHAUSTED;
2053
      pExchangeInfo->current += 1;
D
dapan1121 已提交
2054
      taosMemoryFreeClear(pDataInfo->pRsp);
2055 2056
      continue;
    }
H
Haojun Liao 已提交
2057

2058 2059 2060
    SRetrieveTableRsp* pRetrieveRsp = pDataInfo->pRsp;

    char*   pStart = pRetrieveRsp->data;
H
Haojun Liao 已提交
2061
    int32_t code = extractDataBlockFromFetchRsp(NULL, pStart, NULL, &pStart);
2062 2063

    if (pRsp->completed == 1) {
D
dapan1121 已提交
2064
      qDebug("%s fetch msg rsp from vgId:%d, taskId:0x%" PRIx64 " execId:%d numOfRows:%d, rowsOfSource:%" PRIu64
2065
             ", totalRows:%" PRIu64 ", totalBytes:%" PRIu64 " try next %d/%" PRIzu,
2066
             GET_TASKID(pTaskInfo), pSource->addr.nodeId, pSource->taskId, pSource->execId, pRetrieveRsp->numOfRows,
2067 2068
             pDataInfo->totalRows, pLoadInfo->totalRows, pLoadInfo->totalSize, pExchangeInfo->current + 1,
             totalSources);
2069

2070
      pDataInfo->status = EX_SOURCE_DATA_EXHAUSTED;
2071 2072
      pExchangeInfo->current += 1;
    } else {
D
dapan1121 已提交
2073
      qDebug("%s fetch msg rsp from vgId:%d, taskId:0x%" PRIx64 " execId:%d numOfRows:%d, totalRows:%" PRIu64
2074
             ", totalBytes:%" PRIu64,
2075
             GET_TASKID(pTaskInfo), pSource->addr.nodeId, pSource->taskId, pSource->execId, pRetrieveRsp->numOfRows,
2076
             pLoadInfo->totalRows, pLoadInfo->totalSize);
2077 2078
    }

2079 2080 2081
    updateLoadRemoteInfo(pLoadInfo, pRetrieveRsp->numOfRows, pRetrieveRsp->compLen, startTs, pOperator);
    pDataInfo->totalRows += pRetrieveRsp->numOfRows;

2082
    taosMemoryFreeClear(pDataInfo->pRsp);
2083
    return TSDB_CODE_SUCCESS;
2084
  }
2085 2086
}

L
Liu Jicong 已提交
2087
static int32_t prepareLoadRemoteData(SOperatorInfo* pOperator) {
2088
  if (OPTR_IS_OPENED(pOperator)) {
H
Haojun Liao 已提交
2089 2090 2091
    return TSDB_CODE_SUCCESS;
  }

2092 2093
  int64_t st = taosGetTimestampUs();

L
Liu Jicong 已提交
2094
  SExchangeInfo* pExchangeInfo = pOperator->info;
2095
  if (!pExchangeInfo->seqLoadData) {
H
Haojun Liao 已提交
2096 2097 2098 2099 2100 2101
    int32_t code = prepareConcurrentlyLoad(pOperator);
    if (code != TSDB_CODE_SUCCESS) {
      return code;
    }
  }

2102
  OPTR_SET_OPENED(pOperator);
2103
  pOperator->cost.openCost = (taosGetTimestampUs() - st) / 1000.0;
H
Haojun Liao 已提交
2104 2105 2106
  return TSDB_CODE_SUCCESS;
}

2107 2108 2109 2110 2111
static void freeBlock(void* pParam) {
  SSDataBlock* pBlock = *(SSDataBlock**)pParam;
  blockDataDestroy(pBlock);
}

2112
static SSDataBlock* doLoadRemoteDataImpl(SOperatorInfo* pOperator) {
L
Liu Jicong 已提交
2113 2114
  SExchangeInfo* pExchangeInfo = pOperator->info;
  SExecTaskInfo* pTaskInfo = pOperator->pTaskInfo;
2115

2116
  pTaskInfo->code = pOperator->fpSet._openFn(pOperator);
2117
  if (pTaskInfo->code != TSDB_CODE_SUCCESS) {
H
Haojun Liao 已提交
2118 2119
    return NULL;
  }
2120

2121
  size_t totalSources = taosArrayGetSize(pExchangeInfo->pSources);
H
Haojun Liao 已提交
2122

2123
  SLoadRemoteDataInfo* pLoadInfo = &pExchangeInfo->loadInfo;
2124
  if (pOperator->status == OP_EXEC_DONE) {
L
Liu Jicong 已提交
2125 2126 2127
    qDebug("%s all %" PRIzu " source(s) are exhausted, total rows:%" PRIu64 " bytes:%" PRIu64 ", elapsed:%.2f ms",
           GET_TASKID(pTaskInfo), totalSources, pLoadInfo->totalRows, pLoadInfo->totalSize,
           pLoadInfo->totalElapsed / 1000.0);
2128 2129 2130
    return NULL;
  }

2131 2132 2133 2134 2135 2136 2137 2138 2139 2140 2141 2142 2143
  size_t size = taosArrayGetSize(pExchangeInfo->pResultBlockList);
  if (size == 0 || pExchangeInfo->rspBlockIndex >= size) {
    pExchangeInfo->rspBlockIndex = 0;
    taosArrayClearEx(pExchangeInfo->pResultBlockList, freeBlock);
    if (pExchangeInfo->seqLoadData) {
      seqLoadRemoteData(pOperator);
    } else {
      concurrentlyLoadRemoteDataImpl(pOperator, pExchangeInfo, pTaskInfo);
    }

    if (taosArrayGetSize(pExchangeInfo->pResultBlockList) == 0) {
      return NULL;
    }
2144
  }
2145 2146 2147

  // we have buffered retrieved datablock, return it directly
  return taosArrayGetP(pExchangeInfo->pResultBlockList, pExchangeInfo->rspBlockIndex++);
H
Haojun Liao 已提交
2148
}
2149

2150 2151 2152 2153 2154 2155 2156 2157
static SSDataBlock* doLoadRemoteData(SOperatorInfo* pOperator) {
  SExchangeInfo* pExchangeInfo = pOperator->info;
  SExecTaskInfo* pTaskInfo = pOperator->pTaskInfo;

  if (pOperator->status == OP_EXEC_DONE) {
    return NULL;
  }

L
Liu Jicong 已提交
2158
  while (1) {
2159 2160 2161 2162 2163 2164 2165
    SSDataBlock* pBlock = doLoadRemoteDataImpl(pOperator);
    if (pBlock == NULL) {
      return NULL;
    }

    SLimitInfo* pLimitInfo = &pExchangeInfo->limitInfo;
    if (hasLimitOffsetInfo(pLimitInfo)) {
2166
      int32_t status = handleLimitOffset(pOperator, pLimitInfo, pBlock, false);
2167 2168 2169
      if (status == PROJECT_RETRIEVE_CONTINUE) {
        continue;
      } else if (status == PROJECT_RETRIEVE_DONE) {
2170
        size_t rows = pBlock->info.rows;
2171 2172 2173 2174 2175 2176
        pExchangeInfo->limitInfo.numOfOutputRows += rows;

        if (rows == 0) {
          doSetOperatorCompleted(pOperator);
          return NULL;
        } else {
2177
          return pBlock;
2178 2179 2180
        }
      }
    } else {
2181
      return pBlock;
2182 2183 2184 2185
    }
  }
}

2186
static int32_t initDataSource(int32_t numOfSources, SExchangeInfo* pInfo, const char* id) {
2187
  pInfo->pSourceDataInfo = taosArrayInit(numOfSources, sizeof(SSourceDataInfo));
H
Haojun Liao 已提交
2188 2189
  if (pInfo->pSourceDataInfo == NULL) {
    return TSDB_CODE_OUT_OF_MEMORY;
2190 2191
  }

L
Liu Jicong 已提交
2192
  for (int32_t i = 0; i < numOfSources; ++i) {
2193
    SSourceDataInfo dataInfo = {0};
H
Haojun Liao 已提交
2194
    dataInfo.status = EX_SOURCE_DATA_NOT_READY;
2195
    dataInfo.taskId = id;
L
Liu Jicong 已提交
2196
    dataInfo.index = i;
X
Xiaoyu Wang 已提交
2197
    SSourceDataInfo* pDs = taosArrayPush(pInfo->pSourceDataInfo, &dataInfo);
2198
    if (pDs == NULL) {
H
Haojun Liao 已提交
2199 2200 2201 2202 2203 2204 2205 2206
      taosArrayDestroy(pInfo->pSourceDataInfo);
      return TSDB_CODE_OUT_OF_MEMORY;
    }
  }

  return TSDB_CODE_SUCCESS;
}

2207
static int32_t initExchangeOperator(SExchangePhysiNode* pExNode, SExchangeInfo* pInfo, const char* id) {
2208
  size_t numOfSources = LIST_LENGTH(pExNode->pSrcEndPoints);
H
Haojun Liao 已提交
2209

2210
  if (numOfSources == 0) {
X
Xiaoyu Wang 已提交
2211
    qError("%s invalid number: %d of sources in exchange operator", id, (int32_t)numOfSources);
2212 2213 2214
    return TSDB_CODE_INVALID_PARA;
  }

H
Haojun Liao 已提交
2215
  pInfo->pSources = taosArrayInit(numOfSources, sizeof(SDownstreamSourceNode));
wmmhello's avatar
wmmhello 已提交
2216
  if (pInfo->pSources == NULL) {
2217
    return TSDB_CODE_OUT_OF_MEMORY;
H
Haojun Liao 已提交
2218 2219
  }

L
Liu Jicong 已提交
2220
  for (int32_t i = 0; i < numOfSources; ++i) {
D
dapan1121 已提交
2221
    SDownstreamSourceNode* pNode = (SDownstreamSourceNode*)nodesListGetNode((SNodeList*)pExNode->pSrcEndPoints, i);
H
Haojun Liao 已提交
2222 2223
    taosArrayPush(pInfo->pSources, pNode);
  }
2224

2225
  initLimitInfo(pExNode->node.pLimit, pExNode->node.pSlimit, &pInfo->limitInfo);
2226 2227
  pInfo->self = taosAddRef(exchangeObjRefPool, pInfo);

2228
  return initDataSource(numOfSources, pInfo, id);
2229 2230 2231 2232 2233 2234
}

SOperatorInfo* createExchangeOperatorInfo(void* pTransporter, SExchangePhysiNode* pExNode, SExecTaskInfo* pTaskInfo) {
  SExchangeInfo* pInfo = taosMemoryCalloc(1, sizeof(SExchangeInfo));
  SOperatorInfo* pOperator = taosMemoryCalloc(1, sizeof(SOperatorInfo));
  if (pInfo == NULL || pOperator == NULL) {
H
Haojun Liao 已提交
2235
    goto _error;
2236
  }
H
Haojun Liao 已提交
2237

2238
  int32_t code = initExchangeOperator(pExNode, pInfo, GET_TASKID(pTaskInfo));
2239 2240 2241
  if (code != TSDB_CODE_SUCCESS) {
    goto _error;
  }
2242 2243

  tsem_init(&pInfo->ready, 0, 0);
2244 2245
  pInfo->pDummyBlock = createResDataBlock(pExNode->node.pOutputDataBlockDesc);
  pInfo->pResultBlockList = taosArrayInit(1, POINTER_BYTES);
2246

2247
  pInfo->seqLoadData = false;
2248
  pInfo->pTransporter = pTransporter;
2249

2250
  pOperator->name = "ExchangeOperator";
X
Xiaoyu Wang 已提交
2251
  pOperator->operatorType = QUERY_NODE_PHYSICAL_PLAN_EXCHANGE;
X
Xiaoyu Wang 已提交
2252
  pOperator->blocking = false;
2253 2254
  pOperator->status = OP_NOT_OPENED;
  pOperator->info = pInfo;
2255
  pOperator->exprSupp.numOfExprs = taosArrayGetSize(pInfo->pDummyBlock->pDataBlock);
X
Xiaoyu Wang 已提交
2256
  pOperator->pTaskInfo = pTaskInfo;
2257

5
54liuyao 已提交
2258 2259
  pOperator->fpSet =
      createOperatorFpSet(prepareLoadRemoteData, doLoadRemoteData, NULL, NULL, destroyExchangeOperatorInfo, NULL);
2260
  return pOperator;
H
Haojun Liao 已提交
2261

2262
_error:
H
Haojun Liao 已提交
2263
  if (pInfo != NULL) {
2264
    doDestroyExchangeOperatorInfo(pInfo);
H
Haojun Liao 已提交
2265 2266
  }

wafwerar's avatar
wafwerar 已提交
2267
  taosMemoryFreeClear(pOperator);
2268
  pTaskInfo->code = code;
H
Haojun Liao 已提交
2269
  return NULL;
2270 2271
}

dengyihao's avatar
dengyihao 已提交
2272 2273
static int32_t doInitAggInfoSup(SAggSupporter* pAggSup, SqlFunctionCtx* pCtx, int32_t numOfOutput, size_t keyBufSize,
                                const char* pKey);
2274

L
Liu Jicong 已提交
2275
static bool needToMerge(SSDataBlock* pBlock, SArray* groupInfo, char** buf, int32_t rowIndex) {
2276 2277 2278 2279
  size_t size = taosArrayGetSize(groupInfo);
  if (size == 0) {
    return true;
  }
2280

2281 2282
  for (int32_t i = 0; i < size; ++i) {
    int32_t* index = taosArrayGet(groupInfo, i);
2283

2284
    SColumnInfoData* pColInfo = taosArrayGet(pBlock->pDataBlock, *index);
L
Liu Jicong 已提交
2285
    bool             isNull = colDataIsNull(pColInfo, rowIndex, pBlock->info.rows, NULL);
2286

2287 2288 2289
    if ((isNull && buf[i] != NULL) || (!isNull && buf[i] == NULL)) {
      return false;
    }
2290

2291 2292 2293 2294 2295 2296 2297 2298 2299 2300 2301 2302 2303
    char* pCell = colDataGetData(pColInfo, rowIndex);
    if (IS_VAR_DATA_TYPE(pColInfo->info.type)) {
      if (varDataLen(pCell) != varDataLen(buf[i])) {
        return false;
      } else {
        if (memcmp(varDataVal(pCell), varDataVal(buf[i]), varDataLen(pCell)) != 0) {
          return false;
        }
      }
    } else {
      if (memcmp(pCell, buf[i], pColInfo->info.bytes) != 0) {
        return false;
      }
2304 2305 2306
    }
  }

2307
  return 0;
2308 2309
}

2310
static bool saveCurrentTuple(char** rowColData, SArray* pColumnList, SSDataBlock* pBlock, int32_t rowIndex) {
L
Liu Jicong 已提交
2311
  int32_t size = (int32_t)taosArrayGetSize(pColumnList);
2312

L
Liu Jicong 已提交
2313 2314
  for (int32_t i = 0; i < size; ++i) {
    int32_t*         index = taosArrayGet(pColumnList, i);
2315
    SColumnInfoData* pColInfo = taosArrayGet(pBlock->pDataBlock, *index);
H
Haojun Liao 已提交
2316

2317 2318 2319
    char* data = colDataGetData(pColInfo, rowIndex);
    memcpy(rowColData[i], data, colDataGetLength(pColInfo, rowIndex));
  }
2320

2321 2322
  return true;
}
2323

X
Xiaoyu Wang 已提交
2324
int32_t getTableScanInfo(SOperatorInfo* pOperator, int32_t* order, int32_t* scanFlag) {
2325
  // todo add more information about exchange operation
2326
  int32_t type = pOperator->operatorType;
X
Xiaoyu Wang 已提交
2327
  if (type == QUERY_NODE_PHYSICAL_PLAN_EXCHANGE || type == QUERY_NODE_PHYSICAL_PLAN_SYSTABLE_SCAN ||
2328
      type == QUERY_NODE_PHYSICAL_PLAN_STREAM_SCAN || type == QUERY_NODE_PHYSICAL_PLAN_TAG_SCAN ||
2329
      type == QUERY_NODE_PHYSICAL_PLAN_BLOCK_DIST_SCAN || type == QUERY_NODE_PHYSICAL_PLAN_LAST_ROW_SCAN) {
2330 2331 2332
    *order = TSDB_ORDER_ASC;
    *scanFlag = MAIN_SCAN;
    return TSDB_CODE_SUCCESS;
2333
  } else if (type == QUERY_NODE_PHYSICAL_PLAN_TABLE_SCAN) {
2334 2335 2336 2337
    STableScanInfo* pTableScanInfo = pOperator->info;
    *order = pTableScanInfo->cond.order;
    *scanFlag = pTableScanInfo->scanFlag;
    return TSDB_CODE_SUCCESS;
2338 2339 2340 2341 2342
  } else if (type == QUERY_NODE_PHYSICAL_PLAN_TABLE_MERGE_SCAN) {
    STableMergeScanInfo* pTableScanInfo = pOperator->info;
    *order = pTableScanInfo->cond.order;
    *scanFlag = pTableScanInfo->scanFlag;
    return TSDB_CODE_SUCCESS;
2343
  } else {
H
Haojun Liao 已提交
2344
    if (pOperator->pDownstream == NULL || pOperator->pDownstream[0] == NULL) {
2345
      return TSDB_CODE_INVALID_PARA;
H
Haojun Liao 已提交
2346
    } else {
2347
      return getTableScanInfo(pOperator->pDownstream[0], order, scanFlag);
2348 2349 2350
    }
  }
}
2351

2352
// this is a blocking operator
L
Liu Jicong 已提交
2353
static int32_t doOpenAggregateOptr(SOperatorInfo* pOperator) {
H
Haojun Liao 已提交
2354 2355
  if (OPTR_IS_OPENED(pOperator)) {
    return TSDB_CODE_SUCCESS;
2356 2357
  }

H
Haojun Liao 已提交
2358
  SExecTaskInfo*    pTaskInfo = pOperator->pTaskInfo;
2359
  SAggOperatorInfo* pAggInfo = pOperator->info;
H
Haojun Liao 已提交
2360

2361 2362
  SExprSupp*     pSup = &pOperator->exprSupp;
  SOperatorInfo* downstream = pOperator->pDownstream[0];
2363

2364 2365
  int64_t st = taosGetTimestampUs();

2366 2367 2368
  int32_t order = TSDB_ORDER_ASC;
  int32_t scanFlag = MAIN_SCAN;

H
Haojun Liao 已提交
2369
  while (1) {
2370
    SSDataBlock* pBlock = downstream->fpSet.getNextFn(downstream);
2371 2372 2373 2374
    if (pBlock == NULL) {
      break;
    }

2375 2376
    int32_t code = getTableScanInfo(pOperator, &order, &scanFlag);
    if (code != TSDB_CODE_SUCCESS) {
2377
      T_LONG_JMP(pTaskInfo->env, code);
2378
    }
2379

2380
    // there is an scalar expression that needs to be calculated before apply the group aggregation.
2381 2382 2383
    if (pAggInfo->scalarExprSup.pExprInfo != NULL) {
      SExprSupp* pSup1 = &pAggInfo->scalarExprSup;
      code = projectApplyFunctions(pSup1->pExprInfo, pBlock, pBlock, pSup1->pCtx, pSup1->numOfExprs, NULL);
2384
      if (code != TSDB_CODE_SUCCESS) {
2385
        T_LONG_JMP(pTaskInfo->env, code);
2386
      }
2387 2388
    }

2389
    // the pDataBlock are always the same one, no need to call this again
2390
    setExecutionContext(pOperator, pOperator->exprSupp.numOfExprs, pBlock->info.groupId);
2391
    setInputDataBlock(pSup, pBlock, order, scanFlag, true);
2392
    code = doAggregateImpl(pOperator, pSup->pCtx);
2393
    if (code != 0) {
2394
      T_LONG_JMP(pTaskInfo->env, code);
2395
    }
2396 2397
  }

2398
  initGroupedResultInfo(&pAggInfo->groupResInfo, pAggInfo->aggSup.pResultRowHashTable, 0);
H
Haojun Liao 已提交
2399
  OPTR_SET_OPENED(pOperator);
2400

2401
  pOperator->cost.openCost = (taosGetTimestampUs() - st) / 1000.0;
H
Haojun Liao 已提交
2402 2403 2404
  return TSDB_CODE_SUCCESS;
}

2405
static SSDataBlock* getAggregateResult(SOperatorInfo* pOperator) {
L
Liu Jicong 已提交
2406
  SAggOperatorInfo* pAggInfo = pOperator->info;
H
Haojun Liao 已提交
2407 2408 2409 2410 2411 2412
  SOptrBasicInfo*   pInfo = &pAggInfo->binfo;

  if (pOperator->status == OP_EXEC_DONE) {
    return NULL;
  }

L
Liu Jicong 已提交
2413
  SExecTaskInfo* pTaskInfo = pOperator->pTaskInfo;
2414
  pTaskInfo->code = pOperator->fpSet._openFn(pOperator);
H
Haojun Liao 已提交
2415
  if (pTaskInfo->code != TSDB_CODE_SUCCESS) {
2416
    doSetOperatorCompleted(pOperator);
H
Haojun Liao 已提交
2417 2418 2419
    return NULL;
  }

H
Haojun Liao 已提交
2420
  blockDataEnsureCapacity(pInfo->pRes, pOperator->resultInfo.capacity);
S
slzhou 已提交
2421 2422
  while (1) {
    doBuildResultDatablock(pOperator, pInfo, &pAggInfo->groupResInfo, pAggInfo->aggSup.pResultBuf);
H
Haojun Liao 已提交
2423
    doFilter(pInfo->pRes, pOperator->exprSupp.pFilterInfo, NULL);
S
slzhou 已提交
2424

2425
    if (!hasRemainResults(&pAggInfo->groupResInfo)) {
S
slzhou 已提交
2426 2427 2428
      doSetOperatorCompleted(pOperator);
      break;
    }
2429

S
slzhou 已提交
2430 2431 2432 2433
    if (pInfo->pRes->info.rows > 0) {
      break;
    }
  }
2434

2435
  size_t rows = blockDataGetNumOfRows(pInfo->pRes);
2436 2437
  pOperator->resultInfo.totalRows += rows;

2438
  return (rows == 0) ? NULL : pInfo->pRes;
2439 2440
}

wmmhello's avatar
wmmhello 已提交
2441
int32_t aggEncodeResultRow(SOperatorInfo* pOperator, char** result, int32_t* length) {
2442
  if (result == NULL || length == NULL) {
wmmhello's avatar
wmmhello 已提交
2443 2444 2445
    return TSDB_CODE_TSC_INVALID_INPUT;
  }
  SOptrBasicInfo* pInfo = (SOptrBasicInfo*)(pOperator->info);
2446
  SAggSupporter*  pSup = (SAggSupporter*)POINTER_SHIFT(pOperator->info, sizeof(SOptrBasicInfo));
2447
  int32_t         size = tSimpleHashGetSize(pSup->pResultRowHashTable);
2448 2449 2450
  size_t          keyLen = sizeof(uint64_t) * 2;  // estimate the key length
  int32_t         totalSize =
      sizeof(int32_t) + sizeof(int32_t) + size * (sizeof(int32_t) + keyLen + sizeof(int32_t) + pSup->resultRowSize);
wmmhello's avatar
wmmhello 已提交
2451

C
Cary Xu 已提交
2452 2453 2454 2455 2456 2457
  // no result
  if (getTotalBufSize(pSup->pResultBuf) == 0) {
    *result = NULL;
    *length = 0;
    return TSDB_CODE_SUCCESS;
  }
2458

wmmhello's avatar
wmmhello 已提交
2459
  *result = (char*)taosMemoryCalloc(1, totalSize);
L
Liu Jicong 已提交
2460
  if (*result == NULL) {
wmmhello's avatar
wmmhello 已提交
2461
    return TSDB_CODE_OUT_OF_MEMORY;
wmmhello's avatar
wmmhello 已提交
2462
  }
wmmhello's avatar
wmmhello 已提交
2463

wmmhello's avatar
wmmhello 已提交
2464
  int32_t offset = sizeof(int32_t);
wmmhello's avatar
wmmhello 已提交
2465 2466
  *(int32_t*)(*result + offset) = size;
  offset += sizeof(int32_t);
2467 2468

  // prepare memory
2469
  SResultRowPosition* pos = &pInfo->resultRowInfo.cur;
dengyihao's avatar
dengyihao 已提交
2470 2471
  void*               pPage = getBufPage(pSup->pResultBuf, pos->pageId);
  SResultRow*         pRow = (SResultRow*)((char*)pPage + pos->offset);
2472 2473
  setBufPageDirty(pPage, true);
  releaseBufPage(pSup->pResultBuf, pPage);
L
Liu Jicong 已提交
2474

2475 2476 2477 2478
  int32_t iter = 0;
  void*   pIter = NULL;
  while ((pIter = tSimpleHashIterate(pSup->pResultRowHashTable, pIter, &iter))) {
    void*               key = tSimpleHashGetKey(pIter, &keyLen);
2479
    SResultRowPosition* p1 = (SResultRowPosition*)pIter;
2480

dengyihao's avatar
dengyihao 已提交
2481
    pPage = (SFilePage*)getBufPage(pSup->pResultBuf, p1->pageId);
2482
    pRow = (SResultRow*)((char*)pPage + p1->offset);
2483 2484
    setBufPageDirty(pPage, true);
    releaseBufPage(pSup->pResultBuf, pPage);
wmmhello's avatar
wmmhello 已提交
2485 2486 2487

    // recalculate the result size
    int32_t realTotalSize = offset + sizeof(int32_t) + keyLen + sizeof(int32_t) + pSup->resultRowSize;
L
Liu Jicong 已提交
2488
    if (realTotalSize > totalSize) {
wmmhello's avatar
wmmhello 已提交
2489
      char* tmp = (char*)taosMemoryRealloc(*result, realTotalSize);
L
Liu Jicong 已提交
2490
      if (tmp == NULL) {
wafwerar's avatar
wafwerar 已提交
2491
        taosMemoryFree(*result);
wmmhello's avatar
wmmhello 已提交
2492
        *result = NULL;
wmmhello's avatar
wmmhello 已提交
2493
        return TSDB_CODE_OUT_OF_MEMORY;
L
Liu Jicong 已提交
2494
      } else {
wmmhello's avatar
wmmhello 已提交
2495 2496 2497 2498 2499 2500 2501 2502 2503 2504 2505 2506
        *result = tmp;
      }
    }
    // save key
    *(int32_t*)(*result + offset) = keyLen;
    offset += sizeof(int32_t);
    memcpy(*result + offset, key, keyLen);
    offset += keyLen;

    // save value
    *(int32_t*)(*result + offset) = pSup->resultRowSize;
    offset += sizeof(int32_t);
2507
    memcpy(*result + offset, pRow, pSup->resultRowSize);
wmmhello's avatar
wmmhello 已提交
2508 2509 2510
    offset += pSup->resultRowSize;
  }

wmmhello's avatar
wmmhello 已提交
2511 2512 2513 2514
  *(int32_t*)(*result) = offset;
  *length = offset;

  return TDB_CODE_SUCCESS;
wmmhello's avatar
wmmhello 已提交
2515 2516
}

2517 2518 2519 2520 2521
int32_t handleLimitOffset(SOperatorInfo* pOperator, SLimitInfo* pLimitInfo, SSDataBlock* pBlock, bool holdDataInBuf) {
  if (pLimitInfo->remainGroupOffset > 0) {
    if (pLimitInfo->currentGroupId == 0) {  // it is the first group
      pLimitInfo->currentGroupId = pBlock->info.groupId;
      blockDataCleanup(pBlock);
2522
      return PROJECT_RETRIEVE_CONTINUE;
2523 2524 2525
    } else if (pLimitInfo->currentGroupId != pBlock->info.groupId) {
      // now it is the data from a new group
      pLimitInfo->remainGroupOffset -= 1;
2526 2527

      // ignore data block in current group
2528 2529
      if (pLimitInfo->remainGroupOffset > 0) {
        blockDataCleanup(pBlock);
2530 2531 2532 2533 2534
        return PROJECT_RETRIEVE_CONTINUE;
      }
    }

    // set current group id of the project operator
2535
    pLimitInfo->currentGroupId = pBlock->info.groupId;
2536 2537
  }

2538
  // here check for a new group data, we need to handle the data of the previous group.
2539 2540 2541
  if (pLimitInfo->currentGroupId != 0 && pLimitInfo->currentGroupId != pBlock->info.groupId) {
    pLimitInfo->numOfOutputGroups += 1;
    if ((pLimitInfo->slimit.limit > 0) && (pLimitInfo->slimit.limit <= pLimitInfo->numOfOutputGroups)) {
2542
      pOperator->status = OP_EXEC_DONE;
2543
      blockDataCleanup(pBlock);
2544 2545 2546 2547 2548

      return PROJECT_RETRIEVE_DONE;
    }

    // reset the value for a new group data
2549 2550
    pLimitInfo->numOfOutputRows = 0;
    pLimitInfo->remainOffset = pLimitInfo->limit.offset;
2551 2552 2553 2554 2555

    // existing rows that belongs to previous group.
    if (pBlock->info.rows > 0) {
      return PROJECT_RETRIEVE_DONE;
    }
2556 2557 2558 2559 2560
  }

  // here we reach the start position, according to the limit/offset requirements.

  // set current group id
2561
  pLimitInfo->currentGroupId = pBlock->info.groupId;
2562

2563 2564 2565
  if (pLimitInfo->remainOffset >= pBlock->info.rows) {
    pLimitInfo->remainOffset -= pBlock->info.rows;
    blockDataCleanup(pBlock);
2566
    return PROJECT_RETRIEVE_CONTINUE;
2567 2568 2569
  } else if (pLimitInfo->remainOffset < pBlock->info.rows && pLimitInfo->remainOffset > 0) {
    blockDataTrimFirstNRows(pBlock, pLimitInfo->remainOffset);
    pLimitInfo->remainOffset = 0;
2570 2571
  }

2572
  // check for the limitation in each group
2573 2574 2575 2576
  if (pLimitInfo->limit.limit >= 0 && pLimitInfo->numOfOutputRows + pBlock->info.rows >= pLimitInfo->limit.limit) {
    int32_t keepRows = (int32_t)(pLimitInfo->limit.limit - pLimitInfo->numOfOutputRows);
    blockDataKeepFirstNRows(pBlock, keepRows);
    if (pLimitInfo->slimit.limit > 0 && pLimitInfo->slimit.limit <= pLimitInfo->numOfOutputGroups) {
2577 2578 2579
      pOperator->status = OP_EXEC_DONE;
    }

2580
    return PROJECT_RETRIEVE_DONE;
2581
  }
2582

2583
  // todo optimize performance
2584 2585
  // If there are slimit/soffset value exists, multi-round result can not be packed into one group, since the
  // they may not belong to the same group the limit/offset value is not valid in this case.
2586 2587
  if ((!holdDataInBuf) || (pBlock->info.rows >= pOperator->resultInfo.threshold) || pLimitInfo->slimit.offset != -1 ||
      pLimitInfo->slimit.limit != -1) {
2588
    return PROJECT_RETRIEVE_DONE;
L
Liu Jicong 已提交
2589
  } else {  // not full enough, continue to accumulate the output data in the buffer.
2590 2591 2592 2593
    return PROJECT_RETRIEVE_CONTINUE;
  }
}

2594
static void doApplyScalarCalculation(SOperatorInfo* pOperator, SSDataBlock* pBlock, int32_t order, int32_t scanFlag);
L
Liu Jicong 已提交
2595 2596
static void doHandleRemainBlockForNewGroupImpl(SOperatorInfo* pOperator, SFillOperatorInfo* pInfo,
                                               SResultInfo* pResultInfo, SExecTaskInfo* pTaskInfo) {
2597
  pInfo->totalInputRows = pInfo->existNewGroupBlock->info.rows;
2598 2599 2600 2601 2602
  SSDataBlock* pResBlock = pInfo->pFinalRes;

  int32_t order = TSDB_ORDER_ASC;
  int32_t scanFlag = MAIN_SCAN;
  getTableScanInfo(pOperator, &order, &scanFlag);
H
Haojun Liao 已提交
2603

L
Liu Jicong 已提交
2604 2605
  int64_t ekey =
      Q_STATUS_EQUAL(pTaskInfo->status, TASK_COMPLETED) ? pInfo->win.ekey : pInfo->existNewGroupBlock->info.window.ekey;
2606 2607
  taosResetFillInfo(pInfo->pFillInfo, getFillInfoStart(pInfo->pFillInfo));

2608
  blockDataCleanup(pInfo->pRes);
2609 2610 2611 2612
  doApplyScalarCalculation(pOperator, pInfo->existNewGroupBlock, order, scanFlag);

  taosFillSetStartInfo(pInfo->pFillInfo, pInfo->pRes->info.rows, ekey);
  taosFillSetInputDataBlock(pInfo->pFillInfo, pInfo->pRes);
2613

2614 2615
  int32_t numOfResultRows = pResultInfo->capacity - pResBlock->info.rows;
  taosFillResultDataBlock(pInfo->pFillInfo, pResBlock, numOfResultRows);
H
Haojun Liao 已提交
2616

2617
  pInfo->curGroupId = pInfo->existNewGroupBlock->info.groupId;
2618 2619 2620
  pInfo->existNewGroupBlock = NULL;
}

L
Liu Jicong 已提交
2621 2622
static void doHandleRemainBlockFromNewGroup(SOperatorInfo* pOperator, SFillOperatorInfo* pInfo,
                                            SResultInfo* pResultInfo, SExecTaskInfo* pTaskInfo) {
2623
  if (taosFillHasMoreResults(pInfo->pFillInfo)) {
H
Haojun Liao 已提交
2624 2625
    int32_t numOfResultRows = pResultInfo->capacity - pInfo->pFinalRes->info.rows;
    taosFillResultDataBlock(pInfo->pFillInfo, pInfo->pFinalRes, numOfResultRows);
2626 2627
    pInfo->pRes->info.groupId = pInfo->curGroupId;
    return;
2628 2629 2630 2631
  }

  // handle the cached new group data block
  if (pInfo->existNewGroupBlock) {
2632 2633 2634 2635 2636 2637
    doHandleRemainBlockForNewGroupImpl(pOperator, pInfo, pResultInfo, pTaskInfo);
  }
}

static void doApplyScalarCalculation(SOperatorInfo* pOperator, SSDataBlock* pBlock, int32_t order, int32_t scanFlag) {
  SFillOperatorInfo* pInfo = pOperator->info;
L
Liu Jicong 已提交
2638
  SExprSupp*         pSup = &pOperator->exprSupp;
2639
  setInputDataBlock(pSup, pBlock, order, scanFlag, false);
2640 2641
  projectApplyFunctions(pSup->pExprInfo, pInfo->pRes, pBlock, pSup->pCtx, pSup->numOfExprs, NULL);

2642 2643 2644 2645
  // reset the row value before applying the no-fill functions to the input data block, which is "pBlock" in this case.
  pInfo->pRes->info.rows = 0;
  SExprSupp* pNoFillSupp = &pInfo->noFillExprSupp;
  setInputDataBlock(pNoFillSupp, pBlock, order, scanFlag, false);
2646

2647 2648
  projectApplyFunctions(pNoFillSupp->pExprInfo, pInfo->pRes, pBlock, pNoFillSupp->pCtx, pNoFillSupp->numOfExprs, NULL);
  pInfo->pRes->info.groupId = pBlock->info.groupId;
2649 2650
}

S
slzhou 已提交
2651
static SSDataBlock* doFillImpl(SOperatorInfo* pOperator) {
L
Liu Jicong 已提交
2652 2653
  SFillOperatorInfo* pInfo = pOperator->info;
  SExecTaskInfo*     pTaskInfo = pOperator->pTaskInfo;
2654

H
Haojun Liao 已提交
2655
  SResultInfo* pResultInfo = &pOperator->resultInfo;
H
Haojun Liao 已提交
2656
  SSDataBlock* pResBlock = pInfo->pFinalRes;
2657 2658

  blockDataCleanup(pResBlock);
2659

H
Haojun Liao 已提交
2660 2661
  int32_t order = TSDB_ORDER_ASC;
  int32_t scanFlag = MAIN_SCAN;
2662
  getTableScanInfo(pOperator, &order, &scanFlag);
2663

2664
  doHandleRemainBlockFromNewGroup(pOperator, pInfo, pResultInfo, pTaskInfo);
2665
  if (pResBlock->info.rows > 0) {
2666
    pResBlock->info.groupId = pInfo->curGroupId;
2667
    return pResBlock;
H
Haojun Liao 已提交
2668
  }
2669

H
Haojun Liao 已提交
2670
  SOperatorInfo* pDownstream = pOperator->pDownstream[0];
L
Liu Jicong 已提交
2671
  while (1) {
2672
    SSDataBlock* pBlock = pDownstream->fpSet.getNextFn(pDownstream);
2673 2674
    if (pBlock == NULL) {
      if (pInfo->totalInputRows == 0) {
2675
        doSetOperatorCompleted(pOperator);
2676 2677
        return NULL;
      }
2678

2679
      taosFillSetStartInfo(pInfo->pFillInfo, 0, pInfo->win.ekey);
2680
    } else {
2681
      blockDataUpdateTsWindow(pBlock, pInfo->primarySrcSlotId);
2682 2683

      blockDataCleanup(pInfo->pRes);
2684 2685
      blockDataEnsureCapacity(pInfo->pRes, pBlock->info.rows);
      blockDataEnsureCapacity(pInfo->pFinalRes, pBlock->info.rows);
2686
      doApplyScalarCalculation(pOperator, pBlock, order, scanFlag);
2687

H
Haojun Liao 已提交
2688 2689 2690
      if (pInfo->curGroupId == 0 || pInfo->curGroupId == pInfo->pRes->info.groupId) {
        pInfo->curGroupId = pInfo->pRes->info.groupId;  // the first data block
        pInfo->totalInputRows += pInfo->pRes->info.rows;
2691

H
Haojun Liao 已提交
2692 2693 2694 2695 2696
        if (order == pInfo->pFillInfo->order) {
          taosFillSetStartInfo(pInfo->pFillInfo, pInfo->pRes->info.rows, pBlock->info.window.ekey);
        } else {
          taosFillSetStartInfo(pInfo->pFillInfo, pInfo->pRes->info.rows, pBlock->info.window.skey);
        }
H
Haojun Liao 已提交
2697
        taosFillSetInputDataBlock(pInfo->pFillInfo, pInfo->pRes);
L
Liu Jicong 已提交
2698
      } else if (pInfo->curGroupId != pBlock->info.groupId) {  // the new group data block
2699 2700 2701 2702 2703
        pInfo->existNewGroupBlock = pBlock;

        // Fill the previous group data block, before handle the data block of new group.
        // Close the fill operation for previous group data block
        taosFillSetStartInfo(pInfo->pFillInfo, 0, pInfo->win.ekey);
2704 2705 2706
      }
    }

2707 2708
    int32_t numOfResultRows = pOperator->resultInfo.capacity - pResBlock->info.rows;
    taosFillResultDataBlock(pInfo->pFillInfo, pResBlock, numOfResultRows);
2709 2710

    // current group has no more result to return
2711
    if (pResBlock->info.rows > 0) {
2712 2713
      // 1. The result in current group not reach the threshold of output result, continue
      // 2. If multiple group results existing in one SSDataBlock is not allowed, return immediately
2714
      if (pResBlock->info.rows > pResultInfo->threshold || pBlock == NULL || pInfo->existNewGroupBlock != NULL) {
2715
        pResBlock->info.groupId = pInfo->curGroupId;
2716
        return pResBlock;
2717 2718
      }

2719
      doHandleRemainBlockFromNewGroup(pOperator, pInfo, pResultInfo, pTaskInfo);
2720
      if (pResBlock->info.rows >= pOperator->resultInfo.threshold || pBlock == NULL) {
2721
        pResBlock->info.groupId = pInfo->curGroupId;
2722
        return pResBlock;
2723 2724 2725
      }
    } else if (pInfo->existNewGroupBlock) {  // try next group
      assert(pBlock != NULL);
2726 2727 2728 2729

      blockDataCleanup(pResBlock);

      doHandleRemainBlockForNewGroupImpl(pOperator, pInfo, pResultInfo, pTaskInfo);
2730
      if (pResBlock->info.rows > pResultInfo->threshold) {
2731
        pResBlock->info.groupId = pInfo->curGroupId;
2732
        return pResBlock;
2733 2734 2735 2736 2737 2738 2739
      }
    } else {
      return NULL;
    }
  }
}

S
slzhou 已提交
2740 2741 2742 2743 2744 2745 2746 2747
static SSDataBlock* doFill(SOperatorInfo* pOperator) {
  SFillOperatorInfo* pInfo = pOperator->info;
  SExecTaskInfo*     pTaskInfo = pOperator->pTaskInfo;

  if (pOperator->status == OP_EXEC_DONE) {
    return NULL;
  }

S
slzhou 已提交
2748
  SSDataBlock* fillResult = NULL;
S
slzhou 已提交
2749
  while (true) {
S
slzhou 已提交
2750
    fillResult = doFillImpl(pOperator);
S
slzhou 已提交
2751 2752 2753 2754 2755
    if (fillResult == NULL) {
      doSetOperatorCompleted(pOperator);
      break;
    }

H
Haojun Liao 已提交
2756
    doFilter(fillResult, pOperator->exprSupp.pFilterInfo, &pInfo->matchInfo );
S
slzhou 已提交
2757 2758 2759 2760 2761
    if (fillResult->info.rows > 0) {
      break;
    }
  }

S
slzhou 已提交
2762
  if (fillResult != NULL) {
2763
    pOperator->resultInfo.totalRows += fillResult->info.rows;
S
slzhou 已提交
2764
  }
S
slzhou 已提交
2765

S
slzhou 已提交
2766
  return fillResult;
S
slzhou 已提交
2767 2768
}

2769
void destroyExprInfo(SExprInfo* pExpr, int32_t numOfExprs) {
C
Cary Xu 已提交
2770 2771 2772 2773 2774
  for (int32_t i = 0; i < numOfExprs; ++i) {
    SExprInfo* pExprInfo = &pExpr[i];
    for (int32_t j = 0; j < pExprInfo->base.numOfParams; ++j) {
      if (pExprInfo->base.pParam[j].type == FUNC_PARAM_TYPE_COLUMN) {
        taosMemoryFreeClear(pExprInfo->base.pParam[j].pCol);
H
Haojun Liao 已提交
2775
      }
2776
    }
C
Cary Xu 已提交
2777 2778 2779

    taosMemoryFree(pExprInfo->base.pParam);
    taosMemoryFree(pExprInfo->pExpr);
H
Haojun Liao 已提交
2780 2781 2782
  }
}

2783 2784 2785 2786 2787
static void destroyOperatorInfo(SOperatorInfo* pOperator) {
  if (pOperator == NULL) {
    return;
  }

2788
  if (pOperator->fpSet.closeFn != NULL) {
2789
    pOperator->fpSet.closeFn(pOperator->info);
2790 2791
  }

H
Haojun Liao 已提交
2792
  if (pOperator->pDownstream != NULL) {
L
Liu Jicong 已提交
2793
    for (int32_t i = 0; i < pOperator->numOfDownstream; ++i) {
H
Haojun Liao 已提交
2794
      destroyOperatorInfo(pOperator->pDownstream[i]);
2795 2796
    }

wafwerar's avatar
wafwerar 已提交
2797
    taosMemoryFreeClear(pOperator->pDownstream);
H
Haojun Liao 已提交
2798
    pOperator->numOfDownstream = 0;
2799 2800
  }

2801
  cleanupExprSupp(&pOperator->exprSupp);
wafwerar's avatar
wafwerar 已提交
2802
  taosMemoryFreeClear(pOperator);
2803 2804
}

2805 2806 2807 2808 2809 2810
int32_t getBufferPgSize(int32_t rowSize, uint32_t* defaultPgsz, uint32_t* defaultBufsz) {
  *defaultPgsz = 4096;
  while (*defaultPgsz < rowSize * 4) {
    *defaultPgsz <<= 1u;
  }

2811
  // The default buffer for each operator in query is 10MB.
2812
  // at least four pages need to be in buffer
2813 2814
  // TODO: make this variable to be configurable.
  *defaultBufsz = 4096 * 2560;
2815 2816 2817 2818 2819 2820 2821
  if ((*defaultBufsz) <= (*defaultPgsz)) {
    (*defaultBufsz) = (*defaultPgsz) * 4;
  }

  return 0;
}

dengyihao's avatar
dengyihao 已提交
2822 2823
int32_t doInitAggInfoSup(SAggSupporter* pAggSup, SqlFunctionCtx* pCtx, int32_t numOfOutput, size_t keyBufSize,
                         const char* pKey) {
L
Liu Jicong 已提交
2824
  int32_t    code = 0;
2825 2826
  _hash_fn_t hashFn = taosGetDefaultHashFunction(TSDB_DATA_TYPE_BINARY);

2827
  pAggSup->currentPageId = -1;
dengyihao's avatar
dengyihao 已提交
2828 2829
  pAggSup->resultRowSize = getResultRowSize(pCtx, numOfOutput);
  pAggSup->keyBuf = taosMemoryCalloc(1, keyBufSize + POINTER_BYTES + sizeof(int64_t));
2830
  pAggSup->pResultRowHashTable = tSimpleHashInit(10, hashFn);
2831

H
Haojun Liao 已提交
2832
  if (pAggSup->keyBuf == NULL || pAggSup->pResultRowHashTable == NULL) {
2833 2834 2835
    return TSDB_CODE_OUT_OF_MEMORY;
  }

dengyihao's avatar
dengyihao 已提交
2836
  uint32_t defaultPgsz = 0;
2837 2838
  uint32_t defaultBufsz = 0;
  getBufferPgSize(pAggSup->resultRowSize, &defaultPgsz, &defaultBufsz);
H
Haojun Liao 已提交
2839

wafwerar's avatar
wafwerar 已提交
2840
  if (!osTempSpaceAvailable()) {
H
Haojun Liao 已提交
2841 2842 2843
    code = TSDB_CODE_NO_AVAIL_DISK;
    qError("Init stream agg supporter failed since %s, %s", terrstr(code), pKey);
    return code;
wafwerar's avatar
wafwerar 已提交
2844
  }
2845

H
Haojun Liao 已提交
2846
  code = createDiskbasedBuf(&pAggSup->pResultBuf, defaultPgsz, defaultBufsz, pKey, tsTempDir);
H
Haojun Liao 已提交
2847
  if (code != TSDB_CODE_SUCCESS) {
H
Haojun Liao 已提交
2848
    qError("Create agg result buf failed since %s, %s", tstrerror(code), pKey);
H
Haojun Liao 已提交
2849 2850 2851
    return code;
  }

H
Haojun Liao 已提交
2852
  return code;
2853 2854
}

2855
void cleanupAggSup(SAggSupporter* pAggSup) {
wafwerar's avatar
wafwerar 已提交
2856
  taosMemoryFreeClear(pAggSup->keyBuf);
2857
  tSimpleHashCleanup(pAggSup->pResultRowHashTable);
H
Haojun Liao 已提交
2858
  destroyDiskbasedBuf(pAggSup->pResultBuf);
2859 2860
}

L
Liu Jicong 已提交
2861 2862
int32_t initAggInfo(SExprSupp* pSup, SAggSupporter* pAggSup, SExprInfo* pExprInfo, int32_t numOfCols, size_t keyBufSize,
                    const char* pkey) {
2863 2864 2865 2866 2867
  int32_t code = initExprSupp(pSup, pExprInfo, numOfCols);
  if (code != TSDB_CODE_SUCCESS) {
    return code;
  }

2868 2869 2870 2871 2872
  code = doInitAggInfoSup(pAggSup, pSup->pCtx, numOfCols, keyBufSize, pkey);
  if (code != TSDB_CODE_SUCCESS) {
    return code;
  }

L
Liu Jicong 已提交
2873
  for (int32_t i = 0; i < numOfCols; ++i) {
2874
    pSup->pCtx[i].saveHandle.pBuf = pAggSup->pResultBuf;
2875 2876
  }

2877
  return TSDB_CODE_SUCCESS;
H
Haojun Liao 已提交
2878 2879
}

L
Liu Jicong 已提交
2880
void initResultSizeInfo(SResultInfo* pResultInfo, int32_t numOfRows) {
wmmhello's avatar
wmmhello 已提交
2881
  ASSERT(numOfRows != 0);
2882 2883
  pResultInfo->capacity = numOfRows;
  pResultInfo->threshold = numOfRows * 0.75;
2884

2885 2886
  if (pResultInfo->threshold == 0) {
    pResultInfo->threshold = numOfRows;
2887 2888 2889
  }
}

2890 2891 2892 2893 2894
void initBasicInfo(SOptrBasicInfo* pInfo, SSDataBlock* pBlock) {
  pInfo->pRes = pBlock;
  initResultRowInfo(&pInfo->resultRowInfo);
}

5
54liuyao 已提交
2895
void* destroySqlFunctionCtx(SqlFunctionCtx* pCtx, int32_t numOfOutput) {
2896 2897 2898 2899 2900 2901 2902 2903 2904 2905
  if (pCtx == NULL) {
    return NULL;
  }

  for (int32_t i = 0; i < numOfOutput; ++i) {
    for (int32_t j = 0; j < pCtx[i].numOfParams; ++j) {
      taosVariantDestroy(&pCtx[i].param[j].param);
    }

    taosMemoryFreeClear(pCtx[i].subsidiaries.pCtx);
2906
    taosMemoryFreeClear(pCtx[i].subsidiaries.buf);
2907 2908 2909 2910 2911 2912 2913 2914
    taosMemoryFree(pCtx[i].input.pData);
    taosMemoryFree(pCtx[i].input.pColumnDataAgg);
  }

  taosMemoryFreeClear(pCtx);
  return NULL;
}

2915
int32_t initExprSupp(SExprSupp* pSup, SExprInfo* pExprInfo, int32_t numOfExpr) {
2916 2917 2918 2919
  pSup->pExprInfo = pExprInfo;
  pSup->numOfExprs = numOfExpr;
  if (pSup->pExprInfo != NULL) {
    pSup->pCtx = createSqlFunctionCtx(pExprInfo, numOfExpr, &pSup->rowEntryInfoOffset);
2920 2921 2922
    if (pSup->pCtx == NULL) {
      return TSDB_CODE_OUT_OF_MEMORY;
    }
2923
  }
2924 2925

  return TSDB_CODE_SUCCESS;
2926 2927
}

2928 2929 2930 2931
void cleanupExprSupp(SExprSupp* pSupp) {
  destroySqlFunctionCtx(pSupp->pCtx, pSupp->numOfExprs);
  if (pSupp->pExprInfo != NULL) {
    destroyExprInfo(pSupp->pExprInfo, pSupp->numOfExprs);
C
Cary Xu 已提交
2932
    taosMemoryFreeClear(pSupp->pExprInfo);
2933
  }
H
Haojun Liao 已提交
2934 2935 2936 2937 2938 2939

  if (pSupp->pFilterInfo != NULL) {
    filterFreeInfo(pSupp->pFilterInfo);
    pSupp->pFilterInfo = NULL;
  }

2940 2941 2942
  taosMemoryFree(pSupp->rowEntryInfoOffset);
}

2943 2944
SOperatorInfo* createAggregateOperatorInfo(SOperatorInfo* downstream, SAggPhysiNode* pAggNode,
                                           SExecTaskInfo* pTaskInfo) {
wafwerar's avatar
wafwerar 已提交
2945
  SAggOperatorInfo* pInfo = taosMemoryCalloc(1, sizeof(SAggOperatorInfo));
L
Liu Jicong 已提交
2946
  SOperatorInfo*    pOperator = taosMemoryCalloc(1, sizeof(SOperatorInfo));
H
Haojun Liao 已提交
2947 2948 2949
  if (pInfo == NULL || pOperator == NULL) {
    goto _error;
  }
H
Haojun Liao 已提交
2950

H
Haojun Liao 已提交
2951 2952 2953 2954
  SSDataBlock* pResBlock = createResDataBlock(pAggNode->node.pOutputDataBlockDesc);
  initBasicInfo(&pInfo->binfo, pResBlock);

  size_t keyBufSize = sizeof(int64_t) + sizeof(int64_t) + POINTER_BYTES;
2955
  initResultSizeInfo(&pOperator->resultInfo, 4096);
H
Haojun Liao 已提交
2956

2957 2958 2959
  int32_t    num = 0;
  SExprInfo* pExprInfo = createExprInfo(pAggNode->pAggFuncs, pAggNode->pGroupKeys, &num);
  int32_t    code = initAggInfo(&pOperator->exprSupp, &pInfo->aggSup, pExprInfo, num, keyBufSize, pTaskInfo->id.str);
L
Liu Jicong 已提交
2960
  if (code != TSDB_CODE_SUCCESS) {
H
Haojun Liao 已提交
2961 2962
    goto _error;
  }
H
Haojun Liao 已提交
2963

H
Haojun Liao 已提交
2964 2965 2966 2967 2968 2969
  int32_t    numOfScalarExpr = 0;
  SExprInfo* pScalarExprInfo = NULL;
  if (pAggNode->pExprs != NULL) {
    pScalarExprInfo = createExprInfo(pAggNode->pExprs, NULL, &numOfScalarExpr);
  }

2970 2971 2972 2973
  code = initExprSupp(&pInfo->scalarExprSup, pScalarExprInfo, numOfScalarExpr);
  if (code != TSDB_CODE_SUCCESS) {
    goto _error;
  }
2974

H
Haojun Liao 已提交
2975 2976 2977 2978 2979
  code = filterInitFromNode((SNode*)pAggNode->node.pConditions, &pOperator->exprSupp.pFilterInfo, 0);
  if (code != TSDB_CODE_SUCCESS) {
    goto _error;
  }

H
Haojun Liao 已提交
2980
  pInfo->binfo.mergeResultBlock = pAggNode->mergeDataBlock;
2981
  pInfo->groupId = UINT64_MAX;
dengyihao's avatar
dengyihao 已提交
2982
  pOperator->name = "TableAggregate";
2983
  pOperator->operatorType = QUERY_NODE_PHYSICAL_PLAN_HASH_AGG;
2984
  pOperator->blocking = true;
dengyihao's avatar
dengyihao 已提交
2985 2986 2987
  pOperator->status = OP_NOT_OPENED;
  pOperator->info = pInfo;
  pOperator->pTaskInfo = pTaskInfo;
H
Haojun Liao 已提交
2988

5
54liuyao 已提交
2989 2990
  pOperator->fpSet =
      createOperatorFpSet(doOpenAggregateOptr, getAggregateResult, NULL, NULL, destroyAggOperatorInfo, NULL);
H
Haojun Liao 已提交
2991

2992 2993 2994 2995 2996 2997
  if (downstream->operatorType == QUERY_NODE_PHYSICAL_PLAN_TABLE_SCAN) {
    STableScanInfo* pTableScanInfo = downstream->info;
    pTableScanInfo->pdInfo.pExprSup = &pOperator->exprSupp;
    pTableScanInfo->pdInfo.pAggSup = &pInfo->aggSup;
  }

H
Haojun Liao 已提交
2998 2999 3000 3001
  code = appendDownstream(pOperator, &downstream, 1);
  if (code != TSDB_CODE_SUCCESS) {
    goto _error;
  }
3002 3003

  return pOperator;
H
Haojun Liao 已提交
3004

3005
_error:
H
Haojun Liao 已提交
3006 3007 3008 3009
  if (pInfo != NULL) {
    destroyAggOperatorInfo(pInfo);
  }

3010 3011 3012
  if (pOperator != NULL) {
    cleanupExprSupp(&pOperator->exprSupp);
  }
H
Haojun Liao 已提交
3013

3014
  taosMemoryFreeClear(pOperator);
H
Haojun Liao 已提交
3015
  pTaskInfo->code = code;
H
Haojun Liao 已提交
3016
  return NULL;
3017 3018
}

3019
void cleanupBasicInfo(SOptrBasicInfo* pInfo) {
3020
  assert(pInfo != NULL);
H
Haojun Liao 已提交
3021
  pInfo->pRes = blockDataDestroy(pInfo->pRes);
3022 3023
}

H
Haojun Liao 已提交
3024 3025 3026 3027 3028 3029 3030
static void freeItem(void* pItem) {
  void** p = pItem;
  if (*p != NULL) {
    taosMemoryFreeClear(*p);
  }
}

3031
void destroyAggOperatorInfo(void* param) {
L
Liu Jicong 已提交
3032
  SAggOperatorInfo* pInfo = (SAggOperatorInfo*)param;
L
Liu Jicong 已提交
3033 3034
  cleanupBasicInfo(&pInfo->binfo);

H
Haojun Liao 已提交
3035
  cleanupAggSup(&pInfo->aggSup);
S
shenglian zhou 已提交
3036
  cleanupExprSupp(&pInfo->scalarExprSup);
H
Haojun Liao 已提交
3037
  cleanupGroupResInfo(&pInfo->groupResInfo);
D
dapan1121 已提交
3038
  taosMemoryFreeClear(param);
3039
}
3040

3041
void destroyFillOperatorInfo(void* param) {
L
Liu Jicong 已提交
3042
  SFillOperatorInfo* pInfo = (SFillOperatorInfo*)param;
3043
  pInfo->pFillInfo = taosDestroyFillInfo(pInfo->pFillInfo);
H
Haojun Liao 已提交
3044
  pInfo->pRes = blockDataDestroy(pInfo->pRes);
H
Haojun Liao 已提交
3045 3046
  pInfo->pFinalRes = blockDataDestroy(pInfo->pFinalRes);

3047
  cleanupExprSupp(&pInfo->noFillExprSupp);
H
Haojun Liao 已提交
3048

wafwerar's avatar
wafwerar 已提交
3049
  taosMemoryFreeClear(pInfo->p);
H
Haojun Liao 已提交
3050
  taosArrayDestroy(pInfo->matchInfo.pList);
D
dapan1121 已提交
3051
  taosMemoryFreeClear(param);
3052 3053
}

3054
void destroyExchangeOperatorInfo(void* param) {
L
Liu Jicong 已提交
3055
  SExchangeInfo* pExInfo = (SExchangeInfo*)param;
3056 3057 3058
  taosRemoveRef(exchangeObjRefPool, pExInfo->self);
}

L
Liu Jicong 已提交
3059
void freeSourceDataInfo(void* p) {
3060 3061 3062 3063
  SSourceDataInfo* pInfo = (SSourceDataInfo*)p;
  taosMemoryFreeClear(pInfo->pRsp);
}

3064
void doDestroyExchangeOperatorInfo(void* param) {
X
Xiaoyu Wang 已提交
3065
  SExchangeInfo* pExInfo = (SExchangeInfo*)param;
3066

H
Haojun Liao 已提交
3067
  taosArrayDestroy(pExInfo->pSources);
3068
  taosArrayDestroyEx(pExInfo->pSourceDataInfo, freeSourceDataInfo);
3069 3070 3071 3072

  if (pExInfo->pResultBlockList != NULL) {
    taosArrayDestroyEx(pExInfo->pResultBlockList, freeBlock);
    pExInfo->pResultBlockList = NULL;
H
Haojun Liao 已提交
3073 3074
  }

3075
  blockDataDestroy(pExInfo->pDummyBlock);
L
Liu Jicong 已提交
3076

3077
  tsem_destroy(&pExInfo->ready);
D
dapan1121 已提交
3078
  taosMemoryFreeClear(param);
H
Haojun Liao 已提交
3079 3080
}

H
Haojun Liao 已提交
3081 3082 3083 3084
static int32_t initFillInfo(SFillOperatorInfo* pInfo, SExprInfo* pExpr, int32_t numOfCols, SExprInfo* pNotFillExpr,
                            int32_t numOfNotFillCols, SNodeListNode* pValNode, STimeWindow win, int32_t capacity,
                            const char* id, SInterval* pInterval, int32_t fillType, int32_t order) {
  SFillColInfo* pColInfo = createFillColInfo(pExpr, numOfCols, pNotFillExpr, numOfNotFillCols, pValNode);
H
Haojun Liao 已提交
3085

H
Haojun Liao 已提交
3086 3087 3088
  int64_t     startKey = (order == TSDB_ORDER_ASC) ? win.skey : win.ekey;
  STimeWindow w = getAlignQueryTimeWindow(pInterval, pInterval->precision, startKey);
  w = getFirstQualifiedTimeWindow(startKey, &w, pInterval, order);
H
Haojun Liao 已提交
3089

L
Liu Jicong 已提交
3090 3091
  pInfo->pFillInfo = taosCreateFillInfo(w.skey, numOfCols, numOfNotFillCols, capacity, pInterval, fillType, pColInfo,
                                        pInfo->primaryTsCol, order, id);
H
Haojun Liao 已提交
3092

H
Haojun Liao 已提交
3093 3094 3095 3096 3097 3098 3099
  if (order == TSDB_ORDER_ASC) {
    pInfo->win.skey = win.skey;
    pInfo->win.ekey = win.ekey;
  } else {
    pInfo->win.skey = win.ekey;
    pInfo->win.ekey = win.skey;
  }
L
Liu Jicong 已提交
3100
  pInfo->p = taosMemoryCalloc(numOfCols, POINTER_BYTES);
3101

H
Haojun Liao 已提交
3102
  if (pInfo->pFillInfo == NULL || pInfo->p == NULL) {
H
Haojun Liao 已提交
3103 3104
    taosMemoryFree(pInfo->pFillInfo);
    taosMemoryFree(pInfo->p);
H
Haojun Liao 已提交
3105 3106 3107 3108 3109 3110
    return TSDB_CODE_OUT_OF_MEMORY;
  } else {
    return TSDB_CODE_SUCCESS;
  }
}

3111
static bool isWstartColumnExist(SFillOperatorInfo* pInfo) {
3112
  if (pInfo->noFillExprSupp.numOfExprs == 0) {
3113 3114
    return false;
  }
3115 3116 3117

  for (int32_t i = 0; i < pInfo->noFillExprSupp.numOfExprs; ++i) {
    SExprInfo* exprInfo = pInfo->noFillExprSupp.pExprInfo + i;
3118
    if (exprInfo->pExpr->nodeType == QUERY_NODE_COLUMN && exprInfo->base.numOfParams == 1 &&
3119 3120 3121 3122 3123 3124 3125
        exprInfo->base.pParam[0].pCol->colType == COLUMN_TYPE_WINDOW_START) {
      return true;
    }
  }
  return false;
}

3126 3127
static int32_t createPrimaryTsExprIfNeeded(SFillOperatorInfo* pInfo, SFillPhysiNode* pPhyFillNode, SExprSupp* pExprSupp,
                                           const char* idStr) {
3128
  bool wstartExist = isWstartColumnExist(pInfo);
3129

3130 3131
  if (wstartExist == false) {
    if (pPhyFillNode->pWStartTs->type != QUERY_NODE_TARGET) {
3132
      qError("pWStartTs of fill physical node is not a target node, %s", idStr);
3133 3134 3135
      return TSDB_CODE_QRY_SYS_ERROR;
    }

3136 3137
    SExprInfo* pExpr = taosMemoryRealloc(pExprSupp->pExprInfo, (pExprSupp->numOfExprs + 1) * sizeof(SExprInfo));
    if (pExpr == NULL) {
3138 3139 3140
      return TSDB_CODE_OUT_OF_MEMORY;
    }

3141 3142 3143
    createExprFromTargetNode(&pExpr[pExprSupp->numOfExprs], (STargetNode*)pPhyFillNode->pWStartTs);
    pExprSupp->numOfExprs += 1;
    pExprSupp->pExprInfo = pExpr;
3144
  }
3145

3146 3147 3148
  return TSDB_CODE_SUCCESS;
}

L
Liu Jicong 已提交
3149 3150
SOperatorInfo* createFillOperatorInfo(SOperatorInfo* downstream, SFillPhysiNode* pPhyFillNode,
                                      SExecTaskInfo* pTaskInfo) {
3151 3152 3153 3154 3155 3156
  SFillOperatorInfo* pInfo = taosMemoryCalloc(1, sizeof(SFillOperatorInfo));
  SOperatorInfo*     pOperator = taosMemoryCalloc(1, sizeof(SOperatorInfo));
  if (pInfo == NULL || pOperator == NULL) {
    goto _error;
  }

H
Haojun Liao 已提交
3157 3158 3159 3160
  pInfo->pRes = createResDataBlock(pPhyFillNode->node.pOutputDataBlockDesc);
  SExprInfo* pExprInfo = createExprInfo(pPhyFillNode->pFillExprs, NULL, &pInfo->numOfExpr);
  pOperator->exprSupp.pExprInfo = pExprInfo;

3161 3162 3163 3164 3165 3166 3167 3168
  SExprSupp* pNoFillSupp = &pInfo->noFillExprSupp;
  pNoFillSupp->pExprInfo = createExprInfo(pPhyFillNode->pNotFillExprs, NULL, &pNoFillSupp->numOfExprs);
  int32_t code = createPrimaryTsExprIfNeeded(pInfo, pPhyFillNode, pNoFillSupp, pTaskInfo->id.str);
  if (code != TSDB_CODE_SUCCESS) {
    goto _error;
  }

  code = initExprSupp(pNoFillSupp, pNoFillSupp->pExprInfo, pNoFillSupp->numOfExprs);
3169 3170 3171
  if (code != TSDB_CODE_SUCCESS) {
    goto _error;
  }
H
Haojun Liao 已提交
3172

L
Liu Jicong 已提交
3173
  SInterval* pInterval =
3174
      QUERY_NODE_PHYSICAL_PLAN_MERGE_ALIGNED_INTERVAL == downstream->operatorType
3175 3176
          ? &((SMergeAlignedIntervalAggOperatorInfo*)downstream->info)->intervalAggOperatorInfo->interval
          : &((SIntervalAggOperatorInfo*)downstream->info)->interval;
3177

3178
  int32_t order = (pPhyFillNode->inputTsOrder == ORDER_ASC) ? TSDB_ORDER_ASC : TSDB_ORDER_DESC;
3179
  int32_t type = convertFillType(pPhyFillNode->mode);
3180

H
Haojun Liao 已提交
3181
  SResultInfo* pResultInfo = &pOperator->resultInfo;
H
Haojun Liao 已提交
3182

3183
  initResultSizeInfo(&pOperator->resultInfo, 4096);
H
Haojun Liao 已提交
3184 3185 3186 3187 3188
  blockDataEnsureCapacity(pInfo->pRes, pOperator->resultInfo.capacity);
  code = initExprSupp(&pOperator->exprSupp, pExprInfo, pInfo->numOfExpr);
  if (code != TSDB_CODE_SUCCESS) {
    goto _error;
  }
H
Haojun Liao 已提交
3189

H
Haojun Liao 已提交
3190 3191
  pInfo->primaryTsCol = ((STargetNode*)pPhyFillNode->pWStartTs)->slotId;
  pInfo->primarySrcSlotId = ((SColumnNode*)((STargetNode*)pPhyFillNode->pWStartTs)->pExpr)->slotId;
3192

3193
  int32_t numOfOutputCols = 0;
H
Haojun Liao 已提交
3194 3195
  code = extractColMatchInfo(pPhyFillNode->pFillExprs, pPhyFillNode->node.pOutputDataBlockDesc, &numOfOutputCols,
                             COL_MATCH_FROM_SLOT_ID, &pInfo->matchInfo);
3196

3197
  code = initFillInfo(pInfo, pExprInfo, pInfo->numOfExpr, pNoFillSupp->pExprInfo, pNoFillSupp->numOfExprs,
3198 3199
                      (SNodeListNode*)pPhyFillNode->pValues, pPhyFillNode->timeRange, pResultInfo->capacity,
                      pTaskInfo->id.str, pInterval, type, order);
3200 3201 3202
  if (code != TSDB_CODE_SUCCESS) {
    goto _error;
  }
3203

H
Haojun Liao 已提交
3204
  pInfo->pFinalRes = createOneDataBlock(pInfo->pRes, false);
H
Haojun Liao 已提交
3205 3206
  blockDataEnsureCapacity(pInfo->pFinalRes, pOperator->resultInfo.capacity);

H
Haojun Liao 已提交
3207 3208 3209 3210 3211
  code = filterInitFromNode((SNode*)pPhyFillNode->node.pConditions, &pOperator->exprSupp.pFilterInfo, 0);
  if (code != TSDB_CODE_SUCCESS) {
    goto _error;
  }

3212 3213 3214 3215
  pOperator->name = "FillOperator";
  pOperator->blocking = false;
  pOperator->status = OP_NOT_OPENED;
  pOperator->operatorType = QUERY_NODE_PHYSICAL_PLAN_FILL;
3216
  pOperator->exprSupp.numOfExprs = pInfo->numOfExpr;
3217 3218
  pOperator->info = pInfo;
  pOperator->pTaskInfo = pTaskInfo;
H
Haojun Liao 已提交
3219

5
54liuyao 已提交
3220
  pOperator->fpSet = createOperatorFpSet(operatorDummyOpenFn, doFill, NULL, NULL, destroyFillOperatorInfo, NULL);
3221

3222
  code = appendDownstream(pOperator, &downstream, 1);
3223
  return pOperator;
H
Haojun Liao 已提交
3224

3225
_error:
H
Haojun Liao 已提交
3226 3227 3228 3229
  if (pInfo != NULL) {
    destroyFillOperatorInfo(pInfo);
  }

3230
  pTaskInfo->code = code;
wafwerar's avatar
wafwerar 已提交
3231
  taosMemoryFreeClear(pOperator);
H
Haojun Liao 已提交
3232
  return NULL;
3233 3234
}

D
dapan1121 已提交
3235
static SExecTaskInfo* createExecTaskInfo(uint64_t queryId, uint64_t taskId, EOPTR_EXEC_MODEL model, char* dbFName) {
wafwerar's avatar
wafwerar 已提交
3236
  SExecTaskInfo* pTaskInfo = taosMemoryCalloc(1, sizeof(SExecTaskInfo));
H
Haojun Liao 已提交
3237 3238 3239 3240 3241
  if (pTaskInfo == NULL) {
    terrno = TSDB_CODE_OUT_OF_MEMORY;
    return NULL;
  }

3242
  setTaskStatus(pTaskInfo, TASK_NOT_COMPLETED);
H
Haojun Liao 已提交
3243

3244
  pTaskInfo->schemaInfo.dbname = strdup(dbFName);
3245
  pTaskInfo->cost.created = taosGetTimestampMs();
H
Haojun Liao 已提交
3246
  pTaskInfo->id.queryId = queryId;
dengyihao's avatar
dengyihao 已提交
3247
  pTaskInfo->execModel = model;
H
Haojun Liao 已提交
3248
  pTaskInfo->pTableInfoList = tableListCreate();
H
Haojun Liao 已提交
3249

wafwerar's avatar
wafwerar 已提交
3250
  char* p = taosMemoryCalloc(1, 128);
L
Liu Jicong 已提交
3251
  snprintf(p, 128, "TID:0x%" PRIx64 " QID:0x%" PRIx64, taskId, queryId);
H
Haojun Liao 已提交
3252
  pTaskInfo->id.str = p;
H
Haojun Liao 已提交
3253

3254 3255
  return pTaskInfo;
}
H
Haojun Liao 已提交
3256

H
Haojun Liao 已提交
3257 3258
SSchemaWrapper* extractQueriedColumnSchema(SScanPhysiNode* pScanNode);

3259
int32_t extractTableSchemaInfo(SReadHandle* pHandle, SScanPhysiNode* pScanNode, SExecTaskInfo* pTaskInfo) {
3260 3261
  SMetaReader mr = {0};
  metaReaderInit(&mr, pHandle->meta, 0);
3262
  int32_t code = metaGetTableEntryByUid(&mr, pScanNode->uid);
3263
  if (code != TSDB_CODE_SUCCESS) {
L
Liu Jicong 已提交
3264 3265
    qError("failed to get the table meta, uid:0x%" PRIx64 ", suid:0x%" PRIx64 ", %s", pScanNode->uid, pScanNode->suid,
           GET_TASKID(pTaskInfo));
H
Haojun Liao 已提交
3266

D
dapan1121 已提交
3267
    metaReaderClear(&mr);
3268
    return terrno;
D
dapan1121 已提交
3269
  }
3270

3271 3272
  SSchemaInfo* pSchemaInfo = &pTaskInfo->schemaInfo;
  pSchemaInfo->tablename = strdup(mr.me.name);
3273 3274

  if (mr.me.type == TSDB_SUPER_TABLE) {
3275 3276
    pSchemaInfo->sw = tCloneSSchemaWrapper(&mr.me.stbEntry.schemaRow);
    pSchemaInfo->tversion = mr.me.stbEntry.schemaTag.version;
3277
  } else if (mr.me.type == TSDB_CHILD_TABLE) {
3278 3279
    tDecoderClear(&mr.coder);

3280 3281
    tb_uid_t suid = mr.me.ctbEntry.suid;
    metaGetTableEntryByUid(&mr, suid);
3282 3283
    pSchemaInfo->sw = tCloneSSchemaWrapper(&mr.me.stbEntry.schemaRow);
    pSchemaInfo->tversion = mr.me.stbEntry.schemaTag.version;
3284
  } else {
3285
    pSchemaInfo->sw = tCloneSSchemaWrapper(&mr.me.ntbEntry.schemaRow);
3286
  }
3287 3288

  metaReaderClear(&mr);
3289

H
Haojun Liao 已提交
3290 3291 3292 3293 3294
  pSchemaInfo->qsw = extractQueriedColumnSchema(pScanNode);
  return TSDB_CODE_SUCCESS;
}

SSchemaWrapper* extractQueriedColumnSchema(SScanPhysiNode* pScanNode) {
3295 3296 3297
  int32_t numOfCols = LIST_LENGTH(pScanNode->pScanCols);
  int32_t numOfTags = LIST_LENGTH(pScanNode->pScanPseudoCols);

3298
  SSchemaWrapper* pqSw = taosMemoryCalloc(1, sizeof(SSchemaWrapper));
3299
  pqSw->pSchema = taosMemoryCalloc(numOfCols + numOfTags, sizeof(SSchema));
3300

L
Liu Jicong 已提交
3301
  for (int32_t i = 0; i < numOfCols; ++i) {
H
Haojun Liao 已提交
3302
    STargetNode* pNode = (STargetNode*)nodesListGetNode(pScanNode->pScanCols, i);
3303 3304
    SColumnNode* pColNode = (SColumnNode*)pNode->pExpr;

H
Haojun Liao 已提交
3305 3306 3307
    SSchema* pSchema = &pqSw->pSchema[pqSw->nCols++];
    pSchema->colId = pColNode->colId;
    pSchema->type = pColNode->node.resType.type;
H
Haojun Liao 已提交
3308 3309
    pSchema->bytes = pColNode->node.resType.bytes;
    tstrncpy(pSchema->name, pColNode->colName, tListLen(pSchema->name));
3310 3311
  }

3312
  // this the tags and pseudo function columns, we only keep the tag columns
3313
  for (int32_t i = 0; i < numOfTags; ++i) {
3314 3315 3316 3317 3318 3319 3320 3321 3322
    STargetNode* pNode = (STargetNode*)nodesListGetNode(pScanNode->pScanPseudoCols, i);

    int32_t type = nodeType(pNode->pExpr);
    if (type == QUERY_NODE_COLUMN) {
      SColumnNode* pColNode = (SColumnNode*)pNode->pExpr;

      SSchema* pSchema = &pqSw->pSchema[pqSw->nCols++];
      pSchema->colId = pColNode->colId;
      pSchema->type = pColNode->node.resType.type;
H
Haojun Liao 已提交
3323
      pSchema->bytes = pColNode->node.resType.bytes;
H
Haojun Liao 已提交
3324
      tstrncpy(pSchema->name, pColNode->colName, tListLen(pSchema->name));
3325 3326 3327
    }
  }

H
Haojun Liao 已提交
3328
  return pqSw;
3329 3330
}

3331 3332
static void cleanupTableSchemaInfo(SSchemaInfo* pSchemaInfo) {
  taosMemoryFreeClear(pSchemaInfo->dbname);
3333
  taosMemoryFreeClear(pSchemaInfo->tablename);
3334 3335
  tDeleteSSchemaWrapper(pSchemaInfo->sw);
  tDeleteSSchemaWrapper(pSchemaInfo->qsw);
3336 3337
}

3338
static void cleanupStreamInfo(SStreamTaskInfo* pStreamInfo) { tDeleteSSchemaWrapper(pStreamInfo->schema); }
3339

3340
bool groupbyTbname(SNodeList* pGroupList) {
3341
  bool bytbname = false;
3342
  if (LIST_LENGTH(pGroupList) == 1) {
3343 3344 3345 3346 3347 3348 3349 3350 3351 3352
    SNode* p = nodesListGetNode(pGroupList, 0);
    if (p->type == QUERY_NODE_FUNCTION) {
      // partition by tbname/group by tbname
      bytbname = (strcmp(((struct SFunctionNode*)p)->functionName, "tbname") == 0);
    }
  }

  return bytbname;
}

3353 3354
SOperatorInfo* createOperatorTree(SPhysiNode* pPhyNode, SExecTaskInfo* pTaskInfo, SReadHandle* pHandle, SNode* pTagCond,
                                  SNode* pTagIndexCond, const char* pUser) {
3355
  int32_t         type = nodeType(pPhyNode);
3356
  STableListInfo* pTableListInfo = pTaskInfo->pTableInfoList;
3357
  const char*     idstr = GET_TASKID(pTaskInfo);
3358

X
Xiaoyu Wang 已提交
3359
  if (pPhyNode->pChildren == NULL || LIST_LENGTH(pPhyNode->pChildren) == 0) {
3360
    SOperatorInfo* pOperator = NULL;
H
Haojun Liao 已提交
3361
    if (QUERY_NODE_PHYSICAL_PLAN_TABLE_SCAN == type) {
dengyihao's avatar
dengyihao 已提交
3362
      STableScanPhysiNode* pTableScanNode = (STableScanPhysiNode*)pPhyNode;
H
Haojun Liao 已提交
3363

3364 3365 3366 3367 3368 3369
      // NOTE: this is an patch to fix the physical plan
      // TODO remove it later
      if (pTableScanNode->scan.node.pLimit != NULL) {
        pTableScanNode->groupSort = true;
      }

L
Liu Jicong 已提交
3370 3371
      int32_t code =
          createScanTableListInfo(&pTableScanNode->scan, pTableScanNode->pGroupTags, pTableScanNode->groupSort, pHandle,
H
Haojun Liao 已提交
3372
                                  pTableListInfo, pTagCond, pTagIndexCond, pTaskInfo);
3373
      if (code) {
wmmhello's avatar
wmmhello 已提交
3374
        pTaskInfo->code = code;
3375
        qError("failed to createScanTableListInfo, code:%s, %s", tstrerror(code), idstr);
D
dapan1121 已提交
3376 3377
        return NULL;
      }
wmmhello's avatar
wmmhello 已提交
3378

3379
      code = extractTableSchemaInfo(pHandle, &pTableScanNode->scan, pTaskInfo);
S
slzhou 已提交
3380
      if (code) {
3381
        pTaskInfo->code = terrno;
wmmhello's avatar
wmmhello 已提交
3382 3383 3384
        return NULL;
      }

3385
      pOperator = createTableScanOperatorInfo(pTableScanNode, pHandle, pTaskInfo);
D
dapan1121 已提交
3386 3387 3388 3389 3390
      if (NULL == pOperator) {
        pTaskInfo->code = terrno;
        return NULL;
      }

3391 3392
      STableScanInfo* pScanInfo = pOperator->info;
      pTaskInfo->cost.pRecoder = &pScanInfo->readRecorder;
S
slzhou 已提交
3393 3394
    } else if (QUERY_NODE_PHYSICAL_PLAN_TABLE_MERGE_SCAN == type) {
      STableMergeScanPhysiNode* pTableScanNode = (STableMergeScanPhysiNode*)pPhyNode;
H
Haojun Liao 已提交
3395 3396 3397

      int32_t code = createScanTableListInfo(&pTableScanNode->scan, pTableScanNode->pGroupTags, true, pHandle,
                                             pTableListInfo, pTagCond, pTagIndexCond, pTaskInfo);
L
Liu Jicong 已提交
3398
      if (code) {
wmmhello's avatar
wmmhello 已提交
3399
        pTaskInfo->code = code;
H
Haojun Liao 已提交
3400
        qError("failed to createScanTableListInfo, code: %s", tstrerror(code));
wmmhello's avatar
wmmhello 已提交
3401 3402
        return NULL;
      }
3403

3404
      code = extractTableSchemaInfo(pHandle, &pTableScanNode->scan, pTaskInfo);
wmmhello's avatar
wmmhello 已提交
3405 3406 3407 3408
      if (code) {
        pTaskInfo->code = terrno;
        return NULL;
      }
wmmhello's avatar
wmmhello 已提交
3409

3410
      pOperator = createTableMergeScanOperatorInfo(pTableScanNode, pTableListInfo, pHandle, pTaskInfo);
D
dapan1121 已提交
3411 3412 3413 3414
      if (NULL == pOperator) {
        pTaskInfo->code = terrno;
        return NULL;
      }
wmmhello's avatar
wmmhello 已提交
3415

3416 3417
      STableScanInfo* pScanInfo = pOperator->info;
      pTaskInfo->cost.pRecoder = &pScanInfo->readRecorder;
H
Haojun Liao 已提交
3418
    } else if (QUERY_NODE_PHYSICAL_PLAN_EXCHANGE == type) {
3419 3420
      pOperator = createExchangeOperatorInfo(pHandle ? pHandle->pMsgCb->clientRpc : NULL, (SExchangePhysiNode*)pPhyNode,
                                             pTaskInfo);
H
Haojun Liao 已提交
3421
    } else if (QUERY_NODE_PHYSICAL_PLAN_STREAM_SCAN == type) {
5
54liuyao 已提交
3422
      STableScanPhysiNode* pTableScanNode = (STableScanPhysiNode*)pPhyNode;
5
54liuyao 已提交
3423
      if (pHandle->vnode) {
L
Liu Jicong 已提交
3424 3425
        int32_t code =
            createScanTableListInfo(&pTableScanNode->scan, pTableScanNode->pGroupTags, pTableScanNode->groupSort,
H
Haojun Liao 已提交
3426
                                    pHandle, pTableListInfo, pTagCond, pTagIndexCond, pTaskInfo);
L
Liu Jicong 已提交
3427
        if (code) {
wmmhello's avatar
wmmhello 已提交
3428
          pTaskInfo->code = code;
H
Haojun Liao 已提交
3429
          qError("failed to createScanTableListInfo, code: %s", tstrerror(code));
wmmhello's avatar
wmmhello 已提交
3430 3431
          return NULL;
        }
L
Liu Jicong 已提交
3432 3433

#ifndef NDEBUG
H
Haojun Liao 已提交
3434
        int32_t sz = tableListGetSize(pTableListInfo);
H
Haojun Liao 已提交
3435 3436
        qDebug("create stream task, total:%d", sz);

L
Liu Jicong 已提交
3437
        for (int32_t i = 0; i < sz; i++) {
H
Haojun Liao 已提交
3438
          STableKeyInfo* pKeyInfo = tableListGetInfo(pTableListInfo, i);
3439
          qDebug("add table uid:%" PRIu64 ", gid:%" PRIu64, pKeyInfo->uid, pKeyInfo->groupId);
L
Liu Jicong 已提交
3440 3441
        }
#endif
3442
      }
3443

H
Haojun Liao 已提交
3444
      pTaskInfo->schemaInfo.qsw = extractQueriedColumnSchema(&pTableScanNode->scan);
3445
      pOperator = createStreamScanOperatorInfo(pHandle, pTableScanNode, pTagCond, pTaskInfo);
H
Haojun Liao 已提交
3446
    } else if (QUERY_NODE_PHYSICAL_PLAN_SYSTABLE_SCAN == type) {
L
Liu Jicong 已提交
3447
      SSystemTableScanPhysiNode* pSysScanPhyNode = (SSystemTableScanPhysiNode*)pPhyNode;
3448
      pOperator = createSysTableScanOperatorInfo(pHandle, pSysScanPhyNode, pUser, pTaskInfo);
3449
    } else if (QUERY_NODE_PHYSICAL_PLAN_TAG_SCAN == type) {
X
Xiaoyu Wang 已提交
3450
      STagScanPhysiNode* pScanPhyNode = (STagScanPhysiNode*)pPhyNode;
3451 3452

      int32_t code = createScanTableListInfo(pScanPhyNode, NULL, false, pHandle, pTableListInfo, pTagCond,
H
Haojun Liao 已提交
3453
                                             pTagIndexCond, pTaskInfo);
3454
      if (code != TSDB_CODE_SUCCESS) {
3455
        pTaskInfo->code = code;
H
Haojun Liao 已提交
3456
        qError("failed to getTableList, code: %s", tstrerror(code));
3457 3458 3459
        return NULL;
      }

3460
      pOperator = createTagScanOperatorInfo(pHandle, pScanPhyNode, pTableListInfo, pTaskInfo);
3461
    } else if (QUERY_NODE_PHYSICAL_PLAN_BLOCK_DIST_SCAN == type) {
3462
      SBlockDistScanPhysiNode* pBlockNode = (SBlockDistScanPhysiNode*)pPhyNode;
3463 3464

      if (pBlockNode->tableType == TSDB_SUPER_TABLE) {
H
Haojun Liao 已提交
3465 3466
        SArray* pList = taosArrayInit(4, sizeof(STableKeyInfo));
        int32_t code = vnodeGetAllTableList(pHandle->vnode, pBlockNode->uid, pList);
3467 3468 3469 3470
        if (code != TSDB_CODE_SUCCESS) {
          pTaskInfo->code = terrno;
          return NULL;
        }
H
Haojun Liao 已提交
3471

3472
        for (int32_t i = 0; i < tableListGetSize(pTableListInfo); ++i) {
H
Haojun Liao 已提交
3473
          STableKeyInfo* p = taosArrayGet(pList, i);
H
Haojun Liao 已提交
3474
          tableListAddTableInfo(pTableListInfo, p->uid, 0);
H
Haojun Liao 已提交
3475 3476
        }
        taosArrayDestroy(pList);
3477
      } else {  // Create group with only one table
H
Haojun Liao 已提交
3478
        tableListAddTableInfo(pTableListInfo, pBlockNode->uid, 0);
3479 3480
      }

3481
      pOperator = createDataBlockInfoScanOperator(pHandle, pBlockNode, pTaskInfo);
H
Haojun Liao 已提交
3482 3483 3484
    } else if (QUERY_NODE_PHYSICAL_PLAN_LAST_ROW_SCAN == type) {
      SLastRowScanPhysiNode* pScanNode = (SLastRowScanPhysiNode*)pPhyNode;

L
Liu Jicong 已提交
3485
      int32_t code = createScanTableListInfo(&pScanNode->scan, pScanNode->pGroupTags, true, pHandle, pTableListInfo,
H
Haojun Liao 已提交
3486
                                             pTagCond, pTagIndexCond, pTaskInfo);
3487 3488 3489 3490
      if (code != TSDB_CODE_SUCCESS) {
        pTaskInfo->code = code;
        return NULL;
      }
3491

3492
      code = extractTableSchemaInfo(pHandle, &pScanNode->scan, pTaskInfo);
3493 3494 3495
      if (code != TSDB_CODE_SUCCESS) {
        pTaskInfo->code = code;
        return NULL;
H
Haojun Liao 已提交
3496 3497
      }

3498
      pOperator = createCacherowsScanOperator(pScanNode, pHandle, pTaskInfo);
3499
    } else if (QUERY_NODE_PHYSICAL_PLAN_PROJECT == type) {
3500
      pOperator = createProjectOperatorInfo(NULL, (SProjectPhysiNode*)pPhyNode, pTaskInfo);
H
Haojun Liao 已提交
3501 3502
    } else {
      ASSERT(0);
H
Haojun Liao 已提交
3503
    }
3504 3505 3506 3507 3508

    if (pOperator != NULL) {
      pOperator->resultDataBlockId = pPhyNode->pOutputDataBlockDesc->dataBlockId;
    }

3509
    return pOperator;
H
Haojun Liao 已提交
3510 3511
  }

3512
  size_t          size = LIST_LENGTH(pPhyNode->pChildren);
3513
  SOperatorInfo** ops = taosMemoryCalloc(size, POINTER_BYTES);
3514 3515 3516 3517
  if (ops == NULL) {
    return NULL;
  }

dengyihao's avatar
dengyihao 已提交
3518
  for (int32_t i = 0; i < size; ++i) {
3519
    SPhysiNode* pChildNode = (SPhysiNode*)nodesListGetNode(pPhyNode->pChildren, i);
3520
    ops[i] = createOperatorTree(pChildNode, pTaskInfo, pHandle, pTagCond, pTagIndexCond, pUser);
3521
    if (ops[i] == NULL) {
H
Haojun Liao 已提交
3522
      taosMemoryFree(ops);
3523 3524
      return NULL;
    }
3525
  }
H
Haojun Liao 已提交
3526

3527
  SOperatorInfo* pOptr = NULL;
H
Haojun Liao 已提交
3528
  if (QUERY_NODE_PHYSICAL_PLAN_PROJECT == type) {
3529
    pOptr = createProjectOperatorInfo(ops[0], (SProjectPhysiNode*)pPhyNode, pTaskInfo);
3530
  } else if (QUERY_NODE_PHYSICAL_PLAN_HASH_AGG == type) {
H
Haojun Liao 已提交
3531 3532
    SAggPhysiNode* pAggNode = (SAggPhysiNode*)pPhyNode;
    if (pAggNode->pGroupKeys != NULL) {
H
Haojun Liao 已提交
3533
      pOptr = createGroupOperatorInfo(ops[0], pAggNode, pTaskInfo);
H
Haojun Liao 已提交
3534
    } else {
H
Haojun Liao 已提交
3535
      pOptr = createAggregateOperatorInfo(ops[0], pAggNode, pTaskInfo);
H
Haojun Liao 已提交
3536
    }
3537
  } else if (QUERY_NODE_PHYSICAL_PLAN_HASH_INTERVAL == type) {
H
Haojun Liao 已提交
3538
    SIntervalPhysiNode* pIntervalPhyNode = (SIntervalPhysiNode*)pPhyNode;
H
Haojun Liao 已提交
3539

H
Haojun Liao 已提交
3540 3541
    bool isStream = (QUERY_NODE_PHYSICAL_PLAN_STREAM_INTERVAL == type);
    pOptr = createIntervalOperatorInfo(ops[0], pIntervalPhyNode, pTaskInfo, isStream);
3542
  } else if (QUERY_NODE_PHYSICAL_PLAN_STREAM_INTERVAL == type) {
3543
    pOptr = createStreamIntervalOperatorInfo(ops[0], pPhyNode, pTaskInfo);
3544 3545
  } else if (QUERY_NODE_PHYSICAL_PLAN_MERGE_ALIGNED_INTERVAL == type) {
    SMergeAlignedIntervalPhysiNode* pIntervalPhyNode = (SMergeAlignedIntervalPhysiNode*)pPhyNode;
3546
    pOptr = createMergeAlignedIntervalOperatorInfo(ops[0], pIntervalPhyNode, pTaskInfo);
S
shenglian zhou 已提交
3547
  } else if (QUERY_NODE_PHYSICAL_PLAN_MERGE_INTERVAL == type) {
X
Xiaoyu Wang 已提交
3548
    SMergeIntervalPhysiNode* pIntervalPhyNode = (SMergeIntervalPhysiNode*)pPhyNode;
3549
    pOptr = createMergeIntervalOperatorInfo(ops[0], pIntervalPhyNode, pTaskInfo);
5
54liuyao 已提交
3550
  } else if (QUERY_NODE_PHYSICAL_PLAN_STREAM_SEMI_INTERVAL == type) {
3551
    int32_t children = 0;
5
54liuyao 已提交
3552 3553
    pOptr = createStreamFinalIntervalOperatorInfo(ops[0], pPhyNode, pTaskInfo, children);
  } else if (QUERY_NODE_PHYSICAL_PLAN_STREAM_FINAL_INTERVAL == type) {
5
54liuyao 已提交
3554
    int32_t children = pHandle->numOfVgroups;
5
54liuyao 已提交
3555
    pOptr = createStreamFinalIntervalOperatorInfo(ops[0], pPhyNode, pTaskInfo, children);
H
Haojun Liao 已提交
3556
  } else if (QUERY_NODE_PHYSICAL_PLAN_SORT == type) {
3557
    pOptr = createSortOperatorInfo(ops[0], (SSortPhysiNode*)pPhyNode, pTaskInfo);
S
shenglian zhou 已提交
3558 3559
  } else if (QUERY_NODE_PHYSICAL_PLAN_GROUP_SORT == type) {
    pOptr = createGroupSortOperatorInfo(ops[0], (SGroupSortPhysiNode*)pPhyNode, pTaskInfo);
X
Xiaoyu Wang 已提交
3560
  } else if (QUERY_NODE_PHYSICAL_PLAN_MERGE == type) {
3561
    SMergePhysiNode* pMergePhyNode = (SMergePhysiNode*)pPhyNode;
3562
    pOptr = createMultiwayMergeOperatorInfo(ops, size, pMergePhyNode, pTaskInfo);
3563
  } else if (QUERY_NODE_PHYSICAL_PLAN_MERGE_SESSION == type) {
H
Haojun Liao 已提交
3564
    SSessionWinodwPhysiNode* pSessionNode = (SSessionWinodwPhysiNode*)pPhyNode;
H
Haojun Liao 已提交
3565
    pOptr = createSessionAggOperatorInfo(ops[0], pSessionNode, pTaskInfo);
3566
  } else if (QUERY_NODE_PHYSICAL_PLAN_STREAM_SESSION == type) {
3567 3568 3569 3570 3571
    pOptr = createStreamSessionAggOperatorInfo(ops[0], pPhyNode, pTaskInfo);
  } else if (QUERY_NODE_PHYSICAL_PLAN_STREAM_SEMI_SESSION == type) {
    int32_t children = 0;
    pOptr = createStreamFinalSessionAggOperatorInfo(ops[0], pPhyNode, pTaskInfo, children);
  } else if (QUERY_NODE_PHYSICAL_PLAN_STREAM_FINAL_SESSION == type) {
3572
    int32_t children = pHandle->numOfVgroups;
3573
    pOptr = createStreamFinalSessionAggOperatorInfo(ops[0], pPhyNode, pTaskInfo, children);
H
Haojun Liao 已提交
3574
  } else if (QUERY_NODE_PHYSICAL_PLAN_PARTITION == type) {
3575
    pOptr = createPartitionOperatorInfo(ops[0], (SPartitionPhysiNode*)pPhyNode, pTaskInfo);
3576
  } else if (QUERY_NODE_PHYSICAL_PLAN_STREAM_PARTITION == type) {
3577
    pOptr = createStreamPartitionOperatorInfo(ops[0], (SStreamPartitionPhysiNode*)pPhyNode, pTaskInfo);
3578
  } else if (QUERY_NODE_PHYSICAL_PLAN_MERGE_STATE == type) {
dengyihao's avatar
dengyihao 已提交
3579
    SStateWinodwPhysiNode* pStateNode = (SStateWinodwPhysiNode*)pPhyNode;
3580
    pOptr = createStatewindowOperatorInfo(ops[0], pStateNode, pTaskInfo);
3581
  } else if (QUERY_NODE_PHYSICAL_PLAN_STREAM_STATE == type) {
5
54liuyao 已提交
3582
    pOptr = createStreamStateAggOperatorInfo(ops[0], pPhyNode, pTaskInfo);
3583
  } else if (QUERY_NODE_PHYSICAL_PLAN_MERGE_JOIN == type) {
3584
    pOptr = createMergeJoinOperatorInfo(ops, size, (SSortMergeJoinPhysiNode*)pPhyNode, pTaskInfo);
3585
  } else if (QUERY_NODE_PHYSICAL_PLAN_FILL == type) {
H
Haojun Liao 已提交
3586
    pOptr = createFillOperatorInfo(ops[0], (SFillPhysiNode*)pPhyNode, pTaskInfo);
5
54liuyao 已提交
3587 3588
  } else if (QUERY_NODE_PHYSICAL_PLAN_STREAM_FILL == type) {
    pOptr = createStreamFillOperatorInfo(ops[0], (SStreamFillPhysiNode*)pPhyNode, pTaskInfo);
H
Haojun Liao 已提交
3589 3590
  } else if (QUERY_NODE_PHYSICAL_PLAN_INDEF_ROWS_FUNC == type) {
    pOptr = createIndefinitOutputOperatorInfo(ops[0], pPhyNode, pTaskInfo);
3591 3592
  } else if (QUERY_NODE_PHYSICAL_PLAN_INTERP_FUNC == type) {
    pOptr = createTimeSliceOperatorInfo(ops[0], pPhyNode, pTaskInfo);
H
Haojun Liao 已提交
3593 3594
  } else {
    ASSERT(0);
H
Haojun Liao 已提交
3595
  }
3596

3597
  taosMemoryFree(ops);
3598 3599 3600 3601
  if (pOptr) {
    pOptr->resultDataBlockId = pPhyNode->pOutputDataBlockDesc->dataBlockId;
  }

3602
  return pOptr;
3603
}
H
Haojun Liao 已提交
3604

L
Liu Jicong 已提交
3605 3606 3607 3608 3609 3610 3611 3612 3613 3614 3615 3616 3617
static int32_t extractTbscanInStreamOpTree(SOperatorInfo* pOperator, STableScanInfo** ppInfo) {
  if (pOperator->operatorType != QUERY_NODE_PHYSICAL_PLAN_STREAM_SCAN) {
    if (pOperator->numOfDownstream == 0) {
      qError("failed to find stream scan operator");
      return TSDB_CODE_QRY_APP_ERROR;
    }

    if (pOperator->numOfDownstream > 1) {
      qError("join not supported for stream block scan");
      return TSDB_CODE_QRY_APP_ERROR;
    }
    return extractTbscanInStreamOpTree(pOperator->pDownstream[0], ppInfo);
  } else {
3618 3619 3620
    SStreamScanInfo* pInfo = pOperator->info;
    ASSERT(pInfo->pTableScanOp->operatorType == QUERY_NODE_PHYSICAL_PLAN_TABLE_SCAN);
    *ppInfo = pInfo->pTableScanOp->info;
L
Liu Jicong 已提交
3621 3622 3623 3624
    return 0;
  }
}

3625 3626 3627 3628 3629 3630 3631 3632 3633 3634 3635 3636 3637 3638 3639 3640 3641 3642 3643 3644 3645 3646
int32_t extractTableScanNode(SPhysiNode* pNode, STableScanPhysiNode** ppNode) {
  if (pNode->pChildren == NULL || LIST_LENGTH(pNode->pChildren) == 0) {
    if (QUERY_NODE_PHYSICAL_PLAN_TABLE_SCAN == pNode->type) {
      *ppNode = (STableScanPhysiNode*)pNode;
      return 0;
    } else {
      ASSERT(0);
      terrno = TSDB_CODE_QRY_APP_ERROR;
      return -1;
    }
  } else {
    if (LIST_LENGTH(pNode->pChildren) != 1) {
      ASSERT(0);
      terrno = TSDB_CODE_QRY_APP_ERROR;
      return -1;
    }
    SPhysiNode* pChildNode = (SPhysiNode*)nodesListGetNode(pNode->pChildren, 0);
    return extractTableScanNode(pChildNode, ppNode);
  }
  return -1;
}

3647
#if 0
L
Liu Jicong 已提交
3648 3649 3650 3651 3652
int32_t rebuildReader(SOperatorInfo* pOperator, SSubplan* plan, SReadHandle* pHandle, int64_t uid, int64_t ts) {
  STableScanInfo* pTableScanInfo = NULL;
  if (extractTbscanInStreamOpTree(pOperator, &pTableScanInfo) < 0) {
    return -1;
  }
3653

L
Liu Jicong 已提交
3654 3655 3656 3657
  STableScanPhysiNode* pNode = NULL;
  if (extractTableScanNode(plan->pNode, &pNode) < 0) {
    ASSERT(0);
  }
3658

H
Haojun Liao 已提交
3659
  tsdbReaderClose(pTableScanInfo->dataReader);
3660

L
Liu Jicong 已提交
3661
  STableListInfo info = {0};
H
Haojun Liao 已提交
3662
  pTableScanInfo->dataReader = doCreateDataReader(pNode, pHandle, &info, NULL);
L
Liu Jicong 已提交
3663 3664 3665 3666
  if (pTableScanInfo->dataReader == NULL) {
    ASSERT(0);
    qError("failed to create data reader");
    return TSDB_CODE_QRY_APP_ERROR;
3667
  }
L
Liu Jicong 已提交
3668
  // TODO: set uid and ts to data reader
3669 3670
  return 0;
}
3671
#endif
3672

C
Cary Xu 已提交
3673
int32_t encodeOperator(SOperatorInfo* ops, char** result, int32_t* length, int32_t* nOptrWithVal) {
wmmhello's avatar
wmmhello 已提交
3674
  int32_t code = TDB_CODE_SUCCESS;
3675
  char*   pCurrent = NULL;
wmmhello's avatar
wmmhello 已提交
3676
  int32_t currLength = 0;
3677
  if (ops->fpSet.encodeResultRow) {
C
Cary Xu 已提交
3678
    if (result == NULL || length == NULL || nOptrWithVal == NULL) {
wmmhello's avatar
wmmhello 已提交
3679 3680 3681
      return TSDB_CODE_TSC_INVALID_INPUT;
    }
    code = ops->fpSet.encodeResultRow(ops, &pCurrent, &currLength);
wmmhello's avatar
wmmhello 已提交
3682

3683 3684
    if (code != TDB_CODE_SUCCESS) {
      if (*result != NULL) {
wmmhello's avatar
wmmhello 已提交
3685 3686 3687 3688
        taosMemoryFree(*result);
        *result = NULL;
      }
      return code;
C
Cary Xu 已提交
3689 3690 3691
    } else if (currLength == 0) {
      ASSERT(!pCurrent);
      goto _downstream;
wmmhello's avatar
wmmhello 已提交
3692
    }
wmmhello's avatar
wmmhello 已提交
3693

C
Cary Xu 已提交
3694 3695
    ++(*nOptrWithVal);

C
Cary Xu 已提交
3696
    ASSERT(currLength >= 0);
wmmhello's avatar
wmmhello 已提交
3697

3698
    if (*result == NULL) {
wmmhello's avatar
wmmhello 已提交
3699
      *result = (char*)taosMemoryCalloc(1, currLength + sizeof(int32_t));
wmmhello's avatar
wmmhello 已提交
3700 3701 3702 3703 3704 3705
      if (*result == NULL) {
        taosMemoryFree(pCurrent);
        return TSDB_CODE_OUT_OF_MEMORY;
      }
      memcpy(*result + sizeof(int32_t), pCurrent, currLength);
      *(int32_t*)(*result) = currLength + sizeof(int32_t);
3706
    } else {
wmmhello's avatar
wmmhello 已提交
3707
      int32_t sizePre = *(int32_t*)(*result);
3708
      char*   tmp = (char*)taosMemoryRealloc(*result, sizePre + currLength);
wmmhello's avatar
wmmhello 已提交
3709 3710 3711 3712 3713 3714 3715 3716 3717 3718 3719 3720
      if (tmp == NULL) {
        taosMemoryFree(pCurrent);
        taosMemoryFree(*result);
        *result = NULL;
        return TSDB_CODE_OUT_OF_MEMORY;
      }
      *result = tmp;
      memcpy(*result + sizePre, pCurrent, currLength);
      *(int32_t*)(*result) += currLength;
    }
    taosMemoryFree(pCurrent);
    *length = *(int32_t*)(*result);
wmmhello's avatar
wmmhello 已提交
3721 3722
  }

3723
_downstream:
wmmhello's avatar
wmmhello 已提交
3724
  for (int32_t i = 0; i < ops->numOfDownstream; ++i) {
C
Cary Xu 已提交
3725
    code = encodeOperator(ops->pDownstream[i], result, length, nOptrWithVal);
3726
    if (code != TDB_CODE_SUCCESS) {
wmmhello's avatar
wmmhello 已提交
3727
      return code;
wmmhello's avatar
wmmhello 已提交
3728 3729
    }
  }
wmmhello's avatar
wmmhello 已提交
3730
  return TDB_CODE_SUCCESS;
wmmhello's avatar
wmmhello 已提交
3731 3732
}

H
Haojun Liao 已提交
3733
int32_t decodeOperator(SOperatorInfo* ops, const char* result, int32_t length) {
wmmhello's avatar
wmmhello 已提交
3734
  int32_t code = TDB_CODE_SUCCESS;
3735 3736
  if (ops->fpSet.decodeResultRow) {
    if (result == NULL) {
wmmhello's avatar
wmmhello 已提交
3737 3738
      return TSDB_CODE_TSC_INVALID_INPUT;
    }
H
Haojun Liao 已提交
3739

3740
    ASSERT(length == *(int32_t*)result);
H
Haojun Liao 已提交
3741 3742

    const char* data = result + sizeof(int32_t);
L
Liu Jicong 已提交
3743
    code = ops->fpSet.decodeResultRow(ops, (char*)data);
3744
    if (code != TDB_CODE_SUCCESS) {
wmmhello's avatar
wmmhello 已提交
3745 3746
      return code;
    }
wmmhello's avatar
wmmhello 已提交
3747

wmmhello's avatar
wmmhello 已提交
3748
    int32_t totalLength = *(int32_t*)result;
3749 3750
    int32_t dataLength = *(int32_t*)data;

3751
    if (totalLength == dataLength + sizeof(int32_t)) {  // the last data
wmmhello's avatar
wmmhello 已提交
3752 3753
      result = NULL;
      length = 0;
3754
    } else {
wmmhello's avatar
wmmhello 已提交
3755 3756 3757 3758
      result += dataLength;
      *(int32_t*)(result) = totalLength - dataLength;
      length = totalLength - dataLength;
    }
wmmhello's avatar
wmmhello 已提交
3759 3760
  }

wmmhello's avatar
wmmhello 已提交
3761 3762
  for (int32_t i = 0; i < ops->numOfDownstream; ++i) {
    code = decodeOperator(ops->pDownstream[i], result, length);
3763
    if (code != TDB_CODE_SUCCESS) {
wmmhello's avatar
wmmhello 已提交
3764
      return code;
wmmhello's avatar
wmmhello 已提交
3765 3766
    }
  }
wmmhello's avatar
wmmhello 已提交
3767
  return TDB_CODE_SUCCESS;
wmmhello's avatar
wmmhello 已提交
3768 3769
}

D
dapan1121 已提交
3770
int32_t createDataSinkParam(SDataSinkNode* pNode, void** pParam, qTaskInfo_t* pTaskInfo, SReadHandle* readHandle) {
D
dapan1121 已提交
3771
  SExecTaskInfo* pTask = *(SExecTaskInfo**)pTaskInfo;
3772

D
dapan1121 已提交
3773
  switch (pNode->type) {
D
dapan1121 已提交
3774 3775 3776 3777 3778 3779
    case QUERY_NODE_PHYSICAL_PLAN_QUERY_INSERT: {
      SInserterParam* pInserterParam = taosMemoryCalloc(1, sizeof(SInserterParam));
      if (NULL == pInserterParam) {
        return TSDB_CODE_OUT_OF_MEMORY;
      }
      pInserterParam->readHandle = readHandle;
L
Liu Jicong 已提交
3780

D
dapan1121 已提交
3781 3782 3783
      *pParam = pInserterParam;
      break;
    }
D
dapan1121 已提交
3784
    case QUERY_NODE_PHYSICAL_PLAN_DELETE: {
3785
      SDeleterParam* pDeleterParam = taosMemoryCalloc(1, sizeof(SDeleterParam));
D
dapan1121 已提交
3786 3787 3788
      if (NULL == pDeleterParam) {
        return TSDB_CODE_OUT_OF_MEMORY;
      }
H
Haojun Liao 已提交
3789 3790 3791 3792
      int32_t tbNum = tableListGetSize(pTask->pTableInfoList);
      pDeleterParam->suid = tableListGetSuid(pTask->pTableInfoList);

      // TODO extract uid list
D
dapan1121 已提交
3793 3794 3795 3796 3797
      pDeleterParam->pUidList = taosArrayInit(tbNum, sizeof(uint64_t));
      if (NULL == pDeleterParam->pUidList) {
        taosMemoryFree(pDeleterParam);
        return TSDB_CODE_OUT_OF_MEMORY;
      }
H
Haojun Liao 已提交
3798

D
dapan1121 已提交
3799
      for (int32_t i = 0; i < tbNum; ++i) {
H
Haojun Liao 已提交
3800
        STableKeyInfo* pTable = tableListGetInfo(pTask->pTableInfoList, i);
D
dapan1121 已提交
3801 3802 3803 3804 3805 3806 3807 3808 3809 3810 3811 3812 3813
        taosArrayPush(pDeleterParam->pUidList, &pTable->uid);
      }

      *pParam = pDeleterParam;
      break;
    }
    default:
      break;
  }

  return TSDB_CODE_SUCCESS;
}

dengyihao's avatar
dengyihao 已提交
3814
int32_t createExecTaskInfoImpl(SSubplan* pPlan, SExecTaskInfo** pTaskInfo, SReadHandle* pHandle, uint64_t taskId,
D
dapan1121 已提交
3815
                               char* sql, EOPTR_EXEC_MODEL model) {
H
Haojun Liao 已提交
3816 3817
  uint64_t queryId = pPlan->id.queryId;

D
dapan1121 已提交
3818
  *pTaskInfo = createExecTaskInfo(queryId, taskId, model, pPlan->dbFName);
H
Haojun Liao 已提交
3819 3820 3821
  if (*pTaskInfo == NULL) {
    goto _complete;
  }
H
Haojun Liao 已提交
3822

3823
  if (pHandle) {
L
Liu Jicong 已提交
3824
    /*(*pTaskInfo)->streamInfo.fillHistoryVer1 = pHandle->fillHistoryVer1;*/
3825 3826 3827
    if (pHandle->pStateBackend) {
      (*pTaskInfo)->streamInfo.pState = pHandle->pStateBackend;
    }
H
Haojun Liao 已提交
3828 3829
  }

3830
  (*pTaskInfo)->sql = sql;
D
dapan1121 已提交
3831
  sql = NULL;
H
Haojun Liao 已提交
3832

3833
  (*pTaskInfo)->pSubplan = pPlan;
3834 3835
  (*pTaskInfo)->pRoot =
      createOperatorTree(pPlan->pNode, *pTaskInfo, pHandle, pPlan->pTagCond, pPlan->pTagIndexCond, pPlan->user);
L
Liu Jicong 已提交
3836

D
dapan1121 已提交
3837
  if (NULL == (*pTaskInfo)->pRoot) {
H
Haojun Liao 已提交
3838
    terrno = (*pTaskInfo)->code;
D
dapan1121 已提交
3839
    goto _complete;
3840 3841
  }

H
Haojun Liao 已提交
3842
  return TSDB_CODE_SUCCESS;
H
Haojun Liao 已提交
3843

H
Haojun Liao 已提交
3844
_complete:
D
dapan1121 已提交
3845
  taosMemoryFree(sql);
H
Haojun Liao 已提交
3846
  doDestroyTask(*pTaskInfo);
H
Haojun Liao 已提交
3847
  return terrno;
H
Haojun Liao 已提交
3848 3849
}

L
Liu Jicong 已提交
3850
void doDestroyTask(SExecTaskInfo* pTaskInfo) {
H
Haojun Liao 已提交
3851 3852
  qDebug("%s execTask is freed", GET_TASKID(pTaskInfo));

H
Haojun Liao 已提交
3853
  pTaskInfo->pTableInfoList = tableListDestroy(pTaskInfo->pTableInfoList);
H
Haojun Liao 已提交
3854
  destroyOperatorInfo(pTaskInfo->pRoot);
3855
  cleanupTableSchemaInfo(&pTaskInfo->schemaInfo);
3856
  cleanupStreamInfo(&pTaskInfo->streamInfo);
3857

D
dapan1121 已提交
3858
  if (!pTaskInfo->localFetch.localExec) {
D
dapan1121 已提交
3859 3860
    nodesDestroyNode((SNode*)pTaskInfo->pSubplan);
  }
3861

wafwerar's avatar
wafwerar 已提交
3862 3863 3864
  taosMemoryFreeClear(pTaskInfo->sql);
  taosMemoryFreeClear(pTaskInfo->id.str);
  taosMemoryFreeClear(pTaskInfo);
3865 3866 3867 3868
}

static int64_t getQuerySupportBufSize(size_t numOfTables) {
  size_t s1 = sizeof(STableQueryInfo);
L
Liu Jicong 已提交
3869 3870
  //  size_t s3 = sizeof(STableCheckInfo);  buffer consumption in tsdb
  return (int64_t)(s1 * 1.5 * numOfTables);
3871 3872 3873 3874 3875 3876 3877
}

int32_t checkForQueryBuf(size_t numOfTables) {
  int64_t t = getQuerySupportBufSize(numOfTables);
  if (tsQueryBufferSizeBytes < 0) {
    return TSDB_CODE_SUCCESS;
  } else if (tsQueryBufferSizeBytes > 0) {
L
Liu Jicong 已提交
3878
    while (1) {
3879 3880 3881 3882 3883 3884 3885 3886 3887 3888 3889 3890 3891 3892 3893 3894 3895 3896 3897 3898 3899 3900 3901 3902 3903 3904
      int64_t s = tsQueryBufferSizeBytes;
      int64_t remain = s - t;
      if (remain >= 0) {
        if (atomic_val_compare_exchange_64(&tsQueryBufferSizeBytes, s, remain) == s) {
          return TSDB_CODE_SUCCESS;
        }
      } else {
        return TSDB_CODE_QRY_NOT_ENOUGH_BUFFER;
      }
    }
  }

  // disable query processing if the value of tsQueryBufferSize is zero.
  return TSDB_CODE_QRY_NOT_ENOUGH_BUFFER;
}

void releaseQueryBuf(size_t numOfTables) {
  if (tsQueryBufferSizeBytes < 0) {
    return;
  }

  int64_t t = getQuerySupportBufSize(numOfTables);

  // restore value is not enough buffer available
  atomic_add_fetch_64(&tsQueryBufferSizeBytes, t);
}
D
dapan1121 已提交
3905

H
Haojun Liao 已提交
3906
int32_t getOperatorExplainExecInfo(SOperatorInfo* operatorInfo, SArray* pExecInfoList) {
3907
  SExplainExecInfo  execInfo = {0};
H
Haojun Liao 已提交
3908
  SExplainExecInfo* pExplainInfo = taosArrayPush(pExecInfoList, &execInfo);
3909

H
Haojun Liao 已提交
3910 3911 3912 3913 3914
  pExplainInfo->numOfRows = operatorInfo->resultInfo.totalRows;
  pExplainInfo->startupCost = operatorInfo->cost.openCost;
  pExplainInfo->totalCost = operatorInfo->cost.totalCost;
  pExplainInfo->verboseLen = 0;
  pExplainInfo->verboseInfo = NULL;
D
dapan1121 已提交
3915

3916
  if (operatorInfo->fpSet.getExplainFn) {
3917 3918
    int32_t code =
        operatorInfo->fpSet.getExplainFn(operatorInfo, &pExplainInfo->verboseInfo, &pExplainInfo->verboseLen);
D
dapan1121 已提交
3919
    if (code) {
3920
      qError("%s operator getExplainFn failed, code:%s", GET_TASKID(operatorInfo->pTaskInfo), tstrerror(code));
D
dapan1121 已提交
3921 3922 3923
      return code;
    }
  }
dengyihao's avatar
dengyihao 已提交
3924

D
dapan1121 已提交
3925
  int32_t code = 0;
D
dapan1121 已提交
3926
  for (int32_t i = 0; i < operatorInfo->numOfDownstream; ++i) {
H
Haojun Liao 已提交
3927 3928
    code = getOperatorExplainExecInfo(operatorInfo->pDownstream[i], pExecInfoList);
    if (code != TSDB_CODE_SUCCESS) {
3929
      //      taosMemoryFreeClear(*pRes);
D
dapan1121 已提交
3930 3931 3932 3933 3934
      return TSDB_CODE_QRY_OUT_OF_MEMORY;
    }
  }

  return TSDB_CODE_SUCCESS;
D
dapan1121 已提交
3935
}
5
54liuyao 已提交
3936

3937 3938
int32_t setOutputBuf(SStreamState* pState, STimeWindow* win, SResultRow** pResult, int64_t tableGroupId,
                     SqlFunctionCtx* pCtx, int32_t numOfOutput, int32_t* rowEntryInfoOffset, SAggSupporter* pAggSup) {
3939 3940 3941 3942 3943 3944
  SWinKey key = {
      .ts = win->skey,
      .groupId = tableGroupId,
  };
  char*   value = NULL;
  int32_t size = pAggSup->resultRowSize;
5
54liuyao 已提交
3945

3946
  if (streamStateAddIfNotExist(pState, &key, (void**)&value, &size) < 0) {
3947 3948 3949 3950 3951 3952 3953 3954 3955 3956
    return TSDB_CODE_QRY_OUT_OF_MEMORY;
  }
  *pResult = (SResultRow*)value;
  ASSERT(*pResult);
  // set time window for current result
  (*pResult)->win = (*win);
  setResultRowInitCtx(*pResult, pCtx, numOfOutput, rowEntryInfoOffset);
  return TSDB_CODE_SUCCESS;
}

3957 3958
int32_t releaseOutputBuf(SStreamState* pState, SWinKey* pKey, SResultRow* pResult) {
  streamStateReleaseBuf(pState, pKey, pResult);
3959 3960 3961
  return TSDB_CODE_SUCCESS;
}

3962 3963
int32_t saveOutputBuf(SStreamState* pState, SWinKey* pKey, SResultRow* pResult, int32_t resSize) {
  streamStatePut(pState, pKey, pResult, resSize);
3964 3965 3966
  return TSDB_CODE_SUCCESS;
}

3967
int32_t buildDataBlockFromGroupRes(SOperatorInfo* pOperator, SStreamState* pState, SSDataBlock* pBlock, SExprSupp* pSup,
3968
                                   SGroupResInfo* pGroupResInfo) {
3969
  SExecTaskInfo*  pTaskInfo = pOperator->pTaskInfo;
3970 3971 3972 3973 3974 3975 3976 3977 3978 3979 3980 3981
  SExprInfo*      pExprInfo = pSup->pExprInfo;
  int32_t         numOfExprs = pSup->numOfExprs;
  int32_t*        rowEntryOffset = pSup->rowEntryInfoOffset;
  SqlFunctionCtx* pCtx = pSup->pCtx;

  int32_t numOfRows = getNumOfTotalRes(pGroupResInfo);

  for (int32_t i = pGroupResInfo->index; i < numOfRows; i += 1) {
    SResKeyPos* pPos = taosArrayGetP(pGroupResInfo->pRows, i);
    int32_t     size = 0;
    void*       pVal = NULL;
    SWinKey     key = {
3982 3983
            .ts = *(TSKEY*)pPos->key,
            .groupId = pPos->groupId,
3984
    };
3985
    int32_t code = streamStateGet(pState, &key, &pVal, &size);
3986 3987 3988 3989 3990 3991
    ASSERT(code == 0);
    SResultRow* pRow = (SResultRow*)pVal;
    doUpdateNumOfRows(pCtx, pRow, numOfExprs, rowEntryOffset);
    // no results, continue to check the next one
    if (pRow->numOfRows == 0) {
      pGroupResInfo->index += 1;
3992
      releaseOutputBuf(pState, &key, pRow);
3993 3994 3995 3996 3997
      continue;
    }

    if (pBlock->info.groupId == 0) {
      pBlock->info.groupId = pPos->groupId;
3998 3999 4000 4001
      SStreamIntervalOperatorInfo* pInfo = pOperator->info;
      char* tbname = taosHashGet(pInfo->pGroupIdTbNameMap, &pBlock->info.groupId, sizeof(int64_t));
      if (tbname != NULL) {
        memcpy(pBlock->info.parTbName, tbname, TSDB_TABLE_NAME_LEN);
L
Liu Jicong 已提交
4002 4003
      } else {
        pBlock->info.parTbName[0] = 0;
4004
      }
4005 4006 4007
    } else {
      // current value belongs to different group, it can't be packed into one datablock
      if (pBlock->info.groupId != pPos->groupId) {
4008
        releaseOutputBuf(pState, &key, pRow);
4009 4010 4011 4012 4013 4014
        break;
      }
    }

    if (pBlock->info.rows + pRow->numOfRows > pBlock->info.capacity) {
      ASSERT(pBlock->info.rows > 0);
4015
      releaseOutputBuf(pState, &key, pRow);
4016 4017 4018 4019 4020 4021 4022 4023 4024 4025
      break;
    }

    pGroupResInfo->index += 1;

    for (int32_t j = 0; j < numOfExprs; ++j) {
      int32_t slotId = pExprInfo[j].base.resSchema.slotId;

      pCtx[j].resultInfo = getResultEntryInfo(pRow, j, rowEntryOffset);
      if (pCtx[j].fpSet.finalize) {
4026 4027 4028 4029
        int32_t code1 = pCtx[j].fpSet.finalize(&pCtx[j], pBlock);
        if (TAOS_FAILED(code1)) {
          qError("%s build result data block error, code %s", GET_TASKID(pTaskInfo), tstrerror(code1));
          T_LONG_JMP(pTaskInfo->env, code1);
4030 4031 4032 4033 4034 4035 4036 4037 4038 4039 4040 4041 4042
        }
      } else if (strcmp(pCtx[j].pExpr->pExpr->_function.functionName, "_select_value") == 0) {
        // do nothing, todo refactor
      } else {
        // expand the result into multiple rows. E.g., _wstart, top(k, 20)
        // the _wstart needs to copy to 20 following rows, since the results of top-k expands to 20 different rows.
        SColumnInfoData* pColInfoData = taosArrayGet(pBlock->pDataBlock, slotId);
        char*            in = GET_ROWCELL_INTERBUF(pCtx[j].resultInfo);
        for (int32_t k = 0; k < pRow->numOfRows; ++k) {
          colDataAppend(pColInfoData, pBlock->info.rows + k, in, pCtx[j].resultInfo->isNullRes);
        }
      }
    }
5
54liuyao 已提交
4043

4044
    pBlock->info.rows += pRow->numOfRows;
4045
    releaseOutputBuf(pState, &key, pRow);
4046 4047 4048 4049
  }
  blockDataUpdateTsWindow(pBlock, 0);
  return TSDB_CODE_SUCCESS;
}
5
54liuyao 已提交
4050 4051 4052 4053 4054 4055 4056

int32_t saveSessionDiscBuf(SStreamState* pState, SSessionKey* key, void* buf, int32_t size) {
  streamStateSessionPut(pState, key, (const void*)buf, size);
  releaseOutputBuf(pState, NULL, (SResultRow*)buf);
  return TSDB_CODE_SUCCESS;
}

4057
int32_t buildSessionResultDataBlock(SOperatorInfo* pOperator, SStreamState* pState, SSDataBlock* pBlock,
5
54liuyao 已提交
4058
                                    SExprSupp* pSup, SGroupResInfo* pGroupResInfo) {
4059
  SExecTaskInfo*  pTaskInfo = pOperator->pTaskInfo;
5
54liuyao 已提交
4060 4061 4062 4063 4064 4065 4066 4067 4068 4069 4070 4071 4072
  SExprInfo*      pExprInfo = pSup->pExprInfo;
  int32_t         numOfExprs = pSup->numOfExprs;
  int32_t*        rowEntryOffset = pSup->rowEntryInfoOffset;
  SqlFunctionCtx* pCtx = pSup->pCtx;

  int32_t numOfRows = getNumOfTotalRes(pGroupResInfo);

  for (int32_t i = pGroupResInfo->index; i < numOfRows; i += 1) {
    SSessionKey* pKey = taosArrayGet(pGroupResInfo->pRows, i);
    int32_t      size = 0;
    void*        pVal = NULL;
    int32_t      code = streamStateSessionGet(pState, pKey, &pVal, &size);
    ASSERT(code == 0);
4073 4074 4075 4076
    if (code == -1) {
      // coverity scan
      continue;
    }
5
54liuyao 已提交
4077 4078 4079 4080 4081 4082 4083 4084 4085 4086 4087
    SResultRow* pRow = (SResultRow*)pVal;
    doUpdateNumOfRows(pCtx, pRow, numOfExprs, rowEntryOffset);
    // no results, continue to check the next one
    if (pRow->numOfRows == 0) {
      pGroupResInfo->index += 1;
      releaseOutputBuf(pState, NULL, pRow);
      continue;
    }

    if (pBlock->info.groupId == 0) {
      pBlock->info.groupId = pKey->groupId;
4088 4089 4090 4091 4092 4093 4094 4095 4096 4097 4098 4099 4100 4101 4102 4103 4104 4105 4106 4107 4108 4109 4110 4111 4112

      if (pOperator->operatorType == QUERY_NODE_PHYSICAL_PLAN_STREAM_STATE) {
        SStreamStateAggOperatorInfo* pInfo = pOperator->info;

        char* tbname = taosHashGet(pInfo->pGroupIdTbNameMap, &pBlock->info.groupId, sizeof(int64_t));
        if (tbname != NULL) {
          memcpy(pBlock->info.parTbName, tbname, TSDB_TABLE_NAME_LEN);
        } else {
          pBlock->info.parTbName[0] = 0;
        }
      } else if (pOperator->operatorType == QUERY_NODE_PHYSICAL_PLAN_STREAM_SESSION ||
                 pOperator->operatorType == QUERY_NODE_PHYSICAL_PLAN_STREAM_SEMI_SESSION ||
                 pOperator->operatorType == QUERY_NODE_PHYSICAL_PLAN_STREAM_FINAL_SESSION) {
        SStreamSessionAggOperatorInfo* pInfo = pOperator->info;

        char* tbname = taosHashGet(pInfo->pGroupIdTbNameMap, &pBlock->info.groupId, sizeof(int64_t));
        if (tbname != NULL) {
          memcpy(pBlock->info.parTbName, tbname, TSDB_TABLE_NAME_LEN);
        } else {
          pBlock->info.parTbName[0] = 0;
        }
      } else {
        ASSERT(0);
      }

5
54liuyao 已提交
4113 4114 4115 4116 4117 4118 4119 4120 4121 4122 4123 4124 4125 4126 4127 4128 4129 4130 4131 4132 4133 4134 4135 4136 4137 4138 4139 4140 4141 4142 4143 4144 4145 4146 4147 4148 4149 4150 4151 4152 4153 4154 4155 4156 4157
    } else {
      // current value belongs to different group, it can't be packed into one datablock
      if (pBlock->info.groupId != pKey->groupId) {
        releaseOutputBuf(pState, NULL, pRow);
        break;
      }
    }

    if (pBlock->info.rows + pRow->numOfRows > pBlock->info.capacity) {
      ASSERT(pBlock->info.rows > 0);
      releaseOutputBuf(pState, NULL, pRow);
      break;
    }

    pGroupResInfo->index += 1;

    for (int32_t j = 0; j < numOfExprs; ++j) {
      int32_t slotId = pExprInfo[j].base.resSchema.slotId;

      pCtx[j].resultInfo = getResultEntryInfo(pRow, j, rowEntryOffset);
      if (pCtx[j].fpSet.finalize) {
        int32_t code1 = pCtx[j].fpSet.finalize(&pCtx[j], pBlock);
        if (TAOS_FAILED(code1)) {
          qError("%s build result data block error, code %s", GET_TASKID(pTaskInfo), tstrerror(code1));
          T_LONG_JMP(pTaskInfo->env, code1);
        }
      } else if (strcmp(pCtx[j].pExpr->pExpr->_function.functionName, "_select_value") == 0) {
        // do nothing, todo refactor
      } else {
        // expand the result into multiple rows. E.g., _wstart, top(k, 20)
        // the _wstart needs to copy to 20 following rows, since the results of top-k expands to 20 different rows.
        SColumnInfoData* pColInfoData = taosArrayGet(pBlock->pDataBlock, slotId);
        char*            in = GET_ROWCELL_INTERBUF(pCtx[j].resultInfo);
        for (int32_t k = 0; k < pRow->numOfRows; ++k) {
          colDataAppend(pColInfoData, pBlock->info.rows + k, in, pCtx[j].resultInfo->isNullRes);
        }
      }
    }

    pBlock->info.rows += pRow->numOfRows;
    // saveSessionDiscBuf(pState, pKey, pVal, size);
    releaseOutputBuf(pState, NULL, pRow);
  }
  blockDataUpdateTsWindow(pBlock, 0);
  return TSDB_CODE_SUCCESS;
4158
}