executorimpl.c 192.2 KB
Newer Older
1 2 3 4 5 6 7 8 9 10 11 12 13 14
/*
 * Copyright (c) 2019 TAOS Data, Inc. <jhtao@taosdata.com>
 *
 * This program is free software: you can use, redistribute, and/or modify
 * it under the terms of the GNU Affero General Public License, version 3
 * or later ("AGPL"), as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.
 *
 * You should have received a copy of the GNU Affero General Public License
 * along with this program. If not, see <http://www.gnu.org/licenses/>.
 */
15

H
Haojun Liao 已提交
16 17
#include "filter.h"
#include "function.h"
18 19
#include "functionMgt.h"
#include "os.h"
H
Haojun Liao 已提交
20
#include "querynodes.h"
21
#include "tfill.h"
dengyihao's avatar
dengyihao 已提交
22
#include "tname.h"
X
Xiaoyu Wang 已提交
23
#include "tref.h"
24

H
Haojun Liao 已提交
25
#include "tdatablock.h"
26
#include "tglobal.h"
H
Haojun Liao 已提交
27
#include "tmsg.h"
H
Haojun Liao 已提交
28
#include "tsort.h"
29
#include "ttime.h"
H
Haojun Liao 已提交
30

31
#include "executorimpl.h"
dengyihao's avatar
dengyihao 已提交
32
#include "index.h"
33
#include "query.h"
34 35
#include "tcompare.h"
#include "tcompression.h"
H
Haojun Liao 已提交
36
#include "thash.h"
37
#include "ttypes.h"
dengyihao's avatar
dengyihao 已提交
38
#include "vnode.h"
39

H
Haojun Liao 已提交
40
#define IS_MAIN_SCAN(runtime)          ((runtime)->scanFlag == MAIN_SCAN)
41 42 43 44 45 46
#define SET_REVERSE_SCAN_FLAG(runtime) ((runtime)->scanFlag = REVERSE_SCAN)

#define GET_FORWARD_DIRECTION_FACTOR(ord) (((ord) == TSDB_ORDER_ASC) ? QUERY_ASC_FORWARD_STEP : QUERY_DESC_FORWARD_STEP)

#if 0
// Disabled fault-injection allocators: each wrapper draws a value from taosRand() and either
// returns NULL or forwards to the matching taosMemory* routine. When this region is enabled,
// the macros at the bottom redirect malloc/calloc/realloc to these wrappers.

// Returns NULL when the random draw is a multiple of 1000, otherwise allocates normally.
static UNUSED_FUNC void* u_malloc(size_t size) {
  uint32_t draw = taosRand();
  return (draw % 1000 == 0) ? NULL : taosMemoryMalloc(size);
}

// Returns NULL when the random draw is a multiple of 1000, otherwise allocates normally.
static UNUSED_FUNC void* u_calloc(size_t num, size_t size) {
  uint32_t draw = taosRand();
  return (draw % 1000 == 0) ? NULL : taosMemoryCalloc(num, size);
}

// Returns NULL when the random draw modulo 5 is 0 or 1, otherwise reallocates normally.
static UNUSED_FUNC void* u_realloc(void* p, size_t size) {
  uint32_t draw = taosRand();
  return (draw % 5 <= 1) ? NULL : taosMemoryRealloc(p, size);
}

#define calloc  u_calloc
#define malloc  u_malloc
#define realloc u_realloc
#endif

X
Xiaoyu Wang 已提交
79
#define CLEAR_QUERY_STATUS(q, st)   ((q)->status &= (~(st)))
80 81
#define QUERY_IS_INTERVAL_QUERY(_q) ((_q)->interval.interval > 0)

L
Liu Jicong 已提交
82 83 84
// Returns twice the value of the global tsShellActivityTimer.
int32_t getMaximumIdleDurationSec() {
  int32_t idleDuration = tsShellActivityTimer * 2;
  return idleDuration;
}

// Asserts that pExprInfo carries a unary-expression node, then returns 0 unconditionally.
// No function id is actually looked up here.
static int32_t getExprFunctionId(SExprInfo* pExprInfo) {
  assert(pExprInfo != NULL && pExprInfo->pExpr != NULL && pExprInfo->pExpr->nodeType == TEXPR_UNARYEXPR_NODE);

  const int32_t functionId = 0;
  return functionId;
}

static void doSetTagValueToResultBuf(char* output, const char* val, int16_t type, int16_t bytes);

91
static void setBlockStatisInfo(SqlFunctionCtx* pCtx, SExprInfo* pExpr, SSDataBlock* pSDataBlock);
92

L
Liu Jicong 已提交
93
static void destroyTableQueryInfoImpl(STableQueryInfo* pTableQueryInfo);
94

X
Xiaoyu Wang 已提交
95
static void releaseQueryBuf(size_t numOfTables);
96 97 98 99 100

static void destroySFillOperatorInfo(void* param, int32_t numOfOutput);
static void destroyProjectOperatorInfo(void* param, int32_t numOfOutput);
static void destroyOrderOperatorInfo(void* param, int32_t numOfOutput);
static void destroyAggOperatorInfo(void* param, int32_t numOfOutput);
X
Xiaoyu Wang 已提交
101

H
Haojun Liao 已提交
102
static void destroyIntervalOperatorInfo(void* param, int32_t numOfOutput);
H
Haojun Liao 已提交
103 104
static void destroyExchangeOperatorInfo(void* param, int32_t numOfOutput);

105 106
static void destroyOperatorInfo(SOperatorInfo* pOperator);

107
// Marks the operator as finished (OP_EXEC_DONE). When the operator is attached to a task, also
// records the operator's total cost relative to the task's start time and sets the task status
// to TASK_COMPLETED.
//
// The cost is derived from pTaskInfo->cost.start, so it can only be computed when pTaskInfo is
// set; the original code dereferenced pTaskInfo before checking it for NULL.
void doSetOperatorCompleted(SOperatorInfo* pOperator) {
  pOperator->status = OP_EXEC_DONE;

  if (pOperator->pTaskInfo == NULL) {
    return;
  }

  // NOTE(review): cost.start is scaled by 1000 before being subtracted from a microsecond
  // timestamp, and the difference is divided by 1000.0 - presumably start is in milliseconds and
  // totalCost ends up in milliseconds; confirm against the cost struct definition.
  pOperator->cost.totalCost = (taosGetTimestampUs() - pOperator->pTaskInfo->cost.start * 1000) / 1000.0;
  setTaskStatus(pOperator->pTaskInfo, TASK_COMPLETED);
}
115

H
Haojun Liao 已提交
116
// Open callback that does no work: flags the operator as opened, records an open cost of 0,
// and reports success.
int32_t operatorDummyOpenFn(SOperatorInfo* pOperator) {
  OPTR_SET_OPENED(pOperator);
  pOperator->cost.openCost = 0;

  int32_t code = TSDB_CODE_SUCCESS;
  return code;
}

122
SOperatorFpSet createOperatorFpSet(__optr_open_fn_t openFn, __optr_fn_t nextFn, __optr_fn_t streamFn,
L
Liu Jicong 已提交
123
                                   __optr_fn_t cleanup, __optr_close_fn_t closeFn, __optr_encode_fn_t encode,
124
                                   __optr_decode_fn_t decode, __optr_explain_fn_t explain) {
125 126 127 128 129 130 131 132 133 134 135 136 137 138
  SOperatorFpSet fpSet = {
      ._openFn = openFn,
      .getNextFn = nextFn,
      .getStreamResFn = streamFn,
      .cleanupFn = cleanup,
      .closeFn = closeFn,
      .encodeResultRow = encode,
      .decodeResultRow = decode,
      .getExplainFn = explain,
  };

  return fpSet;
}

H
Haojun Liao 已提交
139
// Close callback that intentionally does nothing; both arguments are ignored.
void operatorDummyCloseFn(void* param, int32_t numOfCols) {
  (void)param;
  (void)numOfCols;
}
H
Haojun Liao 已提交
140

X
Xiaoyu Wang 已提交
141 142 143
static int32_t doCopyToSDataBlock(SExecTaskInfo* taskInfo, SSDataBlock* pBlock, SExprInfo* pExprInfo,
                                  SDiskbasedBuf* pBuf, SGroupResInfo* pGroupResInfo, const int32_t* rowCellOffset,
                                  SqlFunctionCtx* pCtx, int32_t numOfExprs);
H
Haojun Liao 已提交
144

145
static void initCtxOutputBuffer(SqlFunctionCtx* pCtx, int32_t size);
H
Haojun Liao 已提交
146
static void setResultBufSize(STaskAttr* pQueryAttr, SResultInfo* pResultInfo);
dengyihao's avatar
dengyihao 已提交
147 148
static void doSetTableGroupOutputBuf(SAggOperatorInfo* pAggInfo, int32_t numOfOutput, uint64_t groupId,
                                     SExecTaskInfo* pTaskInfo);
149

H
Haojun Liao 已提交
150
SArray* getOrderCheckColumns(STaskAttr* pQuery);
151 152

typedef struct SRowCompSupporter {
L
Liu Jicong 已提交
153 154 155
  STaskRuntimeEnv* pRuntimeEnv;
  int16_t          dataOffset;
  __compar_fn_t    comFunc;
156 157
} SRowCompSupporter;

L
Liu Jicong 已提交
158 159 160
static int compareRowData(const void* a, const void* b, const void* userData) {
  const SResultRow* pRow1 = (const SResultRow*)a;
  const SResultRow* pRow2 = (const SResultRow*)b;
161

L
Liu Jicong 已提交
162 163
  SRowCompSupporter* supporter = (SRowCompSupporter*)userData;
  STaskRuntimeEnv*   pRuntimeEnv = supporter->pRuntimeEnv;
164

L
Liu Jicong 已提交
165 166
  SFilePage* page1 = getBufPage(pRuntimeEnv->pResultBuf, pRow1->pageId);
  SFilePage* page2 = getBufPage(pRuntimeEnv->pResultBuf, pRow2->pageId);
167 168

  int16_t offset = supporter->dataOffset;
169
  return 0;
X
Xiaoyu Wang 已提交
170 171
  //  char*   in1 = getPosInResultPage(pRuntimeEnv->pQueryAttr, page1, pRow1->offset, offset);
  //  char*   in2 = getPosInResultPage(pRuntimeEnv->pQueryAttr, page2, pRow2->offset, offset);
172

X
Xiaoyu Wang 已提交
173
  //  return (in1 != NULL && in2 != NULL) ? supporter->comFunc(in1, in2) : 0;
174 175
}

L
Liu Jicong 已提交
176
// setup the output buffer for each operator
177
SSDataBlock* createResDataBlock(SDataBlockDescNode* pNode) {
H
Haojun Liao 已提交
178
  int32_t numOfCols = LIST_LENGTH(pNode->pSlots);
H
Haojun Liao 已提交
179

wafwerar's avatar
wafwerar 已提交
180
  SSDataBlock* pBlock = taosMemoryCalloc(1, sizeof(SSDataBlock));
H
Haojun Liao 已提交
181 182
  pBlock->pDataBlock = taosArrayInit(numOfCols, sizeof(SColumnInfoData));

H
Haojun Liao 已提交
183
  pBlock->info.blockId = pNode->dataBlockId;
dengyihao's avatar
dengyihao 已提交
184
  pBlock->info.rowSize = pNode->totalRowSize;  // todo ??
5
54liuyao 已提交
185
  pBlock->info.type = STREAM_INVALID;
H
Haojun Liao 已提交
186

L
Liu Jicong 已提交
187
  for (int32_t i = 0; i < numOfCols; ++i) {
H
Haojun Liao 已提交
188
    SColumnInfoData idata = {{0}};
189
    SSlotDescNode*  pDescNode = (SSlotDescNode*)nodesListGetNode(pNode->pSlots, i);
X
Xiaoyu Wang 已提交
190 191 192
    //    if (!pDescNode->output) {  // todo disable it temporarily
    //      continue;
    //    }
H
Haojun Liao 已提交
193

dengyihao's avatar
dengyihao 已提交
194
    idata.info.type = pDescNode->dataType.type;
L
Liu Jicong 已提交
195 196
    idata.info.bytes = pDescNode->dataType.bytes;
    idata.info.scale = pDescNode->dataType.scale;
H
Haojun Liao 已提交
197
    idata.info.slotId = pDescNode->slotId;
H
Haojun Liao 已提交
198 199
    idata.info.precision = pDescNode->dataType.precision;

H
Hongze Cheng 已提交
200 201 202 203
    if (IS_VAR_DATA_TYPE(idata.info.type)) {
      pBlock->info.hasVarCol = true;
    }

H
Haojun Liao 已提交
204
    taosArrayPush(pBlock->pDataBlock, &idata);
H
Haojun Liao 已提交
205
  }
H
Haojun Liao 已提交
206

207
  pBlock->info.numOfCols = taosArrayGetSize(pBlock->pDataBlock);
H
Haojun Liao 已提交
208
  return pBlock;
H
Haojun Liao 已提交
209 210
}

L
Liu Jicong 已提交
211
// Tells whether a column may contain NULL values.
//
// Returns false for tag columns, user-defined columns and the primary timestamp column, and
// also when block statistics are available and report zero NULLs. Returns true otherwise,
// including when no statistics are supplied.
static bool hasNull(SColumn* pColumn, SColumnDataAgg* pStatis) {
  bool neverNull = TSDB_COL_IS_TAG(pColumn->flag) || TSDB_COL_IS_UD_COL(pColumn->flag) ||
                   pColumn->colId == PRIMARYKEY_TIMESTAMP_COL_ID;
  if (neverNull) {
    return false;
  }

  bool statisReportsNoNull = (pStatis != NULL) && (pStatis->numOfNull == 0);
  return !statisReportsNoNull;
}

L
Liu Jicong 已提交
224 225
// Checks whether a result row already exists for the key (pData, bytes) of table/group uid.
//
// For non-interval queries, and for interval queries outside the master scan, the answer is
// simply whether the key is present in pResultRowHashTable. For interval queries in the master
// scan the key must be present there and, when pResultRowInfo holds more than one row, the
// extended key must also be present in pResultRowListSet. With zero or exactly one row in
// pResultRowInfo the function reports false.
static bool chkResultRowFromKey(STaskRuntimeEnv* pRuntimeEnv, SResultRowInfo* pResultRowInfo, char* pData,
                                int16_t bytes, bool masterscan, uint64_t uid) {
  SET_RES_WINDOW_KEY(pRuntimeEnv->keyBuf, pData, bytes, uid);

  SResultRow** ppRow =
      (SResultRow**)taosHashGet(pRuntimeEnv->pResultRowHashTable, pRuntimeEnv->keyBuf, GET_RES_WINDOW_KEY_LEN(bytes));

  // in case of repeat scan/reverse scan, no new time window added.
  // the *ppRow may be NULL in case of sliding+offset exists.
  if (!QUERY_IS_INTERVAL_QUERY(pRuntimeEnv->pQueryAttr) || !masterscan) {
    return ppRow != NULL;
  }

  if (ppRow == NULL || pResultRowInfo->size == 0) {
    return false;
  }

  if (pResultRowInfo->size == 1) {
    // the single-row comparison is disabled in the original code:
    //        existed = (pResultRowInfo->pResult[0] == (*p1));
    return false;
  }

  // check if current pResultRowInfo contains the existed pResultRow
  SET_RES_EXT_WINDOW_KEY(pRuntimeEnv->keyBuf, pData, bytes, uid, pResultRowInfo);
  int64_t* pIndex =
      taosHashGet(pRuntimeEnv->pResultRowListSet, pRuntimeEnv->keyBuf, GET_RES_EXT_WINDOW_KEY_LEN(bytes));
  return pIndex != NULL;
}

261
// Reserves interBufSize bytes for a new result row in the paged buffer of group tableGroupId.
//
// The row is placed at the current end (pData->num) of the group's last page; a fresh page is
// requested when the group has no page yet or the last page cannot hold interBufSize more bytes.
// The page is marked dirty and the returned row has its pageId/offset set to its location.
//
// Returns NULL when no page can be obtained. The original code dereferenced the page pointer
// without a NULL check in the "no page yet" branch and after getBufPage().
SResultRow* getNewResultRow(SDiskbasedBuf* pResultBuf, int64_t tableGroupId, int32_t interBufSize) {
  SFilePage* pData = NULL;

  // in the first scan, new space needed for results
  int32_t pageId = -1;
  SIDList list = getDataBufPagesIdList(pResultBuf, tableGroupId);

  if (taosArrayGetSize(list) == 0) {
    pData = getNewBufPage(pResultBuf, tableGroupId, &pageId);
    if (pData != NULL) {
      pData->num = sizeof(SFilePage);
    }
  } else {
    SPageInfo* pi = getLastPageInfo(list);
    pData = getBufPage(pResultBuf, getPageId(pi));
    pageId = getPageId(pi);

    if (pData != NULL && pData->num + interBufSize > getBufPageSize(pResultBuf)) {
      // release current page first, and prepare the next one
      releaseBufPageInfo(pResultBuf, pi);

      pData = getNewBufPage(pResultBuf, tableGroupId, &pageId);
      if (pData != NULL) {
        pData->num = sizeof(SFilePage);
      }
    }
  }

  if (pData == NULL) {
    return NULL;
  }

  setBufPageDirty(pData, true);

  // the new row starts at the current end of the page's used area
  SResultRow* pResultRow = (SResultRow*)((char*)pData + pData->num);
  pResultRow->pageId = pageId;
  pResultRow->offset = (int32_t)pData->num;

  pData->num += interBufSize;

  return pResultRow;
}

303 304 305 306 307 308 309
/**
 * the struct of key in hash table
 * +----------+---------------+
 * | group id |   key data    |
 * | 8 bytes  | actual length |
 * +----------+---------------+
 */
310 311 312
SResultRow* doSetResultOutBufByKey(SDiskbasedBuf* pResultBuf, SResultRowInfo* pResultRowInfo, char* pData,
                                   int16_t bytes, bool masterscan, uint64_t groupId, SExecTaskInfo* pTaskInfo,
                                   bool isIntervalQuery, SAggSupporter* pSup) {
313
  SET_RES_WINDOW_KEY(pSup->keyBuf, pData, bytes, groupId);
H
Haojun Liao 已提交
314

dengyihao's avatar
dengyihao 已提交
315 316
  SResultRowPosition* p1 =
      (SResultRowPosition*)taosHashGet(pSup->pResultRowHashTable, pSup->keyBuf, GET_RES_WINDOW_KEY_LEN(bytes));
H
Haojun Liao 已提交
317

318 319
  SResultRow* pResult = NULL;

H
Haojun Liao 已提交
320 321
  // in case of repeat scan/reverse scan, no new time window added.
  if (isIntervalQuery) {
322 323
    if (masterscan && p1 != NULL) {  // the *p1 may be NULL in case of sliding+offset exists.
      pResult = getResultRowByPos(pResultBuf, p1);
324
      ASSERT(pResult->pageId == p1->pageId && pResult->offset == p1->offset);
H
Haojun Liao 已提交
325 326
    }
  } else {
dengyihao's avatar
dengyihao 已提交
327 328
    // In case of group by column query, the required SResultRow object must be existInCurrentResusltRowInfo in the
    // pResultRowInfo object.
H
Haojun Liao 已提交
329
    if (p1 != NULL) {
330
      // todo
331
      pResult = getResultRowByPos(pResultBuf, p1);
332
      ASSERT(pResult->pageId == p1->pageId && pResult->offset == p1->offset);
H
Haojun Liao 已提交
333 334 335
    }
  }

L
Liu Jicong 已提交
336
  // 1. close current opened time window
337
  if (pResultRowInfo->cur.pageId != -1 && ((pResult == NULL) || (pResult->pageId != pResultRowInfo->cur.pageId &&
L
Liu Jicong 已提交
338
                                                                 pResult->offset != pResultRowInfo->cur.offset))) {
339
    SResultRowPosition pos = pResultRowInfo->cur;
X
Xiaoyu Wang 已提交
340
    SFilePage*         pPage = getBufPage(pResultBuf, pos.pageId);
341 342 343 344 345
    releaseBufPage(pResultBuf, pPage);
  }

  // allocate a new buffer page
  if (pResult == NULL) {
H
Haojun Liao 已提交
346
    ASSERT(pSup->resultRowSize > 0);
347 348
    pResult = getNewResultRow(pResultBuf, groupId, pSup->resultRowSize);

349
    initResultRow(pResult);
H
Haojun Liao 已提交
350

351 352
    // add a new result set for a new group
    SResultRowPosition pos = {.pageId = pResult->pageId, .offset = pResult->offset};
X
Xiaoyu Wang 已提交
353 354
    taosHashPut(pSup->pResultRowHashTable, pSup->keyBuf, GET_RES_WINDOW_KEY_LEN(bytes), &pos,
                sizeof(SResultRowPosition));
H
Haojun Liao 已提交
355 356
  }

357 358 359
  // 2. set the new time window to be the new active time window
  pResultRowInfo->cur = (SResultRowPosition){.pageId = pResult->pageId, .offset = pResult->offset};

H
Haojun Liao 已提交
360
  // too many time window in query
361
  if (taosHashGetSize(pSup->pResultRowHashTable) > MAX_INTERVAL_TIME_WINDOW) {
H
Haojun Liao 已提交
362 363 364
    longjmp(pTaskInfo->env, TSDB_CODE_QRY_TOO_MANY_TIMEWINDOW);
  }

H
Haojun Liao 已提交
365
  return pResult;
H
Haojun Liao 已提交
366 367
}

368
// get the correct time window according to the handled timestamp
L
Liu Jicong 已提交
369
static STimeWindow getCurrentActiveTimeWindow(SResultRowInfo* pResultRowInfo, int64_t ts, STaskAttr* pQueryAttr) {
370
  STimeWindow w = {0};
H
Haojun Liao 已提交
371
#if 0
L
Liu Jicong 已提交
372 373
  if (pResultRowInfo->curPos == -1) {  // the first window, from the previous stored value
                                       //    getInitialStartTimeWindow(pQueryAttr, ts, &w);
374 375

    if (pQueryAttr->interval.intervalUnit == 'n' || pQueryAttr->interval.intervalUnit == 'y') {
L
Liu Jicong 已提交
376 377 378
      w.ekey =
          taosTimeAdd(w.skey, pQueryAttr->interval.interval, pQueryAttr->interval.intervalUnit, pQueryAttr->precision) -
          1;
379 380 381 382
    } else {
      w.ekey = w.skey + pQueryAttr->interval.interval - 1;
    }
  } else {
H
Haojun Liao 已提交
383
    w = pRow->win;
384 385 386 387 388 389 390 391 392
  }

  /*
   * query border check, skey should not be bounded by the query time range, since the value skey will
   * be used as the time window index value. So we only change ekey of time window accordingly.
   */
  if (w.ekey > pQueryAttr->window.ekey && QUERY_IS_ASC_QUERY(pQueryAttr)) {
    w.ekey = pQueryAttr->window.ekey;
  }
H
Haojun Liao 已提交
393
#endif
394 395 396 397 398

  return w;
}

// a new buffer page for each table. Needs to opt this design
L
Liu Jicong 已提交
399
static int32_t addNewWindowResultBuf(SResultRow* pWindowRes, SDiskbasedBuf* pResultBuf, int32_t tid, uint32_t size) {
400 401 402 403
  if (pWindowRes->pageId != -1) {
    return 0;
  }

L
Liu Jicong 已提交
404
  SFilePage* pData = NULL;
405 406 407 408 409 410

  // in the first scan, new space needed for results
  int32_t pageId = -1;
  SIDList list = getDataBufPagesIdList(pResultBuf, tid);

  if (taosArrayGetSize(list) == 0) {
H
Haojun Liao 已提交
411
    pData = getNewBufPage(pResultBuf, tid, &pageId);
412
    pData->num = sizeof(SFilePage);
413 414
  } else {
    SPageInfo* pi = getLastPageInfo(list);
415
    pData = getBufPage(pResultBuf, getPageId(pi));
416
    pageId = getPageId(pi);
417

418
    if (pData->num + size > getBufPageSize(pResultBuf)) {
419
      // release current page first, and prepare the next one
420
      releaseBufPageInfo(pResultBuf, pi);
421

H
Haojun Liao 已提交
422
      pData = getNewBufPage(pResultBuf, tid, &pageId);
423
      if (pData != NULL) {
424
        pData->num = sizeof(SFilePage);
425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444
      }
    }
  }

  if (pData == NULL) {
    return -1;
  }

  // set the number of rows in current disk page
  if (pWindowRes->pageId == -1) {  // not allocated yet, allocate new buffer
    pWindowRes->pageId = pageId;
    pWindowRes->offset = (int32_t)pData->num;

    pData->num += size;
    assert(pWindowRes->pageId >= 0);
  }

  return 0;
}

L
Liu Jicong 已提交
445 446 447
// Checks whether a result row exists for the time window `win` of group `groupId`, using the
// window start key (win->skey, TSDB_KEYSIZE bytes) as the lookup key. Asserts skey <= ekey.
// pResult, pCtx, numOfOutput and rowCellInfoOffset are not used.
static bool chkWindowOutputBufByKey(STaskRuntimeEnv* pRuntimeEnv, SResultRowInfo* pResultRowInfo, STimeWindow* win,
                                    bool masterscan, SResultRow** pResult, int64_t groupId, SqlFunctionCtx* pCtx,
                                    int32_t numOfOutput, int32_t* rowCellInfoOffset) {
  assert(win->skey <= win->ekey);

  char* pWindowKey = (char*)&win->skey;
  bool  found = chkResultRowFromKey(pRuntimeEnv, pResultRowInfo, pWindowKey, TSDB_KEYSIZE, masterscan, groupId);
  return found;
}

452
//  query_range_start, query_range_end, window_duration, window_start, window_end
453
void initExecTimeWindowInfo(SColumnInfoData* pColData, STimeWindow* pQueryWindow) {
454 455 456
  pColData->info.type = TSDB_DATA_TYPE_TIMESTAMP;
  pColData->info.bytes = sizeof(int64_t);

457
  colInfoDataEnsureCapacity(pColData, 0, 5);
458 459 460 461 462 463 464 465 466
  colDataAppendInt64(pColData, 0, &pQueryWindow->skey);
  colDataAppendInt64(pColData, 1, &pQueryWindow->ekey);

  int64_t interval = 0;
  colDataAppendInt64(pColData, 2, &interval);  // this value may be variable in case of 'n' and 'y'.
  colDataAppendInt64(pColData, 3, &pQueryWindow->skey);
  colDataAppendInt64(pColData, 4, &pQueryWindow->ekey);
}

X
Xiaoyu Wang 已提交
467 468 469
// Runs every function context in pCtx over the row range [offset, offset + forwardStep) of the
// current input.
//
// Window pseudo-column functions are evaluated through their scalar callback against
// pTimeWindowData and always produce one result. All other functions go through fpSet.process;
// on failure the error is logged, stored in taskInfo->code, and control jumps to taskInfo->env.
// For those other functions the input range and the colDataAggIsSet flag are restored afterwards;
// for window pseudo-column functions they are left at the temporary values.
// pWin, tsCol and order are not used.
void doApplyFunctions(SExecTaskInfo* taskInfo, SqlFunctionCtx* pCtx, STimeWindow* pWin,
                      SColumnInfoData* pTimeWindowData, int32_t offset, int32_t forwardStep, TSKEY* tsCol,
                      int32_t numOfTotal, int32_t numOfOutput, int32_t order) {
  for (int32_t k = 0; k < numOfOutput; ++k) {
    SqlFunctionCtx*       pOneCtx = &pCtx[k];
    SInputColumnInfoData* pInput = &pOneCtx->input;

    // keep it temporarily
    // todo no need this??
    bool    savedAggIsSet = pInput->colDataAggIsSet;
    int32_t savedNumOfRows = pInput->numOfRows;
    int32_t savedStartRow = pInput->startRowIndex;

    pInput->startRowIndex = offset;
    pInput->numOfRows = forwardStep;

    // not a whole block involved in query processing, statistics data can not be used
    // NOTE: the original value of isSet have been changed here
    if (pInput->colDataAggIsSet && forwardStep < numOfTotal) {
      pInput->colDataAggIsSet = false;
    }

    if (fmIsWindowPseudoColumnFunc(pOneCtx->functionId)) {
      SResultRowEntryInfo* pEntryInfo = GET_RES_INFO(pOneCtx);

      // the scalar result is written directly into the entry's intermediate buffer
      SColumnInfoData idata = {0};
      idata.info.type = TSDB_DATA_TYPE_BIGINT;
      idata.info.bytes = tDataTypes[TSDB_DATA_TYPE_BIGINT].bytes;
      idata.pData = GET_ROWCELL_INTERBUF(pEntryInfo);

      SScalarParam out = {.columnData = &idata};
      SScalarParam tw = {.numOfRows = 5, .columnData = pTimeWindowData};
      pOneCtx->sfp.process(&tw, 1, &out);
      pEntryInfo->numOfRes = 1;
      continue;
    }

    if (functionNeedToExecute(pOneCtx) && pOneCtx->fpSet.process != NULL) {
      int32_t code = pOneCtx->fpSet.process(pOneCtx);
      if (code != TSDB_CODE_SUCCESS) {
        qError("%s apply functions error, code: %s", GET_TASKID(taskInfo), tstrerror(code));
        taskInfo->code = code;
        longjmp(taskInfo->env, code);
      }
    }

    // restore it
    pInput->colDataAggIsSet = savedAggIsSet;
    pInput->startRowIndex = savedStartRow;
    pInput->numOfRows = savedNumOfRows;
  }
}

L
Liu Jicong 已提交
520
// Clamps the end key of pWindow against the end key of the query window.
//
// The scan order is fixed to TSDB_ORDER_ASC inside this function, so in practice the result is
// min(pWindow->ekey, pQueryAttr->window.ekey); the descending branch is kept but never taken.
static FORCE_INLINE TSKEY reviseWindowEkey(STaskAttr* pQueryAttr, STimeWindow* pWindow) {
  int32_t order = TSDB_ORDER_ASC;

  if (order == TSDB_ORDER_ASC) {
    TSKEY ekey = pWindow->ekey;
    return (ekey > pQueryAttr->window.ekey) ? pQueryAttr->window.ekey : ekey;
  }

  TSKEY ekey = pWindow->skey;
  return (ekey < pQueryAttr->window.ekey) ? pQueryAttr->window.ekey : ekey;
}

dengyihao's avatar
dengyihao 已提交
538
static int32_t doSetInputDataBlock(SOperatorInfo* pOperator, SqlFunctionCtx* pCtx, SSDataBlock* pBlock, int32_t order,
539
                                   int32_t scanFlag, bool createDummyCol);
540

dengyihao's avatar
dengyihao 已提交
541 542
// For every expression of the operator: stores the scan order and the block's row count in the
// matching function context and attaches the block statistics via setBlockStatisInfo().
static void doSetInputDataBlockInfo(SOperatorInfo* pOperator, SqlFunctionCtx* pCtx, SSDataBlock* pBlock,
                                    int32_t order) {
  for (int32_t i = 0; i < pOperator->numOfExprs; ++i) {
    SqlFunctionCtx* pOneCtx = &pCtx[i];

    pOneCtx->order = order;
    pOneCtx->input.numOfRows = pBlock->info.rows;
    setBlockStatisInfo(pOneCtx, &pOperator->pExpr[i], pBlock);
  }
}

X
Xiaoyu Wang 已提交
550 551
// Binds pBlock as the input of the operator's function contexts.
//
// When the block carries pre-computed aggregates (pBlockAgg != NULL) only the block information
// and statistics are attached; otherwise the column data is bound through doSetInputDataBlock(),
// whose return code is not examined here.
void setInputDataBlock(SOperatorInfo* pOperator, SqlFunctionCtx* pCtx, SSDataBlock* pBlock, int32_t order,
                       int32_t scanFlag, bool createDummyCol) {
  if (pBlock->pBlockAgg == NULL) {
    doSetInputDataBlock(pOperator, pCtx, pBlock, order, scanFlag, createDummyCol);
    return;
  }

  doSetInputDataBlockInfo(pOperator, pCtx, pBlock, order);
}

L
Liu Jicong 已提交
559 560
// Materializes a constant function parameter as a column of numOfRows identical values and
// stores it in pInput->pData[paramIndex].
//
// The column descriptor is allocated on first use and reused afterwards. Values are written for
// BIGINT/UBIGINT, DOUBLE and VARCHAR parameters; other types leave the column contents untouched.
//
// Returns TSDB_CODE_SUCCESS, or TSDB_CODE_OUT_OF_MEMORY when the column descriptor or the
// temporary VARCHAR buffer cannot be allocated. The temporary VARCHAR buffer is released before
// returning (it was previously leaked on every call and its allocation was not checked).
static int32_t doCreateConstantValColumnInfo(SInputColumnInfoData* pInput, SFunctParam* pFuncParam, int32_t paramIndex,
                                             int32_t numOfRows) {
  SColumnInfoData* pColInfo = NULL;
  if (pInput->pData[paramIndex] == NULL) {
    pColInfo = taosMemoryCalloc(1, sizeof(SColumnInfoData));
    if (pColInfo == NULL) {
      return TSDB_CODE_OUT_OF_MEMORY;
    }

    // Set the correct column info (data type and bytes)
    pColInfo->info.type = pFuncParam->param.nType;
    pColInfo->info.bytes = pFuncParam->param.nLen;

    pInput->pData[paramIndex] = pColInfo;
  } else {
    pColInfo = pInput->pData[paramIndex];
  }

  colInfoDataEnsureCapacity(pColInfo, 0, numOfRows);

  int8_t type = pFuncParam->param.nType;
  if (type == TSDB_DATA_TYPE_BIGINT || type == TSDB_DATA_TYPE_UBIGINT) {
    int64_t v = pFuncParam->param.i;
    for (int32_t i = 0; i < numOfRows; ++i) {
      colDataAppendInt64(pColInfo, i, &v);
    }
  } else if (type == TSDB_DATA_TYPE_DOUBLE) {
    double v = pFuncParam->param.d;
    for (int32_t i = 0; i < numOfRows; ++i) {
      colDataAppendDouble(pColInfo, i, &v);
    }
  } else if (type == TSDB_DATA_TYPE_VARCHAR) {
    // build the var-string form (header + payload) once, then append it to every row
    char* tmp = taosMemoryMalloc(pFuncParam->param.nLen + VARSTR_HEADER_SIZE);
    if (tmp == NULL) {
      return TSDB_CODE_OUT_OF_MEMORY;
    }

    STR_WITH_SIZE_TO_VARSTR(tmp, pFuncParam->param.pz, pFuncParam->param.nLen);
    for (int32_t i = 0; i < numOfRows; ++i) {
      colDataAppend(pColInfo, i, tmp, false);
    }

    // NOTE(review): relies on colDataAppend copying the value into the column - confirm.
    taosMemoryFree(tmp);
  }

  return TSDB_CODE_SUCCESS;
}

dengyihao's avatar
dengyihao 已提交
601
// Binds the columns of pBlock to the input of each function context of the operator.
//
// Column parameters are pointed at the block column of the parameter's slot id; for timeline
// functions the last parameter is additionally recorded as the primary timestamp column. A value
// parameter is materialized as a constant column only when createDummyCol is set and the
// expression has exactly one parameter.
//
// Returns TSDB_CODE_SUCCESS, or the error reported by doCreateConstantValColumnInfo().
static int32_t doSetInputDataBlock(SOperatorInfo* pOperator, SqlFunctionCtx* pCtx, SSDataBlock* pBlock, int32_t order,
                                   int32_t scanFlag, bool createDummyCol) {
  int32_t code = TSDB_CODE_SUCCESS;

  for (int32_t i = 0; i < pOperator->numOfExprs; ++i) {
    SqlFunctionCtx* pOneCtx = &pCtx[i];

    pOneCtx->order = order;
    pOneCtx->input.numOfRows = pBlock->info.rows;

    pOneCtx->pSrcBlock = pBlock;
    pOneCtx->scanFlag = scanFlag;

    SInputColumnInfoData* pInput = &pOneCtx->input;
    pInput->uid = pBlock->info.uid;
    pInput->colDataAggIsSet = false;

    SExprInfo* pOneExpr = &pOperator->pExpr[i];
    for (int32_t j = 0; j < pOneExpr->base.numOfParams; ++j) {
      SFunctParam* pFuncParam = &pOneExpr->base.pParam[j];

      if (pFuncParam->type == FUNC_PARAM_TYPE_COLUMN) {
        int32_t slotId = pFuncParam->pCol->slotId;
        pInput->pData[j] = taosArrayGet(pBlock->pDataBlock, slotId);
        pInput->totalRows = pBlock->info.rows;
        pInput->numOfRows = pBlock->info.rows;
        pInput->startRowIndex = 0;

        // NOTE: the last parameter is the primary timestamp column
        bool isLastParam = (j == pOneExpr->base.numOfParams - 1);
        if (fmIsTimelineFunc(pOneCtx->functionId) && isLastParam) {
          pInput->pPTS = pInput->pData[j];
        }
        ASSERT(pInput->pData[j] != NULL);
        continue;
      }

      if (pFuncParam->type != FUNC_PARAM_TYPE_VALUE) {
        continue;
      }

      // todo avoid case: top(k, 12), 12 is the value parameter.
      // sum(11), 11 is also the value parameter.
      if (!createDummyCol || pOneExpr->base.numOfParams != 1) {
        continue;
      }

      pInput->totalRows = pBlock->info.rows;
      pInput->numOfRows = pBlock->info.rows;
      pInput->startRowIndex = 0;

      code = doCreateConstantValColumnInfo(pInput, pFuncParam, j, pBlock->info.rows);
      if (code != TSDB_CODE_SUCCESS) {
        return code;
      }
    }
  }

  return code;
}

651
// Invokes the process callback of every function context that needs to execute and has one.
// Stops at the first failure, logs it, and returns that error code; returns TSDB_CODE_SUCCESS
// when all callbacks succeed. startTs is not used.
static int32_t doAggregateImpl(SOperatorInfo* pOperator, TSKEY startTs, SqlFunctionCtx* pCtx) {
  for (int32_t k = 0; k < pOperator->numOfExprs; ++k) {
    SqlFunctionCtx* pOneCtx = &pCtx[k];

    // todo add a dummy funtion to avoid process check
    if (!functionNeedToExecute(pOneCtx) || pOneCtx->fpSet.process == NULL) {
      continue;
    }

    int32_t code = pOneCtx->fpSet.process(pOneCtx);
    if (code != TSDB_CODE_SUCCESS) {
      qError("%s aggregate function error happens, code: %s", GET_TASKID(pOperator->pTaskInfo), tstrerror(code));
      return code;
    }
  }

  return TSDB_CODE_SUCCESS;
}

H
Haojun Liao 已提交
670
/**
 * Point the output of the leading function contexts at the leading columns of the result block.
 *
 * For every i below the size of pPseudoList, pCtx[i].pOutput is set to the i-th entry of
 * pResult->pDataBlock. Nothing is done when pPseudoList is NULL.
 */
static void setPseudoOutputColInfo(SSDataBlock* pResult, SqlFunctionCtx* pCtx, SArray* pPseudoList) {
  if (pPseudoList == NULL) {
    return;
  }

  size_t total = taosArrayGetSize(pPseudoList);
  for (int32_t col = 0; col < total; ++col) {
    pCtx[col].pOutput = taosArrayGet(pResult->pDataBlock, col);
  }
}

677
int32_t projectApplyFunctions(SExprInfo* pExpr, SSDataBlock* pResult, SSDataBlock* pSrcBlock, SqlFunctionCtx* pCtx,
X
Xiaoyu Wang 已提交
678
                              int32_t numOfOutput, SArray* pPseudoList) {
H
Haojun Liao 已提交
679
  setPseudoOutputColInfo(pResult, pCtx, pPseudoList);
H
Haojun Liao 已提交
680
  pResult->info.groupId = pSrcBlock->info.groupId;
H
Haojun Liao 已提交
681

dengyihao's avatar
dengyihao 已提交
682 683
  // if the source equals to the destination, it is to create a new column as the result of scalar function or some
  // operators.
684 685
  bool createNewColModel = (pResult == pSrcBlock);

686 687
  int32_t numOfRows = 0;

688
  for (int32_t k = 0; k < numOfOutput; ++k) {
dengyihao's avatar
dengyihao 已提交
689
    int32_t         outputSlotId = pExpr[k].base.resSchema.slotId;
690 691
    SqlFunctionCtx* pfCtx = &pCtx[k];

L
Liu Jicong 已提交
692
    if (pExpr[k].pExpr->nodeType == QUERY_NODE_COLUMN) {  // it is a project query
693
      SColumnInfoData* pColInfoData = taosArrayGet(pResult->pDataBlock, outputSlotId);
694
      if (pResult->info.rows > 0 && !createNewColModel) {
X
Xiaoyu Wang 已提交
695 696
        colDataMergeCol(pColInfoData, pResult->info.rows, &pResult->info.capacity, pfCtx->input.pData[0],
                        pfCtx->input.numOfRows);
697 698 699
      } else {
        colDataAssign(pColInfoData, pfCtx->input.pData[0], pfCtx->input.numOfRows);
      }
700

701
      numOfRows = pfCtx->input.numOfRows;
702
    } else if (pExpr[k].pExpr->nodeType == QUERY_NODE_VALUE) {
703
      SColumnInfoData* pColInfoData = taosArrayGet(pResult->pDataBlock, outputSlotId);
704

dengyihao's avatar
dengyihao 已提交
705
      int32_t offset = createNewColModel ? 0 : pResult->info.rows;
706
      for (int32_t i = 0; i < pSrcBlock->info.rows; ++i) {
dengyihao's avatar
dengyihao 已提交
707 708 709
        colDataAppend(pColInfoData, i + offset,
                      taosVariantGet(&pExpr[k].base.pParam[0].param, pExpr[k].base.pParam[0].param.nType),
                      TSDB_DATA_TYPE_NULL == pExpr[k].base.pParam[0].param.nType);
710
      }
711 712

      numOfRows = pSrcBlock->info.rows;
H
Haojun Liao 已提交
713
    } else if (pExpr[k].pExpr->nodeType == QUERY_NODE_OPERATOR) {
714 715 716
      SArray* pBlockList = taosArrayInit(4, POINTER_BYTES);
      taosArrayPush(pBlockList, &pSrcBlock);

717
      SColumnInfoData* pResColData = taosArrayGet(pResult->pDataBlock, outputSlotId);
718
      SColumnInfoData  idata = {.info = pResColData->info, .hasNull = true};
719

720
      SScalarParam dest = {.columnData = &idata};
X
Xiaoyu Wang 已提交
721
      int32_t      code = scalarCalculate(pExpr[k].pExpr->_optrRoot.pRootNode, pBlockList, &dest);
722 723 724 725
      if (code != TSDB_CODE_SUCCESS) {
        taosArrayDestroy(pBlockList);
        return code;
      }
726

dengyihao's avatar
dengyihao 已提交
727
      int32_t startOffset = createNewColModel ? 0 : pResult->info.rows;
728
      colInfoDataEnsureCapacity(pResColData, startOffset, pResult->info.capacity);
729
      colDataMergeCol(pResColData, startOffset, &pResult->info.capacity, &idata, dest.numOfRows);
730 731

      numOfRows = dest.numOfRows;
732 733
      taosArrayDestroy(pBlockList);
    } else if (pExpr[k].pExpr->nodeType == QUERY_NODE_FUNCTION) {
734
      ASSERT(!fmIsAggFunc(pfCtx->functionId));
735

736 737
      // _rowts/_c0, not tbname column
      if (fmIsPseudoColumnFunc(pfCtx->functionId) && (!fmIsScanPseudoColumnFunc(pfCtx->functionId))) {
H
Haojun Liao 已提交
738
        // do nothing
X
Xiaoyu Wang 已提交
739
      } else if (fmIsIndefiniteRowsFunc(pfCtx->functionId)) {
dengyihao's avatar
dengyihao 已提交
740
        SResultRowEntryInfo* pResInfo = GET_RES_INFO(&pCtx[k]);
741
        pfCtx->fpSet.init(&pCtx[k], pResInfo);
742

743
        pfCtx->pOutput = taosArrayGet(pResult->pDataBlock, outputSlotId);
dengyihao's avatar
dengyihao 已提交
744
        pfCtx->offset = createNewColModel ? 0 : pResult->info.rows;  // set the start offset
H
Haojun Liao 已提交
745

746
        // set the timestamp(_rowts) output buffer
747 748
        if (taosArrayGetSize(pPseudoList) > 0) {
          int32_t* outputColIndex = taosArrayGet(pPseudoList, 0);
749
          pfCtx->pTsOutput = (SColumnInfoData*)pCtx[*outputColIndex].pOutput;
750
        }
H
Haojun Liao 已提交
751

752
        numOfRows = pfCtx->fpSet.process(pfCtx);
H
Haojun Liao 已提交
753 754 755
      } else {
        SArray* pBlockList = taosArrayInit(4, POINTER_BYTES);
        taosArrayPush(pBlockList, &pSrcBlock);
G
Ganlin Zhao 已提交
756

757
        SColumnInfoData* pResColData = taosArrayGet(pResult->pDataBlock, outputSlotId);
758
        SColumnInfoData  idata = {.info = pResColData->info, .hasNull = true};
H
Haojun Liao 已提交
759

760
        SScalarParam dest = {.columnData = &idata};
X
Xiaoyu Wang 已提交
761
        int32_t      code = scalarCalculate((SNode*)pExpr[k].pExpr->_function.pFunctNode, pBlockList, &dest);
762 763 764 765
        if (code != TSDB_CODE_SUCCESS) {
          taosArrayDestroy(pBlockList);
          return code;
        }
766

dengyihao's avatar
dengyihao 已提交
767
        int32_t startOffset = createNewColModel ? 0 : pResult->info.rows;
768
        colInfoDataEnsureCapacity(pResColData, startOffset, pResult->info.capacity);
769
        colDataMergeCol(pResColData, startOffset, &pResult->info.capacity, &idata, dest.numOfRows);
770 771

        numOfRows = dest.numOfRows;
H
Haojun Liao 已提交
772 773
        taosArrayDestroy(pBlockList);
      }
774
    } else {
775
      ASSERT(0);
776 777
    }
  }
778

779 780 781
  if (!createNewColModel) {
    pResult->info.rows += numOfRows;
  }
782 783

  return TSDB_CODE_SUCCESS;
784 785
}

786 787 788
/**
 * Store the group key of a result row in its time window.
 *
 * For fixed-length types the value at pData is read as an int64_t and written to both
 * win.skey and win.ekey. Variable-length types are currently ignored.
 */
static void setResultRowKey(SResultRow* pResultRow, char* pData, int16_t type) {
  if (IS_VAR_DATA_TYPE(type)) {
    // todo disable this
    //    if (pResultRow->key == NULL) {
    //      pResultRow->key = taosMemoryMalloc(varDataTLen(pData));
    //      varDataCopy(pResultRow->key, pData);
    //    } else {
    //      ASSERT(memcmp(pResultRow->key, pData, varDataTLen(pData)) == 0);
    //    }
    return;
  }

  int64_t key = -1;
  GET_TYPED_DATA(key, int64_t, type, pData);

  pResultRow->win.ekey = key;
  pResultRow->win.skey = key;
}

805 806 807 808 809
/**
 * Locate the result row for a group key and bind the function contexts to it.
 *
 * The row is obtained from doSetResultOutBufByKey using pData/bytes as the key and groupId as
 * the group; the contexts in binfo->pCtx are then initialised on that row through
 * setResultRowInitCtx. The `type` argument is not referenced.
 *
 * @return always TSDB_CODE_SUCCESS (a NULL row trips the assert)
 */
int32_t setGroupResultOutputBuf(SOptrBasicInfo* binfo, int32_t numOfCols, char* pData, int16_t type, int16_t bytes,
                                int32_t groupId, SDiskbasedBuf* pBuf, SExecTaskInfo* pTaskInfo,
                                SAggSupporter* pAggSup) {
  SResultRowInfo* pRowInfo = &binfo->resultRowInfo;
  SqlFunctionCtx* pFuncCtx = binfo->pCtx;

  SResultRow* pRow = doSetResultOutBufByKey(pBuf, pRowInfo, pData, bytes, true, groupId, pTaskInfo, false, pAggSup);
  assert(pRow != NULL);

  setResultRowInitCtx(pRow, pFuncCtx, numOfCols, binfo->rowCellInfoOffset);
  return TSDB_CODE_SUCCESS;
}

5
54liuyao 已提交
819
/**
 * Decide whether the function bound to pCtx has to be executed for the current input.
 *
 * @return false when the context has no function (functionId == -1);
 *         during a REPEAT_SCAN, whatever fmIsRepeatScanFunc reports for the function;
 *         otherwise true unless the result entry is already completed.
 */
bool functionNeedToExecute(SqlFunctionCtx* pCtx) {
  struct SResultRowEntryInfo* pEntry = GET_RES_INFO(pCtx);

  if (pCtx->functionId == -1) {
    return false;
  }

  if (pCtx->scanFlag == REPEAT_SCAN) {
    return fmIsRepeatScanFunc(pCtx->functionId);
  }

  return !isRowEntryCompleted(pEntry);
}

839 840 841 842 843 844 845
/**
 * Build the column descriptor and the pre-aggregated statistics for a constant-value parameter.
 *
 * pInput->pData[paramIndex] and pInput->pColumnDataAgg[paramIndex] are allocated on first use and
 * reused afterwards. The statistics are filled from the constant in pFuncParam->param:
 *  - BIGINT:    min = max = v, sum = v * numOfRows
 *  - DOUBLE:    same, stored as the object representation of a double
 *  - BOOL:      min = 0, max = v, sum = (v * numOfRows) stored as a bool
 *  - TIMESTAMP: statistics are left as they are
 * Any other type trips ASSERT(0).
 *
 * @param pInput      input descriptor of the function context
 * @param pFuncParam  the constant parameter
 * @param type        data type of the constant; must not be a variable-length type
 * @param paramIndex  slot in pInput->pData / pInput->pColumnDataAgg
 * @param numOfRows   number of rows the constant is considered to span
 * @return TSDB_CODE_SUCCESS or TSDB_CODE_OUT_OF_MEMORY
 */
static int32_t doCreateConstantValColumnAggInfo(SInputColumnInfoData* pInput, SFunctParam* pFuncParam, int32_t type,
                                                int32_t paramIndex, int32_t numOfRows) {
  if (pInput->pData[paramIndex] == NULL) {
    pInput->pData[paramIndex] = taosMemoryCalloc(1, sizeof(SColumnInfoData));
    if (pInput->pData[paramIndex] == NULL) {
      return TSDB_CODE_OUT_OF_MEMORY;
    }

    // Set the correct column info (data type and bytes)
    pInput->pData[paramIndex]->info.type = type;
    pInput->pData[paramIndex]->info.bytes = tDataTypes[type].bytes;
  }

  SColumnDataAgg* da = pInput->pColumnDataAgg[paramIndex];
  if (da == NULL) {
    da = taosMemoryCalloc(1, sizeof(SColumnDataAgg));
    pInput->pColumnDataAgg[paramIndex] = da;
    if (da == NULL) {
      return TSDB_CODE_OUT_OF_MEMORY;
    }
  }

  ASSERT(!IS_VAR_DATA_TYPE(type));

  if (type == TSDB_DATA_TYPE_BIGINT) {
    int64_t v = pFuncParam->param.i;
    *da = (SColumnDataAgg){.numOfNull = 0, .min = v, .max = v, .maxIndex = 0, .minIndex = 0, .sum = v * numOfRows};
  } else if (type == TSDB_DATA_TYPE_DOUBLE) {
    double v = pFuncParam->param.d;
    double total = v * numOfRows;

    *da = (SColumnDataAgg){.numOfNull = 0, .maxIndex = 0, .minIndex = 0};

    // copy the object representation instead of writing through a casted pointer, which
    // would violate the effective-type (strict aliasing) rules
    memcpy(&da->min, &v, sizeof(v));
    memcpy(&da->max, &v, sizeof(v));
    memcpy(&da->sum, &total, sizeof(total));
  } else if (type == TSDB_DATA_TYPE_BOOL) {  // todo validate this data type
    bool v = pFuncParam->param.i;
    // NOTE(review): min is stored as 0 rather than v - kept as in the original, confirm intent
    bool lowest = 0;
    bool total = v * numOfRows;

    *da = (SColumnDataAgg){.numOfNull = 0, .maxIndex = 0, .minIndex = 0};
    memcpy(&da->min, &lowest, sizeof(lowest));
    memcpy(&da->max, &v, sizeof(v));
    memcpy(&da->sum, &total, sizeof(total));
  } else if (type == TSDB_DATA_TYPE_TIMESTAMP) {
    // do nothing
  } else {
    ASSERT(0);
  }

  return TSDB_CODE_SUCCESS;
}
890 891 892 893 894 895 896 897 898 899 900

/**
 * Bind the pre-aggregated block statistics of pBlock to the input of a function context.
 *
 * numOfRows/totalRows of the input are always set to the row count of the block. When the block
 * carries statistics (pBlockAgg != NULL) each parameter of the expression is visited:
 *  - column parameters take the statistics entry and the column data of their slot; a missing
 *    entry clears colDataAggIsSet;
 *  - value parameters get synthetic statistics from doCreateConstantValColumnAggInfo; if that
 *    fails the error is logged and colDataAggIsSet is cleared so the NULL entry is not consumed.
 * Without block statistics colDataAggIsSet is simply cleared.
 */
void setBlockStatisInfo(SqlFunctionCtx* pCtx, SExprInfo* pExprInfo, SSDataBlock* pBlock) {
  int32_t numOfRows = pBlock->info.rows;

  SInputColumnInfoData* pInput = &pCtx->input;
  pInput->numOfRows = numOfRows;
  pInput->totalRows = numOfRows;

  if (pBlock->pBlockAgg != NULL) {
    pInput->colDataAggIsSet = true;

    for (int32_t j = 0; j < pExprInfo->base.numOfParams; ++j) {
      SFunctParam* pFuncParam = &pExprInfo->base.pParam[j];

      if (pFuncParam->type == FUNC_PARAM_TYPE_COLUMN) {
        int32_t slotId = pFuncParam->pCol->slotId;
        pInput->pColumnDataAgg[j] = pBlock->pBlockAgg[slotId];
        if (pInput->pColumnDataAgg[j] == NULL) {
          pInput->colDataAggIsSet = false;
        }

        // Here we set the column info data since the data type for each column data is required, but
        // the data in the corresponding SColumnInfoData will not be used.
        pInput->pData[j] = taosArrayGet(pBlock->pDataBlock, slotId);
      } else if (pFuncParam->type == FUNC_PARAM_TYPE_VALUE) {
        int32_t code =
            doCreateConstantValColumnAggInfo(pInput, pFuncParam, pFuncParam->param.nType, j, pBlock->info.rows);
        if (code != TSDB_CODE_SUCCESS) {
          // the statistics entry could not be built; do not advertise it as available
          qError("failed to create statistics for constant parameter, code: %s", tstrerror(code));
          pInput->colDataAggIsSet = false;
        }
      }
    }
  } else {
    pInput->colDataAggIsSet = false;
  }

  // set the statistics data for primary time stamp column
  //  if (pCtx->functionId == FUNCTION_SPREAD && pColumn->colId == PRIMARYKEY_TIMESTAMP_COL_ID) {
  //    pCtx->isAggSet = true;
  //    pCtx->agg.min = pBlock->info.window.skey;
  //    pCtx->agg.max = pBlock->info.window.ekey;
  //  }
}

// set the output buffer for the selectivity + tag query
931
static int32_t setSelectValueColumnInfo(SqlFunctionCtx* pCtx, int32_t numOfOutput) {
932 933
  int32_t num = 0;

H
Haojun Liao 已提交
934
  SqlFunctionCtx*  p = NULL;
935 936
  SqlFunctionCtx** pValCtx = taosMemoryCalloc(numOfOutput, POINTER_BYTES);
  if (pValCtx == NULL) {
937 938 939 940
    return TSDB_CODE_QRY_OUT_OF_MEMORY;
  }

  for (int32_t i = 0; i < numOfOutput; ++i) {
941 942
    if (strcmp(pCtx[i].pExpr->pExpr->_function.functionName, "_select_value") == 0) {
      pValCtx[num++] = &pCtx[i];
X
Xiaoyu Wang 已提交
943
    } else if (fmIsSelectFunc(pCtx[i].functionId)) {
944
      p = &pCtx[i];
945
    }
X
Xiaoyu Wang 已提交
946 947 948 949 950 951 952 953 954 955
    //    if (functionId == FUNCTION_TAG_DUMMY || functionId == FUNCTION_TS_DUMMY) {
    //      tagLen += pCtx[i].resDataInfo.bytes;
    //      pTagCtx[num++] = &pCtx[i];
    //    } else if (functionId == FUNCTION_TS || functionId == FUNCTION_TAG) {
    //      // tag function may be the group by tag column
    //      // ts may be the required primary timestamp column
    //      continue;
    //    } else {
    //      // the column may be the normal column, group by normal_column, the functionId is FUNCTION_PRJ
    //    }
956
  }
957

958
  if (p != NULL) {
959
    p->subsidiaries.pCtx = pValCtx;
960
    p->subsidiaries.num = num;
961
  } else {
962
    taosMemoryFreeClear(pValCtx);
963 964 965 966 967
  }

  return TSDB_CODE_SUCCESS;
}

968
/**
 * Allocate and initialise one function context per output expression.
 *
 * For function nodes the execution callbacks are resolved (aggregate / indefinite-rows functions
 * through fpSet, everything else through sfp) and the intermediate buffer size is taken from the
 * getEnv callback. Column, operator and value nodes use the result schema width as buffer size.
 * *rowCellInfoOffset receives an array holding, for each context, the running offset computed
 * from sizeof(SResultRowEntryInfo) plus the intermediate buffer size of the preceding contexts.
 *
 * @param pExprInfo          expression array, numOfOutput entries
 * @param numOfOutput        number of expressions
 * @param rowCellInfoOffset  out: newly allocated offset array (set to NULL when a later
 *                           allocation fails)
 * @return the context array, or NULL when an allocation fails
 */
SqlFunctionCtx* createSqlFunctionCtx(SExprInfo* pExprInfo, int32_t numOfOutput, int32_t** rowCellInfoOffset) {
  SqlFunctionCtx* pFuncCtx = (SqlFunctionCtx*)taosMemoryCalloc(numOfOutput, sizeof(SqlFunctionCtx));
  if (pFuncCtx == NULL) {
    return NULL;
  }

  *rowCellInfoOffset = taosMemoryCalloc(numOfOutput, sizeof(int32_t));
  if (*rowCellInfoOffset == NULL) {
    taosMemoryFreeClear(pFuncCtx);
    return NULL;
  }

  for (int32_t i = 0; i < numOfOutput; ++i) {
    SExprInfo* pExpr = &pExprInfo[i];

    SExprBasicInfo* pFunct = &pExpr->base;
    SqlFunctionCtx* pCtx = &pFuncCtx[i];

    pCtx->functionId = -1;
    pCtx->curBufPage = -1;
    pCtx->pExpr = pExpr;

    if (pExpr->pExpr->nodeType == QUERY_NODE_FUNCTION) {
      SFuncExecEnv env = {0};
      pCtx->functionId = pExpr->pExpr->_function.pFunctNode->funcId;

      if (fmIsAggFunc(pCtx->functionId) || fmIsIndefiniteRowsFunc(pCtx->functionId)) {
        bool isUdaf = fmIsUserDefinedFunc(pCtx->functionId);
        if (!isUdaf) {
          fmGetFuncExecFuncs(pCtx->functionId, &pCtx->fpSet);
        } else {
          char* udfName = pExpr->pExpr->_function.pFunctNode->functionName;
          // bound the copy by the destination, not by the source length; the context was
          // zero-allocated above, so leaving the last byte untouched keeps the name terminated
          strncpy(pCtx->udfName, udfName, sizeof(pCtx->udfName) - 1);
          fmGetUdafExecFuncs(pCtx->functionId, &pCtx->fpSet);
        }
        pCtx->fpSet.getEnv(pExpr->pExpr->_function.pFunctNode, &env);
      } else {
        fmGetScalarFuncExecFuncs(pCtx->functionId, &pCtx->sfp);
        if (pCtx->sfp.getEnv != NULL) {
          pCtx->sfp.getEnv(pExpr->pExpr->_function.pFunctNode, &env);
        }
      }
      pCtx->resDataInfo.interBufSize = env.calcMemSize;
    } else if (pExpr->pExpr->nodeType == QUERY_NODE_COLUMN || pExpr->pExpr->nodeType == QUERY_NODE_OPERATOR ||
               pExpr->pExpr->nodeType == QUERY_NODE_VALUE) {
      // for simple column, the result buffer needs to hold at least one element.
      pCtx->resDataInfo.interBufSize = pFunct->resSchema.bytes;
    }

    pCtx->input.numOfInputCols = pFunct->numOfParams;
    pCtx->input.pData = taosMemoryCalloc(pFunct->numOfParams, POINTER_BYTES);
    pCtx->input.pColumnDataAgg = taosMemoryCalloc(pFunct->numOfParams, POINTER_BYTES);

    // a zero-sized request may legitimately yield NULL, so only treat NULL as failure when
    // there is at least one parameter
    if (pFunct->numOfParams > 0 && (pCtx->input.pData == NULL || pCtx->input.pColumnDataAgg == NULL)) {
      for (int32_t j = 0; j <= i; ++j) {
        taosMemoryFree(pFuncCtx[j].input.pData);
        taosMemoryFree(pFuncCtx[j].input.pColumnDataAgg);
      }

      taosMemoryFree(*rowCellInfoOffset);
      *rowCellInfoOffset = NULL;
      taosMemoryFree(pFuncCtx);
      return NULL;
    }

    pCtx->pTsOutput = NULL;
    pCtx->resDataInfo.bytes = pFunct->resSchema.bytes;
    pCtx->resDataInfo.type = pFunct->resSchema.type;
    pCtx->order = TSDB_ORDER_ASC;
    pCtx->start.key = INT64_MIN;
    pCtx->end.key = INT64_MIN;
    pCtx->numOfParams = pExpr->base.numOfParams;
    pCtx->increase = false;

    pCtx->param = pFunct->pParam;
  }

  // offset[0] stays 0 from the zeroed allocation; each following entry adds the entry header
  // and the intermediate buffer of the previous context
  for (int32_t i = 1; i < numOfOutput; ++i) {
    (*rowCellInfoOffset)[i] =
        (int32_t)((*rowCellInfoOffset)[i - 1] + sizeof(SResultRowEntryInfo) + pFuncCtx[i - 1].resDataInfo.interBufSize);
  }

  setSelectValueColumnInfo(pFuncCtx, numOfOutput);
  return pFuncCtx;
}

1042
/**
 * Release an array of function contexts.
 *
 * For each context the parameter variants are destroyed and the subsidiary list, the input
 * column array and the input statistics array are freed; finally the context array itself
 * is freed. A NULL pCtx is accepted.
 *
 * @return always NULL
 */
static void* destroySqlFunctionCtx(SqlFunctionCtx* pCtx, int32_t numOfOutput) {
  if (pCtx != NULL) {
    for (int32_t idx = 0; idx < numOfOutput; ++idx) {
      SqlFunctionCtx* pCur = &pCtx[idx];

      for (int32_t p = 0; p < pCur->numOfParams; ++p) {
        taosVariantDestroy(&pCur->param[p].param);
      }

      taosMemoryFreeClear(pCur->subsidiaries.pCtx);
      taosMemoryFree(pCur->input.pData);
      taosMemoryFree(pCur->input.pColumnDataAgg);
    }

    taosMemoryFreeClear(pCtx);
  }

  return NULL;
}

L
Liu Jicong 已提交
1061
/**
 * Report whether the task should be treated as killed.
 *
 * The idle-timeout branch only asserts that the start time is set; the code that would abort
 * the query is commented out, so the function currently returns false in every case.
 */
bool isTaskKilled(SExecTaskInfo* pTaskInfo) {
  if (pTaskInfo->owner == 0) {
    return false;
  }

  // query has been executed more than tsShellActivityTimer, and the retrieve has not arrived
  // abort current query execution.
  if ((taosGetTimestampSec() - pTaskInfo->cost.start / 1000) > 10 * getMaximumIdleDurationSec()
      /*(!needBuildResAfterQueryComplete(pTaskInfo))*/) {
    assert(pTaskInfo->cost.start != 0);
    //    qDebug("QInfo:%" PRIu64 " retrieve not arrive beyond %d ms, abort current query execution, start:%" PRId64
    //           ", current:%d", pQInfo->qId, 1, pQInfo->startExecTs, taosGetTimestampSec());
    //    return true;
  }

  return false;
}

L
Liu Jicong 已提交
1076
/** Flag the task as cancelled by storing TSDB_CODE_TSC_QUERY_CANCELLED in its code field. */
void setTaskKilled(SExecTaskInfo* pTaskInfo) {
  pTaskInfo->code = TSDB_CODE_TSC_QUERY_CANCELLED;
}
1077

L
Liu Jicong 已提交
1078
/**
 * Tell whether the query could be answered from the cached last rows.
 *
 * With the per-function check commented out, any query with at least one output expression
 * is rejected in the first loop iteration, and the hard-wired ascending order rejects the rest,
 * so the function returns false for every input as written.
 */
static bool isCachedLastQuery(STaskAttr* pQueryAttr) {
  for (int32_t idx = 0; idx < pQueryAttr->numOfOutput; ++idx) {
    int32_t functionId = getExprFunctionId(&pQueryAttr->pExpr1[idx]);
    //    if (functionId == FUNCTION_LAST || functionId == FUNCTION_LAST_DST) {
    //      continue;
    //    }

    return false;
  }

  int32_t scanOrder = TSDB_ORDER_ASC;
  if (scanOrder != TSDB_ORDER_DESC || !TSWINDOW_IS_EQUAL(pQueryAttr->window, TSWINDOW_DESC_INITIALIZER)) {
    return false;
  }

  // group-by, interval and any kind of filtering all disqualify the query
  bool groupedOrWindowed = pQueryAttr->groupbyColumn || (pQueryAttr->interval.interval > 0);
  bool filtered = (pQueryAttr->numOfFilterCols > 0) || (pQueryAttr->havingNum > 0);

  return !(groupedOrWindowed || filtered);
}

/////////////////////////////////////////////////////////////////////////////////////////////
L
Liu Jicong 已提交
1109
// todo refactor : return window
1110
void getAlignQueryTimeWindow(SInterval* pInterval, int32_t precision, int64_t key, STimeWindow* win) {
H
Haojun Liao 已提交
1111
  win->skey = taosTimeTruncate(key, pInterval, precision);
1112 1113

  /*
H
Haojun Liao 已提交
1114
   * if the realSkey > INT64_MAX - pInterval->interval, the query duration between
1115 1116
   * realSkey and realEkey must be less than one interval.Therefore, no need to adjust the query ranges.
   */
1117 1118
  win->ekey = taosTimeAdd(win->skey, pInterval->interval, pInterval->intervalUnit, precision) - 1;
  if (win->ekey < win->skey) {
1119 1120 1121 1122
    win->ekey = INT64_MAX;
  }
}

L
Liu Jicong 已提交
1123
/**
 * Refine the load decision of a data block according to the functions in the query.
 *
 * BLK_DATA_DATA_LOAD and BLK_DATA_FILTEROUT are returned unchanged. The classification of the
 * output functions is compiled out (#if 0), so both flags stay false and every other status is
 * currently returned unchanged as well.
 */
static int32_t updateBlockLoadStatus(STaskAttr* pQuery, int32_t status) {
  if (status == BLK_DATA_DATA_LOAD || status == BLK_DATA_FILTEROUT) {
    return status;
  }

  bool sawFirstLast = false;
  bool sawOther = false;

  for (int32_t idx = 0; idx < pQuery->numOfOutput; ++idx) {
    int32_t functionId = getExprFunctionId(&pQuery->pExpr1[idx]);
#if 0
    if (functionId == FUNCTION_TS || functionId == FUNCTION_TS_DUMMY || functionId == FUNCTION_TAG ||
        functionId == FUNCTION_TAG_DUMMY) {
      continue;
    }

    if (functionId == FUNCTION_FIRST_DST || functionId == FUNCTION_LAST_DST) {
      sawFirstLast = true;
    } else {
      sawOther = true;
    }
#endif
  }

  if (!sawFirstLast || status != BLK_DATA_NOT_LOAD) {
    return status;
  }

  return sawOther ? BLK_DATA_DATA_LOAD : BLK_DATA_FILTEROUT;
}

L
Liu Jicong 已提交
1158 1159
// static void updateDataCheckOrder(SQInfo *pQInfo, SQueryTableReq* pQueryMsg, bool stableQuery) {
//   STaskAttr* pQueryAttr = pQInfo->runtimeEnv.pQueryAttr;
H
Haojun Liao 已提交
1160
//
L
Liu Jicong 已提交
1161 1162 1163 1164
//   // in case of point-interpolation query, use asc order scan
//   char msg[] = "QInfo:0x%"PRIx64" scan order changed for %s query, old:%d, new:%d, qrange exchanged, old qrange:%"
//   PRId64
//                "-%" PRId64 ", new qrange:%" PRId64 "-%" PRId64;
H
Haojun Liao 已提交
1165
//
L
Liu Jicong 已提交
1166 1167 1168 1169 1170
//   // todo handle the case the the order irrelevant query type mixed up with order critical query type
//   // descending order query for last_row query
//   if (isFirstLastRowQuery(pQueryAttr)) {
//     //qDebug("QInfo:0x%"PRIx64" scan order changed for last_row query, old:%d, new:%d", pQInfo->qId,
//     pQueryAttr->order.order, TSDB_ORDER_ASC);
H
Haojun Liao 已提交
1171
//
L
Liu Jicong 已提交
1172 1173
//     pQueryAttr->order.order = TSDB_ORDER_ASC;
//     if (pQueryAttr->window.skey > pQueryAttr->window.ekey) {
wafwerar's avatar
wafwerar 已提交
1174
//       TSWAP(pQueryAttr->window.skey, pQueryAttr->window.ekey);
L
Liu Jicong 已提交
1175
//     }
H
Haojun Liao 已提交
1176
//
L
Liu Jicong 已提交
1177 1178 1179
//     pQueryAttr->needReverseScan = false;
//     return;
//   }
H
Haojun Liao 已提交
1180
//
L
Liu Jicong 已提交
1181 1182 1183
//   if (pQueryAttr->groupbyColumn && pQueryAttr->order.order == TSDB_ORDER_DESC) {
//     pQueryAttr->order.order = TSDB_ORDER_ASC;
//     if (pQueryAttr->window.skey > pQueryAttr->window.ekey) {
wafwerar's avatar
wafwerar 已提交
1184
//       TSWAP(pQueryAttr->window.skey, pQueryAttr->window.ekey);
L
Liu Jicong 已提交
1185
//     }
H
Haojun Liao 已提交
1186
//
L
Liu Jicong 已提交
1187 1188 1189 1190
//     pQueryAttr->needReverseScan = false;
//     doUpdateLastKey(pQueryAttr);
//     return;
//   }
H
Haojun Liao 已提交
1191
//
L
Liu Jicong 已提交
1192 1193 1194 1195 1196 1197
//   if (pQueryAttr->pointInterpQuery && pQueryAttr->interval.interval == 0) {
//     if (!QUERY_IS_ASC_QUERY(pQueryAttr)) {
//       //qDebug(msg, pQInfo->qId, "interp", pQueryAttr->order.order, TSDB_ORDER_ASC, pQueryAttr->window.skey,
//       pQueryAttr->window.ekey, pQueryAttr->window.ekey, pQueryAttr->window.skey); TSWAP(pQueryAttr->window.skey,
//       pQueryAttr->window.ekey, TSKEY);
//     }
H
Haojun Liao 已提交
1198
//
L
Liu Jicong 已提交
1199 1200 1201
//     pQueryAttr->order.order = TSDB_ORDER_ASC;
//     return;
//   }
H
Haojun Liao 已提交
1202
//
L
Liu Jicong 已提交
1203 1204 1205 1206
//   if (pQueryAttr->interval.interval == 0) {
//     if (onlyFirstQuery(pQueryAttr)) {
//       if (!QUERY_IS_ASC_QUERY(pQueryAttr)) {
//         //qDebug(msg, pQInfo->qId, "only-first", pQueryAttr->order.order, TSDB_ORDER_ASC, pQueryAttr->window.skey,
H
Haojun Liao 已提交
1207 1208
////               pQueryAttr->window.ekey, pQueryAttr->window.ekey, pQueryAttr->window.skey);
//
wafwerar's avatar
wafwerar 已提交
1209
//        TSWAP(pQueryAttr->window.skey, pQueryAttr->window.ekey);
H
Haojun Liao 已提交
1210 1211 1212 1213 1214 1215 1216 1217 1218 1219
//        doUpdateLastKey(pQueryAttr);
//      }
//
//      pQueryAttr->order.order = TSDB_ORDER_ASC;
//      pQueryAttr->needReverseScan = false;
//    } else if (onlyLastQuery(pQueryAttr) && notContainSessionOrStateWindow(pQueryAttr)) {
//      if (QUERY_IS_ASC_QUERY(pQueryAttr)) {
//        //qDebug(msg, pQInfo->qId, "only-last", pQueryAttr->order.order, TSDB_ORDER_DESC, pQueryAttr->window.skey,
////               pQueryAttr->window.ekey, pQueryAttr->window.ekey, pQueryAttr->window.skey);
//
wafwerar's avatar
wafwerar 已提交
1220
//        TSWAP(pQueryAttr->window.skey, pQueryAttr->window.ekey);
H
Haojun Liao 已提交
1221 1222 1223 1224 1225 1226 1227 1228 1229 1230 1231 1232
//        doUpdateLastKey(pQueryAttr);
//      }
//
//      pQueryAttr->order.order = TSDB_ORDER_DESC;
//      pQueryAttr->needReverseScan = false;
//    }
//
//  } else {  // interval query
//    if (stableQuery) {
//      if (onlyFirstQuery(pQueryAttr)) {
//        if (!QUERY_IS_ASC_QUERY(pQueryAttr)) {
//          //qDebug(msg, pQInfo->qId, "only-first stable", pQueryAttr->order.order, TSDB_ORDER_ASC,
L
Liu Jicong 已提交
1233 1234
////                 pQueryAttr->window.skey, pQueryAttr->window.ekey, pQueryAttr->window.ekey,
/// pQueryAttr->window.skey);
H
Haojun Liao 已提交
1235
//
wafwerar's avatar
wafwerar 已提交
1236
//          TSWAP(pQueryAttr->window.skey, pQueryAttr->window.ekey);
H
Haojun Liao 已提交
1237 1238 1239 1240 1241 1242 1243 1244
//          doUpdateLastKey(pQueryAttr);
//        }
//
//        pQueryAttr->order.order = TSDB_ORDER_ASC;
//        pQueryAttr->needReverseScan = false;
//      } else if (onlyLastQuery(pQueryAttr)) {
//        if (QUERY_IS_ASC_QUERY(pQueryAttr)) {
//          //qDebug(msg, pQInfo->qId, "only-last stable", pQueryAttr->order.order, TSDB_ORDER_DESC,
L
Liu Jicong 已提交
1245 1246
////                 pQueryAttr->window.skey, pQueryAttr->window.ekey, pQueryAttr->window.ekey,
/// pQueryAttr->window.skey);
H
Haojun Liao 已提交
1247
//
wafwerar's avatar
wafwerar 已提交
1248
//          TSWAP(pQueryAttr->window.skey, pQueryAttr->window.ekey);
H
Haojun Liao 已提交
1249 1250 1251 1252 1253 1254 1255 1256 1257
//          doUpdateLastKey(pQueryAttr);
//        }
//
//        pQueryAttr->order.order = TSDB_ORDER_DESC;
//        pQueryAttr->needReverseScan = false;
//      }
//    }
//  }
//}
1258

L
Liu Jicong 已提交
1259 1260 1261
// static FORCE_INLINE bool doFilterByBlockStatistics(STaskRuntimeEnv* pRuntimeEnv, SDataStatis *pDataStatis,
// SqlFunctionCtx *pCtx, int32_t numOfRows) {
//   STaskAttr* pQueryAttr = pRuntimeEnv->pQueryAttr;
1262
//
L
Liu Jicong 已提交
1263 1264 1265
//   if (pDataStatis == NULL || pQueryAttr->pFilters == NULL) {
//     return true;
//   }
1266
//
L
Liu Jicong 已提交
1267 1268
//   return filterRangeExecute(pQueryAttr->pFilters, pDataStatis, pQueryAttr->numOfCols, numOfRows);
// }
1269

H
Haojun Liao 已提交
1270
/**
 * Check whether a data block overlaps an interval window boundary.
 *
 * NOTE(review): the calls that would align and advance the window (getAlignQueryTimeWindow /
 * getNextTimeWindow) are commented out, so the window stays zero-initialised and the loops never
 * advance it; the second branch is unreachable because the condition is the literal `true`.
 */
static bool overlapWithTimeWindow(STaskAttr* pQueryAttr, SDataBlockInfo* pBlockInfo) {
  STimeWindow win = {0};

  TSKEY sk = TMIN(pQueryAttr->window.skey, pQueryAttr->window.ekey);
  TSKEY ek = TMAX(pQueryAttr->window.skey, pQueryAttr->window.ekey);

  if (true) {
    //    getAlignQueryTimeWindow(pQueryAttr, pBlockInfo->window.skey, sk, ek, &w);
    assert(win.ekey >= pBlockInfo->window.skey);

    if (win.ekey < pBlockInfo->window.ekey) {
      return true;
    }

    for (;;) {
      //      getNextTimeWindow(pQueryAttr, &w);
      if (win.skey > pBlockInfo->window.ekey) {
        break;
      }

      assert(win.ekey > pBlockInfo->window.ekey);

      // win.skey <= block end is already guaranteed by the break above
      if (win.skey > pBlockInfo->window.skey) {
        return true;
      }
    }
  } else {
    //    getAlignQueryTimeWindow(pQueryAttr, pBlockInfo->window.ekey, sk, ek, &w);
    assert(win.skey <= pBlockInfo->window.ekey);

    if (win.skey > pBlockInfo->window.skey) {
      return true;
    }

    for (;;) {
      //      getNextTimeWindow(pQueryAttr, &w);
      if (win.ekey < pBlockInfo->window.skey) {
        break;
      }

      assert(win.skey < pBlockInfo->window.skey);

      // win.ekey >= block start is already guaranteed by the break above
      if (win.ekey < pBlockInfo->window.ekey) {
        return true;
      }
    }
  }

  return false;
}

/**
 * Move a run of rows towards the front of every column of the block.
 *
 * For each of the info.numOfCols columns, `count` rows starting at row `srcRow` are moved to
 * row `dstRow`, using info.bytes of the column as the row width.
 */
static void doMoveRetainedRows(SSDataBlock* pBlock, int32_t dstRow, int32_t srcRow, int32_t count) {
  for (int32_t c = 0; c < pBlock->info.numOfCols; ++c) {
    SColumnInfoData* pCol = taosArrayGet(pBlock->pDataBlock, c);

    int16_t width = pCol->info.bytes;
    char*   base = (char*)pCol->pData;
    memmove(base + dstRow * width, base + srcRow * width, count * width);
  }
}

/**
 * Remove the rows of pBlock that are not flagged in `p`, keeping the remaining rows in order.
 *
 * @param pBlock     block to compact in place
 * @param numOfRows  number of rows / entries in p
 * @param p          per-row flags; a row is kept when its flag equals 1
 *
 * Afterwards info.rows holds the number of kept rows and the block statistics pointer is
 * cleared. If rows remain and the first column is the primary timestamp column, the block
 * window is refreshed from the first and last kept timestamps.
 */
void doCompactSDataBlock(SSDataBlock* pBlock, int32_t numOfRows, int8_t* p) {
  int32_t kept = 0;    // rows already compacted at the front
  int32_t runLen = 0;  // length of the current run of consecutive kept rows

  for (int32_t row = 0; row < numOfRows; ++row) {
    if (p[row] == 1) {
      ++runLen;
      continue;
    }

    // a dropped row ends the current run: flush it
    if (runLen > 0) {
      doMoveRetainedRows(pBlock, kept, row - runLen, runLen);
      kept += runLen;
      runLen = 0;
    }
  }

  // flush the run that reaches the end of the block
  if (runLen > 0) {
    doMoveRetainedRows(pBlock, kept, numOfRows - runLen, runLen);
    kept += runLen;
  }

  pBlock->info.rows = kept;
  pBlock->pBlockAgg = NULL;  // clean the block statistics info

  if (kept > 0) {
    SColumnInfoData* pFirstCol = taosArrayGet(pBlock->pDataBlock, 0);
    if (pFirstCol->info.type == TSDB_DATA_TYPE_TIMESTAMP && pFirstCol->info.colId == PRIMARYKEY_TIMESTAMP_COL_ID) {
      pBlock->info.window.skey = *(int64_t*)pFirstCol->pData;
      pBlock->info.window.ekey = *(int64_t*)(pFirstCol->pData + TSDB_KEYSIZE * (kept - 1));
    }
  }
}

/**
 * Derive the load requirement of a block from the functions of a table scan.
 *
 * Returns BLK_DATA_NOT_LOAD combined with BLK_DATA_DATA_LOAD as soon as a context without a
 * function (functionId < 0) is met. The per-function data requirement callback is commented
 * out, so in every other case BLK_DATA_NOT_LOAD is returned. pBlock is not referenced.
 */
static uint32_t doFilterByBlockTimeWindow(STableScanInfo* pTableScanInfo, SSDataBlock* pBlock) {
  SqlFunctionCtx* pFuncCtx = pTableScanInfo->pCtx;
  uint32_t        status = BLK_DATA_NOT_LOAD;

  int32_t total = pTableScanInfo->numOfOutput;
  for (int32_t idx = 0; idx < total; ++idx) {
    int32_t functionId = pFuncCtx[idx].functionId;
    int32_t colId = pTableScanInfo->pExpr[idx].base.pParam[0].pCol->colId;

    // group by + first/last should not apply the first/last block filter
    if (functionId < 0) {
      return status | BLK_DATA_DATA_LOAD;
    }

    //      status |= aAggs[functionId].dataReqFunc(&pTableScanInfo->pCtx[i], &pBlock->info.window, colId);
    //      if ((status & BLK_DATA_DATA_LOAD) == BLK_DATA_DATA_LOAD) {
    //        return status;
    //      }
  }

  return status;
}

L
Liu Jicong 已提交
1392 1393
/*
 * Decide whether the content of pBlock has to be loaded and report the decision through *status.
 *
 * The on-demand logic is currently NOT implemented: this function is a stub that always reports
 * BLK_DATA_NOT_LOAD, detaches the column data and the block aggregates from pBlock and succeeds.
 *
 * The previous implementation was compiled out and referenced identifiers that are not declared in this
 * function (pQueryAttr, pRuntimeEnv, groupId, ascQuery, pQInfo), so it has been dropped here.
 * TODO: when it is ported, it has to cover the steps the disabled code performed:
 *   1. force BLK_DATA_DATA_LOAD for group-by column / session window queries and for interval queries whose
 *      time windows overlap the block;
 *   2. otherwise set up the output buffer of the active window and ask doFilterByBlockTimeWindow();
 *   3. account skipped / statistics-only / fully loaded blocks in pTaskInfo->cost;
 *   4. apply the top/bottom and block-statistics filters, reporting BLK_DATA_FILTEROUT for discarded blocks.
 *
 * @param pTaskInfo       task the scan belongs to (unused by the stub)
 * @param pTableScanInfo  table scan operator info (unused by the stub)
 * @param pBlock          block descriptor; pDataBlock and pBlockAgg are reset to NULL
 * @param status          out: always BLK_DATA_NOT_LOAD
 * @return TSDB_CODE_SUCCESS
 */
int32_t loadDataBlockOnDemand(SExecTaskInfo* pTaskInfo, STableScanInfo* pTableScanInfo, SSDataBlock* pBlock,
                              uint32_t* status) {
  (void)pTaskInfo;
  (void)pTableScanInfo;

  *status = BLK_DATA_NOT_LOAD;

  pBlock->pDataBlock = NULL;
  pBlock->pBlockAgg = NULL;

  return TSDB_CODE_SUCCESS;
}

L
Liu Jicong 已提交
1535
/*
 * Placeholder for switching a table's query state to the reverse scan; it currently has no effect.
 *
 * TODO: the disabled legacy steps were, in order:
 *   - swap win.skey / win.ekey and set lastKey to the new win.skey
 *   - switch cur.order and reset cur.vgroupIndex to -1
 *   - move resInfo.curPos to the last result row (size - 1), or to -1 when there are no result rows
 */
static void updateTableQueryInfoForReverseScan(STableQueryInfo* pTableQueryInfo) {
  // Nothing is read or written for any input, including NULL.
  (void)pTableQueryInfo;
}

H
Haojun Liao 已提交
1555
/*
 * Placeholder initializer for a result row; it currently has no effect.
 *
 * The disabled legacy code pointed pResultRow->pEntryInfo at the memory located directly behind the
 * SResultRow header:
 *   pResultRow->pEntryInfo = (struct SResultRowEntryInfo*)((char*)pResultRow + sizeof(SResultRow));
 */
void initResultRow(SResultRow* pResultRow) {
  (void)pResultRow;
}

/*
 * The start of each column's SResultRowEntryInfo is denoted by rowCellInfoOffset.
 * Note that in case of a top/bottom query, the multiple rows of the result are treated as one single row of results.
 * +------------+-----------------result column 1------------+------------------result column 2-----------+
 * | SResultRow | SResultRowEntryInfo | intermediate buffer1 | SResultRowEntryInfo | intermediate buffer 2|
 * +------------+--------------------------------------------+--------------------------------------------+
 *           offset[0]                                  offset[1]                                   offset[2]
 */
1567
// TODO refactor: some function move away
X
Xiaoyu Wang 已提交
1568 1569
/*
 * Bind every function context of pInfo to its entry in the single result row used by the operator.
 *
 * @param pInfo       basic operator info providing pCtx, rowCellInfoOffset and resultRowInfo
 * @param pSup        aggregate supporter; its pResultBuf backs the result row
 * @param stage       value stored into the scanFlag of every context
 * @param numOfExprs  number of contexts in pInfo->pCtx
 * @param pTaskInfo   task handed through to doSetResultOutBufByKey()
 */
void setFunctionResultOutput(SOptrBasicInfo* pInfo, SAggSupporter* pSup, int32_t stage, int32_t numOfExprs,
                             SExecTaskInfo* pTaskInfo) {
  SqlFunctionCtx* pFuncCtx = pInfo->pCtx;
  int32_t*        pCellOffset = pInfo->rowCellInfoOffset;

  SResultRowInfo* pRowInfo = &pInfo->resultRowInfo;
  initResultRowInfo(pRowInfo, 16);

  // the result row is looked up with an all-zero key and group id 0
  int64_t     key = 0;
  int64_t     groupId = 0;
  SResultRow* pResultRow = doSetResultOutBufByKey(pSup->pResultBuf, pRowInfo, (char*)&key, sizeof(key), true, groupId,
                                                  pTaskInfo, false, pSup);

  for (int32_t idx = 0; idx < numOfExprs; ++idx) {
    struct SResultRowEntryInfo* pCell = getResultCell(pResultRow, idx, pCellOffset);
    cleanupResultRowEntry(pCell);

    pFuncCtx[idx].resultInfo = pCell;
    pFuncCtx[idx].scanFlag = stage;
  }

  initCtxOutputBuffer(pFuncCtx, numOfExprs);
}

L
Liu Jicong 已提交
1593
/*
 * Make sure the column buffers of the result block can take numOfInputRows more rows and point the output
 * pointer of every function context right behind the rows that are already in the block.
 *
 * @param pBInfo          basic operator info; pRes is the result block, pCtx[i] writes into column i
 * @param bufCapacity     in/out: number of rows the column buffers can hold. It is only raised when EVERY column
 *                        buffer has been enlarged, so it never overstates the size of a column whose
 *                        reallocation failed.
 * @param numOfInputRows  number of rows about to be processed
 *
 * NOTE(review): this function cannot report an allocation failure (void return, no task handle for longjmp).
 * The failure is logged and *bufCapacity stays unchanged; callers that have to be safe against out-of-memory
 * should compare *bufCapacity with the required size afterwards.
 */
void updateOutputBuf(SOptrBasicInfo* pBInfo, int32_t* bufCapacity, int32_t numOfInputRows) {
  SSDataBlock* pDataBlock = pBInfo->pRes;

  int32_t newSize = pDataBlock->info.rows + numOfInputRows + 5;  // extra output buffer
  if ((*bufCapacity) < newSize) {
    bool allExpanded = true;

    for (int32_t i = 0; i < pDataBlock->info.numOfCols; ++i) {
      SColumnInfoData* pColInfo = taosArrayGet(pDataBlock->pDataBlock, i);

      char* p = taosMemoryRealloc(pColInfo->pData, newSize * pColInfo->info.bytes);
      if (p == NULL) {
        // the old buffer of this column is still valid, but it has not grown
        qError("failed to expand the output buffer of column %d to %d rows", i, newSize);
        allExpanded = false;
        break;
      }

      pColInfo->pData = p;
    }

    if (allExpanded) {
      (*bufCapacity) = newSize;
    }
  }

  // set the correct pointer after the memory buffer reallocated: output starts from the tail of the previously
  // generated results.
  for (int32_t i = 0; i < pDataBlock->info.numOfCols; ++i) {
    SColumnInfoData* pColInfo = taosArrayGet(pDataBlock->pDataBlock, i);
    pBInfo->pCtx[i].pOutput = pColInfo->pData + pColInfo->info.bytes * pDataBlock->info.rows;
  }
}

H
Haojun Liao 已提交
1629
/*
 * Run the init callback of every function context whose result entry still needs it.
 *
 * A context is skipped when, checked in this order: its result entry is already initialized, its function is a
 * pseudo column function, its function id is -1, or its function is a scalar function.
 *
 * @param pCtx  array of function contexts
 * @param size  number of contexts in pCtx
 */
void initCtxOutputBuffer(SqlFunctionCtx* pCtx, int32_t size) {
  for (int32_t idx = 0; idx < size; ++idx) {
    SqlFunctionCtx*             pFuncCtx = &pCtx[idx];
    struct SResultRowEntryInfo* pEntry = GET_RES_INFO(pFuncCtx);

    if (isRowEntryInitialized(pEntry)) {
      continue;
    }

    if (fmIsPseudoColumnFunc(pFuncCtx->functionId)) {
      continue;
    }

    if (pFuncCtx->functionId == -1) {
      continue;
    }

    if (fmIsScalarFunc(pFuncCtx->functionId)) {
      continue;
    }

    pFuncCtx->fpSet.init(pFuncCtx, pFuncCtx->resultInfo);
  }
}

L
Liu Jicong 已提交
1641
/*
 * Update the status of a task.
 *
 * TASK_NOT_COMPLETED replaces the whole status. Any other value is or'ed into the current status after
 * TASK_NOT_COMPLETED has been cleared, because that flag is not compatible with any other status.
 *
 * @param pTaskInfo  task to update
 * @param status     status value to apply
 */
void setTaskStatus(SExecTaskInfo* pTaskInfo, int8_t status) {
  if (status != TASK_NOT_COMPLETED) {
    CLEAR_QUERY_STATUS(pTaskInfo, TASK_NOT_COMPLETED);
    pTaskInfo->status |= status;
    return;
  }

  pTaskInfo->status = status;
}

L
Liu Jicong 已提交
1651
/*
 * Placeholder for releasing the resources owned by a table query info; it currently has no effect.
 *
 * TODO: the disabled legacy code destroyed the tag variant and cleaned up the result row info:
 *   taosVariantDestroy(&pTableQueryInfo->tag);
 *   cleanupResultRowInfo(&pTableQueryInfo->resInfo);
 */
void destroyTableQueryInfoImpl(STableQueryInfo* pTableQueryInfo) {
  // Nothing is read or written for any input, including NULL.
  (void)pTableQueryInfo;
}

1660
/*
 * Attach every function context to its entry in pResult and initialize the entries that have not been
 * initialized yet.
 *
 * Entries that are both completed and initialized, and entries of window pseudo column functions, are attached
 * but otherwise left untouched. For the remaining uninitialized entries the init callback of the function is
 * invoked; contexts with function id -1 have no callback invoked and are only flagged as initialized.
 *
 * @param pResult            result row holding the entries
 * @param pCtx               array of function contexts
 * @param numOfOutput        number of contexts / entries
 * @param rowCellInfoOffset  per-entry offsets passed to getResultCell()
 */
void setResultRowInitCtx(SResultRow* pResult, SqlFunctionCtx* pCtx, int32_t numOfOutput, int32_t* rowCellInfoOffset) {
  for (int32_t idx = 0; idx < numOfOutput; ++idx) {
    SqlFunctionCtx*             pFuncCtx = &pCtx[idx];
    struct SResultRowEntryInfo* pEntry = getResultCell(pResult, idx, rowCellInfoOffset);

    pFuncCtx->resultInfo = pEntry;

    bool finished = isRowEntryCompleted(pEntry) && isRowEntryInitialized(pEntry);
    if (finished || fmIsWindowPseudoColumnFunc(pFuncCtx->functionId) || pEntry->initialized) {
      continue;
    }

    if (pFuncCtx->functionId == -1) {
      pEntry->initialized = true;
    } else {
      pFuncCtx->fpSet.init(pFuncCtx, pEntry);
    }
  }
}

1683
// Forward declaration: compacts pBlock in place according to the per-row filter result; used by doFilter().
static void extractQualifiedTupleByFilterResult(SSDataBlock* pBlock, const int8_t* rowRes, bool keep);
1684

1685
/*
 * Apply the filter condition to pBlock in place: rows that do not qualify are removed and the timestamp window
 * of the block is refreshed afterwards.
 *
 * @param pFilterNode  filter condition; NULL means "no filter" and leaves the block untouched
 * @param pBlock       data block to filter
 *
 * NOTE(review): this function cannot report errors (void return, no task handle). When the filter cannot be
 * built or bound to the block, the failure is logged and the block is returned UNFILTERED instead of running
 * the filter on an invalid handle. Callers that must not see unfiltered rows need an error-returning variant.
 */
void doFilter(const SNode* pFilterNode, SSDataBlock* pBlock) {
  if (pFilterNode == NULL) {
    return;
  }

  SFilterInfo* filter = NULL;

  // todo move to the initialization function
  int32_t code = filterInitFromNode((SNode*)pFilterNode, &filter, 0);
  if (code != TSDB_CODE_SUCCESS || filter == NULL) {
    // NOTE(review): ownership of a partially built filter on failure is not visible here, so it is not freed.
    qError("failed to init filter from node, code %s", tstrerror(code));
    return;
  }

  SFilterColumnParam param1 = {.numOfCols = pBlock->info.numOfCols, .pDataBlock = pBlock->pDataBlock};
  code = filterSetDataFromSlotId(filter, &param1);
  if (code != TSDB_CODE_SUCCESS) {
    qError("failed to bind block columns to filter, code %s", tstrerror(code));
    filterFreeInfo(filter);
    return;
  }

  int8_t* rowRes = NULL;

  // todo the keep seems never to be True??
  bool keep = filterExecute(filter, pBlock, &rowRes, NULL, param1.numOfCols);
  filterFreeInfo(filter);

  extractQualifiedTupleByFilterResult(pBlock, rowRes, keep);

  // The per-row result is handed back through an out parameter and nothing else references it once the rows
  // have been extracted, so it is released here. NOTE(review): assumes filterExecute() allocates it for the
  // caller -- confirm against the filter module.
  if (rowRes != NULL) {
    taosMemoryFree(rowRes);
  }

  blockDataUpdateTsWindow(pBlock, 0);
}

1708
/*
 * Compact pBlock in place so that only the rows accepted by the filter remain.
 *
 * @param pBlock  block to compact
 * @param rowRes  per-row filter result, a non-zero entry keeps the row; NULL drops every row
 * @param keep    when true the block is left untouched
 */
void extractQualifiedTupleByFilterResult(SSDataBlock* pBlock, const int8_t* rowRes, bool keep) {
  if (keep) {
    return;
  }

  if (rowRes == NULL) {
    // no per-row result is available: the block ends up empty
    pBlock->info.rows = 0;
    return;
  }

  // rows are copied back from a full snapshot of the block
  SSDataBlock* pSnapshot = createOneDataBlock(pBlock, true);

  int32_t numOfInputRows = pBlock->info.rows;
  for (int32_t col = 0; col < pBlock->info.numOfCols; ++col) {
    SColumnInfoData* pFrom = taosArrayGet(pSnapshot->pDataBlock, col);
    SColumnInfoData* pTo = taosArrayGet(pBlock->pDataBlock, col);

    // it is a reserved column for scalar function, and no data in this column yet.
    if (pTo->pData == NULL) {
      continue;
    }

    colInfoDataCleanup(pTo, pBlock->info.rows);

    int32_t numOfKept = 0;
    for (int32_t row = 0; row < numOfInputRows; ++row) {
      if (rowRes[row] != 0) {
        if (colDataIsNull_s(pFrom, row)) {
          colDataAppendNULL(pTo, numOfKept);
        } else {
          colDataAppend(pTo, numOfKept, colDataGetData(pFrom, row), false);
        }

        numOfKept++;
      }
    }

    // the first compacted column defines the new row count, every later column has to agree with it
    if (pBlock->info.rows == numOfInputRows) {
      pBlock->info.rows = numOfKept;
    } else {
      ASSERT(pBlock->info.rows == numOfKept);
    }
  }

  blockDataDestroy(pSnapshot);  // fix memory leak
}

dengyihao's avatar
dengyihao 已提交
1755 1756
/*
 * Select the result row of the given group as the output target of the aggregate operator.
 *
 * For a simple group by query without interval, all the tables belong to one group result; the row is looked
 * up with the group id as its key.
 *
 * @param pAggInfo     aggregate operator info (contexts, result row info, result buffer)
 * @param numOfOutput  number of function contexts
 * @param groupId      id of the group that becomes active
 * @param pTaskInfo    task handed through to doSetResultOutBufByKey()
 */
void doSetTableGroupOutputBuf(SAggOperatorInfo* pAggInfo, int32_t numOfOutput, uint64_t groupId,
                              SExecTaskInfo* pTaskInfo) {
  SResultRowInfo* pRowInfo = &pAggInfo->binfo.resultRowInfo;
  SqlFunctionCtx* pFuncCtx = pAggInfo->binfo.pCtx;
  int32_t*        pCellOffset = pAggInfo->binfo.rowCellInfoOffset;

  SResultRow* pRow = doSetResultOutBufByKey(pAggInfo->aggSup.pResultBuf, pRowInfo, (char*)&groupId, sizeof(groupId),
                                            true, groupId, pTaskInfo, false, &pAggInfo->aggSup);
  assert(pRow != NULL);

  /*
   * not assign result buffer yet, add new result buffer
   * all group belong to one result set, and each group result has different group id so set the id to be one
   */
  if (pRow->pageId == -1 && addNewWindowResultBuf(pRow, pAggInfo->aggSup.pResultBuf, groupId,
                                                  pAggInfo->binfo.pRes->info.rowSize) != TSDB_CODE_SUCCESS) {
    // without a result buffer the contexts are left as they are
    return;
  }

  setResultRowInitCtx(pRow, pFuncCtx, numOfOutput, pCellOffset);
}

H
Haojun Liao 已提交
1783 1784
/*
 * Switch the aggregate operator to the result row of groupId, unless that group is already the active one.
 *
 * @param numOfOutput  number of function contexts
 * @param groupId      group the following input belongs to
 * @param pTaskInfo    task handed through to doSetTableGroupOutputBuf()
 * @param pAggInfo     aggregate operator info; its groupId records the active group, INT32_MIN is treated as
 *                     "no active group"
 */
void setExecutionContext(int32_t numOfOutput, uint64_t groupId, SExecTaskInfo* pTaskInfo, SAggOperatorInfo* pAggInfo) {
  bool alreadyActive = (pAggInfo->groupId != INT32_MIN) && (pAggInfo->groupId == groupId);
  if (!alreadyActive) {
    doSetTableGroupOutputBuf(pAggInfo, numOfOutput, groupId, pTaskInfo);

    // record the current active group id
    pAggInfo->groupId = groupId;
  }
}

1794 1795 1796 1797 1798 1799 1800 1801 1802 1803 1804 1805 1806
/*
 * Raise pRow->numOfRows to the largest numOfRes found among the initialized entries of the row.
 * The value is never lowered; entries that are not initialized are ignored.
 *
 * @param pRow           result row to update
 * @param numOfExprs     number of entries in the row
 * @param rowCellOffset  per-entry offsets passed to getResultCell()
 */
static void doUpdateNumOfRows(SResultRow* pRow, int32_t numOfExprs, const int32_t* rowCellOffset) {
  for (int32_t idx = 0; idx < numOfExprs; ++idx) {
    struct SResultRowEntryInfo* pEntry = getResultCell(pRow, idx, rowCellOffset);

    if (isRowEntryInitialized(pEntry) && pRow->numOfRows < pEntry->numOfRes) {
      pRow->numOfRows = pEntry->numOfRes;
    }
  }
}

1807
int32_t finalizeResultRowIntoResultDataBlock(SDiskbasedBuf* pBuf, SResultRowPosition* resultRowPosition,
S
shenglian zhou 已提交
1808 1809 1810
                                             SqlFunctionCtx* pCtx, SExprInfo* pExprInfo, int32_t numOfExprs,
                                             const int32_t* rowCellOffset, SSDataBlock* pBlock,
                                             SExecTaskInfo* pTaskInfo) {
1811 1812 1813 1814 1815 1816 1817 1818 1819
  SFilePage*  page = getBufPage(pBuf, resultRowPosition->pageId);
  SResultRow* pRow = (SResultRow*)((char*)page + resultRowPosition->offset);

  doUpdateNumOfRows(pRow, numOfExprs, rowCellOffset);
  if (pRow->numOfRows == 0) {
    releaseBufPage(pBuf, page);
    return 0;
  }

1820 1821 1822 1823 1824 1825 1826
  while (pBlock->info.rows + pRow->numOfRows > pBlock->info.capacity) {
    int32_t code = blockDataEnsureCapacity(pBlock, pBlock->info.capacity * 1.25);
    if (TAOS_FAILED(code)) {
      releaseBufPage(pBuf, page);
      qError("%s ensure result data capacity failed, code %s", GET_TASKID(pTaskInfo), tstrerror(code));
      longjmp(pTaskInfo->env, code);
    }
1827 1828 1829 1830 1831 1832 1833 1834 1835 1836 1837 1838 1839 1840 1841 1842 1843 1844 1845 1846 1847 1848 1849 1850 1851 1852
  }

  for (int32_t j = 0; j < numOfExprs; ++j) {
    int32_t slotId = pExprInfo[j].base.resSchema.slotId;

    pCtx[j].resultInfo = getResultCell(pRow, j, rowCellOffset);
    if (pCtx[j].fpSet.finalize) {
      int32_t code = pCtx[j].fpSet.finalize(&pCtx[j], pBlock);
      if (TAOS_FAILED(code)) {
        qError("%s build result data block error, code %s", GET_TASKID(pTaskInfo), tstrerror(code));
        longjmp(pTaskInfo->env, code);
      }
    } else if (strcmp(pCtx[j].pExpr->pExpr->_function.functionName, "_select_value") == 0) {
      // do nothing, todo refactor
    } else {
      // expand the result into multiple rows. E.g., _wstartts, top(k, 20)
      // the _wstartts needs to copy to 20 following rows, since the results of top-k expands to 20 different rows.
      SColumnInfoData* pColInfoData = taosArrayGet(pBlock->pDataBlock, slotId);
      char*            in = GET_ROWCELL_INTERBUF(pCtx[j].resultInfo);
      for (int32_t k = 0; k < pRow->numOfRows; ++k) {
        colDataAppend(pColInfoData, pBlock->info.rows + k, in, pCtx[j].resultInfo->isNullRes);
      }
    }
  }

  releaseBufPage(pBuf, page);
1853
  pBlock->info.rows += pRow->numOfRows;
1854 1855 1856 1857

  return 0;
}

X
Xiaoyu Wang 已提交
1858 1859 1860
/*
 * Finalize the pending result rows of a group result set into pBlock until the block is full, the rows are
 * exhausted, or a row of a different group is reached.
 *
 * @param pTaskInfo      task; on a finalize failure the error is logged and control leaves via longjmp
 * @param pBlock         destination block; rows of only one group id are packed into it
 * @param pExprInfo      expressions; resSchema.slotId selects the destination column
 * @param pBuf           disk based buffer holding the result rows
 * @param pGroupResInfo  result positions; index is advanced past every consumed or empty row
 * @param rowCellOffset  per-entry offsets passed to getResultCell()
 * @param pCtx           function contexts, one per expression
 * @param numOfExprs     number of expressions / contexts
 * @return 0 (errors do not return, they longjmp)
 *
 * Every buffer page fetched here is released again on all paths, including the longjmp one.
 */
int32_t doCopyToSDataBlock(SExecTaskInfo* pTaskInfo, SSDataBlock* pBlock, SExprInfo* pExprInfo, SDiskbasedBuf* pBuf,
                           SGroupResInfo* pGroupResInfo, const int32_t* rowCellOffset, SqlFunctionCtx* pCtx,
                           int32_t numOfExprs) {
  int32_t numOfRows = getNumOfTotalRes(pGroupResInfo);
  int32_t start = pGroupResInfo->index;

  for (int32_t i = start; i < numOfRows; i += 1) {
    SResKeyPos* pPos = taosArrayGetP(pGroupResInfo->pRows, i);
    SFilePage*  page = getBufPage(pBuf, pPos->pos.pageId);

    SResultRow* pRow = (SResultRow*)((char*)page + pPos->pos.offset);

    doUpdateNumOfRows(pRow, numOfExprs, rowCellOffset);
    if (pRow->numOfRows == 0) {
      pGroupResInfo->index += 1;
      releaseBufPage(pBuf, page);
      continue;
    }

    if (pBlock->info.groupId == 0) {
      pBlock->info.groupId = pPos->groupId;
    } else {
      // current value belongs to different group, it can't be packed into one datablock
      if (pBlock->info.groupId != pPos->groupId) {
        releaseBufPage(pBuf, page);
        break;
      }
    }

    // the row does not fit any more; it stays pending for the next call
    if (pBlock->info.rows + pRow->numOfRows > pBlock->info.capacity) {
      releaseBufPage(pBuf, page);
      break;
    }

    pGroupResInfo->index += 1;

    for (int32_t j = 0; j < numOfExprs; ++j) {
      int32_t slotId = pExprInfo[j].base.resSchema.slotId;

      pCtx[j].resultInfo = getResultCell(pRow, j, rowCellOffset);
      if (pCtx[j].fpSet.finalize) {
        int32_t code = pCtx[j].fpSet.finalize(&pCtx[j], pBlock);
        if (TAOS_FAILED(code)) {
          // do not leave the page pinned when jumping out
          releaseBufPage(pBuf, page);
          qError("%s build result data block error, code %s", GET_TASKID(pTaskInfo), tstrerror(code));
          longjmp(pTaskInfo->env, code);
        }
      } else if (strcmp(pCtx[j].pExpr->pExpr->_function.functionName, "_select_value") == 0) {
        // do nothing, todo refactor
      } else {
        // expand the result into multiple rows. E.g., _wstartts, top(k, 20)
        // the _wstartts needs to copy to 20 following rows, since the results of top-k expands to 20 different rows.
        SColumnInfoData* pColInfoData = taosArrayGet(pBlock->pDataBlock, slotId);
        char*            in = GET_ROWCELL_INTERBUF(pCtx[j].resultInfo);
        if (pCtx[j].increase) {
          // every expanded row gets a timestamp one larger than the previous one
          int64_t ts = *(int64_t*)in;
          for (int32_t k = 0; k < pRow->numOfRows; ++k) {
            colDataAppend(pColInfoData, pBlock->info.rows + k, (const char*)&ts, pCtx[j].resultInfo->isNullRes);
            ts++;
          }
        } else {
          for (int32_t k = 0; k < pRow->numOfRows; ++k) {
            colDataAppend(pColInfoData, pBlock->info.rows + k, in, pCtx[j].resultInfo->isNullRes);
          }
        }
      }
    }

    // pRow points into the page: read the row count before the page is handed back to the buffer
    int32_t numOfAppendedRows = pRow->numOfRows;
    releaseBufPage(pBuf, page);

    pBlock->info.rows += numOfAppendedRows;
    if (pBlock->info.rows >= pBlock->info.capacity) {  // output buffer is full
      break;
    }
  }

  qDebug("%s result generated, rows:%d, groupId:%" PRIu64, GET_TASKID(pTaskInfo), pBlock->info.rows,
         pBlock->info.groupId);
  blockDataUpdateTsWindow(pBlock, 0);
  return 0;
}

X
Xiaoyu Wang 已提交
1938 1939 1940 1941
/*
 * Produce the next result block of an operator from its pending group results.
 *
 * The result block is always cleaned first. When group results remain, the group id of the block is reset and
 * the block is filled through doCopyToSDataBlock().
 *
 * @param pOperator      operator providing the expressions and the task
 * @param pbInfo         basic operator info providing the result block, contexts and cell offsets
 * @param pGroupResInfo  pending group results
 * @param pBuf           disk based buffer holding the result rows
 */
void doBuildResultDatablock(SOperatorInfo* pOperator, SOptrBasicInfo* pbInfo, SGroupResInfo* pGroupResInfo,
                            SDiskbasedBuf* pBuf) {
  SSDataBlock* pResBlock = pbInfo->pRes;

  blockDataCleanup(pResBlock);
  if (hashRemainDataInGroupInfo(pGroupResInfo)) {
    // clear the existed group id
    pResBlock->info.groupId = 0;
    doCopyToSDataBlock(pOperator->pTaskInfo, pResBlock, pOperator->pExpr, pBuf, pGroupResInfo,
                       pbInfo->rowCellInfoOffset, pbInfo->pCtx, pOperator->numOfExprs);
  }
}

L
Liu Jicong 已提交
1958 1959
/*
 * Placeholder for refreshing the row count of every result row; it currently has no effect.
 *
 * TODO: the disabled legacy code walked all rows of pResultRowInfo and raised numOfRows of each row to the
 * largest numOfRes among its cells, skipping the FUNCTION_TS, FUNCTION_TAG and FUNCTION_TAGPRJ columns.
 * It was meant to be skipped entirely for interval queries.
 */
static void updateNumOfRowsInResultRows(SqlFunctionCtx* pCtx, int32_t numOfOutput, SResultRowInfo* pResultRowInfo,
                                        int32_t* rowCellInfoOffset) {
  (void)pCtx;
  (void)numOfOutput;
  (void)pResultRowInfo;
  (void)rowCellInfoOffset;
}

L
Liu Jicong 已提交
1981
/*
 * Compress the data of one result column with the compression routine registered for its data type.
 *
 * @param pColRes     column to compress; info.type selects the routine, info.bytes is the width of one value
 * @param numOfRows   number of rows stored in the column
 * @param data        output buffer; its size is passed to the routine as the input size plus
 *                    COMP_OVERFLOW_BYTES
 * @param compressed  flag forwarded unchanged to the compression routine
 * @return the value returned by the compression routine
 */
static int32_t compressQueryColData(SColumnInfoData* pColRes, int32_t numOfRows, char* data, int8_t compressed) {
  int32_t inputSize = pColRes->info.bytes * numOfRows;
  int32_t outputSize = inputSize + COMP_OVERFLOW_BYTES;

  return tDataTypes[pColRes->info.type].compFunc(pColRes->pData, inputSize, numOfRows, data, outputSize, compressed,
                                                 NULL, 0);
}

1987 1988 1989
/*
 * Let the fill handle append rows to pBlock, using at most the free space left below capacity.
 *
 * @param pFillInfo  fill handle producing the rows
 * @param pBlock     destination block; info.rows is advanced by the number of rows produced
 * @param capacity   total number of rows pBlock may hold
 * @return the number of rows in pBlock after filling
 */
int32_t doFillTimeIntervalGapsInResults(struct SFillInfo* pFillInfo, SSDataBlock* pBlock, int32_t capacity) {
  int32_t freeRows = capacity - pBlock->info.rows;
  int32_t numOfFilled = (int32_t)taosFillResultDataBlock(pFillInfo, pBlock, freeRows);

  pBlock->info.rows += numOfFilled;
  return pBlock->info.rows;
}

L
Liu Jicong 已提交
1994 1995
/*
 * Fold the first-stage merge time into the elapsed time of the task and, when a block load recorder is
 * attached, print the cost summary to the debug log.
 *
 * Note that the merge time is added on every call.
 *
 * TODO: hash size, result row pool and per-operator profiling statistics are not collected at the moment.
 *
 * @param pTaskInfo  task whose cost information is summarized
 */
void queryCostStatis(SExecTaskInfo* pTaskInfo) {
  STaskCostInfo* pCost = &pTaskInfo->cost;

  // add the merge time
  pCost->elapsedTime += pCost->firstStageMergeTime;

  SFileBlockLoadRecorder* pRecorder = pCost->pRecoder;
  if (pRecorder == NULL) {
    return;
  }

  qDebug("%s :cost summary: elapsed time:%" PRId64 " us, first merge:%" PRId64
         " us, total blocks:%d, "
         "load block statis:%d, load data block:%d, total rows:%" PRId64 ", check rows:%" PRId64,
         GET_TASKID(pTaskInfo), pCost->elapsedTime, pCost->firstStageMergeTime, pRecorder->totalBlocks,
         pRecorder->loadBlockStatis, pRecorder->loadBlocks, pRecorder->totalRows, pRecorder->totalCheckedRows);
}

L
Liu Jicong 已提交
2028 2029 2030
// static void updateOffsetVal(STaskRuntimeEnv *pRuntimeEnv, SDataBlockInfo *pBlockInfo) {
//   STaskAttr *pQueryAttr = pRuntimeEnv->pQueryAttr;
//   STableQueryInfo* pTableQueryInfo = pRuntimeEnv->current;
2031
//
L
Liu Jicong 已提交
2032
//   int32_t step = GET_FORWARD_DIRECTION_FACTOR(pQueryAttr->order.order);
2033
//
L
Liu Jicong 已提交
2034 2035 2036 2037
//   if (pQueryAttr->limit.offset == pBlockInfo->rows) {  // current block will ignore completed
//     pTableQueryInfo->lastKey = QUERY_IS_ASC_QUERY(pQueryAttr) ? pBlockInfo->window.ekey + step :
//     pBlockInfo->window.skey + step; pQueryAttr->limit.offset = 0; return;
//   }
2038
//
L
Liu Jicong 已提交
2039 2040 2041 2042 2043
//   if (QUERY_IS_ASC_QUERY(pQueryAttr)) {
//     pQueryAttr->pos = (int32_t)pQueryAttr->limit.offset;
//   } else {
//     pQueryAttr->pos = pBlockInfo->rows - (int32_t)pQueryAttr->limit.offset - 1;
//   }
2044
//
L
Liu Jicong 已提交
2045
//   assert(pQueryAttr->pos >= 0 && pQueryAttr->pos <= pBlockInfo->rows - 1);
2046
//
L
Liu Jicong 已提交
2047 2048
//   SArray *         pDataBlock = tsdbRetrieveDataBlock(pRuntimeEnv->pTsdbReadHandle, NULL);
//   SColumnInfoData *pColInfoData = taosArrayGet(pDataBlock, 0);
2049
//
L
Liu Jicong 已提交
2050 2051
//   // update the pQueryAttr->limit.offset value, and pQueryAttr->pos value
//   TSKEY *keys = (TSKEY *) pColInfoData->pData;
2052
//
L
Liu Jicong 已提交
2053 2054 2055
//   // update the offset value
//   pTableQueryInfo->lastKey = keys[pQueryAttr->pos];
//   pQueryAttr->limit.offset = 0;
2056
//
L
Liu Jicong 已提交
2057
//   int32_t numOfRes = tableApplyFunctionsOnBlock(pRuntimeEnv, pBlockInfo, NULL, binarySearchForKey, pDataBlock);
2058
//
L
Liu Jicong 已提交
2059 2060 2061 2062
//   //qDebug("QInfo:0x%"PRIx64" check data block, brange:%" PRId64 "-%" PRId64 ", numBlocksOfStep:%d, numOfRes:%d,
//   lastKey:%"PRId64, GET_TASKID(pRuntimeEnv),
//          pBlockInfo->window.skey, pBlockInfo->window.ekey, pBlockInfo->rows, numOfRes, pQuery->current->lastKey);
// }
2063

L
Liu Jicong 已提交
2064 2065
// void skipBlocks(STaskRuntimeEnv *pRuntimeEnv) {
//   STaskAttr *pQueryAttr = pRuntimeEnv->pQueryAttr;
2066
//
L
Liu Jicong 已提交
2067 2068 2069
//   if (pQueryAttr->limit.offset <= 0 || pQueryAttr->numOfFilterCols > 0) {
//     return;
//   }
2070
//
L
Liu Jicong 已提交
2071 2072
//   pQueryAttr->pos = 0;
//   int32_t step = GET_FORWARD_DIRECTION_FACTOR(pQueryAttr->order.order);
2073
//
L
Liu Jicong 已提交
2074 2075
//   STableQueryInfo* pTableQueryInfo = pRuntimeEnv->current;
//   TsdbQueryHandleT pTsdbReadHandle = pRuntimeEnv->pTsdbReadHandle;
2076
//
L
Liu Jicong 已提交
2077 2078 2079 2080 2081
//   SDataBlockInfo blockInfo = SDATA_BLOCK_INITIALIZER;
//   while (tsdbNextDataBlock(pTsdbReadHandle)) {
//     if (isTaskKilled(pRuntimeEnv->qinfo)) {
//       longjmp(pRuntimeEnv->env, TSDB_CODE_TSC_QUERY_CANCELLED);
//     }
2082
//
L
Liu Jicong 已提交
2083
//     tsdbRetrieveDataBlockInfo(pTsdbReadHandle, &blockInfo);
2084
//
L
Liu Jicong 已提交
2085 2086 2087 2088
//     if (pQueryAttr->limit.offset > blockInfo.rows) {
//       pQueryAttr->limit.offset -= blockInfo.rows;
//       pTableQueryInfo->lastKey = (QUERY_IS_ASC_QUERY(pQueryAttr)) ? blockInfo.window.ekey : blockInfo.window.skey;
//       pTableQueryInfo->lastKey += step;
2089
//
L
Liu Jicong 已提交
2090 2091 2092 2093 2094 2095 2096
//       //qDebug("QInfo:0x%"PRIx64" skip rows:%d, offset:%" PRId64, GET_TASKID(pRuntimeEnv), blockInfo.rows,
//              pQuery->limit.offset);
//     } else {  // find the appropriated start position in current block
//       updateOffsetVal(pRuntimeEnv, &blockInfo);
//       break;
//     }
//   }
2097
//
L
Liu Jicong 已提交
2098 2099 2100 2101 2102 2103 2104 2105 2106
//   if (terrno != TSDB_CODE_SUCCESS) {
//     longjmp(pRuntimeEnv->env, terrno);
//   }
// }

// static TSKEY doSkipIntervalProcess(STaskRuntimeEnv* pRuntimeEnv, STimeWindow* win, SDataBlockInfo* pBlockInfo,
// STableQueryInfo* pTableQueryInfo) {
//   STaskAttr *pQueryAttr = pRuntimeEnv->pQueryAttr;
//   SResultRowInfo *pWindowResInfo = &pRuntimeEnv->resultRowInfo;
2107
//
L
Liu Jicong 已提交
2108 2109 2110
//   assert(pQueryAttr->limit.offset == 0);
//   STimeWindow tw = *win;
//   getNextTimeWindow(pQueryAttr, &tw);
2111
//
L
Liu Jicong 已提交
2112 2113
//   if ((tw.skey <= pBlockInfo->window.ekey && QUERY_IS_ASC_QUERY(pQueryAttr)) ||
//       (tw.ekey >= pBlockInfo->window.skey && !QUERY_IS_ASC_QUERY(pQueryAttr))) {
2114
//
L
Liu Jicong 已提交
2115 2116 2117 2118
//     // load the data block and check data remaining in current data block
//     // TODO optimize performance
//     SArray *         pDataBlock = tsdbRetrieveDataBlock(pRuntimeEnv->pTsdbReadHandle, NULL);
//     SColumnInfoData *pColInfoData = taosArrayGet(pDataBlock, 0);
2119
//
L
Liu Jicong 已提交
2120 2121 2122 2123
//     tw = *win;
//     int32_t startPos =
//         getNextQualifiedWindow(pQueryAttr, &tw, pBlockInfo, pColInfoData->pData, binarySearchForKey, -1);
//     assert(startPos >= 0);
2124
//
L
Liu Jicong 已提交
2125 2126
//     // set the abort info
//     pQueryAttr->pos = startPos;
2127
//
L
Liu Jicong 已提交
2128 2129 2130 2131
//     // reset the query start timestamp
//     pTableQueryInfo->win.skey = ((TSKEY *)pColInfoData->pData)[startPos];
//     pQueryAttr->window.skey = pTableQueryInfo->win.skey;
//     TSKEY key = pTableQueryInfo->win.skey;
2132
//
L
Liu Jicong 已提交
2133 2134
//     pWindowResInfo->prevSKey = tw.skey;
//     int32_t index = pRuntimeEnv->resultRowInfo.curIndex;
2135
//
L
Liu Jicong 已提交
2136 2137
//     int32_t numOfRes = tableApplyFunctionsOnBlock(pRuntimeEnv, pBlockInfo, NULL, binarySearchForKey, pDataBlock);
//     pRuntimeEnv->resultRowInfo.curIndex = index;  // restore the window index
2138
//
L
Liu Jicong 已提交
2139 2140 2141 2142
//     //qDebug("QInfo:0x%"PRIx64" check data block, brange:%" PRId64 "-%" PRId64 ", numOfRows:%d, numOfRes:%d,
//     lastKey:%" PRId64,
//            GET_TASKID(pRuntimeEnv), pBlockInfo->window.skey, pBlockInfo->window.ekey, pBlockInfo->rows, numOfRes,
//            pQueryAttr->current->lastKey);
2143
//
L
Liu Jicong 已提交
2144 2145 2146 2147 2148
//     return key;
//   } else {  // do nothing
//     pQueryAttr->window.skey      = tw.skey;
//     pWindowResInfo->prevSKey = tw.skey;
//     pTableQueryInfo->lastKey = tw.skey;
2149
//
L
Liu Jicong 已提交
2150 2151
//     return tw.skey;
//   }
2152
//
L
Liu Jicong 已提交
2153 2154 2155 2156 2157 2158 2159 2160 2161 2162
//   return true;
// }

// static bool skipTimeInterval(STaskRuntimeEnv *pRuntimeEnv, TSKEY* start) {
//   STaskAttr *pQueryAttr = pRuntimeEnv->pQueryAttr;
//   if (QUERY_IS_ASC_QUERY(pQueryAttr)) {
//     assert(*start <= pRuntimeEnv->current->lastKey);
//   } else {
//     assert(*start >= pRuntimeEnv->current->lastKey);
//   }
2163
//
L
Liu Jicong 已提交
2164 2165 2166 2167 2168
//   // if queried with value filter, do NOT forward query start position
//   if (pQueryAttr->limit.offset <= 0 || pQueryAttr->numOfFilterCols > 0 || pRuntimeEnv->pTsBuf != NULL ||
//   pRuntimeEnv->pFillInfo != NULL) {
//     return true;
//   }
2169
//
L
Liu Jicong 已提交
2170 2171 2172 2173 2174 2175 2176
//   /*
//    * 1. for interval without interpolation query we forward pQueryAttr->interval.interval at a time for
//    *    pQueryAttr->limit.offset times. Since hole exists, pQueryAttr->interval.interval*pQueryAttr->limit.offset
//    value is
//    *    not valid. otherwise, we only forward pQueryAttr->limit.offset number of points
//    */
//   assert(pRuntimeEnv->resultRowInfo.prevSKey == TSKEY_INITIAL_VAL);
2177
//
L
Liu Jicong 已提交
2178 2179
//   STimeWindow w = TSWINDOW_INITIALIZER;
//   bool ascQuery = QUERY_IS_ASC_QUERY(pQueryAttr);
2180
//
L
Liu Jicong 已提交
2181 2182
//   SResultRowInfo *pWindowResInfo = &pRuntimeEnv->resultRowInfo;
//   STableQueryInfo *pTableQueryInfo = pRuntimeEnv->current;
2183
//
L
Liu Jicong 已提交
2184 2185 2186
//   SDataBlockInfo blockInfo = SDATA_BLOCK_INITIALIZER;
//   while (tsdbNextDataBlock(pRuntimeEnv->pTsdbReadHandle)) {
//     tsdbRetrieveDataBlockInfo(pRuntimeEnv->pTsdbReadHandle, &blockInfo);
2187
//
L
Liu Jicong 已提交
2188 2189 2190 2191 2192 2193 2194 2195 2196
//     if (QUERY_IS_ASC_QUERY(pQueryAttr)) {
//       if (pWindowResInfo->prevSKey == TSKEY_INITIAL_VAL) {
//         getAlignQueryTimeWindow(pQueryAttr, blockInfo.window.skey, blockInfo.window.skey, pQueryAttr->window.ekey,
//         &w); pWindowResInfo->prevSKey = w.skey;
//       }
//     } else {
//       getAlignQueryTimeWindow(pQueryAttr, blockInfo.window.ekey, pQueryAttr->window.ekey, blockInfo.window.ekey, &w);
//       pWindowResInfo->prevSKey = w.skey;
//     }
2197
//
L
Liu Jicong 已提交
2198 2199
//     // the first time window
//     STimeWindow win = getActiveTimeWindow(pWindowResInfo, pWindowResInfo->prevSKey, pQueryAttr);
2200
//
L
Liu Jicong 已提交
2201 2202
//     while (pQueryAttr->limit.offset > 0) {
//       STimeWindow tw = win;
2203
//
L
Liu Jicong 已提交
2204 2205 2206
//       if ((win.ekey <= blockInfo.window.ekey && ascQuery) || (win.ekey >= blockInfo.window.skey && !ascQuery)) {
//         pQueryAttr->limit.offset -= 1;
//         pWindowResInfo->prevSKey = win.skey;
2207
//
L
Liu Jicong 已提交
2208 2209 2210 2211 2212 2213
//         // current time window is aligned with blockInfo.window.ekey
//         // restart it from next data block by set prevSKey to be TSKEY_INITIAL_VAL;
//         if ((win.ekey == blockInfo.window.ekey && ascQuery) || (win.ekey == blockInfo.window.skey && !ascQuery)) {
//           pWindowResInfo->prevSKey = TSKEY_INITIAL_VAL;
//         }
//       }
2214
//
L
Liu Jicong 已提交
2215 2216 2217 2218
//       if (pQueryAttr->limit.offset == 0) {
//         *start = doSkipIntervalProcess(pRuntimeEnv, &win, &blockInfo, pTableQueryInfo);
//         return true;
//       }
2219
//
L
Liu Jicong 已提交
2220 2221
//       // current window does not ended in current data block, try next data block
//       getNextTimeWindow(pQueryAttr, &tw);
2222
//
L
Liu Jicong 已提交
2223 2224 2225 2226 2227 2228 2229 2230 2231
//       /*
//        * If the next time window still starts from current data block,
//        * load the primary timestamp column first, and then find the start position for the next queried time window.
//        * Note that only the primary timestamp column is required.
//        * TODO: Optimize for this cases. All data blocks are not needed to be loaded, only if the first actually
//        required
//        * time window resides in current data block.
//        */
//       if ((tw.skey <= blockInfo.window.ekey && ascQuery) || (tw.ekey >= blockInfo.window.skey && !ascQuery)) {
2232
//
L
Liu Jicong 已提交
2233 2234
//         SArray *pDataBlock = tsdbRetrieveDataBlock(pRuntimeEnv->pTsdbReadHandle, NULL);
//         SColumnInfoData *pColInfoData = taosArrayGet(pDataBlock, 0);
2235
//
L
Liu Jicong 已提交
2236 2237 2238
//         if ((win.ekey > blockInfo.window.ekey && ascQuery) || (win.ekey < blockInfo.window.skey && !ascQuery)) {
//           pQueryAttr->limit.offset -= 1;
//         }
2239
//
L
Liu Jicong 已提交
2240 2241 2242 2243 2244 2245 2246 2247
//         if (pQueryAttr->limit.offset == 0) {
//           *start = doSkipIntervalProcess(pRuntimeEnv, &win, &blockInfo, pTableQueryInfo);
//           return true;
//         } else {
//           tw = win;
//           int32_t startPos =
//               getNextQualifiedWindow(pQueryAttr, &tw, &blockInfo, pColInfoData->pData, binarySearchForKey, -1);
//           assert(startPos >= 0);
2248
//
L
Liu Jicong 已提交
2249 2250 2251 2252 2253 2254 2255 2256 2257 2258 2259
//           // set the abort info
//           pQueryAttr->pos = startPos;
//           pTableQueryInfo->lastKey = ((TSKEY *)pColInfoData->pData)[startPos];
//           pWindowResInfo->prevSKey = tw.skey;
//           win = tw;
//         }
//       } else {
//         break;  // offset is not 0, and next time window begins or ends in the next block.
//       }
//     }
//   }
2260
//
L
Liu Jicong 已提交
2261 2262 2263 2264
//   // check for error
//   if (terrno != TSDB_CODE_SUCCESS) {
//     longjmp(pRuntimeEnv->env, terrno);
//   }
2265
//
L
Liu Jicong 已提交
2266 2267
//   return true;
// }
2268

2269
/**
 * Install the downstream operators of an operator.
 *
 * The `num` pointers in pDownstream are copied into a newly allocated array that is stored in
 * p->pDownstream; an array installed earlier is released and replaced.
 *
 * @param p           operator that receives the downstream list
 * @param pDownstream array of `num` operator pointers to copy
 * @param num         number of entries in pDownstream
 * @return TSDB_CODE_SUCCESS, or TSDB_CODE_OUT_OF_MEMORY when the array cannot be allocated;
 *         in the failure case p is left unmodified.
 */
int32_t appendDownstream(SOperatorInfo* p, SOperatorInfo** pDownstream, int32_t num) {
  if (p->pDownstream == NULL) {
    assert(p->numOfDownstream == 0);
  }

  // allocate into a temporary first, so that a failed allocation does not clobber the current list
  void* pList = taosMemoryCalloc(1, num * POINTER_BYTES);
  if (pList == NULL) {
    return TSDB_CODE_OUT_OF_MEMORY;
  }

  // copy before releasing the previous list: the caller may pass p->pDownstream itself
  memcpy(pList, pDownstream, num * POINTER_BYTES);

  // release the list that is being replaced instead of leaking it
  taosMemoryFreeClear(p->pDownstream);

  p->pDownstream = pList;
  p->numOfDownstream = num;
  return TSDB_CODE_SUCCESS;
}

wmmhello's avatar
wmmhello 已提交
2284
static void doDestroyTableList(STableListInfo* pTableqinfoList);
2285

2286
/*
 * Consistency check of a table's query window and last key against the task window for the
 * given scan order. The assertions are compiled out (#if 0), so the function currently does nothing.
 */
static void doTableQueryInfoTimeWindowCheck(SExecTaskInfo* pTaskInfo, STableQueryInfo* pTableQueryInfo, int32_t order) {
#if 0
  if (order == TSDB_ORDER_ASC) {
    assert(pTableQueryInfo->win.skey <= pTableQueryInfo->win.ekey);
    assert(pTableQueryInfo->lastKey >= pTaskInfo->window.skey);
    assert(pTableQueryInfo->win.skey >= pTaskInfo->window.skey &&
           pTableQueryInfo->win.ekey <= pTaskInfo->window.ekey);
  } else {
    assert(pTableQueryInfo->win.skey >= pTableQueryInfo->win.ekey);
    assert(pTableQueryInfo->lastKey <= pTaskInfo->window.skey);
    assert(pTableQueryInfo->win.skey <= pTaskInfo->window.skey &&
           pTableQueryInfo->win.ekey >= pTaskInfo->window.ekey);
  }
#endif
}

2302 2303 2304 2305
// Context passed as the callback parameter of a fetch request (see loadRemoteDataCallback).
typedef struct SFetchRspHandleWrapper {
  uint32_t exchangeId;   // id used to look up the exchange operator in exchangeObjRefPool
  int32_t  sourceIndex;  // index of the requested source in the exchange operator's pSourceDataInfo array
} SFetchRspHandleWrapper;
2306

2307
int32_t loadRemoteDataCallback(void* param, const SDataBuf* pMsg, int32_t code) {
X
Xiaoyu Wang 已提交
2308
  SFetchRspHandleWrapper* pWrapper = (SFetchRspHandleWrapper*)param;
2309 2310 2311 2312 2313 2314 2315

  SExchangeInfo* pExchangeInfo = taosAcquireRef(exchangeObjRefPool, pWrapper->exchangeId);
  if (pExchangeInfo == NULL) {
    qWarn("failed to acquire exchange operator, since it may have been released");
    return TSDB_CODE_SUCCESS;
  }

X
Xiaoyu Wang 已提交
2316
  int32_t          index = pWrapper->sourceIndex;
2317
  SSourceDataInfo* pSourceDataInfo = taosArrayGet(pExchangeInfo->pSourceDataInfo, index);
2318

H
Haojun Liao 已提交
2319 2320
  if (code == TSDB_CODE_SUCCESS) {
    pSourceDataInfo->pRsp = pMsg->pData;
2321

H
Haojun Liao 已提交
2322 2323
    SRetrieveTableRsp* pRsp = pSourceDataInfo->pRsp;
    pRsp->numOfRows = htonl(pRsp->numOfRows);
dengyihao's avatar
dengyihao 已提交
2324
    pRsp->compLen = htonl(pRsp->compLen);
2325
    pRsp->numOfCols = htonl(pRsp->numOfCols);
dengyihao's avatar
dengyihao 已提交
2326
    pRsp->useconds = htobe64(pRsp->useconds);
2327

2328
    ASSERT(pRsp != NULL);
2329
    qDebug("%s fetch rsp received, index:%d, rows:%d", pSourceDataInfo->taskId, index, pRsp->numOfRows);
H
Haojun Liao 已提交
2330 2331 2332
  } else {
    pSourceDataInfo->code = code;
  }
H
Haojun Liao 已提交
2333

H
Haojun Liao 已提交
2334
  pSourceDataInfo->status = EX_SOURCE_DATA_READY;
2335 2336 2337 2338 2339

  tsem_post(&pExchangeInfo->ready);
  taosReleaseRef(exchangeObjRefPool, pWrapper->exchangeId);

  taosMemoryFree(pWrapper);
wmmhello's avatar
wmmhello 已提交
2340
  return TSDB_CODE_SUCCESS;
H
Haojun Liao 已提交
2341 2342 2343 2344
}

// Release a send-message descriptor: first the message buffer it carries, then the descriptor itself.
static void destroySendMsgInfo(SMsgSendInfo* pMsgBody) {
  assert(pMsgBody != NULL);

  taosMemoryFreeClear(pMsgBody->msgInfo.pData);  // message buffer
  taosMemoryFreeClear(pMsgBody);                 // descriptor
}

S
Shengliang Guan 已提交
2349
/*
 * Process the rpc response of a fetch request: copy the rpc content into a buffer of its own,
 * pass that buffer to the callback registered in the send info (pMsg->info.ahandle), then release
 * the rpc content and the send info.
 *
 * If the copy cannot be allocated, the callback is invoked with a NULL data pointer and
 * TSDB_CODE_OUT_OF_MEMORY as code.
 */
void qProcessFetchRsp(void* parent, SRpcMsg* pMsg, SEpSet* pEpSet) {
  SMsgSendInfo* pSendInfo = (SMsgSendInfo*)pMsg->info.ahandle;
  assert(pMsg->info.ahandle != NULL);

  SDataBuf buf = {.len = pMsg->contLen, .pData = NULL};

  if (pMsg->contLen > 0) {
    buf.pData = taosMemoryCalloc(1, pMsg->contLen);

    if (buf.pData != NULL) {
      memcpy(buf.pData, pMsg->pCont, pMsg->contLen);
    } else {
      terrno = TSDB_CODE_OUT_OF_MEMORY;
      pMsg->code = TSDB_CODE_OUT_OF_MEMORY;
    }
  }

  pSendInfo->fp(pSendInfo->param, &buf, pMsg->code);

  rpcFreeCont(pMsg->pCont);
  destroySendMsgInfo(pSendInfo);
}

L
Liu Jicong 已提交
2370
/**
 * Build a fetch request for one source of the exchange operator and send it asynchronously.
 * The response is delivered to loadRemoteDataCallback.
 *
 * The source must be in state EX_SOURCE_DATA_NOT_READY when the request is issued.
 *
 * @param pExchangeInfo exchange operator state
 * @param pTaskInfo     owning task; its code is set when the request cannot be issued
 * @param sourceIndex   index of the source in pSources / pSourceDataInfo
 * @return TSDB_CODE_SUCCESS when the request has been handed to the transport,
 *         TSDB_CODE_QRY_OUT_OF_MEMORY on allocation failure, or the code reported by
 *         asyncSendMsgToServer when sending fails.
 */
static int32_t doSendFetchDataRequest(SExchangeInfo* pExchangeInfo, SExecTaskInfo* pTaskInfo, int32_t sourceIndex) {
  size_t totalSources = taosArrayGetSize(pExchangeInfo->pSources);

  SResFetchReq* pMsg = taosMemoryCalloc(1, sizeof(SResFetchReq));
  if (NULL == pMsg) {
    pTaskInfo->code = TSDB_CODE_QRY_OUT_OF_MEMORY;
    return pTaskInfo->code;
  }

  SDownstreamSourceNode* pSource = taosArrayGet(pExchangeInfo->pSources, sourceIndex);
  SSourceDataInfo*       pDataInfo = taosArrayGet(pExchangeInfo->pSourceDataInfo, sourceIndex);

  ASSERT(pDataInfo->status == EX_SOURCE_DATA_NOT_READY);

  qDebug("%s build fetch msg and send to vgId:%d, ep:%s, taskId:0x%" PRIx64 ", %d/%" PRIzu, GET_TASKID(pTaskInfo),
         pSource->addr.nodeId, pSource->addr.epSet.eps[0].fqdn, pSource->taskId, sourceIndex, totalSources);

  // the request is transmitted in network byte order
  pMsg->header.vgId = htonl(pSource->addr.nodeId);
  pMsg->sId = htobe64(pSource->schedId);
  pMsg->taskId = htobe64(pSource->taskId);
  pMsg->queryId = htobe64(pTaskInfo->id.queryId);

  // send the fetch remote task result request
  SMsgSendInfo* pMsgSendInfo = taosMemoryCalloc(1, sizeof(SMsgSendInfo));
  if (NULL == pMsgSendInfo) {
    taosMemoryFreeClear(pMsg);
    qError("%s prepare message %d failed", GET_TASKID(pTaskInfo), (int32_t)sizeof(SMsgSendInfo));
    pTaskInfo->code = TSDB_CODE_QRY_OUT_OF_MEMORY;
    return pTaskInfo->code;
  }

  SFetchRspHandleWrapper* pWrapper = taosMemoryCalloc(1, sizeof(SFetchRspHandleWrapper));
  if (NULL == pWrapper) {
    // do not dereference a failed allocation; undo what has been allocated so far
    taosMemoryFreeClear(pMsgSendInfo);
    taosMemoryFreeClear(pMsg);
    qError("%s prepare message %d failed", GET_TASKID(pTaskInfo), (int32_t)sizeof(SFetchRspHandleWrapper));
    pTaskInfo->code = TSDB_CODE_QRY_OUT_OF_MEMORY;
    return pTaskInfo->code;
  }

  pWrapper->exchangeId = pExchangeInfo->self;
  pWrapper->sourceIndex = sourceIndex;

  pMsgSendInfo->param = pWrapper;  // released by loadRemoteDataCallback
  pMsgSendInfo->msgInfo.pData = pMsg;
  pMsgSendInfo->msgInfo.len = sizeof(SResFetchReq);
  pMsgSendInfo->msgType = TDMT_VND_FETCH;
  pMsgSendInfo->fp = loadRemoteDataCallback;

  int64_t transporterId = 0;
  int32_t code = asyncSendMsgToServer(pExchangeInfo->pTransporter, &pSource->addr.epSet, &transporterId, pMsgSendInfo);
  if (code != TSDB_CODE_SUCCESS) {
    // Report the failure: no response will arrive for a request that was never sent, so a caller
    // that waits on the "ready" semaphore would block.
    // NOTE(review): ownership of pMsgSendInfo/pWrapper after a failed send is not visible from here;
    // they are deliberately not released to avoid a double free - confirm against asyncSendMsgToServer.
    qError("%s failed to send fetch msg to vgId:%d, taskId:0x%" PRIx64 ", code:%s", GET_TASKID(pTaskInfo),
           pSource->addr.nodeId, pSource->taskId, tstrerror(code));
    pTaskInfo->code = code;
    return code;
  }

  return TSDB_CODE_SUCCESS;
}

2416
// NOTE: sources columns are more than the destination SSDatablock columns.
2417
/*
 * Copy the columns of pCols that are requested by pColMatchInfo into the matching slots of pBlock.
 *
 * Both lists are walked in parallel by column id: match entries that are not marked as output are
 * skipped, source columns with a smaller column id than the current match entry are skipped, and a
 * source column id larger than the current match entry is treated as a logic error.
 */
void relocateColumnData(SSDataBlock* pBlock, const SArray* pColMatchInfo, SArray* pCols) {
  size_t numOfSrcCols = taosArrayGetSize(pCols);
  size_t numOfMatched = taosArrayGetSize(pColMatchInfo);

  int32_t srcIdx = 0;
  int32_t matchIdx = 0;

  while (srcIdx < numOfSrcCols && matchIdx < numOfMatched) {
    SColumnInfoData* pSrc = taosArrayGet(pCols, srcIdx);
    SColMatchInfo*   pMatch = taosArrayGet(pColMatchInfo, matchIdx);

    if (!pMatch->output) {  // not part of the output, try the next match entry
      matchIdx++;
      continue;
    }

    if (pSrc->info.colId < pMatch->colId) {  // source column is not requested
      srcIdx++;
      continue;
    }

    // the source column id must not run ahead of the match list
    if (pSrc->info.colId != pMatch->colId) {
      ASSERT(0);
      continue;
    }

    SColumnInfoData* pDst = taosArrayGet(pBlock->pDataBlock, pMatch->targetSlotId);
    colDataAssign(pDst, pSrc, pBlock->info.rows);
    srcIdx++;
    matchIdx++;
  }
}

2442
/**
 * Fill the result block from the payload of a fetch response and update the load statistics.
 *
 * When pColList is NULL the payload is decoded directly into pRes. Otherwise the payload starts
 * with a column count followed by that many SSysTableSchema entries (both in network byte order);
 * it is decoded into a temporary block whose columns are then relocated into pRes according to
 * pColList.
 *
 * @param pRes        result block to fill
 * @param pLoadInfo   statistics to update (rows, size, elapsed time)
 * @param numOfRows   number of rows in the payload
 * @param pData       payload; the schema header is converted to host byte order in place
 * @param compLen     payload length, added to pLoadInfo->totalSize
 * @param numOfOutput number of output columns
 * @param startTs     start timestamp (us) used to compute the elapsed time
 * @param total       optional counter that is increased by numOfRows
 * @param pColList    optional column match list
 * @return TSDB_CODE_SUCCESS, or TSDB_CODE_OUT_OF_MEMORY when the temporary block cannot be built
 */
int32_t setDataBlockFromFetchRsp(SSDataBlock* pRes, SLoadRemoteDataInfo* pLoadInfo, int32_t numOfRows, char* pData,
                                 int32_t compLen, int32_t numOfOutput, int64_t startTs, uint64_t* total,
                                 SArray* pColList) {
  if (pColList == NULL) {  // data from other sources
    blockCompressDecode(pRes, numOfOutput, numOfRows, pData);
    pRes->info.rows = numOfRows;
  } else {  // extract data according to pColList
    ASSERT(numOfOutput == taosArrayGetSize(pColList));
    char* pStart = pData;

    int32_t numOfCols = htonl(*(int32_t*)pStart);
    pStart += sizeof(int32_t);

    // todo refactor:extract method
    // convert the schema entries to host byte order in place
    SSysTableSchema* pSchema = (SSysTableSchema*)pStart;
    for (int32_t i = 0; i < numOfCols; ++i) {
      SSysTableSchema* p = (SSysTableSchema*)pStart;

      p->colId = htons(p->colId);
      p->bytes = htonl(p->bytes);
      pStart += sizeof(SSysTableSchema);
    }

    // temporary block that receives the decoded columns
    SSDataBlock* pBlock = taosMemoryCalloc(1, sizeof(SSDataBlock));
    if (pBlock == NULL) {
      return TSDB_CODE_OUT_OF_MEMORY;
    }

    pBlock->pDataBlock = taosArrayInit(numOfCols, sizeof(SColumnInfoData));
    if (pBlock->pDataBlock == NULL) {
      taosMemoryFree(pBlock);
      return TSDB_CODE_OUT_OF_MEMORY;
    }

    pBlock->info.numOfCols = numOfCols;

    for (int32_t i = 0; i < numOfCols; ++i) {
      SColumnInfoData idata = {0};

      idata.info.type = pSchema[i].type;
      idata.info.bytes = pSchema[i].bytes;
      idata.info.colId = pSchema[i].colId;
      idata.hasNull = true;

      if (taosArrayPush(pBlock->pDataBlock, &idata) == NULL) {
        taosArrayDestroy(pBlock->pDataBlock);
        taosMemoryFree(pBlock);
        return TSDB_CODE_OUT_OF_MEMORY;
      }

      if (IS_VAR_DATA_TYPE(idata.info.type)) {
        pBlock->info.hasVarCol = true;
      }
    }

    blockCompressDecode(pBlock, numOfCols, numOfRows, pStart);

    // data from mnode
    pRes->info.rows = numOfRows;
    relocateColumnData(pRes, pColList, pBlock->pDataBlock);

    // NOTE(review): only the column array and the block itself are released here; whether the
    // buffers filled by blockCompressDecode need blockDataDestroy(pBlock) should be confirmed.
    taosArrayDestroy(pBlock->pDataBlock);
    taosMemoryFree(pBlock);
  }

  // todo move this to time window aggregator, since the primary timestamp may not be known by exchange operator.
  blockDataUpdateTsWindow(pRes, 0);

  int64_t el = taosGetTimestampUs() - startTs;

  pLoadInfo->totalRows += numOfRows;
  pLoadInfo->totalSize += compLen;

  if (total != NULL) {
    *total += numOfRows;
  }

  pLoadInfo->totalElapsed += el;
  return TSDB_CODE_SUCCESS;
}
2508

L
Liu Jicong 已提交
2509 2510
/*
 * Called once every source of the exchange operator has been drained: adds the time elapsed since
 * startTs to the load statistics, logs a summary and marks the operator as completed.
 * Always returns NULL, so callers can return the result directly as "no more data".
 */
static void* setAllSourcesCompleted(SOperatorInfo* pOperator, int64_t startTs) {
  SExecTaskInfo*       pTaskInfo = pOperator->pTaskInfo;
  SExchangeInfo*       pExchangeInfo = pOperator->info;
  SLoadRemoteDataInfo* pLoadInfo = &pExchangeInfo->loadInfo;

  int64_t elapsedUs = taosGetTimestampUs() - startTs;
  pLoadInfo->totalElapsed += elapsedUs;

  size_t numOfSources = taosArrayGetSize(pExchangeInfo->pSources);
  qDebug("%s all %" PRIzu " sources are exhausted, total rows: %" PRIu64 " bytes:%" PRIu64 ", elapsed:%.2f ms",
         GET_TASKID(pTaskInfo), numOfSources, pLoadInfo->totalRows, pLoadInfo->totalSize,
         pLoadInfo->totalElapsed / 1000.0);

  doSetOperatorCompleted(pOperator);
  return NULL;
}

L
Liu Jicong 已提交
2527 2528
/*
 * Return the next result block when all sources are fetched concurrently.
 *
 * The sources are scanned repeatedly until one of them has a response ready:
 *  - exhausted sources are only counted,
 *  - sources without a ready response are skipped,
 *  - a ready response that carries an error code aborts the scan (task code is set, NULL returned),
 *  - a ready response without rows marks the source as exhausted,
 *  - a ready response with rows is decoded into the result block; unless the response is flagged
 *    as completed, a new fetch request is issued for that source before the block is returned.
 *
 * NULL is returned when all sources are exhausted or when an error occurred.
 */
static SSDataBlock* concurrentlyLoadRemoteDataImpl(SOperatorInfo* pOperator, SExchangeInfo* pExchangeInfo,
                                                   SExecTaskInfo* pTaskInfo) {
  int64_t startTs = taosGetTimestampUs();
  size_t  totalSources = taosArrayGetSize(pExchangeInfo->pSources);

  for (;;) {
    int32_t completed = 0;

    for (int32_t i = 0; i < totalSources; ++i) {
      SSourceDataInfo* pDataInfo = taosArrayGet(pExchangeInfo->pSourceDataInfo, i);

      if (pDataInfo->status == EX_SOURCE_DATA_EXHAUSTED) {
        completed += 1;
        continue;
      }

      if (pDataInfo->status != EX_SOURCE_DATA_READY) {
        continue;
      }

      if (pDataInfo->code != TSDB_CODE_SUCCESS) {
        pTaskInfo->code = pDataInfo->code;
        return NULL;
      }

      SRetrieveTableRsp*     pRsp = pDataInfo->pRsp;
      SDownstreamSourceNode* pSource = taosArrayGet(pExchangeInfo->pSources, i);
      SSDataBlock*           pRes = pExchangeInfo->pResult;
      SLoadRemoteDataInfo*   pLoadInfo = &pExchangeInfo->loadInfo;

      // nothing left in this source
      if (pRsp->numOfRows == 0) {
        qDebug("%s vgId:%d, taskId:0x%" PRIx64 " index:%d completed, rowsOfSource:%" PRIu64 ", totalRows:%" PRIu64
               ", completed:%d try next %d/%" PRIzu,
               GET_TASKID(pTaskInfo), pSource->addr.nodeId, pSource->taskId, i, pDataInfo->totalRows,
               pLoadInfo->totalRows, completed + 1, i + 1, totalSources);
        pDataInfo->status = EX_SOURCE_DATA_EXHAUSTED;
        completed += 1;
        taosMemoryFreeClear(pDataInfo->pRsp);
        continue;
      }

      int32_t code = setDataBlockFromFetchRsp(pRes, pLoadInfo, pRsp->numOfRows, pRsp->data, pRsp->compLen,
                                              pRsp->numOfCols, startTs, &pDataInfo->totalRows, NULL);
      if (code != 0) {
        taosMemoryFreeClear(pDataInfo->pRsp);
        pTaskInfo->code = code;
        return NULL;
      }

      if (pRsp->completed != 1) {
        qDebug("%s fetch msg rsp from vgId:%d, taskId:0x%" PRIx64 " numOfRows:%d, totalRows:%" PRIu64
               ", totalBytes:%" PRIu64,
               GET_TASKID(pTaskInfo), pSource->addr.nodeId, pSource->taskId, pRes->info.rows, pLoadInfo->totalRows,
               pLoadInfo->totalSize);
      } else {
        qDebug("%s fetch msg rsp from vgId:%d, taskId:0x%" PRIx64
               " index:%d completed, numOfRows:%d, rowsOfSource:%" PRIu64 ", totalRows:%" PRIu64 ", totalBytes:%" PRIu64
               ", completed:%d try next %d/%" PRIzu,
               GET_TASKID(pTaskInfo), pSource->addr.nodeId, pSource->taskId, i, pRes->info.rows, pDataInfo->totalRows,
               pLoadInfo->totalRows, pLoadInfo->totalSize, completed + 1, i + 1, totalSources);
        completed += 1;
        pDataInfo->status = EX_SOURCE_DATA_EXHAUSTED;
      }

      // the response has been consumed
      taosMemoryFreeClear(pDataInfo->pRsp);

      // more data is expected from this source: request the next batch before returning
      if (pDataInfo->status != EX_SOURCE_DATA_EXHAUSTED) {
        pDataInfo->status = EX_SOURCE_DATA_NOT_READY;
        code = doSendFetchDataRequest(pExchangeInfo, pTaskInfo, i);
        if (code != TSDB_CODE_SUCCESS) {
          taosMemoryFreeClear(pDataInfo->pRsp);
          pTaskInfo->code = code;
          return NULL;
        }
      }

      return pExchangeInfo->pResult;
    }

    if (completed == totalSources) {
      return setAllSourcesCompleted(pOperator, startTs);
    }
  }
}

L
Liu Jicong 已提交
2614 2615 2616
/*
 * Open step of the concurrent load mode: issue one fetch request per source, record the open cost,
 * switch the operator to OP_RES_TO_RETURN and wait on the "ready" semaphore.
 *
 * Returns the code of the first request that could not be issued (also stored in the task),
 * TSDB_CODE_SUCCESS otherwise.
 */
static int32_t prepareConcurrentlyLoad(SOperatorInfo* pOperator) {
  SExecTaskInfo* pTaskInfo = pOperator->pTaskInfo;
  SExchangeInfo* pExchangeInfo = pOperator->info;

  size_t  numOfSources = taosArrayGetSize(pExchangeInfo->pSources);
  int64_t beginUs = taosGetTimestampUs();

  // Asynchronously send all fetch requests to all sources.
  for (int32_t idx = 0; idx < numOfSources; ++idx) {
    int32_t code = doSendFetchDataRequest(pExchangeInfo, pTaskInfo, idx);
    if (code == TSDB_CODE_SUCCESS) {
      continue;
    }

    pTaskInfo->code = code;
    return code;
  }

  int64_t finishUs = taosGetTimestampUs();
  qDebug("%s send all fetch requests to %" PRIzu " sources completed, elapsed:%.2fms", GET_TASKID(pTaskInfo),
         numOfSources, (finishUs - beginUs) / 1000.0);

  pOperator->status = OP_RES_TO_RETURN;
  pOperator->cost.openCost = taosGetTimestampUs() - beginUs;

  tsem_wait(&pExchangeInfo->ready);
  return TSDB_CODE_SUCCESS;
}

L
Liu Jicong 已提交
2641 2642 2643
/**
 * Return the next result block when the sources are fetched one after another.
 *
 * For the current source a fetch request is sent and the response is awaited. A response without
 * rows moves on to the next source; a response with rows is decoded into the result block and
 * returned. A response flagged as completed also advances to the next source.
 *
 * @param pOperator exchange operator
 * @return the result block, or NULL when all sources are exhausted or an error occurred (the task
 *         code is set in the error case)
 */
static SSDataBlock* seqLoadRemoteData(SOperatorInfo* pOperator) {
  SExchangeInfo* pExchangeInfo = pOperator->info;
  SExecTaskInfo* pTaskInfo = pOperator->pTaskInfo;

  size_t  totalSources = taosArrayGetSize(pExchangeInfo->pSources);
  int64_t startTs = taosGetTimestampUs();

  while (1) {
    if (pExchangeInfo->current >= totalSources) {
      return setAllSourcesCompleted(pOperator, startTs);
    }

    SSourceDataInfo*       pDataInfo = taosArrayGet(pExchangeInfo->pSourceDataInfo, pExchangeInfo->current);
    SDownstreamSourceNode* pSource = taosArrayGet(pExchangeInfo->pSources, pExchangeInfo->current);

    // The callback leaves the source in EX_SOURCE_DATA_READY; reset it before asking the same source
    // again, otherwise the status assertion in doSendFetchDataRequest fails on the second request.
    pDataInfo->status = EX_SOURCE_DATA_NOT_READY;

    // do not wait for a response if the request could not be issued
    int32_t code = doSendFetchDataRequest(pExchangeInfo, pTaskInfo, pExchangeInfo->current);
    if (code != TSDB_CODE_SUCCESS) {
      pTaskInfo->code = code;
      return NULL;
    }

    tsem_wait(&pExchangeInfo->ready);

    if (pDataInfo->code != TSDB_CODE_SUCCESS) {
      qError("%s vgId:%d, taskID:0x%" PRIx64 " error happens, code:%s", GET_TASKID(pTaskInfo), pSource->addr.nodeId,
             pSource->taskId, tstrerror(pDataInfo->code));
      pOperator->pTaskInfo->code = pDataInfo->code;
      return NULL;
    }

    SRetrieveTableRsp*   pRsp = pDataInfo->pRsp;
    SLoadRemoteDataInfo* pLoadInfo = &pExchangeInfo->loadInfo;
    if (pRsp->numOfRows == 0) {
      qDebug("%s vgId:%d, taskID:0x%" PRIx64 " %d of total completed, rowsOfSource:%" PRIu64 ", totalRows:%" PRIu64
             " try next",
             GET_TASKID(pTaskInfo), pSource->addr.nodeId, pSource->taskId, pExchangeInfo->current + 1,
             pDataInfo->totalRows, pLoadInfo->totalRows);

      pDataInfo->status = EX_SOURCE_DATA_EXHAUSTED;
      pExchangeInfo->current += 1;
      taosMemoryFreeClear(pDataInfo->pRsp);
      continue;
    }

    SSDataBlock* pRes = pExchangeInfo->pResult;

    code = setDataBlockFromFetchRsp(pRes, pLoadInfo, pRsp->numOfRows, pRsp->data, pRsp->compLen, pRsp->numOfCols,
                                    startTs, &pDataInfo->totalRows, NULL);
    if (code != TSDB_CODE_SUCCESS) {
      // same handling as the concurrent load path: drop the response and report the error
      taosMemoryFreeClear(pDataInfo->pRsp);
      pTaskInfo->code = code;
      return NULL;
    }

    if (pRsp->completed == 1) {
      qDebug("%s fetch msg rsp from vgId:%d, taskId:0x%" PRIx64 " numOfRows:%d, rowsOfSource:%" PRIu64
             ", totalRows:%" PRIu64 ", totalBytes:%" PRIu64 " try next %d/%" PRIzu,
             GET_TASKID(pTaskInfo), pSource->addr.nodeId, pSource->taskId, pRes->info.rows, pDataInfo->totalRows,
             pLoadInfo->totalRows, pLoadInfo->totalSize, pExchangeInfo->current + 1, totalSources);

      pDataInfo->status = EX_SOURCE_DATA_EXHAUSTED;
      pExchangeInfo->current += 1;
    } else {
      qDebug("%s fetch msg rsp from vgId:%d, taskId:0x%" PRIx64 " numOfRows:%d, totalRows:%" PRIu64
             ", totalBytes:%" PRIu64,
             GET_TASKID(pTaskInfo), pSource->addr.nodeId, pSource->taskId, pRes->info.rows, pLoadInfo->totalRows,
             pLoadInfo->totalSize);
    }

    pOperator->resultInfo.totalRows += pRes->info.rows;
    taosMemoryFreeClear(pDataInfo->pRsp);
    return pExchangeInfo->pResult;
  }
}

L
Liu Jicong 已提交
2707
/*
 * Open function of the exchange operator. Does nothing when the operator is already opened.
 * In concurrent mode (seqLoadData not set) the fetch requests for all sources are issued here;
 * afterwards the operator is flagged as opened and the open cost is recorded.
 */
static int32_t prepareLoadRemoteData(SOperatorInfo* pOperator) {
  if (OPTR_IS_OPENED(pOperator)) {
    return TSDB_CODE_SUCCESS;
  }

  int64_t        openStartUs = taosGetTimestampUs();
  SExchangeInfo* pExchangeInfo = pOperator->info;

  int32_t code = TSDB_CODE_SUCCESS;
  if (!pExchangeInfo->seqLoadData) {
    code = prepareConcurrentlyLoad(pOperator);
  }

  if (code != TSDB_CODE_SUCCESS) {
    return code;
  }

  OPTR_SET_OPENED(pOperator);
  pOperator->cost.openCost = (taosGetTimestampUs() - openStartUs) / 1000.0;
  return TSDB_CODE_SUCCESS;
}

2727
/*
 * Produce the next block of the exchange operator: run the operator's open function first, then
 * load from the sources either sequentially or concurrently, depending on seqLoadData.
 * Returns NULL when opening fails or when the operator has already finished.
 */
static SSDataBlock* doLoadRemoteData(SOperatorInfo* pOperator) {
  SExecTaskInfo* pTaskInfo = pOperator->pTaskInfo;
  SExchangeInfo* pExchangeInfo = pOperator->info;

  pTaskInfo->code = pOperator->fpSet._openFn(pOperator);
  if (pTaskInfo->code != TSDB_CODE_SUCCESS) {
    return NULL;
  }

  if (pOperator->status == OP_EXEC_DONE) {
    size_t               numOfSources = taosArrayGetSize(pExchangeInfo->pSources);
    SLoadRemoteDataInfo* pStat = &pExchangeInfo->loadInfo;

    qDebug("%s all %" PRIzu " source(s) are exhausted, total rows:%" PRIu64 " bytes:%" PRIu64 ", elapsed:%.2f ms",
           GET_TASKID(pTaskInfo), numOfSources, pStat->totalRows, pStat->totalSize, pStat->totalElapsed / 1000.0);
    return NULL;
  }

  return pExchangeInfo->seqLoadData ? seqLoadRemoteData(pOperator)
                                    : concurrentlyLoadRemoteDataImpl(pOperator, pExchangeInfo, pTaskInfo);
}
2752

2753
// Allocate pInfo->pSourceDataInfo and fill it with one SSourceDataInfo per source.
// Every entry starts in state EX_SOURCE_DATA_NOT_READY, carries the task id string
// and its own position as index.
//
// Returns TSDB_CODE_SUCCESS, or TSDB_CODE_OUT_OF_MEMORY when the array cannot be
// created or an element cannot be appended. On the latter failure the array is
// destroyed and the field is reset to NULL so that later cleanup of pInfo does not
// touch a dangling pointer.
static int32_t initDataSource(int32_t numOfSources, SExchangeInfo* pInfo, const char* id) {
  pInfo->pSourceDataInfo = taosArrayInit(numOfSources, sizeof(SSourceDataInfo));
  if (pInfo->pSourceDataInfo == NULL) {
    return TSDB_CODE_OUT_OF_MEMORY;
  }

  for (int32_t i = 0; i < numOfSources; ++i) {
    SSourceDataInfo dataInfo = {0};
    dataInfo.status = EX_SOURCE_DATA_NOT_READY;
    dataInfo.taskId = id;
    dataInfo.index = i;

    SSourceDataInfo* pDs = taosArrayPush(pInfo->pSourceDataInfo, &dataInfo);
    if (pDs == NULL) {
      taosArrayDestroy(pInfo->pSourceDataInfo);
      pInfo->pSourceDataInfo = NULL;  // do not leave a dangling pointer behind
      return TSDB_CODE_OUT_OF_MEMORY;
    }
  }

  return TSDB_CODE_SUCCESS;
}

2774
// Initialize the source bookkeeping of an exchange operator from its physical plan node.
//
// Copies every entry of pExNode->pSrcEndPoints into pInfo->pSources, registers pInfo in
// exchangeObjRefPool (the returned reference id is stored in pInfo->self) and finally lets
// initDataSource() build pInfo->pSourceDataInfo.
//
// Returns TSDB_CODE_INVALID_PARA when the plan node has no source, TSDB_CODE_OUT_OF_MEMORY
// on allocation failure, otherwise the result of initDataSource().
static int32_t initExchangeOperator(SExchangePhysiNode* pExNode, SExchangeInfo* pInfo, const char* id) {
  size_t numOfSources = LIST_LENGTH(pExNode->pSrcEndPoints);

  if (numOfSources == 0) {
    qError("%s invalid number: %d of sources in exchange operator", id, (int32_t)numOfSources);
    return TSDB_CODE_INVALID_PARA;
  }

  // pSourceDataInfo is deliberately NOT allocated here: initDataSource() allocates it
  // unconditionally, so a second allocation in this function would be leaked.
  pInfo->pSources = taosArrayInit(numOfSources, sizeof(SDownstreamSourceNode));
  if (pInfo->pSources == NULL) {
    return TSDB_CODE_OUT_OF_MEMORY;
  }

  for (int32_t i = 0; i < (int32_t)numOfSources; ++i) {
    SNodeListNode* pNode = (SNodeListNode*)nodesListGetNode((SNodeList*)pExNode->pSrcEndPoints, i);
    if (taosArrayPush(pInfo->pSources, pNode) == NULL) {
      return TSDB_CODE_OUT_OF_MEMORY;
    }
  }

  pInfo->self = taosAddRef(exchangeObjRefPool, pInfo);

  return initDataSource(numOfSources, pInfo, id);
}

// Create the exchange operator described by pExNode.
//
// pTransporter is stored in the operator info as-is; pTaskInfo is the owning task.
// Returns the new operator, or NULL on failure, in which case pTaskInfo->code holds
// the error code and everything allocated here has been released.
SOperatorInfo* createExchangeOperatorInfo(void* pTransporter, SExchangePhysiNode* pExNode, SExecTaskInfo* pTaskInfo) {
  // initialized up front: the _error path reads it even when the very first check fails
  int32_t code = TSDB_CODE_OUT_OF_MEMORY;

  SExchangeInfo* pInfo = taosMemoryCalloc(1, sizeof(SExchangeInfo));
  SOperatorInfo* pOperator = taosMemoryCalloc(1, sizeof(SOperatorInfo));
  if (pInfo == NULL || pOperator == NULL) {
    goto _error;
  }

  code = initExchangeOperator(pExNode, pInfo, GET_TASKID(pTaskInfo));
  if (code != TSDB_CODE_SUCCESS) {
    goto _error;
  }

  tsem_init(&pInfo->ready, 0, 0);

  pInfo->seqLoadData = false;
  pInfo->pTransporter = pTransporter;
  pInfo->pResult = createResDataBlock(pExNode->node.pOutputDataBlockDesc);
  if (pInfo->pResult == NULL) {
    // pResult is dereferenced below; bail out instead of crashing
    code = TSDB_CODE_OUT_OF_MEMORY;
    goto _error;
  }

  pOperator->name = "ExchangeOperator";
  pOperator->operatorType = QUERY_NODE_PHYSICAL_PLAN_EXCHANGE;
  pOperator->blocking = false;
  pOperator->status = OP_NOT_OPENED;
  pOperator->info = pInfo;
  pOperator->numOfExprs = pInfo->pResult->info.numOfCols;
  pOperator->pTaskInfo = pTaskInfo;

  pOperator->fpSet = createOperatorFpSet(prepareLoadRemoteData, doLoadRemoteData, NULL, NULL,
                                         destroyExchangeOperatorInfo, NULL, NULL, NULL);
  return pOperator;

_error:
  if (pInfo != NULL) {
    doDestroyExchangeOperatorInfo(pInfo);
  }

  taosMemoryFreeClear(pInfo);
  taosMemoryFreeClear(pOperator);
  pTaskInfo->code = code;
  return NULL;
}

dengyihao's avatar
dengyihao 已提交
2838 2839
static int32_t doInitAggInfoSup(SAggSupporter* pAggSup, SqlFunctionCtx* pCtx, int32_t numOfOutput, size_t keyBufSize,
                                const char* pKey);
2840

2841
static void destroySortedMergeOperatorInfo(void* param, int32_t numOfOutput) {
L
Liu Jicong 已提交
2842
  SSortedMergeOperatorInfo* pInfo = (SSortedMergeOperatorInfo*)param;
H
Haojun Liao 已提交
2843
  taosArrayDestroy(pInfo->pSortInfo);
2844 2845 2846
  taosArrayDestroy(pInfo->groupInfo);

  if (pInfo->pSortHandle != NULL) {
H
Haojun Liao 已提交
2847
    tsortDestroySortHandle(pInfo->pSortHandle);
2848 2849
  }

H
Haojun Liao 已提交
2850
  blockDataDestroy(pInfo->binfo.pRes);
H
Haojun Liao 已提交
2851
  cleanupAggSup(&pInfo->aggSup);
2852
}
H
Haojun Liao 已提交
2853

L
Liu Jicong 已提交
2854
// Decide whether row rowIndex of pBlock belongs to the group whose key values are
// saved in buf (one saved cell per entry of groupInfo).
//
// groupInfo holds the column indexes (int32_t) of the group-by columns inside pBlock.
// Returns true when there are no group columns or when every group column of the row
// equals the saved value; false as soon as one column differs.
static bool needToMerge(SSDataBlock* pBlock, SArray* groupInfo, char** buf, int32_t rowIndex) {
  size_t size = taosArrayGetSize(groupInfo);
  if (size == 0) {
    return true;
  }

  for (int32_t i = 0; i < size; ++i) {
    int32_t* index = taosArrayGet(groupInfo, i);

    SColumnInfoData* pColInfo = taosArrayGet(pBlock->pDataBlock, *index);
    bool             isNull = colDataIsNull(pColInfo, rowIndex, pBlock->info.rows, NULL);

    // exactly one side is NULL: different group
    if ((isNull && buf[i] != NULL) || (!isNull && buf[i] == NULL)) {
      return false;
    }

    // both sides are NULL: equal, and there is no cell data to compare
    if (isNull) {
      continue;
    }

    char* pCell = colDataGetData(pColInfo, rowIndex);
    if (IS_VAR_DATA_TYPE(pColInfo->info.type)) {
      if (varDataLen(pCell) != varDataLen(buf[i])) {
        return false;
      }
      if (memcmp(varDataVal(pCell), varDataVal(buf[i]), varDataLen(pCell)) != 0) {
        return false;
      }
    } else if (memcmp(pCell, buf[i], pColInfo->info.bytes) != 0) {
      return false;
    }
  }

  // every group column matched the saved tuple: same group
  return true;
}

L
Liu Jicong 已提交
2889 2890 2891
// Merge row rowIndex of the current input into the running result of every expression.
//
// TODO: not implemented yet. The intended steps (previously kept as commented-out
// code) are: set the start row of each function context to rowIndex, then invoke the
// merge function of each expression (a UDF merge call for negative function ids).
// Until then this function has no effect.
static void doMergeResultImpl(SSortedMergeOperatorInfo* pInfo, SqlFunctionCtx* pCtx, int32_t numOfExpr,
                              int32_t rowIndex) {
  (void)pInfo;
  (void)pCtx;
  (void)numOfExpr;
  (void)rowIndex;
}
2908

L
Liu Jicong 已提交
2909 2910
// Finalize the running result of every expression of the current group.
//
// TODO: not implemented yet. The intended step (previously kept as commented-out
// code) is to call the finalize function of each function context, skipping dummy
// tag/timestamp functions and routing negative function ids to the UDF finalize call.
// Until then this function has no effect.
static void doFinalizeResultImpl(SqlFunctionCtx* pCtx, int32_t numOfExpr) {
  (void)pCtx;
  (void)numOfExpr;
}
2923

2924
// Copy the cells of row rowIndex for every column listed in pColumnList (int32_t
// column indexes into pBlock) into the caller-provided buffers rowColData[k].
// The number of bytes copied per cell is colDataGetLength() of that cell.
// Always returns true.
static bool saveCurrentTuple(char** rowColData, SArray* pColumnList, SSDataBlock* pBlock, int32_t rowIndex) {
  const int32_t numOfCols = (int32_t)taosArrayGetSize(pColumnList);

  int32_t k = 0;
  while (k < numOfCols) {
    int32_t*         pSlot = taosArrayGet(pColumnList, k);
    SColumnInfoData* pCol = taosArrayGet(pBlock->pDataBlock, *pSlot);

    char* pCell = colDataGetData(pCol, rowIndex);
    memcpy(rowColData[k], pCell, colDataGetLength(pCol, rowIndex));
    ++k;
  }

  return true;
}
2937

2938 2939
// Feed every row of pBlock into the sorted-merge aggregation.
//
// A row that belongs to the current group (see needToMerge) is merged into the running
// result. A row that starts a new group first closes the previous one: the result is
// finalized, the produced rows are added to the output block's row count, and process()
// is invoked on every context with a non-negative function id. The row is then merged
// and remembered as the new group key. The very first row only initializes the group key.
static void doMergeImpl(SOperatorInfo* pOperator, int32_t numOfExpr, SSDataBlock* pBlock) {
  SSortedMergeOperatorInfo* pInfo = pOperator->info;
  SqlFunctionCtx*           pCtx = pInfo->binfo.pCtx;

  for (int32_t i = 0; i < pBlock->info.rows; ++i) {
    if (!pInfo->hasGroupVal) {
      // no group key saved yet: only expected for the first row
      ASSERT(i == 0);
    } else if (needToMerge(pBlock, pInfo->groupInfo, pInfo->groupVal, i)) {
      // same group as the saved key: merge and move on
      doMergeResultImpl(pInfo, pCtx, numOfExpr, i);
      continue;
    } else {
      // group boundary: close the previous group
      doFinalizeResultImpl(pCtx, numOfExpr);
      int32_t numOfRows = getNumOfResult(pInfo->binfo.pCtx, pOperator->numOfExprs, NULL);

      // TODO check for available buffer;

      pInfo->binfo.pRes->info.rows += numOfRows;
      for (int32_t j = 0; j < numOfExpr; ++j) {
        if (pCtx[j].functionId >= 0) {
          pCtx[j].fpSet.process(&pCtx[j]);
        }
      }
    }

    // start of a (new) group: merge the row and remember its key
    doMergeResultImpl(pInfo, pCtx, numOfExpr, i);
    pInfo->hasGroupVal = saveCurrentTuple(pInfo->groupVal, pInfo->groupInfo, pBlock, i);
  }
}

2978 2979
// Drain the sort handle and aggregate the sorted tuples group by group.
//
// Tuples are pulled in batches of at most resultInfo.capacity rows into a temporary
// block, which is installed as function input and handed to doMergeImpl(). After the
// last batch the final group is finalized and its rows are added to the output block.
// Returns the operator's output block, or NULL when it contains no rows.
static SSDataBlock* doMerge(SOperatorInfo* pOperator) {
  SSortedMergeOperatorInfo* pInfo = pOperator->info;
  SSortHandle*              pHandle = pInfo->pSortHandle;

  SSDataBlock* pDataBlock = createOneDataBlock(pInfo->binfo.pRes, false);
  if (pDataBlock == NULL) {
    longjmp(pOperator->pTaskInfo->env, TSDB_CODE_OUT_OF_MEMORY);
  }
  blockDataEnsureCapacity(pDataBlock, pOperator->resultInfo.capacity);

  while (1) {
    blockDataCleanup(pDataBlock);
    while (1) {
      STupleHandle* pTupleHandle = tsortNextTuple(pHandle);
      if (pTupleHandle == NULL) {
        break;
      }

      // build datablock for merge for one group
      appendOneRowToDataBlock(pDataBlock, pTupleHandle);
      if (pDataBlock->info.rows >= pOperator->resultInfo.capacity) {
        break;
      }
    }

    if (pDataBlock->info.rows == 0) {
      break;
    }

    setInputDataBlock(pOperator, pInfo->binfo.pCtx, pDataBlock, TSDB_ORDER_ASC, MAIN_SCAN, true);
    doMergeImpl(pOperator, pOperator->numOfExprs, pDataBlock);
    // flush to tuple store, and after all data have been handled, return to upstream node or sink node
  }

  doFinalizeResultImpl(pInfo->binfo.pCtx, pOperator->numOfExprs);
  int32_t numOfRows = getNumOfResult(pInfo->binfo.pCtx, pOperator->numOfExprs, NULL);

  // TODO check for available buffer;

  // next group info data
  pInfo->binfo.pRes->info.rows += numOfRows;

  // the temporary input block is owned by this function; release it once all
  // processing that reads the input has finished
  blockDataDestroy(pDataBlock);

  return (pInfo->binfo.pRes->info.rows > 0) ? pInfo->binfo.pRes : NULL;
}
3021

L
Liu Jicong 已提交
3022 3023
// Fetch up to `capacity` sorted rows from pHandle and copy them into pDataBlock.
//
// pDataBlock is cleaned first. Rows are collected in a block obtained from
// tsortGetSortedDataBlock(); if rows were collected, each column named by pColMatchInfo
// (srcSlotId -> targetSlotId) is copied into pDataBlock and pDataBlock's row count and
// capacity are set to the collected row count. The intermediate block is destroyed
// before returning. pInfo is not used.
// Returns pDataBlock when it holds at least one row, otherwise NULL (also when no
// sorted block could be obtained).
SSDataBlock* getSortedMergeBlockData(SSortHandle* pHandle, SSDataBlock* pDataBlock, int32_t capacity,
                                     SArray* pColMatchInfo, SSortedMergeOperatorInfo* pInfo) {
  blockDataCleanup(pDataBlock);

  SSDataBlock* p = tsortGetSortedDataBlock(pHandle);
  if (p == NULL) {
    return NULL;
  }

  blockDataEnsureCapacity(p, capacity);

  // pull tuples until the sorter is drained or the block is full
  STupleHandle* pTupleHandle = NULL;
  while ((pTupleHandle = tsortNextTuple(pHandle)) != NULL) {
    appendOneRowToDataBlock(p, pTupleHandle);
    if (p->info.rows >= capacity) {
      break;
    }
  }

  if (p->info.rows > 0) {
    int32_t numOfCols = taosArrayGetSize(pColMatchInfo);
    int32_t i = 0;
    while (i < numOfCols) {
      SColMatchInfo* pmInfo = taosArrayGet(pColMatchInfo, i);
      ASSERT(pmInfo->matchType == COL_MATCH_FROM_SLOT_ID);

      SColumnInfoData* pSrc = taosArrayGet(p->pDataBlock, pmInfo->srcSlotId);
      SColumnInfoData* pDst = taosArrayGet(pDataBlock->pDataBlock, pmInfo->targetSlotId);
      colDataAssign(pDst, pSrc, p->info.rows);
      ++i;
    }

    pDataBlock->info.capacity = p->info.rows;
    pDataBlock->info.rows = p->info.rows;
  }

  blockDataDestroy(p);

  if (pDataBlock->info.rows > 0) {
    return pDataBlock;
  }
  return NULL;
}

3064
// Get-next callback of the sorted-merge operator.
//
// First call: creates a multi-source merge sort handle over all downstream operators,
// opens it, switches the operator to OP_RES_TO_RETURN and returns the result of doMerge().
// Later calls (status OP_RES_TO_RETURN) return blocks from getSortedMergeBlockData().
// Returns NULL once the operator status is OP_EXEC_DONE.
// Failures are reported by longjmp() to the task's jump buffer.
static SSDataBlock* doSortedMerge(SOperatorInfo* pOperator) {
  if (pOperator->status == OP_EXEC_DONE) {
    return NULL;
  }

  SExecTaskInfo*            pTaskInfo = pOperator->pTaskInfo;
  SSortedMergeOperatorInfo* pInfo = pOperator->info;
  if (pOperator->status == OP_RES_TO_RETURN) {
    return getSortedMergeBlockData(pInfo->pSortHandle, pInfo->binfo.pRes, pOperator->resultInfo.capacity, NULL, pInfo);
  }

  int32_t numOfBufPage = pInfo->sortBufSize / pInfo->bufPageSize;
  // pass the real task id string; a quoted "GET_TASKID(pTaskInfo)" would hand the
  // macro's source text to the sort handle instead of the id
  pInfo->pSortHandle = tsortCreateSortHandle(pInfo->pSortInfo, NULL, SORT_MULTISOURCE_MERGE, pInfo->bufPageSize,
                                             numOfBufPage, pInfo->binfo.pRes, GET_TASKID(pTaskInfo));
  if (pInfo->pSortHandle == NULL) {
    longjmp(pTaskInfo->env, TSDB_CODE_OUT_OF_MEMORY);
  }

  tsortSetFetchRawDataFp(pInfo->pSortHandle, loadNextDataBlock, NULL, NULL);

  for (int32_t i = 0; i < pOperator->numOfDownstream; ++i) {
    SSortSource* ps = taosMemoryCalloc(1, sizeof(SSortSource));
    if (ps == NULL) {
      longjmp(pTaskInfo->env, TSDB_CODE_OUT_OF_MEMORY);
    }

    ps->param = pOperator->pDownstream[i];
    tsortAddSource(pInfo->pSortHandle, ps);
  }

  int32_t code = tsortOpen(pInfo->pSortHandle);
  if (code != TSDB_CODE_SUCCESS) {
    longjmp(pTaskInfo->env, terrno);
  }

  pOperator->status = OP_RES_TO_RETURN;
  return doMerge(pOperator);
}
3095

L
Liu Jicong 已提交
3096 3097
// Prepare the group-by bookkeeping of a sorted-merge operator.
//
// For every SColumn in pGroupInfo the expression whose result slot id equals the
// column id is looked up; its index in pExprInfo is appended to pInfo->groupInfo.
// pInfo->groupVal is then allocated as a single buffer: a table of one char* per
// group column, followed by the storage those pointers refer to (pCol->bytes each).
//
// Returns 0 when there are no group columns, TSDB_CODE_OUT_OF_MEMORY on allocation
// failure, otherwise TSDB_CODE_SUCCESS.
static int32_t initGroupCol(SExprInfo* pExprInfo, int32_t numOfCols, SArray* pGroupInfo,
                            SSortedMergeOperatorInfo* pInfo) {
  if (pGroupInfo == NULL || taosArrayGetSize(pGroupInfo) == 0) {
    return 0;
  }

  int32_t len = 0;
  SArray* plist = taosArrayInit(3, sizeof(SColumn));
  pInfo->groupInfo = taosArrayInit(3, sizeof(int32_t));

  if (plist == NULL || pInfo->groupInfo == NULL) {
    if (plist != NULL) {
      taosArrayDestroy(plist);  // local helper list must not leak on failure
    }
    return TSDB_CODE_OUT_OF_MEMORY;
  }

  // iterate over the requested group columns; pInfo->groupInfo was just created and
  // is still empty at this point
  size_t numOfGroupCol = taosArrayGetSize(pGroupInfo);
  for (int32_t i = 0; i < numOfGroupCol; ++i) {
    SColumn* pCol = taosArrayGet(pGroupInfo, i);
    for (int32_t j = 0; j < numOfCols; ++j) {
      SExprInfo* pe = &pExprInfo[j];
      if (pe->base.resSchema.slotId == pCol->colId) {
        taosArrayPush(plist, pCol);
        taosArrayPush(pInfo->groupInfo, &j);
        len += pCol->bytes;
        break;
      }
    }
  }

  ASSERT(taosArrayGetSize(pGroupInfo) == taosArrayGetSize(plist));

  pInfo->groupVal = taosMemoryCalloc(1, (POINTER_BYTES * numOfGroupCol + len));
  if (pInfo->groupVal == NULL) {
    taosArrayDestroy(plist);
    return TSDB_CODE_OUT_OF_MEMORY;
  }

  // the value storage starts right behind the pointer table; the offset is in bytes,
  // so the arithmetic must be done on a char*, not on the char** itself
  int32_t offset = 0;
  char*   start = (char*)pInfo->groupVal + (POINTER_BYTES * numOfGroupCol);
  for (int32_t i = 0; i < numOfGroupCol; ++i) {
    pInfo->groupVal[i] = start + offset;
    SColumn* pCol = taosArrayGet(plist, i);
    offset += pCol->bytes;
  }

  taosArrayDestroy(plist);

  return TSDB_CODE_SUCCESS;
}
H
Haojun Liao 已提交
3144

L
Liu Jicong 已提交
3145 3146 3147
// Create a sorted-merge operator on top of `downstream`.
//
// pExprInfo/num describe the output expressions, pSortInfo the sort order and
// pGroupInfo the optional group-by columns. Returns the operator, or NULL with
// terrno set to TSDB_CODE_QRY_OUT_OF_MEMORY on any failure.
//
// NOTE(review): binfo.pRes is checked for NULL below but is not assigned anywhere in
// this function before that check (pInfo is zero-allocated) - confirm whether one of
// the helpers is expected to set it; otherwise this always takes the error path.
SOperatorInfo* createSortedMergeOperatorInfo(SOperatorInfo** downstream, int32_t numOfDownstream, SExprInfo* pExprInfo,
                                             int32_t num, SArray* pSortInfo, SArray* pGroupInfo,
                                             SExecTaskInfo* pTaskInfo) {
  int32_t                   code = TSDB_CODE_SUCCESS;
  SSortedMergeOperatorInfo* pInfo = taosMemoryCalloc(1, sizeof(SSortedMergeOperatorInfo));
  SOperatorInfo*            pOperator = taosMemoryCalloc(1, sizeof(SOperatorInfo));
  if (pOperator == NULL || pInfo == NULL) {
    goto _error;
  }

  pInfo->binfo.pCtx = createSqlFunctionCtx(pExprInfo, num, &pInfo->binfo.rowCellInfoOffset);
  initResultRowInfo(&pInfo->binfo.resultRowInfo, (int32_t)1);

  if (pInfo->binfo.pRes == NULL || pInfo->binfo.pCtx == NULL) {
    goto _error;
  }

  size_t keyBufSize = sizeof(int64_t) + sizeof(int64_t) + POINTER_BYTES;
  code = doInitAggInfoSup(&pInfo->aggSup, pInfo->binfo.pCtx, num, keyBufSize, pTaskInfo->id.str);
  if (code == TSDB_CODE_SUCCESS) {
    setFunctionResultOutput(&pInfo->binfo, &pInfo->aggSup, MAIN_SCAN, num, pTaskInfo);
    code = initGroupCol(pExprInfo, num, pGroupInfo, pInfo);
  }
  if (code != TSDB_CODE_SUCCESS) {
    goto _error;
  }

  // sort buffer: 1024 * 16, split into pages of 1024
  // NOTE(review): the previous comment called this "1MB", which does not match the value
  pInfo->pSortInfo = pSortInfo;
  pInfo->bufPageSize = 1024;
  pInfo->sortBufSize = 1024 * 16;

  pOperator->resultInfo.capacity = blockDataGetCapacityInRow(pInfo->binfo.pRes, pInfo->bufPageSize);

  // the operator type is intentionally left unset here, as before
  pOperator->name = "SortedMerge";
  pOperator->info = pInfo;
  pOperator->pTaskInfo = pTaskInfo;
  pOperator->pExpr = pExprInfo;
  pOperator->numOfExprs = num;
  pOperator->status = OP_NOT_OPENED;
  pOperator->blocking = true;

  pOperator->fpSet = createOperatorFpSet(operatorDummyOpenFn, doSortedMerge, NULL, NULL, destroySortedMergeOperatorInfo,
                                         NULL, NULL, NULL);
  code = appendDownstream(pOperator, downstream, numOfDownstream);
  if (code == TSDB_CODE_SUCCESS) {
    return pOperator;
  }

_error:
  if (pInfo != NULL) {
    destroySortedMergeOperatorInfo(pInfo, num);
  }

  taosMemoryFreeClear(pInfo);
  taosMemoryFreeClear(pOperator);
  terrno = TSDB_CODE_QRY_OUT_OF_MEMORY;
  return NULL;
}

X
Xiaoyu Wang 已提交
3211
// Report the scan order and scan flag that apply to pOperator by walking down the
// first-downstream chain until a scan-like operator is found.
//
// Exchange, system-table scan, stream scan and tag scan operators report
// TSDB_ORDER_ASC / MAIN_SCAN; a table scan reports the values stored in its
// STableScanInfo. Returns TSDB_CODE_SUCCESS in those cases, or TSDB_CODE_INVALID_PARA
// when an operator of another type has no first downstream operator.
// todo add more information about exchange operation
int32_t getTableScanInfo(SOperatorInfo* pOperator, int32_t* order, int32_t* scanFlag) {
  SOperatorInfo* pCur = pOperator;

  for (;;) {
    int32_t type = pCur->operatorType;

    switch (type) {
      case QUERY_NODE_PHYSICAL_PLAN_EXCHANGE:
      case QUERY_NODE_PHYSICAL_PLAN_SYSTABLE_SCAN:
      case QUERY_NODE_PHYSICAL_PLAN_STREAM_SCAN:
      case QUERY_NODE_PHYSICAL_PLAN_TAG_SCAN:
        *order = TSDB_ORDER_ASC;
        *scanFlag = MAIN_SCAN;
        return TSDB_CODE_SUCCESS;

      case QUERY_NODE_PHYSICAL_PLAN_TABLE_SCAN: {
        STableScanInfo* pTableScanInfo = pCur->info;
        *order = pTableScanInfo->cond.order;
        *scanFlag = pTableScanInfo->scanFlag;
        return TSDB_CODE_SUCCESS;
      }

      default:
        break;
    }

    // not a scan-like operator: continue with its first child, if there is one
    if (pCur->pDownstream == NULL || pCur->pDownstream[0] == NULL) {
      return TSDB_CODE_INVALID_PARA;
    }
    pCur = pCur->pDownstream[0];
  }
}
3232 3233

// this is a blocking operator
L
Liu Jicong 已提交
3234
// Open callback of the aggregate operator.
//
// Consumes every block of the first downstream operator: looks up the scan
// order/flag, optionally evaluates the scalar expressions in place on the block, sets
// the execution context for the block's group id and runs the aggregate functions.
// When the input is exhausted, the result rows are closed, the grouped result info is
// initialized, the operator is flagged as opened and the elapsed time is stored in
// cost.openCost. Any failing step aborts through longjmp() to the task's jump buffer.
static int32_t doOpenAggregateOptr(SOperatorInfo* pOperator) {
  if (OPTR_IS_OPENED(pOperator)) {
    return TSDB_CODE_SUCCESS;
  }

  SAggOperatorInfo* pAggInfo = pOperator->info;
  SExecTaskInfo*    pTaskInfo = pOperator->pTaskInfo;
  SOptrBasicInfo*   pInfo = &pAggInfo->binfo;
  SOperatorInfo*    downstream = pOperator->pDownstream[0];

  const int64_t startUs = taosGetTimestampUs();

  int32_t scanFlag = MAIN_SCAN;
  int32_t order = TSDB_ORDER_ASC;

  SSDataBlock* pBlock = NULL;
  while ((pBlock = downstream->fpSet.getNextFn(downstream)) != NULL) {
    int32_t code = getTableScanInfo(pOperator, &order, &scanFlag);
    if (code != TSDB_CODE_SUCCESS) {
      longjmp(pTaskInfo->env, code);
    }

    // scalar expressions, if any, are evaluated before the group aggregation is applied
    if (pAggInfo->pScalarExprInfo != NULL) {
      code = projectApplyFunctions(pAggInfo->pScalarExprInfo, pBlock, pBlock, pAggInfo->pScalarCtx,
                                   pAggInfo->numOfScalarExpr, NULL);
      if (code != TSDB_CODE_SUCCESS) {
        longjmp(pTaskInfo->env, code);
      }
    }

    // the pDataBlock are always the same one, no need to call this again
    setExecutionContext(pOperator->numOfExprs, pBlock->info.groupId, pTaskInfo, pAggInfo);
    setInputDataBlock(pOperator, pInfo->pCtx, pBlock, order, scanFlag, true);

    code = doAggregateImpl(pOperator, 0, pInfo->pCtx);
    if (code != 0) {
      longjmp(pTaskInfo->env, code);
    }

#if 0  // test for encode/decode result info
    if(pOperator->fpSet.encodeResultRow){
      char *result = NULL;
      int32_t length = 0;
      pOperator->fpSet.encodeResultRow(pOperator, &result, &length);
      SAggSupporter* pSup = &pAggInfo->aggSup;
      taosHashClear(pSup->pResultRowHashTable);
      pInfo->resultRowInfo.size = 0;
      pOperator->fpSet.decodeResultRow(pOperator, result);
      if(result){
        taosMemoryFree(result);
      }
    }
#endif
  }

  closeAllResultRows(&pAggInfo->binfo.resultRowInfo);
  initGroupedResultInfo(&pAggInfo->groupResInfo, pAggInfo->aggSup.pResultRowHashTable, 0);
  OPTR_SET_OPENED(pOperator);

  pOperator->cost.openCost = (taosGetTimestampUs() - startUs) / 1000.0;
  return TSDB_CODE_SUCCESS;
}

3302
// Get-next callback of the aggregate operator.
//
// Returns NULL when the operator is already done or when opening it fails (the open
// result is recorded in the task's code; a failed open also marks the operator
// completed). Otherwise the next batch of result rows is built into the operator's
// output block; the operator is marked completed when that batch is empty or no group
// data remains. The block's row count is added to resultInfo.totalRows.
// Returns the output block, or NULL if it holds no rows.
static SSDataBlock* getAggregateResult(SOperatorInfo* pOperator) {
  if (pOperator->status == OP_EXEC_DONE) {
    return NULL;
  }

  SAggOperatorInfo* pAggInfo = pOperator->info;
  SOptrBasicInfo*   pInfo = &pAggInfo->binfo;
  SExecTaskInfo*    pTaskInfo = pOperator->pTaskInfo;

  pTaskInfo->code = pOperator->fpSet._openFn(pOperator);
  if (pTaskInfo->code != TSDB_CODE_SUCCESS) {
    doSetOperatorCompleted(pOperator);
    return NULL;
  }

  blockDataEnsureCapacity(pInfo->pRes, pOperator->resultInfo.capacity);
  doBuildResultDatablock(pOperator, pInfo, &pAggInfo->groupResInfo, pAggInfo->aggSup.pResultBuf);

  bool exhausted = (pInfo->pRes->info.rows == 0) || !hashRemainDataInGroupInfo(&pAggInfo->groupResInfo);
  if (exhausted) {
    doSetOperatorCompleted(pOperator);
  }

  size_t rows = blockDataGetNumOfRows(pInfo->pRes);
  pOperator->resultInfo.totalRows += rows;

  if (rows == 0) {
    return NULL;
  }
  return pInfo->pRes;
}

wmmhello's avatar
wmmhello 已提交
3329
// Serialize all result rows of an aggregate operator into a newly allocated buffer.
//
// Layout of *result:
//   int32_t total length | int32_t number of rows |
//   per row: int32_t key length | key bytes | int32_t row size | row bytes
//
// pOperator->info is expected to start with a SOptrBasicInfo immediately followed by a
// SAggSupporter. On success *result owns the buffer (release with taosMemoryFree) and
// *length is its used size. On failure *result is NULL.
//
// Returns TSDB_CODE_SUCCESS, TSDB_CODE_TSC_INVALID_INPUT for NULL output pointers, or
// TSDB_CODE_OUT_OF_MEMORY.
int32_t aggEncodeResultRow(SOperatorInfo* pOperator, char** result, int32_t* length) {
  if (result == NULL || length == NULL) {
    return TSDB_CODE_TSC_INVALID_INPUT;
  }

  SOptrBasicInfo* pInfo = (SOptrBasicInfo*)(pOperator->info);
  SAggSupporter*  pSup = (SAggSupporter*)POINTER_SHIFT(pOperator->info, sizeof(SOptrBasicInfo));
  int32_t         size = taosHashGetSize(pSup->pResultRowHashTable);
  size_t          keyLen = sizeof(uint64_t) * 2;  // estimate the key length
  int32_t         totalSize =
      sizeof(int32_t) + sizeof(int32_t) + size * (sizeof(int32_t) + keyLen + sizeof(int32_t) + pSup->resultRowSize);

  *result = (char*)taosMemoryCalloc(1, totalSize);
  if (*result == NULL) {
    return TSDB_CODE_OUT_OF_MEMORY;
  }

  // the first int32_t (total length) is filled in at the end
  int32_t offset = sizeof(int32_t);
  memcpy(*result + offset, &size, sizeof(int32_t));
  offset += sizeof(int32_t);

  // prepare memory
  SResultRowPosition* pos = &pInfo->resultRowInfo.cur;
  void*               pPage = getBufPage(pSup->pResultBuf, pos->pageId);
  setBufPageDirty(pPage, true);
  releaseBufPage(pSup->pResultBuf, pPage);

  void* pIter = taosHashIterate(pSup->pResultRowHashTable, NULL);
  while (pIter) {
    void*               key = taosHashGetKey(pIter, &keyLen);
    SResultRowPosition* p1 = (SResultRowPosition*)pIter;

    pPage = (SFilePage*)getBufPage(pSup->pResultBuf, p1->pageId);
    SResultRow* pRow = (SResultRow*)((char*)pPage + p1->offset);
    setBufPageDirty(pPage, true);

    // recalculate the result size, the real key may be longer than the estimate
    int32_t realTotalSize = offset + sizeof(int32_t) + keyLen + sizeof(int32_t) + pSup->resultRowSize;
    if (realTotalSize > totalSize) {
      char* tmp = (char*)taosMemoryRealloc(*result, realTotalSize);
      if (tmp == NULL) {
        releaseBufPage(pSup->pResultBuf, pPage);
        taosMemoryFree(*result);
        *result = NULL;
        return TSDB_CODE_OUT_OF_MEMORY;
      }

      *result = tmp;
      totalSize = realTotalSize;  // remember the new capacity
    }

    // save key; memcpy is used for the length fields because offset may be unaligned
    int32_t keyLen32 = (int32_t)keyLen;
    memcpy(*result + offset, &keyLen32, sizeof(int32_t));
    offset += sizeof(int32_t);
    memcpy(*result + offset, key, keyLen);
    offset += keyLen;

    // save value
    int32_t rowSize = pSup->resultRowSize;
    memcpy(*result + offset, &rowSize, sizeof(int32_t));
    offset += sizeof(int32_t);
    memcpy(*result + offset, pRow, pSup->resultRowSize);
    offset += pSup->resultRowSize;

    // release the page only after the row has been copied out of it
    releaseBufPage(pSup->pResultBuf, pPage);

    pIter = taosHashIterate(pSup->pResultRowHashTable, pIter);
  }

  memcpy(*result, &offset, sizeof(int32_t));
  *length = offset;

  return TSDB_CODE_SUCCESS;
}

3399
// Rebuild the result rows of an aggregate operator from a buffer produced by
// aggEncodeResultRow().
//
// Expected layout of result:
//   int32_t total length | int32_t number of rows |
//   per row: int32_t key length | key bytes | int32_t row size | row bytes
// The first 8 bytes of every key are read as the uint64_t group id of the row.
//
// For each record a new result row is allocated, registered in the result-row hash
// table under the encoded key, filled with the encoded bytes (keeping its own page id
// and offset) and made the current row of the operator.
//
// Returns TSDB_CODE_SUCCESS, or TSDB_CODE_TSC_INVALID_INPUT when result is NULL, a new
// row cannot be obtained, or the buffer is inconsistent (a length field points outside
// the encoded length, the row size differs from the operator's row size, or the
// records do not end exactly at the encoded length).
int32_t aggDecodeResultRow(SOperatorInfo* pOperator, char* result) {
  if (result == NULL) {
    return TSDB_CODE_TSC_INVALID_INPUT;
  }

  SOptrBasicInfo* pInfo = (SOptrBasicInfo*)(pOperator->info);
  SAggSupporter*  pSup = (SAggSupporter*)POINTER_SHIFT(pOperator->info, sizeof(SOptrBasicInfo));

  const int32_t lenFieldSize = (int32_t)sizeof(int32_t);

  int32_t length = 0;
  memcpy(&length, result, sizeof(int32_t));
  int32_t offset = lenFieldSize;

  // the header itself (length + count) must fit
  if (length < 2 * lenFieldSize) {
    return TSDB_CODE_TSC_INVALID_INPUT;
  }

  int32_t count = 0;
  memcpy(&count, result + offset, sizeof(int32_t));
  offset += lenFieldSize;

  while (count-- > 0 && length > offset) {
    if (length - offset < lenFieldSize) {
      return TSDB_CODE_TSC_INVALID_INPUT;
    }

    int32_t keyLen = 0;
    memcpy(&keyLen, result + offset, sizeof(int32_t));
    offset += lenFieldSize;

    // validate the whole record before touching any operator state
    int32_t remain = length - offset;
    if (keyLen < 0 || keyLen > remain - lenFieldSize || remain < (int32_t)sizeof(uint64_t)) {
      return TSDB_CODE_TSC_INVALID_INPUT;
    }

    char*   pKey = result + offset;
    int32_t valueLen = 0;
    memcpy(&valueLen, pKey + keyLen, sizeof(int32_t));
    if (valueLen != pSup->resultRowSize || valueLen > remain - keyLen - lenFieldSize) {
      return TSDB_CODE_TSC_INVALID_INPUT;
    }

    uint64_t tableGroupId = 0;
    memcpy(&tableGroupId, pKey, sizeof(uint64_t));

    SResultRow* resultRow = getNewResultRow(pSup->pResultBuf, tableGroupId, pSup->resultRowSize);
    if (!resultRow) {
      return TSDB_CODE_TSC_INVALID_INPUT;
    }

    // add a new result set for a new group
    SResultRowPosition pos = {.pageId = resultRow->pageId, .offset = resultRow->offset};
    taosHashPut(pSup->pResultRowHashTable, pKey, keyLen, &pos, sizeof(SResultRowPosition));

    offset += keyLen + lenFieldSize;

    // the encoded row carries the page id/offset of the encoding side; keep the ones
    // of the freshly allocated row
    int32_t pageId = resultRow->pageId;
    int32_t pOffset = resultRow->offset;
    memcpy(resultRow, result + offset, valueLen);
    resultRow->pageId = pageId;
    resultRow->offset = pOffset;
    offset += valueLen;

    initResultRow(resultRow);
    pInfo->resultRowInfo.cur = (SResultRowPosition){.pageId = resultRow->pageId, .offset = resultRow->offset};
  }

  if (offset != length) {
    return TSDB_CODE_TSC_INVALID_INPUT;
  }

  return TSDB_CODE_SUCCESS;
}

3450 3451
// Outcome codes returned by handleLimitOffset() of the project operator.
enum {
  // the current block was consumed by limit/offset handling; keep retrieving
  PROJECT_RETRIEVE_CONTINUE = 1,
  // the group limit has been reached; the operator is done
  PROJECT_RETRIEVE_DONE = 2,
};

/*
 * Apply the group-level (slimit/soffset) and row-level (limit/offset) restrictions of the project operator
 * to the rows currently held in its result block, using the group id of the input block pBlock.
 *
 * Returns PROJECT_RETRIEVE_CONTINUE when the caller should fetch another input block, or
 * PROJECT_RETRIEVE_DONE when the result block should be returned now. pOperator->status is set to
 * OP_EXEC_DONE once the group limit is reached.
 */
static int32_t handleLimitOffset(SOperatorInfo* pOperator, SSDataBlock* pBlock) {
  SProjectOperatorInfo* pProjectInfo = pOperator->info;
  SOptrBasicInfo*       pInfo = &pProjectInfo->binfo;
  SSDataBlock*          pRes = pInfo->pRes;

  // Phase 1: groups are still being skipped (soffset not consumed yet).
  if (pProjectInfo->curSOffset > 0) {
    if (pProjectInfo->groupId == 0) {  // it is the first group
      pProjectInfo->groupId = pBlock->info.groupId;
      blockDataCleanup(pInfo->pRes);
      return PROJECT_RETRIEVE_CONTINUE;
    } else if (pProjectInfo->groupId != pBlock->info.groupId) {
      // a different group id shows up: one more group has been skipped
      pProjectInfo->curSOffset -= 1;

      // ignore data block in current group
      if (pProjectInfo->curSOffset > 0) {
        blockDataCleanup(pInfo->pRes);
        return PROJECT_RETRIEVE_CONTINUE;
      }
    }

    // set current group id of the project operator
    pProjectInfo->groupId = pBlock->info.groupId;
  }

  // Phase 2: the input block belongs to a different group than the one recorded so far.
  if (pProjectInfo->groupId != 0 && pProjectInfo->groupId != pBlock->info.groupId) {
    pProjectInfo->curGroupOutput += 1;
    if ((pProjectInfo->slimit.limit > 0) && (pProjectInfo->slimit.limit <= pProjectInfo->curGroupOutput)) {
      // the group limit is reached: drop the rows of the new group and finish the operator
      pOperator->status = OP_EXEC_DONE;
      blockDataCleanup(pRes);

      return PROJECT_RETRIEVE_DONE;
    }

    // reset the value for a new group data
    pProjectInfo->curOffset = 0;
    pProjectInfo->curOutput = 0;
  }

  // here we reach the start position, according to the limit/offset requirements.

  // set current group id
  pProjectInfo->groupId = pBlock->info.groupId;

  // Phase 3: row offset. Note that an empty result block (0 rows) also takes the first branch.
  if (pProjectInfo->curOffset >= pRes->info.rows) {
    pProjectInfo->curOffset -= pRes->info.rows;
    blockDataCleanup(pRes);
    return PROJECT_RETRIEVE_CONTINUE;
  } else if (pProjectInfo->curOffset < pRes->info.rows && pProjectInfo->curOffset > 0) {
    // only part of the block is skipped: trim the leading rows and clear the remaining offset
    blockDataTrimFirstNRows(pRes, pProjectInfo->curOffset);
    pProjectInfo->curOffset = 0;
  }

  // check for the limitation in each group
  if (pProjectInfo->limit.limit > 0 && pProjectInfo->curOutput + pRes->info.rows >= pProjectInfo->limit.limit) {
    // truncate the row count so that the accumulated output equals the limit
    pRes->info.rows = (int32_t)(pProjectInfo->limit.limit - pProjectInfo->curOutput);
    if (pProjectInfo->slimit.limit > 0 && pProjectInfo->slimit.limit <= pProjectInfo->curGroupOutput) {
      pOperator->status = OP_EXEC_DONE;
    }

    return PROJECT_RETRIEVE_DONE;
  }

  // todo optimize performance
  // When an slimit/soffset value is set (i.e. not -1), results of several rounds are not packed into one
  // output block, since they may belong to different groups; the block is returned immediately instead.
  if (pRes->info.rows >= pOperator->resultInfo.threshold || pProjectInfo->slimit.offset != -1 ||
      pProjectInfo->slimit.limit != -1) {
    return PROJECT_RETRIEVE_DONE;
  } else {  // not full enough, continue to accumulate the output data in the buffer.
    return PROJECT_RETRIEVE_CONTINUE;
  }
}

3528
/*
 * getNext function of the project operator: pulls blocks from the first downstream operator, filters them,
 * evaluates the projection expressions into the operator's result block and applies limit/offset handling.
 *
 * Returns the result block when it holds at least one row, otherwise NULL. Errors reported by
 * getTableScanInfo() or projectApplyFunctions() are raised through longjmp on pTaskInfo->env.
 */
static SSDataBlock* doProjectOperation(SOperatorInfo* pOperator) {
  SProjectOperatorInfo* pProjectInfo = pOperator->info;
  SOptrBasicInfo*       pInfo = &pProjectInfo->binfo;

  // the result block is reused between calls, so it is cleaned on every entry
  SSDataBlock* pRes = pInfo->pRes;
  blockDataCleanup(pRes);

  SExecTaskInfo* pTaskInfo = pOperator->pTaskInfo;
  if (pOperator->status == OP_EXEC_DONE) {
    return NULL;
  }

  // NOTE: the region below is compiled out; it is kept as-is for reference.
#if 0
  if (pProjectInfo->existDataBlock) {  // TODO refactor
    SSDataBlock* pBlock = pProjectInfo->existDataBlock;
    pProjectInfo->existDataBlock = NULL;

    // the pDataBlock are always the same one, no need to call this again
    setInputDataBlock(pOperator, pInfo->pCtx, pBlock, TSDB_ORDER_ASC);

    blockDataEnsureCapacity(pInfo->pRes, pBlock->info.rows);
    projectApplyFunctions(pOperator->pExpr, pInfo->pRes, pBlock, pInfo->pCtx, pOperator->numOfExprs);
    if (pRes->info.rows >= pProjectInfo->binfo.capacity * 0.8) {
      copyTsColoum(pRes, pInfo->pCtx, pOperator->numOfExprs);
      resetResultRowEntryResult(pInfo->pCtx, pOperator->numOfExprs);
      return pRes;
    }
  }
#endif

  int64_t st = 0;
  int32_t order = 0;
  int32_t scanFlag = 0;

  // openCost is recorded only while it is still 0, i.e. until a call stores a non-zero value
  if (pOperator->cost.openCost == 0) {
    st = taosGetTimestampUs();
  }

  SOperatorInfo* downstream = pOperator->pDownstream[0];

  while (1) {
    // fetch the next input block; NULL means the downstream operator has no more data
    SSDataBlock* pBlock = downstream->fpSet.getNextFn(downstream);
    if (pBlock == NULL) {
      doSetOperatorCompleted(pOperator);
      break;
    }

    // query the scan order and scan flag from the downstream operator for this block
    int32_t code = getTableScanInfo(pOperator->pDownstream[0], &order, &scanFlag);
    if (code != TSDB_CODE_SUCCESS) {
      longjmp(pTaskInfo->env, code);
    }

    doFilter(pProjectInfo->pFilterNode, pBlock);

    setInputDataBlock(pOperator, pInfo->pCtx, pBlock, order, scanFlag, false);
    // make room for the rows already accumulated plus the rows of this input block
    blockDataEnsureCapacity(pInfo->pRes, pInfo->pRes->info.rows + pBlock->info.rows);

    code = projectApplyFunctions(pOperator->pExpr, pInfo->pRes, pBlock, pInfo->pCtx, pOperator->numOfExprs,
                                 pProjectInfo->pPseudoColInfo);
    if (code != TSDB_CODE_SUCCESS) {
      longjmp(pTaskInfo->env, code);
    }

    // decide whether to keep accumulating or to return what has been produced so far
    int32_t status = handleLimitOffset(pOperator, pBlock);
    if (status == PROJECT_RETRIEVE_CONTINUE) {
      continue;
    } else if (status == PROJECT_RETRIEVE_DONE) {
      break;
    }
  }

  // curOutput is compared against limit.limit in handleLimitOffset()
  pProjectInfo->curOutput += pInfo->pRes->info.rows;

  size_t rows = pInfo->pRes->info.rows;
  pOperator->resultInfo.totalRows += rows;

  if (pOperator->cost.openCost == 0) {
    // microseconds -> milliseconds
    pOperator->cost.openCost = (taosGetTimestampUs() - st) / 1000.0;
  }

  return (rows > 0) ? pInfo->pRes : NULL;
}

L
Liu Jicong 已提交
3613 3614
/*
 * Start filling the cached block of a new group (pInfo->existNewGroupBlock): the fill state is reset, the
 * cached block becomes the fill input, one round of gap filling is run into pInfo->pRes, and the cache slot
 * is cleared. *newgroup is set to true on return.
 */
static void doHandleRemainBlockForNewGroupImpl(SFillOperatorInfo* pInfo, SResultInfo* pResultInfo, bool* newgroup,
                                               SExecTaskInfo* pTaskInfo) {
  pInfo->totalInputRows = pInfo->existNewGroupBlock->info.rows;

  // the end key comes from the task window once the task is completed, otherwise from the cached block
  int64_t endKey;
  if (Q_STATUS_EQUAL(pTaskInfo->status, TASK_COMPLETED)) {
    endKey = pTaskInfo->window.ekey;
  } else {
    endKey = pInfo->existNewGroupBlock->info.window.ekey;
  }

  taosResetFillInfo(pInfo->pFillInfo, getFillInfoStart(pInfo->pFillInfo));

  taosFillSetStartInfo(pInfo->pFillInfo, pInfo->existNewGroupBlock->info.rows, endKey);
  taosFillSetInputDataBlock(pInfo->pFillInfo, pInfo->existNewGroupBlock);

  doFillTimeIntervalGapsInResults(pInfo->pFillInfo, pInfo->pRes, pResultInfo->capacity);

  // the cached block has been consumed
  pInfo->existNewGroupBlock = NULL;
  *newgroup = true;
}

L
Liu Jicong 已提交
3629 3630
/*
 * Drain pending fill output of the current group and, if allowed, continue with the cached block of the
 * next group. The cached block is only processed when nothing was pending, or when multi-group results are
 * enabled and the result block has not exceeded the threshold.
 */
static void doHandleRemainBlockFromNewGroup(SFillOperatorInfo* pInfo, SResultInfo* pResultInfo, bool* newgroup,
                                            SExecTaskInfo* pTaskInfo) {
  if (taosFillHasMoreResults(pInfo->pFillInfo)) {
    *newgroup = false;
    doFillTimeIntervalGapsInResults(pInfo->pFillInfo, pInfo->pRes, (int32_t)pResultInfo->capacity);

    bool overThreshold = pInfo->pRes->info.rows > pResultInfo->threshold;
    bool mayAppendNextGroup = pInfo->multigroupResult && !overThreshold;
    if (!mayAppendNextGroup) {
      return;
    }
  }

  // nothing cached for a new group: done
  if (!pInfo->existNewGroupBlock) {
    return;
  }

  doHandleRemainBlockForNewGroupImpl(pInfo, pResultInfo, newgroup, pTaskInfo);
}

3645
/*
 * getNext function of the fill operator: pulls blocks from the first downstream operator, feeds them to the
 * fill info and writes the gap-filled rows into pInfo->pRes.
 *
 * Returns the result block when it contains rows that should be handed to the caller, or NULL when there is
 * nothing (more) to return.
 */
static SSDataBlock* doFill(SOperatorInfo* pOperator) {
  SFillOperatorInfo* pInfo = pOperator->info;
  SExecTaskInfo*     pTaskInfo = pOperator->pTaskInfo;

  SResultInfo* pResultInfo = &pOperator->resultInfo;
  SSDataBlock* pResBlock = pInfo->pRes;

  // the result block is reused between calls
  blockDataCleanup(pResBlock);
  if (pOperator->status == OP_EXEC_DONE) {
    return NULL;
  }

  // todo handle different group data interpolation
  // newgroup points at a local flag; it is only toggled by the helpers called from this function
  bool  n = false;
  bool* newgroup = &n;
  // first emit whatever is left over from the previous call (pending fill output or a cached group block)
  doHandleRemainBlockFromNewGroup(pInfo, pResultInfo, newgroup, pTaskInfo);
  if (pResBlock->info.rows > pResultInfo->threshold || (!pInfo->multigroupResult && pResBlock->info.rows > 0)) {
    return pResBlock;
  }

  SOperatorInfo* pDownstream = pOperator->pDownstream[0];
  while (1) {
    SSDataBlock* pBlock = pDownstream->fpSet.getNextFn(pDownstream);
    if (*newgroup) {
      assert(pBlock != NULL);
    }

    if (*newgroup && pInfo->totalInputRows > 0) {  // there are already processed current group data block
      // keep the block of the new group aside; it is handled after the previous group is closed
      pInfo->existNewGroupBlock = pBlock;
      *newgroup = false;

      // Fill the previous group data block, before handle the data block of new group.
      // Close the fill operation for previous group data block
      taosFillSetStartInfo(pInfo->pFillInfo, 0, pTaskInfo->window.ekey);
    } else {
      if (pBlock == NULL) {
        // downstream is exhausted and no input row was ever received: nothing to fill
        if (pInfo->totalInputRows == 0) {
          pOperator->status = OP_EXEC_DONE;
          return NULL;
        }

        // no more input: run the fill with zero new rows up to the end key of the task window
        taosFillSetStartInfo(pInfo->pFillInfo, 0, pTaskInfo->window.ekey);
      } else {
        pInfo->totalInputRows += pBlock->info.rows;
        taosFillSetStartInfo(pInfo->pFillInfo, pBlock->info.rows, pBlock->info.window.ekey);
        taosFillSetInputDataBlock(pInfo->pFillInfo, pBlock);
      }
    }

    blockDataEnsureCapacity(pResBlock, pOperator->resultInfo.capacity);
    doFillTimeIntervalGapsInResults(pInfo->pFillInfo, pResBlock, pOperator->resultInfo.capacity);

    // the fill produced rows for the current group
    if (pResBlock->info.rows > 0) {
      // Return immediately when the threshold is exceeded, the downstream is exhausted, or results of
      // multiple groups must not be mixed in one SSDataBlock; otherwise keep accumulating.
      if (pResBlock->info.rows > pResultInfo->threshold || pBlock == NULL || (!pInfo->multigroupResult)) {
        return pResBlock;
      }

      doHandleRemainBlockFromNewGroup(pInfo, pResultInfo, newgroup, pTaskInfo);
      if (pResBlock->info.rows > pOperator->resultInfo.threshold || pBlock == NULL) {
        return pResBlock;
      }
    } else if (pInfo->existNewGroupBlock) {  // try next group
      assert(pBlock != NULL);
      doHandleRemainBlockForNewGroupImpl(pInfo, pResultInfo, newgroup, pTaskInfo);
      if (pResBlock->info.rows > pResultInfo->threshold) {
        return pResBlock;
      }
    } else {
      // no rows produced and no cached group block
      return NULL;
    }
  }
}

H
Haojun Liao 已提交
3721 3722 3723 3724 3725 3726 3727 3728 3729 3730 3731
/*
 * Release the heap members owned by each of the first numOfExprs entries of pExpr: the column descriptor of
 * the first parameter (only for QUERY_NODE_COLUMN expressions), the parameter array and the expression node.
 * The pExpr array itself is not freed here.
 */
static void destroyExprInfo(SExprInfo* pExpr, int32_t numOfExprs) {
  int32_t idx = 0;
  while (idx < numOfExprs) {
    SExprInfo* pCur = pExpr + idx;

    bool isColumn = (pCur->pExpr->nodeType == QUERY_NODE_COLUMN);
    if (isColumn) {
      taosMemoryFree(pCur->base.pParam[0].pCol);
    }

    taosMemoryFree(pCur->base.pParam);
    taosMemoryFree(pCur->pExpr);

    idx += 1;
  }
}

3732 3733 3734 3735 3736
/*
 * Recursively destroy an operator and everything below it: the operator's close callback runs first, then
 * every downstream operator is destroyed, and finally the expression array, the info object and the
 * operator itself are freed. A NULL operator is ignored.
 */
static void destroyOperatorInfo(SOperatorInfo* pOperator) {
  if (pOperator == NULL) {
    return;
  }

  // give the concrete operator a chance to release its private state
  if (pOperator->fpSet.closeFn != NULL) {
    pOperator->fpSet.closeFn(pOperator->info, pOperator->numOfExprs);
  }

  if (pOperator->pDownstream != NULL) {
    int32_t child = 0;
    while (child < pOperator->numOfDownstream) {
      destroyOperatorInfo(pOperator->pDownstream[child]);
      child += 1;
    }

    taosMemoryFreeClear(pOperator->pDownstream);
    pOperator->numOfDownstream = 0;
  }

  // free the members of each expression before the array that holds them
  if (pOperator->pExpr != NULL) {
    destroyExprInfo(pOperator->pExpr, pOperator->numOfExprs);
  }
  taosMemoryFreeClear(pOperator->pExpr);

  taosMemoryFreeClear(pOperator->info);
  taosMemoryFreeClear(pOperator);
}

3759 3760 3761 3762 3763 3764 3765 3766 3767 3768 3769 3770 3771 3772 3773
/*
 * Compute the page size and total buffer size for a disk-based result buffer.
 *
 * The page size is the smallest power of two that is >= 4096 and >= 4 * rowSize. The buffer size is
 * 4096 * 256 bytes, enlarged when necessary so that it always holds at least four pages.
 *
 * A non-positive rowSize is treated as 0. The page size is capped at 2^29 bytes so that four pages still
 * fit in a uint32_t; for such (unrealistically large) rows a page holds fewer than four rows.
 *
 * @param rowSize      size of one result row in bytes
 * @param defaultPgsz  [out] page size in bytes
 * @param defaultBufsz [out] in-memory buffer size in bytes
 * @return always 0
 */
int32_t getBufferPgSize(int32_t rowSize, uint32_t* defaultPgsz, uint32_t* defaultBufsz) {
  const uint32_t minPgsz = 4096;
  const uint32_t maxPgsz = 1u << 29;
  const uint32_t minBufsz = 4096 * 256;

  // 64-bit arithmetic: rowSize * 4 may not fit in a 32-bit integer, and a negative rowSize must not be
  // converted to a huge unsigned value by the comparison below.
  uint64_t required = (rowSize > 0) ? ((uint64_t)rowSize * 4) : 0;

  uint32_t pgsz = minPgsz;
  while (pgsz < required && pgsz < maxPgsz) {
    pgsz <<= 1u;
  }

  // at least four pages need to be in buffer
  uint32_t bufsz = minBufsz;
  if (bufsz < pgsz * 4) {
    bufsz = pgsz * 4;
  }

  *defaultPgsz = pgsz;
  *defaultBufsz = bufsz;
  return 0;
}

dengyihao's avatar
dengyihao 已提交
3774 3775
/*
 * Initialise the aggregate supporter: the result row size, the key buffer, the result-row hash table and
 * the disk-based result buffer.
 *
 * @return TSDB_CODE_OUT_OF_MEMORY when the key buffer or the hash table cannot be allocated, otherwise the
 *         code returned by createDiskbasedBuf().
 */
int32_t doInitAggInfoSup(SAggSupporter* pAggSup, SqlFunctionCtx* pCtx, int32_t numOfOutput, size_t keyBufSize,
                         const char* pKey) {
  _hash_fn_t keyHashFn = taosGetDefaultHashFunction(TSDB_DATA_TYPE_BINARY);

  pAggSup->resultRowSize = getResultRowSize(pCtx, numOfOutput);
  pAggSup->keyBuf = taosMemoryCalloc(1, keyBufSize + POINTER_BYTES + sizeof(int64_t));
  pAggSup->pResultRowHashTable = taosHashInit(10, keyHashFn, true, HASH_NO_LOCK);

  bool allocated = (pAggSup->keyBuf != NULL) && (pAggSup->pResultRowHashTable != NULL);
  if (!allocated) {
    return TSDB_CODE_OUT_OF_MEMORY;
  }

  // page and buffer sizes are derived from the size of one result row
  uint32_t pageSize = 0;
  uint32_t bufSize = 0;
  getBufferPgSize(pAggSup->resultRowSize, &pageSize, &bufSize);

  return createDiskbasedBuf(&pAggSup->pResultBuf, pageSize, bufSize, pKey, TD_TMP_DIR_PATH);
}

3798
// Release the resources held by an aggregate supporter: the key buffer, the result-row hash table and the
// disk-based result buffer. Only keyBuf is reset to NULL (by taosMemoryFreeClear); the other two pointers
// are left untouched in the struct.
void cleanupAggSup(SAggSupporter* pAggSup) {
  taosMemoryFreeClear(pAggSup->keyBuf);
  taosHashCleanup(pAggSup->pResultRowHashTable);
  destroyDiskbasedBuf(pAggSup->pResultBuf);
}

H
Haojun Liao 已提交
3804
int32_t initAggInfo(SOptrBasicInfo* pBasicInfo, SAggSupporter* pAggSup, SExprInfo* pExprInfo, int32_t numOfCols,
3805
                    SSDataBlock* pResultBlock, size_t keyBufSize, const char* pkey) {
3806
  pBasicInfo->pCtx = createSqlFunctionCtx(pExprInfo, numOfCols, &pBasicInfo->rowCellInfoOffset);
H
Haojun Liao 已提交
3807 3808
  pBasicInfo->pRes = pResultBlock;

3809
  doInitAggInfoSup(pAggSup, pBasicInfo->pCtx, numOfCols, keyBufSize, pkey);
3810

L
Liu Jicong 已提交
3811
  for (int32_t i = 0; i < numOfCols; ++i) {
3812 3813 3814
    pBasicInfo->pCtx[i].pBuf = pAggSup->pResultBuf;
  }

3815
  return TSDB_CODE_SUCCESS;
H
Haojun Liao 已提交
3816 3817
}

3818 3819 3820 3821 3822 3823 3824 3825 3826
/*
 * Initialise the result sizing of an operator: capacity is numOfRows and the return threshold is 75% of it.
 * When the truncated threshold would be 0 (very small numOfRows) the threshold falls back to numOfRows.
 */
void initResultSizeInfo(SOperatorInfo* pOperator, int32_t numOfRows) {
  pOperator->resultInfo.capacity = numOfRows;
  pOperator->resultInfo.threshold = numOfRows * 0.75;

  // the fallback has to repair the threshold; re-assigning the capacity here would be a no-op
  if (pOperator->resultInfo.threshold == 0) {
    pOperator->resultInfo.threshold = numOfRows;
  }
}

L
Liu Jicong 已提交
3827
SOperatorInfo* createAggregateOperatorInfo(SOperatorInfo* downstream, SExprInfo* pExprInfo, int32_t numOfCols,
3828
                                           SSDataBlock* pResultBlock, SExprInfo* pScalarExprInfo,
wmmhello's avatar
wmmhello 已提交
3829
                                           int32_t numOfScalarExpr, SExecTaskInfo* pTaskInfo) {
wafwerar's avatar
wafwerar 已提交
3830
  SAggOperatorInfo* pInfo = taosMemoryCalloc(1, sizeof(SAggOperatorInfo));
L
Liu Jicong 已提交
3831
  SOperatorInfo*    pOperator = taosMemoryCalloc(1, sizeof(SOperatorInfo));
H
Haojun Liao 已提交
3832 3833 3834
  if (pInfo == NULL || pOperator == NULL) {
    goto _error;
  }
H
Haojun Liao 已提交
3835

3836
  int32_t numOfRows = 1024;
dengyihao's avatar
dengyihao 已提交
3837
  size_t  keyBufSize = sizeof(int64_t) + sizeof(int64_t) + POINTER_BYTES;
3838 3839

  initResultSizeInfo(pOperator, numOfRows);
dengyihao's avatar
dengyihao 已提交
3840 3841
  int32_t code =
      initAggInfo(&pInfo->binfo, &pInfo->aggSup, pExprInfo, numOfCols, pResultBlock, keyBufSize, pTaskInfo->id.str);
L
Liu Jicong 已提交
3842
  if (code != TSDB_CODE_SUCCESS) {
H
Haojun Liao 已提交
3843 3844
    goto _error;
  }
H
Haojun Liao 已提交
3845

H
Haojun Liao 已提交
3846 3847 3848 3849
  int32_t numOfGroup = 10;  // todo replaced with true value
  pInfo->groupId = INT32_MIN;
  initResultRowInfo(&pInfo->binfo.resultRowInfo, numOfGroup);

3850 3851
  pInfo->pScalarExprInfo = pScalarExprInfo;
  pInfo->numOfScalarExpr = numOfScalarExpr;
3852
  if (pInfo->pScalarExprInfo != NULL) {
3853
    pInfo->pScalarCtx = createSqlFunctionCtx(pScalarExprInfo, numOfScalarExpr, &pInfo->rowCellInfoOffset);
3854
  }
3855

dengyihao's avatar
dengyihao 已提交
3856
  pOperator->name = "TableAggregate";
3857
  pOperator->operatorType = QUERY_NODE_PHYSICAL_PLAN_HASH_AGG;
3858
  pOperator->blocking = true;
dengyihao's avatar
dengyihao 已提交
3859 3860 3861
  pOperator->status = OP_NOT_OPENED;
  pOperator->info = pInfo;
  pOperator->pExpr = pExprInfo;
3862
  pOperator->numOfExprs = numOfCols;
dengyihao's avatar
dengyihao 已提交
3863
  pOperator->pTaskInfo = pTaskInfo;
H
Haojun Liao 已提交
3864

3865 3866
  pOperator->fpSet = createOperatorFpSet(doOpenAggregateOptr, getAggregateResult, NULL, NULL, destroyAggOperatorInfo,
                                         aggEncodeResultRow, aggDecodeResultRow, NULL);
H
Haojun Liao 已提交
3867 3868 3869 3870 3871

  code = appendDownstream(pOperator, &downstream, 1);
  if (code != TSDB_CODE_SUCCESS) {
    goto _error;
  }
3872 3873

  return pOperator;
L
Liu Jicong 已提交
3874
_error:
H
Haojun Liao 已提交
3875
  destroyAggOperatorInfo(pInfo, numOfCols);
wafwerar's avatar
wafwerar 已提交
3876 3877
  taosMemoryFreeClear(pInfo);
  taosMemoryFreeClear(pOperator);
H
Haojun Liao 已提交
3878 3879
  pTaskInfo->code = TSDB_CODE_OUT_OF_MEMORY;
  return NULL;
3880 3881
}

H
Haojun Liao 已提交
3882
// Release the members of a basic operator info: the function contexts (numOfOutput entries), the row cell
// offset array, the result row info and the result data block. pInfo itself is not freed and must not be
// NULL.
void doDestroyBasicInfo(SOptrBasicInfo* pInfo, int32_t numOfOutput) {
  assert(pInfo != NULL);

  destroySqlFunctionCtx(pInfo->pCtx, numOfOutput);
  taosMemoryFreeClear(pInfo->rowCellInfoOffset);

  cleanupResultRowInfo(&pInfo->resultRowInfo);
  // pRes is overwritten with the value returned by blockDataDestroy()
  pInfo->pRes = blockDataDestroy(pInfo->pRes);
}

H
Haojun Liao 已提交
3892
// Destroy routine for operators whose info object is a plain SOptrBasicInfo; `param` is cast to that type
// and handed to doDestroyBasicInfo().
void destroyBasicOperatorInfo(void* param, int32_t numOfOutput) {
  SOptrBasicInfo* pInfo = (SOptrBasicInfo*)param;
  doDestroyBasicInfo(pInfo, numOfOutput);
}
H
Haojun Liao 已提交
3896 3897

// Destroy routine of the aggregate operator: releases the basic info embedded in SAggOperatorInfo.
// NOTE(review): pInfo->aggSup and pInfo->pScalarCtx are not released here, unlike in the project operator's
// destroy routine, which calls cleanupAggSup(); confirm they are freed elsewhere, otherwise they leak.
void destroyAggOperatorInfo(void* param, int32_t numOfOutput) {
  SAggOperatorInfo* pInfo = (SAggOperatorInfo*)param;
  doDestroyBasicInfo(&pInfo->binfo, numOfOutput);
}
3901

H
Haojun Liao 已提交
3902
// Destroy routine of the fill operator: releases the fill info, the result block and the `p` pointer array.
// numOfOutput is not used.
void destroySFillOperatorInfo(void* param, int32_t numOfOutput) {
  SFillOperatorInfo* pInfo = (SFillOperatorInfo*)param;
  // both destroy functions return the value that is stored back into the owning field
  pInfo->pFillInfo = taosDestroyFillInfo(pInfo->pFillInfo);
  pInfo->pRes = blockDataDestroy(pInfo->pRes);
  taosMemoryFreeClear(pInfo->p);
}

H
Haojun Liao 已提交
3909
// Destroy routine of the project operator: releases the basic info, the aggregate supporter and the
// pseudo-column index list. A NULL `param` is ignored.
static void destroyProjectOperatorInfo(void* param, int32_t numOfOutput) {
  if (NULL == param) {
    return;
  }
  SProjectOperatorInfo* pInfo = (SProjectOperatorInfo*)param;
  doDestroyBasicInfo(&pInfo->binfo, numOfOutput);
  cleanupAggSup(&pInfo->aggSup);
  taosArrayDestroy(pInfo->pPseudoColInfo);
}

H
Haojun Liao 已提交
3919
static void destroyIndefinitOperatorInfo(void* param, int32_t numOfOutput) {
3920
  SIndefOperatorInfo* pInfo = (SIndefOperatorInfo*)param;
H
Haojun Liao 已提交
3921 3922 3923 3924 3925 3926 3927 3928 3929 3930 3931
  doDestroyBasicInfo(&pInfo->binfo, numOfOutput);

  taosArrayDestroy(pInfo->pPseudoColInfo);
  cleanupAggSup(&pInfo->aggSup);

  destroySqlFunctionCtx(pInfo->pScalarCtx, numOfOutput);
  destroyExprInfo(pInfo->pScalarExpr, pInfo->numOfScalarExpr);

  taosMemoryFree(pInfo->rowCellInfoOffset);
}

H
Haojun Liao 已提交
3932
// Destroy routine of the exchange operator: removes the operator's reference (pExInfo->self) from
// exchangeObjRefPool. numOfOutput is not used.
// NOTE(review): the actual release of the members presumably happens in doDestroyExchangeOperatorInfo()
// once the reference is dropped - confirm where that callback is registered.
void destroyExchangeOperatorInfo(void* param, int32_t numOfOutput) {
  SExchangeInfo* pExInfo = (SExchangeInfo*)param;
  taosRemoveRef(exchangeObjRefPool, pExInfo->self);
}

// Release the members of an exchange info object: the source list, the per-source data info list, the
// result block (when present) and the `ready` semaphore. The SExchangeInfo struct itself is not freed here.
void doDestroyExchangeOperatorInfo(void* param) {
  SExchangeInfo* pExInfo = (SExchangeInfo*)param;

  taosArrayDestroy(pExInfo->pSources);
  taosArrayDestroy(pExInfo->pSourceDataInfo);
  if (pExInfo->pResult != NULL) {
    blockDataDestroy(pExInfo->pResult);
  }

  tsem_destroy(&pExInfo->ready);
}

H
Haojun Liao 已提交
3949 3950
/*
 * Collect the indexes of all function contexts whose function is a pseudo-column function.
 *
 * @param pCtx       array of function contexts
 * @param numOfCols  number of entries in pCtx
 * @return a new array of int32_t indexes (possibly empty), or NULL when the array cannot be allocated
 */
static SArray* setRowTsColumnOutputInfo(SqlFunctionCtx* pCtx, int32_t numOfCols) {
  SArray* pList = taosArrayInit(4, sizeof(int32_t));
  if (pList == NULL) {  // do not push into an array that could not be created
    return NULL;
  }

  for (int32_t i = 0; i < numOfCols; ++i) {
    if (fmIsPseudoColumnFunc(pCtx[i].functionId)) {
      taosArrayPush(pList, &i);
    }
  }

  return pList;
}

L
Liu Jicong 已提交
3960
SOperatorInfo* createProjectOperatorInfo(SOperatorInfo* downstream, SExprInfo* pExprInfo, int32_t num,
3961
                                         SSDataBlock* pResBlock, SLimit* pLimit, SLimit* pSlimit, SNode* pCondition,
dengyihao's avatar
dengyihao 已提交
3962
                                         SExecTaskInfo* pTaskInfo) {
wafwerar's avatar
wafwerar 已提交
3963
  SProjectOperatorInfo* pInfo = taosMemoryCalloc(1, sizeof(SProjectOperatorInfo));
L
Liu Jicong 已提交
3964
  SOperatorInfo*        pOperator = taosMemoryCalloc(1, sizeof(SOperatorInfo));
H
Haojun Liao 已提交
3965 3966 3967
  if (pInfo == NULL || pOperator == NULL) {
    goto _error;
  }
3968

X
Xiaoyu Wang 已提交
3969 3970 3971
  pInfo->limit = *pLimit;
  pInfo->slimit = *pSlimit;
  pInfo->curOffset = pLimit->offset;
H
Haojun Liao 已提交
3972
  pInfo->curSOffset = pSlimit->offset;
H
Haojun Liao 已提交
3973
  pInfo->binfo.pRes = pResBlock;
X
Xiaoyu Wang 已提交
3974
  pInfo->pFilterNode = pCondition;
H
Haojun Liao 已提交
3975 3976 3977

  int32_t numOfCols = num;
  int32_t numOfRows = 4096;
dengyihao's avatar
dengyihao 已提交
3978
  size_t  keyBufSize = sizeof(int64_t) + sizeof(int64_t) + POINTER_BYTES;
3979

3980 3981 3982 3983 3984
  // Make sure the size of SSDataBlock will never exceed the size of 2MB.
  int32_t TWOMB = 2 * 1024 * 1024;
  if (numOfRows * pResBlock->info.rowSize > TWOMB) {
    numOfRows = TWOMB / pResBlock->info.rowSize;
  }
3985
  initResultSizeInfo(pOperator, numOfRows);
3986

3987
  initAggInfo(&pInfo->binfo, &pInfo->aggSup, pExprInfo, numOfCols, pResBlock, keyBufSize, pTaskInfo->id.str);
H
Haojun Liao 已提交
3988
  setFunctionResultOutput(&pInfo->binfo, &pInfo->aggSup, MAIN_SCAN, numOfCols, pTaskInfo);
3989

X
Xiaoyu Wang 已提交
3990 3991
  pInfo->pPseudoColInfo = setRowTsColumnOutputInfo(pInfo->binfo.pCtx, numOfCols);
  pOperator->name = "ProjectOperator";
H
Haojun Liao 已提交
3992
  pOperator->operatorType = QUERY_NODE_PHYSICAL_PLAN_PROJECT;
X
Xiaoyu Wang 已提交
3993 3994 3995 3996 3997 3998
  pOperator->blocking = false;
  pOperator->status = OP_NOT_OPENED;
  pOperator->info = pInfo;
  pOperator->pExpr = pExprInfo;
  pOperator->numOfExprs = num;
  pOperator->pTaskInfo = pTaskInfo;
3999

L
Liu Jicong 已提交
4000 4001
  pOperator->fpSet = createOperatorFpSet(operatorDummyOpenFn, doProjectOperation, NULL, NULL,
                                         destroyProjectOperatorInfo, NULL, NULL, NULL);
L
Liu Jicong 已提交
4002

4003
  int32_t code = appendDownstream(pOperator, &downstream, 1);
H
Haojun Liao 已提交
4004
  if (code != TSDB_CODE_SUCCESS) {
H
Haojun Liao 已提交
4005 4006
    goto _error;
  }
4007 4008

  return pOperator;
H
Haojun Liao 已提交
4009

L
Liu Jicong 已提交
4010
_error:
H
Haojun Liao 已提交
4011 4012
  pTaskInfo->code = TSDB_CODE_OUT_OF_MEMORY;
  return NULL;
4013 4014
}

H
Haojun Liao 已提交
4015 4016
/*
 * getNext function of the indefinite-rows function operator. Every block of the first downstream operator
 * is consumed: optional scalar expressions are evaluated in place on the input block, then the operator's
 * expressions are applied and their output is appended to the result block.
 *
 * Returns the result block when it holds at least one row, otherwise NULL. Errors are raised through
 * longjmp on the task's jump buffer.
 */
static SSDataBlock* doApplyIndefinitFunction(SOperatorInfo* pOperator) {
  SIndefOperatorInfo* pIndefInfo = pOperator->info;
  SOptrBasicInfo*     pBasic = &pIndefInfo->binfo;

  SSDataBlock* pOutput = pBasic->pRes;
  blockDataCleanup(pOutput);

  SExecTaskInfo* pTaskInfo = pOperator->pTaskInfo;
  if (pOperator->status == OP_EXEC_DONE) {
    return NULL;
  }

  int64_t startUs = 0;
  int32_t order = 0;
  int32_t scanFlag = 0;

  if (pOperator->cost.openCost == 0) {
    startUs = taosGetTimestampUs();
  }

  SOperatorInfo* downstream = pOperator->pDownstream[0];

  for (;;) {
    SSDataBlock* pBlock = downstream->fpSet.getNextFn(downstream);
    if (pBlock == NULL) {
      // downstream is exhausted
      doSetOperatorCompleted(pOperator);
      break;
    }

    // scan order and scan flag of the downstream operator for this block
    int32_t code = getTableScanInfo(pOperator->pDownstream[0], &order, &scanFlag);
    if (code != TSDB_CODE_SUCCESS) {
      longjmp(pTaskInfo->env, code);
    }

    // scalar expressions, if any, are evaluated before the main expressions; input and output are pBlock
    if (pIndefInfo->pScalarExpr != NULL) {
      code = projectApplyFunctions(pIndefInfo->pScalarExpr, pBlock, pBlock, pIndefInfo->pScalarCtx,
                                   pIndefInfo->numOfScalarExpr, pIndefInfo->pPseudoColInfo);
      if (code != TSDB_CODE_SUCCESS) {
        longjmp(pTaskInfo->env, code);
      }
    }

    setInputDataBlock(pOperator, pBasic->pCtx, pBlock, order, scanFlag, false);
    blockDataEnsureCapacity(pBasic->pRes, pBasic->pRes->info.rows + pBlock->info.rows);

    code = projectApplyFunctions(pOperator->pExpr, pBasic->pRes, pBlock, pBasic->pCtx, pOperator->numOfExprs,
                                 pIndefInfo->pPseudoColInfo);
    if (code != TSDB_CODE_SUCCESS) {
      longjmp(pTaskInfo->env, code);
    }
  }

  size_t produced = pBasic->pRes->info.rows;
  pOperator->resultInfo.totalRows += produced;

  if (pOperator->cost.openCost == 0) {
    pOperator->cost.openCost = (taosGetTimestampUs() - startUs) / 1000.0;
  }

  if (produced > 0) {
    return pBasic->pRes;
  }
  return NULL;
}

4080 4081
/*
 * Create the "IndefinitOperator" on top of `downstream` from an SIndefRowsFuncPhysiNode.
 *
 * The vector functions of the node become the operator's expressions; the node's optional scalar
 * expressions get their own expression array and function contexts.
 *
 * @return the new operator, or NULL on failure, in which case pTaskInfo->code is set to
 *         TSDB_CODE_OUT_OF_MEMORY.
 */
SOperatorInfo* createIndefinitOutputOperatorInfo(SOperatorInfo* downstream, SPhysiNode* pNode,
                                                 SExecTaskInfo* pTaskInfo) {
  SIndefOperatorInfo* pInfo = taosMemoryCalloc(1, sizeof(SIndefOperatorInfo));
  SOperatorInfo*      pOperator = taosMemoryCalloc(1, sizeof(SOperatorInfo));
  if (pInfo == NULL || pOperator == NULL) {
    goto _error;
  }

  SIndefRowsFuncPhysiNode* pPhyNode = (SIndefRowsFuncPhysiNode*)pNode;

  int32_t    numOfExpr = 0;
  SExprInfo* pExprInfo = createExprInfo(pPhyNode->pVectorFuncs, NULL, &numOfExpr);

  int32_t numOfScalarExpr = 0;
  if (pPhyNode->pExprs != NULL) {
    pInfo->pScalarExpr = createExprInfo(pPhyNode->pExprs, NULL, &numOfScalarExpr);
    pInfo->pScalarCtx = createSqlFunctionCtx(pInfo->pScalarExpr, numOfScalarExpr, &pInfo->rowCellInfoOffset);
  }

  SSDataBlock* pResBlock = createResDataBlock(pPhyNode->node.pOutputDataBlockDesc);
  if (pResBlock == NULL) {  // the block is dereferenced below
    goto _error;
  }

  int32_t numOfRows = 4096;
  size_t  keyBufSize = sizeof(int64_t) + sizeof(int64_t) + POINTER_BYTES;

  // Make sure the size of SSDataBlock will never exceed the size of 2MB.
  int32_t TWOMB = 2 * 1024 * 1024;
  if (numOfRows * pResBlock->info.rowSize > TWOMB) {
    numOfRows = TWOMB / pResBlock->info.rowSize;
  }
  initResultSizeInfo(pOperator, numOfRows);

  // a failed initialisation must not be followed by setFunctionResultOutput(), which relies on it
  int32_t code =
      initAggInfo(&pInfo->binfo, &pInfo->aggSup, pExprInfo, numOfExpr, pResBlock, keyBufSize, pTaskInfo->id.str);
  if (code != TSDB_CODE_SUCCESS) {
    goto _error;
  }
  setFunctionResultOutput(&pInfo->binfo, &pInfo->aggSup, MAIN_SCAN, numOfExpr, pTaskInfo);

  pInfo->binfo.pRes = pResBlock;
  pInfo->numOfScalarExpr = numOfScalarExpr;
  pInfo->pPseudoColInfo = setRowTsColumnOutputInfo(pInfo->binfo.pCtx, numOfExpr);

  pOperator->name = "IndefinitOperator";
  pOperator->operatorType = QUERY_NODE_PHYSICAL_PLAN_PROJECT;
  pOperator->blocking = false;
  pOperator->status = OP_NOT_OPENED;
  pOperator->info = pInfo;
  pOperator->pExpr = pExprInfo;
  pOperator->numOfExprs = numOfExpr;
  pOperator->pTaskInfo = pTaskInfo;

  pOperator->fpSet = createOperatorFpSet(operatorDummyOpenFn, doApplyIndefinitFunction, NULL, NULL,
                                         destroyIndefinitOperatorInfo, NULL, NULL, NULL);

  code = appendDownstream(pOperator, &downstream, 1);
  if (code != TSDB_CODE_SUCCESS) {
    goto _error;
  }

  return pOperator;

_error:
  // NOTE(review): only the two shells are released; expression arrays, contexts and the result block that
  // were created before the failure are not freed on this path.
  taosMemoryFree(pInfo);
  taosMemoryFree(pOperator);
  pTaskInfo->code = TSDB_CODE_OUT_OF_MEMORY;
  return NULL;
}

4145
static int32_t initFillInfo(SFillOperatorInfo* pInfo, SExprInfo* pExpr, int32_t numOfCols, SNodeListNode* pValNode,
L
Liu Jicong 已提交
4146
                            STimeWindow win, int32_t capacity, const char* id, SInterval* pInterval, int32_t fillType) {
4147
  SFillColInfo* pColInfo = createFillColInfo(pExpr, numOfCols, pValNode);
H
Haojun Liao 已提交
4148 4149

  STimeWindow w = TSWINDOW_INITIALIZER;
4150
  getAlignQueryTimeWindow(pInterval, pInterval->precision, win.skey, &w);
H
Haojun Liao 已提交
4151 4152

  int32_t order = TSDB_ORDER_ASC;
4153
  pInfo->pFillInfo = taosCreateFillInfo(order, w.skey, 0, capacity, numOfCols, pInterval, fillType, pColInfo, id);
H
Haojun Liao 已提交
4154

wafwerar's avatar
wafwerar 已提交
4155
  pInfo->p = taosMemoryCalloc(numOfCols, POINTER_BYTES);
H
Haojun Liao 已提交
4156
  if (pInfo->pFillInfo == NULL || pInfo->p == NULL) {
H
Haojun Liao 已提交
4157 4158
    taosMemoryFree(pInfo->pFillInfo);
    taosMemoryFree(pInfo->p);
H
Haojun Liao 已提交
4159 4160 4161 4162 4163 4164
    return TSDB_CODE_OUT_OF_MEMORY;
  } else {
    return TSDB_CODE_SUCCESS;
  }
}

L
Liu Jicong 已提交
4165
SOperatorInfo* createFillOperatorInfo(SOperatorInfo* downstream, SExprInfo* pExpr, int32_t numOfCols,
X
Xiaoyu Wang 已提交
4166 4167 4168
                                      SInterval* pInterval, STimeWindow* pWindow, SSDataBlock* pResBlock,
                                      int32_t fillType, SNodeListNode* pValueNode, bool multigroupResult,
                                      SExecTaskInfo* pTaskInfo) {
wafwerar's avatar
wafwerar 已提交
4169
  SFillOperatorInfo* pInfo = taosMemoryCalloc(1, sizeof(SFillOperatorInfo));
L
Liu Jicong 已提交
4170
  SOperatorInfo*     pOperator = taosMemoryCalloc(1, sizeof(SOperatorInfo));
H
Haojun Liao 已提交
4171

L
Liu Jicong 已提交
4172
  pInfo->pRes = pResBlock;
4173 4174
  pInfo->multigroupResult = multigroupResult;

4175 4176
  int32_t type = TSDB_FILL_NONE;
  switch (fillType) {
dengyihao's avatar
dengyihao 已提交
4177 4178 4179 4180 4181 4182 4183 4184 4185 4186 4187 4188 4189 4190 4191 4192 4193 4194
    case FILL_MODE_PREV:
      type = TSDB_FILL_PREV;
      break;
    case FILL_MODE_NONE:
      type = TSDB_FILL_NONE;
      break;
    case FILL_MODE_NULL:
      type = TSDB_FILL_NULL;
      break;
    case FILL_MODE_NEXT:
      type = TSDB_FILL_NEXT;
      break;
    case FILL_MODE_VALUE:
      type = TSDB_FILL_SET_VALUE;
      break;
    case FILL_MODE_LINEAR:
      type = TSDB_FILL_LINEAR;
      break;
4195 4196 4197 4198
    default:
      type = TSDB_FILL_NONE;
  }

H
Haojun Liao 已提交
4199
  SResultInfo* pResultInfo = &pOperator->resultInfo;
4200 4201
  initResultSizeInfo(pOperator, 4096);

X
Xiaoyu Wang 已提交
4202 4203
  int32_t code = initFillInfo(pInfo, pExpr, numOfCols, pValueNode, *pWindow, pResultInfo->capacity, pTaskInfo->id.str,
                              pInterval, type);
4204 4205 4206
  if (code != TSDB_CODE_SUCCESS) {
    goto _error;
  }
4207

dengyihao's avatar
dengyihao 已提交
4208
  pOperator->name = "FillOperator";
4209
  pOperator->blocking = false;
dengyihao's avatar
dengyihao 已提交
4210
  pOperator->status = OP_NOT_OPENED;
4211
  pOperator->operatorType = QUERY_NODE_PHYSICAL_PLAN_FILL;
dengyihao's avatar
dengyihao 已提交
4212
  pOperator->pExpr = pExpr;
4213
  pOperator->numOfExprs = numOfCols;
dengyihao's avatar
dengyihao 已提交
4214
  pOperator->info = pInfo;
H
Haojun Liao 已提交
4215

L
Liu Jicong 已提交
4216 4217
  pOperator->fpSet =
      createOperatorFpSet(operatorDummyOpenFn, doFill, NULL, NULL, destroySFillOperatorInfo, NULL, NULL, NULL);
4218
  pOperator->pTaskInfo = pTaskInfo;
4219
  code = appendDownstream(pOperator, &downstream, 1);
4220
  return pOperator;
H
Haojun Liao 已提交
4221

L
Liu Jicong 已提交
4222
_error:
wafwerar's avatar
wafwerar 已提交
4223 4224
  taosMemoryFreeClear(pOperator);
  taosMemoryFreeClear(pInfo);
H
Haojun Liao 已提交
4225
  return NULL;
4226 4227
}

L
Liu Jicong 已提交
4228 4229
static SResSchema createResSchema(int32_t type, int32_t bytes, int32_t slotId, int32_t scale, int32_t precision,
                                  const char* name) {
H
Haojun Liao 已提交
4230
  SResSchema s = {0};
dengyihao's avatar
dengyihao 已提交
4231 4232 4233 4234
  s.scale = scale;
  s.type = type;
  s.bytes = bytes;
  s.slotId = slotId;
H
Haojun Liao 已提交
4235
  s.precision = precision;
H
Haojun Liao 已提交
4236 4237 4238 4239
  strncpy(s.name, name, tListLen(s.name));

  return s;
}
H
Haojun Liao 已提交
4240

4241
/*
 * Allocate a zero-initialised SColumn and fill it from the given block/slot/column ids and the
 * data type description in pType.
 *
 * Returns the new column, or NULL with terrno set to TSDB_CODE_OUT_OF_MEMORY when the allocation
 * fails.
 */
static SColumn* createColumn(int32_t blockId, int32_t slotId, int32_t colId, SDataType* pType) {
  SColumn* pColumn = taosMemoryCalloc(1, sizeof(SColumn));
  if (pColumn != NULL) {
    // location of the column
    pColumn->dataBlockId = blockId;
    pColumn->slotId = slotId;
    pColumn->colId = colId;

    // data type attributes
    pColumn->type = pType->type;
    pColumn->bytes = pType->bytes;
    pColumn->scale = pType->scale;
    pColumn->precision = pType->precision;
    return pColumn;
  }

  terrno = TSDB_CODE_OUT_OF_MEMORY;
  return NULL;
}

H
Haojun Liao 已提交
4259
/*
 * Release a (possibly partially built) expression array produced by createExprInfo().
 * All entries come from a zeroed allocation, so members that were never set are NULL.
 * NOTE(review): value parameters filled by nodesValueNodeToVariant may own additional memory that is
 * not released here -- confirm against the variant API.
 */
static void doFreePartialExprInfo(SExprInfo* pExprs, int32_t numOfExprs) {
  for (int32_t i = 0; i < numOfExprs; ++i) {
    SExprInfo* pExp = &pExprs[i];

    if (pExp->base.pParam != NULL) {
      for (int32_t j = 0; j < pExp->base.numOfParams; ++j) {
        if (pExp->base.pParam[j].type == FUNC_PARAM_TYPE_COLUMN) {
          taosMemoryFree(pExp->base.pParam[j].pCol);
        }
      }
      taosMemoryFree(pExp->base.pParam);
    }

    taosMemoryFree(pExp->pExpr);
  }

  taosMemoryFree(pExprs);
}

/*
 * Translate the target nodes of pNodeList, followed by those of pGroupKeys (may be NULL), into an
 * array of SExprInfo.
 *
 * Supported target expressions are column, value, function and operator nodes; any other node type
 * trips ASSERT(0).
 *
 * numOfExprs  [out] number of entries in the returned array; reset to 0 when an allocation fails
 *
 * Returns the expression array. When an allocation fails, everything built so far is released,
 * terrno is set to TSDB_CODE_OUT_OF_MEMORY and NULL is returned.
 */
SExprInfo* createExprInfo(SNodeList* pNodeList, SNodeList* pGroupKeys, int32_t* numOfExprs) {
  int32_t numOfFuncs = LIST_LENGTH(pNodeList);
  int32_t numOfGroupKeys = 0;
  if (pGroupKeys != NULL) {
    numOfGroupKeys = LIST_LENGTH(pGroupKeys);
  }

  *numOfExprs = numOfFuncs + numOfGroupKeys;
  SExprInfo* pExprs = taosMemoryCalloc(*numOfExprs, sizeof(SExprInfo));
  if (pExprs == NULL) {
    // a zero-sized request may legitimately yield NULL; only a non-empty one is an error
    if (*numOfExprs > 0) {
      terrno = TSDB_CODE_OUT_OF_MEMORY;
      *numOfExprs = 0;
    }
    return NULL;
  }

  for (int32_t i = 0; i < (*numOfExprs); ++i) {
    // function targets come first, group keys follow
    STargetNode* pTargetNode = NULL;
    if (i < numOfFuncs) {
      pTargetNode = (STargetNode*)nodesListGetNode(pNodeList, i);
    } else {
      pTargetNode = (STargetNode*)nodesListGetNode(pGroupKeys, i - numOfFuncs);
    }

    SExprInfo* pExp = &pExprs[i];

    pExp->pExpr = taosMemoryCalloc(1, sizeof(tExprNode));
    if (pExp->pExpr == NULL) {
      goto _error;
    }

    pExp->pExpr->_function.num = 1;
    pExp->pExpr->_function.functionId = -1;

    int32_t type = nodeType(pTargetNode->pExpr);
    // it is a project query, or group by column
    if (type == QUERY_NODE_COLUMN) {
      pExp->pExpr->nodeType = QUERY_NODE_COLUMN;
      SColumnNode* pColNode = (SColumnNode*)pTargetNode->pExpr;

      pExp->base.pParam = taosMemoryCalloc(1, sizeof(SFunctParam));
      if (pExp->base.pParam == NULL) {
        goto _error;
      }
      pExp->base.numOfParams = 1;

      SDataType* pType = &pColNode->node.resType;
      pExp->base.resSchema = createResSchema(pType->type, pType->bytes, pTargetNode->slotId, pType->scale,
                                             pType->precision, pColNode->colName);
      pExp->base.pParam[0].pCol = createColumn(pColNode->dataBlockId, pColNode->slotId, pColNode->colId, pType);
      pExp->base.pParam[0].type = FUNC_PARAM_TYPE_COLUMN;
      if (pExp->base.pParam[0].pCol == NULL) {
        goto _error;
      }
    } else if (type == QUERY_NODE_VALUE) {
      pExp->pExpr->nodeType = QUERY_NODE_VALUE;
      SValueNode* pValNode = (SValueNode*)pTargetNode->pExpr;

      pExp->base.pParam = taosMemoryCalloc(1, sizeof(SFunctParam));
      if (pExp->base.pParam == NULL) {
        goto _error;
      }
      pExp->base.numOfParams = 1;

      SDataType* pType = &pValNode->node.resType;
      pExp->base.resSchema = createResSchema(pType->type, pType->bytes, pTargetNode->slotId, pType->scale,
                                             pType->precision, pValNode->node.aliasName);
      pExp->base.pParam[0].type = FUNC_PARAM_TYPE_VALUE;
      nodesValueNodeToVariant(pValNode, &pExp->base.pParam[0].param);
    } else if (type == QUERY_NODE_FUNCTION) {
      pExp->pExpr->nodeType = QUERY_NODE_FUNCTION;
      SFunctionNode* pFuncNode = (SFunctionNode*)pTargetNode->pExpr;

      SDataType* pType = &pFuncNode->node.resType;
      pExp->base.resSchema = createResSchema(pType->type, pType->bytes, pTargetNode->slotId, pType->scale,
                                             pType->precision, pFuncNode->node.aliasName);

      pExp->pExpr->_function.functionId = pFuncNode->funcId;
      pExp->pExpr->_function.pFunctNode = pFuncNode;

      // pExpr is zero-initialised: copying at most (capacity - 1) bytes keeps the name NUL-terminated
      strncpy(pExp->pExpr->_function.functionName, pFuncNode->functionName,
              tListLen(pExp->pExpr->_function.functionName) - 1);

      // todo refactor: add the parameter for tbname function
      // NOTE(review): this replaces any parameter list the node already carries without releasing it.
      if (strcmp(pExp->pExpr->_function.functionName, "tbname") == 0) {
        pFuncNode->pParameterList = nodesMakeList();
        ASSERT(LIST_LENGTH(pFuncNode->pParameterList) == 0);
        SValueNode* res = (SValueNode*)nodesMakeNode(QUERY_NODE_VALUE);
        if (NULL == res) {  // todo handle error
        } else {
          res->node.resType = (SDataType){.bytes = sizeof(int64_t), .type = TSDB_DATA_TYPE_BIGINT};
          nodesListAppend(pFuncNode->pParameterList, (SNode*)res);
        }
      }

      int32_t numOfParam = LIST_LENGTH(pFuncNode->pParameterList);

      pExp->base.pParam = taosMemoryCalloc(numOfParam, sizeof(SFunctParam));
      if (pExp->base.pParam == NULL && numOfParam > 0) {
        goto _error;
      }
      pExp->base.numOfParams = numOfParam;

      for (int32_t j = 0; j < numOfParam; ++j) {
        SNode* p1 = nodesListGetNode(pFuncNode->pParameterList, j);
        if (p1->type == QUERY_NODE_COLUMN) {
          SColumnNode* pcn = (SColumnNode*)p1;

          pExp->base.pParam[j].type = FUNC_PARAM_TYPE_COLUMN;
          pExp->base.pParam[j].pCol = createColumn(pcn->dataBlockId, pcn->slotId, pcn->colId, &pcn->node.resType);
          if (pExp->base.pParam[j].pCol == NULL) {
            goto _error;
          }
        } else if (p1->type == QUERY_NODE_VALUE) {
          SValueNode* pvn = (SValueNode*)p1;
          pExp->base.pParam[j].type = FUNC_PARAM_TYPE_VALUE;
          nodesValueNodeToVariant(pvn, &pExp->base.pParam[j].param);
        }
      }
    } else if (type == QUERY_NODE_OPERATOR) {
      pExp->pExpr->nodeType = QUERY_NODE_OPERATOR;
      SOperatorNode* pNode = (SOperatorNode*)pTargetNode->pExpr;

      // one zeroed parameter slot is reserved; the operator tree itself is referenced via _optrRoot
      pExp->base.pParam = taosMemoryCalloc(1, sizeof(SFunctParam));
      if (pExp->base.pParam == NULL) {
        goto _error;
      }
      pExp->base.numOfParams = 1;

      SDataType* pType = &pNode->node.resType;
      pExp->base.resSchema = createResSchema(pType->type, pType->bytes, pTargetNode->slotId, pType->scale,
                                             pType->precision, pNode->node.aliasName);
      pExp->pExpr->_optrRoot.pRootNode = pTargetNode->pExpr;
    } else {
      ASSERT(0);
    }
  }

  return pExprs;

_error:
  doFreePartialExprInfo(pExprs, *numOfExprs);
  *numOfExprs = 0;
  terrno = TSDB_CODE_OUT_OF_MEMORY;
  return NULL;
}

D
dapan1121 已提交
4373
/*
 * Allocate and initialise the task info of an execution task.
 *
 * queryId/taskId  ids of the query and the task; both are rendered into the id string
 *                 ("TID:0x... QID:0x...") stored in pTaskInfo->id.str
 * model           execution model, stored in pTaskInfo->execModel
 * dbFName         database name; a copy is stored in pTaskInfo->schemaVer.dbname (left NULL when
 *                 dbFName is NULL)
 *
 * Returns the new task info, or NULL with terrno set to TSDB_CODE_OUT_OF_MEMORY when an allocation
 * fails.
 */
static SExecTaskInfo* createExecTaskInfo(uint64_t queryId, uint64_t taskId, EOPTR_EXEC_MODEL model, char* dbFName) {
  enum { TASK_ID_STR_LEN = 128 };

  SExecTaskInfo* pTaskInfo = taosMemoryCalloc(1, sizeof(SExecTaskInfo));
  char*          p = taosMemoryCalloc(1, TASK_ID_STR_LEN);
  if (pTaskInfo == NULL || p == NULL) {
    goto _error;
  }

  setTaskStatus(pTaskInfo, TASK_NOT_COMPLETED);

  pTaskInfo->cost.created = taosGetTimestampMs();
  pTaskInfo->id.queryId = queryId;
  pTaskInfo->execModel = model;

  snprintf(p, TASK_ID_STR_LEN, "TID:0x%" PRIx64 " QID:0x%" PRIx64, taskId, queryId);
  pTaskInfo->id.str = p;

  // The name is duplicated last so that the failure path never has to release it.
  if (dbFName != NULL) {
    pTaskInfo->schemaVer.dbname = strdup(dbFName);
    if (pTaskInfo->schemaVer.dbname == NULL) {
      goto _error;
    }
  }

  return pTaskInfo;

_error:
  taosMemoryFree(p);
  taosMemoryFree(pTaskInfo);
  terrno = TSDB_CODE_OUT_OF_MEMORY;
  return NULL;
}
H
Haojun Liao 已提交
4388

H
Hongze Cheng 已提交
4389 4390 4391
static STsdbReader* doCreateDataReader(STableScanPhysiNode* pTableScanNode, SReadHandle* pHandle,
                                       STableListInfo* pTableListInfo, uint64_t queryId, uint64_t taskId,
                                       SNode* pTagCond);
H
Haojun Liao 已提交
4392

H
Haojun Liao 已提交
4393
static SArray* extractColumnInfo(SNodeList* pNodeList);
4394

4395
static SArray* createSortInfo(SNodeList* pNodeList);
4396

D
dapan1121 已提交
4397
/*
 * Look up table `uid` in the meta store and record its name and schema versions in
 * pTaskInfo->schemaVer.
 *
 * For a super table the row- and tag-schema versions are taken from its own entry; for a child table
 * they are taken from the entry of its super table; for any other table type only the row-schema
 * version is recorded.
 *
 * Returns TSDB_CODE_SUCCESS, the error code of a failed meta lookup, or TSDB_CODE_OUT_OF_MEMORY when
 * the table name cannot be duplicated.
 */
int32_t extractTableSchemaVersion(SReadHandle* pHandle, uint64_t uid, SExecTaskInfo* pTaskInfo) {
  SMetaReader mr = {0};
  metaReaderInit(&mr, pHandle->meta, 0);

  int32_t code = metaGetTableEntryByUid(&mr, uid);
  if (code) {
    goto _end;
  }

  // NOTE(review): a table name stored by an earlier call is overwritten without being released.
  pTaskInfo->schemaVer.tablename = strdup(mr.me.name);
  if (pTaskInfo->schemaVer.tablename == NULL) {
    code = TSDB_CODE_OUT_OF_MEMORY;
    goto _end;
  }

  if (mr.me.type == TSDB_SUPER_TABLE) {
    pTaskInfo->schemaVer.sversion = mr.me.stbEntry.schemaRow.version;
    pTaskInfo->schemaVer.tversion = mr.me.stbEntry.schemaTag.version;
  } else if (mr.me.type == TSDB_CHILD_TABLE) {
    // the schema lives in the super table entry; without it there is nothing valid to read
    tb_uid_t suid = mr.me.ctbEntry.suid;
    code = metaGetTableEntryByUid(&mr, suid);
    if (code) {
      goto _end;
    }

    pTaskInfo->schemaVer.sversion = mr.me.stbEntry.schemaRow.version;
    pTaskInfo->schemaVer.tversion = mr.me.stbEntry.schemaTag.version;
  } else {
    pTaskInfo->schemaVer.sversion = mr.me.ntbEntry.schemaRow.version;
  }

_end:
  metaReaderClear(&mr);
  return code;
}

X
Xiaoyu Wang 已提交
4425 4426
/*
 * Compute a group id for every table in pTableListInfo->pTableList from the values of the group-key
 * columns (tags, or the table name for a column named "tbname") and store it in
 * pTableListInfo->map, keyed by the table uid.
 *
 * The key buffer is laid out as one null-flag byte per group column, followed by the concatenated
 * column values of the current table.
 *
 * groupKey  array of SColumn describing the group columns; NULL means there is nothing to do
 *
 * Returns TSDB_CODE_SUCCESS, TSDB_CODE_OUT_OF_MEMORY when the map or the key buffer cannot be
 * allocated, or the error code of a failed meta lookup.
 */
int32_t generateGroupIdMap(STableListInfo* pTableListInfo, SReadHandle* pHandle, SArray* groupKey) {
  if (groupKey == NULL) {
    return TSDB_CODE_SUCCESS;
  }

  pTableListInfo->map = taosHashInit(32, taosGetDefaultHashFunction(TSDB_DATA_TYPE_BINARY), false, HASH_NO_LOCK);
  if (pTableListInfo->map == NULL) {
    return TSDB_CODE_OUT_OF_MEMORY;
  }

  int32_t keyLen = 0;
  int32_t numOfGroupCols = taosArrayGetSize(groupKey);
  for (int32_t j = 0; j < numOfGroupCols; ++j) {
    SColumn* pCol = taosArrayGet(groupKey, j);
    keyLen += pCol->bytes;  // actual data
  }

  int32_t nullFlagSize = sizeof(int8_t) * numOfGroupCols;
  keyLen += nullFlagSize;  // null_flag of every column

  void* keyBuf = taosMemoryCalloc(1, keyLen);
  if (keyBuf == NULL) {
    return TSDB_CODE_OUT_OF_MEMORY;
  }

  int32_t code = TSDB_CODE_SUCCESS;
  int32_t numOfTables = taosArrayGetSize(pTableListInfo->pTableList);
  for (int32_t i = 0; i < numOfTables; i++) {
    STableKeyInfo* info = taosArrayGet(pTableListInfo->pTableList, i);

    SMetaReader mr = {0};
    metaReaderInit(&mr, pHandle->meta, 0);
    code = metaGetTableEntryByUid(&mr, info->uid);
    if (code) {
      // without the table entry neither the name nor the tags can be read
      metaReaderClear(&mr);
      break;
    }

    char* isNull = (char*)keyBuf;
    char* pStart = (char*)keyBuf + nullFlagSize;
    for (int32_t j = 0; j < numOfGroupCols; ++j) {
      SColumn* pCol = taosArrayGet(groupKey, j);

      // the null flags are indexed by group column (j), never by table (i)
      if (strcmp(pCol->name, "tbname") == 0) {
        isNull[j] = 0;
        memcpy(pStart, mr.me.name, strlen(mr.me.name));
        pStart += strlen(mr.me.name);
      } else {
        STagVal tagVal = {0};
        tagVal.cid = pCol->colId;
        const char* p = metaGetTableTagVal(&mr.me, pCol->type, &tagVal);
        if (p == NULL) {
          isNull[j] = 1;
          continue;
        }

        isNull[j] = 0;
        if (pCol->type == TSDB_DATA_TYPE_JSON) {
          // todo for json: json tag values currently contribute no bytes to the key
        } else if (IS_VAR_DATA_TYPE(pCol->type)) {
          ASSERT(tagVal.nData <= pCol->bytes);
          memcpy(pStart, tagVal.pData, tagVal.nData);
          pStart += tagVal.nData;
        } else {
          memcpy(pStart, &(tagVal.i64), pCol->bytes);
          pStart += pCol->bytes;
        }
      }
    }

    int32_t   len = (int32_t)(pStart - (char*)keyBuf);
    uint64_t* groupId = taosHashGet(pTableListInfo->map, keyBuf, len);
    if (groupId) {
      taosHashPut(pTableListInfo->map, &(info->uid), sizeof(uint64_t), groupId, sizeof(uint64_t));
    } else {
      uint64_t tmpId = calcGroupId(keyBuf, len);
      taosHashPut(pTableListInfo->map, &(info->uid), sizeof(uint64_t), &tmpId, sizeof(uint64_t));
    }

    metaReaderClear(&mr);
  }

  taosMemoryFree(keyBuf);
  return code;
}

H
Haojun Liao 已提交
4504
SOperatorInfo* createOperatorTree(SPhysiNode* pPhyNode, SExecTaskInfo* pTaskInfo, SReadHandle* pHandle,
wmmhello's avatar
wmmhello 已提交
4505
                                  uint64_t queryId, uint64_t taskId, STableListInfo* pTableListInfo, SNode* pTagCond) {
4506 4507
  int32_t type = nodeType(pPhyNode);

X
Xiaoyu Wang 已提交
4508
  if (pPhyNode->pChildren == NULL || LIST_LENGTH(pPhyNode->pChildren) == 0) {
H
Haojun Liao 已提交
4509
    if (QUERY_NODE_PHYSICAL_PLAN_TABLE_SCAN == type) {
dengyihao's avatar
dengyihao 已提交
4510
      STableScanPhysiNode* pTableScanNode = (STableScanPhysiNode*)pPhyNode;
H
Haojun Liao 已提交
4511

H
Hongze Cheng 已提交
4512
      STsdbReader* pDataReader =
dengyihao's avatar
dengyihao 已提交
4513
          doCreateDataReader(pTableScanNode, pHandle, pTableListInfo, (uint64_t)queryId, taskId, pTagCond);
4514
      if (pDataReader == NULL && terrno != 0) {
4515
        pTaskInfo->code = terrno;
4516 4517
        return NULL;
      }
wmmhello's avatar
wmmhello 已提交
4518

D
dapan1121 已提交
4519 4520
      int32_t code = extractTableSchemaVersion(pHandle, pTableScanNode->scan.uid, pTaskInfo);
      if (code) {
H
refact  
Hongze Cheng 已提交
4521
        tsdbReaderClose(pDataReader);
4522
        pTaskInfo->code = terrno;
D
dapan1121 已提交
4523 4524
        return NULL;
      }
wmmhello's avatar
wmmhello 已提交
4525

X
Xiaoyu Wang 已提交
4526 4527
      SArray* groupKeys = extractPartitionColInfo(pTableScanNode->pPartitionTags);
      code = generateGroupIdMap(pTableListInfo, pHandle, groupKeys);  // todo for json
4528
      taosArrayDestroy(groupKeys);
X
Xiaoyu Wang 已提交
4529
      if (code) {
H
refact  
Hongze Cheng 已提交
4530
        tsdbReaderClose(pDataReader);
4531
        pTaskInfo->code = terrno;
wmmhello's avatar
wmmhello 已提交
4532 4533 4534
        return NULL;
      }

X
Xiaoyu Wang 已提交
4535
      SOperatorInfo*  pOperator = createTableScanOperatorInfo(pTableScanNode, pDataReader, pHandle, pTaskInfo);
4536 4537
      STableScanInfo* pScanInfo = pOperator->info;
      pTaskInfo->cost.pRecoder = &pScanInfo->readRecorder;
S
slzhou 已提交
4538 4539 4540
      return pOperator;
    } else if (QUERY_NODE_PHYSICAL_PLAN_TABLE_MERGE_SCAN == type) {
      STableMergeScanPhysiNode* pTableScanNode = (STableMergeScanPhysiNode*)pPhyNode;
4541

S
shenglian zhou 已提交
4542 4543 4544
      SArray* dataReaders = taosArrayInit(8, POINTER_BYTES);
      createMultipleDataReaders(pTableScanNode, pHandle, pTableListInfo, dataReaders, queryId, taskId, pTagCond);
      extractTableSchemaVersion(pHandle, pTableScanNode->scan.uid, pTaskInfo);
X
Xiaoyu Wang 已提交
4545
      SArray* groupKeys = extractPartitionColInfo(pTableScanNode->pPartitionTags);
X
Xiaoyu Wang 已提交
4546
      generateGroupIdMap(pTableListInfo, pHandle, groupKeys);  // todo for json
4547
      taosArrayDestroy(groupKeys);
X
Xiaoyu Wang 已提交
4548
      SOperatorInfo*  pOperator = createTableMergeScanOperatorInfo(pTableScanNode, dataReaders, pHandle, pTaskInfo);
4549 4550 4551
      STableScanInfo* pScanInfo = pOperator->info;
      pTaskInfo->cost.pRecoder = &pScanInfo->readRecorder;
      return pOperator;
H
Haojun Liao 已提交
4552
    } else if (QUERY_NODE_PHYSICAL_PLAN_EXCHANGE == type) {
4553
      return createExchangeOperatorInfo(pHandle->pMsgCb->clientRpc, (SExchangePhysiNode*)pPhyNode, pTaskInfo);
H
Haojun Liao 已提交
4554
    } else if (QUERY_NODE_PHYSICAL_PLAN_STREAM_SCAN == type) {
X
Xiaoyu Wang 已提交
4555
      SScanPhysiNode*      pScanPhyNode = (SScanPhysiNode*)pPhyNode;  // simple child table.
5
54liuyao 已提交
4556
      STableScanPhysiNode* pTableScanNode = (STableScanPhysiNode*)pPhyNode;
4557
      STimeWindowAggSupp   twSup = {
S
shenglian zhou 已提交
4558
            .waterMark = pTableScanNode->watermark, .calTrigger = pTableScanNode->triggerType, .maxTs = INT64_MIN};
H
Hongze Cheng 已提交
4559
      STsdbReader* pDataReader = NULL;
5
54liuyao 已提交
4560
      if (pHandle->vnode) {
wmmhello's avatar
wmmhello 已提交
4561
        pDataReader = doCreateDataReader(pTableScanNode, pHandle, pTableListInfo, (uint64_t)queryId, taskId, pTagCond);
wmmhello's avatar
wmmhello 已提交
4562 4563
      } else {
        getTableList(pHandle->meta, pScanPhyNode->tableType, pScanPhyNode->uid, pTableListInfo, pTagCond);
5
54liuyao 已提交
4564
      }
4565

5
54liuyao 已提交
4566
      if (pDataReader == NULL && terrno != 0) {
4567
        qDebug("%s pDataReader is NULL", GET_TASKID(pTaskInfo));
5
54liuyao 已提交
4568 4569
        // return NULL;
      } else {
4570
        qDebug("%s pDataReader is not NULL", GET_TASKID(pTaskInfo));
5
54liuyao 已提交
4571
      }
4572

X
Xiaoyu Wang 已提交
4573 4574
      SArray* groupKeys = extractPartitionColInfo(pTableScanNode->pPartitionTags);
      int32_t code = generateGroupIdMap(pTableListInfo, pHandle, groupKeys);  // todo for json
4575
      taosArrayDestroy(groupKeys);
X
Xiaoyu Wang 已提交
4576
      if (code) {
H
refact  
Hongze Cheng 已提交
4577
        tsdbReaderClose(pDataReader);
4578 4579
        return NULL;
      }
4580

X
Xiaoyu Wang 已提交
4581
      SOperatorInfo* pOperator = createStreamScanOperatorInfo(pDataReader, pHandle, pTableScanNode, pTaskInfo, &twSup);
4582

H
Haojun Liao 已提交
4583
      return pOperator;
H
Haojun Liao 已提交
4584
    } else if (QUERY_NODE_PHYSICAL_PLAN_SYSTABLE_SCAN == type) {
L
Liu Jicong 已提交
4585
      SSystemTableScanPhysiNode* pSysScanPhyNode = (SSystemTableScanPhysiNode*)pPhyNode;
4586
      return createSysTableScanOperatorInfo(pHandle, pSysScanPhyNode, pTaskInfo);
4587
    } else if (QUERY_NODE_PHYSICAL_PLAN_TAG_SCAN == type) {
X
Xiaoyu Wang 已提交
4588
      STagScanPhysiNode* pScanPhyNode = (STagScanPhysiNode*)pPhyNode;
4589

X
Xiaoyu Wang 已提交
4590 4591
      int32_t code = getTableList(pHandle->meta, pScanPhyNode->tableType, pScanPhyNode->uid, pTableListInfo,
                                  pScanPhyNode->node.pConditions);
4592 4593 4594 4595
      if (code != TSDB_CODE_SUCCESS) {
        return NULL;
      }

4596
      return createTagScanOperatorInfo(pHandle, pScanPhyNode, pTableListInfo, pTaskInfo);
H
Haojun Liao 已提交
4597 4598
    } else {
      ASSERT(0);
H
Haojun Liao 已提交
4599 4600 4601
    }
  }

4602 4603
  int32_t num = 0;
  size_t  size = LIST_LENGTH(pPhyNode->pChildren);
H
Haojun Liao 已提交
4604

4605
  SOperatorInfo** ops = taosMemoryCalloc(size, POINTER_BYTES);
dengyihao's avatar
dengyihao 已提交
4606
  for (int32_t i = 0; i < size; ++i) {
4607
    SPhysiNode* pChildNode = (SPhysiNode*)nodesListGetNode(pPhyNode->pChildren, i);
wmmhello's avatar
wmmhello 已提交
4608
    ops[i] = createOperatorTree(pChildNode, pTaskInfo, pHandle, queryId, taskId, pTableListInfo, pTagCond);
4609 4610 4611
    if (ops[i] == NULL) {
      return NULL;
    }
4612
  }
H
Haojun Liao 已提交
4613

4614
  SOperatorInfo* pOptr = NULL;
H
Haojun Liao 已提交
4615
  if (QUERY_NODE_PHYSICAL_PLAN_PROJECT == type) {
dengyihao's avatar
dengyihao 已提交
4616 4617
    SProjectPhysiNode* pProjPhyNode = (SProjectPhysiNode*)pPhyNode;
    SExprInfo*         pExprInfo = createExprInfo(pProjPhyNode->pProjections, NULL, &num);
H
Haojun Liao 已提交
4618

4619
    SSDataBlock* pResBlock = createResDataBlock(pPhyNode->pOutputDataBlockDesc);
dengyihao's avatar
dengyihao 已提交
4620 4621
    SLimit       limit = {.limit = pProjPhyNode->limit, .offset = pProjPhyNode->offset};
    SLimit       slimit = {.limit = pProjPhyNode->slimit, .offset = pProjPhyNode->soffset};
X
Xiaoyu Wang 已提交
4622 4623
    pOptr = createProjectOperatorInfo(ops[0], pExprInfo, num, pResBlock, &limit, &slimit,
                                      pProjPhyNode->node.pConditions, pTaskInfo);
4624
  } else if (QUERY_NODE_PHYSICAL_PLAN_HASH_AGG == type) {
H
Haojun Liao 已提交
4625 4626
    SAggPhysiNode* pAggNode = (SAggPhysiNode*)pPhyNode;
    SExprInfo*     pExprInfo = createExprInfo(pAggNode->pAggFuncs, pAggNode->pGroupKeys, &num);
4627
    SSDataBlock*   pResBlock = createResDataBlock(pPhyNode->pOutputDataBlockDesc);
H
Haojun Liao 已提交
4628

dengyihao's avatar
dengyihao 已提交
4629
    int32_t    numOfScalarExpr = 0;
4630 4631 4632 4633 4634
    SExprInfo* pScalarExprInfo = NULL;
    if (pAggNode->pExprs != NULL) {
      pScalarExprInfo = createExprInfo(pAggNode->pExprs, NULL, &numOfScalarExpr);
    }

H
Haojun Liao 已提交
4635 4636
    if (pAggNode->pGroupKeys != NULL) {
      SArray* pColList = extractColumnInfo(pAggNode->pGroupKeys);
dengyihao's avatar
dengyihao 已提交
4637
      pOptr = createGroupOperatorInfo(ops[0], pExprInfo, num, pResBlock, pColList, pAggNode->node.pConditions,
wmmhello's avatar
wmmhello 已提交
4638
                                      pScalarExprInfo, numOfScalarExpr, pTaskInfo);
H
Haojun Liao 已提交
4639
    } else {
dengyihao's avatar
dengyihao 已提交
4640 4641
      pOptr =
          createAggregateOperatorInfo(ops[0], pExprInfo, num, pResBlock, pScalarExprInfo, numOfScalarExpr, pTaskInfo);
H
Haojun Liao 已提交
4642
    }
X
Xiaoyu Wang 已提交
4643
  } else if (QUERY_NODE_PHYSICAL_PLAN_HASH_INTERVAL == type || QUERY_NODE_PHYSICAL_PLAN_STREAM_INTERVAL == type) {
H
Haojun Liao 已提交
4644
    SIntervalPhysiNode* pIntervalPhyNode = (SIntervalPhysiNode*)pPhyNode;
H
Haojun Liao 已提交
4645

H
Haojun Liao 已提交
4646
    SExprInfo*   pExprInfo = createExprInfo(pIntervalPhyNode->window.pFuncs, NULL, &num);
4647
    SSDataBlock* pResBlock = createResDataBlock(pPhyNode->pOutputDataBlockDesc);
H
Haojun Liao 已提交
4648

dengyihao's avatar
dengyihao 已提交
4649 4650 4651 4652 4653 4654
    SInterval interval = {.interval = pIntervalPhyNode->interval,
                          .sliding = pIntervalPhyNode->sliding,
                          .intervalUnit = pIntervalPhyNode->intervalUnit,
                          .slidingUnit = pIntervalPhyNode->slidingUnit,
                          .offset = pIntervalPhyNode->offset,
                          .precision = ((SColumnNode*)pIntervalPhyNode->window.pTspk)->node.resType.precision};
H
Haojun Liao 已提交
4655

X
Xiaoyu Wang 已提交
4656 4657 4658 4659 4660
    STimeWindowAggSupp as = {
        .waterMark = pIntervalPhyNode->window.watermark,
        .calTrigger = pIntervalPhyNode->window.triggerType,
        .maxTs = INT64_MIN,
    };
4661
    ASSERT(as.calTrigger != STREAM_TRIGGER_MAX_DELAY);
4662

4663
    int32_t tsSlotId = ((SColumnNode*)pIntervalPhyNode->window.pTspk)->slotId;
L
Liu Jicong 已提交
4664 4665 4666
    bool    isStream = (QUERY_NODE_PHYSICAL_PLAN_STREAM_INTERVAL == type);
    pOptr =
        createIntervalOperatorInfo(ops[0], pExprInfo, num, pResBlock, &interval, tsSlotId, &as, pTaskInfo, isStream);
4667

S
shenglian zhou 已提交
4668
  } else if (QUERY_NODE_PHYSICAL_PLAN_MERGE_INTERVAL == type) {
X
Xiaoyu Wang 已提交
4669
    SMergeIntervalPhysiNode* pIntervalPhyNode = (SMergeIntervalPhysiNode*)pPhyNode;
S
shenglian zhou 已提交
4670 4671 4672 4673 4674 4675 4676 4677 4678 4679 4680 4681 4682

    SExprInfo*   pExprInfo = createExprInfo(pIntervalPhyNode->window.pFuncs, NULL, &num);
    SSDataBlock* pResBlock = createResDataBlock(pPhyNode->pOutputDataBlockDesc);

    SInterval interval = {.interval = pIntervalPhyNode->interval,
                          .sliding = pIntervalPhyNode->sliding,
                          .intervalUnit = pIntervalPhyNode->intervalUnit,
                          .slidingUnit = pIntervalPhyNode->slidingUnit,
                          .offset = pIntervalPhyNode->offset,
                          .precision = ((SColumnNode*)pIntervalPhyNode->window.pTspk)->node.resType.precision};

    int32_t tsSlotId = ((SColumnNode*)pIntervalPhyNode->window.pTspk)->slotId;
    pOptr = createMergeIntervalOperatorInfo(ops[0], pExprInfo, num, pResBlock, &interval, tsSlotId, pTaskInfo);
5
54liuyao 已提交
4683
  } else if (QUERY_NODE_PHYSICAL_PLAN_STREAM_SEMI_INTERVAL == type) {
4684
    int32_t children = 0;
5
54liuyao 已提交
4685 4686
    pOptr = createStreamFinalIntervalOperatorInfo(ops[0], pPhyNode, pTaskInfo, children);
  } else if (QUERY_NODE_PHYSICAL_PLAN_STREAM_FINAL_INTERVAL == type) {
4687
    int32_t children = 1;
5
54liuyao 已提交
4688
    pOptr = createStreamFinalIntervalOperatorInfo(ops[0], pPhyNode, pTaskInfo, children);
H
Haojun Liao 已提交
4689
  } else if (QUERY_NODE_PHYSICAL_PLAN_SORT == type) {
H
Haojun Liao 已提交
4690
    SSortPhysiNode* pSortPhyNode = (SSortPhysiNode*)pPhyNode;
H
Haojun Liao 已提交
4691

4692 4693 4694
    SDataBlockDescNode* pDescNode = pPhyNode->pOutputDataBlockDesc;

    SSDataBlock* pResBlock = createResDataBlock(pDescNode);
4695
    SArray*      info = createSortInfo(pSortPhyNode->pSortKeys);
4696 4697

    int32_t    numOfCols = 0;
4698 4699 4700
    SExprInfo* pExprInfo = createExprInfo(pSortPhyNode->pExprs, NULL, &numOfCols);

    int32_t numOfOutputCols = 0;
X
Xiaoyu Wang 已提交
4701 4702
    SArray* pColList =
        extractColMatchInfo(pSortPhyNode->pTargets, pDescNode, &numOfOutputCols, pTaskInfo, COL_MATCH_FROM_SLOT_ID);
4703

4704
    pOptr = createSortOperatorInfo(ops[0], pResBlock, info, pExprInfo, numOfCols, pColList, pTaskInfo);
X
Xiaoyu Wang 已提交
4705
  } else if (QUERY_NODE_PHYSICAL_PLAN_MERGE == type) {
4706 4707 4708
    SMergePhysiNode* pMergePhyNode = (SMergePhysiNode*)pPhyNode;

    SDataBlockDescNode* pDescNode = pPhyNode->pOutputDataBlockDesc;
X
Xiaoyu Wang 已提交
4709
    SSDataBlock*        pResBlock = createResDataBlock(pDescNode);
4710

X
Xiaoyu Wang 已提交
4711
    SArray* sortInfo = createSortInfo(pMergePhyNode->pMergeKeys);
4712
    int32_t numOfOutputCols = 0;
X
Xiaoyu Wang 已提交
4713 4714
    SArray* pColList =
        extractColMatchInfo(pMergePhyNode->pTargets, pDescNode, &numOfOutputCols, pTaskInfo, COL_MATCH_FROM_SLOT_ID);
S
shenglian zhou 已提交
4715
    SPhysiNode*  pChildNode = (SPhysiNode*)nodesListGetNode(pPhyNode->pChildren, 0);
4716 4717
    SSDataBlock* pInputDataBlock = createResDataBlock(pChildNode->pOutputDataBlockDesc);
    pOptr = createMultiwaySortMergeOperatorInfo(ops, size, pInputDataBlock, pResBlock, sortInfo, pColList, pTaskInfo);
4718
  } else if (QUERY_NODE_PHYSICAL_PLAN_MERGE_SESSION == type) {
H
Haojun Liao 已提交
4719 4720
    SSessionWinodwPhysiNode* pSessionNode = (SSessionWinodwPhysiNode*)pPhyNode;

X
Xiaoyu Wang 已提交
4721 4722
    STimeWindowAggSupp as = {.waterMark = pSessionNode->window.watermark,
                             .calTrigger = pSessionNode->window.triggerType};
4723

H
Haojun Liao 已提交
4724
    SExprInfo*   pExprInfo = createExprInfo(pSessionNode->window.pFuncs, NULL, &num);
4725
    SSDataBlock* pResBlock = createResDataBlock(pPhyNode->pOutputDataBlockDesc);
4726 4727
    int32_t      tsSlotId = ((SColumnNode*)pSessionNode->window.pTspk)->slotId;

X
Xiaoyu Wang 已提交
4728 4729
    pOptr =
        createSessionAggOperatorInfo(ops[0], pExprInfo, num, pResBlock, pSessionNode->gap, tsSlotId, &as, pTaskInfo);
4730
  } else if (QUERY_NODE_PHYSICAL_PLAN_STREAM_SESSION == type) {
5
54liuyao 已提交
4731 4732 4733 4734 4735 4736 4737 4738 4739
    SSessionWinodwPhysiNode* pSessionNode = (SSessionWinodwPhysiNode*)pPhyNode;

    STimeWindowAggSupp as = {.waterMark = pSessionNode->window.watermark,
                             .calTrigger = pSessionNode->window.triggerType};

    SExprInfo*   pExprInfo = createExprInfo(pSessionNode->window.pFuncs, NULL, &num);
    SSDataBlock* pResBlock = createResDataBlock(pPhyNode->pOutputDataBlockDesc);
    int32_t      tsSlotId = ((SColumnNode*)pSessionNode->window.pTspk)->slotId;

4740 4741
    pOptr = createStreamSessionAggOperatorInfo(ops[0], pExprInfo, num, pResBlock, pSessionNode->gap, tsSlotId, &as,
                                               pTaskInfo);
5
54liuyao 已提交
4742

H
Haojun Liao 已提交
4743
  } else if (QUERY_NODE_PHYSICAL_PLAN_PARTITION == type) {
4744
    pOptr = createPartitionOperatorInfo(ops[0], (SPartitionPhysiNode*)pPhyNode, pTaskInfo);
4745
  } else if (QUERY_NODE_PHYSICAL_PLAN_MERGE_STATE == type) {
dengyihao's avatar
dengyihao 已提交
4746
    SStateWinodwPhysiNode* pStateNode = (SStateWinodwPhysiNode*)pPhyNode;
4747

4748 4749
    STimeWindowAggSupp as = {.waterMark = pStateNode->window.watermark, .calTrigger = pStateNode->window.triggerType};

dengyihao's avatar
dengyihao 已提交
4750
    SExprInfo*   pExprInfo = createExprInfo(pStateNode->window.pFuncs, NULL, &num);
4751
    SSDataBlock* pResBlock = createResDataBlock(pPhyNode->pOutputDataBlockDesc);
4752 4753
    int32_t      tsSlotId = ((SColumnNode*)pStateNode->window.pTspk)->slotId;

4754
    SColumnNode* pColNode = (SColumnNode*)((STargetNode*)pStateNode->pStateKey)->pExpr;
X
Xiaoyu Wang 已提交
4755
    SColumn      col = extractColumnFromColumnNode(pColNode);
4756
    pOptr = createStatewindowOperatorInfo(ops[0], pExprInfo, num, pResBlock, &as, tsSlotId, &col, pTaskInfo);
4757
  } else if (QUERY_NODE_PHYSICAL_PLAN_STREAM_STATE == type) {
5
54liuyao 已提交
4758
    pOptr = createStreamStateAggOperatorInfo(ops[0], pPhyNode, pTaskInfo);
4759
  } else if (QUERY_NODE_PHYSICAL_PLAN_MERGE_JOIN == type) {
dengyihao's avatar
dengyihao 已提交
4760 4761
    SJoinPhysiNode* pJoinNode = (SJoinPhysiNode*)pPhyNode;
    SSDataBlock*    pResBlock = createResDataBlock(pPhyNode->pOutputDataBlockDesc);
4762 4763

    SExprInfo* pExprInfo = createExprInfo(pJoinNode->pTargets, NULL, &num);
H
Haojun Liao 已提交
4764
    pOptr = createMergeJoinOperatorInfo(ops, size, pExprInfo, num, pResBlock, pJoinNode->pOnConditions, pTaskInfo);
4765 4766
  } else if (QUERY_NODE_PHYSICAL_PLAN_FILL == type) {
    SFillPhysiNode* pFillNode = (SFillPhysiNode*)pPhyNode;
X
Xiaoyu Wang 已提交
4767 4768
    SSDataBlock*    pResBlock = createResDataBlock(pPhyNode->pOutputDataBlockDesc);
    SExprInfo*      pExprInfo = createExprInfo(pFillNode->pTargets, NULL, &num);
4769

4770
    SInterval* pInterval = &((SIntervalAggOperatorInfo*)ops[0]->info)->interval;
X
Xiaoyu Wang 已提交
4771 4772
    pOptr = createFillOperatorInfo(ops[0], pExprInfo, num, pInterval, &pFillNode->timeRange, pResBlock, pFillNode->mode,
                                   (SNodeListNode*)pFillNode->pValues, false, pTaskInfo);
H
Haojun Liao 已提交
4773 4774
  } else if (QUERY_NODE_PHYSICAL_PLAN_INDEF_ROWS_FUNC == type) {
    pOptr = createIndefinitOutputOperatorInfo(ops[0], pPhyNode, pTaskInfo);
H
Haojun Liao 已提交
4775 4776
  } else {
    ASSERT(0);
H
Haojun Liao 已提交
4777
  }
4778 4779 4780

  taosMemoryFree(ops);
  return pOptr;
4781
}
H
Haojun Liao 已提交
4782

4783
int32_t compareTimeWindow(const void* p1, const void* p2, const void* param) {
dengyihao's avatar
dengyihao 已提交
4784 4785 4786
  const SQueryTableDataCond* pCond = param;
  const STimeWindow*         pWin1 = p1;
  const STimeWindow*         pWin2 = p2;
4787 4788 4789 4790 4791 4792 4793 4794
  if (pCond->order == TSDB_ORDER_ASC) {
    return pWin1->skey - pWin2->skey;
  } else if (pCond->order == TSDB_ORDER_DESC) {
    return pWin2->skey - pWin1->skey;
  }
  return 0;
}

4795
/*
 * Fill a table-data query condition from a table scan physical plan node.
 *
 * pCond          : condition to initialise; on success it owns the newly
 *                  allocated `colList` and `twindows` arrays (released by
 *                  clearupQueryTableDataCond()).
 * pTableScanNode : plan node supplying scan order, scan range, suid and the
 *                  list of scanned columns.
 *
 * Returns TSDB_CODE_SUCCESS, or TSDB_CODE_QRY_OUT_OF_MEMORY (also stored in
 * terrno) when an allocation fails; nothing stays allocated in that case.
 */
int32_t initQueryTableDataCond(SQueryTableDataCond* pCond, const STableScanPhysiNode* pTableScanNode) {
  pCond->loadExternalRows = false;

  pCond->order = pTableScanNode->scanSeq[0] > 0 ? TSDB_ORDER_ASC : TSDB_ORDER_DESC;
  pCond->numOfCols = LIST_LENGTH(pTableScanNode->scan.pScanCols);
  pCond->colList = taosMemoryCalloc(pCond->numOfCols, sizeof(SColumnInfo));
  if (pCond->colList == NULL) {
    terrno = TSDB_CODE_QRY_OUT_OF_MEMORY;
    return terrno;
  }

  // TODO: get the time windows from the stable scan node; for now there is a
  // single window taken from the node's scan range.
  pCond->numOfTWindows = 1;
  pCond->twindows = taosMemoryCalloc(pCond->numOfTWindows, sizeof(STimeWindow));
  if (pCond->twindows == NULL) {
    // do not leak the column list, and never dereference a failed allocation
    taosMemoryFreeClear(pCond->colList);
    terrno = TSDB_CODE_QRY_OUT_OF_MEMORY;
    return terrno;
  }
  pCond->twindows[0] = pTableScanNode->scanRange;
  pCond->suid = pTableScanNode->scan.suid;

  // todo work around a problem, remove it later:
  // make every window run in the scan direction (skey <= ekey for ascending
  // scans, skey >= ekey for descending scans).
  for (int32_t i = 0; i < pCond->numOfTWindows; ++i) {
    if ((pCond->order == TSDB_ORDER_ASC && pCond->twindows[i].skey > pCond->twindows[i].ekey) ||
        (pCond->order == TSDB_ORDER_DESC && pCond->twindows[i].skey < pCond->twindows[i].ekey)) {
      TSWAP(pCond->twindows[i].skey, pCond->twindows[i].ekey);
    }
  }

  taosqsort(pCond->twindows, pCond->numOfTWindows, sizeof(STimeWindow), pCond, compareTimeWindow);

  // the plan node's scanFlag is currently not used to pick the load type
  pCond->type = BLOCK_LOAD_OFFSET_SEQ_ORDER;

  // Copy the description of every non-tag column; tag columns are skipped, so
  // the final column count may be smaller than the length of the scan list.
  int32_t j = 0;
  for (int32_t i = 0; i < pCond->numOfCols; ++i) {
    STargetNode* pNode = (STargetNode*)nodesListGetNode(pTableScanNode->scan.pScanCols, i);
    SColumnNode* pColNode = (SColumnNode*)pNode->pExpr;
    if (pColNode->colType == COLUMN_TYPE_TAG) {
      continue;
    }

    pCond->colList[j].type = pColNode->node.resType.type;
    pCond->colList[j].bytes = pColNode->node.resType.bytes;
    pCond->colList[j].colId = pColNode->colId;
    j += 1;
  }

  pCond->numOfCols = j;
  return TSDB_CODE_SUCCESS;
}

/*
 * Release the arrays owned by a query condition: the column list and the time
 * window list. The condition structure itself is not freed and its pointers
 * are left untouched.
 */
void clearupQueryTableDataCond(SQueryTableDataCond* pCond) {
  taosMemoryFree(pCond->colList);
  taosMemoryFree(pCond->twindows);
}

/*
 * Build an SColumn from a column node of the plan.
 *
 * Slot id and column id are copied from the node itself; type, bytes, scale
 * and precision come from the node's result type. All remaining SColumn
 * fields are zero-initialised. The result is returned by value.
 */
SColumn extractColumnFromColumnNode(SColumnNode* pColNode) {
  SColumn col = {0};

  // identity of the column
  col.colId = pColNode->colId;
  col.slotId = pColNode->slotId;

  // data type description
  col.type = pColNode->node.resType.type;
  col.bytes = pColNode->node.resType.bytes;
  col.precision = pColNode->node.resType.precision;
  col.scale = pColNode->node.resType.scale;

  return col;
}

/*
 * Convert a list of target nodes into an array of SColumn.
 *
 * Targets whose expression is a column node or a value node produce one
 * SColumn each; targets of any other expression kind are silently skipped, so
 * the result may be shorter than the input list.
 *
 * Returns the new array (owned by the caller), or NULL with terrno set to
 * TSDB_CODE_OUT_OF_MEMORY when the array cannot be created.
 */
SArray* extractColumnInfo(SNodeList* pNodeList) {
  size_t  numOfCols = LIST_LENGTH(pNodeList);
  SArray* pList = taosArrayInit(numOfCols, sizeof(SColumn));
  if (pList == NULL) {
    terrno = TSDB_CODE_OUT_OF_MEMORY;
    return NULL;
  }

  for (int32_t i = 0; i < numOfCols; ++i) {
    STargetNode* pNode = (STargetNode*)nodesListGetNode(pNodeList, i);

    if (nodeType(pNode->pExpr) == QUERY_NODE_COLUMN) {
      SColumn c = extractColumnFromColumnNode((SColumnNode*)pNode->pExpr);
      taosArrayPush(pList, &c);
    } else if (nodeType(pNode->pExpr) == QUERY_NODE_VALUE) {
      SValueNode* pValNode = (SValueNode*)pNode->pExpr;

      // a constant has no column of its own: the target slot id doubles as column id
      SColumn c = {0};
      c.slotId = pNode->slotId;
      c.colId = pNode->slotId;
      // Take the data type from the result type, like bytes/scale/precision
      // below and like the column branch does. `node.type` is a separate field
      // from `node.resType.type` (presumably the node kind, not a data type).
      c.type = pValNode->node.resType.type;
      c.bytes = pValNode->node.resType.bytes;
      c.scale = pValNode->node.resType.scale;
      c.precision = pValNode->node.resType.precision;

      taosArrayPush(pList, &c);
    }
  }

  return pList;
}

/*
 * Convert a list of partition-by column nodes into an array of SColumn.
 *
 * Every list entry is treated as a column node. Returns NULL when the list is
 * NULL, and NULL with terrno set to TSDB_CODE_OUT_OF_MEMORY when the array
 * cannot be created; otherwise the caller owns the returned array.
 */
SArray* extractPartitionColInfo(SNodeList* pNodeList) {
  if (pNodeList == NULL) {
    return NULL;
  }

  size_t  numOfCols = LIST_LENGTH(pNodeList);
  SArray* pColumns = taosArrayInit(numOfCols, sizeof(SColumn));
  if (pColumns == NULL) {
    terrno = TSDB_CODE_OUT_OF_MEMORY;
    return NULL;
  }

  for (int32_t idx = 0; idx < numOfCols; ++idx) {
    SColumnNode* pCol = (SColumnNode*)nodesListGetNode(pNodeList, idx);

    // todo extract method: this is the same field copy that
    // extractColumnFromColumnNode() performs
    SColumn column = {0};
    column.colId = pCol->colId;
    column.slotId = pCol->slotId;
    column.type = pCol->node.resType.type;
    column.bytes = pCol->node.resType.bytes;
    column.scale = pCol->node.resType.scale;
    column.precision = pCol->node.resType.precision;

    taosArrayPush(pColumns, &column);
  }

  return pColumns;
}

/*
 * Translate a list of ORDER BY expression nodes into an array of
 * SBlockOrderInfo (sort direction, null placement and source slot id).
 *
 * The expression of every sort key is read as a column node. Returns NULL
 * with terrno set to TSDB_CODE_OUT_OF_MEMORY when the array cannot be created.
 */
SArray* createSortInfo(SNodeList* pNodeList) {
  size_t  numOfKeys = LIST_LENGTH(pNodeList);
  SArray* pOrderList = taosArrayInit(numOfKeys, sizeof(SBlockOrderInfo));
  if (pOrderList == NULL) {
    terrno = TSDB_CODE_OUT_OF_MEMORY;
    return NULL;
  }

  for (int32_t idx = 0; idx < numOfKeys; ++idx) {
    SOrderByExprNode* pKey = (SOrderByExprNode*)nodesListGetNode(pNodeList, idx);
    SColumnNode*      pKeyCol = (SColumnNode*)pKey->pExpr;

    SBlockOrderInfo orderInfo = {0};
    orderInfo.slotId = pKeyCol->slotId;
    orderInfo.nullFirst = (pKey->nullOrder == NULL_ORDER_FIRST);

    // anything that is not explicitly ascending is sorted descending
    if (pKey->order == ORDER_ASC) {
      orderInfo.order = TSDB_ORDER_ASC;
    } else {
      orderInfo.order = TSDB_ORDER_DESC;
    }

    taosArrayPush(pOrderList, &orderInfo);
  }

  return pOrderList;
}

SArray* extractColMatchInfo(SNodeList* pNodeList, SDataBlockDescNode* pOutputNodeList, int32_t* numOfOutputCols,
4954
                            SExecTaskInfo* pTaskInfo, int32_t type) {
L
Liu Jicong 已提交
4955
  size_t  numOfCols = LIST_LENGTH(pNodeList);
H
Haojun Liao 已提交
4956 4957 4958 4959 4960 4961
  SArray* pList = taosArrayInit(numOfCols, sizeof(SColMatchInfo));
  if (pList == NULL) {
    terrno = TSDB_CODE_OUT_OF_MEMORY;
    return NULL;
  }

L
Liu Jicong 已提交
4962 4963 4964
  for (int32_t i = 0; i < numOfCols; ++i) {
    STargetNode* pNode = (STargetNode*)nodesListGetNode(pNodeList, i);
    SColumnNode* pColNode = (SColumnNode*)pNode->pExpr;
H
Haojun Liao 已提交
4965 4966

    SColMatchInfo c = {0};
X
Xiaoyu Wang 已提交
4967 4968 4969 4970
    c.output = true;
    c.colId = pColNode->colId;
    c.srcSlotId = pColNode->slotId;
    c.matchType = type;
H
Haojun Liao 已提交
4971 4972 4973 4974
    c.targetSlotId = pNode->slotId;
    taosArrayPush(pList, &c);
  }

H
Haojun Liao 已提交
4975 4976
  *numOfOutputCols = 0;
  int32_t num = LIST_LENGTH(pOutputNodeList->pSlots);
L
Liu Jicong 已提交
4977 4978
  for (int32_t i = 0; i < num; ++i) {
    SSlotDescNode* pNode = (SSlotDescNode*)nodesListGetNode(pOutputNodeList->pSlots, i);
4979

4980
    // todo: add reserve flag check
4981 4982
    // it is a column reserved for the arithmetic expression calculation
    if (pNode->slotId >= numOfCols) {
4983 4984 4985 4986
      (*numOfOutputCols) += 1;
      continue;
    }

H
Haojun Liao 已提交
4987
    SColMatchInfo* info = taosArrayGet(pList, pNode->slotId);
H
Haojun Liao 已提交
4988 4989 4990 4991 4992
    if (pNode->output) {
      (*numOfOutputCols) += 1;
    } else {
      info->output = false;
    }
H
Haojun Liao 已提交
4993 4994
  }

H
Haojun Liao 已提交
4995 4996 4997
  return pList;
}

dengyihao's avatar
dengyihao 已提交
4998 4999
/*
 * Collect the tables a scan has to read into pListInfo->pTableList.
 *
 * metaHandle : meta handle passed on to the tag filter / table enumeration.
 * tableType  : for TSDB_SUPER_TABLE the child tables are collected; any other
 *              type yields a single entry for tableUid.
 * tableUid   : uid of the (super) table.
 * pListInfo  : receives a freshly created table list.
 * pTagCond   : optional tag condition; when present only the tables returned
 *              by doFilterTag() are added, otherwise all tables of the super
 *              table are listed.
 *
 * Returns TSDB_CODE_SUCCESS or an error code. A tag filter failure is also
 * stored in terrno. The table list stays attached to pListInfo on failure.
 */
int32_t getTableList(void* metaHandle, int32_t tableType, uint64_t tableUid, STableListInfo* pListInfo,
                     SNode* pTagCond) {
  int32_t code = TSDB_CODE_SUCCESS;

  pListInfo->pTableList = taosArrayInit(8, sizeof(STableKeyInfo));
  if (pListInfo->pTableList == NULL) {
    terrno = TSDB_CODE_OUT_OF_MEMORY;
    return TSDB_CODE_OUT_OF_MEMORY;
  }

  if (tableType == TSDB_SUPER_TABLE) {
    if (pTagCond) {
      SIndexMetaArg metaArg = {
          .metaEx = metaHandle, .idx = tsdbGetIdx(metaHandle), .ivtIdx = tsdbGetIvtIdx(metaHandle), .suid = tableUid};

      SArray* res = taosArrayInit(8, sizeof(uint64_t));
      if (res == NULL) {
        // never hand a NULL result array to the tag filter
        terrno = TSDB_CODE_OUT_OF_MEMORY;
        return TSDB_CODE_OUT_OF_MEMORY;
      }

      code = doFilterTag(pTagCond, &metaArg, res);
      if (code == TSDB_CODE_INDEX_REBUILDING) {  // todo
        // doFilter();
      } else if (code != TSDB_CODE_SUCCESS) {
        qError("failed  to  get tableIds, reason: %s, suid: %" PRIu64 "", tstrerror(code), tableUid);
        taosArrayDestroy(res);
        terrno = code;
        return code;
      } else {
        qDebug("sucess to  get tableIds, size: %d, suid: %" PRIu64 "", (int)taosArrayGetSize(res), tableUid);
      }

      // one table key per uid that passed the tag filter
      for (int i = 0; i < taosArrayGetSize(res); i++) {
        STableKeyInfo info = {.lastKey = TSKEY_INITIAL_VAL, .uid = *(uint64_t*)taosArrayGet(res, i)};
        taosArrayPush(pListInfo->pTableList, &info);
      }
      taosArrayDestroy(res);
    } else {
      code = tsdbGetAllTableList(metaHandle, tableUid, pListInfo->pTableList);
    }
  } else {  // Create one table group.
    STableKeyInfo info = {.lastKey = 0, .uid = tableUid};
    taosArrayPush(pListInfo->pTableList, &info);
  }

  return code;
}

/*
 * Create the tsdb reader for a table scan node.
 *
 * The table list is collected first (getTableList), then the query condition
 * is built from the plan node and the reader is opened on pHandle->vnode.
 *
 * Returns the opened reader, or NULL on failure with the error code stored in
 * terrno. When no table qualifies for the query, NULL is returned and terrno
 * is set to 0.
 */
STsdbReader* doCreateDataReader(STableScanPhysiNode* pTableScanNode, SReadHandle* pHandle,
                                STableListInfo* pTableListInfo, uint64_t queryId, uint64_t taskId, SNode* pTagCond) {
  // initialised so that a failed open can never hand back an indeterminate pointer
  STsdbReader* pReader = NULL;

  int32_t code =
      getTableList(pHandle->meta, pTableScanNode->scan.tableType, pTableScanNode->scan.uid, pTableListInfo, pTagCond);
  if (code != TSDB_CODE_SUCCESS) {
    goto _error;
  }

  if (taosArrayGetSize(pTableListInfo->pTableList) == 0) {
    code = 0;
    qDebug("no table qualified for query, TID:0x%" PRIx64 ", QID:0x%" PRIx64, taskId, queryId);
    goto _error;
  }

  SQueryTableDataCond cond = {0};
  code = initQueryTableDataCond(&cond, pTableScanNode);
  if (code != TSDB_CODE_SUCCESS) {
    goto _error;
  }

  code = tsdbReaderOpen(pHandle->vnode, &cond, pTableListInfo, queryId, taskId, &pReader);
  // the condition is released here whether or not the open succeeded
  clearupQueryTableDataCond(&cond);
  if (code != TSDB_CODE_SUCCESS) {
    // report the failure instead of silently returning whatever pReader holds
    goto _error;
  }

  return pReader;

_error:
  terrno = code;
  return NULL;
}

/*
 * Serialise the result rows of an operator and of all its downstream
 * operators (depth first) into one heap buffer.
 *
 * Buffer layout as written here: the first int32_t holds the total size of
 * the buffer (this header included), followed by the encoded chunks of the
 * operators in visiting order.
 *
 * ops    : operator to encode; operators without an encodeResultRow callback
 *          contribute nothing but their downstream operators are still visited.
 * result : in/out buffer pointer; *result is created on the first chunk and
 *          reallocated for each further one. It is freed and set to NULL when
 *          encoding or growing the buffer fails.
 * length : receives the current total size after each appended chunk.
 *
 * Returns TDB_CODE_SUCCESS, TSDB_CODE_TSC_INVALID_INPUT when result or length
 * is NULL for an encoding operator, TSDB_CODE_OUT_OF_MEMORY, or the error of
 * the encode callback.
 */
int32_t encodeOperator(SOperatorInfo* ops, char** result, int32_t* length) {
  int32_t code = TDB_CODE_SUCCESS;

  if (ops->fpSet.encodeResultRow) {
    if (result == NULL || length == NULL) {
      return TSDB_CODE_TSC_INVALID_INPUT;
    }

    char*   pEncoded = NULL;
    int32_t encodedLen = 0;
    code = ops->fpSet.encodeResultRow(ops, &pEncoded, &encodedLen);
    if (code != TDB_CODE_SUCCESS) {
      // drop everything that has been collected so far
      if (*result != NULL) {
        taosMemoryFree(*result);
        *result = NULL;
      }
      return code;
    }

    if (*result != NULL) {
      // append: the size header tells where the existing payload ends
      int32_t prevTotal = *(int32_t*)(*result);
      char*   pGrown = (char*)taosMemoryRealloc(*result, prevTotal + encodedLen);
      if (pGrown == NULL) {
        taosMemoryFree(pEncoded);
        taosMemoryFree(*result);
        *result = NULL;
        return TSDB_CODE_OUT_OF_MEMORY;
      }

      *result = pGrown;
      memcpy(*result + prevTotal, pEncoded, encodedLen);
      *(int32_t*)(*result) += encodedLen;
    } else {
      // first chunk: reserve room for the size header in front of the payload
      *result = (char*)taosMemoryCalloc(1, encodedLen + sizeof(int32_t));
      if (*result == NULL) {
        taosMemoryFree(pEncoded);
        return TSDB_CODE_OUT_OF_MEMORY;
      }

      memcpy(*result + sizeof(int32_t), pEncoded, encodedLen);
      *(int32_t*)(*result) = encodedLen + sizeof(int32_t);
    }

    taosMemoryFree(pEncoded);
    *length = *(int32_t*)(*result);
  }

  for (int32_t i = 0; i < ops->numOfDownstream; ++i) {
    code = encodeOperator(ops->pDownstream[i], result, length);
    if (code != TDB_CODE_SUCCESS) {
      return code;
    }
  }

  return TDB_CODE_SUCCESS;
}

/*
 * Restore the result rows of an operator and of all its downstream operators
 * from a buffer.
 *
 * ops    : operator to decode into; operators without a decodeResultRow
 *          callback consume nothing but their downstream operators are still
 *          visited.
 * result : buffer whose first int32_t is the total size of the remaining
 *          data (asserted to equal `length`). NOTE: the buffer is modified,
 *          because the size header for the next operator is written into it
 *          in place.
 * length : total size of the remaining data.
 *
 * Returns TDB_CODE_SUCCESS, TSDB_CODE_TSC_INVALID_INPUT when a decoding
 * operator gets no buffer, or the error of the decode callback.
 */
int32_t decodeOperator(SOperatorInfo* ops, char* result, int32_t length) {
  int32_t code = TDB_CODE_SUCCESS;

  if (ops->fpSet.decodeResultRow) {
    if (result == NULL) {
      return TSDB_CODE_TSC_INVALID_INPUT;
    }

    ASSERT(length == *(int32_t*)result);

    // the chunk of this operator starts right behind the size header
    char* pChunk = result + sizeof(int32_t);
    code = ops->fpSet.decodeResultRow(ops, pChunk);
    if (code != TDB_CODE_SUCCESS) {
      return code;
    }

    int32_t totalLength = *(int32_t*)result;
    // the chunk starts with its own int32_t length
    int32_t chunkLength = *(int32_t*)pChunk;

    if (totalLength == chunkLength + sizeof(int32_t)) {  // the last data
      result = NULL;
      length = 0;
    } else {
      // advance past this chunk and stamp the remaining size in front of the
      // next one, so the downstream operators see the same header layout
      int32_t remaining = totalLength - chunkLength;
      result += chunkLength;
      *(int32_t*)(result) = remaining;
      length = remaining;
    }
  }

  for (int32_t i = 0; i < ops->numOfDownstream; ++i) {
    code = decodeOperator(ops->pDownstream[i], result, length);
    if (code != TDB_CODE_SUCCESS) {
      return code;
    }
  }

  return TDB_CODE_SUCCESS;
}

/*
 * Create the extra parameter a data sink node needs.
 *
 * Only delete sinks (QUERY_NODE_PHYSICAL_PLAN_DELETE) need one: an
 * SDeleterParam holding the uids of all tables in the task's table list. For
 * every other sink type *pParam is left untouched.
 *
 * pNode     : data sink plan node.
 * pParam    : receives the newly allocated parameter (owned by the caller).
 * pTaskInfo : points to the task handle, which is read as an SExecTaskInfo*.
 *
 * Returns TSDB_CODE_SUCCESS or TSDB_CODE_OUT_OF_MEMORY.
 */
int32_t createDataSinkParam(SDataSinkNode* pNode, void** pParam, qTaskInfo_t* pTaskInfo) {
  SExecTaskInfo* pTask = *(SExecTaskInfo**)pTaskInfo;

  if (pNode->type != QUERY_NODE_PHYSICAL_PLAN_DELETE) {
    return TSDB_CODE_SUCCESS;
  }

  SDeleterParam* pDeleter = taosMemoryCalloc(1, sizeof(SDeleterParam));
  if (NULL == pDeleter) {
    return TSDB_CODE_OUT_OF_MEMORY;
  }

  int32_t numOfTables = taosArrayGetSize(pTask->tableqinfoList.pTableList);
  pDeleter->pUidList = taosArrayInit(numOfTables, sizeof(uint64_t));
  if (NULL == pDeleter->pUidList) {
    taosMemoryFree(pDeleter);
    return TSDB_CODE_OUT_OF_MEMORY;
  }

  // collect the uid of every table the task works on
  for (int32_t idx = 0; idx < numOfTables; ++idx) {
    STableKeyInfo* pKeyInfo = taosArrayGet(pTask->tableqinfoList.pTableList, idx);
    taosArrayPush(pDeleter->pUidList, &pKeyInfo->uid);
  }

  *pParam = pDeleter;
  return TSDB_CODE_SUCCESS;
}

/*
 * Create the execution task for a sub-plan and build its operator tree.
 *
 * pPlan     : sub-plan providing the query id, database name, root plan node
 *             and tag condition.
 * pTaskInfo : receives the new task; set to NULL when the function fails.
 * pHandle   : read handle passed on to the operator tree.
 * taskId    : id of the task.
 * sql       : SQL text; the pointer is stored in the task, not copied.
 * model     : execution model of the task.
 *
 * Returns TSDB_CODE_SUCCESS, TSDB_CODE_QRY_OUT_OF_MEMORY when the task cannot
 * be allocated, or the `code` recorded in the task when no operator tree was
 * produced. On failure the returned code is also stored in terrno.
 */
int32_t createExecTaskInfoImpl(SSubplan* pPlan, SExecTaskInfo** pTaskInfo, SReadHandle* pHandle, uint64_t taskId,
                               const char* sql, EOPTR_EXEC_MODEL model) {
  uint64_t queryId = pPlan->id.queryId;

  int32_t code = TSDB_CODE_SUCCESS;
  *pTaskInfo = createExecTaskInfo(queryId, taskId, model, pPlan->dbFName);
  if (*pTaskInfo == NULL) {
    code = TSDB_CODE_QRY_OUT_OF_MEMORY;
    goto _complete;
  }

  (*pTaskInfo)->sql = sql;
  (*pTaskInfo)->pRoot = createOperatorTree(pPlan->pNode, *pTaskInfo, pHandle, queryId, taskId,
                                           &(*pTaskInfo)->tableqinfoList, pPlan->pTagCond);
  if (NULL == (*pTaskInfo)->pRoot) {
    // the reason for the missing tree is recorded in the task itself
    code = (*pTaskInfo)->code;
    goto _complete;
  }

  return code;

_complete:
  // NOTE(review): only the task structure itself is released here; anything
  // it already owns (e.g. its table list) is not. Confirm whether
  // doDestroyTask() should be used instead.
  taosMemoryFreeClear(*pTaskInfo);
  terrno = code;
  return code;
}

/*
 * Release the table array and the hash map of a table list and reset both
 * pointers to NULL. The STableListInfo structure itself is not freed.
 */
static void doDestroyTableList(STableListInfo* pTableqinfoList) {
  taosArrayDestroy(pTableqinfoList->pTableList);
  pTableqinfoList->pTableList = NULL;

  taosHashCleanup(pTableqinfoList->map);
  pTableqinfoList->map = NULL;
}

/*
 * Destroy an execution task: its table list, its operator tree, the schema
 * version names, the SQL text, the id string and finally the task structure
 * itself. pTaskInfo must not be used afterwards.
 */
void doDestroyTask(SExecTaskInfo* pTaskInfo) {
  // log first: the task id string is released further down
  qDebug("%s execTask is freed", GET_TASKID(pTaskInfo));

  doDestroyTableList(&pTaskInfo->tableqinfoList);
  destroyOperatorInfo(pTaskInfo->pRoot);

  // names kept for the schema version check
  taosMemoryFree(pTaskInfo->schemaVer.tablename);
  taosMemoryFree(pTaskInfo->schemaVer.dbname);

  taosMemoryFreeClear(pTaskInfo->sql);
  taosMemoryFreeClear(pTaskInfo->id.str);
  taosMemoryFreeClear(pTaskInfo);
}

/*
 * Copy a tag value into a result buffer.
 *
 * output : destination buffer.
 * val    : source value; NULL stores the null representation of `type`.
 * type   : data type of the value.
 * bytes  : number of bytes copied for fixed-length types; for variable-length
 *          types the limit the stored value is trimmed to.
 */
static void doSetTagValueToResultBuf(char* output, const char* val, int16_t type, int16_t bytes) {
  if (val == NULL) {
    setNull(output, type, bytes);
    return;
  }

  if (!IS_VAR_DATA_TYPE(type)) {
    memcpy(output, val, bytes);
    return;
  }

  if (varDataTLen(val) <= bytes) {
    varDataCopy(output, val);
    return;
  }

  // Binary data overflows for sort of unknown reasons. Let trim the overflow data
  int32_t maxLen = bytes - VARSTR_HEADER_SIZE;
  int32_t len = (varDataLen(val) > maxLen) ? maxLen : varDataLen(val);
  memcpy(varDataVal(output), varDataVal(val), len);
  varDataSetLen(output, len);
}

/*
 * Estimate the query buffer needed for the given number of tables:
 * 1.5 times the size of STableQueryInfo per table, truncated to int64_t.
 * Buffer consumption inside tsdb (STableCheckInfo) is not included.
 */
static int64_t getQuerySupportBufSize(size_t numOfTables) {
  return (int64_t)(sizeof(STableQueryInfo) * 1.5 * numOfTables);
}

/*
 * Reserve query buffer for the given number of tables from the global
 * tsQueryBufferSizeBytes budget.
 *
 * A negative budget means "no limit" and always succeeds. A positive budget
 * is reduced with a compare-and-swap retry loop. A zero budget, or one that
 * is too small for the request, yields TSDB_CODE_QRY_NOT_ENOUGH_BUFFER.
 */
int32_t checkForQueryBuf(size_t numOfTables) {
  int64_t required = getQuerySupportBufSize(numOfTables);

  if (tsQueryBufferSizeBytes < 0) {
    return TSDB_CODE_SUCCESS;
  }

  if (tsQueryBufferSizeBytes > 0) {
    for (;;) {
      int64_t available = tsQueryBufferSizeBytes;
      int64_t remain = available - required;
      if (remain < 0) {
        return TSDB_CODE_QRY_NOT_ENOUGH_BUFFER;
      }

      // retry when the budget was changed between the read and the swap
      if (atomic_val_compare_exchange_64(&tsQueryBufferSizeBytes, available, remain) == available) {
        return TSDB_CODE_SUCCESS;
      }
    }
  }

  // disable query processing if the value of tsQueryBufferSize is zero.
  return TSDB_CODE_QRY_NOT_ENOUGH_BUFFER;
}

/*
 * Give the query buffer estimated for the given number of tables back to the
 * global tsQueryBufferSizeBytes budget. Nothing is done when the budget is
 * negative, i.e. when the buffer is not limited.
 */
void releaseQueryBuf(size_t numOfTables) {
  if (tsQueryBufferSizeBytes >= 0) {
    // add back the same amount checkForQueryBuf() computes for this table count
    int64_t released = getQuerySupportBufSize(numOfTables);
    atomic_add_fetch_64(&tsQueryBufferSizeBytes, released);
  }
}

/*
 * Collect the explain/execution statistics of an operator and of all its
 * downstream operators (depth first) into a growable array.
 *
 * operatorInfo : operator to report.
 * pRes         : in/out array of SExplainExecInfo; grown in steps of 10
 *                entries. It is freed and set to NULL when growing fails or
 *                when a downstream operator reports an error.
 * capacity     : in/out number of entries *pRes can hold.
 * resNum       : in/out number of entries already filled.
 *
 * Returns TSDB_CODE_SUCCESS, TSDB_CODE_QRY_OUT_OF_MEMORY when the array cannot
 * be grown, or the error code reported by an operator's getExplainFn callback.
 */
int32_t getOperatorExplainExecInfo(SOperatorInfo* operatorInfo, SExplainExecInfo** pRes, int32_t* capacity,
                                   int32_t* resNum) {
  int32_t code = TSDB_CODE_SUCCESS;

  if (*resNum >= *capacity) {
    *capacity += 10;

    // keep the old pointer until the reallocation is known to have succeeded,
    // otherwise the existing array would be leaked on failure
    SExplainExecInfo* pGrown = taosMemoryRealloc(*pRes, (*capacity) * sizeof(SExplainExecInfo));
    if (NULL == pGrown) {
      qError("malloc %d failed", (*capacity) * (int32_t)sizeof(SExplainExecInfo));
      taosMemoryFreeClear(*pRes);
      return TSDB_CODE_QRY_OUT_OF_MEMORY;
    }
    *pRes = pGrown;
  }

  SExplainExecInfo* pInfo = &(*pRes)[*resNum];

  pInfo->numOfRows = operatorInfo->resultInfo.totalRows;
  pInfo->startupCost = operatorInfo->cost.openCost;
  pInfo->totalCost = operatorInfo->cost.totalCost;

  if (operatorInfo->fpSet.getExplainFn) {
    code = operatorInfo->fpSet.getExplainFn(operatorInfo, &pInfo->verboseInfo, &pInfo->verboseLen);
    if (code) {
      qError("%s operator getExplainFn failed, code:%s", GET_TASKID(operatorInfo->pTaskInfo), tstrerror(code));
      return code;
    }
  } else {
    pInfo->verboseLen = 0;
    pInfo->verboseInfo = NULL;
  }

  ++(*resNum);

  for (int32_t i = 0; i < operatorInfo->numOfDownstream; ++i) {
    code = getOperatorExplainExecInfo(operatorInfo->pDownstream[i], pRes, capacity, resNum);
    if (code) {
      // hand the real failure reason to the caller instead of masking it
      taosMemoryFreeClear(*pRes);
      return code;
    }
  }

  return TSDB_CODE_SUCCESS;
}

/*
 * Initialise the support structure shared by the stream aggregate operators.
 *
 * pSup        : supporter to initialise; receives the key buffer, the result
 *               row hash, the scan window array and the disk based result
 *               buffer.
 * pKey        : key passed to createDiskbasedBuf() for the result buffer.
 * pCtx        : function contexts; each one gets the result buffer assigned.
 * numOfOutput : number of entries in pCtx.
 * size        : stored as the value size of the supporter.
 *
 * Returns TSDB_CODE_OUT_OF_MEMORY when an allocation fails, otherwise the
 * result of createDiskbasedBuf(). Members that were already created are not
 * released here on failure.
 * NOTE(review): presumably the caller destroys the supporter - confirm.
 */
int32_t initStreamAggSupporter(SStreamAggSupporter* pSup, const char* pKey, SqlFunctionCtx* pCtx, int32_t numOfOutput,
                               int32_t size) {
  pSup->resultRowSize = getResultRowSize(pCtx, numOfOutput);
  pSup->keySize = sizeof(int64_t) + sizeof(TSKEY);
  pSup->pKeyBuf = taosMemoryCalloc(1, pSup->keySize);
  _hash_fn_t hashFn = taosGetDefaultHashFunction(TSDB_DATA_TYPE_BINARY);
  pSup->pResultRows = taosHashInit(1024, hashFn, false, HASH_NO_LOCK);
  if (pSup->pKeyBuf == NULL || pSup->pResultRows == NULL) {
    return TSDB_CODE_OUT_OF_MEMORY;
  }
  pSup->valueSize = size;

  pSup->pScanWindow = taosArrayInit(4, sizeof(STimeWindow));
  if (pSup->pScanWindow == NULL) {
    // fail like the other allocations instead of leaving a NULL array behind
    return TSDB_CODE_OUT_OF_MEMORY;
  }

  // grow the page size (starting at 4096) until one page holds four result rows
  int32_t pageSize = 4096;
  while (pageSize < pSup->resultRowSize * 4) {
    pageSize <<= 1u;
  }
  // at least four pages need to be in buffer
  int32_t bufSize = 4096 * 256;
  if (bufSize <= pageSize) {
    bufSize = pageSize * 4;
  }
  int32_t code = createDiskbasedBuf(&pSup->pResultBuf, pageSize, bufSize, pKey, TD_TMP_DIR_PATH);
  for (int32_t i = 0; i < numOfOutput; ++i) {
    pCtx[i].pBuf = pSup->pResultBuf;
  }
  return code;
}