qExecutor.c 209.5 KB
Newer Older
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15
/*
 * Copyright (c) 2019 TAOS Data, Inc. <jhtao@taosdata.com>
 *
 * This program is free software: you can use, redistribute, and/or modify
 * it under the terms of the GNU Affero General Public License, version 3
 * or later ("AGPL"), as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.
 *
 * You should have received a copy of the GNU Affero General Public License
 * along with this program. If not, see <http://www.gnu.org/licenses/>.
 */
#include "os.h"
H
Haojun Liao 已提交
16
#include "qfill.h"
17 18 19

#include "hash.h"
#include "hashfunc.h"
20 21
#include "qExecutor.h"
#include "qUtil.h"
H
hjxilinx 已提交
22
#include "qast.h"
23
#include "qresultBuf.h"
H
hjxilinx 已提交
24
#include "query.h"
S
slguan 已提交
25
#include "queryLog.h"
26
#include "taosmsg.h"
27
#include "tdataformat.h"
28
#include "tlosertree.h"
29
#include "tscUtil.h"  // todo move the function to common module
30 31
#include "tscompression.h"
#include "ttime.h"
32 33 34 35 36 37 38 39 40

/* True when the status word p has at least one of the bits in s set. */
#define Q_STATUS_EQUAL(p, s) (((p) & (s)) != 0)

/* True when the column flag f carries the TSDB_COL_TAG bit. */
#define TSDB_COL_IS_TAG(f) (((f)&TSDB_COL_TAG) != 0)

/* True when the direction factor derived from the query order equals QUERY_ASC_FORWARD_STEP. */
#define QUERY_IS_ASC_QUERY(q) (GET_FORWARD_DIRECTION_FACTOR((q)->order.order) == QUERY_ASC_FORWARD_STEP)

/* Test or set the scan flag stored in the runtime environment. */
#define IS_MASTER_SCAN(runtime)        ((runtime)->scanFlag == MASTER_SCAN)
#define IS_REVERSE_SCAN(runtime)       ((runtime)->scanFlag == REVERSE_SCAN)
#define SET_MASTER_SCAN_FLAG(runtime)  ((runtime)->scanFlag = MASTER_SCAN)
#define SET_REVERSE_SCAN_FLAG(runtime) ((runtime)->scanFlag = REVERSE_SCAN)

/* Recover the address of the enclosing SQInfo from a pointer to its runtimeEnv member. */
#define GET_QINFO_ADDR(x) ((void *)((char *)(x)-offsetof(SQInfo, runtimeEnv)))

/* Row position that lies index steps of width step away from (query)->pos. */
#define GET_COL_DATA_POS(query, index, step) ((query)->pos + (index) * (step))

/*
 * Flip n in place: TSDB_ORDER_ASC becomes TSDB_ORDER_DESC, any other value becomes TSDB_ORDER_ASC.
 * The argument is evaluated more than once.
 */
#define SWITCH_ORDER(n) (((n) = ((n) == TSDB_ORDER_ASC) ? TSDB_ORDER_DESC : TSDB_ORDER_ASC))

/* Byte width / data type of the colList entry referenced by output expression colidx. */
#define GET_COLUMN_BYTES(query, colidx) \
  ((query)->colList[(query)->pSelectExpr[colidx].base.colInfo.colIndex].bytes)
#define GET_COLUMN_TYPE(query, colidx) ((query)->colList[(query)->pSelectExpr[colidx].base.colInfo.colIndex].type)
55

56
/* Bit flags describing the execution status of a query; they can be OR-ed together. */
enum {
  // set as soon as the query starts to execute
  QUERY_NOT_COMPLETED = 0x1u,

  /* The result output buffer is full and the current query is paused.
   * This status only exists for group-by and diff/add/division/multiply queries.
   */
  QUERY_RESBUF_FULL = 0x2u,

  /* The query is over:
   * 1. used by queries that produce a single result row, e.g. count/sum/first/last/avg;
   * 2. also set once all data inside the queried time window has been handled.
   */
  QUERY_COMPLETED = 0x4u,

  /* Set when the result has not been completely returned to the client yet;
   * usually used by interval queries with the interpolation option.
   */
  QUERY_OVER = 0x8u,
};
76 77

/* Outcome codes of a timestamp-join comparison. */
enum {
  TS_JOIN_TS_EQUAL       = 0,
  TS_JOIN_TS_NOT_EQUALS  = 1,
  TS_JOIN_TAG_NOT_EQUALS = 2,
};

83
typedef struct {
84 85 86 87 88 89
  int32_t     status;       // query status
  TSKEY       lastKey;      // the lastKey value before query executed
  STimeWindow w;            // whole query time window
  STimeWindow curWindow;    // current query window
  int32_t     windowIndex;  // index of active time window result for interval query
  STSCursor   cur;
90 91
} SQueryStatusInfo;

92
#define CLEAR_QUERY_STATUS(q, st)   ((q)->status &= (~(st)))
93
static void setQueryStatus(SQuery *pQuery, int8_t status);
94

H
hjxilinx 已提交
95
/* A query is treated as an interval query when its intervalTime is positive. */
static bool isIntervalQuery(SQuery *pQuery) {
  return pQuery->intervalTime > 0;
}
96

H
hjxilinx 已提交
97
// todo move to utility
98
static int32_t mergeIntoGroupResultImpl(SQInfo *pQInfo, SArray *group);
99

H
hjxilinx 已提交
100
static void setWindowResOutputBuf(SQueryRuntimeEnv *pRuntimeEnv, SWindowResult *pResult);
H
Haojun Liao 已提交
101
static void setWindowResOutputBufInitCtx(SQueryRuntimeEnv *pRuntimeEnv, SWindowResult *pResult);
102 103 104
static void resetMergeResultBuf(SQuery *pQuery, SQLFunctionCtx *pCtx, SResultInfo *pResultInfo);
static bool functionNeedToExecute(SQueryRuntimeEnv *pRuntimeEnv, SQLFunctionCtx *pCtx, int32_t functionId);
static void getNextTimeWindow(SQuery *pQuery, STimeWindow *pTimeWindow);
105

106 107 108
static void setExecParams(SQuery *pQuery, SQLFunctionCtx *pCtx, void* inputData, TSKEY *tsCol, SDataBlockInfo* pBlockInfo,
                          SDataStatis *pStatis, void *param, int32_t colIndex);

109
static void initCtxOutputBuf(SQueryRuntimeEnv *pRuntimeEnv);
110
static void destroyTableQueryInfo(STableQueryInfo *pTableQueryInfo, int32_t numOfCols);
111 112
static void resetCtxOutputBuf(SQueryRuntimeEnv *pRuntimeEnv);
static bool hasMainOutput(SQuery *pQuery);
H
hjxilinx 已提交
113
static void buildTagQueryResult(SQInfo *pQInfo);
114

H
hjxilinx 已提交
115
static int32_t setAdditionalInfo(SQInfo *pQInfo, STableId *pTableId, STableQueryInfo *pTableQueryInfo);
116
static int32_t flushFromResultBuf(SQInfo *pQInfo);
117

118
/*
 * Apply every column filter of the query to the row at position elemPos.
 *
 * A row passes only when, for each filtered column, the value is not NULL and
 * at least one of the filters attached to that column accepts it.
 *
 * @param pQuery   query holding numOfFilterCols entries in pFilterInfo
 * @param elemPos  row position inside each filter column's pData buffer
 * @return         true if the row passes all column filters, false otherwise
 */
bool doFilterData(SQuery *pQuery, int32_t elemPos) {
  for (int32_t col = 0; col < pQuery->numOfFilterCols; ++col) {
    SSingleColumnFilterInfo *pColFilter = &pQuery->pFilterInfo[col];
    char *pValue = pColFilter->pData + pColFilter->info.bytes * elemPos;

    // a NULL value disqualifies the row immediately
    if (isNull(pValue, pColFilter->info.type)) {
      return false;
    }

    // stop at the first filter of this column that accepts the value
    int32_t idx = 0;
    while (idx < pColFilter->numOfFilters) {
      SColumnFilterElem *pElem = &pColFilter->pFilters[idx];
      if (pElem->fp(pElem, pValue, pValue)) {
        break;
      }
      ++idx;
    }

    // the loop ran to the end: no filter of this column matched
    if (idx >= pColFilter->numOfFilters) {
      return false;
    }
  }

  return true;
}

/*
 * Return the largest numOfRes found among the output columns of the query.
 *
 * When the query has a main output (see hasMainOutput), the ts, tag and tagprj
 * functions are skipped because they cannot decide the number of output rows.
 * Columns whose result info is NULL are skipped as well.
 */
int64_t getNumOfResult(SQueryRuntimeEnv *pRuntimeEnv) {
  SQuery *pQuery = pRuntimeEnv->pQuery;
  bool    mainOutputExists = hasMainOutput(pQuery);
  int64_t numOfRows = 0;

  for (int32_t i = 0; i < pQuery->numOfOutput; ++i) {
    int32_t fid = pQuery->pSelectExpr[i].base.functionId;
    bool    auxiliary = (fid == TSDB_FUNC_TS) || (fid == TSDB_FUNC_TAG) || (fid == TSDB_FUNC_TAGPRJ);

    if (mainOutputExists && auxiliary) {
      continue;
    }

    SResultInfo *pInfo = GET_RES_INFO(&pRuntimeEnv->pCtx[i]);
    if (pInfo == NULL) {
      continue;
    }

    if (numOfRows < pInfo->numOfRes) {
      numOfRows = pInfo->numOfRes;
    }
  }

  assert(numOfRows >= 0);
  return numOfRows;
}

172 173 174 175 176 177 178 179 180
/*
 * Overwrite the number of results of every output column with numOfRes; the
 * value has to be refreshed because the offset value was updated.
 *
 * The ts, tag, tagprj and ts_dummy functions are left untouched. For every
 * other column the current numOfRes is asserted to be larger than the new one.
 */
void updateNumOfResult(SQueryRuntimeEnv *pRuntimeEnv, int32_t numOfRes) {
  SQuery *pQuery = pRuntimeEnv->pQuery;

  for (int32_t i = 0; i < pQuery->numOfOutput; ++i) {
    SResultInfo *pInfo = GET_RES_INFO(&pRuntimeEnv->pCtx[i]);
    int16_t      fid = pRuntimeEnv->pCtx[i].functionId;

    bool skipped = (fid == TSDB_FUNC_TS) || (fid == TSDB_FUNC_TAG) || (fid == TSDB_FUNC_TAGPRJ) ||
                   (fid == TSDB_FUNC_TS_DUMMY);
    if (!skipped) {
      assert(pInfo->numOfRes > numOfRes);
      pInfo->numOfRes = numOfRes;
    }
  }
}

192 193 194 195 196 197 198 199 200
/* Map a group index to its result id: ids start at 200000 and are spaced 10000 apart. */
static int32_t getGroupResultId(int32_t groupIndex) {
  const int32_t firstId = 200000;
  const int32_t stride = 10000;

  return firstId + (groupIndex * stride);
}

/*
 * Tell whether the group-by expression contains a normal (TSDB_COL_NORMAL) column.
 *
 * Returns false for a NULL expression or one without group columns. When there
 * is more than one group column, the normal column is asserted to have a
 * colIndex greater than 0 (it must come second if tbname is also grouped on).
 */
bool isGroupbyNormalCol(SSqlGroupbyExpr *pGroupbyExpr) {
  if (pGroupbyExpr == NULL || pGroupbyExpr->numOfGroupCols == 0) {
    return false;
  }

  for (int32_t idx = 0; idx < pGroupbyExpr->numOfGroupCols; ++idx) {
    SColIndex *pCol = taosArrayGet(pGroupbyExpr->columnInfo, idx);
    if (pCol->flag != TSDB_COL_NORMAL) {
      continue;
    }

    assert(pGroupbyExpr->numOfGroupCols <= 1 || pCol->colIndex > 0);
    return true;
  }

  return false;
}

/*
 * Look up the data type of the first normal column in the group-by expression.
 *
 * The column id of the first TSDB_COL_NORMAL entry is searched for in
 * pQuery->colList. Returns TSDB_DATA_TYPE_NULL when no entry of colList has
 * that id (the search id stays at -2 if there is no normal group column).
 */
int16_t getGroupbyColumnType(SQuery *pQuery, SSqlGroupbyExpr *pGroupbyExpr) {
  assert(pGroupbyExpr != NULL);

  // step 1: find the id of the first normal column in the group-by list
  int32_t groupColId = -2;
  for (int32_t g = 0; g < pGroupbyExpr->numOfGroupCols; ++g) {
    SColIndex *pCol = taosArrayGet(pGroupbyExpr->columnInfo, g);
    if (pCol->flag == TSDB_COL_NORMAL) {
      groupColId = pCol->colId;
      break;
    }
  }

  // step 2: resolve that id against the columns of the query
  for (int32_t c = 0; c < pQuery->numOfCols; ++c) {
    if (groupColId == pQuery->colList[c].colId) {
      return pQuery->colList[c].type;
    }
  }

  return TSDB_DATA_TYPE_NULL;
}

/*
 * True when the select list holds at least one tag_dummy/ts_dummy function and
 * at least one other function flagged with TSDB_FUNCSTATE_SELECTIVITY.
 */
bool isSelectivityWithTagsQuery(SQuery *pQuery) {
  bool    tagSeen = false;
  int32_t selectivityCount = 0;

  for (int32_t i = 0; i < pQuery->numOfOutput; ++i) {
    int32_t fid = pQuery->pSelectExpr[i].base.functionId;

    if (fid == TSDB_FUNC_TAG_DUMMY || fid == TSDB_FUNC_TS_DUMMY) {
      tagSeen = true;
    } else if ((aAggs[fid].nStatus & TSDB_FUNCSTATE_SELECTIVITY) != 0) {
      ++selectivityCount;
    }
  }

  return tagSeen && (selectivityCount > 0);
}

266
/* True when the first select expression is the TSDB_FUNC_TS_COMP function. */
bool isTSCompQuery(SQuery *pQuery) {
  return pQuery->pSelectExpr[0].base.functionId == TSDB_FUNC_TS_COMP;
}
267

268 269 270 271
/*
 * Enforce the LIMIT of the query on the rows produced in the current round.
 *
 * If a positive limit is set and total + rows exceeds it, rows is cut down to
 * the remaining quota, the query is marked QUERY_COMPLETED and true is
 * returned. Otherwise nothing changes and false is returned.
 */
static bool limitResults(SQueryRuntimeEnv* pRuntimeEnv) {
  SQInfo* pQInfo = GET_QINFO_ADDR(pRuntimeEnv);
  SQuery* pQuery = pRuntimeEnv->pQuery;

  // no limit configured
  if (pQuery->limit.limit <= 0) {
    return false;
  }

  // still within the limit
  if (pQuery->rec.total + pQuery->rec.rows <= pQuery->limit.limit) {
    return false;
  }

  pQuery->rec.rows = pQuery->limit.limit - pQuery->rec.total;

  qTrace("QInfo:%p discard remain data due to result limitation, limit:%"PRId64", current return:%d, total:%"PRId64,
      pQInfo, pQuery->limit.limit, pQuery->rec.rows, pQuery->rec.total + pQuery->rec.rows);
  assert(pQuery->rec.rows >= 0);

  setQueryStatus(pQuery, QUERY_COMPLETED);
  return true;
}

/* True when any select expression other than TSDB_FUNC_TS is a top or bottom function. */
static bool isTopBottomQuery(SQuery *pQuery) {
  for (int32_t i = 0; i < pQuery->numOfOutput; ++i) {
    int32_t fid = pQuery->pSelectExpr[i].base.functionId;

    if (fid != TSDB_FUNC_TS && (fid == TSDB_FUNC_TOP || fid == TSDB_FUNC_BOTTOM)) {
      return true;
    }
  }

  return false;
}

H
Haojun Liao 已提交
300
/*
 * Find the statistics entry that belongs to the column of select expression index.
 *
 * Entries are matched by column id, since the file block may be out of date and
 * its schema may differ from the newest table schema.
 * TODO: speed up by using bsearch.
 *
 * @param pQuery     query whose pSelectExpr[index] names the column
 * @param pStatis    array of numOfCols statistics entries
 * @param numOfCols  number of entries in pStatis
 * @param index      index of the select expression
 * @return           matching entry, or NULL for a tag column (no field info) or when no id matches
 */
static SDataStatis *getStatisInfo(SQuery *pQuery, SDataStatis *pStatis, int32_t numOfCols, int32_t index) {
  SColIndex *pColIndex = &pQuery->pSelectExpr[index].base.colInfo;
  if (TSDB_COL_IS_TAG(pColIndex->flag)) {
    return NULL;
  }

  int32_t pos = 0;
  while (pos < numOfCols && pColIndex->colId != pStatis[pos].colId) {
    ++pos;
  }

  return (pos < numOfCols) ? &pStatis[pos] : NULL;
}

321 322 323 324 325 326 327 328
/**
 * @param pQuery
 * @param col
 * @param pDataBlockInfo
 * @param pStatis
 * @param pColStatis
 * @return
 */
H
Haojun Liao 已提交
329
static bool hasNullValue(SQuery *pQuery, int32_t col, int32_t numOfCols, SDataStatis *pStatis, SDataStatis **pColStatis) {
330
  SColIndex *pColIndex = &pQuery->pSelectExpr[col].base.colInfo;
331
  if (TSDB_COL_IS_TAG(pColIndex->flag)) {
332 333
    return false;
  }
334

335 336 337 338
  // query on primary timestamp column, not null value at all
  if (pColIndex->colId == PRIMARYKEY_TIMESTAMP_COL_INDEX) {
    return false;
  }
339

340
  if (pStatis != NULL) {
H
Haojun Liao 已提交
341
    *pColStatis = getStatisInfo(pQuery, pStatis, numOfCols, col);
H
hjxilinx 已提交
342 343
  } else {
    *pColStatis = NULL;
344
  }
345

346 347 348
  if ((*pColStatis) != NULL && (*pColStatis)->numOfNull == 0) {
    return false;
  }
349

350 351 352 353 354 355
  return true;
}

/*
 * Locate, or create, the window result slot that belongs to the given key.
 *
 * The key is looked up in pWindowResInfo->hashList. On a hit the stored slot
 * index becomes curIndex. On a miss a new slot is appended (doubling the
 * result array first when it is full) and registered in the hash list.
 *
 * @param pRuntimeEnv     runtime environment of the query
 * @param pWindowResInfo  window result container to search / extend
 * @param pData           key bytes
 * @param bytes           length of the key in bytes
 * @return                result slot for the key, or NULL when the result array could not be
 *                        enlarged; in that case the container is left unchanged
 */
static SWindowResult *doSetTimeWindowFromKey(SQueryRuntimeEnv *pRuntimeEnv, SWindowResInfo *pWindowResInfo, char *pData,
                                             int16_t bytes) {
  SQuery *pQuery = pRuntimeEnv->pQuery;

  int32_t *p1 = (int32_t *) taosHashGet(pWindowResInfo->hashList, pData, bytes);
  if (p1 != NULL) {
    pWindowResInfo->curIndex = *p1;
  } else {
    // more than the capacity, reallocate the resources
    if (pWindowResInfo->size >= pWindowResInfo->capacity) {
      // widen before doubling so the multiplication cannot overflow 32 bits
      int64_t newCap = (int64_t)pWindowResInfo->capacity * 2;

      void *t = realloc(pWindowResInfo->pResult, newCap * sizeof(SWindowResult));
      if (t == NULL) {
        // the old buffer is still valid and untouched; report the failure instead of
        // initializing slots beyond the end of the old allocation
        return NULL;
      }

      pWindowResInfo->pResult = (SWindowResult *)t;
      memset(&pWindowResInfo->pResult[pWindowResInfo->capacity], 0,
             sizeof(SWindowResult) * (size_t)(newCap - pWindowResInfo->capacity));

      // initialize only the freshly added slots
      for (int32_t i = pWindowResInfo->capacity; i < newCap; ++i) {
        SPosInfo pos = {-1, -1};
        createQueryResultInfo(pQuery, &pWindowResInfo->pResult[i], pRuntimeEnv->stableQuery, &pos);
      }
      pWindowResInfo->capacity = newCap;
    }

    // add a new result set for a new group
    pWindowResInfo->curIndex = pWindowResInfo->size++;
    taosHashPut(pWindowResInfo->hashList, pData, bytes, (char *)&pWindowResInfo->curIndex, sizeof(int32_t));
  }

  return getWindowResult(pWindowResInfo, pWindowResInfo->curIndex);
}

/*
 * Compute the time window that contains timestamp ts.
 *
 * The starting candidate is either the window derived from prevSKey (when no
 * window is active yet, curIndex == -1) or the currently active window. If the
 * candidate does not cover ts, its start is moved by whole multiples of
 * slidingTime until it does. For ascending queries the window end is clipped
 * to the end of the query range; the start is never clipped because it serves
 * as the index value of the window.
 */
static STimeWindow getActiveTimeWindow(SWindowResInfo *pWindowResInfo, int64_t ts, SQuery *pQuery) {
  STimeWindow w = {0};

  if (pWindowResInfo->curIndex != -1) {
    int32_t slot = curTimeWindow(pWindowResInfo);
    w = getWindowResult(pWindowResInfo, slot)->window;
  } else {
    // the first window, from the previous stored value
    w.skey = pWindowResInfo->prevSKey;
    w.ekey = w.skey + pQuery->intervalTime - 1;
  }

  bool covered = (w.skey <= ts) && (ts <= w.ekey);
  if (!covered) {
    int64_t start = w.skey;

    // window lies after ts: slide backwards
    if (start > ts) {
      start -= ((start - ts + pQuery->slidingTime - 1) / pQuery->slidingTime) * pQuery->slidingTime;
    }

    // window lies before ts: slide forwards
    int64_t end = start + pQuery->intervalTime - 1;
    if (end < ts) {
      start += ((ts - end + pQuery->slidingTime - 1) / pQuery->slidingTime) * pQuery->slidingTime;
    }

    w.skey = start;
    w.ekey = w.skey + pQuery->intervalTime - 1;
  }

  // only the end key is bounded by the query time range, see the header comment
  if (QUERY_IS_ASC_QUERY(pQuery) && w.ekey > pQuery->window.ekey) {
    w.ekey = pQuery->window.ekey;
  }

  assert(ts >= w.skey && ts <= w.ekey);

  return w;
}

/*
 * Assign a (page, row) position in the disk-based result buffer to a window result.
 *
 * Nothing is done when the window result already has a page. Otherwise the last
 * page of group sid is used if it still has room, else a new page is requested.
 *
 * @return 0 on success (or when a position was already assigned), -1 when no page is available
 */
static int32_t addNewWindowResultBuf(SWindowResult *pWindowRes, SDiskbasedResultBuf *pResultBuf, int32_t sid,
                                     int32_t numOfRowsPerPage) {
  if (pWindowRes->pos.pageId != -1) {
    return 0;
  }

  // in the first scan, new space needed for results
  tFilePage *pPage = NULL;
  int32_t    pageId = -1;
  SIDList    list = getDataBufPagesIdList(pResultBuf, sid);

  if (list.size != 0) {
    pageId = getLastPageId(&list);
    pPage = GET_RES_BUF_PAGE_BY_ID(pResultBuf, pageId);

    // the last page is full, move on to a fresh one
    if (pPage->num >= numOfRowsPerPage) {
      pPage = getNewDataBuf(pResultBuf, sid, &pageId);

      // number of elements must be 0 for new allocated buffer
      assert(pPage == NULL || pPage->num == 0);
    }
  } else {
    pPage = getNewDataBuf(pResultBuf, sid, &pageId);
  }

  if (pPage == NULL) {
    return -1;
  }

  // take the next free row of the page and bump the row counter of the page
  if (pWindowRes->pos.pageId == -1) {
    pWindowRes->pos.pageId = pageId;
    pWindowRes->pos.rowId = pPage->num++;
  }

  return 0;
}

/*
 * Make the window result of time window win the active output target.
 *
 * The result slot is looked up (or created) by the start key of the window, a
 * result buffer position is assigned if the slot has none yet, the window is
 * stored in the slot and the function contexts are bound to it.
 *
 * @return TSDB_CODE_SUCCESS on success, -1 if the slot or its result buffer could not be obtained
 */
static int32_t setWindowOutputBufByKey(SQueryRuntimeEnv *pRuntimeEnv, SWindowResInfo *pWindowResInfo, int32_t sid,
                                       STimeWindow *win) {
  assert(win->skey <= win->ekey);
  SDiskbasedResultBuf *pResultBuf = pRuntimeEnv->pResultBuf;

  SWindowResult *pSlot = doSetTimeWindowFromKey(pRuntimeEnv, pWindowResInfo, (char *)&win->skey, TSDB_KEYSIZE);
  if (pSlot == NULL) {
    return -1;
  }

  // not assign result buffer yet, add new result buffer
  if (pSlot->pos.pageId == -1 &&
      addNewWindowResultBuf(pSlot, pResultBuf, sid, pRuntimeEnv->numOfRowsPerPage) != 0) {
    return -1;
  }

  // set time window for current result
  pSlot->window = *win;
  setWindowResOutputBufInitCtx(pRuntimeEnv, pSlot);

  return TSDB_CODE_SUCCESS;
}

/* Return the status of the window result stored in slot; slot must be in [0, size). */
static SWindowStatus *getTimeWindowResStatus(SWindowResInfo *pWindowResInfo, int32_t slot) {
  assert(slot >= 0 && slot < pWindowResInfo->size);

  SWindowResult *pResult = &pWindowResInfo->pResult[slot];
  return &pResult->status;
}

H
Haojun Liao 已提交
497
/*
 * Count the rows between pos and the position that searchFn reports for ekey.
 *
 * @param numOfRows  number of timestamps in pData
 * @param searchFn   search function used to locate ekey in pData
 * @param ekey       end key of the range
 * @param pos        start position in pData
 * @param order      TSDB_ORDER_ASC or descending order
 * @param pData      timestamp column of the block
 * @return           0 when searchFn returns a negative position; otherwise the distance between
 *                   pos and that position, plus one when the element found equals ekey
 */
static int32_t getForwardStepsInBlock(int32_t numOfRows, __block_search_fn_t searchFn, TSKEY ekey, int16_t pos,
                                      int16_t order, int64_t *pData) {
  int32_t endPos = searchFn((char *)pData, numOfRows, ekey, order);
  if (endPos < 0) {
    return 0;
  }

  int32_t steps = (order == TSDB_ORDER_ASC) ? (endPos - pos) : (pos - endPos);
  assert(steps >= 0);

  // endPos data is equalled to the key so, we do need to read the element in endPos
  if (pData[endPos] == ekey) {
    ++steps;
  }

  return steps;
}

/**
 * NOTE: the query status only set for the first scan of master scan.
 */
518
static int32_t doCheckQueryCompleted(SQueryRuntimeEnv *pRuntimeEnv, TSKEY lastKey, SWindowResInfo *pWindowResInfo) {
519 520
  SQuery *pQuery = pRuntimeEnv->pQuery;
  if (pRuntimeEnv->scanFlag != MASTER_SCAN || (!isIntervalQuery(pQuery))) {
521
    return pWindowResInfo->size;
522
  }
523

524
  // no qualified results exist, abort check
525 526
  int32_t numOfClosed = 0;
  
527
  if (pWindowResInfo->size == 0) {
528
    return pWindowResInfo->size;
529
  }
530

531
  // query completed
H
hjxilinx 已提交
532 533
  if ((lastKey >= pQuery->current->win.ekey && QUERY_IS_ASC_QUERY(pQuery)) ||
      (lastKey <= pQuery->current->win.ekey && !QUERY_IS_ASC_QUERY(pQuery))) {
534
    closeAllTimeWindow(pWindowResInfo);
535

536 537 538 539
    pWindowResInfo->curIndex = pWindowResInfo->size - 1;
    setQueryStatus(pQuery, QUERY_COMPLETED | QUERY_RESBUF_FULL);
  } else {  // set the current index to be the last unclosed window
    int32_t i = 0;
540
    int64_t skey = TSKEY_INITIAL_VAL;
541

542 543 544
    for (i = 0; i < pWindowResInfo->size; ++i) {
      SWindowResult *pResult = &pWindowResInfo->pResult[i];
      if (pResult->status.closed) {
545
        numOfClosed += 1;
546 547
        continue;
      }
548

549 550 551 552 553 554 555 556
      if ((pResult->window.ekey <= lastKey && QUERY_IS_ASC_QUERY(pQuery)) ||
          (pResult->window.skey >= lastKey && !QUERY_IS_ASC_QUERY(pQuery))) {
        closeTimeWindow(pWindowResInfo, i);
      } else {
        skey = pResult->window.skey;
        break;
      }
    }
557

558
    // all windows are closed, set the last one to be the skey
559
    if (skey == TSKEY_INITIAL_VAL) {
560 561 562 563 564
      assert(i == pWindowResInfo->size);
      pWindowResInfo->curIndex = pWindowResInfo->size - 1;
    } else {
      pWindowResInfo->curIndex = i;
    }
565

566
    pWindowResInfo->prevSKey = pWindowResInfo->pResult[pWindowResInfo->curIndex].window.skey;
567

568 569 570 571 572
    // the number of completed slots are larger than the threshold, return current generated results to client.
    if (numOfClosed > pWindowResInfo->threshold) {
      qTrace("QInfo:%p total result window:%d closed:%d, reached the output threshold %d, return",
          GET_QINFO_ADDR(pRuntimeEnv), pWindowResInfo->size, numOfClosed, pQuery->rec.threshold);
      
573
      setQueryStatus(pQuery, QUERY_RESBUF_FULL);
574 575 576
    } else {
      qTrace("QInfo:%p total result window:%d already closed:%d", GET_QINFO_ADDR(pRuntimeEnv), pWindowResInfo->size,
             numOfClosed);
577 578
    }
  }
579 580 581 582 583 584 585
  
  // output has reached the limitation, set query completed
  if (pQuery->limit.limit > 0 && (pQuery->limit.limit + pQuery->limit.offset) <= numOfClosed &&
      pRuntimeEnv->scanFlag == MASTER_SCAN) {
    setQueryStatus(pQuery, QUERY_COMPLETED);
  }
  
586
  assert(pWindowResInfo->prevSKey != TSKEY_INITIAL_VAL);
587
  return numOfClosed;
588 589 590
}

/*
 * Count the rows of the current block, starting at startPos and walking in
 * query order, that fall inside the time window ending at ekey.
 *
 * When the window ends inside the block the count comes from
 * getForwardStepsInBlock; otherwise all remaining rows of the block belong to
 * the window. If updateLastKey is set and at least one row qualifies, the
 * lastKey of the current table is moved one step past the last counted row
 * (or past the block boundary when the whole remainder is consumed).
 *
 * @return number of qualified rows, possibly 0
 */
static int32_t getNumOfRowsInTimeWindow(SQuery *pQuery, SDataBlockInfo *pDataBlockInfo, TSKEY *pPrimaryColumn,
                                        int32_t startPos, TSKEY ekey, __block_search_fn_t searchFn, bool updateLastKey) {
  assert(startPos >= 0 && startPos < pDataBlockInfo->rows);

  int32_t num = -1;
  int32_t order = pQuery->order.order;
  int32_t step = GET_FORWARD_DIRECTION_FACTOR(order);

  STableQueryInfo* pTableInfo = pQuery->current;

  if (QUERY_IS_ASC_QUERY(pQuery)) {
    if (ekey >= pDataBlockInfo->window.ekey) {
      // the window reaches the end of the block: take every remaining row
      num = pDataBlockInfo->rows - startPos;
      if (updateLastKey) {
        pTableInfo->lastKey = pDataBlockInfo->window.ekey + step;
      }
    } else {
      num = getForwardStepsInBlock(pDataBlockInfo->rows, searchFn, ekey, startPos, order, pPrimaryColumn);
      if (num == 0) {
        // no qualified data in current block, do not update the lastKey value
        assert(ekey < pPrimaryColumn[startPos]);
      } else if (updateLastKey) {
        pTableInfo->lastKey = pPrimaryColumn[startPos + (num - 1)] + step;
      }
    }
  } else {  // desc
    if (ekey <= pDataBlockInfo->window.skey) {
      // the window reaches the start of the block: take every row down to index 0
      num = startPos + 1;
      if (updateLastKey) {
        pTableInfo->lastKey = pDataBlockInfo->window.skey + step;
      }
    } else {
      num = getForwardStepsInBlock(pDataBlockInfo->rows, searchFn, ekey, startPos, order, pPrimaryColumn);
      if (num == 0) {
        // no qualified data in current block, do not update the lastKey value
        assert(ekey > pPrimaryColumn[startPos]);
      } else if (updateLastKey) {
        pTableInfo->lastKey = pPrimaryColumn[startPos - (num - 1)] + step;
      }
    }
  }

  assert(num >= 0);
  return num;
}

/*
 * Run the block-wise implementation of every output function over forwardStep
 * rows of the current block for the time window pWin.
 *
 * Nothing is executed unless this is the master scan or the window is already
 * closed.
 *
 * @param pRuntimeEnv  runtime environment holding the function contexts
 * @param pStatus      status of the window being filled
 * @param pWin         time window being filled
 * @param offset       position of the first row in query order
 * @param forwardStep  number of rows to process
 * @param tsBuf        timestamp column of the block
 * @param numOfTotal   total number of rows in the block
 */
static void doBlockwiseApplyFunctions(SQueryRuntimeEnv *pRuntimeEnv, SWindowStatus *pStatus, STimeWindow *pWin,
                                      int32_t offset, int32_t forwardStep, TSKEY *tsBuf, int32_t numOfTotal) {
  SQuery *        pQuery = pRuntimeEnv->pQuery;
  SQLFunctionCtx *pCtx = pRuntimeEnv->pCtx;

  if (!IS_MASTER_SCAN(pRuntimeEnv) && !pStatus->closed) {
    return;
  }

  for (int32_t i = 0; i < pQuery->numOfOutput; ++i) {
    SQLFunctionCtx *pFuncCtx = &pCtx[i];
    int32_t         fid = pQuery->pSelectExpr[i].base.functionId;

    pFuncCtx->nStartQueryTimestamp = pWin->skey;
    pFuncCtx->size = forwardStep;

    // startOffset is always the lowest row index of the range, whatever the scan order
    if (QUERY_IS_ASC_QUERY(pQuery)) {
      pFuncCtx->startOffset = offset;
    } else {
      pFuncCtx->startOffset = offset - (forwardStep - 1);
    }

    if ((aAggs[fid].nStatus & TSDB_FUNCSTATE_SELECTIVITY) != 0) {
      pFuncCtx->ptsList = &tsBuf[offset];
    }

    // not a whole block involved in query processing, statistics data can not be used
    if (forwardStep != numOfTotal) {
      pFuncCtx->preAggVals.isSet = false;
    }

    if (functionNeedToExecute(pRuntimeEnv, pFuncCtx, fid)) {
      aAggs[fid].xFunction(pFuncCtx);
    }
  }
}

/*
 * Run the row-wise implementation (xFunctionF) of every output function on the
 * row at offset for the time window pWin.
 *
 * Nothing is executed unless this is the master scan or the window is already
 * closed.
 */
static void doRowwiseApplyFunctions(SQueryRuntimeEnv *pRuntimeEnv, SWindowStatus *pStatus, STimeWindow *pWin,
                                    int32_t offset) {
  SQuery *        pQuery = pRuntimeEnv->pQuery;
  SQLFunctionCtx *pCtx = pRuntimeEnv->pCtx;

  if (!IS_MASTER_SCAN(pRuntimeEnv) && !pStatus->closed) {
    return;
  }

  for (int32_t i = 0; i < pQuery->numOfOutput; ++i) {
    SQLFunctionCtx *pFuncCtx = &pCtx[i];
    pFuncCtx->nStartQueryTimestamp = pWin->skey;

    int32_t fid = pQuery->pSelectExpr[i].base.functionId;
    if (functionNeedToExecute(pRuntimeEnv, pFuncCtx, fid)) {
      aAggs[fid].xFunctionF(pFuncCtx, offset);
    }
  }
}

static int32_t getNextQualifiedWindow(SQueryRuntimeEnv *pRuntimeEnv, STimeWindow *pNextWin,
685 686
                                      SDataBlockInfo *pDataBlockInfo, TSKEY *primaryKeys,
                                      __block_search_fn_t searchFn) {
687
  SQuery *pQuery = pRuntimeEnv->pQuery;
688

H
Haojun Liao 已提交
689 690 691 692
  // tumbling time window query, a special case of sliding time window query
  if (pQuery->slidingTime == pQuery->intervalTime) {
    // todo opt
  }
693

H
Haojun Liao 已提交
694
  getNextTimeWindow(pQuery, pNextWin);
695

H
Haojun Liao 已提交
696 697 698 699 700
  // next time window is not in current block
  if ((pNextWin->skey > pDataBlockInfo->window.ekey && QUERY_IS_ASC_QUERY(pQuery)) ||
      (pNextWin->ekey < pDataBlockInfo->window.skey && !QUERY_IS_ASC_QUERY(pQuery))) {
    return -1;
  }
701

H
Haojun Liao 已提交
702 703 704 705 706
  TSKEY startKey = -1;
  if (QUERY_IS_ASC_QUERY(pQuery)) {
    startKey = pNextWin->skey;
    if (startKey < pQuery->window.skey) {
      startKey = pQuery->window.skey;
707
    }
H
Haojun Liao 已提交
708 709 710 711
  } else {
    startKey = pNextWin->ekey;
    if (startKey > pQuery->window.skey) {
      startKey = pQuery->window.skey;
712
    }
H
Haojun Liao 已提交
713
  }
714

H
Haojun Liao 已提交
715
  int32_t startPos = searchFn((char *)primaryKeys, pDataBlockInfo->rows, startKey, pQuery->order.order);
716

H
Haojun Liao 已提交
717 718 719 720 721 722
  /*
   * This time window does not cover any data, try next time window,
   * this case may happen when the time window is too small
   */
  if (QUERY_IS_ASC_QUERY(pQuery) && primaryKeys[startPos] > pNextWin->ekey) {
    TSKEY next = primaryKeys[startPos];
723

H
Haojun Liao 已提交
724 725 726 727
    pNextWin->ekey += ((next - pNextWin->ekey + pQuery->slidingTime - 1)/pQuery->slidingTime) * pQuery->slidingTime;
    pNextWin->skey = pNextWin->ekey - pQuery->intervalTime + 1;
  } else if ((!QUERY_IS_ASC_QUERY(pQuery)) && primaryKeys[startPos] < pNextWin->skey) {
    TSKEY next = primaryKeys[startPos];
728

H
Haojun Liao 已提交
729 730
    pNextWin->skey -= ((pNextWin->skey - next + pQuery->slidingTime - 1) / pQuery->slidingTime) * pQuery->slidingTime;
    pNextWin->ekey = pNextWin->skey + pQuery->intervalTime - 1;
731
  }
732

H
Haojun Liao 已提交
733
  return startPos;
734 735 736 737 738 739 740 741 742 743 744 745 746 747 748
}

/*
 * Return the trailing edge of pWindow in scan direction, bounded by the end of
 * the query range: min(window.ekey, query.ekey) for ascending queries and
 * max(window.skey, query.ekey) otherwise.
 */
static TSKEY reviseWindowEkey(SQuery *pQuery, STimeWindow *pWindow) {
  TSKEY queryEnd = pQuery->window.ekey;

  if (QUERY_IS_ASC_QUERY(pQuery)) {
    return (pWindow->ekey > queryEnd) ? queryEnd : pWindow->ekey;
  }

  return (pWindow->skey < queryEnd) ? queryEnd : pWindow->skey;
}

H
hjxilinx 已提交
753 754 755 756 757 758 759 760 761 762 763 764 765 766 767
/*
 * Return the data buffer of the column with id colId in pDataBlock, or NULL if
 * the block has no such column.
 * todo: binary search
 */
static void* getDataBlockImpl(SArray* pDataBlock, int32_t colId) {
  int32_t total = taosArrayGetSize(pDataBlock);

  int32_t idx = 0;
  while (idx < total) {
    SColumnInfoData *pColData = taosArrayGet(pDataBlock, idx);
    if (pColData->info.colId == colId) {
      return pColData->pData;
    }
    ++idx;
  }

  return NULL;
}

static char *getDataBlock(SQueryRuntimeEnv *pRuntimeEnv, SArithmeticSupport *sas, int32_t col, int32_t size,
768
                    SArray *pDataBlock) {
769
  char *dataBlock = NULL;
770
  SQuery *pQuery = pRuntimeEnv->pQuery;
771

772
  SQLFunctionCtx *pCtx = pRuntimeEnv->pCtx;
773

774
  int32_t functionId = pQuery->pSelectExpr[col].base.functionId;
775
  if (functionId == TSDB_FUNC_ARITHM) {
776
    sas->pArithExpr = &pQuery->pSelectExpr[col];
777

778 779 780 781 782 783
    // set the start offset to be the lowest start position, no matter asc/desc query order
    if (QUERY_IS_ASC_QUERY(pQuery)) {
      pCtx->startOffset = pQuery->pos;
    } else {
      pCtx->startOffset = pQuery->pos - (size - 1);
    }
784

785 786 787 788
    sas->offset  = 0;
    sas->colList = pQuery->colList;
    sas->numOfCols = pQuery->numOfCols;
    sas->data    = calloc(pQuery->numOfCols, POINTER_BYTES);
789

790
    // here the pQuery->colList and sas->colList are identical
H
Haojun Liao 已提交
791
    int32_t numOfCols = taosArrayGetSize(pDataBlock);
792
    for (int32_t i = 0; i < pQuery->numOfCols; ++i) {
793
      SColumnInfo *pColMsg = &pQuery->colList[i];
794

795 796 797 798 799 800 801 802
      dataBlock = NULL;
      for (int32_t k = 0; k < numOfCols; ++k) {  //todo refactor
        SColumnInfoData *p = taosArrayGet(pDataBlock, k);
        if (pColMsg->colId == p->info.colId) {
          dataBlock = p->pData;
          break;
        }
      }
803

804
      assert(dataBlock != NULL);
H
Haojun Liao 已提交
805
      sas->data[i] = dataBlock/* + pQuery->colList[i].bytes*/;  // start from the offset
806
    }
807

808
  } else {  // other type of query function
809
    SColIndex *pCol = &pQuery->pSelectExpr[col].base.colInfo;
810
    if (TSDB_COL_IS_TAG(pCol->flag) || pDataBlock == NULL) {
811 812
      dataBlock = NULL;
    } else {
H
hjxilinx 已提交
813
      dataBlock = getDataBlockImpl(pDataBlock, pCol->colId);
814 815
    }
  }
816

817 818 819 820 821 822 823
  return dataBlock;
}

/**
 *
 * @param pRuntimeEnv
 * @param forwardStep
824
 * @param tsCols
825 826 827 828 829
 * @param pFields
 * @param isDiskFileBlock
 * @return                  the incremental number of output value, so it maybe 0 for fixed number of query,
 *                          such as count/min/max etc.
 */
830
static void blockwiseApplyFunctions(SQueryRuntimeEnv *pRuntimeEnv, SDataStatis *pStatis,
831 832
                                       SDataBlockInfo *pDataBlockInfo, SWindowResInfo *pWindowResInfo,
                                       __block_search_fn_t searchFn, SArray *pDataBlock) {
833
  SQLFunctionCtx *pCtx = pRuntimeEnv->pCtx;
834 835 836
  
  SQuery *pQuery = pRuntimeEnv->pQuery;
  TSKEY  *tsCols = NULL;
837
  if (pDataBlock != NULL) {
838
    SColumnInfoData* pColInfo = taosArrayGet(pDataBlock, 0);
839
    tsCols = (TSKEY *)(pColInfo->pData);
840
  }
841

842
  SArithmeticSupport *sasArray = calloc((size_t)pQuery->numOfOutput, sizeof(SArithmeticSupport));
843

844
  for (int32_t k = 0; k < pQuery->numOfOutput; ++k) {
H
hjxilinx 已提交
845
    char *dataBlock = getDataBlock(pRuntimeEnv, &sasArray[k], k, pDataBlockInfo->rows, pDataBlock);
846
    setExecParams(pQuery, &pCtx[k], dataBlock, tsCols, pDataBlockInfo, pStatis, &sasArray[k], k);
847
  }
848

849 850 851
  int32_t step = GET_FORWARD_DIRECTION_FACTOR(pQuery->order.order);
  if (isIntervalQuery(pQuery)) {
    int32_t offset = GET_COL_DATA_POS(pQuery, 0, step);
852
    TSKEY   ts = tsCols[offset];
853

854
    STimeWindow win = getActiveTimeWindow(pWindowResInfo, ts, pQuery);
H
hjxilinx 已提交
855
    if (setWindowOutputBufByKey(pRuntimeEnv, pWindowResInfo, pDataBlockInfo->tid, &win) != TSDB_CODE_SUCCESS) {
H
hjxilinx 已提交
856
      return;
857
    }
858

859 860
    TSKEY   ekey = reviseWindowEkey(pQuery, &win);
    int32_t forwardStep =
861
        getNumOfRowsInTimeWindow(pQuery, pDataBlockInfo, tsCols, pQuery->pos, ekey, searchFn, true);
862

863
    SWindowStatus *pStatus = getTimeWindowResStatus(pWindowResInfo, curTimeWindow(pWindowResInfo));
864
    doBlockwiseApplyFunctions(pRuntimeEnv, pStatus, &win, pQuery->pos, forwardStep, tsCols, pDataBlockInfo->rows);
865

866 867
    int32_t     index = pWindowResInfo->curIndex;
    STimeWindow nextWin = win;
868

869
    while (1) {
870
      int32_t startPos = getNextQualifiedWindow(pRuntimeEnv, &nextWin, pDataBlockInfo, tsCols, searchFn);
871 872 873
      if (startPos < 0) {
        break;
      }
874

875
      // null data, failed to allocate more memory buffer
H
hjxilinx 已提交
876
      if (setWindowOutputBufByKey(pRuntimeEnv, pWindowResInfo, pDataBlockInfo->tid, &nextWin) != TSDB_CODE_SUCCESS) {
877 878
        break;
      }
879

880
      ekey = reviseWindowEkey(pQuery, &nextWin);
881
      forwardStep = getNumOfRowsInTimeWindow(pQuery, pDataBlockInfo, tsCols, startPos, ekey, searchFn, true);
882

883
      pStatus = getTimeWindowResStatus(pWindowResInfo, curTimeWindow(pWindowResInfo));
884
      doBlockwiseApplyFunctions(pRuntimeEnv, pStatus, &nextWin, startPos, forwardStep, tsCols, pDataBlockInfo->rows);
885
    }
886

887 888 889 890 891 892 893
    pWindowResInfo->curIndex = index;
  } else {
    /*
     * the sqlfunctionCtx parameters should be set done before all functions are invoked,
     * since the selectivity + tag_prj query needs all parameters been set done.
     * tag_prj function are changed to be TSDB_FUNC_TAG_DUMMY
     */
894
    for (int32_t k = 0; k < pQuery->numOfOutput; ++k) {
895
      int32_t functionId = pQuery->pSelectExpr[k].base.functionId;
896 897 898 899 900
      if (functionNeedToExecute(pRuntimeEnv, &pCtx[k], functionId)) {
        aAggs[functionId].xFunction(&pCtx[k]);
      }
    }
  }
901

902 903 904 905
  for(int32_t i = 0; i < pQuery->numOfOutput; ++i) {
    if (pQuery->pSelectExpr[i].base.functionId != TSDB_FUNC_ARITHM) {
      continue;
    }
906

907 908
    tfree(sasArray[i].data);
  }
909

910 911 912 913 914 915 916
  tfree(sasArray);
}

/*
 * Make the result slot that belongs to the given group-by value the current output target
 * of the function contexts.
 *
 * Returns TSDB_CODE_SUCCESS on success, and -1 when the value is null, when no window result
 * is obtained for the key, or when a new result buffer page cannot be added.
 */
static int32_t setGroupResultOutputBuf(SQueryRuntimeEnv *pRuntimeEnv, char *pData, int16_t type, int16_t bytes) {
  // a null value does not form a group
  if (isNull(pData, type)) {
    return -1;
  }

  const int32_t groupResultId = 1;

  SDiskbasedResultBuf *pBuf = pRuntimeEnv->pResultBuf;

  // integer form of the key, stored as the window range below; it stays -1 for every other type
  int64_t keyVal = -1;
  if (type == TSDB_DATA_TYPE_BOOL || type == TSDB_DATA_TYPE_TINYINT) {
    keyVal = GET_INT8_VAL(pData);
  } else if (type == TSDB_DATA_TYPE_SMALLINT) {
    keyVal = GET_INT16_VAL(pData);
  } else if (type == TSDB_DATA_TYPE_INT) {
    keyVal = GET_INT32_VAL(pData);
  } else if (type == TSDB_DATA_TYPE_BIGINT) {
    keyVal = GET_INT64_VAL(pData);
  }

  SWindowResult *pRes = doSetTimeWindowFromKey(pRuntimeEnv, &pRuntimeEnv->windowResInfo, pData, bytes);
  if (pRes == NULL) {
    return -1;
  }

  pRes->window.skey = keyVal;
  pRes->window.ekey = keyVal;

  // a page id of -1 means that no result page has been attached to this group yet
  if (pRes->pos.pageId == -1) {
    if (addNewWindowResultBuf(pRes, pBuf, groupResultId, pRuntimeEnv->numOfRowsPerPage) != 0) {
      return -1;
    }
  }

  setWindowResOutputBuf(pRuntimeEnv, pRes);
  initCtxOutputBuf(pRuntimeEnv);

  return TSDB_CODE_SUCCESS;
}

953
/*
 * Find the column data of a non-tag group-by column inside the given data block.
 *
 * Group-by columns are visited in order and tag columns are skipped. For each visited column,
 * *type and *bytes are set from the matching entry of pQuery->colList; the data pointer of the
 * first such column that is present in pDataBlock is returned. NULL is returned when none of
 * them is present in the block.
 */
static char *getGroupbyColumnData(SQuery *pQuery, int16_t *type, int16_t *bytes, SArray* pDataBlock) {
  SSqlGroupbyExpr *pExpr = pQuery->pGroupbyExpr;

  for (int32_t g = 0; g < pExpr->numOfGroupCols; ++g) {
    SColIndex *pGroupCol = taosArrayGet(pExpr->columnInfo, g);
    if (pGroupCol->flag == TSDB_COL_TAG) {
      continue;
    }

    int32_t groupColId = pGroupCol->colId;

    // position of the group-by column in the query column list
    int32_t c = 0;
    while (c < pQuery->numOfCols && pQuery->colList[c].colId != groupColId) {
      ++c;
    }

    int16_t slot = (c < pQuery->numOfCols) ? c : -1;
    assert(slot >= 0 && slot < pQuery->numOfCols);

    *type = pQuery->colList[slot].type;
    *bytes = pQuery->colList[slot].bytes;

    /*
     * The column list entry alone does not guarantee that the block carries this column,
     * so the block itself is searched by column id.
     */
    int32_t numOfBlockCols = taosArrayGetSize(pDataBlock);
    for (int32_t b = 0; b < numOfBlockCols; ++b) {
      SColumnInfoData *pColData = taosArrayGet(pDataBlock, b);
      if (pColData->info.colId == pGroupCol->colId) {
        return pColData->pData;
      }
    }
  }

  return NULL;
}

/*
 * Compare the row at the given offset of the input timestamp column with the current element
 * of the ts buffer.
 *
 * Returns TS_JOIN_TAG_NOT_EQUALS when the tag of the first function context differs from the
 * tag of the buffer element, TS_JOIN_TS_NOT_EQUALS when the row has not reached the element yet
 * in scan direction, and TS_JOIN_TS_EQUAL otherwise. A row that is already past the element
 * trips an assertion.
 */
static int32_t doTSJoinFilter(SQueryRuntimeEnv *pRuntimeEnv, int32_t offset) {
  SQuery *pQuery = pRuntimeEnv->pQuery;

  STSElem         elem = tsBufGetElem(pRuntimeEnv->pTSBuf);
  SQLFunctionCtx *pFirstCtx = &pRuntimeEnv->pCtx[0];

  // the tag has to match before the timestamps are worth comparing
  if (pFirstCtx->tag.i64Key != elem.tag) {
    return TS_JOIN_TAG_NOT_EQUALS;
  }

  TSKEY key = *(TSKEY *)(pFirstCtx->aInputElemBuf + TSDB_KEYSIZE * offset);

#if defined(_DEBUG_VIEW)
  printf("elem in comp ts file:%" PRId64 ", key:%" PRId64 ", tag:%"PRIu64", query order:%d, ts order:%d, traverse:%d, index:%d\n",
         elem.ts, key, elem.tag, pQuery->order.order, pRuntimeEnv->pTSBuf->tsOrder,
         pRuntimeEnv->pTSBuf->cur.order, pRuntimeEnv->pTSBuf->cur.tsIndex);
#endif

  if (key == elem.ts) {
    return TS_JOIN_TS_EQUAL;
  }

  // "behind" means that the row comes before the buffer element in scan direction
  bool behind = QUERY_IS_ASC_QUERY(pQuery) ? (key < elem.ts) : (key > elem.ts);
  if (behind) {
    return TS_JOIN_TS_NOT_EQUALS;
  }

  // the row is already past the buffer element, which must not happen
  assert(false);
  return TS_JOIN_TS_EQUAL;
}

/*
 * Decide whether the given function has to be invoked for the current scan.
 *
 *  - never for a completed result or for the tag/ts dummy functions
 *  - first/first_dst: only for an ascending query
 *  - last/last_dst: only when param[0] of the context equals the query order
 *  - everything else: only outside of the reverse scan
 */
static bool functionNeedToExecute(SQueryRuntimeEnv *pRuntimeEnv, SQLFunctionCtx *pCtx, int32_t functionId) {
  SQuery      *pQuery = pRuntimeEnv->pQuery;
  SResultInfo *pInfo = GET_RES_INFO(pCtx);

  bool isDummy = (functionId == TSDB_FUNC_TAG_DUMMY || functionId == TSDB_FUNC_TS_DUMMY);
  if (pInfo->complete || isDummy) {
    return false;
  }

  bool isFirst = (functionId == TSDB_FUNC_FIRST_DST || functionId == TSDB_FUNC_FIRST);
  if (isFirst) {
    return QUERY_IS_ASC_QUERY(pQuery);
  }

  bool isLast = (functionId == TSDB_FUNC_LAST_DST || functionId == TSDB_FUNC_LAST);
  if (isLast) {
    // todo add comments
    return pCtx->param[0].i64Key == pQuery->order.order;
  }

  // the remaining functions are not executed during the reverse (supplementary) scan
  return !IS_REVERSE_SCAN(pRuntimeEnv);
}

1056 1057
/**
 * Apply the output functions of the query to the current data block one row at a time.
 *
 * tableApplyFunctionsOnBlock() selects this path when column filters, a ts buffer or a group-by
 * on a normal column are involved. Every row is first checked against the ts buffer (if any) and
 * the column filters. A surviving row is applied to each time window that covers its timestamp
 * for an interval query; otherwise it is applied to the result of its group (group-by on a
 * normal column) or to the single current result.
 *
 * @param pRuntimeEnv     runtime environment that owns the query and the per-output function contexts
 * @param pStatis         block statistics forwarded to setExecParams() for each output column
 * @param pDataBlockInfo  descriptor of the current block (its rows and tid fields are read here)
 * @param pWindowResInfo  time window results; curIndex is restored after each row
 * @param pDataBlock      array of SColumnInfoData; element 0 is read as the timestamp column
 */
static void rowwiseApplyFunctions(SQueryRuntimeEnv *pRuntimeEnv, SDataStatis *pStatis, SDataBlockInfo *pDataBlockInfo,
    SWindowResInfo *pWindowResInfo, SArray *pDataBlock) {
  SQLFunctionCtx *pCtx = pRuntimeEnv->pCtx;

  SQuery *pQuery = pRuntimeEnv->pQuery;
  STableQueryInfo* item = pQuery->current;

  TSKEY  *tsCols = (TSKEY*) ((SColumnInfoData *)taosArrayGet(pDataBlock, 0))->pData;
  bool    groupbyStateValue = isGroupbyNormalCol(pQuery->pGroupbyExpr);

  // one support entry per output column; only arithmetic expressions fill in their entry
  SArithmeticSupport *sasArray = calloc((size_t)pQuery->numOfOutput, sizeof(SArithmeticSupport));

  int16_t type = 0;
  int16_t bytes = 0;

  char *groupbyColumnData = NULL;
  if (groupbyStateValue) {
    groupbyColumnData = getGroupbyColumnData(pQuery, &type, &bytes, pDataBlock);
  }

  for (int32_t k = 0; k < pQuery->numOfOutput; ++k) {
    char *dataBlock = getDataBlock(pRuntimeEnv, &sasArray[k], k, pDataBlockInfo->rows, pDataBlock);
    setExecParams(pQuery, &pCtx[k], dataBlock, tsCols, pDataBlockInfo, pStatis, &sasArray[k], k);
  }

  // set the input column data
  for (int32_t k = 0; k < pQuery->numOfFilterCols; ++k) {
    SSingleColumnFilterInfo *pFilterInfo = &pQuery->pFilterInfo[k];
    pFilterInfo->pData = getDataBlockImpl(pDataBlock, pFilterInfo->info.colId);
    assert(pFilterInfo->pData != NULL);
  }

  int32_t step = GET_FORWARD_DIRECTION_FACTOR(pQuery->order.order);

  // from top to bottom in desc
  // from bottom to top in asc order
  if (pRuntimeEnv->pTSBuf != NULL) {
    // GET_QINFO_ADDR expects the address of the runtime environment embedded in SQInfo
    SQInfo *pQInfo = (SQInfo *)GET_QINFO_ADDR(pRuntimeEnv);
    qTrace("QInfo:%p process data rows, numOfRows:%d, query order:%d, ts comp order:%d", pQInfo, pDataBlockInfo->rows,
           pQuery->order.order, pRuntimeEnv->pTSBuf->cur.order);
  }

  int32_t j = 0;
  int32_t offset = -1;

  for (j = 0; j < pDataBlockInfo->rows; ++j) {
    offset = GET_COL_DATA_POS(pQuery, j, step);

    if (pRuntimeEnv->pTSBuf != NULL) {
      int32_t r = doTSJoinFilter(pRuntimeEnv, offset);
      if (r == TS_JOIN_TAG_NOT_EQUALS) {
        break;
      } else if (r == TS_JOIN_TS_NOT_EQUALS) {
        continue;
      } else {
        assert(r == TS_JOIN_TS_EQUAL);
      }
    }

    if (pQuery->numOfFilterCols > 0 && (!doFilterData(pQuery, offset))) {
      continue;
    }

    // interval window query
    if (isIntervalQuery(pQuery)) {
      // decide the time window according to the primary timestamp
      int64_t     ts = tsCols[offset];
      STimeWindow win = getActiveTimeWindow(pWindowResInfo, ts, pQuery);

      int32_t ret = setWindowOutputBufByKey(pRuntimeEnv, pWindowResInfo, pDataBlockInfo->tid, &win);
      if (ret != TSDB_CODE_SUCCESS) {  // null data, too many state code
        continue;
      }

      SWindowStatus *pStatus = getTimeWindowResStatus(pWindowResInfo, curTimeWindow(pWindowResInfo));
      doRowwiseApplyFunctions(pRuntimeEnv, pStatus, &win, offset);

      // remember the active window so that it can be restored after the following windows are visited
      STimeWindow nextWin = win;
      int32_t     index = pWindowResInfo->curIndex;

      while (1) {
        getNextTimeWindow(pQuery, &nextWin);

        // stop once the next window starts beyond the end of the query range
        if ((nextWin.skey > pQuery->window.ekey && QUERY_IS_ASC_QUERY(pQuery)) ||
            (nextWin.skey < pQuery->window.ekey && !QUERY_IS_ASC_QUERY(pQuery))) {
          break;
        }

        // stop once the window no longer covers the timestamp of the current row
        if (ts < nextWin.skey || ts > nextWin.ekey) {
          break;
        }

        // null data, failed to allocate more memory buffer
        if (setWindowOutputBufByKey(pRuntimeEnv, pWindowResInfo, pDataBlockInfo->tid, &nextWin) != TSDB_CODE_SUCCESS) {
          break;
        }

        pStatus = getTimeWindowResStatus(pWindowResInfo, curTimeWindow(pWindowResInfo));
        doRowwiseApplyFunctions(pRuntimeEnv, pStatus, &nextWin, offset);
      }

      pWindowResInfo->curIndex = index;
    } else {  // other queries
      // decide which group this rows belongs to according to current state value
      if (groupbyStateValue) {
        char *val = groupbyColumnData + bytes * offset;

        int32_t ret = setGroupResultOutputBuf(pRuntimeEnv, val, type, bytes);
        if (ret != TSDB_CODE_SUCCESS) {  // null data, too many state code
          continue;
        }
      }

      for (int32_t k = 0; k < pQuery->numOfOutput; ++k) {
        int32_t functionId = pQuery->pSelectExpr[k].base.functionId;
        if (functionNeedToExecute(pRuntimeEnv, &pCtx[k], functionId)) {
          aAggs[functionId].xFunctionF(&pCtx[k], offset);
        }
      }
    }

    if (pRuntimeEnv->pTSBuf != NULL) {
      // if timestamp filter list is empty, quit current query
      if (!tsBufNextPos(pRuntimeEnv->pTSBuf)) {
        setQueryStatus(pQuery, QUERY_COMPLETED);
        break;
      }
    }
  }

  // offset is only valid when the loop body ran at least once; an empty block has no last row
  if (pDataBlockInfo->rows > 0) {
    item->lastKey = tsCols[offset] + step;
  }

  // todo refactor: extract method
  for(int32_t i = 0; i < pQuery->numOfOutput; ++i) {
    if (pQuery->pSelectExpr[i].base.functionId != TSDB_FUNC_ARITHM) {
      continue;
    }

    tfree(sasArray[i].data);
  }

  free(sasArray);
}

static int32_t tableApplyFunctionsOnBlock(SQueryRuntimeEnv *pRuntimeEnv, SDataBlockInfo *pDataBlockInfo,
H
hjxilinx 已提交
1200
                                          SDataStatis *pStatis, __block_search_fn_t searchFn, SArray *pDataBlock) {
H
hjxilinx 已提交
1201
  SQuery *pQuery = pRuntimeEnv->pQuery;
H
hjxilinx 已提交
1202 1203 1204
  
  STableQueryInfo* pTableQInfo = pQuery->current;
  SWindowResInfo*  pWindowResInfo = &pRuntimeEnv->windowResInfo;
H
hjxilinx 已提交
1205
  
1206
  if (pQuery->numOfFilterCols > 0 || pRuntimeEnv->pTSBuf != NULL || isGroupbyNormalCol(pQuery->pGroupbyExpr)) {
1207
    rowwiseApplyFunctions(pRuntimeEnv, pStatis, pDataBlockInfo, pWindowResInfo, pDataBlock);
1208
  } else {
1209
    blockwiseApplyFunctions(pRuntimeEnv, pStatis, pDataBlockInfo, pWindowResInfo, searchFn, pDataBlock);
1210
  }
1211

1212
  // update the lastkey of current table
1213
  TSKEY lastKey = QUERY_IS_ASC_QUERY(pQuery) ? pDataBlockInfo->window.ekey : pDataBlockInfo->window.skey;
H
hjxilinx 已提交
1214
  pTableQInfo->lastKey = lastKey + GET_FORWARD_DIRECTION_FACTOR(pQuery->order.order);
1215

1216
  // interval query with limit applied
1217 1218 1219 1220 1221 1222
  int32_t numOfRes = 0;
  
  if (isIntervalQuery(pQuery)) {
    numOfRes = doCheckQueryCompleted(pRuntimeEnv, lastKey, pWindowResInfo);
  } else {
    numOfRes = getNumOfResult(pRuntimeEnv);
1223

1224 1225 1226 1227
    // update the number of output result
    if (numOfRes > 0 && pQuery->checkBuffer == 1) {
      assert(numOfRes >= pQuery->rec.rows);
      pQuery->rec.rows = numOfRes;
1228

1229 1230 1231
      if (numOfRes >= pQuery->rec.threshold) {
        setQueryStatus(pQuery, QUERY_RESBUF_FULL);
      }
1232

1233 1234 1235
      if ((pQuery->limit.limit >= 0) && (pQuery->limit.limit + pQuery->limit.offset) <= numOfRes) {
        setQueryStatus(pQuery, QUERY_COMPLETED);
      }
H
Haojun Liao 已提交
1236
    }
1237
  }
1238

1239
  return numOfRes;
1240 1241
}

H
Haojun Liao 已提交
1242
void setExecParams(SQuery *pQuery, SQLFunctionCtx *pCtx, void* inputData, TSKEY *tsCol, SDataBlockInfo* pBlockInfo,
1243 1244 1245 1246 1247 1248 1249
                   SDataStatis *pStatis, void *param, int32_t colIndex) {
  
  int32_t functionId = pQuery->pSelectExpr[colIndex].base.functionId;
  int32_t colId = pQuery->pSelectExpr[colIndex].base.colInfo.colId;
  
  SDataStatis *tpField = NULL;
  pCtx->hasNull = hasNullValue(pQuery, colIndex, pBlockInfo->numOfCols, pStatis, &tpField);
1250
  pCtx->aInputElemBuf = inputData;
1251

1252
  if (tpField != NULL) {
H
Haojun Liao 已提交
1253
    pCtx->preAggVals.isSet  = true;
1254 1255
    pCtx->preAggVals.statis = *tpField;
    assert(pCtx->preAggVals.statis.numOfNull <= pBlockInfo->rows);
1256 1257 1258
  } else {
    pCtx->preAggVals.isSet = false;
  }
1259

H
Haojun Liao 已提交
1260 1261 1262
  // limit/offset query will affect this value
  pCtx->startOffset = QUERY_IS_ASC_QUERY(pQuery) ? pQuery->pos:0;
  pCtx->size = QUERY_IS_ASC_QUERY(pQuery) ? pBlockInfo->rows - pQuery->pos : pQuery->pos + 1;
1263

1264 1265
  uint32_t status = aAggs[functionId].nStatus;
  if (((status & (TSDB_FUNCSTATE_SELECTIVITY | TSDB_FUNCSTATE_NEED_TS)) != 0) && (tsCol != NULL)) {
H
Haojun Liao 已提交
1266
    pCtx->ptsList = tsCol;
1267
  }
1268

1269 1270 1271 1272 1273
  if (functionId >= TSDB_FUNC_FIRST_DST && functionId <= TSDB_FUNC_LAST_DST) {
    // last_dist or first_dist function
    // store the first&last timestamp into the intermediate buffer [1], the true
    // value may be null but timestamp will never be null
  } else if (functionId == TSDB_FUNC_TOP || functionId == TSDB_FUNC_BOTTOM || functionId == TSDB_FUNC_TWA ||
1274
             functionId == TSDB_FUNC_DIFF || (functionId >= TSDB_FUNC_RATE && functionId <= TSDB_FUNC_AVG_IRATE)) {
1275
    /*
H
Haojun Liao 已提交
1276
     * least squares function needs two columns of input, currently, the x value of linear equation is set to
1277 1278 1279 1280 1281 1282 1283 1284 1285 1286
     * timestamp column, and the y-value is the column specified in pQuery->pSelectExpr[i].colIdxInBuffer
     *
     * top/bottom function needs timestamp to indicate when the
     * top/bottom values emerge, so does diff function
     */
    if (functionId == TSDB_FUNC_TWA) {
      STwaInfo *pTWAInfo = GET_RES_INFO(pCtx)->interResultBuf;
      pTWAInfo->SKey = pQuery->window.skey;
      pTWAInfo->EKey = pQuery->window.ekey;
    }
1287

1288 1289
  } else if (functionId == TSDB_FUNC_ARITHM) {
    pCtx->param[1].pz = param;
H
Haojun Liao 已提交
1290 1291 1292 1293 1294 1295
  } else if (functionId == TSDB_FUNC_SPREAD) {  // set the statistics data for primary time stamp column
    if (colId == PRIMARYKEY_TIMESTAMP_COL_INDEX) {
      pCtx->preAggVals.isSet  = true;
      pCtx->preAggVals.statis.min = pBlockInfo->window.skey;
      pCtx->preAggVals.statis.max = pBlockInfo->window.ekey;
    }
1296 1297 1298 1299 1300 1301 1302 1303 1304 1305 1306 1307 1308
  } else if (functionId == TSDB_FUNC_INTERP) {
    SInterpInfoDetail *pInterpInfo = GET_RES_INFO(pCtx)->interResultBuf;
    pInterpInfo->type = pQuery->fillType;
    pInterpInfo->ts = pQuery->window.skey;
    pInterpInfo->primaryCol = (colId == PRIMARYKEY_TIMESTAMP_COL_INDEX);
  
    if (pQuery->fillVal != NULL) {
      if (isNull((const char*) &pQuery->fillVal[colIndex], pCtx->inputType)) {
        pCtx->param[1].nType = TSDB_DATA_TYPE_NULL;
      } else { // todo refactor, tVariantCreateFromBinary should handle the NULL value
        tVariantCreateFromBinary(&pCtx->param[1], (char*) &pQuery->fillVal[colIndex], pCtx->inputBytes, pCtx->inputType);
      }
    }
1309
  }
1310

1311 1312 1313 1314 1315 1316
#if defined(_DEBUG_VIEW)
  //  int64_t *tsList = (int64_t *)primaryColumnData;
//  int64_t  s = tsList[0];
//  int64_t  e = tsList[size - 1];

//    if (IS_DATA_BLOCK_LOADED(blockStatus)) {
S
slguan 已提交
1317
//        qTrace("QInfo:%p query ts:%lld-%lld, offset:%d, rows:%d, bstatus:%d,
1318 1319 1320
//        functId:%d", GET_QINFO_ADDR(pQuery),
//               s, e, startOffset, size, blockStatus, functionId);
//    } else {
S
slguan 已提交
1321
//        qTrace("QInfo:%p block not loaded, bstatus:%d",
1322 1323 1324 1325 1326 1327 1328 1329
//        GET_QINFO_ADDR(pQuery), blockStatus);
//    }
#endif
}

/*
 * Set the output buffer for the selectivity + tag query.
 *
 * Every context of a tag/ts dummy function is collected into a list that is attached, together
 * with the summed output width of these contexts, to the context of the selectivity function.
 * If several selectivity functions are present, the last one receives the list. Nothing is done
 * unless isSelectivityWithTagsQuery() holds.
 *
 * NOTE(review): a failed allocation of the list is not handled, as this function has no way to
 * report it.
 */
static void setCtxTagColumnInfo(SQuery *pQuery, SQLFunctionCtx *pCtx) {
  if (isSelectivityWithTagsQuery(pQuery)) {
    int32_t num = 0;
    int16_t tagLen = 0;

    SQLFunctionCtx *p = NULL;
    SQLFunctionCtx **pTagCtx = calloc(pQuery->numOfOutput, POINTER_BYTES);

    for (int32_t i = 0; i < pQuery->numOfOutput; ++i) {
      SSqlFuncMsg *pSqlFuncMsg = &pQuery->pSelectExpr[i].base;

      if (pSqlFuncMsg->functionId == TSDB_FUNC_TAG_DUMMY || pSqlFuncMsg->functionId == TSDB_FUNC_TS_DUMMY) {
        tagLen += pCtx[i].outputBytes;
        pTagCtx[num++] = &pCtx[i];
      } else if ((aAggs[pSqlFuncMsg->functionId].nStatus & TSDB_FUNCSTATE_SELECTIVITY) != 0) {
        p = &pCtx[i];
      } else if (pSqlFuncMsg->functionId == TSDB_FUNC_TS || pSqlFuncMsg->functionId == TSDB_FUNC_TAG) {
        // tag function may be the group by tag column
        // ts may be the required primary timestamp column
        continue;
      } else {
        // the column may be the normal column, group by normal_column, the functionId is TSDB_FUNC_PRJ
      }
    }

    if (p != NULL) {
      // the selectivity context takes over the list
      p->tagInfo.pTagCtxList = pTagCtx;
      p->tagInfo.numOfTagCols = num;
      p->tagInfo.tagsLen = tagLen;
    } else {
      // no selectivity function was found: nobody would own the list, so release it here
      tfree(pTagCtx);
    }
  }
}

/*
 * Set up the intermediate result buffer of every output expression through setResultInfoBuf(),
 * using the interBytes value of the expression. pResultInfo holds one entry per expression.
 */
static void setWindowResultInfo(SResultInfo *pResultInfo, SQuery *pQuery, bool isStableQuery) {
  int32_t idx = 0;

  while (idx < pQuery->numOfOutput) {
    // the intermediate result of an expression must not exceed the default page size
    assert(pQuery->pSelectExpr[idx].interBytes <= DEFAULT_INTERN_BUF_PAGE_SIZE);

    SResultInfo *pInfo = &pResultInfo[idx];
    setResultInfoBuf(pInfo, pQuery->pSelectExpr[idx].interBytes, isStableQuery);

    ++idx;
  }
}

1367
/*
 * Allocate and initialize the per-output function contexts and result info entries of the
 * runtime environment.
 *
 * For every output expression the input/output type and width, the function id, the function
 * parameters and the offset of its output within a result row are set. Top/bottom/diff
 * functions additionally receive the given order in param[2], their function id in param[3]
 * and the order column id in param[1].
 *
 * Returns TSDB_CODE_SUCCESS, or TSDB_CODE_QRY_OUT_OF_MEMORY when one of the two arrays cannot be
 * allocated (both are released in that case).
 */
static int32_t setupQueryRuntimeEnv(SQueryRuntimeEnv *pRuntimeEnv, int16_t order) {
  qTrace("QInfo:%p setup runtime env", GET_QINFO_ADDR(pRuntimeEnv));
  SQuery *pQuery = pRuntimeEnv->pQuery;

  pRuntimeEnv->resultInfo = calloc(pQuery->numOfOutput, sizeof(SResultInfo));
  pRuntimeEnv->pCtx = (SQLFunctionCtx *)calloc(pQuery->numOfOutput, sizeof(SQLFunctionCtx));

  if (pRuntimeEnv->resultInfo == NULL || pRuntimeEnv->pCtx == NULL) {
    goto _clean;
  }

  pRuntimeEnv->offset[0] = 0;
  for (int32_t i = 0; i < pQuery->numOfOutput; ++i) {
    SSqlFuncMsg *pSqlFuncMsg = &pQuery->pSelectExpr[i].base;

    SQLFunctionCtx *pCtx = &pRuntimeEnv->pCtx[i];
    SColIndex* pIndex = &pSqlFuncMsg->colInfo;

    // the input type comes from the table name schema, the tag column list or the column list
    int32_t index = pSqlFuncMsg->colInfo.colIndex;
    if (TSDB_COL_IS_TAG(pIndex->flag)) {
      if (pIndex->colId == TSDB_TBNAME_COLUMN_INDEX) {  // todo refactor
        SSchema s = tGetTableNameColumnSchema();

        pCtx->inputBytes = s.bytes;
        pCtx->inputType = s.type;
      } else {
        pCtx->inputBytes = pQuery->tagColList[index].bytes;
        pCtx->inputType = pQuery->tagColList[index].type;
      }

    } else {
      pCtx->inputBytes = pQuery->colList[index].bytes;
      pCtx->inputType = pQuery->colList[index].type;
    }

    assert(isValidDataType(pCtx->inputType, pCtx->inputBytes));
    pCtx->ptsOutputBuf = NULL;

    pCtx->outputBytes = pQuery->pSelectExpr[i].bytes;
    pCtx->outputType = pQuery->pSelectExpr[i].type;

    pCtx->order = pQuery->order.order;
    pCtx->functionId = pSqlFuncMsg->functionId;

    pCtx->numOfParams = pSqlFuncMsg->numOfParams;
    for (int32_t j = 0; j < pCtx->numOfParams; ++j) {
      int16_t type = pSqlFuncMsg->arg[j].argType;
      int16_t bytes = pSqlFuncMsg->arg[j].argBytes;

      // every parameter is built from its own argument; string values are referenced through pz
      if (type == TSDB_DATA_TYPE_BINARY || type == TSDB_DATA_TYPE_NCHAR) {
        tVariantCreateFromBinary(&pCtx->param[j], pSqlFuncMsg->arg[j].argValue.pz, bytes, type);
      } else {
        tVariantCreateFromBinary(&pCtx->param[j], (char *)&pSqlFuncMsg->arg[j].argValue.i64, bytes, type);
      }
    }

    // set the order information for top/bottom query
    int32_t functionId = pCtx->functionId;

    if (functionId == TSDB_FUNC_TOP || functionId == TSDB_FUNC_BOTTOM || functionId == TSDB_FUNC_DIFF) {
      // these functions require the timestamp function as the first output expression
      int32_t f = pQuery->pSelectExpr[0].base.functionId;
      assert(f == TSDB_FUNC_TS || f == TSDB_FUNC_TS_DUMMY);

      pCtx->param[2].i64Key = order;
      pCtx->param[2].nType = TSDB_DATA_TYPE_BIGINT;
      pCtx->param[3].i64Key = functionId;
      pCtx->param[3].nType = TSDB_DATA_TYPE_BIGINT;

      pCtx->param[1].i64Key = pQuery->order.orderColId;
    }

    // the output of an expression starts right behind the output of the previous one
    if (i > 0) {
      pRuntimeEnv->offset[i] = pRuntimeEnv->offset[i - 1] + pRuntimeEnv->pCtx[i - 1].outputBytes;
    }
  }

  // set the intermediate result output buffer
  setWindowResultInfo(pRuntimeEnv->resultInfo, pQuery, pRuntimeEnv->stableQuery);

  // if it is group by normal column, do not set output buffer, the output buffer is pResult
  if (!isGroupbyNormalCol(pQuery->pGroupbyExpr) && !pRuntimeEnv->stableQuery) {
    resetCtxOutputBuf(pRuntimeEnv);
  }

  setCtxTagColumnInfo(pQuery, pRuntimeEnv->pCtx);
  return TSDB_CODE_SUCCESS;

_clean:
  tfree(pRuntimeEnv->resultInfo);
  tfree(pRuntimeEnv->pCtx);

  return TSDB_CODE_QRY_OUT_OF_MEMORY;
}

/*
 * Release everything the runtime environment holds: the time window info, the function contexts
 * with their parameters, tag values, tag context lists and intermediate result buffers, the fill
 * info, the result buffer, both query handles and the ts buffer.
 *
 * Nothing is done when the environment has no query attached.
 */
static void teardownQueryRuntimeEnv(SQueryRuntimeEnv *pRuntimeEnv) {
  SQuery *pQuery = pRuntimeEnv->pQuery;
  if (pQuery == NULL) {
    return;
  }

  SQInfo *pQInfo = (SQInfo *)GET_QINFO_ADDR(pRuntimeEnv);
  qTrace("QInfo:%p teardown runtime env", pQInfo);

  cleanupTimeWindowInfo(&pRuntimeEnv->windowResInfo, pQuery->numOfOutput);

  if (pRuntimeEnv->pCtx != NULL) {
    for (int32_t out = 0; out < pQuery->numOfOutput; ++out) {
      SQLFunctionCtx *pFuncCtx = &pRuntimeEnv->pCtx[out];

      int32_t p = 0;
      while (p < pFuncCtx->numOfParams) {
        tVariantDestroy(&pFuncCtx->param[p]);
        ++p;
      }

      tVariantDestroy(&pFuncCtx->tag);
      tfree(pFuncCtx->tagInfo.pTagCtxList);
      tfree(pRuntimeEnv->resultInfo[out].interResultBuf);
    }

    tfree(pRuntimeEnv->resultInfo);
    tfree(pRuntimeEnv->pCtx);
  }

  // the destroy functions hand back the value to store in place of the released object
  pRuntimeEnv->pFillInfo = taosDestoryFillInfo(pRuntimeEnv->pFillInfo);

  destroyResultBuf(pRuntimeEnv->pResultBuf, pQInfo);
  tsdbCleanupQueryHandle(pRuntimeEnv->pQueryHandle);
  tsdbCleanupQueryHandle(pRuntimeEnv->pSecQueryHandle);

  pRuntimeEnv->pTSBuf = tsBufDestory(pRuntimeEnv->pTSBuf);
}

1497
// a query counts as killed once its code has been set to TSDB_CODE_TSC_QUERY_CANCELLED
static bool isQueryKilled(SQInfo *pQInfo) {
  const bool cancelled = (pQInfo->code == TSDB_CODE_TSC_QUERY_CANCELLED);
  return cancelled;
}

1501
// mark the query as killed; isQueryKilled() tests for exactly this code
static void setQueryKilled(SQInfo *pQInfo) {
  pQInfo->code = TSDB_CODE_TSC_QUERY_CANCELLED;
}
H
hjxilinx 已提交
1502

H
hjxilinx 已提交
1503
/*
 * Check whether the query is a fixed output query.
 *
 * An interval query never is. A top/bottom query or a group-by on a normal column always is.
 * Otherwise the query qualifies as soon as one of its functions, ignoring the leading ts_comp
 * projection and the timestamp functions, is not a multi-output function.
 */
static bool isFixedOutputQuery(SQuery *pQuery) {
  if (pQuery->intervalTime != 0) {
    return false;
  }

  // Note:top/bottom query is fixed output query
  if (isTopBottomQuery(pQuery) || isGroupbyNormalCol(pQuery->pGroupbyExpr)) {
    return true;
  }

  for (int32_t i = 0; i < pQuery->numOfOutput; ++i) {
    SSqlFuncMsg *pFunc = &pQuery->pSelectExpr[i].base;

    // the ts_comp function: a one-parameter projection of the primary timestamp in the first slot
    bool isTsComp = (i == 0 && pFunc->functionId == TSDB_FUNC_PRJ && pFunc->numOfParams == 1 &&
                     pFunc->colInfo.colIndex == PRIMARYKEY_TIMESTAMP_COL_INDEX);
    bool isTsFunc = (pFunc->functionId == TSDB_FUNC_TS || pFunc->functionId == TSDB_FUNC_TS_DUMMY);

    if (isTsComp || isTsFunc) {
      continue;
    }

    if (!IS_MULTIOUTPUT(aAggs[pFunc->functionId].nStatus)) {
      return true;
    }
  }

  return false;
}

1534
// todo refactor with isLastRowQuery
H
hjxilinx 已提交
1535
static bool isPointInterpoQuery(SQuery *pQuery) {
1536
  for (int32_t i = 0; i < pQuery->numOfOutput; ++i) {
1537
    int32_t functionID = pQuery->pSelectExpr[i].base.functionId;
1538
    if (functionID == TSDB_FUNC_INTERP) {
1539 1540 1541
      return true;
    }
  }
1542

1543 1544 1545 1546
  return false;
}

// TODO REFACTOR:MERGE WITH CLIENT-SIDE FUNCTION
H
hjxilinx 已提交
1547
static bool isSumAvgRateQuery(SQuery *pQuery) {
1548
  for (int32_t i = 0; i < pQuery->numOfOutput; ++i) {
1549
    int32_t functionId = pQuery->pSelectExpr[i].base.functionId;
1550 1551 1552
    if (functionId == TSDB_FUNC_TS) {
      continue;
    }
1553

1554 1555 1556 1557 1558
    if (functionId == TSDB_FUNC_SUM_RATE || functionId == TSDB_FUNC_SUM_IRATE || functionId == TSDB_FUNC_AVG_RATE ||
        functionId == TSDB_FUNC_AVG_IRATE) {
      return true;
    }
  }
1559

1560 1561 1562
  return false;
}

H
hjxilinx 已提交
1563
// true if at least one output expression of the query is the last_row function
static bool isFirstLastRowQuery(SQuery *pQuery) {
  int32_t i = 0;

  while (i < pQuery->numOfOutput) {
    if (pQuery->pSelectExpr[i].base.functionId == TSDB_FUNC_LAST_ROW) {
      return true;
    }

    ++i;
  }

  return false;
}

H
hjxilinx 已提交
1574
/*
 * Check whether a second scan in reverse direction is required.
 *
 * The expressions are visited in order, ignoring ts, ts_dummy and tag. A first/first_dst
 * function in a descending query requires the reverse scan. The first last/last_dst function
 * that is met decides the outcome on its own: the reverse scan is required iff the order stored
 * in its first argument differs from the query order.
 */
static bool needReverseScan(SQuery *pQuery) {
  for (int32_t i = 0; i < pQuery->numOfOutput; ++i) {
    const int32_t fid = pQuery->pSelectExpr[i].base.functionId;

    const bool ignored = (fid == TSDB_FUNC_TS || fid == TSDB_FUNC_TS_DUMMY || fid == TSDB_FUNC_TAG);
    if (ignored) {
      continue;
    }

    const bool isFirst = (fid == TSDB_FUNC_FIRST || fid == TSDB_FUNC_FIRST_DST);
    if (isFirst && !QUERY_IS_ASC_QUERY(pQuery)) {
      return true;
    }

    const bool isLast = (fid == TSDB_FUNC_LAST || fid == TSDB_FUNC_LAST_DST);
    if (isLast) {
      int32_t lastOrder = pQuery->pSelectExpr[i].base.arg->argValue.i64;
      return lastOrder != pQuery->order.order;
    }
  }

  return false;
}
H
hjxilinx 已提交
1593 1594 1595

/*
 * true if every output expression is a tag projection, a tid_tag function or a count on the
 * table name column (which also holds for a query without any output expression)
 */
static bool onlyQueryTags(SQuery* pQuery) {
  for (int32_t i = 0; i < pQuery->numOfOutput; ++i) {
    SExprInfo *pExpr = &pQuery->pSelectExpr[i];
    const int32_t fid = pExpr->base.functionId;

    const bool isTagOnly = (fid == TSDB_FUNC_TAGPRJ) || (fid == TSDB_FUNC_TID_TAG) ||
                           (fid == TSDB_FUNC_COUNT && pExpr->base.colInfo.colId == TSDB_TBNAME_COLUMN_INDEX);

    if (!isTagOnly) {
      return false;
    }
  }

  return true;
}

1608 1609
/////////////////////////////////////////////////////////////////////////////////////////////

H
Haojun Liao 已提交
1610
/*
 * Compute the time window that contains `key` and the part of it that lies
 * inside [keyFirst, keyLast].
 *
 * @param pQuery    query descriptor; slidingTime, slidingTimeUnit, precision and intervalTime are read
 * @param key       timestamp to locate; must satisfy keyFirst <= key <= keyLast
 * @param keyFirst  lower bound of the queried range
 * @param keyLast   upper bound of the queried range
 * @param realWin   [out] the window clipped to [keyFirst, keyLast]
 * @param win       [out] the aligned window
 */
void getAlignQueryTimeWindow(SQuery *pQuery, int64_t key, int64_t keyFirst, int64_t keyLast, STimeWindow *realWin, STimeWindow *win) {
  assert(key >= keyFirst && key <= keyLast && pQuery->slidingTime <= pQuery->intervalTime);

  win->skey = taosGetIntervalStartTimestamp(key, pQuery->slidingTime, pQuery->slidingTimeUnit, pQuery->precision);

  if (keyFirst > (INT64_MAX - pQuery->intervalTime)) {
    /*
     * keyFirst is so close to INT64_MAX that adding one interval would overflow.
     * The queried range must then be shorter than one interval, so the range is
     * used as it is and the aligned window is closed at INT64_MAX.
     */
    assert(keyLast - keyFirst < pQuery->intervalTime);

    realWin->skey = keyFirst;
    realWin->ekey = keyLast;

    win->ekey = INT64_MAX;
  } else {
    win->ekey = win->skey + pQuery->intervalTime - 1;

    // clip the aligned window to the queried range
    if (win->skey < keyFirst) {
      realWin->skey = keyFirst;
    } else {
      realWin->skey = win->skey;
    }

    if (win->ekey < keyLast) {
      realWin->ekey = win->ekey;
    } else {
      realWin->ekey = keyLast;
    }
  }
}

/*
 * Set pQuery->checkBuffer.
 *
 * The flag is 0 for top/bottom queries and for group-by-normal-column queries.
 * Otherwise the output expressions are inspected in order, skipping
 * TSDB_FUNC_TS and TSDB_FUNC_TS_DUMMY, and the scan stops at the first one
 * that is not flagged as multi-output; the flag is 1 only when the last
 * inspected expression was multi-output.
 */
static void setScanLimitationByResultBuffer(SQuery *pQuery) {
  if (isTopBottomQuery(pQuery) || isGroupbyNormalCol(pQuery->pGroupbyExpr)) {
    pQuery->checkBuffer = 0;
    return;
  }

  bool multiOutput = false;

  for (int32_t idx = 0; idx < pQuery->numOfOutput; ++idx) {
    SSqlFuncMsg *pFunc = &pQuery->pSelectExpr[idx].base;

    if (pFunc->functionId != TSDB_FUNC_TS && pFunc->functionId != TSDB_FUNC_TS_DUMMY) {
      multiOutput = IS_MULTIOUTPUT(aAggs[pFunc->functionId].nStatus);
      if (!multiOutput) {
        break;
      }
    }
  }

  pQuery->checkBuffer = multiOutput ? 1 : 0;
}

/*
 * todo add more parameters to check soon..
 */
1661
bool colIdCheck(SQuery *pQuery) {
1662 1663
  // load data column information is incorrect
  for (int32_t i = 0; i < pQuery->numOfCols - 1; ++i) {
1664
    if (pQuery->colList[i].colId == pQuery->colList[i + 1].colId) {
S
slguan 已提交
1665
      qError("QInfo:%p invalid data load column for query", GET_QINFO_ADDR(pQuery));
1666 1667 1668
      return false;
    }
  }
1669
  
1670 1671 1672 1673 1674 1675
  return true;
}

/*
 * Return true when every output expression, apart from the timestamp and tag
 * expressions (TS, TS_DUMMY, TAG, TAG_DUMMY), uses either functId or functIdDst.
 *
 * todo: also ignore avg/sum/min/max/count/stddev/top/bottom, for which the
 * scan order does not matter
 */
static bool onlyOneQueryType(SQuery *pQuery, int32_t functId, int32_t functIdDst) {
  for (int32_t idx = 0; idx < pQuery->numOfOutput; ++idx) {
    const int32_t fid = pQuery->pSelectExpr[idx].base.functionId;

    const bool ignored = (fid == TSDB_FUNC_TS || fid == TSDB_FUNC_TS_DUMMY || fid == TSDB_FUNC_TAG ||
                          fid == TSDB_FUNC_TAG_DUMMY);

    if (!ignored && fid != functId && fid != functIdDst) {
      return false;
    }
  }

  return true;
}

// true when first/first_dst are the only functions in the query (timestamp and tag expressions are ignored)
static bool onlyFirstQuery(SQuery *pQuery) {
  return onlyOneQueryType(pQuery, TSDB_FUNC_FIRST, TSDB_FUNC_FIRST_DST);
}

// true when last/last_dst are the only functions in the query (timestamp and tag expressions are ignored)
static bool onlyLastQuery(SQuery *pQuery) {
  return onlyOneQueryType(pQuery, TSDB_FUNC_LAST, TSDB_FUNC_LAST_DST);
}

1696
/*
 * Force the scan order of the query to targetOrder (TSDB_ORDER_ASC or
 * TSDB_ORDER_DESC). When the current order is the opposite one, the change is
 * traced with queryType as label and the two ends of the query window are
 * exchanged.
 */
static void swapQueryRangeForOrder(SQuery *pQuery, int32_t targetOrder, const char *queryType) {
  char msg[] = "QInfo:%p scan order changed for %s query, old:%d, new:%d, qrange exchanged, old qrange:%" PRId64
               "-%" PRId64 ", new qrange:%" PRId64 "-%" PRId64;

  bool isAsc = QUERY_IS_ASC_QUERY(pQuery);
  bool needSwap = (targetOrder == TSDB_ORDER_ASC) ? !isAsc : isAsc;

  if (needSwap) {
    qTrace(msg, GET_QINFO_ADDR(pQuery), queryType, pQuery->order.order, targetOrder, pQuery->window.skey,
           pQuery->window.ekey, pQuery->window.ekey, pQuery->window.skey);

    SWAP(pQuery->window.skey, pQuery->window.ekey, TSKEY);
  }

  pQuery->order.order = targetOrder;
}

/*
 * Pick the scan order that suits the functions used by the query, exchanging
 * the ends of the query window when the order is changed:
 *   - last_row query: descending order, window start set to the larger key;
 *   - point interpolation query without interval: ascending order;
 *   - query made only of first()/last(): ascending/descending order. For an
 *     interval query this is applied to super table queries only.
 *
 * todo handle the case where order-irrelevant functions are mixed with
 * order-critical ones
 */
static void changeExecuteScanOrder(SQuery *pQuery, bool stableQuery) {
  // descending order query for last_row query
  if (isFirstLastRowQuery(pQuery)) {
    qTrace("QInfo:%p scan order changed for last_row query, old:%d, new:%d", GET_QINFO_ADDR(pQuery),
           pQuery->order.order, TSDB_ORDER_DESC);

    pQuery->order.order = TSDB_ORDER_DESC;

    int64_t smaller = MIN(pQuery->window.skey, pQuery->window.ekey);
    int64_t larger = MAX(pQuery->window.skey, pQuery->window.ekey);

    pQuery->window.skey = larger;
    pQuery->window.ekey = smaller;
    return;
  }

  // in case of point-interpolation query, use asc order scan
  if (isPointInterpoQuery(pQuery) && pQuery->intervalTime == 0) {
    swapQueryRangeForOrder(pQuery, TSDB_ORDER_ASC, "interp");
    return;
  }

  const bool intervalQuery = (pQuery->intervalTime != 0);
  if (intervalQuery && !stableQuery) {
    return;  // interval query on a normal table: the order is left untouched
  }

  if (onlyFirstQuery(pQuery)) {
    swapQueryRangeForOrder(pQuery, TSDB_ORDER_ASC, intervalQuery ? "only-first stable" : "only-first");
  } else if (onlyLastQuery(pQuery)) {
    swapQueryRangeForOrder(pQuery, TSDB_ORDER_DESC, intervalQuery ? "only-last stable" : "only-last");
  }
}

/*
 * Number of pages used for the initial sizing of the query:
 *   - 128 for a group-by-normal-column query;
 *   - max(number of tables, 16) for an interval query, i.e. one page per table;
 *   - 1 otherwise.
 */
static int32_t getInitialPageNum(SQInfo *pQInfo) {
  SQuery *pQuery = pQInfo->runtimeEnv.pQuery;
  int32_t minPagesForInterval = 16;

  int32_t pages = 1;  // for super table query, one page for each subset (pQInfo->pSidSet->numOfSubSet)

  if (isGroupbyNormalCol(pQuery->pGroupbyExpr)) {
    pages = 128;
  } else if (isIntervalQuery(pQuery)) {
    // time window query, allocate one page for each table
    size_t numOfTables = pQInfo->groupInfo.numOfTables;
    pages = MAX(numOfTables, minPagesForInterval);
  }

  assert(pages > 0);
  return pages;
}

H
Haojun Liao 已提交
1794
/*
 * Row multiplier of a result: the i64 value of the first argument of the second
 * output expression when `tbq` is true and `sq` is false, 1 otherwise.
 * Every macro argument is parenthesized so that compound expressions can be
 * passed safely.
 */
#define GET_ROW_PARAM_FOR_MULTIOUTPUT(_q, tbq, sq) (((tbq) && (!(sq))) ? (_q)->pSelectExpr[1].base.arg->argValue.i64 : 1)
1795

H
Haojun Liao 已提交
1796 1797
/*
 * Number of result rows that fit into one internal buffer page, i.e. the page
 * size minus the tFilePage header divided by the size of one result row. For a
 * top/bottom query that is not a super table query the row size is multiplied
 * by the first argument of the second output expression.
 */
static FORCE_INLINE int32_t getNumOfRowsInResultPage(SQuery *pQuery, bool topBotQuery, bool isSTableQuery) {
  int32_t bytesPerRow = pQuery->rowSize * GET_ROW_PARAM_FOR_MULTIOUTPUT(pQuery, topBotQuery, isSTableQuery);
  size_t  payload = DEFAULT_INTERN_BUF_PAGE_SIZE - sizeof(tFilePage);

  return payload / bytesPerRow;
}

/*
 * Address of the cell of output column `columnIndex` for the result `pResult`
 * inside its result-buffer page.
 *
 * The page is addressed column by column: the column starts at
 * offset[columnIndex] * numOfRowsPerPage and the cell is found at
 * bytes-of-column * row id from there. The row id is scaled by
 * GET_ROW_PARAM_FOR_MULTIOUTPUT.
 */
char *getPosInResultPage(SQueryRuntimeEnv *pRuntimeEnv, int32_t columnIndex, SWindowResult *pResult) {
  assert(pResult != NULL && pRuntimeEnv != NULL);

  SQuery *   pQuery = pRuntimeEnv->pQuery;
  tFilePage *pPage = GET_RES_BUF_PAGE_BY_ID(pRuntimeEnv->pResultBuf, pResult->pos.pageId);

  int32_t rowIndex =
      pResult->pos.rowId * GET_ROW_PARAM_FOR_MULTIOUTPUT(pQuery, pRuntimeEnv->topBotQuery, pRuntimeEnv->stableQuery);

  char *pageData = (char *)pPage->data;
  return pageData + pRuntimeEnv->offset[columnIndex] * pRuntimeEnv->numOfRowsPerPage +
         pQuery->pSelectExpr[columnIndex].bytes * rowIndex;
}

/**
 * decrease the refcount for each table involved in this query
 * @param pQInfo
 */
1816
UNUSED_FUNC void vnodeDecMeterRefcnt(SQInfo *pQInfo) {
1817
  if (pQInfo != NULL) {
1818
    //    assert(taosHashGetSize(pQInfo->groupInfo) >= 1);
1819 1820 1821
  }

#if 0
1822
  if (pQInfo == NULL || pQInfo->groupInfo.numOfTables == 1) {
1823
    atomic_fetch_sub_32(&pQInfo->pObj->numOfQueries, 1);
S
slguan 已提交
1824
    qTrace("QInfo:%p vid:%d sid:%d meterId:%s, query is over, numOfQueries:%d", pQInfo, pQInfo->pObj->vnode,
1825 1826 1827
           pQInfo->pObj->sid, pQInfo->pObj->meterId, pQInfo->pObj->numOfQueries);
  } else {
    int32_t num = 0;
1828 1829
    for (int32_t i = 0; i < pQInfo->groupInfo.numOfTables; ++i) {
      SMeterObj *pMeter = getMeterObj(pQInfo->groupInfo, pQInfo->pSidSet->pTableIdList[i]->sid);
1830
      atomic_fetch_sub_32(&(pMeter->numOfQueries), 1);
1831

1832
      if (pMeter->numOfQueries > 0) {
S
slguan 已提交
1833
        qTrace("QInfo:%p vid:%d sid:%d meterId:%s, query is over, numOfQueries:%d", pQInfo, pMeter->vnode, pMeter->sid,
1834 1835 1836 1837
               pMeter->meterId, pMeter->numOfQueries);
        num++;
      }
    }
1838

1839 1840 1841 1842
    /*
     * in order to reduce log output, for all meters of which numOfQueries count are 0,
     * we do not output corresponding information
     */
1843
    num = pQInfo->groupInfo.numOfTables - num;
S
slguan 已提交
1844
    qTrace("QInfo:%p metric query is over, dec query ref for %d meters, numOfQueries on %d meters are 0", pQInfo,
1845
           pQInfo->groupInfo.numOfTables, num);
1846 1847 1848 1849 1850 1851 1852 1853 1854 1855 1856 1857 1858
  }
#endif
}

/*
 * Decide whether a data block has to be loaded, given its pre-computed
 * statistics.
 *
 * The answer is always true at the moment. The block-level pre-filter, which
 * compared the min/max statistics of each filtered column with the filter
 * conditions, and the top/bottom block filter were both disabled with `#if 0`;
 * the dead code has been removed.
 *
 * @param pQuery            query descriptor (unused)
 * @param pDataStatis       statistics of the block, may be NULL (unused)
 * @param pCtx              function contexts (unused)
 * @param numOfTotalPoints  number of rows in the block (unused)
 * @return true: the block must be loaded
 */
static bool needToLoadDataBlock(SQuery *pQuery, SDataStatis *pDataStatis, SQLFunctionCtx *pCtx,
                                int32_t numOfTotalPoints) {
  (void)pQuery;
  (void)pDataStatis;
  (void)pCtx;
  (void)numOfTotalPoints;

  return true;
}

/*
 * Move the time window one sliding step forward in the scan direction.
 * The end key is recomputed from the new start key, since the previous time
 * window may not be of the same size as pQuery->intervalTime.
 */
static void getNextTimeWindow(SQuery *pQuery, STimeWindow *pTimeWindow) {
  int32_t direction = GET_FORWARD_DIRECTION_FACTOR(pQuery->order.order);

  pTimeWindow->skey += (pQuery->slidingTime * direction);
  pTimeWindow->ekey = pTimeWindow->skey + (pQuery->intervalTime - 1);
}

H
hjxilinx 已提交
1915
/*
 * Load the current data block of the query handle, but only as much of it as
 * the query needs.
 *
 * The requirement is BLK_DATA_ALL_NEEDED when the query has filter columns, a
 * timestamp buffer (pTSBuf) or is an interval query; otherwise it is the
 * bitwise OR of what the dataReqFunc of every output function reports.
 *
 *   - BLK_DATA_NO_NEEDED:     nothing is loaded, the block is counted as discarded;
 *   - BLK_DATA_STATIS_NEEDED: only the block statistics are loaded; the block
 *                             itself is loaded when no statistics are available;
 *   - BLK_DATA_ALL_NEEDED:    statistics and block are loaded.
 *
 * @param pRuntimeEnv   runtime environment; its summary counters are updated
 * @param pQueryHandle  tsdb query handle positioned on the block
 * @param pBlockInfo    description of the block (time range, number of rows)
 * @param pStatis       [out] block statistics; reset to NULL when they cannot be retrieved
 * @return the loaded data block, or NULL when the block has not been loaded
 */
SArray *loadDataBlockOnDemand(SQueryRuntimeEnv *pRuntimeEnv, void* pQueryHandle, SDataBlockInfo* pBlockInfo, SDataStatis **pStatis) {
  SQuery *pQuery = pRuntimeEnv->pQuery;

  uint32_t r = 0;
  SArray * pDataBlock = NULL;

  if (pQuery->numOfFilterCols > 0) {
    r = BLK_DATA_ALL_NEEDED;
  } else {
    // check if this data block is required to load
    for (int32_t i = 0; i < pQuery->numOfOutput; ++i) {
      SSqlFuncMsg* pSqlFunc = &pQuery->pSelectExpr[i].base;

      int32_t functionId = pSqlFunc->functionId;
      int32_t colId = pSqlFunc->colInfo.colId;
      r |= aAggs[functionId].dataReqFunc(&pRuntimeEnv->pCtx[i], pQuery->window.skey, pQuery->window.ekey, colId);
    }

    // pTSBuf is a pointer: test it against NULL instead of the former ordered comparison "> 0"
    if (pRuntimeEnv->pTSBuf != NULL || isIntervalQuery(pQuery)) {
      r |= BLK_DATA_ALL_NEEDED;
    }
  }

  if (r == BLK_DATA_NO_NEEDED) {
    qTrace("QInfo:%p data block discard, brange:%" PRId64 "-%" PRId64 ", rows:%d", GET_QINFO_ADDR(pRuntimeEnv),
           pBlockInfo->window.skey, pBlockInfo->window.ekey, pBlockInfo->rows);
    pRuntimeEnv->summary.discardBlocks += 1;
  } else if (r == BLK_DATA_STATIS_NEEDED) {
    if (tsdbRetrieveDataBlockStatisInfo(pQueryHandle, pStatis) != TSDB_CODE_SUCCESS) {
      // statistics are not usable: behave as if they did not exist, so that the block itself is loaded below
      *pStatis = NULL;
    }

    pRuntimeEnv->summary.loadBlockStatis += 1;

    if (*pStatis == NULL) {  // data block statistics does not exist, load data block
      pDataBlock = tsdbRetrieveDataBlock(pQueryHandle, NULL);
      pRuntimeEnv->summary.totalCheckedRows += pBlockInfo->rows;
    }
  } else {
    assert(r == BLK_DATA_ALL_NEEDED);

    // load the data block statistics to perform further filter
    pRuntimeEnv->summary.loadBlockStatis += 1;
    if (tsdbRetrieveDataBlockStatisInfo(pQueryHandle, pStatis) != TSDB_CODE_SUCCESS) {
      // no usable statistics: needToLoadDataBlock() treats NULL as "load the block"
      *pStatis = NULL;
    }

    if (!needToLoadDataBlock(pQuery, *pStatis, pRuntimeEnv->pCtx, pBlockInfo->rows)) {
#if defined(_DEBUG_VIEW)
      qTrace("QInfo:%p block discarded by per-filter", GET_QINFO_ADDR(pRuntimeEnv));
#endif
      // current block has been discard due to filter applied
      // NOTE(review): the block is still loaded below; only the counter is updated here
      pRuntimeEnv->summary.discardBlocks += 1;
    }

    pRuntimeEnv->summary.totalCheckedRows += pBlockInfo->rows;
    pRuntimeEnv->summary.loadBlocks += 1;
    pDataBlock = tsdbRetrieveDataBlock(pQueryHandle, NULL);
  }

  return pDataBlock;
}

H
hjxilinx 已提交
1978
/*
 * Binary search of `key` in an array of timestamps.
 *
 * @param pValue  array of `num` TSKEY values; the search logic assumes ascending order
 * @param num     number of elements
 * @param key     timestamp to look for
 * @param order   TSDB_ORDER_ASC or TSDB_ORDER_DESC
 * @return for TSDB_ORDER_DESC: the index of the last element that is <= key, or -1
 *         when every element is greater than key;
 *         for TSDB_ORDER_ASC: the index of the first element that is >= key, or -1
 *         when every element is smaller than key;
 *         -1 when num <= 0.
 */
int32_t binarySearchForKey(char *pValue, int num, TSKEY key, int order) {
  if (num <= 0) {
    return -1;
  }

  assert(order == TSDB_ORDER_ASC || order == TSDB_ORDER_DESC);

  TSKEY * keys = (TSKEY *)pValue;
  int32_t lo = 0;
  int32_t hi = num - 1;
  int32_t mid = -1;

  if (order == TSDB_ORDER_DESC) {
    // find the first position which is smaller than the key
    for (;;) {
      if (key >= keys[hi]) return hi;
      if (key == keys[lo]) return lo;
      if (key < keys[lo]) return lo - 1;

      // here keys[lo] < key < keys[hi]
      mid = lo + ((hi - lo + 1) >> 1);

      if (key < keys[mid]) {
        hi = mid - 1;
      } else if (key > keys[mid]) {
        lo = mid + 1;
      } else {
        return mid;
      }
    }
  }

  // find the first position which is bigger than the key
  for (;;) {
    if (key <= keys[lo]) return lo;
    if (key == keys[hi]) return hi;

    if (key > keys[hi]) {
      int32_t next = hi + 1;
      return (next >= num) ? -1 : next;
    }

    // here keys[lo] < key < keys[hi]
    mid = lo + ((hi - lo + 1) >> 1);

    if (key < keys[mid]) {
      hi = mid - 1;
    } else if (key > keys[mid]) {
      lo = mid + 1;
    } else {
      return mid;
    }
  }
}

2041 2042 2043 2044 2045 2046 2047 2048 2049 2050 2051 2052 2053 2054 2055 2056 2057 2058 2059 2060 2061 2062 2063 2064 2065 2066 2067 2068
/*
 * Grow every output buffer of the query so that it can hold `capacity` rows.
 * Nothing is done when `capacity` is smaller than the current capacity.
 *
 * After the reallocation the output position of each function context is
 * reset to the beginning of its buffer.
 *
 * @param pRuntimeEnv  runtime environment of the query
 * @param capacity     requested number of rows, must be positive
 */
static void ensureOutputBufferSimple(SQueryRuntimeEnv* pRuntimeEnv, int32_t capacity) {
  SQuery* pQuery = pRuntimeEnv->pQuery;

  if (capacity < pQuery->rec.capacity) {
    return;
  }

  for (int32_t i = 0; i < pQuery->numOfOutput; ++i) {
    int32_t bytes = pQuery->pSelectExpr[i].bytes;
    assert(bytes > 0 && capacity > 0);

    // compute the size in size_t: bytes * capacity may not fit in a 32-bit signed integer
    size_t size = (size_t)bytes * (size_t)capacity + sizeof(tFilePage);

    char *tmp = realloc(pQuery->sdata[i], size);
    if (tmp == NULL) {  // todo handle the oom; note that the assert is compiled out with NDEBUG
      assert(0);
    } else {
      pQuery->sdata[i] = (tFilePage *)tmp;
    }

    // set the pCtx output buffer position
    pRuntimeEnv->pCtx[i].aOutputBuf = pQuery->sdata[i]->data;
  }

  qTrace("QInfo:%p realloc output buffer to inc output buffer from: %d rows to:%d rows", GET_QINFO_ADDR(pRuntimeEnv),
         pQuery->rec.capacity, capacity);

  pQuery->rec.capacity = capacity;
}

2069 2070 2071 2072 2073 2074 2075 2076 2077 2078 2079 2080
/*
 * In case of prj/diff query, ensure the output buffer is sufficient to
 * accommodate the results of the current block.
 *
 * Interval queries, group-by-normal-column queries and fixed-output queries
 * are left untouched. For the others, when the free room of the output buffer
 * is smaller than the number of rows of the block, every output buffer is
 * enlarged by the missing number of rows and the output positions of the
 * function contexts are moved behind the rows already produced.
 */
static void ensureOutputBuffer(SQueryRuntimeEnv* pRuntimeEnv, SDataBlockInfo* pBlockInfo) {
  SQuery* pQuery = pRuntimeEnv->pQuery;

  if (isIntervalQuery(pQuery) || isGroupbyNormalCol(pQuery->pGroupbyExpr) || isFixedOutputQuery(pQuery)) {
    return;
  }

  SResultRec *pRec = &pQuery->rec;
  if (pRec->capacity - pRec->rows >= pBlockInfo->rows) {
    return;  // enough room for the whole block
  }

  int32_t remain = pRec->capacity - pRec->rows;
  int32_t newSize = pRec->capacity + (pBlockInfo->rows - remain);

  for (int32_t i = 0; i < pQuery->numOfOutput; ++i) {
    int32_t bytes = pQuery->pSelectExpr[i].bytes;
    assert(bytes > 0 && newSize > 0);

    // compute the size in size_t: bytes * newSize may not fit in a 32-bit signed integer
    size_t size = (size_t)bytes * (size_t)newSize + sizeof(tFilePage);

    char *tmp = realloc(pQuery->sdata[i], size);
    if (tmp == NULL) {  // todo handle the oom; note that the assert is compiled out with NDEBUG
      assert(0);
    } else {
      pQuery->sdata[i] = (tFilePage *)tmp;
    }

    // set the pCtx output buffer position
    pRuntimeEnv->pCtx[i].aOutputBuf = pQuery->sdata[i]->data + pRec->rows * bytes;

    // the timestamp output of top/bottom/diff is the output buffer of the first
    // expression, which has been updated in the first iteration
    int32_t functionId = pQuery->pSelectExpr[i].base.functionId;
    if (functionId == TSDB_FUNC_TOP || functionId == TSDB_FUNC_BOTTOM || functionId == TSDB_FUNC_DIFF) {
      pRuntimeEnv->pCtx[i].ptsOutputBuf = pRuntimeEnv->pCtx[0].aOutputBuf;
    }
  }

  qTrace("QInfo:%p realloc output buffer, new size: %d rows, old:%d, remain:%d", GET_QINFO_ADDR(pRuntimeEnv),
         newSize, pRec->capacity, newSize - pRec->rows);

  pRec->capacity = newSize;
}

2107 2108
/*
 * Scan the data blocks of the current table and apply the query functions on
 * each of them.
 *
 * The master scan reads from pQueryHandle, any other scan from pSecQueryHandle.
 * The scan stops when no block is left, when the query has been killed, or
 * when the status of the query becomes QUERY_RESBUF_FULL or QUERY_COMPLETED.
 * For an interval query in the master scan, all time windows are closed once
 * the query is completed.
 *
 * @return always 0
 */
static int64_t doScanAllDataBlocks(SQueryRuntimeEnv *pRuntimeEnv) {
  SQuery *         pQuery = pRuntimeEnv->pQuery;
  STableQueryInfo *pTableQueryInfo = pQuery->current;
  SQueryCostInfo * summary = &pRuntimeEnv->summary;

  qTrace("QInfo:%p query start, qrange:%" PRId64 "-%" PRId64 ", lastkey:%" PRId64 ", order:%d",
         GET_QINFO_ADDR(pRuntimeEnv), pTableQueryInfo->win.skey, pTableQueryInfo->win.ekey, pTableQueryInfo->lastKey,
         pQuery->order.order);

  TsdbQueryHandleT pQueryHandle = pRuntimeEnv->pSecQueryHandle;
  if (IS_MASTER_SCAN(pRuntimeEnv)) {
    pQueryHandle = pRuntimeEnv->pQueryHandle;
  }

  while (tsdbNextDataBlock(pQueryHandle)) {
    summary->totalBlocks += 1;

    if (isQueryKilled(GET_QINFO_ADDR(pRuntimeEnv))) {
      return 0;
    }

    SDataBlockInfo blockInfo = tsdbRetrieveDataBlockInfo(pQueryHandle);

    // todo extract methods
    // first block of an interval query: set up the first time window
    if (isIntervalQuery(pQuery) && pRuntimeEnv->windowResInfo.prevSKey == TSKEY_INITIAL_VAL) {
      STimeWindow     realWin = TSWINDOW_INITIALIZER;
      STimeWindow     w = TSWINDOW_INITIALIZER;
      SWindowResInfo *pWindowResInfo = &pRuntimeEnv->windowResInfo;

      if (QUERY_IS_ASC_QUERY(pQuery)) {
        getAlignQueryTimeWindow(pQuery, blockInfo.window.skey, blockInfo.window.skey, pQuery->window.ekey, &realWin, &w);
        pWindowResInfo->startTime = w.skey;
      } else {
        // the start position of the first time window in the endpoint that spreads beyond the queried last timestamp
        getAlignQueryTimeWindow(pQuery, blockInfo.window.ekey, pQuery->window.ekey, blockInfo.window.ekey, &realWin, &w);
        pWindowResInfo->startTime = pQuery->window.skey;
      }

      pWindowResInfo->prevSKey = w.skey;

      if (pRuntimeEnv->pFillInfo != NULL) {
        pRuntimeEnv->pFillInfo->start = w.skey;
      }
    }

    // in case of prj/diff query, ensure the output buffer is sufficient to accommodate the results of current block
    ensureOutputBuffer(pRuntimeEnv, &blockInfo);

    SDataStatis *pStatis = NULL;
    SArray *     pDataBlock = loadDataBlockOnDemand(pRuntimeEnv, pQueryHandle, &blockInfo, &pStatis);

    // query start position can not move into tableApplyFunctionsOnBlock due to limit/offset condition
    if (QUERY_IS_ASC_QUERY(pQuery)) {
      pQuery->pos = 0;
    } else {
      pQuery->pos = blockInfo.rows - 1;
    }

    int32_t numOfRes = tableApplyFunctionsOnBlock(pRuntimeEnv, &blockInfo, pStatis, binarySearchForKey, pDataBlock);

    summary->totalRows += blockInfo.rows;
    qTrace("QInfo:%p check data block, brange:%" PRId64 "-%" PRId64 ", numOfRows:%d, numOfRes:%d, lastKey:%"PRId64, GET_QINFO_ADDR(pRuntimeEnv),
           blockInfo.window.skey, blockInfo.window.ekey, blockInfo.rows, numOfRes, pQuery->current->lastKey);

    // while the output buffer is full or limit/offset is applied, query may be paused here
    if (Q_STATUS_EQUAL(pQuery->status, QUERY_RESBUF_FULL | QUERY_COMPLETED)) {
      break;
    }
  }

  // if the result buffer is not full, set the query complete
  if (!Q_STATUS_EQUAL(pQuery->status, QUERY_RESBUF_FULL)) {
    setQueryStatus(pQuery, QUERY_COMPLETED);
  }

  if (isIntervalQuery(pQuery) && IS_MASTER_SCAN(pRuntimeEnv)) {
    if (!Q_STATUS_EQUAL(pQuery->status, QUERY_COMPLETED)) {
      assert(Q_STATUS_EQUAL(pQuery->status, QUERY_RESBUF_FULL));
    } else {
      int32_t step = QUERY_IS_ASC_QUERY(pQuery) ? QUERY_ASC_FORWARD_STEP : QUERY_DESC_FORWARD_STEP;

      closeAllTimeWindow(&pRuntimeEnv->windowResInfo);
      removeRedundantWindow(&pRuntimeEnv->windowResInfo, pTableQueryInfo->lastKey - step, step);
      pRuntimeEnv->windowResInfo.curIndex = pRuntimeEnv->windowResInfo.size - 1;  // point to the last time window
    }
  }

  return 0;
}

/*
 * set tag value in SQLFunctionCtx
 * e.g.,tag information into input buffer
 */
H
[td-90]  
Haojun Liao 已提交
2191 2192 2193
static void doSetTagValueInParam(void *tsdb, STableId* pTableId, int32_t tagColId, tVariant *tag, int16_t type,
    int16_t bytes) {
  tVariantDestroy(tag);
2194

2195
  if (tagColId == TSDB_TBNAME_COLUMN_INDEX) {
H
[td-90]  
Haojun Liao 已提交
2196 2197 2198 2199
    char* val = tsdbGetTableName(tsdb, pTableId);
    assert(val != NULL);
    
    tVariantCreateFromBinary(tag, varDataVal(val), varDataLen(val), TSDB_DATA_TYPE_BINARY);
2200
  } else {
H
[td-90]  
Haojun Liao 已提交
2201 2202 2203 2204 2205
    char* val = tsdbGetTableTagVal(tsdb, pTableId, tagColId, type, bytes);
    if (val == NULL) {
      tag->nType = TSDB_DATA_TYPE_NULL;
      return;
    }
H
hjxilinx 已提交
2206 2207
    
    if (type == TSDB_DATA_TYPE_BINARY || type == TSDB_DATA_TYPE_NCHAR) {
H
[td-90]  
Haojun Liao 已提交
2208
      tVariantCreateFromBinary(tag, varDataVal(val), varDataLen(val), type);
H
hjxilinx 已提交
2209
    } else {
H
[td-90]  
Haojun Liao 已提交
2210
      tVariantCreateFromBinary(tag, val, bytes, type);
H
hjxilinx 已提交
2211
    }
2212
  }
2213 2214
}

H
hjxilinx 已提交
2215
/*
 * Set the tag values of the given table in the function contexts of the query.
 *
 * When the only output expression is TSDB_FUNC_TS_COMP, the tag whose column id
 * is the first argument of that expression is loaded into the first context.
 * Otherwise the tag value is loaded for every output expression whose column
 * is flagged as a tag.
 */
void setTagVal(SQueryRuntimeEnv *pRuntimeEnv, STableId* pTableId, void *tsdb) {
  SQuery *   pQuery = pRuntimeEnv->pQuery;
  SExprInfo *pFirstExpr = &pQuery->pSelectExpr[0];

  if (pQuery->numOfOutput == 1 && pFirstExpr->base.functionId == TSDB_FUNC_TS_COMP) {
    assert(pFirstExpr->base.numOfParams == 1);
    doSetTagValueInParam(tsdb, pTableId, pFirstExpr->base.arg->argValue.i64, &pRuntimeEnv->pCtx[0].tag,
                         pFirstExpr->type, pFirstExpr->bytes);
    return;
  }

  // set tag value, by which the results are aggregated.
  for (int32_t idx = 0; idx < pQuery->numOfOutput; ++idx) {
    SExprInfo* pExpr = &pQuery->pSelectExpr[idx];

    // ts_comp column required the tag value for join filter
    if (TSDB_COL_IS_TAG(pExpr->base.colInfo.flag)) {
      // todo use tag column index to optimize performance
      doSetTagValueInParam(tsdb, pTableId, pExpr->base.colInfo.colId, &pRuntimeEnv->pCtx[idx].tag,
          pExpr->type, pExpr->bytes);
    }
  }

  // set the join tag for first column
  SSqlFuncMsg *pFuncMsg = &pFirstExpr->base;
  if (pFuncMsg->functionId == TSDB_FUNC_TS && pFuncMsg->colInfo.colIndex == PRIMARYKEY_TIMESTAMP_COL_INDEX &&
      pRuntimeEnv->pTSBuf != NULL) {
    assert(pFuncMsg->numOfParams == 1);
    assert(0);  // to do fix me
    //      doSetTagValueInParam(pTagSchema, pFuncMsg->arg->argValue.i64, pMeterSidInfo, &pRuntimeEnv->pCtx[0].tag);
  }
}

/*
 * Merge one window result into the output of the function contexts.
 *
 * @param pRuntimeEnv  runtime environment of the query
 * @param timestamp    value stored in nStartQueryTimestamp of every context
 * @param pWindowRes   window result used as input of the merge
 * @param mergeFlag    when false, every context first moves to its next output
 *                     row and is re-initialized before the merge
 */
static void doMerge(SQueryRuntimeEnv *pRuntimeEnv, int64_t timestamp, SWindowResult *pWindowRes, bool mergeFlag) {
  SQuery *        pQuery = pRuntimeEnv->pQuery;
  SQLFunctionCtx *pCtx = pRuntimeEnv->pCtx;

  // first pass: prepare the input of every context
  for (int32_t idx = 0; idx < pQuery->numOfOutput; ++idx) {
    SQLFunctionCtx *pc = &pCtx[idx];
    int32_t         fid = pQuery->pSelectExpr[idx].base.functionId;

    if (!mergeFlag) {
      pc->aOutputBuf = pc->aOutputBuf + pc->outputBytes;
      pc->currentStage = FIRST_STAGE_MERGE;

      resetResultInfo(pc->resultInfo);
      aAggs[fid].init(pc);
    }

    pc->hasNull = true;
    pc->nStartQueryTimestamp = timestamp;
    pc->aInputElemBuf = getPosInResultPage(pRuntimeEnv, idx, pWindowRes);

    // in case of tag column, the tag information should be extracted from input buffer
    if (fid == TSDB_FUNC_TAG_DUMMY || fid == TSDB_FUNC_TAG) {
      tVariantDestroy(&pc->tag);

      int32_t type = pc->outputType;
      if (type == TSDB_DATA_TYPE_BINARY || type == TSDB_DATA_TYPE_NCHAR) {
        tVariantCreateFromBinary(&pc->tag, varDataVal(pc->aInputElemBuf), varDataLen(pc->aInputElemBuf), type);
      } else {
        tVariantCreateFromBinary(&pc->tag, pc->aInputElemBuf, pc->inputBytes, pc->inputType);
      }
    }
  }

  // second pass: run the merge function of every expression but TSDB_FUNC_TAG_DUMMY
  for (int32_t idx = 0; idx < pQuery->numOfOutput; ++idx) {
    int32_t fid = pQuery->pSelectExpr[idx].base.functionId;
    if (fid != TSDB_FUNC_TAG_DUMMY) {
      aAggs[fid].distMergeFunc(&pCtx[idx]);
    }
  }
}

2292
static UNUSED_FUNC void printBinaryData(int32_t functionId, char *data, int32_t srcDataType) {
2293 2294 2295 2296 2297 2298 2299 2300 2301 2302 2303 2304 2305 2306 2307 2308 2309 2310 2311 2312 2313 2314 2315 2316 2317 2318 2319 2320 2321 2322 2323 2324 2325 2326 2327 2328 2329 2330 2331 2332 2333 2334 2335 2336 2337 2338 2339 2340 2341 2342 2343 2344 2345 2346 2347 2348 2349 2350 2351 2352 2353 2354 2355 2356 2357 2358 2359 2360
  if (functionId == TSDB_FUNC_FIRST_DST || functionId == TSDB_FUNC_LAST_DST) {
    switch (srcDataType) {
      case TSDB_DATA_TYPE_BINARY:
        printf("%" PRId64 ",%s\t", *(TSKEY *)data, (data + TSDB_KEYSIZE + 1));
        break;
      case TSDB_DATA_TYPE_TINYINT:
      case TSDB_DATA_TYPE_BOOL:
        printf("%" PRId64 ",%d\t", *(TSKEY *)data, *(int8_t *)(data + TSDB_KEYSIZE + 1));
        break;
      case TSDB_DATA_TYPE_SMALLINT:
        printf("%" PRId64 ",%d\t", *(TSKEY *)data, *(int16_t *)(data + TSDB_KEYSIZE + 1));
        break;
      case TSDB_DATA_TYPE_BIGINT:
      case TSDB_DATA_TYPE_TIMESTAMP:
        printf("%" PRId64 ",%" PRId64 "\t", *(TSKEY *)data, *(TSKEY *)(data + TSDB_KEYSIZE + 1));
        break;
      case TSDB_DATA_TYPE_INT:
        printf("%" PRId64 ",%d\t", *(TSKEY *)data, *(int32_t *)(data + TSDB_KEYSIZE + 1));
        break;
      case TSDB_DATA_TYPE_FLOAT:
        printf("%" PRId64 ",%f\t", *(TSKEY *)data, *(float *)(data + TSDB_KEYSIZE + 1));
        break;
      case TSDB_DATA_TYPE_DOUBLE:
        printf("%" PRId64 ",%lf\t", *(TSKEY *)data, *(double *)(data + TSDB_KEYSIZE + 1));
        break;
    }
  } else if (functionId == TSDB_FUNC_AVG) {
    printf("%lf,%d\t", *(double *)data, *(int32_t *)(data + sizeof(double)));
  } else if (functionId == TSDB_FUNC_SPREAD) {
    printf("%lf,%lf\t", *(double *)data, *(double *)(data + sizeof(double)));
  } else if (functionId == TSDB_FUNC_TWA) {
    data += 1;
    printf("%lf,%" PRId64 ",%" PRId64 ",%" PRId64 "\t", *(double *)data, *(int64_t *)(data + 8),
           *(int64_t *)(data + 16), *(int64_t *)(data + 24));
  } else if (functionId == TSDB_FUNC_MIN || functionId == TSDB_FUNC_MAX) {
    switch (srcDataType) {
      case TSDB_DATA_TYPE_TINYINT:
      case TSDB_DATA_TYPE_BOOL:
        printf("%d\t", *(int8_t *)data);
        break;
      case TSDB_DATA_TYPE_SMALLINT:
        printf("%d\t", *(int16_t *)data);
        break;
      case TSDB_DATA_TYPE_BIGINT:
      case TSDB_DATA_TYPE_TIMESTAMP:
        printf("%" PRId64 "\t", *(int64_t *)data);
        break;
      case TSDB_DATA_TYPE_INT:
        printf("%d\t", *(int *)data);
        break;
      case TSDB_DATA_TYPE_FLOAT:
        printf("%f\t", *(float *)data);
        break;
      case TSDB_DATA_TYPE_DOUBLE:
        printf("%f\t", *(float *)data);
        break;
    }
  } else if (functionId == TSDB_FUNC_SUM) {
    if (srcDataType == TSDB_DATA_TYPE_FLOAT || srcDataType == TSDB_DATA_TYPE_DOUBLE) {
      printf("%lf\t", *(float *)data);
    } else {
      printf("%" PRId64 "\t", *(int64_t *)data);
    }
  } else {
    printf("%s\t", data);
  }
}

2361
/*
 * Debug helper: print the intermediate result of a super table query to stdout.
 *
 * pdata holds one page per output column; row r of column c is read at
 * pdata[c]->data + bytes(c) * r. One text line is printed per row and one
 * tab-terminated field per column. Columns whose result type is not listed
 * below print nothing.
 */
void UNUSED_FUNC displayInterResult(tFilePage **pdata, SQueryRuntimeEnv* pRuntimeEnv, int32_t numOfRows) {
  SQuery *pQuery = pRuntimeEnv->pQuery;
  int32_t numOfOutput = pQuery->numOfOutput;

  printf("super table query intermediate result, total:%d\n", numOfRows);

  for (int32_t row = 0; row < numOfRows; ++row) {
    for (int32_t col = 0; col < numOfOutput; ++col) {
      int32_t type = pQuery->pSelectExpr[col].type;
      char *  cell = pdata[col]->data + pQuery->pSelectExpr[col].bytes * row;

      if (type == TSDB_DATA_TYPE_BINARY) {
        // binary cells carry function-specific intermediate state
        printBinaryData(pQuery->pSelectExpr[col].base.functionId, cell, type);
      } else if (type == TSDB_DATA_TYPE_TIMESTAMP || type == TSDB_DATA_TYPE_BIGINT) {
        printf("%" PRId64 "\t", *(int64_t *)cell);
      } else if (type == TSDB_DATA_TYPE_INT) {
        printf("%d\t", *(int32_t *)cell);
      } else if (type == TSDB_DATA_TYPE_FLOAT) {
        printf("%f\t", *(float *)cell);
      } else if (type == TSDB_DATA_TYPE_DOUBLE) {
        printf("%lf\t", *(double *)cell);
      }
    }

    printf("\n");
  }
}

typedef struct SCompSupporter {
H
hjxilinx 已提交
2400 2401 2402
  STableQueryInfo **pTableQueryInfo;
  int32_t *         position;
  SQInfo *          pQInfo;
2403 2404 2405 2406 2407
} SCompSupporter;

int32_t tableResultComparFn(const void *pLeft, const void *pRight, void *param) {
  int32_t left = *(int32_t *)pLeft;
  int32_t right = *(int32_t *)pRight;
2408

2409 2410
  SCompSupporter *  supporter = (SCompSupporter *)param;
  SQueryRuntimeEnv *pRuntimeEnv = &supporter->pQInfo->runtimeEnv;
2411

2412 2413
  int32_t leftPos = supporter->position[left];
  int32_t rightPos = supporter->position[right];
2414

2415 2416 2417 2418
  /* left source is exhausted */
  if (leftPos == -1) {
    return 1;
  }
2419

2420 2421 2422 2423
  /* right source is exhausted*/
  if (rightPos == -1) {
    return -1;
  }
2424

H
hjxilinx 已提交
2425
  SWindowResInfo *pWindowResInfo1 = &supporter->pTableQueryInfo[left]->windowResInfo;
2426
  SWindowResult * pWindowRes1 = getWindowResult(pWindowResInfo1, leftPos);
2427

2428 2429
  char *b1 = getPosInResultPage(pRuntimeEnv, PRIMARYKEY_TIMESTAMP_COL_INDEX, pWindowRes1);
  TSKEY leftTimestamp = GET_INT64_VAL(b1);
2430

H
hjxilinx 已提交
2431
  SWindowResInfo *pWindowResInfo2 = &supporter->pTableQueryInfo[right]->windowResInfo;
2432
  SWindowResult * pWindowRes2 = getWindowResult(pWindowResInfo2, rightPos);
2433

2434 2435
  char *b2 = getPosInResultPage(pRuntimeEnv, PRIMARYKEY_TIMESTAMP_COL_INDEX, pWindowRes2);
  TSKEY rightTimestamp = GET_INT64_VAL(b2);
2436

2437 2438 2439
  if (leftTimestamp == rightTimestamp) {
    return 0;
  }
2440

2441 2442 2443
  return leftTimestamp > rightTimestamp ? 1 : -1;
}

2444
/*
 * Merge the per-table results of the next group(s) into group result pages.
 *
 * Starting at pQInfo->groupIndex, groups are merged one at a time until one of
 * them produces at least one result page or all groups have been visited.
 * pQInfo->groupIndex is advanced past every group that was processed.
 *
 * Returns TSDB_CODE_SUCCESS, or -1 when mergeIntoGroupResultImpl() reports a failure.
 */
int32_t mergeIntoGroupResult(SQInfo *pQInfo) {
  int64_t st = taosGetTimestampMs();
  int32_t ret = TSDB_CODE_SUCCESS;

  int32_t numOfGroups = taosArrayGetSize(pQInfo->groupInfo.pGroupList);

  while (pQInfo->groupIndex < numOfGroups) {
    SArray *group = taosArrayGetP(pQInfo->groupInfo.pGroupList, pQInfo->groupIndex);
    ret = mergeIntoGroupResultImpl(pQInfo, group);
    if (ret < 0) {  // not enough disk space to save the data into disk
      return -1;
    }

    pQInfo->groupIndex += 1;

    // this group generates at least one result, return results
    if (ret > 0) {
      break;
    }

    assert(pQInfo->numOfGroupResultPages == 0);
    qTrace("QInfo:%p no result in group %d, continue", pQInfo, pQInfo->groupIndex - 1);
  }

  // the elapsed time is an int64_t: print it with PRId64 rather than %lld, which
  // does not match int64_t on platforms where int64_t is a long
  int64_t elapsed = taosGetTimestampMs() - st;
  qTrace("QInfo:%p merge res data into group, index:%d, total group:%d, elapsed time:%" PRId64 "ms", pQInfo,
         pQInfo->groupIndex - 1, numOfGroups, elapsed);

  return TSDB_CODE_SUCCESS;
}

/*
 * Copy the next batch of merged group results into the query output buffers (pQuery->sdata).
 *
 * When every page batch of the current group has been consumed
 * (pQInfo->offset == pQInfo->numOfGroupResultPages), the next group is merged first.
 * If no group is left, the query is marked as completed by moving tableIndex to
 * the number of tables. On return pQuery->rec.rows holds the number of rows copied
 * and pQInfo->offset points to the next batch.
 */
void copyResToQueryResultBuf(SQInfo *pQInfo, SQuery *pQuery) {
  if (pQInfo->offset == pQInfo->numOfGroupResultPages) {
    pQInfo->numOfGroupResultPages = 0;

    // everything of the current group has been returned, move on to the next group
    if (mergeIntoGroupResult(pQInfo) != TSDB_CODE_SUCCESS) {
      return;  // failed to save data in the disk
    }

    // no pages produced and no group left: all results have been sent to the client
    int32_t numOfGroup = taosArrayGetSize(pQInfo->groupInfo.pGroupList);
    if (pQInfo->numOfGroupResultPages == 0 && pQInfo->groupIndex == numOfGroup) {
      pQInfo->tableIndex = pQInfo->groupInfo.numOfTables;  // set query completed
      return;
    }
  }

  SQueryRuntimeEnv *   pRuntimeEnv = &pQInfo->runtimeEnv;
  SDiskbasedResultBuf *pResultBuf = pRuntimeEnv->pResultBuf;

  int32_t groupId = getGroupResultId(pQInfo->groupIndex - 1);
  SIDList pages = getDataBufPagesIdList(pResultBuf, pQInfo->offset + groupId);

  // rows copied so far; doubles as the row offset in the destination columns
  int32_t copied = 0;
  for (int32_t p = 0; p < pages.size; ++p) {
    tFilePage *pPage = GET_RES_BUF_PAGE_BY_ID(pResultBuf, pages.pData[p]);

    for (int32_t col = 0; col < pQuery->numOfOutput; ++col) {
      int32_t bytes = pRuntimeEnv->pCtx[col].outputBytes;
      char *  dst = pQuery->sdata[col]->data + copied * bytes;

      // inside a page, column `col` starts at offset[col] * (rows in the page)
      char *src = pPage->data + pRuntimeEnv->offset[col] * pPage->num;
      memcpy(dst, src, bytes * pPage->num);
    }

    copied += pPage->num;
  }

  assert(pQuery->rec.rows == 0);

  pQuery->rec.rows += copied;
  pQInfo->offset += 1;
}

H
Haojun Liao 已提交
2526 2527
/*
 * Return the number of results held by a window result.
 *
 * The ts, tag and tagprj functions cannot decide the output size of a query,
 * so their columns are skipped. The count of the first remaining column that
 * has at least one result is returned; 0 when there is none.
 */
int64_t getNumOfResultWindowRes(SQuery *pQuery, SWindowResult *pWindowRes) {
  for (int32_t col = 0; col < pQuery->numOfOutput; ++col) {
    int32_t functionId = pQuery->pSelectExpr[col].base.functionId;

    bool auxiliary =
        (functionId == TSDB_FUNC_TS) || (functionId == TSDB_FUNC_TAG) || (functionId == TSDB_FUNC_TAGPRJ);
    if (auxiliary) {
      continue;
    }

    SResultInfo *pInfo = &pWindowRes->resultInfo[col];
    assert(pInfo != NULL);

    if (pInfo->numOfRes > 0) {
      return pInfo->numOfRes;
    }
  }

  return 0;
}

2559
/*
 * Merge the window results of all tables in one group, ordered by window start
 * timestamp, and flush the merged rows into the disk-based result buffer.
 *
 * Tables without result pages or without window results are ignored. The
 * remaining tables are merged through a loser tree; window results carrying the
 * same timestamp are merged into one output row.
 *
 * Returns the number of group result pages produced (pQInfo->numOfGroupResultPages),
 * 0 when the group has no result, or -1 on failure (memory allocation or flush
 * failure). All temporary resources are released on every path.
 */
int32_t mergeIntoGroupResultImpl(SQInfo *pQInfo, SArray *pGroup) {
  SQueryRuntimeEnv *pRuntimeEnv = &pQInfo->runtimeEnv;
  SQuery *          pQuery = pRuntimeEnv->pQuery;
  tFilePage **      buffer = pQuery->sdata;

  size_t size = taosArrayGetSize(pGroup);
  if (size == 0) {  // empty group: nothing to merge, and avoids zero-sized allocations
    assert(pQInfo->numOfGroupResultPages == 0);
    return 0;
  }

  int32_t         code = -1;  // failure unless overwritten below
  int32_t         numOfTables = 0;
  int64_t         lastTimestamp = -1;
  int64_t         startt = 0;
  int64_t         endt = 0;
  SLoserTreeInfo *pTree = NULL;
  SResultInfo *   pResultInfo = NULL;

  int32_t *         posList = calloc(size, sizeof(int32_t));
  STableQueryInfo **pTableList = malloc(POINTER_BYTES * size);
  SCompSupporter    cs = {pTableList, posList, pQInfo};

  if (posList == NULL || pTableList == NULL) {
    qError("QInfo:%p failed to allocate memory for merging group results, abort query", pQInfo);
    goto _cleanup;
  }

  // collect the tables that actually have results to merge
  // todo opt for the case of one table per group
  for (size_t i = 0; i < size; ++i) {
    SGroupItem *     item = taosArrayGet(pGroup, i);
    STableQueryInfo *pInfo = item->info;

    SIDList list = getDataBufPagesIdList(pRuntimeEnv->pResultBuf, pInfo->id.tid);
    if (list.size > 0 && pInfo->windowResInfo.size > 0) {
      pTableList[numOfTables] = pInfo;
      numOfTables += 1;
    }
  }

  if (numOfTables == 0) {
    assert(pQInfo->numOfGroupResultPages == 0);
    code = 0;
    goto _cleanup;
  }

  tLoserTreeCreate(&pTree, numOfTables, &cs, tableResultComparFn);
  if (pTree == NULL) {
    qError("QInfo:%p failed to create loser tree for merging group results, abort query", pQInfo);
    goto _cleanup;
  }

  pResultInfo = calloc(pQuery->numOfOutput, sizeof(SResultInfo));
  if (pResultInfo == NULL) {
    qError("QInfo:%p failed to allocate result info for merging group results, abort query", pQInfo);
    goto _cleanup;
  }

  setWindowResultInfo(pResultInfo, pQuery, pRuntimeEnv->stableQuery);
  resetMergeResultBuf(pQuery, pRuntimeEnv->pCtx, pResultInfo);

  startt = taosGetTimestampMs();

  while (1) {
    // the tree root designates the source with the smallest current timestamp
    int32_t pos = pTree->pNode[0].index;

    SWindowResInfo *pWindowResInfo = &pTableList[pos]->windowResInfo;
    SWindowResult * pWindowRes = getWindowResult(pWindowResInfo, cs.position[pos]);

    char *b = getPosInResultPage(pRuntimeEnv, PRIMARYKEY_TIMESTAMP_COL_INDEX, pWindowRes);
    TSKEY ts = GET_INT64_VAL(b);

    assert(ts == pWindowRes->window.skey);

    int64_t num = getNumOfResultWindowRes(pQuery, pWindowRes);
    if (num > 0) {
      if (ts == lastTimestamp) {  // merge with the last one
        doMerge(pRuntimeEnv, ts, pWindowRes, true);
      } else {  // start a new output row, flushing the output buffer first when it is full
        if (buffer[0]->num == pQuery->rec.capacity) {
          if (flushFromResultBuf(pQInfo) != TSDB_CODE_SUCCESS) {
            qError("QInfo:%p failed to flush data into temp file, abort query", pQInfo);
            goto _cleanup;
          }

          resetMergeResultBuf(pQuery, pRuntimeEnv->pCtx, pResultInfo);
        }

        doMerge(pRuntimeEnv, ts, pWindowRes, false);
        buffer[0]->num += 1;
      }

      lastTimestamp = ts;
    }

    // advance this source; a source without any window result left is marked as exhausted
    cs.position[pos] += 1;
    if (cs.position[pos] >= pWindowResInfo->size) {
      cs.position[pos] = -1;

      // all input sources are exhausted
      if (--numOfTables == 0) {
        break;
      }
    }

    tLoserTreeAdjust(pTree, pos + pTree->numOfEntries);
  }

  if (buffer[0]->num != 0) {  // there are data in buffer
    if (flushFromResultBuf(pQInfo) != TSDB_CODE_SUCCESS) {
      qError("QInfo:%p failed to flush data into temp file, abort query", pQInfo);
      goto _cleanup;
    }
  }

  endt = taosGetTimestampMs();

#ifdef _DEBUG_VIEW
  displayInterResult(pQuery->sdata, pRuntimeEnv, pQuery->sdata[0]->num);
#endif

  qTrace("QInfo:%p result merge completed for group:%d, elapsed time:%" PRId64 " ms", pQInfo, pQInfo->groupIndex, endt - startt);

  pQInfo->offset = 0;
  code = pQInfo->numOfGroupResultPages;

_cleanup:
  if (pResultInfo != NULL) {
    for (int32_t i = 0; i < pQuery->numOfOutput; ++i) {
      tfree(pResultInfo[i].interResultBuf);
    }
  }

  tfree(pResultInfo);
  tfree(pTree);
  tfree(pTableList);
  tfree(posList);

  return code;
}

/*
 * Flush the rows accumulated in pQuery->sdata into pages of the disk-based
 * result buffer, under the id of the current group result batch.
 *
 * Each page stores the data column-wise: column i starts at
 * pRuntimeEnv->offset[i] * (rows in the page). One flush counts as one group
 * result batch (numOfGroupResultPages is increased by one), no matter how many
 * pages were needed.
 *
 * Returns TSDB_CODE_SUCCESS, or -1 when no row fits into a page or a new page
 * cannot be obtained.
 */
int32_t flushFromResultBuf(SQInfo *pQInfo) {
  SQueryRuntimeEnv *   pRuntimeEnv = &pQInfo->runtimeEnv;
  SQuery *             pQuery = pRuntimeEnv->pQuery;
  SDiskbasedResultBuf *pResultBuf = pRuntimeEnv->pResultBuf;

  // number of rows that fit into one page
  int32_t capacity = (DEFAULT_INTERN_BUF_PAGE_SIZE - sizeof(tFilePage)) / pQuery->rowSize;
  if (capacity <= 0) {  // a zero capacity would make the loop below spin forever
    qError("QInfo:%p row size:%d exceeds the page size of result buffer", pQInfo, (int32_t)pQuery->rowSize);
    return -1;
  }

  int32_t pageId = -1;
  int32_t remain = pQuery->sdata[0]->num;
  int32_t offset = 0;

  // every page of this flush is registered under the same id
  int32_t id = getGroupResultId(pQInfo->groupIndex) + pQInfo->numOfGroupResultPages;

  while (remain > 0) {
    int32_t r = (remain > capacity) ? capacity : remain;

    tFilePage *buf = getNewDataBuf(pResultBuf, id, &pageId);
    if (buf == NULL) {  // NOTE(review): presumably NULL means the buffer could not be extended - confirm in qresultBuf
      qError("QInfo:%p failed to get new page from result buffer", pQInfo);
      return -1;
    }

    buf->num = r;

    // pagewise copy to dest buffer
    for (int32_t i = 0; i < pQuery->numOfOutput; ++i) {
      int32_t bytes = pRuntimeEnv->pCtx[i].outputBytes;

      memcpy(buf->data + pRuntimeEnv->offset[i] * buf->num, ((char *)pQuery->sdata[i]->data) + offset * bytes,
             buf->num * bytes);
    }

    offset += r;
    remain -= r;
  }

  pQInfo->numOfGroupResultPages += 1;
  return TSDB_CODE_SUCCESS;
}

/*
 * Rewind the merge output state for every output column.
 *
 * The output pointer of each function context is placed one cell *before* the
 * start of its column buffer, the context is bound to pResultInfo[k], and the
 * row count of the column page is reset to 0.
 * NOTE(review): presumably the merge routine advances aOutputBuf by outputBytes
 * before writing each new row - confirm in doMerge.
 */
void resetMergeResultBuf(SQuery *pQuery, SQLFunctionCtx *pCtx, SResultInfo *pResultInfo) {
  int32_t numOfOutput = pQuery->numOfOutput;

  for (int32_t col = 0; col < numOfOutput; ++col) {
    SQLFunctionCtx *ctx = &pCtx[col];
    tFilePage *     page = pQuery->sdata[col];

    ctx->aOutputBuf = page->data - ctx->outputBytes;
    ctx->size = 1;
    ctx->startOffset = 0;
    ctx->resultInfo = &pResultInfo[col];

    page->num = 0;
  }
}

2743 2744 2745 2746 2747 2748 2749
/*
 * Prepare the per-table query state for the reverse scan.
 *
 * The caller has already switched pQuery->order.order. The table window is
 * re-based on the last key that was scanned: its end becomes lastKey + step,
 * start and end are swapped, and lastKey restarts from the new window start.
 * The table cursor order is flipped and its vgroup index reset.
 * A NULL pTableQueryInfo is ignored.
 */
static void updateTableQueryInfoForReverseScan(SQuery *pQuery, STableQueryInfo *pTableQueryInfo) {
  if (pTableQueryInfo != NULL) {
    // order has change already!
    int32_t step = GET_FORWARD_DIRECTION_FACTOR(pQuery->order.order);

    // TODO: validate the relation between win.ekey and lastKey + step for both scan orders
    pTableQueryInfo->win.ekey = pTableQueryInfo->lastKey + step;
    SWAP(pTableQueryInfo->win.skey, pTableQueryInfo->win.ekey, TSKEY);

    pTableQueryInfo->lastKey = pTableQueryInfo->win.skey;

    SWITCH_ORDER(pTableQueryInfo->cur.order);
    pTableQueryInfo->cur.vgroupIndex = -1;
  }
}

/*
 * Decide, for every closed window result, which output functions take part in the reverse scan.
 *
 * first/first_dst under ascending order and last/last_dst under descending order
 * are re-opened (complete = false). Every other function except ts and tag is
 * marked complete. ts and tag columns are left untouched, as are windows that
 * are not closed.
 */
static void disableFuncInReverseScanImpl(SQInfo* pQInfo, SWindowResInfo *pWindowResInfo, int32_t order) {
  SQuery *pQuery = pQInfo->runtimeEnv.pQuery;

  for (int32_t w = 0; w < pWindowResInfo->size; ++w) {
    SWindowStatus *pStatus = getTimeWindowResStatus(pWindowResInfo, w);
    if (pStatus->closed) {
      SWindowResult *pRes = getWindowResult(pWindowResInfo, w);

      // open/close the specified query for each group result
      for (int32_t col = 0; col < pQuery->numOfOutput; ++col) {
        int32_t functId = pQuery->pSelectExpr[col].base.functionId;

        bool isFirst = (functId == TSDB_FUNC_FIRST || functId == TSDB_FUNC_FIRST_DST);
        bool isLast = (functId == TSDB_FUNC_LAST || functId == TSDB_FUNC_LAST_DST);

        if ((isFirst && order == TSDB_ORDER_ASC) || (isLast && order == TSDB_ORDER_DESC)) {
          pRes->resultInfo[col].complete = false;
        } else if (!(functId == TSDB_FUNC_TS || functId == TSDB_FUNC_TAG)) {
          pRes->resultInfo[col].complete = true;
        }
      }
    }
  }
}

2792 2793
/*
 * Select the functions that run during the reverse scan and re-base every table
 * of the query for that scan.
 *
 * For group-by-normal-column and interval queries the decision is made per
 * closed window result. Otherwise it is made on the result info bound to each
 * function context; contexts without a result info are skipped. Afterwards the
 * query state of every table in every group is updated for the reverse scan.
 */
void disableFuncInReverseScan(SQInfo *pQInfo) {
  SQueryRuntimeEnv *pRuntimeEnv = &pQInfo->runtimeEnv;
  SQuery *          pQuery = pRuntimeEnv->pQuery;
  int32_t           order = pQuery->order.order;

  if (isGroupbyNormalCol(pQuery->pGroupbyExpr) || isIntervalQuery(pQuery)) {
    // group by normal columns and interval query on normal table
    disableFuncInReverseScanImpl(pQInfo, &pRuntimeEnv->windowResInfo, order);
  } else {
    // simple result of a table query
    for (int32_t col = 0; col < pQuery->numOfOutput; ++col) {  // todo refactor
      SResultInfo *pResInfo = pRuntimeEnv->pCtx[col].resultInfo;
      if (pResInfo == NULL) {
        continue;  // resultInfo is NULL, means no data checked in previous scan
      }

      int32_t functId = pQuery->pSelectExpr[col].base.functionId;

      bool isFirst = (functId == TSDB_FUNC_FIRST || functId == TSDB_FUNC_FIRST_DST);
      bool isLast = (functId == TSDB_FUNC_LAST || functId == TSDB_FUNC_LAST_DST);

      if ((isFirst && order == TSDB_ORDER_ASC) || (isLast && order == TSDB_ORDER_DESC)) {
        pResInfo->complete = false;
      } else if (!(functId == TSDB_FUNC_TS || functId == TSDB_FUNC_TAG)) {
        pResInfo->complete = true;
      }
    }
  }

  SArray *pGroupList = pQInfo->groupInfo.pGroupList;
  size_t  numOfGroups = taosArrayGetSize(pGroupList);

  for (size_t g = 0; g < numOfGroups; ++g) {
    SArray *pOneGroup = taosArrayGetP(pGroupList, g);
    size_t  numOfItems = taosArrayGetSize(pOneGroup);

    for (size_t k = 0; k < numOfItems; ++k) {
      SGroupItem *pItem = taosArrayGet(pOneGroup, k);
      updateTableQueryInfoForReverseScan(pQuery, pItem->info);
    }
  }
}

2832
/* Flip the scan order stored in the function context of every output column. */
void switchCtxOrder(SQueryRuntimeEnv *pRuntimeEnv) {
  int32_t numOfOutput = pRuntimeEnv->pQuery->numOfOutput;

  for (int32_t col = 0; col < numOfOutput; ++col) {
    SWITCH_ORDER(pRuntimeEnv->pCtx[col].order);
  }
}

/*
 * Initialize a window result row: record its position and allocate one
 * zero-initialized SResultInfo per output column, then let
 * setWindowResultInfo() set up the intermediate result buffers.
 */
void createQueryResultInfo(SQuery *pQuery, SWindowResult *pResultRow, bool isSTableQuery, SPosInfo *posInfo) {
  size_t numOfOutput = (size_t)pQuery->numOfOutput;

  pResultRow->resultInfo = calloc(numOfOutput, sizeof(SResultInfo));
  pResultRow->pos = *posInfo;

  // set the intermediate result output buffer
  setWindowResultInfo(pResultRow->resultInfo, pQuery, isSTableQuery);
}

/*
 * Rewind every function context to the start of its output column, reset and
 * re-bind its result info, clear the column buffer, and re-run the function
 * initializers through initCtxOutputBuf().
 */
void resetCtxOutputBuf(SQueryRuntimeEnv *pRuntimeEnv) {
  SQuery *pQuery = pRuntimeEnv->pQuery;

  for (int32_t col = 0; col < pQuery->numOfOutput; ++col) {
    SQLFunctionCtx *pCtx = &pRuntimeEnv->pCtx[col];
    char *          pOutput = pQuery->sdata[col]->data;

    pCtx->aOutputBuf = pOutput;

    /*
     * bind the runtime result info to the context; not every query needs the
     * interResultBuf, e.g. COUNT/TAGPRJ/PRJ/TAG
     */
    resetResultInfo(&pRuntimeEnv->resultInfo[col]);
    pCtx->resultInfo = &pRuntimeEnv->resultInfo[col];

    // top/bottom/diff write their timestamps into the buffer of the first output column
    int32_t functionId = pQuery->pSelectExpr[col].base.functionId;
    bool    needTsBuf =
        (functionId == TSDB_FUNC_TOP) || (functionId == TSDB_FUNC_BOTTOM) || (functionId == TSDB_FUNC_DIFF);
    if (needTsBuf) {
      pCtx->ptsOutputBuf = pRuntimeEnv->pCtx[0].aOutputBuf;
    }

    memset(pOutput, 0, (size_t)pQuery->pSelectExpr[col].bytes * pQuery->rec.capacity);
  }

  initCtxOutputBuf(pRuntimeEnv);
}

/*
 * Move the output position of every function context forward by `output` rows
 * and reset the result info of each context.
 *
 * Only functions flagged as outer-forward have their output buffer advanced.
 * For top/bottom the timestamp output buffer is advanced as well, because those
 * routines write the timestamp column themselves. The diff function must not
 * appear here (asserted).
 */
void forwardCtxOutputBuf(SQueryRuntimeEnv *pRuntimeEnv, int64_t output) {
  SQuery *pQuery = pRuntimeEnv->pQuery;

  for (int32_t col = 0; col < pQuery->numOfOutput; ++col) {
    SQLFunctionCtx *pCtx = &pRuntimeEnv->pCtx[col];

    int32_t functionId = pQuery->pSelectExpr[col].base.functionId;
    assert(functionId != TSDB_FUNC_DIFF);

    // set next output position
    if (IS_OUTER_FORWARD(aAggs[functionId].nStatus)) {
      pCtx->aOutputBuf += pCtx->outputBytes * output;
    }

    // top/bottom assign the timestamp column through ptsOutputBuf, so forward it too
    if (functionId == TSDB_FUNC_TOP || functionId == TSDB_FUNC_BOTTOM) {
      pCtx->ptsOutputBuf += TSDB_KEYSIZE * output;
    }

    resetResultInfo(pCtx->resultInfo);
  }
}

/*
 * Reset the stage of every function context to 0 and run the function's init
 * callback for each context whose result info is not initialized yet.
 */
void initCtxOutputBuf(SQueryRuntimeEnv *pRuntimeEnv) {
  SQuery *pQuery = pRuntimeEnv->pQuery;

  for (int32_t col = 0; col < pQuery->numOfOutput; ++col) {
    SQLFunctionCtx *pCtx = &pRuntimeEnv->pCtx[col];
    int32_t         functionId = pQuery->pSelectExpr[col].base.functionId;

    pCtx->currentStage = 0;

    SResultInfo *pResInfo = GET_RES_INFO(pCtx);
    if (!pResInfo->initialized) {
      aAggs[functionId].init(pCtx);
    }
  }
}

2919
/*
 * Apply the remaining OFFSET of the query to the rows currently in the output buffer.
 *
 * - No rows, or no offset left: nothing to do.
 * - rows <= offset: all rows are dropped, the offset is reduced by that amount,
 *   the output buffers are reset and the "result buffer full" flag is cleared.
 * - rows > offset: the first `offset` rows are removed by shifting the remaining
 *   rows to the front of each column; the output pointers are moved behind the
 *   remaining rows and the offset becomes 0.
 */
void skipResults(SQueryRuntimeEnv *pRuntimeEnv) {
  SQuery *pQuery = pRuntimeEnv->pQuery;
  if (pQuery->rec.rows == 0 || pQuery->limit.offset == 0) {
    return;
  }

  if (pQuery->rec.rows <= pQuery->limit.offset) {
    // row counters are logged as int64_t; the explicit casts keep the format specifiers exact
    qTrace("QInfo:%p skip rows:%" PRId64 ", new offset:%" PRId64, GET_QINFO_ADDR(pRuntimeEnv),
           (int64_t)pQuery->rec.rows, (int64_t)(pQuery->limit.offset - pQuery->rec.rows));

    pQuery->limit.offset -= pQuery->rec.rows;
    pQuery->rec.rows = 0;

    resetCtxOutputBuf(pRuntimeEnv);

    // clear the buffer full flag if exists
    CLEAR_QUERY_STATUS(pQuery, QUERY_RESBUF_FULL);
  } else {
    int64_t numOfSkip = pQuery->limit.offset;
    pQuery->rec.rows -= numOfSkip;
    pQuery->limit.offset = 0;

    qTrace("QInfo:%p skip row:%" PRId64 ", new offset:%d, numOfRows remain:%" PRId64, GET_QINFO_ADDR(pRuntimeEnv),
           numOfSkip, 0, (int64_t)pQuery->rec.rows);

    for (int32_t i = 0; i < pQuery->numOfOutput; ++i) {
      int32_t functionId = pQuery->pSelectExpr[i].base.functionId;
      int32_t bytes = pRuntimeEnv->pCtx[i].outputBytes;

      // source and destination overlap, hence memmove
      memmove(pQuery->sdata[i]->data, (char*) pQuery->sdata[i]->data + bytes * numOfSkip, pQuery->rec.rows * bytes);
      pRuntimeEnv->pCtx[i].aOutputBuf = ((char*) pQuery->sdata[i]->data) + pQuery->rec.rows * bytes;

      if (functionId == TSDB_FUNC_DIFF || functionId == TSDB_FUNC_TOP || functionId == TSDB_FUNC_BOTTOM) {
        pRuntimeEnv->pCtx[i].ptsOutputBuf = pRuntimeEnv->pCtx[0].aOutputBuf;
      }
    }

    updateNumOfResult(pRuntimeEnv, pQuery->rec.rows);
  }
}

/*
 * Update the status flags of a query.
 *
 * QUERY_NOT_COMPLETED replaces the whole status. Any other status first clears
 * QUERY_NOT_COMPLETED (the two are not compatible) and is then OR-ed into the
 * existing flags.
 */
void setQueryStatus(SQuery *pQuery, int8_t status) {
  if (status == QUERY_NOT_COMPLETED) {
    pQuery->status = status;
    return;
  }

  CLEAR_QUERY_STATUS(pQuery, QUERY_NOT_COMPLETED);
  pQuery->status |= status;
}

/*
 * Run the xNextStep callback of every output function (ts excluded) and report
 * whether at least one of them is not complete yet, i.e. the data blocks have
 * to be scanned again.
 *
 * For group-by-normal-column and interval queries this is done for each closed
 * window result after binding the output buffers to that window; otherwise it
 * is done once on the current function contexts.
 */
bool needScanDataBlocksAgain(SQueryRuntimeEnv *pRuntimeEnv) {
  SQuery *pQuery = pRuntimeEnv->pQuery;
  bool    again = false;

  if (!(isGroupbyNormalCol(pQuery->pGroupbyExpr) || isIntervalQuery(pQuery))) {
    for (int32_t col = 0; col < pQuery->numOfOutput; ++col) {
      int16_t functId = pQuery->pSelectExpr[col].base.functionId;
      if (functId != TSDB_FUNC_TS) {
        aAggs[functId].xNextStep(&pRuntimeEnv->pCtx[col]);

        SResultInfo *pResInfo = GET_RES_INFO(&pRuntimeEnv->pCtx[col]);
        if (!pResInfo->complete) {
          again = true;
        }
      }
    }

    return again;
  }

  SWindowResInfo *pWindowResInfo = &pRuntimeEnv->windowResInfo;

  for (int32_t w = 0; w < pWindowResInfo->size; ++w) {
    SWindowResult *pResult = getWindowResult(pWindowResInfo, w);
    if (!pResult->status.closed) {
      continue;
    }

    setWindowResOutputBuf(pRuntimeEnv, pResult);

    for (int32_t col = 0; col < pQuery->numOfOutput; ++col) {
      int16_t functId = pQuery->pSelectExpr[col].base.functionId;
      if (functId != TSDB_FUNC_TS) {
        aAggs[functId].xNextStep(&pRuntimeEnv->pCtx[col]);

        SResultInfo *pResInfo = GET_RES_INFO(&pRuntimeEnv->pCtx[col]);
        if (!pResInfo->complete) {
          again = true;
        }
      }
    }
  }

  return again;
}

H
Haojun Liao 已提交
3015
/*
 * Capture the query state that has to be restored after repeated/reverse scans:
 * status flags, current window index, the start key, the query window, and the
 * window [start, end key of the current table]. Members not listed stay zeroed.
 *
 * `start` must not be beyond the last key of the current table in scan direction (asserted).
 */
static SQueryStatusInfo getQueryStatusInfo(SQueryRuntimeEnv *pRuntimeEnv, TSKEY start) {
  SQuery *         pQuery = pRuntimeEnv->pQuery;
  STableQueryInfo *pCurrent = pQuery->current;

  assert((QUERY_IS_ASC_QUERY(pQuery) && start <= pCurrent->lastKey) ||
         (!QUERY_IS_ASC_QUERY(pQuery) && start >= pCurrent->lastKey));

  SQueryStatusInfo saved = {0};

  saved.status = pQuery->status;
  saved.windowIndex = pRuntimeEnv->windowResInfo.curIndex;
  saved.lastKey = start;
  saved.w = pQuery->window;
  saved.curWindow.skey = start;
  saved.curWindow.ekey = pCurrent->win.ekey;

  return saved;
}

3033 3034 3035 3036
/*
 * Switch the runtime environment to the reverse scan.
 *
 * Saves the timestamp buffer cursor in pStatus, flips the order of the
 * timestamp buffer (if any), reverses the query window taken from
 * pStatus->curWindow, flips the query order, sets the reverse scan flag and
 * replaces the secondary query handle with one covering the reversed window.
 * Finally the query status is reset to "not completed", the order of all
 * function contexts is flipped, and the functions taking part in the reverse
 * scan are selected.
 */
static void setEnvBeforeReverseScan(SQueryRuntimeEnv *pRuntimeEnv, SQueryStatusInfo *pStatus) {
  SQuery *pQuery = pRuntimeEnv->pQuery;
  SQInfo *pQInfo = GET_QINFO_ADDR(pRuntimeEnv);

  // save the cursor so that it can be restored after the reverse scan
  pStatus->cur = tsBufGetCursor(pRuntimeEnv->pTSBuf);
  if (pRuntimeEnv->pTSBuf != NULL) {
    SWITCH_ORDER(pRuntimeEnv->pTSBuf->cur.order);
    tsBufNextPos(pRuntimeEnv->pTSBuf);
  }

  // reverse order time range
  pQuery->window = pStatus->curWindow;
  SWAP(pQuery->window.skey, pQuery->window.ekey, TSKEY);

  SWITCH_ORDER(pQuery->order.order);
  SET_REVERSE_SCAN_FLAG(pRuntimeEnv);

  STsdbQueryCond cond = {0};
  cond.twindow = pQuery->window;
  cond.order = pQuery->order.order;
  cond.colList = pQuery->colList;
  cond.numOfCols = pQuery->numOfCols;

  // release the handle of a previous secondary scan before creating a new one
  if (pRuntimeEnv->pSecQueryHandle != NULL) {
    tsdbCleanupQueryHandle(pRuntimeEnv->pSecQueryHandle);
  }

  pRuntimeEnv->pSecQueryHandle = tsdbQueryTables(pQInfo->tsdb, &cond, &pQInfo->tableIdGroupInfo, pQInfo);

  setQueryStatus(pQuery, QUERY_NOT_COMPLETED);
  switchCtxOrder(pRuntimeEnv);
  disableFuncInReverseScan(pQInfo);
}

3069 3070
/*
 * Undo setEnvBeforeReverseScan(): flip the query order and the order of all
 * function contexts back, restore the timestamp buffer cursor, set the master
 * scan flag, and restore last key, status and window from pStatus.
 */
static void clearEnvAfterReverseScan(SQueryRuntimeEnv *pRuntimeEnv, SQueryStatusInfo *pStatus) {
  SQuery *         pQuery = pRuntimeEnv->pQuery;
  STableQueryInfo *pCurrent = pQuery->current;

  SWITCH_ORDER(pQuery->order.order);
  switchCtxOrder(pRuntimeEnv);

  tsBufSetCursor(pRuntimeEnv->pTSBuf, &pStatus->cur);
  if (pRuntimeEnv->pTSBuf != NULL) {
    pRuntimeEnv->pTSBuf->cur.order = pQuery->order.order;
  }

  SET_MASTER_SCAN_FLAG(pRuntimeEnv);

  // bring back the state captured before the reverse scan; the window limits the
  // scan scope of a sliding query
  pCurrent->lastKey = pStatus->lastKey;
  pQuery->status = pStatus->status;

  pCurrent->win = pStatus->w;
  pQuery->window = pCurrent->win;
}

3092
/*
 * Scan the data blocks of the current table, starting from `start`.
 *
 * 1. Master scan, followed by repeated scans over the range covered by the
 *    master scan for as long as some function reports that it is not complete.
 *    The loop is left early when the query has been killed.
 * 2. If the query requires it, one reverse scan over the same range, after
 *    which the environment is restored.
 */
void scanOneTableDataBlocks(SQueryRuntimeEnv *pRuntimeEnv, TSKEY start) {
  SQInfo *         pQInfo = (SQInfo *) GET_QINFO_ADDR(pRuntimeEnv);
  SQuery *         pQuery = pRuntimeEnv->pQuery;
  STableQueryInfo *pCurrent = pQuery->current;

  setQueryStatus(pQuery, QUERY_NOT_COMPLETED);

  // store the start query position
  SQueryStatusInfo saved = getQueryStatusInfo(pRuntimeEnv, start);

  SET_MASTER_SCAN_FLAG(pRuntimeEnv);
  int32_t step = GET_FORWARD_DIRECTION_FACTOR(pQuery->order.order);

  for (;;) {
    doScanAllDataBlocks(pRuntimeEnv);

    // only the master scan defines the status and the range used by the later scans
    if (IS_MASTER_SCAN(pRuntimeEnv)) {
      saved.status = pQuery->status;
      saved.curWindow.ekey = pCurrent->lastKey - step;
      saved.lastKey = pCurrent->lastKey;
    }

    if (!needScanDataBlocksAgain(pRuntimeEnv)) {
      // restore the status code and jump out of loop
      if (pRuntimeEnv->scanFlag == REPEAT_SCAN) {
        pQuery->status = saved.status;
      }

      break;
    }

    STsdbQueryCond cond = {0};
    cond.twindow = saved.curWindow;
    cond.order = pQuery->order.order;
    cond.colList = pQuery->colList;
    cond.numOfCols = pQuery->numOfCols;

    if (pRuntimeEnv->pSecQueryHandle != NULL) {
      tsdbCleanupQueryHandle(pRuntimeEnv->pSecQueryHandle);
    }

    pRuntimeEnv->pSecQueryHandle = tsdbQueryTables(pQInfo->tsdb, &cond, &pQInfo->tableIdGroupInfo, pQInfo);
    pRuntimeEnv->windowResInfo.curIndex = saved.windowIndex;

    setQueryStatus(pQuery, QUERY_NOT_COMPLETED);
    pRuntimeEnv->scanFlag = REPEAT_SCAN;

    qTrace("QInfo:%p start to repeat scan data blocks due to query func required, qrange:%"PRId64"-%"PRId64, pQInfo,
        cond.twindow.skey, cond.twindow.ekey);

    // check if query is killed or not
    if (isQueryKilled(pQInfo)) {
      return;
    }
  }

  if (needReverseScan(pQuery)) {
    setEnvBeforeReverseScan(pRuntimeEnv, &saved);

    // reverse scan from current position
    qTrace("QInfo:%p start to reverse scan", pQInfo);
    doScanAllDataBlocks(pRuntimeEnv);

    clearEnvAfterReverseScan(pRuntimeEnv, &saved);
  }
}

H
hjxilinx 已提交
3162
/*
 * Run the xFinalize callback of every output function.
 *
 * For group-by-normal-column and interval queries this is done per closed
 * window result (for group-by-normal-column all windows are closed first), and
 * the number of output rows of each window is recorded. Otherwise the
 * callbacks run once on the current function contexts.
 */
void finalizeQueryResult(SQueryRuntimeEnv *pRuntimeEnv) {
  SQuery *pQuery = pRuntimeEnv->pQuery;

  bool groupbyCol = isGroupbyNormalCol(pQuery->pGroupbyExpr);
  if (!(groupbyCol || isIntervalQuery(pQuery))) {
    for (int32_t col = 0; col < pQuery->numOfOutput; ++col) {
      aAggs[pQuery->pSelectExpr[col].base.functionId].xFinalize(&pRuntimeEnv->pCtx[col]);
    }

    return;
  }

  SWindowResInfo *pWindowResInfo = &pRuntimeEnv->windowResInfo;
  if (isGroupbyNormalCol(pQuery->pGroupbyExpr)) {
    closeAllTimeWindow(pWindowResInfo);
  }

  for (int32_t w = 0; w < pWindowResInfo->size; ++w) {
    if (!isWindowResClosed(pWindowResInfo, w)) {
      continue;
    }

    SWindowResult *pRes = &pWindowResInfo->pResult[w];
    setWindowResOutputBuf(pRuntimeEnv, pRes);

    for (int32_t col = 0; col < pQuery->numOfOutput; ++col) {
      aAggs[pQuery->pSelectExpr[col].base.functionId].xFinalize(&pRuntimeEnv->pCtx[col]);
    }

    /*
     * number of output rows of this window: usually 1 for group by normal
     * columns, except for the top and bottom query
     */
    pRes->numOfRows = getNumOfResult(pRuntimeEnv);
  }
}

/**
 * Tell whether the select list contains at least one output column whose function is something
 * other than TSDB_FUNC_TS, TSDB_FUNC_TAG or TSDB_FUNC_TAGPRJ.
 *
 * @return true if such a column exists, false otherwise (including an empty select list)
 */
static bool hasMainOutput(SQuery *pQuery) {
  for (int32_t col = 0; col < pQuery->numOfOutput; ++col) {
    int32_t fid = pQuery->pSelectExpr[col].base.functionId;

    bool auxiliary = (fid == TSDB_FUNC_TS) || (fid == TSDB_FUNC_TAG) || (fid == TSDB_FUNC_TAGPRJ);
    if (!auxiliary) {
      return true;
    }
  }

  return false;
}

weixin_48148422's avatar
weixin_48148422 已提交
3210 3211 3212 3213 3214
/**
 * Allocate and initialize the per-table query state.
 *
 * The object is zero-filled by calloc(); the query window is copied from `win`, lastKey starts at
 * win.skey, and the ts-buffer cursor is marked as not positioned yet (vgroupIndex == -1).
 *
 * @param pRuntimeEnv  runtime environment handed to initWindowResInfo()
 * @param tableId      id of the table this state belongs to
 * @param win          query time window for the table
 * @return the new object, released with destroyTableQueryInfo(); NULL if the allocation fails
 */
static STableQueryInfo *createTableQueryInfo(
  SQueryRuntimeEnv *pRuntimeEnv,
  STableId tableId,
  STimeWindow win
) {
  STableQueryInfo *pTableQueryInfo = calloc(1, sizeof(STableQueryInfo));
  if (pTableQueryInfo == NULL) {  // out of memory: do not dereference a NULL pointer below
    return NULL;
  }

  pTableQueryInfo->win = win;
  pTableQueryInfo->lastKey = win.skey;

  pTableQueryInfo->id = tableId;
  pTableQueryInfo->cur.vgroupIndex = -1;

  initWindowResInfo(&pTableQueryInfo->windowResInfo, pRuntimeEnv, 100, 100, TSDB_DATA_TYPE_INT);
  return pTableQueryInfo;
}

3227
/**
 * Release a per-table query state object together with its window result info.
 *
 * @param pTableQueryInfo  object to release; NULL is accepted and ignored
 * @param numOfCols        forwarded to cleanupTimeWindowInfo()
 */
void destroyTableQueryInfo(STableQueryInfo *pTableQueryInfo, int32_t numOfCols) {
  if (pTableQueryInfo != NULL) {
    cleanupTimeWindowInfo(&pTableQueryInfo->windowResInfo, numOfCols);
    free(pTableQueryInfo);
  }
}

3236
/**
 * Make pTableQueryInfo the table the query is currently working on (pQuery->current).
 *
 * Debug builds verify that lastKey has not moved behind the window start: lastKey >= win.skey for
 * an ascending query, lastKey <= win.skey otherwise.
 */
void setCurrentQueryTable(SQueryRuntimeEnv *pRuntimeEnv, STableQueryInfo *pTableQueryInfo) {
  SQuery *pQuery = pRuntimeEnv->pQuery;
  pQuery->current = pTableQueryInfo;

  assert(QUERY_IS_ASC_QUERY(pQuery) ? (pTableQueryInfo->lastKey >= pTableQueryInfo->win.skey)
                                    : (pTableQueryInfo->lastKey <= pTableQueryInfo->win.skey));
}

/**
 * set output buffer for different group
 * @param pRuntimeEnv
3247
 * @param pDataBlockInfo
3248
 */
3249
void setExecutionContext(SQInfo *pQInfo, STableId* pTableId, int32_t groupIndex, TSKEY nextKey) {
3250
  SQueryRuntimeEnv *pRuntimeEnv = &pQInfo->runtimeEnv;
H
hjxilinx 已提交
3251 3252
  STableQueryInfo *pTableQueryInfo = pRuntimeEnv->pQuery->current;
  
3253 3254
  SWindowResInfo *  pWindowResInfo = &pRuntimeEnv->windowResInfo;
  int32_t           GROUPRESULTID = 1;
3255

3256
  SWindowResult *pWindowRes = doSetTimeWindowFromKey(pRuntimeEnv, pWindowResInfo, (char *)&groupIndex, sizeof(groupIndex));
3257 3258 3259
  if (pWindowRes == NULL) {
    return;
  }
3260

3261 3262 3263 3264 3265 3266 3267 3268 3269 3270
  /*
   * not assign result buffer yet, add new result buffer
   * all group belong to one result set, and each group result has different group id so set the id to be one
   */
  if (pWindowRes->pos.pageId == -1) {
    if (addNewWindowResultBuf(pWindowRes, pRuntimeEnv->pResultBuf, GROUPRESULTID, pRuntimeEnv->numOfRowsPerPage) !=
        TSDB_CODE_SUCCESS) {
      return;
    }
  }
3271

3272 3273
  setWindowResOutputBuf(pRuntimeEnv, pWindowRes);
  initCtxOutputBuf(pRuntimeEnv);
3274

3275
  pTableQueryInfo->lastKey = nextKey;
H
hjxilinx 已提交
3276
  setAdditionalInfo(pQInfo, pTableId, pTableQueryInfo);
3277 3278
}

H
Haojun Liao 已提交
3279
/**
 * Bind every output function context to the storage of the given window result.
 *
 * For each output column the context receives the column's position in the result page and the
 * matching result info entry. TOP, BOTTOM and DIFF additionally get the output buffer of column 0
 * as their timestamp output buffer. The super table flag of the result info is refreshed too.
 */
void setWindowResOutputBuf(SQueryRuntimeEnv *pRuntimeEnv, SWindowResult *pResult) {
  SQuery *pQuery = pRuntimeEnv->pQuery;

  // Note: pResult->pos[i]->num == 0, there is only fixed number of results for each group
  for (int32_t col = 0; col < pQuery->numOfOutput; ++col) {
    SQLFunctionCtx *pCtx = &pRuntimeEnv->pCtx[col];
    int32_t         fid = pQuery->pSelectExpr[col].base.functionId;

    pCtx->aOutputBuf = getPosInResultPage(pRuntimeEnv, col, pResult);

    bool needTsBuf = (fid == TSDB_FUNC_TOP) || (fid == TSDB_FUNC_BOTTOM) || (fid == TSDB_FUNC_DIFF);
    if (needTsBuf) {
      pCtx->ptsOutputBuf = pRuntimeEnv->pCtx[0].aOutputBuf;
    }

    // output buffer information and intermediate buffer; not every query needs the latter (e.g. COUNT)
    pCtx->resultInfo = &pResult->resultInfo[col];

    // set super table query flag
    SResultInfo *pResInfo = GET_RES_INFO(pCtx);
    pResInfo->superTableQ = pRuntimeEnv->stableQuery;
  }
}

H
Haojun Liao 已提交
3304 3305
/**
 * Bind the output contexts to a window result and initialize those that were never initialized.
 *
 * Each context first receives its result info entry. Columns whose result is already complete are
 * left alone after that. For the others the output buffer is set, currentStage is reset to 0, the
 * timestamp buffer is wired for TOP/BOTTOM/DIFF, the super table flag is refreshed and, if the
 * result info is not initialized yet, the function's init callback is run.
 */
void setWindowResOutputBufInitCtx(SQueryRuntimeEnv *pRuntimeEnv, SWindowResult *pResult) {
  SQuery *pQuery = pRuntimeEnv->pQuery;

  // Note: pResult->pos[i]->num == 0, there is only fixed number of results for each group
  for (int32_t col = 0; col < pQuery->numOfOutput; ++col) {
    SQLFunctionCtx *pCtx = &pRuntimeEnv->pCtx[col];
    pCtx->resultInfo = &pResult->resultInfo[col];

    if (!pCtx->resultInfo->complete) {
      pCtx->aOutputBuf = getPosInResultPage(pRuntimeEnv, col, pResult);
      pCtx->currentStage = 0;

      int32_t fid = pCtx->functionId;
      bool    needTsBuf = (fid == TSDB_FUNC_TOP) || (fid == TSDB_FUNC_BOTTOM) || (fid == TSDB_FUNC_DIFF);
      if (needTsBuf) {
        pCtx->ptsOutputBuf = pRuntimeEnv->pCtx[0].aOutputBuf;
      }

      // set super table query flag
      pCtx->resultInfo->superTableQ = pRuntimeEnv->stableQuery;

      if (!pCtx->resultInfo->initialized) {
        aAggs[fid].init(pCtx);
      }
    }
  }
}

H
hjxilinx 已提交
3336
/**
 * Set the tag values for the given table and, when a ts buffer is in use, position its cursor.
 *
 * On the first visit of a table (cursor vgroupIndex == -1) the tag is taken from the first output
 * context, the ts buffer is positioned with tsBufGetElemStartPos() and the resulting cursor is
 * remembered in the table state; on later visits the remembered cursor is restored.
 *
 * @return always 0
 */
int32_t setAdditionalInfo(SQInfo *pQInfo, STableId* pTableId, STableQueryInfo *pTableQueryInfo) {
  SQueryRuntimeEnv *pRuntimeEnv = &pQInfo->runtimeEnv;
  assert(pTableQueryInfo->lastKey >= TSKEY_INITIAL_VAL);

  setTagVal(pRuntimeEnv, pTableId, pQInfo->tsdb);

  if (pRuntimeEnv->pTSBuf == NULL) {
    return 0;
  }

  // both the master and supplement scan needs to set the correct ts comp start position
  if (pTableQueryInfo->cur.vgroupIndex != -1) {
    tsBufSetCursor(pRuntimeEnv->pTSBuf, &pTableQueryInfo->cur);
  } else {
    pTableQueryInfo->tag = pRuntimeEnv->pCtx[0].tag.i64Key;
    tsBufGetElemStartPos(pRuntimeEnv->pTSBuf, 0, pTableQueryInfo->tag);

    // keep the cursor info of the current table
    pTableQueryInfo->cur = pRuntimeEnv->pTSBuf->cur;
  }

  return 0;
}

/*
 * There are two cases to handle:
 *
 * 1. Query range is not set yet (queryRangeSet = 0). we need to set the query range info, including pQuery->lastKey,
 *    pQuery->window.skey, and pQuery->eKey.
 * 2. Query range is set and query is in progress. There may be another result with the same query ranges to be
 *    merged during merge stage. In this case, we need the pTableQueryInfo->lastResRows to decide if there
 *    is a previous result generated or not.
 */
H
hjxilinx 已提交
3368
void setIntervalQueryRange(SQInfo *pQInfo, TSKEY key) {
3369 3370
  SQueryRuntimeEnv *pRuntimeEnv = &pQInfo->runtimeEnv;
  SQuery *          pQuery = pRuntimeEnv->pQuery;
H
hjxilinx 已提交
3371 3372
  STableQueryInfo *pTableQueryInfo = pQuery->current;
  
3373 3374 3375
  if (pTableQueryInfo->queryRangeSet) {
    pTableQueryInfo->lastKey = key;
  } else {
3376
    pTableQueryInfo->win.skey = key;
3377
    STimeWindow win = {.skey = key, .ekey = pQuery->window.ekey};
3378

3379 3380 3381 3382 3383
    // for too small query range, no data in this interval.
    if ((QUERY_IS_ASC_QUERY(pQuery) && (pQuery->window.ekey < pQuery->window.skey)) ||
        (!QUERY_IS_ASC_QUERY(pQuery) && (pQuery->window.skey < pQuery->window.ekey))) {
      return;
    }
3384

3385 3386 3387 3388 3389 3390
    /**
     * In handling the both ascending and descending order super table query, we need to find the first qualified
     * timestamp of this table, and then set the first qualified start timestamp.
     * In ascending query, key is the first qualified timestamp. However, in the descending order query, additional
     * operations involve.
     */
H
Haojun Liao 已提交
3391
    STimeWindow     w = TSWINDOW_INITIALIZER, realWin = TSWINDOW_INITIALIZER;
3392
    SWindowResInfo *pWindowResInfo = &pTableQueryInfo->windowResInfo;
3393

H
Haojun Liao 已提交
3394 3395
    TSKEY sk = MIN(win.skey, win.ekey);
    TSKEY ek = MAX(win.skey, win.ekey);
H
Haojun Liao 已提交
3396
    getAlignQueryTimeWindow(pQuery, win.skey, sk, ek, &realWin, &w);
3397
    pWindowResInfo->startTime = pTableQueryInfo->win.skey;  // windowSKey may be 0 in case of 1970 timestamp
3398

3399 3400
    if (pWindowResInfo->prevSKey == TSKEY_INITIAL_VAL) {
      if (!QUERY_IS_ASC_QUERY(pQuery)) {
H
Haojun Liao 已提交
3401
        assert(win.ekey == pQuery->window.ekey);
3402
      }
3403 3404
      
      pWindowResInfo->prevSKey = w.skey;
3405
    }
3406

3407
    pTableQueryInfo->queryRangeSet = 1;
3408
    pTableQueryInfo->lastKey = pTableQueryInfo->win.skey;
3409 3410 3411 3412
  }
}

/**
 * Tell whether any output function of the query has the TSDB_FUNCSTATE_NEED_TS status bit set.
 */
bool requireTimestamp(SQuery *pQuery) {
  int32_t col = 0;

  while (col < pQuery->numOfOutput) {
    int32_t fid = pQuery->pSelectExpr[col].base.functionId;
    if (aAggs[fid].nStatus & TSDB_FUNCSTATE_NEED_TS) {
      return true;
    }

    col++;
  }

  return false;
}

/**
 * Decide whether the primary timestamp column of a data block has to be loaded.
 *
 * 1. if the current table's lastKey or the query end key locates in this block, the timestamp
 *    column is needed to decide the precise position;
 * 2. if any output function requires timestamps (see requireTimestamp()), it is needed in any case.
 */
bool needPrimaryTimestampCol(SQuery *pQuery, SDataBlockInfo *pDataBlockInfo) {
  STimeWindow *    pBlockWin = &pDataBlockInfo->window;
  STableQueryInfo *pCurrent = pQuery->current;

  if (pCurrent->lastKey >= pBlockWin->skey && pCurrent->lastKey <= pBlockWin->ekey) {
    return true;
  }

  if (pQuery->window.ekey >= pBlockWin->skey && pQuery->window.ekey <= pBlockWin->ekey) {
    return true;
  }

  return requireTimestamp(pQuery);
}

/**
 * Number of result subsets of the query: the number of closed time windows for group-by-normal-column
 * and interval queries, the size of the group list otherwise.
 */
static int32_t getNumOfSubset(SQInfo *pQInfo) {
  SQuery *pQuery = pQInfo->runtimeEnv.pQuery;

  bool windowed = isGroupbyNormalCol(pQuery->pGroupbyExpr) || isIntervalQuery(pQuery);
  if (windowed) {
    return numOfClosedTimeWindow(&pQInfo->runtimeEnv.windowResInfo);
  }

  int32_t numOfGroups = taosArrayGetSize(pQInfo->groupInfo.pGroupList);
  return numOfGroups;
}

/**
 * Copy rows from the window results into the query's output buffers (pQuery->sdata).
 *
 * Copying resumes at pQInfo->groupIndex / pQInfo->offset and walks the results forwards for
 * TSDB_ORDER_ASC or backwards otherwise. It stops when all subsets are consumed or the output
 * buffer holds pQuery->rec.capacity rows; groupIndex and offset are updated so that a later call
 * continues where this one stopped.
 *
 * @param pQInfo     query handle, carries the resume position
 * @param result     array of window results to copy from
 * @param orderType  TSDB_ORDER_ASC for ascending traversal, anything else for descending
 * @return number of rows placed into the output buffers by this call
 */
static int32_t doCopyToSData(SQInfo *pQInfo, SWindowResult *result, int32_t orderType) {
  SQueryRuntimeEnv *pRuntimeEnv = &pQInfo->runtimeEnv;
  SQuery *          pQuery = pRuntimeEnv->pQuery;

  qTrace("QInfo:%p start to copy data from windowResInfo to query buf", pQInfo);
  int32_t numOfSubset = getNumOfSubset(pQInfo);

  const bool    ascending = (orderType == TSDB_ORDER_ASC);
  const int32_t stride = ascending ? 1 : -1;
  const int32_t first = ascending ? pQInfo->groupIndex : numOfSubset - pQInfo->groupIndex - 1;

  int32_t copied = 0;

  for (int32_t idx = first; (idx >= 0) && (idx < numOfSubset); idx += stride) {
    SWindowResult *pRes = &result[idx];

    // empty group: move on to the next one
    if (pRes->numOfRows == 0) {
      pQInfo->offset = 0;
      pQInfo->groupIndex += 1;
      continue;
    }

    assert(pRes->numOfRows >= 0 && pQInfo->offset <= 1);

    int32_t skipped = pQInfo->offset;
    int32_t pending = pRes->numOfRows - pQInfo->offset;

    /*
     * current output space is not enough to keep all the result data of this group, only copy partial results
     * to SQuery object's result buffer
     */
    if (pending > pQuery->rec.capacity - copied) {
      pending = pQuery->rec.capacity - copied;
      pQInfo->offset += pending;
    } else {
      pQInfo->offset = 0;
      pQInfo->groupIndex += 1;
    }

    for (int32_t col = 0; col < pQuery->numOfOutput; ++col) {
      int32_t width = pRuntimeEnv->pCtx[col].outputBytes;

      char *src = getPosInResultPage(pRuntimeEnv, col, pRes);
      char *dst = pQuery->sdata[col]->data + copied * width;
      memcpy(dst, src + skipped * width, width * pending);
    }

    copied += pending;
    if (copied == pQuery->rec.capacity) {
      break;
    }
  }

  qTrace("QInfo:%p copy data to query buf completed", pQInfo);

#ifdef _DEBUG_VIEW
  displayInterResult(pQuery->sdata, pRuntimeEnv, copied);
#endif
  return copied;
}

/**
 * copyFromWindowResToSData supports copying data in ascending/descending order.
 * The order is taken from the group-by expression when the query has one, otherwise the data is
 * copied in ascending order (per the original note, interval query results are already ordered
 * in SWindowResult, and group-by results are all complete by the time this is called).
 * The number of copied rows is added to pQuery->rec.rows.
 *
 * @param pQInfo  query handle
 * @param result  array of window results to copy from
 */
void copyFromWindowResToSData(SQInfo *pQInfo, SWindowResult *result) {
  SQuery *pQuery = pQInfo->runtimeEnv.pQuery;

  int32_t orderType = TSDB_ORDER_ASC;
  if (pQuery->pGroupbyExpr != NULL) {
    orderType = pQuery->pGroupbyExpr->orderType;
  }

  pQuery->rec.rows += doCopyToSData(pQInfo, result, orderType);
  assert(pQuery->rec.rows <= pQuery->rec.capacity);
}

3534
/**
 * For queries without a time interval (intervalTime == 0), raise the row count of every window
 * result to the largest numOfRes among its output columns. Columns produced by TSDB_FUNC_TS,
 * TSDB_FUNC_TAG and TSDB_FUNC_TAGPRJ are not taken into account. Interval queries are left untouched.
 *
 * @param pRuntimeEnv      runtime environment holding the window results
 * @param pTableQueryInfo  not used by the current implementation
 */
static UNUSED_FUNC void updateWindowResNumOfRes(SQueryRuntimeEnv *pRuntimeEnv, STableQueryInfo *pTableQueryInfo) {
  SQuery *pQuery = pRuntimeEnv->pQuery;

  if (pQuery->intervalTime != 0) {
    return;
  }

  // only update the number of rows for the corresponding window result
  for (int32_t idx = 0; idx < pRuntimeEnv->windowResInfo.size; ++idx) {
    SWindowResult *pRes = &pRuntimeEnv->windowResInfo.pResult[idx];

    for (int32_t col = 0; col < pQuery->numOfOutput; ++col) {
      int32_t fid = pRuntimeEnv->pCtx[col].functionId;
      bool    auxiliary = (fid == TSDB_FUNC_TS) || (fid == TSDB_FUNC_TAG) || (fid == TSDB_FUNC_TAGPRJ);

      if (!auxiliary) {
        pRes->numOfRows = MAX(pRes->numOfRows, pRes->resultInfo[col].numOfRes);
      }
    }
  }
}

3563 3564
/**
 * Apply the query functions to one data block of the current table of a super table query.
 *
 * The scan position starts at the first row for ascending queries and at the last row otherwise.
 * Rows are processed one by one when the query has filter columns, a ts buffer or groups by a
 * normal column; otherwise the block is processed as a whole. The window result row counts are
 * refreshed afterwards.
 */
void stableApplyFunctionsOnBlock(SQueryRuntimeEnv *pRuntimeEnv, SDataBlockInfo *pDataBlockInfo, SDataStatis *pStatis,
    SArray *pDataBlock, __block_search_fn_t searchFn) {
  SQuery *         pQuery = pRuntimeEnv->pQuery;
  STableQueryInfo *pCurrent = pQuery->current;
  SWindowResInfo * pResInfo = &pCurrent->windowResInfo;

  if (QUERY_IS_ASC_QUERY(pQuery)) {
    pQuery->pos = 0;
  } else {
    pQuery->pos = pDataBlockInfo->rows - 1;
  }

  bool rowwise =
      pQuery->numOfFilterCols > 0 || pRuntimeEnv->pTSBuf != NULL || isGroupbyNormalCol(pQuery->pGroupbyExpr);

  if (rowwise) {
    rowwiseApplyFunctions(pRuntimeEnv, pStatis, pDataBlockInfo, pResInfo, pDataBlock);
  } else {
    blockwiseApplyFunctions(pRuntimeEnv, pStatis, pDataBlockInfo, pResInfo, searchFn, pDataBlock);
  }

  updateWindowResNumOfRes(pRuntimeEnv, pCurrent);
}

3580 3581 3582 3583
/**
 * Tell whether the fill operator still has rows to hand out for this query.
 *
 * Returns false for queries without fill and for point interpolation queries, and also once the
 * limit has been reached. Otherwise the answer is true when the fill info reports remaining rows,
 * or when the query is completed and getFilledNumOfRes() still yields rows up to the window end key.
 */
bool queryHasRemainResults(SQueryRuntimeEnv* pRuntimeEnv) {
  SQuery *   pQuery = pRuntimeEnv->pQuery;
  SFillInfo *pFillInfo = pRuntimeEnv->pFillInfo;

  // todo refactor
  if (pQuery->fillType == TSDB_FILL_NONE || isPointInterpoQuery(pQuery)) {
    assert(pFillInfo == NULL);
    return false;
  }

  bool limitReached = (pQuery->limit.limit > 0) && (pQuery->rec.rows >= pQuery->limit.limit);
  if (limitReached) {
    return false;
  }

  // There are results not returned to client, fill operation applied to the remain result set in the
  // first place is required.
  if (taosNumOfRemainRows(pFillInfo) > 0) {
    return true;
  }

  /*
   * While the code reaches here, there are no results returned to client now.
   * If query is not completed yet, the gaps between two results blocks need to be handled after next data block
   * is retrieved from TSDB.
   *
   * NOTE: If the result set is not the first block, the gap in front of the result set will be filled. If the result
   * set is the FIRST result block, the gap between the start time of query time window and the timestamp of the
   * first result row in the actual result set will fill nothing.
   */
  if (!Q_STATUS_EQUAL(pQuery->status, QUERY_COMPLETED)) {
    return false;
  }

  int32_t numOfFilled = getFilledNumOfRes(pFillInfo, pQuery->window.ekey, pQuery->rec.capacity);
  return numOfFilled > 0;
}

/**
 * Serialize the query result into the response payload.
 *
 * Layout written to `data`: the output columns one after another (bytes * numOfRows each), then the
 * number of table id entries as a 32-bit value in network byte order, then one STableIdInfo per
 * entry with uid/tid/key converted to big-endian.
 *
 * After copying, a completed query is switched to QUERY_OVER when nothing is left to return: for
 * super table queries once tableIndex has passed the number of tables, otherwise once the fill
 * operator has no remaining results.
 *
 * @param pQInfo     query handle
 * @param numOfRows  number of rows to copy from every output column
 * @param data       destination buffer; the caller must provide enough room for the layout above
 */
static void doCopyQueryResultToMsg(SQInfo *pQInfo, int32_t numOfRows, char *data) {
  SQuery *pQuery = pQInfo->runtimeEnv.pQuery;
  for (int32_t col = 0; col < pQuery->numOfOutput; ++col) {
    int32_t bytes = pQuery->pSelectExpr[col].bytes;

    memmove(data, pQuery->sdata[col]->data, bytes * numOfRows);
    data += bytes * numOfRows;
  }

  /*
   * `data` now sits at an arbitrary byte offset (sum of bytes * numOfRows), so it may not be
   * suitably aligned for int32_t/STableIdInfo. Build the values in aligned locals and memcpy them
   * instead of storing through a casted pointer.
   */
  int32_t numOfTables = (int32_t)taosArrayGetSize(pQInfo->arrTableIdInfo);
  int32_t netNumOfTables = htonl(numOfTables);
  memcpy(data, &netNumOfTables, sizeof(netNumOfTables));
  data += sizeof(int32_t);

  for(int32_t i = 0; i < numOfTables; i++) {
    STableIdInfo* pSrc = taosArrayGet(pQInfo->arrTableIdInfo, i);

    STableIdInfo dst;
    memset(&dst, 0, sizeof(dst));  // keep padding bytes deterministic on the wire
    dst.uid = htobe64(pSrc->uid);
    dst.tid = htonl(pSrc->tid);
    dst.key = htobe64(pSrc->key);

    memcpy(data, &dst, sizeof(dst));
    data += sizeof(STableIdInfo);
  }

  // all data returned, set query over
  if (Q_STATUS_EQUAL(pQuery->status, QUERY_COMPLETED)) {
    if (pQInfo->runtimeEnv.stableQuery) {
      if (pQInfo->tableIndex >= pQInfo->groupInfo.numOfTables) {
        setQueryStatus(pQuery, QUERY_OVER);
      }
    } else {
      if (!queryHasRemainResults(&pQInfo->runtimeEnv)) {
        setQueryStatus(pQuery, QUERY_OVER);
      }
    }
  }
}

H
Haojun Liao 已提交
3653
/**
 * Generate filled result rows and apply the pending limit offset to them.
 *
 * Blocks are produced with taosGenerateDataBlock() into pQuery->sdata until either rows survive
 * the offset or the fill operator has nothing left:
 *  - offset == 0: the generated rows are returned as they are;
 *  - offset < generated rows: the first `offset` rows are dropped, the rest is moved to the start
 *    of every pDst column buffer and the offset is cleared;
 *  - otherwise the whole block is discarded, the offset is reduced and another block is tried.
 *
 * @param pRuntimeEnv   runtime environment of the query
 * @param pDst          per-column buffers in which the surviving rows are moved to the front
 * @param numOfInterpo  not used by the current implementation
 * @return number of rows available in the output buffers (0 if everything was skipped)
 */
int32_t doFillGapsInResults(SQueryRuntimeEnv* pRuntimeEnv, tFilePage **pDst, int32_t *numOfInterpo) {
  SQInfo* pQInfo = GET_QINFO_ADDR(pRuntimeEnv);
  SQuery *pQuery = pRuntimeEnv->pQuery;
  SFillInfo* pFillInfo = pRuntimeEnv->pFillInfo;

  while (1) {
    int32_t ret = taosGenerateDataBlock(pFillInfo, (tFilePage**) pQuery->sdata, pQuery->rec.capacity);

    // todo apply limit output function
    /* reached the start position of according to offset value, return immediately */
    if (pQuery->limit.offset == 0) {
      qTrace("QInfo:%p initial numOfRows:%d, generate filled result:%d rows", pQInfo, pFillInfo->numOfRows, ret);
      return ret;
    }

    if (pQuery->limit.offset < ret) {
      // limit.offset is 64-bit: print it (and values derived from it) with PRId64, not %d
      qTrace("QInfo:%p initial numOfRows:%d, generate filled result:%d rows, offset:%" PRId64
             ". Discard due to offset, remain:%" PRId64 ", new offset:%d",
             pQInfo, pFillInfo->numOfRows, ret, (int64_t)pQuery->limit.offset,
             (int64_t)(ret - pQuery->limit.offset), 0);

      ret -= pQuery->limit.offset;
      // todo !!!!there exactly number of interpo is not valid.
      // todo refactor move to the beginning of buffer
      for (int32_t i = 0; i < pQuery->numOfOutput; ++i) {
        memmove(pDst[i]->data, pDst[i]->data + pQuery->pSelectExpr[i].bytes * pQuery->limit.offset,
                ret * pQuery->pSelectExpr[i].bytes);
      }

      pQuery->limit.offset = 0;
      return ret;
    } else {
      qTrace("QInfo:%p initial numOfRows:%d, generate filled result:%d rows, offset:%" PRId64
             ". Discard due to offset, remain:%d, new offset:%" PRId64,
             pQInfo, pFillInfo->numOfRows, ret, (int64_t)pQuery->limit.offset, 0,
             (int64_t)(pQuery->limit.offset - ret));

      pQuery->limit.offset -= ret;
      pQuery->rec.rows = 0;
      ret = 0;
    }

    // the whole block was skipped; stop if the fill operator cannot produce more rows
    if (!queryHasRemainResults(pRuntimeEnv)) {
      return ret;
    }
  }
}

3698
/**
 * Write the cost summary of a query (elapsed time, block and row counters) to the trace log.
 */
static void queryCostStatis(SQInfo *pQInfo) {
  SQueryCostInfo *pCost = &pQInfo->runtimeEnv.summary;

  // todo add the intermediate result save cost!!
  qTrace("QInfo:%p :cost summary: elpased time:%"PRId64" us, total blocks:%d, use block statis:%d, use block data:%d, "
         "total rows:%"PRId64 ", check rows:%"PRId64,
         pQInfo, pCost->elapsedTime, pCost->totalBlocks, pCost->loadBlockStatis, pCost->loadBlocks,
         pCost->totalRows, pCost->totalCheckedRows);
}

3742 3743
/**
 * Consume the remaining limit offset inside the given data block.
 *
 * If the offset equals the number of rows in the block, the whole block is skipped: lastKey of the
 * current table is moved one step past the block boundary in scan direction and the offset is
 * cleared. Otherwise the scan position is placed `offset` rows into the block (counted from the
 * scan direction's start), lastKey is set to the timestamp at that position, the offset is cleared
 * and the query functions are applied to the rest of the block.
 */
static void updateOffsetVal(SQueryRuntimeEnv *pRuntimeEnv, SDataBlockInfo *pBlockInfo) {
  SQuery *         pQuery = pRuntimeEnv->pQuery;
  STableQueryInfo *pCurrent = pQuery->current;

  int32_t step = GET_FORWARD_DIRECTION_FACTOR(pQuery->order.order);

  // the current block is skipped completely
  if (pQuery->limit.offset == pBlockInfo->rows) {
    pCurrent->lastKey = (QUERY_IS_ASC_QUERY(pQuery) ? pBlockInfo->window.ekey : pBlockInfo->window.skey) + step;
    pQuery->limit.offset = 0;
    return;
  }

  pQuery->pos = QUERY_IS_ASC_QUERY(pQuery) ? pQuery->limit.offset : pBlockInfo->rows - pQuery->limit.offset - 1;
  assert(pQuery->pos >= 0 && pQuery->pos <= pBlockInfo->rows - 1);

  SArray *         pBlock = tsdbRetrieveDataBlock(pRuntimeEnv->pQueryHandle, NULL);
  SColumnInfoData *pTsCol = taosArrayGet(pBlock, 0);
  TSKEY *          tsList = (TSKEY *) pTsCol->pData;

  // the offset is used up: continue from the row at the new position
  pCurrent->lastKey = tsList[pQuery->pos];
  pQuery->limit.offset = 0;

  int32_t numOfRes = tableApplyFunctionsOnBlock(pRuntimeEnv, pBlockInfo, NULL, binarySearchForKey, pBlock);

  qTrace("QInfo:%p check data block, brange:%" PRId64 "-%" PRId64 ", numOfRows:%d, numOfRes:%d, lastKey:%"PRId64, GET_QINFO_ADDR(pRuntimeEnv),
         pBlockInfo->window.skey, pBlockInfo->window.ekey, pBlockInfo->rows, numOfRes, pQuery->current->lastKey);
}
3777

3778 3779 3780 3781 3782
/**
 * Skip whole data blocks to satisfy the limit offset of the query.
 *
 * Nothing is done when there is no offset or the query has filter columns. Otherwise blocks are
 * iterated: while the offset exceeds the rows of a block, the block is skipped, the offset reduced
 * and lastKey of the current table moved one step past the block boundary; the first block that can
 * absorb the remaining offset is handed to updateOffsetVal() and iteration stops. The loop also
 * ends when the query is killed.
 */
void skipBlocks(SQueryRuntimeEnv *pRuntimeEnv) {
  SQuery *pQuery = pRuntimeEnv->pQuery;

  bool nothingToSkip = (pQuery->limit.offset <= 0) || (pQuery->numOfFilterCols > 0);
  if (nothingToSkip) {
    return;
  }

  pQuery->pos = 0;
  int32_t step = GET_FORWARD_DIRECTION_FACTOR(pQuery->order.order);

  STableQueryInfo *pCurrent = pQuery->current;
  TsdbQueryHandleT handle = pRuntimeEnv->pQueryHandle;

  while (tsdbNextDataBlock(handle)) {
    if (isQueryKilled(GET_QINFO_ADDR(pRuntimeEnv))) {
      return;
    }

    SDataBlockInfo block = tsdbRetrieveDataBlockInfo(handle);

    if (pQuery->limit.offset <= block.rows) {
      // find the appropriated start position in current block
      updateOffsetVal(pRuntimeEnv, &block);
      break;
    }

    pQuery->limit.offset -= block.rows;

    pCurrent->lastKey = QUERY_IS_ASC_QUERY(pQuery) ? block.window.ekey : block.window.skey;
    pCurrent->lastKey += step;

    qTrace("QInfo:%p skip rows:%d, offset:%" PRId64, GET_QINFO_ADDR(pRuntimeEnv), block.rows,
           pQuery->limit.offset);
  }
}
3811

H
Haojun Liao 已提交
3812
/**
 * Skip time windows to satisfy the limit offset of an interval query.
 *
 * `*start` is initialised with lastKey of the current table. Nothing else happens when there is no
 * offset, or when the query has filter columns, a ts buffer or a fill operator. Otherwise data
 * blocks are walked window by window; each window that ends inside the current block (seen from
 * the scan direction) consumes one unit of the offset. Once the offset reaches zero, the query
 * start is moved to the next window: if that window begins in the current block, the block is
 * loaded, the start position is located and the query functions are applied to the block right
 * away; otherwise only the start keys are updated.
 *
 * @param pRuntimeEnv  runtime environment of the query
 * @param start        out: timestamp the query continues from
 * @return always true
 */
static bool skipTimeInterval(SQueryRuntimeEnv *pRuntimeEnv, TSKEY* start) {
  SQuery *pQuery = pRuntimeEnv->pQuery;
  *start = pQuery->current->lastKey;

  // if queried with value filter, do NOT forward query start position
  if (pQuery->limit.offset <= 0 || pQuery->numOfFilterCols > 0 || pRuntimeEnv->pTSBuf != NULL ||
      pRuntimeEnv->pFillInfo != NULL) {
    return true;
  }

  /*
   * 1. for interval without interpolation query we forward pQuery->intervalTime at a time for
   *    pQuery->limit.offset times. Since hole exists, pQuery->intervalTime*pQuery->limit.offset value is
   *    not valid. otherwise, we only forward pQuery->limit.offset number of points
   */
  assert(pRuntimeEnv->windowResInfo.prevSKey == TSKEY_INITIAL_VAL);

  STimeWindow aligned = TSWINDOW_INITIALIZER;
  STimeWindow realWin = TSWINDOW_INITIALIZER;

  SWindowResInfo * pResInfo = &pRuntimeEnv->windowResInfo;
  STableQueryInfo *pCurrent = pQuery->current;

  while (tsdbNextDataBlock(pRuntimeEnv->pQueryHandle)) {
    SDataBlockInfo block = tsdbRetrieveDataBlockInfo(pRuntimeEnv->pQueryHandle);

    if (!QUERY_IS_ASC_QUERY(pQuery)) {
      getAlignQueryTimeWindow(pQuery, block.window.ekey, pQuery->window.ekey, block.window.ekey, &realWin, &aligned);

      pResInfo->startTime = pQuery->window.skey;
      pResInfo->prevSKey = aligned.skey;
    } else if (pResInfo->prevSKey == TSKEY_INITIAL_VAL) {
      getAlignQueryTimeWindow(pQuery, block.window.skey, block.window.skey, pQuery->window.ekey, &realWin, &aligned);

      pResInfo->startTime = aligned.skey;
      pResInfo->prevSKey = aligned.skey;
    }

    // the first time window
    STimeWindow curWin = getActiveTimeWindow(pResInfo, pResInfo->prevSKey, pQuery);

    while (pQuery->limit.offset > 0) {
      // the current window ends inside this block: it counts as one skipped window
      bool curEndsInBlock = QUERY_IS_ASC_QUERY(pQuery) ? (curWin.ekey <= block.window.ekey)
                                                       : (curWin.ekey >= block.window.skey);
      if (curEndsInBlock) {
        pQuery->limit.offset -= 1;
        pResInfo->prevSKey = curWin.skey;
      }

      STimeWindow nextWin = curWin;
      getNextTimeWindow(pQuery, &nextWin);

      // does the next time window still touch the current data block?
      bool nextInBlock = QUERY_IS_ASC_QUERY(pQuery) ? (nextWin.skey <= block.window.ekey)
                                                    : (nextWin.ekey >= block.window.skey);

      if (pQuery->limit.offset == 0) {
        if (!nextInBlock) {  // do nothing but move the start keys
          *start = nextWin.skey;
          pQuery->window.skey = nextWin.skey;
          pResInfo->prevSKey = nextWin.skey;
          return true;
        }

        // load the data block and check data remaining in current data block
        // TODO optimize performance
        SArray *         pBlock = tsdbRetrieveDataBlock(pRuntimeEnv->pQueryHandle, NULL);
        SColumnInfoData *pTsCol = taosArrayGet(pBlock, 0);

        nextWin = curWin;
        int32_t firstPos = getNextQualifiedWindow(pRuntimeEnv, &nextWin, &block, pTsCol->pData, binarySearchForKey);
        assert(firstPos >= 0);

        // set the abort info
        pQuery->pos = firstPos;

        // reset the query start timestamp
        pCurrent->win.skey = ((TSKEY *)pTsCol->pData)[firstPos];
        pQuery->window.skey = pCurrent->win.skey;
        *start = pCurrent->win.skey;

        pResInfo->prevSKey = nextWin.skey;
        int32_t savedIndex = pRuntimeEnv->windowResInfo.curIndex;

        int32_t numOfRes = tableApplyFunctionsOnBlock(pRuntimeEnv, &block, NULL, binarySearchForKey, pBlock);
        pRuntimeEnv->windowResInfo.curIndex = savedIndex;  // restore the window index

        qTrace("QInfo:%p check data block, brange:%" PRId64 "-%" PRId64 ", numOfRows:%d, numOfRes:%d, lastKey:%"PRId64,
               GET_QINFO_ADDR(pRuntimeEnv), block.window.skey, block.window.ekey, block.rows, numOfRes, pQuery->current->lastKey);

        return true;
      }

      /*
       * If the next time window still starts from current data block,
       * load the primary timestamp column first, and then find the start position for the next queried time window.
       * Note that only the primary timestamp column is required.
       * TODO: Optimize for this cases. All data blocks are not needed to be loaded, only if the first actually required
       * time window resides in current data block.
       */
      if (!nextInBlock) {
        break;  // offset is not 0, and next time window begins or ends in the next block.
      }

      SArray *         pBlock = tsdbRetrieveDataBlock(pRuntimeEnv->pQueryHandle, NULL);
      SColumnInfoData *pTsCol = taosArrayGet(pBlock, 0);

      nextWin = curWin;
      int32_t firstPos = getNextQualifiedWindow(pRuntimeEnv, &nextWin, &block, pTsCol->pData, binarySearchForKey);
      assert(firstPos >= 0);

      // set the abort info
      pQuery->pos = firstPos;
      pCurrent->lastKey = ((TSKEY *)pTsCol->pData)[firstPos];
      pResInfo->prevSKey = nextWin.skey;
      curWin = nextWin;
    }
  }

  return true;
}

B
Bomin Zhang 已提交
3932 3933
/*
 * Create the primary tsdb query handle (pRuntimeEnv->pQueryHandle) for a query.
 *
 * No handle is created here when the query only touches tags, or when it is a
 * super table query that is neither an interval query nor a fixed-output query.
 *
 * @param tsdb           tsdb repository passed through to the tsdbQuery* calls
 * @param pQInfo         query info object that owns the runtime environment
 * @param isSTableQuery  true when the query runs against a super table
 */
static void setupQueryHandle(void* tsdb, SQInfo* pQInfo, bool isSTableQuery) {
  SQueryRuntimeEnv *pEnv = &pQInfo->runtimeEnv;
  SQuery *pQuery = pEnv->pQuery;

  if (onlyQueryTags(pQuery)) {
    return;
  }

  if (isSTableQuery && !isIntervalQuery(pQuery) && !isFixedOutputQuery(pQuery)) {
    return;
  }

  STsdbQueryCond cond = {0};
  cond.twindow   = pQuery->window;
  cond.order     = pQuery->order.order;
  cond.colList   = pQuery->colList;
  cond.numOfCols = pQuery->numOfCols;

  // an ascending plain scan over exactly one table uses that table's own time window
  bool useTableWindow = (!isSTableQuery) && (pQInfo->groupInfo.numOfTables == 1) &&
                        (cond.order == TSDB_ORDER_ASC) && (!isIntervalQuery(pQuery)) &&
                        (!isGroupbyNormalCol(pQuery->pGroupbyExpr)) && (!isFixedOutputQuery(pQuery));
  if (useTableWindow) {
    SArray*     firstGroup = taosArrayGetP(pQInfo->groupInfo.pGroupList, 0);
    SGroupItem* firstItem  = taosArrayGet(firstGroup, 0);
    cond.twindow = firstItem->info->win;
  }

  // pick the tsdb entry point that matches the query type
  if (isFirstLastRowQuery(pQuery)) {
    pEnv->pQueryHandle = tsdbQueryLastRow(tsdb, &cond, &pQInfo->tableIdGroupInfo, pQInfo);
    return;
  }

  if (isPointInterpoQuery(pQuery)) {
    pEnv->pQueryHandle = tsdbQueryRowsInExternalWindow(tsdb, &cond, &pQInfo->tableIdGroupInfo, pQInfo);
    return;
  }

  pEnv->pQueryHandle = tsdbQueryTables(tsdb, &cond, &pQInfo->tableIdGroupInfo, pQInfo);
}

3972 3973 3974 3975 3976 3977 3978 3979 3980 3981 3982 3983 3984
/*
 * Build the fill-column descriptors for a query: one SFillColInfo per output
 * expression, laid out back to back (each column's offset is the sum of the
 * byte widths of the columns before it).
 *
 * @param pQuery  query whose pSelectExpr/fillVal arrays describe the output columns
 * @return heap-allocated array with pQuery->numOfOutput entries, or NULL if the
 *         allocation fails
 */
static SFillColInfo* taosCreateFillColInfo(SQuery* pQuery) {
  int32_t numOfCols = pQuery->numOfOutput;
  int32_t offset = 0;

  SFillColInfo* pFillCol = calloc(numOfCols, sizeof(SFillColInfo));
  if (pFillCol == NULL) {  // out of memory: do not dereference the result
    return NULL;
  }

  for(int32_t i = 0; i < numOfCols; ++i) {
    SExprInfo* pExprInfo = &pQuery->pSelectExpr[i];

    pFillCol[i].col.bytes  = pExprInfo->bytes;
    pFillCol[i].col.type   = pExprInfo->type;
    pFillCol[i].col.offset = offset;
    pFillCol[i].flag       = TSDB_COL_NORMAL;    // always a normal column for table query
    pFillCol[i].functionId = pExprInfo->base.functionId;
    pFillCol[i].fillVal.i = pQuery->fillVal[i];

    offset += pExprInfo->bytes;
  }

  return pFillCol;
}

3993
/*
 * Initialize the runtime state of a query before it is executed: scan order,
 * tsdb query handle, runtime environment, result buffers and fill info.
 *
 * @param pQInfo         query info object to initialize
 * @param param          optional timestamp buffer, stored as pRuntimeEnv->pTSBuf (may be NULL)
 * @param tsdb           tsdb repository the query runs against
 * @param vgId           vgroup id recorded in pQInfo
 * @param isSTableQuery  true when the query runs against a super table
 * @return TSDB_CODE_SUCCESS, or the error code reported by setupQueryRuntimeEnv()
 *         or createDiskbasedResultBuffer()
 */
int32_t doInitQInfo(SQInfo *pQInfo, void *param, void *tsdb, int32_t vgId, bool isSTableQuery) {
  int32_t code = TSDB_CODE_SUCCESS;

  SQueryRuntimeEnv *pRuntimeEnv = &pQInfo->runtimeEnv;

  SQuery *pQuery = pQInfo->runtimeEnv.pQuery;
  pQuery->precision = tsdbGetCfg(tsdb)->precision;

  setScanLimitationByResultBuffer(pQuery);
  changeExecuteScanOrder(pQuery, false);
  setupQueryHandle(tsdb, pQInfo, isSTableQuery);

  pQInfo->tsdb = tsdb;
  pQInfo->vgId = vgId;

  pRuntimeEnv->pQuery = pQuery;
  pRuntimeEnv->pTSBuf = param;
  pRuntimeEnv->cur.vgroupIndex = -1;
  pRuntimeEnv->stableQuery = isSTableQuery;

  if (param != NULL) {
    // traverse the ts buffer forward when its order matches the query order, backward otherwise
    int16_t order = (pQuery->order.order == pRuntimeEnv->pTSBuf->tsOrder) ? TSDB_ORDER_ASC : TSDB_ORDER_DESC;
    tsBufSetTraverseOrder(pRuntimeEnv->pTSBuf, order);
  }

  // create runtime environment
  code = setupQueryRuntimeEnv(pRuntimeEnv, pQuery->order.order);
  if (code != TSDB_CODE_SUCCESS) {
    return code;
  }

  // The top/bottom flag is an input of getNumOfRowsInResultPage(), so it has to be
  // resolved before that call instead of at the end of this function.
  pRuntimeEnv->topBotQuery = isTopBottomQuery(pQuery);
  pRuntimeEnv->numOfRowsPerPage = getNumOfRowsInResultPage(pQuery, pRuntimeEnv->topBotQuery, isSTableQuery);

  if (isSTableQuery) {
    int32_t rows = getInitialPageNum(pQInfo);
    code = createDiskbasedResultBuffer(&pRuntimeEnv->pResultBuf, rows, pQuery->rowSize, pQInfo);
    if (code != TSDB_CODE_SUCCESS) {
      return code;
    }

    if (pQuery->intervalTime == 0) {
      int16_t type = TSDB_DATA_TYPE_NULL;

      if (isGroupbyNormalCol(pQuery->pGroupbyExpr)) {  // group by columns not tags;
        type = getGroupbyColumnType(pQuery, pQuery->pGroupbyExpr);
      } else {
        type = TSDB_DATA_TYPE_INT;  // group id
      }

      initWindowResInfo(&pRuntimeEnv->windowResInfo, pRuntimeEnv, 512, 4096, type);
    }

  } else if (isGroupbyNormalCol(pQuery->pGroupbyExpr) || isIntervalQuery(pQuery)) {
    int32_t rows = getInitialPageNum(pQInfo);
    code = createDiskbasedResultBuffer(&pRuntimeEnv->pResultBuf, rows, pQuery->rowSize, pQInfo);
    if (code != TSDB_CODE_SUCCESS) {
      return code;
    }

    int16_t type = TSDB_DATA_TYPE_NULL;
    if (isGroupbyNormalCol(pQuery->pGroupbyExpr)) {
      type = getGroupbyColumnType(pQuery, pQuery->pGroupbyExpr);
    } else {
      type = TSDB_DATA_TYPE_TIMESTAMP;
    }

    initWindowResInfo(&pRuntimeEnv->windowResInfo, pRuntimeEnv, rows, 4096, type);
  }

  // fill info is only needed when a fill policy is set and the query is not a point interpolation
  if (pQuery->fillType != TSDB_FILL_NONE && !isPointInterpoQuery(pQuery)) {
    SFillColInfo* pColInfo = taosCreateFillColInfo(pQuery);
    pRuntimeEnv->pFillInfo = taosInitFillInfo(pQuery->order.order, 0, 0, pQuery->rec.capacity, pQuery->numOfOutput,
                                              pQuery->slidingTime, pQuery->slidingTimeUnit, pQuery->precision,
                                              pQuery->fillType, pColInfo);
  }

  setQueryStatus(pQuery, QUERY_NOT_COMPLETED);

  return TSDB_CODE_SUCCESS;
}

4076
/*
 * Clear the "complete" flag of every output function's result info so that the
 * functions can be executed again.
 */
static void enableExecutionForNextTable(SQueryRuntimeEnv *pRuntimeEnv) {
  SQuery *pQuery = pRuntimeEnv->pQuery;

  int32_t idx = 0;
  while (idx < pQuery->numOfOutput) {
    SResultInfo *pInfo = GET_RES_INFO(&pRuntimeEnv->pCtx[idx]);
    idx += 1;

    if (pInfo == NULL) {  // this output has no result info attached
      continue;
    }

    pInfo->complete = false;
  }
}

H
Haojun Liao 已提交
4087
/*
 * Iterate over all data blocks of the active query handle (primary handle during
 * the master scan, secondary handle otherwise) and apply the query functions to
 * each block on behalf of the table that owns it.
 *
 * @param pQInfo  query info object
 * @return elapsed time of the scan, as the difference of two taosGetTimestampMs() readings
 */
static int64_t scanMultiTableDataBlocks(SQInfo *pQInfo) {
  SQueryRuntimeEnv *pRuntimeEnv = &pQInfo->runtimeEnv;
  SQuery*           pQuery = pRuntimeEnv->pQuery;
  SQueryCostInfo*   pCost = &pRuntimeEnv->summary;

  int64_t startTs = taosGetTimestampMs();

  TsdbQueryHandleT pQueryHandle = IS_MASTER_SCAN(pRuntimeEnv)? pRuntimeEnv->pQueryHandle : pRuntimeEnv->pSecQueryHandle;
  while (tsdbNextDataBlock(pQueryHandle)) {
    pCost->totalBlocks += 1;
    if (isQueryKilled(pQInfo)) {
      break;
    }

    SDataBlockInfo   blockInfo = tsdbRetrieveDataBlockInfo(pQueryHandle);
    STableQueryInfo *pTableQueryInfo = NULL;

    // todo opt performance using hash table
    // linear search through every group for the table that owns this block
    size_t groupCount = taosArrayGetSize(pQInfo->groupInfo.pGroupList);
    for (int32_t g = 0; g < groupCount && pTableQueryInfo == NULL; ++g) {
      SArray *members = taosArrayGetP(pQInfo->groupInfo.pGroupList, g);

      size_t memberCount = taosArrayGetSize(members);
      for (int32_t k = 0; k < memberCount && pTableQueryInfo == NULL; ++k) {
        SGroupItem *entry = taosArrayGet(members, k);
        if (entry->info->id.tid != blockInfo.tid) {
          continue;
        }

        assert(entry->info->id.uid == blockInfo.uid);
        pTableQueryInfo = entry->info;
      }
    }

    assert(pTableQueryInfo != NULL);
    setCurrentQueryTable(pRuntimeEnv, pTableQueryInfo);

    SDataStatis *pStatis = NULL;
    SArray *pDataBlock = loadDataBlockOnDemand(pRuntimeEnv, pQueryHandle, &blockInfo, &pStatis);

    if (isGroupbyNormalCol(pQuery->pGroupbyExpr)) {
      // group by normal column: no per-table execution context is set up here
    } else if (isIntervalQuery(pQuery)) {
      setIntervalQueryRange(pQInfo, blockInfo.window.skey);
      /*int32_t ret = */setAdditionalInfo(pQInfo, &pTableQueryInfo->id, pTableQueryInfo);
    } else {
      int32_t step = QUERY_IS_ASC_QUERY(pQuery)? 1:-1;
      setExecutionContext(pQInfo, &pTableQueryInfo->id, pTableQueryInfo->groupIndex, blockInfo.window.ekey + step);
    }

    pCost->totalRows += blockInfo.rows;
    stableApplyFunctionsOnBlock(pRuntimeEnv, &blockInfo, pStatis, pDataBlock, binarySearchForKey);

    qTrace("QInfo:%p check data block, uid:%"PRId64", tid:%d, brange:%" PRId64 "-%" PRId64 ", numOfRows:%d, lastKey:%" PRId64,
           pQInfo, blockInfo.uid, blockInfo.tid, blockInfo.window.skey, blockInfo.window.ekey, blockInfo.rows, pQuery->current->lastKey);
  }

  return taosGetTimestampMs() - startTs;
}

4155 4156
/*
 * Prepare the runtime environment for scanning a single table of the first
 * group: set its tag values, replace the current query handle with one that
 * covers only this table, and position the ts buffer when one is attached.
 *
 * @param pQInfo  query info object
 * @param index   position of the table inside the first group
 * @return false when the ts buffer holds no data for the current tag value, true otherwise
 */
static bool multiTableMultioutputHelper(SQInfo *pQInfo, int32_t index) {
  SQueryRuntimeEnv *pEnv = &pQInfo->runtimeEnv;
  SQuery *          pQuery = pEnv->pQuery;

  setQueryStatus(pQuery, QUERY_NOT_COMPLETED);

  SArray *    firstGroup = taosArrayGetP(pQInfo->groupInfo.pGroupList, 0);
  SGroupItem* pItem = taosArrayGet(firstGroup, index);

  setTagVal(pEnv, &pItem->id, pQInfo->tsdb);

  qTrace("QInfo:%p query on (%d): uid:%" PRIu64 ", tid:%d, qrange:%" PRId64 "-%" PRId64, pQInfo, index,
         pItem->id.uid, pItem->id.tid, pItem->info->lastKey, pItem->info->win.ekey);

  // resume from the table's last key and run up to the end of its window
  STsdbQueryCond cond = {
      .twindow   = {pItem->info->lastKey, pItem->info->win.ekey},
      .order     = pQuery->order.order,
      .colList   = pQuery->colList,
      .numOfCols = pQuery->numOfCols,
  };

  // todo refactor
  // temporary one-group / one-table list describing only the current table
  SArray *tableList = taosArrayInit(1, sizeof(STableId));
  SArray *groupList = taosArrayInit(1, POINTER_BYTES);

  taosArrayPush(tableList, &pItem->info->id);
  taosArrayPush(groupList, &tableList);
  STableGroupInfo gp = {.numOfTables = 1, .pGroupList = groupList};

  // drop the previous handle before creating the one for the current table
  if (pEnv->pQueryHandle != NULL) {
    tsdbCleanupQueryHandle(pEnv->pQueryHandle);
    pEnv->pQueryHandle = NULL;
  }

  pEnv->pQueryHandle = tsdbQueryTables(pQInfo->tsdb, &cond, &gp, pQInfo);
  taosArrayDestroy(tableList);
  taosArrayDestroy(groupList);

  if (pEnv->pTSBuf == NULL) {
    initCtxOutputBuf(pEnv);
    return true;
  }

  if (pEnv->cur.vgroupIndex != -1) {
    // a cursor was saved earlier: continue from it
    tsBufSetCursor(pEnv->pTSBuf, &pEnv->cur);
  } else {
    int64_t tag = pEnv->pCtx[0].tag.i64Key;
    STSElem elem = tsBufGetElemStartPos(pEnv->pTSBuf, 0, tag);

    // failed to find data with the specified tag value
    if (elem.vnode < 0) {
      return false;
    }
  }

  initCtxOutputBuf(pEnv);
  return true;
}

/**
 * super table query handler
 * 1. super table projection query, group-by on normal columns query, ts-comp query
 * 2. point interpolation query, last row query
 *
 * @param pQInfo
 */
4218
static void sequentialTableProcess(SQInfo *pQInfo) {
4219
  SQueryRuntimeEnv *pRuntimeEnv = &pQInfo->runtimeEnv;
4220
  SQuery *          pQuery = pRuntimeEnv->pQuery;
4221
  setQueryStatus(pQuery, QUERY_COMPLETED);
4222

4223
  size_t numOfGroups = taosArrayGetSize(pQInfo->groupInfo.pGroupList);
4224

H
Haojun Liao 已提交
4225
  if (isPointInterpoQuery(pQuery) || isFirstLastRowQuery(pQuery)) {
4226 4227
    resetCtxOutputBuf(pRuntimeEnv);
    assert(pQuery->limit.offset == 0 && pQuery->limit.limit != 0);
4228

4229 4230
    while (pQInfo->groupIndex < numOfGroups) {
      SArray* group = taosArrayGetP(pQInfo->groupInfo.pGroupList, pQInfo->groupIndex);
4231

H
Haojun Liao 已提交
4232 4233 4234 4235 4236 4237 4238 4239 4240 4241 4242 4243 4244 4245 4246 4247 4248 4249 4250 4251 4252 4253
      qTrace("QInfo:%p last_row query on group:%d, total group:%d, current group:%d", pQInfo, pQInfo->groupIndex,
             numOfGroups);

      STsdbQueryCond cond = {
          .twindow = pQuery->window,
          .colList = pQuery->colList,
          .order   = pQuery->order.order,
          .numOfCols = pQuery->numOfCols,
      };

      SArray *g1 = taosArrayInit(1, POINTER_BYTES);
      SArray *tx = taosArrayClone(group);
      taosArrayPush(g1, &tx);
      
      STableGroupInfo gp = {.numOfTables = taosArrayGetSize(tx), .pGroupList = g1};

      // include only current table
      if (pRuntimeEnv->pQueryHandle != NULL) {
        tsdbCleanupQueryHandle(pRuntimeEnv->pQueryHandle);
        pRuntimeEnv->pQueryHandle = NULL;
      }
      
4254
      if (isFirstLastRowQuery(pQuery)) {
H
Haojun Liao 已提交
4255
        pRuntimeEnv->pQueryHandle = tsdbQueryLastRow(pQInfo->tsdb, &cond, &gp, pQInfo);
H
Haojun Liao 已提交
4256
      } else {
H
Haojun Liao 已提交
4257
        pRuntimeEnv->pQueryHandle = tsdbQueryRowsInExternalWindow(pQInfo->tsdb, &cond, &gp, pQInfo);
4258
      }
H
Haojun Liao 已提交
4259 4260
      
      initCtxOutputBuf(pRuntimeEnv);
4261 4262 4263 4264 4265 4266 4267 4268 4269
      
      SArray* s = tsdbGetQueriedTableIdList(pRuntimeEnv->pQueryHandle);
      assert(taosArrayGetSize(s) >= 1);
      
      setTagVal(pRuntimeEnv, (STableId*) taosArrayGet(s, 0), pQInfo->tsdb);
      
      if (isFirstLastRowQuery(pQuery)) {
        assert(taosArrayGetSize(s) == 1);
      }
H
Haojun Liao 已提交
4270 4271 4272
      
      // here we simply set the first table as current table
      pQuery->current = ((SGroupItem*) taosArrayGet(group, 0))->info;
4273
      scanOneTableDataBlocks(pRuntimeEnv, pQuery->current->lastKey);
H
Haojun Liao 已提交
4274 4275 4276 4277 4278 4279 4280 4281 4282 4283 4284 4285
      
      int64_t numOfRes = getNumOfResult(pRuntimeEnv);
      if (numOfRes > 0) {
        pQuery->rec.rows += numOfRes;
        forwardCtxOutputBuf(pRuntimeEnv, numOfRes);
      }
      
      skipResults(pRuntimeEnv);
      pQInfo->groupIndex += 1;

      // enable execution for next table, when handling the projection query
      enableExecutionForNextTable(pRuntimeEnv);
4286 4287 4288 4289 4290 4291 4292 4293 4294 4295 4296 4297 4298 4299 4300 4301 4302 4303 4304 4305 4306 4307 4308 4309 4310 4311 4312 4313 4314 4315 4316 4317 4318 4319 4320 4321 4322 4323 4324 4325 4326 4327 4328 4329 4330 4331 4332 4333 4334 4335 4336 4337 4338 4339 4340 4341 4342 4343 4344 4345 4346 4347 4348 4349 4350 4351 4352 4353 4354 4355 4356 4357 4358 4359

      if (pQuery->rec.rows >= pQuery->rec.capacity) {
        setQueryStatus(pQuery, QUERY_RESBUF_FULL);
        break;
      }
    }
  } else if (isGroupbyNormalCol(pQuery->pGroupbyExpr)) { // group-by on normal columns query
    while (pQInfo->groupIndex < numOfGroups) {
      SArray* group = taosArrayGetP(pQInfo->groupInfo.pGroupList, pQInfo->groupIndex);

      qTrace("QInfo:%p group by normal columns group:%d, total group:%d", pQInfo, pQInfo->groupIndex, numOfGroups);

      STsdbQueryCond cond = {
          .twindow = pQuery->window,
          .colList = pQuery->colList,
          .order   = pQuery->order.order,
          .numOfCols = pQuery->numOfCols,
      };

      SArray *g1 = taosArrayInit(1, POINTER_BYTES);
      SArray *tx = taosArrayClone(group);
      taosArrayPush(g1, &tx);

      STableGroupInfo gp = {.numOfTables = taosArrayGetSize(tx), .pGroupList = g1};

      // include only current table
      if (pRuntimeEnv->pQueryHandle != NULL) {
        tsdbCleanupQueryHandle(pRuntimeEnv->pQueryHandle);
        pRuntimeEnv->pQueryHandle = NULL;
      }

      pRuntimeEnv->pQueryHandle = tsdbQueryTables(pQInfo->tsdb, &cond, &gp, pQInfo);

      SArray* s = tsdbGetQueriedTableIdList(pRuntimeEnv->pQueryHandle);
      assert(taosArrayGetSize(s) >= 1);

      setTagVal(pRuntimeEnv, (STableId*) taosArrayGet(s, 0), pQInfo->tsdb);

      // here we simply set the first table as current table
      scanMultiTableDataBlocks(pQInfo);
      pQInfo->groupIndex += 1;

      SWindowResInfo *pWindowResInfo = &pRuntimeEnv->windowResInfo;

        // no results generated for current group, continue to try the next group
      if (pWindowResInfo->size <= 0) {
        continue;
      }

      for (int32_t i = 0; i < pWindowResInfo->size; ++i) {
        SWindowStatus *pStatus = &pWindowResInfo->pResult[i].status;
        pStatus->closed = true;  // enable return all results for group by normal columns

        SWindowResult *pResult = &pWindowResInfo->pResult[i];
        for (int32_t j = 0; j < pQuery->numOfOutput; ++j) {
          pResult->numOfRows = MAX(pResult->numOfRows, pResult->resultInfo[j].numOfRes);
        }
      }

      qTrace("QInfo:%p generated groupby columns results %d rows for group %d completed", pQInfo, pWindowResInfo->size,
          pQInfo->groupIndex);
      int32_t currentGroupIndex = pQInfo->groupIndex;

      pQuery->rec.rows = 0;
      pQInfo->groupIndex = 0;

      ensureOutputBufferSimple(pRuntimeEnv, pWindowResInfo->size);
      copyFromWindowResToSData(pQInfo, pWindowResInfo->pResult);

      pQInfo->groupIndex = currentGroupIndex;  //restore the group index
      assert(pQuery->rec.rows == pWindowResInfo->size);

      clearClosedTimeWindow(pRuntimeEnv);
      break;
4360 4361 4362
    }
  } else {
    /*
4363
     * 1. super table projection query, 2. ts-comp query
4364 4365 4366
     * if the subgroup index is larger than 0, results generated by group by tbname,k is existed.
     * we need to return it to client in the first place.
     */
4367
    if (pQInfo->groupIndex > 0) {
4368
      copyFromWindowResToSData(pQInfo, pRuntimeEnv->windowResInfo.pResult);
4369
      pQuery->rec.total += pQuery->rec.rows;
4370

4371
      if (pQuery->rec.rows > 0) {
4372 4373 4374
        return;
      }
    }
4375

4376 4377
    // all data have returned already
    if (pQInfo->tableIndex >= pQInfo->groupInfo.numOfTables) {
4378 4379
      return;
    }
4380

4381 4382
    resetCtxOutputBuf(pRuntimeEnv);
    resetTimeWindowInfo(pRuntimeEnv, &pRuntimeEnv->windowResInfo);
4383 4384 4385 4386 4387

    SArray *group = taosArrayGetP(pQInfo->groupInfo.pGroupList, 0);
    assert(taosArrayGetSize(group) == pQInfo->groupInfo.numOfTables &&
           1 == taosArrayGetSize(pQInfo->groupInfo.pGroupList));

4388
    while (pQInfo->tableIndex < pQInfo->groupInfo.numOfTables) {
4389
      if (isQueryKilled(pQInfo)) {
4390 4391
        return;
      }
4392

H
hjxilinx 已提交
4393
      SGroupItem *item = taosArrayGet(group, pQInfo->tableIndex);
H
hjxilinx 已提交
4394
      pQuery->current = item->info;
H
hjxilinx 已提交
4395
      
4396
      if (!multiTableMultioutputHelper(pQInfo, pQInfo->tableIndex)) {
4397
        pQInfo->tableIndex++;
4398 4399
        continue;
      }
4400

H
hjxilinx 已提交
4401
      // TODO handle the limit offset problem
4402
      if (pQuery->numOfFilterCols == 0 && pQuery->limit.offset > 0) {
4403
        //        skipBlocks(pRuntimeEnv);
4404 4405
        if (Q_STATUS_EQUAL(pQuery->status, QUERY_COMPLETED)) {
          pQInfo->tableIndex++;
4406 4407 4408
          continue;
        }
      }
4409

4410
      scanOneTableDataBlocks(pRuntimeEnv, pQuery->current->lastKey);
4411
      skipResults(pRuntimeEnv);
4412

4413
      // the limitation of output result is reached, set the query completed
4414
      if (limitResults(pRuntimeEnv)) {
4415
        pQInfo->tableIndex = pQInfo->groupInfo.numOfTables;
4416 4417
        break;
      }
4418

4419 4420
      // enable execution for next table, when handling the projection query
      enableExecutionForNextTable(pRuntimeEnv);
4421

4422
      if (Q_STATUS_EQUAL(pQuery->status, QUERY_COMPLETED)) {
4423 4424 4425 4426 4427 4428
        /*
         * query range is identical in terms of all meters involved in query,
         * so we need to restore them at the *beginning* of query on each meter,
         * not the consecutive query on meter on which is aborted due to buffer limitation
         * to ensure that, we can reset the query range once query on a meter is completed.
         */
4429
        pQInfo->tableIndex++;
weixin_48148422's avatar
weixin_48148422 已提交
4430 4431 4432 4433

        STableIdInfo tidInfo;
        tidInfo.uid = item->id.uid;
        tidInfo.tid = item->id.tid;
weixin_48148422's avatar
weixin_48148422 已提交
4434
        tidInfo.key = pQuery->current->lastKey;
weixin_48148422's avatar
weixin_48148422 已提交
4435 4436
        taosArrayPush(pQInfo->arrTableIdInfo, &tidInfo);

4437
        // if the buffer is full or group by each table, we need to jump out of the loop
4438 4439
        if (Q_STATUS_EQUAL(pQuery->status, QUERY_RESBUF_FULL) /*||
            isGroupbyEachTable(pQuery->pGroupbyExpr, pSupporter->pSidSet)*/) {
4440 4441
          break;
        }
4442

4443
      } else {
4444
        // all data in the result buffer are skipped due to the offset, continue to retrieve data from current meter
4445 4446
        if (pQuery->rec.rows == 0) {
          assert(!Q_STATUS_EQUAL(pQuery->status, QUERY_RESBUF_FULL));
4447 4448
          continue;
        } else {
4449 4450 4451
          // buffer is full, wait for the next round to retrieve data from current meter
          assert(Q_STATUS_EQUAL(pQuery->status, QUERY_RESBUF_FULL));
          break;
4452 4453 4454 4455
        }
      }
    }
  }
4456

4457 4458 4459 4460 4461 4462 4463 4464 4465 4466 4467 4468
  /*
   * 1. super table projection query, group-by on normal columns query, ts-comp query
   * 2. point interpolation query, last row query
   *
   * group-by on normal columns query and last_row query do NOT invoke the finalizer here,
   * since the finalize stage will be done at the client side.
   *
   * projection query, point interpolation query do not need the finalizer.
   *
   * Only the ts-comp query requires the finalizer function to be executed here.
   */
  if (isTSCompQuery(pQuery)) {
H
hjxilinx 已提交
4469
    finalizeQueryResult(pRuntimeEnv);
4470
  }
4471

4472 4473 4474
  if (pRuntimeEnv->pTSBuf != NULL) {
    pRuntimeEnv->cur = pRuntimeEnv->pTSBuf->cur;
  }
4475 4476

  qTrace(
H
Haojun Liao 已提交
4477
      "QInfo %p numOfTables:%d, index:%d, numOfGroups:%d, %d points returned, total:%"PRId64", offset:%" PRId64,
4478 4479
      pQInfo, pQInfo->groupInfo.numOfTables, pQInfo->tableIndex, numOfGroups, pQuery->rec.rows, pQuery->rec.total,
      pQuery->limit.offset);
4480 4481
}

4482 4483 4484 4485
/*
 * Switch the query into reverse-scan mode: flip the scan flag, the query window
 * and the order, create the secondary query handle for the flipped window, and
 * flip/disable the function contexts accordingly.
 */
static void doSaveContext(SQInfo *pQInfo) {
  SQueryRuntimeEnv *pEnv = &pQInfo->runtimeEnv;
  SQuery *          pQuery = pEnv->pQuery;

  SET_REVERSE_SCAN_FLAG(pEnv);
  SWAP(pQuery->window.skey, pQuery->window.ekey, TSKEY);
  SWITCH_ORDER(pQuery->order.order);

  // keep the ts buffer cursor in step with the flipped query order
  if (pEnv->pTSBuf != NULL) {
    pEnv->pTSBuf->cur.order = pQuery->order.order;
  }

  STsdbQueryCond cond = {0};
  cond.twindow   = pQuery->window;
  cond.order     = pQuery->order.order;
  cond.colList   = pQuery->colList;
  cond.numOfCols = pQuery->numOfCols;

  // clean unused handle
  if (pEnv->pSecQueryHandle != NULL) {
    tsdbCleanupQueryHandle(pEnv->pSecQueryHandle);
  }

  pEnv->pSecQueryHandle = tsdbQueryTables(pQInfo->tsdb, &cond, &pQInfo->tableIdGroupInfo, pQInfo);

  setQueryStatus(pQuery, QUERY_NOT_COMPLETED);
  switchCtxOrder(pEnv);
  disableFuncInReverseScan(pQInfo);
}

4513 4514 4515 4516
/*
 * Leave reverse-scan mode: swap the query window back, flip the ts buffer cursor
 * order and the function context order, and mark the scan as the master scan.
 */
static void doRestoreContext(SQInfo *pQInfo) {
  SQueryRuntimeEnv *pEnv = &pQInfo->runtimeEnv;
  SQuery *          pQuery = pEnv->pQuery;

  SWAP(pQuery->window.skey, pQuery->window.ekey, TSKEY);

  if (NULL != pEnv->pTSBuf) {
    SWITCH_ORDER(pEnv->pTSBuf->cur.order);
  }

  switchCtxOrder(pEnv);
  SET_MASTER_SCAN_FLAG(pEnv);
}

4527 4528 4529
/*
 * Close all time windows once a scan has finished. Interval queries close the
 * windows of every table in every group; all other queries close the windows of
 * the shared result info in the runtime environment.
 */
static void doCloseAllTimeWindowAfterScan(SQInfo *pQInfo) {
  SQuery *pQuery = pQInfo->runtimeEnv.pQuery;

  if (!isIntervalQuery(pQuery)) {  // close results for group result
    closeAllTimeWindow(&pQInfo->runtimeEnv.windowResInfo);
    return;
  }

  size_t groupCount = taosArrayGetSize(pQInfo->groupInfo.pGroupList);
  for (int32_t g = 0; g < groupCount; ++g) {
    SArray *members = taosArrayGetP(pQInfo->groupInfo.pGroupList, g);

    size_t memberCount = taosArrayGetSize(members);
    for (int32_t k = 0; k < memberCount; ++k) {
      SGroupItem* pItem = taosArrayGet(members, k);
      closeAllTimeWindow(&pItem->info->windowResInfo);
    }
  }
}

/*
 * Execute a query over multiple tables: run the master scan, optionally a
 * reverse scan, and then copy the results into the output buffer.
 *
 * When pQInfo->groupIndex > 0 no scan is performed; the function only copies
 * results into the output buffer and returns.
 *
 * All 64-bit values are logged with PRId64 so that the conversion specifiers
 * match the argument types.
 */
static void multiTableQueryProcess(SQInfo *pQInfo) {
  SQueryRuntimeEnv *pRuntimeEnv = &pQInfo->runtimeEnv;
  SQuery *          pQuery = pRuntimeEnv->pQuery;

  if (pQInfo->groupIndex > 0) {
    /*
     * if the groupIndex > 0, the query process must be completed already, we only need to
     * copy the data into output buffer
     */
    if (isIntervalQuery(pQuery)) {
      copyResToQueryResultBuf(pQInfo, pQuery);
#ifdef _DEBUG_VIEW
      displayInterResult(pQuery->sdata, pRuntimeEnv, pQuery->sdata[0]->num);
#endif
    } else {
      copyFromWindowResToSData(pQInfo, pRuntimeEnv->windowResInfo.pResult);
    }

    qTrace("QInfo:%p current:%" PRId64 ", total:%" PRId64, pQInfo, (int64_t)pQuery->rec.rows,
           (int64_t)pQuery->rec.total);
    return;
  }

  qTrace("QInfo:%p query start, qrange:%" PRId64 "-%" PRId64 ", order:%d, forward scan start", pQInfo,
         pQuery->window.skey, pQuery->window.ekey, pQuery->order.order);

  // do check all qualified data blocks
  int64_t el = scanMultiTableDataBlocks(pQInfo);
  qTrace("QInfo:%p master scan completed, elapsed time: %" PRId64 "ms, reverse scan start", pQInfo, el);

  // query error occurred or query is killed, abort current execution
  if (pQInfo->code != TSDB_CODE_SUCCESS || isQueryKilled(pQInfo)) {
    qTrace("QInfo:%p query killed or error occurred, code:%s, abort", pQInfo, tstrerror(pQInfo->code));
    return;
  }

  // close all time window results
  doCloseAllTimeWindowAfterScan(pQInfo);

  if (needReverseScan(pQuery)) {
    doSaveContext(pQInfo);

    el = scanMultiTableDataBlocks(pQInfo);
    qTrace("QInfo:%p reversed scan completed, elapsed time: %" PRId64 "ms", pQInfo, el);

    doRestoreContext(pQInfo);
  } else {
    qTrace("QInfo:%p no need to do reversed scan, query completed", pQInfo);
  }

  setQueryStatus(pQuery, QUERY_COMPLETED);

  if (pQInfo->code != TSDB_CODE_SUCCESS || isQueryKilled(pQInfo)) {
    qTrace("QInfo:%p query killed or error occurred, code:%s, abort", pQInfo, tstrerror(pQInfo->code));
    return;
  }

  if (isIntervalQuery(pQuery) || isSumAvgRateQuery(pQuery)) {
    if (mergeIntoGroupResult(pQInfo) == TSDB_CODE_SUCCESS) {
      copyResToQueryResultBuf(pQInfo, pQuery);

#ifdef _DEBUG_VIEW
      displayInterResult(pQuery->sdata, pRuntimeEnv, pQuery->sdata[0]->num);
#endif
    }
  } else {  // not a interval query
    copyFromWindowResToSData(pQInfo, pRuntimeEnv->windowResInfo.pResult);
  }

  // handle the limitation of output buffer
  qTrace("QInfo:%p points returned:%" PRId64 ", total:%" PRId64, pQInfo, (int64_t)pQuery->rec.rows,
         (int64_t)(pQuery->rec.total + pQuery->rec.rows));
}

/*
 * in each query, this function will be called only once, no retry for further result.
 *
 * select count(*)/top(field,k)/avg(field name) from table_name [where ts>now-1a];
 * select count(*) from table_name group by status_column;
 */
H
hjxilinx 已提交
4624
static void tableFixedOutputProcess(SQInfo *pQInfo, STableQueryInfo* pTableInfo) {
4625
  SQueryRuntimeEnv *pRuntimeEnv = &pQInfo->runtimeEnv;
H
hjxilinx 已提交
4626 4627
  
  SQuery *pQuery = pRuntimeEnv->pQuery;
H
Haojun Liao 已提交
4628 4629 4630 4631
  if (!isTopBottomQuery(pQuery) && pQuery->limit.offset > 0) {  // no need to execute, since the output will be ignore.
    return;
  }
  
H
hjxilinx 已提交
4632 4633
  pQuery->current = pTableInfo;  // set current query table info
  
4634
  scanOneTableDataBlocks(pRuntimeEnv, pTableInfo->lastKey);
H
hjxilinx 已提交
4635
  finalizeQueryResult(pRuntimeEnv);
4636

4637
  if (isQueryKilled(pQInfo)) {
4638 4639
    return;
  }
4640

H
Haojun Liao 已提交
4641
  // since the numOfRows must be identical for all sql functions that are allowed to be executed simutaneously.
4642
  pQuery->rec.rows = getNumOfResult(pRuntimeEnv);
4643

4644
  skipResults(pRuntimeEnv);
4645
  limitResults(pRuntimeEnv);
4646 4647
}

H
hjxilinx 已提交
4648
/*
 * Handler for single-table queries that may produce multiple output rows.
 * Scans the table repeatedly until rows are produced or the query is completed,
 * then applies the limit and records the last key once the query is completed.
 *
 * @param pQInfo      query info object
 * @param pTableInfo  table to query; becomes pQuery->current
 */
static void tableMultiOutputProcess(SQInfo *pQInfo, STableQueryInfo* pTableInfo) {
  SQueryRuntimeEnv *pRuntimeEnv = &pQInfo->runtimeEnv;

  SQuery *pQuery = pRuntimeEnv->pQuery;
  pQuery->current = pTableInfo;

  // for ts_comp query, re-initialized is not allowed
  if (!isTSCompQuery(pQuery)) {
    resetCtxOutputBuf(pRuntimeEnv);
  }

  // skip blocks without load the actual data block from file if no filter condition present
  skipBlocks(&pQInfo->runtimeEnv);
  if (pQuery->limit.offset > 0 && pQuery->numOfFilterCols == 0) {
    setQueryStatus(pQuery, QUERY_COMPLETED);
    return;
  }

  while (1) {
    scanOneTableDataBlocks(pRuntimeEnv, pQuery->current->lastKey);
    finalizeQueryResult(pRuntimeEnv);

    if (isQueryKilled(pQInfo)) {
      return;
    }

    pQuery->rec.rows = getNumOfResult(pRuntimeEnv);
    if (pQuery->limit.offset > 0 && pQuery->numOfFilterCols > 0 && pQuery->rec.rows > 0) {
      skipResults(pRuntimeEnv);
    }

    /*
     * 1. if pQuery->rec.rows == 0, pQuery->limit.offset >= 0, still need to check data
     * 2. if pQuery->rec.rows > 0, pQuery->limit.offset must be 0
     */
    if (pQuery->rec.rows > 0 || Q_STATUS_EQUAL(pQuery->status, QUERY_COMPLETED)) {
      break;
    }

    // every conversion specifier has a matching argument: offset, then both ends of the next query range
    qTrace("QInfo:%p skip current result, offset:%" PRId64 ", next qrange:%" PRId64 "-%" PRId64,
           pQInfo, pQuery->limit.offset, pQuery->current->lastKey, pQuery->window.ekey);

    resetCtxOutputBuf(pRuntimeEnv);
  }

  limitResults(pRuntimeEnv);
  if (Q_STATUS_EQUAL(pQuery->status, QUERY_RESBUF_FULL)) {
    qTrace("QInfo:%p query paused due to output limitation, next qrange:%" PRId64 "-%" PRId64, pQInfo,
        pQuery->current->lastKey, pQuery->window.ekey);
  } else if (Q_STATUS_EQUAL(pQuery->status, QUERY_COMPLETED)) {
    // record the last key reached on this table
    STableIdInfo tidInfo;
    tidInfo.uid = pQuery->current->id.uid;
    tidInfo.tid = pQuery->current->id.tid;
    tidInfo.key = pQuery->current->lastKey;
    taosArrayPush(pQInfo->arrTableIdInfo, &tidInfo);
  }

  if (!isTSCompQuery(pQuery)) {
    assert(pQuery->rec.rows <= pQuery->rec.capacity);
  }
}

H
Haojun Liao 已提交
4710
/**
 * Scan the data blocks of the current table, starting from `start`, and repeat until the
 * query status contains QUERY_COMPLETED or QUERY_RESBUF_FULL.
 *
 * Returns immediately, without finalizing the results, if the query has been killed.
 *
 * @param pRuntimeEnv runtime environment of the query
 * @param start       start key handed to scanOneTableDataBlocks() on every round
 */
static void tableIntervalProcessImpl(SQueryRuntimeEnv *pRuntimeEnv, TSKEY start) {
  SQuery *pQuery = pRuntimeEnv->pQuery;

  do {
    scanOneTableDataBlocks(pRuntimeEnv, start);
    if (isQueryKilled(GET_QINFO_ADDR(pRuntimeEnv))) {
      return;
    }

    assert(!Q_STATUS_EQUAL(pQuery->status, QUERY_NOT_COMPLETED));
    finalizeQueryResult(pRuntimeEnv);

    // When no fill is required, closed time windows are dropped here to consume the pending offset.
    // todo handle offset, in case of top/bottom interval query
    bool offsetNotHandledYet = (pQuery->numOfFilterCols > 0) || (pRuntimeEnv->pTSBuf != NULL);
    if (offsetNotHandledYet && pQuery->limit.offset > 0 && pQuery->fillType == TSDB_FILL_NONE) {
      int32_t numOfClosed = numOfClosedTimeWindow(&pRuntimeEnv->windowResInfo);
      int32_t numOfSkipped = MIN(numOfClosed, pQuery->limit.offset);

      clearFirstNTimeWindow(pRuntimeEnv, numOfSkipped);
      pQuery->limit.offset -= numOfSkipped;
    }
  } while (!Q_STATUS_EQUAL(pQuery->status, QUERY_COMPLETED | QUERY_RESBUF_FULL));
}

4741
// handle time interval query on table
H
hjxilinx 已提交
4742
/**
 * Run an interval (or group-by-normal-column) query on one table.
 *
 * The scan is repeated until either no fill is requested, no rows were produced, the fill
 * step generated rows, or the query is completed; limitResults() is applied once the loop exits.
 * The number of interpolated points of the last fill round is added to pQInfo->pointsInterpo.
 *
 * @param pQInfo     query handle
 * @param pTableInfo table to query; installed as pQuery->current
 */
static void tableIntervalProcess(SQInfo *pQInfo, STableQueryInfo* pTableInfo) {
  SQueryRuntimeEnv *pRuntimeEnv = &(pQInfo->runtimeEnv);
  SQuery *          pQuery = pRuntimeEnv->pQuery;

  pQuery->current = pTableInfo;

  TSKEY   newStartKey = TSKEY_INITIAL_VAL;
  int32_t numOfInterpo = 0;

  // skip blocks without load the actual data block from file if no filter condition present
  skipTimeInterval(pRuntimeEnv, &newStartKey);

  bool offsetRemains = (pQuery->limit.offset > 0);
  if (offsetRemains && pQuery->numOfFilterCols == 0 && pRuntimeEnv->pFillInfo == NULL) {
    setQueryStatus(pQuery, QUERY_COMPLETED);
    return;
  }

  for (;;) {
    tableIntervalProcessImpl(pRuntimeEnv, newStartKey);

    if (isIntervalQuery(pQuery)) {
      // always copy from the first group
      pQInfo->groupIndex = 0;
      pQuery->rec.rows = 0;

      copyFromWindowResToSData(pQInfo, pRuntimeEnv->windowResInfo.pResult);
      clearFirstNTimeWindow(pRuntimeEnv, pQInfo->groupIndex);
    }

    // the offset is handled at prepare stage if no interpolation involved
    if (pQuery->fillType == TSDB_FILL_NONE || pQuery->rec.rows == 0) {
      break;
    }

    taosFillSetStartInfo(pRuntimeEnv->pFillInfo, pQuery->rec.rows, pQuery->window.ekey);
    taosFillCopyInputDataFromFilePage(pRuntimeEnv->pFillInfo, (tFilePage**) pQuery->sdata);

    numOfInterpo = 0;
    pQuery->rec.rows = doFillGapsInResults(pRuntimeEnv, (tFilePage **)pQuery->sdata, &numOfInterpo);
    if (pQuery->rec.rows > 0 || Q_STATUS_EQUAL(pQuery->status, QUERY_COMPLETED)) {
      break;
    }

    // no result generated yet, continue retrieve data
    pQuery->rec.rows = 0;
  }

  // every exit of the loop above applies the limit to the rows produced so far
  limitResults(pRuntimeEnv);

  // all data scanned, the group by normal column can return
  if (isGroupbyNormalCol(pQuery->pGroupbyExpr)) {  // todo refactor with merge interval time result
    pQInfo->groupIndex = 0;
    pQuery->rec.rows = 0;

    copyFromWindowResToSData(pQInfo, pRuntimeEnv->windowResInfo.pResult);
    clearFirstNTimeWindow(pRuntimeEnv, pQInfo->groupIndex);
  }

  pQInfo->pointsInterpo += numOfInterpo;
}

4800 4801 4802 4803
/**
 * Execute one round of a query on a single table (pQInfo->groupInfo.numOfTables must be 1).
 *
 * The following cases are handled in order:
 *  1. rows left over from a previous fill (interpolation) round are produced first;
 *  2. if the query status is already QUERY_COMPLETED, remaining group/interval window
 *     results are copied to the output buffer;
 *  3. otherwise the table is processed by the routine matching the query type, and the
 *     elapsed time is added to the runtime summary.
 *
 * @param pQInfo query handle; the number of produced rows is left in pQuery->rec.rows
 */
static void tableQueryImpl(SQInfo *pQInfo) {
  SQueryRuntimeEnv *pRuntimeEnv = &pQInfo->runtimeEnv;
  SQuery *          pQuery = pRuntimeEnv->pQuery;

  if (queryHasRemainResults(pRuntimeEnv)) {
    /*
     * There are remaining results that were not returned due to result interpolation,
     * so stay in this procedure instead of launching the retrieve procedure for next results.
     */
    int32_t numOfInterpo = 0;
    pQuery->rec.rows = doFillGapsInResults(pRuntimeEnv, (tFilePage **)pQuery->sdata, &numOfInterpo);

    if (pQuery->rec.rows > 0) {
      limitResults(pRuntimeEnv);
    }

    // rec.rows/rec.total are printed with PRId64 at the end of this function; use the same width here
    qTrace("QInfo:%p current:%" PRId64 " returned, total:%" PRId64, pQInfo, pQuery->rec.rows, pQuery->rec.total);
    return;
  }

  // here we have scan all qualified data in both data file and cache
  if (Q_STATUS_EQUAL(pQuery->status, QUERY_COMPLETED)) {
    // continue to get push data from the group result
    if (isGroupbyNormalCol(pQuery->pGroupbyExpr) ||
        ((isIntervalQuery(pQuery) && pQuery->rec.total < pQuery->limit.limit))) {
      // todo limit the output for interval query?
      pQuery->rec.rows = 0;
      pQInfo->groupIndex = 0;  // always start from 0

      if (pRuntimeEnv->windowResInfo.size > 0) {
        // rec.rows was reset above and is expected to be updated by copyFromWindowResToSData(),
        // exactly as tableIntervalProcess() relies on; it must not be added to itself afterwards.
        copyFromWindowResToSData(pQInfo, pRuntimeEnv->windowResInfo.pResult);
        clearFirstNTimeWindow(pRuntimeEnv, pQInfo->groupIndex);

        if (pQuery->rec.rows > 0) {
          qTrace("QInfo:%p %" PRId64 " rows returned from group results, total:%" PRId64, pQInfo, pQuery->rec.rows,
                 pQuery->rec.total);
          return;
        }
      }
    }

    qTrace("QInfo:%p query over, %" PRId64 " rows are returned", pQInfo, pQuery->rec.total);
    return;
  }

  // number of points returned during this query
  pQuery->rec.rows = 0;
  int64_t st = taosGetTimestampUs();

  assert(pQInfo->groupInfo.numOfTables == 1);
  SArray* g = taosArrayGetP(pQInfo->groupInfo.pGroupList, 0);
  SGroupItem* item = taosArrayGet(g, 0);

  // group by normal column, sliding window query, interval query are handled by interval query processor
  if (isIntervalQuery(pQuery) || isGroupbyNormalCol(pQuery->pGroupbyExpr)) {  // interval (down sampling operation)
    tableIntervalProcess(pQInfo, item->info);
  } else if (isFixedOutputQuery(pQuery)) {
    tableFixedOutputProcess(pQInfo, item->info);
  } else {  // diff/add/multiply/subtract/division
    assert(pQuery->checkBuffer == 1);
    tableMultiOutputProcess(pQInfo, item->info);
  }

  // record the total elapsed time
  pRuntimeEnv->summary.elapsedTime += (taosGetTimestampUs() - st);
  assert(pQInfo->groupInfo.numOfTables == 1);

  /* check if query is killed or not */
  if (isQueryKilled(pQInfo)) {
    qTrace("QInfo:%p query is killed", pQInfo);
  } else {
    qTrace("QInfo:%p query paused, %" PRId64 " rows returned, numOfTotal:%" PRId64 " rows",
        pQInfo, pQuery->rec.rows, pQuery->rec.total + pQuery->rec.rows);
  }
}

4877 4878
/**
 * Execute one round of a super table (multi-table) query.
 *
 * Interval queries, and fixed-output queries that are neither point-interpolation,
 * group-by-normal-column nor first/last-row queries, are processed by
 * multiTableQueryProcess(); everything else is processed table by table through
 * sequentialTableProcess().  The elapsed time is added to the runtime summary.
 *
 * @param pQInfo query handle; pQuery->rec.rows is reset before processing
 */
static void stableQueryImpl(SQInfo *pQInfo) {
  SQuery *pQuery = pQInfo->runtimeEnv.pQuery;
  pQuery->rec.rows = 0;

  int64_t st = taosGetTimestampUs();

  if (isIntervalQuery(pQuery) ||
      (isFixedOutputQuery(pQuery) && (!isPointInterpoQuery(pQuery)) && !isGroupbyNormalCol(pQuery->pGroupbyExpr) &&
      !isFirstLastRowQuery(pQuery))) {
    multiTableQueryProcess(pQInfo);
  } else {
    assert((pQuery->checkBuffer == 1 && pQuery->intervalTime == 0) || isPointInterpoQuery(pQuery) ||
            isFirstLastRowQuery(pQuery) || isGroupbyNormalCol(pQuery->pGroupbyExpr));

    sequentialTableProcess(pQInfo);
  }

  // record the total elapsed time
  pQInfo->runtimeEnv.summary.elapsedTime += (taosGetTimestampUs() - st);

  if (pQuery->rec.rows == 0) {
    // rec.total is printed with PRId64 elsewhere in this file; the cast keeps the table count
    // argument matching %d regardless of the declared width of numOfTables.
    qTrace("QInfo:%p over, %d tables queried, %" PRId64 " rows are returned", pQInfo,
           (int32_t)pQInfo->groupInfo.numOfTables, pQuery->rec.total);
  }
}

4902
/**
 * Locate the column referenced by an expression in the column lists of the query message.
 *
 * Tag columns are searched in pTagCols (pQueryMsg->numOfTags entries), all other columns
 * in pQueryMsg->colList (pQueryMsg->numOfCols entries).
 *
 * @param pQueryMsg query message holding the normal column list and the column/tag counts
 * @param pExprMsg  expression whose colInfo.colId/colInfo.flag identify the column
 * @param pTagCols  tag column list
 * @return index of the matching column in its list; -1 for the table name pseudo column
 *         (tag flag set and colId == TSDB_TBNAME_COLUMN_INDEX).  A column that cannot be
 *         found triggers an assertion; when assertions are compiled out, -1 is returned.
 */
static int32_t getColumnIndexInSource(SQueryTableMsg *pQueryMsg, SSqlFuncMsg *pExprMsg, SColumnInfo* pTagCols) {
  if (TSDB_COL_IS_TAG(pExprMsg->colInfo.flag)) {
    if (pExprMsg->colInfo.colId == TSDB_TBNAME_COLUMN_INDEX) {
      return -1;
    }

    for (int32_t j = 0; j < pQueryMsg->numOfTags; ++j) {
      if (pExprMsg->colInfo.colId == pTagCols[j].colId) {
        return j;
      }
    }
  } else {
    for (int32_t j = 0; j < pQueryMsg->numOfCols; ++j) {
      if (pExprMsg->colInfo.colId == pQueryMsg->colList[j].colId) {
        return j;
      }
    }
  }

  assert(0);

  // never fall off the end of a non-void function when assert() is disabled (NDEBUG)
  return -1;
}

4931 4932 4933
/**
 * Check that the column index resolved for an expression is below the number of normal
 * columns or below the number of tag columns of the query message.
 *
 * @return true if the resolved index is smaller than numOfCols or numOfTags
 */
bool validateExprColumnInfo(SQueryTableMsg *pQueryMsg, SSqlFuncMsg *pExprMsg, SColumnInfo* pTagCols) {
  int32_t index = getColumnIndexInSource(pQueryMsg, pExprMsg, pTagCols);

  if (index < pQueryMsg->numOfCols) {
    return true;
  }

  return index < pQueryMsg->numOfTags;
}

4936
/**
 * Sanity check of the scalar fields of a query message after byte order conversion.
 * The first violated constraint is logged and the message is rejected.
 *
 * @param pQueryMsg converted query message
 * @return true if intervalTime >= 0, numOfTables > 0, numOfGroupCols >= 0 and
 *         0 < numOfOutput <= TSDB_MAX_COLUMNS; false otherwise
 */
static bool validateQueryMsg(SQueryTableMsg *pQueryMsg) {
  bool valid = false;

  if (pQueryMsg->intervalTime < 0) {
    qError("qmsg:%p illegal value of interval time %" PRId64, pQueryMsg, pQueryMsg->intervalTime);
  } else if (pQueryMsg->numOfTables <= 0) {
    qError("qmsg:%p illegal value of numOfTables %d", pQueryMsg, pQueryMsg->numOfTables);
  } else if (pQueryMsg->numOfGroupCols < 0) {
    qError("qmsg:%p illegal value of numOfGroupbyCols %d", pQueryMsg, pQueryMsg->numOfGroupCols);
  } else if (pQueryMsg->numOfOutput <= 0 || pQueryMsg->numOfOutput > TSDB_MAX_COLUMNS) {
    qError("qmsg:%p illegal value of output columns %d", pQueryMsg, pQueryMsg->numOfOutput);
  } else {
    valid = true;
  }

  return valid;
}

/**
 * Validate the number of source columns (normal columns plus tags) of a query message.
 *
 * A query without any source column is accepted only if every output expression is a tag
 * projection, or a TID_TAG/COUNT function applied to the table name pseudo column.
 *
 * @param pQueryMsg converted query message
 * @param pExprMsg  output expressions, pQueryMsg->numOfOutput entries
 * @return true if the source column information is acceptable
 */
static bool validateQuerySourceCols(SQueryTableMsg *pQueryMsg, SSqlFuncMsg** pExprMsg) {
  int32_t numOfTotal = pQueryMsg->numOfCols + pQueryMsg->numOfTags;

  if (pQueryMsg->numOfCols < 0 || pQueryMsg->numOfTags < 0 || numOfTotal > TSDB_MAX_COLUMNS) {
    qError("qmsg:%p illegal value of numOfCols %d numOfTags:%d", pQueryMsg, pQueryMsg->numOfCols, pQueryMsg->numOfTags);
    return false;
  }

  if (numOfTotal != 0) {
    return true;
  }

  // no source column at all: only expressions that need none are allowed
  for (int32_t i = 0; i < pQueryMsg->numOfOutput; ++i) {
    SSqlFuncMsg *pFunc = pExprMsg[i];

    bool onTbname = (pFunc->colInfo.colId == TSDB_TBNAME_COLUMN_INDEX);
    bool allowed = (pFunc->functionId == TSDB_FUNC_TAGPRJ) ||
                   (onTbname && (pFunc->functionId == TSDB_FUNC_TID_TAG || pFunc->functionId == TSDB_FUNC_COUNT));

    if (!allowed) {
      return false;
    }
  }

  return true;
}

4982
/**
 * Build the table id list from the STableIdInfo entries stored at pMsg.
 * Every entry is converted to host byte order in place and then appended to the new array.
 *
 * @param pQueryMsg    query message; numOfTables gives the number of entries (must be > 0)
 * @param pMsg         position of the first STableIdInfo entry in the message buffer
 * @param pTableIdList [out] newly created array of STableIdInfo
 * @return position in the message buffer right after the last entry
 */
static char *createTableIdList(SQueryTableMsg *pQueryMsg, char *pMsg, SArray **pTableIdList) {
  assert(pQueryMsg->numOfTables > 0);

  *pTableIdList = taosArrayInit(pQueryMsg->numOfTables, sizeof(STableIdInfo));

  STableIdInfo *pEntry = (STableIdInfo *)pMsg;
  for (int32_t j = 0; j < pQueryMsg->numOfTables; ++j, ++pEntry) {
    pEntry->tid = htonl(pEntry->tid);
    pEntry->uid = htobe64(pEntry->uid);
    pEntry->key = htobe64(pEntry->key);

    taosArrayPush(*pTableIdList, pEntry);
  }

  return (char *)pEntry;
}
5000

5001
/**
H
hjxilinx 已提交
5002
 * pQueryMsg->head has been converted before this function is called.
5003
 *
H
hjxilinx 已提交
5004
 * @param pQueryMsg
5005 5006 5007 5008
 * @param pTableIdList
 * @param pExpr
 * @return
 */
5009
static int32_t convertQueryMsg(SQueryTableMsg *pQueryMsg, SArray **pTableIdList, SSqlFuncMsg ***pExpr,
weixin_48148422's avatar
weixin_48148422 已提交
5010
                               char **tagCond, char** tbnameCond, SColIndex **groupbyCols, SColumnInfo** tagCols) {
5011 5012 5013 5014 5015 5016 5017 5018
  pQueryMsg->numOfTables = htonl(pQueryMsg->numOfTables);

  pQueryMsg->window.skey = htobe64(pQueryMsg->window.skey);
  pQueryMsg->window.ekey = htobe64(pQueryMsg->window.ekey);
  pQueryMsg->intervalTime = htobe64(pQueryMsg->intervalTime);
  pQueryMsg->slidingTime = htobe64(pQueryMsg->slidingTime);
  pQueryMsg->limit = htobe64(pQueryMsg->limit);
  pQueryMsg->offset = htobe64(pQueryMsg->offset);
H
hjxilinx 已提交
5019

5020 5021 5022
  pQueryMsg->order = htons(pQueryMsg->order);
  pQueryMsg->orderColId = htons(pQueryMsg->orderColId);
  pQueryMsg->queryType = htons(pQueryMsg->queryType);
weixin_48148422's avatar
weixin_48148422 已提交
5023
  pQueryMsg->tagNameRelType = htons(pQueryMsg->tagNameRelType);
5024 5025

  pQueryMsg->numOfCols = htons(pQueryMsg->numOfCols);
5026
  pQueryMsg->numOfOutput = htons(pQueryMsg->numOfOutput);
H
hjxilinx 已提交
5027
  pQueryMsg->numOfGroupCols = htons(pQueryMsg->numOfGroupCols);
5028 5029 5030
  pQueryMsg->tagCondLen = htons(pQueryMsg->tagCondLen);
  pQueryMsg->tsOffset = htonl(pQueryMsg->tsOffset);
  pQueryMsg->tsLen = htonl(pQueryMsg->tsLen);
H
hjxilinx 已提交
5031
  pQueryMsg->tsNumOfBlocks = htonl(pQueryMsg->tsNumOfBlocks);
5032
  pQueryMsg->tsOrder = htonl(pQueryMsg->tsOrder);
5033
  pQueryMsg->numOfTags = htonl(pQueryMsg->numOfTags);
5034

5035
  // query msg safety check
5036
  if (!validateQueryMsg(pQueryMsg)) {
5037
    return TSDB_CODE_QRY_INVALID_MSG;
5038 5039
  }

H
hjxilinx 已提交
5040
  char *pMsg = (char *)(pQueryMsg->colList) + sizeof(SColumnInfo) * pQueryMsg->numOfCols;
5041

H
hjxilinx 已提交
5042
  for (int32_t col = 0; col < pQueryMsg->numOfCols; ++col) {
5043 5044
    SColumnInfo *pColInfo = &pQueryMsg->colList[col];

H
hjxilinx 已提交
5045
    pColInfo->colId = htons(pColInfo->colId);
5046
    pColInfo->type = htons(pColInfo->type);
H
hjxilinx 已提交
5047 5048
    pColInfo->bytes = htons(pColInfo->bytes);
    pColInfo->numOfFilters = htons(pColInfo->numOfFilters);
5049

H
hjxilinx 已提交
5050
    assert(pColInfo->type >= TSDB_DATA_TYPE_BOOL && pColInfo->type <= TSDB_DATA_TYPE_NCHAR);
5051

H
hjxilinx 已提交
5052
    int32_t numOfFilters = pColInfo->numOfFilters;
5053
    if (numOfFilters > 0) {
H
hjxilinx 已提交
5054
      pColInfo->filters = calloc(numOfFilters, sizeof(SColumnFilterInfo));
5055 5056 5057
    }

    for (int32_t f = 0; f < numOfFilters; ++f) {
5058 5059 5060 5061
      SColumnFilterInfo *pFilterMsg = (SColumnFilterInfo *)pMsg;
      
      SColumnFilterInfo *pColFilter = &pColInfo->filters[f];
      pColFilter->filterstr = htons(pFilterMsg->filterstr);
5062 5063 5064

      pMsg += sizeof(SColumnFilterInfo);

5065 5066
      if (pColFilter->filterstr) {
        pColFilter->len = htobe64(pFilterMsg->len);
5067

5068
        pColFilter->pz = (int64_t) calloc(1, pColFilter->len + 1 * TSDB_NCHAR_SIZE); // note: null-terminator
5069 5070
        memcpy((void *)pColFilter->pz, pMsg, pColFilter->len);
        pMsg += (pColFilter->len + 1);
5071
      } else {
5072 5073
        pColFilter->lowerBndi = htobe64(pFilterMsg->lowerBndi);
        pColFilter->upperBndi = htobe64(pFilterMsg->upperBndi);
5074 5075
      }

5076 5077
      pColFilter->lowerRelOptr = htons(pFilterMsg->lowerRelOptr);
      pColFilter->upperRelOptr = htons(pFilterMsg->upperRelOptr);
5078 5079 5080
    }
  }

5081 5082
  *pExpr = calloc(pQueryMsg->numOfOutput, POINTER_BYTES);
  SSqlFuncMsg *pExprMsg = (SSqlFuncMsg *)pMsg;
5083

5084
  for (int32_t i = 0; i < pQueryMsg->numOfOutput; ++i) {
5085
    (*pExpr)[i] = pExprMsg;
5086

5087
    pExprMsg->colInfo.colIndex = htons(pExprMsg->colInfo.colIndex);
5088 5089 5090 5091
    pExprMsg->colInfo.colId = htons(pExprMsg->colInfo.colId);
    pExprMsg->colInfo.flag = htons(pExprMsg->colInfo.flag);
    pExprMsg->functionId = htons(pExprMsg->functionId);
    pExprMsg->numOfParams = htons(pExprMsg->numOfParams);
5092

5093
    pMsg += sizeof(SSqlFuncMsg);
5094 5095

    for (int32_t j = 0; j < pExprMsg->numOfParams; ++j) {
5096
      pExprMsg->arg[j].argType = htons(pExprMsg->arg[j].argType);
5097 5098 5099 5100
      pExprMsg->arg[j].argBytes = htons(pExprMsg->arg[j].argBytes);

      if (pExprMsg->arg[j].argType == TSDB_DATA_TYPE_BINARY) {
        pExprMsg->arg[j].argValue.pz = pMsg;
5101
        pMsg += pExprMsg->arg[j].argBytes;  // one more for the string terminated char.
5102 5103 5104 5105 5106
      } else {
        pExprMsg->arg[j].argValue.i64 = htobe64(pExprMsg->arg[j].argValue.i64);
      }
    }

H
Haojun Liao 已提交
5107 5108
    int16_t functionId = pExprMsg->functionId;
    if (functionId == TSDB_FUNC_TAG || functionId == TSDB_FUNC_TAGPRJ || functionId == TSDB_FUNC_TAG_DUMMY) {
5109
      if (pExprMsg->colInfo.flag != TSDB_COL_TAG) {  // ignore the column  index check for arithmetic expression.
5110
        return TSDB_CODE_QRY_INVALID_MSG;
5111 5112
      }
    } else {
5113
//      if (!validateExprColumnInfo(pQueryMsg, pExprMsg)) {
5114
//        return TSDB_CODE_QRY_INVALID_MSG;
5115
//      }
5116 5117
    }

5118
    pExprMsg = (SSqlFuncMsg *)pMsg;
5119
  }
5120

5121 5122
  if (!validateQuerySourceCols(pQueryMsg, *pExpr)) {
    tfree(*pExpr);
5123

5124
    return TSDB_CODE_QRY_INVALID_MSG;
5125
  }
5126

H
hjxilinx 已提交
5127
  pMsg = createTableIdList(pQueryMsg, pMsg, pTableIdList);
5128

H
hjxilinx 已提交
5129
  if (pQueryMsg->numOfGroupCols > 0) {  // group by tag columns
5130 5131 5132 5133
    *groupbyCols = malloc(pQueryMsg->numOfGroupCols * sizeof(SColIndex));

    for (int32_t i = 0; i < pQueryMsg->numOfGroupCols; ++i) {
      (*groupbyCols)[i].colId = *(int16_t *)pMsg;
5134
      pMsg += sizeof((*groupbyCols)[i].colId);
5135 5136

      (*groupbyCols)[i].colIndex = *(int16_t *)pMsg;
5137 5138
      pMsg += sizeof((*groupbyCols)[i].colIndex);

5139
      (*groupbyCols)[i].flag = *(int16_t *)pMsg;
5140 5141 5142 5143 5144
      pMsg += sizeof((*groupbyCols)[i].flag);

      memcpy((*groupbyCols)[i].name, pMsg, tListLen(groupbyCols[i]->name));
      pMsg += tListLen((*groupbyCols)[i].name);
    }
5145

H
hjxilinx 已提交
5146 5147
    pQueryMsg->orderByIdx = htons(pQueryMsg->orderByIdx);
    pQueryMsg->orderType = htons(pQueryMsg->orderType);
5148 5149
  }

5150 5151
  pQueryMsg->fillType = htons(pQueryMsg->fillType);
  if (pQueryMsg->fillType != TSDB_FILL_NONE) {
5152
    pQueryMsg->fillVal = (uint64_t)(pMsg);
5153 5154

    int64_t *v = (int64_t *)pMsg;
5155
    for (int32_t i = 0; i < pQueryMsg->numOfOutput; ++i) {
5156 5157
      v[i] = htobe64(v[i]);
    }
5158

5159
    pMsg += sizeof(int64_t) * pQueryMsg->numOfOutput;
5160
  }
5161

5162 5163 5164 5165
  if (pQueryMsg->numOfTags > 0) {
    (*tagCols) = calloc(1, sizeof(SColumnInfo) * pQueryMsg->numOfTags);
    for (int32_t i = 0; i < pQueryMsg->numOfTags; ++i) {
      SColumnInfo* pTagCol = (SColumnInfo*) pMsg;
5166

5167 5168 5169 5170
      pTagCol->colId = htons(pTagCol->colId);
      pTagCol->bytes = htons(pTagCol->bytes);
      pTagCol->type  = htons(pTagCol->type);
      pTagCol->numOfFilters = 0;
5171

5172
      (*tagCols)[i] = *pTagCol;
5173
      pMsg += sizeof(SColumnInfo);
5174
    }
H
hjxilinx 已提交
5175
  }
5176

5177 5178 5179 5180 5181 5182
  // the tag query condition expression string is located at the end of query msg
  if (pQueryMsg->tagCondLen > 0) {
    *tagCond = calloc(1, pQueryMsg->tagCondLen);
    memcpy(*tagCond, pMsg, pQueryMsg->tagCondLen);
    pMsg += pQueryMsg->tagCondLen;
  }
5183

weixin_48148422's avatar
weixin_48148422 已提交
5184
  if (*pMsg != 0) {
5185 5186
    size_t len = strlen(pMsg) + 1;
    *tbnameCond = malloc(len);
weixin_48148422's avatar
weixin_48148422 已提交
5187
    strcpy(*tbnameCond, pMsg);
5188
    pMsg += len;
weixin_48148422's avatar
weixin_48148422 已提交
5189
  }
5190

5191 5192
  qTrace("qmsg:%p query %d tables, qrange:%" PRId64 "-%" PRId64 ", numOfGroupbyTagCols:%d, order:%d, "
         "outputCols:%d, numOfCols:%d, interval:%" PRId64 ", fillType:%d, comptsLen:%d, limit:%" PRId64 ", offset:%" PRId64,
5193
         pQueryMsg, pQueryMsg->numOfTables, pQueryMsg->window.skey, pQueryMsg->window.ekey, pQueryMsg->numOfGroupCols,
5194
         pQueryMsg->order, pQueryMsg->numOfOutput, pQueryMsg->numOfCols, pQueryMsg->intervalTime,
5195
         pQueryMsg->fillType, pQueryMsg->tsLen, pQueryMsg->limit, pQueryMsg->offset);
5196 5197 5198 5199

  return 0;
}

H
hjxilinx 已提交
5200
/**
 * Build the expression tree of an arithmetic expression from its binary form, which is
 * stored in the first argument (arg[0]) of the expression message.
 *
 * @param pArithExprInfo expression info; on success pExpr is set to the created tree
 * @param pQueryMsg      query message, used for logging only
 * @return TSDB_CODE_SUCCESS on success, the code caught from exprTreeFromBinary() if it
 *         raises an error, or TSDB_CODE_QRY_APP_ERROR if no tree was created
 */
static int32_t buildAirthmeticExprFromMsg(SExprInfo *pArithExprInfo, SQueryTableMsg *pQueryMsg) {
  // the format string has a single conversion; no further argument must be passed
  qTrace("qmsg:%p create arithmetic expr from binary string", pQueryMsg);

  tExprNode* pExprNode = NULL;
  TRY(32) {
    pExprNode = exprTreeFromBinary(pArithExprInfo->base.arg[0].argValue.pz, pArithExprInfo->base.arg[0].argBytes);
  } CATCH( code ) {
    CLEANUP_EXECUTE();
    return code;
  } END_TRY

  if (pExprNode == NULL) {
    qError("qmsg:%p failed to create arithmetic expression string from:%s", pQueryMsg, pArithExprInfo->base.arg[0].argValue.pz);
    return TSDB_CODE_QRY_APP_ERROR;
  }

  pArithExprInfo->pExpr = pExprNode;
  return TSDB_CODE_SUCCESS;
}

H
Haojun Liao 已提交
5220
static int32_t createQFunctionExprFromMsg(SQueryTableMsg *pQueryMsg, SExprInfo **pExprInfo, SSqlFuncMsg **pExprMsg,
5221 5222
    SColumnInfo* pTagCols) {
  *pExprInfo = NULL;
H
hjxilinx 已提交
5223
  int32_t code = TSDB_CODE_SUCCESS;
5224

H
Haojun Liao 已提交
5225
  SExprInfo *pExprs = (SExprInfo *)calloc(pQueryMsg->numOfOutput, sizeof(SExprInfo));
5226
  if (pExprs == NULL) {
5227
    return TSDB_CODE_QRY_OUT_OF_MEMORY;
5228 5229 5230 5231 5232
  }

  bool    isSuperTable = QUERY_IS_STABLE_QUERY(pQueryMsg->queryType);
  int16_t tagLen = 0;

5233
  for (int32_t i = 0; i < pQueryMsg->numOfOutput; ++i) {
5234
    pExprs[i].base = *pExprMsg[i];
5235
    pExprs[i].bytes = 0;
5236 5237 5238 5239

    int16_t type = 0;
    int16_t bytes = 0;

5240
    // parse the arithmetic expression
5241
    if (pExprs[i].base.functionId == TSDB_FUNC_ARITHM) {
5242
      code = buildAirthmeticExprFromMsg(&pExprs[i], pQueryMsg);
5243

5244 5245 5246
      if (code != TSDB_CODE_SUCCESS) {
        tfree(pExprs);
        return code;
5247 5248
      }

5249
      type  = TSDB_DATA_TYPE_DOUBLE;
5250
      bytes = tDataTypeDesc[type].nSize;
H
Haojun Liao 已提交
5251
    } else if (pExprs[i].base.colInfo.colId == TSDB_TBNAME_COLUMN_INDEX && pExprs[i].base.functionId == TSDB_FUNC_TAGPRJ) {  // parse the normal column
H
Haojun Liao 已提交
5252 5253 5254
      SSchema s = tGetTableNameColumnSchema();
      type  = s.type;
      bytes = s.bytes;
B
Bomin Zhang 已提交
5255
    } else{
5256
      int32_t j = getColumnIndexInSource(pQueryMsg, &pExprs[i].base, pTagCols);
H
Haojun Liao 已提交
5257 5258 5259 5260 5261 5262 5263
      assert(j < pQueryMsg->numOfCols || j < pQueryMsg->numOfTags || j == TSDB_TBNAME_COLUMN_INDEX);

      if (pExprs[i].base.colInfo.colId != TSDB_TBNAME_COLUMN_INDEX) {
        SColumnInfo* pCol = (TSDB_COL_IS_TAG(pExprs[i].base.colInfo.flag))? &pTagCols[j]:&pQueryMsg->colList[j];
        type = pCol->type;
        bytes = pCol->bytes;
      } else {
H
Haojun Liao 已提交
5264
        SSchema s = tGetTableNameColumnSchema();
H
hjxilinx 已提交
5265

H
Haojun Liao 已提交
5266 5267 5268
        type  = s.type;
        bytes = s.bytes;
      }
5269 5270
    }

5271 5272
    int32_t param = pExprs[i].base.arg[0].argValue.i64;
    if (getResultDataInfo(type, bytes, pExprs[i].base.functionId, param, &pExprs[i].type, &pExprs[i].bytes,
5273
                          &pExprs[i].interBytes, 0, isSuperTable) != TSDB_CODE_SUCCESS) {
5274
      tfree(pExprs);
5275
      return TSDB_CODE_QRY_INVALID_MSG;
5276 5277
    }

5278
    if (pExprs[i].base.functionId == TSDB_FUNC_TAG_DUMMY || pExprs[i].base.functionId == TSDB_FUNC_TS_DUMMY) {
5279
      tagLen += pExprs[i].bytes;
5280
    }
5281
    assert(isValidDataType(pExprs[i].type, pExprs[i].bytes));
5282 5283 5284
  }

  // TODO refactor
5285
  for (int32_t i = 0; i < pQueryMsg->numOfOutput; ++i) {
5286 5287
    pExprs[i].base = *pExprMsg[i];
    int16_t functId = pExprs[i].base.functionId;
5288

5289
    if (functId == TSDB_FUNC_TOP || functId == TSDB_FUNC_BOTTOM) {
5290
      int32_t j = getColumnIndexInSource(pQueryMsg, &pExprs[i].base, pTagCols);
5291 5292 5293 5294 5295
      assert(j < pQueryMsg->numOfCols);

      SColumnInfo *pCol = &pQueryMsg->colList[j];

      int32_t ret =
5296
          getResultDataInfo(pCol->type, pCol->bytes, functId, pExprs[i].base.arg[0].argValue.i64,
5297
                            &pExprs[i].type, &pExprs[i].bytes, &pExprs[i].interBytes, tagLen, isSuperTable);
5298 5299 5300 5301
      assert(ret == TSDB_CODE_SUCCESS);
    }
  }

5302
  tfree(pExprMsg);
5303
  *pExprInfo = pExprs;
5304 5305 5306 5307

  return TSDB_CODE_SUCCESS;
}

5308
/**
 * Create the group-by expression descriptor from the query message.
 *
 * @param pQueryMsg converted query message
 * @param pColIndex group-by column list, pQueryMsg->numOfGroupCols entries
 * @param code      [out] set to TSDB_CODE_QRY_OUT_OF_MEMORY if the descriptor cannot be
 *                  allocated; left untouched otherwise
 * @return the new descriptor, or NULL if there is no group-by column or on allocation failure
 */
static SSqlGroupbyExpr *createGroupbyExprFromMsg(SQueryTableMsg *pQueryMsg, SColIndex *pColIndex, int32_t *code) {
  if (pQueryMsg->numOfGroupCols == 0) {
    return NULL;
  }

  // using group by tag columns
  SSqlGroupbyExpr *pResult = (SSqlGroupbyExpr *)calloc(1, sizeof(SSqlGroupbyExpr));
  if (pResult == NULL) {
    *code = TSDB_CODE_QRY_OUT_OF_MEMORY;
    return NULL;
  }

  pResult->orderIndex = pQueryMsg->orderByIdx;
  pResult->orderType = pQueryMsg->orderType;
  pResult->numOfGroupCols = pQueryMsg->numOfGroupCols;

  pResult->columnInfo = taosArrayInit(pQueryMsg->numOfGroupCols, sizeof(SColIndex));

  SColIndex *pCol = pColIndex;
  for (int32_t i = 0; i < pQueryMsg->numOfGroupCols; ++i, ++pCol) {
    taosArrayPush(pResult->columnInfo, pCol);
  }

  return pResult;
}

5332
/**
 * Build the per-column filter information (pQuery->pFilterInfo) for every column of the
 * query that carries at least one filter, and bind a filter function to each filter.
 *
 * A filter with a lower bound (>, >=) and an upper bound (<, <=) uses the range filter
 * table of the column type; a filter with a single relation uses the value filter table.
 *
 * @param pQInfo query handle, used for logging only
 * @param pQuery query whose colList is inspected; numOfFilterCols and pFilterInfo are updated
 * @return TSDB_CODE_SUCCESS, TSDB_CODE_QRY_OUT_OF_MEMORY if an allocation fails, or
 *         TSDB_CODE_QRY_INVALID_MSG if a filter is invalid or no filter function is
 *         available for the column type
 */
static int32_t createFilterInfo(void *pQInfo, SQuery *pQuery) {
  for (int32_t i = 0; i < pQuery->numOfCols; ++i) {
    if (pQuery->colList[i].numOfFilters > 0) {
      pQuery->numOfFilterCols++;
    }
  }

  if (pQuery->numOfFilterCols == 0) {
    return TSDB_CODE_SUCCESS;
  }

  pQuery->pFilterInfo = calloc(1, sizeof(SSingleColumnFilterInfo) * pQuery->numOfFilterCols);
  if (pQuery->pFilterInfo == NULL) {
    return TSDB_CODE_QRY_OUT_OF_MEMORY;
  }

  for (int32_t i = 0, j = 0; i < pQuery->numOfCols; ++i) {
    if (pQuery->colList[i].numOfFilters > 0) {
      SSingleColumnFilterInfo *pFilterInfo = &pQuery->pFilterInfo[j];

      pFilterInfo->info = pQuery->colList[i];

      pFilterInfo->numOfFilters = pQuery->colList[i].numOfFilters;
      pFilterInfo->pFilters = calloc(pFilterInfo->numOfFilters, sizeof(SColumnFilterElem));
      if (pFilterInfo->pFilters == NULL) {
        return TSDB_CODE_QRY_OUT_OF_MEMORY;
      }

      for (int32_t f = 0; f < pFilterInfo->numOfFilters; ++f) {
        SColumnFilterElem *pSingleColFilter = &pFilterInfo->pFilters[f];
        pSingleColFilter->filterInfo = pQuery->colList[i].filters[f];

        int32_t lower = pSingleColFilter->filterInfo.lowerRelOptr;
        int32_t upper = pSingleColFilter->filterInfo.upperRelOptr;

        if (lower == TSDB_RELATION_INVALID && upper == TSDB_RELATION_INVALID) {
          qError("QInfo:%p invalid filter info", pQInfo);
          return TSDB_CODE_QRY_INVALID_MSG;
        }

        int16_t type  = pQuery->colList[i].type;
        int16_t bytes = pQuery->colList[i].bytes;

        // todo refactor
        __filter_func_t *rangeFilterArray = getRangeFilterFuncArray(type);
        __filter_func_t *filterArray = getValueFilterFuncArray(type);

        if (rangeFilterArray == NULL && filterArray == NULL) {
          qError("QInfo:%p failed to get filter function, invalid data type:%d", pQInfo, type);
          return TSDB_CODE_QRY_INVALID_MSG;
        }

        if ((lower == TSDB_RELATION_GREATER_EQUAL || lower == TSDB_RELATION_GREATER) &&
            (upper == TSDB_RELATION_LESS_EQUAL || upper == TSDB_RELATION_LESS)) {
          // only one of the two tables may be available; never index a missing one
          if (rangeFilterArray == NULL) {
            qError("QInfo:%p failed to get filter function, invalid data type:%d", pQInfo, type);
            return TSDB_CODE_QRY_INVALID_MSG;
          }

          if (lower == TSDB_RELATION_GREATER_EQUAL) {
            if (upper == TSDB_RELATION_LESS_EQUAL) {
              pSingleColFilter->fp = rangeFilterArray[4];
            } else {
              pSingleColFilter->fp = rangeFilterArray[2];
            }
          } else {
            if (upper == TSDB_RELATION_LESS_EQUAL) {
              pSingleColFilter->fp = rangeFilterArray[3];
            } else {
              pSingleColFilter->fp = rangeFilterArray[1];
            }
          }
        } else {  // set callback filter function
          if (filterArray == NULL) {
            qError("QInfo:%p failed to get filter function, invalid data type:%d", pQInfo, type);
            return TSDB_CODE_QRY_INVALID_MSG;
          }

          if (lower != TSDB_RELATION_INVALID) {
            pSingleColFilter->fp = filterArray[lower];

            if (upper != TSDB_RELATION_INVALID) {
              qError("pQInfo:%p failed to get filter function, invalid filter condition", pQInfo);
              return TSDB_CODE_QRY_INVALID_MSG;
            }
          } else {
            pSingleColFilter->fp = filterArray[upper];
          }
        }

        assert(pSingleColFilter->fp != NULL);
        pSingleColFilter->bytes = bytes;
      }

      j++;
    }
  }

  return TSDB_CODE_SUCCESS;
}

5417
/**
 * Update colInfo.colIndex of every output expression so that it refers to the position of
 * the column in pQuery->colList (normal columns) or pQuery->tagColList (tag columns).
 * Arithmetic expressions are skipped.
 *
 * @param pQuery query whose pSelectExpr entries are updated in place
 */
static void doUpdateExprColumnIndex(SQuery *pQuery) {
  // check the pointer itself before any of its members is read
  assert(pQuery != NULL && pQuery->pSelectExpr != NULL);

  for (int32_t k = 0; k < pQuery->numOfOutput; ++k) {
    SSqlFuncMsg *pSqlExprMsg = &pQuery->pSelectExpr[k].base;
    if (pSqlExprMsg->functionId == TSDB_FUNC_ARITHM) {
      continue;
    }

    // todo opt performance
    SColIndex *pColIndex = &pSqlExprMsg->colInfo;
    if (!TSDB_COL_IS_TAG(pColIndex->flag)) {
      int32_t f = 0;
      for (f = 0; f < pQuery->numOfCols; ++f) {
        if (pColIndex->colId == pQuery->colList[f].colId) {
          pColIndex->colIndex = f;
          break;
        }
      }

      assert (f < pQuery->numOfCols);
    } else {
      int32_t f = 0;
      for (f = 0; f < pQuery->numOfTags; ++f) {
        if (pColIndex->colId == pQuery->tagColList[f].colId) {
          pColIndex->colIndex = f;
          break;
        }
      }

      // the table name pseudo column is not part of the tag column list
      assert(f < pQuery->numOfTags || pColIndex->colId == TSDB_TBNAME_COLUMN_INDEX);
    }
  }
}

weixin_48148422's avatar
weixin_48148422 已提交
5452 5453 5454 5455 5456 5457 5458 5459 5460 5461

/**
 * Comparison function for STableIdInfo entries, ordering them by uid.
 *
 * @return 1 if a->uid > b->uid, -1 if a->uid < b->uid, 0 if both are equal
 */
static int compareTableIdInfo( const void* a, const void* b ) {
  const STableIdInfo* lhs = a;
  const STableIdInfo* rhs = b;

  return (lhs->uid > rhs->uid) - (lhs->uid < rhs->uid);
}

static SQInfo *createQInfoImpl(SQueryTableMsg *pQueryMsg, SArray* pTableIdList, SSqlGroupbyExpr *pGroupbyExpr, SExprInfo *pExprs,
5462
                               STableGroupInfo *groupInfo, SColumnInfo* pTagCols) {
5463 5464
  SQInfo *pQInfo = (SQInfo *)calloc(1, sizeof(SQInfo));
  if (pQInfo == NULL) {
5465
    return NULL;
5466 5467 5468 5469 5470 5471
  }

  SQuery *pQuery = calloc(1, sizeof(SQuery));
  pQInfo->runtimeEnv.pQuery = pQuery;

  int16_t numOfCols = pQueryMsg->numOfCols;
5472
  int16_t numOfOutput = pQueryMsg->numOfOutput;
5473

5474
  pQuery->numOfCols       = numOfCols;
H
hjxilinx 已提交
5475
  pQuery->numOfOutput     = numOfOutput;
5476 5477 5478
  pQuery->limit.limit     = pQueryMsg->limit;
  pQuery->limit.offset    = pQueryMsg->offset;
  pQuery->order.order     = pQueryMsg->order;
5479
  pQuery->order.orderColId = pQueryMsg->orderColId;
5480 5481 5482 5483
  pQuery->pSelectExpr     = pExprs;
  pQuery->pGroupbyExpr    = pGroupbyExpr;
  pQuery->intervalTime    = pQueryMsg->intervalTime;
  pQuery->slidingTime     = pQueryMsg->slidingTime;
5484
  pQuery->slidingTimeUnit = pQueryMsg->slidingTimeUnit;
5485
  pQuery->fillType        = pQueryMsg->fillType;
5486
  pQuery->numOfTags       = pQueryMsg->numOfTags;
5487
  
5488
  // todo do not allocate ??
5489
  pQuery->colList = calloc(numOfCols, sizeof(SSingleColumnFilterInfo));
5490
  if (pQuery->colList == NULL) {
5491
    goto _cleanup;
5492
  }
5493

H
hjxilinx 已提交
5494
  for (int16_t i = 0; i < numOfCols; ++i) {
5495
    pQuery->colList[i] = pQueryMsg->colList[i];
5496
    pQuery->colList[i].filters = tscFilterInfoClone(pQueryMsg->colList[i].filters, pQuery->colList[i].numOfFilters);
H
hjxilinx 已提交
5497
  }
5498

5499
  pQuery->tagColList = pTagCols;
5500

5501
  // calculate the result row size
5502 5503 5504
  for (int16_t col = 0; col < numOfOutput; ++col) {
    assert(pExprs[col].bytes > 0);
    pQuery->rowSize += pExprs[col].bytes;
5505
  }
5506

5507
  doUpdateExprColumnIndex(pQuery);
5508

5509
  int32_t ret = createFilterInfo(pQInfo, pQuery);
5510
  if (ret != TSDB_CODE_SUCCESS) {
5511
    goto _cleanup;
5512 5513 5514
  }

  // prepare the result buffer
5515
  pQuery->sdata = (tFilePage **)calloc(pQuery->numOfOutput, POINTER_BYTES);
5516
  if (pQuery->sdata == NULL) {
5517
    goto _cleanup;
5518 5519
  }

H
hjxilinx 已提交
5520
  // set the output buffer capacity
H
hjxilinx 已提交
5521
  pQuery->rec.capacity = 4096;
5522
  pQuery->rec.threshold = 4000;
5523

5524
  for (int32_t col = 0; col < pQuery->numOfOutput; ++col) {
5525
    assert(pExprs[col].interBytes >= pExprs[col].bytes);
5526 5527

    // allocate additional memory for interResults that are usually larger then final results
5528 5529
    size_t size = (pQuery->rec.capacity + 1) * pExprs[col].bytes + pExprs[col].interBytes + sizeof(tFilePage);
    pQuery->sdata[col] = (tFilePage *)calloc(1, size);
5530
    if (pQuery->sdata[col] == NULL) {
5531
      goto _cleanup;
5532 5533 5534
    }
  }

5535
  if (pQuery->fillType != TSDB_FILL_NONE) {
5536 5537
    pQuery->fillVal = malloc(sizeof(int64_t) * pQuery->numOfOutput);
    if (pQuery->fillVal == NULL) {
5538
      goto _cleanup;
5539 5540 5541
    }

    // the first column is the timestamp
5542
    memcpy(pQuery->fillVal, (char *)pQueryMsg->fillVal, pQuery->numOfOutput * sizeof(int64_t));
5543 5544 5545
  }

  // to make sure third party won't overwrite this structure
5546
  pQInfo->signature = pQInfo;
5547

H
hjxilinx 已提交
5548 5549
  pQInfo->tableIdGroupInfo = *groupInfo;
  size_t numOfGroups = taosArrayGetSize(groupInfo->pGroupList);
5550

H
hjxilinx 已提交
5551 5552 5553
  pQInfo->groupInfo.pGroupList = taosArrayInit(numOfGroups, POINTER_BYTES);
  pQInfo->groupInfo.numOfTables = groupInfo->numOfTables;
  
weixin_48148422's avatar
weixin_48148422 已提交
5554 5555
  int tableIndex = 0;
  STimeWindow window = pQueryMsg->window;
5556
  taosArraySort(pTableIdList, compareTableIdInfo);
5557

H
hjxilinx 已提交
5558 5559 5560
  for(int32_t i = 0; i < numOfGroups; ++i) {
    SArray* pa = taosArrayGetP(groupInfo->pGroupList, i);
    size_t s = taosArrayGetSize(pa);
5561

H
hjxilinx 已提交
5562
    SArray* p1 = taosArrayInit(s, sizeof(SGroupItem));
5563

H
hjxilinx 已提交
5564
    for(int32_t j = 0; j < s; ++j) {
weixin_48148422's avatar
weixin_48148422 已提交
5565 5566 5567
      STableId id = *(STableId*) taosArrayGet(pa, j);
      SGroupItem item = { .id = id };
      // NOTE: compare STableIdInfo with STableId
5568
      STableIdInfo* pTableId = taosArraySearch( pTableIdList, &id, compareTableIdInfo);
weixin_48148422's avatar
weixin_48148422 已提交
5569 5570 5571
      if (pTableId != NULL ) {
        window.skey = pTableId->key;
      } else {
B
Bomin Zhang 已提交
5572
        window.skey = pQueryMsg->window.skey;
weixin_48148422's avatar
weixin_48148422 已提交
5573 5574
      }
      item.info = createTableQueryInfo(&pQInfo->runtimeEnv, item.id, window);
5575
      item.info->groupIndex = i;
weixin_48148422's avatar
weixin_48148422 已提交
5576
      item.info->tableIndex = tableIndex++;
H
hjxilinx 已提交
5577 5578
      taosArrayPush(p1, &item);
    }
5579

H
hjxilinx 已提交
5580 5581
    taosArrayPush(pQInfo->groupInfo.pGroupList, &p1);
  }
5582

weixin_48148422's avatar
weixin_48148422 已提交
5583 5584
  pQInfo->arrTableIdInfo = taosArrayInit(tableIndex, sizeof(STableIdInfo));

5585
  pQuery->pos = -1;
5586
  pQuery->window = pQueryMsg->window;
5587

5588
  if (sem_init(&pQInfo->dataReady, 0, 0) != 0) {
S
slguan 已提交
5589
    qError("QInfo:%p init dataReady sem failed, reason:%s", pQInfo, strerror(errno));
5590
    goto _cleanup;
5591
  }
5592

5593
  colIdCheck(pQuery);
5594

S
slguan 已提交
5595
  qTrace("qmsg:%p QInfo:%p created", pQueryMsg, pQInfo);
5596 5597
  return pQInfo;

5598
_cleanup:
5599
  tfree(pQuery->fillVal);
5600 5601

  if (pQuery->sdata != NULL) {
5602
    for (int16_t col = 0; col < pQuery->numOfOutput; ++col) {
5603 5604 5605 5606 5607 5608 5609 5610 5611 5612 5613 5614 5615 5616 5617 5618
      tfree(pQuery->sdata[col]);
    }
  }

  tfree(pQuery->sdata);
  tfree(pQuery->pFilterInfo);
  tfree(pQuery->colList);

  tfree(pExprs);
  tfree(pGroupbyExpr);

  tfree(pQInfo);

  return NULL;
}

H
hjxilinx 已提交
5619
static bool isValidQInfo(void *param) {
H
hjxilinx 已提交
5620 5621 5622 5623
  SQInfo *pQInfo = (SQInfo *)param;
  if (pQInfo == NULL) {
    return false;
  }
5624

H
hjxilinx 已提交
5625 5626 5627 5628
  /*
   * pQInfo->signature may be changed by another thread, so we assign value of signature
   * into local variable, then compare by using local variable
   */
5629
  uint64_t sig = (uint64_t)pQInfo->signature;
H
hjxilinx 已提交
5630 5631 5632
  return (sig == (uint64_t)pQInfo);
}

H
hjxilinx 已提交
5633 5634
// forward declaration: initQInfo() calls freeQInfo() on its error path before the definition appears
static void freeQInfo(SQInfo *pQInfo);

5635
/*
 * Finish the initialization of a freshly created SQInfo.
 *
 * When the query time window is empty for the requested order, or no table is part of the
 * query, the query is marked QUERY_COMPLETED, the dataReady semaphore is posted and
 * TSDB_CODE_SUCCESS is returned without any further setup.
 * Otherwise the optional timestamp buffer carried by the message is opened and doInitQInfo()
 * is invoked; if that fails, pQInfo is released through freeQInfo() and the error code is
 * returned, so the caller must not use pQInfo afterwards.
 */
static int32_t initQInfo(SQueryTableMsg *pQueryMsg, void *tsdb, int32_t vgId, SQInfo *pQInfo, bool isSTable) {
  SQuery *pQuery = pQInfo->runtimeEnv.pQuery;

  // only the successful complete requires the sem_post/over = 1 operations.
  if ((QUERY_IS_ASC_QUERY(pQuery) && (pQuery->window.skey > pQuery->window.ekey)) ||
      (!QUERY_IS_ASC_QUERY(pQuery) && (pQuery->window.ekey > pQuery->window.skey))) {
    qTrace("QInfo:%p no result in time range %" PRId64 "-%" PRId64 ", order %d", pQInfo, pQuery->window.skey,
           pQuery->window.ekey, pQuery->order.order);
    setQueryStatus(pQuery, QUERY_COMPLETED);

    sem_post(&pQInfo->dataReady);
    return TSDB_CODE_SUCCESS;
  }

  if (pQInfo->groupInfo.numOfTables == 0) {
    qTrace("QInfo:%p no table qualified for tag filter, abort query", pQInfo);
    setQueryStatus(pQuery, QUERY_COMPLETED);

    sem_post(&pQInfo->dataReady);
    return TSDB_CODE_SUCCESS;
  }

  // The ts buffer is created only after the early exits above: they return without
  // handing the buffer to anybody, so creating it earlier would leak it.
  STSBuf *pTSBuf = NULL;
  if (pQueryMsg->tsLen > 0) {  // open new file to save the result
    char *tsBlock = (char *)pQueryMsg + pQueryMsg->tsOffset;
    pTSBuf = tsBufCreateFromCompBlocks(tsBlock, pQueryMsg->tsNumOfBlocks, pQueryMsg->tsLen, pQueryMsg->tsOrder);

    tsBufResetPos(pTSBuf);
    tsBufNextPos(pTSBuf);
  }

  // filter the qualified
  int32_t code = doInitQInfo(pQInfo, pTSBuf, tsdb, vgId, isSTable);
  if (code != TSDB_CODE_SUCCESS) {
    // table query ref will be decrease during error handling
    freeQInfo(pQInfo);
  }

  return code;
}

/*
 * Release a query handle together with everything reachable from it: output buffers,
 * the runtime environment, filter descriptors, select expressions, fill values, the
 * table groups, the group-by description and finally the SQuery and SQInfo objects.
 * Handles rejected by isValidQInfo() are ignored.
 */
static void freeQInfo(SQInfo *pQInfo) {
  if (!isValidQInfo(pQInfo)) {
    return;
  }

  SQuery *pQuery = pQInfo->runtimeEnv.pQuery;
  setQueryKilled(pQInfo);

  qTrace("QInfo:%p start to free QInfo", pQInfo);

  // per-column output buffers
  for (int32_t c = 0; c < pQuery->numOfOutput; ++c) {
    tfree(pQuery->sdata[c]);
  }

  sem_destroy(&(pQInfo->dataReady));
  teardownQueryRuntimeEnv(&pQInfo->runtimeEnv);

  // filter array of every filtered column
  for (int32_t c = 0; c < pQuery->numOfFilterCols; ++c) {
    SSingleColumnFilterInfo *pFilter = &pQuery->pFilterInfo[c];
    if (pFilter->numOfFilters > 0) {
      tfree(pFilter->pFilters);
    }
  }

  // expression trees first, then the expression array itself
  if (pQuery->pSelectExpr != NULL) {
    for (int32_t e = 0; e < pQuery->numOfOutput; ++e) {
      SExprInfo* pInfo = &pQuery->pSelectExpr[e];
      if (pInfo->pExpr != NULL) {
        tExprTreeDestroy(&pInfo->pExpr, NULL);
      }
    }

    tfree(pQuery->pSelectExpr);
  }

  // tfree ignores NULL, no explicit guard is required
  tfree(pQuery->fillVal);

  // todo refactor, extract method to destroytableDataInfo
  int32_t numOfGroups = taosArrayGetSize(pQInfo->groupInfo.pGroupList);
  for (int32_t g = 0; g < numOfGroups; ++g) {
    SArray *pGroup = taosArrayGetP(pQInfo->groupInfo.pGroupList, g);
    size_t  numOfItems = taosArrayGetSize(pGroup);

    for (int32_t t = 0; t < numOfItems; ++t) {
      SGroupItem* pItem = taosArrayGet(pGroup, t);
      if (pItem->info != NULL) {
        destroyTableQueryInfo(pItem->info, pQuery->numOfOutput);
      }
    }

    taosArrayDestroy(pGroup);
  }

  taosArrayDestroy(pQInfo->groupInfo.pGroupList);

  // the table id groups are walked with the same group count as above
  for (int32_t g = 0; g < numOfGroups; ++g) {
    SArray* pIdGroup = taosArrayGetP(pQInfo->tableIdGroupInfo.pGroupList, g);
    taosArrayDestroy(pIdGroup);
  }

  taosArrayDestroy(pQInfo->tableIdGroupInfo.pGroupList);
  taosArrayDestroy(pQInfo->arrTableIdInfo);

  if (pQuery->pGroupbyExpr != NULL) {
    taosArrayDestroy(pQuery->pGroupbyExpr->columnInfo);
    tfree(pQuery->pGroupbyExpr);
  }

  tfree(pQuery->tagColList);
  tfree(pQuery->pFilterInfo);
  tfree(pQuery->colList);
  tfree(pQuery->sdata);

  tfree(pQuery);

  qTrace("QInfo:%p QInfo is freed", pQInfo);

  // wipe the signature so that a stale handle no longer passes the validity check
  memset(pQInfo, 0, sizeof(SQInfo));
  tfree(pQInfo);
}

H
hjxilinx 已提交
5764
/*
 * Compute the number of payload bytes that will be sent back for the current result.
 *
 * For a ts-comp query with at least one row, the size of the file named by
 * pQuery->sdata[0]->data is returned and *numOfRows is overwritten with that size;
 * 0 is returned when the file cannot be inspected. For all other cases the result is
 * rowSize * (*numOfRows).
 */
static size_t getResultSize(SQInfo *pQInfo, int64_t *numOfRows) {
  SQuery *pQuery = pQInfo->runtimeEnv.pQuery;

  // regular result set: fixed row width multiplied by the row count
  if (!isTSCompQuery(pQuery) || (*numOfRows) <= 0) {
    return pQuery->rowSize * (*numOfRows);
  }

  /*
   * get the file size and set the numOfRows to be the file size, since for tsComp query,
   * the returned row size is equalled to 1
   * TODO handle the case that the file is too large to send back one time
   */
  struct stat fileStat;
  if (stat(pQuery->sdata[0]->data, &fileStat) != 0) {
    qError("QInfo:%p failed to get file info, path:%s, reason:%s", pQInfo, pQuery->sdata[0]->data, strerror(errno));
    return 0;
  }

  *numOfRows = fileStat.st_size;
  return fileStat.st_size;
}
5785

H
hjxilinx 已提交
5786 5787 5788
/*
 * Copy the current result of the query into the response buffer.
 *
 * For a ts-comp query the temporary file named by pQuery->sdata[0]->data is read into
 * `data` and removed afterwards; for every other query the rows are copied by
 * doCopyQueryResultToMsg(). The total row counter is advanced and the query is marked
 * QUERY_OVER once the limit is reached.
 *
 * Always returns TSDB_CODE_SUCCESS; I/O problems on the temporary file are logged only
 * (todo: return the error code to client).
 */
static int32_t doDumpQueryResult(SQInfo *pQInfo, char *data) {
  // the remained number of retrieved rows, not the interpolated result
  SQuery *pQuery = pQInfo->runtimeEnv.pQuery;

  // load data from file to msg buffer
  if (isTSCompQuery(pQuery)) {
    int32_t fd = open(pQuery->sdata[0]->data, O_RDONLY, 0666);

    // make sure file exist
    if (FD_VALID(fd)) {
      // lseek reports failure with a negative value; do not let it turn into a huge size_t
      off_t fileLen = lseek(fd, 0, SEEK_END);
      if (fileLen < 0 || lseek(fd, 0, SEEK_SET) < 0) {
        qError("QInfo:%p failed to get size of ts-comp tmp file, path:%s, reason:%s", pQInfo,
               pQuery->sdata[0]->data, strerror(errno));
      } else {
        size_t s = (size_t)fileLen;
        qTrace("QInfo:%p ts comp data return, file:%s, size:%zu", pQInfo, pQuery->sdata[0]->data, s);

        // read() may deliver fewer bytes than requested, keep reading until done
        size_t done = 0;
        while (done < s) {
          ssize_t n = read(fd, data + done, s - done);
          if (n < 0 && errno == EINTR) {
            continue;
          }

          if (n <= 0) {
            break;
          }

          done += (size_t)n;
        }

        if (done != s) {
          qError("QInfo:%p incomplete read of ts-comp tmp file, path:%s, expect:%zu, read:%zu", pQInfo,
                 pQuery->sdata[0]->data, s, done);
        }
      }

      close(fd);
      unlink(pQuery->sdata[0]->data);
    } else {
      // todo return the error code to client
      qError("QInfo:%p failed to open tmp file to send ts-comp data to client, path:%s, reason:%s", pQInfo,
             pQuery->sdata[0]->data, strerror(errno));
    }

    // all data returned, set query over
    if (Q_STATUS_EQUAL(pQuery->status, QUERY_COMPLETED)) {
      setQueryStatus(pQuery, QUERY_OVER);
    }
  } else {
    doCopyQueryResultToMsg(pQInfo, pQuery->rec.rows, data);
  }

  pQuery->rec.total += pQuery->rec.rows;
  // the row counters are 64-bit values, print them with a matching conversion
  qTrace("QInfo:%p current numOfRes rows:%" PRId64 ", total:%" PRId64, pQInfo, (int64_t)pQuery->rec.rows,
         (int64_t)pQuery->rec.total);

  if (pQuery->limit.limit > 0 && pQuery->limit.limit == pQuery->rec.total) {
    qTrace("QInfo:%p results limitation reached, limitation:%"PRId64, pQInfo, pQuery->limit.limit);
    setQueryStatus(pQuery, QUERY_OVER);
  }

  // todo if interpolation exists, the result may be dump to client by several rounds
  return TSDB_CODE_SUCCESS;
}

5831
/*
 * Build a query handle from a query message.
 *
 * tsdb       storage handle passed on to the tsdb lookup functions
 * vgId       vnode group id passed on to initQInfo()
 * pQueryMsg  the query message; must not be NULL
 * pQInfo     receives the created handle on success and NULL on failure after the
 *            message has been converted
 *
 * Returns TSDB_CODE_SUCCESS or the error code of the step that failed. On success the
 * reference count of the handle is increased twice.
 */
int32_t qCreateQueryInfo(void *tsdb, int32_t vgId, SQueryTableMsg *pQueryMsg, qinfo_t *pQInfo) {
  assert(pQueryMsg != NULL);

  int32_t code = TSDB_CODE_SUCCESS;

  char *        tagCond = NULL, *tbnameCond = NULL;
  SArray *      pTableIdList = NULL;
  SSqlFuncMsg **pExprMsg = NULL;
  SColIndex *   pGroupColIndex = NULL;
  SColumnInfo*  pTagColumnInfo = NULL;

  // declared before the first "goto _over" so that the cleanup code never reads indeterminate values
  SExprInfo *      pExprs = NULL;
  SSqlGroupbyExpr *pGroupbyExpr = NULL;
  bool             isSTableQuery = false;
  STableGroupInfo  groupInfo = {0};

  if ((code = convertQueryMsg(pQueryMsg, &pTableIdList, &pExprMsg, &tagCond, &tbnameCond, &pGroupColIndex, &pTagColumnInfo)) !=
         TSDB_CODE_SUCCESS) {
    return code;
  }

  if (pQueryMsg->numOfTables <= 0) {
    qError("Invalid number of tables to query, numOfTables:%d", pQueryMsg->numOfTables);
    code = TSDB_CODE_QRY_INVALID_MSG;
    goto _over;
  }

  if (pTableIdList == NULL || taosArrayGetSize(pTableIdList) == 0) {
    qError("qmsg:%p, SQueryTableMsg wrong format", pQueryMsg);
    code = TSDB_CODE_QRY_INVALID_MSG;
    goto _over;
  }

  if ((code = createQFunctionExprFromMsg(pQueryMsg, &pExprs, pExprMsg, pTagColumnInfo)) != TSDB_CODE_SUCCESS) {
    // what the callee leaves in pExprs on failure is not visible here: do not free it
    pExprs = NULL;
    goto _over;
  }

  pGroupbyExpr = createGroupbyExprFromMsg(pQueryMsg, pGroupColIndex, &code);
  if ((pGroupbyExpr == NULL && pQueryMsg->numOfGroupCols != 0) || code != TSDB_CODE_SUCCESS) {
    // never reach _over with a success code and no query handle
    if (code == TSDB_CODE_SUCCESS) {
      code = TSDB_CODE_QRY_OUT_OF_MEMORY;
    }

    goto _over;
  }

  //todo multitable_query??
  if (TSDB_QUERY_HAS_TYPE(pQueryMsg->queryType, TSDB_QUERY_TYPE_MULTITABLE_QUERY|TSDB_QUERY_TYPE_TABLE_QUERY)) {
    isSTableQuery = TSDB_QUERY_HAS_TYPE(pQueryMsg->queryType, TSDB_QUERY_TYPE_MULTITABLE_QUERY);

    STableIdInfo *id = taosArrayGet(pTableIdList, 0);
    qTrace("qmsg:%p query table, uid:%"PRId64", tid:%d", pQueryMsg, id->uid, id->tid);

    if ((code = tsdbGetOneTableGroup(tsdb, id->uid, &groupInfo)) != TSDB_CODE_SUCCESS) {
      goto _over;
    }
  } else if (TSDB_QUERY_HAS_TYPE(pQueryMsg->queryType, TSDB_QUERY_TYPE_STABLE_QUERY)) {
    isSTableQuery = true;
    // TODO: need a macro from TSDB to check if table is super table,
    // also note there's possiblity that only one table in the super table
    if (taosArrayGetSize(pTableIdList) == 1) {
      STableIdInfo *id = taosArrayGet(pTableIdList, 0);
      // if array size is 1 and assert super table

      // group by normal column, do not pass the group by condition to tsdb to group table into different group
      int32_t numOfGroupByCols = pQueryMsg->numOfGroupCols;
      if (pQueryMsg->numOfGroupCols == 1 && !TSDB_COL_IS_TAG(pGroupColIndex->flag)) {
        numOfGroupByCols = 0;
      }

      code = tsdbQuerySTableByTagCond(tsdb, id->uid, tagCond, pQueryMsg->tagCondLen, pQueryMsg->tagNameRelType, tbnameCond, &groupInfo, pGroupColIndex,
                                          numOfGroupByCols);
      if (code != TSDB_CODE_SUCCESS) {
        goto _over;
      }
    } else {
      // Several tables are listed explicitly: put all of them into one single group.
      // The count comes from the id list; groupInfo is still zero-initialized at this point,
      // so iterating over groupInfo.numOfTables would always produce an empty group.
      size_t  numOfTables = taosArrayGetSize(pTableIdList);
      SArray* pTableGroup = taosArrayInit(1, POINTER_BYTES);

      // NOTE(review): each element copies the leading sizeof(STableId) bytes of a STableIdInfo,
      // which presumes both structures share the same leading layout -- confirm.
      SArray* sa = taosArrayInit(numOfTables, sizeof(STableId));
      for(size_t i = 0; i < numOfTables; ++i) {
        STableIdInfo* tableId = taosArrayGet(pTableIdList, i);
        taosArrayPush(sa, tableId);
      }
      taosArrayPush(pTableGroup, &sa);
      groupInfo.numOfTables = numOfTables;
      groupInfo.pGroupList = pTableGroup;
    }
  } else {
    assert(0);

    // with assertions compiled out, reject the message instead of going on with an empty group info
    code = TSDB_CODE_QRY_INVALID_MSG;
    goto _over;
  }

  (*pQInfo) = createQInfoImpl(pQueryMsg, pTableIdList, pGroupbyExpr, pExprs, &groupInfo, pTagColumnInfo);

  // from here on the expressions belong to the query handle (or were handled by createQInfoImpl)
  pExprs = NULL;
  pGroupbyExpr = NULL;

  if ((*pQInfo) == NULL) {
    code = TSDB_CODE_QRY_OUT_OF_MEMORY;
    goto _over;
  }

  code = initQInfo(pQueryMsg, tsdb, vgId, *pQInfo, isSTableQuery);

_over:
  tfree(tagCond);
  tfree(tbnameCond);
  tfree(pGroupColIndex);
  taosArrayDestroy(pTableIdList);

  // only non-NULL when a failure happened before the hand-over to createQInfoImpl
  tfree(pExprs);
  if (pGroupbyExpr != NULL) {
    taosArrayDestroy(pGroupbyExpr->columnInfo);
    tfree(pGroupbyExpr);
  }

  //pQInfo already freed in initQInfo, but *pQInfo may not pointer to null;
  if (code != TSDB_CODE_SUCCESS) {
    *pQInfo = NULL;
  } else {
    SQInfo* pq = (SQInfo*) (*pQInfo);

    // two references are taken; NOTE(review): presumably one for the executing task and
    // one for the retrieving side -- confirm against the callers of qDestroyQueryInfo
    T_REF_INC(pq);
    T_REF_INC(pq);
  }

  // if failed to add ref for all meters in this query, abort current query
  return code;
}

H
Haojun Liao 已提交
5944 5945
/*
 * Final teardown of a query handle: log the completion, print the cost summary of the
 * query and release the handle with freeQInfo().
 */
static void doDestoryQueryInfo(SQInfo* pQInfo) {
  assert(pQInfo != NULL);

  qTrace("QInfo:%p query completed", pQInfo);

  // the cost summary has to be printed while the handle is still alive
  queryCostStatis(pQInfo);
  freeQInfo(pQInfo);
}

H
Haojun Liao 已提交
5951 5952 5953 5954 5955 5956 5957
/*
 * Drop one reference of the query handle. The handle is destroyed when the reference
 * count reaches zero. Handles rejected by isValidQInfo() are ignored.
 */
void qDestroyQueryInfo(qinfo_t qHandle) {
  SQInfo* pQInfo = (SQInfo*) qHandle;

  if (isValidQInfo(pQInfo)) {
    int16_t ref = T_REF_DEC(pQInfo);
    qTrace("QInfo:%p dec refCount, value:%d", pQInfo, ref);

    // last reference gone: release the handle
    if (ref == 0) {
      doDestoryQueryInfo(pQInfo);
    }
  }
}

H
hjxilinx 已提交
5965
/*
 * Execute one round of the query behind the handle.
 *
 * A killed query is not executed; its reference is dropped instead. Otherwise the query is
 * dispatched to the tag-only, super-table or single-table implementation, the dataReady
 * semaphore is posted and one reference of the handle is dropped.
 */
void qTableQuery(qinfo_t qinfo) {
  SQInfo *pQInfo = (SQInfo *)qinfo;

  // NULL handle or a handle whose signature no longer matches
  if (!isValidQInfo(pQInfo)) {
    qTrace("QInfo:%p has been freed, no need to execute", pQInfo);
    return;
  }

  if (isQueryKilled(pQInfo)) {
    qTrace("QInfo:%p it is already killed, abort", pQInfo);
    qDestroyQueryInfo(pQInfo);
    return;
  }

  qTrace("QInfo:%p query task is launched", pQInfo);

  SQueryRuntimeEnv *pEnv = &pQInfo->runtimeEnv;
  if (onlyQueryTags(pEnv->pQuery)) {
    // a tag-only query never opens a query handle on the storage
    assert(pEnv->pQueryHandle == NULL);
    buildTagQueryResult(pQInfo);   // todo support the limit/offset
  } else {
    if (pEnv->stableQuery) {
      stableQueryImpl(pQInfo);
    } else {
      tableQueryImpl(pQInfo);
    }
  }

  sem_post(&pQInfo->dataReady);
  qDestroyQueryInfo(pQInfo);
}

H
hjxilinx 已提交
5994
/*
 * Wait until the result of the current query round is available.
 *
 * Returns TSDB_CODE_QRY_INVALID_QHANDLE for an invalid handle. For a killed query the code
 * stored in the handle is returned immediately; otherwise the call blocks on the dataReady
 * semaphore and returns the code stored in the handle afterwards.
 */
int32_t qRetrieveQueryResultInfo(qinfo_t qinfo) {
  SQInfo *pQInfo = (SQInfo *)qinfo;

  if (pQInfo == NULL || !isValidQInfo(pQInfo)) {
    return TSDB_CODE_QRY_INVALID_QHANDLE;
  }

  SQuery *pQuery = pQInfo->runtimeEnv.pQuery;
  if (isQueryKilled(pQInfo)) {
    qTrace("QInfo:%p query is killed, code:%d", pQInfo, pQInfo->code);
    return pQInfo->code;
  }

  sem_wait(&pQInfo->dataReady);

  // the row counter is a 64-bit value, print it with a matching conversion
  qTrace("QInfo:%p retrieve result info, rowsize:%d, rows:%" PRId64 ", code:%d", pQInfo, pQuery->rowSize,
         (int64_t)pQuery->rec.rows, pQInfo->code);

  return pQInfo->code;
}
6013

H
hjxilinx 已提交
6014
/*
 * Tell whether the client has to retrieve more results for this query.
 *
 * Returns false for an invalid handle, for a handle carrying an error code and for a query
 * in QUERY_OVER state. Returns true when the status is QUERY_RESBUF_FULL or QUERY_COMPLETED;
 * in that case the reference count of the handle is increased by one.
 */
bool qHasMoreResultsToRetrieve(qinfo_t qinfo) {
  SQInfo *pQInfo = (SQInfo *)qinfo;

  // checked separately: an invalid (possibly NULL) handle must not be dereferenced for logging
  if (!isValidQInfo(pQInfo)) {
    qTrace("QInfo:%p invalid qhandle, abort query", pQInfo);
    return false;
  }

  if (pQInfo->code != TSDB_CODE_SUCCESS) {
    qTrace("QInfo:%p invalid qhandle or error occurs, abort query, code:%x", pQInfo, pQInfo->code);
    return false;
  }

  SQuery *pQuery = pQInfo->runtimeEnv.pQuery;
  bool ret = false;
  if (Q_STATUS_EQUAL(pQuery->status, QUERY_OVER)) {
    ret = false;
  } else if (Q_STATUS_EQUAL(pQuery->status, QUERY_RESBUF_FULL)) {
    ret = true;
  } else if (Q_STATUS_EQUAL(pQuery->status, QUERY_COMPLETED)) {
    ret = true;
  } else {
    assert(0);
  }

  if (ret) {
    // keep the handle alive for the upcoming retrieve
    T_REF_INC(pQInfo);
    qTrace("QInfo:%p has more results waits for client retrieve", pQInfo);
  }

  return ret;
}

6042 6043 6044
/*
 * Build the retrieve response for the current result of the query.
 *
 * qinfo    the query handle
 * pRsp     receives the response buffer allocated with rpcMallocCont()
 * contLen  receives the length of the response: the result payload, one int32_t, one
 *          STableIdInfo per entry of pQInfo->arrTableIdInfo and the response header
 *
 * Returns TSDB_CODE_QRY_INVALID_QHANDLE for an invalid handle, TSDB_CODE_QRY_OUT_OF_MEMORY
 * when the response buffer cannot be allocated (*pRsp is NULL then), otherwise the code of
 * the query or of the dump operation.
 */
int32_t qDumpRetrieveResult(qinfo_t qinfo, SRetrieveTableRsp **pRsp, int32_t *contLen) {
  SQInfo *pQInfo = (SQInfo *)qinfo;

  if (pQInfo == NULL || !isValidQInfo(pQInfo)) {
    return TSDB_CODE_QRY_INVALID_QHANDLE;
  }

  SQueryRuntimeEnv* pRuntimeEnv = &pQInfo->runtimeEnv;
  SQuery *pQuery = pQInfo->runtimeEnv.pQuery;
  size_t  size = getResultSize(pQInfo, &pQuery->rec.rows);
  size += sizeof(int32_t);
  size += sizeof(STableIdInfo) * taosArrayGetSize(pQInfo->arrTableIdInfo);
  *contLen = size + sizeof(SRetrieveTableRsp);

  *pRsp = (SRetrieveTableRsp *)rpcMallocCont(*contLen);
  if (*pRsp == NULL) {
    // report the failure instead of writing through a NULL response pointer
    return TSDB_CODE_QRY_OUT_OF_MEMORY;
  }

  (*pRsp)->numOfRows = htonl(pQuery->rec.rows);

  int32_t code = pQInfo->code;
  if (code == TSDB_CODE_SUCCESS) {
    (*pRsp)->offset = htobe64(pQuery->limit.offset);
    (*pRsp)->useconds = htobe64(pRuntimeEnv->summary.elapsedTime);
  } else {
    (*pRsp)->offset = 0;
    (*pRsp)->useconds = 0;
  }

  (*pRsp)->precision = htons(pQuery->precision);
  if (pQuery->rec.rows > 0 && code == TSDB_CODE_SUCCESS) {
    code = doDumpQueryResult(pQInfo, (*pRsp)->data);
  } else {
    // nothing to send, or the query failed: no further round is expected
    setQueryStatus(pQuery, QUERY_OVER);
    code = pQInfo->code;
  }

  if (isQueryKilled(pQInfo) || Q_STATUS_EQUAL(pQuery->status, QUERY_OVER)) {
    (*pRsp)->completed = 1;  // notify no more result to client
  }

  return code;
}
H
hjxilinx 已提交
6083

H
Haojun Liao 已提交
6084 6085 6086 6087 6088 6089 6090 6091 6092 6093 6094 6095 6096
/*
 * Mark the query as killed and drop one reference of its handle.
 * Returns TSDB_CODE_QRY_INVALID_QHANDLE for an invalid handle, TSDB_CODE_SUCCESS otherwise.
 */
int32_t qKillQuery(qinfo_t qinfo) {
  SQInfo *pQInfo = (SQInfo *)qinfo;

  if (pQInfo != NULL && isValidQInfo(pQInfo)) {
    setQueryKilled(pQInfo);
    qDestroyQueryInfo(pQInfo);
    return TSDB_CODE_SUCCESS;
  }

  return TSDB_CODE_QRY_INVALID_QHANDLE;
}

H
hjxilinx 已提交
6097 6098 6099
/*
 * Produce the result of a query that touches tags / table names only.
 *
 * The tables of the single table group are visited starting at pQInfo->tableIndex; at most
 * pQuery->rec.capacity rows are generated per call. Three kinds of queries are handled:
 *  - TSDB_FUNC_TID_TAG: one var-length value per table holding uid, tid, vgId and the tag
 *    (or table name) value;
 *  - TSDB_FUNC_COUNT:   a single row holding the number of tables;
 *  - anything else:     one row per table with the requested tag / table name columns.
 *
 * pQuery->rec.rows is set to the number of generated rows and the query status is set to
 * QUERY_COMPLETED.
 */
static void buildTagQueryResult(SQInfo* pQInfo) {
  SQueryRuntimeEnv *pRuntimeEnv = &pQInfo->runtimeEnv;
  SQuery *          pQuery = pRuntimeEnv->pQuery;

  size_t numOfGroup = taosArrayGetSize(pQInfo->groupInfo.pGroupList);
  assert(numOfGroup == 0 || numOfGroup == 1);

  if (numOfGroup == 0) {
    return;
  }

  SArray* pa = taosArrayGetP(pQInfo->groupInfo.pGroupList, 0);

  size_t num = taosArrayGetSize(pa);
  assert(num == pQInfo->groupInfo.numOfTables);

  int32_t count = 0;
  int32_t functionId = pQuery->pSelectExpr[0].base.functionId;
  if (functionId == TSDB_FUNC_TID_TAG) { // return the tags & table Id
    assert(pQuery->numOfOutput == 1);

    SExprInfo* pExprInfo = &pQuery->pSelectExpr[0];
    int32_t rsize = pExprInfo->bytes;
    count = 0;

    while(pQInfo->tableIndex < num && count < pQuery->rec.capacity) {
      int32_t i = pQInfo->tableIndex++;
      SGroupItem *item = taosArrayGet(pa, i);

      // The output position is the row number of THIS round, not the global table index:
      // on later rounds the table index exceeds the buffer capacity while the rows have to
      // start at the beginning of the buffer again.
      char *output = pQuery->sdata[0]->data + count * rsize;
      varDataSetLen(output, rsize - VARSTR_HEADER_SIZE);

      output = varDataVal(output);
      *(int64_t *)output = item->id.uid;  // memory align problem, todo serialize
      output += sizeof(item->id.uid);

      *(int32_t *)output = item->id.tid;
      output += sizeof(item->id.tid);

      *(int32_t *)output = pQInfo->vgId;
      output += sizeof(pQInfo->vgId);

      int16_t bytes = pExprInfo->bytes;
      int16_t type = pExprInfo->type;

      if (pExprInfo->base.colInfo.colId == TSDB_TBNAME_COLUMN_INDEX) {
        char *data = tsdbGetTableName(pQInfo->tsdb, &item->id);
        memcpy(output, data, varDataTLen(data));
      } else {
        char *val = tsdbGetTableTagVal(pQInfo->tsdb, &item->id, pExprInfo->base.colInfo.colId, type, bytes);

        // todo refactor
        if (type == TSDB_DATA_TYPE_BINARY || type == TSDB_DATA_TYPE_NCHAR) {
          if (val == NULL) {
            setVardataNull(output, type);
          } else {
            memcpy(output, val, varDataTLen(val));
          }
        } else {
          if (val == NULL) {
            setNull(output, type, bytes);
          } else {
            memcpy(output, val, bytes);
          }
        }
      }

      count += 1;
    }

    qTrace("QInfo:%p create (tableId, tag) info completed, rows:%d", pQInfo, count);

  } else if (functionId == TSDB_FUNC_COUNT) {// handle the "count(tbname)" query
    *(int64_t*) pQuery->sdata[0]->data = num;

    count = 1;
    pQInfo->tableIndex = num;  //set query completed
    qTrace("QInfo:%p create count(tbname) query, res:%d rows:1", pQInfo, count);
  } else {  // return only the tags|table name etc.
    count = 0;
    SSchema tbnameSchema = tGetTableNameColumnSchema();
    while(pQInfo->tableIndex < num && count < pQuery->rec.capacity) {
      int32_t i = pQInfo->tableIndex++;

      SExprInfo* pExprInfo = pQuery->pSelectExpr;
      SGroupItem* item = taosArrayGet(pa, i);

      for(int32_t j = 0; j < pQuery->numOfOutput; ++j) {
        if (pExprInfo[j].base.colInfo.colId == TSDB_TBNAME_COLUMN_INDEX) {
          char* data = tsdbGetTableName(pQInfo->tsdb, &item->id);
          char* dst = pQuery->sdata[j]->data + count * tbnameSchema.bytes;
          memcpy(dst, data, varDataTLen(data));
        } else {// todo refactor
          int16_t type = pExprInfo[j].type;
          int16_t bytes = pExprInfo[j].bytes;

          char* data = tsdbGetTableTagVal(pQInfo->tsdb, &item->id, pExprInfo[j].base.colInfo.colId, type, bytes);
          char* dst = pQuery->sdata[j]->data + count * pExprInfo[j].bytes;

          if (type == TSDB_DATA_TYPE_BINARY || type == TSDB_DATA_TYPE_NCHAR) {
            if (data == NULL) {
              setVardataNull(dst, type);
            } else {
              memcpy(dst, data, varDataTLen(data));
            }
          } else {
            if (data == NULL) {
              setNull(dst, type, bytes);
            } else {
              memcpy(dst, data, pExprInfo[j].bytes);
            }
          }
        }
      }
      count += 1;
    }

    qTrace("QInfo:%p create tag values results completed, rows:%d", pQInfo, count);
  }

  pQuery->rec.rows = count;
  setQueryStatus(pQuery, QUERY_COMPLETED);
}