qExecutor.c 211.6 KB
Newer Older
1 2 3 4 5 6 7 8 9 10 11 12 13 14
/*
 * Copyright (c) 2019 TAOS Data, Inc. <jhtao@taosdata.com>
 *
 * This program is free software: you can use, redistribute, and/or modify
 * it under the terms of the GNU Affero General Public License, version 3
 * or later ("AGPL"), as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.
 *
 * You should have received a copy of the GNU Affero General Public License
 * along with this program. If not, see <http://www.gnu.org/licenses/>.
 */
H
Haojun Liao 已提交
15
#include <taosmsg.h>
16
#include "os.h"
H
Haojun Liao 已提交
17
#include "qfill.h"
18 19 20

#include "hash.h"
#include "hashfunc.h"
21 22
#include "qExecutor.h"
#include "qUtil.h"
H
hjxilinx 已提交
23
#include "qast.h"
24
#include "qresultBuf.h"
H
hjxilinx 已提交
25
#include "query.h"
S
slguan 已提交
26
#include "queryLog.h"
27
#include "taosmsg.h"
28
#include "tdataformat.h"
29
#include "tlosertree.h"
30
#include "tscUtil.h"  // todo move the function to common module
31 32
#include "tscompression.h"
#include "ttime.h"
33 34 35 36 37 38 39 40 41

/**
 * check if the primary column is loaded by default; otherwise, the program is
 * forced to load the primary column explicitly.
 */
#define Q_STATUS_EQUAL(p, s) (((p) & (s)) != 0)
#define TSDB_COL_IS_TAG(f) (((f)&TSDB_COL_TAG) != 0)
#define QUERY_IS_ASC_QUERY(q) (GET_FORWARD_DIRECTION_FACTOR((q)->order.order) == QUERY_ASC_FORWARD_STEP)

42
#define IS_MASTER_SCAN(runtime)        ((runtime)->scanFlag == MASTER_SCAN)
H
hjxilinx 已提交
43
#define IS_REVERSE_SCAN(runtime)       ((runtime)->scanFlag == REVERSE_SCAN)
44
#define SET_MASTER_SCAN_FLAG(runtime)  ((runtime)->scanFlag = MASTER_SCAN)
H
hjxilinx 已提交
45
#define SET_REVERSE_SCAN_FLAG(runtime) ((runtime)->scanFlag = REVERSE_SCAN)
46

47
#define GET_QINFO_ADDR(x) ((void *)((char *)(x)-offsetof(SQInfo, runtimeEnv)))
48

49
#define GET_COL_DATA_POS(query, index, step) ((query)->pos + (index) * (step))
50
#define SWITCH_ORDER(n) (((n) = ((n) == TSDB_ORDER_ASC) ? TSDB_ORDER_DESC : TSDB_ORDER_ASC))
51 52 53

/* get the byte width / data type of the source column referenced by the colidx-th select expression */
#define GET_COLUMN_BYTES(query, colidx) \
54 55
  ((query)->colList[(query)->pSelectExpr[colidx].base.colInfo.colIndex].bytes)
#define GET_COLUMN_TYPE(query, colidx) ((query)->colList[(query)->pSelectExpr[colidx].base.colInfo.colIndex].type)
56

57
enum {
H
hjxilinx 已提交
58
  // when query starts to execute, this status will set
59 60
  QUERY_NOT_COMPLETED = 0x1u,

H
hjxilinx 已提交
61 62
  /* result output buffer is full, current query is paused.
   * this status only exists in group-by clause and diff/add/division/multiply queries.
63
   */
64 65
  QUERY_RESBUF_FULL = 0x2u,

H
hjxilinx 已提交
66 67 68
  /* query is over
   * 1. this status is used in one row result query process, e.g., count/sum/first/last/ avg...etc.
   * 2. when all data within queried time window, it is also denoted as query_completed
69
   */
70
  QUERY_COMPLETED = 0x4u,
71

H
hjxilinx 已提交
72 73
  /* set when the results have not been completely returned to the client; this status is
   * usually used in case of an interval query with the interpolation option
74
   */
75
  QUERY_OVER = 0x8u,
76
};
77 78

enum {
79 80
  TS_JOIN_TS_EQUAL       = 0,
  TS_JOIN_TS_NOT_EQUALS  = 1,
81 82 83
  TS_JOIN_TAG_NOT_EQUALS = 2,
};

84
typedef struct {
85 86 87 88 89 90
  int32_t     status;       // query status
  TSKEY       lastKey;      // the lastKey value before query executed
  STimeWindow w;            // whole query time window
  STimeWindow curWindow;    // current query window
  int32_t     windowIndex;  // index of active time window result for interval query
  STSCursor   cur;
91 92
} SQueryStatusInfo;

93
#define CLEAR_QUERY_STATUS(q, st)   ((q)->status &= (~(st)))
94
static void setQueryStatus(SQuery *pQuery, int8_t status);
95

H
hjxilinx 已提交
96
static bool isIntervalQuery(SQuery *pQuery) { return pQuery->intervalTime > 0; }
97

H
hjxilinx 已提交
98
// todo move to utility
99
static int32_t mergeIntoGroupResultImpl(SQInfo *pQInfo, SArray *group);
100

H
hjxilinx 已提交
101
static void setWindowResOutputBuf(SQueryRuntimeEnv *pRuntimeEnv, SWindowResult *pResult);
H
Haojun Liao 已提交
102
static void setWindowResOutputBufInitCtx(SQueryRuntimeEnv *pRuntimeEnv, SWindowResult *pResult);
103 104 105
static void resetMergeResultBuf(SQuery *pQuery, SQLFunctionCtx *pCtx, SResultInfo *pResultInfo);
static bool functionNeedToExecute(SQueryRuntimeEnv *pRuntimeEnv, SQLFunctionCtx *pCtx, int32_t functionId);
static void getNextTimeWindow(SQuery *pQuery, STimeWindow *pTimeWindow);
106

107 108 109
static void setExecParams(SQuery *pQuery, SQLFunctionCtx *pCtx, void* inputData, TSKEY *tsCol, SDataBlockInfo* pBlockInfo,
                          SDataStatis *pStatis, void *param, int32_t colIndex);

110
static void initCtxOutputBuf(SQueryRuntimeEnv *pRuntimeEnv);
111
static void destroyTableQueryInfo(STableQueryInfo *pTableQueryInfo, int32_t numOfCols);
112 113
static void resetCtxOutputBuf(SQueryRuntimeEnv *pRuntimeEnv);
static bool hasMainOutput(SQuery *pQuery);
H
hjxilinx 已提交
114
static void buildTagQueryResult(SQInfo *pQInfo);
115

116
static int32_t setAdditionalInfo(SQInfo *pQInfo, void *pTable, STableQueryInfo *pTableQueryInfo);
117
static int32_t flushFromResultBuf(SQInfo *pQInfo);
118

119
/*
 * Decide whether the row at elemPos passes every column filter attached to the query.
 *
 * For each filtered column the value is located at pData + bytes * elemPos. A NULL value
 * disqualifies the row immediately; otherwise the value must be accepted by at least one
 * of the column's filter elements. Returns true only if all filtered columns accept the row.
 */
bool doFilterData(SQuery *pQuery, int32_t elemPos) {
  for (int32_t col = 0; col < pQuery->numOfFilterCols; ++col) {
    SSingleColumnFilterInfo *pColFilter = &pQuery->pFilterInfo[col];
    char *pValue = pColFilter->pData + pColFilter->info.bytes * elemPos;

    if (isNull(pValue, pColFilter->info.type)) {
      return false;
    }

    // the filters of one column are OR-ed: stop at the first one that accepts the value
    int32_t idx = 0;
    while (idx < pColFilter->numOfFilters) {
      SColumnFilterElem *pCond = &pColFilter->pFilters[idx];
      if (pCond->fp(pCond, pValue, pValue)) {
        break;
      }
      ++idx;
    }

    // every filter of this column has been tried without a match
    if (idx >= pColFilter->numOfFilters) {
      return false;
    }
  }

  return true;
}

/*
 * Return the largest numOfRes found among the output function contexts.
 *
 * When the query has a main output (hasMainOutput), the ts/tag/tagprj functions are
 * ignored, because they cannot decide the number of output rows by themselves.
 */
int64_t getNumOfResult(SQueryRuntimeEnv *pRuntimeEnv) {
  SQuery *pQuery = pRuntimeEnv->pQuery;
  bool    mainOutputExists = hasMainOutput(pQuery);
  int64_t largest = 0;

  for (int32_t idx = 0; idx < pQuery->numOfOutput; ++idx) {
    int32_t fid = pQuery->pSelectExpr[idx].base.functionId;

    bool auxiliary = (fid == TSDB_FUNC_TS) || (fid == TSDB_FUNC_TAG) || (fid == TSDB_FUNC_TAGPRJ);
    if (!(mainOutputExists && auxiliary)) {
      SResultInfo *pInfo = GET_RES_INFO(&pRuntimeEnv->pCtx[idx]);
      if (pInfo != NULL && pInfo->numOfRes > largest) {
        largest = pInfo->numOfRes;
      }
    }
  }

  assert(largest >= 0);
  return largest;
}

173 174 175 176 177 178 179 180 181
/*
 * Overwrite the number of results of every output function with numOfRes; this is needed
 * after the offset value has been updated.
 *
 * The ts, tag, tagprj and ts_dummy functions are left untouched. For all other functions
 * the current numOfRes is asserted to be larger than the new value.
 */
void updateNumOfResult(SQueryRuntimeEnv *pRuntimeEnv, int32_t numOfRes) {
  SQuery *pQuery = pRuntimeEnv->pQuery;

  for (int32_t idx = 0; idx < pQuery->numOfOutput; ++idx) {
    SQLFunctionCtx *pFuncCtx = &pRuntimeEnv->pCtx[idx];
    SResultInfo    *pInfo = GET_RES_INFO(pFuncCtx);

    int16_t fid = pFuncCtx->functionId;
    bool    skipped = (fid == TSDB_FUNC_TS) || (fid == TSDB_FUNC_TAG) || (fid == TSDB_FUNC_TAGPRJ) ||
                      (fid == TSDB_FUNC_TS_DUMMY);

    if (!skipped) {
      assert(pInfo->numOfRes > numOfRes);
      pInfo->numOfRes = numOfRes;
    }
  }
}

193 194 195 196 197 198 199 200 201
/* Map a group index to its result id: ids start at 200000 and are spaced 10000 apart. */
static int32_t getGroupResultId(int32_t groupIndex) {
  enum { GROUP_RESULT_ID_BASE = 200000, GROUP_RESULT_ID_STRIDE = 10000 };
  return GROUP_RESULT_ID_BASE + (groupIndex * GROUP_RESULT_ID_STRIDE);
}

/*
 * Return true if the group-by expression contains at least one normal (non-tag) column.
 * A NULL or empty group-by expression yields false.
 */
bool isGroupbyNormalCol(SSqlGroupbyExpr *pGroupbyExpr) {
  if (pGroupbyExpr == NULL || pGroupbyExpr->numOfGroupCols == 0) {
    return false;
  }

  for (int32_t idx = 0; idx < pGroupbyExpr->numOfGroupCols; ++idx) {
    SColIndex *pIdx = taosArrayGet(pGroupbyExpr->columnInfo, idx);
    if (pIdx->flag != TSDB_COL_NORMAL) {
      continue;
    }

    // make sure the normal column locates at the second position if tbname exists in group by clause
    assert(pGroupbyExpr->numOfGroupCols <= 1 || pIdx->colIndex > 0);
    return true;
  }

  return false;
}

/*
 * Return the data type of the first normal column in the group-by expression, looked up
 * by column id in pQuery->colList. TSDB_DATA_TYPE_NULL is returned when no entry of
 * colList matches.
 */
int16_t getGroupbyColumnType(SQuery *pQuery, SSqlGroupbyExpr *pGroupbyExpr) {
  assert(pGroupbyExpr != NULL);

  // -2 is kept as the "no normal column found" id
  int32_t groupColId = -2;
  for (int32_t g = 0; g < pGroupbyExpr->numOfGroupCols; ++g) {
    SColIndex *pIdx = taosArrayGet(pGroupbyExpr->columnInfo, g);
    if (pIdx->flag == TSDB_COL_NORMAL) {
      groupColId = pIdx->colId;
      break;
    }
  }

  for (int32_t c = 0; c < pQuery->numOfCols; ++c) {
    if (pQuery->colList[c].colId == groupColId) {
      return pQuery->colList[c].type;
    }
  }

  return TSDB_DATA_TYPE_NULL;
}

/*
 * Return true if the select list holds at least one selectivity function (according to
 * aAggs[].nStatus) together with at least one tag_dummy/ts_dummy output.
 */
bool isSelectivityWithTagsQuery(SQuery *pQuery) {
  int32_t selectivityCount = 0;
  bool    tagsPresent = false;

  for (int32_t idx = 0; idx < pQuery->numOfOutput; ++idx) {
    int32_t fid = pQuery->pSelectExpr[idx].base.functionId;

    if (fid == TSDB_FUNC_TAG_DUMMY || fid == TSDB_FUNC_TS_DUMMY) {
      tagsPresent = true;
    } else if ((aAggs[fid].nStatus & TSDB_FUNCSTATE_SELECTIVITY) != 0) {
      selectivityCount++;
    }
  }

  return (selectivityCount > 0) && tagsPresent;
}

267
/* Return true if the first select expression is the TSDB_FUNC_TS_COMP function. */
bool isTSCompQuery(SQuery *pQuery) {
  int32_t firstFunctionId = pQuery->pSelectExpr[0].base.functionId;
  return firstFunctionId == TSDB_FUNC_TS_COMP;
}
268

269 270 271 272
/*
 * Apply the LIMIT clause to the rows produced in the current round.
 *
 * If a positive limit is set and total + rows exceeds it, rows is cut down so that the
 * limit is met exactly, the query is marked QUERY_COMPLETED and true is returned.
 * Otherwise nothing is changed and false is returned.
 */
static bool limitResults(SQueryRuntimeEnv* pRuntimeEnv) {
  SQInfo* pQInfo = GET_QINFO_ADDR(pRuntimeEnv);
  SQuery* pQuery = pRuntimeEnv->pQuery;

  bool exceeded = (pQuery->limit.limit > 0) && (pQuery->rec.total + pQuery->rec.rows > pQuery->limit.limit);
  if (!exceeded) {
    return false;
  }

  // keep only as many rows as are still allowed by the limit
  pQuery->rec.rows = pQuery->limit.limit - pQuery->rec.total;

  qTrace("QInfo:%p discard remain data due to result limitation, limit:%"PRId64", current return:%" PRId64 ", total:%"PRId64,
      pQInfo, pQuery->limit.limit, pQuery->rec.rows, pQuery->rec.total + pQuery->rec.rows);
  assert(pQuery->rec.rows >= 0);

  setQueryStatus(pQuery, QUERY_COMPLETED);
  return true;
}

/* Return true if any output function other than ts is a top or bottom function. */
static bool isTopBottomQuery(SQuery *pQuery) {
  int32_t idx = 0;

  while (idx < pQuery->numOfOutput) {
    int32_t fid = pQuery->pSelectExpr[idx].base.functionId;

    if (fid != TSDB_FUNC_TS && (fid == TSDB_FUNC_TOP || fid == TSDB_FUNC_BOTTOM)) {
      return true;
    }

    ++idx;
  }

  return false;
}

H
Haojun Liao 已提交
301
/*
 * Find the statistics entry that belongs to the source column of select expression `index`.
 *
 * A tag column has no corresponding field info, so NULL is returned for it. For other
 * columns the entry is chosen by column id, since the file block may be out of date,
 * i.e. the newest table schema may differ from the schema of this block. NULL is returned
 * when none of the numOfCols entries matches.
 *
 * TODO: speedup by using bsearch
 */
static SDataStatis *getStatisInfo(SQuery *pQuery, SDataStatis *pStatis, int32_t numOfCols, int32_t index) {
  SColIndex   *pIdx = &pQuery->pSelectExpr[index].base.colInfo;
  SDataStatis *pMatch = NULL;

  if (!TSDB_COL_IS_TAG(pIdx->flag)) {
    for (int32_t s = 0; s < numOfCols && pMatch == NULL; ++s) {
      if (pStatis[s].colId == pIdx->colId) {
        pMatch = &pStatis[s];
      }
    }
  }

  return pMatch;
}

322 323 324 325 326 327 328 329
/**
 * @param pQuery
 * @param col
 * @param pDataBlockInfo
 * @param pStatis
 * @param pColStatis
 * @return
 */
H
Haojun Liao 已提交
330
static bool hasNullValue(SQuery *pQuery, int32_t col, int32_t numOfCols, SDataStatis *pStatis, SDataStatis **pColStatis) {
331
  SColIndex *pColIndex = &pQuery->pSelectExpr[col].base.colInfo;
332
  if (TSDB_COL_IS_TAG(pColIndex->flag)) {
333 334
    return false;
  }
335

336 337 338 339
  // query on primary timestamp column, not null value at all
  if (pColIndex->colId == PRIMARYKEY_TIMESTAMP_COL_INDEX) {
    return false;
  }
340

341
  if (pStatis != NULL) {
H
Haojun Liao 已提交
342
    *pColStatis = getStatisInfo(pQuery, pStatis, numOfCols, col);
H
hjxilinx 已提交
343 344
  } else {
    *pColStatis = NULL;
345
  }
346

347 348 349
  if ((*pColStatis) != NULL && (*pColStatis)->numOfNull == 0) {
    return false;
  }
350

351 352 353 354 355 356
  return true;
}

/*
 * Locate the window result that belongs to the given key, or create a new one.
 *
 * The key (pData, bytes) is looked up in pWindowResInfo->hashList. On a hit the stored
 * slot index becomes the current index. On a miss a new slot is appended, growing the
 * result array to twice its capacity when it is full, and the key -> slot mapping is
 * added to the hash list.
 *
 * Returns the window result of the current slot, or NULL if the result array could not be
 * enlarged. In that case pWindowResInfo is left unchanged (the old array remains valid).
 */
static SWindowResult *doSetTimeWindowFromKey(SQueryRuntimeEnv *pRuntimeEnv, SWindowResInfo *pWindowResInfo, char *pData,
                                             int16_t bytes) {
  SQuery *pQuery = pRuntimeEnv->pQuery;

  int32_t *p1 = (int32_t *) taosHashGet(pWindowResInfo->hashList, pData, bytes);
  if (p1 != NULL) {
    pWindowResInfo->curIndex = *p1;
    return getWindowResult(pWindowResInfo, pWindowResInfo->curIndex);
  }

  // more than the capacity, reallocate the resources
  if (pWindowResInfo->size >= pWindowResInfo->capacity) {
    int64_t oldCap = pWindowResInfo->capacity;
    // never ask realloc for zero bytes: its behavior is implementation-defined in that case
    int64_t newCap = (oldCap > 0) ? oldCap * 2 : 1;

    // keep the original pointer until the reallocation is known to have succeeded
    void *t = realloc(pWindowResInfo->pResult, (size_t)newCap * sizeof(SWindowResult));
    if (t == NULL) {
      // out of memory: the old buffer is still owned by pWindowResInfo, report the failure
      return NULL;
    }

    pWindowResInfo->pResult = (SWindowResult *)t;
    memset(&pWindowResInfo->pResult[oldCap], 0, sizeof(SWindowResult) * (size_t)(newCap - oldCap));

    for (int64_t i = oldCap; i < newCap; ++i) {
      SPosInfo pos = {-1, -1};
      createQueryResultInfo(pQuery, &pWindowResInfo->pResult[i], pRuntimeEnv->stableQuery, &pos);
    }

    pWindowResInfo->capacity = newCap;
  }

  // add a new result set for a new group
  pWindowResInfo->curIndex = pWindowResInfo->size++;
  taosHashPut(pWindowResInfo->hashList, pData, bytes, (char *)&pWindowResInfo->curIndex, sizeof(int32_t));

  return getWindowResult(pWindowResInfo, pWindowResInfo->curIndex);
}

/*
 * Get the time window that covers the timestamp ts.
 *
 * The search starts from the previously stored start key (when no window is active yet)
 * or from the window of the current slot, and moves it in multiples of slidingTime until
 * ts falls inside [skey, skey + intervalTime - 1].
 */
static STimeWindow getActiveTimeWindow(SWindowResInfo *pWindowResInfo, int64_t ts, SQuery *pQuery) {
  STimeWindow win = {0};

  if (pWindowResInfo->curIndex != -1) {
    int32_t slot = curTimeWindow(pWindowResInfo);
    win = getWindowResult(pWindowResInfo, slot)->window;
  } else {  // the first window, from the previous stored value
    win.skey = pWindowResInfo->prevSKey;
    win.ekey = win.skey + pQuery->intervalTime - 1;
  }

  bool covered = (win.skey <= ts) && (ts <= win.ekey);
  if (!covered) {
    int64_t start = win.skey;

    // window starts after ts: slide backwards
    if (start > ts) {
      start -= ((start - ts + pQuery->slidingTime - 1) / pQuery->slidingTime) * pQuery->slidingTime;
    }

    // window ends before ts: slide forwards
    int64_t end = start + pQuery->intervalTime - 1;
    if (end < ts) {
      start += ((ts - end + pQuery->slidingTime - 1) / pQuery->slidingTime) * pQuery->slidingTime;
    }

    win.skey = start;
    win.ekey = win.skey + pQuery->intervalTime - 1;
  }

  /*
   * query border check, skey should not be bounded by the query time range, since the value skey will
   * be used as the time window index value. So we only change ekey of time window accordingly.
   */
  if (QUERY_IS_ASC_QUERY(pQuery) && win.ekey > pQuery->window.ekey) {
    win.ekey = pQuery->window.ekey;
  }

  assert(ts >= win.skey && ts <= win.ekey);

  return win;
}

/*
 * Assign a (page, row) position in the disk-based result buffer to a window result that
 * has none yet.
 *
 * The last page of the group `sid` is reused while it holds fewer than numOfRowsPerPage
 * rows; otherwise (or if the group has no page at all) a new page is requested.
 *
 * Returns 0 on success or if a position was already assigned, -1 if no page is available.
 */
static int32_t addNewWindowResultBuf(SWindowResult *pWindowRes, SDiskbasedResultBuf *pResultBuf, int32_t sid,
                                     int32_t numOfRowsPerPage) {
  if (pWindowRes->pos.pageId != -1) {
    return 0;
  }

  // in the first scan, new space needed for results
  tFilePage *pPage = NULL;
  int32_t    pageId = -1;
  SIDList    list = getDataBufPagesIdList(pResultBuf, sid);

  if (list.size != 0) {
    pageId = getLastPageId(&list);
    pPage = GET_RES_BUF_PAGE_BY_ID(pResultBuf, pageId);

    if (pPage->num >= numOfRowsPerPage) {
      pPage = getNewDataBuf(pResultBuf, sid, &pageId);
      // number of elements must be 0 for new allocated buffer
      assert(pPage == NULL || pPage->num == 0);
    }
  } else {
    pPage = getNewDataBuf(pResultBuf, sid, &pageId);
  }

  if (pPage == NULL) {
    return -1;
  }

  // set the number of rows in current disk page
  if (pWindowRes->pos.pageId == -1) {  // not allocated yet, allocate new buffer
    pWindowRes->pos.pageId = pageId;
    pWindowRes->pos.rowId = pPage->num;
    pPage->num += 1;
  }

  return 0;
}

/*
 * Make the result of the time window `win` the active output buffer of the runtime env.
 *
 * The window result is looked up (or created) by the window start key, gets a result
 * buffer position if it has none yet, and is then bound to the function contexts.
 *
 * Returns TSDB_CODE_SUCCESS, or -1 if the window result or its buffer cannot be obtained.
 */
static int32_t setWindowOutputBufByKey(SQueryRuntimeEnv *pRuntimeEnv, SWindowResInfo *pWindowResInfo, int32_t sid,
                                       STimeWindow *win) {
  assert(win->skey <= win->ekey);
  SDiskbasedResultBuf *pBuf = pRuntimeEnv->pResultBuf;

  SWindowResult *pRes = doSetTimeWindowFromKey(pRuntimeEnv, pWindowResInfo, (char *)&win->skey, TSDB_KEYSIZE);
  if (pRes == NULL) {
    return -1;
  }

  // not assign result buffer yet, add new result buffer
  if (pRes->pos.pageId == -1 && addNewWindowResultBuf(pRes, pBuf, sid, pRuntimeEnv->numOfRowsPerPage) != 0) {
    return -1;
  }

  // set time window for current result
  pRes->window = *win;

  setWindowResOutputBufInitCtx(pRuntimeEnv, pRes);
  return TSDB_CODE_SUCCESS;
}

/* Return the status of the window result stored in `slot`; slot must be in [0, size). */
static SWindowStatus *getTimeWindowResStatus(SWindowResInfo *pWindowResInfo, int32_t slot) {
  assert(slot >= 0 && slot < pWindowResInfo->size);

  SWindowResult *pRes = &pWindowResInfo->pResult[slot];
  return &pRes->status;
}

H
Haojun Liao 已提交
498
/*
 * Count the rows between `pos` and the position that searchFn reports for `ekey`.
 *
 * A negative position from searchFn yields 0. Otherwise the distance from pos to that
 * position (in scan direction) is returned, plus one if the key stored there equals ekey,
 * since that row has to be read as well.
 */
static int32_t getForwardStepsInBlock(int32_t numOfRows, __block_search_fn_t searchFn, TSKEY ekey, int16_t pos,
                                      int16_t order, int64_t *pData) {
  int32_t endPos = searchFn((char *)pData, numOfRows, ekey, order);
  if (endPos < 0) {
    return 0;
  }

  int32_t steps;
  if (order == TSDB_ORDER_ASC) {
    steps = endPos - pos;
  } else {
    steps = pos - endPos;
  }
  assert(steps >= 0);

  // endPos data is equalled to the key so, we do need to read the element in endPos
  if (pData[endPos] == ekey) {
    steps += 1;
  }

  return steps;
}

/**
 * NOTE: the query status only set for the first scan of master scan.
 */
519
static int32_t doCheckQueryCompleted(SQueryRuntimeEnv *pRuntimeEnv, TSKEY lastKey, SWindowResInfo *pWindowResInfo) {
520 521
  SQuery *pQuery = pRuntimeEnv->pQuery;
  if (pRuntimeEnv->scanFlag != MASTER_SCAN || (!isIntervalQuery(pQuery))) {
522
    return pWindowResInfo->size;
523
  }
524

525
  // no qualified results exist, abort check
526 527
  int32_t numOfClosed = 0;
  
528
  if (pWindowResInfo->size == 0) {
529
    return pWindowResInfo->size;
530
  }
531

532
  // query completed
H
hjxilinx 已提交
533 534
  if ((lastKey >= pQuery->current->win.ekey && QUERY_IS_ASC_QUERY(pQuery)) ||
      (lastKey <= pQuery->current->win.ekey && !QUERY_IS_ASC_QUERY(pQuery))) {
535
    closeAllTimeWindow(pWindowResInfo);
536

537 538 539 540
    pWindowResInfo->curIndex = pWindowResInfo->size - 1;
    setQueryStatus(pQuery, QUERY_COMPLETED | QUERY_RESBUF_FULL);
  } else {  // set the current index to be the last unclosed window
    int32_t i = 0;
541
    int64_t skey = TSKEY_INITIAL_VAL;
542

543 544 545
    for (i = 0; i < pWindowResInfo->size; ++i) {
      SWindowResult *pResult = &pWindowResInfo->pResult[i];
      if (pResult->status.closed) {
546
        numOfClosed += 1;
547 548
        continue;
      }
549

550 551 552 553 554 555 556 557
      if ((pResult->window.ekey <= lastKey && QUERY_IS_ASC_QUERY(pQuery)) ||
          (pResult->window.skey >= lastKey && !QUERY_IS_ASC_QUERY(pQuery))) {
        closeTimeWindow(pWindowResInfo, i);
      } else {
        skey = pResult->window.skey;
        break;
      }
    }
558

559
    // all windows are closed, set the last one to be the skey
560
    if (skey == TSKEY_INITIAL_VAL) {
561 562 563 564 565
      assert(i == pWindowResInfo->size);
      pWindowResInfo->curIndex = pWindowResInfo->size - 1;
    } else {
      pWindowResInfo->curIndex = i;
    }
566

567
    pWindowResInfo->prevSKey = pWindowResInfo->pResult[pWindowResInfo->curIndex].window.skey;
568

569 570 571 572 573
    // the number of completed slots are larger than the threshold, return current generated results to client.
    if (numOfClosed > pWindowResInfo->threshold) {
      qTrace("QInfo:%p total result window:%d closed:%d, reached the output threshold %d, return",
          GET_QINFO_ADDR(pRuntimeEnv), pWindowResInfo->size, numOfClosed, pQuery->rec.threshold);
      
574
      setQueryStatus(pQuery, QUERY_RESBUF_FULL);
575 576 577
    } else {
      qTrace("QInfo:%p total result window:%d already closed:%d", GET_QINFO_ADDR(pRuntimeEnv), pWindowResInfo->size,
             numOfClosed);
578 579
    }
  }
580 581 582 583 584 585 586
  
  // output has reached the limitation, set query completed
  if (pQuery->limit.limit > 0 && (pQuery->limit.limit + pQuery->limit.offset) <= numOfClosed &&
      pRuntimeEnv->scanFlag == MASTER_SCAN) {
    setQueryStatus(pQuery, QUERY_COMPLETED);
  }
  
587
  assert(pWindowResInfo->prevSKey != TSKEY_INITIAL_VAL);
588
  return numOfClosed;
589 590 591
}

/*
 * Count the rows of the current block, starting at startPos and following the scan order,
 * that belong to the time window ending at ekey.
 *
 * If the window ends inside the block, the count comes from getForwardStepsInBlock;
 * otherwise all remaining rows in scan direction are taken. When updateLastKey is set and
 * at least one row qualifies, pQuery->current->lastKey is moved one step past the last
 * qualified row (or past the block border when the whole remainder is taken).
 */
static int32_t getNumOfRowsInTimeWindow(SQuery *pQuery, SDataBlockInfo *pDataBlockInfo, TSKEY *pPrimaryColumn,
                                        int32_t startPos, TSKEY ekey, __block_search_fn_t searchFn, bool updateLastKey) {
  assert(startPos >= 0 && startPos < pDataBlockInfo->rows);

  int32_t order = pQuery->order.order;
  int32_t step = GET_FORWARD_DIRECTION_FACTOR(order);
  int32_t rows = -1;

  STableQueryInfo *pTableInfo = pQuery->current;

  if (!QUERY_IS_ASC_QUERY(pQuery)) {  // desc
    if (ekey <= pDataBlockInfo->window.skey) {
      // the window reaches beyond the block start: take everything down to row 0
      rows = startPos + 1;
      if (updateLastKey) {
        pTableInfo->lastKey = pDataBlockInfo->window.skey + step;
      }
    } else {
      rows = getForwardStepsInBlock(pDataBlockInfo->rows, searchFn, ekey, startPos, order, pPrimaryColumn);
      if (rows != 0) {
        if (updateLastKey) {
          pTableInfo->lastKey = pPrimaryColumn[startPos - (rows - 1)] + step;
        }
      } else {  // no qualified data in current block, do not update the lastKey value
        assert(ekey > pPrimaryColumn[startPos]);
      }
    }
  } else {  // asc
    if (ekey >= pDataBlockInfo->window.ekey) {
      // the window reaches beyond the block end: take everything up to the last row
      rows = pDataBlockInfo->rows - startPos;
      if (updateLastKey) {
        pTableInfo->lastKey = pDataBlockInfo->window.ekey + step;
      }
    } else {
      rows = getForwardStepsInBlock(pDataBlockInfo->rows, searchFn, ekey, startPos, order, pPrimaryColumn);
      if (rows != 0) {
        if (updateLastKey) {
          pTableInfo->lastKey = pPrimaryColumn[startPos + (rows - 1)] + step;
        }
      } else {  // no qualified data in current block, do not update the lastKey value
        assert(ekey < pPrimaryColumn[startPos]);
      }
    }
  }

  assert(rows >= 0);
  return rows;
}

/*
 * Run the block-wise implementation (xFunction) of every output function on forwardStep
 * rows of the current block for the time window pWin.
 *
 * Nothing is done unless this is the master scan or the window is already closed.
 * When fewer rows than the whole block (numOfTotal) are involved, the pre-aggregated
 * statistics of the block are disabled for the function.
 */
static void doBlockwiseApplyFunctions(SQueryRuntimeEnv *pRuntimeEnv, SWindowStatus *pStatus, STimeWindow *pWin,
                                      int32_t offset, int32_t forwardStep, TSKEY *tsBuf, int32_t numOfTotal) {
  SQuery         *pQuery = pRuntimeEnv->pQuery;
  SQLFunctionCtx *pCtxList = pRuntimeEnv->pCtx;

  if (!IS_MASTER_SCAN(pRuntimeEnv) && !pStatus->closed) {
    return;
  }

  for (int32_t idx = 0; idx < pQuery->numOfOutput; ++idx) {
    SQLFunctionCtx *pFuncCtx = &pCtxList[idx];
    int32_t         fid = pQuery->pSelectExpr[idx].base.functionId;

    pFuncCtx->nStartQueryTimestamp = pWin->skey;
    pFuncCtx->size = forwardStep;

    // startOffset is always the lowest row index of the involved range
    if (QUERY_IS_ASC_QUERY(pQuery)) {
      pFuncCtx->startOffset = offset;
    } else {
      pFuncCtx->startOffset = offset - (forwardStep - 1);
    }

    if ((aAggs[fid].nStatus & TSDB_FUNCSTATE_SELECTIVITY) != 0) {
      pFuncCtx->ptsList = &tsBuf[offset];
    }

    // not a whole block involved in query processing, statistics data can not be used
    if (forwardStep != numOfTotal) {
      pFuncCtx->preAggVals.isSet = false;
    }

    if (functionNeedToExecute(pRuntimeEnv, pFuncCtx, fid)) {
      aAggs[fid].xFunction(pFuncCtx);
    }
  }
}

/*
 * Run the row-wise implementation (xFunctionF) of every output function on the row at
 * `offset` for the time window pWin.
 *
 * Nothing is done unless this is the master scan or the window is already closed.
 */
static void doRowwiseApplyFunctions(SQueryRuntimeEnv *pRuntimeEnv, SWindowStatus *pStatus, STimeWindow *pWin,
                                    int32_t offset) {
  SQuery         *pQuery = pRuntimeEnv->pQuery;
  SQLFunctionCtx *pCtxList = pRuntimeEnv->pCtx;

  if (!IS_MASTER_SCAN(pRuntimeEnv) && !pStatus->closed) {
    return;
  }

  for (int32_t idx = 0; idx < pQuery->numOfOutput; ++idx) {
    SQLFunctionCtx *pFuncCtx = &pCtxList[idx];
    pFuncCtx->nStartQueryTimestamp = pWin->skey;

    int32_t fid = pQuery->pSelectExpr[idx].base.functionId;
    if (functionNeedToExecute(pRuntimeEnv, pFuncCtx, fid)) {
      aAggs[fid].xFunctionF(pFuncCtx, offset);
    }
  }
}

static int32_t getNextQualifiedWindow(SQueryRuntimeEnv *pRuntimeEnv, STimeWindow *pNextWin,
686 687
                                      SDataBlockInfo *pDataBlockInfo, TSKEY *primaryKeys,
                                      __block_search_fn_t searchFn) {
688
  SQuery *pQuery = pRuntimeEnv->pQuery;
689

H
Haojun Liao 已提交
690 691 692 693
  // tumbling time window query, a special case of sliding time window query
  if (pQuery->slidingTime == pQuery->intervalTime) {
    // todo opt
  }
694

H
Haojun Liao 已提交
695
  getNextTimeWindow(pQuery, pNextWin);
696

H
Haojun Liao 已提交
697 698 699 700 701
  // next time window is not in current block
  if ((pNextWin->skey > pDataBlockInfo->window.ekey && QUERY_IS_ASC_QUERY(pQuery)) ||
      (pNextWin->ekey < pDataBlockInfo->window.skey && !QUERY_IS_ASC_QUERY(pQuery))) {
    return -1;
  }
702

H
Haojun Liao 已提交
703 704 705 706 707
  TSKEY startKey = -1;
  if (QUERY_IS_ASC_QUERY(pQuery)) {
    startKey = pNextWin->skey;
    if (startKey < pQuery->window.skey) {
      startKey = pQuery->window.skey;
708
    }
H
Haojun Liao 已提交
709 710 711 712
  } else {
    startKey = pNextWin->ekey;
    if (startKey > pQuery->window.skey) {
      startKey = pQuery->window.skey;
713
    }
H
Haojun Liao 已提交
714
  }
715

H
Haojun Liao 已提交
716
  int32_t startPos = searchFn((char *)primaryKeys, pDataBlockInfo->rows, startKey, pQuery->order.order);
717

H
Haojun Liao 已提交
718 719 720 721 722 723
  /*
   * This time window does not cover any data, try next time window,
   * this case may happen when the time window is too small
   */
  if (QUERY_IS_ASC_QUERY(pQuery) && primaryKeys[startPos] > pNextWin->ekey) {
    TSKEY next = primaryKeys[startPos];
724

H
Haojun Liao 已提交
725 726 727 728
    pNextWin->ekey += ((next - pNextWin->ekey + pQuery->slidingTime - 1)/pQuery->slidingTime) * pQuery->slidingTime;
    pNextWin->skey = pNextWin->ekey - pQuery->intervalTime + 1;
  } else if ((!QUERY_IS_ASC_QUERY(pQuery)) && primaryKeys[startPos] < pNextWin->skey) {
    TSKEY next = primaryKeys[startPos];
729

H
Haojun Liao 已提交
730 731
    pNextWin->skey -= ((pNextWin->skey - next + pQuery->slidingTime - 1) / pQuery->slidingTime) * pQuery->slidingTime;
    pNextWin->ekey = pNextWin->skey + pQuery->intervalTime - 1;
732
  }
733

H
Haojun Liao 已提交
734
  return startPos;
735 736 737 738 739 740 741 742 743 744 745 746 747 748 749
}

/*
 * Return the end key of pWindow in scan direction, bounded by the query end key:
 * min(window ekey, query ekey) for ascending queries, max(window skey, query ekey) otherwise.
 */
static TSKEY reviseWindowEkey(SQuery *pQuery, STimeWindow *pWindow) {
  TSKEY bound = pQuery->window.ekey;

  if (QUERY_IS_ASC_QUERY(pQuery)) {
    return (pWindow->ekey > bound) ? bound : pWindow->ekey;
  }

  return (pWindow->skey < bound) ? bound : pWindow->skey;
}

H
hjxilinx 已提交
754 755 756 757 758 759 760 761 762 763 764 765 766 767 768
/*
 * Return the data of the column with id colId from the array of SColumnInfoData,
 * or NULL if the array holds no such column.
 * todo binary search
 */
static void* getDataBlockImpl(SArray* pDataBlock, int32_t colId) {
  int32_t total = taosArrayGetSize(pDataBlock);
  void   *pFound = NULL;

  for (int32_t idx = 0; idx < total && pFound == NULL; ++idx) {
    SColumnInfoData *pCol = taosArrayGet(pDataBlock, idx);
    if (pCol->info.colId == colId) {
      // a matching column ends the search even if its data pointer is NULL
      return pCol->pData;
    }
  }

  return pFound;
}

static char *getDataBlock(SQueryRuntimeEnv *pRuntimeEnv, SArithmeticSupport *sas, int32_t col, int32_t size,
769
                    SArray *pDataBlock) {
770
  char *dataBlock = NULL;
771
  SQuery *pQuery = pRuntimeEnv->pQuery;
772

773
  SQLFunctionCtx *pCtx = pRuntimeEnv->pCtx;
774

775
  int32_t functionId = pQuery->pSelectExpr[col].base.functionId;
776
  if (functionId == TSDB_FUNC_ARITHM) {
777
    sas->pArithExpr = &pQuery->pSelectExpr[col];
778

779 780 781 782 783 784
    // set the start offset to be the lowest start position, no matter asc/desc query order
    if (QUERY_IS_ASC_QUERY(pQuery)) {
      pCtx->startOffset = pQuery->pos;
    } else {
      pCtx->startOffset = pQuery->pos - (size - 1);
    }
785

786 787 788 789
    sas->offset  = 0;
    sas->colList = pQuery->colList;
    sas->numOfCols = pQuery->numOfCols;
    sas->data    = calloc(pQuery->numOfCols, POINTER_BYTES);
790

791
    // here the pQuery->colList and sas->colList are identical
H
Haojun Liao 已提交
792
    int32_t numOfCols = taosArrayGetSize(pDataBlock);
793
    for (int32_t i = 0; i < pQuery->numOfCols; ++i) {
794
      SColumnInfo *pColMsg = &pQuery->colList[i];
795

796 797 798 799 800 801 802 803
      dataBlock = NULL;
      for (int32_t k = 0; k < numOfCols; ++k) {  //todo refactor
        SColumnInfoData *p = taosArrayGet(pDataBlock, k);
        if (pColMsg->colId == p->info.colId) {
          dataBlock = p->pData;
          break;
        }
      }
804

805
      assert(dataBlock != NULL);
H
Haojun Liao 已提交
806
      sas->data[i] = dataBlock/* + pQuery->colList[i].bytes*/;  // start from the offset
807
    }
808

809
  } else {  // other type of query function
810
    SColIndex *pCol = &pQuery->pSelectExpr[col].base.colInfo;
811
    if (TSDB_COL_IS_TAG(pCol->flag) || pDataBlock == NULL) {
812 813
      dataBlock = NULL;
    } else {
H
hjxilinx 已提交
814
      dataBlock = getDataBlockImpl(pDataBlock, pCol->colId);
815 816
    }
  }
817

818 819 820 821 822 823 824
  return dataBlock;
}

/**
 *
 * @param pRuntimeEnv
 * @param forwardStep
825
 * @param tsCols
826 827 828 829 830
 * @param pFields
 * @param isDiskFileBlock
 * @return                  the incremental number of output value, so it maybe 0 for fixed number of query,
 *                          such as count/min/max etc.
 */
831
static void blockwiseApplyFunctions(SQueryRuntimeEnv *pRuntimeEnv, SDataStatis *pStatis,
832 833
                                       SDataBlockInfo *pDataBlockInfo, SWindowResInfo *pWindowResInfo,
                                       __block_search_fn_t searchFn, SArray *pDataBlock) {
834
  SQLFunctionCtx *pCtx = pRuntimeEnv->pCtx;
835 836 837
  
  SQuery *pQuery = pRuntimeEnv->pQuery;
  TSKEY  *tsCols = NULL;
838
  if (pDataBlock != NULL) {
839
    SColumnInfoData* pColInfo = taosArrayGet(pDataBlock, 0);
840
    tsCols = (TSKEY *)(pColInfo->pData);
841
  }
842

843
  SArithmeticSupport *sasArray = calloc((size_t)pQuery->numOfOutput, sizeof(SArithmeticSupport));
844

845
  for (int32_t k = 0; k < pQuery->numOfOutput; ++k) {
H
hjxilinx 已提交
846
    char *dataBlock = getDataBlock(pRuntimeEnv, &sasArray[k], k, pDataBlockInfo->rows, pDataBlock);
847
    setExecParams(pQuery, &pCtx[k], dataBlock, tsCols, pDataBlockInfo, pStatis, &sasArray[k], k);
848
  }
849

850
  int32_t step = GET_FORWARD_DIRECTION_FACTOR(pQuery->order.order);
dengyihao's avatar
dengyihao 已提交
851
  if (isIntervalQuery(pQuery) && tsCols != NULL) {
852
    int32_t offset = GET_COL_DATA_POS(pQuery, 0, step);
853
    TSKEY   ts = tsCols[offset];
854

855
    STimeWindow win = getActiveTimeWindow(pWindowResInfo, ts, pQuery);
H
hjxilinx 已提交
856
    if (setWindowOutputBufByKey(pRuntimeEnv, pWindowResInfo, pDataBlockInfo->tid, &win) != TSDB_CODE_SUCCESS) {
H
hjxilinx 已提交
857
      return;
858
    }
859

860 861
    TSKEY   ekey = reviseWindowEkey(pQuery, &win);
    int32_t forwardStep =
862
        getNumOfRowsInTimeWindow(pQuery, pDataBlockInfo, tsCols, pQuery->pos, ekey, searchFn, true);
863

864
    SWindowStatus *pStatus = getTimeWindowResStatus(pWindowResInfo, curTimeWindow(pWindowResInfo));
865
    doBlockwiseApplyFunctions(pRuntimeEnv, pStatus, &win, pQuery->pos, forwardStep, tsCols, pDataBlockInfo->rows);
866

867 868
    int32_t     index = pWindowResInfo->curIndex;
    STimeWindow nextWin = win;
869

870
    while (1) {
871
      int32_t startPos = getNextQualifiedWindow(pRuntimeEnv, &nextWin, pDataBlockInfo, tsCols, searchFn);
872 873 874
      if (startPos < 0) {
        break;
      }
875

876
      // null data, failed to allocate more memory buffer
H
hjxilinx 已提交
877
      if (setWindowOutputBufByKey(pRuntimeEnv, pWindowResInfo, pDataBlockInfo->tid, &nextWin) != TSDB_CODE_SUCCESS) {
878 879
        break;
      }
880

881
      ekey = reviseWindowEkey(pQuery, &nextWin);
882
      forwardStep = getNumOfRowsInTimeWindow(pQuery, pDataBlockInfo, tsCols, startPos, ekey, searchFn, true);
883

884
      pStatus = getTimeWindowResStatus(pWindowResInfo, curTimeWindow(pWindowResInfo));
885
      doBlockwiseApplyFunctions(pRuntimeEnv, pStatus, &nextWin, startPos, forwardStep, tsCols, pDataBlockInfo->rows);
886
    }
887

888 889 890 891 892 893 894
    pWindowResInfo->curIndex = index;
  } else {
    /*
     * the sqlfunctionCtx parameters should be set done before all functions are invoked,
     * since the selectivity + tag_prj query needs all parameters been set done.
     * tag_prj function are changed to be TSDB_FUNC_TAG_DUMMY
     */
895
    for (int32_t k = 0; k < pQuery->numOfOutput; ++k) {
896
      int32_t functionId = pQuery->pSelectExpr[k].base.functionId;
897 898 899 900 901
      if (functionNeedToExecute(pRuntimeEnv, &pCtx[k], functionId)) {
        aAggs[functionId].xFunction(&pCtx[k]);
      }
    }
  }
902

903 904 905 906
  for(int32_t i = 0; i < pQuery->numOfOutput; ++i) {
    if (pQuery->pSelectExpr[i].base.functionId != TSDB_FUNC_ARITHM) {
      continue;
    }
907

908 909
    tfree(sasArray[i].data);
  }
910

911 912 913 914 915 916 917
  tfree(sasArray);
}

/*
 * Select (and create on demand) the result buffer of the group identified by the value at pData,
 * then point the function contexts at it.
 *
 * Returns TSDB_CODE_SUCCESS, or -1 when the value is null, no window result can be obtained for the
 * key, or a new result buffer cannot be added.
 */
static int32_t setGroupResultOutputBuf(SQueryRuntimeEnv *pRuntimeEnv, char *pData, int16_t type, int16_t bytes) {
  // null values do not form a group
  if (isNull(pData, type)) {
    return -1;
  }

  const int32_t groupResultId = 1;

  // integer image of the group key; remains -1 for every type not listed here
  int64_t keyVal = -1;
  if (type == TSDB_DATA_TYPE_BOOL || type == TSDB_DATA_TYPE_TINYINT) {
    keyVal = GET_INT8_VAL(pData);
  } else if (type == TSDB_DATA_TYPE_SMALLINT) {
    keyVal = GET_INT16_VAL(pData);
  } else if (type == TSDB_DATA_TYPE_INT) {
    keyVal = GET_INT32_VAL(pData);
  } else if (type == TSDB_DATA_TYPE_BIGINT) {
    keyVal = GET_INT64_VAL(pData);
  }

  SWindowResult *pRes = doSetTimeWindowFromKey(pRuntimeEnv, &pRuntimeEnv->windowResInfo, pData, bytes);
  if (pRes == NULL) {
    return -1;
  }

  pRes->window.skey = keyVal;
  pRes->window.ekey = keyVal;

  // no page assigned to this group yet, attach a new result buffer
  if (pRes->pos.pageId == -1) {
    SDiskbasedResultBuf *pBuf = pRuntimeEnv->pResultBuf;
    if (addNewWindowResultBuf(pRes, pBuf, groupResultId, pRuntimeEnv->numOfRowsPerPage) != 0) {
      return -1;
    }
  }

  setWindowResOutputBuf(pRuntimeEnv, pRes);
  initCtxOutputBuf(pRuntimeEnv);
  return TSDB_CODE_SUCCESS;
}

954
/*
 * Locate the data of the first group-by column whose flag is not TSDB_COL_TAG and that is present
 * in the given data block.
 *
 * *type and *bytes receive the type and width of the most recently examined such column, taken from
 * pQuery->colList. Returns the column data, or NULL when no such column is found in pDataBlock.
 */
static char *getGroupbyColumnData(SQuery *pQuery, int16_t *type, int16_t *bytes, SArray* pDataBlock) {
  SSqlGroupbyExpr *pGroupby = pQuery->pGroupbyExpr;

  int32_t k = 0;
  while (k < pGroupby->numOfGroupCols) {
    SColIndex *pGroupCol = taosArrayGet(pGroupby->columnInfo, k);
    k += 1;

    if (pGroupCol->flag == TSDB_COL_TAG) {
      continue;
    }

    // position of the group-by column in the column list of the query
    int32_t targetId = pGroupCol->colId;
    int16_t slot = -1;
    for (int32_t c = 0; c < pQuery->numOfCols; ++c) {
      if (pQuery->colList[c].colId == targetId) {
        slot = c;
        break;
      }
    }

    assert(slot >= 0 && slot < pQuery->numOfCols);

    *type = pQuery->colList[slot].type;
    *bytes = pQuery->colList[slot].bytes;

    /*
     *  the colIndex is acquired from the first meter of all qualified meters in this vnode during query prepare
     * stage, the remain meter may not have the required column in cache actually. So, the validation of required
     * column in cache with the corresponding meter schema is reinforced.
     */
    int32_t numOfBlockCols = taosArrayGetSize(pDataBlock);
    for (int32_t c = 0; c < numOfBlockCols; ++c) {
      SColumnInfoData *pColData = taosArrayGet(pDataBlock, c);
      if (pColData->info.colId == pGroupCol->colId) {
        return pColData->pData;
      }
    }
  }

  return NULL;
}

/*
 * Compare the row at the given offset of the first function context with the current element of
 * the timestamp buffer.
 *
 * Returns TS_JOIN_TAG_NOT_EQUALS when the tags differ, TS_JOIN_TS_NOT_EQUALS when the row timestamp
 * has not reached the buffer element yet (with respect to the scan order), TS_JOIN_TS_EQUAL otherwise.
 * A row that is already beyond the buffer element triggers an assertion.
 */
static int32_t doTSJoinFilter(SQueryRuntimeEnv *pRuntimeEnv, int32_t offset) {
  SQuery         *pQuery = pRuntimeEnv->pQuery;
  SQLFunctionCtx *pFirstCtx = &pRuntimeEnv->pCtx[0];

  STSElem elem = tsBufGetElem(pRuntimeEnv->pTSBuf);

  // compare tag first
  if (pFirstCtx->tag.i64Key != elem.tag) {
    return TS_JOIN_TAG_NOT_EQUALS;
  }

  TSKEY key = *(TSKEY *)(pFirstCtx->aInputElemBuf + TSDB_KEYSIZE * offset);

#if defined(_DEBUG_VIEW)
  printf("elem in comp ts file:%" PRId64 ", key:%" PRId64 ", tag:%"PRIu64", query order:%d, ts order:%d, traverse:%d, index:%d\n",
         elem.ts, key, elem.tag, pQuery->order.order, pRuntimeEnv->pTSBuf->tsOrder,
         pRuntimeEnv->pTSBuf->cur.order, pRuntimeEnv->pTSBuf->cur.tsIndex);
#endif

  if (key == elem.ts) {
    return TS_JOIN_TS_EQUAL;
  }

  // "behind" means the row has not reached the buffer element in the direction of the scan
  bool behind = QUERY_IS_ASC_QUERY(pQuery) ? (key < elem.ts) : (key > elem.ts);
  if (behind) {
    return TS_JOIN_TS_NOT_EQUALS;
  }

  // the row passed the buffer element, which is not expected to happen
  assert(false);
  return TS_JOIN_TS_EQUAL;
}

/*
 * Decide whether the given function has to be invoked in the current scan.
 *
 * Completed results and the dummy tag/ts functions are never executed; first/first_dst only run for
 * ascending queries; last/last_dst only run when the order stored in param[0] equals the query
 * order; every other function is skipped during the reverse scan.
 */
static bool functionNeedToExecute(SQueryRuntimeEnv *pRuntimeEnv, SQLFunctionCtx *pCtx, int32_t functionId) {
  SQuery      *pQuery = pRuntimeEnv->pQuery;
  SResultInfo *pInfo = GET_RES_INFO(pCtx);

  if (pInfo->complete) {
    return false;
  }

  if (functionId == TSDB_FUNC_TAG_DUMMY || functionId == TSDB_FUNC_TS_DUMMY) {
    return false;
  }

  bool isFirst = (functionId == TSDB_FUNC_FIRST_DST || functionId == TSDB_FUNC_FIRST);
  if (isFirst) {
    return QUERY_IS_ASC_QUERY(pQuery);
  }

  // todo add comments
  bool isLast = (functionId == TSDB_FUNC_LAST_DST || functionId == TSDB_FUNC_LAST);
  if (isLast) {
    return pCtx->param[0].i64Key == pQuery->order.order;
  }

  // in the supplementary scan, only the functions handled above need to be executed
  return !IS_REVERSE_SCAN(pRuntimeEnv);
}

1057 1058
/*
 * Apply the output functions of the query to the data block one row at a time.
 *
 * Each row is checked against the timestamp buffer (when pRuntimeEnv->pTSBuf is set) and the column
 * filters before it is used. For an interval query the row is applied to the active time window and
 * to every following window that still covers its timestamp; otherwise the row is applied to the
 * group selected by its group-by value (if any) through the per-row function xFunctionF.
 *
 * After the traversal, the lastKey of the current table is advanced past the last visited row.
 * Nothing is updated when the block holds no rows, since there is no visited row in that case.
 *
 * pDataBlock must not be NULL: its column 0 is read as the timestamp column.
 */
static void rowwiseApplyFunctions(SQueryRuntimeEnv *pRuntimeEnv, SDataStatis *pStatis, SDataBlockInfo *pDataBlockInfo,
    SWindowResInfo *pWindowResInfo, SArray *pDataBlock) {
  SQLFunctionCtx *pCtx = pRuntimeEnv->pCtx;

  SQuery *pQuery = pRuntimeEnv->pQuery;
  STableQueryInfo* item = pQuery->current;

  TSKEY  *tsCols = (TSKEY*) ((SColumnInfoData *)taosArrayGet(pDataBlock, 0))->pData;
  bool    groupbyStateValue = isGroupbyNormalCol(pQuery->pGroupbyExpr);
  SArithmeticSupport *sasArray = calloc((size_t)pQuery->numOfOutput, sizeof(SArithmeticSupport));

  int16_t type = 0;
  int16_t bytes = 0;

  char *groupbyColumnData = NULL;
  if (groupbyStateValue) {
    groupbyColumnData = getGroupbyColumnData(pQuery, &type, &bytes, pDataBlock);
  }

  for (int32_t k = 0; k < pQuery->numOfOutput; ++k) {
    char *dataBlock = getDataBlock(pRuntimeEnv, &sasArray[k], k, pDataBlockInfo->rows, pDataBlock);
    setExecParams(pQuery, &pCtx[k], dataBlock, tsCols, pDataBlockInfo, pStatis, &sasArray[k], k);
  }

  // set the input column data
  for (int32_t k = 0; k < pQuery->numOfFilterCols; ++k) {
    SSingleColumnFilterInfo *pFilterInfo = &pQuery->pFilterInfo[k];
    pFilterInfo->pData = getDataBlockImpl(pDataBlock, pFilterInfo->info.colId);
    assert(pFilterInfo->pData != NULL);
  }

  int32_t step = GET_FORWARD_DIRECTION_FACTOR(pQuery->order.order);

  // from top to bottom in desc
  // from bottom to top in asc order
  if (pRuntimeEnv->pTSBuf != NULL) {
    SQInfo *pQInfo = (SQInfo *)GET_QINFO_ADDR(pRuntimeEnv);
    qTrace("QInfo:%p process data rows, numOfRows:%d, query order:%d, ts comp order:%d", pQInfo, pDataBlockInfo->rows,
           pQuery->order.order, pRuntimeEnv->pTSBuf->cur.order);
  }

  int32_t j = 0;
  int32_t offset = -1;

  for (j = 0; j < pDataBlockInfo->rows; ++j) {
    offset = GET_COL_DATA_POS(pQuery, j, step);

    if (pRuntimeEnv->pTSBuf != NULL) {
      int32_t r = doTSJoinFilter(pRuntimeEnv, offset);
      if (r == TS_JOIN_TAG_NOT_EQUALS) {
        break;
      } else if (r == TS_JOIN_TS_NOT_EQUALS) {
        continue;
      } else {
        assert(r == TS_JOIN_TS_EQUAL);
      }
    }

    if (pQuery->numOfFilterCols > 0 && (!doFilterData(pQuery, offset))) {
      continue;
    }

    // interval window query
    if (isIntervalQuery(pQuery)) {
      // decide the time window according to the primary timestamp
      int64_t     ts = tsCols[offset];
      STimeWindow win = getActiveTimeWindow(pWindowResInfo, ts, pQuery);

      int32_t ret = setWindowOutputBufByKey(pRuntimeEnv, pWindowResInfo, pDataBlockInfo->tid, &win);
      if (ret != TSDB_CODE_SUCCESS) {  // null data, too many state code
        continue;
      }

      SWindowStatus *pStatus = getTimeWindowResStatus(pWindowResInfo, curTimeWindow(pWindowResInfo));
      doRowwiseApplyFunctions(pRuntimeEnv, pStatus, &win, offset);

      // remember the current window, the traversal of the following windows moves curIndex
      STimeWindow nextWin = win;
      int32_t     index = pWindowResInfo->curIndex;

      while (1) {
        getNextTimeWindow(pQuery, &nextWin);
        if (/*pWindowResInfo->startTime > nextWin.skey ||*/
            (nextWin.skey > pQuery->window.ekey && QUERY_IS_ASC_QUERY(pQuery)) ||
            (nextWin.skey < pQuery->window.ekey && !QUERY_IS_ASC_QUERY(pQuery))) {
          break;
        }

        if (ts < nextWin.skey || ts > nextWin.ekey) {
          break;
        }

        // null data, failed to allocate more memory buffer
        if (setWindowOutputBufByKey(pRuntimeEnv, pWindowResInfo, pDataBlockInfo->tid, &nextWin) != TSDB_CODE_SUCCESS) {
          break;
        }

        pStatus = getTimeWindowResStatus(pWindowResInfo, curTimeWindow(pWindowResInfo));
        doRowwiseApplyFunctions(pRuntimeEnv, pStatus, &nextWin, offset);
      }

      pWindowResInfo->curIndex = index;
    } else {  // other queries
      // decide which group this rows belongs to according to current state value
      if (groupbyStateValue) {
        char *val = groupbyColumnData + bytes * offset;

        int32_t ret = setGroupResultOutputBuf(pRuntimeEnv, val, type, bytes);
        if (ret != TSDB_CODE_SUCCESS) {  // null data, too many state code
          continue;
        }
      }

      for (int32_t k = 0; k < pQuery->numOfOutput; ++k) {
        int32_t functionId = pQuery->pSelectExpr[k].base.functionId;
        if (functionNeedToExecute(pRuntimeEnv, &pCtx[k], functionId)) {
          aAggs[functionId].xFunctionF(&pCtx[k], offset);
        }
      }
    }

    if (pRuntimeEnv->pTSBuf != NULL) {
      // if timestamp filter list is empty, quit current query
      if (!tsBufNextPos(pRuntimeEnv->pTSBuf)) {
        setQueryStatus(pQuery, QUERY_COMPLETED);
        break;
      }
    }
  }

  // offset is still -1 when the block holds no rows; reading tsCols[-1] would be out of bounds
  if (offset >= 0) {
    item->lastKey = tsCols[offset] + step;
  }

  // todo refactor: extract method
  // only arithmetic expressions own a per-column pointer array
  for(int32_t i = 0; i < pQuery->numOfOutput; ++i) {
    if (pQuery->pSelectExpr[i].base.functionId != TSDB_FUNC_ARITHM) {
      continue;
    }

    tfree(sasArray[i].data);
  }

  tfree(sasArray);
}

static int32_t tableApplyFunctionsOnBlock(SQueryRuntimeEnv *pRuntimeEnv, SDataBlockInfo *pDataBlockInfo,
H
hjxilinx 已提交
1201
                                          SDataStatis *pStatis, __block_search_fn_t searchFn, SArray *pDataBlock) {
H
hjxilinx 已提交
1202
  SQuery *pQuery = pRuntimeEnv->pQuery;
H
hjxilinx 已提交
1203 1204 1205
  
  STableQueryInfo* pTableQInfo = pQuery->current;
  SWindowResInfo*  pWindowResInfo = &pRuntimeEnv->windowResInfo;
H
hjxilinx 已提交
1206
  
1207
  if (pQuery->numOfFilterCols > 0 || pRuntimeEnv->pTSBuf != NULL || isGroupbyNormalCol(pQuery->pGroupbyExpr)) {
1208
    rowwiseApplyFunctions(pRuntimeEnv, pStatis, pDataBlockInfo, pWindowResInfo, pDataBlock);
1209
  } else {
1210
    blockwiseApplyFunctions(pRuntimeEnv, pStatis, pDataBlockInfo, pWindowResInfo, searchFn, pDataBlock);
1211
  }
1212

1213
  // update the lastkey of current table
1214
  TSKEY lastKey = QUERY_IS_ASC_QUERY(pQuery) ? pDataBlockInfo->window.ekey : pDataBlockInfo->window.skey;
H
hjxilinx 已提交
1215
  pTableQInfo->lastKey = lastKey + GET_FORWARD_DIRECTION_FACTOR(pQuery->order.order);
1216

1217
  // interval query with limit applied
1218 1219 1220 1221 1222
  int32_t numOfRes = 0;
  if (isIntervalQuery(pQuery)) {
    numOfRes = doCheckQueryCompleted(pRuntimeEnv, lastKey, pWindowResInfo);
  } else {
    numOfRes = getNumOfResult(pRuntimeEnv);
1223

1224 1225 1226 1227
    // update the number of output result
    if (numOfRes > 0 && pQuery->checkBuffer == 1) {
      assert(numOfRes >= pQuery->rec.rows);
      pQuery->rec.rows = numOfRes;
1228

1229 1230 1231
      if (numOfRes >= pQuery->rec.threshold) {
        setQueryStatus(pQuery, QUERY_RESBUF_FULL);
      }
1232

1233 1234 1235
      if ((pQuery->limit.limit >= 0) && (pQuery->limit.limit + pQuery->limit.offset) <= numOfRes) {
        setQueryStatus(pQuery, QUERY_COMPLETED);
      }
H
Haojun Liao 已提交
1236
    }
1237
  }
1238

1239
  return numOfRes;
1240 1241
}

H
Haojun Liao 已提交
1242
void setExecParams(SQuery *pQuery, SQLFunctionCtx *pCtx, void* inputData, TSKEY *tsCol, SDataBlockInfo* pBlockInfo,
1243 1244 1245 1246 1247 1248 1249
                   SDataStatis *pStatis, void *param, int32_t colIndex) {
  
  int32_t functionId = pQuery->pSelectExpr[colIndex].base.functionId;
  int32_t colId = pQuery->pSelectExpr[colIndex].base.colInfo.colId;
  
  SDataStatis *tpField = NULL;
  pCtx->hasNull = hasNullValue(pQuery, colIndex, pBlockInfo->numOfCols, pStatis, &tpField);
1250
  pCtx->aInputElemBuf = inputData;
1251

1252
  if (tpField != NULL) {
H
Haojun Liao 已提交
1253
    pCtx->preAggVals.isSet  = true;
1254 1255
    pCtx->preAggVals.statis = *tpField;
    assert(pCtx->preAggVals.statis.numOfNull <= pBlockInfo->rows);
1256 1257 1258
  } else {
    pCtx->preAggVals.isSet = false;
  }
1259

H
Haojun Liao 已提交
1260 1261 1262
  // limit/offset query will affect this value
  pCtx->startOffset = QUERY_IS_ASC_QUERY(pQuery) ? pQuery->pos:0;
  pCtx->size = QUERY_IS_ASC_QUERY(pQuery) ? pBlockInfo->rows - pQuery->pos : pQuery->pos + 1;
1263

1264 1265
  uint32_t status = aAggs[functionId].nStatus;
  if (((status & (TSDB_FUNCSTATE_SELECTIVITY | TSDB_FUNCSTATE_NEED_TS)) != 0) && (tsCol != NULL)) {
H
Haojun Liao 已提交
1266
    pCtx->ptsList = tsCol;
1267
  }
1268

1269 1270 1271 1272 1273
  if (functionId >= TSDB_FUNC_FIRST_DST && functionId <= TSDB_FUNC_LAST_DST) {
    // last_dist or first_dist function
    // store the first&last timestamp into the intermediate buffer [1], the true
    // value may be null but timestamp will never be null
  } else if (functionId == TSDB_FUNC_TOP || functionId == TSDB_FUNC_BOTTOM || functionId == TSDB_FUNC_TWA ||
1274
             functionId == TSDB_FUNC_DIFF || (functionId >= TSDB_FUNC_RATE && functionId <= TSDB_FUNC_AVG_IRATE)) {
1275
    /*
H
Haojun Liao 已提交
1276
     * least squares function needs two columns of input, currently, the x value of linear equation is set to
1277 1278 1279 1280 1281 1282 1283 1284 1285 1286
     * timestamp column, and the y-value is the column specified in pQuery->pSelectExpr[i].colIdxInBuffer
     *
     * top/bottom function needs timestamp to indicate when the
     * top/bottom values emerge, so does diff function
     */
    if (functionId == TSDB_FUNC_TWA) {
      STwaInfo *pTWAInfo = GET_RES_INFO(pCtx)->interResultBuf;
      pTWAInfo->SKey = pQuery->window.skey;
      pTWAInfo->EKey = pQuery->window.ekey;
    }
1287

1288 1289
  } else if (functionId == TSDB_FUNC_ARITHM) {
    pCtx->param[1].pz = param;
H
Haojun Liao 已提交
1290 1291 1292 1293 1294 1295
  } else if (functionId == TSDB_FUNC_SPREAD) {  // set the statistics data for primary time stamp column
    if (colId == PRIMARYKEY_TIMESTAMP_COL_INDEX) {
      pCtx->preAggVals.isSet  = true;
      pCtx->preAggVals.statis.min = pBlockInfo->window.skey;
      pCtx->preAggVals.statis.max = pBlockInfo->window.ekey;
    }
1296 1297 1298 1299 1300 1301 1302 1303 1304 1305 1306 1307 1308
  } else if (functionId == TSDB_FUNC_INTERP) {
    SInterpInfoDetail *pInterpInfo = GET_RES_INFO(pCtx)->interResultBuf;
    pInterpInfo->type = pQuery->fillType;
    pInterpInfo->ts = pQuery->window.skey;
    pInterpInfo->primaryCol = (colId == PRIMARYKEY_TIMESTAMP_COL_INDEX);
  
    if (pQuery->fillVal != NULL) {
      if (isNull((const char*) &pQuery->fillVal[colIndex], pCtx->inputType)) {
        pCtx->param[1].nType = TSDB_DATA_TYPE_NULL;
      } else { // todo refactor, tVariantCreateFromBinary should handle the NULL value
        tVariantCreateFromBinary(&pCtx->param[1], (char*) &pQuery->fillVal[colIndex], pCtx->inputBytes, pCtx->inputType);
      }
    }
1309
  }
1310

1311 1312 1313 1314 1315 1316
#if defined(_DEBUG_VIEW)
  //  int64_t *tsList = (int64_t *)primaryColumnData;
//  int64_t  s = tsList[0];
//  int64_t  e = tsList[size - 1];

//    if (IS_DATA_BLOCK_LOADED(blockStatus)) {
S
slguan 已提交
1317
//        qTrace("QInfo:%p query ts:%lld-%lld, offset:%d, rows:%d, bstatus:%d,
1318 1319 1320
//        functId:%d", GET_QINFO_ADDR(pQuery),
//               s, e, startOffset, size, blockStatus, functionId);
//    } else {
S
slguan 已提交
1321
//        qTrace("QInfo:%p block not loaded, bstatus:%d",
1322 1323 1324 1325 1326 1327 1328 1329
//        GET_QINFO_ADDR(pQuery), blockStatus);
//    }
#endif
}

/*
 * set the output buffer for the selectivity + tag query
 *
 * The contexts of all dummy tag/ts output columns are collected into one list, which is attached to
 * the context of the selectivity function (the last one found, if there are several). Ownership of
 * the list moves to that context. When no selectivity function is present the list is released
 * again, and when the list cannot be allocated no tag information is attached.
 */
static void setCtxTagColumnInfo(SQuery *pQuery, SQLFunctionCtx *pCtx) {
  if (!isSelectivityWithTagsQuery(pQuery)) {
    return;
  }

  SQLFunctionCtx **pTagCtx = calloc(pQuery->numOfOutput, POINTER_BYTES);
  if (pTagCtx == NULL) {  // out of memory (or no output column at all): nothing can be attached
    return;
  }

  int32_t num = 0;
  int16_t tagLen = 0;

  SQLFunctionCtx *p = NULL;

  for (int32_t i = 0; i < pQuery->numOfOutput; ++i) {
    SSqlFuncMsg *pSqlFuncMsg = &pQuery->pSelectExpr[i].base;

    if (pSqlFuncMsg->functionId == TSDB_FUNC_TAG_DUMMY || pSqlFuncMsg->functionId == TSDB_FUNC_TS_DUMMY) {
      tagLen += pCtx[i].outputBytes;
      pTagCtx[num++] = &pCtx[i];
    } else if ((aAggs[pSqlFuncMsg->functionId].nStatus & TSDB_FUNCSTATE_SELECTIVITY) != 0) {
      p = &pCtx[i];
    }

    /*
     * everything else is ignored:
     * - tag function may be the group by tag column
     * - ts may be the required primary timestamp column
     * - the column may be the normal column, group by normal_column, the functionId is TSDB_FUNC_PRJ
     */
  }

  if (p == NULL) {
    // no selectivity function to own the list, release it instead of dereferencing a null context
    tfree(pTagCtx);
    return;
  }

  p->tagInfo.pTagCtxList = pTagCtx;
  p->tagInfo.numOfTagCols = num;
  p->tagInfo.tagsLen = tagLen;
}

/*
 * Set up the intermediate result buffer of every output column, sized by the interBytes of the
 * corresponding select expression.
 */
static void setWindowResultInfo(SResultInfo *pResultInfo, SQuery *pQuery, bool isStableQuery) {
  int32_t col = 0;

  while (col < pQuery->numOfOutput) {
    SExprInfo *pExpr = &pQuery->pSelectExpr[col];

    // the intermediate result must fit into one internal buffer page
    assert(pExpr->interBytes <= DEFAULT_INTERN_BUF_PAGE_SIZE);
    setResultInfoBuf(&pResultInfo[col], pExpr->interBytes, isStableQuery);

    col += 1;
  }
}

1367
/*
 * Allocate and initialize the per-output function contexts and result info of the runtime env.
 *
 * For every output expression the input/output type and width, the order, the function id and the
 * function parameters are copied into its context, and the byte offset of the output column is
 * recorded in pRuntimeEnv->offset.
 *
 * order  stored in param[2] of top/bottom/diff functions
 *
 * Returns TSDB_CODE_SUCCESS, or TSDB_CODE_QRY_OUT_OF_MEMORY when the contexts or the result info
 * cannot be allocated (both are released in that case).
 */
static int32_t setupQueryRuntimeEnv(SQueryRuntimeEnv *pRuntimeEnv, int16_t order) {
  qTrace("QInfo:%p setup runtime env", GET_QINFO_ADDR(pRuntimeEnv));
  SQuery *pQuery = pRuntimeEnv->pQuery;

  pRuntimeEnv->resultInfo = calloc(pQuery->numOfOutput, sizeof(SResultInfo));
  pRuntimeEnv->pCtx = (SQLFunctionCtx *)calloc(pQuery->numOfOutput, sizeof(SQLFunctionCtx));

  if (pRuntimeEnv->resultInfo == NULL || pRuntimeEnv->pCtx == NULL) {
    goto _clean;
  }

  pRuntimeEnv->offset[0] = 0;
  for (int32_t i = 0; i < pQuery->numOfOutput; ++i) {
    SSqlFuncMsg *pSqlFuncMsg = &pQuery->pSelectExpr[i].base;

    SQLFunctionCtx *pCtx = &pRuntimeEnv->pCtx[i];
    SColIndex* pIndex = &pSqlFuncMsg->colInfo;

    // the input type/width come from the tag schema, the table name schema or the column schema
    int32_t index = pSqlFuncMsg->colInfo.colIndex;
    if (TSDB_COL_IS_TAG(pIndex->flag)) {
      if (pIndex->colId == TSDB_TBNAME_COLUMN_INDEX) {  // todo refactor
        SSchema s = tGetTableNameColumnSchema();

        pCtx->inputBytes = s.bytes;
        pCtx->inputType = s.type;
      } else {
        pCtx->inputBytes = pQuery->tagColList[index].bytes;
        pCtx->inputType = pQuery->tagColList[index].type;
      }

    } else {
      pCtx->inputBytes = pQuery->colList[index].bytes;
      pCtx->inputType = pQuery->colList[index].type;
    }

    assert(isValidDataType(pCtx->inputType));
    pCtx->ptsOutputBuf = NULL;

    pCtx->outputBytes = pQuery->pSelectExpr[i].bytes;
    pCtx->outputType = pQuery->pSelectExpr[i].type;

    pCtx->order = pQuery->order.order;
    pCtx->functionId = pSqlFuncMsg->functionId;

    pCtx->numOfParams = pSqlFuncMsg->numOfParams;
    for (int32_t j = 0; j < pCtx->numOfParams; ++j) {
      int16_t type = pSqlFuncMsg->arg[j].argType;
      int16_t bytes = pSqlFuncMsg->arg[j].argBytes;
      if (type == TSDB_DATA_TYPE_BINARY || type == TSDB_DATA_TYPE_NCHAR) {
        // the value has to come from the j-th argument, just like its type and length
        tVariantCreateFromBinary(&pCtx->param[j], pSqlFuncMsg->arg[j].argValue.pz, bytes, type);
      } else {
        tVariantCreateFromBinary(&pCtx->param[j], (char *)&pSqlFuncMsg->arg[j].argValue.i64, bytes, type);
      }
    }

    // set the order information for top/bottom query
    int32_t functionId = pCtx->functionId;

    if (functionId == TSDB_FUNC_TOP || functionId == TSDB_FUNC_BOTTOM || functionId == TSDB_FUNC_DIFF) {
      // these functions expect the timestamp function in the first output column
      int32_t f = pQuery->pSelectExpr[0].base.functionId;
      assert(f == TSDB_FUNC_TS || f == TSDB_FUNC_TS_DUMMY);

      pCtx->param[2].i64Key = order;
      pCtx->param[2].nType = TSDB_DATA_TYPE_BIGINT;
      pCtx->param[3].i64Key = functionId;
      pCtx->param[3].nType = TSDB_DATA_TYPE_BIGINT;

      pCtx->param[1].i64Key = pQuery->order.orderColId;
    }

    // output columns are laid out one after another
    if (i > 0) {
      pRuntimeEnv->offset[i] = pRuntimeEnv->offset[i - 1] + pRuntimeEnv->pCtx[i - 1].outputBytes;
    }
  }

  // set the intermediate result output buffer
  setWindowResultInfo(pRuntimeEnv->resultInfo, pQuery, pRuntimeEnv->stableQuery);

  // if it is group by normal column, do not set output buffer, the output buffer is pResult
  if (!isGroupbyNormalCol(pQuery->pGroupbyExpr) && !pRuntimeEnv->stableQuery) {
    resetCtxOutputBuf(pRuntimeEnv);
  }

  setCtxTagColumnInfo(pQuery, pRuntimeEnv->pCtx);
  return TSDB_CODE_SUCCESS;

_clean:
  tfree(pRuntimeEnv->resultInfo);
  tfree(pRuntimeEnv->pCtx);

  return TSDB_CODE_QRY_OUT_OF_MEMORY;
}

/*
 * Release everything owned by the runtime env: the time window info, the function contexts with
 * their parameters and tag lists, the intermediate result buffers, the fill info, the result
 * buffer, both query handles and the timestamp buffer.
 *
 * Does nothing when no query is attached to the runtime env.
 */
static void teardownQueryRuntimeEnv(SQueryRuntimeEnv *pRuntimeEnv) {
  SQuery *pQuery = pRuntimeEnv->pQuery;
  if (pQuery == NULL) {
    return;
  }

  SQInfo *pQInfo = (SQInfo *)GET_QINFO_ADDR(pRuntimeEnv);
  qTrace("QInfo:%p teardown runtime env", pQInfo);

  cleanupTimeWindowInfo(&pRuntimeEnv->windowResInfo, pQuery->numOfOutput);

  if (pRuntimeEnv->pCtx != NULL) {
    int32_t col = 0;
    while (col < pQuery->numOfOutput) {
      SQLFunctionCtx *pFuncCtx = &pRuntimeEnv->pCtx[col];

      int32_t arg = 0;
      while (arg < pFuncCtx->numOfParams) {
        tVariantDestroy(&pFuncCtx->param[arg]);
        arg += 1;
      }

      tVariantDestroy(&pFuncCtx->tag);
      tfree(pFuncCtx->tagInfo.pTagCtxList);
      tfree(pRuntimeEnv->resultInfo[col].interResultBuf);

      col += 1;
    }

    tfree(pRuntimeEnv->resultInfo);
    tfree(pRuntimeEnv->pCtx);
  }

  // the destroy functions return the value to be stored back into the env
  pRuntimeEnv->pFillInfo = taosDestoryFillInfo(pRuntimeEnv->pFillInfo);

  destroyResultBuf(pRuntimeEnv->pResultBuf, pQInfo);
  tsdbCleanupQueryHandle(pRuntimeEnv->pQueryHandle);
  tsdbCleanupQueryHandle(pRuntimeEnv->pSecQueryHandle);

  pRuntimeEnv->pTSBuf = tsBufDestory(pRuntimeEnv->pTSBuf);
}

1497
/*
 * Check whether the query has been cancelled, i.e. whether its code was set to
 * TSDB_CODE_TSC_QUERY_CANCELLED (as setQueryKilled() does).
 */
static bool isQueryKilled(SQInfo *pQInfo) {
  return (pQInfo->code == TSDB_CODE_TSC_QUERY_CANCELLED);
}

1502
/* Mark the query as cancelled by storing TSDB_CODE_TSC_QUERY_CANCELLED in its code. */
static void setQueryKilled(SQInfo *pQInfo) {
  pQInfo->code = TSDB_CODE_TSC_QUERY_CANCELLED;
}
H
hjxilinx 已提交
1503

H
hjxilinx 已提交
1504
/*
 * Check whether the query is treated as a fixed output query.
 *
 * Interval queries never are. Top/bottom queries and group-by on a normal column always are.
 * Otherwise the query qualifies as soon as one output function, other than the ts_comp projection
 * and the timestamp functions, is not a multi-output function.
 */
static bool isFixedOutputQuery(SQuery *pQuery) {
  if (pQuery->intervalTime != 0) {
    return false;
  }

  // Note:top/bottom query is fixed output query
  if (isTopBottomQuery(pQuery) || isGroupbyNormalCol(pQuery->pGroupbyExpr)) {
    return true;
  }

  for (int32_t i = 0; i < pQuery->numOfOutput; ++i) {
    SSqlFuncMsg *pMsg = &pQuery->pSelectExpr[i].base;

    // the ts_comp function: projection of the primary timestamp in the first output column
    bool isTsComp = (i == 0) && (pMsg->functionId == TSDB_FUNC_PRJ) && (pMsg->numOfParams == 1) &&
                    (pMsg->colInfo.colIndex == PRIMARYKEY_TIMESTAMP_COL_INDEX);
    bool isTsFunc = (pMsg->functionId == TSDB_FUNC_TS) || (pMsg->functionId == TSDB_FUNC_TS_DUMMY);

    if (isTsComp || isTsFunc) {
      continue;
    }

    if (!IS_MULTIOUTPUT(aAggs[pMsg->functionId].nStatus)) {
      return true;
    }
  }

  return false;
}

1535
// todo refactor with isLastRowQuery
H
hjxilinx 已提交
1536
/* Check whether any output column of the query uses the interp function. */
static bool isPointInterpoQuery(SQuery *pQuery) {
  int32_t col = 0;

  while (col < pQuery->numOfOutput) {
    if (pQuery->pSelectExpr[col].base.functionId == TSDB_FUNC_INTERP) {
      return true;
    }

    col += 1;
  }

  return false;
}

// TODO REFACTOR:MERGE WITH CLIENT-SIDE FUNCTION
H
hjxilinx 已提交
1548
/* Check whether any output column uses one of the sum_rate/sum_irate/avg_rate/avg_irate functions. */
static bool isSumAvgRateQuery(SQuery *pQuery) {
  for (int32_t col = 0; col < pQuery->numOfOutput; ++col) {
    int32_t funcId = pQuery->pSelectExpr[col].base.functionId;

    bool isRateFunc = (funcId == TSDB_FUNC_SUM_RATE) || (funcId == TSDB_FUNC_SUM_IRATE) ||
                      (funcId == TSDB_FUNC_AVG_RATE) || (funcId == TSDB_FUNC_AVG_IRATE);

    // the timestamp function is skipped explicitly
    if (funcId != TSDB_FUNC_TS && isRateFunc) {
      return true;
    }
  }

  return false;
}

H
hjxilinx 已提交
1564
/* Check whether any output column of the query uses the last_row function. */
static bool isFirstLastRowQuery(SQuery *pQuery) {
  int32_t col = 0;

  while (col < pQuery->numOfOutput) {
    if (pQuery->pSelectExpr[col].base.functionId == TSDB_FUNC_LAST_ROW) {
      return true;
    }

    col += 1;
  }

  return false;
}

H
hjxilinx 已提交
1575
/*
 * Check whether the query requires a scan in the reverse direction.
 *
 * That is the case for first/first_dst in a descending query. The first last/last_dst function
 * encountered decides the result on its own: a reverse scan is needed when the order stored in its
 * first argument differs from the query order.
 */
static bool needReverseScan(SQuery *pQuery) {
  for (int32_t col = 0; col < pQuery->numOfOutput; ++col) {
    SSqlFuncMsg *pMsg = &pQuery->pSelectExpr[col].base;
    int32_t      funcId = pMsg->functionId;

    if (funcId == TSDB_FUNC_TS || funcId == TSDB_FUNC_TS_DUMMY || funcId == TSDB_FUNC_TAG) {
      continue;
    }

    bool isFirst = (funcId == TSDB_FUNC_FIRST || funcId == TSDB_FUNC_FIRST_DST);
    if (isFirst && !QUERY_IS_ASC_QUERY(pQuery)) {
      return true;
    }

    if (funcId == TSDB_FUNC_LAST || funcId == TSDB_FUNC_LAST_DST) {
      int32_t lastOrder = pMsg->arg->argValue.i64;
      return lastOrder != pQuery->order.order;
    }
  }

  return false;
}
H
hjxilinx 已提交
1594 1595 1596

/*
 * Check whether every output column is a tag projection, a tid_tag function or a count on the
 * table name column.
 */
static bool onlyQueryTags(SQuery* pQuery) {
  for (int32_t col = 0; col < pQuery->numOfOutput; ++col) {
    SExprInfo *pExpr = &pQuery->pSelectExpr[col];
    int32_t    funcId = pExpr->base.functionId;

    if (funcId == TSDB_FUNC_TAGPRJ || funcId == TSDB_FUNC_TID_TAG) {
      continue;
    }

    if (funcId == TSDB_FUNC_COUNT && pExpr->base.colInfo.colId == TSDB_TBNAME_COLUMN_INDEX) {
      continue;
    }

    // any other function reads more than tags
    return false;
  }

  return true;
}

1609 1610
/////////////////////////////////////////////////////////////////////////////////////////////

H
Haojun Liao 已提交
1611
/*
 * Compute the aligned time window that contains key, and the part of that
 * window that lies inside [keyFirst, keyLast].
 *
 * @param pQuery   supplies slidingTime, slidingTimeUnit, precision and intervalTime
 * @param key      timestamp to align; must lie within [keyFirst, keyLast]
 * @param keyFirst lower bound of the queried range
 * @param keyLast  upper bound of the queried range
 * @param realWin  [out] window clipped to [keyFirst, keyLast]
 * @param win      [out] aligned window; skey comes from taosGetIntervalStartTimestamp
 */
void getAlignQueryTimeWindow(SQuery *pQuery, int64_t key, int64_t keyFirst, int64_t keyLast, STimeWindow *realWin, STimeWindow *win) {
  assert(key >= keyFirst && key <= keyLast && pQuery->slidingTime <= pQuery->intervalTime);

  win->skey = taosGetIntervalStartTimestamp(key, pQuery->slidingTime, pQuery->slidingTimeUnit, pQuery->precision);

  if (!(keyFirst > (INT64_MAX - pQuery->intervalTime))) {
    // regular case: the window end fits below INT64_MAX, clip it to the queried range
    win->ekey = win->skey + pQuery->intervalTime - 1;

    if (win->skey < keyFirst) {
      realWin->skey = keyFirst;
    } else {
      realWin->skey = win->skey;
    }

    if (win->ekey < keyLast) {
      realWin->ekey = win->ekey;
    } else {
      realWin->ekey = keyLast;
    }

    return;
  }

  /*
   * keyFirst is within one interval of INT64_MAX, so the queried range must be
   * shorter than one interval and needs no adjustment.
   */
  assert(keyLast - keyFirst < pQuery->intervalTime);

  realWin->skey = keyFirst;
  realWin->ekey = keyLast;

  win->ekey = INT64_MAX;
}

/*
 * Set pQuery->checkBuffer.
 *
 * The flag is cleared for top/bottom queries and for group-by-normal-column
 * queries. Otherwise it is set to 1 only when at least one expression other
 * than ts/ts_dummy exists and every such expression has the multi-output
 * status according to aAggs[].nStatus.
 *
 * @param pQuery query to update
 */
static void setScanLimitationByResultBuffer(SQuery *pQuery) {
  if (isTopBottomQuery(pQuery) || isGroupbyNormalCol(pQuery->pGroupbyExpr)) {
    pQuery->checkBuffer = 0;
    return;
  }

  bool allMultiOutput = false;

  for (int32_t k = 0; k < pQuery->numOfOutput; ++k) {
    SSqlFuncMsg *pFunc = &pQuery->pSelectExpr[k].base;

    bool isTsFunc = (pFunc->functionId == TSDB_FUNC_TS || pFunc->functionId == TSDB_FUNC_TS_DUMMY);
    if (isTsFunc) {
      continue;
    }

    allMultiOutput = IS_MULTIOUTPUT(aAggs[pFunc->functionId].nStatus);
    if (!allMultiOutput) {
      break;  // a single non multi-output function is enough to clear the flag
    }
  }

  if (allMultiOutput) {
    pQuery->checkBuffer = 1;
  } else {
    pQuery->checkBuffer = 0;
  }
}

/*
 * todo add more parameters to check soon..
 */
1662
/*
 * Validate the column list of the query: two adjacent entries of colList must
 * not carry the same column id.
 *
 * @param pQuery query whose colList is checked
 * @return false (after logging an error) if a duplicate adjacent id is found,
 *         true otherwise
 */
bool colIdCheck(SQuery *pQuery) {
  int32_t cur = 0;

  while (cur < pQuery->numOfCols - 1) {
    int32_t next = cur + 1;

    if (pQuery->colList[cur].colId == pQuery->colList[next].colId) {
      // NOTE(review): GET_QINFO_ADDR offsets from the runtimeEnv member, but an SQuery
      // pointer is passed here, so the logged address may not be the QInfo - confirm
      qError("QInfo:%p invalid data load column for query", GET_QINFO_ADDR(pQuery));
      return false;
    }

    cur = next;
  }

  return true;
}

// todo ignore the avg/sum/min/max/count/stddev/top/bottom functions, for which
// the scan order does not matter
/*
 * Report whether every output expression, apart from ts/ts_dummy/tag/tag_dummy
 * helpers, uses one of the two given function ids.
 *
 * @param pQuery     query whose output expressions are inspected
 * @param functId    first accepted function id
 * @param functIdDst second accepted function id
 * @return true if no other function id is present
 */
static bool onlyOneQueryType(SQuery *pQuery, int32_t functId, int32_t functIdDst) {
  int32_t idx = 0;

  while (idx < pQuery->numOfOutput) {
    const int32_t fid = pQuery->pSelectExpr[idx].base.functionId;
    idx += 1;

    const bool helper = (fid == TSDB_FUNC_TS) || (fid == TSDB_FUNC_TS_DUMMY) || (fid == TSDB_FUNC_TAG) ||
                        (fid == TSDB_FUNC_TAG_DUMMY);
    if (helper) {
      continue;
    }

    const bool accepted = (fid == functId) || (fid == functIdDst);
    if (!accepted) {
      return false;
    }
  }

  return true;
}

/* True when first/first_dst are the only functions in the query besides the ts/tag helpers. */
static bool onlyFirstQuery(SQuery *pQuery) {
  bool firstOnly = onlyOneQueryType(pQuery, TSDB_FUNC_FIRST, TSDB_FUNC_FIRST_DST);
  return firstOnly;
}

/* True when last/last_dst are the only functions in the query besides the ts/tag helpers. */
static bool onlyLastQuery(SQuery *pQuery) {
  bool lastOnly = onlyOneQueryType(pQuery, TSDB_FUNC_LAST, TSDB_FUNC_LAST_DST);
  return lastOnly;
}

1697
/*
 * Force the scan order of the query to `order`.
 *
 * If the current direction differs from the requested one, the change is
 * traced and window.skey/window.ekey are exchanged. The order field is always
 * assigned.
 *
 * @param pQuery    query to update
 * @param order     TSDB_ORDER_ASC or TSDB_ORDER_DESC
 * @param queryType label inserted into the trace message
 */
static void doChangeScanOrder(SQuery *pQuery, int32_t order, const char *queryType) {
  bool isAsc = QUERY_IS_ASC_QUERY(pQuery);
  bool wantAsc = (order == TSDB_ORDER_ASC);

  if (isAsc != wantAsc) {
    qTrace("QInfo:%p scan order changed for %s query, old:%d, new:%d, qrange exchanged, old qrange:%" PRId64
           "-%" PRId64 ", new qrange:%" PRId64 "-%" PRId64,
           GET_QINFO_ADDR(pQuery), queryType, pQuery->order.order, order, pQuery->window.skey,
           pQuery->window.ekey, pQuery->window.ekey, pQuery->window.skey);

    SWAP(pQuery->window.skey, pQuery->window.ekey, TSKEY);
  }

  pQuery->order.order = order;
}

/*
 * Adjust the scan order (and the matching query window) for query types whose
 * result depends on a particular scan direction:
 *   - last_row queries are always scanned in descending order;
 *   - point-interpolation queries without interval are scanned ascending;
 *   - queries containing only first (or only last) functions are scanned
 *     ascending (descending). With an interval this applies to super table
 *     queries only.
 * All other queries are left unchanged.
 *
 * @param pQuery      query to update
 * @param stableQuery true if this is a super table query
 */
static void changeExecuteScanOrder(SQuery *pQuery, bool stableQuery) {
  // todo handle the case where order-irrelevant query types are mixed with order-critical ones

  // descending order scan for last_row query
  if (isFirstLastRowQuery(pQuery)) {
    qTrace("QInfo:%p scan order changed for last_row query, old:%d, new:%d", GET_QINFO_ADDR(pQuery),
           pQuery->order.order, TSDB_ORDER_DESC);

    pQuery->order.order = TSDB_ORDER_DESC;

    int64_t skey = MIN(pQuery->window.skey, pQuery->window.ekey);
    int64_t ekey = MAX(pQuery->window.skey, pQuery->window.ekey);

    // descending scan: start from the larger key
    pQuery->window.skey = ekey;
    pQuery->window.ekey = skey;
    return;
  }

  // in case of point-interpolation query, use asc order scan
  if (isPointInterpoQuery(pQuery) && pQuery->intervalTime == 0) {
    doChangeScanOrder(pQuery, TSDB_ORDER_ASC, "interp");
    return;
  }

  bool intervalQuery = (pQuery->intervalTime != 0);
  if (intervalQuery && !stableQuery) {
    return;  // interval query on a single table: keep the requested order
  }

  if (onlyFirstQuery(pQuery)) {
    doChangeScanOrder(pQuery, TSDB_ORDER_ASC, intervalQuery ? "only-first stable" : "only-first");
  } else if (onlyLastQuery(pQuery)) {
    doChangeScanOrder(pQuery, TSDB_ORDER_DESC, intervalQuery ? "only-last stable" : "only-last");
  }
}

/*
 * Choose the initial number of result pages for a query.
 *
 * Group-by-normal-column queries get 128; interval queries get the number of
 * tables but no fewer than 16; every other query gets 1.
 *
 * @param pQInfo query info providing the query and the table group information
 * @return the (positive) initial page count
 */
static int32_t getInitialPageNum(SQInfo *pQInfo) {
  SQuery *pQuery = pQInfo->runtimeEnv.pQuery;
  int32_t minIntervalPages = 16;

  int32_t pages = 1;  // for super table query, one page for each subset: pQInfo->pSidSet->numOfSubSet

  if (isGroupbyNormalCol(pQuery->pGroupbyExpr)) {
    pages = 128;
  } else if (isIntervalQuery(pQuery)) {
    // time window query, allocate one page for each table
    size_t numOfTables = pQInfo->tableqinfoGroupInfo.numOfTables;
    pages = MAX(numOfTables, minIntervalPages);
  }

  assert(pages > 0);
  return pages;
}

H
Haojun Liao 已提交
1795
/*
 * Row multiplier of a result slot: when tbq is true and sq is false, the i64
 * argument of the second select expression of _q; otherwise 1. Callers in this
 * file pass the top/bottom-query flag as tbq and the super-table-query flag as sq.
 * Every macro argument is parenthesized so compound expressions expand correctly.
 */
#define GET_ROW_PARAM_FOR_MULTIOUTPUT(_q, tbq, sq) (((tbq) && !(sq)) ? (_q)->pSelectExpr[1].base.arg->argValue.i64 : 1)
1796

H
Haojun Liao 已提交
1797 1798
/*
 * Compute how many result rows fit into one internal buffer page.
 *
 * The row size is scaled by GET_ROW_PARAM_FOR_MULTIOUTPUT, and the page payload
 * is DEFAULT_INTERN_BUF_PAGE_SIZE minus the tFilePage header.
 *
 * @param pQuery        query providing rowSize and the select expressions
 * @param topBotQuery   forwarded to GET_ROW_PARAM_FOR_MULTIOUTPUT
 * @param isSTableQuery forwarded to GET_ROW_PARAM_FOR_MULTIOUTPUT
 * @return number of rows per page
 */
static FORCE_INLINE int32_t getNumOfRowsInResultPage(SQuery *pQuery, bool topBotQuery, bool isSTableQuery) {
  int32_t bytesPerResult = pQuery->rowSize * GET_ROW_PARAM_FOR_MULTIOUTPUT(pQuery, topBotQuery, isSTableQuery);

  return (DEFAULT_INTERN_BUF_PAGE_SIZE - sizeof(tFilePage)) / bytesPerResult;
}

/*
 * Locate the output cell of one column of a window result inside its result
 * buffer page.
 *
 * The address is page->data + offset[columnIndex] * numOfRowsPerPage
 * + bytes(columnIndex) * rowId, where rowId is the stored row id scaled by
 * GET_ROW_PARAM_FOR_MULTIOUTPUT.
 *
 * @param pRuntimeEnv runtime environment (must not be NULL)
 * @param columnIndex index of the output column
 * @param pResult     window result whose position is resolved (must not be NULL)
 * @return pointer to the cell inside the page
 */
char *getPosInResultPage(SQueryRuntimeEnv *pRuntimeEnv, int32_t columnIndex, SWindowResult *pResult) {
  assert(pResult != NULL && pRuntimeEnv != NULL);

  SQuery *pQuery = pRuntimeEnv->pQuery;

  int32_t rowIdInPage =
      pResult->pos.rowId * GET_ROW_PARAM_FOR_MULTIOUTPUT(pQuery, pRuntimeEnv->topBotQuery, pRuntimeEnv->stableQuery);

  tFilePage *pPage = GET_RES_BUF_PAGE_BY_ID(pRuntimeEnv->pResultBuf, pResult->pos.pageId);
  char      *base = (char *)pPage->data;

  return base + pRuntimeEnv->offset[columnIndex] * pRuntimeEnv->numOfRowsPerPage +
         pQuery->pSelectExpr[columnIndex].bytes * rowIdInPage;
}

/**
 * decrease the refcount for each table involved in this query
 * (currently a no-op: the implementation is disabled)
 * @param pQInfo
 */
1817
/*
 * Placeholder kept for interface compatibility; it performs no work.
 *
 * The former implementation was compiled out with "#if 0" and has been removed:
 * it dereferenced pQInfo immediately after testing it for NULL.
 *
 * @param pQInfo unused
 */
UNUSED_FUNC void vnodeDecMeterRefcnt(SQInfo *pQInfo) {
  (void)pQInfo;
}

/*
 * Decide whether a data block has to be loaded, based on its pre-computed
 * statistics.
 *
 * The statistics-based pre-filter is currently compiled out, so this function
 * always returns true.
 *
 * @param pQuery           query (only used by the disabled pre-filter)
 * @param pDataStatis      block statistics, may be NULL
 * @param pCtx             function contexts (only used by the disabled code)
 * @param numOfTotalPoints number of rows in the block (only used by the disabled code)
 * @return true if the block must be loaded
 */
static bool needToLoadDataBlock(SQuery *pQuery, SDataStatis *pDataStatis, SQLFunctionCtx *pCtx,
                                int32_t numOfTotalPoints) {
  // without statistics nothing can be ruled out
  if (pDataStatis == NULL) {
    return true;
  }

#if 0
  for (int32_t k = 0; k < pQuery->numOfFilterCols; ++k) {
    SSingleColumnFilterInfo *pFilterInfo = &pQuery->pFilterInfo[k];
    int32_t                  colIndex = pFilterInfo->info.colIndex;

    // this column not valid in current data block
    if (colIndex < 0 || pDataStatis[colIndex].colId != pFilterInfo->info.data.colId) {
      continue;
    }

    // not support pre-filter operation on binary/nchar data type
    if (!vnodeSupportPrefilter(pFilterInfo->info.data.type)) {
      continue;
    }

    // all points in current column are NULL, no need to check its boundary value
    if (pDataStatis[colIndex].numOfNull == numOfTotalPoints) {
      continue;
    }

    if (pFilterInfo->info.info.type == TSDB_DATA_TYPE_FLOAT) {
      float minval = *(double *)(&pDataStatis[colIndex].min);
      float maxval = *(double *)(&pDataStatis[colIndex].max);

      for (int32_t i = 0; i < pFilterInfo->numOfFilters; ++i) {
        if (pFilterInfo->pFilters[i].fp(&pFilterInfo->pFilters[i], (char *)&minval, (char *)&maxval)) {
          return true;
        }
      }
    } else {
      for (int32_t i = 0; i < pFilterInfo->numOfFilters; ++i) {
        if (pFilterInfo->pFilters[i].fp(&pFilterInfo->pFilters[i], (char *)&pDataStatis[colIndex].min,
                                        (char *)&pDataStatis[colIndex].max)) {
          return true;
        }
      }
    }
  }

  // todo disable this opt code block temporarily
  //  for (int32_t i = 0; i < pQuery->numOfOutput; ++i) {
  //    int32_t functId = pQuery->pSelectExpr[i].base.functionId;
  //    if (functId == TSDB_FUNC_TOP || functId == TSDB_FUNC_BOTTOM) {
  //      return top_bot_datablock_filter(&pCtx[i], functId, (char *)&pField[i].min, (char *)&pField[i].max);
  //    }
  //  }

#endif

  // pre-filter disabled: every block is loaded
  return true;
}

// previous time window may not be of the same size of pQuery->intervalTime
/*
 * Advance a time window by one sliding step in the scan direction and resize
 * it to exactly one interval.
 *
 * @param pQuery      supplies order, slidingTime and intervalTime
 * @param pTimeWindow [in/out] window to move
 */
static void getNextTimeWindow(SQuery *pQuery, STimeWindow *pTimeWindow) {
  int32_t direction = GET_FORWARD_DIRECTION_FACTOR(pQuery->order.order);

  // shift the start first; the end is derived from the new start
  pTimeWindow->skey = pTimeWindow->skey + (pQuery->slidingTime * direction);
  pTimeWindow->ekey = pTimeWindow->skey + (pQuery->intervalTime - 1);
}

H
hjxilinx 已提交
1916
/*
 * Load the current data block, or only its statistics, depending on what the
 * query functions require.
 *
 * The requirement is BLK_DATA_ALL_NEEDED when filter columns exist, when a ts
 * buffer is attached or for interval queries; otherwise it is the union of the
 * dataReqFunc results of all output functions.
 *
 * @param pRuntimeEnv  runtime environment; its summary counters are updated
 * @param pQueryHandle tsdb query handle positioned on the block
 * @param pBlockInfo   information of the current block
 * @param pStatis      [out] block statistics; set to NULL when retrieving them fails
 * @return the loaded data block, or NULL if the block data was not loaded
 */
SArray *loadDataBlockOnDemand(SQueryRuntimeEnv *pRuntimeEnv, void* pQueryHandle, SDataBlockInfo* pBlockInfo, SDataStatis **pStatis) {
  SQuery *pQuery = pRuntimeEnv->pQuery;

  uint32_t r = 0;
  SArray * pDataBlock = NULL;

  if (pQuery->numOfFilterCols > 0) {
    r = BLK_DATA_ALL_NEEDED;
  } else {
    // check if this data block is required to load
    for (int32_t i = 0; i < pQuery->numOfOutput; ++i) {
      SSqlFuncMsg* pSqlFunc = &pQuery->pSelectExpr[i].base;

      int32_t functionId = pSqlFunc->functionId;
      int32_t colId = pSqlFunc->colInfo.colId;
      r |= aAggs[functionId].dataReqFunc(&pRuntimeEnv->pCtx[i], pQuery->window.skey, pQuery->window.ekey, colId);
    }

    if (pRuntimeEnv->pTSBuf != NULL || isIntervalQuery(pQuery)) {
      r |= BLK_DATA_ALL_NEEDED;
    }
  }

  if (r == BLK_DATA_NO_NEEDED) {
    qTrace("QInfo:%p data block discard, brange:%" PRId64 "-%" PRId64 ", rows:%d", GET_QINFO_ADDR(pRuntimeEnv),
           pBlockInfo->window.skey, pBlockInfo->window.ekey, pBlockInfo->rows);
    pRuntimeEnv->summary.discardBlocks += 1;
  } else if (r == BLK_DATA_STATIS_NEEDED) {
    if (tsdbRetrieveDataBlockStatisInfo(pQueryHandle, pStatis) != TSDB_CODE_SUCCESS) {
      // statistics unavailable: treat them as missing so the block itself is loaded below
      *pStatis = NULL;
    }

    pRuntimeEnv->summary.loadBlockStatis += 1;

    if (*pStatis == NULL) { // data block statistics does not exist, load data block
      pDataBlock = tsdbRetrieveDataBlock(pQueryHandle, NULL);
      pRuntimeEnv->summary.totalCheckedRows += pBlockInfo->rows;
    }
  } else {
    assert(r == BLK_DATA_ALL_NEEDED);

    // load the data block statistics to perform further filter
    pRuntimeEnv->summary.loadBlockStatis +=1;
    if (tsdbRetrieveDataBlockStatisInfo(pQueryHandle, pStatis) != TSDB_CODE_SUCCESS) {
      // do not hand possibly stale statistics to the pre-filter or to the caller
      *pStatis = NULL;
    }

    if (!needToLoadDataBlock(pQuery,*pStatis, pRuntimeEnv->pCtx, pBlockInfo->rows)) {
#if defined(_DEBUG_VIEW)
      qTrace("QInfo:%p block discarded by per-filter", GET_QINFO_ADDR(pRuntimeEnv));
#endif
      // the block is only counted as discarded; it is still loaded below
      pRuntimeEnv->summary.discardBlocks += 1;
    }

    pRuntimeEnv->summary.totalCheckedRows += pBlockInfo->rows;
    pRuntimeEnv->summary.loadBlocks += 1;
    pDataBlock = tsdbRetrieveDataBlock(pQueryHandle, NULL);
  }

  return pDataBlock;
}

H
hjxilinx 已提交
1979
/*
 * Binary search in an ascending array of timestamps.
 *
 * For TSDB_ORDER_DESC the result is the position of the largest element that
 * is <= key (-1 if every element is larger). For TSDB_ORDER_ASC the result is
 * the position of the smallest element that is >= key (-1 if every element is
 * smaller).
 *
 * @param pValue array of TSKEY values
 * @param num    number of elements in the array
 * @param key    timestamp to look for
 * @param order  TSDB_ORDER_ASC or TSDB_ORDER_DESC
 * @return the position found, or -1 if there is none or num <= 0
 */
int32_t binarySearchForKey(char *pValue, int num, TSKEY key, int order) {
  if (num <= 0) {
    return -1;
  }

  assert(order == TSDB_ORDER_ASC || order == TSDB_ORDER_DESC);

  TSKEY * keys = (TSKEY *)pValue;
  int32_t lo = 0;
  int32_t hi = num - 1;

  if (order == TSDB_ORDER_DESC) {
    // find the last position which is not greater than the key
    for (;;) {
      if (key >= keys[hi]) return hi;
      if (key == keys[lo]) return lo;
      if (key < keys[lo]) return lo - 1;

      int32_t mid = lo + ((hi - lo + 1) >> 1);

      if (key == keys[mid]) {
        return mid;
      }

      if (key < keys[mid]) {
        hi = mid - 1;
      } else {
        lo = mid + 1;
      }
    }
  }

  // ascending order: find the first position which is not less than the key
  for (;;) {
    if (key <= keys[lo]) return lo;
    if (key == keys[hi]) return hi;

    if (key > keys[hi]) {
      // the candidate is the element right after the current range, if it exists
      int32_t next = hi + 1;
      return (next >= num) ? -1 : next;
    }

    int32_t mid = lo + ((hi - lo + 1) >> 1);

    if (key == keys[mid]) {
      return mid;
    }

    if (key < keys[mid]) {
      hi = mid - 1;
    } else {
      lo = mid + 1;
    }
  }
}

2042 2043 2044 2045 2046 2047 2048 2049 2050 2051 2052 2053 2054 2055 2056 2057 2058 2059 2060 2061 2062 2063
/*
 * Grow every output column buffer of the query to hold `capacity` rows and
 * reset each function context's output position to the start of its buffer.
 * Nothing happens when `capacity` is smaller than the current capacity.
 *
 * @param pRuntimeEnv runtime environment owning the query and function contexts
 * @param capacity    requested capacity in rows
 */
static void ensureOutputBufferSimple(SQueryRuntimeEnv* pRuntimeEnv, int32_t capacity) {
  SQuery* pQuery = pRuntimeEnv->pQuery;

  if (capacity < pQuery->rec.capacity) {
    return;
  }

  for (int32_t i = 0; i < pQuery->numOfOutput; ++i) {
    int32_t bytes = pQuery->pSelectExpr[i].bytes;
    assert(bytes > 0 && capacity > 0);

    // compute the size in size_t: bytes * capacity may not fit into int32_t
    size_t bufSize = (size_t)bytes * (size_t)capacity + sizeof(tFilePage);

    char *tmp = realloc(pQuery->sdata[i], bufSize);
    if (tmp == NULL) {  // todo handle the oom
      assert(0);
    } else {
      pQuery->sdata[i] = (tFilePage *)tmp;
    }

    // set the pCtx output buffer position
    pRuntimeEnv->pCtx[i].aOutputBuf = pQuery->sdata[i]->data;
  }

  qTrace("QInfo:%p realloc output buffer to inc output buffer from: %" PRId64 " rows to:%d rows", GET_QINFO_ADDR(pRuntimeEnv),
         pQuery->rec.capacity, capacity);

  pQuery->rec.capacity = capacity;
}

2070 2071 2072 2073 2074 2075 2076 2077 2078 2079 2080 2081
/*
 * For queries that are neither interval, group-by-normal-column nor
 * fixed-output queries, make sure the output buffers can take all rows of the
 * given block in addition to the rows already produced.
 *
 * When the free space is insufficient, every column buffer is enlarged by the
 * missing number of rows and the function contexts are re-pointed behind the
 * rows already present.
 *
 * @param pRuntimeEnv runtime environment owning the query and function contexts
 * @param pBlockInfo  block about to be processed
 */
static void ensureOutputBuffer(SQueryRuntimeEnv* pRuntimeEnv, SDataBlockInfo* pBlockInfo) {
  // in case of prj/diff query, ensure the output buffer is sufficient to accommodate the results of current block
  SQuery* pQuery = pRuntimeEnv->pQuery;
  if (!isIntervalQuery(pQuery) && !isGroupbyNormalCol(pQuery->pGroupbyExpr) && !isFixedOutputQuery(pQuery)) {
    SResultRec *pRec = &pQuery->rec;

    if (pQuery->rec.capacity - pQuery->rec.rows < pBlockInfo->rows) {
      int32_t remain = pRec->capacity - pRec->rows;
      int32_t newSize = pRec->capacity + (pBlockInfo->rows - remain);

      for (int32_t i = 0; i < pQuery->numOfOutput; ++i) {
        int32_t bytes = pQuery->pSelectExpr[i].bytes;
        assert(bytes > 0 && newSize > 0);

        // compute the size in size_t: bytes * newSize may not fit into int32_t
        size_t bufSize = (size_t)bytes * (size_t)newSize + sizeof(tFilePage);

        char *tmp = realloc(pQuery->sdata[i], bufSize);
        if (tmp == NULL) {  // todo handle the oom
          assert(0);
        } else {
          pQuery->sdata[i] = (tFilePage *)tmp;
        }

        // set the pCtx output buffer position
        pRuntimeEnv->pCtx[i].aOutputBuf = pQuery->sdata[i]->data + pRec->rows * bytes;

        // top/bottom/diff additionally write timestamps into the buffer of the first column
        int32_t functionId = pQuery->pSelectExpr[i].base.functionId;
        if (functionId == TSDB_FUNC_TOP || functionId == TSDB_FUNC_BOTTOM || functionId == TSDB_FUNC_DIFF) {
          pRuntimeEnv->pCtx[i].ptsOutputBuf = pRuntimeEnv->pCtx[0].aOutputBuf;
        }
      }

      qTrace("QInfo:%p realloc output buffer, new size: %d rows, old:%" PRId64 ", remain:%" PRId64, GET_QINFO_ADDR(pRuntimeEnv),
             newSize, pRec->capacity, newSize - pRec->rows);

      pRec->capacity = newSize;
    }
  }
}

2108 2109
static int64_t doScanAllDataBlocks(SQueryRuntimeEnv *pRuntimeEnv) {
  SQuery *pQuery = pRuntimeEnv->pQuery;
H
hjxilinx 已提交
2110
  STableQueryInfo* pTableQueryInfo = pQuery->current;
H
Haojun Liao 已提交
2111
  SQueryCostInfo*  summary  = &pRuntimeEnv->summary;
2112

S
slguan 已提交
2113
  qTrace("QInfo:%p query start, qrange:%" PRId64 "-%" PRId64 ", lastkey:%" PRId64 ", order:%d",
H
hjxilinx 已提交
2114 2115
         GET_QINFO_ADDR(pRuntimeEnv), pTableQueryInfo->win.skey, pTableQueryInfo->win.ekey, pTableQueryInfo->lastKey,
         pQuery->order.order);
2116

2117
  TsdbQueryHandleT pQueryHandle = IS_MASTER_SCAN(pRuntimeEnv)? pRuntimeEnv->pQueryHandle : pRuntimeEnv->pSecQueryHandle;
2118
  while (tsdbNextDataBlock(pQueryHandle)) {
H
Haojun Liao 已提交
2119
    summary->totalBlocks += 1;
2120
    if (isQueryKilled(GET_QINFO_ADDR(pRuntimeEnv))) {
2121
      return 0;
2122
    }
2123

2124
    SDataBlockInfo blockInfo = tsdbRetrieveDataBlockInfo(pQueryHandle);
2125

2126
    // todo extract methods
H
Haojun Liao 已提交
2127
    if (isIntervalQuery(pQuery) && pRuntimeEnv->windowResInfo.prevSKey == TSKEY_INITIAL_VAL) {
H
Haojun Liao 已提交
2128
      STimeWindow realWin = TSWINDOW_INITIALIZER, w = TSWINDOW_INITIALIZER;
2129 2130
      SWindowResInfo *pWindowResInfo = &pRuntimeEnv->windowResInfo;

2131
      if (QUERY_IS_ASC_QUERY(pQuery)) {
H
Haojun Liao 已提交
2132
        getAlignQueryTimeWindow(pQuery, blockInfo.window.skey, blockInfo.window.skey, pQuery->window.ekey, &realWin, &w);
2133 2134 2135 2136
        pWindowResInfo->startTime = w.skey;
        pWindowResInfo->prevSKey = w.skey;
      } else {
        // the start position of the first time window in the endpoint that spreads beyond the queried last timestamp
H
Haojun Liao 已提交
2137
        getAlignQueryTimeWindow(pQuery, blockInfo.window.ekey, pQuery->window.ekey, blockInfo.window.ekey, &realWin, &w);
2138

H
hjxilinx 已提交
2139
        pWindowResInfo->startTime = pQuery->window.skey;
2140 2141
        pWindowResInfo->prevSKey = w.skey;
      }
2142 2143 2144 2145
      
      if (pRuntimeEnv->pFillInfo != NULL) {
        pRuntimeEnv->pFillInfo->start = w.skey;
      }
2146
    }
2147

H
hjxilinx 已提交
2148
    // in case of prj/diff query, ensure the output buffer is sufficient to accommodate the results of current block
2149
    ensureOutputBuffer(pRuntimeEnv, &blockInfo);
2150

2151
    SDataStatis *pStatis = NULL;
H
Haojun Liao 已提交
2152
    SArray *pDataBlock = loadDataBlockOnDemand(pRuntimeEnv, pQueryHandle, &blockInfo, &pStatis);
2153

H
Haojun Liao 已提交
2154 2155
    // query start position can not move into tableApplyFunctionsOnBlock due to limit/offset condition
    pQuery->pos = QUERY_IS_ASC_QUERY(pQuery)? 0 : blockInfo.rows - 1;
H
hjxilinx 已提交
2156
    int32_t numOfRes = tableApplyFunctionsOnBlock(pRuntimeEnv, &blockInfo, pStatis, binarySearchForKey, pDataBlock);
2157

H
Haojun Liao 已提交
2158
    summary->totalRows += blockInfo.rows;
2159 2160
    qTrace("QInfo:%p check data block, brange:%" PRId64 "-%" PRId64 ", numOfRows:%d, numOfRes:%d, lastKey:%"PRId64, GET_QINFO_ADDR(pRuntimeEnv),
           blockInfo.window.skey, blockInfo.window.ekey, blockInfo.rows, numOfRes, pQuery->current->lastKey);
2161

2162 2163
    // while the output buffer is full or limit/offset is applied, query may be paused here
    if (Q_STATUS_EQUAL(pQuery->status, QUERY_RESBUF_FULL | QUERY_COMPLETED)) {
H
hjxilinx 已提交
2164
      break;
2165 2166
    }
  }
2167

H
hjxilinx 已提交
2168
  // if the result buffer is not full, set the query complete
2169 2170 2171
  if (!Q_STATUS_EQUAL(pQuery->status, QUERY_RESBUF_FULL)) {
    setQueryStatus(pQuery, QUERY_COMPLETED);
  }
2172

2173
  if (isIntervalQuery(pQuery) && IS_MASTER_SCAN(pRuntimeEnv)) {
H
hjxilinx 已提交
2174
    if (Q_STATUS_EQUAL(pQuery->status, QUERY_COMPLETED)) {
2175 2176
      int32_t step = QUERY_IS_ASC_QUERY(pQuery) ? QUERY_ASC_FORWARD_STEP : QUERY_DESC_FORWARD_STEP;

2177
      closeAllTimeWindow(&pRuntimeEnv->windowResInfo);
H
hjxilinx 已提交
2178
      removeRedundantWindow(&pRuntimeEnv->windowResInfo, pTableQueryInfo->lastKey - step, step);
H
hjxilinx 已提交
2179
      pRuntimeEnv->windowResInfo.curIndex = pRuntimeEnv->windowResInfo.size - 1;  // point to the last time window
2180 2181 2182 2183
    } else {
      assert(Q_STATUS_EQUAL(pQuery->status, QUERY_RESBUF_FULL));
    }
  }
2184

2185
  return 0;
2186 2187 2188 2189 2190 2191
}

/*
 * set tag value in SQLFunctionCtx
 * e.g.,tag information into input buffer
 */
2192
/*
 * Store the value of one tag column (or the table name) of a table in a variant.
 *
 * The previous content of the variant is destroyed first. For
 * TSDB_TBNAME_COLUMN_INDEX the table name is stored as binary. Otherwise the
 * tag value is fetched from the table; a missing or NULL value sets the
 * variant type to TSDB_DATA_TYPE_NULL.
 *
 * @param tsdb     not used by this function
 * @param pTable   table the tag value is read from
 * @param tagColId tag column id, or TSDB_TBNAME_COLUMN_INDEX for the table name
 * @param tag      [in/out] variant receiving the value
 * @param type     data type of the tag column
 * @param bytes    length of the tag column in bytes
 */
static void doSetTagValueInParam(void *tsdb, void* pTable, int32_t tagColId, tVariant *tag, int16_t type, int16_t bytes) {
  tVariantDestroy(tag);

  if (tagColId == TSDB_TBNAME_COLUMN_INDEX) {
    char* name = tsdbGetTableName(pTable);
    assert(name != NULL);

    tVariantCreateFromBinary(tag, varDataVal(name), varDataLen(name), TSDB_DATA_TYPE_BINARY);
    return;
  }

  char* val = tsdbGetTableTagVal(pTable, tagColId, type, bytes);
  if (val == NULL) {
    tag->nType = TSDB_DATA_TYPE_NULL;
    return;
  }

  if (type != TSDB_DATA_TYPE_BINARY && type != TSDB_DATA_TYPE_NCHAR) {
    // fixed-length value: the buffer holds the value itself
    if (isNull(val, type)) {
      tag->nType = TSDB_DATA_TYPE_NULL;
      return;
    }

    tVariantCreateFromBinary(tag, val, bytes, type);
    return;
  }

  // variable-length value: payload and length are read through the varData accessors
  if (isNull(varDataVal(val), type)) {
    tag->nType = TSDB_DATA_TYPE_NULL;
    return;
  }

  tVariantCreateFromBinary(tag, varDataVal(val), varDataLen(val), type);
}

2225
void setTagVal(SQueryRuntimeEnv *pRuntimeEnv, void *pTable, void *tsdb) {
2226
  SQuery *pQuery = pRuntimeEnv->pQuery;
H
Haojun Liao 已提交
2227
  SQInfo* pQInfo = GET_QINFO_ADDR(pRuntimeEnv);
2228

H
[td-90]  
Haojun Liao 已提交
2229 2230 2231
  SExprInfo *pExprInfo = &pQuery->pSelectExpr[0];
  if (pQuery->numOfOutput == 1 && pExprInfo->base.functionId == TSDB_FUNC_TS_COMP) {
    assert(pExprInfo->base.numOfParams == 1);
H
Haojun Liao 已提交
2232 2233 2234 2235 2236 2237 2238 2239 2240 2241

    // todo refactor extract function.
    int16_t type = -1, bytes = -1;
    for(int32_t i = 0; i < pQuery->numOfTags; ++i) {
      if (pQuery->tagColList[i].colId == pExprInfo->base.arg->argValue.i64) {
        type = pQuery->tagColList[i].type;
        bytes = pQuery->tagColList[i].bytes;
      }
    }

2242
    doSetTagValueInParam(tsdb, pTable, pExprInfo->base.arg->argValue.i64, &pRuntimeEnv->pCtx[0].tag, type, bytes);
2243 2244
  } else {
    // set tag value, by which the results are aggregated.
2245
    for (int32_t idx = 0; idx < pQuery->numOfOutput; ++idx) {
H
Haojun Liao 已提交
2246
      SExprInfo* pLocalExprInfo = &pQuery->pSelectExpr[idx];
H
[td-90]  
Haojun Liao 已提交
2247
  
2248
      // ts_comp column required the tag value for join filter
H
Haojun Liao 已提交
2249
      if (!TSDB_COL_IS_TAG(pLocalExprInfo->base.colInfo.flag)) {
2250 2251
        continue;
      }
2252

2253
      // todo use tag column index to optimize performance
2254
      doSetTagValueInParam(tsdb, pTable, pLocalExprInfo->base.colInfo.colId, &pRuntimeEnv->pCtx[idx].tag,
H
Haojun Liao 已提交
2255
                           pLocalExprInfo->type, pLocalExprInfo->bytes);
2256
    }
2257

2258
    // set the join tag for first column
H
[td-90]  
Haojun Liao 已提交
2259
    SSqlFuncMsg *pFuncMsg = &pExprInfo->base;
H
Haojun Liao 已提交
2260
    if ((pFuncMsg->functionId == TSDB_FUNC_TS || pFuncMsg->functionId == TSDB_FUNC_PRJ) && pFuncMsg->colInfo.colIndex == PRIMARYKEY_TIMESTAMP_COL_INDEX &&
2261 2262
        pRuntimeEnv->pTSBuf != NULL) {
      assert(pFuncMsg->numOfParams == 1);
H
Haojun Liao 已提交
2263 2264 2265 2266 2267 2268 2269 2270 2271 2272

      // todo refactor
      int16_t type = -1, bytes = -1;
      for(int32_t i = 0; i < pQuery->numOfTags; ++i) {
        if (pQuery->tagColList[i].colId == pExprInfo->base.arg->argValue.i64) {
          type = pQuery->tagColList[i].type;
          bytes = pQuery->tagColList[i].bytes;
        }
      }

2273
      doSetTagValueInParam(tsdb, pTable, pExprInfo->base.arg->argValue.i64, &pRuntimeEnv->pCtx[0].tag, type, bytes);
B
Bomin Zhang 已提交
2274 2275
      qTrace("QInfo:%p set tag value for join comparison, colId:%" PRId64 ", val:%"PRId64, pQInfo, pExprInfo->base.arg->argValue.i64,
          pRuntimeEnv->pCtx[0].tag.i64Key)
2276 2277 2278 2279 2280 2281 2282
    }
  }
}

/*
 * Merge one window result into the output of every function of the query.
 *
 * First all function contexts are prepared: when mergeFlag is false the output
 * position moves on by one output row and the context is re-initialized for
 * the first merge stage; the input is pointed at the window result, and for
 * tag/tag_dummy functions the tag variant is rebuilt from that input. Then the
 * distMergeFunc of every function except tag_dummy is invoked.
 *
 * @param pRuntimeEnv runtime environment owning the query and function contexts
 * @param timestamp   value stored as nStartQueryTimestamp in every context
 * @param pWindowRes  window result to merge
 * @param mergeFlag   false to start a new output row before merging
 */
static void doMerge(SQueryRuntimeEnv *pRuntimeEnv, int64_t timestamp, SWindowResult *pWindowRes, bool mergeFlag) {
  SQuery *pQuery = pRuntimeEnv->pQuery;

  // stage 1: prepare all contexts
  for (int32_t k = 0; k < pQuery->numOfOutput; ++k) {
    SQLFunctionCtx *pOne = &pRuntimeEnv->pCtx[k];
    int32_t         fid = pQuery->pSelectExpr[k].base.functionId;

    if (!mergeFlag) {
      pOne->aOutputBuf += pOne->outputBytes;
      pOne->currentStage = FIRST_STAGE_MERGE;

      resetResultInfo(pOne->resultInfo);
      aAggs[fid].init(pOne);
    }

    pOne->hasNull = true;
    pOne->nStartQueryTimestamp = timestamp;
    pOne->aInputElemBuf = getPosInResultPage(pRuntimeEnv, k, pWindowRes);

    // in case of tag column, the tag information should be extracted from input buffer
    if (fid != TSDB_FUNC_TAG_DUMMY && fid != TSDB_FUNC_TAG) {
      continue;
    }

    tVariantDestroy(&pOne->tag);

    int32_t outType = pOne->outputType;
    if (outType == TSDB_DATA_TYPE_BINARY || outType == TSDB_DATA_TYPE_NCHAR) {
      tVariantCreateFromBinary(&pOne->tag, varDataVal(pOne->aInputElemBuf), varDataLen(pOne->aInputElemBuf), outType);
    } else {
      tVariantCreateFromBinary(&pOne->tag, pOne->aInputElemBuf, pOne->inputBytes, pOne->inputType);
    }
  }

  // stage 2: run the merge functions
  for (int32_t k = 0; k < pQuery->numOfOutput; ++k) {
    int32_t fid = pQuery->pSelectExpr[k].base.functionId;

    if (fid != TSDB_FUNC_TAG_DUMMY) {
      aAggs[fid].distMergeFunc(&pRuntimeEnv->pCtx[k]);
    }
  }
}

2322
static UNUSED_FUNC void printBinaryData(int32_t functionId, char *data, int32_t srcDataType) {
2323 2324 2325 2326 2327 2328 2329 2330 2331 2332 2333 2334 2335 2336 2337 2338 2339 2340 2341 2342 2343 2344 2345 2346 2347 2348 2349 2350 2351 2352 2353 2354 2355 2356 2357 2358 2359 2360 2361 2362 2363 2364 2365 2366 2367 2368 2369 2370 2371 2372 2373 2374 2375 2376 2377 2378 2379 2380 2381 2382 2383 2384 2385 2386 2387 2388 2389 2390
  if (functionId == TSDB_FUNC_FIRST_DST || functionId == TSDB_FUNC_LAST_DST) {
    switch (srcDataType) {
      case TSDB_DATA_TYPE_BINARY:
        printf("%" PRId64 ",%s\t", *(TSKEY *)data, (data + TSDB_KEYSIZE + 1));
        break;
      case TSDB_DATA_TYPE_TINYINT:
      case TSDB_DATA_TYPE_BOOL:
        printf("%" PRId64 ",%d\t", *(TSKEY *)data, *(int8_t *)(data + TSDB_KEYSIZE + 1));
        break;
      case TSDB_DATA_TYPE_SMALLINT:
        printf("%" PRId64 ",%d\t", *(TSKEY *)data, *(int16_t *)(data + TSDB_KEYSIZE + 1));
        break;
      case TSDB_DATA_TYPE_BIGINT:
      case TSDB_DATA_TYPE_TIMESTAMP:
        printf("%" PRId64 ",%" PRId64 "\t", *(TSKEY *)data, *(TSKEY *)(data + TSDB_KEYSIZE + 1));
        break;
      case TSDB_DATA_TYPE_INT:
        printf("%" PRId64 ",%d\t", *(TSKEY *)data, *(int32_t *)(data + TSDB_KEYSIZE + 1));
        break;
      case TSDB_DATA_TYPE_FLOAT:
        printf("%" PRId64 ",%f\t", *(TSKEY *)data, *(float *)(data + TSDB_KEYSIZE + 1));
        break;
      case TSDB_DATA_TYPE_DOUBLE:
        printf("%" PRId64 ",%lf\t", *(TSKEY *)data, *(double *)(data + TSDB_KEYSIZE + 1));
        break;
    }
  } else if (functionId == TSDB_FUNC_AVG) {
    printf("%lf,%d\t", *(double *)data, *(int32_t *)(data + sizeof(double)));
  } else if (functionId == TSDB_FUNC_SPREAD) {
    printf("%lf,%lf\t", *(double *)data, *(double *)(data + sizeof(double)));
  } else if (functionId == TSDB_FUNC_TWA) {
    data += 1;
    printf("%lf,%" PRId64 ",%" PRId64 ",%" PRId64 "\t", *(double *)data, *(int64_t *)(data + 8),
           *(int64_t *)(data + 16), *(int64_t *)(data + 24));
  } else if (functionId == TSDB_FUNC_MIN || functionId == TSDB_FUNC_MAX) {
    switch (srcDataType) {
      case TSDB_DATA_TYPE_TINYINT:
      case TSDB_DATA_TYPE_BOOL:
        printf("%d\t", *(int8_t *)data);
        break;
      case TSDB_DATA_TYPE_SMALLINT:
        printf("%d\t", *(int16_t *)data);
        break;
      case TSDB_DATA_TYPE_BIGINT:
      case TSDB_DATA_TYPE_TIMESTAMP:
        printf("%" PRId64 "\t", *(int64_t *)data);
        break;
      case TSDB_DATA_TYPE_INT:
        printf("%d\t", *(int *)data);
        break;
      case TSDB_DATA_TYPE_FLOAT:
        printf("%f\t", *(float *)data);
        break;
      case TSDB_DATA_TYPE_DOUBLE:
        printf("%f\t", *(float *)data);
        break;
    }
  } else if (functionId == TSDB_FUNC_SUM) {
    if (srcDataType == TSDB_DATA_TYPE_FLOAT || srcDataType == TSDB_DATA_TYPE_DOUBLE) {
      printf("%lf\t", *(float *)data);
    } else {
      printf("%" PRId64 "\t", *(int64_t *)data);
    }
  } else {
    printf("%s\t", data);
  }
}

2391
/*
 * Debug helper: dump the intermediate result of a super table query to stdout,
 * one text line per row.
 *
 * pdata        one page per output column; the cell of row r in column c is
 *              read at pdata[c]->data + bytes(c) * r
 * pRuntimeEnv  runtime environment, used to reach the query description
 * numOfRows    number of rows to print
 *
 * Columns whose type is not handled by the switch below print nothing.
 */
void UNUSED_FUNC displayInterResult(tFilePage **pdata, SQueryRuntimeEnv* pRuntimeEnv, int32_t numOfRows) {
  SQuery *pQuery = pRuntimeEnv->pQuery;
  int32_t numOfCols = pQuery->numOfOutput;
  printf("super table query intermediate result, total:%d\n", numOfRows);

  for (int32_t row = 0; row < numOfRows; ++row) {
    for (int32_t col = 0; col < numOfCols; ++col) {
      int32_t colType = pQuery->pSelectExpr[col].type;
      char *  pCell = (char *)pdata[col]->data + pQuery->pSelectExpr[col].bytes * row;

      switch (colType) {
        case TSDB_DATA_TYPE_BINARY:
          // binary cells hold function specific layouts, decoded by printBinaryData
          printBinaryData(pQuery->pSelectExpr[col].base.functionId, pCell, colType);
          break;
        case TSDB_DATA_TYPE_TIMESTAMP:
        case TSDB_DATA_TYPE_BIGINT:
          printf("%" PRId64 "\t", *(int64_t *)pCell);
          break;
        case TSDB_DATA_TYPE_INT:
          printf("%d\t", *(int32_t *)pCell);
          break;
        case TSDB_DATA_TYPE_FLOAT:
          printf("%f\t", *(float *)pCell);
          break;
        case TSDB_DATA_TYPE_DOUBLE:
          printf("%lf\t", *(double *)pCell);
          break;
      }
    }

    printf("\n");
  }
}

typedef struct SCompSupporter {
H
hjxilinx 已提交
2430 2431 2432
  STableQueryInfo **pTableQueryInfo;
  int32_t *         position;
  SQInfo *          pQInfo;
2433 2434 2435 2436 2437
} SCompSupporter;

int32_t tableResultComparFn(const void *pLeft, const void *pRight, void *param) {
  int32_t left = *(int32_t *)pLeft;
  int32_t right = *(int32_t *)pRight;
2438

2439 2440
  SCompSupporter *  supporter = (SCompSupporter *)param;
  SQueryRuntimeEnv *pRuntimeEnv = &supporter->pQInfo->runtimeEnv;
2441

2442 2443
  int32_t leftPos = supporter->position[left];
  int32_t rightPos = supporter->position[right];
2444

2445 2446 2447 2448
  /* left source is exhausted */
  if (leftPos == -1) {
    return 1;
  }
2449

2450 2451 2452 2453
  /* right source is exhausted*/
  if (rightPos == -1) {
    return -1;
  }
2454

H
hjxilinx 已提交
2455
  SWindowResInfo *pWindowResInfo1 = &supporter->pTableQueryInfo[left]->windowResInfo;
2456
  SWindowResult * pWindowRes1 = getWindowResult(pWindowResInfo1, leftPos);
2457

2458 2459
  char *b1 = getPosInResultPage(pRuntimeEnv, PRIMARYKEY_TIMESTAMP_COL_INDEX, pWindowRes1);
  TSKEY leftTimestamp = GET_INT64_VAL(b1);
2460

H
hjxilinx 已提交
2461
  SWindowResInfo *pWindowResInfo2 = &supporter->pTableQueryInfo[right]->windowResInfo;
2462
  SWindowResult * pWindowRes2 = getWindowResult(pWindowResInfo2, rightPos);
2463

2464 2465
  char *b2 = getPosInResultPage(pRuntimeEnv, PRIMARYKEY_TIMESTAMP_COL_INDEX, pWindowRes2);
  TSKEY rightTimestamp = GET_INT64_VAL(b2);
2466

2467 2468 2469
  if (leftTimestamp == rightTimestamp) {
    return 0;
  }
2470

2471 2472 2473
  return leftTimestamp > rightTimestamp ? 1 : -1;
}

2474
/*
 * Merge table results group by group, starting at pQInfo->groupIndex, until a
 * group produces at least one result page or all groups have been processed.
 *
 * Returns TSDB_CODE_SUCCESS, or -1 if merging a group failed.
 * pQInfo->groupIndex is advanced past every group that was merged successfully.
 */
int32_t mergeIntoGroupResult(SQInfo *pQInfo) {
  int64_t startMs = taosGetTimestampMs();
  int32_t totalGroups = taosArrayGetSize(pQInfo->tableqinfoGroupInfo.pGroupList);

  while (pQInfo->groupIndex < totalGroups) {
    SArray *pGroup = taosArrayGetP(pQInfo->tableqinfoGroupInfo.pGroupList, pQInfo->groupIndex);

    int32_t numOfPages = mergeIntoGroupResultImpl(pQInfo, pGroup);
    if (numOfPages < 0) {  // not enough disk space to save the data into disk
      return -1;
    }

    pQInfo->groupIndex += 1;

    if (numOfPages > 0) {  // this group generated results, hand them back
      break;
    }

    assert(pQInfo->numOfGroupResultPages == 0);
    qTrace("QInfo:%p no result in group %d, continue", pQInfo, pQInfo->groupIndex - 1);
  }

  qTrace("QInfo:%p merge res data into group, index:%d, total group:%d, elapsed time:%" PRId64 "ms", pQInfo,
         pQInfo->groupIndex - 1, totalGroups, taosGetTimestampMs() - startMs);

  return TSDB_CODE_SUCCESS;
}

/*
 * Copy the next set of merged group result pages into the query output
 * buffers (pQuery->sdata) and account for the copied rows in pQuery->rec.rows.
 *
 * When every page set of the current group has been consumed, the next group
 * is merged first. If that merge fails the function returns without copying.
 * If no group is left, pQInfo->tableIndex is set to the number of tables,
 * which marks the query as completed.
 */
void copyResToQueryResultBuf(SQInfo *pQInfo, SQuery *pQuery) {
  if (pQInfo->offset == pQInfo->numOfGroupResultPages) {
    pQInfo->numOfGroupResultPages = 0;

    // results of the current group have all been sent to the client, try the next group
    if (mergeIntoGroupResult(pQInfo) != TSDB_CODE_SUCCESS) {
      return;  // failed to save data in the disk
    }

    // check if all results have been sent to the client
    int32_t totalGroups = taosArrayGetSize(pQInfo->tableqinfoGroupInfo.pGroupList);
    if (pQInfo->numOfGroupResultPages == 0 && pQInfo->groupIndex == totalGroups) {
      pQInfo->tableIndex = pQInfo->tableqinfoGroupInfo.numOfTables;  // set query completed
      return;
    }
  }

  SQueryRuntimeEnv *   pRuntimeEnv = &pQInfo->runtimeEnv;
  SDiskbasedResultBuf *pResultBuf = pRuntimeEnv->pResultBuf;

  int32_t groupId = getGroupResultId(pQInfo->groupIndex - 1);
  SIDList pageList = getDataBufPagesIdList(pResultBuf, pQInfo->offset + groupId);

  // rows copied so far; also the row offset of the next page in the destination
  int32_t copied = 0;
  for (int32_t p = 0; p < pageList.size; ++p) {
    tFilePage *pPage = GET_RES_BUF_PAGE_BY_ID(pResultBuf, pageList.pData[p]);

    for (int32_t col = 0; col < pQuery->numOfOutput; ++col) {
      int32_t bytes = pRuntimeEnv->pCtx[col].outputBytes;
      char *  pDest = pQuery->sdata[col]->data;

      // the source read starts at offset[col] * num inside the page
      memcpy(pDest + copied * bytes, pPage->data + pRuntimeEnv->offset[col] * pPage->num,
             bytes * pPage->num);
    }

    copied += pPage->num;
  }

  assert(pQuery->rec.rows == 0);

  pQuery->rec.rows += copied;
  pQInfo->offset += 1;
}

H
Haojun Liao 已提交
2556 2557
/*
 * Return the number of results held by a window result.
 *
 * The ts, tag and tagprj functions cannot decide the number of output rows,
 * so their columns are skipped. The count of the first remaining column with
 * numOfRes > 0 is returned, or 0 if there is none.
 */
int64_t getNumOfResultWindowRes(SQuery *pQuery, SWindowResult *pWindowRes) {
  for (int32_t col = 0; col < pQuery->numOfOutput; ++col) {
    int32_t funcId = pQuery->pSelectExpr[col].base.functionId;

    bool auxiliary = (funcId == TSDB_FUNC_TS || funcId == TSDB_FUNC_TAG || funcId == TSDB_FUNC_TAGPRJ);
    if (auxiliary) {
      continue;
    }

    SResultInfo *pInfo = &pWindowRes->resultInfo[col];
    assert(pInfo != NULL);

    if (pInfo->numOfRes > 0) {
      return pInfo->numOfRes;
    }
  }

  return 0;
}

2589
/*
 * Merge the window results of all tables of one group, ordered by window
 * timestamp, and flush the merged rows into the group result buffer.
 *
 * pQInfo  query the group belongs to
 * pGroup  array of STableQueryInfo pointers, one per table of the group
 *
 * Returns the number of group result pages produced (pQInfo->numOfGroupResultPages),
 * 0 if no table of the group has any result, or -1 on failure (allocation
 * failure or a failed flush).
 *
 * All temporary resources are released on a single cleanup path. Previously a
 * flush failure inside the merge loop leaked every temporary buffer, the final
 * flush failure leaked the per column interResultBuf, and allocation results
 * were not checked.
 */
int32_t mergeIntoGroupResultImpl(SQInfo *pQInfo, SArray *pGroup) {
  SQueryRuntimeEnv *pRuntimeEnv = &pQInfo->runtimeEnv;
  SQuery *          pQuery = pRuntimeEnv->pQuery;

  size_t      size = taosArrayGetSize(pGroup);
  tFilePage **buffer = pQuery->sdata;

  int32_t         ret = -1;
  int64_t         lastTimestamp = -1;
  int64_t         startt = 0;
  int64_t         endt = 0;
  SLoserTreeInfo *pTree = NULL;
  SResultInfo *   pResultInfo = NULL;

  int32_t *         posList = calloc(size, sizeof(int32_t));
  STableQueryInfo **pTableList = malloc(POINTER_BYTES * size);

  // a zero sized request may legitimately yield NULL, only fail for real requests
  if (size > 0 && (posList == NULL || pTableList == NULL)) {
    qError("QInfo:%p failed to allocate buffers for merging group results, abort query", pQInfo);
    tfree(posList);
    tfree(pTableList);
    return -1;
  }

  // todo opt for the case of one table per group
  // collect the tables that own both buffered result pages and window results
  int32_t numOfTables = 0;
  for (size_t i = 0; i < size; ++i) {
    STableQueryInfo *item = taosArrayGetP(pGroup, i);

    SIDList list = getDataBufPagesIdList(pRuntimeEnv->pResultBuf, tsdbGetTableId(item->pTable).tid);
    if (list.size > 0 && item->windowResInfo.size > 0) {
      pTableList[numOfTables] = item;
      numOfTables += 1;
    }
  }

  if (numOfTables == 0) {
    tfree(posList);
    tfree(pTableList);

    assert(pQInfo->numOfGroupResultPages == 0);
    return 0;
  }

  SCompSupporter cs = {pTableList, posList, pQInfo};

  tLoserTreeCreate(&pTree, numOfTables, &cs, tableResultComparFn);
  if (pTree == NULL) {
    qError("QInfo:%p failed to create loser tree for merging group results, abort query", pQInfo);
    goto _cleanup;
  }

  pResultInfo = calloc(pQuery->numOfOutput, sizeof(SResultInfo));
  if (pResultInfo == NULL) {
    qError("QInfo:%p failed to allocate result info for merging group results, abort query", pQInfo);
    goto _cleanup;
  }

  setWindowResultInfo(pResultInfo, pQuery, pRuntimeEnv->stableQuery);
  resetMergeResultBuf(pQuery, pRuntimeEnv->pCtx, pResultInfo);

  startt = taosGetTimestampMs();

  while (1) {
    // the tree root holds the index of the source with the smallest timestamp
    int32_t pos = pTree->pNode[0].index;

    SWindowResInfo *pWindowResInfo = &pTableList[pos]->windowResInfo;
    SWindowResult * pWindowRes = getWindowResult(pWindowResInfo, cs.position[pos]);

    char *b = getPosInResultPage(pRuntimeEnv, PRIMARYKEY_TIMESTAMP_COL_INDEX, pWindowRes);
    TSKEY ts = GET_INT64_VAL(b);

    assert(ts == pWindowRes->window.skey);

    int64_t num = getNumOfResultWindowRes(pQuery, pWindowRes);
    if (num > 0) {
      if (ts == lastTimestamp) {  // merge with the last one
        doMerge(pRuntimeEnv, ts, pWindowRes, true);
      } else {  // start a new output row, flushing the buffer first if it is full
        if (buffer[0]->num == pQuery->rec.capacity) {
          if (flushFromResultBuf(pQInfo) != TSDB_CODE_SUCCESS) {
            qError("QInfo:%p failed to flush data into temp file, abort query", pQInfo);
            goto _cleanup;
          }

          resetMergeResultBuf(pQuery, pRuntimeEnv->pCtx, pResultInfo);
        }

        doMerge(pRuntimeEnv, ts, pWindowRes, false);
        buffer[0]->num += 1;
      }

      lastTimestamp = ts;
    }

    // move this source to its next window result, empty windows are skipped as well
    cs.position[pos] += 1;
    if (cs.position[pos] >= pWindowResInfo->size) {
      cs.position[pos] = -1;

      // all input sources are exhausted
      if (--numOfTables == 0) {
        break;
      }
    }

    tLoserTreeAdjust(pTree, pos + pTree->numOfEntries);
  }

  if (buffer[0]->num != 0) {  // there are data in buffer
    if (flushFromResultBuf(pQInfo) != TSDB_CODE_SUCCESS) {
      qError("QInfo:%p failed to flush data into temp file, abort query", pQInfo);
      goto _cleanup;
    }
  }

  endt = taosGetTimestampMs();

#ifdef _DEBUG_VIEW
  displayInterResult(pQuery->sdata, pRuntimeEnv, pQuery->sdata[0]->num);
#endif

  qTrace("QInfo:%p result merge completed for group:%d, elapsed time:%" PRId64 " ms", pQInfo, pQInfo->groupIndex, endt - startt);

  pQInfo->offset = 0;
  ret = pQInfo->numOfGroupResultPages;

_cleanup:
  if (pResultInfo != NULL) {
    for (int32_t i = 0; i < pQuery->numOfOutput; ++i) {
      tfree(pResultInfo[i].interResultBuf);
    }
  }

  tfree(pResultInfo);
  tfree(pTree);
  tfree(pTableList);
  tfree(posList);

  return ret;
}

/*
 * Move the rows currently held in the query output buffers (pQuery->sdata)
 * into newly obtained pages of the disk based result buffer.
 *
 * The rows are split into chunks of at most
 * (DEFAULT_INTERN_BUF_PAGE_SIZE - sizeof(tFilePage)) / rowSize rows, and every
 * chunk is written column by column into its own page. All pages of one call
 * are requested under the same id; numOfGroupResultPages is increased by one
 * per call, not per page.
 *
 * Always returns TSDB_CODE_SUCCESS.
 */
int32_t flushFromResultBuf(SQInfo *pQInfo) {
  SQueryRuntimeEnv *   pRuntimeEnv = &pQInfo->runtimeEnv;
  SQuery *             pQuery = pRuntimeEnv->pQuery;
  SDiskbasedResultBuf *pResultBuf = pRuntimeEnv->pResultBuf;

  int32_t rowsPerPage = (DEFAULT_INTERN_BUF_PAGE_SIZE - sizeof(tFilePage)) / pQuery->rowSize;
  int32_t pageId = -1;  // receives the id of the page handed out by getNewDataBuf

  int32_t pending = pQuery->sdata[0]->num;
  int32_t written = 0;

  while (pending > 0) {
    int32_t chunk = (pending > rowsPerPage) ? rowsPerPage : pending;

    int32_t    id = getGroupResultId(pQInfo->groupIndex) + pQInfo->numOfGroupResultPages;
    tFilePage *pPage = getNewDataBuf(pResultBuf, id, &pageId);
    pPage->num = chunk;

    // column-wise copy of this chunk into the destination page
    for (int32_t col = 0; col < pQuery->numOfOutput; ++col) {
      int32_t bytes = pRuntimeEnv->pCtx[col].outputBytes;

      memcpy(pPage->data + pRuntimeEnv->offset[col] * pPage->num, ((char *)pQuery->sdata[col]->data) + written * bytes,
             pPage->num * bytes);
    }

    written += chunk;
    pending -= chunk;
  }

  pQInfo->numOfGroupResultPages += 1;
  return TSDB_CODE_SUCCESS;
}

/*
 * Rewind the merge output of every column: clear the row count of the output
 * buffers and re-attach the function contexts to them.
 *
 * aOutputBuf is deliberately positioned one row *before* the start of the
 * column buffer: doMerge advances it by outputBytes before the first row of a
 * new timestamp is produced.
 */
void resetMergeResultBuf(SQuery *pQuery, SQLFunctionCtx *pCtx, SResultInfo *pResultInfo) {
  for (int32_t col = 0; col < pQuery->numOfOutput; ++col) {
    SQLFunctionCtx *pCur = &pCtx[col];

    pQuery->sdata[col]->num = 0;

    pCur->aOutputBuf = pQuery->sdata[col]->data - pCur->outputBytes;
    pCur->resultInfo = &pResultInfo[col];
    pCur->startOffset = 0;
    pCur->size = 1;
  }
}

2772 2773 2774 2775 2776 2777 2778
/*
 * Prepare the query info of one table for the reverse scan.
 *
 * The new window starts one step past the last key that was scanned and ends
 * at the previous window start. lastKey is set to the new start, the cursor
 * order is switched and its vgroup index reset. A NULL table info is ignored.
 *
 * NOTE: pQuery->order.order is expected to be switched already by the caller.
 */
static void updateTableQueryInfoForReverseScan(SQuery *pQuery, STableQueryInfo *pTableQueryInfo) {
  if (pTableQueryInfo == NULL) {
    return;
  }

  // order has changed already!
  int32_t step = GET_FORWARD_DIRECTION_FACTOR(pQuery->order.order);

  // TODO validate that win.ekey and lastKey + step are consistent with the scan order

  TSKEY prevStart = pTableQueryInfo->win.skey;
  TSKEY newStart = pTableQueryInfo->lastKey + step;

  pTableQueryInfo->win.skey = newStart;
  pTableQueryInfo->win.ekey = prevStart;
  pTableQueryInfo->lastKey = newStart;

  SWITCH_ORDER(pTableQueryInfo->cur.order);
  pTableQueryInfo->cur.vgroupIndex = -1;
}

/*
 * For every closed window result, decide per output column whether the
 * function still has to run during the reverse scan.
 *
 * first/first_dst under ascending order and last/last_dst under descending
 * order are re-opened (complete = false). Every other function except ts and
 * tag is marked complete. ts and tag columns are left untouched.
 */
static void disableFuncInReverseScanImpl(SQInfo* pQInfo, SWindowResInfo *pWindowResInfo, int32_t order) {
  SQuery *pQuery = pQInfo->runtimeEnv.pQuery;

  for (int32_t w = 0; w < pWindowResInfo->size; ++w) {
    SWindowStatus *pStatus = getTimeWindowResStatus(pWindowResInfo, w);
    if (!pStatus->closed) {
      continue;
    }

    SWindowResult *pRes = getWindowResult(pWindowResInfo, w);

    // open/close the specified query for each group result
    for (int32_t col = 0; col < pQuery->numOfOutput; ++col) {
      int32_t funcId = pQuery->pSelectExpr[col].base.functionId;

      bool firstInAsc = (order == TSDB_ORDER_ASC) && (funcId == TSDB_FUNC_FIRST || funcId == TSDB_FUNC_FIRST_DST);
      bool lastInDesc = (order == TSDB_ORDER_DESC) && (funcId == TSDB_FUNC_LAST || funcId == TSDB_FUNC_LAST_DST);

      if (firstInAsc || lastInDesc) {
        pRes->resultInfo[col].complete = false;
      } else if (funcId != TSDB_FUNC_TS && funcId != TSDB_FUNC_TAG) {
        pRes->resultInfo[col].complete = true;
      }
    }
  }
}

2821 2822
/*
 * Select the functions that take part in the reverse scan and update the
 * query info of every table accordingly.
 *
 * For group-by-normal-column and interval queries the decision is made per
 * closed window result; otherwise it is made on the function contexts
 * directly. In both cases first/first_dst (ascending order) and last/last_dst
 * (descending order) are re-opened, and every other function except ts and
 * tag is marked complete.
 */
void disableFuncInReverseScan(SQInfo *pQInfo) {
  SQueryRuntimeEnv *pRuntimeEnv = &pQInfo->runtimeEnv;
  SQuery *          pQuery = pRuntimeEnv->pQuery;
  int32_t           order = pQuery->order.order;

  if (isGroupbyNormalCol(pQuery->pGroupbyExpr) || isIntervalQuery(pQuery)) {
    // group by normal columns and interval query on normal table
    disableFuncInReverseScanImpl(pQInfo, &pRuntimeEnv->windowResInfo, order);
  } else {
    // simple result of table query, todo refactor
    for (int32_t col = 0; col < pQuery->numOfOutput; ++col) {
      SQLFunctionCtx *pCtx = &pRuntimeEnv->pCtx[col];
      if (pCtx->resultInfo == NULL) {
        continue;  // resultInfo is NULL, means no data checked in previous scan
      }

      int32_t funcId = pQuery->pSelectExpr[col].base.functionId;

      bool firstInAsc = (order == TSDB_ORDER_ASC) && (funcId == TSDB_FUNC_FIRST || funcId == TSDB_FUNC_FIRST_DST);
      bool lastInDesc = (order == TSDB_ORDER_DESC) && (funcId == TSDB_FUNC_LAST || funcId == TSDB_FUNC_LAST_DST);

      if (firstInAsc || lastInDesc) {
        pCtx->resultInfo->complete = false;
      } else if (funcId != TSDB_FUNC_TS && funcId != TSDB_FUNC_TAG) {
        pCtx->resultInfo->complete = true;
      }
    }
  }

  // switch the scan window/cursor of every table in every group
  int32_t numOfGroups = taosArrayGetSize(pQInfo->tableqinfoGroupInfo.pGroupList);
  for (int32_t g = 0; g < numOfGroups; ++g) {
    SArray *pTables = taosArrayGetP(pQInfo->tableqinfoGroupInfo.pGroupList, g);

    size_t numOfTables = taosArrayGetSize(pTables);
    for (int32_t k = 0; k < numOfTables; ++k) {
      STableQueryInfo *pTableInfo = taosArrayGetP(pTables, k);
      updateTableQueryInfoForReverseScan(pQuery, pTableInfo);
    }
  }
}

2861
/* Flip the scan order stored in the function context of every output column. */
void switchCtxOrder(SQueryRuntimeEnv *pRuntimeEnv) {
  int32_t numOfOutput = pRuntimeEnv->pQuery->numOfOutput;

  for (int32_t col = 0; col < numOfOutput; ++col) {
    SQLFunctionCtx *pCtx = &pRuntimeEnv->pCtx[col];
    SWITCH_ORDER(pCtx->order);
  }
}

/*
 * Initialize a window result row: remember its position info and allocate one
 * zeroed SResultInfo per output column, then let setWindowResultInfo set up
 * the intermediate result output buffers.
 *
 * NOTE(review): the allocation result is not checked before it is passed on.
 */
void createQueryResultInfo(SQuery *pQuery, SWindowResult *pResultRow, bool isSTableQuery, SPosInfo *posInfo) {
  size_t numOfCols = (size_t)pQuery->numOfOutput;

  pResultRow->pos = *posInfo;
  pResultRow->resultInfo = calloc(numOfCols, sizeof(SResultInfo));

  // set the intermediate result output buffer
  setWindowResultInfo(pResultRow->resultInfo, pQuery, isSTableQuery);
}

/*
 * Point every function context back at the start of its output column, reset
 * and re-attach the runtime result info, zero the output buffers and finally
 * re-initialize the contexts through initCtxOutputBuf.
 */
void resetCtxOutputBuf(SQueryRuntimeEnv *pRuntimeEnv) {
  SQuery *pQuery = pRuntimeEnv->pQuery;

  for (int32_t col = 0; col < pQuery->numOfOutput; ++col) {
    SQLFunctionCtx *pCtx = &pRuntimeEnv->pCtx[col];
    SResultInfo *   pResInfo = &pRuntimeEnv->resultInfo[col];

    pCtx->aOutputBuf = pQuery->sdata[col]->data;

    /*
     * set the output buffer information and intermediate buffer
     * not all queries require the interResultBuf, such as COUNT/TAGPRJ/PRJ/TAG etc.
     */
    resetResultInfo(pResInfo);
    pCtx->resultInfo = pResInfo;

    // top/bottom/diff write their timestamps into the output buffer of column 0
    int32_t funcId = pQuery->pSelectExpr[col].base.functionId;
    bool    needTsOutput = (funcId == TSDB_FUNC_TOP || funcId == TSDB_FUNC_BOTTOM || funcId == TSDB_FUNC_DIFF);
    if (needTsOutput) {
      pCtx->ptsOutputBuf = pRuntimeEnv->pCtx[0].aOutputBuf;
    }

    memset(pQuery->sdata[col]->data, 0, (size_t)pQuery->pSelectExpr[col].bytes * pQuery->rec.capacity);
  }

  initCtxOutputBuf(pRuntimeEnv);
}

/*
 * Advance the output position of the function contexts by `output` rows and
 * reset their result info.
 *
 * Only functions flagged IS_OUTER_FORWARD have aOutputBuf moved. For
 * top/bottom the timestamp output buffer (ptsOutputBuf) is moved as well,
 * because those routines write the timestamp column themselves. diff must not
 * appear here (asserted): it is handled in the multi-output function.
 */
void forwardCtxOutputBuf(SQueryRuntimeEnv *pRuntimeEnv, int64_t output) {
  SQuery *pQuery = pRuntimeEnv->pQuery;

  // reset the execution contexts
  for (int32_t col = 0; col < pQuery->numOfOutput; ++col) {
    SQLFunctionCtx *pCtx = &pRuntimeEnv->pCtx[col];

    int32_t funcId = pQuery->pSelectExpr[col].base.functionId;
    assert(funcId != TSDB_FUNC_DIFF);

    // set next output position
    if (IS_OUTER_FORWARD(aAggs[funcId].nStatus)) {
      pCtx->aOutputBuf += pCtx->outputBytes * output;
    }

    bool isTopBottom = (funcId == TSDB_FUNC_TOP || funcId == TSDB_FUNC_BOTTOM);
    if (isTopBottom) {
      // the timestamps of top/bottom are written through ptsOutputBuf, forward it too
      pCtx->ptsOutputBuf += TSDB_KEYSIZE * output;
    }

    resetResultInfo(pCtx->resultInfo);
  }
}

/*
 * Reset the stage of every function context to 0 and run the function's init
 * routine on contexts whose result info is not initialized yet.
 */
void initCtxOutputBuf(SQueryRuntimeEnv *pRuntimeEnv) {
  SQuery *pQuery = pRuntimeEnv->pQuery;

  for (int32_t col = 0; col < pQuery->numOfOutput; ++col) {
    SQLFunctionCtx *pCtx = &pRuntimeEnv->pCtx[col];
    int32_t         funcId = pQuery->pSelectExpr[col].base.functionId;

    pCtx->currentStage = 0;

    SResultInfo *pResInfo = GET_RES_INFO(pCtx);
    if (!pResInfo->initialized) {
      aAggs[funcId].init(pCtx);
    }
  }
}

2948
/*
 * Apply the remaining LIMIT offset to the rows currently held in the output
 * buffers.
 *
 * If no more rows are available than still have to be skipped, all rows are
 * dropped, the offset is reduced by that amount and the output buffers are
 * reset. Otherwise the first `offset` rows are discarded, the remaining rows
 * are moved to the front of every column and the offset becomes 0.
 */
void skipResults(SQueryRuntimeEnv *pRuntimeEnv) {
  SQuery *pQuery = pRuntimeEnv->pQuery;
  if (pQuery->rec.rows == 0 || pQuery->limit.offset == 0) {
    return;
  }

  if (pQuery->rec.rows <= pQuery->limit.offset) {
    // every available row is skipped
    qTrace("QInfo:%p skip rows:%" PRId64 ", new offset:%" PRIu64, GET_QINFO_ADDR(pRuntimeEnv), pQuery->rec.rows,
        pQuery->limit.offset - pQuery->rec.rows);

    pQuery->limit.offset -= pQuery->rec.rows;
    pQuery->rec.rows = 0;

    resetCtxOutputBuf(pRuntimeEnv);

    // clear the buffer full flag if exists
    CLEAR_QUERY_STATUS(pQuery, QUERY_RESBUF_FULL);
    return;
  }

  // only the leading rows are skipped
  int64_t numOfSkip = pQuery->limit.offset;
  pQuery->rec.rows -= numOfSkip;
  pQuery->limit.offset = 0;

  qTrace("QInfo:%p skip row:%"PRId64", new offset:%d, numOfRows remain:%" PRIu64, GET_QINFO_ADDR(pRuntimeEnv), numOfSkip,
         0, pQuery->rec.rows);

  for (int32_t col = 0; col < pQuery->numOfOutput; ++col) {
    SQLFunctionCtx *pCtx = &pRuntimeEnv->pCtx[col];

    int32_t funcId = pQuery->pSelectExpr[col].base.functionId;
    int32_t bytes = pCtx->outputBytes;

    // shift the remaining rows to the front, the next output goes right after them
    memmove(pQuery->sdata[col]->data, (char*) pQuery->sdata[col]->data + bytes * numOfSkip, pQuery->rec.rows * bytes);
    pCtx->aOutputBuf = ((char*) pQuery->sdata[col]->data) + pQuery->rec.rows * bytes;

    if (funcId == TSDB_FUNC_DIFF || funcId == TSDB_FUNC_TOP || funcId == TSDB_FUNC_BOTTOM) {
      pCtx->ptsOutputBuf = pRuntimeEnv->pCtx[0].aOutputBuf;
    }
  }

  updateNumOfResult(pRuntimeEnv, pQuery->rec.rows);
}

/*
 * Update the query status flags.
 *
 * QUERY_NOT_COMPLETED replaces the whole status. Any other status is OR-ed
 * into the existing flags after QUERY_NOT_COMPLETED has been cleared, because
 * that flag is not compatible with any other status.
 */
void setQueryStatus(SQuery *pQuery, int8_t status) {
  if (status == QUERY_NOT_COMPLETED) {
    pQuery->status = status;
    return;
  }

  CLEAR_QUERY_STATUS(pQuery, QUERY_NOT_COMPLETED);
  pQuery->status |= status;
}

/*
 * Call xNextStep on every output column except the ts column and report
 * whether any of those columns is still incomplete afterwards.
 */
static bool runNextStepOnAllFuncs(SQueryRuntimeEnv *pRuntimeEnv) {
  SQuery *pQuery = pRuntimeEnv->pQuery;
  bool    incomplete = false;

  for (int32_t col = 0; col < pQuery->numOfOutput; ++col) {
    int16_t funcId = pQuery->pSelectExpr[col].base.functionId;
    if (funcId == TSDB_FUNC_TS) {
      continue;
    }

    aAggs[funcId].xNextStep(&pRuntimeEnv->pCtx[col]);

    SResultInfo *pResInfo = GET_RES_INFO(&pRuntimeEnv->pCtx[col]);
    if (!pResInfo->complete) {
      incomplete = true;
    }
  }

  return incomplete;
}

/*
 * Decide whether the data blocks have to be scanned once more.
 *
 * xNextStep is invoked on every function (except ts). For group-by-normal
 * column and interval queries this is done once per closed window result,
 * after switching the output buffers to that window.
 *
 * Returns true if at least one function is not complete yet.
 */
bool needScanDataBlocksAgain(SQueryRuntimeEnv *pRuntimeEnv) {
  SQuery *pQuery = pRuntimeEnv->pQuery;

  if (!(isGroupbyNormalCol(pQuery->pGroupbyExpr) || isIntervalQuery(pQuery))) {
    return runNextStepOnAllFuncs(pRuntimeEnv);
  }

  // for each closed group/window result, advance the functions of every column
  bool            toContinue = false;
  SWindowResInfo *pWindowResInfo = &pRuntimeEnv->windowResInfo;

  for (int32_t w = 0; w < pWindowResInfo->size; ++w) {
    SWindowResult *pResult = getWindowResult(pWindowResInfo, w);
    if (!pResult->status.closed) {
      continue;
    }

    setWindowResOutputBuf(pRuntimeEnv, pResult);

    if (runNextStepOnAllFuncs(pRuntimeEnv)) {
      toContinue = true;
    }
  }

  return toContinue;
}

H
Haojun Liao 已提交
3044
/*
 * Take a snapshot of the query state before scanning a table from `start`.
 *
 * The snapshot holds the query status, the current window index, `start` as
 * last key, the query window and a current window running from `start` to
 * the end key of the current table. Fields not listed here are zeroed.
 *
 * `start` must not lie beyond the table's last key in scan direction (asserted).
 */
static SQueryStatusInfo getQueryStatusInfo(SQueryRuntimeEnv *pRuntimeEnv, TSKEY start) {
  SQuery *         pQuery = pRuntimeEnv->pQuery;
  STableQueryInfo *pTableQueryInfo = pQuery->current;

  assert(QUERY_IS_ASC_QUERY(pQuery) ? (start <= pTableQueryInfo->lastKey) : (start >= pTableQueryInfo->lastKey));

  SQueryStatusInfo info = {0};

  info.status = pQuery->status;
  info.windowIndex = pRuntimeEnv->windowResInfo.curIndex;
  info.lastKey = start;
  info.w = pQuery->window;
  info.curWindow.skey = start;
  info.curWindow.ekey = pTableQueryInfo->win.ekey;

  return info;
}

3062 3063 3064 3065
/*
 * Switch the runtime environment into reverse scan mode.
 *
 * Saves the ts-buffer cursor into pStatus, reverses the ts buffer (if any),
 * the query window and the query order, opens a secondary query handle for
 * the reversed window (releasing a previous one first), and finally switches
 * the order of the function contexts and selects the functions that take part
 * in the reverse scan.
 */
static void setEnvBeforeReverseScan(SQueryRuntimeEnv *pRuntimeEnv, SQueryStatusInfo *pStatus) {
  SQInfo *pQInfo = GET_QINFO_ADDR(pRuntimeEnv);
  SQuery *pQuery = pRuntimeEnv->pQuery;

  // save the cursor, then walk the ts buffer in the opposite direction
  pStatus->cur = tsBufGetCursor(pRuntimeEnv->pTSBuf);
  if (pRuntimeEnv->pTSBuf) {
    SWITCH_ORDER(pRuntimeEnv->pTSBuf->cur.order);
    tsBufNextPos(pRuntimeEnv->pTSBuf);
  }

  // reverse order time range: the scanned window with start and end exchanged
  pQuery->window = pStatus->curWindow;
  pQuery->window.skey = pStatus->curWindow.ekey;
  pQuery->window.ekey = pStatus->curWindow.skey;

  SWITCH_ORDER(pQuery->order.order);
  SET_REVERSE_SCAN_FLAG(pRuntimeEnv);

  STsdbQueryCond cond = {0};
  cond.twindow = pQuery->window;
  cond.order = pQuery->order.order;
  cond.colList = pQuery->colList;
  cond.numOfCols = pQuery->numOfCols;

  // clean unused handle
  if (pRuntimeEnv->pSecQueryHandle != NULL) {
    tsdbCleanupQueryHandle(pRuntimeEnv->pSecQueryHandle);
  }

  // add ref for table
  pRuntimeEnv->pSecQueryHandle = tsdbQueryTables(pQInfo->tsdb, &cond, &pQInfo->tableGroupInfo, pQInfo);

  setQueryStatus(pQuery, QUERY_NOT_COMPLETED);
  switchCtxOrder(pRuntimeEnv);
  disableFuncInReverseScan(pQInfo);
}

3099 3100
/*
 * Undo setEnvBeforeReverseScan: switch the query order and the function
 * contexts back, restore the ts-buffer cursor saved in pStatus, return to
 * master scan mode and restore last key, status and window from pStatus.
 */
static void clearEnvAfterReverseScan(SQueryRuntimeEnv *pRuntimeEnv, SQueryStatusInfo *pStatus) {
  SQuery *         pQuery = pRuntimeEnv->pQuery;
  STableQueryInfo *pCurrent = pQuery->current;

  SWITCH_ORDER(pQuery->order.order);
  switchCtxOrder(pRuntimeEnv);

  tsBufSetCursor(pRuntimeEnv->pTSBuf, &pStatus->cur);
  if (pRuntimeEnv->pTSBuf) {
    pRuntimeEnv->pTSBuf->cur.order = pQuery->order.order;
  }

  SET_MASTER_SCAN_FLAG(pRuntimeEnv);

  // restore the state saved before the reverse scan; the query window is reset
  // to the saved window so that the scan scope of a sliding query stays limited
  pQuery->status = pStatus->status;
  pCurrent->lastKey = pStatus->lastKey;

  pCurrent->win = pStatus->w;
  pQuery->window = pCurrent->win;
}

3122
/**
 * Scan the data blocks of the current table: one master scan, followed by as many repeat scans
 * as needScanDataBlocksAgain() asks for, and finally one reverse scan when needReverseScan()
 * reports it is required.
 *
 * @param pRuntimeEnv runtime environment of the query
 * @param start       key handed to getQueryStatusInfo() when the start position is recorded
 */
void scanOneTableDataBlocks(SQueryRuntimeEnv *pRuntimeEnv, TSKEY start) {
  SQInfo *         pQInfo = (SQInfo *)GET_QINFO_ADDR(pRuntimeEnv);
  SQuery *         pQuery = pRuntimeEnv->pQuery;
  STableQueryInfo *pCurTable = pQuery->current;

  setQueryStatus(pQuery, QUERY_NOT_COMPLETED);

  // remember where the query starts, so later scans can be set up from it
  SQueryStatusInfo saved = getQueryStatusInfo(pRuntimeEnv, start);

  SET_MASTER_SCAN_FLAG(pRuntimeEnv);
  int32_t direction = GET_FORWARD_DIRECTION_FACTOR(pQuery->order.order);

  for (;;) {
    doScanAllDataBlocks(pRuntimeEnv);

    // only the master scan updates the saved progress
    if (IS_MASTER_SCAN(pRuntimeEnv)) {
      saved.status = pQuery->status;
      saved.lastKey = pCurTable->lastKey;
      saved.curWindow.ekey = pCurTable->lastKey - direction;
    }

    if (!needScanDataBlocksAgain(pRuntimeEnv)) {
      // leaving the loop after a repeat scan: put the master-scan status code back
      if (pRuntimeEnv->scanFlag == REPEAT_SCAN) {
        pQuery->status = saved.status;
      }

      break;
    }

    STsdbQueryCond cond = {
        .twindow = saved.curWindow,
        .order = pQuery->order.order,
        .colList = pQuery->colList,
        .numOfCols = pQuery->numOfCols,
    };

    // release the handle of the previous extra scan before opening a new one
    if (pRuntimeEnv->pSecQueryHandle != NULL) {
      tsdbCleanupQueryHandle(pRuntimeEnv->pSecQueryHandle);
    }

    pRuntimeEnv->pSecQueryHandle = tsdbQueryTables(pQInfo->tsdb, &cond, &pQInfo->tableGroupInfo, pQInfo);
    pRuntimeEnv->windowResInfo.curIndex = saved.windowIndex;

    setQueryStatus(pQuery, QUERY_NOT_COMPLETED);
    pRuntimeEnv->scanFlag = REPEAT_SCAN;

    qTrace("QInfo:%p start to repeat scan data blocks due to query func required, qrange:%" PRId64 "-%" PRId64, pQInfo,
           cond.twindow.skey, cond.twindow.ekey);

    // stop right away if the query has been killed
    if (isQueryKilled(pQInfo)) {
      return;
    }
  }

  if (needReverseScan(pQuery)) {
    setEnvBeforeReverseScan(pRuntimeEnv, &saved);

    // reverse scan from current position
    qTrace("QInfo:%p start to reverse scan", pQInfo);
    doScanAllDataBlocks(pRuntimeEnv);

    clearEnvAfterReverseScan(pRuntimeEnv, &saved);
  }
}

H
hjxilinx 已提交
3192
/**
 * Call the xFinalize callback of every output column.
 *
 * For group-by-normal-column and interval queries the callbacks are invoked once per closed
 * window result, and the number of rows of that window result is refreshed afterwards. For all
 * other queries the callbacks are invoked exactly once on the current contexts.
 */
void finalizeQueryResult(SQueryRuntimeEnv *pRuntimeEnv) {
  SQuery *pQuery = pRuntimeEnv->pQuery;
  bool    groupbyNormalCol = isGroupbyNormalCol(pQuery->pGroupbyExpr);

  if (!groupbyNormalCol && !isIntervalQuery(pQuery)) {
    for (int32_t col = 0; col < pQuery->numOfOutput; ++col) {
      aAggs[pQuery->pSelectExpr[col].base.functionId].xFinalize(&pRuntimeEnv->pCtx[col]);
    }
    return;
  }

  SWindowResInfo *pInfo = &pRuntimeEnv->windowResInfo;

  // group-by results are all closed before they are finalized
  if (groupbyNormalCol) {
    closeAllTimeWindow(pInfo);
  }

  for (int32_t idx = 0; idx < pInfo->size; ++idx) {
    if (isWindowResClosed(pInfo, idx)) {
      SWindowResult *pRes = &pInfo->pResult[idx];

      // point the function contexts at this window result before finalizing
      setWindowResOutputBuf(pRuntimeEnv, pRes);

      for (int32_t col = 0; col < pQuery->numOfOutput; ++col) {
        aAggs[pQuery->pSelectExpr[col].base.functionId].xFinalize(&pRuntimeEnv->pCtx[col]);
      }

      /*
       * set the number of output results for group by normal columns, the number of output rows usually is 1
       * except the top and bottom query
       */
      pRes->numOfRows = getNumOfResult(pRuntimeEnv);
    }
  }
}

/**
 * Check whether the select list contains at least one function other than TSDB_FUNC_TS,
 * TSDB_FUNC_TAG and TSDB_FUNC_TAGPRJ.
 *
 * @return true if such an output column exists, false otherwise
 */
static bool hasMainOutput(SQuery *pQuery) {
  for (int32_t col = 0; col < pQuery->numOfOutput; ++col) {
    int32_t fid = pQuery->pSelectExpr[col].base.functionId;
    bool    auxiliary = (fid == TSDB_FUNC_TS) || (fid == TSDB_FUNC_TAG) || (fid == TSDB_FUNC_TAGPRJ);

    if (!auxiliary) {
      return true;
    }
  }

  return false;
}

3240
/**
 * Allocate and initialize the per-table query information.
 *
 * The time window is copied, the last key starts at win.skey, and the TS buffer cursor is marked
 * as not positioned yet (vgroupIndex == -1). The window result info of the table is set up with
 * initWindowResInfo().
 *
 * @param pRuntimeEnv runtime environment passed on to initWindowResInfo()
 * @param pTable      table object stored in the new structure (not copied)
 * @param win         query time window of this table
 * @return the new object, or NULL if the allocation failed; release it with destroyTableQueryInfo()
 */
static STableQueryInfo *createTableQueryInfo( SQueryRuntimeEnv *pRuntimeEnv, void* pTable, STimeWindow win) {
  STableQueryInfo *pTableQueryInfo = calloc(1, sizeof(STableQueryInfo));
  if (pTableQueryInfo == NULL) {  // out of memory: report it instead of dereferencing NULL below
    return NULL;
  }

  pTableQueryInfo->win = win;
  pTableQueryInfo->lastKey = win.skey;

  pTableQueryInfo->pTable = pTable;
  pTableQueryInfo->cur.vgroupIndex = -1;

  // NOTE(review): the result of initWindowResInfo() is not inspected here - confirm whether it can fail
  initWindowResInfo(&pTableQueryInfo->windowResInfo, pRuntimeEnv, 100, 100, TSDB_DATA_TYPE_INT);
  return pTableQueryInfo;
}

3253
/**
 * Release a table query info object together with its window result info.
 * Passing NULL is allowed and does nothing.
 *
 * @param pTableQueryInfo object to destroy, may be NULL
 * @param numOfCols       column count handed to cleanupTimeWindowInfo()
 */
void destroyTableQueryInfo(STableQueryInfo *pTableQueryInfo, int32_t numOfCols) {
  if (pTableQueryInfo != NULL) {
    cleanupTimeWindowInfo(&pTableQueryInfo->windowResInfo, numOfCols);
    free(pTableQueryInfo);
  }
}

3262
/**
 * Make pTableQueryInfo the table the query is currently working on.
 *
 * In debug builds it is also verified that the last key lies on the expected side of the window
 * start: not before it for ascending queries, not after it for descending ones.
 */
void setCurrentQueryTable(SQueryRuntimeEnv *pRuntimeEnv, STableQueryInfo *pTableQueryInfo) {
  SQuery *pQuery = pRuntimeEnv->pQuery;
  pQuery->current = pTableQueryInfo;

  assert(QUERY_IS_ASC_QUERY(pQuery) ? (pTableQueryInfo->lastKey >= pTableQueryInfo->win.skey)
                                    : (pTableQueryInfo->lastKey <= pTableQueryInfo->win.skey));
}

/**
 * set output buffer for different group
 * @param pRuntimeEnv
3273
 * @param pDataBlockInfo
3274
 */
3275
void setExecutionContext(SQInfo *pQInfo, void* pTable, int32_t groupIndex, TSKEY nextKey) {
3276
  SQueryRuntimeEnv *pRuntimeEnv = &pQInfo->runtimeEnv;
H
hjxilinx 已提交
3277 3278
  STableQueryInfo *pTableQueryInfo = pRuntimeEnv->pQuery->current;
  
3279 3280
  SWindowResInfo *  pWindowResInfo = &pRuntimeEnv->windowResInfo;
  int32_t           GROUPRESULTID = 1;
3281

3282
  SWindowResult *pWindowRes = doSetTimeWindowFromKey(pRuntimeEnv, pWindowResInfo, (char *)&groupIndex, sizeof(groupIndex));
3283 3284 3285
  if (pWindowRes == NULL) {
    return;
  }
3286

3287 3288 3289 3290 3291 3292 3293 3294 3295 3296
  /*
   * not assign result buffer yet, add new result buffer
   * all group belong to one result set, and each group result has different group id so set the id to be one
   */
  if (pWindowRes->pos.pageId == -1) {
    if (addNewWindowResultBuf(pWindowRes, pRuntimeEnv->pResultBuf, GROUPRESULTID, pRuntimeEnv->numOfRowsPerPage) !=
        TSDB_CODE_SUCCESS) {
      return;
    }
  }
3297

3298 3299
  setWindowResOutputBuf(pRuntimeEnv, pWindowRes);
  initCtxOutputBuf(pRuntimeEnv);
3300

3301
  pTableQueryInfo->lastKey = nextKey;
3302
  setAdditionalInfo(pQInfo, pTableQueryInfo->pTable, pTableQueryInfo);
3303 3304
}

H
Haojun Liao 已提交
3305
/**
 * Bind every function context to the given window result: the output buffer is taken from the
 * result page, the result info is taken from pResult, and the super table flag is copied from
 * the runtime environment. TOP, BOTTOM and DIFF additionally use the output buffer of the first
 * context as their timestamp output buffer.
 */
void setWindowResOutputBuf(SQueryRuntimeEnv *pRuntimeEnv, SWindowResult *pResult) {
  SQuery *pQuery = pRuntimeEnv->pQuery;

  // Note: pResult->pos[i]->num == 0, there is only fixed number of results for each group
  for (int32_t col = 0; col < pQuery->numOfOutput; ++col) {
    SQLFunctionCtx *pCtx = &pRuntimeEnv->pCtx[col];
    pCtx->aOutputBuf = getPosInResultPage(pRuntimeEnv, col, pResult);

    int32_t fid = pQuery->pSelectExpr[col].base.functionId;
    bool    needTsOutput = (fid == TSDB_FUNC_TOP) || (fid == TSDB_FUNC_BOTTOM) || (fid == TSDB_FUNC_DIFF);
    if (needTsOutput) {
      pCtx->ptsOutputBuf = pRuntimeEnv->pCtx[0].aOutputBuf;
    }

    /*
     * attach the result info (and with it the intermediate buffer) of this column;
     * not all queries require the interResultBuf, such as COUNT
     */
    pCtx->resultInfo = &pResult->resultInfo[col];

    // propagate the super table query flag
    SResultInfo *pInfo = GET_RES_INFO(pCtx);
    pInfo->superTableQ = pRuntimeEnv->stableQuery;
  }
}

H
Haojun Liao 已提交
3330 3331
/**
 * Bind the function contexts to the given window result and initialize those that have not been
 * initialized yet. Columns whose result info is already marked complete only get their result
 * info pointer updated and are otherwise left alone.
 */
void setWindowResOutputBufInitCtx(SQueryRuntimeEnv *pRuntimeEnv, SWindowResult *pResult) {
  SQuery *pQuery = pRuntimeEnv->pQuery;

  // Note: pResult->pos[i]->num == 0, there is only fixed number of results for each group
  for (int32_t col = 0; col < pQuery->numOfOutput; ++col) {
    SQLFunctionCtx *pCtx = &pRuntimeEnv->pCtx[col];
    pCtx->resultInfo = &pResult->resultInfo[col];

    // nothing more to set up for a column that is already complete
    if (pCtx->resultInfo->complete) {
      continue;
    }

    pCtx->aOutputBuf = getPosInResultPage(pRuntimeEnv, col, pResult);
    pCtx->currentStage = 0;

    int32_t fid = pCtx->functionId;
    bool    needTsOutput = (fid == TSDB_FUNC_TOP) || (fid == TSDB_FUNC_BOTTOM) || (fid == TSDB_FUNC_DIFF);
    if (needTsOutput) {
      pCtx->ptsOutputBuf = pRuntimeEnv->pCtx[0].aOutputBuf;
    }

    // propagate the super table query flag
    pCtx->resultInfo->superTableQ = pRuntimeEnv->stableQuery;

    // run the init callback only once per result info
    if (!pCtx->resultInfo->initialized) {
      aAggs[fid].init(pCtx);
    }
  }
}

3362
/**
 * Set the tag values for the table and, if a TS buffer is in use, position its cursor.
 *
 * On the first visit of a table (cur.vgroupIndex == -1) the start position is located by the tag
 * value of the first context and the resulting cursor is remembered in pTableQueryInfo.
 * On later visits the remembered cursor is simply restored.
 *
 * @return always 0
 */
int32_t setAdditionalInfo(SQInfo *pQInfo, void* pTable, STableQueryInfo *pTableQueryInfo) {
  SQueryRuntimeEnv *pRuntimeEnv = &pQInfo->runtimeEnv;

  setTagVal(pRuntimeEnv, pTable, pQInfo->tsdb);

  // both the master and supplement scan needs to set the correct ts comp start position
  if (pRuntimeEnv->pTSBuf == NULL) {
    return 0;
  }

  if (pTableQueryInfo->cur.vgroupIndex != -1) {
    // cursor already known for this table
    tsBufSetCursor(pRuntimeEnv->pTSBuf, &pTableQueryInfo->cur);
  } else {
    pTableQueryInfo->tag = pRuntimeEnv->pCtx[0].tag.i64Key;
    tsBufGetElemStartPos(pRuntimeEnv->pTSBuf, 0, pTableQueryInfo->tag);

    // keep the cursor info of the current table
    pTableQueryInfo->cur = pRuntimeEnv->pTSBuf->cur;
  }

  return 0;
}

/*
 * There are two cases to handle:
 *
 * 1. Query range is not set yet (queryRangeSet = 0). we need to set the query range info, including pQuery->lastKey,
 *    pQuery->window.skey, and pQuery->eKey.
 * 2. Query range is set and query is in progress. There may be another result with the same query ranges to be
 *    merged during merge stage. In this case, we need the pTableQueryInfo->lastResRows to decide if there
 *    is a previous result generated or not.
 */
H
hjxilinx 已提交
3393
void setIntervalQueryRange(SQInfo *pQInfo, TSKEY key) {
3394 3395
  SQueryRuntimeEnv *pRuntimeEnv = &pQInfo->runtimeEnv;
  SQuery *          pQuery = pRuntimeEnv->pQuery;
H
hjxilinx 已提交
3396 3397
  STableQueryInfo *pTableQueryInfo = pQuery->current;
  
3398 3399 3400
  if (pTableQueryInfo->queryRangeSet) {
    pTableQueryInfo->lastKey = key;
  } else {
3401
    pTableQueryInfo->win.skey = key;
3402
    STimeWindow win = {.skey = key, .ekey = pQuery->window.ekey};
3403

3404 3405 3406 3407 3408
    // for too small query range, no data in this interval.
    if ((QUERY_IS_ASC_QUERY(pQuery) && (pQuery->window.ekey < pQuery->window.skey)) ||
        (!QUERY_IS_ASC_QUERY(pQuery) && (pQuery->window.skey < pQuery->window.ekey))) {
      return;
    }
3409

3410 3411 3412 3413 3414 3415
    /**
     * In handling the both ascending and descending order super table query, we need to find the first qualified
     * timestamp of this table, and then set the first qualified start timestamp.
     * In ascending query, key is the first qualified timestamp. However, in the descending order query, additional
     * operations involve.
     */
H
Haojun Liao 已提交
3416
    STimeWindow     w = TSWINDOW_INITIALIZER, realWin = TSWINDOW_INITIALIZER;
3417
    SWindowResInfo *pWindowResInfo = &pTableQueryInfo->windowResInfo;
3418

H
Haojun Liao 已提交
3419 3420
    TSKEY sk = MIN(win.skey, win.ekey);
    TSKEY ek = MAX(win.skey, win.ekey);
H
Haojun Liao 已提交
3421
    getAlignQueryTimeWindow(pQuery, win.skey, sk, ek, &realWin, &w);
3422
    pWindowResInfo->startTime = pTableQueryInfo->win.skey;  // windowSKey may be 0 in case of 1970 timestamp
3423

3424 3425
    if (pWindowResInfo->prevSKey == TSKEY_INITIAL_VAL) {
      if (!QUERY_IS_ASC_QUERY(pQuery)) {
H
Haojun Liao 已提交
3426
        assert(win.ekey == pQuery->window.ekey);
3427
      }
3428 3429
      
      pWindowResInfo->prevSKey = w.skey;
3430
    }
3431

3432
    pTableQueryInfo->queryRangeSet = 1;
3433
    pTableQueryInfo->lastKey = pTableQueryInfo->win.skey;
3434 3435 3436 3437
  }
}

/**
 * Check whether any function of the select list carries the TSDB_FUNCSTATE_NEED_TS flag.
 *
 * @return true if at least one output function has the flag set, false otherwise
 */
bool requireTimestamp(SQuery *pQuery) {
  int32_t col = 0;

  while (col < pQuery->numOfOutput) {
    int32_t fid = pQuery->pSelectExpr[col].base.functionId;
    if (aAggs[fid].nStatus & TSDB_FUNCSTATE_NEED_TS) {
      return true;
    }

    col += 1;
  }

  return false;
}

/**
 * Decide whether the primary timestamp column of a data block has to be loaded.
 *
 * It is required when
 * 1. the last key of the current table or the end key of the query window falls into the time
 *    range of the block, so the precise position inside the block has to be located, or
 * 2. one of the output functions needs the timestamp in any case (see requireTimestamp()).
 */
bool needPrimaryTimestampCol(SQuery *pQuery, SDataBlockInfo *pDataBlockInfo) {
  STimeWindow *    pBlockWin = &pDataBlockInfo->window;
  STableQueryInfo *pCurTable = pQuery->current;

  if (pCurTable->lastKey >= pBlockWin->skey && pCurTable->lastKey <= pBlockWin->ekey) {
    return true;
  }

  if (pQuery->window.ekey >= pBlockWin->skey && pQuery->window.ekey <= pBlockWin->ekey) {
    return true;
  }

  return requireTimestamp(pQuery);
}

/**
 * Number of result subsets of the query: the number of closed time windows for group-by-normal-
 * column and interval queries, the size of the table group list otherwise.
 */
static int32_t getNumOfSubset(SQInfo *pQInfo) {
  SQuery *pQuery = pQInfo->runtimeEnv.pQuery;
  int32_t numOfSubset = 0;

  bool windowBased = isGroupbyNormalCol(pQuery->pGroupbyExpr) || isIntervalQuery(pQuery);
  if (windowBased) {
    numOfSubset = numOfClosedTimeWindow(&pQInfo->runtimeEnv.windowResInfo);
  } else {
    numOfSubset = taosArrayGetSize(pQInfo->tableqinfoGroupInfo.pGroupList);
  }

  return numOfSubset;
}

/**
 * Copy rows from the window results into the result buffers of the query (pQuery->sdata).
 *
 * Copying resumes at pQInfo->groupIndex / pQInfo->offset and both are advanced while rows are
 * copied, so a following call continues where this one stopped. The loop ends when all subsets
 * have been visited or the output buffer holds pQuery->rec.capacity rows.
 *
 * @param pQInfo    query info object
 * @param result    array of window results to copy from
 * @param orderType TSDB_ORDER_ASC walks the array forwards, any other value walks it backwards
 * @return number of rows copied by this call
 */
static int32_t doCopyToSData(SQInfo *pQInfo, SWindowResult *result, int32_t orderType) {
  SQueryRuntimeEnv *pRuntimeEnv = &pQInfo->runtimeEnv;
  SQuery *          pQuery = pRuntimeEnv->pQuery;

  qTrace("QInfo:%p start to copy data from windowResInfo to query buf", pQInfo);
  int32_t totalSubset = getNumOfSubset(pQInfo);

  int32_t step = (orderType == TSDB_ORDER_ASC) ? 1 : -1;
  int32_t first = 0;
  if (step > 0) {
    first = pQInfo->groupIndex;
  } else {  // descending order starts from the tail
    first = totalSubset - pQInfo->groupIndex - 1;
  }

  int32_t copied = 0;

  for (int32_t idx = first; (idx >= 0) && (idx < totalSubset); idx += step) {
    SWindowResult *pRes = &result[idx];

    // empty subset, move on to the next one
    if (pRes->numOfRows == 0) {
      pQInfo->offset = 0;
      pQInfo->groupIndex += 1;
      continue;
    }

    assert(pRes->numOfRows >= 0 && pQInfo->offset <= 1);

    int32_t skip = pQInfo->offset;
    int32_t rowsToCopy = pRes->numOfRows - pQInfo->offset;

    /*
     * current output space is not enough to keep all the result data of this group, only copy partial results
     * to SQuery object's result buffer
     */
    if (rowsToCopy > pQuery->rec.capacity - copied) {
      rowsToCopy = pQuery->rec.capacity - copied;
      pQInfo->offset += rowsToCopy;
    } else {
      pQInfo->offset = 0;
      pQInfo->groupIndex += 1;
    }

    for (int32_t col = 0; col < pQuery->numOfOutput; ++col) {
      int32_t bytes = pRuntimeEnv->pCtx[col].outputBytes;

      char *src = getPosInResultPage(pRuntimeEnv, col, pRes);
      char *dst = pQuery->sdata[col]->data + copied * bytes;
      memcpy(dst, src + skip * bytes, bytes * rowsToCopy);
    }

    copied += rowsToCopy;
    if (copied == pQuery->rec.capacity) {
      break;
    }
  }

  qTrace("QInfo:%p copy data to query buf completed", pQInfo);

#ifdef _DEBUG_VIEW
  displayInterResult(pQuery->sdata, pRuntimeEnv, copied);
#endif
  return copied;
}

/**
 * Copy window results into the result buffer of the query, in ascending or descending order.
 *
 * The order is taken from the group-by expression if the query has one and is ascending
 * otherwise. The number of copied rows is added to pQuery->rec.rows.
 *
 * @param pQInfo query info object
 * @param result array of window results to copy from
 */
void copyFromWindowResToSData(SQInfo *pQInfo, SWindowResult *result) {
  SQuery *pQuery = pQInfo->runtimeEnv.pQuery;

  int32_t order = TSDB_ORDER_ASC;
  if (pQuery->pGroupbyExpr != NULL) {
    order = pQuery->pGroupbyExpr->orderType;
  }

  pQuery->rec.rows += doCopyToSData(pQInfo, result, order);

  assert(pQuery->rec.rows <= pQuery->rec.capacity);
}

3559
/**
 * For queries without a time interval, raise the row count of every window result to the
 * largest number of results produced by any of its output functions. TS, TAG and TAGPRJ columns
 * are not taken into account. Interval queries are left untouched.
 *
 * @param pRuntimeEnv     runtime environment of the query
 * @param pTableQueryInfo not read by this function
 */
static UNUSED_FUNC void updateWindowResNumOfRes(SQueryRuntimeEnv *pRuntimeEnv, STableQueryInfo *pTableQueryInfo) {
  SQuery *pQuery = pRuntimeEnv->pQuery;

  if (pQuery->intervalTime != 0) {
    return;
  }

  // update the number of result for each, only update the number of rows for the corresponding window result.
  for (int32_t idx = 0; idx < pRuntimeEnv->windowResInfo.size; ++idx) {
    SWindowResult *pRes = &pRuntimeEnv->windowResInfo.pResult[idx];

    for (int32_t col = 0; col < pQuery->numOfOutput; ++col) {
      int32_t fid = pRuntimeEnv->pCtx[col].functionId;
      bool    auxiliary = (fid == TSDB_FUNC_TS) || (fid == TSDB_FUNC_TAG) || (fid == TSDB_FUNC_TAGPRJ);

      if (!auxiliary) {
        pRes->numOfRows = MAX(pRes->numOfRows, pRes->resultInfo[col].numOfRes);
      }
    }
  }
}

3588 3589
/**
 * Apply the query functions to one data block of the current table of a super table query.
 *
 * The block is processed row by row when the query has filter columns, uses a TS buffer or
 * groups by a normal column; otherwise it is processed block-wise. The start position inside the
 * block is the first row for ascending queries and the last row for descending ones.
 */
void stableApplyFunctionsOnBlock(SQueryRuntimeEnv *pRuntimeEnv, SDataBlockInfo *pDataBlockInfo, SDataStatis *pStatis,
    SArray *pDataBlock, __block_search_fn_t searchFn) {
  SQuery *         pQuery = pRuntimeEnv->pQuery;
  STableQueryInfo *pCurTable = pQuery->current;
  SWindowResInfo * pWinInfo = &pCurTable->windowResInfo;

  if (QUERY_IS_ASC_QUERY(pQuery)) {
    pQuery->pos = 0;
  } else {
    pQuery->pos = pDataBlockInfo->rows - 1;
  }

  bool rowwise =
      (pQuery->numOfFilterCols > 0) || (pRuntimeEnv->pTSBuf != NULL) || isGroupbyNormalCol(pQuery->pGroupbyExpr);

  if (rowwise) {
    rowwiseApplyFunctions(pRuntimeEnv, pStatis, pDataBlockInfo, pWinInfo, pDataBlock);
  } else {
    blockwiseApplyFunctions(pRuntimeEnv, pStatis, pDataBlockInfo, pWinInfo, searchFn, pDataBlock);
  }

  updateWindowResNumOfRes(pRuntimeEnv, pCurTable);
}

3605 3606 3607 3608
/**
 * Check whether the fill operation can still produce rows for the client.
 *
 * @return false if the query does not fill (or is a point interpolation query), or if the limit
 *         has been reached; true if the fill info still holds remaining rows; otherwise, for a
 *         completed query, whether filling up to the end of the query window yields any row.
 */
bool queryHasRemainResults(SQueryRuntimeEnv* pRuntimeEnv) {
  SQuery *   pQuery = pRuntimeEnv->pQuery;
  SFillInfo *pFillInfo = pRuntimeEnv->pFillInfo;

  // todo refactor
  bool noFill = (pQuery->fillType == TSDB_FILL_NONE) || isPointInterpoQuery(pQuery);
  if (noFill) {
    assert(pFillInfo == NULL);
    return false;
  }

  bool limitReached = (pQuery->limit.limit > 0) && (pQuery->rec.rows >= pQuery->limit.limit);
  if (limitReached) {
    return false;
  }

  // There are results not returned to client, fill operation applied to the remain result set in the
  // first place is required.
  if (taosNumOfRemainRows(pFillInfo) > 0) {
    return true;
  }

  /*
   * While the code reaches here, there are no results returned to client now.
   * If query is not completed yet, the gaps between two results blocks need to be handled after next data block
   * is retrieved from TSDB.
   *
   * NOTE: If the result set is not the first block, the gap in front of the result set will be filled. If the result
   * set is the FIRST result block, the gap between the start time of query time window and the timestamp of the
   * first result row in the actual result set will fill nothing.
   */
  if (!Q_STATUS_EQUAL(pQuery->status, QUERY_COMPLETED)) {
    return false;
  }

  int32_t filled = getFilledNumOfRes(pFillInfo, pQuery->window.ekey, pQuery->rec.capacity);
  return filled > 0;
}

/**
 * Serialize the query result into the response buffer.
 *
 * Layout written to data: the output columns one after another (numOfRows values each), then the
 * number of table id entries as a 32-bit integer converted with htonl, then the table id
 * entries with their fields converted with htobe64/htonl.
 * Finally the query status is moved to QUERY_OVER when a completed query has nothing left to
 * return.
 *
 * NOTE(review): the count and the table id entries are stored through casted pointers, which
 * presumably relies on data being suitably aligned at that position - confirm.
 *
 * @param pQInfo    query info object
 * @param numOfRows number of rows to copy from every output column
 * @param data      destination buffer; the caller must provide enough room
 */
static void doCopyQueryResultToMsg(SQInfo *pQInfo, int32_t numOfRows, char *data) {
  SQuery *pQuery = pQInfo->runtimeEnv.pQuery;
  char *  pos = data;

  // column data, one column after another
  for (int32_t col = 0; col < pQuery->numOfOutput; ++col) {
    int32_t bytes = pQuery->pSelectExpr[col].bytes;

    memmove(pos, pQuery->sdata[col]->data, bytes * numOfRows);
    pos += bytes * numOfRows;
  }

  // table id list, prefixed by its length
  int32_t numOfTables = (int32_t)taosArrayGetSize(pQInfo->arrTableIdInfo);
  *(int32_t*)pos = htonl(numOfTables);
  pos += sizeof(int32_t);

  for (int32_t idx = 0; idx < numOfTables; ++idx) {
    STableIdInfo* pFrom = taosArrayGet(pQInfo->arrTableIdInfo, idx);
    STableIdInfo* pTo = (STableIdInfo*)pos;

    pTo->uid = htobe64(pFrom->uid);
    pTo->tid = htonl(pFrom->tid);
    pTo->key = htobe64(pFrom->key);

    pos += sizeof(STableIdInfo);
  }

  // all data returned, set query over
  if (!Q_STATUS_EQUAL(pQuery->status, QUERY_COMPLETED)) {
    return;
  }

  if (pQInfo->runtimeEnv.stableQuery) {
    if (pQInfo->tableIndex >= pQInfo->tableqinfoGroupInfo.numOfTables) {
      setQueryStatus(pQuery, QUERY_OVER);
    }
  } else if (!queryHasRemainResults(&pQInfo->runtimeEnv)) {
    setQueryStatus(pQuery, QUERY_OVER);
  }
}

H
Haojun Liao 已提交
3679
/**
 * Generate filled result rows and apply the remaining offset of the query to them.
 *
 * Blocks are generated into pQuery->sdata until the offset has been consumed. While the offset
 * covers a whole generated block, the block is discarded and the offset is reduced. Once the
 * offset is smaller than the block, the leading rows are dropped by moving the remaining rows of
 * every column in pDst to the front.
 *
 * @param pRuntimeEnv  runtime environment of the query
 * @param pDst         per-column pages whose rows are shifted when part of a block is discarded
 * @param numOfInterpo not used by this function
 * @return number of rows available after the offset has been applied, 0 if nothing is left
 */
int32_t doFillGapsInResults(SQueryRuntimeEnv* pRuntimeEnv, tFilePage **pDst, int32_t *numOfInterpo) {
  SQInfo*    pQInfo = GET_QINFO_ADDR(pRuntimeEnv);
  SQuery *   pQuery = pRuntimeEnv->pQuery;
  SFillInfo* pFillInfo = pRuntimeEnv->pFillInfo;

  for (;;) {
    int32_t rows = taosGenerateDataBlock(pFillInfo, (tFilePage**) pQuery->sdata, pQuery->rec.capacity);

    // todo apply limit output function
    /* reached the start position of according to offset value, return immediately */
    if (pQuery->limit.offset == 0) {
      qTrace("QInfo:%p initial numOfRows:%d, generate filled result:%d rows", pQInfo, pFillInfo->numOfRows, rows);
      return rows;
    }

    if (pQuery->limit.offset < rows) {
      qTrace("QInfo:%p initial numOfRows:%d, generate filled result:%d rows, offset:%" PRId64 ". Discard due to offset, remain:%" PRId64 ", new offset:%d",
             pQInfo, pFillInfo->numOfRows, rows, pQuery->limit.offset, rows - pQuery->limit.offset, 0);

      rows -= pQuery->limit.offset;

      // todo !!!!there exactly number of interpo is not valid.
      // todo refactor move to the beginning of buffer
      for (int32_t col = 0; col < pQuery->numOfOutput; ++col) {
        memmove(pDst[col]->data, pDst[col]->data + pQuery->pSelectExpr[col].bytes * pQuery->limit.offset,
                rows * pQuery->pSelectExpr[col].bytes);
      }

      pQuery->limit.offset = 0;
      return rows;
    }

    // the whole block is swallowed by the offset
    qTrace("QInfo:%p initial numOfRows:%d, generate filled result:%d rows, offset:%" PRId64 ". Discard due to offset, "
           "remain:%d, new offset:%" PRId64, pQInfo, pFillInfo->numOfRows, rows, pQuery->limit.offset, 0,
           pQuery->limit.offset - rows);

    pQuery->limit.offset -= rows;
    pQuery->rec.rows = 0;

    if (!queryHasRemainResults(pRuntimeEnv)) {
      return 0;
    }
  }
}

3724
/**
 * Write the cost summary of the query (elapsed time, block and row counters) to the trace log.
 */
static void queryCostStatis(SQInfo *pQInfo) {
  SQueryCostInfo *pCost = &pQInfo->runtimeEnv.summary;

  // todo add the intermediate result save cost!!
  qTrace("QInfo:%p :cost summary: elpased time:%" PRId64 " us, total blocks:%d, use block statis:%d, use block data:%d, "
         "total rows:%" PRId64 ", check rows:%" PRId64,
         pQInfo, pCost->elapsedTime, pCost->totalBlocks, pCost->loadBlockStatis, pCost->loadBlocks, pCost->totalRows,
         pCost->totalCheckedRows);
}

3768 3769
/**
 * Consume the remaining offset of the query inside the given data block.
 *
 * If the offset equals the number of rows of the block, the block is skipped as a whole and the
 * last key of the current table is moved one step past it. Otherwise the start position inside
 * the block is computed from the offset, the last key is set to the timestamp at that position,
 * and the query functions are applied to the block from there. In both cases the offset is 0
 * afterwards.
 */
static void updateOffsetVal(SQueryRuntimeEnv *pRuntimeEnv, SDataBlockInfo *pBlockInfo) {
  SQuery *         pQuery = pRuntimeEnv->pQuery;
  STableQueryInfo *pCurTable = pQuery->current;

  int32_t step = GET_FORWARD_DIRECTION_FACTOR(pQuery->order.order);
  bool    ascending = QUERY_IS_ASC_QUERY(pQuery);

  // the offset covers exactly this block: skip it completely
  if (pQuery->limit.offset == pBlockInfo->rows) {
    if (ascending) {
      pCurTable->lastKey = pBlockInfo->window.ekey + step;
    } else {
      pCurTable->lastKey = pBlockInfo->window.skey + step;
    }

    pQuery->limit.offset = 0;
    return;
  }

  // start position inside the block, counted from the scan direction
  pQuery->pos = ascending ? pQuery->limit.offset : pBlockInfo->rows - pQuery->limit.offset - 1;
  assert(pQuery->pos >= 0 && pQuery->pos <= pBlockInfo->rows - 1);

  SArray *         pDataBlock = tsdbRetrieveDataBlock(pRuntimeEnv->pQueryHandle, NULL);
  SColumnInfoData *pTsCol = taosArrayGet(pDataBlock, 0);

  // the first column is read as the timestamp column
  TSKEY *tsList = (TSKEY *)pTsCol->pData;

  pCurTable->lastKey = tsList[pQuery->pos];
  pQuery->limit.offset = 0;

  int32_t numOfRes = tableApplyFunctionsOnBlock(pRuntimeEnv, pBlockInfo, NULL, binarySearchForKey, pDataBlock);

  qTrace("QInfo:%p check data block, brange:%" PRId64 "-%" PRId64 ", numOfRows:%d, numOfRes:%d, lastKey:%" PRId64,
         GET_QINFO_ADDR(pRuntimeEnv), pBlockInfo->window.skey, pBlockInfo->window.ekey, pBlockInfo->rows, numOfRes,
         pQuery->current->lastKey);
}
3803

3804 3805 3806 3807 3808
/**
 * Skip whole data blocks according to the offset of the query.
 *
 * Nothing is done when there is no positive offset or when the query has filter columns.
 * Blocks with fewer rows than the remaining offset are skipped entirely; the first block that
 * can hold the start position is handed to updateOffsetVal() and the iteration stops. The loop
 * is also left as soon as the query is found to be killed.
 */
void skipBlocks(SQueryRuntimeEnv *pRuntimeEnv) {
  SQuery *pQuery = pRuntimeEnv->pQuery;

  if (pQuery->limit.offset <= 0 || pQuery->numOfFilterCols > 0) {
    return;
  }

  pQuery->pos = 0;

  int32_t step = GET_FORWARD_DIRECTION_FACTOR(pQuery->order.order);
  bool    ascending = QUERY_IS_ASC_QUERY(pQuery);
  void *  pQInfo = GET_QINFO_ADDR(pRuntimeEnv);

  STableQueryInfo *pCurTable = pQuery->current;
  TsdbQueryHandleT handle = pRuntimeEnv->pQueryHandle;

  while (tsdbNextDataBlock(handle)) {
    if (isQueryKilled(pQInfo)) {
      return;
    }

    SDataBlockInfo blockInfo = tsdbRetrieveDataBlockInfo(handle);

    if (pQuery->limit.offset <= blockInfo.rows) {
      // find the appropriated start position in current block
      updateOffsetVal(pRuntimeEnv, &blockInfo);
      return;
    }

    // the whole block lies inside the offset
    pQuery->limit.offset -= blockInfo.rows;
    pCurTable->lastKey = ascending ? blockInfo.window.ekey : blockInfo.window.skey;
    pCurTable->lastKey += step;

    qTrace("QInfo:%p skip rows:%d, offset:%" PRId64, pQInfo, blockInfo.rows, pQuery->limit.offset);
  }
}
3837

H
Haojun Liao 已提交
3838
/*
 * Forward the start position of an interval query by pQuery->limit.offset time windows.
 *
 * *start is initialised with the lastKey of the current table. Nothing else happens when
 * there is no positive offset, when filter columns exist, or when a ts buffer / fill info is
 * attached to the runtime environment. Otherwise the data blocks are walked and one offset
 * is consumed per time window that is covered by the block range. When the offset reaches
 * zero, *start, pQuery->window.skey and the previous window start key are moved to the first
 * required window; if that window overlaps the current block, the block is loaded and the
 * query functions are applied on it right away.
 *
 * The function returns true on every path.
 */
static bool skipTimeInterval(SQueryRuntimeEnv *pRuntimeEnv, TSKEY* start) {
  SQuery *pQuery = pRuntimeEnv->pQuery;
  *start = pQuery->current->lastKey;

  // if queried with value filter, do NOT forward query start position
  if (pQuery->limit.offset <= 0 || pQuery->numOfFilterCols > 0) {
    return true;
  }

  if (pRuntimeEnv->pTSBuf != NULL || pRuntimeEnv->pFillInfo != NULL) {
    return true;
  }

  /*
   * 1. for interval without interpolation query we forward pQuery->intervalTime at a time for
   *    pQuery->limit.offset times. Since hole exists, pQuery->intervalTime*pQuery->limit.offset value is
   *    not valid. otherwise, we only forward pQuery->limit.offset number of points
   */
  assert(pRuntimeEnv->windowResInfo.prevSKey == TSKEY_INITIAL_VAL);

  STimeWindow alignedWin = TSWINDOW_INITIALIZER;
  STimeWindow realWin = TSWINDOW_INITIALIZER;

  SWindowResInfo  *pWindowResInfo = &pRuntimeEnv->windowResInfo;
  STableQueryInfo *pTableQueryInfo = pQuery->current;

  while (tsdbNextDataBlock(pRuntimeEnv->pQueryHandle)) {
    SDataBlockInfo blockInfo = tsdbRetrieveDataBlockInfo(pRuntimeEnv->pQueryHandle);

    if (!QUERY_IS_ASC_QUERY(pQuery)) {
      getAlignQueryTimeWindow(pQuery, blockInfo.window.ekey, pQuery->window.ekey, blockInfo.window.ekey, &realWin,
                              &alignedWin);

      pWindowResInfo->startTime = pQuery->window.skey;
      pWindowResInfo->prevSKey = alignedWin.skey;
    } else if (pWindowResInfo->prevSKey == TSKEY_INITIAL_VAL) {
      // ascending scan: align the window only once, on the first block
      getAlignQueryTimeWindow(pQuery, blockInfo.window.skey, blockInfo.window.skey, pQuery->window.ekey, &realWin,
                              &alignedWin);

      pWindowResInfo->startTime = alignedWin.skey;
      pWindowResInfo->prevSKey = alignedWin.skey;
    }

    // the first time window
    STimeWindow win = getActiveTimeWindow(pWindowResInfo, pWindowResInfo->prevSKey, pQuery);

    while (pQuery->limit.offset > 0) {
      // the current window is covered by the block range: one offset is consumed
      bool winCovered = QUERY_IS_ASC_QUERY(pQuery) ? (win.ekey <= blockInfo.window.ekey)
                                                   : (win.ekey >= blockInfo.window.skey);
      if (winCovered) {
        pQuery->limit.offset -= 1;
        pWindowResInfo->prevSKey = win.skey;
      }

      STimeWindow tw = win;
      getNextTimeWindow(pQuery, &tw);

      // does the next time window still overlap the current data block?
      bool nextInBlock = QUERY_IS_ASC_QUERY(pQuery) ? (tw.skey <= blockInfo.window.ekey)
                                                    : (tw.ekey >= blockInfo.window.skey);

      if (!nextInBlock) {
        if (pQuery->limit.offset != 0) {
          break;  // offset is not 0, and next time window begins or ends in the next block.
        }

        // all windows are skipped and the next window resides in a following block
        *start = tw.skey;
        pQuery->window.skey = tw.skey;
        pWindowResInfo->prevSKey = tw.skey;
        return true;
      }

      /*
       * The next time window still starts from current data block: load the block and find the
       * start position for the next queried time window in the primary timestamp column.
       * TODO: Optimize for this cases. All data blocks are not needed to be loaded, only if the first
       * actually required time window resides in current data block.
       */
      SArray          *pDataBlock = tsdbRetrieveDataBlock(pRuntimeEnv->pQueryHandle, NULL);
      SColumnInfoData *pColInfoData = taosArrayGet(pDataBlock, 0);

      tw = win;
      int32_t startPos = getNextQualifiedWindow(pRuntimeEnv, &tw, &blockInfo, pColInfoData->pData, binarySearchForKey);
      assert(startPos >= 0);

      // set the abort info
      pQuery->pos = startPos;
      TSKEY firstKey = ((TSKEY *)pColInfoData->pData)[startPos];

      if (pQuery->limit.offset != 0) {
        // more windows to skip: continue from the located window within the same block
        pTableQueryInfo->lastKey = firstKey;
        pWindowResInfo->prevSKey = tw.skey;
        win = tw;
        continue;
      }

      // reset the query start timestamp
      pTableQueryInfo->win.skey = firstKey;
      pQuery->window.skey = pTableQueryInfo->win.skey;
      *start = pTableQueryInfo->win.skey;

      pWindowResInfo->prevSKey = tw.skey;
      int32_t savedIndex = pRuntimeEnv->windowResInfo.curIndex;

      // check data remaining in current data block, TODO optimize performance
      int32_t numOfRes = tableApplyFunctionsOnBlock(pRuntimeEnv, &blockInfo, NULL, binarySearchForKey, pDataBlock);
      pRuntimeEnv->windowResInfo.curIndex = savedIndex;  // restore the window index

      qTrace("QInfo:%p check data block, brange:%" PRId64 "-%" PRId64 ", numOfRows:%d, numOfRes:%d, lastKey:%"PRId64,
             GET_QINFO_ADDR(pRuntimeEnv), blockInfo.window.skey, blockInfo.window.ekey, blockInfo.rows, numOfRes, pQuery->current->lastKey);

      return true;
    }
  }

  return true;
}

B
Bomin Zhang 已提交
3958 3959
/*
 * Create the tsdb query handle of the runtime environment for the given query.
 *
 * No handle is created for a query that only reads tags, nor for a super table query that is
 * neither an interval query nor a fixed-output query. For an ascending query on exactly one
 * normal table (no interval, no group-by on normal column, no fixed output) the time window of
 * that table is used instead of the query window.
 *
 * @param tsdb          tsdb instance passed to the tsdbQuery* functions
 * @param pQInfo        query info, receives the handle in runtimeEnv.pQueryHandle
 * @param isSTableQuery true if the query is issued on a super table
 */
static void setupQueryHandle(void* tsdb, SQInfo* pQInfo, bool isSTableQuery) {
  SQueryRuntimeEnv *pEnv = &pQInfo->runtimeEnv;
  SQuery           *pQuery = pQInfo->runtimeEnv.pQuery;

  if (onlyQueryTags(pQuery)) {
    return;
  }

  if (isSTableQuery && (!isIntervalQuery(pQuery)) && (!isFixedOutputQuery(pQuery))) {
    return;
  }

  STsdbQueryCond cond = {
    .twindow   = pQuery->window,
    .order     = pQuery->order.order,
    .colList   = pQuery->colList,
    .numOfCols = pQuery->numOfCols,
  };

  if (!isSTableQuery && (pQInfo->tableqinfoGroupInfo.numOfTables == 1) && (cond.order == TSDB_ORDER_ASC)) {
    bool plainScan =
        (!isIntervalQuery(pQuery)) && (!isGroupbyNormalCol(pQuery->pGroupbyExpr)) && (!isFixedOutputQuery(pQuery));

    if (plainScan) {
      // the only table of the query: take its own time window as the scan range
      SArray          *firstGroup = taosArrayGetP(pQInfo->tableqinfoGroupInfo.pGroupList, 0);
      STableQueryInfo *pOnlyTable = taosArrayGetP(firstGroup, 0);
      cond.twindow = pOnlyTable->win;
    }
  }

  if (isFirstLastRowQuery(pQuery)) {
    pEnv->pQueryHandle = tsdbQueryLastRow(tsdb, &cond, &pQInfo->tableGroupInfo, pQInfo);
    return;
  }

  if (isPointInterpoQuery(pQuery)) {
    pEnv->pQueryHandle = tsdbQueryRowsInExternalWindow(tsdb, &cond, &pQInfo->tableGroupInfo, pQInfo);
    return;
  }

  pEnv->pQueryHandle = tsdbQueryTables(tsdb, &cond, &pQInfo->tableGroupInfo, pQInfo);
}

3998 3999 4000 4001 4002 4003 4004 4005 4006 4007 4008 4009 4010
/*
 * Build the fill column descriptors for all output columns of the query.
 *
 * One SFillColInfo is generated per select expression; the byte offset of each column is the
 * sum of the bytes of all preceding output columns.
 *
 * @param pQuery  query whose pSelectExpr/fillVal arrays hold numOfOutput entries
 * @return        array of pQuery->numOfOutput elements allocated with calloc (to be released by
 *                the receiver), or NULL if the allocation failed
 */
static SFillColInfo* taosCreateFillColInfo(SQuery* pQuery) {
  int32_t numOfCols = pQuery->numOfOutput;
  int32_t offset = 0;

  SFillColInfo* pFillCol = calloc(numOfCols, sizeof(SFillColInfo));
  if (pFillCol == NULL) {  // out of memory: do not touch the (non-existent) array
    return NULL;
  }

  for (int32_t i = 0; i < numOfCols; ++i) {
    SExprInfo* pExprInfo = &pQuery->pSelectExpr[i];

    pFillCol[i].col.bytes  = pExprInfo->bytes;
    pFillCol[i].col.type   = pExprInfo->type;
    pFillCol[i].col.offset = offset;
    pFillCol[i].flag       = TSDB_COL_NORMAL;    // always be a normal column for table query
    pFillCol[i].functionId = pExprInfo->base.functionId;
    pFillCol[i].fillVal.i  = pQuery->fillVal[i];

    offset += pExprInfo->bytes;
  }

  return pFillCol;
}

4019
/*
 * Initialise the runtime environment of a query before its first execution.
 *
 * The function reads the time precision from the tsdb configuration, adjusts scan limitation
 * and scan order, creates the tsdb query handle, sets up the runtime environment, and creates
 * the disk based result buffer / window result info / fill info that the query type needs.
 *
 * @param pQInfo         query info to initialise
 * @param param          optional ts buffer (stored in pRuntimeEnv->pTSBuf), may be NULL
 * @param tsdb           tsdb instance the query runs on
 * @param vgId           vnode group id, stored in pQInfo->vgId
 * @param isSTableQuery  true if the query is issued on a super table
 * @return               TSDB_CODE_SUCCESS, or the error code reported by setupQueryRuntimeEnv /
 *                       createDiskbasedResultBuffer
 */
int32_t doInitQInfo(SQInfo *pQInfo, void *param, void *tsdb, int32_t vgId, bool isSTableQuery) {
  int32_t code = TSDB_CODE_SUCCESS;

  SQueryRuntimeEnv *pRuntimeEnv = &pQInfo->runtimeEnv;

  SQuery *pQuery = pQInfo->runtimeEnv.pQuery;
  pQuery->precision = tsdbGetCfg(tsdb)->precision;

  setScanLimitationByResultBuffer(pQuery);
  changeExecuteScanOrder(pQuery, false);
  setupQueryHandle(tsdb, pQInfo, isSTableQuery);

  pQInfo->tsdb = tsdb;
  pQInfo->vgId = vgId;

  pRuntimeEnv->pQuery = pQuery;
  pRuntimeEnv->pTSBuf = param;
  pRuntimeEnv->cur.vgroupIndex = -1;
  pRuntimeEnv->stableQuery = isSTableQuery;

  if (param != NULL) {
    // traverse the ts buffer in ascending order only if it is stored in the order of the query
    int16_t order = (pQuery->order.order == pRuntimeEnv->pTSBuf->tsOrder) ? TSDB_ORDER_ASC : TSDB_ORDER_DESC;
    tsBufSetTraverseOrder(pRuntimeEnv->pTSBuf, order);
  }

  // create runtime environment
  code = setupQueryRuntimeEnv(pRuntimeEnv, pQuery->order.order);
  if (code != TSDB_CODE_SUCCESS) {
    return code;
  }

  // The top/bottom flag must be known before it is used to size the result page below;
  // it was previously assigned only at the end of this function. todo refactor
  pRuntimeEnv->topBotQuery = isTopBottomQuery(pQuery);
  pRuntimeEnv->numOfRowsPerPage = getNumOfRowsInResultPage(pQuery, pRuntimeEnv->topBotQuery, isSTableQuery);

  if (isSTableQuery) {
    int32_t rows = getInitialPageNum(pQInfo);
    code = createDiskbasedResultBuffer(&pRuntimeEnv->pResultBuf, rows, pQuery->rowSize, pQInfo);
    if (code != TSDB_CODE_SUCCESS) {
      return code;
    }

    if (pQuery->intervalTime == 0) {
      int16_t type = TSDB_DATA_TYPE_NULL;

      if (isGroupbyNormalCol(pQuery->pGroupbyExpr)) {  // group by columns not tags;
        type = getGroupbyColumnType(pQuery, pQuery->pGroupbyExpr);
      } else {
        type = TSDB_DATA_TYPE_INT;  // group id
      }

      initWindowResInfo(&pRuntimeEnv->windowResInfo, pRuntimeEnv, 512, 4096, type);
    }

  } else if (isGroupbyNormalCol(pQuery->pGroupbyExpr) || isIntervalQuery(pQuery)) {
    int32_t rows = getInitialPageNum(pQInfo);
    code = createDiskbasedResultBuffer(&pRuntimeEnv->pResultBuf, rows, pQuery->rowSize, pQInfo);
    if (code != TSDB_CODE_SUCCESS) {
      return code;
    }

    int16_t type = TSDB_DATA_TYPE_NULL;
    if (isGroupbyNormalCol(pQuery->pGroupbyExpr)) {
      type = getGroupbyColumnType(pQuery, pQuery->pGroupbyExpr);
    } else {
      type = TSDB_DATA_TYPE_TIMESTAMP;
    }

    initWindowResInfo(&pRuntimeEnv->windowResInfo, pRuntimeEnv, rows, 4096, type);
  }

  if (pQuery->fillType != TSDB_FILL_NONE && !isPointInterpoQuery(pQuery)) {
    SFillColInfo* pColInfo = taosCreateFillColInfo(pQuery);
    pRuntimeEnv->pFillInfo = taosInitFillInfo(pQuery->order.order, 0, 0, pQuery->rec.capacity, pQuery->numOfOutput,
                                              pQuery->slidingTime, pQuery->slidingTimeUnit, pQuery->precision,
                                              pQuery->fillType, pColInfo);
  }

  setQueryStatus(pQuery, QUERY_NOT_COMPLETED);

  return TSDB_CODE_SUCCESS;
}

4102
/*
 * Clear the "complete" flag in the result info of every output function context, so that the
 * functions are executed again for the next table.
 */
static void enableExecutionForNextTable(SQueryRuntimeEnv *pRuntimeEnv) {
  int32_t numOfOutput = pRuntimeEnv->pQuery->numOfOutput;

  int32_t idx = 0;
  while (idx < numOfOutput) {
    SResultInfo *pInfo = GET_RES_INFO(&pRuntimeEnv->pCtx[idx]);
    idx += 1;

    if (pInfo == NULL) {  // no result info attached to this context
      continue;
    }

    pInfo->complete = false;
  }
}

H
Haojun Liao 已提交
4113
/*
 * Scan all data blocks delivered by the active query handle (the primary handle during the
 * master scan, the secondary handle otherwise) and apply the query functions on each block.
 *
 * For every block the owning table is looked up by table id among all table groups, made the
 * current query table, and the execution context is prepared according to the query type.
 * The scan stops early when the query is killed.
 *
 * @return elapsed time of the scan in milliseconds
 */
static int64_t scanMultiTableDataBlocks(SQInfo *pQInfo) {
  SQueryRuntimeEnv *pRuntimeEnv = &pQInfo->runtimeEnv;
  SQuery           *pQuery = pRuntimeEnv->pQuery;
  SQueryCostInfo   *summary = &pRuntimeEnv->summary;

  int64_t startTs = taosGetTimestampMs();

  TsdbQueryHandleT pQueryHandle = pRuntimeEnv->pSecQueryHandle;
  if (IS_MASTER_SCAN(pRuntimeEnv)) {
    pQueryHandle = pRuntimeEnv->pQueryHandle;
  }

  while (tsdbNextDataBlock(pQueryHandle)) {
    summary->totalBlocks += 1;
    if (isQueryKilled(pQInfo)) {
      break;
    }

    SDataBlockInfo   blockInfo = tsdbRetrieveDataBlockInfo(pQueryHandle);
    STableQueryInfo *pTableQueryInfo = NULL;

    // todo opt performance using hash table
    size_t numOfGroup = taosArrayGetSize(pQInfo->tableqinfoGroupInfo.pGroupList);
    for (int32_t g = 0; g < numOfGroup && pTableQueryInfo == NULL; ++g) {
      SArray *members = taosArrayGetP(pQInfo->tableqinfoGroupInfo.pGroupList, g);

      size_t numOfMembers = taosArrayGetSize(members);
      for (int32_t m = 0; m < numOfMembers; ++m) {
        STableQueryInfo *pCandidate = taosArrayGetP(members, m);

        STableId id = tsdbGetTableId(pCandidate->pTable);
        if (id.tid != blockInfo.tid) {
          continue;
        }

        assert(id.uid == blockInfo.uid);
        pTableQueryInfo = pCandidate;
        break;
      }
    }

    assert(pTableQueryInfo != NULL);
    setCurrentQueryTable(pRuntimeEnv, pTableQueryInfo);

    SDataStatis *pStatis = NULL;
    SArray      *pDataBlock = loadDataBlockOnDemand(pRuntimeEnv, pQueryHandle, &blockInfo, &pStatis);

    if (isGroupbyNormalCol(pQuery->pGroupbyExpr)) {
      // group by normal column: no per-table execution context is prepared here
    } else if (isIntervalQuery(pQuery)) {
      TSKEY nextKey = blockInfo.window.skey;
      setIntervalQueryRange(pQInfo, nextKey);
      /*int32_t ret = */setAdditionalInfo(pQInfo, pTableQueryInfo->pTable, pTableQueryInfo);
    } else {
      int32_t step = -1;
      if (QUERY_IS_ASC_QUERY(pQuery)) {
        step = 1;
      }

      setExecutionContext(pQInfo, pTableQueryInfo->pTable, pTableQueryInfo->groupIndex, blockInfo.window.ekey + step);
    }

    summary->totalRows += blockInfo.rows;
    stableApplyFunctionsOnBlock(pRuntimeEnv, &blockInfo, pStatis, pDataBlock, binarySearchForKey);

    qTrace("QInfo:%p check data block, uid:%"PRId64", tid:%d, brange:%" PRId64 "-%" PRId64 ", numOfRows:%d, lastKey:%" PRId64,
           pQInfo, blockInfo.uid, blockInfo.tid, blockInfo.window.skey, blockInfo.window.ekey, blockInfo.rows, pQuery->current->lastKey);
  }

  return taosGetTimestampMs() - startTs;
}

4181 4182
/*
 * Prepare the query on the index-th table of the first table group.
 *
 * The tag values of the table are set, the existing query handle (if any) is released and a
 * new handle restricted to this single table is created for the range [lastKey, win.ekey] of
 * the table. If a ts buffer is attached, its cursor is positioned as well.
 *
 * @param pQInfo  query info
 * @param index   position of the table in the first group of tableqinfoGroupInfo
 * @return        false if the ts buffer holds no data for the tag value of the table,
 *                true otherwise
 */
static bool multiTableMultioutputHelper(SQInfo *pQInfo, int32_t index) {
  SQueryRuntimeEnv *pRuntimeEnv = &pQInfo->runtimeEnv;
  SQuery           *pQuery = pRuntimeEnv->pQuery;

  setQueryStatus(pQuery, QUERY_NOT_COMPLETED);

  SArray          *firstGroup = taosArrayGetP(pQInfo->tableqinfoGroupInfo.pGroupList, 0);
  STableQueryInfo *pCheckInfo = taosArrayGetP(firstGroup, index);

  setTagVal(pRuntimeEnv, pCheckInfo->pTable, pQInfo->tsdb);

  STableId id = tsdbGetTableId(pCheckInfo->pTable);
  qTrace("QInfo:%p query on (%d): uid:%" PRIu64 ", tid:%d, qrange:%" PRId64 "-%" PRId64, pQInfo, index,
         id.uid, id.tid, pCheckInfo->lastKey, pCheckInfo->win.ekey);

  STsdbQueryCond cond = {
      .twindow   = {pCheckInfo->lastKey, pCheckInfo->win.ekey},
      .order     = pQuery->order.order,
      .colList   = pQuery->colList,
      .numOfCols = pQuery->numOfCols,
  };

  // todo refactor
  // temporary group list that includes only the current table
  SArray *groupList = taosArrayInit(1, POINTER_BYTES);
  SArray *tableList = taosArrayInit(1, POINTER_BYTES);

  taosArrayPush(tableList, &pCheckInfo->pTable);
  taosArrayPush(groupList, &tableList);
  STableGroupInfo gp = {.numOfTables = 1, .pGroupList = groupList};

  if (pRuntimeEnv->pQueryHandle != NULL) {
    tsdbCleanupQueryHandle(pRuntimeEnv->pQueryHandle);
    pRuntimeEnv->pQueryHandle = NULL;
  }

  pRuntimeEnv->pQueryHandle = tsdbQueryTables(pQInfo->tsdb, &cond, &gp, pQInfo);
  taosArrayDestroy(tableList);
  taosArrayDestroy(groupList);

  if (pRuntimeEnv->pTSBuf != NULL && pRuntimeEnv->cur.vgroupIndex != -1) {
    // continue from the cursor saved by the previous round
    tsBufSetCursor(pRuntimeEnv->pTSBuf, &pRuntimeEnv->cur);
  } else if (pRuntimeEnv->pTSBuf != NULL) {
    int64_t tag = pRuntimeEnv->pCtx[0].tag.i64Key;
    STSElem elem = tsBufGetElemStartPos(pRuntimeEnv->pTSBuf, 0, tag);

    // failed to find data with the specified tag value
    if (elem.vnode < 0) {
      return false;
    }
  }

  initCtxOutputBuf(pRuntimeEnv);
  return true;
}

/**
 * super table query handler
 * 1. super table projection query, group-by on normal columns query, ts-comp query
 * 2. point interpolation query, last row query
 *
 * @param pQInfo
 */
4245
static void sequentialTableProcess(SQInfo *pQInfo) {
4246
  SQueryRuntimeEnv *pRuntimeEnv = &pQInfo->runtimeEnv;
4247
  SQuery *          pQuery = pRuntimeEnv->pQuery;
4248
  setQueryStatus(pQuery, QUERY_COMPLETED);
4249

4250
  size_t numOfGroups = taosArrayGetSize(pQInfo->tableqinfoGroupInfo.pGroupList);
4251

H
Haojun Liao 已提交
4252
  if (isPointInterpoQuery(pQuery) || isFirstLastRowQuery(pQuery)) {
4253 4254
    resetCtxOutputBuf(pRuntimeEnv);
    assert(pQuery->limit.offset == 0 && pQuery->limit.limit != 0);
4255

4256
    while (pQInfo->groupIndex < numOfGroups) {
4257
      SArray* group = taosArrayGetP(pQInfo->tableqinfoGroupInfo.pGroupList, pQInfo->groupIndex);
4258

B
Bomin Zhang 已提交
4259
      qTrace("QInfo:%p last_row query on group:%d, total group:%zu, current group:%p", pQInfo, pQInfo->groupIndex,
dengyihao's avatar
dengyihao 已提交
4260
             numOfGroups, group);
H
Haojun Liao 已提交
4261 4262 4263 4264 4265 4266 4267 4268 4269 4270 4271 4272 4273 4274 4275 4276 4277 4278 4279 4280

      STsdbQueryCond cond = {
          .twindow = pQuery->window,
          .colList = pQuery->colList,
          .order   = pQuery->order.order,
          .numOfCols = pQuery->numOfCols,
      };

      SArray *g1 = taosArrayInit(1, POINTER_BYTES);
      SArray *tx = taosArrayClone(group);
      taosArrayPush(g1, &tx);
      
      STableGroupInfo gp = {.numOfTables = taosArrayGetSize(tx), .pGroupList = g1};

      // include only current table
      if (pRuntimeEnv->pQueryHandle != NULL) {
        tsdbCleanupQueryHandle(pRuntimeEnv->pQueryHandle);
        pRuntimeEnv->pQueryHandle = NULL;
      }
      
4281
      if (isFirstLastRowQuery(pQuery)) {
H
Haojun Liao 已提交
4282
        pRuntimeEnv->pQueryHandle = tsdbQueryLastRow(pQInfo->tsdb, &cond, &gp, pQInfo);
H
Haojun Liao 已提交
4283
      } else {
H
Haojun Liao 已提交
4284
        pRuntimeEnv->pQueryHandle = tsdbQueryRowsInExternalWindow(pQInfo->tsdb, &cond, &gp, pQInfo);
4285
      }
H
Haojun Liao 已提交
4286 4287
      
      initCtxOutputBuf(pRuntimeEnv);
4288
      
4289
      SArray* s = tsdbGetQueriedTableList(pRuntimeEnv->pQueryHandle);
4290 4291
      assert(taosArrayGetSize(s) >= 1);
      
4292
      setTagVal(pRuntimeEnv, taosArrayGetP(s, 0), pQInfo->tsdb);
4293 4294 4295
      if (isFirstLastRowQuery(pQuery)) {
        assert(taosArrayGetSize(s) == 1);
      }
H
Haojun Liao 已提交
4296

dengyihao's avatar
dengyihao 已提交
4297
      taosArrayDestroy(s);
H
Haojun Liao 已提交
4298

H
Haojun Liao 已提交
4299
      // here we simply set the first table as current table
4300
      pQuery->current = (STableQueryInfo*) taosArrayGet(group, 0);
4301
      scanOneTableDataBlocks(pRuntimeEnv, pQuery->current->lastKey);
H
Haojun Liao 已提交
4302 4303 4304 4305 4306 4307 4308 4309 4310 4311 4312 4313
      
      int64_t numOfRes = getNumOfResult(pRuntimeEnv);
      if (numOfRes > 0) {
        pQuery->rec.rows += numOfRes;
        forwardCtxOutputBuf(pRuntimeEnv, numOfRes);
      }
      
      skipResults(pRuntimeEnv);
      pQInfo->groupIndex += 1;

      // enable execution for next table, when handling the projection query
      enableExecutionForNextTable(pRuntimeEnv);
4314 4315 4316 4317 4318 4319 4320 4321

      if (pQuery->rec.rows >= pQuery->rec.capacity) {
        setQueryStatus(pQuery, QUERY_RESBUF_FULL);
        break;
      }
    }
  } else if (isGroupbyNormalCol(pQuery->pGroupbyExpr)) { // group-by on normal columns query
    while (pQInfo->groupIndex < numOfGroups) {
H
Haojun Liao 已提交
4322
      SArray* group = taosArrayGetP(pQInfo->tableGroupInfo.pGroupList, pQInfo->groupIndex);
4323

B
Bomin Zhang 已提交
4324
      qTrace("QInfo:%p group by normal columns group:%d, total group:%zu", pQInfo, pQInfo->groupIndex, numOfGroups);
4325 4326 4327 4328 4329 4330 4331 4332 4333 4334 4335 4336 4337 4338 4339 4340 4341 4342 4343 4344 4345 4346

      STsdbQueryCond cond = {
          .twindow = pQuery->window,
          .colList = pQuery->colList,
          .order   = pQuery->order.order,
          .numOfCols = pQuery->numOfCols,
      };

      SArray *g1 = taosArrayInit(1, POINTER_BYTES);
      SArray *tx = taosArrayClone(group);
      taosArrayPush(g1, &tx);

      STableGroupInfo gp = {.numOfTables = taosArrayGetSize(tx), .pGroupList = g1};

      // include only current table
      if (pRuntimeEnv->pQueryHandle != NULL) {
        tsdbCleanupQueryHandle(pRuntimeEnv->pQueryHandle);
        pRuntimeEnv->pQueryHandle = NULL;
      }

      pRuntimeEnv->pQueryHandle = tsdbQueryTables(pQInfo->tsdb, &cond, &gp, pQInfo);

4347
      SArray* s = tsdbGetQueriedTableList(pRuntimeEnv->pQueryHandle);
4348 4349
      assert(taosArrayGetSize(s) >= 1);

4350
      setTagVal(pRuntimeEnv, taosArrayGetP(s, 0), pQInfo->tsdb);
4351 4352 4353 4354 4355 4356 4357 4358

      // here we simply set the first table as current table
      scanMultiTableDataBlocks(pQInfo);
      pQInfo->groupIndex += 1;

      SWindowResInfo *pWindowResInfo = &pRuntimeEnv->windowResInfo;

        // no results generated for current group, continue to try the next group
dengyihao's avatar
dengyihao 已提交
4359
      taosArrayDestroy(s); 
4360 4361 4362 4363 4364 4365 4366 4367 4368 4369 4370 4371 4372 4373 4374 4375 4376 4377 4378 4379 4380 4381 4382 4383 4384 4385 4386 4387 4388
      if (pWindowResInfo->size <= 0) {
        continue;
      }

      for (int32_t i = 0; i < pWindowResInfo->size; ++i) {
        SWindowStatus *pStatus = &pWindowResInfo->pResult[i].status;
        pStatus->closed = true;  // enable return all results for group by normal columns

        SWindowResult *pResult = &pWindowResInfo->pResult[i];
        for (int32_t j = 0; j < pQuery->numOfOutput; ++j) {
          pResult->numOfRows = MAX(pResult->numOfRows, pResult->resultInfo[j].numOfRes);
        }
      }

      qTrace("QInfo:%p generated groupby columns results %d rows for group %d completed", pQInfo, pWindowResInfo->size,
          pQInfo->groupIndex);
      int32_t currentGroupIndex = pQInfo->groupIndex;

      pQuery->rec.rows = 0;
      pQInfo->groupIndex = 0;

      ensureOutputBufferSimple(pRuntimeEnv, pWindowResInfo->size);
      copyFromWindowResToSData(pQInfo, pWindowResInfo->pResult);

      pQInfo->groupIndex = currentGroupIndex;  //restore the group index
      assert(pQuery->rec.rows == pWindowResInfo->size);

      clearClosedTimeWindow(pRuntimeEnv);
      break;
4389 4390 4391
    }
  } else {
    /*
4392
     * 1. super table projection query, 2. ts-comp query
4393 4394 4395
     * if the subgroup index is larger than 0, results generated by group by tbname,k is existed.
     * we need to return it to client in the first place.
     */
4396
    if (pQInfo->groupIndex > 0) {
4397
      copyFromWindowResToSData(pQInfo, pRuntimeEnv->windowResInfo.pResult);
4398
      pQuery->rec.total += pQuery->rec.rows;
4399

4400
      if (pQuery->rec.rows > 0) {
4401 4402 4403
        return;
      }
    }
4404

4405
    // all data have returned already
4406
    if (pQInfo->tableIndex >= pQInfo->tableqinfoGroupInfo.numOfTables) {
4407 4408
      return;
    }
4409

4410 4411
    resetCtxOutputBuf(pRuntimeEnv);
    resetTimeWindowInfo(pRuntimeEnv, &pRuntimeEnv->windowResInfo);
4412

4413 4414 4415
    SArray *group = taosArrayGetP(pQInfo->tableqinfoGroupInfo.pGroupList, 0);
    assert(taosArrayGetSize(group) == pQInfo->tableqinfoGroupInfo.numOfTables &&
           1 == taosArrayGetSize(pQInfo->tableqinfoGroupInfo.pGroupList));
4416

4417
    while (pQInfo->tableIndex < pQInfo->tableqinfoGroupInfo.numOfTables) {
4418
      if (isQueryKilled(pQInfo)) {
4419 4420
        return;
      }
4421

4422
      pQuery->current = taosArrayGetP(group, pQInfo->tableIndex);
4423
      if (!multiTableMultioutputHelper(pQInfo, pQInfo->tableIndex)) {
4424
        pQInfo->tableIndex++;
4425 4426
        continue;
      }
4427

H
hjxilinx 已提交
4428
      // TODO handle the limit offset problem
4429
      if (pQuery->numOfFilterCols == 0 && pQuery->limit.offset > 0) {
4430
        //        skipBlocks(pRuntimeEnv);
4431 4432
        if (Q_STATUS_EQUAL(pQuery->status, QUERY_COMPLETED)) {
          pQInfo->tableIndex++;
4433 4434 4435
          continue;
        }
      }
4436

4437
      scanOneTableDataBlocks(pRuntimeEnv, pQuery->current->lastKey);
4438
      skipResults(pRuntimeEnv);
4439

4440
      // the limitation of output result is reached, set the query completed
4441
      if (limitResults(pRuntimeEnv)) {
4442
        pQInfo->tableIndex = pQInfo->tableqinfoGroupInfo.numOfTables;
4443 4444
        break;
      }
4445

4446 4447
      // enable execution for next table, when handling the projection query
      enableExecutionForNextTable(pRuntimeEnv);
4448

4449
      if (Q_STATUS_EQUAL(pQuery->status, QUERY_COMPLETED)) {
4450 4451 4452 4453 4454 4455
        /*
         * query range is identical in terms of all meters involved in query,
         * so we need to restore them at the *beginning* of query on each meter,
         * not the consecutive query on meter on which is aborted due to buffer limitation
         * to ensure that, we can reset the query range once query on a meter is completed.
         */
4456
        pQInfo->tableIndex++;
weixin_48148422's avatar
weixin_48148422 已提交
4457 4458

        STableIdInfo tidInfo;
4459 4460 4461 4462
        STableId id = tsdbGetTableId(pQuery->current->pTable);

        tidInfo.uid = id.uid;
        tidInfo.tid = id.tid;
weixin_48148422's avatar
weixin_48148422 已提交
4463
        tidInfo.key = pQuery->current->lastKey;
weixin_48148422's avatar
weixin_48148422 已提交
4464 4465
        taosArrayPush(pQInfo->arrTableIdInfo, &tidInfo);

4466
        // if the buffer is full or group by each table, we need to jump out of the loop
4467 4468
        if (Q_STATUS_EQUAL(pQuery->status, QUERY_RESBUF_FULL) /*||
            isGroupbyEachTable(pQuery->pGroupbyExpr, pSupporter->pSidSet)*/) {
4469 4470
          break;
        }
4471

4472
      } else {
4473
        // all data in the result buffer are skipped due to the offset, continue to retrieve data from current meter
4474 4475
        if (pQuery->rec.rows == 0) {
          assert(!Q_STATUS_EQUAL(pQuery->status, QUERY_RESBUF_FULL));
4476 4477
          continue;
        } else {
4478 4479 4480
          // buffer is full, wait for the next round to retrieve data from current meter
          assert(Q_STATUS_EQUAL(pQuery->status, QUERY_RESBUF_FULL));
          break;
4481 4482 4483
        }
      }
    }
H
Haojun Liao 已提交
4484

4485
    if (pQInfo->tableIndex >= pQInfo->tableqinfoGroupInfo.numOfTables) {
H
Haojun Liao 已提交
4486 4487
      setQueryStatus(pQuery, QUERY_COMPLETED);
    }
4488
  }
4489

4490 4491 4492 4493 4494 4495 4496 4497 4498 4499 4500 4501
  /*
   * 1. super table projection query, group-by on normal columns query, ts-comp query
   * 2. point interpolation query, last row query
   *
   * group-by on normal columns query and last_row query do NOT invoke the finalizer here,
   * since the finalize stage will be done at the client side.
   *
   * projection query, point interpolation query do not need the finalizer.
   *
   * Only the ts-comp query requires the finalizer function to be executed here.
   */
  if (isTSCompQuery(pQuery)) {
H
hjxilinx 已提交
4502
    finalizeQueryResult(pRuntimeEnv);
4503
  }
4504

4505 4506 4507
  if (pRuntimeEnv->pTSBuf != NULL) {
    pRuntimeEnv->cur = pRuntimeEnv->pTSBuf->cur;
  }
4508 4509

  qTrace(
B
Bomin Zhang 已提交
4510
      "QInfo %p numOfTables:%"PRIu64", index:%d, numOfGroups:%zu, %"PRId64" points returned, total:%"PRId64", offset:%" PRId64,
4511
      pQInfo, pQInfo->tableqinfoGroupInfo.numOfTables, pQInfo->tableIndex, numOfGroups, pQuery->rec.rows, pQuery->rec.total,
4512
      pQuery->limit.offset);
4513 4514
}

4515 4516 4517 4518
/*
 * Switch the query into reverse scan mode.
 *
 * The scan flag is set to reverse scan, the start/end keys of the query window are swapped and
 * the scan order is switched (and propagated to the ts buffer cursor if a ts buffer exists).
 * A secondary query handle is then created for the swapped window; a previously created
 * secondary handle is released first.
 */
static void doSaveContext(SQInfo *pQInfo) {
  SQueryRuntimeEnv *pEnv = &pQInfo->runtimeEnv;
  SQuery           *pQuery = pEnv->pQuery;

  SET_REVERSE_SCAN_FLAG(pEnv);
  SWAP(pQuery->window.skey, pQuery->window.ekey, TSKEY);
  SWITCH_ORDER(pQuery->order.order);

  STSBuf *pTSBuf = pEnv->pTSBuf;
  if (pTSBuf != NULL) {
    pTSBuf->cur.order = pQuery->order.order;
  }

  // query condition built from the swapped window and the switched order
  STsdbQueryCond cond = {
      .twindow   = pQuery->window,
      .order     = pQuery->order.order,
      .colList   = pQuery->colList,
      .numOfCols = pQuery->numOfCols,
  };

  // clean unused handle
  if (pEnv->pSecQueryHandle != NULL) {
    tsdbCleanupQueryHandle(pEnv->pSecQueryHandle);
  }

  pEnv->pSecQueryHandle = tsdbQueryTables(pQInfo->tsdb, &cond, &pQInfo->tableGroupInfo, pQInfo);

  setQueryStatus(pQuery, QUERY_NOT_COMPLETED);
  switchCtxOrder(pEnv);
  disableFuncInReverseScan(pQInfo);
}

4546 4547 4548 4549
/*
 * Leave the reverse scan mode: swap the start/end keys of the query window back, switch the
 * order of the ts buffer cursor (if a ts buffer exists), switch the order of the function
 * contexts and set the scan flag to master scan.
 *
 * NOTE(review): pQuery->order.order is switched by doSaveContext but is not switched back
 * here -- confirm that this is intended.
 */
static void doRestoreContext(SQInfo *pQInfo) {
  SQueryRuntimeEnv *pEnv = &pQInfo->runtimeEnv;
  SQuery           *pQuery = pEnv->pQuery;

  SWAP(pQuery->window.skey, pQuery->window.ekey, TSKEY);

  STSBuf *pTSBuf = pEnv->pTSBuf;
  if (pTSBuf != NULL) {
    SWITCH_ORDER(pTSBuf->cur.order);
  }

  switchCtxOrder(pEnv);
  SET_MASTER_SCAN_FLAG(pEnv);
}

4560 4561 4562
/*
 * Close all time windows once the scan is finished.
 *
 * For an interval query the window result info of every table in every group is closed;
 * otherwise only the window result info of the runtime environment (group results) is closed.
 */
static void doCloseAllTimeWindowAfterScan(SQInfo *pQInfo) {
  SQuery *pQuery = pQInfo->runtimeEnv.pQuery;

  if (!isIntervalQuery(pQuery)) {  // close results for group result
    closeAllTimeWindow(&pQInfo->runtimeEnv.windowResInfo);
    return;
  }

  size_t numOfGroup = taosArrayGetSize(pQInfo->tableqinfoGroupInfo.pGroupList);
  for (int32_t g = 0; g < numOfGroup; ++g) {
    SArray *members = taosArrayGetP(pQInfo->tableqinfoGroupInfo.pGroupList, g);

    size_t  numOfMembers = taosArrayGetSize(members);
    int32_t m = 0;
    while (m < numOfMembers) {
      STableQueryInfo *pTableInfo = taosArrayGetP(members, m);
      closeAllTimeWindow(&pTableInfo->windowResInfo);
      m += 1;
    }
  }
}

/*
 * Run a query that aggregates over several tables at once.
 *
 * When pQInfo->groupIndex is already positive the scan was done by an earlier call and
 * only the remaining results are copied out. Otherwise a forward scan is executed,
 * all time windows are closed, an optional reverse scan follows, and the results are
 * finally copied to the output buffer.
 */
static void multiTableQueryProcess(SQInfo *pQInfo) {
  SQueryRuntimeEnv *env = &pQInfo->runtimeEnv;
  SQuery *          query = env->pQuery;

  if (pQInfo->groupIndex > 0) {
    // the scan has been completed by a previous invocation: only move the pending
    // results into the output buffer
    if (isIntervalQuery(query)) {
      copyResToQueryResultBuf(pQInfo, query);
#ifdef _DEBUG_VIEW
      displayInterResult(query->sdata, env, query->sdata[0]->num);
#endif
    } else {
      copyFromWindowResToSData(pQInfo, env->windowResInfo.pResult);
    }

    qTrace("QInfo:%p current:%"PRId64", total:%"PRId64"", pQInfo, query->rec.rows, query->rec.total);
    return;
  }

  qTrace("QInfo:%p query start, qrange:%" PRId64 "-%" PRId64 ", order:%d, forward scan start", pQInfo,
         query->window.skey, query->window.ekey, query->order.order);

  // forward (master) scan over all qualified data blocks
  int64_t elapsed = scanMultiTableDataBlocks(pQInfo);
  qTrace("QInfo:%p master scan completed, elapsed time: %" PRId64 "ms, reverse scan start", pQInfo, elapsed);

  // stop right here if an error was recorded or the query has been killed
  if (pQInfo->code != TSDB_CODE_SUCCESS || isQueryKilled(pQInfo)) {
    qTrace("QInfo:%p query killed or error occurred, code:%s, abort", pQInfo, tstrerror(pQInfo->code));
    return;
  }

  // close all time window results
  doCloseAllTimeWindowAfterScan(pQInfo);

  if (!needReverseScan(query)) {
    qTrace("QInfo:%p no need to do reversed scan, query completed", pQInfo);
  } else {
    doSaveContext(pQInfo);

    elapsed = scanMultiTableDataBlocks(pQInfo);
    qTrace("QInfo:%p reversed scan completed, elapsed time: %" PRId64 "ms", pQInfo, elapsed);

    doRestoreContext(pQInfo);
  }

  setQueryStatus(query, QUERY_COMPLETED);

  // the reverse scan may have failed or been interrupted as well
  if (pQInfo->code != TSDB_CODE_SUCCESS || isQueryKilled(pQInfo)) {
    qTrace("QInfo:%p query killed or error occurred, code:%s, abort", pQInfo, tstrerror(pQInfo->code));
    return;
  }

  if (isIntervalQuery(query) || isSumAvgRateQuery(query)) {
    // results are copied out only when merging into the group result succeeds
    if (mergeIntoGroupResult(pQInfo) == TSDB_CODE_SUCCESS) {
      copyResToQueryResultBuf(pQInfo, query);

#ifdef _DEBUG_VIEW
      displayInterResult(query->sdata, env, query->sdata[0]->num);
#endif
    }
  } else {  // not an interval query
    copyFromWindowResToSData(pQInfo, env->windowResInfo.pResult);
  }

  // handle the limitation of output buffer
  qTrace("QInfo:%p points returned:%" PRId64 ", total:%" PRId64, pQInfo, query->rec.rows, query->rec.total + query->rec.rows);
}

/*
 * in each query, this function will be called only once, no retry for further result.
 *
 * select count(*)/top(field,k)/avg(field name) from table_name [where ts>now-1a];
 * select count(*) from table_name group by status_column;
 */
H
hjxilinx 已提交
4657
static void tableFixedOutputProcess(SQInfo *pQInfo, STableQueryInfo* pTableInfo) {
4658
  SQueryRuntimeEnv *pRuntimeEnv = &pQInfo->runtimeEnv;
H
hjxilinx 已提交
4659 4660
  
  SQuery *pQuery = pRuntimeEnv->pQuery;
H
Haojun Liao 已提交
4661 4662 4663 4664
  if (!isTopBottomQuery(pQuery) && pQuery->limit.offset > 0) {  // no need to execute, since the output will be ignore.
    return;
  }
  
H
hjxilinx 已提交
4665 4666
  pQuery->current = pTableInfo;  // set current query table info
  
4667
  scanOneTableDataBlocks(pRuntimeEnv, pTableInfo->lastKey);
H
hjxilinx 已提交
4668
  finalizeQueryResult(pRuntimeEnv);
4669

4670
  if (isQueryKilled(pQInfo)) {
4671 4672
    return;
  }
4673

H
Haojun Liao 已提交
4674
  // since the numOfRows must be identical for all sql functions that are allowed to be executed simutaneously.
4675
  pQuery->rec.rows = getNumOfResult(pRuntimeEnv);
4676

4677
  skipResults(pRuntimeEnv);
4678
  limitResults(pRuntimeEnv);
4679 4680
}

H
hjxilinx 已提交
4681
/*
 * Process a single-table query that may produce many output rows.
 *
 * The table is scanned repeatedly until either some rows are produced or the query
 * status reports completion. When the query completes, the table id together with the
 * last visited key is appended to pQInfo->arrTableIdInfo.
 */
static void tableMultiOutputProcess(SQInfo *pQInfo, STableQueryInfo* pTableInfo) {
  SQueryRuntimeEnv *env = &pQInfo->runtimeEnv;
  SQuery *          query = env->pQuery;

  query->current = pTableInfo;

  // for ts_comp query, re-initialization is not allowed
  if (!isTSCompQuery(query)) {
    resetCtxOutputBuf(env);
  }

  // skip blocks without loading the actual data block from file if no filter condition is present
  skipBlocks(env);
  if (query->limit.offset > 0 && query->numOfFilterCols == 0) {
    setQueryStatus(query, QUERY_COMPLETED);
    return;
  }

  for (;;) {
    scanOneTableDataBlocks(env, query->current->lastKey);
    finalizeQueryResult(env);

    if (isQueryKilled(pQInfo)) {
      return;
    }

    query->rec.rows = getNumOfResult(env);

    // with filter columns the offset has to be applied on the produced rows
    if (query->limit.offset > 0 && query->numOfFilterCols > 0 && query->rec.rows > 0) {
      skipResults(env);
    }

    // leave the loop as soon as rows are available or the scan is complete; an empty
    // result with an incomplete scan means more data has to be checked
    if (query->rec.rows > 0 || Q_STATUS_EQUAL(query->status, QUERY_COMPLETED)) {
      break;
    }

    qTrace("QInfo:%p skip current result, offset:%" PRId64 ", next qrange:%" PRId64 "-%" PRId64,
           pQInfo, query->limit.offset, query->current->lastKey, query->current->win.ekey);

    resetCtxOutputBuf(env);
  }

  limitResults(env);

  if (Q_STATUS_EQUAL(query->status, QUERY_RESBUF_FULL)) {
    qTrace("QInfo:%p query paused due to output limitation, next qrange:%" PRId64 "-%" PRId64, pQInfo,
        query->current->lastKey, query->window.ekey);
  } else if (Q_STATUS_EQUAL(query->status, QUERY_COMPLETED)) {
    // record the table id and the last key reached by this query
    STableId     tableId = tsdbGetTableId(query->current);
    STableIdInfo tidInfo;

    tidInfo.key = query->current->lastKey;
    tidInfo.tid = tableId.tid;
    tidInfo.uid = tableId.uid;
    taosArrayPush(pQInfo->arrTableIdInfo, &tidInfo);
  }

  if (!isTSCompQuery(query)) {
    assert(query->rec.rows <= query->rec.capacity);
  }
}

H
Haojun Liao 已提交
4745
/*
 * Scan one table starting at `start` until the query status reports QUERY_COMPLETED or
 * QUERY_RESBUF_FULL. After each scan round the results are finalized and, when no fill
 * is requested, closed time windows are dropped to consume the pending offset.
 * Returns early when the query is killed.
 */
static void tableIntervalProcessImpl(SQueryRuntimeEnv *pRuntimeEnv, TSKEY start) {
  SQuery *query = pRuntimeEnv->pQuery;

  do {
    scanOneTableDataBlocks(pRuntimeEnv, start);

    if (isQueryKilled(GET_QINFO_ADDR(pRuntimeEnv))) {
      return;
    }

    assert(!Q_STATUS_EQUAL(query->status, QUERY_NOT_COMPLETED));
    finalizeQueryResult(pRuntimeEnv);

    // here we can ignore the records in case of no interpolation
    // todo handle offset, in case of top/bottom interval query
    if ((query->numOfFilterCols > 0 || pRuntimeEnv->pTSBuf != NULL) && query->limit.offset > 0 &&
        query->fillType == TSDB_FILL_NONE) {
      // drop as many closed windows as the remaining offset allows
      int32_t numOfClosed = numOfClosedTimeWindow(&pRuntimeEnv->windowResInfo);
      int32_t numToSkip = MIN(numOfClosed, query->limit.offset);

      clearFirstNTimeWindow(pRuntimeEnv, numToSkip);
      query->limit.offset -= numToSkip;
    }
  } while (!Q_STATUS_EQUAL(query->status, QUERY_COMPLETED | QUERY_RESBUF_FULL));
}

4776
// handle time interval query on table
H
hjxilinx 已提交
4777
static void tableIntervalProcess(SQInfo *pQInfo, STableQueryInfo* pTableInfo) {
4778 4779
  SQueryRuntimeEnv *pRuntimeEnv = &(pQInfo->runtimeEnv);

H
hjxilinx 已提交
4780 4781
  SQuery *pQuery = pRuntimeEnv->pQuery;
  pQuery->current = pTableInfo;
4782

H
Haojun Liao 已提交
4783 4784 4785
  int32_t numOfInterpo = 0;
  TSKEY newStartKey = TSKEY_INITIAL_VAL;
  
4786
  // skip blocks without load the actual data block from file if no filter condition present
H
Haojun Liao 已提交
4787
  skipTimeInterval(pRuntimeEnv, &newStartKey);
4788
  if (pQuery->limit.offset > 0 && pQuery->numOfFilterCols == 0 && pRuntimeEnv->pFillInfo == NULL) {
4789 4790 4791 4792
    setQueryStatus(pQuery, QUERY_COMPLETED);
    return;
  }

4793
  while (1) {
H
Haojun Liao 已提交
4794
    tableIntervalProcessImpl(pRuntimeEnv, newStartKey);
4795

H
hjxilinx 已提交
4796
    if (isIntervalQuery(pQuery)) {
4797
      pQInfo->groupIndex = 0;  // always start from 0
4798
      pQuery->rec.rows = 0;
4799
      copyFromWindowResToSData(pQInfo, pRuntimeEnv->windowResInfo.pResult);
4800

4801
      clearFirstNTimeWindow(pRuntimeEnv, pQInfo->groupIndex);
4802
    }
4803

4804
    // the offset is handled at prepare stage if no interpolation involved
4805
    if (pQuery->fillType == TSDB_FILL_NONE || pQuery->rec.rows == 0) {
4806
      limitResults(pRuntimeEnv);
4807 4808
      break;
    } else {
H
Haojun Liao 已提交
4809
      taosFillSetStartInfo(pRuntimeEnv->pFillInfo, pQuery->rec.rows, pQuery->window.ekey);
4810
      taosFillCopyInputDataFromFilePage(pRuntimeEnv->pFillInfo, (tFilePage**) pQuery->sdata);
4811
      numOfInterpo = 0;
4812
      
H
Haojun Liao 已提交
4813
      pQuery->rec.rows = doFillGapsInResults(pRuntimeEnv, (tFilePage **)pQuery->sdata, &numOfInterpo);
4814
      if (pQuery->rec.rows > 0 || Q_STATUS_EQUAL(pQuery->status, QUERY_COMPLETED)) {
4815
        limitResults(pRuntimeEnv);
4816 4817
        break;
      }
4818

4819
      // no result generated yet, continue retrieve data
4820
      pQuery->rec.rows = 0;
4821 4822
    }
  }
4823

4824 4825
  // all data scanned, the group by normal column can return
  if (isGroupbyNormalCol(pQuery->pGroupbyExpr)) {  // todo refactor with merge interval time result
4826
    pQInfo->groupIndex = 0;
4827
    pQuery->rec.rows = 0;
4828
    copyFromWindowResToSData(pQInfo, pRuntimeEnv->windowResInfo.pResult);
4829
    clearFirstNTimeWindow(pRuntimeEnv, pQInfo->groupIndex);
4830
  }
4831

4832 4833 4834
  pQInfo->pointsInterpo += numOfInterpo;
}

4835 4836 4837 4838
/*
 * Execute (or continue) a query on exactly one table.
 *
 * Three situations are handled, in this order:
 *  1. results are still pending because of interpolation: only fill and return them;
 *  2. the scan has been completed: push out whatever remains in the window results;
 *  3. otherwise run the proper processor (interval / fixed output / multi output).
 */
static void tableQueryImpl(SQInfo *pQInfo) {
  SQueryRuntimeEnv *pRuntimeEnv = &pQInfo->runtimeEnv;
  SQuery *          pQuery = pRuntimeEnv->pQuery;

  if (queryHasRemainResults(pRuntimeEnv)) {
    /*
     * There are remaining results that were not returned due to result interpolation.
     * Stay in this procedure instead of launching the retrieve procedure for next results.
     */
    int32_t numOfInterpo = 0;
    pQuery->rec.rows = doFillGapsInResults(pRuntimeEnv, (tFilePage **)pQuery->sdata, &numOfInterpo);

    if (pQuery->rec.rows > 0) {
      limitResults(pRuntimeEnv);
    }

    qTrace("QInfo:%p current:%" PRId64 " returned, total:%" PRId64, pQInfo, pQuery->rec.rows, pQuery->rec.total);
    return;
  }

  // here we have scanned all qualified data in both data file and cache
  if (Q_STATUS_EQUAL(pQuery->status, QUERY_COMPLETED)) {
    // continue to push data from the group result
    if (isGroupbyNormalCol(pQuery->pGroupbyExpr) ||
        ((isIntervalQuery(pQuery) && pQuery->rec.total < pQuery->limit.limit))) {
      // todo limit the output for interval query?
      pQuery->rec.rows = 0;
      pQInfo->groupIndex = 0;  // always start from 0

      if (pRuntimeEnv->windowResInfo.size > 0) {
        // The row counter is read directly after this call here and in the other
        // callers, i.e. the callee maintains pQuery->rec.rows itself. It must not be
        // added to itself afterwards, which would report twice the copied rows.
        copyFromWindowResToSData(pQInfo, pRuntimeEnv->windowResInfo.pResult);

        clearFirstNTimeWindow(pRuntimeEnv, pQInfo->groupIndex);

        if (pQuery->rec.rows > 0) {
          qTrace("QInfo:%p %"PRId64" rows returned from group results, total:%"PRId64"", pQInfo, pQuery->rec.rows, pQuery->rec.total);
          return;
        }
      }
    }

    qTrace("QInfo:%p query over, %"PRId64" rows are returned", pQInfo, pQuery->rec.total);
    return;
  }

  // number of points returned during this query
  pQuery->rec.rows = 0;
  int64_t st = taosGetTimestampUs();

  // this entry point serves single-table queries only
  assert(pQInfo->tableqinfoGroupInfo.numOfTables == 1);
  SArray* g = taosArrayGetP(pQInfo->tableqinfoGroupInfo.pGroupList, 0);
  STableQueryInfo* item = taosArrayGetP(g, 0);

  // group by normal column, sliding window query, interval query are handled by interval query processor
  if (isIntervalQuery(pQuery) || isGroupbyNormalCol(pQuery->pGroupbyExpr)) {  // interval (down sampling operation)
    tableIntervalProcess(pQInfo, item);
  } else if (isFixedOutputQuery(pQuery)) {
    tableFixedOutputProcess(pQInfo, item);
  } else {  // diff/add/multiply/subtract/division
    assert(pQuery->checkBuffer == 1);
    tableMultiOutputProcess(pQInfo, item);
  }

  // record the total elapsed time
  pRuntimeEnv->summary.elapsedTime += (taosGetTimestampUs() - st);
  assert(pQInfo->tableqinfoGroupInfo.numOfTables == 1);

  /* check if query is killed or not */
  if (isQueryKilled(pQInfo)) {
    qTrace("QInfo:%p query is killed", pQInfo);
  } else {
    qTrace("QInfo:%p query paused, %" PRId64 " rows returned, numOfTotal:%" PRId64 " rows",
        pQInfo, pQuery->rec.rows, pQuery->rec.total + pQuery->rec.rows);
  }
}

4912 4913
/*
 * Execute a query over a super table: depending on the query type either all tables
 * are processed together (multiTableQueryProcess) or one after another
 * (sequentialTableProcess). The elapsed time is added to the runtime summary.
 */
static void stableQueryImpl(SQInfo *pQInfo) {
  SQuery *query = pQInfo->runtimeEnv.pQuery;
  query->rec.rows = 0;

  int64_t startUs = taosGetTimestampUs();

  bool processTogether =
      isIntervalQuery(query) ||
      (isFixedOutputQuery(query) && (!isPointInterpoQuery(query)) && !isGroupbyNormalCol(query->pGroupbyExpr) &&
       !isFirstLastRowQuery(query));

  if (processTogether) {
    multiTableQueryProcess(pQInfo);
  } else {
    assert((query->checkBuffer == 1 && query->intervalTime == 0) || isPointInterpoQuery(query) ||
            isFirstLastRowQuery(query) || isGroupbyNormalCol(query->pGroupbyExpr));

    sequentialTableProcess(pQInfo);
  }

  // record the total elapsed time
  pQInfo->runtimeEnv.summary.elapsedTime += (taosGetTimestampUs() - startUs);

  if (query->rec.rows == 0) {
    qTrace("QInfo:%p over, %zu tables queried, %"PRId64" rows are returned", pQInfo, pQInfo->tableqinfoGroupInfo.numOfTables, query->rec.total);
  }
}

4937
/*
 * Locate the source column referenced by an expression.
 *
 * Tag expressions are looked up by column id in pTagCols (pQueryMsg->numOfTags entries),
 * all other expressions in pQueryMsg->colList (pQueryMsg->numOfCols entries).
 *
 * @return the index of the matching column; -1 for a tag expression whose column id is
 *         TSDB_TBNAME_COLUMN_INDEX. A column id that cannot be found is a programming
 *         error: it trips the assertion, and in builds compiled with NDEBUG INT32_MAX is
 *         returned so that every "index < count" check performed by a caller fails
 *         instead of the function running off its end without a return value.
 */
static int32_t getColumnIndexInSource(SQueryTableMsg *pQueryMsg, SSqlFuncMsg *pExprMsg, SColumnInfo* pTagCols) {
  if (TSDB_COL_IS_TAG(pExprMsg->colInfo.flag)) {
    if (pExprMsg->colInfo.colId == TSDB_TBNAME_COLUMN_INDEX) {
      return -1;
    }

    for (int32_t j = 0; j < pQueryMsg->numOfTags; ++j) {
      if (pExprMsg->colInfo.colId == pTagCols[j].colId) {
        return j;
      }
    }
  } else {
    for (int32_t j = 0; j < pQueryMsg->numOfCols; ++j) {
      if (pExprMsg->colInfo.colId == pQueryMsg->colList[j].colId) {
        return j;
      }
    }
  }

  // not found: must never happen for a well-formed message
  assert(0);
  return INT32_MAX;
}

4966 4967 4968
/*
 * Check that the column referenced by pExprMsg resolves to an index that is below the
 * number of columns or below the number of tags of the query message.
 */
bool validateExprColumnInfo(SQueryTableMsg *pQueryMsg, SSqlFuncMsg *pExprMsg, SColumnInfo* pTagCols) {
  int32_t index = getColumnIndexInSource(pQueryMsg, pExprMsg, pTagCols);

  if (index < pQueryMsg->numOfCols) {
    return true;
  }

  return index < pQueryMsg->numOfTags;
}

4971
/*
 * Sanity-check the scalar header fields of a query message. The first violated
 * constraint is logged and makes the function return false:
 *  - intervalTime must not be negative
 *  - numOfTables must be positive
 *  - numOfGroupCols must not be negative
 *  - numOfOutput must be in the range [1, TSDB_MAX_COLUMNS]
 */
static bool validateQueryMsg(SQueryTableMsg *pQueryMsg) {
  if (pQueryMsg->intervalTime < 0) {
    qError("qmsg:%p illegal value of interval time %" PRId64, pQueryMsg, pQueryMsg->intervalTime);
    return false;
  }

  if (pQueryMsg->numOfTables <= 0) {
    qError("qmsg:%p illegal value of numOfTables %d", pQueryMsg, pQueryMsg->numOfTables);
    return false;
  }

  if (pQueryMsg->numOfGroupCols < 0) {
    qError("qmsg:%p illegal value of numOfGroupbyCols %d", pQueryMsg, pQueryMsg->numOfGroupCols);
    return false;
  }

  if (pQueryMsg->numOfOutput <= 0 || pQueryMsg->numOfOutput > TSDB_MAX_COLUMNS) {
    qError("qmsg:%p illegal value of output columns %d", pQueryMsg, pQueryMsg->numOfOutput);
    return false;
  }

  return true;
}

/*
 * Validate the number of source columns/tags of a query message.
 *
 * Negative counts or more than TSDB_MAX_COLUMNS sources in total are rejected. A
 * message without any source column or tag is accepted only if every output
 * expression is either a TSDB_FUNC_TAGPRJ, or a TSDB_FUNC_TID_TAG / TSDB_FUNC_COUNT
 * whose column id is TSDB_TBNAME_COLUMN_INDEX.
 */
static bool validateQuerySourceCols(SQueryTableMsg *pQueryMsg, SSqlFuncMsg** pExprMsg) {
  int32_t numOfTotal = pQueryMsg->numOfCols + pQueryMsg->numOfTags;

  if (pQueryMsg->numOfCols < 0 || pQueryMsg->numOfTags < 0 || numOfTotal > TSDB_MAX_COLUMNS) {
    qError("qmsg:%p illegal value of numOfCols %d numOfTags:%d", pQueryMsg, pQueryMsg->numOfCols, pQueryMsg->numOfTags);
    return false;
  }

  if (numOfTotal != 0) {
    return true;
  }

  // no source column at all: only expressions that need none are acceptable
  for (int32_t i = 0; i < pQueryMsg->numOfOutput; ++i) {
    SSqlFuncMsg *pFunc = pExprMsg[i];

    bool isTagPrj = (pFunc->functionId == TSDB_FUNC_TAGPRJ);
    bool onTbname = (pFunc->colInfo.colId == TSDB_TBNAME_COLUMN_INDEX);
    bool tbnameFunc = onTbname && (pFunc->functionId == TSDB_FUNC_TID_TAG || pFunc->functionId == TSDB_FUNC_COUNT);

    if (!(isTagPrj || tbnameFunc)) {
      return false;
    }
  }

  return true;
}

5017
/*
 * Build the table id list from the message body.
 *
 * pMsg is read as pQueryMsg->numOfTables consecutive STableIdInfo records. Each record
 * is byte-swapped in place (tid with htonl, uid and key with htobe64) and appended to a
 * newly created array stored in *pTableIdList.
 *
 * @return the position in the message right behind the last record
 */
static char *createTableIdList(SQueryTableMsg *pQueryMsg, char *pMsg, SArray **pTableIdList) {
  assert(pQueryMsg->numOfTables > 0);

  *pTableIdList = taosArrayInit(pQueryMsg->numOfTables, sizeof(STableIdInfo));

  STableIdInfo *pEntry = (STableIdInfo *)pMsg;
  for (int32_t j = 0; j < pQueryMsg->numOfTables; ++j, ++pEntry) {
    pEntry->tid = htonl(pEntry->tid);
    pEntry->uid = htobe64(pEntry->uid);
    pEntry->key = htobe64(pEntry->key);

    taosArrayPush(*pTableIdList, pEntry);
  }

  return (char *)pEntry;
}
5035

5036
/**
H
hjxilinx 已提交
5037
 * pQueryMsg->head has been converted before this function is called.
5038
 *
H
hjxilinx 已提交
5039
 * @param pQueryMsg
5040 5041 5042 5043
 * @param pTableIdList
 * @param pExpr
 * @return
 */
5044
static int32_t convertQueryMsg(SQueryTableMsg *pQueryMsg, SArray **pTableIdList, SSqlFuncMsg ***pExpr,
weixin_48148422's avatar
weixin_48148422 已提交
5045
                               char **tagCond, char** tbnameCond, SColIndex **groupbyCols, SColumnInfo** tagCols) {
5046 5047 5048 5049 5050 5051 5052 5053
  pQueryMsg->numOfTables = htonl(pQueryMsg->numOfTables);

  pQueryMsg->window.skey = htobe64(pQueryMsg->window.skey);
  pQueryMsg->window.ekey = htobe64(pQueryMsg->window.ekey);
  pQueryMsg->intervalTime = htobe64(pQueryMsg->intervalTime);
  pQueryMsg->slidingTime = htobe64(pQueryMsg->slidingTime);
  pQueryMsg->limit = htobe64(pQueryMsg->limit);
  pQueryMsg->offset = htobe64(pQueryMsg->offset);
H
hjxilinx 已提交
5054

5055 5056
  pQueryMsg->order = htons(pQueryMsg->order);
  pQueryMsg->orderColId = htons(pQueryMsg->orderColId);
H
Haojun Liao 已提交
5057
  pQueryMsg->queryType = htonl(pQueryMsg->queryType);
weixin_48148422's avatar
weixin_48148422 已提交
5058
  pQueryMsg->tagNameRelType = htons(pQueryMsg->tagNameRelType);
5059 5060

  pQueryMsg->numOfCols = htons(pQueryMsg->numOfCols);
5061
  pQueryMsg->numOfOutput = htons(pQueryMsg->numOfOutput);
H
hjxilinx 已提交
5062
  pQueryMsg->numOfGroupCols = htons(pQueryMsg->numOfGroupCols);
5063 5064 5065
  pQueryMsg->tagCondLen = htons(pQueryMsg->tagCondLen);
  pQueryMsg->tsOffset = htonl(pQueryMsg->tsOffset);
  pQueryMsg->tsLen = htonl(pQueryMsg->tsLen);
H
hjxilinx 已提交
5066
  pQueryMsg->tsNumOfBlocks = htonl(pQueryMsg->tsNumOfBlocks);
5067
  pQueryMsg->tsOrder = htonl(pQueryMsg->tsOrder);
5068
  pQueryMsg->numOfTags = htonl(pQueryMsg->numOfTags);
5069

5070
  // query msg safety check
5071
  if (!validateQueryMsg(pQueryMsg)) {
5072
    return TSDB_CODE_QRY_INVALID_MSG;
5073 5074
  }

H
hjxilinx 已提交
5075 5076
  char *pMsg = (char *)(pQueryMsg->colList) + sizeof(SColumnInfo) * pQueryMsg->numOfCols;
  for (int32_t col = 0; col < pQueryMsg->numOfCols; ++col) {
5077 5078
    SColumnInfo *pColInfo = &pQueryMsg->colList[col];

H
hjxilinx 已提交
5079
    pColInfo->colId = htons(pColInfo->colId);
5080
    pColInfo->type = htons(pColInfo->type);
H
hjxilinx 已提交
5081 5082
    pColInfo->bytes = htons(pColInfo->bytes);
    pColInfo->numOfFilters = htons(pColInfo->numOfFilters);
5083

H
hjxilinx 已提交
5084
    assert(pColInfo->type >= TSDB_DATA_TYPE_BOOL && pColInfo->type <= TSDB_DATA_TYPE_NCHAR);
5085

H
hjxilinx 已提交
5086
    int32_t numOfFilters = pColInfo->numOfFilters;
5087
    if (numOfFilters > 0) {
H
hjxilinx 已提交
5088
      pColInfo->filters = calloc(numOfFilters, sizeof(SColumnFilterInfo));
5089 5090 5091
    }

    for (int32_t f = 0; f < numOfFilters; ++f) {
5092 5093 5094 5095
      SColumnFilterInfo *pFilterMsg = (SColumnFilterInfo *)pMsg;
      
      SColumnFilterInfo *pColFilter = &pColInfo->filters[f];
      pColFilter->filterstr = htons(pFilterMsg->filterstr);
5096 5097 5098

      pMsg += sizeof(SColumnFilterInfo);

5099 5100
      if (pColFilter->filterstr) {
        pColFilter->len = htobe64(pFilterMsg->len);
5101

5102
        pColFilter->pz = (int64_t) calloc(1, pColFilter->len + 1 * TSDB_NCHAR_SIZE); // note: null-terminator
5103 5104
        memcpy((void *)pColFilter->pz, pMsg, pColFilter->len);
        pMsg += (pColFilter->len + 1);
5105
      } else {
5106 5107
        pColFilter->lowerBndi = htobe64(pFilterMsg->lowerBndi);
        pColFilter->upperBndi = htobe64(pFilterMsg->upperBndi);
5108 5109
      }

5110 5111
      pColFilter->lowerRelOptr = htons(pFilterMsg->lowerRelOptr);
      pColFilter->upperRelOptr = htons(pFilterMsg->upperRelOptr);
5112 5113 5114
    }
  }

5115 5116
  *pExpr = calloc(pQueryMsg->numOfOutput, POINTER_BYTES);
  SSqlFuncMsg *pExprMsg = (SSqlFuncMsg *)pMsg;
5117

5118
  for (int32_t i = 0; i < pQueryMsg->numOfOutput; ++i) {
5119
    (*pExpr)[i] = pExprMsg;
5120

5121
    pExprMsg->colInfo.colIndex = htons(pExprMsg->colInfo.colIndex);
5122 5123 5124 5125
    pExprMsg->colInfo.colId = htons(pExprMsg->colInfo.colId);
    pExprMsg->colInfo.flag = htons(pExprMsg->colInfo.flag);
    pExprMsg->functionId = htons(pExprMsg->functionId);
    pExprMsg->numOfParams = htons(pExprMsg->numOfParams);
5126

5127
    pMsg += sizeof(SSqlFuncMsg);
5128 5129

    for (int32_t j = 0; j < pExprMsg->numOfParams; ++j) {
5130
      pExprMsg->arg[j].argType = htons(pExprMsg->arg[j].argType);
5131 5132 5133 5134
      pExprMsg->arg[j].argBytes = htons(pExprMsg->arg[j].argBytes);

      if (pExprMsg->arg[j].argType == TSDB_DATA_TYPE_BINARY) {
        pExprMsg->arg[j].argValue.pz = pMsg;
5135
        pMsg += pExprMsg->arg[j].argBytes;  // one more for the string terminated char.
5136 5137 5138 5139 5140
      } else {
        pExprMsg->arg[j].argValue.i64 = htobe64(pExprMsg->arg[j].argValue.i64);
      }
    }

H
Haojun Liao 已提交
5141 5142
    int16_t functionId = pExprMsg->functionId;
    if (functionId == TSDB_FUNC_TAG || functionId == TSDB_FUNC_TAGPRJ || functionId == TSDB_FUNC_TAG_DUMMY) {
5143
      if (pExprMsg->colInfo.flag != TSDB_COL_TAG) {  // ignore the column  index check for arithmetic expression.
5144
        return TSDB_CODE_QRY_INVALID_MSG;
5145 5146
      }
    } else {
5147
//      if (!validateExprColumnInfo(pQueryMsg, pExprMsg)) {
5148
//        return TSDB_CODE_QRY_INVALID_MSG;
5149
//      }
5150 5151
    }

5152
    pExprMsg = (SSqlFuncMsg *)pMsg;
5153
  }
5154

5155 5156
  if (!validateQuerySourceCols(pQueryMsg, *pExpr)) {
    tfree(*pExpr);
5157

5158
    return TSDB_CODE_QRY_INVALID_MSG;
5159
  }
5160

H
hjxilinx 已提交
5161
  pMsg = createTableIdList(pQueryMsg, pMsg, pTableIdList);
5162

H
hjxilinx 已提交
5163
  if (pQueryMsg->numOfGroupCols > 0) {  // group by tag columns
5164 5165 5166 5167
    *groupbyCols = malloc(pQueryMsg->numOfGroupCols * sizeof(SColIndex));

    for (int32_t i = 0; i < pQueryMsg->numOfGroupCols; ++i) {
      (*groupbyCols)[i].colId = *(int16_t *)pMsg;
5168
      pMsg += sizeof((*groupbyCols)[i].colId);
5169 5170

      (*groupbyCols)[i].colIndex = *(int16_t *)pMsg;
5171 5172
      pMsg += sizeof((*groupbyCols)[i].colIndex);

5173
      (*groupbyCols)[i].flag = *(int16_t *)pMsg;
5174 5175 5176 5177 5178
      pMsg += sizeof((*groupbyCols)[i].flag);

      memcpy((*groupbyCols)[i].name, pMsg, tListLen(groupbyCols[i]->name));
      pMsg += tListLen((*groupbyCols)[i].name);
    }
5179

H
hjxilinx 已提交
5180 5181
    pQueryMsg->orderByIdx = htons(pQueryMsg->orderByIdx);
    pQueryMsg->orderType = htons(pQueryMsg->orderType);
5182 5183
  }

5184 5185
  pQueryMsg->fillType = htons(pQueryMsg->fillType);
  if (pQueryMsg->fillType != TSDB_FILL_NONE) {
5186
    pQueryMsg->fillVal = (uint64_t)(pMsg);
5187 5188

    int64_t *v = (int64_t *)pMsg;
5189
    for (int32_t i = 0; i < pQueryMsg->numOfOutput; ++i) {
5190 5191
      v[i] = htobe64(v[i]);
    }
5192

5193
    pMsg += sizeof(int64_t) * pQueryMsg->numOfOutput;
5194
  }
5195

5196 5197 5198 5199
  if (pQueryMsg->numOfTags > 0) {
    (*tagCols) = calloc(1, sizeof(SColumnInfo) * pQueryMsg->numOfTags);
    for (int32_t i = 0; i < pQueryMsg->numOfTags; ++i) {
      SColumnInfo* pTagCol = (SColumnInfo*) pMsg;
5200

5201 5202 5203 5204
      pTagCol->colId = htons(pTagCol->colId);
      pTagCol->bytes = htons(pTagCol->bytes);
      pTagCol->type  = htons(pTagCol->type);
      pTagCol->numOfFilters = 0;
5205

5206
      (*tagCols)[i] = *pTagCol;
5207
      pMsg += sizeof(SColumnInfo);
5208
    }
H
hjxilinx 已提交
5209
  }
5210

5211 5212 5213 5214 5215 5216
  // the tag query condition expression string is located at the end of query msg
  if (pQueryMsg->tagCondLen > 0) {
    *tagCond = calloc(1, pQueryMsg->tagCondLen);
    memcpy(*tagCond, pMsg, pQueryMsg->tagCondLen);
    pMsg += pQueryMsg->tagCondLen;
  }
5217

weixin_48148422's avatar
weixin_48148422 已提交
5218
  if (*pMsg != 0) {
5219 5220
    size_t len = strlen(pMsg) + 1;
    *tbnameCond = malloc(len);
weixin_48148422's avatar
weixin_48148422 已提交
5221
    strcpy(*tbnameCond, pMsg);
5222
    pMsg += len;
weixin_48148422's avatar
weixin_48148422 已提交
5223
  }
5224

H
Haojun Liao 已提交
5225 5226 5227
  qTrace("qmsg:%p query %d tables, type:%d, qrange:%" PRId64 "-%" PRId64 ", numOfGroupbyTagCols:%d, order:%d, "
         "outputCols:%d, numOfCols:%d, interval:%" PRId64 ", fillType:%d, comptsLen:%d, compNumOfBlocks:%d, limit:%" PRId64 ", offset:%" PRId64,
         pQueryMsg, pQueryMsg->numOfTables, pQueryMsg->queryType, pQueryMsg->window.skey, pQueryMsg->window.ekey, pQueryMsg->numOfGroupCols,
5228
         pQueryMsg->order, pQueryMsg->numOfOutput, pQueryMsg->numOfCols, pQueryMsg->intervalTime,
H
Haojun Liao 已提交
5229
         pQueryMsg->fillType, pQueryMsg->tsLen, pQueryMsg->tsNumOfBlocks, pQueryMsg->limit, pQueryMsg->offset);
5230 5231 5232 5233

  return 0;
}

H
hjxilinx 已提交
5234
/*
 * Build the expression tree of an arithmetic expression from the binary string held in
 * the first argument of pArithExprInfo and store it in pArithExprInfo->pExpr.
 *
 * @return TSDB_CODE_SUCCESS on success, the code caught from exprTreeFromBinary() if it
 *         raises one, or TSDB_CODE_QRY_APP_ERROR if no tree was produced
 */
static int32_t buildAirthmeticExprFromMsg(SExprInfo *pArithExprInfo, SQueryTableMsg *pQueryMsg) {
  qTrace("qmsg:%p create arithmetic expr from binary string: %s", pQueryMsg, pArithExprInfo->base.arg[0].argValue.pz);

  tExprNode* pRoot = NULL;
  TRY(32) {
    pRoot = exprTreeFromBinary(pArithExprInfo->base.arg[0].argValue.pz, pArithExprInfo->base.arg[0].argBytes);
  } CATCH( code ) {
    CLEANUP_EXECUTE();
    return code;
  } END_TRY

  if (pRoot != NULL) {
    pArithExprInfo->pExpr = pRoot;
    return TSDB_CODE_SUCCESS;
  }

  qError("qmsg:%p failed to create arithmetic expression string from:%s", pQueryMsg, pArithExprInfo->base.arg[0].argValue.pz);
  return TSDB_CODE_QRY_APP_ERROR;
}

H
Haojun Liao 已提交
5254
static int32_t createQFunctionExprFromMsg(SQueryTableMsg *pQueryMsg, SExprInfo **pExprInfo, SSqlFuncMsg **pExprMsg,
5255 5256
    SColumnInfo* pTagCols) {
  *pExprInfo = NULL;
H
hjxilinx 已提交
5257
  int32_t code = TSDB_CODE_SUCCESS;
5258

H
Haojun Liao 已提交
5259
  SExprInfo *pExprs = (SExprInfo *)calloc(pQueryMsg->numOfOutput, sizeof(SExprInfo));
5260
  if (pExprs == NULL) {
5261
    return TSDB_CODE_QRY_OUT_OF_MEMORY;
5262 5263 5264 5265 5266
  }

  bool    isSuperTable = QUERY_IS_STABLE_QUERY(pQueryMsg->queryType);
  int16_t tagLen = 0;

5267
  for (int32_t i = 0; i < pQueryMsg->numOfOutput; ++i) {
5268
    pExprs[i].base = *pExprMsg[i];
5269
    pExprs[i].bytes = 0;
5270 5271 5272 5273

    int16_t type = 0;
    int16_t bytes = 0;

5274
    // parse the arithmetic expression
5275
    if (pExprs[i].base.functionId == TSDB_FUNC_ARITHM) {
5276
      code = buildAirthmeticExprFromMsg(&pExprs[i], pQueryMsg);
5277

5278 5279 5280
      if (code != TSDB_CODE_SUCCESS) {
        tfree(pExprs);
        return code;
5281 5282
      }

5283
      type  = TSDB_DATA_TYPE_DOUBLE;
5284
      bytes = tDataTypeDesc[type].nSize;
H
Haojun Liao 已提交
5285
    } else if (pExprs[i].base.colInfo.colId == TSDB_TBNAME_COLUMN_INDEX && pExprs[i].base.functionId == TSDB_FUNC_TAGPRJ) {  // parse the normal column
H
Haojun Liao 已提交
5286 5287 5288
      SSchema s = tGetTableNameColumnSchema();
      type  = s.type;
      bytes = s.bytes;
B
Bomin Zhang 已提交
5289
    } else{
5290
      int32_t j = getColumnIndexInSource(pQueryMsg, &pExprs[i].base, pTagCols);
H
Haojun Liao 已提交
5291 5292 5293 5294 5295 5296 5297
      assert(j < pQueryMsg->numOfCols || j < pQueryMsg->numOfTags || j == TSDB_TBNAME_COLUMN_INDEX);

      if (pExprs[i].base.colInfo.colId != TSDB_TBNAME_COLUMN_INDEX) {
        SColumnInfo* pCol = (TSDB_COL_IS_TAG(pExprs[i].base.colInfo.flag))? &pTagCols[j]:&pQueryMsg->colList[j];
        type = pCol->type;
        bytes = pCol->bytes;
      } else {
H
Haojun Liao 已提交
5298
        SSchema s = tGetTableNameColumnSchema();
H
hjxilinx 已提交
5299

H
Haojun Liao 已提交
5300 5301 5302
        type  = s.type;
        bytes = s.bytes;
      }
5303 5304
    }

5305 5306
    int32_t param = pExprs[i].base.arg[0].argValue.i64;
    if (getResultDataInfo(type, bytes, pExprs[i].base.functionId, param, &pExprs[i].type, &pExprs[i].bytes,
5307
                          &pExprs[i].interBytes, 0, isSuperTable) != TSDB_CODE_SUCCESS) {
5308
      tfree(pExprs);
5309
      return TSDB_CODE_QRY_INVALID_MSG;
5310 5311
    }

5312
    if (pExprs[i].base.functionId == TSDB_FUNC_TAG_DUMMY || pExprs[i].base.functionId == TSDB_FUNC_TS_DUMMY) {
5313
      tagLen += pExprs[i].bytes;
5314
    }
5315
    assert(isValidDataType(pExprs[i].type));
5316 5317 5318
  }

  // TODO refactor
5319
  for (int32_t i = 0; i < pQueryMsg->numOfOutput; ++i) {
5320 5321
    pExprs[i].base = *pExprMsg[i];
    int16_t functId = pExprs[i].base.functionId;
5322

5323
    if (functId == TSDB_FUNC_TOP || functId == TSDB_FUNC_BOTTOM) {
5324
      int32_t j = getColumnIndexInSource(pQueryMsg, &pExprs[i].base, pTagCols);
5325 5326 5327 5328 5329
      assert(j < pQueryMsg->numOfCols);

      SColumnInfo *pCol = &pQueryMsg->colList[j];

      int32_t ret =
5330
          getResultDataInfo(pCol->type, pCol->bytes, functId, pExprs[i].base.arg[0].argValue.i64,
5331
                            &pExprs[i].type, &pExprs[i].bytes, &pExprs[i].interBytes, tagLen, isSuperTable);
5332 5333 5334 5335
      assert(ret == TSDB_CODE_SUCCESS);
    }
  }

5336
  tfree(pExprMsg);
5337
  *pExprInfo = pExprs;
5338 5339 5340 5341

  return TSDB_CODE_SUCCESS;
}

5342
/*
 * Build the group-by descriptor of a query from the decoded query message.
 *
 * pQueryMsg  decoded query message; supplies the number of group-by columns and the ordering info
 * pColIndex  array of pQueryMsg->numOfGroupCols column indexes that are copied into the descriptor
 * code       set to TSDB_CODE_QRY_OUT_OF_MEMORY when an allocation fails; left untouched otherwise
 *
 * Returns NULL when the query has no group-by columns (not an error) or when an allocation
 * fails (in which case *code is set). The caller owns the returned object and its columnInfo array.
 */
static SSqlGroupbyExpr *createGroupbyExprFromMsg(SQueryTableMsg *pQueryMsg, SColIndex *pColIndex, int32_t *code) {
  if (pQueryMsg->numOfGroupCols == 0) {
    return NULL;
  }

  // using group by tag columns
  SSqlGroupbyExpr *pGroupbyExpr = (SSqlGroupbyExpr *)calloc(1, sizeof(SSqlGroupbyExpr));
  if (pGroupbyExpr == NULL) {
    *code = TSDB_CODE_QRY_OUT_OF_MEMORY;
    return NULL;
  }

  pGroupbyExpr->numOfGroupCols = pQueryMsg->numOfGroupCols;
  pGroupbyExpr->orderType = pQueryMsg->orderType;
  pGroupbyExpr->orderIndex = pQueryMsg->orderByIdx;

  pGroupbyExpr->columnInfo = taosArrayInit(pQueryMsg->numOfGroupCols, sizeof(SColIndex));
  if (pGroupbyExpr->columnInfo == NULL) {
    // do not hand back a half-built descriptor: later code iterates/destroys columnInfo unconditionally
    tfree(pGroupbyExpr);
    *code = TSDB_CODE_QRY_OUT_OF_MEMORY;
    return NULL;
  }

  for (int32_t i = 0; i < pQueryMsg->numOfGroupCols; ++i) {
    taosArrayPush(pGroupbyExpr->columnInfo, &pColIndex[i]);
  }

  return pGroupbyExpr;
}

5366
/*
 * Build pQuery->pFilterInfo: one SSingleColumnFilterInfo per queried column that carries at
 * least one filter, each with its array of filter elements and the matching filter callback.
 *
 * pQInfo  only used as an identifier in log messages
 * pQuery  query whose colList is scanned; numOfFilterCols and pFilterInfo are written
 *
 * Returns TSDB_CODE_SUCCESS, TSDB_CODE_QRY_OUT_OF_MEMORY when an allocation fails, or
 * TSDB_CODE_QRY_INVALID_MSG when a filter description is inconsistent. On failure the
 * partially built pFilterInfo is left attached to pQuery for the caller to release.
 */
static int32_t createFilterInfo(void *pQInfo, SQuery *pQuery) {
  for (int32_t i = 0; i < pQuery->numOfCols; ++i) {
    if (pQuery->colList[i].numOfFilters > 0) {
      pQuery->numOfFilterCols++;
    }
  }

  if (pQuery->numOfFilterCols == 0) {
    return TSDB_CODE_SUCCESS;
  }

  pQuery->pFilterInfo = calloc(pQuery->numOfFilterCols, sizeof(SSingleColumnFilterInfo));
  if (pQuery->pFilterInfo == NULL) {
    // keep the count consistent with the (missing) array so that cleanup code does not walk it
    pQuery->numOfFilterCols = 0;
    return TSDB_CODE_QRY_OUT_OF_MEMORY;
  }

  for (int32_t i = 0, j = 0; i < pQuery->numOfCols; ++i) {
    if (pQuery->colList[i].numOfFilters > 0) {
      SSingleColumnFilterInfo *pFilterInfo = &pQuery->pFilterInfo[j];

      pFilterInfo->info = pQuery->colList[i];

      pFilterInfo->numOfFilters = pQuery->colList[i].numOfFilters;
      pFilterInfo->pFilters = calloc(pFilterInfo->numOfFilters, sizeof(SColumnFilterElem));
      if (pFilterInfo->pFilters == NULL) {
        return TSDB_CODE_QRY_OUT_OF_MEMORY;
      }

      for (int32_t f = 0; f < pFilterInfo->numOfFilters; ++f) {
        SColumnFilterElem *pSingleColFilter = &pFilterInfo->pFilters[f];
        pSingleColFilter->filterInfo = pQuery->colList[i].filters[f];

        int32_t lower = pSingleColFilter->filterInfo.lowerRelOptr;
        int32_t upper = pSingleColFilter->filterInfo.upperRelOptr;

        if (lower == TSDB_RELATION_INVALID && upper == TSDB_RELATION_INVALID) {
          qError("QInfo:%p invalid filter info", pQInfo);
          return TSDB_CODE_QRY_INVALID_MSG;
        }

        int16_t type  = pQuery->colList[i].type;
        int16_t bytes = pQuery->colList[i].bytes;

        // todo refactor
        __filter_func_t *rangeFilterArray = getRangeFilterFuncArray(type);
        __filter_func_t *filterArray = getValueFilterFuncArray(type);

        if (rangeFilterArray == NULL && filterArray == NULL) {
          qError("QInfo:%p failed to get filter function, invalid data type:%d", pQInfo, type);
          return TSDB_CODE_QRY_INVALID_MSG;
        }

        if ((lower == TSDB_RELATION_GREATER_EQUAL || lower == TSDB_RELATION_GREATER) &&
            (upper == TSDB_RELATION_LESS_EQUAL || upper == TSDB_RELATION_LESS)) {
          // both bounds present: pick the range filter by the inclusiveness of each bound
          assert(rangeFilterArray != NULL);
          if (lower == TSDB_RELATION_GREATER_EQUAL) {
            if (upper == TSDB_RELATION_LESS_EQUAL) {
              pSingleColFilter->fp = rangeFilterArray[4];
            } else {
              pSingleColFilter->fp = rangeFilterArray[2];
            }
          } else {
            if (upper == TSDB_RELATION_LESS_EQUAL) {
              pSingleColFilter->fp = rangeFilterArray[3];
            } else {
              pSingleColFilter->fp = rangeFilterArray[1];
            }
          }
        } else {  // set callback filter function
          assert(filterArray != NULL);
          if (lower != TSDB_RELATION_INVALID) {
            pSingleColFilter->fp = filterArray[lower];

            // a lower relation that is not part of a valid range must come alone
            if (upper != TSDB_RELATION_INVALID) {
              qError("pQInfo:%p failed to get filter function, invalid filter condition: %d", pQInfo, type);
              return TSDB_CODE_QRY_INVALID_MSG;
            }
          } else {
            pSingleColFilter->fp = filterArray[upper];
          }
        }
        assert(pSingleColFilter->fp != NULL);
        pSingleColFilter->bytes = bytes;
      }

      j++;
    }
  }

  return TSDB_CODE_SUCCESS;
}

5453
/*
 * Rewrite colInfo.colIndex of every output expression so that it is the position of the
 * referenced column inside pQuery->colList (normal columns) or pQuery->tagColList (tag columns).
 * Arithmetic expressions (TSDB_FUNC_ARITHM) are skipped.
 */
static void doUpdateExprColumnIndex(SQuery *pQuery) {
  // check the pointer itself before reading a member through it
  assert(pQuery != NULL && pQuery->pSelectExpr != NULL);

  for (int32_t k = 0; k < pQuery->numOfOutput; ++k) {
    SSqlFuncMsg *pSqlExprMsg = &pQuery->pSelectExpr[k].base;
    if (pSqlExprMsg->functionId == TSDB_FUNC_ARITHM) {
      continue;
    }

    // todo opt performance
    SColIndex *pColIndex = &pSqlExprMsg->colInfo;
    if (!TSDB_COL_IS_TAG(pColIndex->flag)) {
      int32_t f = 0;
      for (f = 0; f < pQuery->numOfCols; ++f) {
        if (pColIndex->colId == pQuery->colList[f].colId) {
          pColIndex->colIndex = f;
          break;
        }
      }

      // every normal column referenced by an expression must be part of the queried columns
      assert(f < pQuery->numOfCols);
    } else {
      int32_t f = 0;
      for (f = 0; f < pQuery->numOfTags; ++f) {
        if (pColIndex->colId == pQuery->tagColList[f].colId) {
          pColIndex->colIndex = f;
          break;
        }
      }

      // the table-name pseudo column is not listed in tagColList, so it may legitimately be missing
      assert(f < pQuery->numOfTags || pColIndex->colId == TSDB_TBNAME_COLUMN_INDEX);
    }
  }
}

weixin_48148422's avatar
weixin_48148422 已提交
5488

5489
/*
 * Three-way comparison of two STableIdInfo entries by uid, usable with the
 * sort/search helpers: returns 1, -1 or 0.
 * NOTE(review): callers in this file also pass a STableId as the search key, which
 * presumably relies on uid sitting at the same offset in both structs - confirm.
 */
static int compareTableIdInfo(const void* a, const void* b) {
  const STableIdInfo* lhs = (const STableIdInfo*)a;
  const STableIdInfo* rhs = (const STableIdInfo*)b;

  if (lhs->uid > rhs->uid) {
    return 1;
  }

  return (lhs->uid < rhs->uid) ? -1 : 0;
}

static SQInfo *createQInfoImpl(SQueryTableMsg *pQueryMsg, SArray* pTableIdList, SSqlGroupbyExpr *pGroupbyExpr, SExprInfo *pExprs,
5498
                               STableGroupInfo *tableqinfoGroupInfo, SColumnInfo* pTagCols) {
5499 5500
  SQInfo *pQInfo = (SQInfo *)calloc(1, sizeof(SQInfo));
  if (pQInfo == NULL) {
5501
    return NULL;
5502 5503 5504 5505 5506 5507
  }

  SQuery *pQuery = calloc(1, sizeof(SQuery));
  pQInfo->runtimeEnv.pQuery = pQuery;

  int16_t numOfCols = pQueryMsg->numOfCols;
5508
  int16_t numOfOutput = pQueryMsg->numOfOutput;
5509

5510
  pQuery->numOfCols       = numOfCols;
H
hjxilinx 已提交
5511
  pQuery->numOfOutput     = numOfOutput;
5512 5513 5514
  pQuery->limit.limit     = pQueryMsg->limit;
  pQuery->limit.offset    = pQueryMsg->offset;
  pQuery->order.order     = pQueryMsg->order;
5515
  pQuery->order.orderColId = pQueryMsg->orderColId;
5516 5517 5518 5519
  pQuery->pSelectExpr     = pExprs;
  pQuery->pGroupbyExpr    = pGroupbyExpr;
  pQuery->intervalTime    = pQueryMsg->intervalTime;
  pQuery->slidingTime     = pQueryMsg->slidingTime;
5520
  pQuery->slidingTimeUnit = pQueryMsg->slidingTimeUnit;
5521
  pQuery->fillType        = pQueryMsg->fillType;
5522
  pQuery->numOfTags       = pQueryMsg->numOfTags;
5523
  
5524
  // todo do not allocate ??
5525
  pQuery->colList = calloc(numOfCols, sizeof(SSingleColumnFilterInfo));
5526
  if (pQuery->colList == NULL) {
5527
    goto _cleanup;
5528
  }
5529

H
hjxilinx 已提交
5530
  for (int16_t i = 0; i < numOfCols; ++i) {
5531
    pQuery->colList[i] = pQueryMsg->colList[i];
5532
    pQuery->colList[i].filters = tscFilterInfoClone(pQueryMsg->colList[i].filters, pQuery->colList[i].numOfFilters);
H
hjxilinx 已提交
5533
  }
5534

5535
  pQuery->tagColList = pTagCols;
5536

5537
  // calculate the result row size
5538 5539 5540
  for (int16_t col = 0; col < numOfOutput; ++col) {
    assert(pExprs[col].bytes > 0);
    pQuery->rowSize += pExprs[col].bytes;
5541
  }
5542

5543
  doUpdateExprColumnIndex(pQuery);
5544

5545
  int32_t ret = createFilterInfo(pQInfo, pQuery);
5546
  if (ret != TSDB_CODE_SUCCESS) {
5547
    goto _cleanup;
5548 5549 5550
  }

  // prepare the result buffer
5551
  pQuery->sdata = (tFilePage **)calloc(pQuery->numOfOutput, POINTER_BYTES);
5552
  if (pQuery->sdata == NULL) {
5553
    goto _cleanup;
5554 5555
  }

H
hjxilinx 已提交
5556
  // set the output buffer capacity
H
hjxilinx 已提交
5557
  pQuery->rec.capacity = 4096;
5558
  pQuery->rec.threshold = 4000;
5559

5560
  for (int32_t col = 0; col < pQuery->numOfOutput; ++col) {
5561
    assert(pExprs[col].interBytes >= pExprs[col].bytes);
5562 5563

    // allocate additional memory for interResults that are usually larger then final results
5564 5565
    size_t size = (pQuery->rec.capacity + 1) * pExprs[col].bytes + pExprs[col].interBytes + sizeof(tFilePage);
    pQuery->sdata[col] = (tFilePage *)calloc(1, size);
5566
    if (pQuery->sdata[col] == NULL) {
5567
      goto _cleanup;
5568 5569 5570
    }
  }

5571
  if (pQuery->fillType != TSDB_FILL_NONE) {
5572 5573
    pQuery->fillVal = malloc(sizeof(int64_t) * pQuery->numOfOutput);
    if (pQuery->fillVal == NULL) {
5574
      goto _cleanup;
5575 5576 5577
    }

    // the first column is the timestamp
5578
    memcpy(pQuery->fillVal, (char *)pQueryMsg->fillVal, pQuery->numOfOutput * sizeof(int64_t));
5579 5580 5581
  }

  // to make sure third party won't overwrite this structure
5582
  pQInfo->signature = pQInfo;
5583

5584 5585
  pQInfo->tableGroupInfo = *tableqinfoGroupInfo;
  size_t numOfGroups = taosArrayGetSize(tableqinfoGroupInfo->pGroupList);
5586

5587 5588
  pQInfo->tableqinfoGroupInfo.pGroupList = taosArrayInit(numOfGroups, POINTER_BYTES);
  pQInfo->tableqinfoGroupInfo.numOfTables = tableqinfoGroupInfo->numOfTables;
H
hjxilinx 已提交
5589
  
weixin_48148422's avatar
weixin_48148422 已提交
5590 5591
  int tableIndex = 0;
  STimeWindow window = pQueryMsg->window;
5592
  taosArraySort(pTableIdList, compareTableIdInfo);
5593

H
hjxilinx 已提交
5594
  for(int32_t i = 0; i < numOfGroups; ++i) {
5595
    SArray* pa = taosArrayGetP(tableqinfoGroupInfo->pGroupList, i);
H
hjxilinx 已提交
5596
    size_t s = taosArrayGetSize(pa);
5597

5598
    SArray* p1 = taosArrayInit(s, POINTER_BYTES);
5599

H
hjxilinx 已提交
5600
    for(int32_t j = 0; j < s; ++j) {
5601 5602
      void* pTable = taosArrayGetP(pa, j);

weixin_48148422's avatar
weixin_48148422 已提交
5603
      // NOTE: compare STableIdInfo with STableId
5604 5605
      STableId id = tsdbGetTableId(pTable);
      STableIdInfo* pTableId = taosArraySearch(pTableIdList, &id, compareTableIdInfo);
weixin_48148422's avatar
weixin_48148422 已提交
5606 5607 5608
      if (pTableId != NULL ) {
        window.skey = pTableId->key;
      } else {
B
Bomin Zhang 已提交
5609
        window.skey = pQueryMsg->window.skey;
weixin_48148422's avatar
weixin_48148422 已提交
5610
      }
5611 5612 5613 5614

      STableQueryInfo* item = createTableQueryInfo(&pQInfo->runtimeEnv, pTable, window);
      item->groupIndex = i;
      item->tableIndex = tableIndex++;
H
hjxilinx 已提交
5615 5616
      taosArrayPush(p1, &item);
    }
5617

5618
    taosArrayPush(pQInfo->tableqinfoGroupInfo.pGroupList, &p1);
H
hjxilinx 已提交
5619
  }
5620

weixin_48148422's avatar
weixin_48148422 已提交
5621 5622
  pQInfo->arrTableIdInfo = taosArrayInit(tableIndex, sizeof(STableIdInfo));

5623
  pQuery->pos = -1;
5624
  pQuery->window = pQueryMsg->window;
5625

5626
  if (sem_init(&pQInfo->dataReady, 0, 0) != 0) {
S
slguan 已提交
5627
    qError("QInfo:%p init dataReady sem failed, reason:%s", pQInfo, strerror(errno));
5628
    goto _cleanup;
5629
  }
5630

5631
  colIdCheck(pQuery);
5632

S
slguan 已提交
5633
  qTrace("qmsg:%p QInfo:%p created", pQueryMsg, pQInfo);
5634 5635
  return pQInfo;

5636
_cleanup:
5637
  tfree(pQuery->fillVal);
5638 5639

  if (pQuery->sdata != NULL) {
5640
    for (int16_t col = 0; col < pQuery->numOfOutput; ++col) {
5641 5642 5643 5644 5645 5646 5647 5648 5649 5650 5651 5652 5653 5654 5655 5656
      tfree(pQuery->sdata[col]);
    }
  }

  tfree(pQuery->sdata);
  tfree(pQuery->pFilterInfo);
  tfree(pQuery->colList);

  tfree(pExprs);
  tfree(pGroupbyExpr);

  tfree(pQInfo);

  return NULL;
}

H
hjxilinx 已提交
5657
static bool isValidQInfo(void *param) {
H
hjxilinx 已提交
5658 5659 5660 5661
  SQInfo *pQInfo = (SQInfo *)param;
  if (pQInfo == NULL) {
    return false;
  }
5662

H
hjxilinx 已提交
5663 5664 5665 5666
  /*
   * pQInfo->signature may be changed by another thread, so we assign value of signature
   * into local variable, then compare by using local variable
   */
5667
  uint64_t sig = (uint64_t)pQInfo->signature;
H
hjxilinx 已提交
5668 5669 5670
  return (sig == (uint64_t)pQInfo);
}

H
hjxilinx 已提交
5671 5672
static void freeQInfo(SQInfo *pQInfo);

5673
/*
 * Second-stage initialization of a query handle created by createQInfoImpl.
 *
 * Builds the ts buffer when the message carries a timestamp block, short-circuits queries
 * that cannot produce any result (empty time range, no qualified table) by marking them
 * completed and posting dataReady, and otherwise runs doInitQInfo.
 *
 * Returns TSDB_CODE_SUCCESS or the code reported by doInitQInfo. When doInitQInfo fails,
 * pQInfo has already been released through freeQInfo on return.
 */
static int32_t initQInfo(SQueryTableMsg *pQueryMsg, void *tsdb, int32_t vgId, SQInfo *pQInfo, bool isSTable) {
  SQuery *pQuery = pQInfo->runtimeEnv.pQuery;

  // build the ts buffer from the block carried inside the message, if any
  STSBuf *pTSBuf = NULL;
  if (pQueryMsg->tsLen > 0) {
    char *tsBlock = (char *) pQueryMsg + pQueryMsg->tsOffset;
    pTSBuf = tsBufCreateFromCompBlocks(tsBlock, pQueryMsg->tsNumOfBlocks, pQueryMsg->tsLen, pQueryMsg->tsOrder);

    tsBufResetPos(pTSBuf);
    tsBufNextPos(pTSBuf);
  }

  // only the successful completion requires the sem_post/over = 1 operations.
  bool ascending = QUERY_IS_ASC_QUERY(pQuery);
  bool emptyRange = ascending ? (pQuery->window.skey > pQuery->window.ekey)
                              : (pQuery->window.ekey > pQuery->window.skey);
  if (emptyRange) {
    qTrace("QInfo:%p no result in time range %" PRId64 "-%" PRId64 ", order %d", pQInfo, pQuery->window.skey,
           pQuery->window.ekey, pQuery->order.order);
    setQueryStatus(pQuery, QUERY_COMPLETED);

    sem_post(&pQInfo->dataReady);
    return TSDB_CODE_SUCCESS;
  }

  if (pQInfo->tableqinfoGroupInfo.numOfTables == 0) {
    qTrace("QInfo:%p no table qualified for tag filter, abort query", pQInfo);
    setQueryStatus(pQuery, QUERY_COMPLETED);

    sem_post(&pQInfo->dataReady);
    return TSDB_CODE_SUCCESS;
  }

  // filter the qualified
  int32_t code = doInitQInfo(pQInfo, pTSBuf, tsdb, vgId, isSTable);
  if (code != TSDB_CODE_SUCCESS) {
    // table query ref will be decreased during error handling
    freeQInfo(pQInfo);
  }

  return code;
}

/*
 * Release a query handle and everything reachable from it: output buffers, the runtime
 * environment, filter info, select expressions, fill values, per-table query info,
 * table groups, the group-by descriptor, column lists and finally the handle itself.
 * Handles that fail the isValidQInfo check are ignored.
 */
static void freeQInfo(SQInfo *pQInfo) {
  if (!isValidQInfo(pQInfo)) {
    return;
  }

  SQuery *pQuery = pQInfo->runtimeEnv.pQuery;
  setQueryKilled(pQInfo);

  qTrace("QInfo:%p start to free QInfo", pQInfo);

  // output buffers, one per output column
  for (int32_t k = 0; k < pQuery->numOfOutput; ++k) {
    tfree(pQuery->sdata[k]);
  }

  sem_destroy(&(pQInfo->dataReady));
  teardownQueryRuntimeEnv(&pQInfo->runtimeEnv);

  // filter elements of every filtered column
  for (int32_t k = 0; k < pQuery->numOfFilterCols; ++k) {
    SSingleColumnFilterInfo *pFilter = &pQuery->pFilterInfo[k];
    if (pFilter->numOfFilters <= 0) {
      continue;
    }

    tfree(pFilter->pFilters);
  }

  // expression trees attached to the select expressions, then the expression array
  if (pQuery->pSelectExpr != NULL) {
    for (int32_t k = 0; k < pQuery->numOfOutput; ++k) {
      SExprInfo* pOneExpr = &pQuery->pSelectExpr[k];
      if (pOneExpr->pExpr == NULL) {
        continue;
      }

      tExprTreeDestroy(&pOneExpr->pExpr, NULL);
    }

    tfree(pQuery->pSelectExpr);
  }

  tfree(pQuery->fillVal);

  // todo refactor, extract method to destroytableDataInfo
  int32_t groupCount = taosArrayGetSize(pQInfo->tableqinfoGroupInfo.pGroupList);
  for (int32_t g = 0; g < groupCount; ++g) {
    SArray *pGroup = taosArrayGetP(pQInfo->tableqinfoGroupInfo.pGroupList, g);

    size_t tablesInGroup = taosArrayGetSize(pGroup);
    for(int32_t t = 0; t < tablesInGroup; ++t) {
      STableQueryInfo* pTableInfo = taosArrayGetP(pGroup, t);
      if (pTableInfo == NULL) {
        continue;
      }

      destroyTableQueryInfo(pTableInfo, pQuery->numOfOutput);
    }

    taosArrayDestroy(pGroup);
  }

  taosArrayDestroy(pQInfo->tableqinfoGroupInfo.pGroupList);

  tsdbDestoryTableGroup(&pQInfo->tableGroupInfo);
  taosArrayDestroy(pQInfo->arrTableIdInfo);

  if (pQuery->pGroupbyExpr != NULL) {
    taosArrayDestroy(pQuery->pGroupbyExpr->columnInfo);
    tfree(pQuery->pGroupbyExpr);
  }

  tfree(pQuery->tagColList);
  tfree(pQuery->pFilterInfo);
  tfree(pQuery->colList);
  tfree(pQuery->sdata);

  tfree(pQuery);

  qTrace("QInfo:%p QInfo is freed", pQInfo);

  // wipe the signature so that a stale handle no longer passes the object safety check
  memset(pQInfo, 0, sizeof(SQInfo));
  tfree(pQInfo);
}

H
hjxilinx 已提交
5797
/*
 * Compute the number of payload bytes to send back for the current result set.
 *
 * For a ts-comp query with at least one row the result lives in a file whose path is stored
 * in sdata[0]->data: the file size is returned and also written back into *numOfRows.
 * If the file cannot be inspected 0 is returned and *numOfRows is left untouched.
 * For every other query the size is rowSize * (*numOfRows).
 *
 * TODO handle the case that the file is too large to send back one time
 */
static size_t getResultSize(SQInfo *pQInfo, int64_t *numOfRows) {
  SQuery *pQuery = pQInfo->runtimeEnv.pQuery;

  if (!isTSCompQuery(pQuery) || (*numOfRows) <= 0) {
    return pQuery->rowSize * (*numOfRows);
  }

  struct stat fileStat;
  if (stat(pQuery->sdata[0]->data, &fileStat) != 0) {
    qError("QInfo:%p failed to get file info, path:%s, reason:%s", pQInfo, pQuery->sdata[0]->data, strerror(errno));
    return 0;
  }

  *numOfRows = fileStat.st_size;
  return fileStat.st_size;
}
5818

H
hjxilinx 已提交
5819 5820 5821
/*
 * Copy the current result set into the response buffer `data` and update the running totals.
 *
 * For a ts-comp query the result is a temporary file (path in sdata[0]->data): its content is
 * read into `data`, then the file is removed. Seek/read failures and short reads are logged.
 * For every other query the rows are copied by doCopyQueryResultToMsg.
 *
 * The query is marked QUERY_OVER once the limit is reached (or, for ts-comp queries, once the
 * query has completed). Always returns TSDB_CODE_SUCCESS.
 */
static int32_t doDumpQueryResult(SQInfo *pQInfo, char *data) {
  // the remained number of retrieved rows, not the interpolated result
  SQuery *pQuery = pQInfo->runtimeEnv.pQuery;

  // load data from file to msg buffer
  if (isTSCompQuery(pQuery)) {
    int32_t fd = open(pQuery->sdata[0]->data, O_RDONLY, 0666);

    // make sure file exist
    if (FD_VALID(fd)) {
      int64_t fileSize = lseek(fd, 0, SEEK_END);

      if (fileSize < 0 || lseek(fd, 0, SEEK_SET) < 0) {
        // a failed lseek must not be turned into a (huge) read length
        qError("QInfo:%p failed to get size of ts-comp file, path:%s, reason:%s", pQInfo, pQuery->sdata[0]->data,
               strerror(errno));
      } else {
        size_t s = (size_t)fileSize;
        qTrace("QInfo:%p ts comp data return, file:%s, size:%zu", pQInfo, pQuery->sdata[0]->data, s);

        // read() may return fewer bytes than requested: keep going until done, EOF or error
        size_t done = 0;
        while (done < s) {
          int64_t n = read(fd, data + done, s - done);
          if (n <= 0) {
            break;
          }

          done += (size_t)n;
        }

        if (done != s) {
          qError("QInfo:%p failed to read ts-comp file, path:%s, expect:%zu, read:%zu, reason:%s", pQInfo,
                 pQuery->sdata[0]->data, s, done, strerror(errno));
        }
      }

      close(fd);
      unlink(pQuery->sdata[0]->data);
    } else {
      // todo return the error code to client
      qError("QInfo:%p failed to open tmp file to send ts-comp data to client, path:%s, reason:%s", pQInfo,
             pQuery->sdata[0]->data, strerror(errno));
    }

    // all data returned, set query over
    if (Q_STATUS_EQUAL(pQuery->status, QUERY_COMPLETED)) {
      setQueryStatus(pQuery, QUERY_OVER);
    }
  } else {
    doCopyQueryResultToMsg(pQInfo, pQuery->rec.rows, data);
  }

  pQuery->rec.total += pQuery->rec.rows;
  qTrace("QInfo:%p current numOfRes rows:%" PRId64 ", total:%" PRId64, pQInfo, pQuery->rec.rows, pQuery->rec.total);

  if (pQuery->limit.limit > 0 && pQuery->limit.limit == pQuery->rec.total) {
    qTrace("QInfo:%p results limitation reached, limitation:%"PRId64, pQInfo, pQuery->limit.limit);
    setQueryStatus(pQuery, QUERY_OVER);
  }

  return TSDB_CODE_SUCCESS;
}

5862
/*
 * Create a query handle from a raw query message.
 *
 * tsdb       storage handle passed through to the tsdb lookup functions and to initQInfo
 * vgId       vnode group id, passed through to initQInfo
 * pQueryMsg  query message; decoded by convertQueryMsg
 * pQInfo     receives the new handle on success and NULL on failure
 *
 * On success the handle's reference count is incremented twice before it is returned.
 * Returns TSDB_CODE_SUCCESS or an error code.
 */
int32_t qCreateQueryInfo(void *tsdb, int32_t vgId, SQueryTableMsg *pQueryMsg, qinfo_t *pQInfo) {
  assert(pQueryMsg != NULL);

  int32_t code = TSDB_CODE_SUCCESS;

  char *        tagCond = NULL, *tbnameCond = NULL;
  SArray *      pTableIdList = NULL;
  SSqlFuncMsg **pExprMsg = NULL;
  SColIndex *   pGroupColIndex = NULL;
  SColumnInfo*  pTagColumnInfo = NULL;

  // objects that are owned by this function until they are handed over to createQInfoImpl
  SExprInfo *      pExprs = NULL;
  SSqlGroupbyExpr *pGroupbyExpr = NULL;

  bool            isSTableQuery = false;
  STableGroupInfo tableqinfoGroupInfo = {0};

  if ((code = convertQueryMsg(pQueryMsg, &pTableIdList, &pExprMsg, &tagCond, &tbnameCond, &pGroupColIndex, &pTagColumnInfo)) !=
         TSDB_CODE_SUCCESS) {
    return code;
  }

  if (pQueryMsg->numOfTables <= 0) {
    qError("Invalid number of tables to query, numOfTables:%d", pQueryMsg->numOfTables);
    code = TSDB_CODE_QRY_INVALID_MSG;
    goto _over;
  }

  if (pTableIdList == NULL || taosArrayGetSize(pTableIdList) == 0) {
    qError("qmsg:%p, SQueryTableMsg wrong format", pQueryMsg);
    code = TSDB_CODE_QRY_INVALID_MSG;
    goto _over;
  }

  if ((code = createQFunctionExprFromMsg(pQueryMsg, &pExprs, pExprMsg, pTagColumnInfo)) != TSDB_CODE_SUCCESS) {
    goto _over;
  }

  pGroupbyExpr = createGroupbyExprFromMsg(pQueryMsg, pGroupColIndex, &code);
  if ((pGroupbyExpr == NULL && pQueryMsg->numOfGroupCols != 0) || code != TSDB_CODE_SUCCESS) {
    goto _over;
  }

  if (TSDB_QUERY_HAS_TYPE(pQueryMsg->queryType, TSDB_QUERY_TYPE_TABLE_QUERY)) {
    STableIdInfo *id = taosArrayGet(pTableIdList, 0);

    qTrace("qmsg:%p query normal table, uid:%"PRId64", tid:%d", pQueryMsg, id->uid, id->tid);
    if ((code = tsdbGetOneTableGroup(tsdb, id->uid, &tableqinfoGroupInfo)) != TSDB_CODE_SUCCESS) {
      goto _over;
    }
  } else if (TSDB_QUERY_HAS_TYPE(pQueryMsg->queryType, TSDB_QUERY_TYPE_MULTITABLE_QUERY|TSDB_QUERY_TYPE_STABLE_QUERY)) {
    isSTableQuery = true;
    // TODO: need a macro from TSDB to check if table is super table

    // also note there's possibility that only one table in the super table
    if (!TSDB_QUERY_HAS_TYPE(pQueryMsg->queryType, TSDB_QUERY_TYPE_MULTITABLE_QUERY)) {
      STableIdInfo *id = taosArrayGet(pTableIdList, 0);

      // group by normal column, do not pass the group by condition to tsdb to group table into different group
      int32_t numOfGroupByCols = pQueryMsg->numOfGroupCols;
      if (pQueryMsg->numOfGroupCols == 1 && !TSDB_COL_IS_TAG(pGroupColIndex->flag)) {
        numOfGroupByCols = 0;
      }

      code = tsdbQuerySTableByTagCond(tsdb, id->uid, tagCond, pQueryMsg->tagCondLen, pQueryMsg->tagNameRelType, tbnameCond, &tableqinfoGroupInfo, pGroupColIndex,
                                          numOfGroupByCols);
      if (code != TSDB_CODE_SUCCESS) {
        goto _over;
      }
    } else {
      // the client already resolved the table list: put all of them into a single group
      tableqinfoGroupInfo.pGroupList = taosArrayInit(1, POINTER_BYTES);
      tableqinfoGroupInfo.numOfTables = taosArrayGetSize(pTableIdList);

      SArray* p = taosArrayClone(pTableIdList);
      taosArrayPush(tableqinfoGroupInfo.pGroupList, &p);

      qTrace("qmsg:%p query on %zu tables in one group from client", pQueryMsg, tableqinfoGroupInfo.numOfTables);
    }
  } else {
    // the type comes from the message: reject it instead of continuing with an empty table group
    qError("qmsg:%p invalid query type:%d", pQueryMsg, pQueryMsg->queryType);
    code = TSDB_CODE_QRY_INVALID_MSG;
    goto _over;
  }

  (*pQInfo) = createQInfoImpl(pQueryMsg, pTableIdList, pGroupbyExpr, pExprs, &tableqinfoGroupInfo, pTagColumnInfo);

  // createQInfoImpl either attached the expressions/group-by descriptor to the new handle or
  // released them itself on failure; in both cases they must not be touched here any more
  pExprs = NULL;
  pGroupbyExpr = NULL;

  if ((*pQInfo) == NULL) {
    code = TSDB_CODE_QRY_OUT_OF_MEMORY;
    goto _over;
  }

  // the tag column list is now referenced by the handle and released together with it
  pTagColumnInfo = NULL;

  code = initQInfo(pQueryMsg, tsdb, vgId, *pQInfo, isSTableQuery);

_over:
  tfree(tagCond);
  tfree(tbnameCond);
  tfree(pGroupColIndex);
  taosArrayDestroy(pTableIdList);

  // release whatever is still owned by this function (only non-NULL on failure paths)
  if (pExprs != NULL) {
    for (int32_t i = 0; i < pQueryMsg->numOfOutput; ++i) {
      if (pExprs[i].pExpr != NULL) {
        tExprTreeDestroy(&pExprs[i].pExpr, NULL);
      }
    }

    tfree(pExprs);
  }

  if (pGroupbyExpr != NULL) {
    taosArrayDestroy(pGroupbyExpr->columnInfo);
    tfree(pGroupbyExpr);
  }

  tfree(pTagColumnInfo);

  //pQInfo already freed in initQInfo, but *pQInfo may not pointer to null;
  if (code != TSDB_CODE_SUCCESS) {
    *pQInfo = NULL;
  } else {
    SQInfo* pq = (SQInfo*) (*pQInfo);

    T_REF_INC(pq);
    T_REF_INC(pq);
  }

  // if failed to add ref for all meters in this query, abort current query
  return code;
}

H
Haojun Liao 已提交
5970 5971
/*
 * Final teardown of a query handle: log completion, print the cost summary
 * and release the handle through freeQInfo.
 */
static void doDestoryQueryInfo(SQInfo* pQInfo) {
  assert(pQInfo != NULL);

  qTrace("QInfo:%p query completed", pQInfo);

  // print the query cost summary before the handle goes away
  queryCostStatis(pQInfo);

  freeQInfo(pQInfo);
}

5977
/*
 * Drop one reference on a query handle. When the count reaches zero the handle is
 * destroyed and, if fp is not NULL, fp(param) is invoked afterwards.
 * Invalid handles are ignored.
 */
void qDestroyQueryInfo(qinfo_t qHandle, void (*fp)(void*), void* param) {
  SQInfo* pQInfo = (SQInfo*) qHandle;
  if (!isValidQInfo(pQInfo)) {
    return;
  }

  int16_t remaining = T_REF_DEC(pQInfo);
  qTrace("QInfo:%p dec refCount, value:%d", pQInfo, remaining);

  if (remaining != 0) {
    return;
  }

  // last reference gone: tear the handle down, then notify the caller
  doDestoryQueryInfo(pQInfo);

  if (fp != NULL) {
    fp(param);
  }
}

5995
/*
 * Execute one round of the query behind the handle.
 *
 * Dispatches to the tag-only, super-table or single-table implementation, posts dataReady
 * when the round is done and then drops one reference through qDestroyQueryInfo (fp/param
 * are forwarded to it). A killed query only drops the reference; a NULL handle or one whose
 * signature no longer matches is ignored.
 */
void qTableQuery(qinfo_t qinfo, void (*fp)(void*), void* param) {
  SQInfo *pQInfo = (SQInfo *)qinfo;

  bool usable = (pQInfo != NULL) && (pQInfo->signature == pQInfo);
  if (!usable) {
    qTrace("QInfo:%p has been freed, no need to execute", pQInfo);
    return;
  }

  if (isQueryKilled(pQInfo)) {
    qTrace("QInfo:%p it is already killed, abort", pQInfo);
    qDestroyQueryInfo(pQInfo, fp, param);
    return;
  }

  qTrace("QInfo:%p query task is launched", pQInfo);

  SQueryRuntimeEnv *pEnv = &pQInfo->runtimeEnv;
  if (onlyQueryTags(pEnv->pQuery)) {
    assert(pEnv->pQueryHandle == NULL);
    buildTagQueryResult(pQInfo);   // todo support the limit/offset
  } else {
    if (pEnv->stableQuery) {
      stableQueryImpl(pQInfo);
    } else {
      tableQueryImpl(pQInfo);
    }
  }

  sem_post(&pQInfo->dataReady);
  qDestroyQueryInfo(pQInfo, fp, param);
}

H
hjxilinx 已提交
6024
/*
 * Wait until the query behind the handle has signalled dataReady and report its status code.
 *
 * Returns TSDB_CODE_QRY_INVALID_QHANDLE for an invalid handle. For a killed query the
 * handle's code is returned immediately, without waiting.
 */
int32_t qRetrieveQueryResultInfo(qinfo_t qinfo) {
  SQInfo *pQInfo = (SQInfo *)qinfo;

  // isValidQInfo already rejects a NULL handle
  if (!isValidQInfo(pQInfo)) {
    return TSDB_CODE_QRY_INVALID_QHANDLE;
  }

  SQuery *pQuery = pQInfo->runtimeEnv.pQuery;

  if (!isQueryKilled(pQInfo)) {
    sem_wait(&pQInfo->dataReady);
    qTrace("QInfo:%p retrieve result info, rowsize:%d, rows:%"PRId64", code:%d", pQInfo, pQuery->rowSize, pQuery->rec.rows,
           pQInfo->code);
  } else {
    qTrace("QInfo:%p query is killed, code:%d", pQInfo, pQInfo->code);
  }

  return pQInfo->code;
}
6043

H
hjxilinx 已提交
6044
/*
 * Tell whether the client may retrieve more results from this query.
 *
 * Returns false for an invalid handle, for a handle whose code is not TSDB_CODE_SUCCESS and
 * for a query in QUERY_OVER state. Returns true when the result buffer is full or the query
 * has completed; in that case one reference is added to the handle before returning.
 */
bool qHasMoreResultsToRetrieve(qinfo_t qinfo) {
  SQInfo *pQInfo = (SQInfo *)qinfo;

  // validate first: the handle must not be dereferenced (not even for logging) when it is invalid
  if (!isValidQInfo(pQInfo)) {
    qTrace("QInfo:%p invalid qhandle, abort query", pQInfo);
    return false;
  }

  if (pQInfo->code != TSDB_CODE_SUCCESS) {
    qTrace("QInfo:%p invalid qhandle or error occurs, abort query, code:%x", pQInfo, pQInfo->code);
    return false;
  }

  SQuery *pQuery = pQInfo->runtimeEnv.pQuery;
  bool ret = false;
  if (Q_STATUS_EQUAL(pQuery->status, QUERY_OVER)) {
    ret = false;
  } else if (Q_STATUS_EQUAL(pQuery->status, QUERY_RESBUF_FULL)) {
    ret = true;
  } else if (Q_STATUS_EQUAL(pQuery->status, QUERY_COMPLETED)) {
    ret = true;
  } else {
    assert(0);
  }

  if (ret) {
    // keep the handle alive for the upcoming retrieve
    T_REF_INC(pQInfo);
    qTrace("QInfo:%p has more results waits for client retrieve", pQInfo);
  }

  return ret;
}

6072 6073 6074
/*
 * Build the retrieve response for the current result set of a query.
 *
 * qinfo    query handle
 * pRsp     receives the response buffer obtained from rpcMallocCont
 * contLen  receives the total length of the response
 *
 * Returns TSDB_CODE_QRY_INVALID_QHANDLE for an invalid handle, TSDB_CODE_QRY_OUT_OF_MEMORY
 * when the response buffer cannot be allocated (*pRsp is NULL in that case), otherwise the
 * code of the query / of dumping the result. When there is nothing to dump, or the query
 * failed, the query is moved to QUERY_OVER.
 */
int32_t qDumpRetrieveResult(qinfo_t qinfo, SRetrieveTableRsp **pRsp, int32_t *contLen) {
  SQInfo *pQInfo = (SQInfo *)qinfo;

  if (pQInfo == NULL || !isValidQInfo(pQInfo)) {
    return TSDB_CODE_QRY_INVALID_QHANDLE;
  }

  SQueryRuntimeEnv* pRuntimeEnv = &pQInfo->runtimeEnv;
  SQuery *pQuery = pQInfo->runtimeEnv.pQuery;

  // payload size, plus room for a 32-bit counter and the table id info entries
  size_t  size = getResultSize(pQInfo, &pQuery->rec.rows);
  size += sizeof(int32_t);
  size += sizeof(STableIdInfo) * taosArrayGetSize(pQInfo->arrTableIdInfo);
  *contLen = size + sizeof(SRetrieveTableRsp);

  *pRsp = (SRetrieveTableRsp *)rpcMallocCont(*contLen);
  if (*pRsp == NULL) {
    qError("QInfo:%p failed to allocate %d bytes for retrieve response", pQInfo, *contLen);
    return TSDB_CODE_QRY_OUT_OF_MEMORY;
  }

  (*pRsp)->numOfRows = htonl(pQuery->rec.rows);

  int32_t code = pQInfo->code;
  if (code == TSDB_CODE_SUCCESS) {
    (*pRsp)->offset = htobe64(pQuery->limit.offset);
    (*pRsp)->useconds = htobe64(pRuntimeEnv->summary.elapsedTime);
  } else {
    (*pRsp)->offset = 0;
    (*pRsp)->useconds = 0;
  }

  (*pRsp)->precision = htons(pQuery->precision);
  if (pQuery->rec.rows > 0 && code == TSDB_CODE_SUCCESS) {
    code = doDumpQueryResult(pQInfo, (*pRsp)->data);
  } else {
    setQueryStatus(pQuery, QUERY_OVER);
    code = pQInfo->code;
  }

  if (isQueryKilled(pQInfo) || Q_STATUS_EQUAL(pQuery->status, QUERY_OVER)) {
    (*pRsp)->completed = 1;  // notify no more result to client
  } else {
    // do not rely on the allocator having zeroed the buffer
    (*pRsp)->completed = 0;
  }

  return code;
}
H
hjxilinx 已提交
6113

6114
/*
 * Mark the query as killed and drop one reference on its handle (fp/param are forwarded
 * to qDestroyQueryInfo). Returns TSDB_CODE_QRY_INVALID_QHANDLE for an invalid handle,
 * TSDB_CODE_SUCCESS otherwise.
 */
int32_t qKillQuery(qinfo_t qinfo, void (*fp)(void*), void* param) {
  SQInfo *pQInfo = (SQInfo *)qinfo;

  // isValidQInfo already rejects a NULL handle
  if (!isValidQInfo(pQInfo)) {
    return TSDB_CODE_QRY_INVALID_QHANDLE;
  }

  setQueryKilled(pQInfo);
  qDestroyQueryInfo(pQInfo, fp, param);

  return TSDB_CODE_SUCCESS;
}

H
hjxilinx 已提交
6127 6128 6129
static void buildTagQueryResult(SQInfo* pQInfo) {
  SQueryRuntimeEnv *pRuntimeEnv = &pQInfo->runtimeEnv;
  SQuery *          pQuery = pRuntimeEnv->pQuery;
6130

6131
  size_t numOfGroup = taosArrayGetSize(pQInfo->tableqinfoGroupInfo.pGroupList);
H
Haojun Liao 已提交
6132
  assert(numOfGroup == 0 || numOfGroup == 1);
6133

H
Haojun Liao 已提交
6134
  if (numOfGroup == 0) {
6135 6136
    return;
  }
H
hjxilinx 已提交
6137
  
6138
  SArray* pa = taosArrayGetP(pQInfo->tableqinfoGroupInfo.pGroupList, 0);
6139

H
Haojun Liao 已提交
6140
  size_t num = taosArrayGetSize(pa);
6141
  assert(num == pQInfo->tableqinfoGroupInfo.numOfTables);
6142

H
Haojun Liao 已提交
6143
  int32_t count = 0;
6144 6145 6146
  int32_t functionId = pQuery->pSelectExpr[0].base.functionId;
  if (functionId == TSDB_FUNC_TID_TAG) { // return the tags & table Id
    assert(pQuery->numOfOutput == 1);
6147

6148 6149
    SExprInfo* pExprInfo = &pQuery->pSelectExpr[0];
    int32_t rsize = pExprInfo->bytes;
H
Haojun Liao 已提交
6150
    count = 0;
6151

H
Haojun Liao 已提交
6152 6153 6154 6155 6156 6157 6158 6159 6160 6161 6162
    int16_t bytes = pExprInfo->bytes;
    int16_t type = pExprInfo->type;

    for(int32_t i = 0; i < pQuery->numOfTags; ++i) {
      if (pQuery->tagColList[i].colId == pExprInfo->base.colInfo.colId) {
        bytes = pQuery->tagColList[i].bytes;
        type = pQuery->tagColList[i].type;
        break;
      }
    }

H
Haojun Liao 已提交
6163 6164
    while(pQInfo->tableIndex < num && count < pQuery->rec.capacity) {
      int32_t i = pQInfo->tableIndex++;
6165
      STableQueryInfo *item = taosArrayGet(pa, i);
6166

6167
      char *output = pQuery->sdata[0]->data + i * rsize;
6168
      varDataSetLen(output, rsize - VARSTR_HEADER_SIZE);
6169

6170
      output = varDataVal(output);
6171 6172 6173 6174
      STableId id = tsdbGetTableId(item->pTable);

      *(int64_t *)output = id.uid;  // memory align problem, todo serialize
      output += sizeof(id.uid);
6175

6176 6177
      *(int32_t *)output = id.tid;
      output += sizeof(id.tid);
6178

6179
      *(int32_t *)output = pQInfo->vgId;
6180
      output += sizeof(pQInfo->vgId);
6181

6182
      if (pExprInfo->base.colInfo.colId == TSDB_TBNAME_COLUMN_INDEX) {
6183
        char *data = tsdbGetTableName(item->pTable);
6184
        memcpy(output, data, varDataTLen(data));
H
[td-90]  
Haojun Liao 已提交
6185
      } else {
6186
        char *val = tsdbGetTableTagVal(item->pTable, pExprInfo->base.colInfo.colId, type, bytes);
6187 6188 6189 6190 6191 6192 6193 6194

        // todo refactor
        if (type == TSDB_DATA_TYPE_BINARY || type == TSDB_DATA_TYPE_NCHAR) {
          if (val == NULL) {
            setVardataNull(output, type);
          } else {
            memcpy(output, val, varDataTLen(val));
          }
H
[td-90]  
Haojun Liao 已提交
6195
        } else {
6196 6197
          if (val == NULL) {
            setNull(output, type, bytes);
H
Haojun Liao 已提交
6198
          } else {  // todo here stop will cause client crash
6199 6200
            memcpy(output, val, bytes);
          }
H
[td-90]  
Haojun Liao 已提交
6201 6202
        }
      }
6203

H
Haojun Liao 已提交
6204
      count += 1;
6205
    }
6206

H
Haojun Liao 已提交
6207
    qTrace("QInfo:%p create (tableId, tag) info completed, rows:%d", pQInfo, count);
6208

H
Haojun Liao 已提交
6209 6210 6211 6212 6213 6214
  } else if (functionId == TSDB_FUNC_COUNT) {// handle the "count(tbname)" query
    *(int64_t*) pQuery->sdata[0]->data = num;

    count = 1;
    pQInfo->tableIndex = num;  //set query completed
    qTrace("QInfo:%p create count(tbname) query, res:%d rows:1", pQInfo, count);
6215
  } else {  // return only the tags|table name etc.
H
Haojun Liao 已提交
6216
    count = 0;
H
Haojun Liao 已提交
6217
    SSchema tbnameSchema = tGetTableNameColumnSchema();
H
Haojun Liao 已提交
6218 6219
    while(pQInfo->tableIndex < num && count < pQuery->rec.capacity) {
      int32_t i = pQInfo->tableIndex++;
6220

6221
      SExprInfo* pExprInfo = pQuery->pSelectExpr;
6222
      STableQueryInfo* item = taosArrayGetP(pa, i);
6223

6224 6225
      for(int32_t j = 0; j < pQuery->numOfOutput; ++j) {
        if (pExprInfo[j].base.colInfo.colId == TSDB_TBNAME_COLUMN_INDEX) {
6226
          char* data = tsdbGetTableName(item->pTable);
H
Haojun Liao 已提交
6227
          char* dst = pQuery->sdata[j]->data + count * tbnameSchema.bytes;
H
hjxilinx 已提交
6228
          memcpy(dst, data, varDataTLen(data));
H
[td-90]  
Haojun Liao 已提交
6229 6230 6231 6232
        } else {// todo refactor
          int16_t type = pExprInfo[j].type;
          int16_t bytes = pExprInfo[j].bytes;
          
6233
          char* data = tsdbGetTableTagVal(item->pTable, pExprInfo[j].base.colInfo.colId, type, bytes);
H
Haojun Liao 已提交
6234
          char* dst = pQuery->sdata[j]->data + count * pExprInfo[j].bytes;
6235

H
hjxilinx 已提交
6236
          if (type == TSDB_DATA_TYPE_BINARY || type == TSDB_DATA_TYPE_NCHAR) {
H
[td-90]  
Haojun Liao 已提交
6237 6238 6239 6240 6241
            if (data == NULL) {
              setVardataNull(dst, type);
            } else {
              memcpy(dst, data, varDataTLen(data));
            }
H
hjxilinx 已提交
6242
          } else {
H
[td-90]  
Haojun Liao 已提交
6243 6244 6245 6246 6247
            if (data == NULL) {
              setNull(dst, type, bytes);
            } else {
              memcpy(dst, data, pExprInfo[j].bytes);
            }
H
hjxilinx 已提交
6248
          }
6249
        }
H
hjxilinx 已提交
6250
      }
H
Haojun Liao 已提交
6251
      count += 1;
H
hjxilinx 已提交
6252
    }
6253

H
Haojun Liao 已提交
6254
    qTrace("QInfo:%p create tag values results completed, rows:%d", pQInfo, count);
H
hjxilinx 已提交
6255
  }
6256

H
Haojun Liao 已提交
6257
  pQuery->rec.rows = count;
H
hjxilinx 已提交
6258
  setQueryStatus(pQuery, QUERY_COMPLETED);
H
hjxilinx 已提交
6259 6260
}