qExecutor.c 203.8 KB
Newer Older
1 2 3 4 5 6 7 8 9 10 11 12 13 14
/*
 * Copyright (c) 2019 TAOS Data, Inc. <jhtao@taosdata.com>
 *
 * This program is free software: you can use, redistribute, and/or modify
 * it under the terms of the GNU Affero General Public License, version 3
 * or later ("AGPL"), as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.
 *
 * You should have received a copy of the GNU Affero General Public License
 * along with this program. If not, see <http://www.gnu.org/licenses/>.
 */
15
#include "qfill.h"
16 17 18 19
#include "os.h"

#include "hash.h"
#include "hashfunc.h"
20 21
#include "qExecutor.h"
#include "qUtil.h"
H
hjxilinx 已提交
22
#include "qast.h"
23
#include "qresultBuf.h"
H
hjxilinx 已提交
24
#include "query.h"
S
slguan 已提交
25
#include "queryLog.h"
26
#include "taosmsg.h"
27
#include "tdataformat.h"
28
#include "tlosertree.h"
29
#include "tscUtil.h"  // todo move the function to common module
30 31
#include "tscompression.h"
#include "ttime.h"
32 33 34 35 36 37 38 39 40

/*
 * Small helper macros used by the query executor.
 *
 * NOTE(review): the comment that used to sit here ("check if the primary column is load by default, otherwise,
 * the program will forced to load primary column explicitly") does not describe any macro below and looks stale.
 */

/* non-zero when p and s share at least one set bit (a bit test, despite the "EQUAL" in the name) */
#define Q_STATUS_EQUAL(p, s) (((p) & (s)) != 0)

/* non-zero when the TSDB_COL_TAG bit is set in column flag f */
#define TSDB_COL_IS_TAG(f) (((f)&TSDB_COL_TAG) != 0)

/* true when the forward direction factor of the query order equals QUERY_ASC_FORWARD_STEP */
#define QUERY_IS_ASC_QUERY(q) (GET_FORWARD_DIRECTION_FACTOR((q)->order.order) == QUERY_ASC_FORWARD_STEP)

/* test / set the scan flag stored in the runtime environment */
#define IS_MASTER_SCAN(runtime)        ((runtime)->scanFlag == MASTER_SCAN)
#define IS_REVERSE_SCAN(runtime)       ((runtime)->scanFlag == REVERSE_SCAN)
#define SET_MASTER_SCAN_FLAG(runtime)  ((runtime)->scanFlag = MASTER_SCAN)
#define SET_REVERSE_SCAN_FLAG(runtime) ((runtime)->scanFlag = REVERSE_SCAN)

/* get the enclosing SQInfo address from the address of its runtimeEnv member */
#define GET_QINFO_ADDR(x) ((void *)((char *)(x)-offsetof(SQInfo, runtimeEnv)))

/* row position reached after moving `index` steps of size `step` from the query's current position */
#define GET_COL_DATA_POS(query, index, step) ((query)->pos + (index) * (step))

/* flip n in place: TSDB_ORDER_ASC becomes TSDB_ORDER_DESC, any other value becomes TSDB_ORDER_ASC */
#define SWITCH_ORDER(n) (((n) = ((n) == TSDB_ORDER_ASC) ? TSDB_ORDER_DESC : TSDB_ORDER_ASC))

/* byte width / data type of the source column referenced by the colidx-th select expression */
#define GET_COLUMN_BYTES(query, colidx) \
  ((query)->colList[(query)->pSelectExpr[(colidx)].base.colInfo.colIndex].bytes)
#define GET_COLUMN_TYPE(query, colidx) ((query)->colList[(query)->pSelectExpr[(colidx)].base.colInfo.colIndex].type)
55

56
/* Query execution status bits; each value is a distinct bit, so they can be OR-ed together. */
enum {
  // set when the query starts to execute
  QUERY_NOT_COMPLETED = 0x1u,

  /*
   * The result output buffer is full and the current query is paused.
   * This status only exists for group-by and diff/add/division/multiply queries.
   */
  QUERY_RESBUF_FULL = 0x2u,

  /*
   * The query is over:
   * 1. used by one-row-result queries, e.g., count/sum/first/last/avg, etc.
   * 2. also set once all data within the queried time window has been processed
   */
  QUERY_COMPLETED = 0x4u,

  /*
   * Used when the result has not been completely returned to the client yet;
   * usually seen for interval queries with the interpolation option.
   */
  QUERY_OVER = 0x8u,
};
76 77

/* Outcome codes for a timestamp-join comparison (names only; the users are outside this section). */
enum {
  TS_JOIN_TS_EQUAL       = 0,
  TS_JOIN_TS_NOT_EQUALS  = 1,
  TS_JOIN_TAG_NOT_EQUALS = 2,
};

83
typedef struct {
84 85 86 87 88 89
  int32_t     status;       // query status
  TSKEY       lastKey;      // the lastKey value before query executed
  STimeWindow w;            // whole query time window
  STimeWindow curWindow;    // current query window
  int32_t     windowIndex;  // index of active time window result for interval query
  STSCursor   cur;
90 91
} SQueryStatusInfo;

92
#define CLEAR_QUERY_STATUS(q, st)   ((q)->status &= (~(st)))
93
static void setQueryStatus(SQuery *pQuery, int8_t status);
94

H
hjxilinx 已提交
95
/* A query is an interval query when its interval length is positive. */
static bool isIntervalQuery(SQuery *pQuery) {
  if (pQuery->intervalTime > 0) {
    return true;
  }

  return false;
}
96

H
hjxilinx 已提交
97
// todo move to utility
98
static int32_t mergeIntoGroupResultImpl(SQInfo *pQInfo, SArray *group);
99

H
hjxilinx 已提交
100
static void setWindowResOutputBuf(SQueryRuntimeEnv *pRuntimeEnv, SWindowResult *pResult);
101 102 103
static void resetMergeResultBuf(SQuery *pQuery, SQLFunctionCtx *pCtx, SResultInfo *pResultInfo);
static bool functionNeedToExecute(SQueryRuntimeEnv *pRuntimeEnv, SQLFunctionCtx *pCtx, int32_t functionId);
static void getNextTimeWindow(SQuery *pQuery, STimeWindow *pTimeWindow);
104

105 106 107
static void setExecParams(SQuery *pQuery, SQLFunctionCtx *pCtx, void* inputData, TSKEY *tsCol, SDataBlockInfo* pBlockInfo,
                          SDataStatis *pStatis, void *param, int32_t colIndex);

108
static void initCtxOutputBuf(SQueryRuntimeEnv *pRuntimeEnv);
109
static void destroyTableQueryInfo(STableQueryInfo *pTableQueryInfo, int32_t numOfCols);
110 111
static void resetCtxOutputBuf(SQueryRuntimeEnv *pRuntimeEnv);
static bool hasMainOutput(SQuery *pQuery);
H
hjxilinx 已提交
112
static void buildTagQueryResult(SQInfo *pQInfo);
113

H
hjxilinx 已提交
114
static int32_t setAdditionalInfo(SQInfo *pQInfo, STableId *pTableId, STableQueryInfo *pTableQueryInfo);
115
static int32_t flushFromResultBuf(SQInfo *pQInfo);
116

117
/*
 * Check the row at position elemPos against every filtered column of the query.
 *
 * For each filtered column the value must be non-NULL and accepted by at least one of the
 * column's filters; the columns themselves are AND-ed.
 *
 * @return true when the row passes all column filters, false otherwise
 */
bool doFilterData(SQuery *pQuery, int32_t elemPos) {
  for (int32_t col = 0; col < pQuery->numOfFilterCols; ++col) {
    SSingleColumnFilterInfo *pColFilter = &pQuery->pFilterInfo[col];
    char *pValue = pColFilter->pData + pColFilter->info.bytes * elemPos;

    // a NULL value never qualifies
    if (isNull(pValue, pColFilter->info.type)) {
      return false;
    }

    // stop at the first filter that accepts the value
    int32_t idx = 0;
    while (idx < pColFilter->numOfFilters) {
      SColumnFilterElem *pOneFilter = &pColFilter->pFilters[idx];
      if (pOneFilter->fp(pOneFilter, pValue, pValue)) {
        break;
      }

      ++idx;
    }

    // ran off the end: no filter of this column accepted the value
    if (idx >= pColFilter->numOfFilters) {
      return false;
    }
  }

  return true;
}

/*
 * Return the largest numOfRes found among the output columns of the query.
 *
 * When the query has a main output (hasMainOutput), the ts/tag/tagprj functions are skipped,
 * since they can not decide the number of output rows on their own.
 */
int64_t getNumOfResult(SQueryRuntimeEnv *pRuntimeEnv) {
  SQuery *pQuery = pRuntimeEnv->pQuery;
  bool    skipAuxiliary = hasMainOutput(pQuery);
  int64_t numOfRows = 0;

  for (int32_t idx = 0; idx < pQuery->numOfOutput; ++idx) {
    int32_t funcId = pQuery->pSelectExpr[idx].base.functionId;
    bool    auxiliary = (funcId == TSDB_FUNC_TS || funcId == TSDB_FUNC_TAG || funcId == TSDB_FUNC_TAGPRJ);

    if (skipAuxiliary && auxiliary) {
      continue;
    }

    SResultInfo *pInfo = GET_RES_INFO(&pRuntimeEnv->pCtx[idx]);
    if (pInfo == NULL) {
      continue;
    }

    if (pInfo->numOfRes > numOfRows) {
      numOfRows = pInfo->numOfRes;
    }
  }

  assert(numOfRows >= 0);
  return numOfRows;
}

171 172 173 174 175 176 177 178 179
/*
 * Overwrite the number of results of every output column with numOfRes; this is needed after
 * the offset value has been updated.
 *
 * The ts/tag/tagprj/ts_dummy functions are left untouched. For all other functions the current
 * numOfRes is asserted to be strictly greater than the new value.
 */
void updateNumOfResult(SQueryRuntimeEnv *pRuntimeEnv, int32_t numOfRes) {
  SQuery *pQuery = pRuntimeEnv->pQuery;

  for (int32_t idx = 0; idx < pQuery->numOfOutput; ++idx) {
    SQLFunctionCtx *pOneCtx = &pRuntimeEnv->pCtx[idx];
    SResultInfo    *pInfo = GET_RES_INFO(pOneCtx);

    int16_t funcId = pOneCtx->functionId;
    bool    auxiliary = (funcId == TSDB_FUNC_TS || funcId == TSDB_FUNC_TAG || funcId == TSDB_FUNC_TAGPRJ ||
                      funcId == TSDB_FUNC_TS_DUMMY);

    if (!auxiliary) {
      assert(pInfo->numOfRes > numOfRes);
      pInfo->numOfRes = numOfRes;
    }
  }
}

191 192 193 194 195 196 197 198 199
/* Map a group index to its result id: 200000 + groupIndex * 10000. */
static int32_t getGroupResultId(int32_t groupIndex) {
  enum { GROUP_RESULT_ID_BASE = 200000, GROUP_RESULT_ID_STRIDE = 10000 };

  int32_t groupOffset = groupIndex * GROUP_RESULT_ID_STRIDE;
  return GROUP_RESULT_ID_BASE + groupOffset;
}

/*
 * Tell whether the group-by clause contains a normal (TSDB_COL_NORMAL) column.
 *
 * @return false for a NULL/empty group-by expression or when no normal column is present
 */
bool isGroupbyNormalCol(SSqlGroupbyExpr *pGroupbyExpr) {
  if (pGroupbyExpr == NULL || pGroupbyExpr->numOfGroupCols == 0) {
    return false;
  }

  int32_t idx = 0;
  while (idx < pGroupbyExpr->numOfGroupCols) {
    SColIndex *pIndex = taosArrayGet(pGroupbyExpr->columnInfo, idx);
    ++idx;

    if (pIndex->flag != TSDB_COL_NORMAL) {
      continue;
    }

    // make sure the normal column locates at the second position if tbname exists in group by clause
    if (pGroupbyExpr->numOfGroupCols > 1) {
      assert(pIndex->colIndex > 0);
    }

    return true;
  }

  return false;
}

/*
 * Return the data type of the first normal column listed in the group-by clause.
 *
 * The column id of that group-by column is looked up in pQuery->colList. TSDB_DATA_TYPE_NULL
 * is returned when the group-by clause has no normal column or the id is not in colList.
 */
int16_t getGroupbyColumnType(SQuery *pQuery, SSqlGroupbyExpr *pGroupbyExpr) {
  assert(pGroupbyExpr != NULL);

  // -2 marks "no normal group-by column found"
  int32_t targetColId = -2;

  for (int32_t g = 0; g < pGroupbyExpr->numOfGroupCols; ++g) {
    SColIndex *pIndex = taosArrayGet(pGroupbyExpr->columnInfo, g);
    if (pIndex->flag == TSDB_COL_NORMAL) {
      targetColId = pIndex->colId;
      break;
    }
  }

  for (int32_t c = 0; c < pQuery->numOfCols; ++c) {
    if (pQuery->colList[c].colId == targetColId) {
      return pQuery->colList[c].type;
    }
  }

  return TSDB_DATA_TYPE_NULL;
}

/*
 * A "selectivity with tags" query has at least one tag_dummy/ts_dummy output together with at
 * least one function whose status carries the TSDB_FUNCSTATE_SELECTIVITY bit.
 */
bool isSelectivityWithTagsQuery(SQuery *pQuery) {
  bool    sawDummyOutput = false;
  int32_t selectivityCount = 0;

  for (int32_t idx = 0; idx < pQuery->numOfOutput; ++idx) {
    int32_t funcId = pQuery->pSelectExpr[idx].base.functionId;

    if (funcId == TSDB_FUNC_TAG_DUMMY || funcId == TSDB_FUNC_TS_DUMMY) {
      sawDummyOutput = true;
    } else if ((aAggs[funcId].nStatus & TSDB_FUNCSTATE_SELECTIVITY) != 0) {
      ++selectivityCount;
    }
  }

  return (selectivityCount > 0 && sawDummyOutput);
}

265
/* True when the first select expression of the query is the TSDB_FUNC_TS_COMP function. */
bool isTSCompQuery(SQuery *pQuery) {
  if (pQuery->pSelectExpr[0].base.functionId == TSDB_FUNC_TS_COMP) {
    return true;
  }

  return false;
}
266

267 268 269 270
/*
 * Apply the LIMIT clause to the rows produced in the current round.
 *
 * When a positive limit exists and rec.total + rec.rows exceeds it, rec.rows is cut down to the
 * remaining quota and the query status is set to QUERY_COMPLETED.
 *
 * @return true when the rows were truncated, false otherwise
 */
static bool limitResults(SQueryRuntimeEnv* pRuntimeEnv) {
  SQuery* pQuery = pRuntimeEnv->pQuery;
  SQInfo* pQInfo = GET_QINFO_ADDR(pRuntimeEnv);

  // no limit configured, or the limit is not exceeded yet
  if (pQuery->limit.limit <= 0 || (pQuery->rec.total + pQuery->rec.rows <= pQuery->limit.limit)) {
    return false;
  }

  // keep only as many rows as the limit still allows
  pQuery->rec.rows = pQuery->limit.limit - pQuery->rec.total;

  qTrace("QInfo:%p discard remain data due to result limitation, limit:%"PRId64", current return:%d, total:%"PRId64,
      pQInfo, pQuery->limit.limit, pQuery->rec.rows, pQuery->rec.total + pQuery->rec.rows);
  assert(pQuery->rec.rows >= 0);

  setQueryStatus(pQuery, QUERY_COMPLETED);
  return true;
}

/* True when any output column of the query is a top or bottom function. */
static bool isTopBottomQuery(SQuery *pQuery) {
  int32_t idx = 0;

  while (idx < pQuery->numOfOutput) {
    int32_t funcId = pQuery->pSelectExpr[idx].base.functionId;
    ++idx;

    // the timestamp column is skipped outright
    if (funcId == TSDB_FUNC_TS) {
      continue;
    }

    if (funcId == TSDB_FUNC_TOP || funcId == TSDB_FUNC_BOTTOM) {
      return true;
    }
  }

  return false;
}

H
Haojun Liao 已提交
299
/*
 * Find the statistics entry that belongs to the column used by the index-th select expression.
 *
 * The entry is chosen by column id rather than by position, since the file block may be out of
 * date, i.e. the newest table schema may differ from the schema of this block.
 * TODO: speedup by using bsearch
 *
 * @return the matching entry of pStatis, or NULL for a tag column (no corresponding field info)
 *         or when no entry carries the column id
 */
static SDataStatis *getStatisInfo(SQuery *pQuery, SDataStatis *pStatis, int32_t numOfCols, int32_t index) {
  SColIndex *pTarget = &pQuery->pSelectExpr[index].base.colInfo;
  if (TSDB_COL_IS_TAG(pTarget->flag)) {
    return NULL;
  }

  SDataStatis *pMatch = NULL;
  for (int32_t i = 0; i < numOfCols && pMatch == NULL; ++i) {
    if (pStatis[i].colId == pTarget->colId) {
      pMatch = &pStatis[i];
    }
  }

  return pMatch;
}

320 321 322 323 324 325 326 327
/**
 * @param pQuery
 * @param col
 * @param pDataBlockInfo
 * @param pStatis
 * @param pColStatis
 * @return
 */
H
Haojun Liao 已提交
328
static bool hasNullValue(SQuery *pQuery, int32_t col, int32_t numOfCols, SDataStatis *pStatis, SDataStatis **pColStatis) {
329
  SColIndex *pColIndex = &pQuery->pSelectExpr[col].base.colInfo;
330
  if (TSDB_COL_IS_TAG(pColIndex->flag)) {
331 332
    return false;
  }
333

334 335 336 337
  // query on primary timestamp column, not null value at all
  if (pColIndex->colId == PRIMARYKEY_TIMESTAMP_COL_INDEX) {
    return false;
  }
338

339
  if (pStatis != NULL) {
H
Haojun Liao 已提交
340
    *pColStatis = getStatisInfo(pQuery, pStatis, numOfCols, col);
H
hjxilinx 已提交
341 342
  } else {
    *pColStatis = NULL;
343
  }
344

345 346 347
  if ((*pColStatis) != NULL && (*pColStatis)->numOfNull == 0) {
    return false;
  }
348

349 350 351 352 353 354
  return true;
}

/*
 * Locate, or create, the window result slot that belongs to the given key.
 *
 * The key (pData, bytes) is looked up in pWindowResInfo->hashList. On a hit the stored slot
 * index becomes the current index. On a miss a new slot is appended, growing the result array
 * first when it is full, and the key is registered in the hash list.
 *
 * @param pRuntimeEnv     runtime environment, used to initialize newly allocated slots
 * @param pWindowResInfo  window result container that is searched and possibly extended
 * @param pData           key bytes
 * @param bytes           length of the key
 * @return                the slot for the key, or NULL when the result array could not be grown
 *                        (out of memory); in that case pWindowResInfo is left unchanged
 */
static SWindowResult *doSetTimeWindowFromKey(SQueryRuntimeEnv *pRuntimeEnv, SWindowResInfo *pWindowResInfo, char *pData,
                                             int16_t bytes) {
  SQuery *pQuery = pRuntimeEnv->pQuery;

  int32_t *p1 = (int32_t *)taosHashGet(pWindowResInfo->hashList, pData, bytes);
  if (p1 != NULL) {
    pWindowResInfo->curIndex = *p1;
    return getWindowResult(pWindowResInfo, pWindowResInfo->curIndex);
  }

  // more than the capacity, reallocate the resources
  if (pWindowResInfo->size >= pWindowResInfo->capacity) {
    int64_t oldCap = pWindowResInfo->capacity;
    int64_t newCap = (oldCap > 0) ? (oldCap * 2) : 1;  // doubling a zero capacity would never grow

    SWindowResult *t = realloc(pWindowResInfo->pResult, newCap * sizeof(SWindowResult));
    if (t == NULL) {
      // realloc leaves the old array valid on failure; report the error instead of writing past it
      return NULL;
    }

    pWindowResInfo->pResult = t;
    memset(&pWindowResInfo->pResult[oldCap], 0, sizeof(SWindowResult) * (newCap - oldCap));

    for (int32_t i = pWindowResInfo->capacity; i < newCap; ++i) {
      SPosInfo pos = {-1, -1};
      createQueryResultInfo(pQuery, &pWindowResInfo->pResult[i], pRuntimeEnv->stableQuery, &pos);
    }

    pWindowResInfo->capacity = newCap;
  }

  // add a new result set for a new group
  pWindowResInfo->curIndex = pWindowResInfo->size++;
  taosHashPut(pWindowResInfo->hashList, pData, bytes, (char *)&pWindowResInfo->curIndex, sizeof(int32_t));

  return getWindowResult(pWindowResInfo, pWindowResInfo->curIndex);
}

/*
 * Get the time window that covers the timestamp ts.
 *
 * The starting candidate is the current window of pWindowResInfo, or, when no window is active
 * yet (curIndex == -1), the window that starts at prevSKey. If the candidate does not cover ts,
 * its start is shifted by whole multiples of the sliding time until it does.
 */
static STimeWindow getActiveTimeWindow(SWindowResInfo *pWindowResInfo, int64_t ts, SQuery *pQuery) {
  STimeWindow win = {0};

  if (pWindowResInfo->curIndex != -1) {
    int32_t slot = curTimeWindow(pWindowResInfo);
    win = getWindowResult(pWindowResInfo, slot)->window;
  } else {  // the first window, from the previous stored value
    win.skey = pWindowResInfo->prevSKey;
    win.ekey = win.skey + pQuery->intervalTime - 1;
  }

  bool covered = (win.skey <= ts && ts <= win.ekey);
  if (!covered) {
    int64_t start = win.skey;

    // window begins after ts: move backwards by enough sliding steps
    if (start > ts) {
      int64_t steps = (start - ts + pQuery->slidingTime - 1) / pQuery->slidingTime;
      start -= steps * pQuery->slidingTime;
    }

    // window ends before ts: move forwards by enough sliding steps
    int64_t end = start + pQuery->intervalTime - 1;
    if (end < ts) {
      int64_t steps = (ts - end + pQuery->slidingTime - 1) / pQuery->slidingTime;
      start += steps * pQuery->slidingTime;
    }

    win.skey = start;
    win.ekey = win.skey + pQuery->intervalTime - 1;
  }

  /*
   * query border check, skey should not be bounded by the query time range, since the value skey will
   * be used as the time window index value. So we only change ekey of time window accordingly.
   */
  if (QUERY_IS_ASC_QUERY(pQuery) && win.ekey > pQuery->window.ekey) {
    win.ekey = pQuery->window.ekey;
  }

  assert(ts >= win.skey && ts <= win.ekey);
  return win;
}

/*
 * Assign a position (page id, row id) in the disk-based result buffer to a window result that
 * does not have one yet.
 *
 * The last page registered for sid is reused while it holds fewer than numOfRowsPerPage rows;
 * otherwise (or when no page exists for sid) a new page is requested.
 *
 * @return 0 on success or when the window result already owns a position, -1 when no page
 *         could be obtained
 */
static int32_t addNewWindowResultBuf(SWindowResult *pWindowRes, SDiskbasedResultBuf *pResultBuf, int32_t sid,
                                     int32_t numOfRowsPerPage) {
  if (pWindowRes->pos.pageId != -1) {
    return 0;
  }

  // in the first scan, new space needed for results
  int32_t    pageId = -1;
  tFilePage *pPage = NULL;
  SIDList    pages = getDataBufPagesIdList(pResultBuf, sid);

  if (pages.size != 0) {
    pageId = getLastPageId(&pages);
    pPage = getResultBufferPageById(pResultBuf, pageId);

    // the last page is full, switch to a fresh one
    if (pPage->num >= numOfRowsPerPage) {
      pPage = getNewDataBuf(pResultBuf, sid, &pageId);
      if (pPage != NULL) {
        assert(pPage->num == 0);  // number of elements must be 0 for new allocated buffer
      }
    }
  } else {
    pPage = getNewDataBuf(pResultBuf, sid, &pageId);
  }

  if (pPage == NULL) {
    return -1;
  }

  // take the next free row of the page and bump its row counter
  if (pWindowRes->pos.pageId == -1) {  // not allocated yet, allocate new buffer
    pWindowRes->pos.pageId = pageId;
    pWindowRes->pos.rowId = pPage->num++;
  }

  return 0;
}

/*
 * Make the result slot of time window `win` the active output target.
 *
 * The slot is looked up (or created) by the window start key, gets a result buffer position if
 * it has none yet, records the window, and is then installed as the output buffer of the
 * function contexts.
 *
 * @return TSDB_CODE_SUCCESS on success, -1 when the slot or its buffer could not be obtained
 */
static int32_t setWindowOutputBufByKey(SQueryRuntimeEnv *pRuntimeEnv, SWindowResInfo *pWindowResInfo, int32_t sid,
                                       STimeWindow *win) {
  assert(win->skey <= win->ekey);
  SDiskbasedResultBuf *pBuf = pRuntimeEnv->pResultBuf;

  SWindowResult *pRes = doSetTimeWindowFromKey(pRuntimeEnv, pWindowResInfo, (char *)&win->skey, TSDB_KEYSIZE);
  if (pRes == NULL) {
    return -1;
  }

  // not assign result buffer yet, add new result buffer
  if (pRes->pos.pageId == -1 && addNewWindowResultBuf(pRes, pBuf, sid, pRuntimeEnv->numOfRowsPerPage) != 0) {
    return -1;
  }

  // set time window for current result
  pRes->window = *win;

  setWindowResOutputBuf(pRuntimeEnv, pRes);
  initCtxOutputBuf(pRuntimeEnv);

  return TSDB_CODE_SUCCESS;
}

/* Return the status record of the window result stored in `slot`; the slot must be in [0, size). */
static SWindowStatus *getTimeWindowResStatus(SWindowResInfo *pWindowResInfo, int32_t slot) {
  assert(slot >= 0 && slot < pWindowResInfo->size);

  SWindowResult *pRes = pWindowResInfo->pResult + slot;
  return &pRes->status;
}

H
Haojun Liao 已提交
499
/*
 * Count the rows between position `pos` and the position that searchFn reports for `ekey`,
 * walking in the direction given by `order`.
 *
 * @param numOfRows  number of timestamps in pData
 * @param searchFn   block search function used to locate ekey
 * @param ekey       end key of the range
 * @param pos        start position inside the block
 * @param order      TSDB_ORDER_ASC or descending order
 * @param pData      timestamp column of the block
 * @return           number of rows to step over; 0 when searchFn reports a negative position
 */
static int32_t getForwardStepsInBlock(int32_t numOfRows, __block_search_fn_t searchFn, TSKEY ekey, int16_t pos,
                                      int16_t order, int64_t *pData) {
  int32_t lastPos = searchFn((char *)pData, numOfRows, ekey, order);
  if (lastPos < 0) {
    return 0;
  }

  int32_t steps = (order == TSDB_ORDER_ASC) ? (lastPos - pos) : (pos - lastPos);
  assert(steps >= 0);

  // the row at lastPos equals the key, so that row has to be included as well
  if (pData[lastPos] == ekey) {
    ++steps;
  }

  return steps;
}

/**
 * NOTE: the query status only set for the first scan of master scan.
 */
520
static int32_t doCheckQueryCompleted(SQueryRuntimeEnv *pRuntimeEnv, TSKEY lastKey, SWindowResInfo *pWindowResInfo) {
521 522
  SQuery *pQuery = pRuntimeEnv->pQuery;
  if (pRuntimeEnv->scanFlag != MASTER_SCAN || (!isIntervalQuery(pQuery))) {
523
    return pWindowResInfo->size;
524
  }
525

526
  // no qualified results exist, abort check
527 528
  int32_t numOfClosed = 0;
  
529
  if (pWindowResInfo->size == 0) {
530
    return pWindowResInfo->size;
531
  }
532

533
  // query completed
H
hjxilinx 已提交
534 535
  if ((lastKey >= pQuery->current->win.ekey && QUERY_IS_ASC_QUERY(pQuery)) ||
      (lastKey <= pQuery->current->win.ekey && !QUERY_IS_ASC_QUERY(pQuery))) {
536
    closeAllTimeWindow(pWindowResInfo);
537

538 539 540 541
    pWindowResInfo->curIndex = pWindowResInfo->size - 1;
    setQueryStatus(pQuery, QUERY_COMPLETED | QUERY_RESBUF_FULL);
  } else {  // set the current index to be the last unclosed window
    int32_t i = 0;
542
    int64_t skey = TSKEY_INITIAL_VAL;
543

544 545 546
    for (i = 0; i < pWindowResInfo->size; ++i) {
      SWindowResult *pResult = &pWindowResInfo->pResult[i];
      if (pResult->status.closed) {
547
        numOfClosed += 1;
548 549
        continue;
      }
550

551 552 553 554 555 556 557 558
      if ((pResult->window.ekey <= lastKey && QUERY_IS_ASC_QUERY(pQuery)) ||
          (pResult->window.skey >= lastKey && !QUERY_IS_ASC_QUERY(pQuery))) {
        closeTimeWindow(pWindowResInfo, i);
      } else {
        skey = pResult->window.skey;
        break;
      }
    }
559

560
    // all windows are closed, set the last one to be the skey
561
    if (skey == TSKEY_INITIAL_VAL) {
562 563 564 565 566
      assert(i == pWindowResInfo->size);
      pWindowResInfo->curIndex = pWindowResInfo->size - 1;
    } else {
      pWindowResInfo->curIndex = i;
    }
567

568
    pWindowResInfo->prevSKey = pWindowResInfo->pResult[pWindowResInfo->curIndex].window.skey;
569

570 571 572 573 574
    // the number of completed slots are larger than the threshold, return current generated results to client.
    if (numOfClosed > pWindowResInfo->threshold) {
      qTrace("QInfo:%p total result window:%d closed:%d, reached the output threshold %d, return",
          GET_QINFO_ADDR(pRuntimeEnv), pWindowResInfo->size, numOfClosed, pQuery->rec.threshold);
      
575
      setQueryStatus(pQuery, QUERY_RESBUF_FULL);
576 577 578
    } else {
      qTrace("QInfo:%p total result window:%d already closed:%d", GET_QINFO_ADDR(pRuntimeEnv), pWindowResInfo->size,
             numOfClosed);
579 580
    }
  }
581 582 583 584 585 586 587
  
  // output has reached the limitation, set query completed
  if (pQuery->limit.limit > 0 && (pQuery->limit.limit + pQuery->limit.offset) <= numOfClosed &&
      pRuntimeEnv->scanFlag == MASTER_SCAN) {
    setQueryStatus(pQuery, QUERY_COMPLETED);
  }
  
588
  assert(pWindowResInfo->prevSKey != TSKEY_INITIAL_VAL);
589
  return numOfClosed;
590 591 592
}

/*
 * Count the rows of the data block, starting at startPos and walking in query order, that
 * belong to the time window ending at ekey.
 *
 * When the window ends inside the block the count comes from getForwardStepsInBlock; otherwise
 * every remaining row of the block (in query order) is counted.
 *
 * @param updateLastKey  when true, pQuery->current->lastKey is advanced to one step past the
 *                       last counted row (or past the block boundary); it is left untouched
 *                       when no row qualifies
 * @return               number of qualified rows (>= 0)
 */
static int32_t getNumOfRowsInTimeWindow(SQuery *pQuery, SDataBlockInfo *pDataBlockInfo, TSKEY *pPrimaryColumn,
                                        int32_t startPos, TSKEY ekey, __block_search_fn_t searchFn, bool updateLastKey) {
  assert(startPos >= 0 && startPos < pDataBlockInfo->rows);

  int32_t order = pQuery->order.order;
  int32_t step = GET_FORWARD_DIRECTION_FACTOR(order);
  bool    asc = QUERY_IS_ASC_QUERY(pQuery);

  STableQueryInfo *pTableInfo = pQuery->current;

  int32_t numOfRows = -1;
  bool    endsInBlock = asc ? (ekey < pDataBlockInfo->window.ekey) : (ekey > pDataBlockInfo->window.skey);

  if (endsInBlock) {
    numOfRows = getForwardStepsInBlock(pDataBlockInfo->rows, searchFn, ekey, startPos, order, pPrimaryColumn);

    if (numOfRows == 0) {  // no qualified data in current block, do not update the lastKey value
      assert(asc ? (ekey < pPrimaryColumn[startPos]) : (ekey > pPrimaryColumn[startPos]));
    } else if (updateLastKey) {
      int32_t lastRow = asc ? (startPos + (numOfRows - 1)) : (startPos - (numOfRows - 1));
      pTableInfo->lastKey = pPrimaryColumn[lastRow] + step;
    }
  } else {
    // the window reaches beyond this block: every remaining row qualifies
    numOfRows = asc ? (pDataBlockInfo->rows - startPos) : (startPos + 1);

    if (updateLastKey) {
      TSKEY blockEdge = asc ? pDataBlockInfo->window.ekey : pDataBlockInfo->window.skey;
      pTableInfo->lastKey = blockEdge + step;
    }
  }

  assert(numOfRows >= 0);
  return numOfRows;
}

/*
 * Run every output function of the query over a run of rows of the current block.
 *
 * Nothing is done unless this is the master scan or the window is already closed.
 *
 * @param pStatus      status of the window result being filled
 * @param pWin         time window; its skey becomes the start timestamp of each context
 * @param offset       position of the first row in query order
 * @param forwardStep  number of rows to process
 * @param tsBuf        timestamp column, handed to selectivity functions only
 * @param numOfTotal   total number of rows in the block; when it differs from forwardStep the
 *                     pre-aggregated statistics are disabled
 */
static void doBlockwiseApplyFunctions(SQueryRuntimeEnv *pRuntimeEnv, SWindowStatus *pStatus, STimeWindow *pWin,
                                      int32_t offset, int32_t forwardStep, TSKEY *tsBuf, int32_t numOfTotal) {
  SQuery *        pQuery = pRuntimeEnv->pQuery;
  SQLFunctionCtx *pCtx = pRuntimeEnv->pCtx;

  if (!IS_MASTER_SCAN(pRuntimeEnv) && !pStatus->closed) {
    return;
  }

  for (int32_t k = 0; k < pQuery->numOfOutput; ++k) {
    SQLFunctionCtx *pOneCtx = &pCtx[k];
    int32_t         funcId = pQuery->pSelectExpr[k].base.functionId;

    pOneCtx->nStartQueryTimestamp = pWin->skey;
    pOneCtx->size = forwardStep;

    // the start offset is always the lowest row position of the run
    if (QUERY_IS_ASC_QUERY(pQuery)) {
      pOneCtx->startOffset = offset;
    } else {
      pOneCtx->startOffset = offset - (forwardStep - 1);
    }

    if ((aAggs[funcId].nStatus & TSDB_FUNCSTATE_SELECTIVITY) != 0) {
      pOneCtx->ptsList = tsBuf;
    }

    // not a whole block involved in query processing, statistics data can not be used
    if (forwardStep != numOfTotal) {
      pOneCtx->preAggVals.isSet = false;
    }

    if (functionNeedToExecute(pRuntimeEnv, pOneCtx, funcId)) {
      aAggs[funcId].xFunction(pOneCtx);
    }
  }
}

/*
 * Run every output function of the query on the single row at `offset`.
 *
 * Nothing is done unless this is the master scan or the window is already closed. The window
 * start key is stored as the start timestamp of each function context before it is invoked.
 */
static void doRowwiseApplyFunctions(SQueryRuntimeEnv *pRuntimeEnv, SWindowStatus *pStatus, STimeWindow *pWin,
                                    int32_t offset) {
  SQuery *        pQuery = pRuntimeEnv->pQuery;
  SQLFunctionCtx *pCtx = pRuntimeEnv->pCtx;

  if (!IS_MASTER_SCAN(pRuntimeEnv) && !pStatus->closed) {
    return;
  }

  for (int32_t k = 0; k < pQuery->numOfOutput; ++k) {
    SQLFunctionCtx *pOneCtx = &pCtx[k];
    pOneCtx->nStartQueryTimestamp = pWin->skey;

    int32_t funcId = pQuery->pSelectExpr[k].base.functionId;
    if (!functionNeedToExecute(pRuntimeEnv, pOneCtx, funcId)) {
      continue;
    }

    aAggs[funcId].xFunctionF(pOneCtx, offset);
  }
}

static int32_t getNextQualifiedWindow(SQueryRuntimeEnv *pRuntimeEnv, STimeWindow *pNextWin,
687 688
                                      SDataBlockInfo *pDataBlockInfo, TSKEY *primaryKeys,
                                      __block_search_fn_t searchFn) {
689
  SQuery *pQuery = pRuntimeEnv->pQuery;
690

691 692 693 694 695
  while (1) {
    if ((pNextWin->ekey > pQuery->window.ekey && QUERY_IS_ASC_QUERY(pQuery)) ||
        (pNextWin->skey < pQuery->window.ekey && !QUERY_IS_ASC_QUERY(pQuery))) {
      return -1;
    }
696

697
    getNextTimeWindow(pQuery, pNextWin);
698

699 700 701 702 703
    // next time window is not in current block
    if ((pNextWin->skey > pDataBlockInfo->window.ekey && QUERY_IS_ASC_QUERY(pQuery)) ||
        (pNextWin->ekey < pDataBlockInfo->window.skey && !QUERY_IS_ASC_QUERY(pQuery))) {
      return -1;
    }
704

705 706 707 708 709 710 711 712 713 714 715 716
    TSKEY startKey = -1;
    if (QUERY_IS_ASC_QUERY(pQuery)) {
      startKey = pNextWin->skey;
      if (startKey < pQuery->window.skey) {
        startKey = pQuery->window.skey;
      }
    } else {
      startKey = pNextWin->ekey;
      if (startKey > pQuery->window.skey) {
        startKey = pQuery->window.skey;
      }
    }
717

718
    int32_t startPos = searchFn((char *)primaryKeys, pDataBlockInfo->rows, startKey, pQuery->order.order);
719

720 721 722 723 724 725 726 727
    /*
     * This time window does not cover any data, try next time window,
     * this case may happen when the time window is too small
     */
    if ((primaryKeys[startPos] > pNextWin->ekey && QUERY_IS_ASC_QUERY(pQuery)) ||
        (primaryKeys[startPos] < pNextWin->skey && !QUERY_IS_ASC_QUERY(pQuery))) {
      continue;
    }
728

729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745
    return startPos;
  }
}

/*
 * Return the end key of a time window in query order, bounded by the end key of the query range.
 *
 * Ascending: the window ekey, but not larger than the query ekey.
 * Descending: the window skey, but not smaller than the query ekey.
 */
static TSKEY reviseWindowEkey(SQuery *pQuery, STimeWindow *pWindow) {
  TSKEY bound = pQuery->window.ekey;

  if (QUERY_IS_ASC_QUERY(pQuery)) {
    return (pWindow->ekey > bound) ? bound : pWindow->ekey;
  }

  return (pWindow->skey < bound) ? bound : pWindow->skey;
}

H
hjxilinx 已提交
750 751 752 753 754 755 756 757 758 759 760 761 762 763 764
/*
 * Return the data pointer of the column with the given id inside a data block (an array of
 * SColumnInfoData), or NULL when the block holds no such column.
 * todo binary search
 */
static void* getDataBlockImpl(SArray* pDataBlock, int32_t colId) {
  int32_t total = taosArrayGetSize(pDataBlock);
  int32_t idx = 0;

  while (idx < total) {
    SColumnInfoData *pCol = taosArrayGet(pDataBlock, idx);
    if (pCol->info.colId == colId) {
      return pCol->pData;
    }

    ++idx;
  }

  return NULL;
}

static char *getDataBlock(SQueryRuntimeEnv *pRuntimeEnv, SArithmeticSupport *sas, int32_t col, int32_t size,
765
                    SArray *pDataBlock) {
766
  char *dataBlock = NULL;
767 768 769
  SQuery *pQuery = pRuntimeEnv->pQuery;
  
  SQLFunctionCtx *pCtx = pRuntimeEnv->pCtx;
770

771
  int32_t functionId = pQuery->pSelectExpr[col].base.functionId;
772
  if (functionId == TSDB_FUNC_ARITHM) {
773
    sas->pArithExpr = &pQuery->pSelectExpr[col];
774

775 776 777 778 779 780
    // set the start offset to be the lowest start position, no matter asc/desc query order
    if (QUERY_IS_ASC_QUERY(pQuery)) {
      pCtx->startOffset = pQuery->pos;
    } else {
      pCtx->startOffset = pQuery->pos - (size - 1);
    }
781 782 783 784 785 786 787
  
    sas->offset  = 0;
    sas->colList = pQuery->colList;
    sas->numOfCols = pQuery->numOfCols;
    sas->data    = calloc(pQuery->numOfCols, POINTER_BYTES);
    
    // here the pQuery->colList and sas->colList are identical
788
    for (int32_t i = 0; i < pQuery->numOfCols; ++i) {
789
      SColumnInfo *pColMsg = &pQuery->colList[i];
790 791 792 793 794 795 796 797 798 799 800 801 802
  
      int32_t numOfCols = taosArrayGetSize(pDataBlock);
      
      dataBlock = NULL;
      for (int32_t k = 0; k < numOfCols; ++k) {  //todo refactor
        SColumnInfoData *p = taosArrayGet(pDataBlock, k);
        if (pColMsg->colId == p->info.colId) {
          dataBlock = p->pData;
          break;
        }
      }
      
      assert(dataBlock != NULL);
H
Haojun Liao 已提交
803
      sas->data[i] = dataBlock/* + pQuery->colList[i].bytes*/;  // start from the offset
804
    }
805

806
  } else {  // other type of query function
807
    SColIndex *pCol = &pQuery->pSelectExpr[col].base.colInfo;
808
    if (TSDB_COL_IS_TAG(pCol->flag) || pDataBlock == NULL) {
809 810
      dataBlock = NULL;
    } else {
H
hjxilinx 已提交
811
      dataBlock = getDataBlockImpl(pDataBlock, pCol->colId);
812 813
    }
  }
814

815 816 817 818 819 820 821
  return dataBlock;
}

/**
 *
 * @param pRuntimeEnv
 * @param forwardStep
822
 * @param tsCols
823 824 825 826 827
 * @param pFields
 * @param isDiskFileBlock
 * @return                  the incremental number of output value, so it maybe 0 for fixed number of query,
 *                          such as count/min/max etc.
 */
828
static void blockwiseApplyFunctions(SQueryRuntimeEnv *pRuntimeEnv, SDataStatis *pStatis,
829 830
                                       SDataBlockInfo *pDataBlockInfo, SWindowResInfo *pWindowResInfo,
                                       __block_search_fn_t searchFn, SArray *pDataBlock) {
831
  SQLFunctionCtx *pCtx = pRuntimeEnv->pCtx;
832 833 834
  
  SQuery *pQuery = pRuntimeEnv->pQuery;
  TSKEY  *tsCols = NULL;
835
  if (pDataBlock != NULL) {
836
    SColumnInfoData* pColInfo = taosArrayGet(pDataBlock, 0);
837
    tsCols = (TSKEY *)(pColInfo->pData);
838
  }
839

840
  SArithmeticSupport *sasArray = calloc((size_t)pQuery->numOfOutput, sizeof(SArithmeticSupport));
841
  
842
  for (int32_t k = 0; k < pQuery->numOfOutput; ++k) {
H
hjxilinx 已提交
843
    char *dataBlock = getDataBlock(pRuntimeEnv, &sasArray[k], k, pDataBlockInfo->rows, pDataBlock);
844
    setExecParams(pQuery, &pCtx[k], dataBlock, tsCols, pDataBlockInfo, pStatis, &sasArray[k], k);
845
  }
846

847 848 849
  int32_t step = GET_FORWARD_DIRECTION_FACTOR(pQuery->order.order);
  if (isIntervalQuery(pQuery)) {
    int32_t offset = GET_COL_DATA_POS(pQuery, 0, step);
850
    TSKEY   ts = tsCols[offset];
851

852
    STimeWindow win = getActiveTimeWindow(pWindowResInfo, ts, pQuery);
H
hjxilinx 已提交
853
    if (setWindowOutputBufByKey(pRuntimeEnv, pWindowResInfo, pDataBlockInfo->tid, &win) != TSDB_CODE_SUCCESS) {
H
hjxilinx 已提交
854
      return;
855
    }
856

857 858
    TSKEY   ekey = reviseWindowEkey(pQuery, &win);
    int32_t forwardStep =
859
        getNumOfRowsInTimeWindow(pQuery, pDataBlockInfo, tsCols, pQuery->pos, ekey, searchFn, true);
860

861
    SWindowStatus *pStatus = getTimeWindowResStatus(pWindowResInfo, curTimeWindow(pWindowResInfo));
862
    doBlockwiseApplyFunctions(pRuntimeEnv, pStatus, &win, pQuery->pos, forwardStep, tsCols, pDataBlockInfo->rows);
863

864 865
    int32_t     index = pWindowResInfo->curIndex;
    STimeWindow nextWin = win;
866

867
    while (1) {
868
      int32_t startPos = getNextQualifiedWindow(pRuntimeEnv, &nextWin, pDataBlockInfo, tsCols, searchFn);
869 870 871
      if (startPos < 0) {
        break;
      }
872

873
      // null data, failed to allocate more memory buffer
H
hjxilinx 已提交
874
      if (setWindowOutputBufByKey(pRuntimeEnv, pWindowResInfo, pDataBlockInfo->tid, &nextWin) != TSDB_CODE_SUCCESS) {
875 876
        break;
      }
877

878
      ekey = reviseWindowEkey(pQuery, &nextWin);
879
      forwardStep = getNumOfRowsInTimeWindow(pQuery, pDataBlockInfo, tsCols, startPos, ekey, searchFn, true);
880

881
      pStatus = getTimeWindowResStatus(pWindowResInfo, curTimeWindow(pWindowResInfo));
882
      doBlockwiseApplyFunctions(pRuntimeEnv, pStatus, &nextWin, startPos, forwardStep, tsCols, pDataBlockInfo->rows);
883
    }
884

885 886 887 888 889 890 891
    pWindowResInfo->curIndex = index;
  } else {
    /*
     * the sqlfunctionCtx parameters should be set done before all functions are invoked,
     * since the selectivity + tag_prj query needs all parameters been set done.
     * tag_prj function are changed to be TSDB_FUNC_TAG_DUMMY
     */
892
    for (int32_t k = 0; k < pQuery->numOfOutput; ++k) {
893
      int32_t functionId = pQuery->pSelectExpr[k].base.functionId;
894 895 896 897 898
      if (functionNeedToExecute(pRuntimeEnv, &pCtx[k], functionId)) {
        aAggs[functionId].xFunction(&pCtx[k]);
      }
    }
  }
899

900 901 902 903 904 905 906 907
  for(int32_t i = 0; i < pQuery->numOfOutput; ++i) {
    if (pQuery->pSelectExpr[i].base.functionId != TSDB_FUNC_ARITHM) {
      continue;
    }
    
    tfree(sasArray[i].data);
  }
  
908 909 910 911 912 913 914
  tfree(sasArray);
}

/**
 * Point the output buffers of the function contexts at the result slot that belongs to the
 * given group-by value.
 *
 * @param pRuntimeEnv  runtime environment of the query
 * @param pData        group-by column value of the current row
 * @param type         data type of pData
 * @param bytes        width of pData in bytes
 * @return TSDB_CODE_SUCCESS on success; -1 if the value is null, no window result could be
 *         obtained for the key, or no result page could be added
 */
static int32_t setGroupResultOutputBuf(SQueryRuntimeEnv *pRuntimeEnv, char *pData, int16_t type, int16_t bytes) {
  // the id handed to addNewWindowResultBuf() is always 1 in this function
  const int32_t groupResultId = 1;

  // rows whose group-by value is null are ignored
  if (isNull(pData, type)) {
    return -1;
  }

  SDiskbasedResultBuf *pBuf = pRuntimeEnv->pResultBuf;

  SWindowResult *pRes = doSetTimeWindowFromKey(pRuntimeEnv, &pRuntimeEnv->windowResInfo, pData, bytes);
  if (pRes == NULL) {
    return -1;
  }

  // pageId == -1: no result page has been assigned to this group yet
  if (pRes->pos.pageId == -1 &&
      addNewWindowResultBuf(pRes, pBuf, groupResultId, pRuntimeEnv->numOfRowsPerPage) != 0) {
    return -1;
  }

  setWindowResOutputBuf(pRuntimeEnv, pRes);
  initCtxOutputBuf(pRuntimeEnv);

  return TSDB_CODE_SUCCESS;
}

938
/**
 * Find the data of the (non-tag) group-by column inside the loaded data block.
 *
 * For each group-by column that is not flagged TSDB_COL_TAG, the column is looked up in
 * pQuery->colList to report its type and width, and then looked up by column id in pDataBlock.
 *
 * @param pQuery      query description
 * @param type        [out] data type of the group-by column
 * @param bytes       [out] width of the group-by column in bytes
 * @param pDataBlock  array of SColumnInfoData of the current block
 * @return the column data, or NULL if no group-by column is present in the block
 */
static char *getGroupbyColumnData(SQuery *pQuery, int16_t *type, int16_t *bytes, SArray* pDataBlock) {
  SSqlGroupbyExpr *pExpr = pQuery->pGroupbyExpr;

  for (int32_t g = 0; g < pExpr->numOfGroupCols; ++g) {
    SColIndex *pGroupCol = taosArrayGet(pExpr->columnInfo, g);

    // tag columns are not stored in the data block
    if (pGroupCol->flag == TSDB_COL_TAG) {
      continue;
    }

    int32_t wantedId = pGroupCol->colId;

    // position of the group-by column in the column list of the query
    int16_t slot = -1;
    int32_t c = 0;
    while (c < pQuery->numOfCols) {
      if (pQuery->colList[c].colId == wantedId) {
        slot = c;
        break;
      }

      c += 1;
    }

    assert(slot >= 0 && slot < pQuery->numOfCols);

    *type = pQuery->colList[slot].type;
    *bytes = pQuery->colList[slot].bytes;

    /*
     * The column list was built during the query prepare stage; the current block may not
     * contain the column, hence the explicit search by column id.
     */
    int32_t numOfLoaded = taosArrayGetSize(pDataBlock);
    for (int32_t n = 0; n < numOfLoaded; ++n) {
      SColumnInfoData *pLoaded = taosArrayGet(pDataBlock, n);
      if (pLoaded->info.colId == pGroupCol->colId) {
        return pLoaded->pData;
      }
    }
  }

  return NULL;
}

/**
 * Compare the row at the given offset with the current element of the timestamp buffer.
 *
 * @param pRuntimeEnv  runtime environment, provides pTSBuf and the function contexts
 * @param offset       row index into the input buffer of the first function context
 * @return TS_JOIN_TAG_NOT_EQUALS if the tag of the first context differs from the element tag,
 *         TS_JOIN_TS_NOT_EQUALS if the row has not reached the element timestamp yet (in scan
 *         direction), TS_JOIN_TS_EQUAL otherwise
 */
static int32_t doTSJoinFilter(SQueryRuntimeEnv *pRuntimeEnv, int32_t offset) {
  SQuery *pQuery = pRuntimeEnv->pQuery;

  STSElem         elem = tsBufGetElem(pRuntimeEnv->pTSBuf);
  SQLFunctionCtx *pFirstCtx = &pRuntimeEnv->pCtx[0];

  // the tag is compared before the timestamp
  if (pFirstCtx->tag.i64Key != elem.tag) {
    return TS_JOIN_TAG_NOT_EQUALS;
  }

  TSKEY key = *(TSKEY *)(pFirstCtx->aInputElemBuf + TSDB_KEYSIZE * offset);

#if defined(_DEBUG_VIEW)
  printf("elem in comp ts file:%" PRId64 ", key:%" PRId64 ", tag:%"PRIu64", query order:%d, ts order:%d, traverse:%d, index:%d\n",
         elem.ts, key, elem.tag, pQuery->order.order, pRuntimeEnv->pTSBuf->tsOrder,
         pRuntimeEnv->pTSBuf->cur.order, pRuntimeEnv->pTSBuf->cur.tsIndex);
#endif

  // ascending scan: smaller keys are still behind the element; descending scan: larger keys are
  const bool behind = QUERY_IS_ASC_QUERY(pQuery) ? (key < elem.ts) : (key > elem.ts);
  if (behind) {
    return TS_JOIN_TS_NOT_EQUALS;
  }

  // the row must never be ahead of the timestamp element
  assert(key == elem.ts);

  return TS_JOIN_TS_EQUAL;
}

/**
 * Decide whether a function has to be invoked in the current scan.
 *
 * @param pRuntimeEnv  runtime environment, provides the query order and the scan flag
 * @param pCtx         context of the function, provides its result info
 * @param functionId   id of the function
 * @return false for completed results, for the tag/ts dummy functions and for any other function
 *         during the reverse scan; last/last_dst run only in descending order and
 *         first/first_dst only in ascending order; true otherwise
 */
static bool functionNeedToExecute(SQueryRuntimeEnv *pRuntimeEnv, SQLFunctionCtx *pCtx, int32_t functionId) {
  SResultInfo *pInfo = GET_RES_INFO(pCtx);
  SQuery      *pQuery = pRuntimeEnv->pQuery;

  if (pInfo->complete) {
    return false;
  }

  if (functionId == TSDB_FUNC_TAG_DUMMY || functionId == TSDB_FUNC_TS_DUMMY) {
    return false;
  }

  const bool isLast = (functionId == TSDB_FUNC_LAST_DST || functionId == TSDB_FUNC_LAST);
  if (isLast) {
    return !QUERY_IS_ASC_QUERY(pQuery);
  }

  const bool isFirst = (functionId == TSDB_FUNC_FIRST_DST || functionId == TSDB_FUNC_FIRST);
  if (isFirst) {
    return QUERY_IS_ASC_QUERY(pQuery);
  }

  // all remaining functions are skipped in the reverse (supplementary) scan
  return !IS_REVERSE_SCAN(pRuntimeEnv);
}

1037 1038
/**
 * Apply every output function of the query to one data block row by row.
 *
 * Each row is first checked against the timestamp buffer (if any) and the column filters (if
 * any). Rows that pass are fed to the functions of the matching time window (interval query),
 * of the matching group (group by normal column), or directly to all functions.
 *
 * @param pRuntimeEnv     runtime environment that owns the query and the function contexts
 * @param pStatis         block statistics, forwarded to setExecParams()
 * @param pDataBlockInfo  description of the current block (number of rows, table id)
 * @param pWindowResInfo  time window results, only touched by interval queries
 * @param pDataBlock      array of SColumnInfoData; its first column is read as the timestamp
 *                        column, so it must not be NULL
 */
static void rowwiseApplyFunctions(SQueryRuntimeEnv *pRuntimeEnv, SDataStatis *pStatis, SDataBlockInfo *pDataBlockInfo,
    SWindowResInfo *pWindowResInfo, SArray *pDataBlock) {
  SQLFunctionCtx *pCtx = pRuntimeEnv->pCtx;

  SQuery *pQuery = pRuntimeEnv->pQuery;
  STableQueryInfo* item = pQuery->current;

  TSKEY  *tsCols = (TSKEY*) ((SColumnInfoData *)taosArrayGet(pDataBlock, 0))->pData;
  bool    groupbyStateValue = isGroupbyNormalCol(pQuery->pGroupbyExpr);
  SArithmeticSupport *sasArray = calloc((size_t)pQuery->numOfOutput, sizeof(SArithmeticSupport));

  int16_t type = 0;
  int16_t bytes = 0;

  char *groupbyColumnData = NULL;
  if (groupbyStateValue) {
    groupbyColumnData = getGroupbyColumnData(pQuery, &type, &bytes, pDataBlock);
  }

  // getDataBlock() allocates sasArray[k].data for arithmetic expressions; released at the end
  for (int32_t k = 0; k < pQuery->numOfOutput; ++k) {
    char *dataBlock = getDataBlock(pRuntimeEnv, &sasArray[k], k, pDataBlockInfo->rows, pDataBlock);
    setExecParams(pQuery, &pCtx[k], dataBlock, tsCols, pDataBlockInfo, pStatis, &sasArray[k], k);
  }

  // set the input column data
  for (int32_t k = 0; k < pQuery->numOfFilterCols; ++k) {
    SSingleColumnFilterInfo *pFilterInfo = &pQuery->pFilterInfo[k];
    pFilterInfo->pData = getDataBlockImpl(pDataBlock, pFilterInfo->info.colId);
    assert(pFilterInfo->pData != NULL);
  }

  int32_t step = GET_FORWARD_DIRECTION_FACTOR(pQuery->order.order);

  // from top to bottom in desc
  // from bottom to top in asc order
  if (pRuntimeEnv->pTSBuf != NULL) {
    SQInfo *pQInfo = (SQInfo *)GET_QINFO_ADDR(pQuery);
    qTrace("QInfo:%p process data rows, numOfRows:%d, query order:%d, ts comp order:%d", pQInfo, pDataBlockInfo->rows,
           pQuery->order.order, pRuntimeEnv->pTSBuf->cur.order);
  }

  int32_t j = 0;
  int32_t offset = -1;

  for (j = 0; j < pDataBlockInfo->rows; ++j) {
    offset = GET_COL_DATA_POS(pQuery, j, step);

    if (pRuntimeEnv->pTSBuf != NULL) {
      int32_t r = doTSJoinFilter(pRuntimeEnv, offset);
      if (r == TS_JOIN_TAG_NOT_EQUALS) {
        break;
      } else if (r == TS_JOIN_TS_NOT_EQUALS) {
        continue;
      } else {
        assert(r == TS_JOIN_TS_EQUAL);
      }
    }

    if (pQuery->numOfFilterCols > 0 && (!doFilterData(pQuery, offset))) {
      continue;
    }

    // interval window query
    if (isIntervalQuery(pQuery)) {
      // decide the time window according to the primary timestamp
      int64_t     ts = tsCols[offset];
      STimeWindow win = getActiveTimeWindow(pWindowResInfo, ts, pQuery);

      int32_t ret = setWindowOutputBufByKey(pRuntimeEnv, pWindowResInfo, pDataBlockInfo->tid, &win);
      if (ret != TSDB_CODE_SUCCESS) {  // null data, too many state code
        continue;
      }

      SWindowStatus *pStatus = getTimeWindowResStatus(pWindowResInfo, curTimeWindow(pWindowResInfo));
      doRowwiseApplyFunctions(pRuntimeEnv, pStatus, &win, offset);

      // the same row may belong to several following windows; the current index is restored afterwards
      STimeWindow nextWin = win;
      int32_t     index = pWindowResInfo->curIndex;

      while (1) {
        getNextTimeWindow(pQuery, &nextWin);
        if (/*pWindowResInfo->startTime > nextWin.skey ||*/
            (nextWin.skey > pQuery->window.ekey && QUERY_IS_ASC_QUERY(pQuery)) ||
            (nextWin.skey < pQuery->window.ekey && !QUERY_IS_ASC_QUERY(pQuery))) {
          break;
        }

        if (ts < nextWin.skey || ts > nextWin.ekey) {
          break;
        }

        // null data, failed to allocate more memory buffer
        if (setWindowOutputBufByKey(pRuntimeEnv, pWindowResInfo, pDataBlockInfo->tid, &nextWin) != TSDB_CODE_SUCCESS) {
          break;
        }

        pStatus = getTimeWindowResStatus(pWindowResInfo, curTimeWindow(pWindowResInfo));
        doRowwiseApplyFunctions(pRuntimeEnv, pStatus, &nextWin, offset);
      }

      pWindowResInfo->curIndex = index;
    } else {  // other queries
      // decide which group this rows belongs to according to current state value
      if (groupbyStateValue) {
        char *val = groupbyColumnData + bytes * offset;

        int32_t ret = setGroupResultOutputBuf(pRuntimeEnv, val, type, bytes);
        if (ret != TSDB_CODE_SUCCESS) {  // null data, too many state code
          continue;
        }
      }

      for (int32_t k = 0; k < pQuery->numOfOutput; ++k) {
        int32_t functionId = pQuery->pSelectExpr[k].base.functionId;
        if (functionNeedToExecute(pRuntimeEnv, &pCtx[k], functionId)) {
          aAggs[functionId].xFunctionF(&pCtx[k], offset);
        }
      }
    }

    if (pRuntimeEnv->pTSBuf != NULL) {
      // if timestamp filter list is empty, quit current query
      if (!tsBufNextPos(pRuntimeEnv->pTSBuf)) {
        setQueryStatus(pQuery, QUERY_COMPLETED);
        break;
      }
    }
  }

  // offset is still -1 when the block holds no rows; tsCols[-1] must not be read in that case
  if (offset >= 0) {
    item->lastKey = tsCols[offset] + step;
  }

  // todo refactor: extract method
  // only arithmetic expressions own a column pointer array
  for(int32_t i = 0; i < pQuery->numOfOutput; ++i) {
    if (pQuery->pSelectExpr[i].base.functionId != TSDB_FUNC_ARITHM) {
      continue;
    }

    tfree(sasArray[i].data);
  }

  tfree(sasArray);
}

static int32_t tableApplyFunctionsOnBlock(SQueryRuntimeEnv *pRuntimeEnv, SDataBlockInfo *pDataBlockInfo,
H
hjxilinx 已提交
1181
                                          SDataStatis *pStatis, __block_search_fn_t searchFn, SArray *pDataBlock) {
H
hjxilinx 已提交
1182
  SQuery *pQuery = pRuntimeEnv->pQuery;
H
hjxilinx 已提交
1183 1184 1185
  
  STableQueryInfo* pTableQInfo = pQuery->current;
  SWindowResInfo*  pWindowResInfo = &pRuntimeEnv->windowResInfo;
H
hjxilinx 已提交
1186
  
1187
  if (pQuery->numOfFilterCols > 0 || pRuntimeEnv->pTSBuf != NULL || isGroupbyNormalCol(pQuery->pGroupbyExpr)) {
1188
    rowwiseApplyFunctions(pRuntimeEnv, pStatis, pDataBlockInfo, pWindowResInfo, pDataBlock);
1189
  } else {
1190
    blockwiseApplyFunctions(pRuntimeEnv, pStatis, pDataBlockInfo, pWindowResInfo, searchFn, pDataBlock);
1191
  }
1192 1193

  TSKEY lastKey = QUERY_IS_ASC_QUERY(pQuery) ? pDataBlockInfo->window.ekey : pDataBlockInfo->window.skey;
H
hjxilinx 已提交
1194
  pTableQInfo->lastKey = lastKey + GET_FORWARD_DIRECTION_FACTOR(pQuery->order.order);
1195 1196


1197
  // interval query with limit applied
1198 1199 1200 1201 1202 1203
  int32_t numOfRes = 0;
  
  if (isIntervalQuery(pQuery)) {
    numOfRes = doCheckQueryCompleted(pRuntimeEnv, lastKey, pWindowResInfo);
  } else {
    numOfRes = getNumOfResult(pRuntimeEnv);
1204

1205 1206 1207 1208
    // update the number of output result
    if (numOfRes > 0 && pQuery->checkBuffer == 1) {
      assert(numOfRes >= pQuery->rec.rows);
      pQuery->rec.rows = numOfRes;
1209

1210 1211 1212
      if (numOfRes >= pQuery->rec.threshold) {
        setQueryStatus(pQuery, QUERY_RESBUF_FULL);
      }
1213

1214 1215 1216
      if ((pQuery->limit.limit >= 0) && (pQuery->limit.limit + pQuery->limit.offset) <= numOfRes) {
        setQueryStatus(pQuery, QUERY_COMPLETED);
      }
H
Haojun Liao 已提交
1217
    }
1218
  }
1219

1220
  return numOfRes;
1221 1222
}

H
Haojun Liao 已提交
1223
void setExecParams(SQuery *pQuery, SQLFunctionCtx *pCtx, void* inputData, TSKEY *tsCol, SDataBlockInfo* pBlockInfo,
1224 1225 1226 1227 1228 1229 1230
                   SDataStatis *pStatis, void *param, int32_t colIndex) {
  
  int32_t functionId = pQuery->pSelectExpr[colIndex].base.functionId;
  int32_t colId = pQuery->pSelectExpr[colIndex].base.colInfo.colId;
  
  SDataStatis *tpField = NULL;
  pCtx->hasNull = hasNullValue(pQuery, colIndex, pBlockInfo->numOfCols, pStatis, &tpField);
1231
  pCtx->aInputElemBuf = inputData;
1232

1233
  if (tpField != NULL) {
H
Haojun Liao 已提交
1234
    pCtx->preAggVals.isSet  = true;
1235 1236
    pCtx->preAggVals.statis = *tpField;
    assert(pCtx->preAggVals.statis.numOfNull <= pBlockInfo->rows);
1237 1238 1239
  } else {
    pCtx->preAggVals.isSet = false;
  }
1240

H
Haojun Liao 已提交
1241 1242 1243
  // limit/offset query will affect this value
  pCtx->startOffset = QUERY_IS_ASC_QUERY(pQuery) ? pQuery->pos:0;
  pCtx->size = QUERY_IS_ASC_QUERY(pQuery) ? pBlockInfo->rows - pQuery->pos : pQuery->pos + 1;
1244

1245 1246
  uint32_t status = aAggs[functionId].nStatus;
  if (((status & (TSDB_FUNCSTATE_SELECTIVITY | TSDB_FUNCSTATE_NEED_TS)) != 0) && (tsCol != NULL)) {
H
Haojun Liao 已提交
1247
    pCtx->ptsList = tsCol;
1248
  }
1249

1250 1251 1252 1253 1254
  if (functionId >= TSDB_FUNC_FIRST_DST && functionId <= TSDB_FUNC_LAST_DST) {
    // last_dist or first_dist function
    // store the first&last timestamp into the intermediate buffer [1], the true
    // value may be null but timestamp will never be null
  } else if (functionId == TSDB_FUNC_TOP || functionId == TSDB_FUNC_BOTTOM || functionId == TSDB_FUNC_TWA ||
1255
             functionId == TSDB_FUNC_DIFF || (functionId >= TSDB_FUNC_RATE && functionId <= TSDB_FUNC_AVG_IRATE)) {
1256
    /*
H
Haojun Liao 已提交
1257
     * least squares function needs two columns of input, currently, the x value of linear equation is set to
1258 1259 1260 1261 1262 1263 1264 1265 1266 1267
     * timestamp column, and the y-value is the column specified in pQuery->pSelectExpr[i].colIdxInBuffer
     *
     * top/bottom function needs timestamp to indicate when the
     * top/bottom values emerge, so does diff function
     */
    if (functionId == TSDB_FUNC_TWA) {
      STwaInfo *pTWAInfo = GET_RES_INFO(pCtx)->interResultBuf;
      pTWAInfo->SKey = pQuery->window.skey;
      pTWAInfo->EKey = pQuery->window.ekey;
    }
1268

1269 1270
  } else if (functionId == TSDB_FUNC_ARITHM) {
    pCtx->param[1].pz = param;
H
Haojun Liao 已提交
1271 1272 1273 1274 1275 1276
  } else if (functionId == TSDB_FUNC_SPREAD) {  // set the statistics data for primary time stamp column
    if (colId == PRIMARYKEY_TIMESTAMP_COL_INDEX) {
      pCtx->preAggVals.isSet  = true;
      pCtx->preAggVals.statis.min = pBlockInfo->window.skey;
      pCtx->preAggVals.statis.max = pBlockInfo->window.ekey;
    }
1277 1278 1279 1280 1281 1282 1283 1284 1285 1286 1287 1288 1289
  } else if (functionId == TSDB_FUNC_INTERP) {
    SInterpInfoDetail *pInterpInfo = GET_RES_INFO(pCtx)->interResultBuf;
    pInterpInfo->type = pQuery->fillType;
    pInterpInfo->ts = pQuery->window.skey;
    pInterpInfo->primaryCol = (colId == PRIMARYKEY_TIMESTAMP_COL_INDEX);
  
    if (pQuery->fillVal != NULL) {
      if (isNull((const char*) &pQuery->fillVal[colIndex], pCtx->inputType)) {
        pCtx->param[1].nType = TSDB_DATA_TYPE_NULL;
      } else { // todo refactor, tVariantCreateFromBinary should handle the NULL value
        tVariantCreateFromBinary(&pCtx->param[1], (char*) &pQuery->fillVal[colIndex], pCtx->inputBytes, pCtx->inputType);
      }
    }
1290
  }
1291

1292 1293 1294 1295 1296 1297
#if defined(_DEBUG_VIEW)
  //  int64_t *tsList = (int64_t *)primaryColumnData;
//  int64_t  s = tsList[0];
//  int64_t  e = tsList[size - 1];

//    if (IS_DATA_BLOCK_LOADED(blockStatus)) {
S
slguan 已提交
1298
//        qTrace("QInfo:%p query ts:%lld-%lld, offset:%d, rows:%d, bstatus:%d,
1299 1300 1301
//        functId:%d", GET_QINFO_ADDR(pQuery),
//               s, e, startOffset, size, blockStatus, functionId);
//    } else {
S
slguan 已提交
1302
//        qTrace("QInfo:%p block not loaded, bstatus:%d",
1303 1304 1305 1306 1307 1308 1309 1310
//        GET_QINFO_ADDR(pQuery), blockStatus);
//    }
#endif
}

/**
 * Set the tag output information for a selectivity + tag query.
 *
 * Collects the contexts of all TSDB_FUNC_TAG_DUMMY / TSDB_FUNC_TS_DUMMY columns and attaches the
 * list to the context of the selectivity function (the last one, if there are several). Nothing
 * is done when the query is not a selectivity-with-tags query.
 *
 * @param pQuery  query description
 * @param pCtx    array of pQuery->numOfOutput function contexts
 */
static void setCtxTagColumnInfo(SQuery *pQuery, SQLFunctionCtx *pCtx) {
  if (!isSelectivityWithTagsQuery(pQuery)) {
    return;
  }

  int32_t num = 0;
  int16_t tagLen = 0;

  SQLFunctionCtx *p = NULL;
  SQLFunctionCtx **pTagCtx = calloc(pQuery->numOfOutput, POINTER_BYTES);
  if (pTagCtx == NULL) {
    // out of memory: leave the tag info untouched instead of writing through a NULL pointer
    return;
  }

  for (int32_t i = 0; i < pQuery->numOfOutput; ++i) {
    SSqlFuncMsg *pSqlFuncMsg = &pQuery->pSelectExpr[i].base;

    if (pSqlFuncMsg->functionId == TSDB_FUNC_TAG_DUMMY || pSqlFuncMsg->functionId == TSDB_FUNC_TS_DUMMY) {
      tagLen += pCtx[i].outputBytes;
      pTagCtx[num++] = &pCtx[i];
    } else if ((aAggs[pSqlFuncMsg->functionId].nStatus & TSDB_FUNCSTATE_SELECTIVITY) != 0) {
      p = &pCtx[i];
    } else if (pSqlFuncMsg->functionId == TSDB_FUNC_TS || pSqlFuncMsg->functionId == TSDB_FUNC_TAG) {
      // tag function may be the group by tag column
      // ts may be the required primary timestamp column
      continue;
    } else {
      // the column may be the normal column, group by normal_column, the functionId is TSDB_FUNC_PRJ
    }
  }

  if (p == NULL) {
    // no selectivity function found: nobody takes ownership of the list, release it
    tfree(pTagCtx);
    return;
  }

  // the selectivity context owns the list from here on
  p->tagInfo.pTagCtxList = pTagCtx;
  p->tagInfo.numOfTagCols = num;
  p->tagInfo.tagsLen = tagLen;
}

/**
 * Set up the intermediate result buffer of every output column.
 *
 * @param pResultInfo    array of pQuery->numOfOutput result info entries
 * @param pQuery         query description, provides the intermediate buffer size per column
 * @param isStableQuery  forwarded to setResultInfoBuf()
 */
static void setWindowResultInfo(SResultInfo *pResultInfo, SQuery *pQuery, bool isStableQuery) {
  int32_t col = 0;

  while (col < pQuery->numOfOutput) {
    // an intermediate result must fit into DEFAULT_INTERN_BUF_PAGE_SIZE
    assert(pQuery->pSelectExpr[col].interBytes <= DEFAULT_INTERN_BUF_PAGE_SIZE);

    setResultInfoBuf(pResultInfo + col, pQuery->pSelectExpr[col].interBytes, isStableQuery);
    col += 1;
  }
}

1348
/**
 * Allocate and initialize the per-column result info and function contexts of the runtime
 * environment.
 *
 * @param pRuntimeEnv  runtime environment; pRuntimeEnv->pQuery must already be set
 * @param order        stored in param[2] of top/bottom/diff functions
 * @return TSDB_CODE_SUCCESS, or TSDB_CODE_SERV_OUT_OF_MEMORY if one of the two arrays could not
 *         be allocated (both are released in that case)
 */
static int32_t setupQueryRuntimeEnv(SQueryRuntimeEnv *pRuntimeEnv, int16_t order) {
  qTrace("QInfo:%p setup runtime env", GET_QINFO_ADDR(pRuntimeEnv));
  SQuery *pQuery = pRuntimeEnv->pQuery;

  pRuntimeEnv->resultInfo = calloc(pQuery->numOfOutput, sizeof(SResultInfo));
  pRuntimeEnv->pCtx = (SQLFunctionCtx *)calloc(pQuery->numOfOutput, sizeof(SQLFunctionCtx));

  if (pRuntimeEnv->resultInfo == NULL || pRuntimeEnv->pCtx == NULL) {
    goto _clean;
  }

  pRuntimeEnv->offset[0] = 0;
  for (int32_t i = 0; i < pQuery->numOfOutput; ++i) {
    SSqlFuncMsg *pSqlFuncMsg = &pQuery->pSelectExpr[i].base;

    SQLFunctionCtx *pCtx = &pRuntimeEnv->pCtx[i];
    SColIndex* pIndex = &pSqlFuncMsg->colInfo;

    // the input type/width comes from the tag column list or the normal column list
    int32_t index = pSqlFuncMsg->colInfo.colIndex;
    if (TSDB_COL_IS_TAG(pIndex->flag)) {
      if (pIndex->colId == TSDB_TBNAME_COLUMN_INDEX) {  // todo refactor
        pCtx->inputBytes = TSDB_TABLE_NAME_LEN + VARSTR_HEADER_SIZE;
        pCtx->inputType = TSDB_DATA_TYPE_BINARY;
      } else {
        pCtx->inputBytes = pQuery->tagColList[index].bytes;
        pCtx->inputType = pQuery->tagColList[index].type;
      }
    } else {
      pCtx->inputBytes = pQuery->colList[index].bytes;
      pCtx->inputType = pQuery->colList[index].type;
    }

    assert(isValidDataType(pCtx->inputType, pCtx->inputBytes));
    pCtx->ptsOutputBuf = NULL;

    pCtx->outputBytes = pQuery->pSelectExpr[i].bytes;
    pCtx->outputType = pQuery->pSelectExpr[i].type;

    pCtx->order = pQuery->order.order;
    pCtx->functionId = pSqlFuncMsg->functionId;

    pCtx->numOfParams = pSqlFuncMsg->numOfParams;
    for (int32_t j = 0; j < pCtx->numOfParams; ++j) {
      int16_t type = pSqlFuncMsg->arg[j].argType;
      int16_t bytes = pSqlFuncMsg->arg[j].argBytes;
      if (type == TSDB_DATA_TYPE_BINARY || type == TSDB_DATA_TYPE_NCHAR) {
        // read the j-th argument; "arg->argValue" would always yield the first argument
        tVariantCreateFromBinary(&pCtx->param[j], pSqlFuncMsg->arg[j].argValue.pz, bytes, type);
      } else {
        tVariantCreateFromBinary(&pCtx->param[j], (char *)&pSqlFuncMsg->arg[j].argValue.i64, bytes, type);
      }
    }

    // set the order information for top/bottom query
    int32_t functionId = pCtx->functionId;

    if (functionId == TSDB_FUNC_TOP || functionId == TSDB_FUNC_BOTTOM || functionId == TSDB_FUNC_DIFF) {
      // these functions expect a timestamp function in the first output column
      int32_t f = pQuery->pSelectExpr[0].base.functionId;
      assert(f == TSDB_FUNC_TS || f == TSDB_FUNC_TS_DUMMY);

      pCtx->param[2].i64Key = order;
      pCtx->param[2].nType = TSDB_DATA_TYPE_BIGINT;
      pCtx->param[3].i64Key = functionId;
      pCtx->param[3].nType = TSDB_DATA_TYPE_BIGINT;

      pCtx->param[1].i64Key = pQuery->order.orderColId;
    }

    // offset of column i = sum of the output widths of all preceding columns
    if (i > 0) {
      pRuntimeEnv->offset[i] = pRuntimeEnv->offset[i - 1] + pRuntimeEnv->pCtx[i - 1].outputBytes;
    }
  }

  // set the intermediate result output buffer
  setWindowResultInfo(pRuntimeEnv->resultInfo, pQuery, pRuntimeEnv->stableQuery);

  // if it is group by normal column, do not set output buffer, the output buffer is pResult
  if (!isGroupbyNormalCol(pQuery->pGroupbyExpr) && !pRuntimeEnv->stableQuery) {
    resetCtxOutputBuf(pRuntimeEnv);
  }

  setCtxTagColumnInfo(pQuery, pRuntimeEnv->pCtx);
  return TSDB_CODE_SUCCESS;

_clean:
  tfree(pRuntimeEnv->resultInfo);
  tfree(pRuntimeEnv->pCtx);

  return TSDB_CODE_SERV_OUT_OF_MEMORY;
}

/**
 * Release everything the runtime environment owns: time window info, function contexts, result
 * info, fill info, result buffer, query handles and the timestamp buffer.
 *
 * Does nothing when no query is attached to the environment.
 *
 * @param pRuntimeEnv  runtime environment to tear down
 */
static void teardownQueryRuntimeEnv(SQueryRuntimeEnv *pRuntimeEnv) {
  SQuery *pQuery = pRuntimeEnv->pQuery;
  if (pQuery == NULL) {
    return;
  }

  SQInfo* pQInfo = (SQInfo*) GET_QINFO_ADDR(pRuntimeEnv);

  qTrace("QInfo:%p teardown runtime env", pQInfo);
  cleanupTimeWindowInfo(&pRuntimeEnv->windowResInfo, pQuery->numOfOutput);

  if (pRuntimeEnv->pCtx != NULL) {
    for (int32_t col = 0; col < pQuery->numOfOutput; ++col) {
      SQLFunctionCtx *pFuncCtx = pRuntimeEnv->pCtx + col;

      // parameters were created with tVariantCreateFromBinary() and may own memory
      int32_t k = 0;
      while (k < pFuncCtx->numOfParams) {
        tVariantDestroy(&pFuncCtx->param[k]);
        k += 1;
      }

      tVariantDestroy(&pFuncCtx->tag);
      tfree(pFuncCtx->tagInfo.pTagCtxList);
      tfree(pRuntimeEnv->resultInfo[col].interResultBuf);
    }

    tfree(pRuntimeEnv->resultInfo);
    tfree(pRuntimeEnv->pCtx);
  }

  taosDestoryFillInfo(pRuntimeEnv->pFillInfo);

  destroyResultBuf(pRuntimeEnv->pResultBuf, pQInfo);
  tsdbCleanupQueryHandle(pRuntimeEnv->pQueryHandle);
  tsdbCleanupQueryHandle(pRuntimeEnv->pSecQueryHandle);

  // tsBufDestory() hands back the new value for the pointer
  pRuntimeEnv->pTSBuf = tsBufDestory(pRuntimeEnv->pTSBuf);
}

1476 1477
/**
 * Check whether the query has been cancelled.
 *
 * @param pQInfo  query info
 * @return true if the query code is TSDB_CODE_QUERY_CANCELLED
 */
static bool isQueryKilled(SQInfo *pQInfo) {
  if (pQInfo->code == TSDB_CODE_QUERY_CANCELLED) {
    return true;
  }

  return false;
}

1493
/**
 * Mark the query as cancelled by storing TSDB_CODE_QUERY_CANCELLED in its code.
 *
 * @param pQInfo  query info
 */
static void setQueryKilled(SQInfo *pQInfo) {
  pQInfo->code = TSDB_CODE_QUERY_CANCELLED;
}
H
hjxilinx 已提交
1494

H
hjxilinx 已提交
1495
/**
 * Check whether the query is treated as a fixed output query.
 *
 * @param pQuery  query description
 * @return false for interval queries; true for top/bottom queries, group-by-normal-column
 *         queries, and queries with at least one non-multi-output function (ignoring the
 *         ts_comp projection and the ts functions); false otherwise
 */
static bool isFixedOutputQuery(SQuery *pQuery) {
  if (pQuery->intervalTime != 0) {
    return false;
  }

  // Note:top/bottom query is fixed output query
  if (isTopBottomQuery(pQuery) || isGroupbyNormalCol(pQuery->pGroupbyExpr)) {
    return true;
  }

  for (int32_t col = 0; col < pQuery->numOfOutput; ++col) {
    SSqlFuncMsg *pFunc = &pQuery->pSelectExpr[col].base;

    // the ts_comp function: projection of the primary timestamp in the first column
    const bool isTsComp = (col == 0) && (pFunc->functionId == TSDB_FUNC_PRJ) && (pFunc->numOfParams == 1) &&
                          (pFunc->colInfo.colIndex == PRIMARYKEY_TIMESTAMP_COL_INDEX);

    const bool isTsFunc = (pFunc->functionId == TSDB_FUNC_TS) || (pFunc->functionId == TSDB_FUNC_TS_DUMMY);

    if (isTsComp || isTsFunc) {
      continue;
    }

    if (!IS_MULTIOUTPUT(aAggs[pFunc->functionId].nStatus)) {
      return true;
    }
  }

  return false;
}

1526
// todo refactor with isLastRowQuery
H
hjxilinx 已提交
1527
/**
 * Check whether any output column uses the interp function.
 *
 * @param pQuery  query description
 * @return true if at least one output function id is TSDB_FUNC_INTERP
 */
static bool isPointInterpoQuery(SQuery *pQuery) {
  bool    found = false;
  int32_t col = 0;

  while (!found && col < pQuery->numOfOutput) {
    found = (pQuery->pSelectExpr[col].base.functionId == TSDB_FUNC_INTERP);
    col += 1;
  }

  return found;
}

// TODO REFACTOR:MERGE WITH CLIENT-SIDE FUNCTION
H
hjxilinx 已提交
1539
/**
 * Check whether any output column uses one of the sum/avg rate functions.
 *
 * @param pQuery  query description
 * @return true if a function id is TSDB_FUNC_SUM_RATE, TSDB_FUNC_SUM_IRATE, TSDB_FUNC_AVG_RATE
 *         or TSDB_FUNC_AVG_IRATE
 */
static bool isSumAvgRateQuery(SQuery *pQuery) {
  for (int32_t col = 0; col < pQuery->numOfOutput; ++col) {
    int32_t id = pQuery->pSelectExpr[col].base.functionId;

    // the timestamp function is never a rate function
    if (id == TSDB_FUNC_TS) {
      continue;
    }

    const bool isSumRate = (id == TSDB_FUNC_SUM_RATE) || (id == TSDB_FUNC_SUM_IRATE);
    const bool isAvgRate = (id == TSDB_FUNC_AVG_RATE) || (id == TSDB_FUNC_AVG_IRATE);

    if (isSumRate || isAvgRate) {
      return true;
    }
  }

  return false;
}

H
hjxilinx 已提交
1555
/**
 * Check whether any output column uses the last_row function.
 *
 * @param pQuery  query description
 * @return true if at least one output function id is TSDB_FUNC_LAST_ROW
 */
static bool isFirstLastRowQuery(SQuery *pQuery) {
  bool    found = false;
  int32_t col = 0;

  while (!found && col < pQuery->numOfOutput) {
    found = (pQuery->pSelectExpr[col].base.functionId == TSDB_FUNC_LAST_ROW);
    col += 1;
  }

  return found;
}

H
hjxilinx 已提交
1566
/**
 * Check whether the query window spans the complete time range, i.e. [0, INT64_MAX] for an
 * ascending query or [INT64_MAX, 0] for a descending one.
 *
 * @param pQuery  query description
 * @return true if the window is the complete range for the query order
 */
static UNUSED_FUNC bool notHasQueryTimeRange(SQuery *pQuery) {
  const bool asc = QUERY_IS_ASC_QUERY(pQuery);

  // the window runs in scan direction, so both ends swap for a descending query
  const int64_t fullStart = asc ? 0 : INT64_MAX;
  const int64_t fullEnd = asc ? INT64_MAX : 0;

  return (pQuery->window.skey == fullStart) && (pQuery->window.ekey == fullEnd);
}

H
hjxilinx 已提交
1571
/**
 * Check whether the query needs a scan in the opposite direction.
 *
 * @param pQuery  query description
 * @return true if an ascending query contains last/last_dst or a descending query contains
 *         first/first_dst
 */
static bool needReverseScan(SQuery *pQuery) {
  for (int32_t col = 0; col < pQuery->numOfOutput; ++col) {
    int32_t id = pQuery->pSelectExpr[col].base.functionId;

    // ts, ts_dummy and tag columns never trigger a reverse scan
    if (id == TSDB_FUNC_TS || id == TSDB_FUNC_TS_DUMMY || id == TSDB_FUNC_TAG) {
      continue;
    }

    const bool isLast = (id == TSDB_FUNC_LAST) || (id == TSDB_FUNC_LAST_DST);
    const bool isFirst = (id == TSDB_FUNC_FIRST) || (id == TSDB_FUNC_FIRST_DST);

    if (isLast && QUERY_IS_ASC_QUERY(pQuery)) {
      return true;
    }

    if (isFirst && !QUERY_IS_ASC_QUERY(pQuery)) {
      return true;
    }
  }

  return false;
}
H
hjxilinx 已提交
1586 1587 1588 1589

/**
 * Check whether every output column is a tag projection or a tid_tag function.
 *
 * @param pQuery  query description
 * @return true if all function ids are TSDB_FUNC_TAGPRJ or TSDB_FUNC_TID_TAG (also true when
 *         there is no output column)
 */
static bool onlyQueryTags(SQuery* pQuery) {
  int32_t col = 0;

  while (col < pQuery->numOfOutput) {
    int32_t id = pQuery->pSelectExpr[col].base.functionId;

    const bool isTagOnly = (id == TSDB_FUNC_TAGPRJ) || (id == TSDB_FUNC_TID_TAG);
    if (!isTagOnly) {
      return false;
    }

    col += 1;
  }

  return true;
}

1598 1599
/////////////////////////////////////////////////////////////////////////////////////////////

1600 1601
/*
 * Computes the time window that contains `key` and the part of that window
 * that overlaps the range [keyFirst, keyLast].
 *
 * @param pQuery    supplies slidingTime, slidingTimeUnit, precision and intervalTime
 * @param key       timestamp to locate; must lie within [keyFirst, keyLast]
 * @param keyFirst  lower bound of the range of interest
 * @param keyLast   upper bound of the range of interest
 * @param realSkey  [out] start of the window clipped to keyFirst
 * @param realEkey  [out] end of the window clipped to keyLast
 * @param win       [out] the window itself: skey comes from
 *                  taosGetIntervalStartTimestamp(), ekey is skey + intervalTime - 1
 */
void getAlignQueryTimeWindow(SQuery *pQuery, int64_t key, int64_t keyFirst, int64_t keyLast, int64_t *realSkey,
                             int64_t *realEkey, STimeWindow *win) {
  assert(key >= keyFirst && key <= keyLast && pQuery->slidingTime <= pQuery->intervalTime);

  win->skey = taosGetIntervalStartTimestamp(key, pQuery->slidingTime, pQuery->slidingTimeUnit, pQuery->precision);

  if (keyFirst > (INT64_MAX - pQuery->intervalTime)) {
    /*
     * keyFirst is within one interval of INT64_MAX, so [keyFirst, keyLast] has to be
     * shorter than one interval: report the range unchanged and pin the window end
     * to INT64_MAX instead of computing skey + intervalTime - 1.
     */
    assert(keyLast - keyFirst < pQuery->intervalTime);

    *realSkey = keyFirst;
    *realEkey = keyLast;

    win->ekey = INT64_MAX;
    return;
  }

  win->ekey = win->skey + pQuery->intervalTime - 1;

  // clip the window to the range of interest
  *realSkey = (win->skey < keyFirst) ? keyFirst : win->skey;
  *realEkey = (win->ekey < keyLast) ? win->ekey : keyLast;
}

/*
 * Sets pQuery->checkBuffer.
 *
 * The flag is 0 for top/bottom queries and for group-by-normal-column queries.
 * Otherwise it is 1 only when there is at least one output expression other
 * than ts/ts_dummy and every such expression is flagged multi-output in aAggs.
 */
static void setScanLimitationByResultBuffer(SQuery *pQuery) {
  if (isTopBottomQuery(pQuery) || isGroupbyNormalCol(pQuery->pGroupbyExpr)) {
    pQuery->checkBuffer = 0;
    return;
  }

  bool allMultiOutput = false;

  for (int32_t idx = 0; idx < pQuery->numOfOutput; ++idx) {
    SSqlFuncMsg *pFunc = &pQuery->pSelectExpr[idx].base;

    // timestamp expressions do not take part in the decision
    if (pFunc->functionId == TSDB_FUNC_TS || pFunc->functionId == TSDB_FUNC_TS_DUMMY) {
      continue;
    }

    allMultiOutput = IS_MULTIOUTPUT(aAggs[pFunc->functionId].nStatus);
    if (!allMultiOutput) {
      break;  // a single non multi-output expression settles the answer
    }
  }

  if (allMultiOutput) {
    pQuery->checkBuffer = 1;
  } else {
    pQuery->checkBuffer = 0;
  }
}

/*
 * todo add more parameters to check soon..
 */
1661
bool colIdCheck(SQuery *pQuery) {
1662 1663
  // load data column information is incorrect
  for (int32_t i = 0; i < pQuery->numOfCols - 1; ++i) {
1664
    if (pQuery->colList[i].colId == pQuery->colList[i + 1].colId) {
S
slguan 已提交
1665
      qError("QInfo:%p invalid data load column for query", GET_QINFO_ADDR(pQuery));
1666 1667 1668
      return false;
    }
  }
1669
  
1670 1671 1672 1673 1674 1675
  return true;
}

/*
 * Returns true when every output expression, apart from the ts/ts_dummy/tag/tag_dummy
 * pseudo-functions, uses either `functId` or `functIdDst`.
 *
 * todo: also skip avg/sum/min/max/count/stddev/top/bottom, for which the scan
 * order does not matter
 */
static bool onlyOneQueryType(SQuery *pQuery, int32_t functId, int32_t functIdDst) {
  for (int32_t idx = 0; idx < pQuery->numOfOutput; ++idx) {
    int32_t fid = pQuery->pSelectExpr[idx].base.functionId;

    bool isPseudo = (fid == TSDB_FUNC_TS || fid == TSDB_FUNC_TS_DUMMY || fid == TSDB_FUNC_TAG ||
                     fid == TSDB_FUNC_TAG_DUMMY);
    if (isPseudo) {
      continue;
    }

    bool matches = (fid == functId || fid == functIdDst);
    if (!matches) {
      return false;
    }
  }

  return true;
}

/* Returns true when the query consists solely of first/first_dst (plus ts/tag pseudo-functions). */
static bool onlyFirstQuery(SQuery *pQuery) {
  return onlyOneQueryType(pQuery, TSDB_FUNC_FIRST, TSDB_FUNC_FIRST_DST);
}

/* Returns true when the query consists solely of last/last_dst (plus ts/tag pseudo-functions). */
static bool onlyLastQuery(SQuery *pQuery) {
  return onlyOneQueryType(pQuery, TSDB_FUNC_LAST, TSDB_FUNC_LAST_DST);
}

1696
/*
 * Forces the scan order required by certain query types and swaps the query
 * window endpoints whenever the order is flipped:
 *   - last_row queries always scan in descending order;
 *   - point-interpolation queries without an interval always scan ascending;
 *   - queries made up only of first() scan ascending, only of last() scan descending.
 *     This applies to non-interval queries, and to interval queries only when
 *     stableQuery is set.
 * Interval queries with stableQuery unset keep the requested order.
 */
static void changeExecuteScanOrder(SQuery *pQuery, bool stableQuery) {
  char msg[] = "QInfo:%p scan order changed for %s query, old:%d, new:%d, qrange exchanged, old qrange:%" PRId64
               "-%" PRId64 ", new qrange:%" PRId64 "-%" PRId64;

  // todo handle order-irrelevant query types mixed up with order-critical ones
  if (isFirstLastRowQuery(pQuery)) {
    qTrace("QInfo:%p scan order changed for last_row query, old:%d, new:%d", GET_QINFO_ADDR(pQuery),
           pQuery->order.order, TSDB_ORDER_DESC);

    pQuery->order.order = TSDB_ORDER_DESC;

    // a descending scan starts at the larger key and ends at the smaller one
    int64_t lowKey = MIN(pQuery->window.skey, pQuery->window.ekey);
    int64_t highKey = MAX(pQuery->window.skey, pQuery->window.ekey);

    pQuery->window.skey = highKey;
    pQuery->window.ekey = lowKey;
    return;
  }

  if (isPointInterpoQuery(pQuery) && pQuery->intervalTime == 0) {
    if (!QUERY_IS_ASC_QUERY(pQuery)) {
      qTrace(msg, GET_QINFO_ADDR(pQuery), "interp", pQuery->order.order, TSDB_ORDER_ASC, pQuery->window.skey,
             pQuery->window.ekey, pQuery->window.ekey, pQuery->window.skey);
      SWAP(pQuery->window.skey, pQuery->window.ekey, TSKEY);
    }

    pQuery->order.order = TSDB_ORDER_ASC;
    return;
  }

  bool hasInterval = (pQuery->intervalTime != 0);
  if (hasInterval && !stableQuery) {
    return;  // interval query without stableQuery: leave the order untouched
  }

  // the two remaining cases share the same handling and differ only in the log label
  if (onlyFirstQuery(pQuery)) {
    if (!QUERY_IS_ASC_QUERY(pQuery)) {
      qTrace(msg, GET_QINFO_ADDR(pQuery), hasInterval ? "only-first stable" : "only-first", pQuery->order.order,
             TSDB_ORDER_ASC, pQuery->window.skey, pQuery->window.ekey, pQuery->window.ekey, pQuery->window.skey);

      SWAP(pQuery->window.skey, pQuery->window.ekey, TSKEY);
    }

    pQuery->order.order = TSDB_ORDER_ASC;
  } else if (onlyLastQuery(pQuery)) {
    if (QUERY_IS_ASC_QUERY(pQuery)) {
      qTrace(msg, GET_QINFO_ADDR(pQuery), hasInterval ? "only-last stable" : "only-last", pQuery->order.order,
             TSDB_ORDER_DESC, pQuery->window.skey, pQuery->window.ekey, pQuery->window.ekey, pQuery->window.skey);

      SWAP(pQuery->window.skey, pQuery->window.ekey, TSKEY);
    }

    pQuery->order.order = TSDB_ORDER_DESC;
  }
}

H
hjxilinx 已提交
1775
/*
 * Stores an interpolation value in pCtx->param[index] as a freshly allocated
 * binary blob laid out as: [timestamp (TSDB_KEYSIZE bytes)][value].
 *
 * @param pCtx   function context; param[index].pz must be NULL on entry
 * @param ts     timestamp written at the start of the blob
 * @param type   TSDB_DATA_TYPE_* of `data`
 * @param index  slot in pCtx->param to fill
 * @param data   source value; NUL-terminated for BINARY, wide NUL-terminated for NCHAR
 *
 * NOTE(review): the allocation result is not checked before it is written to.
 * NOTE(review): INT/BOOL/TINYINT/SMALLINT are read with GET_INT64_VAL; this presumably
 * expects `data` to already hold a 64-bit value - confirm against callers.
 */
static UNUSED_FUNC void doSetInterpVal(SQLFunctionCtx *pCtx, TSKEY ts, int16_t type, int32_t index, char *data) {
  assert(pCtx->param[index].pz == NULL);

  size_t  numOfChars = 0;
  int32_t blobLen = 0;

  // size and allocate the blob; string types are zero-filled so the copy stays terminated
  switch (type) {
    case TSDB_DATA_TYPE_BINARY:
      numOfChars = strlen(data);
      blobLen = numOfChars + 1 + TSDB_KEYSIZE;
      pCtx->param[index].pz = calloc(1, blobLen);
      break;
    case TSDB_DATA_TYPE_NCHAR:
      numOfChars = wcslen((const wchar_t *)data);
      blobLen = (numOfChars + 1) * TSDB_NCHAR_SIZE + TSDB_KEYSIZE;
      pCtx->param[index].pz = calloc(1, blobLen);
      break;
    default:
      blobLen = TSDB_KEYSIZE * 2;
      pCtx->param[index].pz = malloc(blobLen);
      break;
  }

  pCtx->param[index].nType = TSDB_DATA_TYPE_BINARY;

  // header: the timestamp
  char *cursor = pCtx->param[index].pz;
  *(TSKEY *)cursor = ts;
  cursor += TSDB_KEYSIZE;

  // payload: the value itself
  switch (type) {
    case TSDB_DATA_TYPE_FLOAT:
      *(double *)cursor = GET_FLOAT_VAL(data);
      break;
    case TSDB_DATA_TYPE_DOUBLE:
      *(double *)cursor = GET_DOUBLE_VAL(data);
      break;
    case TSDB_DATA_TYPE_INT:
    case TSDB_DATA_TYPE_BOOL:
    case TSDB_DATA_TYPE_BIGINT:
    case TSDB_DATA_TYPE_TINYINT:
    case TSDB_DATA_TYPE_SMALLINT:
    case TSDB_DATA_TYPE_TIMESTAMP:
      *(int64_t *)cursor = GET_INT64_VAL(data);
      break;
    case TSDB_DATA_TYPE_BINARY:
      strncpy(cursor, data, numOfChars);
      break;
    case TSDB_DATA_TYPE_NCHAR:
      wcsncpy((wchar_t *)cursor, (const wchar_t *)data, numOfChars);
      break;
    default:
      assert(0);
  }

  pCtx->param[index].nLen = blobLen;
}

/*
 * Returns the initial number of result pages for the query:
 *   - 128 for group-by-normal-column queries;
 *   - max(number of tables, 16) for interval queries;
 *   - 1 otherwise.
 */
static int32_t getInitialPageNum(SQInfo *pQInfo) {
  SQuery *pQuery = pQInfo->runtimeEnv.pQuery;
  int32_t minPagesForInterval = 16;

  int32_t numOfPages = 0;

  if (isGroupbyNormalCol(pQuery->pGroupbyExpr)) {
    numOfPages = 128;
  } else if (isIntervalQuery(pQuery)) {
    // time window query: one page per table, but never fewer than the minimum
    size_t numOfTables = pQInfo->groupInfo.numOfTables;
    numOfPages = MAX(numOfTables, minPagesForInterval);
  } else {
    // for super table query, one page for each subset
    numOfPages = 1;  // pQInfo->pSidSet->numOfSubSet;
  }

  assert(numOfPages > 0);
  return numOfPages;
}

/*
 * Returns the number of output rows one result produces: the first argument of
 * the expression at index 1 for a top/bottom query that is not a super table
 * query, and 1 in every other case.
 *
 * NOTE(review): index 1 is hard-coded; this presumably relies on the top/bottom
 * expression always being the second output expression - confirm.
 */
static int32_t getRowParamForMultiRowsOutput(SQuery *pQuery, bool isSTableQuery) {
  if (!isTopBottomQuery(pQuery) || isSTableQuery) {
    return 1;
  }

  int32_t rowsPerResult = pQuery->pSelectExpr[1].base.arg->argValue.i64;
  return rowsPerResult;
}

/*
 * Returns how many results fit into one internal buffer page: the page payload
 * (DEFAULT_INTERN_BUF_PAGE_SIZE minus the tFilePage header) divided by the
 * bytes one result occupies (row size times rows per result).
 */
static int32_t getNumOfRowsInResultPage(SQuery *pQuery, bool isSTableQuery) {
  int32_t bytesPerResult = pQuery->rowSize * getRowParamForMultiRowsOutput(pQuery, isSTableQuery);

  return (DEFAULT_INTERN_BUF_PAGE_SIZE - sizeof(tFilePage)) / bytesPerResult;
}

/*
 * Returns the address of column `columnIndex` of window result `pResult`
 * inside its result buffer page.
 *
 * The address is page->data + offset[columnIndex] * rowsPerPage
 * + bytes(columnIndex) * (rowId * rowsPerResult).
 */
char *getPosInResultPage(SQueryRuntimeEnv *pRuntimeEnv, int32_t columnIndex, SWindowResult *pResult) {
  assert(pResult != NULL && pRuntimeEnv != NULL);

  SQuery *   pQuery = pRuntimeEnv->pQuery;
  tFilePage *pPage = getResultBufferPageById(pRuntimeEnv->pResultBuf, pResult->pos.pageId);

  int32_t rowsPerPage = getNumOfRowsInResultPage(pQuery, pRuntimeEnv->stableQuery);
  int32_t firstRow = pResult->pos.rowId * getRowParamForMultiRowsOutput(pQuery, pRuntimeEnv->stableQuery);

  // start of this column's region within the page, then the row inside that region
  char *pColumn = ((char *)pPage->data) + pRuntimeEnv->offset[columnIndex] * rowsPerPage;
  return pColumn + pQuery->pSelectExpr[columnIndex].bytes * firstRow;
}

/**
 * decrease the refcount for each table involved in this query
 * @param pQInfo
 */
1881
UNUSED_FUNC void vnodeDecMeterRefcnt(SQInfo *pQInfo) {
1882
  if (pQInfo != NULL) {
1883
    //    assert(taosHashGetSize(pQInfo->groupInfo) >= 1);
1884 1885 1886
  }

#if 0
1887
  if (pQInfo == NULL || pQInfo->groupInfo.numOfTables == 1) {
1888
    atomic_fetch_sub_32(&pQInfo->pObj->numOfQueries, 1);
S
slguan 已提交
1889
    qTrace("QInfo:%p vid:%d sid:%d meterId:%s, query is over, numOfQueries:%d", pQInfo, pQInfo->pObj->vnode,
1890 1891 1892
           pQInfo->pObj->sid, pQInfo->pObj->meterId, pQInfo->pObj->numOfQueries);
  } else {
    int32_t num = 0;
1893 1894
    for (int32_t i = 0; i < pQInfo->groupInfo.numOfTables; ++i) {
      SMeterObj *pMeter = getMeterObj(pQInfo->groupInfo, pQInfo->pSidSet->pTableIdList[i]->sid);
1895 1896 1897
      atomic_fetch_sub_32(&(pMeter->numOfQueries), 1);
      
      if (pMeter->numOfQueries > 0) {
S
slguan 已提交
1898
        qTrace("QInfo:%p vid:%d sid:%d meterId:%s, query is over, numOfQueries:%d", pQInfo, pMeter->vnode, pMeter->sid,
1899 1900 1901 1902 1903 1904 1905 1906 1907
               pMeter->meterId, pMeter->numOfQueries);
        num++;
      }
    }
    
    /*
     * in order to reduce log output, for all meters of which numOfQueries count are 0,
     * we do not output corresponding information
     */
1908
    num = pQInfo->groupInfo.numOfTables - num;
S
slguan 已提交
1909
    qTrace("QInfo:%p metric query is over, dec query ref for %d meters, numOfQueries on %d meters are 0", pQInfo,
1910
           pQInfo->groupInfo.numOfTables, num);
1911 1912 1913 1914 1915 1916 1917 1918 1919 1920 1921 1922 1923
  }
#endif
}

/*
 * Decides whether a data block has to be loaded, given its pre-computed
 * statistics. It currently always answers true.
 *
 * TODO: restore the statistics-based pre-filter that used to live here under
 * `#if 0`. For every filter column with valid statistics in the block it
 * skipped var-length types and all-NULL columns, then applied each filter to
 * the column's [min, max] range and loaded the block as soon as one filter
 * could match. The disabled code referred to an undeclared `pField` and mixed
 * `info.data.type` with `info.info.type`, so it needs rewriting rather than
 * re-enabling.
 *
 * @param pQuery            query description; unused for now
 * @param pDataStatis       per-column block statistics, may be NULL
 * @param pCtx              function contexts; unused for now
 * @param numOfTotalPoints  number of rows in the block; unused for now
 * @return true, i.e. the block must be loaded
 */
static bool needToLoadDataBlock(SQuery *pQuery, SDataStatis *pDataStatis, SQLFunctionCtx *pCtx,
                                int32_t numOfTotalPoints) {
  (void)pQuery;
  (void)pCtx;
  (void)numOfTotalPoints;

  if (pDataStatis == NULL) {
    return true;  // no statistics available: nothing to pre-filter on
  }

  return true;
}

/*
 * Advances *pTimeWindow to the next window in scan direction: the start moves by
 * one sliding step (forwards or backwards, following the query order) and the end
 * is recomputed as start + intervalTime - 1. The end is recomputed instead of
 * shifted because the previous window may not span exactly pQuery->intervalTime.
 */
static void getNextTimeWindow(SQuery *pQuery, STimeWindow *pTimeWindow) {
  int32_t direction = GET_FORWARD_DIRECTION_FACTOR(pQuery->order.order);

  pTimeWindow->skey += (pQuery->slidingTime * direction);
  pTimeWindow->ekey = (pQuery->intervalTime - 1) + pTimeWindow->skey;
}

H
hjxilinx 已提交
1980
/*
 * Loads as much of the current data block as the query needs.
 *
 * The requirement is BLK_DATA_ALL_NEEDED when the query has filter columns;
 * otherwise it is the OR of what each output function's dataReqFunc asks for,
 * raised to BLK_DATA_ALL_NEEDED for ts-buffer (pTSBuf) and interval queries.
 *
 *   - BLK_DATA_NO_NEEDED:     nothing is loaded, the block is counted as discarded;
 *   - BLK_DATA_STATIS_NEEDED: only the block statistics are fetched; the block itself
 *                             is loaded only when no statistics are available;
 *   - BLK_DATA_ALL_NEEDED:    statistics are fetched and the block is loaded.
 *
 * @param pRuntimeEnv   runtime environment (query, function contexts, summary counters)
 * @param pQueryHandle  tsdb query handle positioned on the block
 * @param pBlockInfo    description of the current block
 * @param pStatis       [out] block statistics; set to NULL when fetching them fails
 * @return the loaded block, or NULL when the block data was not loaded
 */
SArray *loadDataBlockOnDemand(SQueryRuntimeEnv *pRuntimeEnv, void* pQueryHandle, SDataBlockInfo* pBlockInfo, SDataStatis **pStatis) {
  SQuery *pQuery = pRuntimeEnv->pQuery;

  uint32_t r = 0;
  SArray * pDataBlock = NULL;

  if (pQuery->numOfFilterCols > 0) {
    r = BLK_DATA_ALL_NEEDED;
  } else {
    // ask every output function how much of this block it requires
    for (int32_t i = 0; i < pQuery->numOfOutput; ++i) {
      SSqlFuncMsg* pSqlFunc = &pQuery->pSelectExpr[i].base;

      int32_t functionId = pSqlFunc->functionId;
      int32_t colId = pSqlFunc->colInfo.colId;
      r |= aAggs[functionId].dataReqFunc(&pRuntimeEnv->pCtx[i], pQuery->window.skey, pQuery->window.ekey, colId);
    }

    // pTSBuf is a pointer: test it against NULL rather than with a relational "> 0"
    if (pRuntimeEnv->pTSBuf != NULL || isIntervalQuery(pQuery)) {
      r |= BLK_DATA_ALL_NEEDED;
    }
  }

  if (r == BLK_DATA_NO_NEEDED) {
    qTrace("QInfo:%p data block discard, brange:%" PRId64 "-%" PRId64 ", rows:%d", GET_QINFO_ADDR(pRuntimeEnv),
           pBlockInfo->window.skey, pBlockInfo->window.ekey, pBlockInfo->rows);
    pRuntimeEnv->summary.discardBlocks += 1;
  } else if (r == BLK_DATA_STATIS_NEEDED) {
    if (tsdbRetrieveDataBlockStatisInfo(pQueryHandle, pStatis) != TSDB_CODE_SUCCESS) {
      // statistics are unusable: clear them so the block itself is loaded below
      //        return DISK_DATA_LOAD_FAILED;
      *pStatis = NULL;
    }

    pRuntimeEnv->summary.loadBlockStatis += 1;

    if (*pStatis == NULL) { // data block statistics does not exist, load data block
      pDataBlock = tsdbRetrieveDataBlock(pQueryHandle, NULL);
      pRuntimeEnv->summary.checkRows += pBlockInfo->rows;
    }
  } else {
    assert(r == BLK_DATA_ALL_NEEDED);

    // load the data block statistics to perform further filter
    pRuntimeEnv->summary.loadBlockStatis += 1;
    if (tsdbRetrieveDataBlockStatisInfo(pQueryHandle, pStatis) != TSDB_CODE_SUCCESS) {
      // statistics are only an optimisation on this path: continue without them
      *pStatis = NULL;
    }

    if (!needToLoadDataBlock(pQuery, *pStatis, pRuntimeEnv->pCtx, pBlockInfo->rows)) {
#if defined(_DEBUG_VIEW)
      qTrace("QInfo:%p block discarded by per-filter", GET_QINFO_ADDR(pRuntimeEnv));
#endif
      // current block has been discard due to filter applied
      // NOTE(review): the block is counted as discarded but is still loaded below,
      // because the early return is disabled.
      pRuntimeEnv->summary.discardBlocks += 1;
      //        return DISK_DATA_DISCARDED;
    }

    pRuntimeEnv->summary.checkRows += pBlockInfo->rows;
    pDataBlock = tsdbRetrieveDataBlock(pQueryHandle, NULL);
  }

  return pDataBlock;
}

H
hjxilinx 已提交
2042
/*
 * Binary search over an array of `num` timestamps (assumed sorted ascending - the
 * search narrows its range on that basis).
 *
 * @param pValue  array of TSKEY values
 * @param num     number of elements in the array
 * @param key     timestamp to look for
 * @param order   TSDB_ORDER_ASC or TSDB_ORDER_DESC
 * @return the index of an element equal to `key` when the search lands on one; otherwise
 *         - TSDB_ORDER_DESC: the index of the closest element smaller than `key`
 *           (-1 when `key` is smaller than every element);
 *         - TSDB_ORDER_ASC:  the index of the closest element larger than `key`
 *           (-1 when `key` is larger than every element).
 *         -1 is also returned when num <= 0.
 */
int32_t binarySearchForKey(char *pValue, int num, TSKEY key, int order) {
  if (num <= 0) {
    return -1;
  }

  assert(order == TSDB_ORDER_ASC || order == TSDB_ORDER_DESC);

  TSKEY * keys = (TSKEY *)pValue;
  int32_t low = 0;
  int32_t high = num - 1;
  int32_t mid = -1;

  if (order == TSDB_ORDER_DESC) {
    for (;;) {
      // answers that can be read off the current bounds
      if (key >= keys[high]) return high;
      if (key == keys[low]) return low;
      if (key < keys[low]) return low - 1;

      mid = low + ((high - low + 1) >> 1);

      if (key < keys[mid]) {
        high = mid - 1;
      } else if (key > keys[mid]) {
        low = mid + 1;
      } else {
        return mid;
      }
    }
  }

  for (;;) {
    // answers that can be read off the current bounds
    if (key <= keys[low]) return low;
    if (key == keys[high]) return high;

    if (key > keys[high]) {
      int32_t next = high + 1;
      return (next >= num) ? -1 : next;
    }

    mid = low + ((high - low + 1) >> 1);

    if (key < keys[mid]) {
      high = mid - 1;
    } else if (key > keys[mid]) {
      low = mid + 1;
    } else {
      return mid;
    }
  }
}

2105 2106 2107 2108 2109 2110 2111 2112 2113 2114 2115 2116 2117 2118 2119 2120 2121 2122 2123 2124 2125 2126 2127 2128 2129 2130 2131 2132 2133 2134 2135 2136 2137 2138 2139 2140 2141
/*
 * Grows the per-column output buffers so that all rows of the given block fit.
 *
 * Only queries that are neither interval, nor group-by-normal-column, nor
 * fixed-output are affected (e.g. prj/diff queries). After a buffer is
 * reallocated, the matching function context is pointed at the first free row
 * of the new buffer.
 *
 * NOTE(review): an allocation failure is only caught by assert(0); with asserts
 * compiled out the old buffer is kept while the capacity is still raised.
 */
static void ensureOutputBuffer(SQueryRuntimeEnv* pRuntimeEnv, SDataBlockInfo* pBlockInfo) {
  SQuery* pQuery = pRuntimeEnv->pQuery;

  if (isIntervalQuery(pQuery) || isGroupbyNormalCol(pQuery->pGroupbyExpr) || isFixedOutputQuery(pQuery)) {
    return;
  }

  SResultRec *pRec = &pQuery->rec;
  if (pQuery->rec.capacity - pQuery->rec.rows >= pBlockInfo->rows) {
    return;  // enough free rows already
  }

  int32_t freeRows = pRec->capacity - pRec->rows;
  int32_t newCapacity = pRec->capacity + (pBlockInfo->rows - freeRows);

  for (int32_t col = 0; col < pQuery->numOfOutput; ++col) {
    int32_t bytesPerRow = pQuery->pSelectExpr[col].bytes;

    char *pNew = realloc(pQuery->sdata[col], bytesPerRow * newCapacity + sizeof(tFilePage));
    if (pNew != NULL) {
      pQuery->sdata[col] = (tFilePage *)pNew;
    } else {  // todo handle the oom
      assert(0);
    }

    // set the pCtx output buffer position
    pRuntimeEnv->pCtx[col].aOutputBuf = pQuery->sdata[col]->data + pRec->rows * bytesPerRow;

    // top/bottom/diff also emit timestamps, which go to the buffer of the first column
    int32_t fid = pQuery->pSelectExpr[col].base.functionId;
    if (fid == TSDB_FUNC_TOP || fid == TSDB_FUNC_BOTTOM || fid == TSDB_FUNC_DIFF) {
      pRuntimeEnv->pCtx[col].ptsOutputBuf = pRuntimeEnv->pCtx[0].aOutputBuf;
    }
  }

  qTrace("QInfo:%p realloc output buffer, new size: %d rows, old:%d, remain:%d", GET_QINFO_ADDR(pRuntimeEnv),
         newCapacity, pRec->capacity, newCapacity - pRec->rows);

  pRec->capacity = newCapacity;
}

2142 2143
/*
 * Iterates over the data blocks of the current scan (master or secondary query
 * handle) and applies the query functions to each of them.
 *
 * The loop stops when the blocks are exhausted, when the query is killed, or when
 * the query status becomes QUERY_RESBUF_FULL or QUERY_COMPLETED. If the result
 * buffer is not full afterwards the query is marked completed; for a completed
 * interval query in the master scan all time windows are closed and redundant
 * ones removed.
 *
 * @return always 0
 */
static int64_t doScanAllDataBlocks(SQueryRuntimeEnv *pRuntimeEnv) {
  SQuery *         pQuery = pRuntimeEnv->pQuery;
  STableQueryInfo *pTableQueryInfo = pQuery->current;

  qTrace("QInfo:%p query start, qrange:%" PRId64 "-%" PRId64 ", lastkey:%" PRId64 ", order:%d",
         GET_QINFO_ADDR(pRuntimeEnv), pTableQueryInfo->win.skey, pTableQueryInfo->win.ekey, pTableQueryInfo->lastKey,
         pQuery->order.order);

  TsdbQueryHandleT pQueryHandle = IS_MASTER_SCAN(pRuntimeEnv) ? pRuntimeEnv->pQueryHandle : pRuntimeEnv->pSecQueryHandle;

  while (tsdbNextDataBlock(pQueryHandle)) {
    pRuntimeEnv->summary.dataBlocks += 1;

    if (isQueryKilled(GET_QINFO_ADDR(pRuntimeEnv))) {
      return 0;
    }

    SDataBlockInfo blockInfo = tsdbRetrieveDataBlockInfo(pQueryHandle);

    // todo extract methods
    // first block of an interval query: establish the first time window
    if (isIntervalQuery(pQuery) && pRuntimeEnv->windowResInfo.prevSKey == TSKEY_INITIAL_VAL) {
      TSKEY           clippedSkey, clippedEkey;  // required by the callee, not used here
      STimeWindow     firstWin = TSWINDOW_INITIALIZER;
      SWindowResInfo *pWindowResInfo = &pRuntimeEnv->windowResInfo;

      if (QUERY_IS_ASC_QUERY(pQuery)) {
        getAlignQueryTimeWindow(pQuery, blockInfo.window.skey, blockInfo.window.skey, pQuery->window.ekey,
                                &clippedSkey, &clippedEkey, &firstWin);
        pWindowResInfo->startTime = firstWin.skey;
      } else {
        // the start position of the first time window in the endpoint that spreads beyond the queried last timestamp
        getAlignQueryTimeWindow(pQuery, blockInfo.window.ekey, pQuery->window.ekey, blockInfo.window.ekey,
                                &clippedSkey, &clippedEkey, &firstWin);
        pWindowResInfo->startTime = pQuery->window.skey;
      }

      pWindowResInfo->prevSKey = firstWin.skey;

      if (pRuntimeEnv->pFillInfo != NULL) {
        pRuntimeEnv->pFillInfo->start = firstWin.skey;
      }
    }

    // in case of prj/diff query, ensure the output buffer is sufficient to accommodate the results of current block
    ensureOutputBuffer(pRuntimeEnv, &blockInfo);

    // start from the first row when scanning forwards, from the last row otherwise
    SDataStatis *pStatis = NULL;
    if (QUERY_IS_ASC_QUERY(pQuery)) {
      pQuery->pos = 0;
    } else {
      pQuery->pos = blockInfo.rows - 1;
    }

    SArray *pDataBlock = loadDataBlockOnDemand(pRuntimeEnv, pQueryHandle, &blockInfo, &pStatis);
    int32_t numOfRes = tableApplyFunctionsOnBlock(pRuntimeEnv, &blockInfo, pStatis, binarySearchForKey, pDataBlock);

    pRuntimeEnv->summary.dataInRows += blockInfo.rows;
    qTrace("QInfo:%p check data block, brange:%" PRId64 "-%" PRId64 ", numOfRows:%d, numOfRes:%d, lastKey:%"PRId64, GET_QINFO_ADDR(pRuntimeEnv),
           blockInfo.window.skey, blockInfo.window.ekey, blockInfo.rows, numOfRes, pQuery->current->lastKey);

    // while the output buffer is full or limit/offset is applied, query may be paused here
    if (Q_STATUS_EQUAL(pQuery->status, QUERY_RESBUF_FULL | QUERY_COMPLETED)) {
      break;
    }
  }

  // if the result buffer is not full, set the query complete
  if (!Q_STATUS_EQUAL(pQuery->status, QUERY_RESBUF_FULL)) {
    setQueryStatus(pQuery, QUERY_COMPLETED);
  }

  if (isIntervalQuery(pQuery) && IS_MASTER_SCAN(pRuntimeEnv)) {
    if (!Q_STATUS_EQUAL(pQuery->status, QUERY_COMPLETED)) {
      assert(Q_STATUS_EQUAL(pQuery->status, QUERY_RESBUF_FULL));
    } else {
      int32_t step = QUERY_IS_ASC_QUERY(pQuery) ? QUERY_ASC_FORWARD_STEP : QUERY_DESC_FORWARD_STEP;

      closeAllTimeWindow(&pRuntimeEnv->windowResInfo);
      removeRedundantWindow(&pRuntimeEnv->windowResInfo, pTableQueryInfo->lastKey - step, step);
      pRuntimeEnv->windowResInfo.curIndex = pRuntimeEnv->windowResInfo.size - 1;  // point to the last time window
    }
  }

  return 0;
}

/*
 * set tag value in SQLFunctionCtx
 * e.g.,tag information into input buffer
 */
H
hjxilinx 已提交
2227
static void doSetTagValueInParam(void *tsdb, STableId* pTableId, int32_t tagColId, tVariant *param) {
2228
  tVariantDestroy(param);
2229

2230
  char *  val = NULL;
2231
  int16_t bytes = 0;
2232
  int16_t type = 0;
2233

2234
  if (tagColId == TSDB_TBNAME_COLUMN_INDEX) {
H
hjxilinx 已提交
2235
    val = tsdbGetTableName(tsdb, pTableId, &bytes);
2236
    type = TSDB_DATA_TYPE_BINARY;
H
hjxilinx 已提交
2237
    tVariantCreateFromBinary(param, varDataVal(val), varDataLen(val), type);
2238
  } else {
H
hjxilinx 已提交
2239
    tsdbGetTableTagVal(tsdb, pTableId, tagColId, &type, &bytes, &val);
H
hjxilinx 已提交
2240 2241 2242 2243 2244 2245
    
    if (type == TSDB_DATA_TYPE_BINARY || type == TSDB_DATA_TYPE_NCHAR) {
      tVariantCreateFromBinary(param, varDataVal(val), varDataLen(val), type);
    } else {
      tVariantCreateFromBinary(param, val, bytes, type);
    }
2246
  }
2247 2248
}

H
hjxilinx 已提交
2249
/*
 * Loads the tag values of table `pTableId` into the function contexts.
 *
 * For a query whose only output is ts_comp, the tag named by the function's first
 * argument is stored in the first context. Otherwise every output expression that
 * refers to a tag column gets that tag's value in its own context.
 */
void setTagVal(SQueryRuntimeEnv *pRuntimeEnv, STableId* pTableId, void *tsdb) {
  SQuery *     pQuery = pRuntimeEnv->pQuery;
  SSqlFuncMsg *pFirstFunc = &pQuery->pSelectExpr[0].base;

  if (pQuery->numOfOutput == 1 && pFirstFunc->functionId == TSDB_FUNC_TS_COMP) {
    assert(pFirstFunc->numOfParams == 1);
    doSetTagValueInParam(tsdb, pTableId, pFirstFunc->arg->argValue.i64, &pRuntimeEnv->pCtx[0].tag);
    return;
  }

  // set tag value, by which the results are aggregated.
  for (int32_t col = 0; col < pQuery->numOfOutput; ++col) {
    SColIndex *pColIndex = &pQuery->pSelectExpr[col].base.colInfo;

    // only expressions on tag columns carry a tag value
    if (TSDB_COL_IS_TAG(pColIndex->flag)) {
      // todo use tag column index to optimize performance
      doSetTagValueInParam(tsdb, pTableId, pColIndex->colId, &pRuntimeEnv->pCtx[col].tag);
    }
  }

  // set the join tag for first column
  if (pFirstFunc->functionId == TSDB_FUNC_TS && pFirstFunc->colInfo.colIndex == PRIMARYKEY_TIMESTAMP_COL_INDEX &&
      pRuntimeEnv->pTSBuf != NULL) {
    assert(pFirstFunc->numOfParams == 1);
    assert(0);  // to do fix me
    //      doSetTagValueInParam(pTagSchema, pFuncMsg->arg->argValue.i64, pMeterSidInfo, &pRuntimeEnv->pCtx[0].tag);
  }
}

/*
 * Merges one window result into the current output row of every function context.
 *
 * @param pRuntimeEnv  runtime environment holding the query and the function contexts
 * @param timestamp    stored as nStartQueryTimestamp of every context
 * @param pWindowRes   window result whose columns become the contexts' input
 * @param mergeFlag    false: advance each context to the next output row and
 *                     re-initialise it before merging; true: merge into the current row
 */
static void doMerge(SQueryRuntimeEnv *pRuntimeEnv, int64_t timestamp, SWindowResult *pWindowRes, bool mergeFlag) {
  SQuery *        pQuery = pRuntimeEnv->pQuery;
  SQLFunctionCtx *pCtx = pRuntimeEnv->pCtx;

  // pass 1: prepare every context
  for (int32_t col = 0; col < pQuery->numOfOutput; ++col) {
    SQLFunctionCtx *pCur = &pCtx[col];
    int32_t         fid = pQuery->pSelectExpr[col].base.functionId;

    if (!mergeFlag) {
      pCur->aOutputBuf += pCur->outputBytes;
      pCur->currentStage = FIRST_STAGE_MERGE;

      resetResultInfo(pCur->resultInfo);
      aAggs[fid].init(pCur);
    }

    pCur->hasNull = true;
    pCur->nStartQueryTimestamp = timestamp;
    pCur->aInputElemBuf = getPosInResultPage(pRuntimeEnv, col, pWindowRes);

    // in case of tag column, the tag information should be extracted from input buffer
    if (fid != TSDB_FUNC_TAG_DUMMY && fid != TSDB_FUNC_TAG) {
      continue;
    }

    tVariantDestroy(&pCur->tag);

    int32_t outType = pCur->outputType;
    if (outType == TSDB_DATA_TYPE_BINARY || outType == TSDB_DATA_TYPE_NCHAR) {
      tVariantCreateFromBinary(&pCur->tag, varDataVal(pCur->aInputElemBuf), varDataLen(pCur->aInputElemBuf), outType);
    } else {
      tVariantCreateFromBinary(&pCur->tag, pCur->aInputElemBuf, pCur->inputBytes, pCur->inputType);
    }
  }

  // pass 2: run the merge function of every expression except tag_dummy
  for (int32_t col = 0; col < pQuery->numOfOutput; ++col) {
    int32_t fid = pQuery->pSelectExpr[col].base.functionId;

    if (fid != TSDB_FUNC_TAG_DUMMY) {
      aAggs[fid].distMergeFunc(&pCtx[col]);
    }
  }
}

2322
static UNUSED_FUNC void printBinaryData(int32_t functionId, char *data, int32_t srcDataType) {
2323 2324 2325 2326 2327 2328 2329 2330 2331 2332 2333 2334 2335 2336 2337 2338 2339 2340 2341 2342 2343 2344 2345 2346 2347 2348 2349 2350 2351 2352 2353 2354 2355 2356 2357 2358 2359 2360 2361 2362 2363 2364 2365 2366 2367 2368 2369 2370 2371 2372 2373 2374 2375 2376 2377 2378 2379 2380 2381 2382 2383 2384 2385 2386 2387 2388 2389 2390
  if (functionId == TSDB_FUNC_FIRST_DST || functionId == TSDB_FUNC_LAST_DST) {
    switch (srcDataType) {
      case TSDB_DATA_TYPE_BINARY:
        printf("%" PRId64 ",%s\t", *(TSKEY *)data, (data + TSDB_KEYSIZE + 1));
        break;
      case TSDB_DATA_TYPE_TINYINT:
      case TSDB_DATA_TYPE_BOOL:
        printf("%" PRId64 ",%d\t", *(TSKEY *)data, *(int8_t *)(data + TSDB_KEYSIZE + 1));
        break;
      case TSDB_DATA_TYPE_SMALLINT:
        printf("%" PRId64 ",%d\t", *(TSKEY *)data, *(int16_t *)(data + TSDB_KEYSIZE + 1));
        break;
      case TSDB_DATA_TYPE_BIGINT:
      case TSDB_DATA_TYPE_TIMESTAMP:
        printf("%" PRId64 ",%" PRId64 "\t", *(TSKEY *)data, *(TSKEY *)(data + TSDB_KEYSIZE + 1));
        break;
      case TSDB_DATA_TYPE_INT:
        printf("%" PRId64 ",%d\t", *(TSKEY *)data, *(int32_t *)(data + TSDB_KEYSIZE + 1));
        break;
      case TSDB_DATA_TYPE_FLOAT:
        printf("%" PRId64 ",%f\t", *(TSKEY *)data, *(float *)(data + TSDB_KEYSIZE + 1));
        break;
      case TSDB_DATA_TYPE_DOUBLE:
        printf("%" PRId64 ",%lf\t", *(TSKEY *)data, *(double *)(data + TSDB_KEYSIZE + 1));
        break;
    }
  } else if (functionId == TSDB_FUNC_AVG) {
    printf("%lf,%d\t", *(double *)data, *(int32_t *)(data + sizeof(double)));
  } else if (functionId == TSDB_FUNC_SPREAD) {
    printf("%lf,%lf\t", *(double *)data, *(double *)(data + sizeof(double)));
  } else if (functionId == TSDB_FUNC_TWA) {
    data += 1;
    printf("%lf,%" PRId64 ",%" PRId64 ",%" PRId64 "\t", *(double *)data, *(int64_t *)(data + 8),
           *(int64_t *)(data + 16), *(int64_t *)(data + 24));
  } else if (functionId == TSDB_FUNC_MIN || functionId == TSDB_FUNC_MAX) {
    switch (srcDataType) {
      case TSDB_DATA_TYPE_TINYINT:
      case TSDB_DATA_TYPE_BOOL:
        printf("%d\t", *(int8_t *)data);
        break;
      case TSDB_DATA_TYPE_SMALLINT:
        printf("%d\t", *(int16_t *)data);
        break;
      case TSDB_DATA_TYPE_BIGINT:
      case TSDB_DATA_TYPE_TIMESTAMP:
        printf("%" PRId64 "\t", *(int64_t *)data);
        break;
      case TSDB_DATA_TYPE_INT:
        printf("%d\t", *(int *)data);
        break;
      case TSDB_DATA_TYPE_FLOAT:
        printf("%f\t", *(float *)data);
        break;
      case TSDB_DATA_TYPE_DOUBLE:
        printf("%f\t", *(float *)data);
        break;
    }
  } else if (functionId == TSDB_FUNC_SUM) {
    if (srcDataType == TSDB_DATA_TYPE_FLOAT || srcDataType == TSDB_DATA_TYPE_DOUBLE) {
      printf("%lf\t", *(float *)data);
    } else {
      printf("%" PRId64 "\t", *(int64_t *)data);
    }
  } else {
    printf("%s\t", data);
  }
}

2391
/**
 * Debug helper: print the intermediate result of a super table query to stdout,
 * one line per row with tab separated columns.
 *
 * @param pdata       one page per output column; column i is read from pdata[i]->data
 * @param pRuntimeEnv runtime environment whose pQuery describes the output columns
 * @param numOfRows   number of rows to print
 */
void UNUSED_FUNC displayInterResult(tFilePage **pdata, SQueryRuntimeEnv* pRuntimeEnv, int32_t numOfRows) {
  SQuery* pQuery = pRuntimeEnv->pQuery;
  int32_t numOfCols = pQuery->numOfOutput;

  printf("super table query intermediate result, total:%d\n", numOfRows);

  for (int32_t row = 0; row < numOfRows; ++row) {
    for (int32_t col = 0; col < numOfCols; ++col) {
      int32_t type = pQuery->pSelectExpr[col].type;

      // start of the value of (row, col): values of one column are stored back to back
      char *cell = pdata[col]->data + pQuery->pSelectExpr[col].bytes * row;

      if (type == TSDB_DATA_TYPE_BINARY) {
        // binary columns hold function specific intermediate data
        printBinaryData(pQuery->pSelectExpr[col].base.functionId, cell, type);
      } else if (type == TSDB_DATA_TYPE_TIMESTAMP || type == TSDB_DATA_TYPE_BIGINT) {
        printf("%" PRId64 "\t", *(int64_t *)cell);
      } else if (type == TSDB_DATA_TYPE_INT) {
        printf("%d\t", *(int32_t *)cell);
      } else if (type == TSDB_DATA_TYPE_FLOAT) {
        printf("%f\t", *(float *)cell);
      } else if (type == TSDB_DATA_TYPE_DOUBLE) {
        printf("%lf\t", *(double *)cell);
      }
      // any other type is not printed
    }

    printf("\n");
  }
}

typedef struct SCompSupporter {
H
hjxilinx 已提交
2430 2431 2432
  STableQueryInfo **pTableQueryInfo;
  int32_t *         position;
  SQInfo *          pQInfo;
2433 2434 2435 2436 2437
} SCompSupporter;

int32_t tableResultComparFn(const void *pLeft, const void *pRight, void *param) {
  int32_t left = *(int32_t *)pLeft;
  int32_t right = *(int32_t *)pRight;
2438

2439 2440
  SCompSupporter *  supporter = (SCompSupporter *)param;
  SQueryRuntimeEnv *pRuntimeEnv = &supporter->pQInfo->runtimeEnv;
2441

2442 2443
  int32_t leftPos = supporter->position[left];
  int32_t rightPos = supporter->position[right];
2444

2445 2446 2447 2448
  /* left source is exhausted */
  if (leftPos == -1) {
    return 1;
  }
2449

2450 2451 2452 2453
  /* right source is exhausted*/
  if (rightPos == -1) {
    return -1;
  }
2454

H
hjxilinx 已提交
2455
  SWindowResInfo *pWindowResInfo1 = &supporter->pTableQueryInfo[left]->windowResInfo;
2456
  SWindowResult * pWindowRes1 = getWindowResult(pWindowResInfo1, leftPos);
2457

2458 2459
  char *b1 = getPosInResultPage(pRuntimeEnv, PRIMARYKEY_TIMESTAMP_COL_INDEX, pWindowRes1);
  TSKEY leftTimestamp = GET_INT64_VAL(b1);
2460

H
hjxilinx 已提交
2461
  SWindowResInfo *pWindowResInfo2 = &supporter->pTableQueryInfo[right]->windowResInfo;
2462
  SWindowResult * pWindowRes2 = getWindowResult(pWindowResInfo2, rightPos);
2463

2464 2465
  char *b2 = getPosInResultPage(pRuntimeEnv, PRIMARYKEY_TIMESTAMP_COL_INDEX, pWindowRes2);
  TSKEY rightTimestamp = GET_INT64_VAL(b2);
2466

2467 2468 2469
  if (leftTimestamp == rightTimestamp) {
    return 0;
  }
2470

2471 2472 2473
  return leftTimestamp > rightTimestamp ? 1 : -1;
}

2474
/**
 * Merge the results of the table groups, starting at pQInfo->groupIndex, until one
 * group produces at least one result or all groups have been processed.
 *
 * pQInfo->groupIndex is advanced past every group that was merged.
 *
 * @param pQInfo query handle
 * @return TSDB_CODE_SUCCESS on success, -1 if merging a group failed
 */
int32_t mergeIntoGroupResult(SQInfo *pQInfo) {
  int64_t st = taosGetTimestampMs();
  int32_t numOfGroups = (int32_t)taosArrayGetSize(pQInfo->groupInfo.pGroupList);

  while (pQInfo->groupIndex < numOfGroups) {
    SArray *group = taosArrayGetP(pQInfo->groupInfo.pGroupList, pQInfo->groupIndex);

    int32_t ret = mergeIntoGroupResultImpl(pQInfo, group);
    if (ret < 0) {  // not enough disk space to save the data into disk
      return -1;
    }

    pQInfo->groupIndex += 1;

    // this group generates at least one result, return results
    if (ret > 0) {
      break;
    }

    assert(pQInfo->numOfGroupResultPages == 0);
    qTrace("QInfo:%p no result in group %d, continue", pQInfo, pQInfo->groupIndex - 1);
  }

  // the elapsed time is a 64-bit value: print it with PRId64 instead of the non-portable %lld
  qTrace("QInfo:%p merge res data into group, index:%d, total group:%d, elapsed time:%" PRId64 "ms", pQInfo,
         pQInfo->groupIndex - 1, numOfGroups, (int64_t)(taosGetTimestampMs() - st));

  return TSDB_CODE_SUCCESS;
}

/**
 * Copy the next batch of merged group results from the disk based result buffer
 * into the output buffers of the query (pQuery->sdata).
 *
 * When all batches of the current group have been consumed, the next group is merged
 * first; if no group is left, pQInfo->tableIndex is set to the number of tables to
 * flag the query as completed.
 *
 * @param pQInfo query handle
 * @param pQuery query whose sdata buffers and rec.rows receive the rows
 */
void copyResToQueryResultBuf(SQInfo *pQInfo, SQuery *pQuery) {
  if (pQInfo->offset == pQInfo->numOfGroupResultPages) {
    pQInfo->numOfGroupResultPages = 0;

    // everything of the current group has been sent to the client, try the next group
    if (mergeIntoGroupResult(pQInfo) != TSDB_CODE_SUCCESS) {
      return;  // failed to save data in the disk
    }

    // no pages and no group left: all results have been sent to the client
    int32_t numOfGroup = taosArrayGetSize(pQInfo->groupInfo.pGroupList);
    if (pQInfo->numOfGroupResultPages == 0 && pQInfo->groupIndex == numOfGroup) {
      pQInfo->tableIndex = pQInfo->groupInfo.numOfTables;  // set query completed
      return;
    }
  }

  SQueryRuntimeEnv *   pRuntimeEnv = &pQInfo->runtimeEnv;
  SDiskbasedResultBuf *pResultBuf = pRuntimeEnv->pResultBuf;

  int32_t groupResId = getGroupResultId(pQInfo->groupIndex - 1);
  SIDList pages = getDataBufPagesIdList(pResultBuf, pQInfo->offset + groupResId);

  // first pass: count the rows held by all pages of this batch
  int32_t total = 0;
  for (int32_t p = 0; p < pages.size; ++p) {
    tFilePage *pPage = getResultBufferPageById(pResultBuf, pages.pData[p]);
    total += pPage->num;
  }

  // second pass: append every page, column by column, to the output buffers
  int32_t copied = 0;
  for (int32_t p = 0; p < pages.size; ++p) {
    tFilePage *pPage = getResultBufferPageById(pResultBuf, pages.pData[p]);

    for (int32_t col = 0; col < pQuery->numOfOutput; ++col) {
      int32_t bytes = pRuntimeEnv->pCtx[col].outputBytes;
      char *  pDest = pQuery->sdata[col]->data;

      memcpy(pDest + copied * bytes, pPage->data + pRuntimeEnv->offset[col] * pPage->num,
             bytes * pPage->num);
    }

    copied += pPage->num;
  }

  assert(pQuery->rec.rows == 0);

  pQuery->rec.rows += total;
  pQInfo->offset += 1;
}

/**
 * Get the number of output rows of a window result, i.e. the largest numOfRes over
 * all output columns that are not ts/tag/tagprj columns.
 *
 * @param pRuntimeEnv runtime environment, provides the query description
 * @param pWindowRes  window result to inspect
 * @return the largest numOfRes found, 0 if there is none
 */
int64_t getNumOfResultWindowRes(SQueryRuntimeEnv *pRuntimeEnv, SWindowResult *pWindowRes) {
  SQuery *pQuery = pRuntimeEnv->pQuery;
  int64_t numOfRows = 0;

  for (int32_t col = 0; col < pQuery->numOfOutput; ++col) {
    int32_t functionId = pQuery->pSelectExpr[col].base.functionId;

    // ts, tag and tagprj columns follow the main output, they never decide the number of rows
    bool auxiliary =
        (functionId == TSDB_FUNC_TS) || (functionId == TSDB_FUNC_TAG) || (functionId == TSDB_FUNC_TAGPRJ);

    if (!auxiliary) {
      SResultInfo *pInfo = &pWindowRes->resultInfo[col];
      if (pInfo != NULL && pInfo->numOfRes > numOfRows) {
        numOfRows = pInfo->numOfRes;
      }
    }
  }

  return numOfRows;
}

2580
/**
 * Merge the window results of all tables of one group, ordered by timestamp, and
 * flush the merged rows into the disk based result buffer.
 *
 * Window results carrying the same timestamp are merged into one output row. All
 * temporary resources are released on every exit path.
 *
 * @param pQInfo query handle
 * @param pGroup array of SGroupItem, the tables of the group
 * @return pQInfo->numOfGroupResultPages on success (0 if no table of the group has any
 *         result), -1 if an allocation or flushing the result buffer failed
 */
int32_t mergeIntoGroupResultImpl(SQInfo *pQInfo, SArray *pGroup) {
  SQueryRuntimeEnv *pRuntimeEnv = &pQInfo->runtimeEnv;
  SQuery *          pQuery = pRuntimeEnv->pQuery;
  tFilePage **      buffer = pQuery->sdata;

  size_t  size = taosArrayGetSize(pGroup);
  int32_t code = -1;
  int32_t numOfTables = 0;
  int64_t lastTimestamp = -1;
  int64_t startt = 0;
  int64_t endt = 0;

  SLoserTreeInfo *  pTree = NULL;
  SResultInfo *     pResultInfo = NULL;
  int32_t *         posList = calloc(size, sizeof(int32_t));
  STableQueryInfo **pTableList = malloc(POINTER_BYTES * size);
  SCompSupporter    cs = {pTableList, posList, pQInfo};

  // an empty group may legitimately yield NULL from the allocators
  if (size > 0 && (posList == NULL || pTableList == NULL)) {
    goto cleanup;
  }

  // todo opt for the case of one table per group
  // only tables that own result pages and at least one window result take part in the merge
  for (size_t i = 0; i < size; ++i) {
    SGroupItem *     item = taosArrayGet(pGroup, i);
    STableQueryInfo *pInfo = item->info;

    SIDList list = getDataBufPagesIdList(pRuntimeEnv->pResultBuf, pInfo->id.tid);
    if (list.size > 0 && pInfo->windowResInfo.size > 0) {
      pTableList[numOfTables] = pInfo;
      numOfTables += 1;
    }
  }

  if (numOfTables == 0) {
    assert(pQInfo->numOfGroupResultPages == 0);
    code = 0;
    goto cleanup;
  }

  tLoserTreeCreate(&pTree, numOfTables, &cs, tableResultComparFn);
  if (pTree == NULL) {
    goto cleanup;
  }

  pResultInfo = calloc(pQuery->numOfOutput, sizeof(SResultInfo));
  if (pResultInfo == NULL) {
    goto cleanup;
  }

  setWindowResultInfo(pResultInfo, pQuery, pRuntimeEnv->stableQuery);
  resetMergeResultBuf(pQuery, pRuntimeEnv->pCtx, pResultInfo);

  startt = taosGetTimestampMs();

  while (1) {
    // the root of the tree refers to the source whose current window has the smallest timestamp
    int32_t pos = pTree->pNode[0].index;

    SWindowResInfo *pWindowResInfo = &pTableList[pos]->windowResInfo;
    SWindowResult * pWindowRes = getWindowResult(pWindowResInfo, cs.position[pos]);

    char *b = getPosInResultPage(pRuntimeEnv, PRIMARYKEY_TIMESTAMP_COL_INDEX, pWindowRes);
    TSKEY ts = GET_INT64_VAL(b);

    assert(ts == pWindowRes->window.skey);

    // windows without any output row are skipped
    int64_t num = getNumOfResultWindowRes(pRuntimeEnv, pWindowRes);
    if (num > 0) {
      if (ts == lastTimestamp) {  // merge with the last one
        doMerge(pRuntimeEnv, ts, pWindowRes, true);
      } else {  // copy data to disk buffer
        if (buffer[0]->num == pQuery->rec.capacity) {
          if (flushFromResultBuf(pQInfo) != TSDB_CODE_SUCCESS) {
            qError("QInfo:%p failed to flush data into temp file, abort query", pQInfo);
            goto cleanup;
          }

          resetMergeResultBuf(pQuery, pRuntimeEnv->pCtx, pResultInfo);
        }

        doMerge(pRuntimeEnv, ts, pWindowRes, false);
        buffer[0]->num += 1;
      }

      lastTimestamp = ts;
    }

    // move this source to its next window result
    cs.position[pos] += 1;
    if (cs.position[pos] >= pWindowResInfo->size) {
      cs.position[pos] = -1;

      // all input sources are exhausted
      if (--numOfTables == 0) {
        break;
      }
    }

    tLoserTreeAdjust(pTree, pos + pTree->numOfEntries);
  }

  if (buffer[0]->num != 0) {  // there are data in buffer
    if (flushFromResultBuf(pQInfo) != TSDB_CODE_SUCCESS) {
      qError("QInfo:%p failed to flush data into temp file, abort query", pQInfo);
      goto cleanup;
    }
  }

  endt = taosGetTimestampMs();

#ifdef _DEBUG_VIEW
  displayInterResult(pQuery->sdata, pRuntimeEnv, pQuery->sdata[0]->num);
#endif

  // pQInfo is at hand; GET_QINFO_ADDR() expects the address of the runtime environment, not of the query
  qTrace("QInfo:%p result merge completed, elapsed time:%" PRId64 " ms", pQInfo, endt - startt);

  pQInfo->offset = 0;
  code = pQInfo->numOfGroupResultPages;

cleanup:
  if (pResultInfo != NULL) {
    for (int32_t i = 0; i < pQuery->numOfOutput; ++i) {
      tfree(pResultInfo[i].interResultBuf);
    }
  }

  tfree(pResultInfo);
  tfree(pTree);
  tfree(pTableList);
  tfree(posList);

  return code;
}

/**
 * Write the rows held in the output buffers (pQuery->sdata) into new pages of the
 * disk based result buffer and count one more flushed batch for the current group.
 *
 * All pages written by one call share the same id:
 * getGroupResultId(groupIndex) + numOfGroupResultPages.
 *
 * @param pQInfo query handle
 * @return TSDB_CODE_SUCCESS on success, -1 if no row fits into a page or no new page
 *         could be obtained
 */
int32_t flushFromResultBuf(SQInfo *pQInfo) {
  SQueryRuntimeEnv *pRuntimeEnv = &pQInfo->runtimeEnv;
  SQuery *          pQuery = pRuntimeEnv->pQuery;

  SDiskbasedResultBuf *pResultBuf = pRuntimeEnv->pResultBuf;

  if (pQuery->rowSize <= 0) {
    return -1;
  }

  // number of rows that fit into one page
  int32_t capacity = (DEFAULT_INTERN_BUF_PAGE_SIZE - sizeof(tFilePage)) / pQuery->rowSize;
  if (capacity <= 0) {
    return -1;  // a row larger than a page would make the loop below spin forever
  }

  int32_t pageId = -1;

  int32_t remain = pQuery->sdata[0]->num;
  int32_t offset = 0;

  while (remain > 0) {
    int32_t r = (remain > capacity) ? capacity : remain;

    int32_t    id = getGroupResultId(pQInfo->groupIndex) + pQInfo->numOfGroupResultPages;
    tFilePage *buf = getNewDataBuf(pResultBuf, id, &pageId);
    if (buf == NULL) {
      // NOTE(review): a NULL page is treated as "no more buffer space"; confirm against getNewDataBuf
      return -1;
    }

    buf->num = r;

    // pagewise copy to dest buffer
    for (int32_t i = 0; i < pQuery->numOfOutput; ++i) {
      int32_t bytes = pRuntimeEnv->pCtx[i].outputBytes;

      memcpy(buf->data + pRuntimeEnv->offset[i] * buf->num, ((char *)pQuery->sdata[i]->data) + offset * bytes,
             buf->num * bytes);
    }

    offset += r;
    remain -= r;
  }

  pQInfo->numOfGroupResultPages += 1;
  return TSDB_CODE_SUCCESS;
}

/**
 * Prepare the function contexts and the output buffers for a new round of merging.
 *
 * @param pQuery      query whose sdata pages are emptied
 * @param pCtx        function contexts, one per output column
 * @param pResultInfo result info array, entry k is attached to context k
 */
void resetMergeResultBuf(SQuery *pQuery, SQLFunctionCtx *pCtx, SResultInfo *pResultInfo) {
  for (int32_t col = 0; col < pQuery->numOfOutput; ++col) {
    SQLFunctionCtx *pColCtx = &pCtx[col];

    // the output position starts one row in front of the page data
    pColCtx->aOutputBuf = pQuery->sdata[col]->data - pColCtx->outputBytes;
    pColCtx->size = 1;
    pColCtx->startOffset = 0;
    pColCtx->resultInfo = &pResultInfo[col];

    pQuery->sdata[col]->num = 0;
  }
}

2761 2762 2763 2764 2765 2766 2767
/**
 * Turn the scan state of one table around for the reverse scan: the window is cut
 * at the last scanned key, its boundaries are swapped and the cursor order is flipped.
 *
 * @param pQuery          query; its order is expected to be switched already
 * @param pTableQueryInfo table scan state, NULL is ignored
 */
static void updateTableQueryInfoForReverseScan(SQuery *pQuery, STableQueryInfo *pTableQueryInfo) {
  if (pTableQueryInfo != NULL) {
    // order has change already!
    int32_t direction = GET_FORWARD_DIRECTION_FACTOR(pQuery->order.order);

    // TODO validate how win.ekey relates to lastKey + direction for both scan orders

    pTableQueryInfo->win.ekey = pTableQueryInfo->lastKey + direction;
    SWAP(pTableQueryInfo->win.skey, pTableQueryInfo->win.ekey, TSKEY);

    // the reverse scan starts at the new window start
    pTableQueryInfo->lastKey = pTableQueryInfo->win.skey;

    SWITCH_ORDER(pTableQueryInfo->cur.order);
    pTableQueryInfo->cur.vgroupIndex = -1;
  }
}

/**
 * For every closed window result, select the functions that still have to run in
 * the reverse scan by (re)setting the "complete" flag of each output column.
 *
 * @param pQInfo         query handle
 * @param pWindowResInfo window results to update
 * @param order          scan order the flags are derived from
 */
static void disableFuncInReverseScanImpl(SQInfo* pQInfo, SWindowResInfo *pWindowResInfo, int32_t order) {
  SQuery* pQuery = pQInfo->runtimeEnv.pQuery;

  for (int32_t w = 0; w < pWindowResInfo->size; ++w) {
    SWindowStatus *pStatus = getTimeWindowResStatus(pWindowResInfo, w);
    if (pStatus->closed) {
      SWindowResult *pRes = getWindowResult(pWindowResInfo, w);

      // open/close the specified query for each group result
      for (int32_t col = 0; col < pQuery->numOfOutput; ++col) {
        int32_t functId = pQuery->pSelectExpr[col].base.functionId;

        bool isFirst = (functId == TSDB_FUNC_FIRST || functId == TSDB_FUNC_FIRST_DST);
        bool isLast = (functId == TSDB_FUNC_LAST || functId == TSDB_FUNC_LAST_DST);

        if ((isFirst && order == TSDB_ORDER_ASC) || (isLast && order == TSDB_ORDER_DESC)) {
          pRes->resultInfo[col].complete = false;  // still has to see data in this order
        } else if (functId != TSDB_FUNC_TS && functId != TSDB_FUNC_TAG) {
          pRes->resultInfo[col].complete = true;
        }
      }
    }
  }
}

2810 2811
/**
 * Prepare the function state for the reverse scan: decide per output column whether
 * its function still has to run, then turn the scan state of every table around.
 *
 * @param pQInfo query handle
 */
void disableFuncInReverseScan(SQInfo *pQInfo) {
  SQueryRuntimeEnv* pRuntimeEnv = &pQInfo->runtimeEnv;
  SQuery *pQuery = pRuntimeEnv->pQuery;
  int32_t order = pQuery->order.order;

  if (isGroupbyNormalCol(pQuery->pGroupbyExpr) || isIntervalQuery(pQuery)) {
    // group by normal columns and interval query on normal table
    disableFuncInReverseScanImpl(pQInfo, &pRuntimeEnv->windowResInfo, order);
  } else {
    // for simple result of table query, todo refactor
    for (int32_t col = 0; col < pQuery->numOfOutput; ++col) {
      int32_t functId = pQuery->pSelectExpr[col].base.functionId;

      SQLFunctionCtx *pCtx = &pRuntimeEnv->pCtx[col];
      if (pCtx->resultInfo != NULL) {  // resultInfo is NULL, means no data checked in previous scan
        bool isFirst = (functId == TSDB_FUNC_FIRST || functId == TSDB_FUNC_FIRST_DST);
        bool isLast = (functId == TSDB_FUNC_LAST || functId == TSDB_FUNC_LAST_DST);

        if ((isFirst && order == TSDB_ORDER_ASC) || (isLast && order == TSDB_ORDER_DESC)) {
          pCtx->resultInfo->complete = false;
        } else if (functId != TSDB_FUNC_TS && functId != TSDB_FUNC_TAG) {
          pCtx->resultInfo->complete = true;
        }
      }
    }
  }

  // every table of every group gets its window and cursor reversed
  int32_t numOfGroups = taosArrayGetSize(pQInfo->groupInfo.pGroupList);

  for (int32_t g = 0; g < numOfGroups; ++g) {
    SArray *group = taosArrayGetP(pQInfo->groupInfo.pGroupList, g);

    size_t numOfItems = taosArrayGetSize(group);
    for (int32_t t = 0; t < numOfItems; ++t) {
      SGroupItem *item = taosArrayGet(group, t);
      updateTableQueryInfoForReverseScan(pQuery, item->info);
    }
  }
}

2850
/**
 * Flip the scan order stored in the function context of every output column.
 *
 * @param pRuntimeEnv runtime environment owning the contexts
 */
void switchCtxOrder(SQueryRuntimeEnv *pRuntimeEnv) {
  SQLFunctionCtx *pCtx = pRuntimeEnv->pCtx;
  int32_t         numOfOutput = pRuntimeEnv->pQuery->numOfOutput;

  for (int32_t col = 0; col < numOfOutput; ++col) {
    SWITCH_ORDER(pCtx[col].order);
  }
}

/**
 * Set up a window result row: store its position and allocate one zeroed
 * SResultInfo per output column, then initialize them.
 *
 * @param pQuery        query describing the output columns
 * @param pResultRow    window result to set up
 * @param isSTableQuery passed through to setWindowResultInfo
 * @param posInfo       position that is copied into the row
 */
void createQueryResultInfo(SQuery *pQuery, SWindowResult *pResultRow, bool isSTableQuery, SPosInfo *posInfo) {
  size_t numOfCols = (size_t)pQuery->numOfOutput;

  pResultRow->pos = *posInfo;
  pResultRow->resultInfo = calloc(numOfCols, sizeof(SResultInfo));

  // set the intermediate result output buffer
  setWindowResultInfo(pResultRow->resultInfo, pQuery, isSTableQuery);
}

/**
 * Point every function context back to the start of its (zeroed) output buffer,
 * reset its result info and re-initialize the functions.
 *
 * @param pRuntimeEnv runtime environment to reset
 */
void resetCtxOutputBuf(SQueryRuntimeEnv *pRuntimeEnv) {
  SQuery *pQuery = pRuntimeEnv->pQuery;

  for (int32_t col = 0; col < pQuery->numOfOutput; ++col) {
    SQLFunctionCtx *pColCtx = &pRuntimeEnv->pCtx[col];
    int32_t         functionId = pQuery->pSelectExpr[col].base.functionId;

    pColCtx->aOutputBuf = pQuery->sdata[col]->data;

    /*
     * attach the result info of this column; not every function needs the
     * interResultBuf, e.g. COUNT/TAGPRJ/PRJ/TAG do not
     */
    resetResultInfo(&pRuntimeEnv->resultInfo[col]);
    pColCtx->resultInfo = &pRuntimeEnv->resultInfo[col];

    // top/bottom/diff write their timestamps into the output buffer of the first column
    bool needTsBuf =
        (functionId == TSDB_FUNC_TOP) || (functionId == TSDB_FUNC_BOTTOM) || (functionId == TSDB_FUNC_DIFF);
    if (needTsBuf) {
      pColCtx->ptsOutputBuf = pRuntimeEnv->pCtx[0].aOutputBuf;
    }

    memset(pQuery->sdata[col]->data, 0, (size_t)pQuery->pSelectExpr[col].bytes * pQuery->rec.capacity);
  }

  initCtxOutputBuf(pRuntimeEnv);
}

/**
 * Advance the output position of the function contexts by a number of rows and
 * reset their result info.
 *
 * @param pRuntimeEnv runtime environment owning the contexts
 * @param output      number of rows to move forward
 */
void forwardCtxOutputBuf(SQueryRuntimeEnv *pRuntimeEnv, int64_t output) {
  SQuery *pQuery = pRuntimeEnv->pQuery;

  // reset the execution contexts
  for (int32_t col = 0; col < pQuery->numOfOutput; ++col) {
    SQLFunctionCtx *pColCtx = &pRuntimeEnv->pCtx[col];

    int32_t functionId = pQuery->pSelectExpr[col].base.functionId;
    assert(functionId != TSDB_FUNC_DIFF);

    // set next output position
    if (IS_OUTER_FORWARD(aAggs[functionId].nStatus)) {
      pColCtx->aOutputBuf += pColCtx->outputBytes * output;
    }

    /*
     * NOTE: top/bottom assign the timestamps of the first output column themselves and
     * write them through ptsOutputBuf, so that buffer has to move forward as well.
     *
     * diff function is handled in multi-output function
     */
    if (functionId == TSDB_FUNC_TOP || functionId == TSDB_FUNC_BOTTOM) {
      pColCtx->ptsOutputBuf += TSDB_KEYSIZE * output;
    }

    resetResultInfo(pColCtx->resultInfo);
  }
}

/**
 * Reset the stage of every function context to 0 and run the init routine of its function.
 *
 * @param pRuntimeEnv runtime environment owning the contexts
 */
void initCtxOutputBuf(SQueryRuntimeEnv *pRuntimeEnv) {
  SQuery *pQuery = pRuntimeEnv->pQuery;

  for (int32_t col = 0; col < pQuery->numOfOutput; ++col) {
    SQLFunctionCtx *pColCtx = &pRuntimeEnv->pCtx[col];
    int32_t         functionId = pQuery->pSelectExpr[col].base.functionId;

    pColCtx->currentStage = 0;
    aAggs[functionId].init(pColCtx);
  }
}

2932
/**
 * Apply the remaining OFFSET of the query to the rows currently held in the output buffers.
 *
 * If the buffers hold no more rows than the remaining offset, all rows are dropped
 * and the offset is reduced accordingly. Otherwise the first "offset" rows are
 * removed, the rest is moved to the front of the buffers and the offset becomes 0.
 *
 * @param pRuntimeEnv runtime environment of the query
 */
void skipResults(SQueryRuntimeEnv *pRuntimeEnv) {
  SQuery *pQuery = pRuntimeEnv->pQuery;
  if (pQuery->rec.rows == 0 || pQuery->limit.offset == 0) {
    return;
  }

  if (pQuery->rec.rows <= pQuery->limit.offset) {
    // the values are cast so that the arguments match the conversion specifiers exactly
    qTrace("QInfo:%p skip rows:%" PRId64 ", new offset:%" PRId64, GET_QINFO_ADDR(pRuntimeEnv),
           (int64_t)pQuery->rec.rows, (int64_t)(pQuery->limit.offset - pQuery->rec.rows));

    pQuery->limit.offset -= pQuery->rec.rows;
    pQuery->rec.rows = 0;

    resetCtxOutputBuf(pRuntimeEnv);

    // clear the buffer full flag if exists
    CLEAR_QUERY_STATUS(pQuery, QUERY_RESBUF_FULL);
  } else {
    int64_t numOfSkip = pQuery->limit.offset;
    pQuery->rec.rows -= numOfSkip;
    pQuery->limit.offset = 0;

    qTrace("QInfo:%p skip row:%" PRId64 ", new offset:%d, numOfRows remain:%" PRId64, GET_QINFO_ADDR(pRuntimeEnv),
           numOfSkip, 0, (int64_t)pQuery->rec.rows);

    for (int32_t i = 0; i < pQuery->numOfOutput; ++i) {
      int32_t functionId = pQuery->pSelectExpr[i].base.functionId;
      int32_t bytes = pRuntimeEnv->pCtx[i].outputBytes;

      // drop the skipped rows; source and destination overlap, hence memmove
      memmove(pQuery->sdata[i]->data, (char*) pQuery->sdata[i]->data + bytes * numOfSkip, pQuery->rec.rows * bytes);

      // new results are appended behind the remaining rows
      pRuntimeEnv->pCtx[i].aOutputBuf = ((char*) pQuery->sdata[i]->data) + pQuery->rec.rows * bytes;

      if (functionId == TSDB_FUNC_DIFF || functionId == TSDB_FUNC_TOP || functionId == TSDB_FUNC_BOTTOM) {
        pRuntimeEnv->pCtx[i].ptsOutputBuf = pRuntimeEnv->pCtx[0].aOutputBuf;
      }
    }

    updateNumOfResult(pRuntimeEnv, pQuery->rec.rows);
  }
}

/**
 * Update the status of a query.
 *
 * QUERY_NOT_COMPLETED replaces the whole status; any other status is added to the
 * current one after QUERY_NOT_COMPLETED has been cleared.
 *
 * @param pQuery query to update
 * @param status status to set
 */
void setQueryStatus(SQuery *pQuery, int8_t status) {
  if (status != QUERY_NOT_COMPLETED) {
    // QUERY_NOT_COMPLETED is not compatible with any other status, so clear its position first
    CLEAR_QUERY_STATUS(pQuery, QUERY_NOT_COMPLETED);
    pQuery->status |= status;
    return;
  }

  pQuery->status = status;
}

/* Run xNextStep for every output column except ts columns; true if any result is not complete yet. */
static bool runNextStepOnAllOutputs(SQueryRuntimeEnv *pRuntimeEnv) {
  SQuery *pQuery = pRuntimeEnv->pQuery;
  bool    incomplete = false;

  for (int32_t col = 0; col < pQuery->numOfOutput; ++col) {
    int16_t functId = pQuery->pSelectExpr[col].base.functionId;
    if (functId == TSDB_FUNC_TS) {
      continue;
    }

    SQLFunctionCtx *pColCtx = &pRuntimeEnv->pCtx[col];
    aAggs[functId].xNextStep(pColCtx);

    SResultInfo *pResInfo = GET_RES_INFO(pColCtx);
    if (!pResInfo->complete) {
      incomplete = true;
    }
  }

  return incomplete;
}

/**
 * Move every function to its next step and check whether the data blocks have to
 * be scanned once more.
 *
 * For group-by-column and interval queries this is done for each closed window
 * result, otherwise once for the single result of the query.
 *
 * @param pRuntimeEnv runtime environment of the query
 * @return true if at least one function result is not complete yet
 */
bool needScanDataBlocksAgain(SQueryRuntimeEnv *pRuntimeEnv) {
  SQuery *pQuery = pRuntimeEnv->pQuery;

  if (!(isGroupbyNormalCol(pQuery->pGroupbyExpr) || isIntervalQuery(pQuery))) {
    return runNextStepOnAllOutputs(pRuntimeEnv);
  }

  bool toContinue = false;

  SWindowResInfo *pWindowResInfo = &pRuntimeEnv->windowResInfo;
  for (int32_t w = 0; w < pWindowResInfo->size; ++w) {
    SWindowResult *pResult = getWindowResult(pWindowResInfo, w);
    if (pResult->status.closed) {
      // the contexts have to work on the buffers of this window result
      setWindowResOutputBuf(pRuntimeEnv, pResult);

      if (runNextStepOnAllOutputs(pRuntimeEnv)) {
        toContinue = true;
      }
    }
  }

  return toContinue;
}

H
Haojun Liao 已提交
3029
/**
 * Take a snapshot of the query state at the start of a scan, so that it can be
 * restored after the repeat/reverse scans.
 *
 * @param pRuntimeEnv runtime environment of the query
 * @param start       key the scan starts at
 * @return snapshot holding the query status, the current window index, the start key,
 *         the query window and the window [start, ekey of the current table]
 */
static SQueryStatusInfo getQueryStatusInfo(SQueryRuntimeEnv *pRuntimeEnv, TSKEY start) {
  SQuery *pQuery = pRuntimeEnv->pQuery;
  STableQueryInfo* pCurrent = pQuery->current;

  // the start key must not lie behind the last scanned key in scan direction
  if (QUERY_IS_ASC_QUERY(pQuery)) {
    assert(start <= pCurrent->lastKey);
  } else {
    assert(start >= pCurrent->lastKey);
  }

  STimeWindow scanWindow = {.skey = start, .ekey = pCurrent->win.ekey};

  SQueryStatusInfo info = {
      .status      = pQuery->status,
      .windowIndex = pRuntimeEnv->windowResInfo.curIndex,
      .lastKey     = start,
      .w           = pQuery->window,
      .curWindow   = scanWindow,
  };

  return info;
}

3047 3048 3049 3050
/**
 * Switch the runtime environment to the reverse scan: save the ts buffer cursor,
 * reverse the query window and order, open a new secondary query handle and
 * reverse the function contexts and table scan states.
 *
 * @param pRuntimeEnv runtime environment of the query
 * @param pStatus     snapshot of the master scan; receives the saved ts buffer cursor
 */
static void setEnvBeforeReverseScan(SQueryRuntimeEnv *pRuntimeEnv, SQueryStatusInfo *pStatus) {
  SQInfo *pQInfo = GET_QINFO_ADDR(pRuntimeEnv);
  SQuery *pQuery = pRuntimeEnv->pQuery;

  // save the cursor, it is restored after the reverse scan
  pStatus->cur = tsBufGetCursor(pRuntimeEnv->pTSBuf);
  if (pRuntimeEnv->pTSBuf != NULL) {
    SWITCH_ORDER(pRuntimeEnv->pTSBuf->cur.order);
    tsBufNextPos(pRuntimeEnv->pTSBuf);
  }

  // reverse order time range
  pQuery->window = pStatus->curWindow;
  SWAP(pQuery->window.skey, pQuery->window.ekey, TSKEY);

  SWITCH_ORDER(pQuery->order.order);
  SET_REVERSE_SCAN_FLAG(pRuntimeEnv);

  STsdbQueryCond cond = {
      .twindow   = pQuery->window,
      .order     = pQuery->order.order,
      .colList   = pQuery->colList,
      .numOfCols = pQuery->numOfCols,
  };

  // clean unused handle
  if (pRuntimeEnv->pSecQueryHandle != NULL) {
    tsdbCleanupQueryHandle(pRuntimeEnv->pSecQueryHandle);
  }

  pRuntimeEnv->pSecQueryHandle = tsdbQueryTables(pQInfo->tsdb, &cond, &pQInfo->tableIdGroupInfo);

  setQueryStatus(pQuery, QUERY_NOT_COMPLETED);
  switchCtxOrder(pRuntimeEnv);
  disableFuncInReverseScan(pQInfo);
}

3083 3084
/**
 * Undo setEnvBeforeReverseScan: switch order and contexts back, restore the ts
 * buffer cursor and put the saved status, last key and window back in place.
 *
 * @param pRuntimeEnv runtime environment of the query
 * @param pStatus     snapshot taken before the reverse scan
 */
static void clearEnvAfterReverseScan(SQueryRuntimeEnv *pRuntimeEnv, SQueryStatusInfo *pStatus) {
  SQuery *pQuery = pRuntimeEnv->pQuery;
  STableQueryInfo* pCurrent = pQuery->current;

  SWITCH_ORDER(pQuery->order.order);
  switchCtxOrder(pRuntimeEnv);

  tsBufSetCursor(pRuntimeEnv->pTSBuf, &pStatus->cur);
  if (pRuntimeEnv->pTSBuf != NULL) {
    pRuntimeEnv->pTSBuf->cur.order = pQuery->order.order;
  }

  SET_MASTER_SCAN_FLAG(pRuntimeEnv);

  // the reverse scan changed key and windows to limit its scan scope, restore the saved values
  pQuery->status = pStatus->status;
  pCurrent->lastKey = pStatus->lastKey;

  pCurrent->win = pStatus->w;
  pQuery->window = pCurrent->win;
}

H
Haojun Liao 已提交
3106
/**
 * Scan the data blocks of the current table: one master scan, repeated scans as
 * long as the functions ask for them, and a final reverse scan if the query needs it.
 *
 * @param pRuntimeEnv runtime environment of the query
 * @param start       key the scan starts at
 */
void scanAllDataBlocks(SQueryRuntimeEnv *pRuntimeEnv, TSKEY start) {
  SQInfo *pQInfo = (SQInfo *) GET_QINFO_ADDR(pRuntimeEnv);
  SQuery *pQuery = pRuntimeEnv->pQuery;
  STableQueryInfo *pCurrent = pQuery->current;

  setQueryStatus(pQuery, QUERY_NOT_COMPLETED);

  // store the start query position
  SQueryStatusInfo saved = getQueryStatusInfo(pRuntimeEnv, start);

  SET_MASTER_SCAN_FLAG(pRuntimeEnv);
  int32_t step = GET_FORWARD_DIRECTION_FACTOR(pQuery->order.order);

  for (;;) {
    doScanAllDataBlocks(pRuntimeEnv);

    // remember where the master scan ended, the repeated scans cover the same range
    if (IS_MASTER_SCAN(pRuntimeEnv)) {
      saved.status = pQuery->status;
      saved.curWindow.ekey = pCurrent->lastKey - step;
      saved.lastKey = pCurrent->lastKey;
    }

    if (!needScanDataBlocksAgain(pRuntimeEnv)) {
      // restore the status code and jump out of loop
      if (pRuntimeEnv->scanFlag == REPEAT_SCAN) {
        pQuery->status = saved.status;
      }

      break;
    }

    STsdbQueryCond cond = {
        .twindow   = saved.curWindow,
        .order     = pQuery->order.order,
        .colList   = pQuery->colList,
        .numOfCols = pQuery->numOfCols,
    };

    // replace the secondary handle by one covering the range of the master scan
    if (pRuntimeEnv->pSecQueryHandle != NULL) {
      tsdbCleanupQueryHandle(pRuntimeEnv->pSecQueryHandle);
    }

    pRuntimeEnv->pSecQueryHandle = tsdbQueryTables(pQInfo->tsdb, &cond, &pQInfo->tableIdGroupInfo);
    pRuntimeEnv->windowResInfo.curIndex = saved.windowIndex;

    setQueryStatus(pQuery, QUERY_NOT_COMPLETED);
    pRuntimeEnv->scanFlag = REPEAT_SCAN;

    qTrace("QInfo:%p start to repeat scan data blocks due to query func required, qrange:%"PRId64"-%"PRId64, pQInfo,
        cond.twindow.skey, cond.twindow.ekey);

    // check if query is killed or not
    if (isQueryKilled(pQInfo)) {
      return;
    }
  }

  if (needReverseScan(pQuery)) {
    setEnvBeforeReverseScan(pRuntimeEnv, &saved);

    // reverse scan from current position
    qTrace("QInfo:%p start to reverse scan", pQInfo);
    doScanAllDataBlocks(pRuntimeEnv);

    clearEnvAfterReverseScan(pRuntimeEnv, &saved);
  }
}

H
hjxilinx 已提交
3176
/**
 * Run the finalize routine of every output function.
 *
 * For group-by-column and interval queries this is done for each closed window
 * result and the number of rows of the window is recorded; for a group-by-column
 * query all windows are closed first. Otherwise the functions are finalized once.
 *
 * @param pRuntimeEnv runtime environment of the query
 */
void finalizeQueryResult(SQueryRuntimeEnv *pRuntimeEnv) {
  SQuery *pQuery = pRuntimeEnv->pQuery;

  bool groupbyCol = isGroupbyNormalCol(pQuery->pGroupbyExpr);
  if (!groupbyCol && !isIntervalQuery(pQuery)) {
    for (int32_t col = 0; col < pQuery->numOfOutput; ++col) {
      aAggs[pQuery->pSelectExpr[col].base.functionId].xFinalize(&pRuntimeEnv->pCtx[col]);
    }

    return;
  }

  // for each group result, call the finalize function for each column
  SWindowResInfo *pWindowResInfo = &pRuntimeEnv->windowResInfo;
  if (groupbyCol) {
    closeAllTimeWindow(pWindowResInfo);
  }

  for (int32_t w = 0; w < pWindowResInfo->size; ++w) {
    SWindowResult *pRes = &pWindowResInfo->pResult[w];
    if (isWindowResClosed(pWindowResInfo, w)) {
      setWindowResOutputBuf(pRuntimeEnv, pRes);

      for (int32_t col = 0; col < pQuery->numOfOutput; ++col) {
        aAggs[pQuery->pSelectExpr[col].base.functionId].xFinalize(&pRuntimeEnv->pCtx[col]);
      }

      /*
       * record the number of output rows of this window; for group by normal columns
       * it usually is 1, except for the top and bottom query
       */
      pRes->numOfRows = getNumOfResult(pRuntimeEnv);
    }
  }
}

/**
 * Check whether the query has at least one output column that is not a
 * ts, tag or tagprj column.
 *
 * @param pQuery query to inspect
 * @return true if such a column exists
 */
static bool hasMainOutput(SQuery *pQuery) {
  int32_t col = 0;

  while (col < pQuery->numOfOutput) {
    int32_t functionId = pQuery->pSelectExpr[col].base.functionId;

    bool auxiliary =
        (functionId == TSDB_FUNC_TS) || (functionId == TSDB_FUNC_TAG) || (functionId == TSDB_FUNC_TAGPRJ);
    if (!auxiliary) {
      return true;
    }

    ++col;
  }

  return false;
}

weixin_48148422's avatar
weixin_48148422 已提交
3224 3225 3226 3227 3228
/*
 * Allocate and initialise the per-table query state.
 *
 * @param pRuntimeEnv runtime environment, forwarded to initWindowResInfo
 * @param tableId     id stored in the new object
 * @param win         query time window of the table; win.skey also becomes the initial lastKey
 * @return the new object, or NULL if the allocation failed. The object is released with
 *         destroyTableQueryInfo().
 */
static STableQueryInfo *createTableQueryInfo(
  SQueryRuntimeEnv *pRuntimeEnv,
  STableId tableId,
  STimeWindow win
) {
  STableQueryInfo *pTableQueryInfo = calloc(1, sizeof(STableQueryInfo));
  if (pTableQueryInfo == NULL) {
    // out of memory: report it to the caller instead of dereferencing a NULL pointer below
    return NULL;
  }

  pTableQueryInfo->win = win;
  pTableQueryInfo->lastKey = win.skey;

  pTableQueryInfo->id = tableId;
  pTableQueryInfo->cur.vgroupIndex = -1;  // -1: no ts-buffer cursor recorded yet (see setAdditionalInfo)

  initWindowResInfo(&pTableQueryInfo->windowResInfo, pRuntimeEnv, 100, 100, TSDB_DATA_TYPE_INT);
  return pTableQueryInfo;
}

3241
/*
 * Release a table query info object together with its window result info.
 * Passing NULL is a no-op.
 */
void destroyTableQueryInfo(STableQueryInfo *pTableQueryInfo, int32_t numOfCols) {
  if (pTableQueryInfo != NULL) {
    cleanupTimeWindowInfo(&pTableQueryInfo->windowResInfo, numOfCols);
    free(pTableQueryInfo);
  }
}

/*
 * Make the given table the current table of the query.
 *
 * Debug builds additionally check that lastKey has not moved behind the start key of the
 * table window with respect to the scan direction.
 */
void restoreIntervalQueryRange(SQueryRuntimeEnv *pRuntimeEnv, STableQueryInfo *pTableQueryInfo) {
  SQuery *pQuery = pRuntimeEnv->pQuery;
  pQuery->current = pTableQueryInfo;

  assert(QUERY_IS_ASC_QUERY(pQuery) ? (pTableQueryInfo->lastKey >= pTableQueryInfo->win.skey)
                                    : (pTableQueryInfo->lastKey <= pTableQueryInfo->win.skey));
}

/**
 * set output buffer for different group
 * @param pRuntimeEnv
3261
 * @param pDataBlockInfo
3262
 */
3263
void setExecutionContext(SQInfo *pQInfo, STableId* pTableId, int32_t groupIndex, TSKEY nextKey) {
3264
  SQueryRuntimeEnv *pRuntimeEnv = &pQInfo->runtimeEnv;
H
hjxilinx 已提交
3265 3266
  STableQueryInfo *pTableQueryInfo = pRuntimeEnv->pQuery->current;
  
3267 3268
  SWindowResInfo *  pWindowResInfo = &pRuntimeEnv->windowResInfo;
  int32_t           GROUPRESULTID = 1;
3269

3270
  SWindowResult *pWindowRes = doSetTimeWindowFromKey(pRuntimeEnv, pWindowResInfo, (char *)&groupIndex, sizeof(groupIndex));
3271 3272 3273
  if (pWindowRes == NULL) {
    return;
  }
3274

3275 3276 3277 3278 3279 3280 3281 3282 3283 3284
  /*
   * not assign result buffer yet, add new result buffer
   * all group belong to one result set, and each group result has different group id so set the id to be one
   */
  if (pWindowRes->pos.pageId == -1) {
    if (addNewWindowResultBuf(pWindowRes, pRuntimeEnv->pResultBuf, GROUPRESULTID, pRuntimeEnv->numOfRowsPerPage) !=
        TSDB_CODE_SUCCESS) {
      return;
    }
  }
3285

3286 3287
  setWindowResOutputBuf(pRuntimeEnv, pWindowRes);
  initCtxOutputBuf(pRuntimeEnv);
3288

3289
  pTableQueryInfo->lastKey = nextKey;
H
hjxilinx 已提交
3290
  setAdditionalInfo(pQInfo, pTableId, pTableQueryInfo);
3291 3292 3293 3294
}

/*
 * Point every output context of the runtime environment at the storage of the given
 * window result: the output buffer position inside the result page and the per-column
 * result info.
 */
static void setWindowResOutputBuf(SQueryRuntimeEnv *pRuntimeEnv, SWindowResult *pResult) {
  SQuery *pQuery = pRuntimeEnv->pQuery;

  for (int32_t col = 0; col < pQuery->numOfOutput; ++col) {
    SQLFunctionCtx *pColCtx = &pRuntimeEnv->pCtx[col];
    const int32_t   fid = pQuery->pSelectExpr[col].base.functionId;

    pColCtx->aOutputBuf = getPosInResultPage(pRuntimeEnv, col, pResult);

    // top/bottom/diff also write timestamps, which go to the output buffer of the first column
    const bool writesTimestamp = (fid == TSDB_FUNC_TOP) || (fid == TSDB_FUNC_BOTTOM) || (fid == TSDB_FUNC_DIFF);
    if (writesTimestamp) {
      pColCtx->ptsOutputBuf = pRuntimeEnv->pCtx[0].aOutputBuf;
    }

    /*
     * Attach the result info (and intermediate buffer) of this column. Not every function
     * needs the intermediate buffer, e.g. COUNT.
     */
    pColCtx->resultInfo = &pResult->resultInfo[col];

    // propagate the super table query flag; must follow the resultInfo assignment above
    GET_RES_INFO(pColCtx)->superTableQ = pRuntimeEnv->stableQuery;
  }
}

H
hjxilinx 已提交
3317
/*
 * Set the tag values of the given table and, when a timestamp buffer is attached to the
 * query, position that buffer for the table.
 *
 * The first time a table is seen (cursor vgroupIndex == -1) the start position is looked up
 * by the tag value of the first output context and the resulting cursor is remembered in the
 * table query info; afterwards the remembered cursor is restored.
 *
 * @return always 0
 */
int32_t setAdditionalInfo(SQInfo *pQInfo, STableId* pTableId, STableQueryInfo *pTableQueryInfo) {
  SQueryRuntimeEnv *pRuntimeEnv = &pQInfo->runtimeEnv;
  assert(pTableQueryInfo->lastKey >= 0);

  setTagVal(pRuntimeEnv, pTableId, pQInfo->tsdb);

  // both the master and the supplementary scan need the correct ts comp start position
  if (pRuntimeEnv->pTSBuf == NULL) {
    return 0;
  }

  if (pTableQueryInfo->cur.vgroupIndex != -1) {
    tsBufSetCursor(pRuntimeEnv->pTSBuf, &pTableQueryInfo->cur);
    return 0;
  }

  pTableQueryInfo->tag = pRuntimeEnv->pCtx[0].tag.i64Key;
  tsBufGetElemStartPos(pRuntimeEnv->pTSBuf, 0, pTableQueryInfo->tag);

  // remember the cursor of the current table for later scans
  pTableQueryInfo->cur = pRuntimeEnv->pTSBuf->cur;
  return 0;
}

/*
 * There are two cases to handle:
 *
 * 1. Query range is not set yet (queryRangeSet = 0). we need to set the query range info, including pQuery->lastKey,
 *    pQuery->window.skey, and pQuery->eKey.
 * 2. Query range is set and query is in progress. There may be another result with the same query ranges to be
 *    merged during merge stage. In this case, we need the pTableQueryInfo->lastResRows to decide if there
 *    is a previous result generated or not.
 */
H
hjxilinx 已提交
3349
void setIntervalQueryRange(SQInfo *pQInfo, TSKEY key) {
3350 3351
  SQueryRuntimeEnv *pRuntimeEnv = &pQInfo->runtimeEnv;
  SQuery *          pQuery = pRuntimeEnv->pQuery;
H
hjxilinx 已提交
3352 3353
  STableQueryInfo *pTableQueryInfo = pQuery->current;
  
3354 3355 3356
  if (pTableQueryInfo->queryRangeSet) {
    pTableQueryInfo->lastKey = key;
  } else {
3357
    pTableQueryInfo->win.skey = key;
3358
    STimeWindow win = {.skey = key, .ekey = pQuery->window.ekey};
3359

3360 3361 3362 3363 3364
    // for too small query range, no data in this interval.
    if ((QUERY_IS_ASC_QUERY(pQuery) && (pQuery->window.ekey < pQuery->window.skey)) ||
        (!QUERY_IS_ASC_QUERY(pQuery) && (pQuery->window.skey < pQuery->window.ekey))) {
      return;
    }
3365

3366 3367 3368 3369 3370 3371
    /**
     * In handling the both ascending and descending order super table query, we need to find the first qualified
     * timestamp of this table, and then set the first qualified start timestamp.
     * In ascending query, key is the first qualified timestamp. However, in the descending order query, additional
     * operations involve.
     */
3372 3373
    TSKEY           skey1, ekey1;
    STimeWindow     w = {0};
3374
    SWindowResInfo *pWindowResInfo = &pTableQueryInfo->windowResInfo;
3375

H
Haojun Liao 已提交
3376 3377 3378
    TSKEY sk = MIN(win.skey, win.ekey);
    TSKEY ek = MAX(win.skey, win.ekey);
    getAlignQueryTimeWindow(pQuery, win.skey, sk, ek, &skey1, &ekey1, &w);
3379
    pWindowResInfo->startTime = pTableQueryInfo->win.skey;  // windowSKey may be 0 in case of 1970 timestamp
3380

3381 3382
    if (pWindowResInfo->prevSKey == TSKEY_INITIAL_VAL) {
      if (!QUERY_IS_ASC_QUERY(pQuery)) {
H
Haojun Liao 已提交
3383
        assert(win.ekey == pQuery->window.ekey);
3384
      }
3385 3386
      
      pWindowResInfo->prevSKey = w.skey;
3387
    }
3388

3389
    pTableQueryInfo->queryRangeSet = 1;
3390
    pTableQueryInfo->lastKey = pTableQueryInfo->win.skey;
3391 3392 3393 3394
  }
}

/*
 * Return true if any output function of the query carries the TSDB_FUNCSTATE_NEED_TS
 * status flag.
 */
bool requireTimestamp(SQuery *pQuery) {
  bool needTs = false;

  for (int32_t col = 0; col < pQuery->numOfOutput && !needTs; ++col) {
    const int32_t fid = pQuery->pSelectExpr[col].base.functionId;
    needTs = (aAggs[fid].nStatus & TSDB_FUNCSTATE_NEED_TS) != 0;
  }

  return needTs;
}

/*
 * Decide whether the primary timestamp column of a data block has to be loaded.
 *
 * It is required when
 * 1. lastKey of the current table or the end key of the query window lies inside the time
 *    range of the block, so the precise position has to be located, or
 * 2. one of the output functions needs timestamps (see requireTimestamp).
 */
bool needPrimaryTimestampCol(SQuery *pQuery, SDataBlockInfo *pDataBlockInfo) {
  STimeWindow     *pBlockWin = &pDataBlockInfo->window;
  STableQueryInfo *pTableInfo = pQuery->current;

  if (pTableInfo->lastKey >= pBlockWin->skey && pTableInfo->lastKey <= pBlockWin->ekey) {
    return true;
  }

  if (pQuery->window.ekey >= pBlockWin->skey && pQuery->window.ekey <= pBlockWin->ekey) {
    return true;
  }

  return requireTimestamp(pQuery);
}

/*
 * Number of result subsets of the query: the number of closed time windows for
 * group-by-normal-column and interval queries, otherwise the number of table groups.
 */
static int32_t getNumOfSubset(SQInfo *pQInfo) {
  SQuery *pQuery = pQInfo->runtimeEnv.pQuery;

  const bool windowBased = isGroupbyNormalCol(pQuery->pGroupbyExpr) || (isIntervalQuery(pQuery));
  if (windowBased) {
    return numOfClosedTimeWindow(&pQInfo->runtimeEnv.windowResInfo);
  }

  return (int32_t)taosArrayGetSize(pQInfo->groupInfo.pGroupList);
}

/*
 * Copy rows from the window results into the output buffers of the query (pQuery->sdata),
 * starting at the group given by pQInfo->groupIndex and the row offset pQInfo->offset.
 *
 * Copying stops when the output buffers are full (pQuery->rec.capacity rows) or when all
 * subsets have been visited. pQInfo->groupIndex and pQInfo->offset are advanced so that a
 * later call continues where this one stopped.
 *
 * @param pQInfo    query info object
 * @param result    array of window results, indexed by subset
 * @param orderType TSDB_ORDER_ASC walks the subsets forward, anything else walks them backward
 * @return number of rows copied by this call
 */
static int32_t doCopyToSData(SQInfo *pQInfo, SWindowResult *result, int32_t orderType) {
  SQueryRuntimeEnv *pRuntimeEnv = &pQInfo->runtimeEnv;
  SQuery           *pQuery = pRuntimeEnv->pQuery;

  qTrace("QInfo:%p start to copy data from windowResInfo to query buf", pQInfo);

  const int32_t numOfSubsets = getNumOfSubset(pQInfo);
  const bool    forward = (orderType == TSDB_ORDER_ASC);
  const int32_t stride = forward ? 1 : -1;

  int32_t copied = 0;
  int32_t first = forward ? pQInfo->groupIndex : numOfSubsets - pQInfo->groupIndex - 1;

  for (int32_t g = first; (g >= 0) && (g < numOfSubsets); g += stride) {
    SWindowResult *pGroupRes = &result[g];

    // empty group: nothing to copy, move on to the next one
    if (pGroupRes->numOfRows == 0) {
      pQInfo->offset = 0;
      pQInfo->groupIndex += 1;
      continue;
    }

    assert(pGroupRes->numOfRows >= 0 && pQInfo->offset <= 1);

    int32_t rowsToCopy = pGroupRes->numOfRows - pQInfo->offset;
    int32_t srcRowOffset = pQInfo->offset;

    if (rowsToCopy > pQuery->rec.capacity - copied) {
      // not enough room for the whole group: copy what fits and remember where to resume
      rowsToCopy = pQuery->rec.capacity - copied;
      pQInfo->offset += rowsToCopy;
    } else {
      pQInfo->offset = 0;
      pQInfo->groupIndex += 1;
    }

    for (int32_t col = 0; col < pQuery->numOfOutput; ++col) {
      int32_t bytes = pRuntimeEnv->pCtx[col].outputBytes;

      char *src = getPosInResultPage(pRuntimeEnv, col, pGroupRes);
      char *dst = pQuery->sdata[col]->data + copied * bytes;
      memcpy(dst, src + srcRowOffset * bytes, bytes * rowsToCopy);
    }

    copied += rowsToCopy;
    if (copied == pQuery->rec.capacity) {
      break;  // output buffers are full
    }
  }

  qTrace("QInfo:%p copy data to query buf completed", pQInfo);

#ifdef _DEBUG_VIEW
  displayInterResult(pQuery->sdata, pRuntimeEnv, copied);
#endif
  return copied;
}

/**
 * Copy window results into the output buffers of the query, in ascending or descending
 * order, and add the number of copied rows to pQuery->rec.rows.
 *
 * The order is taken from the group-by expression when there is one; otherwise the results
 * are copied in ascending order (the original note states that interval results are already
 * ordered in SWindowResult, and that group-by results are all complete at this point).
 *
 * @param pQInfo  query info object
 * @param result  array of window results to copy from
 */
void copyFromWindowResToSData(SQInfo *pQInfo, SWindowResult *result) {
  SQuery *pQuery = pQInfo->runtimeEnv.pQuery;

  int32_t orderType = TSDB_ORDER_ASC;
  if (pQuery->pGroupbyExpr != NULL) {
    orderType = pQuery->pGroupbyExpr->orderType;
  }

  pQuery->rec.rows += doCopyToSData(pQInfo, result, orderType);

  assert(pQuery->rec.rows <= pQuery->rec.capacity);
}

H
hjxilinx 已提交
3516
/*
 * For queries without a time interval (intervalTime == 0), record the number of result rows
 * in the window result that belongs to the group of the given table. The value is written
 * only while the window result still reports zero rows.
 */
static void updateWindowResNumOfRes(SQueryRuntimeEnv *pRuntimeEnv, STableQueryInfo *pTableQueryInfo) {
  SQuery *pQuery = pRuntimeEnv->pQuery;

  // only the window result of the corresponding group is updated, and only for non-interval queries
  if (pQuery->intervalTime != 0) {
    return;
  }

  int32_t g = pTableQueryInfo->groupIndex;
  assert(pRuntimeEnv->windowResInfo.size > 0);

  SWindowResult *pWindowRes = doSetTimeWindowFromKey(pRuntimeEnv, &pRuntimeEnv->windowResInfo, (char *)&g, sizeof(g));
  if (pWindowRes == NULL) {
    // setExecutionContext treats NULL as a possible result of this lookup; do not dereference it
    return;
  }

  if (pWindowRes->numOfRows == 0) {
    pWindowRes->numOfRows = getNumOfResult(pRuntimeEnv);
  }
}

H
hjxilinx 已提交
3531
void stableApplyFunctionsOnBlock(SQueryRuntimeEnv *pRuntimeEnv, STableQueryInfo *pTableQueryInfo,
3532 3533 3534 3535
                                 SDataBlockInfo *pDataBlockInfo, SDataStatis *pStatis, SArray *pDataBlock,
                                 __block_search_fn_t searchFn) {
  SQuery *         pQuery = pRuntimeEnv->pQuery;
  SWindowResInfo * pWindowResInfo = &pTableQueryInfo->windowResInfo;
H
hjxilinx 已提交
3536 3537
  pQuery->pos = QUERY_IS_ASC_QUERY(pQuery)? 0 : pDataBlockInfo->rows - 1;
  
3538
  if (pQuery->numOfFilterCols > 0 || pRuntimeEnv->pTSBuf != NULL) {
3539
    rowwiseApplyFunctions(pRuntimeEnv, pStatis, pDataBlockInfo, pWindowResInfo, pDataBlock);
3540
  } else {
3541
    blockwiseApplyFunctions(pRuntimeEnv, pStatis, pDataBlockInfo, pWindowResInfo, searchFn, pDataBlock);
3542
  }
3543

H
hjxilinx 已提交
3544
  updateWindowResNumOfRes(pRuntimeEnv, pTableQueryInfo);
3545 3546
}

3547 3548 3549 3550
/*
 * Tell whether a fill query still has rows to hand out.
 *
 * Returns false for queries without fill, for point interpolation queries, and once the
 * row limit has been reached. Otherwise returns true if the fill info still holds remaining
 * rows, or - for a completed query - if filling up to the revised end key would produce
 * at least one row.
 */
bool queryHasRemainResults(SQueryRuntimeEnv* pRuntimeEnv) {
  SQuery    *pQuery = pRuntimeEnv->pQuery;
  SFillInfo *pFillInfo = pRuntimeEnv->pFillInfo;

  // todo refactor
  if (pQuery->fillType == TSDB_FILL_NONE || isPointInterpoQuery(pQuery)) {
    assert(pFillInfo == NULL);
    return false;
  }

  const bool limitReached = (pQuery->limit.limit > 0) && (pQuery->rec.rows >= pQuery->limit.limit);
  if (limitReached) {
    return false;
  }

  // Rows that were generated but not yet returned to the client have to be handled first.
  int32_t remain = taosNumOfRemainRows(pFillInfo);
  if (remain > 0) {
    return true;
  }

  /*
   * Nothing is pending at this point. If the query is not completed yet, the gap between two
   * result blocks is handled after the next data block has been retrieved from TSDB.
   *
   * NOTE: if the result set is not the first block, the gap in front of it is filled. For the
   * FIRST result block, the gap between the start of the query time window and the timestamp
   * of the first result row is not filled.
   */
  if (!Q_STATUS_EQUAL(pQuery->status, QUERY_COMPLETED)) {
    return false;
  }

  TSKEY ekey = taosGetRevisedEndKey(pQuery->window.ekey, pQuery->order.order, pQuery->slidingTime,
                                    pQuery->slidingTimeUnit, pQuery->precision);
  int32_t numOfTotal = taosGetNumOfResultWithFill(pFillInfo, remain, ekey, pQuery->rec.capacity);
  return numOfTotal > 0;
}

/*
 * Serialise the current result rows into a response buffer and update the query status.
 *
 * Layout written to data:
 *   1. for every output column, numOfRows values of pSelectExpr[col].bytes bytes each;
 *   2. the number of entries of pQInfo->arrTableIdInfo as a 32-bit integer in network order;
 *   3. one STableIdInfo per entry, with uid/tid/key converted to big-endian.
 *
 * When the query is completed, its status is set to QUERY_OVER once all tables have been
 * processed (super table query) or no fill results remain (table query).
 *
 * @param pQInfo    query info object
 * @param numOfRows number of rows to copy from every output column
 * @param data      destination buffer; the caller must provide enough room
 */
static void doCopyQueryResultToMsg(SQInfo *pQInfo, int32_t numOfRows, char *data) {
  SQuery *pQuery = pQInfo->runtimeEnv.pQuery;
  for (int32_t col = 0; col < pQuery->numOfOutput; ++col) {
    int32_t bytes = pQuery->pSelectExpr[col].bytes;

    memmove(data, pQuery->sdata[col]->data, bytes * numOfRows);
    data += bytes * numOfRows;
  }

  int32_t numOfTables = (int32_t)taosArrayGetSize(pQInfo->arrTableIdInfo);

  // data points to an arbitrary byte offset here, so the value is stored with memcpy rather
  // than through a cast int32_t pointer, which would be a potentially misaligned access
  int32_t netNumOfTables = htonl(numOfTables);
  memcpy(data, &netNumOfTables, sizeof(netNumOfTables));
  data += sizeof(int32_t);

  for(int32_t i = 0; i < numOfTables; i++) {
    STableIdInfo* pSrc = taosArrayGet(pQInfo->arrTableIdInfo, i);
    // NOTE(review): writing through this cast pointer assumes STableIdInfo has no alignment
    // requirement (e.g. is declared packed) - confirm against its declaration
    STableIdInfo* pDst = (STableIdInfo*)data;
    pDst->uid = htobe64(pSrc->uid);
    pDst->tid = htonl(pSrc->tid);
    pDst->key = htobe64(pSrc->key);
    data += sizeof(STableIdInfo);
  }

  // all data returned, set query over
  if (Q_STATUS_EQUAL(pQuery->status, QUERY_COMPLETED)) {
    if (pQInfo->runtimeEnv.stableQuery) {
      if (pQInfo->tableIndex >= pQInfo->groupInfo.numOfTables) {
        setQueryStatus(pQuery, QUERY_OVER);
      }
    } else {
      if (!queryHasRemainResults(&pQInfo->runtimeEnv)) {
        setQueryStatus(pQuery, QUERY_OVER);
      }
    }
  }
}

3622
/*
 * Generate filled result rows into pQuery->sdata and apply the query offset to them.
 *
 * Rows are generated in batches of at most pQuery->rec.capacity. While the remaining
 * offset is not smaller than a batch, the whole batch is discarded and the offset reduced;
 * when the offset falls inside a batch, the leading rows are dropped by moving the rest to
 * the front of the buffers in pDst.
 *
 * @param pRuntimeEnv  runtime environment of the query
 * @param pDst         per-column buffers in which rows are shifted when an offset applies
 * @param numOfRows    not used by this function
 * @param numOfInterpo not used by this function
 * @return number of rows available after the offset has been applied; 0 when everything
 *         generated so far was consumed by the offset and no results remain
 */
int32_t doFillGapsInResults(SQueryRuntimeEnv* pRuntimeEnv, tFilePage **pDst, int32_t numOfRows, int32_t *numOfInterpo) {
  SQInfo* pQInfo = GET_QINFO_ADDR(pRuntimeEnv);
  SQuery *pQuery = pRuntimeEnv->pQuery;
  SFillInfo* pFillInfo = pRuntimeEnv->pFillInfo;

  while (1) {
    int32_t ret = taosGenerateDataBlock(pFillInfo, (tFilePage**) pQuery->sdata, pQuery->rec.capacity);

    // todo apply limit output function
    /* reached the start position of according to offset value, return immediately */
    if (pQuery->limit.offset == 0) {
      qTrace("QInfo:%p initial numOfRows:%d, generate filled result:%d rows", pQInfo, pFillInfo->numOfRows, ret);
      return ret;
    }

    if (pQuery->limit.offset < ret) {
      // limit.offset is a 64-bit value (it is printed with PRId64 elsewhere in this file), and
      // so is the difference below; "%d" would mismatch the variadic arguments
      qTrace("QInfo:%p initial numOfRows:%d, generate filled result:%d rows, offset:%" PRId64
             ". Discard due to offset, remain:%" PRId64 ", new offset:%d",
             pQInfo, pFillInfo->numOfRows, ret, pQuery->limit.offset, ret - pQuery->limit.offset, 0);

      ret -= pQuery->limit.offset;
      // todo !!!!there exactly number of interpo is not valid.
      // todo refactor move to the beginning of buffer
      for (int32_t i = 0; i < pQuery->numOfOutput; ++i) {
        memmove(pDst[i]->data, pDst[i]->data + pQuery->pSelectExpr[i].bytes * pQuery->limit.offset,
                ret * pQuery->pSelectExpr[i].bytes);
      }

      pQuery->limit.offset = 0;
      return ret;
    } else {
      // the whole batch is consumed by the offset
      qTrace("QInfo:%p initial numOfRows:%d, generate filled result:%d rows, offset:%" PRId64
             ". Discard due to offset, "
             "remain:%d, new offset:%" PRId64, pQInfo, pFillInfo->numOfRows, ret, pQuery->limit.offset, 0,
             pQuery->limit.offset - ret);

      pQuery->limit.offset -= ret;
      pQuery->rec.rows = 0;
      ret = 0;
    }

    if (!queryHasRemainResults(pRuntimeEnv)) {
      return ret;
    }
  }
}

3667
/*
 * Write the cost summary of a query to the trace log: number of checked data blocks,
 * loaded block statistics, rows in the blocks and rows actually checked.
 *
 * todo add the intermediate result save cost!!
 */
void queryCostStatis(SQInfo *pQInfo) {
  SQueryCostInfo *pCost = &pQInfo->runtimeEnv.summary;

  qTrace("QInfo:%p cost: check blocks:%d, statis:%d, rows:%" PRId64 ", check rows:%" PRId64, pQInfo, pCost->dataBlocks,
         pCost->loadBlockStatis, pCost->dataInRows, pCost->checkRows);
}

3712 3713
/*
 * Consume the remaining query offset inside the given data block.
 *
 * If the offset equals the number of rows of the block, the whole block is skipped and
 * lastKey of the current table is moved one step past the block. Otherwise the start
 * position inside the block is computed from the offset, lastKey is set to the timestamp
 * at that position, and the query functions are applied to the rest of the block. In both
 * cases the offset is reset to 0.
 */
static void updateOffsetVal(SQueryRuntimeEnv *pRuntimeEnv, SDataBlockInfo *pBlockInfo) {
  SQuery          *pQuery = pRuntimeEnv->pQuery;
  STableQueryInfo *pTableInfo = pQuery->current;

  const int32_t step = GET_FORWARD_DIRECTION_FACTOR(pQuery->order.order);

  // the offset covers exactly this block: ignore the block completely
  if (pQuery->limit.offset == pBlockInfo->rows) {
    pTableInfo->lastKey = (QUERY_IS_ASC_QUERY(pQuery) ? pBlockInfo->window.ekey : pBlockInfo->window.skey) + step;
    pQuery->limit.offset = 0;
    return;
  }

  pQuery->pos = QUERY_IS_ASC_QUERY(pQuery) ? pQuery->limit.offset : pBlockInfo->rows - pQuery->limit.offset - 1;
  assert(pQuery->pos >= 0 && pQuery->pos <= pBlockInfo->rows - 1);

  // column 0 of the retrieved block is read as the timestamp column
  SArray          *pDataBlock = tsdbRetrieveDataBlock(pRuntimeEnv->pQueryHandle, NULL);
  SColumnInfoData *pTsCol = taosArrayGet(pDataBlock, 0);
  TSKEY           *tsList = (TSKEY *)pTsCol->pData;

  pTableInfo->lastKey = tsList[pQuery->pos];
  pQuery->limit.offset = 0;

  int32_t numOfRes = tableApplyFunctionsOnBlock(pRuntimeEnv, pBlockInfo, NULL, binarySearchForKey, pDataBlock);

  qTrace("QInfo:%p check data block, brange:%" PRId64 "-%" PRId64 ", numOfRows:%d, numOfRes:%d, lastKey:%" PRId64,
         GET_QINFO_ADDR(pRuntimeEnv), pBlockInfo->window.skey, pBlockInfo->window.ekey, pBlockInfo->rows, numOfRes,
         pQuery->current->lastKey);
}
3747

3748 3749 3750 3751 3752
/*
 * Skip data blocks according to the query offset.
 *
 * Nothing is done when there is no offset or when the query has column filters. Blocks
 * that contain fewer rows than the remaining offset are skipped entirely; the first block
 * that does not is handed to updateOffsetVal, which consumes the rest of the offset. The
 * loop also ends when the query has been killed or no more blocks are available.
 */
void skipBlocks(SQueryRuntimeEnv *pRuntimeEnv) {
  SQuery *pQuery = pRuntimeEnv->pQuery;

  const bool skipNeeded = (pQuery->limit.offset > 0) && (pQuery->numOfFilterCols <= 0);
  if (!skipNeeded) {
    return;
  }

  pQuery->pos = 0;
  const int32_t step = GET_FORWARD_DIRECTION_FACTOR(pQuery->order.order);

  STableQueryInfo* pTableInfo = pQuery->current;
  TsdbQueryHandleT handle = pRuntimeEnv->pQueryHandle;

  while (tsdbNextDataBlock(handle)) {
    if (isQueryKilled(GET_QINFO_ADDR(pRuntimeEnv))) {
      return;
    }

    SDataBlockInfo block = tsdbRetrieveDataBlockInfo(handle);

    if (pQuery->limit.offset <= block.rows) {
      // the start position is inside this block
      updateOffsetVal(pRuntimeEnv, &block);
      break;
    }

    // the whole block is covered by the offset: skip it and move lastKey past it
    pQuery->limit.offset -= block.rows;
    pTableInfo->lastKey = (QUERY_IS_ASC_QUERY(pQuery)) ? block.window.ekey : block.window.skey;
    pTableInfo->lastKey += step;

    qTrace("QInfo:%p skip rows:%d, offset:%" PRId64, GET_QINFO_ADDR(pRuntimeEnv), block.rows,
           pQuery->limit.offset);
  }
}
3781

H
Haojun Liao 已提交
3782
/*
 * Apply the query offset to an interval query by skipping whole time windows.
 *
 * *start is first set to lastKey of the current table. Nothing is skipped when there is no
 * offset, or when the query has column filters, a timestamp buffer or fill info. Otherwise
 * data blocks are walked and the offset is decreased by one for every time window that ends
 * inside the current block. Once the offset reaches 0, the start of the query is moved to
 * the next window: if that window begins in the current block, the block is loaded, the
 * query start is reset to the first qualified row and the functions are applied to the
 * block; if not, only the window start is recorded.
 *
 * @param pRuntimeEnv runtime environment of the query
 * @param start       receives the timestamp from which the query continues
 * @return always true
 */
static bool skipTimeInterval(SQueryRuntimeEnv *pRuntimeEnv, TSKEY* start) {
  SQuery *pQuery = pRuntimeEnv->pQuery;
  *start = pQuery->current->lastKey;

  // if queried with value filter, do NOT forward query start position
  const bool nothingToSkip = (pQuery->limit.offset <= 0) || (pQuery->numOfFilterCols > 0) ||
                             (pRuntimeEnv->pTSBuf != NULL) || (pRuntimeEnv->pFillInfo != NULL);
  if (nothingToSkip) {
    return true;
  }

  /*
   * For an interval query without interpolation the offset counts time windows. Since holes
   * may exist, intervalTime * offset is not a valid distance; windows are walked one by one.
   */
  assert(pRuntimeEnv->windowResInfo.prevSKey == TSKEY_INITIAL_VAL);

  SWindowResInfo  *pWinInfo = &pRuntimeEnv->windowResInfo;
  STableQueryInfo *pTableInfo = pQuery->current;

  TSKEY       alignedSKey, alignedEKey;  // out-parameters of getAlignQueryTimeWindow, not used here
  STimeWindow aligned = TSWINDOW_INITIALIZER;

  while (tsdbNextDataBlock(pRuntimeEnv->pQueryHandle)) {
    SDataBlockInfo blk = tsdbRetrieveDataBlockInfo(pRuntimeEnv->pQueryHandle);

    if (!QUERY_IS_ASC_QUERY(pQuery)) {
      getAlignQueryTimeWindow(pQuery, blk.window.ekey, pQuery->window.ekey, blk.window.ekey, &alignedSKey,
                              &alignedEKey, &aligned);

      pWinInfo->startTime = pQuery->window.skey;
      pWinInfo->prevSKey = aligned.skey;
    } else if (pWinInfo->prevSKey == TSKEY_INITIAL_VAL) {
      getAlignQueryTimeWindow(pQuery, blk.window.skey, blk.window.skey, pQuery->window.ekey, &alignedSKey,
                              &alignedEKey, &aligned);

      pWinInfo->startTime = aligned.skey;
      pWinInfo->prevSKey = aligned.skey;
    }

    // the first time window
    STimeWindow curWin = getActiveTimeWindow(pWinInfo, pWinInfo->prevSKey, pQuery);

    while (pQuery->limit.offset > 0) {
      const bool curWinEndsInBlock = QUERY_IS_ASC_QUERY(pQuery) ? (curWin.ekey <= blk.window.ekey)
                                                                : (curWin.ekey >= blk.window.skey);
      if (curWinEndsInBlock) {
        pQuery->limit.offset -= 1;
        pWinInfo->prevSKey = curWin.skey;
      }

      STimeWindow nextWin = curWin;
      getNextTimeWindow(pQuery, &nextWin);

      const bool nextWinInBlock = QUERY_IS_ASC_QUERY(pQuery) ? (nextWin.skey <= blk.window.ekey)
                                                             : (nextWin.ekey >= blk.window.skey);

      if (!nextWinInBlock) {
        if (pQuery->limit.offset != 0) {
          break;  // offset is not 0, and next time window begins or ends in the next block.
        }

        // offset consumed and the next window lies outside this block: just record its start
        *start = nextWin.skey;
        pQuery->window.skey = nextWin.skey;
        pWinInfo->prevSKey = nextWin.skey;
        return true;
      }

      /*
       * The next time window still starts in the current data block: load the block and find
       * the start position of the next queried time window.
       * TODO optimize performance: only the primary timestamp column is required here.
       */
      SArray          *pDataBlock = tsdbRetrieveDataBlock(pRuntimeEnv->pQueryHandle, NULL);
      SColumnInfoData *pTsCol = taosArrayGet(pDataBlock, 0);

      nextWin = curWin;
      int32_t startPos = getNextQualifiedWindow(pRuntimeEnv, &nextWin, &blk, pTsCol->pData, binarySearchForKey);
      assert(startPos >= 0);

      // set the abort info
      pQuery->pos = startPos;

      if (pQuery->limit.offset != 0) {
        // more windows to skip: advance to the located window and keep going
        pTableInfo->lastKey = ((TSKEY *)pTsCol->pData)[startPos];
        pWinInfo->prevSKey = nextWin.skey;
        curWin = nextWin;
        continue;
      }

      // offset consumed: reset the query start timestamp to the first qualified row
      pTableInfo->win.skey = ((TSKEY *)pTsCol->pData)[startPos];
      pQuery->window.skey = pTableInfo->win.skey;
      *start = pTableInfo->win.skey;

      pWinInfo->prevSKey = nextWin.skey;

      // applying the functions may move the window index; it is restored afterwards
      int32_t savedIndex = pRuntimeEnv->windowResInfo.curIndex;
      int32_t numOfRes = tableApplyFunctionsOnBlock(pRuntimeEnv, &blk, NULL, binarySearchForKey, pDataBlock);
      pRuntimeEnv->windowResInfo.curIndex = savedIndex;

      qTrace("QInfo:%p check data block, brange:%" PRId64 "-%" PRId64 ", numOfRows:%d, numOfRes:%d, lastKey:%" PRId64,
             GET_QINFO_ADDR(pRuntimeEnv), blk.window.skey, blk.window.ekey, blk.rows, numOfRes,
             pQuery->current->lastKey);

      return true;
    }
  }

  return true;
}

B
Bomin Zhang 已提交
3905 3906
/*
 * Create the TSDB query handle of the runtime environment.
 *
 * No handle is created for tag-only queries, nor for super table queries that are neither
 * interval nor fixed-output queries. For a plain ascending scan of a single table the time
 * window of that table replaces the query window in the condition. The handle type depends
 * on the query: last-row, external-window (point interpolation) or regular table query.
 */
static void setupQueryHandle(void* tsdb, SQInfo* pQInfo, bool isSTableQuery) {
  SQueryRuntimeEnv *pRuntimeEnv = &pQInfo->runtimeEnv;
  SQuery           *pQuery = pQInfo->runtimeEnv.pQuery;

  if (onlyQueryTags(pQuery)) {
    return;
  }

  if (isSTableQuery && (!isIntervalQuery(pQuery)) && (!isFixedOutputQuery(pQuery))) {
    return;
  }

  STsdbQueryCond cond = {
    .twindow   = pQuery->window,
    .order     = pQuery->order.order,
    .colList   = pQuery->colList,
    .numOfCols = pQuery->numOfCols,
  };

  const bool plainSingleTableAscScan = !isSTableQuery
      && (pQInfo->groupInfo.numOfTables == 1)
      && (cond.order == TSDB_ORDER_ASC)
      && (!isIntervalQuery(pQuery))
      && (!isGroupbyNormalCol(pQuery->pGroupbyExpr))
      && (!isFixedOutputQuery(pQuery));

  if (plainSingleTableAscScan) {
    // use the time window of the only table of the first group
    SArray     *pFirstGroup = taosArrayGetP(pQInfo->groupInfo.pGroupList, 0);
    SGroupItem *pFirstItem = taosArrayGet(pFirstGroup, 0);
    cond.twindow = pFirstItem->info->win;
  }

  if (isFirstLastRowQuery(pQuery)) {
    pRuntimeEnv->pQueryHandle = tsdbQueryLastRow(tsdb, &cond, &pQInfo->tableIdGroupInfo);
    return;
  }

  if (isPointInterpoQuery(pQuery)) {
    pRuntimeEnv->pQueryHandle = tsdbQueryRowsInExternalWindow(tsdb, &cond, &pQInfo->tableIdGroupInfo);
    return;
  }

  pRuntimeEnv->pQueryHandle = tsdbQueryTables(tsdb, &cond, &pQInfo->tableIdGroupInfo);
}

3945 3946 3947 3948 3949 3950 3951 3952 3953 3954 3955 3956 3957
/**
 * Build the per-column descriptor array consumed by the fill (interpolation) module.
 *
 * One entry is produced for every output expression of the query; the entries are laid out
 * back to back, i.e. the offset of column i is the sum of the bytes of columns 0..i-1.
 *
 * @param pQuery  query whose numOfOutput, pSelectExpr and fillVal are read
 * @return        array of pQuery->numOfOutput entries allocated with calloc(), or NULL if the
 *                allocation fails
 */
static SFillColInfo* taosCreateFillColInfo(SQuery* pQuery) {
  int32_t numOfCols = pQuery->numOfOutput;
  int32_t offset = 0;

  SFillColInfo* pFillCol = calloc(numOfCols, sizeof(SFillColInfo));
  if (pFillCol == NULL) {  // out of memory: do not write through a NULL pointer
    return NULL;
  }

  for (int32_t i = 0; i < numOfCols; ++i) {
    SExprInfo* pExprInfo = &pQuery->pSelectExpr[i];

    pFillCol[i].col.bytes  = pExprInfo->bytes;
    pFillCol[i].col.type   = pExprInfo->type;
    pFillCol[i].col.offset = offset;
    pFillCol[i].flag       = TSDB_COL_NORMAL;  // always a normal column for table query
    pFillCol[i].functionId = pExprInfo->base.functionId;
    pFillCol[i].fillVal.i  = pQuery->fillVal[i];

    offset += pExprInfo->bytes;
  }

  return pFillCol;
}

3966
/**
 * Initialize the runtime state of a query before its first execution: scan order, tsdb query
 * handle, runtime environment, result buffer / window result info and the fill info.
 *
 * @param pQInfo         query info to initialize
 * @param param          stored as pRuntimeEnv->pTSBuf; may be NULL
 * @param tsdb           storage handle, stored in pQInfo->tsdb
 * @param vgId           vgroup id, stored in pQInfo->vgId
 * @param isSTableQuery  true if the query is issued against a super table
 * @return TSDB_CODE_SUCCESS, or the error code returned by setupQueryRuntimeEnv() or
 *         createDiskbasedResultBuffer()
 */
int32_t doInitQInfo(SQInfo *pQInfo, void *param, void *tsdb, int32_t vgId, bool isSTableQuery) {
  SQueryRuntimeEnv *pRuntimeEnv = &pQInfo->runtimeEnv;
  SQuery           *pQuery = pQInfo->runtimeEnv.pQuery;

  setScanLimitationByResultBuffer(pQuery);
  changeExecuteScanOrder(pQuery, false);
  setupQueryHandle(tsdb, pQInfo, isSTableQuery);

  pQInfo->tsdb = tsdb;
  pQInfo->vgId = vgId;

  pRuntimeEnv->pQuery = pQuery;
  pRuntimeEnv->pTSBuf = param;
  pRuntimeEnv->cur.vgroupIndex = -1;
  pRuntimeEnv->stableQuery = isSTableQuery;

  if (param != NULL) {
    // traverse the ts buffer forward when its order matches the query order, backward otherwise
    int16_t order = TSDB_ORDER_DESC;
    if (pQuery->order.order == pRuntimeEnv->pTSBuf->tsOrder) {
      order = TSDB_ORDER_ASC;
    }

    tsBufSetTraverseOrder(pRuntimeEnv->pTSBuf, order);
  }

  // create runtime environment
  int32_t code = setupQueryRuntimeEnv(pRuntimeEnv, pQuery->order.order);
  if (code != TSDB_CODE_SUCCESS) {
    return code;
  }

  pRuntimeEnv->numOfRowsPerPage = getNumOfRowsInResultPage(pQuery, isSTableQuery);

  // super table queries, group-by-column queries and interval queries need the disk-based result buffer
  if (isSTableQuery || isGroupbyNormalCol(pQuery->pGroupbyExpr) || isIntervalQuery(pQuery)) {
    int32_t numOfPages = getInitialPageNum(pQInfo);

    code = createDiskbasedResultBuffer(&pRuntimeEnv->pResultBuf, numOfPages, pQuery->rowSize, pQInfo);
    if (code != TSDB_CODE_SUCCESS) {
      return code;
    }

    if (isSTableQuery) {
      if (pQuery->intervalTime == 0) {
        // keyed by the group-by column if there is one, otherwise by the (integer) group id
        int16_t keyType = TSDB_DATA_TYPE_INT;
        if (isGroupbyNormalCol(pQuery->pGroupbyExpr)) {  // group by columns not tags
          keyType = getGroupbyColumnType(pQuery, pQuery->pGroupbyExpr);
        }

        initWindowResInfo(&pRuntimeEnv->windowResInfo, pRuntimeEnv, 512, 4096, keyType);
      }
    } else {
      // keyed by the group-by column if there is one, otherwise by the window timestamp
      int16_t keyType = TSDB_DATA_TYPE_TIMESTAMP;
      if (isGroupbyNormalCol(pQuery->pGroupbyExpr)) {
        keyType = getGroupbyColumnType(pQuery, pQuery->pGroupbyExpr);
      }

      initWindowResInfo(&pRuntimeEnv->windowResInfo, pRuntimeEnv, numOfPages, 4096, keyType);
    }
  }

  setQueryStatus(pQuery, QUERY_NOT_COMPLETED);

  // NOTE: the special handling of last_row queries without a query range
  // (normalizeUnBoundLastRowQuery) is disabled; the initial query time range is kept unchanged.

  if (pQuery->fillType != TSDB_FILL_NONE && !isPointInterpoQuery(pQuery)) {
    SFillColInfo *pFillCols = taosCreateFillColInfo(pQuery);
    pRuntimeEnv->pFillInfo = taosInitFillInfo(pQuery->order.order, 0, 0, pQuery->rec.capacity, pQuery->numOfOutput,
                                              pQuery->slidingTime, pQuery->fillType, pFillCols);
  }

  return TSDB_CODE_SUCCESS;
}

4056
/**
 * Check whether at least one group-by column is flagged as TSDB_COL_TAG.
 *
 * @param pGroupbyExpr  group-by expression, may be NULL
 * @param pSidset       not used
 * @return true if a group-by column with flag == TSDB_COL_TAG exists; false otherwise, including
 *         when there is no group-by expression or it has no columns
 */
static UNUSED_FUNC bool isGroupbyEachTable(SSqlGroupbyExpr *pGroupbyExpr, STableGroupInfo *pSidset) {
  bool hasTagCol = false;

  if (pGroupbyExpr != NULL && pGroupbyExpr->numOfGroupCols != 0) {
    int32_t idx = 0;

    // stop at the first tag column
    while (!hasTagCol && idx < pGroupbyExpr->numOfGroupCols) {
      SColIndex *pCol = taosArrayGet(pGroupbyExpr->columnInfo, idx);
      hasTagCol = (pCol->flag == TSDB_COL_TAG);
      idx += 1;
    }
  }

  return hasTagCol;
}

4071
/**
 * Clear the `complete` flag of the result info of every output function context, so that the
 * functions can be executed again on the next table.
 *
 * @param pRuntimeEnv  runtime environment holding the function contexts (pCtx)
 */
static void enableExecutionForNextTable(SQueryRuntimeEnv *pRuntimeEnv) {
  int32_t numOfOutput = pRuntimeEnv->pQuery->numOfOutput;

  for (int32_t k = 0; k < numOfOutput; ++k) {
    SResultInfo *pInfo = GET_RES_INFO(&pRuntimeEnv->pCtx[k]);
    if (pInfo == NULL) {
      continue;
    }

    pInfo->complete = false;
  }
}

H
hjxilinx 已提交
4082
/**
 * Scan all data blocks returned by the current query handle (the master handle during the master
 * scan, the secondary handle otherwise) and apply the query functions on each of them.
 *
 * For every block the owning table is looked up in pQInfo->groupInfo by table id, the execution
 * context is switched to that table, the block is loaded and the functions are applied.
 * The loop stops early when the query is killed. If setAdditionalInfo() fails, the error is stored
 * in pQInfo->code and the function returns immediately.
 *
 * @param pQInfo  query info
 * @return elapsed time in milliseconds
 */
static int64_t queryOnDataBlocks(SQInfo *pQInfo) {
  SQueryRuntimeEnv *pRuntimeEnv = &pQInfo->runtimeEnv;
  SQuery *          pQuery = pRuntimeEnv->pQuery;

  int64_t st = taosGetTimestampMs();

  TsdbQueryHandleT pQueryHandle = IS_MASTER_SCAN(pRuntimeEnv)? pRuntimeEnv->pQueryHandle : pRuntimeEnv->pSecQueryHandle;

  while (tsdbNextDataBlock(pQueryHandle)) {
    if (isQueryKilled(pQInfo)) {
      break;
    }

    SDataBlockInfo   blockInfo = tsdbRetrieveDataBlockInfo(pQueryHandle);
    STableQueryInfo *pTableQueryInfo = NULL;

    // todo opt performance using hash table
    size_t numOfGroup = taosArrayGetSize(pQInfo->groupInfo.pGroupList);
    for (size_t i = 0; i < numOfGroup && pTableQueryInfo == NULL; ++i) {
      SArray *group = taosArrayGetP(pQInfo->groupInfo.pGroupList, i);

      size_t num = taosArrayGetSize(group);
      for (size_t j = 0; j < num; ++j) {
        SGroupItem *     item = taosArrayGet(group, j);
        STableQueryInfo *pInfo = item->info;

        if (pInfo->id.tid == blockInfo.tid) {
          assert(pInfo->id.uid == blockInfo.uid);
          pTableQueryInfo = item->info;
          break;
        }
      }
    }

    assert(pTableQueryInfo != NULL);
    restoreIntervalQueryRange(pRuntimeEnv, pTableQueryInfo);

    // use the query log instead of writing to stdout for every data block
    qTrace("QInfo:%p table:%d, groupIndex:%d", pQInfo, pTableQueryInfo->id.tid, pTableQueryInfo->groupIndex);

    SDataStatis *pStatis = NULL;
    SArray *pDataBlock = loadDataBlockOnDemand(pRuntimeEnv, pQueryHandle, &blockInfo, &pStatis);

    if (!isIntervalQuery(pQuery)) {
      int32_t step = QUERY_IS_ASC_QUERY(pQuery)? 1:-1;
      setExecutionContext(pQInfo, &pTableQueryInfo->id, pTableQueryInfo->groupIndex, blockInfo.window.ekey + step);
    } else {  // interval query
      TSKEY nextKey = blockInfo.window.skey;
      setIntervalQueryRange(pQInfo, nextKey);

      int32_t ret = setAdditionalInfo(pQInfo, &pTableQueryInfo->id, pTableQueryInfo);
      if (ret != TSDB_CODE_SUCCESS) {
        pQInfo->code = ret;
        return taosGetTimestampMs() - st;
      }
    }

    stableApplyFunctionsOnBlock(pRuntimeEnv, pTableQueryInfo, &blockInfo, pStatis, pDataBlock, binarySearchForKey);
  }

  int64_t et = taosGetTimestampMs();
  return et - st;
}

4150 4151
/**
 * Prepare the runtime environment for querying the table at position `index` of the first
 * table group: set the tag values, replace the current query handle by one that covers only
 * this table, position the ts buffer (if any) and initialize the output buffers.
 *
 * @param pQInfo  query info
 * @param index   position of the table inside the first group of pQInfo->groupInfo
 * @return false if a ts buffer is attached, no cursor has been recorded yet
 *         (cur.vgroupIndex == -1) and tsBufGetElemStartPos() yields a negative vnode for the
 *         current tag value; true otherwise
 */
static bool multiTableMultioutputHelper(SQInfo *pQInfo, int32_t index) {
  SQueryRuntimeEnv *pEnv = &pQInfo->runtimeEnv;
  SQuery *          pQuery = pEnv->pQuery;

  setQueryStatus(pQuery, QUERY_NOT_COMPLETED);

  SArray *    pFirstGroup = taosArrayGetP(pQInfo->groupInfo.pGroupList, 0);
  SGroupItem *pItem = taosArrayGet(pFirstGroup, index);

  setTagVal(pEnv, &pItem->id, pQInfo->tsdb);

  qTrace("QInfo:%p query on (%d): uid:%" PRIu64 ", tid:%d, qrange:%" PRId64 "-%" PRId64, pQInfo, index,
         pItem->id.uid, pItem->id.tid, pItem->info->lastKey, pItem->info->win.ekey);

  // the scan range is [lastKey, ekey] of this table's query info
  STsdbQueryCond cond = {
      .numOfCols = pQuery->numOfCols,
      .colList   = pQuery->colList,
      .order     = pQuery->order.order,
      .twindow   = {pItem->info->lastKey, pItem->info->win.ekey},
  };

  // todo refactor: wrap the id of the current table into a temporary single-group list
  SArray *pGroupList = taosArrayInit(1, POINTER_BYTES);
  SArray *pTableIds = taosArrayInit(1, sizeof(STableId));

  taosArrayPush(pTableIds, &pItem->info->id);
  taosArrayPush(pGroupList, &pTableIds);
  STableGroupInfo singleTable = {.numOfTables = 1, .pGroupList = pGroupList};

  // drop the handle of the previous table, then open one that includes only the current table
  if (pEnv->pQueryHandle != NULL) {
    tsdbCleanupQueryHandle(pEnv->pQueryHandle);
    pEnv->pQueryHandle = NULL;
  }

  pEnv->pQueryHandle = tsdbQueryTables(pQInfo->tsdb, &cond, &singleTable);
  taosArrayDestroy(pTableIds);
  taosArrayDestroy(pGroupList);

  if (pEnv->pTSBuf != NULL) {
    if (pEnv->cur.vgroupIndex != -1) {
      tsBufSetCursor(pEnv->pTSBuf, &pEnv->cur);
    } else {
      int64_t tag = pEnv->pCtx[0].tag.i64Key;
      STSElem elem = tsBufGetElemStartPos(pEnv->pTSBuf, 0, tag);

      // failed to find data with the specified tag value
      if (elem.vnode < 0) {
        return false;
      }
    }
  }

  initCtxOutputBuf(pEnv);
  return true;
}

/**
 * super table query handler
 * 1. super table projection query, group-by on normal columns query, ts-comp query
 * 2. point interpolation query, last row query
 *
 * @param pQInfo
 */
4213
static void sequentialTableProcess(SQInfo *pQInfo) {
4214
  SQueryRuntimeEnv *pRuntimeEnv = &pQInfo->runtimeEnv;
4215
  SQuery *          pQuery = pRuntimeEnv->pQuery;
4216
  setQueryStatus(pQuery, QUERY_COMPLETED);
4217

4218
  size_t numOfGroups = taosArrayGetSize(pQInfo->groupInfo.pGroupList);
4219

H
Haojun Liao 已提交
4220
  if (isPointInterpoQuery(pQuery) || isFirstLastRowQuery(pQuery)) {
4221 4222
    resetCtxOutputBuf(pRuntimeEnv);
    assert(pQuery->limit.offset == 0 && pQuery->limit.limit != 0);
4223

4224 4225
    while (pQInfo->groupIndex < numOfGroups) {
      SArray* group = taosArrayGetP(pQInfo->groupInfo.pGroupList, pQInfo->groupIndex);
4226

H
Haojun Liao 已提交
4227 4228 4229 4230 4231 4232 4233 4234 4235 4236 4237 4238 4239 4240 4241 4242 4243 4244 4245 4246 4247 4248
      qTrace("QInfo:%p last_row query on group:%d, total group:%d, current group:%d", pQInfo, pQInfo->groupIndex,
             numOfGroups);

      STsdbQueryCond cond = {
          .twindow = pQuery->window,
          .colList = pQuery->colList,
          .order   = pQuery->order.order,
          .numOfCols = pQuery->numOfCols,
      };

      SArray *g1 = taosArrayInit(1, POINTER_BYTES);
      SArray *tx = taosArrayClone(group);
      taosArrayPush(g1, &tx);
      
      STableGroupInfo gp = {.numOfTables = taosArrayGetSize(tx), .pGroupList = g1};

      // include only current table
      if (pRuntimeEnv->pQueryHandle != NULL) {
        tsdbCleanupQueryHandle(pRuntimeEnv->pQueryHandle);
        pRuntimeEnv->pQueryHandle = NULL;
      }
      
4249
      if (isFirstLastRowQuery(pQuery)) {
4250
        pRuntimeEnv->pQueryHandle = tsdbQueryLastRow(pQInfo->tsdb, &cond, &gp);
H
Haojun Liao 已提交
4251 4252
      } else {
        pRuntimeEnv->pQueryHandle = tsdbQueryRowsInExternalWindow(pQInfo->tsdb, &cond, &gp);
4253
      }
H
Haojun Liao 已提交
4254 4255
      
      initCtxOutputBuf(pRuntimeEnv);
4256 4257 4258 4259 4260 4261 4262 4263 4264
      
      SArray* s = tsdbGetQueriedTableIdList(pRuntimeEnv->pQueryHandle);
      assert(taosArrayGetSize(s) >= 1);
      
      setTagVal(pRuntimeEnv, (STableId*) taosArrayGet(s, 0), pQInfo->tsdb);
      
      if (isFirstLastRowQuery(pQuery)) {
        assert(taosArrayGetSize(s) == 1);
      }
H
Haojun Liao 已提交
4265 4266 4267 4268 4269 4270 4271 4272 4273 4274 4275 4276 4277 4278 4279 4280
      
      // here we simply set the first table as current table
      pQuery->current = ((SGroupItem*) taosArrayGet(group, 0))->info;
      scanAllDataBlocks(pRuntimeEnv, pQuery->current->lastKey);
      
      int64_t numOfRes = getNumOfResult(pRuntimeEnv);
      if (numOfRes > 0) {
        pQuery->rec.rows += numOfRes;
        forwardCtxOutputBuf(pRuntimeEnv, numOfRes);
      }
      
      skipResults(pRuntimeEnv);
      pQInfo->groupIndex += 1;

      // enable execution for next table, when handling the projection query
      enableExecutionForNextTable(pRuntimeEnv);
4281 4282 4283
    }
  } else {
    /*
4284
     * 1. super table projection query, 2. group-by on normal columns query, 3. ts-comp query
4285 4286 4287
     * if the subgroup index is larger than 0, results generated by group by tbname,k is existed.
     * we need to return it to client in the first place.
     */
4288
    if (pQInfo->groupIndex > 0) {
4289
      copyFromWindowResToSData(pQInfo, pRuntimeEnv->windowResInfo.pResult);
4290
      pQuery->rec.total += pQuery->rec.rows;
4291

4292
      if (pQuery->rec.rows > 0) {
4293 4294 4295
        return;
      }
    }
4296

4297 4298
    // all data have returned already
    if (pQInfo->tableIndex >= pQInfo->groupInfo.numOfTables) {
4299 4300
      return;
    }
4301

4302 4303
    resetCtxOutputBuf(pRuntimeEnv);
    resetTimeWindowInfo(pRuntimeEnv, &pRuntimeEnv->windowResInfo);
4304 4305 4306 4307 4308

    SArray *group = taosArrayGetP(pQInfo->groupInfo.pGroupList, 0);
    assert(taosArrayGetSize(group) == pQInfo->groupInfo.numOfTables &&
           1 == taosArrayGetSize(pQInfo->groupInfo.pGroupList));

4309
    while (pQInfo->tableIndex < pQInfo->groupInfo.numOfTables) {
4310
      if (isQueryKilled(pQInfo)) {
4311 4312
        return;
      }
4313

H
hjxilinx 已提交
4314
      SGroupItem *item = taosArrayGet(group, pQInfo->tableIndex);
H
hjxilinx 已提交
4315
      pQuery->current = item->info;
H
hjxilinx 已提交
4316
      
4317
      if (!multiTableMultioutputHelper(pQInfo, pQInfo->tableIndex)) {
4318
        pQInfo->tableIndex++;
4319 4320
        continue;
      }
4321

H
hjxilinx 已提交
4322
      // TODO handle the limit offset problem
4323
      if (pQuery->numOfFilterCols == 0 && pQuery->limit.offset > 0) {
4324
        //        skipBlocks(pRuntimeEnv);
4325 4326
        if (Q_STATUS_EQUAL(pQuery->status, QUERY_COMPLETED)) {
          pQInfo->tableIndex++;
4327 4328 4329
          continue;
        }
      }
4330

H
Haojun Liao 已提交
4331
      scanAllDataBlocks(pRuntimeEnv, pQuery->current->lastKey);
4332
      skipResults(pRuntimeEnv);
4333

4334
      // the limitation of output result is reached, set the query completed
4335
      if (limitResults(pRuntimeEnv)) {
4336
        pQInfo->tableIndex = pQInfo->groupInfo.numOfTables;
4337 4338
        break;
      }
4339

4340 4341
      // enable execution for next table, when handling the projection query
      enableExecutionForNextTable(pRuntimeEnv);
4342

4343
      if (Q_STATUS_EQUAL(pQuery->status, QUERY_COMPLETED)) {
4344 4345 4346 4347 4348 4349
        /*
         * query range is identical in terms of all meters involved in query,
         * so we need to restore them at the *beginning* of query on each meter,
         * not the consecutive query on meter on which is aborted due to buffer limitation
         * to ensure that, we can reset the query range once query on a meter is completed.
         */
4350
        pQInfo->tableIndex++;
weixin_48148422's avatar
weixin_48148422 已提交
4351 4352 4353 4354

        STableIdInfo tidInfo;
        tidInfo.uid = item->id.uid;
        tidInfo.tid = item->id.tid;
weixin_48148422's avatar
weixin_48148422 已提交
4355
        tidInfo.key = pQuery->current->lastKey;
weixin_48148422's avatar
weixin_48148422 已提交
4356 4357
        taosArrayPush(pQInfo->arrTableIdInfo, &tidInfo);

4358
        // if the buffer is full or group by each table, we need to jump out of the loop
4359 4360
        if (Q_STATUS_EQUAL(pQuery->status, QUERY_RESBUF_FULL) /*||
            isGroupbyEachTable(pQuery->pGroupbyExpr, pSupporter->pSidSet)*/) {
4361 4362
          break;
        }
4363

4364
      } else {
4365
        // all data in the result buffer are skipped due to the offset, continue to retrieve data from current meter
4366 4367
        if (pQuery->rec.rows == 0) {
          assert(!Q_STATUS_EQUAL(pQuery->status, QUERY_RESBUF_FULL));
4368 4369
          continue;
        } else {
4370 4371 4372
          // buffer is full, wait for the next round to retrieve data from current meter
          assert(Q_STATUS_EQUAL(pQuery->status, QUERY_RESBUF_FULL));
          break;
4373 4374 4375 4376
        }
      }
    }
  }
4377

4378 4379 4380 4381 4382 4383 4384 4385 4386 4387 4388 4389
  /*
   * 1. super table projection query, group-by on normal columns query, ts-comp query
   * 2. point interpolation query, last row query
   *
   * group-by on normal columns query and last_row query do NOT invoke the finalizer here,
   * since the finalize stage will be done at the client side.
   *
   * projection query, point interpolation query do not need the finalizer.
   *
   * Only the ts-comp query requires the finalizer function to be executed here.
   */
  if (isTSCompQuery(pQuery)) {
H
hjxilinx 已提交
4390
    finalizeQueryResult(pRuntimeEnv);
4391
  }
4392

4393 4394 4395
  if (pRuntimeEnv->pTSBuf != NULL) {
    pRuntimeEnv->cur = pRuntimeEnv->pTSBuf->cur;
  }
4396

4397 4398 4399
  // todo refactor
  if (isGroupbyNormalCol(pQuery->pGroupbyExpr)) {
    SWindowResInfo *pWindowResInfo = &pRuntimeEnv->windowResInfo;
4400

4401 4402 4403
    for (int32_t i = 0; i < pWindowResInfo->size; ++i) {
      SWindowStatus *pStatus = &pWindowResInfo->pResult[i].status;
      pStatus->closed = true;  // enable return all results for group by normal columns
4404

4405
      SWindowResult *pResult = &pWindowResInfo->pResult[i];
4406
      for (int32_t j = 0; j < pQuery->numOfOutput; ++j) {
4407 4408 4409
        pResult->numOfRows = MAX(pResult->numOfRows, pResult->resultInfo[j].numOfRes);
      }
    }
4410

4411
    pQInfo->groupIndex = 0;
4412
    pQuery->rec.rows = 0;
4413 4414
    copyFromWindowResToSData(pQInfo, pWindowResInfo->pResult);
  }
4415 4416

  qTrace(
H
Haojun Liao 已提交
4417
      "QInfo %p numOfTables:%d, index:%d, numOfGroups:%d, %d points returned, total:%"PRId64", offset:%" PRId64,
4418 4419
      pQInfo, pQInfo->groupInfo.numOfTables, pQInfo->tableIndex, numOfGroups, pQuery->rec.rows, pQuery->rec.total,
      pQuery->limit.offset);
4420 4421
}

4422 4423 4424 4425
/**
 * Switch the query into reverse-scan mode: mark the runtime as reverse scan, swap the bounds of
 * the query window, flip the query order (and propagate it to the ts buffer cursor), open the
 * secondary query handle for the flipped window, reset the query status to QUERY_NOT_COMPLETED,
 * switch the order of the function contexts and call disableFuncInReverseScan().
 *
 * @param pQInfo  query info
 */
static void doSaveContext(SQInfo *pQInfo) {
  SQueryRuntimeEnv *pEnv = &pQInfo->runtimeEnv;
  SQuery *          pQuery = pEnv->pQuery;

  SET_REVERSE_SCAN_FLAG(pEnv);

  // reverse both the time range and the scan direction
  SWAP(pQuery->window.skey, pQuery->window.ekey, TSKEY);
  SWITCH_ORDER(pQuery->order.order);

  if (pEnv->pTSBuf != NULL) {
    pEnv->pTSBuf->cur.order = pQuery->order.order;
  }

  // release the handle of a previous reverse scan, if any
  if (pEnv->pSecQueryHandle != NULL) {
    tsdbCleanupQueryHandle(pEnv->pSecQueryHandle);
  }

  STsdbQueryCond reverseCond = {
      .numOfCols = pQuery->numOfCols,
      .colList   = pQuery->colList,
      .order     = pQuery->order.order,
      .twindow   = pQuery->window,
  };

  pEnv->pSecQueryHandle = tsdbQueryTables(pQInfo->tsdb, &reverseCond, &pQInfo->tableIdGroupInfo);

  setQueryStatus(pQuery, QUERY_NOT_COMPLETED);
  switchCtxOrder(pEnv);
  disableFuncInReverseScan(pQInfo);
}

4453 4454 4455 4456
/**
 * Undo the changes made by doSaveContext() once the reverse scan is finished: swap the query
 * window bounds back, flip the ts buffer cursor order, the function context order and the query
 * order back, and mark the runtime as master scan again.
 *
 * @param pQInfo  query info
 */
static void doRestoreContext(SQInfo *pQInfo) {
  SQueryRuntimeEnv *pRuntimeEnv = &pQInfo->runtimeEnv;
  SQuery *          pQuery = pRuntimeEnv->pQuery;

  SWAP(pQuery->window.skey, pQuery->window.ekey, TSKEY);

  if (pRuntimeEnv->pTSBuf != NULL) {
    SWITCH_ORDER(pRuntimeEnv->pTSBuf->cur.order);
  }

  switchCtxOrder(pRuntimeEnv);

  // doSaveContext() flips pQuery->order.order in addition to calling switchCtxOrder(); flip it
  // back here as well, otherwise the query keeps the reversed order after the reverse scan
  SWITCH_ORDER(pQuery->order.order);

  SET_MASTER_SCAN_FLAG(pRuntimeEnv);
}

4467 4468 4469
/**
 * Close all time windows after a scan.
 *
 * For an interval query the window result info of every table in every group is closed;
 * otherwise the window result info of the runtime environment (the group results) is closed.
 *
 * @param pQInfo  query info
 */
static void doCloseAllTimeWindowAfterScan(SQInfo *pQInfo) {
  SQuery *pQuery = pQInfo->runtimeEnv.pQuery;

  if (!isIntervalQuery(pQuery)) {  // close results for group result
    closeAllTimeWindow(&pQInfo->runtimeEnv.windowResInfo);
    return;
  }

  size_t numOfGroup = taosArrayGetSize(pQInfo->groupInfo.pGroupList);
  for (int32_t g = 0; g < numOfGroup; ++g) {
    SArray *pTables = taosArrayGetP(pQInfo->groupInfo.pGroupList, g);

    size_t numOfTables = taosArrayGetSize(pTables);
    for (int32_t t = 0; t < numOfTables; ++t) {
      SGroupItem *pItem = taosArrayGet(pTables, t);
      closeAllTimeWindow(&pItem->info->windowResInfo);
    }
  }
}

/**
 * Execute a multi-table query: master scan over all data blocks, optional reverse scan, then
 * merge/copy of the results into the output buffer.
 *
 * If pQInfo->groupIndex > 0 on entry, no scan is performed and only the next part of the
 * already computed results is copied into the output buffer.
 * The function returns early, without producing results, if the query is killed or pQInfo->code
 * reports an error after either scan.
 *
 * @param pQInfo  query info
 */
static void multiTableQueryProcess(SQInfo *pQInfo) {
  SQueryRuntimeEnv *pRuntimeEnv = &pQInfo->runtimeEnv;
  SQuery *          pQuery = pRuntimeEnv->pQuery;

  if (pQInfo->groupIndex > 0) {
    /*
     * if the groupIndex > 0, the query process must be completed yet, we only need to
     * copy the data into output buffer
     */
    if (isIntervalQuery(pQuery)) {
      copyResToQueryResultBuf(pQInfo, pQuery);

#ifdef _DEBUG_VIEW
      displayInterResult(pQuery->sdata, pRuntimeEnv, pQuery->sdata[0]->num);
#endif
    } else {
      copyFromWindowResToSData(pQInfo, pRuntimeEnv->windowResInfo.pResult);
    }

    qTrace("QInfo:%p current:%" PRId64 ", total:%" PRId64, pQInfo, (int64_t)pQuery->rec.rows,
           (int64_t)pQuery->rec.total);
    return;
  }

  qTrace("QInfo:%p query start, qrange:%" PRId64 "-%" PRId64 ", order:%d, forward scan start", pQInfo,
         pQuery->window.skey, pQuery->window.ekey, pQuery->order.order);

  // do check all qualified data blocks
  int64_t el = queryOnDataBlocks(pQInfo);
  qTrace("QInfo:%p master scan completed, elapsed time: %" PRId64 "ms, reverse scan start", pQInfo, el);

  // query error occurred or query is killed, abort current execution
  if (pQInfo->code != TSDB_CODE_SUCCESS || isQueryKilled(pQInfo)) {
    qTrace("QInfo:%p query killed or error occurred, code:%d, abort", pQInfo, pQInfo->code);
    return;
  }

  // close all time window results
  doCloseAllTimeWindowAfterScan(pQInfo);

  if (needReverseScan(pQuery)) {
    doSaveContext(pQInfo);

    el = queryOnDataBlocks(pQInfo);
    qTrace("QInfo:%p reversed scan completed, elapsed time: %" PRId64 "ms", pQInfo, el);

    doRestoreContext(pQInfo);
  } else {
    qTrace("QInfo:%p no need to do reversed scan, query completed", pQInfo);
  }

  setQueryStatus(pQuery, QUERY_COMPLETED);

  if (pQInfo->code != TSDB_CODE_SUCCESS || isQueryKilled(pQInfo)) {
    qTrace("QInfo:%p query killed or error occurred, code:%d, abort", pQInfo, pQInfo->code);
    return;
  }

  if (isIntervalQuery(pQuery) || isSumAvgRateQuery(pQuery)) {
    if (mergeIntoGroupResult(pQInfo) == TSDB_CODE_SUCCESS) {
      copyResToQueryResultBuf(pQInfo, pQuery);

#ifdef _DEBUG_VIEW
      displayInterResult(pQuery->sdata, pRuntimeEnv, pQuery->sdata[0]->num);
#endif
    }
  } else {  // not a interval query
    copyFromWindowResToSData(pQInfo, pRuntimeEnv->windowResInfo.pResult);
  }

  // handle the limitation of output buffer
  // the counters are printed as 64-bit values; the casts keep arguments and specifiers in sync
  qTrace("QInfo:%p points returned:%" PRId64 ", total:%" PRId64, pQInfo, (int64_t)pQuery->rec.rows,
         (int64_t)(pQuery->rec.total + pQuery->rec.rows));
}

/*
 * in each query, this function will be called only once, no retry for further result.
 *
 * select count(*)/top(field,k)/avg(field name) from table_name [where ts>now-1a];
 * select count(*) from table_name group by status_column;
 */
H
hjxilinx 已提交
4569
static void tableFixedOutputProcess(SQInfo *pQInfo, STableQueryInfo* pTableInfo) {
4570
  SQueryRuntimeEnv *pRuntimeEnv = &pQInfo->runtimeEnv;
H
hjxilinx 已提交
4571 4572
  
  SQuery *pQuery = pRuntimeEnv->pQuery;
H
Haojun Liao 已提交
4573 4574 4575 4576
  if (!isTopBottomQuery(pQuery) && pQuery->limit.offset > 0) {  // no need to execute, since the output will be ignore.
    return;
  }
  
H
hjxilinx 已提交
4577 4578
  pQuery->current = pTableInfo;  // set current query table info
  
H
Haojun Liao 已提交
4579
  scanAllDataBlocks(pRuntimeEnv, pTableInfo->lastKey);
H
hjxilinx 已提交
4580
  finalizeQueryResult(pRuntimeEnv);
4581

4582
  if (isQueryKilled(pQInfo)) {
4583 4584
    return;
  }
4585

H
Haojun Liao 已提交
4586
  // since the numOfRows must be identical for all sql functions that are allowed to be executed simutaneously.
4587
  pQuery->rec.rows = getNumOfResult(pRuntimeEnv);
4588

4589
  skipResults(pRuntimeEnv);
4590
  limitResults(pRuntimeEnv);
4591 4592
}

H
hjxilinx 已提交
4593
/**
 * Handler for single-table queries that may produce multiple output rows.
 *
 * Scans the data blocks of pTableInfo repeatedly until either some rows survive the offset or
 * the query is completed, then applies the limit. When the query completes, the last key of the
 * table is appended to pQInfo->arrTableIdInfo.
 *
 * @param pQInfo      query info
 * @param pTableInfo  table to query; becomes pQuery->current
 */
static void tableMultiOutputProcess(SQInfo *pQInfo, STableQueryInfo* pTableInfo) {
  SQueryRuntimeEnv *pRuntimeEnv = &pQInfo->runtimeEnv;

  SQuery *pQuery = pRuntimeEnv->pQuery;
  pQuery->current = pTableInfo;

  // for ts_comp query, re-initialized is not allowed
  if (!isTSCompQuery(pQuery)) {
    resetCtxOutputBuf(pRuntimeEnv);
  }

  // skip blocks without load the actual data block from file if no filter condition present
  skipBlocks(&pQInfo->runtimeEnv);
  if (pQuery->limit.offset > 0 && pQuery->numOfFilterCols == 0) {
    setQueryStatus(pQuery, QUERY_COMPLETED);
    return;
  }

  while (1) {
    scanAllDataBlocks(pRuntimeEnv, pQuery->current->lastKey);
    finalizeQueryResult(pRuntimeEnv);

    if (isQueryKilled(pQInfo)) {
      return;
    }

    pQuery->rec.rows = getNumOfResult(pRuntimeEnv);
    if (pQuery->limit.offset > 0 && pQuery->numOfFilterCols > 0 && pQuery->rec.rows > 0) {
      skipResults(pRuntimeEnv);
    }

    /*
     * 1. if pQuery->size == 0, pQuery->limit.offset >= 0, still need to check data
     * 2. if pQuery->size > 0, pQuery->limit.offset must be 0
     */
    if (pQuery->rec.rows > 0 || Q_STATUS_EQUAL(pQuery->status, QUERY_COMPLETED)) {
      break;
    }

    // every conversion specifier has a matching argument
    qTrace("QInfo:%p skip current result, offset:%" PRId64 ", next qrange:%" PRId64 "-%" PRId64,
           pQInfo, pQuery->limit.offset, pQuery->current->lastKey, pQuery->window.ekey);

    resetCtxOutputBuf(pRuntimeEnv);
  }

  limitResults(pRuntimeEnv);
  if (Q_STATUS_EQUAL(pQuery->status, QUERY_RESBUF_FULL)) {
    qTrace("QInfo:%p query paused due to output limitation, next qrange:%" PRId64 "-%" PRId64, pQInfo,
        pQuery->current->lastKey, pQuery->window.ekey);
  } else if (Q_STATUS_EQUAL(pQuery->status, QUERY_COMPLETED)) {
    // record the last key reached on the finished table
    STableIdInfo tidInfo;
    tidInfo.uid = pQuery->current->id.uid;
    tidInfo.tid = pQuery->current->id.tid;
    tidInfo.key = pQuery->current->lastKey;
    taosArrayPush(pQInfo->arrTableIdInfo, &tidInfo);
  }

  if (!isTSCompQuery(pQuery)) {
    assert(pQuery->rec.rows <= pQuery->rec.capacity);
  }
}

H
Haojun Liao 已提交
4655
/**
 * Scan the data blocks of the current table for an interval query until the query status is
 * QUERY_COMPLETED or QUERY_RESBUF_FULL, finalizing the results after every scan.
 * Returns immediately if the query is killed.
 *
 * @param pRuntimeEnv  runtime environment of the query
 * @param start        start key passed to scanAllDataBlocks()
 */
static void tableIntervalProcessImpl(SQueryRuntimeEnv *pRuntimeEnv, TSKEY start) {
  SQuery *pQuery = pRuntimeEnv->pQuery;

  do {
    scanAllDataBlocks(pRuntimeEnv, start);

    if (isQueryKilled(GET_QINFO_ADDR(pRuntimeEnv))) {
      return;
    }

    assert(!Q_STATUS_EQUAL(pQuery->status, QUERY_NOT_COMPLETED));
    finalizeQueryResult(pRuntimeEnv);

    // here we can ignore the records in case of no interpolation
    // todo handle offset, in case of top/bottom interval query
    bool hasFilterOrTsBuf = (pQuery->numOfFilterCols > 0 || pRuntimeEnv->pTSBuf != NULL);
    if (hasFilterOrTsBuf && pQuery->limit.offset > 0 && pQuery->fillType == TSDB_FILL_NONE) {
      // consume the offset by discarding closed time windows
      int32_t numOfClosed = numOfClosedTimeWindow(&pRuntimeEnv->windowResInfo);
      int32_t numToDrop = MIN(numOfClosed, pQuery->limit.offset);

      clearFirstNTimeWindow(pRuntimeEnv, numToDrop);
      pQuery->limit.offset -= numToDrop;
    }
  } while (!Q_STATUS_EQUAL(pQuery->status, QUERY_COMPLETED | QUERY_RESBUF_FULL));
}

4686
// handle time interval query on table
H
hjxilinx 已提交
4687
static void tableIntervalProcess(SQInfo *pQInfo, STableQueryInfo* pTableInfo) {
4688 4689
  SQueryRuntimeEnv *pRuntimeEnv = &(pQInfo->runtimeEnv);

H
hjxilinx 已提交
4690 4691
  SQuery *pQuery = pRuntimeEnv->pQuery;
  pQuery->current = pTableInfo;
4692

H
Haojun Liao 已提交
4693 4694 4695
  int32_t numOfInterpo = 0;
  TSKEY newStartKey = TSKEY_INITIAL_VAL;
  
4696
  // skip blocks without load the actual data block from file if no filter condition present
H
Haojun Liao 已提交
4697
  skipTimeInterval(pRuntimeEnv, &newStartKey);
4698
  if (pQuery->limit.offset > 0 && pQuery->numOfFilterCols == 0 && pRuntimeEnv->pFillInfo == NULL) {
4699 4700 4701 4702
    setQueryStatus(pQuery, QUERY_COMPLETED);
    return;
  }

4703
  while (1) {
H
Haojun Liao 已提交
4704
    tableIntervalProcessImpl(pRuntimeEnv, newStartKey);
4705

H
hjxilinx 已提交
4706
    if (isIntervalQuery(pQuery)) {
4707
      pQInfo->groupIndex = 0;  // always start from 0
4708
      pQuery->rec.rows = 0;
4709
      copyFromWindowResToSData(pQInfo, pRuntimeEnv->windowResInfo.pResult);
4710

4711
      clearFirstNTimeWindow(pRuntimeEnv, pQInfo->groupIndex);
4712
    }
4713

4714
    // the offset is handled at prepare stage if no interpolation involved
4715
    if (pQuery->fillType == TSDB_FILL_NONE || pQuery->rec.rows == 0) {
4716
      limitResults(pRuntimeEnv);
4717 4718
      break;
    } else {
4719 4720 4721 4722
      TSKEY ekey = taosGetRevisedEndKey(pQuery->window.ekey, pQuery->order.order, pQuery->slidingTime,
                                        pQuery->slidingTimeUnit, pQuery->precision);
      taosFillSetStartInfo(pRuntimeEnv->pFillInfo, pQuery->rec.rows, ekey);
      taosFillCopyInputDataFromFilePage(pRuntimeEnv->pFillInfo, (tFilePage**) pQuery->sdata);
4723
      numOfInterpo = 0;
4724
      
4725
      pQuery->rec.rows = doFillGapsInResults(pRuntimeEnv, (tFilePage **)pQuery->sdata, pQuery->rec.rows, &numOfInterpo);
4726
      if (pQuery->rec.rows > 0 || Q_STATUS_EQUAL(pQuery->status, QUERY_COMPLETED)) {
4727
        limitResults(pRuntimeEnv);
4728 4729
        break;
      }
4730

4731
      // no result generated yet, continue retrieve data
4732
      pQuery->rec.rows = 0;
4733 4734
    }
  }
4735

4736 4737
  // all data scanned, the group by normal column can return
  if (isGroupbyNormalCol(pQuery->pGroupbyExpr)) {  // todo refactor with merge interval time result
4738
    pQInfo->groupIndex = 0;
4739
    pQuery->rec.rows = 0;
4740
    copyFromWindowResToSData(pQInfo, pRuntimeEnv->windowResInfo.pResult);
4741
    clearFirstNTimeWindow(pRuntimeEnv, pQInfo->groupIndex);
4742
  }
4743

4744 4745 4746
  pQInfo->pointsInterpo += numOfInterpo;
}

4747 4748 4749 4750
/**
 * Run one round of a single-table query; the produced row count is left in pQuery->rec.rows.
 *
 * The cases are handled in this order:
 *  1. results still pending from a previous fill round are emitted first;
 *  2. if the query status is already QUERY_COMPLETED, remaining group-by/interval window results are
 *     copied out, or the query is reported as over;
 *  3. otherwise the scan continues through the interval, fixed-output or multi-output processor.
 *
 * @param pQInfo query handle; pQInfo->groupInfo is asserted to contain exactly one table
 */
static void tableQueryImpl(SQInfo *pQInfo) {
  SQueryRuntimeEnv *pRuntimeEnv = &pQInfo->runtimeEnv;
  SQuery *          pQuery = pRuntimeEnv->pQuery;

  if (queryHasRemainResults(pRuntimeEnv)) {
    /*
     * There are remaining results that were not returned due to result interpolation,
     * so we stay in this procedure instead of launching the retrieve procedure for the next results.
     */
    int32_t numOfInterpo = 0;
    int32_t remain = taosNumOfRemainRows(pRuntimeEnv->pFillInfo);
    pQuery->rec.rows = doFillGapsInResults(pRuntimeEnv, (tFilePage **)pQuery->sdata, remain, &numOfInterpo);

    if (pQuery->rec.rows > 0) {
      limitResults(pRuntimeEnv);
    }

    // the counters are printed with PRId64 elsewhere in this function; cast so the format always matches
    qTrace("QInfo:%p current:%" PRId64 " returned, total:%" PRId64, pQInfo, (int64_t)pQuery->rec.rows,
           (int64_t)pQuery->rec.total);
    return;
  }

  // here we have scanned all qualified data in both data file and cache
  if (Q_STATUS_EQUAL(pQuery->status, QUERY_COMPLETED)) {
    // continue to push data from the group result
    if (isGroupbyNormalCol(pQuery->pGroupbyExpr) ||
        ((isIntervalQuery(pQuery) && pQuery->rec.total < pQuery->limit.limit))) {
      // todo limit the output for interval query?
      pQuery->rec.rows = 0;
      pQInfo->groupIndex = 0;  // always start from 0

      if (pRuntimeEnv->windowResInfo.size > 0) {
        // NOTE(review): rec.rows is expected to be updated by the copy itself, as in the identical
        // copy/clear sequence of tableIntervalProcess; it must not be added to itself afterwards.
        copyFromWindowResToSData(pQInfo, pRuntimeEnv->windowResInfo.pResult);

        clearFirstNTimeWindow(pRuntimeEnv, pQInfo->groupIndex);

        if (pQuery->rec.rows > 0) {
          qTrace("QInfo:%p %" PRId64 " rows returned from group results, total:%" PRId64, pQInfo,
                 (int64_t)pQuery->rec.rows, (int64_t)pQuery->rec.total);
          return;
        }
      }
    }

    qTrace("QInfo:%p query over, %" PRId64 " rows are returned", pQInfo, (int64_t)pQuery->rec.total);
    queryCostStatis(pQInfo);
    return;
  }

  // number of points returned during this query
  pQuery->rec.rows = 0;
  int64_t st = taosGetTimestampUs();

  assert(pQInfo->groupInfo.numOfTables == 1);
  SArray* g = taosArrayGetP(pQInfo->groupInfo.pGroupList, 0);
  SGroupItem* item = taosArrayGet(g, 0);

  // group by normal column, sliding window query, interval query are handled by interval query processor
  if (isIntervalQuery(pQuery) || isGroupbyNormalCol(pQuery->pGroupbyExpr)) {  // interval (down sampling operation)
    tableIntervalProcess(pQInfo, item->info);
  } else if (isFixedOutputQuery(pQuery)) {
    tableFixedOutputProcess(pQInfo, item->info);
  } else {  // diff/add/multiply/subtract/division
    assert(pQuery->checkBuffer == 1);
    tableMultiOutputProcess(pQInfo, item->info);
  }

  // record the total elapsed time
  pQInfo->elapsedTime += (taosGetTimestampUs() - st);
  assert(pQInfo->groupInfo.numOfTables == 1);

  /* check if query is killed or not */
  if (isQueryKilled(pQInfo)) {
    qTrace("QInfo:%p query is killed", pQInfo);
  } else {  // todo set the table uid and tid in log
    qTrace("QInfo:%p query paused, %" PRId64 " rows returned, numOfTotal:%" PRId64 " rows",
        pQInfo, pQuery->rec.rows, pQuery->rec.total + pQuery->rec.rows);

    if (Q_STATUS_EQUAL(pQuery->status, QUERY_COMPLETED)) {
      queryCostStatis(pQInfo);
    }
  }
}

4830 4831
/**
 * Run one round of a super-table (multi-table) query.
 *
 * Interval queries, and fixed-output queries that are neither point-interpolation, group-by-normal-column
 * nor first/last-row queries, go through multiTableQueryProcess(); everything else is processed table by
 * table through sequentialTableProcess(). The time spent is added to pQInfo->elapsedTime.
 *
 * @param pQInfo query handle
 */
static void stableQueryImpl(SQInfo *pQInfo) {
  SQuery *pQuery = pQInfo->runtimeEnv.pQuery;
  pQuery->rec.rows = 0;

  int64_t st = taosGetTimestampUs();

  if (isIntervalQuery(pQuery) ||
      (isFixedOutputQuery(pQuery) && (!isPointInterpoQuery(pQuery)) && !isGroupbyNormalCol(pQuery->pGroupbyExpr) &&
      !isFirstLastRowQuery(pQuery))) {
    multiTableQueryProcess(pQInfo);
  } else {
    assert((pQuery->checkBuffer == 1 && pQuery->intervalTime == 0) || isPointInterpoQuery(pQuery) ||
            isFirstLastRowQuery(pQuery) || isGroupbyNormalCol(pQuery->pGroupbyExpr));

    sequentialTableProcess(pQInfo);
  }

  // record the total elapsed time
  pQInfo->elapsedTime += (taosGetTimestampUs() - st);

  if (pQuery->rec.rows == 0) {
    // explicit casts keep the arguments in sync with the conversion specifiers whatever the field widths are
    qTrace("QInfo:%p over, %d tables queried, %" PRId64 " points are returned", pQInfo,
           (int32_t)pQInfo->groupInfo.numOfTables, (int64_t)pQuery->rec.total);
  }
}

4858
/**
 * Find the position of the column referenced by an expression in its source column list.
 *
 * Tag columns (TSDB_COL_IS_TAG on the expression's flag) are looked up by colId in pTagCols, all other
 * columns in pQueryMsg->colList.
 *
 * @param pQueryMsg query message providing numOfTags, numOfCols and colList
 * @param pExprMsg  expression whose colInfo.colId is searched for
 * @param pTagCols  tag column list, pQueryMsg->numOfTags entries
 * @return index of the matching column. A missing column trips an assertion; when assertions are
 *         compiled out INT32_MAX is returned, which fails every "index < count" check done by callers.
 */
static int32_t getColumnIndexInSource(SQueryTableMsg *pQueryMsg, SSqlFuncMsg *pExprMsg, SColumnInfo* pTagCols) {
  if (TSDB_COL_IS_TAG(pExprMsg->colInfo.flag)) {
    for (int32_t j = 0; j < pQueryMsg->numOfTags; ++j) {
      if (pExprMsg->colInfo.colId == pTagCols[j].colId) {
        return j;
      }
    }
  } else {
    for (int32_t j = 0; j < pQueryMsg->numOfCols; ++j) {
      if (pExprMsg->colInfo.colId == pQueryMsg->colList[j].colId) {
        return j;
      }
    }
  }

  assert(0);

  // never fall off the end of a non-void function, even with NDEBUG
  return INT32_MAX;
}

4883 4884 4885
/**
 * Check that the column referenced by an expression resolves to an index inside the source lists.
 *
 * @return true when the resolved index is below numOfCols or below numOfTags
 */
bool validateExprColumnInfo(SQueryTableMsg *pQueryMsg, SSqlFuncMsg *pExprMsg, SColumnInfo* pTagCols) {
  const int32_t srcIndex = getColumnIndexInSource(pQueryMsg, pExprMsg, pTagCols);

  if (srcIndex < pQueryMsg->numOfCols) {
    return true;
  }

  return srcIndex < pQueryMsg->numOfTags;
}

4888
/**
 * Sanity-check the scalar header fields of a query message.
 *
 * The checks run in a fixed order and only the first violation is logged.
 *
 * @param pQueryMsg query message whose header fields are already in host byte order
 * @return true when interval time, table count, group-by column count and output column count are all legal
 */
static bool validateQueryMsg(SQueryTableMsg *pQueryMsg) {
  bool valid = false;

  if (pQueryMsg->intervalTime < 0) {
    qError("qmsg:%p illegal value of interval time %" PRId64, pQueryMsg, pQueryMsg->intervalTime);
  } else if (pQueryMsg->numOfTables <= 0) {
    qError("qmsg:%p illegal value of numOfTables %d", pQueryMsg, pQueryMsg->numOfTables);
  } else if (pQueryMsg->numOfGroupCols < 0) {
    qError("qmsg:%p illegal value of numOfGroupbyCols %d", pQueryMsg, pQueryMsg->numOfGroupCols);
  } else if (pQueryMsg->numOfOutput <= 0 || pQueryMsg->numOfOutput > TSDB_MAX_COLUMNS) {
    qError("qmsg:%p illegal value of output columns %d", pQueryMsg, pQueryMsg->numOfOutput);
  } else {
    valid = true;
  }

  return valid;
}

/**
 * Validate the number of source columns (normal columns plus tags) of a query message.
 *
 * Negative counts or a total above TSDB_MAX_COLUMNS are rejected and logged. A message with no source
 * column at all is accepted only if every output expression is TSDB_FUNC_TAGPRJ.
 *
 * @param pQueryMsg query message with numOfCols/numOfTags/numOfOutput in host byte order
 * @param pExprMsg  array of numOfOutput expression pointers
 * @return true when the source column description is acceptable
 */
static bool validateQuerySourceCols(SQueryTableMsg *pQueryMsg, SSqlFuncMsg** pExprMsg) {
  const int32_t totalCols = pQueryMsg->numOfCols + pQueryMsg->numOfTags;
  const bool    badCounts = (pQueryMsg->numOfCols < 0) || (pQueryMsg->numOfTags < 0) || (totalCols > TSDB_MAX_COLUMNS);

  if (badCounts) {
    qError("qmsg:%p illegal value of numOfCols %d numOfTags:%d", pQueryMsg, pQueryMsg->numOfCols, pQueryMsg->numOfTags);
    return false;
  }

  if (totalCols != 0) {
    return true;
  }

  // no source column at all: stop at the first expression that is not a tag projection
  int32_t idx = 0;
  while (idx < pQueryMsg->numOfOutput && pExprMsg[idx]->functionId == TSDB_FUNC_TAGPRJ) {
    ++idx;
  }

  return idx >= pQueryMsg->numOfOutput;
}

4928
/**
 * Build the table id list from the serialized STableIdInfo records found at pMsg.
 *
 * Each record is converted in place (tid, uid and key are byte-swapped inside the message buffer) and then
 * appended to a newly created array stored in *pTableIdList.
 *
 * @param pQueryMsg    query message; numOfTables must be positive
 * @param pMsg         start of the serialized records
 * @param pTableIdList receives the new array
 * @return pointer to the first byte after the last record
 */
static char *createTableIdList(SQueryTableMsg *pQueryMsg, char *pMsg, SArray **pTableIdList) {
  const int32_t numOfTables = pQueryMsg->numOfTables;
  assert(numOfTables > 0);

  SArray *pList = taosArrayInit(numOfTables, sizeof(STableIdInfo));
  *pTableIdList = pList;

  STableIdInfo *pEntry = (STableIdInfo *)pMsg;
  for (int32_t remaining = numOfTables; remaining > 0; --remaining, ++pEntry) {
    pEntry->tid = htonl(pEntry->tid);
    pEntry->uid = htobe64(pEntry->uid);
    pEntry->key = htobe64(pEntry->key);

    taosArrayPush(pList, pEntry);
  }

  return (char *)pEntry;
}
4946

4947
/**
H
hjxilinx 已提交
4948
 * pQueryMsg->head has been converted before this function is called.
4949
 *
H
hjxilinx 已提交
4950
 * @param pQueryMsg
4951 4952 4953 4954
 * @param pTableIdList
 * @param pExpr
 * @return
 */
4955
static int32_t convertQueryMsg(SQueryTableMsg *pQueryMsg, SArray **pTableIdList, SSqlFuncMsg ***pExpr,
weixin_48148422's avatar
weixin_48148422 已提交
4956
                               char **tagCond, char** tbnameCond, SColIndex **groupbyCols, SColumnInfo** tagCols) {
4957 4958 4959 4960 4961 4962 4963 4964
  pQueryMsg->numOfTables = htonl(pQueryMsg->numOfTables);

  pQueryMsg->window.skey = htobe64(pQueryMsg->window.skey);
  pQueryMsg->window.ekey = htobe64(pQueryMsg->window.ekey);
  pQueryMsg->intervalTime = htobe64(pQueryMsg->intervalTime);
  pQueryMsg->slidingTime = htobe64(pQueryMsg->slidingTime);
  pQueryMsg->limit = htobe64(pQueryMsg->limit);
  pQueryMsg->offset = htobe64(pQueryMsg->offset);
H
hjxilinx 已提交
4965

4966 4967 4968
  pQueryMsg->order = htons(pQueryMsg->order);
  pQueryMsg->orderColId = htons(pQueryMsg->orderColId);
  pQueryMsg->queryType = htons(pQueryMsg->queryType);
weixin_48148422's avatar
weixin_48148422 已提交
4969
  pQueryMsg->tagNameRelType = htons(pQueryMsg->tagNameRelType);
4970 4971

  pQueryMsg->numOfCols = htons(pQueryMsg->numOfCols);
4972
  pQueryMsg->numOfOutput = htons(pQueryMsg->numOfOutput);
H
hjxilinx 已提交
4973
  pQueryMsg->numOfGroupCols = htons(pQueryMsg->numOfGroupCols);
4974 4975 4976
  pQueryMsg->tagCondLen = htons(pQueryMsg->tagCondLen);
  pQueryMsg->tsOffset = htonl(pQueryMsg->tsOffset);
  pQueryMsg->tsLen = htonl(pQueryMsg->tsLen);
H
hjxilinx 已提交
4977
  pQueryMsg->tsNumOfBlocks = htonl(pQueryMsg->tsNumOfBlocks);
4978
  pQueryMsg->tsOrder = htonl(pQueryMsg->tsOrder);
4979
  pQueryMsg->numOfTags = htonl(pQueryMsg->numOfTags);
4980

4981
  // query msg safety check
4982
  if (!validateQueryMsg(pQueryMsg)) {
4983 4984 4985
    return TSDB_CODE_INVALID_QUERY_MSG;
  }

H
hjxilinx 已提交
4986
  char *pMsg = (char *)(pQueryMsg->colList) + sizeof(SColumnInfo) * pQueryMsg->numOfCols;
4987

H
hjxilinx 已提交
4988
  for (int32_t col = 0; col < pQueryMsg->numOfCols; ++col) {
4989 4990
    SColumnInfo *pColInfo = &pQueryMsg->colList[col];

H
hjxilinx 已提交
4991
    pColInfo->colId = htons(pColInfo->colId);
4992
    pColInfo->type = htons(pColInfo->type);
H
hjxilinx 已提交
4993 4994
    pColInfo->bytes = htons(pColInfo->bytes);
    pColInfo->numOfFilters = htons(pColInfo->numOfFilters);
4995

H
hjxilinx 已提交
4996
    assert(pColInfo->type >= TSDB_DATA_TYPE_BOOL && pColInfo->type <= TSDB_DATA_TYPE_NCHAR);
4997

H
hjxilinx 已提交
4998
    int32_t numOfFilters = pColInfo->numOfFilters;
4999
    if (numOfFilters > 0) {
H
hjxilinx 已提交
5000
      pColInfo->filters = calloc(numOfFilters, sizeof(SColumnFilterInfo));
5001 5002 5003
    }

    for (int32_t f = 0; f < numOfFilters; ++f) {
5004 5005 5006 5007
      SColumnFilterInfo *pFilterMsg = (SColumnFilterInfo *)pMsg;
      
      SColumnFilterInfo *pColFilter = &pColInfo->filters[f];
      pColFilter->filterstr = htons(pFilterMsg->filterstr);
5008 5009 5010

      pMsg += sizeof(SColumnFilterInfo);

5011 5012
      if (pColFilter->filterstr) {
        pColFilter->len = htobe64(pFilterMsg->len);
5013

5014
        pColFilter->pz = (int64_t) calloc(1, pColFilter->len + 1 * TSDB_NCHAR_SIZE); // note: null-terminator
5015 5016
        memcpy((void *)pColFilter->pz, pMsg, pColFilter->len);
        pMsg += (pColFilter->len + 1);
5017
      } else {
5018 5019
        pColFilter->lowerBndi = htobe64(pFilterMsg->lowerBndi);
        pColFilter->upperBndi = htobe64(pFilterMsg->upperBndi);
5020 5021
      }

5022 5023
      pColFilter->lowerRelOptr = htons(pFilterMsg->lowerRelOptr);
      pColFilter->upperRelOptr = htons(pFilterMsg->upperRelOptr);
5024 5025 5026
    }
  }

5027 5028
  *pExpr = calloc(pQueryMsg->numOfOutput, POINTER_BYTES);
  SSqlFuncMsg *pExprMsg = (SSqlFuncMsg *)pMsg;
5029

5030
  for (int32_t i = 0; i < pQueryMsg->numOfOutput; ++i) {
5031
    (*pExpr)[i] = pExprMsg;
5032

5033
    pExprMsg->colInfo.colIndex = htons(pExprMsg->colInfo.colIndex);
5034 5035 5036 5037
    pExprMsg->colInfo.colId = htons(pExprMsg->colInfo.colId);
    pExprMsg->colInfo.flag = htons(pExprMsg->colInfo.flag);
    pExprMsg->functionId = htons(pExprMsg->functionId);
    pExprMsg->numOfParams = htons(pExprMsg->numOfParams);
5038

5039
    pMsg += sizeof(SSqlFuncMsg);
5040 5041

    for (int32_t j = 0; j < pExprMsg->numOfParams; ++j) {
5042
      pExprMsg->arg[j].argType = htons(pExprMsg->arg[j].argType);
5043 5044 5045 5046
      pExprMsg->arg[j].argBytes = htons(pExprMsg->arg[j].argBytes);

      if (pExprMsg->arg[j].argType == TSDB_DATA_TYPE_BINARY) {
        pExprMsg->arg[j].argValue.pz = pMsg;
5047
        pMsg += pExprMsg->arg[j].argBytes;  // one more for the string terminated char.
5048 5049 5050 5051 5052
      } else {
        pExprMsg->arg[j].argValue.i64 = htobe64(pExprMsg->arg[j].argValue.i64);
      }
    }

5053
    if (pExprMsg->functionId == TSDB_FUNC_TAG || pExprMsg->functionId == TSDB_FUNC_TAGPRJ ||
5054 5055 5056 5057 5058
               pExprMsg->functionId == TSDB_FUNC_TAG_DUMMY) {
      if (pExprMsg->colInfo.flag != TSDB_COL_TAG) {  // ignore the column  index check for arithmetic expression.
        return TSDB_CODE_INVALID_QUERY_MSG;
      }
    } else {
5059 5060 5061
//      if (!validateExprColumnInfo(pQueryMsg, pExprMsg)) {
//        return TSDB_CODE_INVALID_QUERY_MSG;
//      }
5062 5063
    }

5064
    pExprMsg = (SSqlFuncMsg *)pMsg;
5065
  }
5066 5067 5068 5069 5070 5071
  
  if (!validateQuerySourceCols(pQueryMsg, *pExpr)) {
    tfree(*pExpr);
    
    return TSDB_CODE_INVALID_QUERY_MSG;
  }
5072

H
hjxilinx 已提交
5073
  pMsg = createTableIdList(pQueryMsg, pMsg, pTableIdList);
5074

H
hjxilinx 已提交
5075
  if (pQueryMsg->numOfGroupCols > 0) {  // group by tag columns
5076 5077 5078 5079
    *groupbyCols = malloc(pQueryMsg->numOfGroupCols * sizeof(SColIndex));

    for (int32_t i = 0; i < pQueryMsg->numOfGroupCols; ++i) {
      (*groupbyCols)[i].colId = *(int16_t *)pMsg;
5080
      pMsg += sizeof((*groupbyCols)[i].colId);
5081 5082

      (*groupbyCols)[i].colIndex = *(int16_t *)pMsg;
5083 5084
      pMsg += sizeof((*groupbyCols)[i].colIndex);

5085
      (*groupbyCols)[i].flag = *(int16_t *)pMsg;
5086 5087 5088 5089 5090
      pMsg += sizeof((*groupbyCols)[i].flag);

      memcpy((*groupbyCols)[i].name, pMsg, tListLen(groupbyCols[i]->name));
      pMsg += tListLen((*groupbyCols)[i].name);
    }
5091

H
hjxilinx 已提交
5092 5093
    pQueryMsg->orderByIdx = htons(pQueryMsg->orderByIdx);
    pQueryMsg->orderType = htons(pQueryMsg->orderType);
5094 5095
  }

5096 5097
  pQueryMsg->fillType = htons(pQueryMsg->fillType);
  if (pQueryMsg->fillType != TSDB_FILL_NONE) {
5098
    pQueryMsg->fillVal = (uint64_t)(pMsg);
5099 5100

    int64_t *v = (int64_t *)pMsg;
5101
    for (int32_t i = 0; i < pQueryMsg->numOfOutput; ++i) {
5102 5103
      v[i] = htobe64(v[i]);
    }
5104

5105
    pMsg += sizeof(int64_t) * pQueryMsg->numOfOutput;
5106
  }
5107

5108 5109 5110 5111
  if (pQueryMsg->numOfTags > 0) {
    (*tagCols) = calloc(1, sizeof(SColumnInfo) * pQueryMsg->numOfTags);
    for (int32_t i = 0; i < pQueryMsg->numOfTags; ++i) {
      SColumnInfo* pTagCol = (SColumnInfo*) pMsg;
5112
      
5113 5114 5115 5116 5117 5118
      pTagCol->colId = htons(pTagCol->colId);
      pTagCol->bytes = htons(pTagCol->bytes);
      pTagCol->type  = htons(pTagCol->type);
      pTagCol->numOfFilters = 0;
      
      (*tagCols)[i] = *pTagCol;
5119
      pMsg += sizeof(SColumnInfo);
5120
    }
H
hjxilinx 已提交
5121
  }
5122

5123 5124 5125 5126 5127 5128
  // the tag query condition expression string is located at the end of query msg
  if (pQueryMsg->tagCondLen > 0) {
    *tagCond = calloc(1, pQueryMsg->tagCondLen);
    memcpy(*tagCond, pMsg, pQueryMsg->tagCondLen);
    pMsg += pQueryMsg->tagCondLen;
  }
5129
  
weixin_48148422's avatar
weixin_48148422 已提交
5130
  if (*pMsg != 0) {
5131 5132
    size_t len = strlen(pMsg) + 1;
    *tbnameCond = malloc(len);
weixin_48148422's avatar
weixin_48148422 已提交
5133
    strcpy(*tbnameCond, pMsg);
5134
    pMsg += len;
weixin_48148422's avatar
weixin_48148422 已提交
5135
  }
5136 5137 5138
  
  qTrace("qmsg:%p query %d tables, qrange:%" PRId64 "-%" PRId64 ", numOfGroupbyTagCols:%d, order:%d, "
         "outputCols:%d, numOfCols:%d, interval:%" PRId64 ", fillType:%d, comptsLen:%d, limit:%" PRId64 ", offset:%" PRId64,
5139
         pQueryMsg, pQueryMsg->numOfTables, pQueryMsg->window.skey, pQueryMsg->window.ekey, pQueryMsg->numOfGroupCols,
5140
         pQueryMsg->order, pQueryMsg->numOfOutput, pQueryMsg->numOfCols, pQueryMsg->intervalTime,
5141
         pQueryMsg->fillType, pQueryMsg->tsLen, pQueryMsg->limit, pQueryMsg->offset);
5142 5143 5144 5145

  return 0;
}

H
hjxilinx 已提交
5146
/**
 * Build the expression tree of an arithmetic expression from the serialized form carried in its first
 * argument (arg[0].argValue.pz, arg[0].argBytes bytes) and store it in pArithExprInfo->pExpr.
 *
 * @param pArithExprInfo expression info whose base.arg[0] holds the serialized expression
 * @param pQueryMsg      originating message, used for logging only
 * @return TSDB_CODE_SUCCESS, the code raised while decoding, or TSDB_CODE_APP_ERROR when no tree is produced
 */
static int32_t buildAirthmeticExprFromMsg(SExprInfo *pArithExprInfo, SQueryTableMsg *pQueryMsg) {
  // the format string has a single conversion, so only the message pointer is passed
  qTrace("qmsg:%p create arithmetic expr from binary string", pQueryMsg);

  tExprNode* pExprNode = NULL;
  TRY(32) {
    pExprNode = exprTreeFromBinary(pArithExprInfo->base.arg[0].argValue.pz, pArithExprInfo->base.arg[0].argBytes);
  } CATCH( code ) {
    CLEANUP_EXECUTE();
    return code;
  } END_TRY

  if (pExprNode == NULL) {
    // NOTE(review): the buffer is handed to exprTreeFromBinary() with an explicit length; confirm that it is
    // also NUL-terminated before relying on %s here.
    qError("qmsg:%p failed to create arithmetic expression string from:%s", pQueryMsg, pArithExprInfo->base.arg[0].argValue.pz);
    return TSDB_CODE_APP_ERROR;
  }

  pArithExprInfo->pExpr = pExprNode;
  return TSDB_CODE_SUCCESS;
}

5166 5167 5168
/**
 * Create the SExprInfo array describing every output column of the query.
 *
 * A first pass copies each expression message and derives its result type/size through getResultDataInfo();
 * a second pass recomputes the result info of TOP/BOTTOM expressions with the accumulated length of the
 * TSDB_FUNC_TAG_DUMMY/TSDB_FUNC_TS_DUMMY columns.
 *
 * @param pQueryMsg converted query message
 * @param pExprInfo receives the allocated array (numOfOutput entries) on success, NULL otherwise
 * @param pExprMsg  array of numOfOutput expression message pointers; the array itself is freed on success
 *                  and left untouched on failure
 * @param pTagCols  tag column list used to resolve tag expressions
 * @return TSDB_CODE_SUCCESS, TSDB_CODE_SERV_OUT_OF_MEMORY, TSDB_CODE_INVALID_QUERY_MSG or the error returned
 *         while building an arithmetic expression
 */
static int32_t createSqlFunctionExprFromMsg(SQueryTableMsg *pQueryMsg, SExprInfo **pExprInfo, SSqlFuncMsg **pExprMsg,
    SColumnInfo* pTagCols) {
  *pExprInfo = NULL;
  int32_t code = TSDB_CODE_SUCCESS;

  SExprInfo *pExprs = (SExprInfo *)calloc(1, sizeof(SExprInfo) * pQueryMsg->numOfOutput);
  if (pExprs == NULL) {
    return TSDB_CODE_SERV_OUT_OF_MEMORY;
  }

  bool    isSuperTable = QUERY_IS_STABLE_QUERY(pQueryMsg->queryType);
  int16_t tagLen = 0;

  for (int32_t i = 0; i < pQueryMsg->numOfOutput; ++i) {
    pExprs[i].base = *pExprMsg[i];
    pExprs[i].bytes = 0;

    int16_t type = 0;
    int16_t bytes = 0;

    if (pExprs[i].base.functionId == TSDB_FUNC_ARITHM) {  // parse the arithmetic expression
      code = buildAirthmeticExprFromMsg(&pExprs[i], pQueryMsg);

      if (code != TSDB_CODE_SUCCESS) {
        tfree(pExprs);
        return code;
      }

      type  = TSDB_DATA_TYPE_DOUBLE;
      bytes = tDataTypeDesc[type].nSize;
    } else if (pExprs[i].base.colInfo.colId == TSDB_TBNAME_COLUMN_INDEX) {  // table name pseudo column
      type  = TSDB_DATA_TYPE_BINARY;
      bytes = TSDB_TABLE_NAME_LEN + VARSTR_HEADER_SIZE;
    } else {  // normal column or tag: take type and size from the source column
      int32_t j = getColumnIndexInSource(pQueryMsg, &pExprs[i].base, pTagCols);
      assert(j < pQueryMsg->numOfCols || j < pQueryMsg->numOfTags);

      SColumnInfo* pCol = (TSDB_COL_IS_TAG(pExprs[i].base.colInfo.flag))? &pTagCols[j]:&pQueryMsg->colList[j];
      type = pCol->type;
      bytes = pCol->bytes;
    }

    int32_t param = pExprs[i].base.arg[0].argValue.i64;
    if (getResultDataInfo(type, bytes, pExprs[i].base.functionId, param, &pExprs[i].type, &pExprs[i].bytes,
                          &pExprs[i].interBytes, 0, isSuperTable) != TSDB_CODE_SUCCESS) {
      tfree(pExprs);
      return TSDB_CODE_INVALID_QUERY_MSG;
    }

    if (pExprs[i].base.functionId == TSDB_FUNC_TAG_DUMMY || pExprs[i].base.functionId == TSDB_FUNC_TS_DUMMY) {
      tagLen += pExprs[i].bytes;
    }
    assert(isValidDataType(pExprs[i].type, pExprs[i].bytes));
  }

  // TODO refactor
  for (int32_t i = 0; i < pQueryMsg->numOfOutput; ++i) {
    pExprs[i].base = *pExprMsg[i];
    int16_t functId = pExprs[i].base.functionId;

    if (functId == TSDB_FUNC_TOP || functId == TSDB_FUNC_BOTTOM) {
      int32_t j = getColumnIndexInSource(pQueryMsg, &pExprs[i].base, pTagCols);
      assert(j < pQueryMsg->numOfCols);

      SColumnInfo *pCol = &pQueryMsg->colList[j];

      int32_t ret =
          getResultDataInfo(pCol->type, pCol->bytes, functId, pExprs[i].base.arg[0].argValue.i64,
                            &pExprs[i].type, &pExprs[i].bytes, &pExprs[i].interBytes, tagLen, isSuperTable);

      // handle the failure the same way as in the first pass instead of only asserting on it
      if (ret != TSDB_CODE_SUCCESS) {
        tfree(pExprs);
        return TSDB_CODE_INVALID_QUERY_MSG;
      }
    }
  }

  tfree(pExprMsg);
  *pExprInfo = pExprs;

  return TSDB_CODE_SUCCESS;
}

5246
/**
 * Create the group-by descriptor of a query from the converted message.
 *
 * @param pQueryMsg converted query message
 * @param pColIndex group-by columns, pQueryMsg->numOfGroupCols entries
 * @param code      set to TSDB_CODE_SERV_OUT_OF_MEMORY when an allocation fails; not written otherwise
 * @return the new descriptor, or NULL when the query has no group-by column or an allocation failed
 */
static SSqlGroupbyExpr *createGroupbyExprFromMsg(SQueryTableMsg *pQueryMsg, SColIndex *pColIndex, int32_t *code) {
  if (pQueryMsg->numOfGroupCols == 0) {
    return NULL;
  }

  // using group by tag columns
  SSqlGroupbyExpr *pGroupbyExpr = (SSqlGroupbyExpr *)calloc(1, sizeof(SSqlGroupbyExpr));
  if (pGroupbyExpr == NULL) {
    *code = TSDB_CODE_SERV_OUT_OF_MEMORY;
    return NULL;
  }

  pGroupbyExpr->numOfGroupCols = pQueryMsg->numOfGroupCols;
  pGroupbyExpr->orderType = pQueryMsg->orderType;
  pGroupbyExpr->orderIndex = pQueryMsg->orderByIdx;

  pGroupbyExpr->columnInfo = taosArrayInit(pQueryMsg->numOfGroupCols, sizeof(SColIndex));
  if (pGroupbyExpr->columnInfo == NULL) {
    // do not push into a missing array; report the failure like the allocation above
    free(pGroupbyExpr);
    *code = TSDB_CODE_SERV_OUT_OF_MEMORY;
    return NULL;
  }

  for (int32_t i = 0; i < pQueryMsg->numOfGroupCols; ++i) {
    taosArrayPush(pGroupbyExpr->columnInfo, &pColIndex[i]);
  }

  return pGroupbyExpr;
}

5270
/**
 * Build pQuery->pFilterInfo: one SSingleColumnFilterInfo per column that carries at least one filter,
 * with a filter callback selected for every filter condition.
 *
 * A condition with a lower bound (> or >=) and an upper bound (< or <=) uses one of the range filter
 * functions; otherwise the single valid relation selects a value filter function.
 *
 * @param pQInfo query handle, used for logging only
 * @param pQuery query whose colList is inspected; numOfFilterCols and pFilterInfo are updated
 * @return TSDB_CODE_SUCCESS, TSDB_CODE_INVALID_QUERY_MSG for an invalid filter description, or
 *         TSDB_CODE_SERV_OUT_OF_MEMORY when an allocation fails
 */
static int32_t createFilterInfo(void *pQInfo, SQuery *pQuery) {
  for (int32_t i = 0; i < pQuery->numOfCols; ++i) {
    if (pQuery->colList[i].numOfFilters > 0) {
      pQuery->numOfFilterCols++;
    }
  }

  if (pQuery->numOfFilterCols == 0) {
    return TSDB_CODE_SUCCESS;
  }

  pQuery->pFilterInfo = calloc(1, sizeof(SSingleColumnFilterInfo) * pQuery->numOfFilterCols);
  if (pQuery->pFilterInfo == NULL) {
    return TSDB_CODE_SERV_OUT_OF_MEMORY;
  }

  for (int32_t i = 0, j = 0; i < pQuery->numOfCols; ++i) {
    if (pQuery->colList[i].numOfFilters > 0) {
      SSingleColumnFilterInfo *pFilterInfo = &pQuery->pFilterInfo[j];

      // plain struct assignment copies exactly one column descriptor
      pFilterInfo->info = pQuery->colList[i];

      pFilterInfo->numOfFilters = pQuery->colList[i].numOfFilters;
      pFilterInfo->pFilters = calloc(pFilterInfo->numOfFilters, sizeof(SColumnFilterElem));
      if (pFilterInfo->pFilters == NULL) {
        return TSDB_CODE_SERV_OUT_OF_MEMORY;
      }

      for (int32_t f = 0; f < pFilterInfo->numOfFilters; ++f) {
        SColumnFilterElem *pSingleColFilter = &pFilterInfo->pFilters[f];
        pSingleColFilter->filterInfo = pQuery->colList[i].filters[f];

        int32_t lower = pSingleColFilter->filterInfo.lowerRelOptr;
        int32_t upper = pSingleColFilter->filterInfo.upperRelOptr;

        if (lower == TSDB_RELATION_INVALID && upper == TSDB_RELATION_INVALID) {
          qError("QInfo:%p invalid filter info", pQInfo);
          return TSDB_CODE_INVALID_QUERY_MSG;
        }

        int16_t type  = pQuery->colList[i].type;
        int16_t bytes = pQuery->colList[i].bytes;

        // todo refactor
        __filter_func_t *rangeFilterArray = getRangeFilterFuncArray(type);
        __filter_func_t *filterArray = getValueFilterFuncArray(type);

        if (rangeFilterArray == NULL && filterArray == NULL) {
          qError("QInfo:%p failed to get filter function, invalid data type:%d", pQInfo, type);
          return TSDB_CODE_INVALID_QUERY_MSG;
        }

        if ((lower == TSDB_RELATION_GREATER_EQUAL || lower == TSDB_RELATION_GREATER) &&
            (upper == TSDB_RELATION_LESS_EQUAL || upper == TSDB_RELATION_LESS)) {
          // two-sided range: the slot depends on which bounds are inclusive
          if (lower == TSDB_RELATION_GREATER_EQUAL) {
            if (upper == TSDB_RELATION_LESS_EQUAL) {
              pSingleColFilter->fp = rangeFilterArray[4];
            } else {
              pSingleColFilter->fp = rangeFilterArray[2];
            }
          } else {
            if (upper == TSDB_RELATION_LESS_EQUAL) {
              pSingleColFilter->fp = rangeFilterArray[3];
            } else {
              pSingleColFilter->fp = rangeFilterArray[1];
            }
          }
        } else {  // set callback filter function
          if (lower != TSDB_RELATION_INVALID) {
            pSingleColFilter->fp = filterArray[lower];

            if (upper != TSDB_RELATION_INVALID) {
              qError("pQInfo:%p failed to get filter function, invalid filter condition", pQInfo);
              return TSDB_CODE_INVALID_QUERY_MSG;
            }
          } else {
            pSingleColFilter->fp = filterArray[upper];
          }
        }
        assert(pSingleColFilter->fp != NULL);
        pSingleColFilter->bytes = bytes;
      }

      j++;
    }
  }

  return TSDB_CODE_SUCCESS;
}

5355
/**
 * Recompute colInfo.colIndex of every output expression so that it refers to the position of the column
 * inside pQuery->colList (normal columns) or pQuery->tagColList (tag columns).
 *
 * Arithmetic expressions are skipped. A column id that cannot be found trips an assertion, except for the
 * table name pseudo column among the tags.
 *
 * @param pQuery query whose pSelectExpr entries are updated in place
 */
static void doUpdateExprColumnIndex(SQuery *pQuery) {
  // test the pointer itself before dereferencing it
  assert(pQuery != NULL && pQuery->pSelectExpr != NULL);

  for (int32_t k = 0; k < pQuery->numOfOutput; ++k) {
    SSqlFuncMsg *pSqlExprMsg = &pQuery->pSelectExpr[k].base;
    if (pSqlExprMsg->functionId == TSDB_FUNC_ARITHM) {
      continue;
    }

    // todo opt performance
    SColIndex *pColIndex = &pSqlExprMsg->colInfo;
    if (!TSDB_COL_IS_TAG(pColIndex->flag)) {
      int32_t f = 0;
      for (f = 0; f < pQuery->numOfCols; ++f) {
        if (pColIndex->colId == pQuery->colList[f].colId) {
          pColIndex->colIndex = f;
          break;
        }
      }

      assert (f < pQuery->numOfCols);
    } else {
      int32_t f = 0;
      for (f = 0; f < pQuery->numOfTags; ++f) {
        if (pColIndex->colId == pQuery->tagColList[f].colId) {
          pColIndex->colIndex = f;
          break;
        }
      }

      assert(f < pQuery->numOfTags || pColIndex->colId == TSDB_TBNAME_COLUMN_INDEX);
    }
  }
}

weixin_48148422's avatar
weixin_48148422 已提交
5390 5391 5392 5393 5394 5395 5396 5397 5398 5399

/**
 * Three-way comparison of two STableIdInfo records by uid, usable as a sort/search callback.
 *
 * @return 1 when a's uid is greater, -1 when it is smaller, 0 when both are equal
 */
static int compareTableIdInfo( const void* a, const void* b ) {
  const STableIdInfo *lhs = a;
  const STableIdInfo *rhs = b;

  return (lhs->uid > rhs->uid) - (lhs->uid < rhs->uid);
}

static SQInfo *createQInfoImpl(SQueryTableMsg *pQueryMsg, SArray* pTableIdList, SSqlGroupbyExpr *pGroupbyExpr, SExprInfo *pExprs,
5400
                               STableGroupInfo *groupInfo, SColumnInfo* pTagCols) {
5401 5402
  SQInfo *pQInfo = (SQInfo *)calloc(1, sizeof(SQInfo));
  if (pQInfo == NULL) {
5403
    return NULL;
5404 5405 5406 5407 5408 5409
  }

  SQuery *pQuery = calloc(1, sizeof(SQuery));
  pQInfo->runtimeEnv.pQuery = pQuery;

  int16_t numOfCols = pQueryMsg->numOfCols;
5410
  int16_t numOfOutput = pQueryMsg->numOfOutput;
5411

5412
  pQuery->numOfCols       = numOfCols;
H
hjxilinx 已提交
5413
  pQuery->numOfOutput     = numOfOutput;
5414 5415 5416
  pQuery->limit.limit     = pQueryMsg->limit;
  pQuery->limit.offset    = pQueryMsg->offset;
  pQuery->order.order     = pQueryMsg->order;
5417
  pQuery->order.orderColId = pQueryMsg->orderColId;
5418 5419 5420 5421
  pQuery->pSelectExpr     = pExprs;
  pQuery->pGroupbyExpr    = pGroupbyExpr;
  pQuery->intervalTime    = pQueryMsg->intervalTime;
  pQuery->slidingTime     = pQueryMsg->slidingTime;
5422
  pQuery->slidingTimeUnit = pQueryMsg->slidingTimeUnit;
5423
  pQuery->fillType        = pQueryMsg->fillType;
5424
  pQuery->numOfTags       = pQueryMsg->numOfTags;
5425

5426
  // todo do not allocate ??
5427
  pQuery->colList = calloc(numOfCols, sizeof(SSingleColumnFilterInfo));
5428
  if (pQuery->colList == NULL) {
5429
    goto _cleanup;
5430
  }
5431

H
hjxilinx 已提交
5432
  for (int16_t i = 0; i < numOfCols; ++i) {
5433
    pQuery->colList[i] = pQueryMsg->colList[i];
5434
    pQuery->colList[i].filters = tscFilterInfoClone(pQueryMsg->colList[i].filters, pQuery->colList[i].numOfFilters);
H
hjxilinx 已提交
5435
  }
5436
  
5437
  pQuery->tagColList = pTagCols;
5438
  
5439
  // calculate the result row size
5440 5441 5442
  for (int16_t col = 0; col < numOfOutput; ++col) {
    assert(pExprs[col].bytes > 0);
    pQuery->rowSize += pExprs[col].bytes;
5443
  }
5444

5445
  doUpdateExprColumnIndex(pQuery);
5446

5447
  int32_t ret = createFilterInfo(pQInfo, pQuery);
5448
  if (ret != TSDB_CODE_SUCCESS) {
5449
    goto _cleanup;
5450 5451 5452
  }

  // prepare the result buffer
5453
  pQuery->sdata = (tFilePage **)calloc(pQuery->numOfOutput, POINTER_BYTES);
5454
  if (pQuery->sdata == NULL) {
5455
    goto _cleanup;
5456 5457
  }

H
hjxilinx 已提交
5458
  // set the output buffer capacity
H
hjxilinx 已提交
5459
  pQuery->rec.capacity = 4096;
5460
  pQuery->rec.threshold = 4000;
5461

5462
  for (int32_t col = 0; col < pQuery->numOfOutput; ++col) {
5463
    assert(pExprs[col].interBytes >= pExprs[col].bytes);
5464 5465

    // allocate additional memory for interResults that are usually larger then final results
5466 5467
    size_t size = (pQuery->rec.capacity + 1) * pExprs[col].bytes + pExprs[col].interBytes + sizeof(tFilePage);
    pQuery->sdata[col] = (tFilePage *)calloc(1, size);
5468
    if (pQuery->sdata[col] == NULL) {
5469
      goto _cleanup;
5470 5471 5472
    }
  }

5473
  if (pQuery->fillType != TSDB_FILL_NONE) {
5474 5475
    pQuery->fillVal = malloc(sizeof(int64_t) * pQuery->numOfOutput);
    if (pQuery->fillVal == NULL) {
5476
      goto _cleanup;
5477 5478 5479
    }

    // the first column is the timestamp
5480
    memcpy(pQuery->fillVal, (char *)pQueryMsg->fillVal, pQuery->numOfOutput * sizeof(int64_t));
5481 5482 5483
  }

  // to make sure third party won't overwrite this structure
5484
  pQInfo->signature = pQInfo;
H
hjxilinx 已提交
5485 5486 5487 5488 5489 5490 5491
  
  pQInfo->tableIdGroupInfo = *groupInfo;
  size_t numOfGroups = taosArrayGetSize(groupInfo->pGroupList);
  
  pQInfo->groupInfo.pGroupList = taosArrayInit(numOfGroups, POINTER_BYTES);
  pQInfo->groupInfo.numOfTables = groupInfo->numOfTables;
  
weixin_48148422's avatar
weixin_48148422 已提交
5492 5493 5494
  int tableIndex = 0;
  STimeWindow window = pQueryMsg->window;
  taosArraySort( pTableIdList, compareTableIdInfo );
H
hjxilinx 已提交
5495 5496 5497 5498 5499 5500 5501
  for(int32_t i = 0; i < numOfGroups; ++i) {
    SArray* pa = taosArrayGetP(groupInfo->pGroupList, i);
    size_t s = taosArrayGetSize(pa);
    
    SArray* p1 = taosArrayInit(s, sizeof(SGroupItem));
    
    for(int32_t j = 0; j < s; ++j) {
weixin_48148422's avatar
weixin_48148422 已提交
5502 5503 5504
      STableId id = *(STableId*) taosArrayGet(pa, j);
      SGroupItem item = { .id = id };
      // NOTE: compare STableIdInfo with STableId
5505
      STableIdInfo* pTableId = taosArraySearch( pTableIdList, &id, compareTableIdInfo);
weixin_48148422's avatar
weixin_48148422 已提交
5506 5507 5508
      if (pTableId != NULL ) {
        window.skey = pTableId->key;
      } else {
B
Bomin Zhang 已提交
5509
        window.skey = pQueryMsg->window.skey;
weixin_48148422's avatar
weixin_48148422 已提交
5510 5511
      }
      item.info = createTableQueryInfo(&pQInfo->runtimeEnv, item.id, window);
5512
      item.info->groupIndex = i;
weixin_48148422's avatar
weixin_48148422 已提交
5513
      item.info->tableIndex = tableIndex++;
H
hjxilinx 已提交
5514 5515 5516 5517
      taosArrayPush(p1, &item);
    }
    taosArrayPush(pQInfo->groupInfo.pGroupList, &p1);
  }
5518

weixin_48148422's avatar
weixin_48148422 已提交
5519 5520
  pQInfo->arrTableIdInfo = taosArrayInit(tableIndex, sizeof(STableIdInfo));

5521
  pQuery->pos = -1;
5522
  pQuery->window = pQueryMsg->window;
5523

5524
  if (sem_init(&pQInfo->dataReady, 0, 0) != 0) {
S
slguan 已提交
5525
    qError("QInfo:%p init dataReady sem failed, reason:%s", pQInfo, strerror(errno));
5526
    goto _cleanup;
5527
  }
5528

5529
  colIdCheck(pQuery);
5530

S
slguan 已提交
5531
  qTrace("qmsg:%p QInfo:%p created", pQueryMsg, pQInfo);
5532 5533
  return pQInfo;

5534
_cleanup:
5535
  tfree(pQuery->fillVal);
5536 5537

  if (pQuery->sdata != NULL) {
5538
    for (int16_t col = 0; col < pQuery->numOfOutput; ++col) {
5539 5540 5541 5542 5543 5544 5545 5546 5547 5548 5549 5550 5551 5552 5553 5554
      tfree(pQuery->sdata[col]);
    }
  }

  tfree(pQuery->sdata);
  tfree(pQuery->pFilterInfo);
  tfree(pQuery->colList);

  tfree(pExprs);
  tfree(pGroupbyExpr);

  tfree(pQInfo);

  return NULL;
}

H
hjxilinx 已提交
5555
static bool isValidQInfo(void *param) {
H
hjxilinx 已提交
5556 5557 5558 5559
  SQInfo *pQInfo = (SQInfo *)param;
  if (pQInfo == NULL) {
    return false;
  }
5560

H
hjxilinx 已提交
5561 5562 5563 5564
  /*
   * pQInfo->signature may be changed by another thread, so we assign value of signature
   * into local variable, then compare by using local variable
   */
5565
  uint64_t sig = (uint64_t)pQInfo->signature;
H
hjxilinx 已提交
5566 5567 5568
  return (sig == (uint64_t)pQInfo);
}

H
hjxilinx 已提交
5569 5570
static void freeQInfo(SQInfo *pQInfo);

5571
/*
 * Prepare a freshly created QInfo for execution.
 *
 * pQueryMsg  query message; may carry compressed timestamp blocks (tsLen > 0)
 * tsdb, vgId storage handle and vnode id, forwarded to doInitQInfo()
 * pQInfo     query object to initialise
 * isSTable   forwarded to doInitQInfo()
 *
 * Returns TSDB_CODE_SUCCESS when the query is ready to run, or when it can be
 * answered immediately with an empty result (empty time range / no qualified
 * table). In the latter case the status is set to QUERY_COMPLETED and the
 * dataReady semaphore is posted.
 *
 * On failure of doInitQInfo() the QInfo is released with freeQInfo() and the
 * error code is returned; the caller must not touch pQInfo afterwards.
 */
static int32_t initQInfo(SQueryTableMsg *pQueryMsg, void *tsdb, int32_t vgId, SQInfo *pQInfo, bool isSTable) {
  SQuery *pQuery = pQInfo->runtimeEnv.pQuery;

  // The early-exit checks run before the timestamp buffer is created, so that
  // nothing has to be released on these paths.

  // only the successful completion requires the sem_post/over = 1 operations.
  bool emptyRange = QUERY_IS_ASC_QUERY(pQuery) ? (pQuery->window.skey > pQuery->window.ekey)
                                               : (pQuery->window.ekey > pQuery->window.skey);
  if (emptyRange) {
    qTrace("QInfo:%p no result in time range %" PRId64 "-%" PRId64 ", order %d", pQInfo, pQuery->window.skey,
           pQuery->window.ekey, pQuery->order.order);
    setQueryStatus(pQuery, QUERY_COMPLETED);

    sem_post(&pQInfo->dataReady);
    return TSDB_CODE_SUCCESS;
  }

  if (pQInfo->groupInfo.numOfTables == 0) {
    qTrace("QInfo:%p no table qualified for tag filter, abort query", pQInfo);
    setQueryStatus(pQuery, QUERY_COMPLETED);

    sem_post(&pQInfo->dataReady);
    return TSDB_CODE_SUCCESS;
  }

  // build the timestamp buffer from the compressed blocks embedded in the message
  STSBuf *pTSBuf = NULL;
  if (pQueryMsg->tsLen > 0) {
    char *tsBlock = (char *)pQueryMsg + pQueryMsg->tsOffset;
    pTSBuf = tsBufCreateFromCompBlocks(tsBlock, pQueryMsg->tsNumOfBlocks, pQueryMsg->tsLen, pQueryMsg->tsOrder);

    tsBufResetPos(pTSBuf);
    tsBufNextPos(pTSBuf);
  }

  // NOTE(review): ownership of pTSBuf when doInitQInfo() fails is not visible
  // here -- confirm it is released by the runtime environment teardown.
  int32_t code = doInitQInfo(pQInfo, pTSBuf, tsdb, vgId, isSTable);
  if (code != TSDB_CODE_SUCCESS) {
    // table query ref will be decreased during error handling
    freeQInfo(pQInfo);
  }

  return code;
}

/*
 * Release a QInfo object and everything reachable from it.
 *
 * The call is a no-op when pQInfo does not pass isValidQInfo(). Otherwise the
 * query is flagged as killed, the runtime environment is torn down, all
 * buffers, expressions, filters and table groups are released, and finally
 * the object is zeroed (which destroys its signature) and freed.
 */
static void freeQInfo(SQInfo *pQInfo) {
  if (!isValidQInfo(pQInfo)) {
    return;
  }

  SQuery *pQuery = pQInfo->runtimeEnv.pQuery;
  setQueryKilled(pQInfo);

  qTrace("QInfo:%p start to free QInfo", pQInfo);

  // per-column output buffers; the array itself is released further below
  if (pQuery->sdata != NULL) {
    for (int32_t col = 0; col < pQuery->numOfOutput; ++col) {
      tfree(pQuery->sdata[col]);
    }
  }

  sem_destroy(&(pQInfo->dataReady));
  teardownQueryRuntimeEnv(&pQInfo->runtimeEnv);

  for (int32_t i = 0; i < pQuery->numOfFilterCols; ++i) {
    SSingleColumnFilterInfo *pColFilter = &pQuery->pFilterInfo[i];
    if (pColFilter->numOfFilters > 0) {
      tfree(pColFilter->pFilters);
    }
  }

  if (pQuery->pSelectExpr != NULL) {
    for (int32_t i = 0; i < pQuery->numOfOutput; ++i) {
      SExprInfo *pExprInfo = &pQuery->pSelectExpr[i];

      if (pExprInfo->pExpr != NULL) {
        tExprTreeDestroy(&pExprInfo->pExpr, NULL);
      }
    }

    tfree(pQuery->pSelectExpr);
  }

  tfree(pQuery->fillVal);

  // todo refactor, extract method to destroytableDataInfo
  size_t numOfGroups = taosArrayGetSize(pQInfo->groupInfo.pGroupList);
  for (size_t i = 0; i < numOfGroups; ++i) {
    SArray *p = taosArrayGetP(pQInfo->groupInfo.pGroupList, i);

    size_t num = taosArrayGetSize(p);
    for (size_t j = 0; j < num; ++j) {
      SGroupItem *item = taosArrayGet(p, j);
      if (item->info != NULL) {
        destroyTableQueryInfo(item->info, pQuery->numOfOutput);
      }
    }

    taosArrayDestroy(p);
  }

  taosArrayDestroy(pQInfo->groupInfo.pGroupList);

  // Walk the table-id group list with its own length rather than the length
  // of groupInfo.pGroupList, so that a size mismatch can neither read past
  // the end of the list nor leave groups behind.
  if (pQInfo->tableIdGroupInfo.pGroupList != NULL) {
    size_t numOfIdGroups = taosArrayGetSize(pQInfo->tableIdGroupInfo.pGroupList);
    for (size_t i = 0; i < numOfIdGroups; ++i) {
      SArray *p = taosArrayGetP(pQInfo->tableIdGroupInfo.pGroupList, i);
      taosArrayDestroy(p);
    }
  }

  taosArrayDestroy(pQInfo->tableIdGroupInfo.pGroupList);
  taosArrayDestroy(pQInfo->arrTableIdInfo);

  if (pQuery->pGroupbyExpr != NULL) {
    taosArrayDestroy(pQuery->pGroupbyExpr->columnInfo);
    tfree(pQuery->pGroupbyExpr);
  }

  tfree(pQuery->tagColList);
  tfree(pQuery->pFilterInfo);
  tfree(pQuery->colList);
  tfree(pQuery->sdata);

  tfree(pQuery);

  qTrace("QInfo:%p QInfo is freed", pQInfo);

  // destroy signature, in order to avoid the query process pass the object safety check
  memset(pQInfo, 0, sizeof(SQInfo));
  tfree(pQInfo);
}

H
hjxilinx 已提交
5700
/*
 * Compute the number of payload bytes of the current result set.
 *
 * For an ordinary query the size is rowSize * (*numOfRows).
 *
 * For a ts-comp query with a positive row count, the result is the content of
 * the file whose path is stored in sdata[0]->data: the file size is returned
 * and also written back to *numOfRows. If the file cannot be inspected, an
 * error is logged and 0 is returned (*numOfRows is left untouched).
 *
 * TODO handle the case that the file is too large to send back one time
 */
static size_t getResultSize(SQInfo *pQInfo, int64_t *numOfRows) {
  SQuery *pQuery = pQInfo->runtimeEnv.pQuery;

  // ordinary result: fixed width rows
  if (!isTSCompQuery(pQuery) || (*numOfRows) <= 0) {
    return pQuery->rowSize * (*numOfRows);
  }

  struct stat fileInfo;
  if (stat(pQuery->sdata[0]->data, &fileInfo) != 0) {
    qError("QInfo:%p failed to get file info, path:%s, reason:%s", pQInfo, pQuery->sdata[0]->data, strerror(errno));
    return 0;
  }

  *numOfRows = fileInfo.st_size;
  return fileInfo.st_size;
}
5721

H
hjxilinx 已提交
5722 5723 5724
/*
 * Copy the current result set of pQInfo into the response buffer `data`.
 *
 * For a ts-comp query the result is the content of the temporary file named
 * by sdata[0]->data: the file is read into `data`, then closed and unlinked.
 * For every other query the rows are copied by doCopyQueryResultToMsg().
 *
 * Afterwards rec.total is advanced by rec.rows, and the query is marked
 * QUERY_OVER once the configured limit has been reached.
 *
 * Always returns TSDB_CODE_SUCCESS; I/O problems are only logged.
 * todo return the error code to client
 * todo if interpolation exists, the result may be dump to client by several rounds
 */
static int32_t doDumpQueryResult(SQInfo *pQInfo, char *data) {
  SQuery *pQuery = pQInfo->runtimeEnv.pQuery;

  // load data from file to msg buffer
  if (isTSCompQuery(pQuery)) {
    int32_t fd = open(pQuery->sdata[0]->data, O_RDONLY);

    // make sure file exist
    if (FD_VALID(fd)) {
      // lseek() reports failure with -1, which must not be used as a length
      off_t len = lseek(fd, 0, SEEK_END);
      if (len < 0 || lseek(fd, 0, SEEK_SET) < 0) {
        qError("QInfo:%p failed to get size of ts-comp file, path:%s, reason:%s", pQInfo, pQuery->sdata[0]->data,
               strerror(errno));
      } else {
        size_t s = (size_t)len;
        qTrace("QInfo:%p ts comp data return, file:%s, size:%zu", pQInfo, pQuery->sdata[0]->data, s);

        // read() may deliver fewer bytes than requested, so loop until done
        size_t done = 0;
        while (done < s) {
          ssize_t n = read(fd, data + done, s - done);
          if (n < 0) {
            if (errno == EINTR) {
              continue;
            }

            qError("QInfo:%p failed to read ts-comp file, path:%s, reason:%s", pQInfo, pQuery->sdata[0]->data,
                   strerror(errno));
            break;
          }

          if (n == 0) {  // unexpected end of file
            break;
          }

          done += (size_t)n;
        }
      }

      close(fd);
      unlink(pQuery->sdata[0]->data);
    } else {
      qError("QInfo:%p failed to open tmp file to send ts-comp data to client, path:%s, reason:%s", pQInfo,
             pQuery->sdata[0]->data, strerror(errno));
    }

    // all data returned, set query over
    if (Q_STATUS_EQUAL(pQuery->status, QUERY_COMPLETED)) {
      setQueryStatus(pQuery, QUERY_OVER);
    }
  } else {
    doCopyQueryResultToMsg(pQInfo, pQuery->rec.rows, data);
  }

  pQuery->rec.total += pQuery->rec.rows;
  // the row counters are 64-bit values, print them with a matching conversion
  qTrace("QInfo:%p current numOfRes rows:%" PRId64 ", total:%" PRId64, pQInfo, (int64_t)pQuery->rec.rows,
         (int64_t)pQuery->rec.total);

  if (pQuery->limit.limit > 0 && pQuery->limit.limit == pQuery->rec.total) {
    qTrace("QInfo:%p results limitation reached, limitation:%"PRId64, pQInfo, pQuery->limit.limit);
    setQueryStatus(pQuery, QUERY_OVER);
  }

  return TSDB_CODE_SUCCESS;
}

5767
/*
 * Build a query object from a query message.
 *
 * tsdb       storage handle used to resolve the tables
 * vgId       vnode id, forwarded to initQInfo()
 * pQueryMsg  query message (must not be NULL)
 * pQInfo     out: handle of the created query object
 *
 * Returns TSDB_CODE_SUCCESS on success. On failure an error code is returned;
 * if the object had already been created and its initialisation failed, the
 * object has been released by initQInfo() and *pQInfo is reset to NULL so the
 * caller never receives a dangling handle.
 *
 * NOTE(review): pExprMsg, pGroupColIndex, pTagColumnInfo, pExprs and
 * pGroupbyExpr are not released on the error paths of this function; their
 * ownership is not visible from here -- confirm and release where appropriate.
 */
int32_t qCreateQueryInfo(void *tsdb, int32_t vgId, SQueryTableMsg *pQueryMsg, qinfo_t *pQInfo) {
  assert(pQueryMsg != NULL);

  int32_t code = TSDB_CODE_SUCCESS;

  char *        tagCond = NULL, *tbnameCond = NULL;
  SArray *      pTableIdList = NULL;
  SSqlFuncMsg **pExprMsg = NULL;
  SColIndex *   pGroupColIndex = NULL;
  SColumnInfo*  pTagColumnInfo = NULL;

  if ((code = convertQueryMsg(pQueryMsg, &pTableIdList, &pExprMsg, &tagCond, &tbnameCond, &pGroupColIndex, &pTagColumnInfo)) !=
         TSDB_CODE_SUCCESS) {
    return code;
  }

  if (pQueryMsg->numOfTables <= 0) {
    qError("Invalid number of tables to query, numOfTables:%d", pQueryMsg->numOfTables);
    code = TSDB_CODE_INVALID_QUERY_MSG;
    goto _over;
  }

  if (pTableIdList == NULL || taosArrayGetSize(pTableIdList) == 0) {
    qError("qmsg:%p, SQueryTableMsg wrong format", pQueryMsg);
    code = TSDB_CODE_INVALID_QUERY_MSG;
    goto _over;
  }

  SExprInfo *pExprs = NULL;
  if ((code = createSqlFunctionExprFromMsg(pQueryMsg, &pExprs, pExprMsg, pTagColumnInfo)) != TSDB_CODE_SUCCESS) {
    goto _over;
  }

  SSqlGroupbyExpr *pGroupbyExpr = createGroupbyExprFromMsg(pQueryMsg, pGroupColIndex, &code);
  if ((pGroupbyExpr == NULL && pQueryMsg->numOfGroupCols != 0) || code != TSDB_CODE_SUCCESS) {
    goto _over;
  }

  bool isSTableQuery = false;
  STableGroupInfo groupInfo = {0};

  //todo multitable_query??
  if (TSDB_QUERY_HAS_TYPE(pQueryMsg->queryType, TSDB_QUERY_TYPE_MULTITABLE_QUERY|TSDB_QUERY_TYPE_TABLE_QUERY)) {
    isSTableQuery = TSDB_QUERY_HAS_TYPE(pQueryMsg->queryType, TSDB_QUERY_TYPE_MULTITABLE_QUERY);

    STableIdInfo *id = taosArrayGet(pTableIdList, 0);
    qTrace("qmsg:%p query table, uid:%"PRId64", tid:%d", pQueryMsg, id->uid, id->tid);

    if ((code = tsdbGetOneTableGroup(tsdb, id->uid, &groupInfo)) != TSDB_CODE_SUCCESS) {
      goto _over;
    }
  } else if (TSDB_QUERY_HAS_TYPE(pQueryMsg->queryType, TSDB_QUERY_TYPE_STABLE_QUERY)) {
    isSTableQuery = true;
    // TODO: need a macro from TSDB to check if table is super table,
    // also note there's possibility that only one table in the super table
    if (taosArrayGetSize(pTableIdList) == 1) {
      STableIdInfo *id = taosArrayGet(pTableIdList, 0);
      // if array size is 1 and assert super table

      // group by normal column, do not pass the group by condition to tsdb to group table into different group
      int32_t numOfGroupByCols = pQueryMsg->numOfGroupCols;
      if (pQueryMsg->numOfGroupCols == 1 && !TSDB_COL_IS_TAG(pGroupColIndex->flag)) {
        numOfGroupByCols = 0;
      }

      // todo handle the error
      /*int32_t ret =*/tsdbQuerySTableByTagCond(tsdb, id->uid, tagCond, pQueryMsg->tagCondLen, pQueryMsg->tagNameRelType, tbnameCond, &groupInfo, pGroupColIndex,
                                          numOfGroupByCols);
    } else {
      // Several explicit tables: put all of them into one single group.
      // The number of tables comes from the id list; groupInfo is still
      // zero-initialised at this point and cannot be used as the bound.
      size_t numOfTables = taosArrayGetSize(pTableIdList);

      SArray* pTableGroup = taosArrayInit(1, POINTER_BYTES);
      SArray* sa = taosArrayInit(numOfTables, sizeof(STableId));
      if (pTableGroup == NULL || sa == NULL) {
        if (pTableGroup != NULL) {
          taosArrayDestroy(pTableGroup);
        }
        if (sa != NULL) {
          taosArrayDestroy(sa);
        }

        code = TSDB_CODE_SERV_OUT_OF_MEMORY;
        goto _over;
      }

      for (size_t i = 0; i < numOfTables; ++i) {
        // only the leading STableId-sized part of each STableIdInfo is copied
        STableIdInfo* tableId = taosArrayGet(pTableIdList, i);
        taosArrayPush(sa, tableId);
      }

      taosArrayPush(pTableGroup, &sa);
      groupInfo.numOfTables = numOfTables;
      groupInfo.pGroupList = pTableGroup;
    }
  } else {
    // the query type comes from the message, reject it instead of aborting
    qError("qmsg:%p invalid query type:%d", pQueryMsg, (int32_t)pQueryMsg->queryType);
    code = TSDB_CODE_INVALID_QUERY_MSG;
    goto _over;
  }

  (*pQInfo) = createQInfoImpl(pQueryMsg, pTableIdList, pGroupbyExpr, pExprs, &groupInfo, pTagColumnInfo);
  if ((*pQInfo) == NULL) {
    code = TSDB_CODE_SERV_OUT_OF_MEMORY;
    goto _over;
  }

  code = initQInfo(pQueryMsg, tsdb, vgId, *pQInfo, isSTableQuery);
  if (code != TSDB_CODE_SUCCESS) {
    // initQInfo() has already released the object on failure
    (*pQInfo) = NULL;
  }

_over:
  tfree(tagCond);
  tfree(tbnameCond);
  taosArrayDestroy(pTableIdList);

  // if failed to add ref for all meters in this query, abort current query
  return code;
}

H
hjxilinx 已提交
5867
/*
 * Public entry point to dispose of a query handle: logs the completion of the
 * query and hands the object to freeQInfo().
 */
void qDestroyQueryInfo(qinfo_t pQInfo) {
  qTrace("QInfo:%p query completed", pQInfo);

  SQInfo *pInfo = (SQInfo *)pQInfo;
  freeQInfo(pInfo);
}

H
hjxilinx 已提交
5872
/*
 * Execute one round of the query behind the handle.
 *
 * Nothing is done when the handle is NULL, its signature does not match, or
 * the query has been killed. Otherwise one of the three executors is chosen
 * (tag-only query, super table query, plain table query) and, once it has
 * returned, the dataReady semaphore is posted.
 */
void qTableQuery(qinfo_t qinfo) {
  SQInfo *pQInfo = (SQInfo *)qinfo;

  bool freed = (pQInfo == NULL) || (pQInfo->signature != pQInfo);
  if (freed) {
    qTrace("QInfo:%p has been freed, no need to execute", pQInfo);
    return;
  }

  if (isQueryKilled(pQInfo)) {
    qTrace("QInfo:%p it is already killed, abort", pQInfo);
    return;
  }

  qTrace("QInfo:%p query task is launched", pQInfo);

  SQueryRuntimeEnv *pEnv = &pQInfo->runtimeEnv;
  if (onlyQueryTags(pEnv->pQuery)) {
    // todo support the limit/offset
    buildTagQueryResult(pQInfo);
  } else if (pEnv->stableQuery) {
    stableQueryImpl(pQInfo);
  } else {
    tableQueryImpl(pQInfo);
  }

  // wake up the thread waiting for the result
  sem_post(&pQInfo->dataReady);
  //  vnodeDecRefCount(pQInfo);
}

H
hjxilinx 已提交
5899
/*
 * Wait until the query behind the handle has produced a result.
 *
 * Returns TSDB_CODE_INVALID_QHANDLE for an invalid handle. If the query has
 * been killed the stored code is returned at once. Otherwise the call blocks
 * on the dataReady semaphore and then returns the code stored in the object.
 */
int32_t qRetrieveQueryResultInfo(qinfo_t qinfo) {
  SQInfo *pQInfo = (SQInfo *)qinfo;

  // isValidQInfo() rejects NULL as well
  if (!isValidQInfo(pQInfo)) {
    return TSDB_CODE_INVALID_QHANDLE;
  }

  SQuery *pQuery = pQInfo->runtimeEnv.pQuery;
  if (isQueryKilled(pQInfo)) {
    qTrace("QInfo:%p query is killed, code:%d", pQInfo, pQInfo->code);
    return pQInfo->code;
  }

  // a signal may interrupt the wait; retry so that we never report a result
  // before the semaphore has really been posted
  while (sem_wait(&pQInfo->dataReady) != 0 && errno == EINTR) {
  }

  // rec.rows is a 64-bit counter, print it with a matching conversion
  qTrace("QInfo:%p retrieve result info, rowsize:%d, rows:%" PRId64 ", code:%d", pQInfo, pQuery->rowSize,
         (int64_t)pQuery->rec.rows, pQInfo->code);

  return pQInfo->code;
}
5918

H
hjxilinx 已提交
5919
/*
 * Tell whether another retrieve round may deliver more rows.
 *
 * Returns false for an invalid handle, for a query whose code is not
 * TSDB_CODE_SUCCESS and for a query in QUERY_OVER state. Returns true while
 * the status is QUERY_RESBUF_FULL or QUERY_COMPLETED. Any other status is
 * unexpected: it trips an assertion in debug builds and yields false otherwise.
 */
bool qHasMoreResultsToRetrieve(qinfo_t qinfo) {
  SQInfo *pQInfo = (SQInfo *)qinfo;

  if (!isValidQInfo(pQInfo) || pQInfo->code != TSDB_CODE_SUCCESS) {
    return false;
  }

  SQuery *pQuery = pQInfo->runtimeEnv.pQuery;
  if (Q_STATUS_EQUAL(pQuery->status, QUERY_OVER)) {
    return false;
  }

  if (Q_STATUS_EQUAL(pQuery->status, QUERY_RESBUF_FULL) || Q_STATUS_EQUAL(pQuery->status, QUERY_COMPLETED)) {
    return true;
  }

  assert(0);
  return false;  // keeps the function well defined when assertions are compiled out
}

5938 5939 5940
/*
 * Build the retrieve response for the current result set.
 *
 * qinfo    query handle
 * pRsp     out: response allocated with rpcMallocCont()
 * contLen  out: total length of the response in bytes
 *
 * The response is sized for the header, the result rows, one int32_t and one
 * STableIdInfo per entry of arrTableIdInfo. Rows are copied by
 * doDumpQueryResult() only when there are rows and the query code is
 * TSDB_CODE_SUCCESS; otherwise the query is marked QUERY_OVER.
 *
 * Returns TSDB_CODE_INVALID_QHANDLE for an invalid handle,
 * TSDB_CODE_SERV_OUT_OF_MEMORY when the response cannot be allocated (*pRsp is
 * NULL and *contLen is 0 in that case), otherwise the resulting query code.
 */
int32_t qDumpRetrieveResult(qinfo_t qinfo, SRetrieveTableRsp **pRsp, int32_t *contLen) {
  SQInfo *pQInfo = (SQInfo *)qinfo;

  // isValidQInfo() rejects NULL as well
  if (!isValidQInfo(pQInfo)) {
    return TSDB_CODE_INVALID_QHANDLE;
  }

  SQuery *pQuery = pQInfo->runtimeEnv.pQuery;
  size_t  size = getResultSize(pQInfo, &pQuery->rec.rows);
  size += sizeof(int32_t);
  size += sizeof(STableIdInfo) * taosArrayGetSize(pQInfo->arrTableIdInfo);
  *contLen = size + sizeof(SRetrieveTableRsp);

  *pRsp = (SRetrieveTableRsp *)rpcMallocCont(*contLen);
  if (*pRsp == NULL) {
    qError("QInfo:%p failed to allocate retrieve response, size:%d", pQInfo, *contLen);
    *contLen = 0;
    return TSDB_CODE_SERV_OUT_OF_MEMORY;
  }

  (*pRsp)->numOfRows = htonl(pQuery->rec.rows);

  int32_t code = pQInfo->code;
  if (code == TSDB_CODE_SUCCESS) {
    (*pRsp)->offset = htobe64(pQuery->limit.offset);
    (*pRsp)->useconds = htobe64(pQInfo->elapsedTime);
  } else {
    (*pRsp)->offset = 0;
    (*pRsp)->useconds = 0;
  }

  if (pQuery->rec.rows > 0 && code == TSDB_CODE_SUCCESS) {
    code = doDumpQueryResult(pQInfo, (*pRsp)->data);
  } else {
    setQueryStatus(pQuery, QUERY_OVER);
    code = pQInfo->code;
  }

  // set the flag in both cases instead of relying on the allocator to have
  // cleared it; 1 notifies the client that there is no more result
  if (isQueryKilled(pQInfo) || Q_STATUS_EQUAL(pQuery->status, QUERY_OVER)) {
    (*pRsp)->completed = 1;
  } else {
    (*pRsp)->completed = 0;
  }

  return code;
}
H
hjxilinx 已提交
5983 5984 5985 5986 5987 5988

/*
 * Produce the result of a query that touches tags / table names only.
 *
 * The tables are taken from the first (and only) group of groupInfo; nothing
 * is done when there is no group at all.
 *
 * - TSDB_FUNC_TID_TAG as first output function: for every table one
 *   variable-length value is written into sdata[0], holding uid, tid, vgId and
 *   the tag value of the requested column, in that order.
 * - otherwise: for every table and every output column either the table name
 *   or the tag value is copied into the matching sdata[] buffer.
 *
 * Finally rec.rows is set to the number of tables and the status becomes
 * QUERY_COMPLETED.
 */
static void buildTagQueryResult(SQInfo* pQInfo) {
  SQueryRuntimeEnv *pRuntimeEnv = &pQInfo->runtimeEnv;
  SQuery *          pQuery = pRuntimeEnv->pQuery;

  size_t num = taosArrayGetSize(pQInfo->groupInfo.pGroupList);
  assert(num == 0 || num == 1);
  if (num == 0) {
    return;
  }

  SArray* pa = taosArrayGetP(pQInfo->groupInfo.pGroupList, 0);
  num = taosArrayGetSize(pa);

  assert(num == pQInfo->groupInfo.numOfTables);
  int16_t type = 0, bytes = 0;

  int32_t functionId = pQuery->pSelectExpr[0].base.functionId;
  if (functionId == TSDB_FUNC_TID_TAG) { // return the tags & table Id
    assert(pQuery->numOfOutput == 1);
    SExprInfo* pExprInfo = &pQuery->pSelectExpr[0];

    int32_t rsize = pExprInfo->bytes;
    char* data = NULL;

    for (size_t i = 0; i < num; ++i) {
      SGroupItem* item = taosArrayGet(pa, i);

      char* output = pQuery->sdata[0]->data + i * rsize;
      varDataSetLen(output, rsize - VARSTR_HEADER_SIZE);

      output = varDataVal(output);

      // The payload position is not guaranteed to be aligned for 64/32-bit
      // stores, so the fields are copied byte-wise instead of being written
      // through a casted pointer.
      int64_t uid = item->id.uid;
      memcpy(output, &uid, sizeof(uid));
      output += sizeof(item->id.uid);

      int32_t tid = item->id.tid;
      memcpy(output, &tid, sizeof(tid));
      output += sizeof(item->id.tid);

      int32_t vgId = pQInfo->vgId;
      memcpy(output, &vgId, sizeof(vgId));
      output += sizeof(pQInfo->vgId);

      tsdbGetTableTagVal(pQInfo->tsdb, &item->id, pExprInfo->base.colInfo.colId, &type, &bytes, &data);
      memcpy(output, data, bytes);
    }

    qTrace("QInfo:%p create (tableId, tag) info completed, rows:%zu", pQInfo, num);
  } else {  // return only the tags|table name etc.
    for (size_t i = 0; i < num; ++i) {
      SExprInfo* pExprInfo = pQuery->pSelectExpr;
      SGroupItem* item = taosArrayGet(pa, i);

      char* data = NULL;
      for (int32_t j = 0; j < pQuery->numOfOutput; ++j) {
        // todo check the return value, refactor codes
        if (pExprInfo[j].base.colInfo.colId == TSDB_TBNAME_COLUMN_INDEX) {
          data = tsdbGetTableName(pQInfo->tsdb, &item->id, &bytes);

          char* dst = pQuery->sdata[j]->data + i * (TSDB_TABLE_NAME_LEN + VARSTR_HEADER_SIZE);
          memcpy(dst, data, varDataTLen(data));
        } else {// todo refactor, return the true length of binary|nchar data
          tsdbGetTableTagVal(pQInfo->tsdb, &item->id, pExprInfo[j].base.colInfo.colId, &type, &bytes, &data);
          assert(bytes <= pExprInfo[j].bytes && type == pExprInfo[j].type);

          char* dst = pQuery->sdata[j]->data + i * pExprInfo[j].bytes;
          if (type == TSDB_DATA_TYPE_BINARY || type == TSDB_DATA_TYPE_NCHAR) {
            memcpy(dst, data, varDataTLen(data));
          } else {
            memcpy(dst, data, bytes);
          }
        }
      }
    }

    pQInfo->tableIndex = pQInfo->groupInfo.numOfTables;
    qTrace("QInfo:%p create tag values results completed, rows:%zu", pQInfo, num);
  }

  pQuery->rec.rows = num;
  setQueryStatus(pQuery, QUERY_COMPLETED);
}