qExecutor.c 203.7 KB
Newer Older
1 2 3 4 5 6 7 8 9 10 11 12 13 14
/*
 * Copyright (c) 2019 TAOS Data, Inc. <jhtao@taosdata.com>
 *
 * This program is free software: you can use, redistribute, and/or modify
 * it under the terms of the GNU Affero General Public License, version 3
 * or later ("AGPL"), as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.
 *
 * You should have received a copy of the GNU Affero General Public License
 * along with this program. If not, see <http://www.gnu.org/licenses/>.
 */
15
#include "qfill.h"
16 17 18 19
#include "os.h"

#include "hash.h"
#include "hashfunc.h"
20 21
#include "qExecutor.h"
#include "qUtil.h"
H
hjxilinx 已提交
22
#include "qast.h"
23
#include "qresultBuf.h"
H
hjxilinx 已提交
24
#include "query.h"
S
slguan 已提交
25
#include "queryLog.h"
26
#include "taosmsg.h"
27
#include "tdataformat.h"
28
#include "tlosertree.h"
29
#include "tscUtil.h"  // todo move the function to common module
30 31
#include "tscompression.h"
#include "ttime.h"
32 33 34 35 36 37 38 39 40

/*
 * NOTE(review): the comment that used to sit here talked about checking whether the
 * primary column is loaded by default; none of the macros below does that, so it is
 * presumably stale.
 */

/* true when at least one bit of mask s is set in p */
#define Q_STATUS_EQUAL(p, s) (((p) & (s)) != 0)
/* true when the TSDB_COL_TAG bit is set in the column flag f */
#define TSDB_COL_IS_TAG(f) (((f)&TSDB_COL_TAG) != 0)
/* true when the order of query q maps to the ascending forward step */
#define QUERY_IS_ASC_QUERY(q) (GET_FORWARD_DIRECTION_FACTOR((q)->order.order) == QUERY_ASC_FORWARD_STEP)

/* test / set the scanFlag of a runtime environment */
#define IS_MASTER_SCAN(runtime)        ((runtime)->scanFlag == MASTER_SCAN)
#define IS_REVERSE_SCAN(runtime)       ((runtime)->scanFlag == REVERSE_SCAN)
#define SET_MASTER_SCAN_FLAG(runtime)  ((runtime)->scanFlag = MASTER_SCAN)
#define SET_REVERSE_SCAN_FLAG(runtime) ((runtime)->scanFlag = REVERSE_SCAN)

/* get the enclosing SQInfo address from a pointer to its runtimeEnv member */
#define GET_QINFO_ADDR(x) ((void *)((char *)(x)-offsetof(SQInfo, runtimeEnv)))

/* position obtained by moving `index` steps of size `step` away from (query)->pos */
#define GET_COL_DATA_POS(query, index, step) ((query)->pos + (index) * (step))
/* flip n in place between TSDB_ORDER_ASC and TSDB_ORDER_DESC; n is evaluated more than once */
#define SWITCH_ORDER(n) (((n) = ((n) == TSDB_ORDER_ASC) ? TSDB_ORDER_DESC : TSDB_ORDER_ASC))

/* byte width / data type of the colList entry referenced by select expression `colidx` */
#define GET_COLUMN_BYTES(query, colidx) \
  ((query)->colList[(query)->pSelectExpr[colidx].base.colInfo.colIndex].bytes)
#define GET_COLUMN_TYPE(query, colidx) ((query)->colList[(query)->pSelectExpr[colidx].base.colInfo.colIndex].type)
55

56
/* Query status flags; each value is a distinct bit so they can be OR-ed together. */
enum {
  // set when the query starts to execute
  QUERY_NOT_COMPLETED = 0x1u,

  /* result output buffer is full, current query is paused.
   * this status only exists in group-by clause and diff/add/division/multiply/ query.
   */
  QUERY_RESBUF_FULL = 0x2u,

  /* query is over
   * 1. this status is used in one row result query process, e.g., count/sum/first/last/ avg...etc.
   * 2. when all data within queried time window, it is also denoted as query_completed
   */
  QUERY_COMPLETED = 0x4u,

  /* when the result is not completed return to client, this status will be
   * usually used in case of interval query with interpolation option
   */
  QUERY_OVER = 0x8u,
};
76 77

/* Outcome codes for comparing timestamp/tag in a ts join (meaning taken from the names only). */
enum {
  TS_JOIN_TS_EQUAL       = 0,
  TS_JOIN_TS_NOT_EQUALS  = 1,
  TS_JOIN_TAG_NOT_EQUALS = 2,
};

83
typedef struct {
84 85 86 87 88 89
  int32_t     status;       // query status
  TSKEY       lastKey;      // the lastKey value before query executed
  STimeWindow w;            // whole query time window
  STimeWindow curWindow;    // current query window
  int32_t     windowIndex;  // index of active time window result for interval query
  STSCursor   cur;
90 91
} SQueryStatusInfo;

92
#define CLEAR_QUERY_STATUS(q, st)   ((q)->status &= (~(st)))
93
static void setQueryStatus(SQuery *pQuery, int8_t status);
94

H
hjxilinx 已提交
95
/* A query is an interval query when its intervalTime is positive. */
static bool isIntervalQuery(SQuery *pQuery) {
  if (pQuery->intervalTime > 0) {
    return true;
  }

  return false;
}
96

H
hjxilinx 已提交
97
// todo move to utility
98
static int32_t mergeIntoGroupResultImpl(SQInfo *pQInfo, SArray *group);
99

H
hjxilinx 已提交
100
static void setWindowResOutputBuf(SQueryRuntimeEnv *pRuntimeEnv, SWindowResult *pResult);
101 102 103
static void resetMergeResultBuf(SQuery *pQuery, SQLFunctionCtx *pCtx, SResultInfo *pResultInfo);
static bool functionNeedToExecute(SQueryRuntimeEnv *pRuntimeEnv, SQLFunctionCtx *pCtx, int32_t functionId);
static void getNextTimeWindow(SQuery *pQuery, STimeWindow *pTimeWindow);
104

105 106 107
static void setExecParams(SQuery *pQuery, SQLFunctionCtx *pCtx, void* inputData, TSKEY *tsCol, SDataBlockInfo* pBlockInfo,
                          SDataStatis *pStatis, void *param, int32_t colIndex);

108
static void initCtxOutputBuf(SQueryRuntimeEnv *pRuntimeEnv);
109
static void destroyTableQueryInfo(STableQueryInfo *pTableQueryInfo, int32_t numOfCols);
110 111
static void resetCtxOutputBuf(SQueryRuntimeEnv *pRuntimeEnv);
static bool hasMainOutput(SQuery *pQuery);
H
hjxilinx 已提交
112
static void buildTagQueryResult(SQInfo *pQInfo);
113

H
hjxilinx 已提交
114
static int32_t setAdditionalInfo(SQInfo *pQInfo, STableId *pTableId, STableQueryInfo *pTableQueryInfo);
115
static int32_t flushFromResultBuf(SQInfo *pQInfo);
116

117
/**
 * Evaluate every column filter of the query against the row at elemPos.
 *
 * For each filter column the value is read from pData at byte offset info.bytes * elemPos.
 * The row is rejected when that value is null, or when none of the column's filter
 * elements accepts it (elements of one column are OR-ed, columns are AND-ed).
 *
 * @param pQuery   query holding numOfFilterCols entries in pFilterInfo
 * @param elemPos  row position inside each filter column buffer
 * @return         true when the row passes the filters of all columns
 */
bool doFilterData(SQuery *pQuery, int32_t elemPos) {
  for (int32_t k = 0; k < pQuery->numOfFilterCols; ++k) {
    SSingleColumnFilterInfo *pFilterInfo = &pQuery->pFilterInfo[k];

    char *pElem = pFilterInfo->pData + pFilterInfo->info.bytes * elemPos;
    if (isNull(pElem, pFilterInfo->info.type)) {
      return false;
    }

    bool qualified = false;
    for (int32_t j = 0; j < pFilterInfo->numOfFilters; ++j) {
      SColumnFilterElem *pFilterElem = &pFilterInfo->pFilters[j];

      // the same element is handed over as both value arguments of the filter callback
      if (pFilterElem->fp(pFilterElem, pElem, pElem)) {
        qualified = true;
        break;
      }
    }

    if (!qualified) {
      return false;
    }
  }

  return true;
}

/**
 * Return the largest numOfRes found among the output contexts of the query.
 *
 * @param pRuntimeEnv  runtime environment providing pQuery and the pCtx array
 * @return             maximum number of results over the considered outputs, 0 if none
 */
int64_t getNumOfResult(SQueryRuntimeEnv *pRuntimeEnv) {
  SQuery *pQuery = pRuntimeEnv->pQuery;
  bool    hasMainFunction = hasMainOutput(pQuery);

  int64_t maxOutput = 0;
  for (int32_t j = 0; j < pQuery->numOfOutput; ++j) {
    int32_t functionId = pQuery->pSelectExpr[j].base.functionId;

    /*
     * ts, tag, tagprj function can not decide the output number of current query
     * the number of output result is decided by main output
     */
    if (hasMainFunction &&
        (functionId == TSDB_FUNC_TS || functionId == TSDB_FUNC_TAG || functionId == TSDB_FUNC_TAGPRJ)) {
      continue;
    }

    SResultInfo *pResInfo = GET_RES_INFO(&pRuntimeEnv->pCtx[j]);
    if (pResInfo != NULL && maxOutput < pResInfo->numOfRes) {
      maxOutput = pResInfo->numOfRes;
    }
  }

  assert(maxOutput >= 0);
  return maxOutput;
}

171 172 173 174 175 176 177 178 179
/*
 * The number of results needs to be updated because the offset value was updated.
 *
 * Overwrites numOfRes of every output context with the given value, except for the
 * ts / tag / tagprj / ts_dummy functions, which are left untouched. Each overwritten
 * value is asserted to be larger than numOfRes beforehand.
 */
void updateNumOfResult(SQueryRuntimeEnv *pRuntimeEnv, int32_t numOfRes) {
  SQuery *pQuery = pRuntimeEnv->pQuery;

  for (int32_t j = 0; j < pQuery->numOfOutput; ++j) {
    SResultInfo *pResInfo = GET_RES_INFO(&pRuntimeEnv->pCtx[j]);

    int16_t functionId = pRuntimeEnv->pCtx[j].functionId;
    if (functionId == TSDB_FUNC_TS || functionId == TSDB_FUNC_TAG || functionId == TSDB_FUNC_TAGPRJ ||
        functionId == TSDB_FUNC_TS_DUMMY) {
      continue;
    }

    assert(pResInfo->numOfRes > numOfRes);
    pResInfo->numOfRes = numOfRes;
  }
}

191 192 193 194 195 196 197 198 199
/* Map a group index to its result id: ids start at 200000 and are spaced 10000 apart. */
static int32_t getGroupResultId(int32_t groupIndex) {
  const int32_t firstId = 200000;
  const int32_t idStride = 10000;

  int32_t groupOffset = groupIndex * idStride;
  return firstId + groupOffset;
}

/**
 * Tell whether the group-by expression contains a column flagged TSDB_COL_NORMAL.
 *
 * @param pGroupbyExpr  group-by expression, may be NULL
 * @return              false for NULL / empty expressions or when no normal column is present
 */
bool isGroupbyNormalCol(SSqlGroupbyExpr *pGroupbyExpr) {
  if (pGroupbyExpr == NULL || pGroupbyExpr->numOfGroupCols == 0) {
    return false;
  }

  for (int32_t i = 0; i < pGroupbyExpr->numOfGroupCols; ++i) {
    SColIndex *pColIndex = taosArrayGet(pGroupbyExpr->columnInfo, i);
    if (pColIndex->flag != TSDB_COL_NORMAL) {
      continue;
    }

    /*
     * make sure the normal column locates at the second position if tbname exists in group by clause
     */
    if (pGroupbyExpr->numOfGroupCols > 1) {
      assert(pColIndex->colIndex > 0);
    }

    return true;
  }

  return false;
}

/**
 * Look up the data type of the first TSDB_COL_NORMAL column of the group-by expression.
 *
 * @param pQuery        query whose colList is searched by column id
 * @param pGroupbyExpr  group-by expression, must not be NULL
 * @return              type of the matching colList entry, or TSDB_DATA_TYPE_NULL when the
 *                      expression has no normal column or its id is not found in colList
 */
int16_t getGroupbyColumnType(SQuery *pQuery, SSqlGroupbyExpr *pGroupbyExpr) {
  assert(pGroupbyExpr != NULL);

  int32_t colId = -2;  // stays -2 when no normal column exists in the group-by list
  int16_t type = TSDB_DATA_TYPE_NULL;

  for (int32_t i = 0; i < pGroupbyExpr->numOfGroupCols; ++i) {
    SColIndex *pColIndex = taosArrayGet(pGroupbyExpr->columnInfo, i);
    if (pColIndex->flag == TSDB_COL_NORMAL) {
      colId = pColIndex->colId;
      break;
    }
  }

  for (int32_t i = 0; i < pQuery->numOfCols; ++i) {
    if (colId == pQuery->colList[i].colId) {
      type = pQuery->colList[i].type;
      break;
    }
  }

  return type;
}

/**
 * Tell whether the query combines selectivity functions with dummy tag/ts outputs.
 *
 * @return true when at least one output is TSDB_FUNC_TAG_DUMMY / TSDB_FUNC_TS_DUMMY and at
 *         least one other output has TSDB_FUNCSTATE_SELECTIVITY set in its aAggs status
 */
bool isSelectivityWithTagsQuery(SQuery *pQuery) {
  bool    hasTags = false;
  int32_t numOfSelectivity = 0;

  for (int32_t i = 0; i < pQuery->numOfOutput; ++i) {
    int32_t functId = pQuery->pSelectExpr[i].base.functionId;
    if (functId == TSDB_FUNC_TAG_DUMMY || functId == TSDB_FUNC_TS_DUMMY) {
      hasTags = true;
      continue;
    }

    if ((aAggs[functId].nStatus & TSDB_FUNCSTATE_SELECTIVITY) != 0) {
      numOfSelectivity++;
    }
  }

  return numOfSelectivity > 0 && hasTags;
}

265
/* True when the first select expression of the query is the TSDB_FUNC_TS_COMP function. */
bool isTSCompQuery(SQuery *pQuery) {
  if (pQuery->pSelectExpr[0].base.functionId == TSDB_FUNC_TS_COMP) {
    return true;
  }

  return false;
}
266

267 268 269 270
/**
 * Apply the LIMIT of the query to the rows produced so far.
 *
 * When a positive limit is set and rec.total + rec.rows exceeds it, rec.rows is cut down
 * to the remaining quota and the query status is set to QUERY_COMPLETED.
 *
 * @return true when the result was truncated, false otherwise
 */
static bool limitResults(SQueryRuntimeEnv* pRuntimeEnv) {
  SQInfo* pQInfo = GET_QINFO_ADDR(pRuntimeEnv);
  SQuery* pQuery = pRuntimeEnv->pQuery;

  if ((pQuery->limit.limit > 0) && (pQuery->rec.total + pQuery->rec.rows > pQuery->limit.limit)) {
    pQuery->rec.rows = pQuery->limit.limit - pQuery->rec.total;

    // NOTE(review): rec.rows is printed with %d; confirm this matches its declared type
    qTrace("QInfo:%p discard remain data due to result limitation, limit:%"PRId64", current return:%d, total:%"PRId64,
        pQInfo, pQuery->limit.limit, pQuery->rec.rows, pQuery->rec.total + pQuery->rec.rows);
    assert(pQuery->rec.rows >= 0);
    setQueryStatus(pQuery, QUERY_COMPLETED);
    return true;
  }

  return false;
}

/* True when any output of the query is a TSDB_FUNC_TOP or TSDB_FUNC_BOTTOM function. */
static bool isTopBottomQuery(SQuery *pQuery) {
  for (int32_t i = 0; i < pQuery->numOfOutput; ++i) {
    int32_t functionId = pQuery->pSelectExpr[i].base.functionId;
    if (functionId == TSDB_FUNC_TS) {
      continue;
    }

    if (functionId == TSDB_FUNC_TOP || functionId == TSDB_FUNC_BOTTOM) {
      return true;
    }
  }

  return false;
}

H
Haojun Liao 已提交
299
/**
 * Find the statistics entry that belongs to the column of select expression `index`.
 *
 * @param pQuery     query providing pSelectExpr
 * @param pStatis    array of numOfCols statistics entries; dereferenced without a NULL check
 * @param numOfCols  number of entries in pStatis
 * @param index      index of the select expression
 * @return           matching entry, or NULL for tag columns / when no entry has the column id
 */
static SDataStatis *getStatisInfo(SQuery *pQuery, SDataStatis *pStatis, int32_t numOfCols, int32_t index) {
  // for a tag column, no corresponding field info
  SColIndex *pColIndex = &pQuery->pSelectExpr[index].base.colInfo;
  if (TSDB_COL_IS_TAG(pColIndex->flag)) {
    return NULL;
  }

  /*
   * Choose the right column field info by field id, since the file block may be out of date,
   * which means the newest table schema is not equal to the schema of this block.
   * TODO: speedup by using bsearch
   */
  for (int32_t i = 0; i < numOfCols; ++i) {
    if (pColIndex->colId == pStatis[i].colId) {
      return &pStatis[i];
    }
  }

  return NULL;
}

320 321 322 323 324 325 326 327
/**
 * @param pQuery
 * @param col
 * @param pDataBlockInfo
 * @param pStatis
 * @param pColStatis
 * @return
 */
H
Haojun Liao 已提交
328
static bool hasNullValue(SQuery *pQuery, int32_t col, int32_t numOfCols, SDataStatis *pStatis, SDataStatis **pColStatis) {
329
  SColIndex *pColIndex = &pQuery->pSelectExpr[col].base.colInfo;
330
  if (TSDB_COL_IS_TAG(pColIndex->flag)) {
331 332
    return false;
  }
333

334 335 336 337
  // query on primary timestamp column, not null value at all
  if (pColIndex->colId == PRIMARYKEY_TIMESTAMP_COL_INDEX) {
    return false;
  }
338

339
  if (pStatis != NULL) {
H
Haojun Liao 已提交
340
    *pColStatis = getStatisInfo(pQuery, pStatis, numOfCols, col);
H
hjxilinx 已提交
341 342
  } else {
    *pColStatis = NULL;
343
  }
344

345 346 347
  if ((*pColStatis) != NULL && (*pColStatis)->numOfNull == 0) {
    return false;
  }
348

349 350 351 352 353 354
  return true;
}

/**
 * Select the window result slot that belongs to the given key, creating it when needed.
 *
 * The key is looked up in pWindowResInfo->hashList. On a hit curIndex is set to the stored
 * slot. On a miss a new slot is appended (doubling the result array first when it is full)
 * and the key -> slot mapping is stored in the hash list.
 *
 * @param pRuntimeEnv     runtime environment (provides pQuery and stableQuery)
 * @param pWindowResInfo  window result set to search / extend
 * @param pData           key bytes
 * @param bytes           key length
 * @return                result slot for the key, or NULL when the result array could not be
 *                        enlarged; in that case pWindowResInfo is left unchanged
 */
static SWindowResult *doSetTimeWindowFromKey(SQueryRuntimeEnv *pRuntimeEnv, SWindowResInfo *pWindowResInfo, char *pData,
                                             int16_t bytes) {
  SQuery *pQuery = pRuntimeEnv->pQuery;

  int32_t *p1 = (int32_t *)taosHashGet(pWindowResInfo->hashList, pData, bytes);
  if (p1 != NULL) {
    pWindowResInfo->curIndex = *p1;
    return getWindowResult(pWindowResInfo, pWindowResInfo->curIndex);
  }

  // more than the capacity, reallocate the resources
  if (pWindowResInfo->size >= pWindowResInfo->capacity) {
    int64_t newCap = pWindowResInfo->capacity * 2;

    char *t = realloc(pWindowResInfo->pResult, newCap * sizeof(SWindowResult));
    if (t == NULL) {
      // The old array is still valid but too small: initialising the slots beyond the old
      // capacity would write out of bounds, so report the failure to the caller instead.
      return NULL;
    }

    pWindowResInfo->pResult = (SWindowResult *)t;
    // the newly added half has exactly the size of the old capacity
    memset(&pWindowResInfo->pResult[pWindowResInfo->capacity], 0, sizeof(SWindowResult) * pWindowResInfo->capacity);

    for (int32_t i = pWindowResInfo->capacity; i < newCap; ++i) {
      SPosInfo pos = {-1, -1};
      createQueryResultInfo(pQuery, &pWindowResInfo->pResult[i], pRuntimeEnv->stableQuery, &pos);
    }

    pWindowResInfo->capacity = newCap;
  }

  // add a new result set for a new group
  pWindowResInfo->curIndex = pWindowResInfo->size++;
  taosHashPut(pWindowResInfo->hashList, pData, bytes, (char *)&pWindowResInfo->curIndex, sizeof(int32_t));

  return getWindowResult(pWindowResInfo, pWindowResInfo->curIndex);
}

/**
 * Get the correct time window according to the handled timestamp.
 *
 * Starts from the current window (or from prevSKey when curIndex is -1) and, when ts lies
 * outside of it, moves the window start by whole multiples of slidingTime until the window
 * [skey, skey + intervalTime - 1] is expected to cover ts (asserted at the end).
 *
 * @param pWindowResInfo  window result set providing the current window / prevSKey
 * @param ts              timestamp that must fall into the returned window
 * @param pQuery          query providing intervalTime, slidingTime and the query range
 * @return                the window covering ts; for ascending queries its ekey is capped
 *                        at the ekey of the query range
 */
static STimeWindow getActiveTimeWindow(SWindowResInfo *pWindowResInfo, int64_t ts, SQuery *pQuery) {
  STimeWindow w = {0};

  if (pWindowResInfo->curIndex == -1) {  // the first window, from the previous stored value
    w.skey = pWindowResInfo->prevSKey;
    w.ekey = w.skey + pQuery->intervalTime - 1;
  } else {
    int32_t slot = curTimeWindow(pWindowResInfo);
    w = getWindowResult(pWindowResInfo, slot)->window;
  }

  if (w.skey > ts || w.ekey < ts) {
    int64_t st = w.skey;

    // window starts after ts: slide backwards by the rounded-up number of steps
    if (st > ts) {
      st -= ((st - ts + pQuery->slidingTime - 1) / pQuery->slidingTime) * pQuery->slidingTime;
    }

    // window ends before ts: slide forwards by the rounded-up number of steps
    int64_t et = st + pQuery->intervalTime - 1;
    if (et < ts) {
      st += ((ts - et + pQuery->slidingTime - 1) / pQuery->slidingTime) * pQuery->slidingTime;
    }

    w.skey = st;
    w.ekey = w.skey + pQuery->intervalTime - 1;
  }

  /*
   * query border check, skey should not be bounded by the query time range, since the value skey will
   * be used as the time window index value. So we only change ekey of time window accordingly.
   */
  if (w.ekey > pQuery->window.ekey && QUERY_IS_ASC_QUERY(pQuery)) {
    w.ekey = pQuery->window.ekey;
  }

  assert(ts >= w.skey && ts <= w.ekey);

  return w;
}

/**
 * Assign a (page, row) position in the result buffer to a window result that has none yet.
 *
 * The last page registered for `sid` is reused while it holds fewer than numOfRowsPerPage
 * rows; otherwise (or when no page exists) a new page is requested.
 *
 * @param pWindowRes        window result; nothing is done when its pos.pageId is not -1
 * @param pResultBuf        result buffer the pages are taken from
 * @param sid               id used to look up / register pages in the result buffer
 * @param numOfRowsPerPage  row capacity of one page
 * @return                  0 on success, -1 when no page could be obtained
 */
static int32_t addNewWindowResultBuf(SWindowResult *pWindowRes, SDiskbasedResultBuf *pResultBuf, int32_t sid,
                                     int32_t numOfRowsPerPage) {
  if (pWindowRes->pos.pageId != -1) {
    return 0;
  }

  tFilePage *pData = NULL;

  // in the first scan, new space needed for results
  int32_t pageId = -1;
  SIDList list = getDataBufPagesIdList(pResultBuf, sid);

  if (list.size == 0) {
    pData = getNewDataBuf(pResultBuf, sid, &pageId);
  } else {
    pageId = getLastPageId(&list);
    pData = getResultBufferPageById(pResultBuf, pageId);

    if (pData->num >= numOfRowsPerPage) {
      pData = getNewDataBuf(pResultBuf, sid, &pageId);
      if (pData != NULL) {
        assert(pData->num == 0);  // number of elements must be 0 for new allocated buffer
      }
    }
  }

  if (pData == NULL) {
    return -1;
  }

  // set the number of rows in current disk page
  if (pWindowRes->pos.pageId == -1) {  // not allocated yet, allocate new buffer
    pWindowRes->pos.pageId = pageId;
    pWindowRes->pos.rowId = pData->num++;
  }

  return 0;
}

/**
 * Make the result slot of the given time window the active output target.
 *
 * The slot is located (or created) by the window start key, gets a result buffer position
 * when it has none, records *win as its window, and is then bound to the function contexts.
 *
 * @param pRuntimeEnv     runtime environment (provides pResultBuf and numOfRowsPerPage)
 * @param pWindowResInfo  window result set
 * @param sid             id forwarded to the result buffer when a position is allocated
 * @param win             time window, skey must not exceed ekey
 * @return                TSDB_CODE_SUCCESS, or -1 when no slot / buffer could be obtained
 */
static int32_t setWindowOutputBufByKey(SQueryRuntimeEnv *pRuntimeEnv, SWindowResInfo *pWindowResInfo, int32_t sid,
                                       STimeWindow *win) {
  assert(win->skey <= win->ekey);
  SDiskbasedResultBuf *pResultBuf = pRuntimeEnv->pResultBuf;

  SWindowResult *pWindowRes = doSetTimeWindowFromKey(pRuntimeEnv, pWindowResInfo, (char *)&win->skey, TSDB_KEYSIZE);
  if (pWindowRes == NULL) {
    return -1;
  }

  // not assign result buffer yet, add new result buffer
  if (pWindowRes->pos.pageId == -1) {
    int32_t ret = addNewWindowResultBuf(pWindowRes, pResultBuf, sid, pRuntimeEnv->numOfRowsPerPage);
    if (ret != 0) {
      return -1;
    }
  }

  // set time window for current result
  pWindowRes->window = *win;

  setWindowResOutputBuf(pRuntimeEnv, pWindowRes);
  initCtxOutputBuf(pRuntimeEnv);

  return TSDB_CODE_SUCCESS;
}

/* Return the status record of the window result stored in `slot` (must be within [0, size)). */
static SWindowStatus *getTimeWindowResStatus(SWindowResInfo *pWindowResInfo, int32_t slot) {
  assert(slot >= 0 && slot < pWindowResInfo->size);

  SWindowResult *pSlotRes = &pWindowResInfo->pResult[slot];
  return &pSlotRes->status;
}

H
Haojun Liao 已提交
499
/**
 * Count the rows between `pos` and the position that searchFn reports for `ekey`.
 *
 * @param numOfRows  number of timestamps in pData
 * @param searchFn   search function, called as searchFn(pData, numOfRows, ekey, order)
 * @param ekey       end key of the range
 * @param pos        start position inside pData
 * @param order      TSDB_ORDER_ASC or the descending order
 * @param pData      timestamp column of the block
 * @return           0 when searchFn returns a negative position; otherwise the distance from
 *                   pos to that position in scan direction, plus one when the timestamp at
 *                   that position equals ekey
 */
static int32_t getForwardStepsInBlock(int32_t numOfRows, __block_search_fn_t searchFn, TSKEY ekey, int16_t pos,
                                      int16_t order, int64_t *pData) {
  int32_t endPos = searchFn((char *)pData, numOfRows, ekey, order);
  int32_t forwardStep = 0;

  if (endPos >= 0) {
    forwardStep = (order == TSDB_ORDER_ASC) ? (endPos - pos) : (pos - endPos);
    assert(forwardStep >= 0);

    // endPos data is equal to the key, so we do need to read the element in endPos
    if (pData[endPos] == ekey) {
      forwardStep += 1;
    }
  }

  return forwardStep;
}

/**
 * NOTE: the query status only set for the first scan of master scan.
 */
520
static int32_t doCheckQueryCompleted(SQueryRuntimeEnv *pRuntimeEnv, TSKEY lastKey, SWindowResInfo *pWindowResInfo) {
521 522
  SQuery *pQuery = pRuntimeEnv->pQuery;
  if (pRuntimeEnv->scanFlag != MASTER_SCAN || (!isIntervalQuery(pQuery))) {
523
    return pWindowResInfo->size;
524
  }
525

526
  // no qualified results exist, abort check
527 528
  int32_t numOfClosed = 0;
  
529
  if (pWindowResInfo->size == 0) {
530
    return pWindowResInfo->size;
531
  }
532

533
  // query completed
H
hjxilinx 已提交
534 535
  if ((lastKey >= pQuery->current->win.ekey && QUERY_IS_ASC_QUERY(pQuery)) ||
      (lastKey <= pQuery->current->win.ekey && !QUERY_IS_ASC_QUERY(pQuery))) {
536
    closeAllTimeWindow(pWindowResInfo);
537

538 539 540 541
    pWindowResInfo->curIndex = pWindowResInfo->size - 1;
    setQueryStatus(pQuery, QUERY_COMPLETED | QUERY_RESBUF_FULL);
  } else {  // set the current index to be the last unclosed window
    int32_t i = 0;
542
    int64_t skey = TSKEY_INITIAL_VAL;
543

544 545 546
    for (i = 0; i < pWindowResInfo->size; ++i) {
      SWindowResult *pResult = &pWindowResInfo->pResult[i];
      if (pResult->status.closed) {
547
        numOfClosed += 1;
548 549
        continue;
      }
550

551 552 553 554 555 556 557 558
      if ((pResult->window.ekey <= lastKey && QUERY_IS_ASC_QUERY(pQuery)) ||
          (pResult->window.skey >= lastKey && !QUERY_IS_ASC_QUERY(pQuery))) {
        closeTimeWindow(pWindowResInfo, i);
      } else {
        skey = pResult->window.skey;
        break;
      }
    }
559

560
    // all windows are closed, set the last one to be the skey
561
    if (skey == TSKEY_INITIAL_VAL) {
562 563 564 565 566
      assert(i == pWindowResInfo->size);
      pWindowResInfo->curIndex = pWindowResInfo->size - 1;
    } else {
      pWindowResInfo->curIndex = i;
    }
567

568
    pWindowResInfo->prevSKey = pWindowResInfo->pResult[pWindowResInfo->curIndex].window.skey;
569

570 571 572 573 574
    // the number of completed slots are larger than the threshold, return current generated results to client.
    if (numOfClosed > pWindowResInfo->threshold) {
      qTrace("QInfo:%p total result window:%d closed:%d, reached the output threshold %d, return",
          GET_QINFO_ADDR(pRuntimeEnv), pWindowResInfo->size, numOfClosed, pQuery->rec.threshold);
      
575
      setQueryStatus(pQuery, QUERY_RESBUF_FULL);
576 577 578
    } else {
      qTrace("QInfo:%p total result window:%d already closed:%d", GET_QINFO_ADDR(pRuntimeEnv), pWindowResInfo->size,
             numOfClosed);
579 580
    }
  }
581 582 583 584 585 586 587
  
  // output has reached the limitation, set query completed
  if (pQuery->limit.limit > 0 && (pQuery->limit.limit + pQuery->limit.offset) <= numOfClosed &&
      pRuntimeEnv->scanFlag == MASTER_SCAN) {
    setQueryStatus(pQuery, QUERY_COMPLETED);
  }
  
588
  assert(pWindowResInfo->prevSKey != TSKEY_INITIAL_VAL);
589
  return numOfClosed;
590 591 592
}

/**
 * Count the rows of the block, starting at startPos in scan direction, that belong to the
 * time window ending at ekey, and optionally advance lastKey of the current table.
 *
 * When the window end lies inside the block the count comes from getForwardStepsInBlock;
 * otherwise all remaining rows in scan direction are taken.
 *
 * @param pQuery          query (order and current table query info)
 * @param pDataBlockInfo  block info (row count and time range of the block)
 * @param pPrimaryColumn  timestamp column of the block
 * @param startPos        first row to consider, must be within [0, rows)
 * @param ekey            end key of the window in scan direction
 * @param searchFn        search function forwarded to getForwardStepsInBlock
 * @param updateLastKey   when true, pQuery->current->lastKey is moved one step past the last
 *                        counted row (left untouched when no row qualifies)
 * @return                number of qualifying rows, never negative
 */
static int32_t getNumOfRowsInTimeWindow(SQuery *pQuery, SDataBlockInfo *pDataBlockInfo, TSKEY *pPrimaryColumn,
                                        int32_t startPos, TSKEY ekey, __block_search_fn_t searchFn, bool updateLastKey) {
  assert(startPos >= 0 && startPos < pDataBlockInfo->rows);

  int32_t num = -1;
  int32_t order = pQuery->order.order;
  int32_t step = GET_FORWARD_DIRECTION_FACTOR(order);

  STableQueryInfo* item = pQuery->current;

  if (QUERY_IS_ASC_QUERY(pQuery)) {
    if (ekey < pDataBlockInfo->window.ekey) {
      num = getForwardStepsInBlock(pDataBlockInfo->rows, searchFn, ekey, startPos, order, pPrimaryColumn);
      if (num == 0) {  // no qualified data in current block, do not update the lastKey value
        assert(ekey < pPrimaryColumn[startPos]);
      } else {
        if (updateLastKey) {
          item->lastKey = pPrimaryColumn[startPos + (num - 1)] + step;
        }
      }
    } else {
      // the window reaches the end of the block: take every remaining row
      num = pDataBlockInfo->rows - startPos;
      if (updateLastKey) {
        item->lastKey = pDataBlockInfo->window.ekey + step;
      }
    }
  } else {  // desc
    if (ekey > pDataBlockInfo->window.skey) {
      num = getForwardStepsInBlock(pDataBlockInfo->rows, searchFn, ekey, startPos, order, pPrimaryColumn);
      if (num == 0) {  // no qualified data in current block, do not update the lastKey value
        assert(ekey > pPrimaryColumn[startPos]);
      } else {
        if (updateLastKey) {
          item->lastKey = pPrimaryColumn[startPos - (num - 1)] + step;
        }
      }
    } else {
      // the window reaches the start of the block: take rows startPos down to 0
      num = startPos + 1;
      if (updateLastKey) {
        item->lastKey = pDataBlockInfo->window.skey + step;
      }
    }
  }

  assert(num >= 0);
  return num;
}

/**
 * Run the block-wise implementation of every output function over one row range.
 *
 * Nothing is executed unless this is the master scan or the window is already closed.
 *
 * @param pRuntimeEnv  runtime environment (provides pQuery and the pCtx array)
 * @param pStatus      status of the target window
 * @param pWin         target window; its skey is stored as nStartQueryTimestamp
 * @param offset       position of the first row in scan direction
 * @param forwardStep  number of rows to process
 * @param tsBuf        timestamp column, handed to selectivity functions as ptsList
 * @param numOfTotal   total number of rows in the block
 */
static void doBlockwiseApplyFunctions(SQueryRuntimeEnv *pRuntimeEnv, SWindowStatus *pStatus, STimeWindow *pWin,
                                      int32_t offset, int32_t forwardStep, TSKEY *tsBuf, int32_t numOfTotal) {
  SQuery *        pQuery = pRuntimeEnv->pQuery;
  SQLFunctionCtx *pCtx = pRuntimeEnv->pCtx;

  if (!(IS_MASTER_SCAN(pRuntimeEnv) || pStatus->closed)) {
    return;
  }

  for (int32_t k = 0; k < pQuery->numOfOutput; ++k) {
    int32_t functionId = pQuery->pSelectExpr[k].base.functionId;

    pCtx[k].nStartQueryTimestamp = pWin->skey;
    pCtx[k].size = forwardStep;
    // startOffset is the lowest row index of the range, whatever the scan direction
    pCtx[k].startOffset = (QUERY_IS_ASC_QUERY(pQuery)) ? offset : offset - (forwardStep - 1);

    if ((aAggs[functionId].nStatus & TSDB_FUNCSTATE_SELECTIVITY) != 0) {
      pCtx[k].ptsList = tsBuf;
    }

    // not a whole block involved in query processing, statistics data can not be used
    if (forwardStep != numOfTotal) {
      pCtx[k].preAggVals.isSet = false;
    }

    if (functionNeedToExecute(pRuntimeEnv, &pCtx[k], functionId)) {
      aAggs[functionId].xFunction(&pCtx[k]);
    }
  }
}

/**
 * Run the row-wise implementation (xFunctionF) of every output function on one row.
 *
 * Nothing is executed unless this is the master scan or the window is already closed.
 *
 * @param pRuntimeEnv  runtime environment (provides pQuery and the pCtx array)
 * @param pStatus      status of the target window
 * @param pWin         target window; its skey is stored as nStartQueryTimestamp
 * @param offset       row position handed to each function
 */
static void doRowwiseApplyFunctions(SQueryRuntimeEnv *pRuntimeEnv, SWindowStatus *pStatus, STimeWindow *pWin,
                                    int32_t offset) {
  SQuery *        pQuery = pRuntimeEnv->pQuery;
  SQLFunctionCtx *pCtx = pRuntimeEnv->pCtx;

  if (!(IS_MASTER_SCAN(pRuntimeEnv) || pStatus->closed)) {
    return;
  }

  for (int32_t k = 0; k < pQuery->numOfOutput; ++k) {
    pCtx[k].nStartQueryTimestamp = pWin->skey;

    int32_t functionId = pQuery->pSelectExpr[k].base.functionId;
    if (functionNeedToExecute(pRuntimeEnv, &pCtx[k], functionId)) {
      aAggs[functionId].xFunctionF(&pCtx[k], offset);
    }
  }
}

static int32_t getNextQualifiedWindow(SQueryRuntimeEnv *pRuntimeEnv, STimeWindow *pNextWin,
687 688
                                      SDataBlockInfo *pDataBlockInfo, TSKEY *primaryKeys,
                                      __block_search_fn_t searchFn) {
689
  SQuery *pQuery = pRuntimeEnv->pQuery;
690

691 692 693 694 695
  while (1) {
    if ((pNextWin->ekey > pQuery->window.ekey && QUERY_IS_ASC_QUERY(pQuery)) ||
        (pNextWin->skey < pQuery->window.ekey && !QUERY_IS_ASC_QUERY(pQuery))) {
      return -1;
    }
696

697
    getNextTimeWindow(pQuery, pNextWin);
698

699 700 701 702 703
    // next time window is not in current block
    if ((pNextWin->skey > pDataBlockInfo->window.ekey && QUERY_IS_ASC_QUERY(pQuery)) ||
        (pNextWin->ekey < pDataBlockInfo->window.skey && !QUERY_IS_ASC_QUERY(pQuery))) {
      return -1;
    }
704

705 706 707 708 709 710 711 712 713 714 715 716
    TSKEY startKey = -1;
    if (QUERY_IS_ASC_QUERY(pQuery)) {
      startKey = pNextWin->skey;
      if (startKey < pQuery->window.skey) {
        startKey = pQuery->window.skey;
      }
    } else {
      startKey = pNextWin->ekey;
      if (startKey > pQuery->window.skey) {
        startKey = pQuery->window.skey;
      }
    }
717

718
    int32_t startPos = searchFn((char *)primaryKeys, pDataBlockInfo->rows, startKey, pQuery->order.order);
719

720 721 722 723 724 725 726 727
    /*
     * This time window does not cover any data, try next time window,
     * this case may happen when the time window is too small
     */
    if ((primaryKeys[startPos] > pNextWin->ekey && QUERY_IS_ASC_QUERY(pQuery)) ||
        (primaryKeys[startPos] < pNextWin->skey && !QUERY_IS_ASC_QUERY(pQuery))) {
      continue;
    }
728

729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745
    return startPos;
  }
}

/**
 * Return the end key of the window in scan direction, clamped to the end of the query range.
 *
 * Ascending queries use pWindow->ekey capped at pQuery->window.ekey; descending queries use
 * pWindow->skey raised to at least pQuery->window.ekey.
 */
static TSKEY reviseWindowEkey(SQuery *pQuery, STimeWindow *pWindow) {
  TSKEY ekey = -1;
  if (QUERY_IS_ASC_QUERY(pQuery)) {
    ekey = pWindow->ekey;
    if (ekey > pQuery->window.ekey) {
      ekey = pQuery->window.ekey;
    }
  } else {
    ekey = pWindow->skey;
    if (ekey < pQuery->window.ekey) {
      ekey = pQuery->window.ekey;
    }
  }

  return ekey;
}

H
hjxilinx 已提交
750 751 752 753 754 755 756 757 758 759 760 761 762 763 764
/*
 * Linear scan of the block columns for the given column id; yields the pData pointer of
 * the first match, or NULL when no column carries that id.
 * todo binary search
 */
static void* getDataBlockImpl(SArray* pDataBlock, int32_t colId) {
  int32_t total = taosArrayGetSize(pDataBlock);

  int32_t idx = 0;
  while (idx < total) {
    SColumnInfoData *pColData = taosArrayGet(pDataBlock, idx);
    if (pColData->info.colId == colId) {
      return pColData->pData;
    }

    idx += 1;
  }

  return NULL;
}

static char *getDataBlock(SQueryRuntimeEnv *pRuntimeEnv, SArithmeticSupport *sas, int32_t col, int32_t size,
765
                    SArray *pDataBlock) {
766
  char *dataBlock = NULL;
767
  SQuery *pQuery = pRuntimeEnv->pQuery;
768

769
  SQLFunctionCtx *pCtx = pRuntimeEnv->pCtx;
770

771
  int32_t functionId = pQuery->pSelectExpr[col].base.functionId;
772
  if (functionId == TSDB_FUNC_ARITHM) {
773
    sas->pArithExpr = &pQuery->pSelectExpr[col];
774

775 776 777 778 779 780
    // set the start offset to be the lowest start position, no matter asc/desc query order
    if (QUERY_IS_ASC_QUERY(pQuery)) {
      pCtx->startOffset = pQuery->pos;
    } else {
      pCtx->startOffset = pQuery->pos - (size - 1);
    }
781

782 783 784 785
    sas->offset  = 0;
    sas->colList = pQuery->colList;
    sas->numOfCols = pQuery->numOfCols;
    sas->data    = calloc(pQuery->numOfCols, POINTER_BYTES);
786

787
    // here the pQuery->colList and sas->colList are identical
788
    for (int32_t i = 0; i < pQuery->numOfCols; ++i) {
789
      SColumnInfo *pColMsg = &pQuery->colList[i];
790

791
      int32_t numOfCols = taosArrayGetSize(pDataBlock);
792

793 794 795 796 797 798 799 800
      dataBlock = NULL;
      for (int32_t k = 0; k < numOfCols; ++k) {  //todo refactor
        SColumnInfoData *p = taosArrayGet(pDataBlock, k);
        if (pColMsg->colId == p->info.colId) {
          dataBlock = p->pData;
          break;
        }
      }
801

802
      assert(dataBlock != NULL);
H
Haojun Liao 已提交
803
      sas->data[i] = dataBlock/* + pQuery->colList[i].bytes*/;  // start from the offset
804
    }
805

806
  } else {  // other type of query function
807
    SColIndex *pCol = &pQuery->pSelectExpr[col].base.colInfo;
808
    if (TSDB_COL_IS_TAG(pCol->flag) || pDataBlock == NULL) {
809 810
      dataBlock = NULL;
    } else {
H
hjxilinx 已提交
811
      dataBlock = getDataBlockImpl(pDataBlock, pCol->colId);
812 813
    }
  }
814

815 816 817 818 819 820 821
  return dataBlock;
}

/**
 *
 * @param pRuntimeEnv
 * @param forwardStep
822
 * @param tsCols
823 824 825 826 827
 * @param pFields
 * @param isDiskFileBlock
 * @return                  the incremental number of output value, so it maybe 0 for fixed number of query,
 *                          such as count/min/max etc.
 */
828
static void blockwiseApplyFunctions(SQueryRuntimeEnv *pRuntimeEnv, SDataStatis *pStatis,
829 830
                                       SDataBlockInfo *pDataBlockInfo, SWindowResInfo *pWindowResInfo,
                                       __block_search_fn_t searchFn, SArray *pDataBlock) {
831
  SQLFunctionCtx *pCtx = pRuntimeEnv->pCtx;
832 833 834
  
  SQuery *pQuery = pRuntimeEnv->pQuery;
  TSKEY  *tsCols = NULL;
835
  if (pDataBlock != NULL) {
836
    SColumnInfoData* pColInfo = taosArrayGet(pDataBlock, 0);
837
    tsCols = (TSKEY *)(pColInfo->pData);
838
  }
839

840
  SArithmeticSupport *sasArray = calloc((size_t)pQuery->numOfOutput, sizeof(SArithmeticSupport));
841

842
  for (int32_t k = 0; k < pQuery->numOfOutput; ++k) {
H
hjxilinx 已提交
843
    char *dataBlock = getDataBlock(pRuntimeEnv, &sasArray[k], k, pDataBlockInfo->rows, pDataBlock);
844
    setExecParams(pQuery, &pCtx[k], dataBlock, tsCols, pDataBlockInfo, pStatis, &sasArray[k], k);
845
  }
846

847 848 849
  int32_t step = GET_FORWARD_DIRECTION_FACTOR(pQuery->order.order);
  if (isIntervalQuery(pQuery)) {
    int32_t offset = GET_COL_DATA_POS(pQuery, 0, step);
850
    TSKEY   ts = tsCols[offset];
851

852
    STimeWindow win = getActiveTimeWindow(pWindowResInfo, ts, pQuery);
H
hjxilinx 已提交
853
    if (setWindowOutputBufByKey(pRuntimeEnv, pWindowResInfo, pDataBlockInfo->tid, &win) != TSDB_CODE_SUCCESS) {
H
hjxilinx 已提交
854
      return;
855
    }
856

857 858
    TSKEY   ekey = reviseWindowEkey(pQuery, &win);
    int32_t forwardStep =
859
        getNumOfRowsInTimeWindow(pQuery, pDataBlockInfo, tsCols, pQuery->pos, ekey, searchFn, true);
860

861
    SWindowStatus *pStatus = getTimeWindowResStatus(pWindowResInfo, curTimeWindow(pWindowResInfo));
862
    doBlockwiseApplyFunctions(pRuntimeEnv, pStatus, &win, pQuery->pos, forwardStep, tsCols, pDataBlockInfo->rows);
863

864 865
    int32_t     index = pWindowResInfo->curIndex;
    STimeWindow nextWin = win;
866

867
    while (1) {
868
      int32_t startPos = getNextQualifiedWindow(pRuntimeEnv, &nextWin, pDataBlockInfo, tsCols, searchFn);
869 870 871
      if (startPos < 0) {
        break;
      }
872

873
      // null data, failed to allocate more memory buffer
H
hjxilinx 已提交
874
      if (setWindowOutputBufByKey(pRuntimeEnv, pWindowResInfo, pDataBlockInfo->tid, &nextWin) != TSDB_CODE_SUCCESS) {
875 876
        break;
      }
877

878
      ekey = reviseWindowEkey(pQuery, &nextWin);
879
      forwardStep = getNumOfRowsInTimeWindow(pQuery, pDataBlockInfo, tsCols, startPos, ekey, searchFn, true);
880

881
      pStatus = getTimeWindowResStatus(pWindowResInfo, curTimeWindow(pWindowResInfo));
882
      doBlockwiseApplyFunctions(pRuntimeEnv, pStatus, &nextWin, startPos, forwardStep, tsCols, pDataBlockInfo->rows);
883
    }
884

885 886 887 888 889 890 891
    pWindowResInfo->curIndex = index;
  } else {
    /*
     * the sqlfunctionCtx parameters should be set done before all functions are invoked,
     * since the selectivity + tag_prj query needs all parameters been set done.
     * tag_prj function are changed to be TSDB_FUNC_TAG_DUMMY
     */
892
    for (int32_t k = 0; k < pQuery->numOfOutput; ++k) {
893
      int32_t functionId = pQuery->pSelectExpr[k].base.functionId;
894 895 896 897 898
      if (functionNeedToExecute(pRuntimeEnv, &pCtx[k], functionId)) {
        aAggs[functionId].xFunction(&pCtx[k]);
      }
    }
  }
899

900 901 902 903
  for(int32_t i = 0; i < pQuery->numOfOutput; ++i) {
    if (pQuery->pSelectExpr[i].base.functionId != TSDB_FUNC_ARITHM) {
      continue;
    }
904

905 906
    tfree(sasArray[i].data);
  }
907

908 909 910 911 912 913 914
  tfree(sasArray);
}

/**
 * Select the result slot that belongs to the group identified by the group-by column
 * value and make it the current output buffer of the function contexts.
 *
 * @param pRuntimeEnv  runtime environment of the query
 * @param pData        value of the group-by column for the current row
 * @param type         data type of the value
 * @param bytes        length of the value in bytes
 * @return             TSDB_CODE_SUCCESS on success; -1 when the value is null, when no
 *                     window result is available, or when no result buffer can be added
 */
static int32_t setGroupResultOutputBuf(SQueryRuntimeEnv *pRuntimeEnv, char *pData, int16_t type, int16_t bytes) {
  // rows with a null group-by value are ignored
  if (isNull(pData, type)) {
    return -1;
  }

  const int32_t groupResultId = 1;

  SDiskbasedResultBuf *pBuf = pRuntimeEnv->pResultBuf;
  SWindowResult       *pRes = doSetTimeWindowFromKey(pRuntimeEnv, &pRuntimeEnv->windowResInfo, pData, bytes);
  if (pRes == NULL) {
    return -1;
  }

  // a page id of -1 means that no result buffer has been assigned to this group yet
  bool needNewBuf = (pRes->pos.pageId == -1);
  if (needNewBuf && addNewWindowResultBuf(pRes, pBuf, groupResultId, pRuntimeEnv->numOfRowsPerPage) != 0) {
    return -1;
  }

  setWindowResOutputBuf(pRuntimeEnv, pRes);
  initCtxOutputBuf(pRuntimeEnv);

  return TSDB_CODE_SUCCESS;
}

938
/**
 * Locate the data of the (non-tag) group-by column inside the current data block.
 *
 * @param pQuery      query object, provides the group-by expression and the column list
 * @param type        [out] data type of the group-by column
 * @param bytes       [out] width in bytes of the group-by column
 * @param pDataBlock  array of SColumnInfoData of the current block
 * @return            pointer to the column data, or NULL if no group-by column is found in the block
 */
static char *getGroupbyColumnData(SQuery *pQuery, int16_t *type, int16_t *bytes, SArray* pDataBlock) {
  SSqlGroupbyExpr *pExpr = pQuery->pGroupbyExpr;

  for (int32_t g = 0; g < pExpr->numOfGroupCols; ++g) {
    SColIndex* pGroupCol = taosArrayGet(pExpr->columnInfo, g);

    // only normal columns are of interest here
    if (pGroupCol->flag == TSDB_COL_TAG) {
      continue;
    }

    // find the position of this column in the column list of the query
    int32_t targetId = pGroupCol->colId;
    int16_t slot = -1;

    int32_t c = 0;
    while (c < pQuery->numOfCols) {
      if (pQuery->colList[c].colId == targetId) {
        slot = c;
        break;
      }

      c += 1;
    }

    assert(slot >= 0 && slot < pQuery->numOfCols);

    *type = pQuery->colList[slot].type;
    *bytes = pQuery->colList[slot].bytes;

    /*
     * The column list comes from the query definition, while the loaded block may not
     * contain every column of that list, so the block is searched by column id instead
     * of being indexed by the position found above.
     */
    int32_t numOfBlockCols = taosArrayGetSize(pDataBlock);
    for (int32_t b = 0; b < numOfBlockCols; ++b) {
      SColumnInfoData *pColData = taosArrayGet(pDataBlock, b);
      if (pColData->info.colId == pGroupCol->colId) {
        return pColData->pData;
      }
    }
  }

  return NULL;
}

/**
 * Compare the row at the given offset with the current element of the timestamp buffer.
 *
 * @param pRuntimeEnv  runtime environment; its pTSBuf supplies the element to compare with
 * @param offset       row index inside the input buffer of the first function context
 * @return             TS_JOIN_TAG_NOT_EQUALS if the tags differ, TS_JOIN_TS_NOT_EQUALS if the row
 *                     has not reached the timestamp of the element yet (with respect to the
 *                     query order), TS_JOIN_TS_EQUAL otherwise
 */
static int32_t doTSJoinFilter(SQueryRuntimeEnv *pRuntimeEnv, int32_t offset) {
  SQuery *pQuery = pRuntimeEnv->pQuery;

  STSElem         elem = tsBufGetElem(pRuntimeEnv->pTSBuf);
  SQLFunctionCtx *pCtx = pRuntimeEnv->pCtx;

  // the tag is checked before the timestamp
  if (pCtx[0].tag.i64Key != elem.tag) {
    return TS_JOIN_TAG_NOT_EQUALS;
  }

  TSKEY key = *(TSKEY *)(pCtx[0].aInputElemBuf + TSDB_KEYSIZE * offset);

#if defined(_DEBUG_VIEW)
  printf("elem in comp ts file:%" PRId64 ", key:%" PRId64 ", tag:%"PRIu64", query order:%d, ts order:%d, traverse:%d, index:%d\n",
         elem.ts, key, elem.tag, pQuery->order.order, pRuntimeEnv->pTSBuf->tsOrder,
         pRuntimeEnv->pTSBuf->cur.order, pRuntimeEnv->pTSBuf->cur.tsIndex);
#endif

  if (key != elem.ts) {
    // in ascending order the row is "behind" when its key is smaller, in descending order when it is larger
    bool behind = QUERY_IS_ASC_QUERY(pQuery) ? (key < elem.ts) : (key > elem.ts);
    if (behind) {
      return TS_JOIN_TS_NOT_EQUALS;
    }

    // the row must never be ahead of the timestamp buffer
    assert(false);
  }

  return TS_JOIN_TS_EQUAL;
}

/**
 * Decide whether a function has to be invoked in the current scan.
 *
 * - completed results and the TAG_DUMMY/TS_DUMMY placeholders are never executed;
 * - last/last_dst are executed only in descending order, first/first_dst only in ascending order;
 * - every other function is executed unless the current scan is the reverse scan.
 */
static bool functionNeedToExecute(SQueryRuntimeEnv *pRuntimeEnv, SQLFunctionCtx *pCtx, int32_t functionId) {
  SResultInfo *pInfo = GET_RES_INFO(pCtx);

  bool isDummy = (functionId == TSDB_FUNC_TAG_DUMMY) || (functionId == TSDB_FUNC_TS_DUMMY);
  if (pInfo->complete || isDummy) {
    return false;
  }

  bool ascending = QUERY_IS_ASC_QUERY(pRuntimeEnv->pQuery);

  if (functionId == TSDB_FUNC_LAST_DST || functionId == TSDB_FUNC_LAST) {
    return !ascending;
  }

  if (functionId == TSDB_FUNC_FIRST_DST || functionId == TSDB_FUNC_FIRST) {
    return ascending;
  }

  // none of the remaining functions is executed during the reverse scan
  return !IS_REVERSE_SCAN(pRuntimeEnv);
}

1037 1038
/**
 * Apply the output functions of the query to the current data block one row at a time.
 * tableApplyFunctionsOnBlock selects this path when column filters, a timestamp (join)
 * buffer or a group-by on a normal column are involved.
 *
 * For each row the timestamp buffer and the column filters are checked first; a row that
 * passes is either assigned to all time windows it falls into (interval query) or handed
 * to every function that needs to run (optionally after selecting the group result buffer).
 *
 * @param pRuntimeEnv     runtime environment that owns the query and the function contexts
 * @param pStatis         block statistics, forwarded unchanged to setExecParams
 * @param pDataBlockInfo  information of the current block (rows, tid, ...)
 * @param pWindowResInfo  time window result information, used by interval queries only
 * @param pDataBlock      column data of the block, must not be NULL; its first column is
 *                        used as the timestamp column
 */
static void rowwiseApplyFunctions(SQueryRuntimeEnv *pRuntimeEnv, SDataStatis *pStatis, SDataBlockInfo *pDataBlockInfo,
    SWindowResInfo *pWindowResInfo, SArray *pDataBlock) {
  SQLFunctionCtx *pCtx = pRuntimeEnv->pCtx;

  SQuery *pQuery = pRuntimeEnv->pQuery;
  STableQueryInfo* item = pQuery->current;

  TSKEY  *tsCols = (TSKEY*) ((SColumnInfoData *)taosArrayGet(pDataBlock, 0))->pData;
  bool    groupbyStateValue = isGroupbyNormalCol(pQuery->pGroupbyExpr);
  SArithmeticSupport *sasArray = calloc((size_t)pQuery->numOfOutput, sizeof(SArithmeticSupport));

  int16_t type = 0;
  int16_t bytes = 0;

  char *groupbyColumnData = NULL;
  if (groupbyStateValue) {
    groupbyColumnData = getGroupbyColumnData(pQuery, &type, &bytes, pDataBlock);
  }

  for (int32_t k = 0; k < pQuery->numOfOutput; ++k) {
    char *dataBlock = getDataBlock(pRuntimeEnv, &sasArray[k], k, pDataBlockInfo->rows, pDataBlock);
    setExecParams(pQuery, &pCtx[k], dataBlock, tsCols, pDataBlockInfo, pStatis, &sasArray[k], k);
  }

  // set the input column data
  for (int32_t k = 0; k < pQuery->numOfFilterCols; ++k) {
    SSingleColumnFilterInfo *pFilterInfo = &pQuery->pFilterInfo[k];
    pFilterInfo->pData = getDataBlockImpl(pDataBlock, pFilterInfo->info.colId);
    assert(pFilterInfo->pData != NULL);
  }

  int32_t step = GET_FORWARD_DIRECTION_FACTOR(pQuery->order.order);

  // from top to bottom in desc
  // from bottom to top in asc order
  if (pRuntimeEnv->pTSBuf != NULL) {
    // GET_QINFO_ADDR expects the address of the runtimeEnv member of SQInfo, not the SQuery pointer
    SQInfo *pQInfo = (SQInfo *)GET_QINFO_ADDR(pRuntimeEnv);
    qTrace("QInfo:%p process data rows, numOfRows:%d, query order:%d, ts comp order:%d", pQInfo, pDataBlockInfo->rows,
           pQuery->order.order, pRuntimeEnv->pTSBuf->cur.order);
  }

  int32_t j = 0;
  int32_t offset = -1;  // index of the last visited row, -1 as long as no row has been visited

  for (j = 0; j < pDataBlockInfo->rows; ++j) {
    offset = GET_COL_DATA_POS(pQuery, j, step);

    if (pRuntimeEnv->pTSBuf != NULL) {
      int32_t r = doTSJoinFilter(pRuntimeEnv, offset);
      if (r == TS_JOIN_TAG_NOT_EQUALS) {
        break;
      } else if (r == TS_JOIN_TS_NOT_EQUALS) {
        continue;
      } else {
        assert(r == TS_JOIN_TS_EQUAL);
      }
    }

    if (pQuery->numOfFilterCols > 0 && (!doFilterData(pQuery, offset))) {
      continue;
    }

    // interval window query
    if (isIntervalQuery(pQuery)) {
      // decide the time window according to the primary timestamp
      int64_t     ts = tsCols[offset];
      STimeWindow win = getActiveTimeWindow(pWindowResInfo, ts, pQuery);

      int32_t ret = setWindowOutputBufByKey(pRuntimeEnv, pWindowResInfo, pDataBlockInfo->tid, &win);
      if (ret != TSDB_CODE_SUCCESS) {  // null data, too many state code
        continue;
      }

      SWindowStatus *pStatus = getTimeWindowResStatus(pWindowResInfo, curTimeWindow(pWindowResInfo));
      doRowwiseApplyFunctions(pRuntimeEnv, pStatus, &win, offset);

      // the row may belong to several following windows as well; restore the index afterwards
      STimeWindow nextWin = win;
      int32_t     index = pWindowResInfo->curIndex;

      while (1) {
        getNextTimeWindow(pQuery, &nextWin);
        if ((nextWin.skey > pQuery->window.ekey && QUERY_IS_ASC_QUERY(pQuery)) ||
            (nextWin.skey < pQuery->window.ekey && !QUERY_IS_ASC_QUERY(pQuery))) {
          break;
        }

        if (ts < nextWin.skey || ts > nextWin.ekey) {
          break;
        }

        // null data, failed to allocate more memory buffer
        if (setWindowOutputBufByKey(pRuntimeEnv, pWindowResInfo, pDataBlockInfo->tid, &nextWin) != TSDB_CODE_SUCCESS) {
          break;
        }

        pStatus = getTimeWindowResStatus(pWindowResInfo, curTimeWindow(pWindowResInfo));
        doRowwiseApplyFunctions(pRuntimeEnv, pStatus, &nextWin, offset);
      }

      pWindowResInfo->curIndex = index;
    } else {  // other queries
      // decide which group this rows belongs to according to current state value
      if (groupbyStateValue) {
        char *val = groupbyColumnData + bytes * offset;

        int32_t ret = setGroupResultOutputBuf(pRuntimeEnv, val, type, bytes);
        if (ret != TSDB_CODE_SUCCESS) {  // null data, too many state code
          continue;
        }
      }

      for (int32_t k = 0; k < pQuery->numOfOutput; ++k) {
        int32_t functionId = pQuery->pSelectExpr[k].base.functionId;
        if (functionNeedToExecute(pRuntimeEnv, &pCtx[k], functionId)) {
          aAggs[functionId].xFunctionF(&pCtx[k], offset);
        }
      }
    }

    if (pRuntimeEnv->pTSBuf != NULL) {
      // if timestamp filter list is empty, quit current query
      if (!tsBufNextPos(pRuntimeEnv->pTSBuf)) {
        setQueryStatus(pQuery, QUERY_COMPLETED);
        break;
      }
    }
  }

  // offset is still -1 if the block holds no rows; tsCols[-1] must not be read in that case
  if (offset >= 0) {
    item->lastKey = tsCols[offset] + step;
  }

  // todo refactor: extract method
  for(int32_t i = 0; i < pQuery->numOfOutput; ++i) {
    if (pQuery->pSelectExpr[i].base.functionId != TSDB_FUNC_ARITHM) {
      continue;
    }

    tfree(sasArray[i].data);
  }

  tfree(sasArray);
}

static int32_t tableApplyFunctionsOnBlock(SQueryRuntimeEnv *pRuntimeEnv, SDataBlockInfo *pDataBlockInfo,
H
hjxilinx 已提交
1181
                                          SDataStatis *pStatis, __block_search_fn_t searchFn, SArray *pDataBlock) {
H
hjxilinx 已提交
1182
  SQuery *pQuery = pRuntimeEnv->pQuery;
H
hjxilinx 已提交
1183 1184 1185
  
  STableQueryInfo* pTableQInfo = pQuery->current;
  SWindowResInfo*  pWindowResInfo = &pRuntimeEnv->windowResInfo;
H
hjxilinx 已提交
1186
  
1187
  if (pQuery->numOfFilterCols > 0 || pRuntimeEnv->pTSBuf != NULL || isGroupbyNormalCol(pQuery->pGroupbyExpr)) {
1188
    rowwiseApplyFunctions(pRuntimeEnv, pStatis, pDataBlockInfo, pWindowResInfo, pDataBlock);
1189
  } else {
1190
    blockwiseApplyFunctions(pRuntimeEnv, pStatis, pDataBlockInfo, pWindowResInfo, searchFn, pDataBlock);
1191
  }
1192 1193

  TSKEY lastKey = QUERY_IS_ASC_QUERY(pQuery) ? pDataBlockInfo->window.ekey : pDataBlockInfo->window.skey;
H
hjxilinx 已提交
1194
  pTableQInfo->lastKey = lastKey + GET_FORWARD_DIRECTION_FACTOR(pQuery->order.order);
1195 1196


1197
  // interval query with limit applied
1198 1199 1200 1201 1202 1203
  int32_t numOfRes = 0;
  
  if (isIntervalQuery(pQuery)) {
    numOfRes = doCheckQueryCompleted(pRuntimeEnv, lastKey, pWindowResInfo);
  } else {
    numOfRes = getNumOfResult(pRuntimeEnv);
1204

1205 1206 1207 1208
    // update the number of output result
    if (numOfRes > 0 && pQuery->checkBuffer == 1) {
      assert(numOfRes >= pQuery->rec.rows);
      pQuery->rec.rows = numOfRes;
1209

1210 1211 1212
      if (numOfRes >= pQuery->rec.threshold) {
        setQueryStatus(pQuery, QUERY_RESBUF_FULL);
      }
1213

1214 1215 1216
      if ((pQuery->limit.limit >= 0) && (pQuery->limit.limit + pQuery->limit.offset) <= numOfRes) {
        setQueryStatus(pQuery, QUERY_COMPLETED);
      }
H
Haojun Liao 已提交
1217
    }
1218
  }
1219

1220
  return numOfRes;
1221 1222
}

H
Haojun Liao 已提交
1223
void setExecParams(SQuery *pQuery, SQLFunctionCtx *pCtx, void* inputData, TSKEY *tsCol, SDataBlockInfo* pBlockInfo,
1224 1225 1226 1227 1228 1229 1230
                   SDataStatis *pStatis, void *param, int32_t colIndex) {
  
  int32_t functionId = pQuery->pSelectExpr[colIndex].base.functionId;
  int32_t colId = pQuery->pSelectExpr[colIndex].base.colInfo.colId;
  
  SDataStatis *tpField = NULL;
  pCtx->hasNull = hasNullValue(pQuery, colIndex, pBlockInfo->numOfCols, pStatis, &tpField);
1231
  pCtx->aInputElemBuf = inputData;
1232

1233
  if (tpField != NULL) {
H
Haojun Liao 已提交
1234
    pCtx->preAggVals.isSet  = true;
1235 1236
    pCtx->preAggVals.statis = *tpField;
    assert(pCtx->preAggVals.statis.numOfNull <= pBlockInfo->rows);
1237 1238 1239
  } else {
    pCtx->preAggVals.isSet = false;
  }
1240

H
Haojun Liao 已提交
1241 1242 1243
  // limit/offset query will affect this value
  pCtx->startOffset = QUERY_IS_ASC_QUERY(pQuery) ? pQuery->pos:0;
  pCtx->size = QUERY_IS_ASC_QUERY(pQuery) ? pBlockInfo->rows - pQuery->pos : pQuery->pos + 1;
1244

1245 1246
  uint32_t status = aAggs[functionId].nStatus;
  if (((status & (TSDB_FUNCSTATE_SELECTIVITY | TSDB_FUNCSTATE_NEED_TS)) != 0) && (tsCol != NULL)) {
H
Haojun Liao 已提交
1247
    pCtx->ptsList = tsCol;
1248
  }
1249

1250 1251 1252 1253 1254
  if (functionId >= TSDB_FUNC_FIRST_DST && functionId <= TSDB_FUNC_LAST_DST) {
    // last_dist or first_dist function
    // store the first&last timestamp into the intermediate buffer [1], the true
    // value may be null but timestamp will never be null
  } else if (functionId == TSDB_FUNC_TOP || functionId == TSDB_FUNC_BOTTOM || functionId == TSDB_FUNC_TWA ||
1255
             functionId == TSDB_FUNC_DIFF || (functionId >= TSDB_FUNC_RATE && functionId <= TSDB_FUNC_AVG_IRATE)) {
1256
    /*
H
Haojun Liao 已提交
1257
     * least squares function needs two columns of input, currently, the x value of linear equation is set to
1258 1259 1260 1261 1262 1263 1264 1265 1266 1267
     * timestamp column, and the y-value is the column specified in pQuery->pSelectExpr[i].colIdxInBuffer
     *
     * top/bottom function needs timestamp to indicate when the
     * top/bottom values emerge, so does diff function
     */
    if (functionId == TSDB_FUNC_TWA) {
      STwaInfo *pTWAInfo = GET_RES_INFO(pCtx)->interResultBuf;
      pTWAInfo->SKey = pQuery->window.skey;
      pTWAInfo->EKey = pQuery->window.ekey;
    }
1268

1269 1270
  } else if (functionId == TSDB_FUNC_ARITHM) {
    pCtx->param[1].pz = param;
H
Haojun Liao 已提交
1271 1272 1273 1274 1275 1276
  } else if (functionId == TSDB_FUNC_SPREAD) {  // set the statistics data for primary time stamp column
    if (colId == PRIMARYKEY_TIMESTAMP_COL_INDEX) {
      pCtx->preAggVals.isSet  = true;
      pCtx->preAggVals.statis.min = pBlockInfo->window.skey;
      pCtx->preAggVals.statis.max = pBlockInfo->window.ekey;
    }
1277 1278 1279 1280 1281 1282 1283 1284 1285 1286 1287 1288 1289
  } else if (functionId == TSDB_FUNC_INTERP) {
    SInterpInfoDetail *pInterpInfo = GET_RES_INFO(pCtx)->interResultBuf;
    pInterpInfo->type = pQuery->fillType;
    pInterpInfo->ts = pQuery->window.skey;
    pInterpInfo->primaryCol = (colId == PRIMARYKEY_TIMESTAMP_COL_INDEX);
  
    if (pQuery->fillVal != NULL) {
      if (isNull((const char*) &pQuery->fillVal[colIndex], pCtx->inputType)) {
        pCtx->param[1].nType = TSDB_DATA_TYPE_NULL;
      } else { // todo refactor, tVariantCreateFromBinary should handle the NULL value
        tVariantCreateFromBinary(&pCtx->param[1], (char*) &pQuery->fillVal[colIndex], pCtx->inputBytes, pCtx->inputType);
      }
    }
1290
  }
1291

1292 1293 1294 1295 1296 1297
#if defined(_DEBUG_VIEW)
  //  int64_t *tsList = (int64_t *)primaryColumnData;
//  int64_t  s = tsList[0];
//  int64_t  e = tsList[size - 1];

//    if (IS_DATA_BLOCK_LOADED(blockStatus)) {
S
slguan 已提交
1298
//        qTrace("QInfo:%p query ts:%lld-%lld, offset:%d, rows:%d, bstatus:%d,
1299 1300 1301
//        functId:%d", GET_QINFO_ADDR(pQuery),
//               s, e, startOffset, size, blockStatus, functionId);
//    } else {
S
slguan 已提交
1302
//        qTrace("QInfo:%p block not loaded, bstatus:%d",
1303 1304 1305 1306 1307 1308 1309 1310
//        GET_QINFO_ADDR(pQuery), blockStatus);
//    }
#endif
}

/**
 * Set the output buffer for the selectivity + tag query.
 *
 * All contexts of TSDB_FUNC_TAG_DUMMY / TSDB_FUNC_TS_DUMMY columns are collected in a newly
 * allocated list which is attached, together with their count and total output length, to
 * the context of the selectivity function. The list is owned by that context afterwards
 * (tagInfo.pTagCtxList). Nothing is done for queries that are not selectivity + tag queries.
 *
 * @param pQuery  query object
 * @param pCtx    array of pQuery->numOfOutput function contexts
 */
static void setCtxTagColumnInfo(SQuery *pQuery, SQLFunctionCtx *pCtx) {
  if (!isSelectivityWithTagsQuery(pQuery)) {
    return;
  }

  SQLFunctionCtx **pTagCtx = calloc(pQuery->numOfOutput, POINTER_BYTES);
  if (pTagCtx == NULL) {
    // out of memory: leave the tag info untouched instead of writing through a NULL pointer
    return;
  }

  int32_t num = 0;
  int16_t tagLen = 0;

  SQLFunctionCtx *p = NULL;  // context of the selectivity function (the last one wins)

  for (int32_t i = 0; i < pQuery->numOfOutput; ++i) {
    SSqlFuncMsg *pSqlFuncMsg = &pQuery->pSelectExpr[i].base;

    if (pSqlFuncMsg->functionId == TSDB_FUNC_TAG_DUMMY || pSqlFuncMsg->functionId == TSDB_FUNC_TS_DUMMY) {
      tagLen += pCtx[i].outputBytes;
      pTagCtx[num++] = &pCtx[i];
    } else if ((aAggs[pSqlFuncMsg->functionId].nStatus & TSDB_FUNCSTATE_SELECTIVITY) != 0) {
      p = &pCtx[i];
    }

    /*
     * Everything else is ignored on purpose:
     * - tag function may be the group by tag column
     * - ts may be the required primary timestamp column
     * - the column may be the normal column, group by normal_column, the functionId is TSDB_FUNC_PRJ
     */
  }

  if (p == NULL) {
    // no selectivity function to attach the list to: release it rather than dereference NULL
    free(pTagCtx);
    return;
  }

  p->tagInfo.pTagCtxList = pTagCtx;
  p->tagInfo.numOfTagCols = num;
  p->tagInfo.tagsLen = tagLen;
}

/**
 * Set up the intermediate result buffer of every output column.
 *
 * @param pResultInfo    array of pQuery->numOfOutput result info entries
 * @param pQuery         query object, provides the intermediate buffer size of each column
 * @param isStableQuery  forwarded unchanged to setResultInfoBuf
 */
static void setWindowResultInfo(SResultInfo *pResultInfo, SQuery *pQuery, bool isStableQuery) {
  int32_t col = 0;

  while (col < pQuery->numOfOutput) {
    // the intermediate result must fit into one internal buffer page
    assert(pQuery->pSelectExpr[col].interBytes <= DEFAULT_INTERN_BUF_PAGE_SIZE);

    setResultInfoBuf(pResultInfo + col, pQuery->pSelectExpr[col].interBytes, isStableQuery);
    col += 1;
  }
}

1348
/**
 * Allocate and initialize the per-output-column state of the runtime environment:
 * the result info array and the function contexts (input/output type and length, order,
 * function id, parameters and the offset of each output column).
 *
 * @param pRuntimeEnv  runtime environment, pRuntimeEnv->pQuery must be set already
 * @param order        order value stored in param[2] of top/bottom/diff functions
 * @return             TSDB_CODE_SUCCESS, or TSDB_CODE_SERV_OUT_OF_MEMORY if an allocation
 *                     fails (both arrays are released in that case)
 */
static int32_t setupQueryRuntimeEnv(SQueryRuntimeEnv *pRuntimeEnv, int16_t order) {
  qTrace("QInfo:%p setup runtime env", GET_QINFO_ADDR(pRuntimeEnv));
  SQuery *pQuery = pRuntimeEnv->pQuery;

  pRuntimeEnv->resultInfo = calloc(pQuery->numOfOutput, sizeof(SResultInfo));
  pRuntimeEnv->pCtx = (SQLFunctionCtx *)calloc(pQuery->numOfOutput, sizeof(SQLFunctionCtx));

  if (pRuntimeEnv->resultInfo == NULL || pRuntimeEnv->pCtx == NULL) {
    goto _clean;
  }

  pRuntimeEnv->offset[0] = 0;
  for (int32_t i = 0; i < pQuery->numOfOutput; ++i) {
    SSqlFuncMsg *pSqlFuncMsg = &pQuery->pSelectExpr[i].base;

    SQLFunctionCtx *pCtx = &pRuntimeEnv->pCtx[i];
    SColIndex* pIndex = &pSqlFuncMsg->colInfo;

    // the input type and length come from the tag schema or from the column schema
    int32_t index = pSqlFuncMsg->colInfo.colIndex;
    if (TSDB_COL_IS_TAG(pIndex->flag)) {
      if (pIndex->colId == TSDB_TBNAME_COLUMN_INDEX) {  // todo refactor
        pCtx->inputBytes = TSDB_TABLE_NAME_LEN + VARSTR_HEADER_SIZE;
        pCtx->inputType = TSDB_DATA_TYPE_BINARY;
      } else {
        pCtx->inputBytes = pQuery->tagColList[index].bytes;
        pCtx->inputType = pQuery->tagColList[index].type;
      }
    } else {
      pCtx->inputBytes = pQuery->colList[index].bytes;
      pCtx->inputType = pQuery->colList[index].type;
    }

    assert(isValidDataType(pCtx->inputType, pCtx->inputBytes));
    pCtx->ptsOutputBuf = NULL;

    pCtx->outputBytes = pQuery->pSelectExpr[i].bytes;
    pCtx->outputType = pQuery->pSelectExpr[i].type;

    pCtx->order = pQuery->order.order;
    pCtx->functionId = pSqlFuncMsg->functionId;

    pCtx->numOfParams = pSqlFuncMsg->numOfParams;
    for (int32_t j = 0; j < pCtx->numOfParams; ++j) {
      int16_t type = pSqlFuncMsg->arg[j].argType;
      int16_t bytes = pSqlFuncMsg->arg[j].argBytes;
      if (type == TSDB_DATA_TYPE_BINARY || type == TSDB_DATA_TYPE_NCHAR) {
        // read the value of the j-th argument; "arg->" would always pick the first argument
        tVariantCreateFromBinary(&pCtx->param[j], pSqlFuncMsg->arg[j].argValue.pz, bytes, type);
      } else {
        tVariantCreateFromBinary(&pCtx->param[j], (char *)&pSqlFuncMsg->arg[j].argValue.i64, bytes, type);
      }
    }

    // set the order information for top/bottom query
    int32_t functionId = pCtx->functionId;

    if (functionId == TSDB_FUNC_TOP || functionId == TSDB_FUNC_BOTTOM || functionId == TSDB_FUNC_DIFF) {
      // these functions rely on the first output column being the timestamp
      int32_t f = pQuery->pSelectExpr[0].base.functionId;
      assert(f == TSDB_FUNC_TS || f == TSDB_FUNC_TS_DUMMY);

      pCtx->param[2].i64Key = order;
      pCtx->param[2].nType = TSDB_DATA_TYPE_BIGINT;
      pCtx->param[3].i64Key = functionId;
      pCtx->param[3].nType = TSDB_DATA_TYPE_BIGINT;

      pCtx->param[1].i64Key = pQuery->order.orderColId;
    }

    // offset of this column = offset of the previous column + output length of the previous column
    if (i > 0) {
      pRuntimeEnv->offset[i] = pRuntimeEnv->offset[i - 1] + pRuntimeEnv->pCtx[i - 1].outputBytes;
    }
  }

  // set the intermediate result output buffer
  setWindowResultInfo(pRuntimeEnv->resultInfo, pQuery, pRuntimeEnv->stableQuery);

  // if it is group by normal column, do not set output buffer, the output buffer is pResult
  if (!isGroupbyNormalCol(pQuery->pGroupbyExpr) && !pRuntimeEnv->stableQuery) {
    resetCtxOutputBuf(pRuntimeEnv);
  }

  setCtxTagColumnInfo(pQuery, pRuntimeEnv->pCtx);
  return TSDB_CODE_SUCCESS;

_clean:
  tfree(pRuntimeEnv->resultInfo);
  tfree(pRuntimeEnv->pCtx);

  return TSDB_CODE_SERV_OUT_OF_MEMORY;
}

/**
 * Release everything the runtime environment holds: time window info, the function
 * contexts with their parameters and tag lists, the intermediate result buffers, the
 * fill info, the result buffer, both query handles and the timestamp buffer.
 * Does nothing if no query is attached to the environment.
 */
static void teardownQueryRuntimeEnv(SQueryRuntimeEnv *pRuntimeEnv) {
  SQuery *pQuery = pRuntimeEnv->pQuery;
  if (pQuery == NULL) {
    return;
  }

  SQInfo* pQInfo = (SQInfo*) GET_QINFO_ADDR(pRuntimeEnv);
  qTrace("QInfo:%p teardown runtime env", pQInfo);

  cleanupTimeWindowInfo(&pRuntimeEnv->windowResInfo, pQuery->numOfOutput);

  if (pRuntimeEnv->pCtx != NULL) {
    int32_t numOfOutput = pQuery->numOfOutput;

    for (int32_t col = 0; col < numOfOutput; ++col) {
      SQLFunctionCtx *pOne = pRuntimeEnv->pCtx + col;

      // destroy the parameters first, then the tag value and the tag context list
      int32_t n = 0;
      while (n < pOne->numOfParams) {
        tVariantDestroy(&pOne->param[n]);
        n += 1;
      }

      tVariantDestroy(&pOne->tag);
      tfree(pOne->tagInfo.pTagCtxList);
      tfree(pRuntimeEnv->resultInfo[col].interResultBuf);
    }

    tfree(pRuntimeEnv->resultInfo);
    tfree(pRuntimeEnv->pCtx);
  }

  taosDestoryFillInfo(pRuntimeEnv->pFillInfo);

  destroyResultBuf(pRuntimeEnv->pResultBuf, pQInfo);
  tsdbCleanupQueryHandle(pRuntimeEnv->pQueryHandle);
  tsdbCleanupQueryHandle(pRuntimeEnv->pSecQueryHandle);

  // keep the value handed back by tsBufDestory in the pointer
  pRuntimeEnv->pTSBuf = tsBufDestory(pRuntimeEnv->pTSBuf);
}

1476 1477
/**
 * Tell whether the query has been cancelled, i.e. whether its code equals
 * TSDB_CODE_QUERY_CANCELLED.
 */
static bool isQueryKilled(SQInfo *pQInfo) {
  if (pQInfo->code == TSDB_CODE_QUERY_CANCELLED) {
    return true;
  }

  return false;
}

1493
/**
 * Mark the query as cancelled by setting its code to TSDB_CODE_QUERY_CANCELLED.
 */
static void setQueryKilled(SQInfo *pQInfo) {
  pQInfo->code = TSDB_CODE_QUERY_CANCELLED;
}
H
hjxilinx 已提交
1494

H
hjxilinx 已提交
1495
/**
 * Tell whether the query is a fixed output query.
 *
 * Interval queries never are. Top/bottom queries and group-by-normal-column queries always
 * are. Otherwise the query is one as soon as a function that is not flagged as multi-output
 * is found, ignoring the ts_comp projection in the first column and the ts functions.
 */
static bool isFixedOutputQuery(SQuery *pQuery) {
  if (pQuery->intervalTime != 0) {
    return false;
  }

  // Note:top/bottom query is fixed output query
  if (isTopBottomQuery(pQuery) || isGroupbyNormalCol(pQuery->pGroupbyExpr)) {
    return true;
  }

  for (int32_t col = 0; col < pQuery->numOfOutput; ++col) {
    SSqlFuncMsg *pMsg = &pQuery->pSelectExpr[col].base;

    // the ts_comp function: projection of the primary timestamp column in the first output column
    bool isTsComp = (col == 0) && (pMsg->functionId == TSDB_FUNC_PRJ) && (pMsg->numOfParams == 1) &&
                    (pMsg->colInfo.colIndex == PRIMARYKEY_TIMESTAMP_COL_INDEX);

    bool isTsFunc = (pMsg->functionId == TSDB_FUNC_TS) || (pMsg->functionId == TSDB_FUNC_TS_DUMMY);

    if (isTsComp || isTsFunc) {
      continue;
    }

    if (!IS_MULTIOUTPUT(aAggs[pMsg->functionId].nStatus)) {
      return true;
    }
  }

  return false;
}

1526
// todo refactor with isLastRowQuery
H
hjxilinx 已提交
1527
/**
 * Tell whether at least one output column of the query is the interp function.
 */
static bool isPointInterpoQuery(SQuery *pQuery) {
  bool found = false;

  for (int32_t col = 0; col < pQuery->numOfOutput && !found; ++col) {
    found = (pQuery->pSelectExpr[col].base.functionId == TSDB_FUNC_INTERP);
  }

  return found;
}

// TODO REFACTOR:MERGE WITH CLIENT-SIDE FUNCTION
H
hjxilinx 已提交
1539
/**
 * Tell whether the query contains one of the sum_rate, sum_irate, avg_rate or avg_irate
 * functions. Timestamp columns are skipped.
 */
static bool isSumAvgRateQuery(SQuery *pQuery) {
  int32_t col = 0;

  while (col < pQuery->numOfOutput) {
    int32_t fid = pQuery->pSelectExpr[col].base.functionId;
    col += 1;

    if (fid == TSDB_FUNC_TS) {
      continue;
    }

    bool isSumRate = (fid == TSDB_FUNC_SUM_RATE) || (fid == TSDB_FUNC_SUM_IRATE);
    bool isAvgRate = (fid == TSDB_FUNC_AVG_RATE) || (fid == TSDB_FUNC_AVG_IRATE);

    if (isSumRate || isAvgRate) {
      return true;
    }
  }

  return false;
}

H
hjxilinx 已提交
1555
/**
 * Tell whether at least one output column of the query is the last_row function.
 */
static bool isFirstLastRowQuery(SQuery *pQuery) {
  bool found = false;

  for (int32_t col = 0; col < pQuery->numOfOutput && !found; ++col) {
    found = (pQuery->pSelectExpr[col].base.functionId == TSDB_FUNC_LAST_ROW);
  }

  return found;
}

H
hjxilinx 已提交
1566
/**
 * Tell whether the query window spans the complete time range, i.e. [0, INT64_MAX] for
 * an ascending query or [INT64_MAX, 0] for a descending query.
 */
static UNUSED_FUNC bool notHasQueryTimeRange(SQuery *pQuery) {
  if (QUERY_IS_ASC_QUERY(pQuery)) {
    return pQuery->window.skey == 0 && pQuery->window.ekey == INT64_MAX;
  }

  return pQuery->window.skey == INT64_MAX && pQuery->window.ekey == 0;
}

H
hjxilinx 已提交
1571
/**
 * Tell whether a scan in reverse direction is required: this is the case for last/last_dst
 * in an ascending query and for first/first_dst in a descending query.
 */
static bool needReverseScan(SQuery *pQuery) {
  bool ascending = QUERY_IS_ASC_QUERY(pQuery);

  for (int32_t col = 0; col < pQuery->numOfOutput; ++col) {
    int32_t fid = pQuery->pSelectExpr[col].base.functionId;

    // timestamp and tag columns never ask for a reverse scan
    if (fid == TSDB_FUNC_TS || fid == TSDB_FUNC_TS_DUMMY || fid == TSDB_FUNC_TAG) {
      continue;
    }

    bool isLast  = (fid == TSDB_FUNC_LAST) || (fid == TSDB_FUNC_LAST_DST);
    bool isFirst = (fid == TSDB_FUNC_FIRST) || (fid == TSDB_FUNC_FIRST_DST);

    if ((isLast && ascending) || (isFirst && !ascending)) {
      return true;
    }
  }

  return false;
}
H
hjxilinx 已提交
1586 1587 1588 1589

/**
 * Tell whether every output column of the query is either a tag projection
 * (TSDB_FUNC_TAGPRJ) or TSDB_FUNC_TID_TAG. A query without output columns yields true.
 */
static bool onlyQueryTags(SQuery* pQuery) {
  int32_t col = 0;

  while (col < pQuery->numOfOutput) {
    int32_t fid = pQuery->pSelectExpr[col].base.functionId;

    bool isTagFunc = (fid == TSDB_FUNC_TAGPRJ) || (fid == TSDB_FUNC_TID_TAG);
    if (!isTagFunc) {
      return false;
    }

    col += 1;
  }

  return true;
}

1598 1599
/////////////////////////////////////////////////////////////////////////////////////////////

1600 1601
/*
 * Compute the interval window that contains key and clip it to the range
 * [keyFirst, keyLast].
 *
 * @param pQuery    supplies slidingTime, slidingTimeUnit, intervalTime, precision
 * @param key       timestamp the window is aligned for; must be in [keyFirst, keyLast]
 * @param keyFirst  lower bound of the range
 * @param keyLast   upper bound of the range
 * @param realSkey  out: max(win->skey, keyFirst)
 * @param realEkey  out: min(win->ekey, keyLast)
 * @param win       out: the aligned window; ekey is INT64_MAX when keyFirst is
 *                  within one interval of INT64_MAX
 */
void getAlignQueryTimeWindow(SQuery *pQuery, int64_t key, int64_t keyFirst, int64_t keyLast, int64_t *realSkey,
                             int64_t *realEkey, STimeWindow *win) {
  assert(key >= keyFirst && key <= keyLast && pQuery->slidingTime <= pQuery->intervalTime);

  win->skey = taosGetIntervalStartTimestamp(key, pQuery->slidingTime, pQuery->slidingTimeUnit, pQuery->precision);

  const bool nearUpperBound = keyFirst > (INT64_MAX - pQuery->intervalTime);
  if (!nearUpperBound) {
    win->ekey = win->skey + pQuery->intervalTime - 1;

    *realSkey = (win->skey < keyFirst) ? keyFirst : win->skey;
    *realEkey = (win->ekey < keyLast) ? win->ekey : keyLast;
    return;
  }

  /*
   * keyFirst is less than one interval away from INT64_MAX, so the range
   * [keyFirst, keyLast] is shorter than one interval and is used as is; the
   * window end is pinned to INT64_MAX.
   */
  assert(keyLast - keyFirst < pQuery->intervalTime);

  *realSkey = keyFirst;
  *realEkey = keyLast;

  win->ekey = INT64_MAX;
}

/*
 * Decide whether the scan has to check the result buffer (pQuery->checkBuffer).
 *
 * checkBuffer is 0 for top/bottom queries and for group-by-normal-column
 * queries. Otherwise it is 1 only when there is at least one expression other
 * than ts/ts_dummy and every such expression is flagged IS_MULTIOUTPUT in aAggs.
 */
static void setScanLimitationByResultBuffer(SQuery *pQuery) {
  if (isTopBottomQuery(pQuery) || isGroupbyNormalCol(pQuery->pGroupbyExpr)) {
    pQuery->checkBuffer = 0;
    return;
  }

  bool allMultiOutput = false;

  for (int32_t k = 0; k < pQuery->numOfOutput; ++k) {
    const int32_t fid = pQuery->pSelectExpr[k].base.functionId;
    if (fid == TSDB_FUNC_TS || fid == TSDB_FUNC_TS_DUMMY) {
      continue;
    }

    allMultiOutput = IS_MULTIOUTPUT(aAggs[fid].nStatus);
    if (!allMultiOutput) {
      break;  // one single-output function is enough to turn the check off
    }
  }

  pQuery->checkBuffer = allMultiOutput ? 1 : 0;
}

/*
 * todo add more parameters to check soon..
 */
1661
bool colIdCheck(SQuery *pQuery) {
1662 1663
  // load data column information is incorrect
  for (int32_t i = 0; i < pQuery->numOfCols - 1; ++i) {
1664
    if (pQuery->colList[i].colId == pQuery->colList[i + 1].colId) {
S
slguan 已提交
1665
      qError("QInfo:%p invalid data load column for query", GET_QINFO_ADDR(pQuery));
1666 1667 1668
      return false;
    }
  }
1669
  
1670 1671 1672 1673 1674 1675
  return true;
}

/*
 * Tell whether every output expression, apart from ts, ts_dummy, tag and
 * tag_dummy, uses one of the two given function ids.
 *
 * todo ignore the avg/sum/min/max/count/stddev/top/bottom functions, for which
 * the scan order does not matter
 *
 * @param functId     accepted function id
 * @param functIdDst  second accepted function id (the *_DST variant at the call sites)
 * @return false on the first expression using another function, true otherwise
 */
static bool onlyOneQueryType(SQuery *pQuery, int32_t functId, int32_t functIdDst) {
  int32_t k = 0;

  while (k < pQuery->numOfOutput) {
    const int32_t cur = pQuery->pSelectExpr[k++].base.functionId;

    const bool ignorable = (cur == TSDB_FUNC_TS || cur == TSDB_FUNC_TS_DUMMY || cur == TSDB_FUNC_TAG ||
                            cur == TSDB_FUNC_TAG_DUMMY);

    if (!ignorable && cur != functId && cur != functIdDst) {
      return false;
    }
  }

  return true;
}

/* True when first/first_dst are the only functions besides ts/tag columns. */
static bool onlyFirstQuery(SQuery *pQuery) {
  const bool firstOnly = onlyOneQueryType(pQuery, TSDB_FUNC_FIRST, TSDB_FUNC_FIRST_DST);
  return firstOnly;
}

/* True when last/last_dst are the only functions besides ts/tag columns. */
static bool onlyLastQuery(SQuery *pQuery) {
  const bool lastOnly = onlyOneQueryType(pQuery, TSDB_FUNC_LAST, TSDB_FUNC_LAST_DST);
  return lastOnly;
}

1696
/*
 * Force the scan order of pQuery to newOrder.
 *
 * When the current order differs from newOrder the change is traced and the
 * two ends of the query window are exchanged, so that skey stays the key the
 * scan starts from. The order field is assigned in every case.
 *
 * @param newOrder   TSDB_ORDER_ASC or TSDB_ORDER_DESC
 * @param queryType  label of the query kind, only used in the trace message
 */
static void exchangeQueryRangeForOrder(SQuery *pQuery, int32_t newOrder, const char *queryType) {
  const bool isAsc = QUERY_IS_ASC_QUERY(pQuery);
  const bool wantAsc = (newOrder == TSDB_ORDER_ASC);

  if (isAsc != wantAsc) {
    qTrace("QInfo:%p scan order changed for %s query, old:%d, new:%d, qrange exchanged, old qrange:%" PRId64
           "-%" PRId64 ", new qrange:%" PRId64 "-%" PRId64,
           GET_QINFO_ADDR(pQuery), queryType, pQuery->order.order, newOrder, pQuery->window.skey,
           pQuery->window.ekey, pQuery->window.ekey, pQuery->window.skey);

    SWAP(pQuery->window.skey, pQuery->window.ekey, TSKEY);
  }

  pQuery->order.order = newOrder;
}

/*
 * Pick the scan order that suits the functions of the query, overriding the
 * order requested by the client where needed:
 *   - last_row query:                       descending, window set to [max, min]
 *   - point interpolation without interval: ascending
 *   - only first()/first_dst():             ascending
 *   - only last()/last_dst():               descending
 * The first/last rules apply to queries without interval, and to interval
 * queries only when stableQuery is set. Any other query is left untouched.
 *
 * todo handle the case where order-irrelevant functions are mixed with
 * order-critical ones
 *
 * @param pQuery       query whose order.order and window may be rewritten
 * @param stableQuery  true for a super table query
 */
static void changeExecuteScanOrder(SQuery *pQuery, bool stableQuery) {
  // descending order query for last_row query
  if (isFirstLastRowQuery(pQuery)) {
    qTrace("QInfo:%p scan order changed for last_row query, old:%d, new:%d", GET_QINFO_ADDR(pQuery),
           pQuery->order.order, TSDB_ORDER_DESC);

    pQuery->order.order = TSDB_ORDER_DESC;

    // normalize the window whatever the previous order was
    int64_t skey = MIN(pQuery->window.skey, pQuery->window.ekey);
    int64_t ekey = MAX(pQuery->window.skey, pQuery->window.ekey);

    pQuery->window.skey = ekey;
    pQuery->window.ekey = skey;

    return;
  }

  // in case of point-interpolation query, use asc order scan
  if (isPointInterpoQuery(pQuery) && pQuery->intervalTime == 0) {
    exchangeQueryRangeForOrder(pQuery, TSDB_ORDER_ASC, "interp");
    return;
  }

  const char *firstLabel = NULL;
  const char *lastLabel = NULL;

  if (pQuery->intervalTime == 0) {
    firstLabel = "only-first";
    lastLabel = "only-last";
  } else if (stableQuery) {  // interval query on super table
    firstLabel = "only-first stable";
    lastLabel = "only-last stable";
  } else {
    return;  // interval query on a normal table keeps its order
  }

  if (onlyFirstQuery(pQuery)) {
    exchangeQueryRangeForOrder(pQuery, TSDB_ORDER_ASC, firstLabel);
  } else if (onlyLastQuery(pQuery)) {
    exchangeQueryRangeForOrder(pQuery, TSDB_ORDER_DESC, lastLabel);
  }
}

H
hjxilinx 已提交
1775
/*
 * Store an interpolation sample (timestamp followed by the value) in
 * pCtx->param[index] as a binary variant.
 *
 * Buffer layout: TSDB_KEYSIZE bytes holding ts, then the value. Float and
 * double values are stored as double, the integer-like types as int64_t,
 * binary/nchar values as a zero-terminated copy of data.
 *
 * If the buffer cannot be allocated an error is logged and the parameter is
 * left empty (pz stays NULL) instead of writing through a NULL pointer.
 *
 * @param pCtx   function context that owns the parameter
 * @param ts     timestamp of the sample
 * @param type   TSDB_DATA_TYPE_* of the value behind data
 * @param index  parameter slot to fill; its pz must be NULL on entry
 * @param data   source value; a terminated (wide) string for binary/nchar
 */
static UNUSED_FUNC void doSetInterpVal(SQLFunctionCtx *pCtx, TSKEY ts, int16_t type, int32_t index, char *data) {
  assert(pCtx->param[index].pz == NULL);

  int32_t len = 0;
  size_t  t = 0;
  char *  buf = NULL;

  if (type == TSDB_DATA_TYPE_BINARY) {
    t = strlen(data);

    // calloc zero-fills, so the copied string stays terminated
    len = (int32_t)(t + 1 + TSDB_KEYSIZE);
    buf = calloc(1, len);
  } else if (type == TSDB_DATA_TYPE_NCHAR) {
    t = wcslen((const wchar_t *)data);

    len = (int32_t)((t + 1) * TSDB_NCHAR_SIZE + TSDB_KEYSIZE);
    buf = calloc(1, len);
  } else {
    len = TSDB_KEYSIZE * 2;
    buf = malloc(len);
  }

  if (buf == NULL) {
    qError("doSetInterpVal: failed to allocate %d bytes for the interpolation value", len);
    return;
  }

  pCtx->param[index].pz = buf;
  pCtx->param[index].nType = TSDB_DATA_TYPE_BINARY;

  char *z = buf;
  *(TSKEY *)z = ts;
  z += TSDB_KEYSIZE;

  switch (type) {
    case TSDB_DATA_TYPE_FLOAT:
      *(double *)z = GET_FLOAT_VAL(data);
      break;
    case TSDB_DATA_TYPE_DOUBLE:
      *(double *)z = GET_DOUBLE_VAL(data);
      break;
    case TSDB_DATA_TYPE_INT:
    case TSDB_DATA_TYPE_BOOL:
    case TSDB_DATA_TYPE_BIGINT:
    case TSDB_DATA_TYPE_TINYINT:
    case TSDB_DATA_TYPE_SMALLINT:
    case TSDB_DATA_TYPE_TIMESTAMP:
      // NOTE(review): every integer-like type is read as a 64-bit value;
      // presumably data already holds a widened int64 - confirm with callers
      *(int64_t *)z = GET_INT64_VAL(data);
      break;
    case TSDB_DATA_TYPE_BINARY:
      memcpy(z, data, t);
      break;
    case TSDB_DATA_TYPE_NCHAR: {
      wcsncpy((wchar_t *)z, (const wchar_t *)data, t);
    } break;
    default:
      assert(0);
  }

  pCtx->param[index].nLen = len;
}

/*
 * Initial number of result pages for a query:
 *   - group by normal column: 128
 *   - interval query:         one per table, but at least 16
 *   - anything else:          1
 */
static int32_t getInitialPageNum(SQInfo *pQInfo) {
  SQuery *pQuery = pQInfo->runtimeEnv.pQuery;
  int32_t minPagesForInterval = 16;

  int32_t numOfPages = 0;

  if (isGroupbyNormalCol(pQuery->pGroupbyExpr)) {
    numOfPages = 128;
  } else if (!isIntervalQuery(pQuery)) {
    // for super table query, one page for each subset
    numOfPages = 1;  // pQInfo->pSidSet->numOfSubSet;
  } else {
    // time window query, allocate one page for each table
    size_t numOfTables = pQInfo->groupInfo.numOfTables;
    numOfPages = MAX(numOfTables, minPagesForInterval);
  }

  assert(numOfPages > 0);
  return numOfPages;
}

/*
 * Number of physical rows one result occupies in a result page.
 *
 * For a top/bottom query on a normal table this is the first argument of the
 * expression at index 1; every other query uses 1.
 * NOTE(review): index 1 is hard-coded - presumably the top/bottom expression
 * always follows a leading ts column; confirm.
 */
static int32_t getRowParamForMultiRowsOutput(SQuery *pQuery, bool isSTableQuery) {
  if (!isTopBottomQuery(pQuery) || isSTableQuery) {
    return 1;
  }

  return pQuery->pSelectExpr[1].base.arg->argValue.i64;
}

/*
 * Number of results that fit into one internal buffer page: the page size
 * minus the tFilePage header, divided by the bytes one result occupies
 * (rowSize times the row multiplier of the query).
 */
static int32_t getNumOfRowsInResultPage(SQuery *pQuery, bool isSTableQuery) {
  int32_t rowsPerResult = getRowParamForMultiRowsOutput(pQuery, isSTableQuery);
  int32_t bytesPerResult = pQuery->rowSize * rowsPerResult;

  return (DEFAULT_INTERN_BUF_PAGE_SIZE - sizeof(tFilePage)) / bytesPerResult;
}

/*
 * Address of the cell of column columnIndex for the given window result
 * inside its result buffer page.
 *
 * The page is addressed column by column: the region of a column starts at
 * offset[columnIndex] * rowsPerPage, and the cell of the result lies
 * bytes * realRowId further.
 *
 * @param pRuntimeEnv  runtime environment owning the result buffer
 * @param columnIndex  index of the output column
 * @param pResult      window result; its pos gives page id and row id
 */
char *getPosInResultPage(SQueryRuntimeEnv *pRuntimeEnv, int32_t columnIndex, SWindowResult *pResult) {
  assert(pResult != NULL && pRuntimeEnv != NULL);

  SQuery *   pQuery = pRuntimeEnv->pQuery;
  tFilePage *pPage = getResultBufferPageById(pRuntimeEnv->pResultBuf, pResult->pos.pageId);

  int32_t rowsPerPage = getNumOfRowsInResultPage(pQuery, pRuntimeEnv->stableQuery);
  int32_t realRowId = pResult->pos.rowId * getRowParamForMultiRowsOutput(pQuery, pRuntimeEnv->stableQuery);

  char *pos = (char *)pPage->data;
  pos += pRuntimeEnv->offset[columnIndex] * rowsPerPage;   // start of the column region
  pos += pQuery->pSelectExpr[columnIndex].bytes * realRowId;  // cell inside the column

  return pos;
}

/**
 * decrease the refcount for each table involved in this query
 * @param pQInfo
 */
1881
UNUSED_FUNC void vnodeDecMeterRefcnt(SQInfo *pQInfo) {
1882
  if (pQInfo != NULL) {
1883
    //    assert(taosHashGetSize(pQInfo->groupInfo) >= 1);
1884 1885 1886
  }

#if 0
1887
  if (pQInfo == NULL || pQInfo->groupInfo.numOfTables == 1) {
1888
    atomic_fetch_sub_32(&pQInfo->pObj->numOfQueries, 1);
S
slguan 已提交
1889
    qTrace("QInfo:%p vid:%d sid:%d meterId:%s, query is over, numOfQueries:%d", pQInfo, pQInfo->pObj->vnode,
1890 1891 1892
           pQInfo->pObj->sid, pQInfo->pObj->meterId, pQInfo->pObj->numOfQueries);
  } else {
    int32_t num = 0;
1893 1894
    for (int32_t i = 0; i < pQInfo->groupInfo.numOfTables; ++i) {
      SMeterObj *pMeter = getMeterObj(pQInfo->groupInfo, pQInfo->pSidSet->pTableIdList[i]->sid);
1895
      atomic_fetch_sub_32(&(pMeter->numOfQueries), 1);
1896

1897
      if (pMeter->numOfQueries > 0) {
S
slguan 已提交
1898
        qTrace("QInfo:%p vid:%d sid:%d meterId:%s, query is over, numOfQueries:%d", pQInfo, pMeter->vnode, pMeter->sid,
1899 1900 1901 1902
               pMeter->meterId, pMeter->numOfQueries);
        num++;
      }
    }
1903

1904 1905 1906 1907
    /*
     * in order to reduce log output, for all meters of which numOfQueries count are 0,
     * we do not output corresponding information
     */
1908
    num = pQInfo->groupInfo.numOfTables - num;
S
slguan 已提交
1909
    qTrace("QInfo:%p metric query is over, dec query ref for %d meters, numOfQueries on %d meters are 0", pQInfo,
1910
           pQInfo->groupInfo.numOfTables, num);
1911 1912 1913 1914 1915 1916 1917 1918 1919 1920 1921 1922 1923
  }
#endif
}

/*
 * Decide from the block statistics whether the data block has to be loaded.
 *
 * The statistics based pre-filter below is compiled out, so the function
 * currently returns true in every case.
 *
 * @param pQuery            query description (only used by the disabled pre-filter)
 * @param pDataStatis       per column statistics of the block, may be NULL
 * @param pCtx              function contexts (only used by the disabled code)
 * @param numOfTotalPoints  number of rows in the block
 */
static bool needToLoadDataBlock(SQuery *pQuery, SDataStatis *pDataStatis, SQLFunctionCtx *pCtx,
                                int32_t numOfTotalPoints) {
  // without statistics nothing can be ruled out
  if (pDataStatis == NULL) {
    return true;
  }

#if 0
  // disabled: test the min/max of every filtered column against the filters
  for (int32_t k = 0; k < pQuery->numOfFilterCols; ++k) {
    SSingleColumnFilterInfo *pFilterInfo = &pQuery->pFilterInfo[k];
    int32_t                  colIndex = pFilterInfo->info.colIndex;

    // this column not valid in current data block
    if (colIndex < 0 || pDataStatis[colIndex].colId != pFilterInfo->info.data.colId) {
      continue;
    }

    // not support pre-filter operation on binary/nchar data type
    if (!vnodeSupportPrefilter(pFilterInfo->info.data.type)) {
      continue;
    }

    // all points in current column are NULL, no need to check its boundary value
    if (pDataStatis[colIndex].numOfNull == numOfTotalPoints) {
      continue;
    }

    if (pFilterInfo->info.info.type == TSDB_DATA_TYPE_FLOAT) {
      float minval = *(double *)(&pDataStatis[colIndex].min);
      float maxval = *(double *)(&pDataStatis[colIndex].max);

      for (int32_t i = 0; i < pFilterInfo->numOfFilters; ++i) {
        if (pFilterInfo->pFilters[i].fp(&pFilterInfo->pFilters[i], (char *)&minval, (char *)&maxval)) {
          return true;
        }
      }
    } else {
      for (int32_t i = 0; i < pFilterInfo->numOfFilters; ++i) {
        if (pFilterInfo->pFilters[i].fp(&pFilterInfo->pFilters[i], (char *)&pDataStatis[colIndex].min,
                                        (char *)&pDataStatis[colIndex].max)) {
          return true;
        }
      }
    }
  }

  // todo disable this opt code block temporarily
  //  for (int32_t i = 0; i < pQuery->numOfOutput; ++i) {
  //    int32_t functId = pQuery->pSelectExpr[i].base.functionId;
  //    if (functId == TSDB_FUNC_TOP || functId == TSDB_FUNC_BOTTOM) {
  //      return top_bot_datablock_filter(&pCtx[i], functId, (char *)&pField[i].min, (char *)&pField[i].max);
  //    }
  //  }

#endif
  return true;
}

/*
 * Move pTimeWindow one sliding step along the scan direction.
 *
 * skey advances by slidingTime times the direction factor of the query order;
 * ekey is recomputed as skey + intervalTime - 1, because the previous time
 * window may not be of the same size as pQuery->intervalTime.
 */
static void getNextTimeWindow(SQuery *pQuery, STimeWindow *pTimeWindow) {
  int32_t direction = GET_FORWARD_DIRECTION_FACTOR(pQuery->order.order);

  pTimeWindow->skey = pTimeWindow->skey + (pQuery->slidingTime * direction);
  pTimeWindow->ekey = pTimeWindow->skey + (pQuery->intervalTime - 1);
}

H
hjxilinx 已提交
1980
/*
 * Load the statistics and/or the data of the current block, depending on what
 * the functions of the query require.
 *
 * The requirement is BLK_DATA_ALL_NEEDED when the query has filter columns;
 * otherwise it is the OR of the dataReqFunc results of all output
 * expressions, raised to BLK_DATA_ALL_NEEDED for interval queries and when a
 * ts buffer is attached.
 *
 * If the block statistics cannot be retrieved, *pStatis is reset to NULL so
 * that the data block itself is used instead.
 *
 * @param pRuntimeEnv   runtime environment; its summary counters are updated
 * @param pQueryHandle  tsdb query handle positioned on the block
 * @param pBlockInfo    description of the current block
 * @param pStatis       out: block statistics when they were loaded
 * @return the result of tsdbRetrieveDataBlock, or NULL when the data block
 *         was not loaded
 */
SArray *loadDataBlockOnDemand(SQueryRuntimeEnv *pRuntimeEnv, void* pQueryHandle, SDataBlockInfo* pBlockInfo, SDataStatis **pStatis) {
  SQuery *pQuery = pRuntimeEnv->pQuery;

  uint32_t r = 0;
  SArray * pDataBlock = NULL;

  if (pQuery->numOfFilterCols > 0) {
    r = BLK_DATA_ALL_NEEDED;
  } else {
    // check if this data block is required to load
    for (int32_t i = 0; i < pQuery->numOfOutput; ++i) {
      SSqlFuncMsg* pSqlFunc = &pQuery->pSelectExpr[i].base;

      int32_t functionId = pSqlFunc->functionId;
      int32_t colId = pSqlFunc->colInfo.colId;
      r |= aAggs[functionId].dataReqFunc(&pRuntimeEnv->pCtx[i], pQuery->window.skey, pQuery->window.ekey, colId);
    }

    if (pRuntimeEnv->pTSBuf != NULL || isIntervalQuery(pQuery)) {
      r |= BLK_DATA_ALL_NEEDED;
    }
  }

  if (r == BLK_DATA_NO_NEEDED) {
    qTrace("QInfo:%p data block discard, brange:%" PRId64 "-%" PRId64 ", rows:%d", GET_QINFO_ADDR(pRuntimeEnv),
           pBlockInfo->window.skey, pBlockInfo->window.ekey, pBlockInfo->rows);
    pRuntimeEnv->summary.discardBlocks += 1;
  } else if (r == BLK_DATA_STATIS_NEEDED) {
    if (tsdbRetrieveDataBlockStatisInfo(pQueryHandle, pStatis) != TSDB_CODE_SUCCESS) {
      *pStatis = NULL;  // fall back to the data block below
    }

    pRuntimeEnv->summary.loadBlockStatis += 1;

    if (*pStatis == NULL) { // data block statistics does not exist, load data block
      pDataBlock = tsdbRetrieveDataBlock(pQueryHandle, NULL);
      pRuntimeEnv->summary.checkRows += pBlockInfo->rows;
    }
  } else {
    assert(r == BLK_DATA_ALL_NEEDED);

    // load the data block statistics to perform further filter
    pRuntimeEnv->summary.loadBlockStatis += 1;
    if (tsdbRetrieveDataBlockStatisInfo(pQueryHandle, pStatis) != TSDB_CODE_SUCCESS) {
      *pStatis = NULL;  // no statistics: the pre-filter cannot rule the block out
    }

    if (!needToLoadDataBlock(pQuery, *pStatis, pRuntimeEnv->pCtx, pBlockInfo->rows)) {
#if defined(_DEBUG_VIEW)
      qTrace("QInfo:%p block discarded by per-filter", GET_QINFO_ADDR(pRuntimeEnv));
#endif
      // current block has been discard due to filter applied
      // NOTE(review): the block is only counted as discarded, it is still loaded below
      pRuntimeEnv->summary.discardBlocks += 1;
    }

    pRuntimeEnv->summary.checkRows += pBlockInfo->rows;
    pDataBlock = tsdbRetrieveDataBlock(pQueryHandle, NULL);
  }

  return pDataBlock;
}

H
hjxilinx 已提交
2042
/*
 * Binary search in an ascending array of timestamps.
 *
 * @param pValue  array of num TSKEY values in ascending order
 * @param num     number of entries
 * @param key     timestamp to look for
 * @param order   TSDB_ORDER_ASC:  index of the first entry >= key, or -1 when
 *                                 every entry is smaller than key
 *                TSDB_ORDER_DESC: index of the last entry <= key, or -1 when
 *                                 every entry is greater than key
 * @return the index as described above; -1 as well when num <= 0
 */
int32_t binarySearchForKey(char *pValue, int num, TSKEY key, int order) {
  if (num <= 0) {
    return -1;
  }

  assert(order == TSDB_ORDER_ASC || order == TSDB_ORDER_DESC);

  const TSKEY *keys = (const TSKEY *)pValue;

  int32_t lo = 0;
  int32_t hi = num - 1;
  int32_t mid = -1;

  if (order == TSDB_ORDER_DESC) {
    // find the first position which is smaller than the key
    for (;;) {
      if (key >= keys[hi]) return hi;
      if (key == keys[lo]) return lo;
      if (key < keys[lo]) return lo - 1;

      // here keys[lo] < key < keys[hi], so the range holds at least two entries
      mid = lo + ((hi - lo + 1) >> 1);

      if (key < keys[mid]) {
        hi = mid - 1;
      } else if (key > keys[mid]) {
        lo = mid + 1;
      } else {
        return mid;
      }
    }
  }

  // find the first position which is bigger than the key
  for (;;) {
    if (key <= keys[lo]) return lo;
    if (key == keys[hi]) return hi;

    if (key > keys[hi]) {
      // the answer is the entry right after the current range, if there is one
      hi = hi + 1;
      return (hi >= num) ? -1 : hi;
    }

    mid = lo + ((hi - lo + 1) >> 1);

    if (key < keys[mid]) {
      hi = mid - 1;
    } else if (key > keys[mid]) {
      lo = mid + 1;
    } else {
      return mid;
    }
  }
}

2105 2106 2107 2108 2109 2110 2111 2112 2113 2114 2115 2116 2117 2118 2119 2120 2121 2122 2123 2124 2125 2126 2127 2128 2129 2130 2131 2132 2133 2134 2135 2136 2137 2138 2139 2140 2141
/*
 * In case of prj/diff query, ensure the output buffer is sufficient to
 * accommodate the results of the current block.
 *
 * Nothing is done for interval, group-by-normal-column and fixed-output
 * queries. Otherwise, when fewer than pBlockInfo->rows rows are free, every
 * output column buffer is grown by the missing number of rows and the output
 * positions of the function contexts are re-pointed into the new buffers.
 *
 * The allocation size and the write offset are computed in wide types so that
 * a large row count times the column width cannot overflow int32_t.
 */
static void ensureOutputBuffer(SQueryRuntimeEnv* pRuntimeEnv, SDataBlockInfo* pBlockInfo) {
  SQuery* pQuery = pRuntimeEnv->pQuery;
  if (isIntervalQuery(pQuery) || isGroupbyNormalCol(pQuery->pGroupbyExpr) || isFixedOutputQuery(pQuery)) {
    return;
  }

  SResultRec *pRec = &pQuery->rec;
  if (pRec->capacity - pRec->rows >= pBlockInfo->rows) {
    return;  // enough room left
  }

  int32_t remain = pRec->capacity - pRec->rows;
  int32_t newSize = pRec->capacity + (pBlockInfo->rows - remain);

  for (int32_t i = 0; i < pQuery->numOfOutput; ++i) {
    int32_t bytes = pQuery->pSelectExpr[i].bytes;
    size_t  newBytes = (size_t)bytes * (size_t)newSize + sizeof(tFilePage);

    char *tmp = realloc(pQuery->sdata[i], newBytes);
    if (tmp == NULL) {  // todo handle the oom; the old buffer is still valid but too small
      assert(0);
    } else {
      pQuery->sdata[i] = (tFilePage *)tmp;
    }

    // set the pCtx output buffer position
    pRuntimeEnv->pCtx[i].aOutputBuf = pQuery->sdata[i]->data + pRec->rows * (int64_t)bytes;

    // these functions also write a timestamp, into the buffer of the first column
    int32_t functionId = pQuery->pSelectExpr[i].base.functionId;
    if (functionId == TSDB_FUNC_TOP || functionId == TSDB_FUNC_BOTTOM || functionId == TSDB_FUNC_DIFF) {
      pRuntimeEnv->pCtx[i].ptsOutputBuf = pRuntimeEnv->pCtx[0].aOutputBuf;
    }
  }

  qTrace("QInfo:%p realloc output buffer, new size: %d rows, old:%d, remain:%d", GET_QINFO_ADDR(pRuntimeEnv),
         newSize, pRec->capacity, newSize - pRec->rows);

  pRec->capacity = newSize;
}

2142 2143
/*
 * Scan the data blocks of the current table and apply the query functions to
 * each of them.
 *
 * The primary query handle is used during the master scan, the secondary one
 * otherwise. The loop ends when no block is left, when the query is killed,
 * or when the query status becomes QUERY_RESBUF_FULL or QUERY_COMPLETED.
 * Afterwards the status is set to QUERY_COMPLETED unless the result buffer is
 * full, and for a completed interval query in the master scan all time
 * windows are closed.
 *
 * @return always 0
 */
static int64_t doScanAllDataBlocks(SQueryRuntimeEnv *pRuntimeEnv) {
  SQuery *         pQuery = pRuntimeEnv->pQuery;
  STableQueryInfo *pTableQueryInfo = pQuery->current;

  qTrace("QInfo:%p query start, qrange:%" PRId64 "-%" PRId64 ", lastkey:%" PRId64 ", order:%d",
         GET_QINFO_ADDR(pRuntimeEnv), pTableQueryInfo->win.skey, pTableQueryInfo->win.ekey, pTableQueryInfo->lastKey,
         pQuery->order.order);

  TsdbQueryHandleT pQueryHandle;
  if (IS_MASTER_SCAN(pRuntimeEnv)) {
    pQueryHandle = pRuntimeEnv->pQueryHandle;
  } else {
    pQueryHandle = pRuntimeEnv->pSecQueryHandle;
  }

  while (tsdbNextDataBlock(pQueryHandle)) {
    pRuntimeEnv->summary.dataBlocks += 1;
    if (isQueryKilled(GET_QINFO_ADDR(pRuntimeEnv))) {
      return 0;
    }

    SDataBlockInfo blockInfo = tsdbRetrieveDataBlockInfo(pQueryHandle);

    // todo extract methods
    // first block of an interval query: align the first time window
    if (isIntervalQuery(pQuery) && pRuntimeEnv->windowResInfo.prevSKey == TSKEY_INITIAL_VAL) {
      TSKEY           alignedSkey, alignedEkey;
      STimeWindow     firstWin = TSWINDOW_INITIALIZER;
      SWindowResInfo *pWindowResInfo = &pRuntimeEnv->windowResInfo;

      const bool ascending = QUERY_IS_ASC_QUERY(pQuery);
      if (ascending) {
        getAlignQueryTimeWindow(pQuery, blockInfo.window.skey, blockInfo.window.skey, pQuery->window.ekey,
                                &alignedSkey, &alignedEkey, &firstWin);
        pWindowResInfo->startTime = firstWin.skey;
      } else {
        // the start position of the first time window in the endpoint that spreads beyond the queried last timestamp
        getAlignQueryTimeWindow(pQuery, blockInfo.window.ekey, pQuery->window.ekey, blockInfo.window.ekey,
                                &alignedSkey, &alignedEkey, &firstWin);
        pWindowResInfo->startTime = pQuery->window.skey;
      }

      pWindowResInfo->prevSKey = firstWin.skey;

      if (pRuntimeEnv->pFillInfo != NULL) {
        pRuntimeEnv->pFillInfo->start = firstWin.skey;
      }
    }

    // in case of prj/diff query, ensure the output buffer is sufficient to accommodate the results of current block
    ensureOutputBuffer(pRuntimeEnv, &blockInfo);

    SDataStatis *pStatis = NULL;

    // start from the first row for an ascending scan, from the last row otherwise
    if (QUERY_IS_ASC_QUERY(pQuery)) {
      pQuery->pos = 0;
    } else {
      pQuery->pos = blockInfo.rows - 1;
    }

    SArray *pDataBlock = loadDataBlockOnDemand(pRuntimeEnv, pQueryHandle, &blockInfo, &pStatis);
    int32_t numOfRes = tableApplyFunctionsOnBlock(pRuntimeEnv, &blockInfo, pStatis, binarySearchForKey, pDataBlock);

    pRuntimeEnv->summary.dataInRows += blockInfo.rows;
    qTrace("QInfo:%p check data block, brange:%" PRId64 "-%" PRId64 ", numOfRows:%d, numOfRes:%d, lastKey:%"PRId64, GET_QINFO_ADDR(pRuntimeEnv),
           blockInfo.window.skey, blockInfo.window.ekey, blockInfo.rows, numOfRes, pQuery->current->lastKey);

    // while the output buffer is full or limit/offset is applied, query may be paused here
    if (Q_STATUS_EQUAL(pQuery->status, QUERY_RESBUF_FULL | QUERY_COMPLETED)) {
      break;
    }
  }

  // if the result buffer is not full, set the query complete
  if (!Q_STATUS_EQUAL(pQuery->status, QUERY_RESBUF_FULL)) {
    setQueryStatus(pQuery, QUERY_COMPLETED);
  }

  if (!isIntervalQuery(pQuery) || !IS_MASTER_SCAN(pRuntimeEnv)) {
    return 0;
  }

  if (!Q_STATUS_EQUAL(pQuery->status, QUERY_COMPLETED)) {
    assert(Q_STATUS_EQUAL(pQuery->status, QUERY_RESBUF_FULL));
    return 0;
  }

  int32_t step = QUERY_IS_ASC_QUERY(pQuery) ? QUERY_ASC_FORWARD_STEP : QUERY_DESC_FORWARD_STEP;

  closeAllTimeWindow(&pRuntimeEnv->windowResInfo);
  removeRedundantWindow(&pRuntimeEnv->windowResInfo, pTableQueryInfo->lastKey - step, step);
  pRuntimeEnv->windowResInfo.curIndex = pRuntimeEnv->windowResInfo.size - 1;  // point to the last time window

  return 0;
}

/*
 * set tag value in SQLFunctionCtx
 * e.g.,tag information into input buffer
 */
H
hjxilinx 已提交
2227
static void doSetTagValueInParam(void *tsdb, STableId* pTableId, int32_t tagColId, tVariant *param) {
2228
  tVariantDestroy(param);
2229

2230
  char *  val = NULL;
2231
  int16_t bytes = 0;
2232
  int16_t type = 0;
2233

2234
  if (tagColId == TSDB_TBNAME_COLUMN_INDEX) {
H
hjxilinx 已提交
2235
    val = tsdbGetTableName(tsdb, pTableId, &bytes);
2236
    type = TSDB_DATA_TYPE_BINARY;
H
hjxilinx 已提交
2237
    tVariantCreateFromBinary(param, varDataVal(val), varDataLen(val), type);
2238
  } else {
H
hjxilinx 已提交
2239
    tsdbGetTableTagVal(tsdb, pTableId, tagColId, &type, &bytes, &val);
H
hjxilinx 已提交
2240 2241 2242 2243 2244 2245
    
    if (type == TSDB_DATA_TYPE_BINARY || type == TSDB_DATA_TYPE_NCHAR) {
      tVariantCreateFromBinary(param, varDataVal(val), varDataLen(val), type);
    } else {
      tVariantCreateFromBinary(param, val, bytes, type);
    }
2246
  }
2247 2248
}

H
hjxilinx 已提交
2249
/*
 * Load the tag values of a table into the function contexts of the query.
 *
 * For a query consisting of a single ts_comp expression, the tag named by the
 * first argument of that expression is stored in pCtx[0].tag. Otherwise the
 * tag of every output expression whose column is a tag column is stored in
 * the context of that expression.
 *
 * @param pRuntimeEnv  runtime environment owning the contexts
 * @param pTableId     table the tag values are read from
 * @param tsdb         tsdb handle
 */
void setTagVal(SQueryRuntimeEnv *pRuntimeEnv, STableId* pTableId, void *tsdb) {
  SQuery *     pQuery = pRuntimeEnv->pQuery;
  SSqlFuncMsg *pFirstFunc = &pQuery->pSelectExpr[0].base;

  const bool tsCompOnly = (pQuery->numOfOutput == 1 && pFirstFunc->functionId == TSDB_FUNC_TS_COMP);
  if (tsCompOnly) {
    assert(pFirstFunc->numOfParams == 1);
    doSetTagValueInParam(tsdb, pTableId, pFirstFunc->arg->argValue.i64, &pRuntimeEnv->pCtx[0].tag);
    return;
  }

  // set tag value, by which the results are aggregated.
  for (int32_t k = 0; k < pQuery->numOfOutput; ++k) {
    SColIndex *pCol = &pQuery->pSelectExpr[k].base.colInfo;

    // ts_comp column required the tag value for join filter
    if (TSDB_COL_IS_TAG(pCol->flag)) {
      // todo use tag column index to optimize performance
      doSetTagValueInParam(tsdb, pTableId, pCol->colId, &pRuntimeEnv->pCtx[k].tag);
    }
  }

  // set the join tag for first column
  if (pFirstFunc->functionId == TSDB_FUNC_TS && pFirstFunc->colInfo.colIndex == PRIMARYKEY_TIMESTAMP_COL_INDEX &&
      pRuntimeEnv->pTSBuf != NULL) {
    assert(pFirstFunc->numOfParams == 1);
    assert(0);  // to do fix me
    //      doSetTagValueInParam(pTagSchema, pFuncMsg->arg->argValue.i64, pMeterSidInfo, &pRuntimeEnv->pCtx[0].tag);
  }
}

/*
 * Merge one window result into the output of the function contexts.
 *
 * First pass: every context is pointed at the cell of its column in
 * pWindowRes; when mergeFlag is false the output position is advanced by one
 * result and the function state is re-initialised. For tag/tag_dummy
 * expressions the tag variant is rebuilt from the input cell.
 * Second pass: distMergeFunc is called for every expression except tag_dummy.
 *
 * @param timestamp   stored as nStartQueryTimestamp in every context
 * @param pWindowRes  window result to merge
 * @param mergeFlag   true to merge into the current output row, false to
 *                    start a new one
 */
static void doMerge(SQueryRuntimeEnv *pRuntimeEnv, int64_t timestamp, SWindowResult *pWindowRes, bool mergeFlag) {
  SQuery *        pQuery = pRuntimeEnv->pQuery;
  SQLFunctionCtx *pCtx = pRuntimeEnv->pCtx;

  for (int32_t i = 0; i < pQuery->numOfOutput; ++i) {
    SQLFunctionCtx *pCur = &pCtx[i];
    int32_t         functionId = pQuery->pSelectExpr[i].base.functionId;

    if (!mergeFlag) {
      pCur->aOutputBuf += pCur->outputBytes;
      pCur->currentStage = FIRST_STAGE_MERGE;

      resetResultInfo(pCur->resultInfo);
      aAggs[functionId].init(pCur);
    }

    pCur->hasNull = true;
    pCur->nStartQueryTimestamp = timestamp;
    pCur->aInputElemBuf = getPosInResultPage(pRuntimeEnv, i, pWindowRes);

    if (functionId != TSDB_FUNC_TAG_DUMMY && functionId != TSDB_FUNC_TAG) {
      continue;
    }

    // in case of tag column, the tag information should be extracted from input buffer
    tVariantDestroy(&pCur->tag);

    int32_t type = pCur->outputType;
    if (type == TSDB_DATA_TYPE_BINARY || type == TSDB_DATA_TYPE_NCHAR) {
      tVariantCreateFromBinary(&pCur->tag, varDataVal(pCur->aInputElemBuf), varDataLen(pCur->aInputElemBuf), type);
    } else {
      tVariantCreateFromBinary(&pCur->tag, pCur->aInputElemBuf, pCur->inputBytes, pCur->inputType);
    }
  }

  for (int32_t i = 0; i < pQuery->numOfOutput; ++i) {
    int32_t functionId = pQuery->pSelectExpr[i].base.functionId;

    if (functionId != TSDB_FUNC_TAG_DUMMY) {
      aAggs[functionId].distMergeFunc(&pCtx[i]);
    }
  }
}

2322
static UNUSED_FUNC void printBinaryData(int32_t functionId, char *data, int32_t srcDataType) {
2323 2324 2325 2326 2327 2328 2329 2330 2331 2332 2333 2334 2335 2336 2337 2338 2339 2340 2341 2342 2343 2344 2345 2346 2347 2348 2349 2350 2351 2352 2353 2354 2355 2356 2357 2358 2359 2360 2361 2362 2363 2364 2365 2366 2367 2368 2369 2370 2371 2372 2373 2374 2375 2376 2377 2378 2379 2380 2381 2382 2383 2384 2385 2386 2387 2388 2389 2390
  if (functionId == TSDB_FUNC_FIRST_DST || functionId == TSDB_FUNC_LAST_DST) {
    switch (srcDataType) {
      case TSDB_DATA_TYPE_BINARY:
        printf("%" PRId64 ",%s\t", *(TSKEY *)data, (data + TSDB_KEYSIZE + 1));
        break;
      case TSDB_DATA_TYPE_TINYINT:
      case TSDB_DATA_TYPE_BOOL:
        printf("%" PRId64 ",%d\t", *(TSKEY *)data, *(int8_t *)(data + TSDB_KEYSIZE + 1));
        break;
      case TSDB_DATA_TYPE_SMALLINT:
        printf("%" PRId64 ",%d\t", *(TSKEY *)data, *(int16_t *)(data + TSDB_KEYSIZE + 1));
        break;
      case TSDB_DATA_TYPE_BIGINT:
      case TSDB_DATA_TYPE_TIMESTAMP:
        printf("%" PRId64 ",%" PRId64 "\t", *(TSKEY *)data, *(TSKEY *)(data + TSDB_KEYSIZE + 1));
        break;
      case TSDB_DATA_TYPE_INT:
        printf("%" PRId64 ",%d\t", *(TSKEY *)data, *(int32_t *)(data + TSDB_KEYSIZE + 1));
        break;
      case TSDB_DATA_TYPE_FLOAT:
        printf("%" PRId64 ",%f\t", *(TSKEY *)data, *(float *)(data + TSDB_KEYSIZE + 1));
        break;
      case TSDB_DATA_TYPE_DOUBLE:
        printf("%" PRId64 ",%lf\t", *(TSKEY *)data, *(double *)(data + TSDB_KEYSIZE + 1));
        break;
    }
  } else if (functionId == TSDB_FUNC_AVG) {
    printf("%lf,%d\t", *(double *)data, *(int32_t *)(data + sizeof(double)));
  } else if (functionId == TSDB_FUNC_SPREAD) {
    printf("%lf,%lf\t", *(double *)data, *(double *)(data + sizeof(double)));
  } else if (functionId == TSDB_FUNC_TWA) {
    data += 1;
    printf("%lf,%" PRId64 ",%" PRId64 ",%" PRId64 "\t", *(double *)data, *(int64_t *)(data + 8),
           *(int64_t *)(data + 16), *(int64_t *)(data + 24));
  } else if (functionId == TSDB_FUNC_MIN || functionId == TSDB_FUNC_MAX) {
    switch (srcDataType) {
      case TSDB_DATA_TYPE_TINYINT:
      case TSDB_DATA_TYPE_BOOL:
        printf("%d\t", *(int8_t *)data);
        break;
      case TSDB_DATA_TYPE_SMALLINT:
        printf("%d\t", *(int16_t *)data);
        break;
      case TSDB_DATA_TYPE_BIGINT:
      case TSDB_DATA_TYPE_TIMESTAMP:
        printf("%" PRId64 "\t", *(int64_t *)data);
        break;
      case TSDB_DATA_TYPE_INT:
        printf("%d\t", *(int *)data);
        break;
      case TSDB_DATA_TYPE_FLOAT:
        printf("%f\t", *(float *)data);
        break;
      case TSDB_DATA_TYPE_DOUBLE:
        printf("%f\t", *(float *)data);
        break;
    }
  } else if (functionId == TSDB_FUNC_SUM) {
    if (srcDataType == TSDB_DATA_TYPE_FLOAT || srcDataType == TSDB_DATA_TYPE_DOUBLE) {
      printf("%lf\t", *(float *)data);
    } else {
      printf("%" PRId64 "\t", *(int64_t *)data);
    }
  } else {
    printf("%s\t", data);
  }
}

2391
/*
 * Debug helper: print the intermediate results of a super table query to stdout, one line per row.
 *
 * pdata[i] holds the values of the i-th output column; the value of row j starts at
 * pdata[i]->data + bytes(i) * j. Binary columns are delegated to printBinaryData() together with the
 * function id of the column. Columns of any other type than the ones listed below print nothing.
 */
void UNUSED_FUNC displayInterResult(tFilePage **pdata, SQueryRuntimeEnv* pRuntimeEnv, int32_t numOfRows) {
  SQuery* pQuery = pRuntimeEnv->pQuery;
  int32_t numOfOutput = pQuery->numOfOutput;

  printf("super table query intermediate result, total:%d\n", numOfRows);

  for (int32_t row = 0; row < numOfRows; ++row) {
    for (int32_t col = 0; col < numOfOutput; ++col) {
      int32_t type = pQuery->pSelectExpr[col].type;
      char *  pVal = (char *)pdata[col]->data + pQuery->pSelectExpr[col].bytes * row;

      switch (type) {
        case TSDB_DATA_TYPE_BINARY:
          printBinaryData(pQuery->pSelectExpr[col].base.functionId, pVal, type);
          break;
        case TSDB_DATA_TYPE_TIMESTAMP:
        case TSDB_DATA_TYPE_BIGINT:
          printf("%" PRId64 "\t", *(int64_t *)pVal);
          break;
        case TSDB_DATA_TYPE_INT:
          printf("%d\t", *(int32_t *)pVal);
          break;
        case TSDB_DATA_TYPE_FLOAT:
          printf("%f\t", *(float *)pVal);
          break;
        case TSDB_DATA_TYPE_DOUBLE:
          printf("%lf\t", *(double *)pVal);
          break;
      }
    }

    printf("\n");
  }
}

typedef struct SCompSupporter {
H
hjxilinx 已提交
2430 2431 2432
  STableQueryInfo **pTableQueryInfo;
  int32_t *         position;
  SQInfo *          pQInfo;
2433 2434 2435 2436 2437
} SCompSupporter;

int32_t tableResultComparFn(const void *pLeft, const void *pRight, void *param) {
  int32_t left = *(int32_t *)pLeft;
  int32_t right = *(int32_t *)pRight;
2438

2439 2440
  SCompSupporter *  supporter = (SCompSupporter *)param;
  SQueryRuntimeEnv *pRuntimeEnv = &supporter->pQInfo->runtimeEnv;
2441

2442 2443
  int32_t leftPos = supporter->position[left];
  int32_t rightPos = supporter->position[right];
2444

2445 2446 2447 2448
  /* left source is exhausted */
  if (leftPos == -1) {
    return 1;
  }
2449

2450 2451 2452 2453
  /* right source is exhausted*/
  if (rightPos == -1) {
    return -1;
  }
2454

H
hjxilinx 已提交
2455
  SWindowResInfo *pWindowResInfo1 = &supporter->pTableQueryInfo[left]->windowResInfo;
2456
  SWindowResult * pWindowRes1 = getWindowResult(pWindowResInfo1, leftPos);
2457

2458 2459
  char *b1 = getPosInResultPage(pRuntimeEnv, PRIMARYKEY_TIMESTAMP_COL_INDEX, pWindowRes1);
  TSKEY leftTimestamp = GET_INT64_VAL(b1);
2460

H
hjxilinx 已提交
2461
  SWindowResInfo *pWindowResInfo2 = &supporter->pTableQueryInfo[right]->windowResInfo;
2462
  SWindowResult * pWindowRes2 = getWindowResult(pWindowResInfo2, rightPos);
2463

2464 2465
  char *b2 = getPosInResultPage(pRuntimeEnv, PRIMARYKEY_TIMESTAMP_COL_INDEX, pWindowRes2);
  TSKEY rightTimestamp = GET_INT64_VAL(b2);
2466

2467 2468 2469
  if (leftTimestamp == rightTimestamp) {
    return 0;
  }
2470

2471 2472 2473
  return leftTimestamp > rightTimestamp ? 1 : -1;
}

2474
/*
 * Merge the results of the remaining groups, one group at a time, starting at pQInfo->groupIndex and
 * stopping after the first group that generates at least one result.
 *
 * pQInfo->groupIndex is advanced past every group that has been processed.
 *
 * @return TSDB_CODE_SUCCESS, or -1 if the merge of a group failed
 */
int32_t mergeIntoGroupResult(SQInfo *pQInfo) {
  int64_t st = taosGetTimestampMs();

  int32_t numOfGroups = taosArrayGetSize(pQInfo->groupInfo.pGroupList);

  while (pQInfo->groupIndex < numOfGroups) {
    SArray *group = taosArrayGetP(pQInfo->groupInfo.pGroupList, pQInfo->groupIndex);

    int32_t ret = mergeIntoGroupResultImpl(pQInfo, group);
    if (ret < 0) {  // not enough disk space to save the data into disk
      return -1;
    }

    pQInfo->groupIndex += 1;

    // this group generates at least one result, return results
    if (ret > 0) {
      break;
    }

    assert(pQInfo->numOfGroupResultPages == 0);
    qTrace("QInfo:%p no result in group %d, continue", pQInfo, pQInfo->groupIndex - 1);
  }

  // the elapsed time is an int64_t, print it with PRId64 as everywhere else in this file instead of %lld
  int64_t elapsed = taosGetTimestampMs() - st;
  qTrace("QInfo:%p merge res data into group, index:%d, total group:%d, elapsed time:%" PRId64 "ms", pQInfo,
         pQInfo->groupIndex - 1, numOfGroups, elapsed);

  return TSDB_CODE_SUCCESS;
}

/*
 * Copy the next batch of merged group results from the disk-based result buffer into the output
 * buffer of the query (pQuery->sdata) and set pQuery->rec.rows accordingly.
 *
 * If every batch of the current group has been consumed already, the following groups are merged first;
 * when no group is left, pQInfo->tableIndex is set to the number of tables to mark the query as completed.
 */
void copyResToQueryResultBuf(SQInfo *pQInfo, SQuery *pQuery) {
  bool curGroupConsumed = (pQInfo->offset == pQInfo->numOfGroupResultPages);

  if (curGroupConsumed) {
    pQInfo->numOfGroupResultPages = 0;

    // current results of group has been sent to client, try next group
    if (mergeIntoGroupResult(pQInfo) != TSDB_CODE_SUCCESS) {
      return;  // failed to save data in the disk
    }

    // check if all results has been sent to client
    int32_t numOfGroup = taosArrayGetSize(pQInfo->groupInfo.pGroupList);
    if (pQInfo->groupIndex == numOfGroup && pQInfo->numOfGroupResultPages == 0) {
      pQInfo->tableIndex = pQInfo->groupInfo.numOfTables;  // set query completed
      return;
    }
  }

  SQueryRuntimeEnv *   pRuntimeEnv = &pQInfo->runtimeEnv;
  SDiskbasedResultBuf *pResultBuf = pRuntimeEnv->pResultBuf;

  // groupIndex refers to the next group already, hence the results to return belong to groupIndex - 1
  int32_t baseId = getGroupResultId(pQInfo->groupIndex - 1);
  SIDList pageIds = getDataBufPagesIdList(pResultBuf, pQInfo->offset + baseId);

  // first pass: count the rows of all pages of this batch
  int32_t numOfRows = 0;
  for (int32_t p = 0; p < pageIds.size; ++p) {
    tFilePage *pPage = getResultBufferPageById(pResultBuf, pageIds.pData[p]);
    numOfRows += pPage->num;
  }

  // second pass: append the rows of every page, column by column, to the output buffer
  int32_t copied = 0;
  for (int32_t p = 0; p < pageIds.size; ++p) {
    tFilePage *pPage = getResultBufferPageById(pResultBuf, pageIds.pData[p]);

    for (int32_t col = 0; col < pQuery->numOfOutput; ++col) {
      int32_t bytes = pRuntimeEnv->pCtx[col].outputBytes;
      char *  pDest = pQuery->sdata[col]->data;

      // in a page, the values of column col start at offset[col] * (number of rows in the page)
      memcpy(pDest + copied * bytes, pPage->data + pRuntimeEnv->offset[col] * pPage->num,
             bytes * pPage->num);
    }

    copied += pPage->num;
  }

  assert(pQuery->rec.rows == 0);

  pQuery->rec.rows += numOfRows;
  pQInfo->offset += 1;
}

/*
 * Get the number of result rows of one window result, i.e. the largest numOfRes among all output
 * columns, ignoring the ts/tag/tagprj columns.
 *
 * @return the maximum number of results, 0 if no column produced any
 */
int64_t getNumOfResultWindowRes(SQueryRuntimeEnv *pRuntimeEnv, SWindowResult *pWindowRes) {
  SQuery *pQuery = pRuntimeEnv->pQuery;
  int64_t numOfRes = 0;

  for (int32_t col = 0; col < pQuery->numOfOutput; ++col) {
    /*
     * ts, tag, tagprj function can not decide the output number of current query
     * the number of output result is decided by main output
     */
    switch (pQuery->pSelectExpr[col].base.functionId) {
      case TSDB_FUNC_TS:
      case TSDB_FUNC_TAG:
      case TSDB_FUNC_TAGPRJ:
        continue;
      default:
        break;
    }

    SResultInfo *pResultInfo = &pWindowRes->resultInfo[col];
    if (pResultInfo == NULL) {
      continue;
    }

    if (pResultInfo->numOfRes > numOfRes) {
      numOfRes = pResultInfo->numOfRes;
    }
  }

  return numOfRes;
}

2580
/*
 * Merge the window results of all tables of one group into group results and flush them into the
 * disk-based result buffer.
 *
 * The tables that own results are the sources of a multiway merge driven by a loser tree, ordered by
 * the timestamp of the window results (see tableResultComparFn). Window results of different tables
 * with the same timestamp are merged into one output row.
 *
 * All temporary resources are released on every exit path.
 *
 * @param pQInfo  query handle
 * @param pGroup  array of SGroupItem, the tables of the group
 * @return pQInfo->numOfGroupResultPages on success (0 if no table of the group owns any result),
 *         -1 if an allocation or flushing the results failed
 */
int32_t mergeIntoGroupResultImpl(SQInfo *pQInfo, SArray *pGroup) {
  SQueryRuntimeEnv *pRuntimeEnv = &pQInfo->runtimeEnv;
  SQuery *          pQuery = pRuntimeEnv->pQuery;

  size_t      size = taosArrayGetSize(pGroup);
  tFilePage **buffer = pQuery->sdata;

  int32_t           code = -1;
  SLoserTreeInfo *  pTree = NULL;
  SResultInfo *     pResultInfo = NULL;
  int32_t *         posList = calloc(size, sizeof(int32_t));
  STableQueryInfo **pTableList = malloc(POINTER_BYTES * size);

  // a zero-sized request may legitimately yield NULL, so NULL is a failure only if something was requested
  if (size > 0 && (posList == NULL || pTableList == NULL)) {
    qError("QInfo:%p failed to allocate buffer for merging group results, abort query", pQInfo);
    goto _cleanup;
  }

  // todo opt for the case of one table per group
  int32_t numOfTables = 0;
  for (size_t i = 0; i < size; ++i) {
    SGroupItem *     item = taosArrayGet(pGroup, i);
    STableQueryInfo *pInfo = item->info;

    // only the tables that own at least one result page and one window result take part in the merge
    SIDList list = getDataBufPagesIdList(pRuntimeEnv->pResultBuf, pInfo->id.tid);
    if (list.size > 0 && pInfo->windowResInfo.size > 0) {
      pTableList[numOfTables] = pInfo;
      numOfTables += 1;
    }
  }

  if (numOfTables == 0) {
    assert(pQInfo->numOfGroupResultPages == 0);

    code = 0;
    goto _cleanup;
  }

  SCompSupporter cs = {pTableList, posList, pQInfo};

  tLoserTreeCreate(&pTree, numOfTables, &cs, tableResultComparFn);
  if (pTree == NULL) {
    qError("QInfo:%p failed to create loser tree for merging group results, abort query", pQInfo);
    goto _cleanup;
  }

  pResultInfo = calloc(pQuery->numOfOutput, sizeof(SResultInfo));
  if (pResultInfo == NULL) {
    qError("QInfo:%p failed to allocate result info for merging group results, abort query", pQInfo);
    goto _cleanup;
  }

  setWindowResultInfo(pResultInfo, pQuery, pRuntimeEnv->stableQuery);
  resetMergeResultBuf(pQuery, pRuntimeEnv->pCtx, pResultInfo);

  int64_t lastTimestamp = -1;
  int64_t startt = taosGetTimestampMs();

  while (1) {
    // the root of the loser tree refers to the source with the smallest current timestamp
    int32_t pos = pTree->pNode[0].index;

    SWindowResInfo *pWindowResInfo = &pTableList[pos]->windowResInfo;
    SWindowResult * pWindowRes = getWindowResult(pWindowResInfo, cs.position[pos]);

    char *b = getPosInResultPage(pRuntimeEnv, PRIMARYKEY_TIMESTAMP_COL_INDEX, pWindowRes);
    TSKEY ts = GET_INT64_VAL(b);

    assert(ts == pWindowRes->window.skey);

    // window results without any output row are skipped
    if (getNumOfResultWindowRes(pRuntimeEnv, pWindowRes) > 0) {
      if (ts == lastTimestamp) {  // merge with the last one
        doMerge(pRuntimeEnv, ts, pWindowRes, true);
      } else {  // copy data to disk buffer
        if (buffer[0]->num == pQuery->rec.capacity) {
          if (flushFromResultBuf(pQInfo) != TSDB_CODE_SUCCESS) {
            qError("QInfo:%p failed to flush data into temp file, abort query", pQInfo);
            goto _cleanup;
          }

          resetMergeResultBuf(pQuery, pRuntimeEnv->pCtx, pResultInfo);
        }

        doMerge(pRuntimeEnv, ts, pWindowRes, false);
        buffer[0]->num += 1;
      }

      lastTimestamp = ts;
    }

    // move this source to its next window result
    cs.position[pos] += 1;
    if (cs.position[pos] >= pWindowResInfo->size) {
      cs.position[pos] = -1;

      // all input sources are exhausted
      if (--numOfTables == 0) {
        break;
      }
    }

    tLoserTreeAdjust(pTree, pos + pTree->numOfEntries);
  }

  if (buffer[0]->num != 0) {  // there are data in buffer
    if (flushFromResultBuf(pQInfo) != TSDB_CODE_SUCCESS) {
      qError("QInfo:%p failed to flush data into temp file, abort query", pQInfo);
      goto _cleanup;
    }
  }

  int64_t endt = taosGetTimestampMs();

#ifdef _DEBUG_VIEW
  displayInterResult(pQuery->sdata, pRuntimeEnv, pQuery->sdata[0]->num);
#endif

  // GET_QINFO_ADDR() expects the address of the runtime environment, not pQuery: log the handle we already own
  qTrace("QInfo:%p result merge completed, elapsed time:%" PRId64 " ms", pQInfo, endt - startt);

  pQInfo->offset = 0;
  code = pQInfo->numOfGroupResultPages;

_cleanup:
  if (pResultInfo != NULL) {
    for (int32_t i = 0; i < pQuery->numOfOutput; ++i) {
      tfree(pResultInfo[i].interResultBuf);
    }
  }

  tfree(pResultInfo);
  tfree(pTree);
  tfree(pTableList);
  tfree(posList);

  return code;
}

/*
 * Flush the merged rows held in the output buffer of the query (pQuery->sdata) into new pages of the
 * disk-based result buffer, all of them registered under one id of the current group, and increase
 * pQInfo->numOfGroupResultPages by one.
 *
 * In a page, the values of output column i start at pRuntimeEnv->offset[i] * (number of rows in the page).
 *
 * @return TSDB_CODE_SUCCESS, or -1 if a single row does not fit into a page or no page is available
 */
int32_t flushFromResultBuf(SQInfo *pQInfo) {
  SQueryRuntimeEnv *   pRuntimeEnv = &pQInfo->runtimeEnv;
  SQuery *             pQuery = pRuntimeEnv->pQuery;
  SDiskbasedResultBuf *pResultBuf = pRuntimeEnv->pResultBuf;

  // number of rows that fit into one page
  int32_t capacity = (DEFAULT_INTERN_BUF_PAGE_SIZE - sizeof(tFilePage)) / pQuery->rowSize;
  if (capacity <= 0) {  // the loop below would never make any progress
    qError("QInfo:%p row size:%d exceeds the page size of result buffer", pQInfo, (int32_t)pQuery->rowSize);
    return -1;
  }

  // the base value for group result, since the maximum number of table for each vnode will not exceed 100,000.
  // NOTE(review): the comment above presumably describes getGroupResultId() - confirm
  int32_t id = getGroupResultId(pQInfo->groupIndex) + pQInfo->numOfGroupResultPages;
  int32_t pageId = -1;

  int32_t remain = pQuery->sdata[0]->num;
  int32_t offset = 0;

  while (remain > 0) {
    int32_t r = (remain > capacity) ? capacity : remain;

    tFilePage *buf = getNewDataBuf(pResultBuf, id, &pageId);
    if (buf == NULL) {
      qError("QInfo:%p failed to get new page from result buffer", pQInfo);
      return -1;
    }

    buf->num = r;

    // pagewise copy to dest buffer
    for (int32_t i = 0; i < pQuery->numOfOutput; ++i) {
      int32_t bytes = pRuntimeEnv->pCtx[i].outputBytes;

      memcpy(buf->data + pRuntimeEnv->offset[i] * buf->num, ((char *)pQuery->sdata[i]->data) + offset * bytes,
             buf->num * bytes);
    }

    offset += r;
    remain -= r;
  }

  pQInfo->numOfGroupResultPages += 1;
  return TSDB_CODE_SUCCESS;
}

/*
 * Reset the output buffer of the query and the function contexts before (another round of) merging:
 * every output page is emptied and every context is bound to its entry of pResultInfo.
 */
void resetMergeResultBuf(SQuery *pQuery, SQLFunctionCtx *pCtx, SResultInfo *pResultInfo) {
  int32_t numOfOutput = pQuery->numOfOutput;

  for (int32_t col = 0; col < numOfOutput; ++col) {
    SQLFunctionCtx *pColCtx = &pCtx[col];
    tFilePage *     pPage = pQuery->sdata[col];

    pPage->num = 0;

    pColCtx->resultInfo = &pResultInfo[col];
    pColCtx->startOffset = 0;
    pColCtx->size = 1;

    // the output position starts one row ahead of the page data
    // NOTE(review): presumably the merge routine moves it forward before writing a new row - confirm
    pColCtx->aOutputBuf = pPage->data - pColCtx->outputBytes;
  }
}

2761 2762 2763 2764 2765 2766 2767
/*
 * Turn the query range of one table around for the reverse scan: the new range starts one step
 * behind the last accessed key and ends at the start key of the previous scan.
 * The order of the cursor is switched and its vgroup index is reset to -1.
 */
static void updateTableQueryInfoForReverseScan(SQuery *pQuery, STableQueryInfo *pTableQueryInfo) {
  if (pTableQueryInfo == NULL) {
    return;
  }

  // order has change already!
  int32_t step = GET_FORWARD_DIRECTION_FACTOR(pQuery->order.order);

  // TODO validate the assertion
  //  if (!QUERY_IS_ASC_QUERY(pQuery)) {
  //    assert(pTableQueryInfo->win.ekey >= pTableQueryInfo->lastKey + step);
  //  } else {
  //    assert(pTableQueryInfo->win.ekey <= pTableQueryInfo->lastKey + step);
  //  }

  TSKEY prevStart = pTableQueryInfo->win.skey;
  TSKEY newStart = pTableQueryInfo->lastKey + step;

  pTableQueryInfo->win.skey = newStart;
  pTableQueryInfo->win.ekey = prevStart;
  pTableQueryInfo->lastKey = newStart;

  SWITCH_ORDER(pTableQueryInfo->cur.order);
  pTableQueryInfo->cur.vgroupIndex = -1;
}

/*
 * For every closed window result, decide which functions have to run in the reverse scan:
 * first/first_dst of an ascending query and last/last_dst of a descending query are marked as not
 * completed, every other function except ts/tag is marked as completed. ts/tag are left untouched.
 */
static void disableFuncInReverseScanImpl(SQInfo* pQInfo, SWindowResInfo *pWindowResInfo, int32_t order) {
  SQuery* pQuery = pQInfo->runtimeEnv.pQuery;

  for (int32_t w = 0; w < pWindowResInfo->size; ++w) {
    SWindowStatus *pStatus = getTimeWindowResStatus(pWindowResInfo, w);
    if (pStatus->closed) {
      SWindowResult *pRes = getWindowResult(pWindowResInfo, w);

      // open/close the specified query for each group result
      for (int32_t col = 0; col < pQuery->numOfOutput; ++col) {
        int32_t fid = pQuery->pSelectExpr[col].base.functionId;

        bool firstInAsc = (fid == TSDB_FUNC_FIRST || fid == TSDB_FUNC_FIRST_DST) && order == TSDB_ORDER_ASC;
        bool lastInDesc = (fid == TSDB_FUNC_LAST || fid == TSDB_FUNC_LAST_DST) && order == TSDB_ORDER_DESC;

        if (firstInAsc || lastInDesc) {
          pRes->resultInfo[col].complete = false;
        } else if (fid != TSDB_FUNC_TS && fid != TSDB_FUNC_TAG) {
          pRes->resultInfo[col].complete = true;
        }
      }
    }
  }
}

2810 2811
/*
 * Prepare the functions and the tables of the query for the reverse scan.
 *
 * Only first/first_dst (ascending query) and last/last_dst (descending query) are kept active, every
 * other function except ts/tag is marked as completed. Afterwards the query range of every table of
 * every group is turned around.
 */
void disableFuncInReverseScan(SQInfo *pQInfo) {
  SQueryRuntimeEnv* pRuntimeEnv = &pQInfo->runtimeEnv;
  SQuery *pQuery = pRuntimeEnv->pQuery;
  int32_t order = pQuery->order.order;

  if (isGroupbyNormalCol(pQuery->pGroupbyExpr) || isIntervalQuery(pQuery)) {
    // group by normal columns and interval query on normal table
    disableFuncInReverseScanImpl(pQInfo, &pRuntimeEnv->windowResInfo, order);
  } else {  // for simple result of table query,
    for (int32_t col = 0; col < pQuery->numOfOutput; ++col) {  // todo refactor
      SResultInfo *pResInfo = pRuntimeEnv->pCtx[col].resultInfo;
      if (pResInfo == NULL) {
        continue;  // resultInfo is NULL, means no data checked in previous scan
      }

      int32_t fid = pQuery->pSelectExpr[col].base.functionId;

      bool firstInAsc = (fid == TSDB_FUNC_FIRST || fid == TSDB_FUNC_FIRST_DST) && order == TSDB_ORDER_ASC;
      bool lastInDesc = (fid == TSDB_FUNC_LAST || fid == TSDB_FUNC_LAST_DST) && order == TSDB_ORDER_DESC;

      if (firstInAsc || lastInDesc) {
        pResInfo->complete = false;
      } else if (fid != TSDB_FUNC_TS && fid != TSDB_FUNC_TAG) {
        pResInfo->complete = true;
      }
    }
  }

  // turn the query range of each table around
  int32_t numOfGroups = taosArrayGetSize(pQInfo->groupInfo.pGroupList);

  for (int32_t g = 0; g < numOfGroups; ++g) {
    SArray *pTables = taosArrayGetP(pQInfo->groupInfo.pGroupList, g);
    size_t  numOfTables = taosArrayGetSize(pTables);

    for (int32_t k = 0; k < numOfTables; ++k) {
      SGroupItem *pItem = taosArrayGet(pTables, k);
      updateTableQueryInfoForReverseScan(pQuery, pItem->info);
    }
  }
}

2850
/* Switch the order of the function context of every output column. */
void switchCtxOrder(SQueryRuntimeEnv *pRuntimeEnv) {
  int32_t         numOfOutput = pRuntimeEnv->pQuery->numOfOutput;
  SQLFunctionCtx *pCtxList = pRuntimeEnv->pCtx;

  for (int32_t col = 0; col < numOfOutput; ++col) {
    SWITCH_ORDER(pCtxList[col].order);
  }
}

/*
 * Initialize a window result: record its position (a copy of *posInfo) and allocate one zeroed
 * SResultInfo per output column, which is then set up by setWindowResultInfo().
 */
void createQueryResultInfo(SQuery *pQuery, SWindowResult *pResultRow, bool isSTableQuery, SPosInfo *posInfo) {
  size_t numOfOutput = (size_t)pQuery->numOfOutput;

  pResultRow->pos = *posInfo;
  pResultRow->resultInfo = calloc(numOfOutput, sizeof(SResultInfo));

  // set the intermediate result output buffer
  setWindowResultInfo(pResultRow->resultInfo, pQuery, isSTableQuery);
}

/*
 * Rewind the function contexts to the beginning of the (zeroed) output buffer of the query, reset
 * the result info of each output column and run the init function of each aggregate again.
 */
void resetCtxOutputBuf(SQueryRuntimeEnv *pRuntimeEnv) {
  SQuery *pQuery = pRuntimeEnv->pQuery;

  for (int32_t col = 0; col < pQuery->numOfOutput; ++col) {
    SQLFunctionCtx *pCtx = &pRuntimeEnv->pCtx[col];
    SResultInfo *   pResInfo = &pRuntimeEnv->resultInfo[col];

    pCtx->aOutputBuf = pQuery->sdata[col]->data;

    /*
     * set the output buffer information and intermediate buffer
     * not all queries require the interResultBuf, such as COUNT/TAGPRJ/PRJ/TAG etc.
     */
    resetResultInfo(pResInfo);
    pCtx->resultInfo = pResInfo;

    // set the timestamp output buffer for top/bottom/diff query
    switch (pQuery->pSelectExpr[col].base.functionId) {
      case TSDB_FUNC_TOP:
      case TSDB_FUNC_BOTTOM:
      case TSDB_FUNC_DIFF:
        pCtx->ptsOutputBuf = pRuntimeEnv->pCtx[0].aOutputBuf;
        break;
      default:
        break;
    }

    memset(pQuery->sdata[col]->data, 0, (size_t)pQuery->pSelectExpr[col].bytes * pQuery->rec.capacity);
  }

  initCtxOutputBuf(pRuntimeEnv);
}

/*
 * Move the output position of the function contexts forward by the given number of rows and reset
 * the result info of every output column. Must not be used with the diff function.
 */
void forwardCtxOutputBuf(SQueryRuntimeEnv *pRuntimeEnv, int64_t output) {
  SQuery *pQuery = pRuntimeEnv->pQuery;

  // reset the execution contexts
  for (int32_t col = 0; col < pQuery->numOfOutput; ++col) {
    SQLFunctionCtx *pCtx = &pRuntimeEnv->pCtx[col];
    int32_t         fid = pQuery->pSelectExpr[col].base.functionId;

    assert(fid != TSDB_FUNC_DIFF);

    // set next output position
    if (IS_OUTER_FORWARD(aAggs[fid].nStatus)) {
      pCtx->aOutputBuf += pCtx->outputBytes * output;
    }

    /*
     * NOTE: for top/bottom query, the value of first column of output (timestamp) are assigned
     * in the procedure of top/bottom routine
     * the output buffer in top/bottom routine is ptsOutputBuf, so we need to forward the output buffer
     *
     * diff function is handled in multi-output function
     */
    if (fid == TSDB_FUNC_TOP || fid == TSDB_FUNC_BOTTOM) {
      pCtx->ptsOutputBuf += TSDB_KEYSIZE * output;
    }

    resetResultInfo(pCtx->resultInfo);
  }
}

/* Reset the stage of every function context to 0 and call the init function of its aggregate. */
void initCtxOutputBuf(SQueryRuntimeEnv *pRuntimeEnv) {
  SQuery *pQuery = pRuntimeEnv->pQuery;

  for (int32_t col = 0; col < pQuery->numOfOutput; ++col) {
    SQLFunctionCtx *pCtx = &pRuntimeEnv->pCtx[col];

    pCtx->currentStage = 0;
    aAggs[pQuery->pSelectExpr[col].base.functionId].init(pCtx);
  }
}

2932
/*
 * Apply the remaining offset of the limit clause to the rows currently held in the output buffer.
 *
 * If the buffer does not hold more rows than the remaining offset, all rows are dropped, the offset
 * is reduced accordingly and the output buffer is reset. Otherwise the first "offset" rows are removed,
 * the remaining rows are moved to the head of the buffer and the output positions of the function
 * contexts are set right behind them.
 */
void skipResults(SQueryRuntimeEnv *pRuntimeEnv) {
  SQuery *pQuery = pRuntimeEnv->pQuery;
  if (pQuery->rec.rows == 0 || pQuery->limit.offset == 0) {
    return;
  }

  if (pQuery->rec.rows <= pQuery->limit.offset) {
    // the number of rows is a 64-bit value: print it with a 64-bit specifier and cast the arguments explicitly
    qTrace("QInfo:%p skip rows:%" PRId64 ", new offset:%" PRIu64, GET_QINFO_ADDR(pRuntimeEnv),
           (int64_t)pQuery->rec.rows, (uint64_t)(pQuery->limit.offset - pQuery->rec.rows));

    pQuery->limit.offset -= pQuery->rec.rows;
    pQuery->rec.rows = 0;

    resetCtxOutputBuf(pRuntimeEnv);

    // clear the buffer full flag if exists
    CLEAR_QUERY_STATUS(pQuery, QUERY_RESBUF_FULL);
  } else {
    int64_t numOfSkip = pQuery->limit.offset;
    pQuery->rec.rows -= numOfSkip;
    pQuery->limit.offset = 0;

    qTrace("QInfo:%p skip row:%"PRId64", new offset:%d, numOfRows remain:%" PRIu64, GET_QINFO_ADDR(pRuntimeEnv), numOfSkip,
           0, (uint64_t)pQuery->rec.rows);

    for (int32_t i = 0; i < pQuery->numOfOutput; ++i) {
      int32_t functionId = pQuery->pSelectExpr[i].base.functionId;
      int32_t bytes = pRuntimeEnv->pCtx[i].outputBytes;

      // source and destination overlap, hence memmove
      memmove(pQuery->sdata[i]->data, (char*) pQuery->sdata[i]->data + bytes * numOfSkip, pQuery->rec.rows * bytes);
      pRuntimeEnv->pCtx[i].aOutputBuf = ((char*) pQuery->sdata[i]->data) + pQuery->rec.rows * bytes;

      // diff/top/bottom write their timestamps into the output buffer of the first column
      if (functionId == TSDB_FUNC_DIFF || functionId == TSDB_FUNC_TOP || functionId == TSDB_FUNC_BOTTOM) {
        pRuntimeEnv->pCtx[i].ptsOutputBuf = pRuntimeEnv->pCtx[0].aOutputBuf;
      }
    }

    updateNumOfResult(pRuntimeEnv, pQuery->rec.rows);
  }
}

/*
 * Set the status of the query. QUERY_NOT_COMPLETED replaces the whole status, any other status is
 * added to the current status after the QUERY_NOT_COMPLETED flag has been cleared.
 */
void setQueryStatus(SQuery *pQuery, int8_t status) {
  if (status != QUERY_NOT_COMPLETED) {
    // QUERY_NOT_COMPLETED is not compatible with any other status, so clear its position first
    CLEAR_QUERY_STATUS(pQuery, QUERY_NOT_COMPLETED);
    pQuery->status |= status;
    return;
  }

  pQuery->status = status;
}

/*
 * Move every function (except ts) to its next step and check if another scan of the data blocks is
 * required. For group by normal column/interval queries this is done for each closed window result.
 *
 * @return true if at least one function is not completed after xNextStep has been invoked
 */
bool needScanDataBlocksAgain(SQueryRuntimeEnv *pRuntimeEnv) {
  SQuery *pQuery = pRuntimeEnv->pQuery;
  bool    scanAgain = false;

  if (!isGroupbyNormalCol(pQuery->pGroupbyExpr) && !isIntervalQuery(pQuery)) {
    // one result only, handle the function contexts directly
    for (int32_t col = 0; col < pQuery->numOfOutput; ++col) {
      int16_t fid = pQuery->pSelectExpr[col].base.functionId;
      if (fid == TSDB_FUNC_TS) {
        continue;
      }

      aAggs[fid].xNextStep(&pRuntimeEnv->pCtx[col]);

      SResultInfo *pResInfo = GET_RES_INFO(&pRuntimeEnv->pCtx[col]);
      if (!pResInfo->complete) {
        scanAgain = true;
      }
    }

    return scanAgain;
  }

  // for each closed group/window result, bind the output buffer and move each column to the next step
  SWindowResInfo *pWindowResInfo = &pRuntimeEnv->windowResInfo;

  for (int32_t w = 0; w < pWindowResInfo->size; ++w) {
    SWindowResult *pResult = getWindowResult(pWindowResInfo, w);
    if (!pResult->status.closed) {
      continue;
    }

    setWindowResOutputBuf(pRuntimeEnv, pResult);

    for (int32_t col = 0; col < pQuery->numOfOutput; ++col) {
      int16_t fid = pQuery->pSelectExpr[col].base.functionId;
      if (fid == TSDB_FUNC_TS) {
        continue;
      }

      aAggs[fid].xNextStep(&pRuntimeEnv->pCtx[col]);

      SResultInfo *pResInfo = GET_RES_INFO(&pRuntimeEnv->pCtx[col]);
      if (!pResInfo->complete) {
        scanAgain = true;
      }
    }
  }

  return scanAgain;
}

H
Haojun Liao 已提交
3029
/*
 * Take a snapshot of the query state at the beginning of a scan: status, current window index,
 * the start key and the query window. The snapshot window ranges from start to the end key of the
 * current table; all remaining members are zeroed.
 */
static SQueryStatusInfo getQueryStatusInfo(SQueryRuntimeEnv *pRuntimeEnv, TSKEY start) {
  SQuery *         pQuery = pRuntimeEnv->pQuery;
  STableQueryInfo *pCurrent = pQuery->current;

  // the start key must not be beyond the last accessed key with respect to the scan order
  assert(QUERY_IS_ASC_QUERY(pQuery) ? (start <= pCurrent->lastKey) : (start >= pCurrent->lastKey));

  SQueryStatusInfo info = {0};

  info.status = pQuery->status;
  info.windowIndex = pRuntimeEnv->windowResInfo.curIndex;
  info.lastKey = start;
  info.w = pQuery->window;
  info.curWindow.skey = start;
  info.curWindow.ekey = pCurrent->win.ekey;

  return info;
}

3047 3048 3049 3050
/*
 * Set up the runtime environment for the reverse scan: save the cursor of the ts buffer, turn the
 * query window and the scan order around, create a new secondary query handle for the reversed
 * range and adjust the function contexts and the tables.
 */
static void setEnvBeforeReverseScan(SQueryRuntimeEnv *pRuntimeEnv, SQueryStatusInfo *pStatus) {
  SQuery *pQuery = pRuntimeEnv->pQuery;
  SQInfo *pQInfo = GET_QINFO_ADDR(pRuntimeEnv);

  // save the cursor
  pStatus->cur = tsBufGetCursor(pRuntimeEnv->pTSBuf);
  if (pRuntimeEnv->pTSBuf != NULL) {
    SWITCH_ORDER(pRuntimeEnv->pTSBuf->cur.order);
    tsBufNextPos(pRuntimeEnv->pTSBuf);
  }

  // reverse order time range
  pQuery->window = pStatus->curWindow;
  SWAP(pQuery->window.skey, pQuery->window.ekey, TSKEY);

  SWITCH_ORDER(pQuery->order.order);
  SET_REVERSE_SCAN_FLAG(pRuntimeEnv);

  STsdbQueryCond cond = {0};
  cond.twindow = pQuery->window;
  cond.order = pQuery->order.order;
  cond.colList = pQuery->colList;
  cond.numOfCols = pQuery->numOfCols;

  // clean unused handle
  if (pRuntimeEnv->pSecQueryHandle != NULL) {
    tsdbCleanupQueryHandle(pRuntimeEnv->pSecQueryHandle);
  }

  pRuntimeEnv->pSecQueryHandle = tsdbQueryTables(pQInfo->tsdb, &cond, &pQInfo->tableIdGroupInfo);

  setQueryStatus(pQuery, QUERY_NOT_COMPLETED);
  switchCtxOrder(pRuntimeEnv);
  disableFuncInReverseScan(pQInfo);
}

3083 3084
/*
 * Undo the changes of the reverse scan: switch the scan order of the query, the function contexts
 * and the ts buffer back, restore the ts buffer cursor, and restore the status, the last key and the
 * query window from the snapshot.
 */
static void clearEnvAfterReverseScan(SQueryRuntimeEnv *pRuntimeEnv, SQueryStatusInfo *pStatus) {
  SQuery *         pQuery = pRuntimeEnv->pQuery;
  STableQueryInfo *pCurrent = pQuery->current;

  SWITCH_ORDER(pQuery->order.order);
  switchCtxOrder(pRuntimeEnv);

  tsBufSetCursor(pRuntimeEnv->pTSBuf, &pStatus->cur);
  if (pRuntimeEnv->pTSBuf != NULL) {
    pRuntimeEnv->pTSBuf->cur.order = pQuery->order.order;
  }

  SET_MASTER_SCAN_FLAG(pRuntimeEnv);

  // update the pQuery->window.skey and pQuery->window.ekey to limit the scan scope of sliding query
  // during reverse scan
  pQuery->status = pStatus->status;

  pCurrent->lastKey = pStatus->lastKey;
  pCurrent->win = pStatus->w;

  pQuery->window = pCurrent->win;
}

H
Haojun Liao 已提交
3106
/*
 * Scan the data blocks of the current table.
 *
 * The master scan is followed by repeated scans of the range covered by the master scan as long as
 * any function requires another step (see needScanDataBlocksAgain), and by one reverse scan if the
 * query needs it. The function returns early, right after a repeat scan has been set up, if the
 * query is killed.
 *
 * @param start  the start key of this scan
 */
void scanAllDataBlocks(SQueryRuntimeEnv *pRuntimeEnv, TSKEY start) {
  SQuery *         pQuery = pRuntimeEnv->pQuery;
  SQInfo *         pQInfo = (SQInfo *) GET_QINFO_ADDR(pRuntimeEnv);
  STableQueryInfo *pCurrent = pQuery->current;

  setQueryStatus(pQuery, QUERY_NOT_COMPLETED);

  // store the start query position
  SQueryStatusInfo saved = getQueryStatusInfo(pRuntimeEnv, start);

  SET_MASTER_SCAN_FLAG(pRuntimeEnv);
  int32_t step = GET_FORWARD_DIRECTION_FACTOR(pQuery->order.order);

  for (;;) {
    doScanAllDataBlocks(pRuntimeEnv);

    // remember where the master scan stopped, repeat scans are restricted to that range
    if (IS_MASTER_SCAN(pRuntimeEnv)) {
      saved.lastKey = pCurrent->lastKey;
      saved.curWindow.ekey = pCurrent->lastKey - step;
      saved.status = pQuery->status;
    }

    if (!needScanDataBlocksAgain(pRuntimeEnv)) {
      // restore the status code and jump out of loop
      if (pRuntimeEnv->scanFlag == REPEAT_SCAN) {
        pQuery->status = saved.status;
      }

      break;
    }

    STsdbQueryCond cond = {0};
    cond.twindow = saved.curWindow;
    cond.order = pQuery->order.order;
    cond.colList = pQuery->colList;
    cond.numOfCols = pQuery->numOfCols;

    // release the handle of the previous repeat scan before a new one is created
    if (pRuntimeEnv->pSecQueryHandle != NULL) {
      tsdbCleanupQueryHandle(pRuntimeEnv->pSecQueryHandle);
    }

    pRuntimeEnv->pSecQueryHandle = tsdbQueryTables(pQInfo->tsdb, &cond, &pQInfo->tableIdGroupInfo);
    pRuntimeEnv->windowResInfo.curIndex = saved.windowIndex;

    setQueryStatus(pQuery, QUERY_NOT_COMPLETED);
    pRuntimeEnv->scanFlag = REPEAT_SCAN;

    qTrace("QInfo:%p start to repeat scan data blocks due to query func required, qrange:%"PRId64"-%"PRId64, pQInfo,
        cond.twindow.skey, cond.twindow.ekey);

    // check if query is killed or not
    if (isQueryKilled(pQInfo)) {
      return;
    }
  }

  if (needReverseScan(pQuery)) {
    setEnvBeforeReverseScan(pRuntimeEnv, &saved);

    // reverse scan from current position
    qTrace("QInfo:%p start to reverse scan", pQInfo);
    doScanAllDataBlocks(pRuntimeEnv);

    clearEnvAfterReverseScan(pRuntimeEnv, &saved);
  }
}

H
hjxilinx 已提交
3176
/*
 * Call the finalize function of every output column. For group by normal column/interval queries
 * this is done for each closed window result, whose number of rows is updated as well; for a
 * group by normal column query all windows are closed first.
 */
void finalizeQueryResult(SQueryRuntimeEnv *pRuntimeEnv) {
  SQuery *pQuery = pRuntimeEnv->pQuery;
  bool    groupbyCol = isGroupbyNormalCol(pQuery->pGroupbyExpr);

  if (!groupbyCol && !isIntervalQuery(pQuery)) {
    for (int32_t col = 0; col < pQuery->numOfOutput; ++col) {
      aAggs[pQuery->pSelectExpr[col].base.functionId].xFinalize(&pRuntimeEnv->pCtx[col]);
    }

    return;
  }

  // for each group result, call the finalize function for each column
  SWindowResInfo *pWindowResInfo = &pRuntimeEnv->windowResInfo;
  if (groupbyCol) {
    closeAllTimeWindow(pWindowResInfo);
  }

  for (int32_t w = 0; w < pWindowResInfo->size; ++w) {
    if (!isWindowResClosed(pWindowResInfo, w)) {
      continue;
    }

    SWindowResult *pRes = &pWindowResInfo->pResult[w];
    setWindowResOutputBuf(pRuntimeEnv, pRes);

    for (int32_t col = 0; col < pQuery->numOfOutput; ++col) {
      aAggs[pQuery->pSelectExpr[col].base.functionId].xFinalize(&pRuntimeEnv->pCtx[col]);
    }

    /*
     * set the number of output results for group by normal columns, the number of output rows usually is 1 except
     * the top and bottom query
     */
    pRes->numOfRows = getNumOfResult(pRuntimeEnv);
  }
}

/* Check if the query has at least one output column that is neither ts nor tag nor tagprj. */
static bool hasMainOutput(SQuery *pQuery) {
  for (int32_t col = 0; col < pQuery->numOfOutput; ++col) {
    switch (pQuery->pSelectExpr[col].base.functionId) {
      case TSDB_FUNC_TS:
      case TSDB_FUNC_TAG:
      case TSDB_FUNC_TAGPRJ:
        break;
      default:
        return true;
    }
  }

  return false;
}

weixin_48148422's avatar
weixin_48148422 已提交
3224 3225 3226 3227 3228
/*
 * Allocate and initialize the per-table query information.
 *
 * @param pRuntimeEnv  runtime environment, forwarded to initWindowResInfo
 * @param tableId      id of the table this object belongs to
 * @param win          query time window of the table; lastKey starts at win.skey
 * @return the new object, or NULL if the memory allocation failed. The object is released with
 *         destroyTableQueryInfo.
 */
static STableQueryInfo *createTableQueryInfo(
  SQueryRuntimeEnv *pRuntimeEnv,
  STableId tableId,
  STimeWindow win
) {
  STableQueryInfo *pTableQueryInfo = calloc(1, sizeof(STableQueryInfo));
  if (pTableQueryInfo == NULL) {  // out of memory: report to the caller instead of dereferencing NULL below
    return NULL;
  }

  pTableQueryInfo->win = win;
  pTableQueryInfo->lastKey = win.skey;

  pTableQueryInfo->id = tableId;
  pTableQueryInfo->cur.vgroupIndex = -1;  // -1: the ts-comp cursor has not been positioned yet (see setAdditionalInfo)

  initWindowResInfo(&pTableQueryInfo->windowResInfo, pRuntimeEnv, 100, 100, TSDB_DATA_TYPE_INT);
  return pTableQueryInfo;
}

3241
/*
 * Release a table query info object together with its window result information.
 * Passing NULL is allowed and does nothing.
 */
void destroyTableQueryInfo(STableQueryInfo *pTableQueryInfo, int32_t numOfCols) {
  if (pTableQueryInfo != NULL) {
    cleanupTimeWindowInfo(&pTableQueryInfo->windowResInfo, numOfCols);
    free(pTableQueryInfo);
  }
}

/*
 * Make the given table the current table of the query, and verify that its lastKey has not moved behind the
 * start of its time window with respect to the scan direction.
 */
void restoreIntervalQueryRange(SQueryRuntimeEnv *pRuntimeEnv, STableQueryInfo *pTableQueryInfo) {
  SQuery *pQuery = pRuntimeEnv->pQuery;
  pQuery->current = pTableQueryInfo;

  // ascending scan: lastKey >= win.skey; descending scan: lastKey <= win.skey
  assert(QUERY_IS_ASC_QUERY(pQuery) ? (pTableQueryInfo->lastKey >= pTableQueryInfo->win.skey)
                                    : (pTableQueryInfo->lastKey <= pTableQueryInfo->win.skey));
}

/**
 * set output buffer for different group
 * @param pRuntimeEnv
3261
 * @param pDataBlockInfo
3262
 */
3263
void setExecutionContext(SQInfo *pQInfo, STableId* pTableId, int32_t groupIndex, TSKEY nextKey) {
3264
  SQueryRuntimeEnv *pRuntimeEnv = &pQInfo->runtimeEnv;
H
hjxilinx 已提交
3265 3266
  STableQueryInfo *pTableQueryInfo = pRuntimeEnv->pQuery->current;
  
3267 3268
  SWindowResInfo *  pWindowResInfo = &pRuntimeEnv->windowResInfo;
  int32_t           GROUPRESULTID = 1;
3269

3270
  SWindowResult *pWindowRes = doSetTimeWindowFromKey(pRuntimeEnv, pWindowResInfo, (char *)&groupIndex, sizeof(groupIndex));
3271 3272 3273
  if (pWindowRes == NULL) {
    return;
  }
3274

3275 3276 3277 3278 3279 3280 3281 3282 3283 3284
  /*
   * not assign result buffer yet, add new result buffer
   * all group belong to one result set, and each group result has different group id so set the id to be one
   */
  if (pWindowRes->pos.pageId == -1) {
    if (addNewWindowResultBuf(pWindowRes, pRuntimeEnv->pResultBuf, GROUPRESULTID, pRuntimeEnv->numOfRowsPerPage) !=
        TSDB_CODE_SUCCESS) {
      return;
    }
  }
3285

3286 3287
  setWindowResOutputBuf(pRuntimeEnv, pWindowRes);
  initCtxOutputBuf(pRuntimeEnv);
3288

3289
  pTableQueryInfo->lastKey = nextKey;
H
hjxilinx 已提交
3290
  setAdditionalInfo(pQInfo, pTableId, pTableQueryInfo);
3291 3292 3293 3294
}

/*
 * Redirect every function context of the runtime environment to the buffers of the given window result.
 * Note: there is only a fixed number of results for each group.
 */
static void setWindowResOutputBuf(SQueryRuntimeEnv *pRuntimeEnv, SWindowResult *pResult) {
  SQuery *pQuery = pRuntimeEnv->pQuery;

  for (int32_t col = 0; col < pQuery->numOfOutput; ++col) {
    SQLFunctionCtx *pCtx = &pRuntimeEnv->pCtx[col];
    int32_t         fid = pQuery->pSelectExpr[col].base.functionId;

    pCtx->aOutputBuf = getPosInResultPage(pRuntimeEnv, col, pResult);

    // top/bottom/diff write their timestamps into the output buffer of the first column
    bool needTsOutput = (fid == TSDB_FUNC_TOP) || (fid == TSDB_FUNC_BOTTOM) || (fid == TSDB_FUNC_DIFF);
    if (needTsOutput) {
      pCtx->ptsOutputBuf = pRuntimeEnv->pCtx[0].aOutputBuf;
    }

    // attach the per-column result info (and intermediate buffer) kept in the window result
    pCtx->resultInfo = &pResult->resultInfo[col];

    // propagate the super table query flag
    SResultInfo *pInfo = GET_RES_INFO(pCtx);
    pInfo->superTableQ = pRuntimeEnv->stableQuery;
  }
}

H
hjxilinx 已提交
3317
/*
 * Set the tag values of the given table and, if a timestamp buffer (pTSBuf) is attached to the query, position
 * its cursor for this table.
 *
 * On the first visit of a table (cur.vgroupIndex == -1) the start position is located by the tag value found in
 * the first function context and the resulting cursor is saved in the table query info; on later visits the
 * saved cursor is restored.
 *
 * @return always 0
 */
int32_t setAdditionalInfo(SQInfo *pQInfo, STableId* pTableId, STableQueryInfo *pTableQueryInfo) {
  SQueryRuntimeEnv *pRuntimeEnv = &pQInfo->runtimeEnv;
  assert(pTableQueryInfo->lastKey >= 0);

  setTagVal(pRuntimeEnv, pTableId, pQInfo->tsdb);

  // both the master and supplement scan need the correct ts comp start position
  if (pRuntimeEnv->pTSBuf == NULL) {
    return 0;
  }

  if (pTableQueryInfo->cur.vgroupIndex != -1) {
    tsBufSetCursor(pRuntimeEnv->pTSBuf, &pTableQueryInfo->cur);
    return 0;
  }

  pTableQueryInfo->tag = pRuntimeEnv->pCtx[0].tag.i64Key;
  tsBufGetElemStartPos(pRuntimeEnv->pTSBuf, 0, pTableQueryInfo->tag);

  // keep the cursor info of the current table
  pTableQueryInfo->cur = pRuntimeEnv->pTSBuf->cur;
  return 0;
}

/*
 * There are two cases to handle:
 *
 * 1. Query range is not set yet (queryRangeSet = 0). we need to set the query range info, including pQuery->lastKey,
 *    pQuery->window.skey, and pQuery->eKey.
 * 2. Query range is set and query is in progress. There may be another result with the same query ranges to be
 *    merged during merge stage. In this case, we need the pTableQueryInfo->lastResRows to decide if there
 *    is a previous result generated or not.
 */
H
hjxilinx 已提交
3349
void setIntervalQueryRange(SQInfo *pQInfo, TSKEY key) {
3350 3351
  SQueryRuntimeEnv *pRuntimeEnv = &pQInfo->runtimeEnv;
  SQuery *          pQuery = pRuntimeEnv->pQuery;
H
hjxilinx 已提交
3352 3353
  STableQueryInfo *pTableQueryInfo = pQuery->current;
  
3354 3355 3356
  if (pTableQueryInfo->queryRangeSet) {
    pTableQueryInfo->lastKey = key;
  } else {
3357
    pTableQueryInfo->win.skey = key;
3358
    STimeWindow win = {.skey = key, .ekey = pQuery->window.ekey};
3359

3360 3361 3362 3363 3364
    // for too small query range, no data in this interval.
    if ((QUERY_IS_ASC_QUERY(pQuery) && (pQuery->window.ekey < pQuery->window.skey)) ||
        (!QUERY_IS_ASC_QUERY(pQuery) && (pQuery->window.skey < pQuery->window.ekey))) {
      return;
    }
3365

3366 3367 3368 3369 3370 3371
    /**
     * In handling the both ascending and descending order super table query, we need to find the first qualified
     * timestamp of this table, and then set the first qualified start timestamp.
     * In ascending query, key is the first qualified timestamp. However, in the descending order query, additional
     * operations involve.
     */
3372 3373
    TSKEY           skey1, ekey1;
    STimeWindow     w = {0};
3374
    SWindowResInfo *pWindowResInfo = &pTableQueryInfo->windowResInfo;
3375

H
Haojun Liao 已提交
3376 3377 3378
    TSKEY sk = MIN(win.skey, win.ekey);
    TSKEY ek = MAX(win.skey, win.ekey);
    getAlignQueryTimeWindow(pQuery, win.skey, sk, ek, &skey1, &ekey1, &w);
3379
    pWindowResInfo->startTime = pTableQueryInfo->win.skey;  // windowSKey may be 0 in case of 1970 timestamp
3380

3381 3382
    if (pWindowResInfo->prevSKey == TSKEY_INITIAL_VAL) {
      if (!QUERY_IS_ASC_QUERY(pQuery)) {
H
Haojun Liao 已提交
3383
        assert(win.ekey == pQuery->window.ekey);
3384
      }
3385 3386
      
      pWindowResInfo->prevSKey = w.skey;
3387
    }
3388

3389
    pTableQueryInfo->queryRangeSet = 1;
3390
    pTableQueryInfo->lastKey = pTableQueryInfo->win.skey;
3391 3392 3393 3394
  }
}

/*
 * Check whether any function in the select list carries the TSDB_FUNCSTATE_NEED_TS status flag.
 */
bool requireTimestamp(SQuery *pQuery) {
  int32_t col = 0;

  while (col < pQuery->numOfOutput) {
    int32_t fid = pQuery->pSelectExpr[col].base.functionId;
    if ((aAggs[fid].nStatus & TSDB_FUNCSTATE_NEED_TS) != 0) {
      return true;
    }

    col++;
  }

  return false;
}

/*
 * Decide whether the primary timestamp column of a data block has to be loaded:
 * 1. the lastKey of the current table or the end key of the query window is located inside the block, so the
 *    timestamps are needed to find the precise position;
 * 2. at least one function of the query requires the timestamp (see requireTimestamp).
 */
bool needPrimaryTimestampCol(SQuery *pQuery, SDataBlockInfo *pDataBlockInfo) {
  STimeWindow *    pBlockWin = &pDataBlockInfo->window;
  STableQueryInfo *pCurrent = pQuery->current;

  if (pCurrent->lastKey >= pBlockWin->skey && pCurrent->lastKey <= pBlockWin->ekey) {
    return true;
  }

  if (pQuery->window.ekey >= pBlockWin->skey && pQuery->window.ekey <= pBlockWin->ekey) {
    return true;
  }

  return requireTimestamp(pQuery);
}

/*
 * Number of result subsets of the query: the number of closed time windows for group-by-normal-column and
 * interval queries, otherwise the number of entries in the table group list.
 */
static int32_t getNumOfSubset(SQInfo *pQInfo) {
  SQuery *pQuery = pQInfo->runtimeEnv.pQuery;

  if (isGroupbyNormalCol(pQuery->pGroupbyExpr) || isIntervalQuery(pQuery)) {
    return numOfClosedTimeWindow(&pQInfo->runtimeEnv.windowResInfo);
  }

  return taosArrayGetSize(pQInfo->groupInfo.pGroupList);
}

/*
 * Copy window results into the output columns of the query (pQuery->sdata).
 *
 * Copying resumes from the group recorded in pQInfo->groupIndex and the row offset recorded in pQInfo->offset;
 * both are advanced so that a following call continues where this one stopped. Copying stops once
 * pQuery->rec.capacity rows have been produced or all subsets have been visited.
 *
 * @param pQInfo     query info object
 * @param result     array of window results
 * @param orderType  TSDB_ORDER_ASC to walk the results forwards, anything else to walk them backwards
 * @return number of rows copied by this call
 */
static int32_t doCopyToSData(SQInfo *pQInfo, SWindowResult *result, int32_t orderType) {
  SQueryRuntimeEnv *pRuntimeEnv = &pQInfo->runtimeEnv;
  SQuery *          pQuery = pRuntimeEnv->pQuery;

  qTrace("QInfo:%p start to copy data from windowResInfo to query buf", pQInfo);
  int32_t totalSubset = getNumOfSubset(pQInfo);

  bool    ascending = (orderType == TSDB_ORDER_ASC);
  int32_t step = ascending ? 1 : -1;
  int32_t first = ascending ? pQInfo->groupIndex : (totalSubset - pQInfo->groupIndex - 1);

  int32_t copied = 0;
  for (int32_t idx = first; (idx >= 0) && (idx < totalSubset); idx += step) {
    SWindowResult *pRes = &result[idx];

    // empty group, move on to the next one
    if (pRes->numOfRows == 0) {
      pQInfo->offset = 0;
      pQInfo->groupIndex += 1;
      continue;
    }

    assert(pRes->numOfRows >= 0 && pQInfo->offset <= 1);

    int32_t rowsToCopy = pRes->numOfRows - pQInfo->offset;
    int32_t prevOffset = pQInfo->offset;

    if (rowsToCopy > pQuery->rec.capacity - copied) {
      // the remaining output space cannot hold the whole group: copy a part and remember where to resume
      rowsToCopy = pQuery->rec.capacity - copied;
      pQInfo->offset += rowsToCopy;
    } else {
      // the group is copied completely
      pQInfo->offset = 0;
      pQInfo->groupIndex += 1;
    }

    for (int32_t col = 0; col < pQuery->numOfOutput; ++col) {
      int32_t bytes = pRuntimeEnv->pCtx[col].outputBytes;

      char *src = getPosInResultPage(pRuntimeEnv, col, pRes);
      char *dst = pQuery->sdata[col]->data + copied * bytes;
      memcpy(dst, src + prevOffset * bytes, bytes * rowsToCopy);
    }

    copied += rowsToCopy;
    if (copied == pQuery->rec.capacity) {
      break;
    }
  }

  qTrace("QInfo:%p copy data to query buf completed", pQInfo);

#ifdef _DEBUG_VIEW
  displayInterResult(pQuery->sdata, pRuntimeEnv, copied);
#endif
  return copied;
}

/**
 * Copy window results to the output buffer of the query and add the number of copied rows to pQuery->rec.rows.
 *
 * The copy order is taken from the group-by expression when the query has one; otherwise the results are copied
 * in ascending order.
 *
 * @param pQInfo  query info object
 * @param result  array of window results to copy from
 */
void copyFromWindowResToSData(SQInfo *pQInfo, SWindowResult *result) {
  SQuery *pQuery = pQInfo->runtimeEnv.pQuery;

  int32_t orderType = TSDB_ORDER_ASC;
  if (pQuery->pGroupbyExpr != NULL) {
    orderType = pQuery->pGroupbyExpr->orderType;
  }

  pQuery->rec.rows += doCopyToSData(pQInfo, result, orderType);
  assert(pQuery->rec.rows <= pQuery->rec.capacity);
}

H
hjxilinx 已提交
3516
/*
 * For queries without a time interval (pQuery->intervalTime == 0), record the number of result rows in the window
 * result of the group the table belongs to. The value is only written while it is still 0.
 *
 * @param pRuntimeEnv      runtime environment holding the group window results
 * @param pTableQueryInfo  table whose groupIndex selects the window result
 */
static void updateWindowResNumOfRes(SQueryRuntimeEnv *pRuntimeEnv, STableQueryInfo *pTableQueryInfo) {
  SQuery *pQuery = pRuntimeEnv->pQuery;

  // only update the number of rows for the corresponding window result
  if (pQuery->intervalTime != 0) {
    return;
  }

  int32_t g = pTableQueryInfo->groupIndex;
  assert(pRuntimeEnv->windowResInfo.size > 0);

  SWindowResult *pWindowRes = doSetTimeWindowFromKey(pRuntimeEnv, &pRuntimeEnv->windowResInfo, (char *)&g, sizeof(g));
  if (pWindowRes == NULL) {
    // the lookup may fail (setExecutionContext guards against the same case); nothing to update then
    return;
  }

  if (pWindowRes->numOfRows == 0) {
    pWindowRes->numOfRows = getNumOfResult(pRuntimeEnv);
  }
}

H
hjxilinx 已提交
3531
void stableApplyFunctionsOnBlock(SQueryRuntimeEnv *pRuntimeEnv, STableQueryInfo *pTableQueryInfo,
3532 3533 3534 3535
                                 SDataBlockInfo *pDataBlockInfo, SDataStatis *pStatis, SArray *pDataBlock,
                                 __block_search_fn_t searchFn) {
  SQuery *         pQuery = pRuntimeEnv->pQuery;
  SWindowResInfo * pWindowResInfo = &pTableQueryInfo->windowResInfo;
H
hjxilinx 已提交
3536
  pQuery->pos = QUERY_IS_ASC_QUERY(pQuery)? 0 : pDataBlockInfo->rows - 1;
3537

3538
  if (pQuery->numOfFilterCols > 0 || pRuntimeEnv->pTSBuf != NULL) {
3539
    rowwiseApplyFunctions(pRuntimeEnv, pStatis, pDataBlockInfo, pWindowResInfo, pDataBlock);
3540
  } else {
3541
    blockwiseApplyFunctions(pRuntimeEnv, pStatis, pDataBlockInfo, pWindowResInfo, searchFn, pDataBlock);
3542
  }
3543

H
hjxilinx 已提交
3544
  updateWindowResNumOfRes(pRuntimeEnv, pTableQueryInfo);
3545 3546
}

3547 3548 3549 3550
/*
 * Check whether the fill operation can still generate results for the client.
 *
 * Returns false if the query does not fill (or is a point interpolation query), or if the limit has been reached.
 * Returns true if the fill info still holds rows that have not been returned. Otherwise, once the query is
 * completed, the answer depends on whether filling up to the revised end key of the query window generates rows.
 */
bool queryHasRemainResults(SQueryRuntimeEnv* pRuntimeEnv) {
  SQuery *   pQuery = pRuntimeEnv->pQuery;
  SFillInfo *pFillInfo = pRuntimeEnv->pFillInfo;

  // todo refactor
  if (pQuery->fillType == TSDB_FILL_NONE || isPointInterpoQuery(pQuery)) {
    assert(pFillInfo == NULL);
    return false;
  }

  bool limitReached = (pQuery->limit.limit > 0) && (pQuery->rec.rows >= pQuery->limit.limit);
  if (limitReached) {
    return false;
  }

  // Results not returned to the client yet have to be handled by the fill operation in the first place.
  int32_t remain = taosNumOfRemainRows(pFillInfo);
  if (remain > 0) {
    return true;
  }

  /*
   * No results are pending at this point. If the query is not completed yet, the gap between two result blocks
   * is handled after the next data block is retrieved from TSDB.
   *
   * NOTE: If the result set is not the first block, the gap in front of the result set will be filled. If the
   * result set is the FIRST result block, the gap between the start time of the query time window and the
   * timestamp of the first result row in the actual result set is not filled.
   */
  if (!Q_STATUS_EQUAL(pQuery->status, QUERY_COMPLETED)) {
    return false;
  }

  TSKEY   ekey = taosGetRevisedEndKey(pQuery->window.ekey, pQuery->order.order, pQuery->slidingTime,
                                      pQuery->slidingTimeUnit, pQuery->precision);
  int32_t numOfTotal = taosGetNumOfResultWithFill(pFillInfo, remain, ekey, pQuery->rec.capacity);
  return numOfTotal > 0;
}

/*
 * Serialize the query result into the response buffer.
 *
 * Layout written to data: the result columns one after another (bytes * numOfRows each), then the number of
 * table id entries as a 32-bit integer in network byte order, then one STableIdInfo per entry with its fields
 * converted to big-endian. Finally, if the query is completed and nothing remains to be returned, the query
 * status is set to QUERY_OVER.
 *
 * @param pQInfo     query info object
 * @param numOfRows  number of result rows to copy for each column
 * @param data       destination buffer; presumably sized by the caller for the layout above -- TODO confirm
 */
static void doCopyQueryResultToMsg(SQInfo *pQInfo, int32_t numOfRows, char *data) {
  SQuery *pQuery = pQInfo->runtimeEnv.pQuery;

  // column data
  for (int32_t c = 0; c < pQuery->numOfOutput; ++c) {
    int32_t colBytes = pQuery->pSelectExpr[c].bytes;

    memmove(data, pQuery->sdata[c]->data, colBytes * numOfRows);
    data += colBytes * numOfRows;
  }

  // table id list
  int32_t numOfTables = (int32_t)taosArrayGetSize(pQInfo->arrTableIdInfo);
  *(int32_t*)data = htonl(numOfTables);
  data += sizeof(int32_t);

  for (int32_t t = 0; t < numOfTables; ++t) {
    STableIdInfo *pFrom = taosArrayGet(pQInfo->arrTableIdInfo, t);
    STableIdInfo *pTo = (STableIdInfo *)data;

    pTo->uid = htobe64(pFrom->uid);
    pTo->tid = htonl(pFrom->tid);
    pTo->key = htobe64(pFrom->key);

    data += sizeof(STableIdInfo);
  }

  // all data returned, set query over
  if (!Q_STATUS_EQUAL(pQuery->status, QUERY_COMPLETED)) {
    return;
  }

  bool over;
  if (pQInfo->runtimeEnv.stableQuery) {
    over = (pQInfo->tableIndex >= pQInfo->groupInfo.numOfTables);
  } else {
    over = !queryHasRemainResults(&pQInfo->runtimeEnv);
  }

  if (over) {
    setQueryStatus(pQuery, QUERY_OVER);
  }
}

3622
/*
 * Generate filled result blocks and apply the remaining OFFSET of the query to them.
 *
 * Blocks are generated into pQuery->sdata until the offset is consumed or no more results remain:
 *  - offset == 0: the generated block is returned as is;
 *  - offset <  generated rows: the first offset rows are discarded by moving the rest to the beginning of the
 *    pDst buffers, and the remaining number of rows is returned;
 *  - otherwise the whole block is discarded, the offset is reduced accordingly, and the next block is generated
 *    as long as queryHasRemainResults reports more results.
 *
 * @param pRuntimeEnv   runtime environment of the query
 * @param pDst          output pages, one per output column
 * @param numOfRows     not used by this function
 * @param numOfInterpo  not used by this function
 * @return number of rows available in the output after the offset has been applied
 */
int32_t doFillGapsInResults(SQueryRuntimeEnv* pRuntimeEnv, tFilePage **pDst, int32_t numOfRows, int32_t *numOfInterpo) {
  SQInfo*    pQInfo = GET_QINFO_ADDR(pRuntimeEnv);
  SQuery *   pQuery = pRuntimeEnv->pQuery;
  SFillInfo* pFillInfo = pRuntimeEnv->pFillInfo;

  while (1) {
    int32_t ret = taosGenerateDataBlock(pFillInfo, (tFilePage**) pQuery->sdata, pQuery->rec.capacity);

    // todo apply limit output function
    /* reached the start position according to the offset value, return immediately */
    if (pQuery->limit.offset == 0) {
      qTrace("QInfo:%p initial numOfRows:%d, generate filled result:%d rows", pQInfo, pFillInfo->numOfRows, ret);
      return ret;
    }

    if (pQuery->limit.offset < ret) {
      // limit.offset is a 64-bit value (it is printed with PRId64 elsewhere in this file), so it and every
      // expression derived from it must be printed with PRId64 as well
      qTrace("QInfo:%p initial numOfRows:%d, generate filled result:%d rows, offset:%" PRId64
             ". Discard due to offset, remain:%" PRId64 ", new offset:%d",
             pQInfo, pFillInfo->numOfRows, ret, pQuery->limit.offset, ret - pQuery->limit.offset, 0);

      ret -= pQuery->limit.offset;
      // todo !!!!there exactly number of interpo is not valid.
      // todo refactor move to the beginning of buffer
      for (int32_t i = 0; i < pQuery->numOfOutput; ++i) {
        memmove(pDst[i]->data, pDst[i]->data + pQuery->pSelectExpr[i].bytes * pQuery->limit.offset,
                ret * pQuery->pSelectExpr[i].bytes);
      }

      pQuery->limit.offset = 0;
      return ret;
    } else {
      qTrace("QInfo:%p initial numOfRows:%d, generate filled result:%d rows, offset:%" PRId64
             ". Discard due to offset, remain:%d, new offset:%" PRId64,
             pQInfo, pFillInfo->numOfRows, ret, pQuery->limit.offset, 0, pQuery->limit.offset - ret);

      // the whole block is consumed by the offset
      pQuery->limit.offset -= ret;
      pQuery->rec.rows = 0;
      ret = 0;
    }

    if (!queryHasRemainResults(pRuntimeEnv)) {
      return ret;
    }
  }
}

3667
/*
 * Write the cost summary of the query to the trace log: the number of checked data blocks, the number of loaded
 * block statistics, the number of rows in the blocks and the number of checked rows.
 *
 * The more detailed breakdown (comp info, field info, file blocks, temp buffer, elapsed time split) is currently
 * disabled.
 * todo add the intermediate result save cost!!
 */
void queryCostStatis(SQInfo *pQInfo) {
  SQueryCostInfo *pSummary = &pQInfo->runtimeEnv.summary;

  qTrace("QInfo:%p cost: check blocks:%d, statis:%d, rows:%"PRId64", check rows:%"PRId64, pQInfo, pSummary->dataBlocks,
         pSummary->loadBlockStatis, pSummary->dataInRows, pSummary->checkRows);
}

3712 3713
/*
 * Consume the remaining OFFSET of the query inside the given data block.
 *
 * If the offset equals the number of rows in the block, the whole block is skipped: lastKey of the current table
 * is moved one step beyond the block boundary in scan direction. Otherwise the scan position is moved to the
 * first row after the skipped rows, lastKey is set to the timestamp of that row, and the query functions are
 * applied to the rest of the block. In both cases the offset is reset to 0.
 */
static void updateOffsetVal(SQueryRuntimeEnv *pRuntimeEnv, SDataBlockInfo *pBlockInfo) {
  SQuery *         pQuery = pRuntimeEnv->pQuery;
  STableQueryInfo *pCurrent = pQuery->current;
  int32_t          step = GET_FORWARD_DIRECTION_FACTOR(pQuery->order.order);

  // the current block is ignored completely
  if (pQuery->limit.offset == pBlockInfo->rows) {
    TSKEY boundary = QUERY_IS_ASC_QUERY(pQuery) ? pBlockInfo->window.ekey : pBlockInfo->window.skey;

    pCurrent->lastKey = boundary + step;
    pQuery->limit.offset = 0;
    return;
  }

  pQuery->pos = QUERY_IS_ASC_QUERY(pQuery) ? pQuery->limit.offset : (pBlockInfo->rows - pQuery->limit.offset - 1);
  assert(pQuery->pos >= 0 && pQuery->pos <= pBlockInfo->rows - 1);

  // the first column of the loaded block is read as the timestamp column
  SArray *         pBlockData = tsdbRetrieveDataBlock(pRuntimeEnv->pQueryHandle, NULL);
  SColumnInfoData *pTsCol = taosArrayGet(pBlockData, 0);
  TSKEY *          tsList = (TSKEY *)pTsCol->pData;

  // continue from the first row that is not skipped
  pCurrent->lastKey = tsList[pQuery->pos];
  pQuery->limit.offset = 0;

  int32_t numOfRes = tableApplyFunctionsOnBlock(pRuntimeEnv, pBlockInfo, NULL, binarySearchForKey, pBlockData);

  qTrace("QInfo:%p check data block, brange:%" PRId64 "-%" PRId64 ", numOfRows:%d, numOfRes:%d, lastKey:%"PRId64, GET_QINFO_ADDR(pRuntimeEnv),
         pBlockInfo->window.skey, pBlockInfo->window.ekey, pBlockInfo->rows, numOfRes, pQuery->current->lastKey);
}
3747

3748 3749 3750 3751 3752
/*
 * Skip whole data blocks according to the OFFSET of the query.
 *
 * Nothing is done when there is no offset or when the query has filter columns. Blocks with fewer rows than the
 * remaining offset are skipped without loading them; the block in which the offset ends is handed over to
 * updateOffsetVal. The loop is left early if the query has been killed.
 */
void skipBlocks(SQueryRuntimeEnv *pRuntimeEnv) {
  SQuery *pQuery = pRuntimeEnv->pQuery;

  bool nothingToSkip = (pQuery->limit.offset <= 0) || (pQuery->numOfFilterCols > 0);
  if (nothingToSkip) {
    return;
  }

  pQuery->pos = 0;
  int32_t step = GET_FORWARD_DIRECTION_FACTOR(pQuery->order.order);

  STableQueryInfo *pCurrent = pQuery->current;
  TsdbQueryHandleT handle = pRuntimeEnv->pQueryHandle;

  while (tsdbNextDataBlock(handle)) {
    if (isQueryKilled(GET_QINFO_ADDR(pRuntimeEnv))) {
      return;
    }

    SDataBlockInfo blockInfo = tsdbRetrieveDataBlockInfo(handle);

    // the offset ends inside this block: find the appropriate start position and stop skipping
    if (pQuery->limit.offset <= blockInfo.rows) {
      updateOffsetVal(pRuntimeEnv, &blockInfo);
      break;
    }

    // skip the whole block and continue right behind it in scan direction
    pQuery->limit.offset -= blockInfo.rows;

    TSKEY boundary = QUERY_IS_ASC_QUERY(pQuery) ? blockInfo.window.ekey : blockInfo.window.skey;
    pCurrent->lastKey = boundary + step;

    qTrace("QInfo:%p skip rows:%d, offset:%" PRId64, GET_QINFO_ADDR(pRuntimeEnv), blockInfo.rows,
           pQuery->limit.offset);
  }
}
3781

H
Haojun Liao 已提交
3782
/*
 * Apply the OFFSET of an interval query by skipping leading time windows.
 *
 * *start is first set to the lastKey of the current table. If there is no offset, or the query has filter
 * columns, a timestamp buffer or fill info, nothing else is done. Otherwise the data blocks are iterated and
 * limit.offset is decreased by one for every time window that ends inside the range of the current block. When
 * the offset reaches 0, the start position of the query (*start, pQuery->window.skey and prevSKey of the window
 * result info) is moved to the next window; if that window begins in the current block, the block is loaded and
 * the query functions are applied to the remaining rows.
 *
 * @param pRuntimeEnv  runtime environment of the query
 * @param start        [out] start timestamp of the query after the skip
 * @return true on every path visible in this function
 */
static bool skipTimeInterval(SQueryRuntimeEnv *pRuntimeEnv, TSKEY* start) {
3783
  SQuery *pQuery = pRuntimeEnv->pQuery;
H
Haojun Liao 已提交
3784
  *start = pQuery->current->lastKey;
3785

3786
  // if queried with value filter (or ts buffer / fill), do NOT forward the query start position
3787
  if (pQuery->limit.offset <= 0 || pQuery->numOfFilterCols > 0 || pRuntimeEnv->pTSBuf != NULL || pRuntimeEnv->pFillInfo != NULL) {
3788
    return true;
3789
  }
3790

3791 3792 3793 3794 3795
  /*
   * For an interval query without interpolation, one time window is skipped at a time, for
   * pQuery->limit.offset windows in total. Since holes may exist in the data, simply adding
   * pQuery->intervalTime * pQuery->limit.offset to the start key is not valid; only windows that actually
   * fall into the scanned data blocks are counted.
   */
3796
  assert(pRuntimeEnv->windowResInfo.prevSKey == TSKEY_INITIAL_VAL);
3797

3798 3799 3800
  // skey1/ekey1 only receive output values of getAlignQueryTimeWindow; they are not read in this function
  TSKEY       skey1, ekey1;
  STimeWindow w = TSWINDOW_INITIALIZER;
  
3801
  SWindowResInfo *pWindowResInfo = &pRuntimeEnv->windowResInfo;
H
hjxilinx 已提交
3802
  STableQueryInfo *pTableQueryInfo = pQuery->current;
3803

3804 3805
  while (tsdbNextDataBlock(pRuntimeEnv->pQueryHandle)) {
    SDataBlockInfo blockInfo = tsdbRetrieveDataBlockInfo(pRuntimeEnv->pQueryHandle);
3806

H
Haojun Liao 已提交
3807 3808 3809 3810 3811 3812 3813
    // initialize the window result info from the aligned time window: once (while prevSKey is unset) for an
    // ascending query, for every block for a descending query
    if (QUERY_IS_ASC_QUERY(pQuery)) {
      if (pWindowResInfo->prevSKey == TSKEY_INITIAL_VAL) {
        getAlignQueryTimeWindow(pQuery, blockInfo.window.skey, blockInfo.window.skey, pQuery->window.ekey, &skey1,
                                &ekey1, &w);
        pWindowResInfo->startTime = w.skey;
        pWindowResInfo->prevSKey = w.skey;
      }
3814 3815 3816
    } else {
      getAlignQueryTimeWindow(pQuery, blockInfo.window.ekey, pQuery->window.ekey, blockInfo.window.ekey, &skey1, &ekey1,
                              &w);
3817

3818 3819 3820
      pWindowResInfo->startTime = pQuery->window.skey;
      pWindowResInfo->prevSKey = w.skey;
    }
3821

3822 3823
    // the first time window to be checked against this block
    STimeWindow win = getActiveTimeWindow(pWindowResInfo, pWindowResInfo->prevSKey, pQuery);
3824

3825 3826 3827 3828 3829 3830
    while (pQuery->limit.offset > 0) {
      // the current window ends inside the range of this block: count it as skipped
      if ((win.ekey <= blockInfo.window.ekey && QUERY_IS_ASC_QUERY(pQuery)) ||
          (win.ekey >= blockInfo.window.skey && !QUERY_IS_ASC_QUERY(pQuery))) {
        pQuery->limit.offset -= 1;
        pWindowResInfo->prevSKey = win.skey;
      }
3831

3832 3833
      // tw: the time window following win
      STimeWindow tw = win;
      getNextTimeWindow(pQuery, &tw);
3834

3835
      if (pQuery->limit.offset == 0) {
3836 3837
        if ((tw.skey <= blockInfo.window.ekey && QUERY_IS_ASC_QUERY(pQuery)) ||
            (tw.ekey >= blockInfo.window.skey && !QUERY_IS_ASC_QUERY(pQuery))) {
H
Haojun Liao 已提交
3838 3839
          // the next window overlaps the current block: load the data block and process the remaining rows
          // TODO optimize performance
3840 3841 3842
          SArray *         pDataBlock = tsdbRetrieveDataBlock(pRuntimeEnv->pQueryHandle, NULL);
          SColumnInfoData *pColInfoData = taosArrayGet(pDataBlock, 0);

3843 3844 3845
          tw = win;
          int32_t startPos =
              getNextQualifiedWindow(pRuntimeEnv, &tw, &blockInfo, pColInfoData->pData, binarySearchForKey);
3846 3847 3848 3849
          assert(startPos >= 0);

          // continue the scan from the located row
          pQuery->pos = startPos;
H
Haojun Liao 已提交
3850 3851 3852 3853 3854 3855
          
          // reset the query start timestamp to the timestamp of the located row
          pTableQueryInfo->win.skey = ((TSKEY *)pColInfoData->pData)[startPos];
          pQuery->window.skey = pTableQueryInfo->win.skey;
          *start = pTableQueryInfo->win.skey;
          
3856
          pWindowResInfo->prevSKey = tw.skey;
H
Haojun Liao 已提交
3857 3858
          // save curIndex, it is restored after the functions have been applied on the block
          int32_t index = pRuntimeEnv->windowResInfo.curIndex;
          
H
hjxilinx 已提交
3859
          int32_t numOfRes = tableApplyFunctionsOnBlock(pRuntimeEnv, &blockInfo, NULL, binarySearchForKey, pDataBlock);
H
Haojun Liao 已提交
3860 3861
          pRuntimeEnv->windowResInfo.curIndex = index;  // restore the window index
          
3862 3863 3864
          qTrace("QInfo:%p check data block, brange:%" PRId64 "-%" PRId64 ", numOfRows:%d, numOfRes:%d, lastKey:%"PRId64,
                 GET_QINFO_ADDR(pRuntimeEnv), blockInfo.window.skey, blockInfo.window.ekey, blockInfo.rows, numOfRes, pQuery->current->lastKey);
          
3865
          return true;
H
Haojun Liao 已提交
3866 3867 3868 3869
        } else { // the next window does not overlap this block: only move the start position, no data is loaded
          *start = tw.skey;
          pQuery->window.skey = tw.skey;
          pWindowResInfo->prevSKey = tw.skey;
3870
          return true;
3871 3872 3873
        }
      }

H
Haojun Liao 已提交
3874 3875 3876 3877 3878 3879 3880
      /*
       * The offset is not consumed yet. If the next time window still starts in the current data block, load the
       * block and find the start position of the next queried time window.
       * Note that only the primary timestamp column is required.
       * TODO: Optimize for this case. Not all data blocks need to be loaded, only the one in which the first
       * actually required time window resides.
       */
3881 3882 3883 3884 3885 3886 3887 3888 3889 3890 3891 3892
      if ((tw.skey <= blockInfo.window.ekey && QUERY_IS_ASC_QUERY(pQuery)) ||
          (tw.ekey >= blockInfo.window.skey && !QUERY_IS_ASC_QUERY(pQuery))) {
        SArray *         pDataBlock = tsdbRetrieveDataBlock(pRuntimeEnv->pQueryHandle, NULL);
        SColumnInfoData *pColInfoData = taosArrayGet(pDataBlock, 0);

        tw = win;
        int32_t startPos =
            getNextQualifiedWindow(pRuntimeEnv, &tw, &blockInfo, pColInfoData->pData, binarySearchForKey);
        assert(startPos >= 0);

        // continue from the located row and make the located window the current one
        pQuery->pos = startPos;
H
hjxilinx 已提交
3893
        pTableQueryInfo->lastKey = ((TSKEY *)pColInfoData->pData)[startPos];
3894 3895
        pWindowResInfo->prevSKey = tw.skey;
        win = tw;
3896
      } else {
H
Haojun Liao 已提交
3897
        break;  // offset is not 0, and next time window begins or ends in the next block.
3898 3899 3900
      }
    }
  }
3901

3902 3903 3904
  // no more data blocks (or the offset was consumed at the end of a block)
  return true;
}

B
Bomin Zhang 已提交
3905 3906
/*
 * Create the TSDB query handle of the query and store it in the runtime environment.
 *
 * No handle is created for queries that only touch tags, nor for super table queries that are neither interval
 * nor fixed-output queries. For an ascending plain query on exactly one table, the time window of that table is
 * used instead of the time window of the query.
 */
static void setupQueryHandle(void* tsdb, SQInfo* pQInfo, bool isSTableQuery) {
  SQueryRuntimeEnv *pRuntimeEnv = &pQInfo->runtimeEnv;
  SQuery *          pQuery = pQInfo->runtimeEnv.pQuery;

  if (onlyQueryTags(pQuery)) {
    return;
  }

  if (isSTableQuery && (!isIntervalQuery(pQuery)) && (!isFixedOutputQuery(pQuery))) {
    return;
  }

  STsdbQueryCond cond = {
    .twindow   = pQuery->window,
    .order     = pQuery->order.order,
    .colList   = pQuery->colList,
    .numOfCols = pQuery->numOfCols,
  };

  bool singleTableAscScan = !isSTableQuery
    && (pQInfo->groupInfo.numOfTables == 1)
    && (cond.order == TSDB_ORDER_ASC)
    && (!isIntervalQuery(pQuery))
    && (!isGroupbyNormalCol(pQuery->pGroupbyExpr))
    && (!isFixedOutputQuery(pQuery));

  if (singleTableAscScan) {
    // take the window of the first table in the first group
    SArray*     pFirstGroup = taosArrayGetP(pQInfo->groupInfo.pGroupList, 0);
    SGroupItem* pFirstItem = taosArrayGet(pFirstGroup, 0);
    cond.twindow = pFirstItem->info->win;
  }

  // select the handle type according to the query type
  if (isFirstLastRowQuery(pQuery)) {
    pRuntimeEnv->pQueryHandle = tsdbQueryLastRow(tsdb, &cond, &pQInfo->tableIdGroupInfo);
    return;
  }

  if (isPointInterpoQuery(pQuery)) {
    pRuntimeEnv->pQueryHandle = tsdbQueryRowsInExternalWindow(tsdb, &cond, &pQInfo->tableIdGroupInfo);
    return;
  }

  pRuntimeEnv->pQueryHandle = tsdbQueryTables(tsdb, &cond, &pQInfo->tableIdGroupInfo);
}

3945 3946 3947 3948 3949 3950 3951 3952 3953 3954 3955 3956 3957
/*
 * Build the per-column fill descriptors for a query, one entry per output
 * expression. Column offsets are the running sum of the preceding columns'
 * byte widths.
 *
 * @param pQuery  query providing numOfOutput, pSelectExpr and fillVal
 * @return heap-allocated array of pQuery->numOfOutput entries, or NULL when
 *         the allocation fails
 */
static SFillColInfo* taosCreateFillColInfo(SQuery* pQuery) {
  int32_t numOfCols = pQuery->numOfOutput;
  int32_t offset = 0;

  SFillColInfo* pFillCol = calloc(numOfCols, sizeof(SFillColInfo));
  if (pFillCol == NULL) {  // out of memory: report it instead of dereferencing NULL below
    return NULL;
  }

  for (int32_t i = 0; i < numOfCols; ++i) {
    SExprInfo* pExprInfo = &pQuery->pSelectExpr[i];

    pFillCol[i].col.bytes  = pExprInfo->bytes;
    pFillCol[i].col.type   = pExprInfo->type;
    pFillCol[i].col.offset = offset;
    pFillCol[i].flag       = TSDB_COL_NORMAL;  // always a normal column for table query
    pFillCol[i].functionId = pExprInfo->base.functionId;
    pFillCol[i].fillVal.i  = pQuery->fillVal[i];

    offset += pExprInfo->bytes;
  }

  return pFillCol;
}

3966
/*
 * Prepare a query for execution: open the query handle, bind the storage
 * handle and optional TS buffer, set up the runtime environment, create the
 * disk-based result buffer / window result info when the query kind needs
 * them, and create the fill info for fill queries.
 *
 * @param pQInfo         query to initialise
 * @param param          optional TS buffer, stored as runtimeEnv.pTSBuf (may be NULL)
 * @param tsdb           storage handle
 * @param vgId           vgroup id recorded in pQInfo
 * @param isSTableQuery  true when the query targets a super table
 * @return TSDB_CODE_SUCCESS, or the error code returned by setupQueryRuntimeEnv /
 *         createDiskbasedResultBuffer
 */
int32_t doInitQInfo(SQInfo *pQInfo, void *param, void *tsdb, int32_t vgId, bool isSTableQuery) {
  SQueryRuntimeEnv *pEnv = &pQInfo->runtimeEnv;
  SQuery           *pQuery = pEnv->pQuery;

  pQuery->precision = tsdbGetCfg(tsdb)->precision;

  setScanLimitationByResultBuffer(pQuery);
  changeExecuteScanOrder(pQuery, false);
  setupQueryHandle(tsdb, pQInfo, isSTableQuery);

  pQInfo->tsdb = tsdb;
  pQInfo->vgId = vgId;

  pEnv->pQuery = pQuery;
  pEnv->pTSBuf = param;
  pEnv->cur.vgroupIndex = -1;
  pEnv->stableQuery = isSTableQuery;

  // traverse the TS buffer ascending when its stored order matches the query order
  if (param != NULL) {
    int16_t order = TSDB_ORDER_DESC;
    if (pQuery->order.order == pEnv->pTSBuf->tsOrder) {
      order = TSDB_ORDER_ASC;
    }

    tsBufSetTraverseOrder(pEnv->pTSBuf, order);
  }

  // create runtime environment
  int32_t code = setupQueryRuntimeEnv(pEnv, pQuery->order.order);
  if (code != TSDB_CODE_SUCCESS) {
    return code;
  }

  pEnv->numOfRowsPerPage = getNumOfRowsInResultPage(pQuery, isSTableQuery);

  // super-table, group-by-column and interval queries all need the disk-based result buffer
  if (isSTableQuery || isGroupbyNormalCol(pQuery->pGroupbyExpr) || isIntervalQuery(pQuery)) {
    int32_t rows = getInitialPageNum(pQInfo);

    code = createDiskbasedResultBuffer(&pEnv->pResultBuf, rows, pQuery->rowSize, pQInfo);
    if (code != TSDB_CODE_SUCCESS) {
      return code;
    }

    if (!isSTableQuery) {
      // window key is the group-by column value, otherwise the window timestamp
      int16_t keyType = TSDB_DATA_TYPE_TIMESTAMP;
      if (isGroupbyNormalCol(pQuery->pGroupbyExpr)) {
        keyType = getGroupbyColumnType(pQuery, pQuery->pGroupbyExpr);
      }

      initWindowResInfo(&pEnv->windowResInfo, pEnv, rows, 4096, keyType);
    } else if (pQuery->intervalTime == 0) {
      // window key is the group-by column value, otherwise the integer group id
      int16_t keyType = TSDB_DATA_TYPE_INT;
      if (isGroupbyNormalCol(pQuery->pGroupbyExpr)) {  // group by columns not tags
        keyType = getGroupbyColumnType(pQuery, pQuery->pGroupbyExpr);
      }

      initWindowResInfo(&pEnv->windowResInfo, pEnv, 512, 4096, keyType);
    }
  }

  setQueryStatus(pQuery, QUERY_NOT_COMPLETED);

  /*
   * The last_row normalisation step (normalizeUnBoundLastRowQuery), which set the
   * query timestamp to STable->lastKey for unbounded last_row queries, is
   * currently disabled; the initial query time range is kept unchanged.
   */

  bool needFill = (pQuery->fillType != TSDB_FILL_NONE) && !isPointInterpoQuery(pQuery);
  if (needFill) {
    SFillColInfo* pColInfo = taosCreateFillColInfo(pQuery);
    pEnv->pFillInfo = taosInitFillInfo(pQuery->order.order, 0, 0, pQuery->rec.capacity, pQuery->numOfOutput,
                                       pQuery->slidingTime, pQuery->fillType, pColInfo);
  }

  return TSDB_CODE_SUCCESS;
}

4058
/*
 * Report whether any group-by column is flagged exactly as TSDB_COL_TAG.
 *
 * @param pGroupbyExpr  group-by expression, may be NULL
 * @param pSidset       not used by this function
 * @return true when at least one group-by column has flag == TSDB_COL_TAG;
 *         false for a NULL expression or one without group columns
 */
static UNUSED_FUNC bool isGroupbyEachTable(SSqlGroupbyExpr *pGroupbyExpr, STableGroupInfo *pSidset) {
  if (pGroupbyExpr == NULL) {
    return false;
  }

  if (pGroupbyExpr->numOfGroupCols == 0) {
    return false;
  }

  bool hasTagCol = false;

  int32_t idx = 0;
  while (!hasTagCol && idx < pGroupbyExpr->numOfGroupCols) {
    SColIndex* pCol = taosArrayGet(pGroupbyExpr->columnInfo, idx);
    hasTagCol = (pCol->flag == TSDB_COL_TAG);
    idx += 1;
  }

  return hasTagCol;
}

4073
/*
 * Clear the "complete" flag on the result info of every output function
 * context, so the functions run again for the next table.
 */
static void enableExecutionForNextTable(SQueryRuntimeEnv *pRuntimeEnv) {
  int32_t numOfOutput = pRuntimeEnv->pQuery->numOfOutput;

  int32_t k = 0;
  while (k < numOfOutput) {
    SResultInfo *pInfo = GET_RES_INFO(&pRuntimeEnv->pCtx[k]);
    k += 1;

    if (pInfo == NULL) {
      continue;
    }

    pInfo->complete = false;
  }
}

H
hjxilinx 已提交
4084
/*
 * Iterate over every data block delivered by the active query handle (the
 * primary handle during the master scan, the secondary one otherwise), locate
 * the table the block belongs to, and apply the query functions to the block.
 *
 * The loop stops early when the query is killed. When setAdditionalInfo()
 * fails for an interval query, the error is stored in pQInfo->code and the
 * function returns immediately.
 *
 * @return elapsed wall-clock time in milliseconds
 */
static int64_t queryOnDataBlocks(SQInfo *pQInfo) {
  SQueryRuntimeEnv *pRuntimeEnv = &pQInfo->runtimeEnv;
  SQuery *          pQuery = pRuntimeEnv->pQuery;

  int64_t st = taosGetTimestampMs();

  TsdbQueryHandleT pQueryHandle = IS_MASTER_SCAN(pRuntimeEnv)? pRuntimeEnv->pQueryHandle : pRuntimeEnv->pSecQueryHandle;

  while (tsdbNextDataBlock(pQueryHandle)) {
    if (isQueryKilled(pQInfo)) {
      break;
    }

    SDataBlockInfo  blockInfo = tsdbRetrieveDataBlockInfo(pQueryHandle);
    STableQueryInfo *pTableQueryInfo = NULL;

    // todo opt performance using hash table
    // linear search over all groups for the table that owns this block
    size_t numOfGroup = taosArrayGetSize(pQInfo->groupInfo.pGroupList);
    for (size_t i = 0; i < numOfGroup && pTableQueryInfo == NULL; ++i) {
      SArray *group = taosArrayGetP(pQInfo->groupInfo.pGroupList, i);

      size_t num = taosArrayGetSize(group);
      for (size_t j = 0; j < num; ++j) {
        SGroupItem *item = taosArrayGet(group, j);
        STableQueryInfo *pInfo = item->info;

        if (pInfo->id.tid == blockInfo.tid) {
          assert(pInfo->id.uid == blockInfo.uid);
          pTableQueryInfo = pInfo;
          break;
        }
      }
    }

    assert(pTableQueryInfo != NULL);
    restoreIntervalQueryRange(pRuntimeEnv, pTableQueryInfo);

    // trace-level log instead of an unconditional printf to stdout on every block
    qTrace("QInfo:%p table:%d, groupIndex:%d", pQInfo, pTableQueryInfo->id.tid, pTableQueryInfo->groupIndex);

    SDataStatis *pStatis = NULL;
    SArray *pDataBlock = loadDataBlockOnDemand(pRuntimeEnv, pQueryHandle, &blockInfo, &pStatis);

    if (!isIntervalQuery(pQuery)) {
      // pass the key one step beyond the block's end key, in scan direction
      int32_t step = QUERY_IS_ASC_QUERY(pQuery)? 1:-1;
      setExecutionContext(pQInfo, &pTableQueryInfo->id, pTableQueryInfo->groupIndex, blockInfo.window.ekey + step);
    } else {  // interval query
      TSKEY nextKey = blockInfo.window.skey;
      setIntervalQueryRange(pQInfo, nextKey);

      int32_t ret = setAdditionalInfo(pQInfo, &pTableQueryInfo->id, pTableQueryInfo);
      if (ret != TSDB_CODE_SUCCESS) {
        pQInfo->code = ret;
        return taosGetTimestampMs() - st;
      }
    }

    stableApplyFunctionsOnBlock(pRuntimeEnv, pTableQueryInfo, &blockInfo, pStatis, pDataBlock, binarySearchForKey);
  }

  int64_t et = taosGetTimestampMs();
  return et - st;
}

4152 4153
/*
 * Prepare the runtime environment for scanning a single table of the first
 * group: set its tag values, open a fresh query handle restricted to that
 * table over [lastKey, win.ekey], and position the TS buffer if one is attached.
 *
 * @param pQInfo  query being executed
 * @param index   position of the table inside the first group
 * @return false when a TS buffer is attached, no cursor has been saved yet
 *         (cur.vgroupIndex == -1) and the buffer has no data for the current
 *         tag value; true otherwise
 */
static bool multiTableMultioutputHelper(SQInfo *pQInfo, int32_t index) {
  SQueryRuntimeEnv *pEnv = &pQInfo->runtimeEnv;
  SQuery           *pQuery = pEnv->pQuery;

  setQueryStatus(pQuery, QUERY_NOT_COMPLETED);

  SArray     *tables = taosArrayGetP(pQInfo->groupInfo.pGroupList, 0);
  SGroupItem *pItem = taosArrayGet(tables, index);

  setTagVal(pEnv, &pItem->id, pQInfo->tsdb);

  qTrace("QInfo:%p query on (%d): uid:%" PRIu64 ", tid:%d, qrange:%" PRId64 "-%" PRId64, pQInfo, index,
         pItem->id.uid, pItem->id.tid, pItem->info->lastKey, pItem->info->win.ekey);

  STsdbQueryCond cond = {
      .twindow   = {pItem->info->lastKey, pItem->info->win.ekey},
      .numOfCols = pQuery->numOfCols,
      .colList   = pQuery->colList,
      .order     = pQuery->order.order,
  };

  // todo refactor: wrap the single table id into a one-group, one-table list
  SArray *groupList = taosArrayInit(1, POINTER_BYTES);
  SArray *tableIds = taosArrayInit(1, sizeof(STableId));

  taosArrayPush(tableIds, &pItem->info->id);
  taosArrayPush(groupList, &tableIds);
  STableGroupInfo single = {.numOfTables = 1, .pGroupList = groupList};

  // drop the handle of the previously scanned table before opening a new one
  if (pEnv->pQueryHandle != NULL) {
    tsdbCleanupQueryHandle(pEnv->pQueryHandle);
    pEnv->pQueryHandle = NULL;
  }

  pEnv->pQueryHandle = tsdbQueryTables(pQInfo->tsdb, &cond, &single);

  // the temporary lists are released as soon as the handle has been created
  taosArrayDestroy(tableIds);
  taosArrayDestroy(groupList);

  if (pEnv->pTSBuf != NULL) {
    if (pEnv->cur.vgroupIndex != -1) {
      tsBufSetCursor(pEnv->pTSBuf, &pEnv->cur);
    } else {
      int64_t tagValue = pEnv->pCtx[0].tag.i64Key;
      STSElem first = tsBufGetElemStartPos(pEnv->pTSBuf, 0, tagValue);

      // failed to find data with the specified tag value
      if (first.vnode < 0) {
        return false;
      }
    }
  }

  initCtxOutputBuf(pEnv);
  return true;
}

/**
 * super table query handler
 * 1. super table projection query, group-by on normal columns query, ts-comp query
 * 2. point interpolation query, last row query
 *
 * @param pQInfo
 */
4215
static void sequentialTableProcess(SQInfo *pQInfo) {
4216
  SQueryRuntimeEnv *pRuntimeEnv = &pQInfo->runtimeEnv;
4217
  SQuery *          pQuery = pRuntimeEnv->pQuery;
4218
  setQueryStatus(pQuery, QUERY_COMPLETED);
4219

4220
  size_t numOfGroups = taosArrayGetSize(pQInfo->groupInfo.pGroupList);
4221

H
Haojun Liao 已提交
4222
  if (isPointInterpoQuery(pQuery) || isFirstLastRowQuery(pQuery)) {
4223 4224
    resetCtxOutputBuf(pRuntimeEnv);
    assert(pQuery->limit.offset == 0 && pQuery->limit.limit != 0);
4225

4226 4227
    while (pQInfo->groupIndex < numOfGroups) {
      SArray* group = taosArrayGetP(pQInfo->groupInfo.pGroupList, pQInfo->groupIndex);
4228

H
Haojun Liao 已提交
4229 4230 4231 4232 4233 4234 4235 4236 4237 4238 4239 4240 4241 4242 4243 4244 4245 4246 4247 4248 4249 4250
      qTrace("QInfo:%p last_row query on group:%d, total group:%d, current group:%d", pQInfo, pQInfo->groupIndex,
             numOfGroups);

      STsdbQueryCond cond = {
          .twindow = pQuery->window,
          .colList = pQuery->colList,
          .order   = pQuery->order.order,
          .numOfCols = pQuery->numOfCols,
      };

      SArray *g1 = taosArrayInit(1, POINTER_BYTES);
      SArray *tx = taosArrayClone(group);
      taosArrayPush(g1, &tx);
      
      STableGroupInfo gp = {.numOfTables = taosArrayGetSize(tx), .pGroupList = g1};

      // include only current table
      if (pRuntimeEnv->pQueryHandle != NULL) {
        tsdbCleanupQueryHandle(pRuntimeEnv->pQueryHandle);
        pRuntimeEnv->pQueryHandle = NULL;
      }
      
4251
      if (isFirstLastRowQuery(pQuery)) {
4252
        pRuntimeEnv->pQueryHandle = tsdbQueryLastRow(pQInfo->tsdb, &cond, &gp);
H
Haojun Liao 已提交
4253 4254
      } else {
        pRuntimeEnv->pQueryHandle = tsdbQueryRowsInExternalWindow(pQInfo->tsdb, &cond, &gp);
4255
      }
H
Haojun Liao 已提交
4256 4257
      
      initCtxOutputBuf(pRuntimeEnv);
4258 4259 4260 4261 4262 4263 4264 4265 4266
      
      SArray* s = tsdbGetQueriedTableIdList(pRuntimeEnv->pQueryHandle);
      assert(taosArrayGetSize(s) >= 1);
      
      setTagVal(pRuntimeEnv, (STableId*) taosArrayGet(s, 0), pQInfo->tsdb);
      
      if (isFirstLastRowQuery(pQuery)) {
        assert(taosArrayGetSize(s) == 1);
      }
H
Haojun Liao 已提交
4267 4268 4269 4270 4271 4272 4273 4274 4275 4276 4277 4278 4279 4280 4281 4282
      
      // here we simply set the first table as current table
      pQuery->current = ((SGroupItem*) taosArrayGet(group, 0))->info;
      scanAllDataBlocks(pRuntimeEnv, pQuery->current->lastKey);
      
      int64_t numOfRes = getNumOfResult(pRuntimeEnv);
      if (numOfRes > 0) {
        pQuery->rec.rows += numOfRes;
        forwardCtxOutputBuf(pRuntimeEnv, numOfRes);
      }
      
      skipResults(pRuntimeEnv);
      pQInfo->groupIndex += 1;

      // enable execution for next table, when handling the projection query
      enableExecutionForNextTable(pRuntimeEnv);
4283 4284 4285
    }
  } else {
    /*
4286
     * 1. super table projection query, 2. group-by on normal columns query, 3. ts-comp query
4287 4288 4289
     * if the subgroup index is larger than 0, results generated by group by tbname,k is existed.
     * we need to return it to client in the first place.
     */
4290
    if (pQInfo->groupIndex > 0) {
4291
      copyFromWindowResToSData(pQInfo, pRuntimeEnv->windowResInfo.pResult);
4292
      pQuery->rec.total += pQuery->rec.rows;
4293

4294
      if (pQuery->rec.rows > 0) {
4295 4296 4297
        return;
      }
    }
4298

4299 4300
    // all data have returned already
    if (pQInfo->tableIndex >= pQInfo->groupInfo.numOfTables) {
4301 4302
      return;
    }
4303

4304 4305
    resetCtxOutputBuf(pRuntimeEnv);
    resetTimeWindowInfo(pRuntimeEnv, &pRuntimeEnv->windowResInfo);
4306 4307 4308 4309 4310

    SArray *group = taosArrayGetP(pQInfo->groupInfo.pGroupList, 0);
    assert(taosArrayGetSize(group) == pQInfo->groupInfo.numOfTables &&
           1 == taosArrayGetSize(pQInfo->groupInfo.pGroupList));

4311
    while (pQInfo->tableIndex < pQInfo->groupInfo.numOfTables) {
4312
      if (isQueryKilled(pQInfo)) {
4313 4314
        return;
      }
4315

H
hjxilinx 已提交
4316
      SGroupItem *item = taosArrayGet(group, pQInfo->tableIndex);
H
hjxilinx 已提交
4317
      pQuery->current = item->info;
H
hjxilinx 已提交
4318
      
4319
      if (!multiTableMultioutputHelper(pQInfo, pQInfo->tableIndex)) {
4320
        pQInfo->tableIndex++;
4321 4322
        continue;
      }
4323

H
hjxilinx 已提交
4324
      // TODO handle the limit offset problem
4325
      if (pQuery->numOfFilterCols == 0 && pQuery->limit.offset > 0) {
4326
        //        skipBlocks(pRuntimeEnv);
4327 4328
        if (Q_STATUS_EQUAL(pQuery->status, QUERY_COMPLETED)) {
          pQInfo->tableIndex++;
4329 4330 4331
          continue;
        }
      }
4332

H
Haojun Liao 已提交
4333
      scanAllDataBlocks(pRuntimeEnv, pQuery->current->lastKey);
4334
      skipResults(pRuntimeEnv);
4335

4336
      // the limitation of output result is reached, set the query completed
4337
      if (limitResults(pRuntimeEnv)) {
4338
        pQInfo->tableIndex = pQInfo->groupInfo.numOfTables;
4339 4340
        break;
      }
4341

4342 4343
      // enable execution for next table, when handling the projection query
      enableExecutionForNextTable(pRuntimeEnv);
4344

4345
      if (Q_STATUS_EQUAL(pQuery->status, QUERY_COMPLETED)) {
4346 4347 4348 4349 4350 4351
        /*
         * query range is identical in terms of all meters involved in query,
         * so we need to restore them at the *beginning* of query on each meter,
         * not the consecutive query on meter on which is aborted due to buffer limitation
         * to ensure that, we can reset the query range once query on a meter is completed.
         */
4352
        pQInfo->tableIndex++;
weixin_48148422's avatar
weixin_48148422 已提交
4353 4354 4355 4356

        STableIdInfo tidInfo;
        tidInfo.uid = item->id.uid;
        tidInfo.tid = item->id.tid;
weixin_48148422's avatar
weixin_48148422 已提交
4357
        tidInfo.key = pQuery->current->lastKey;
weixin_48148422's avatar
weixin_48148422 已提交
4358 4359
        taosArrayPush(pQInfo->arrTableIdInfo, &tidInfo);

4360
        // if the buffer is full or group by each table, we need to jump out of the loop
4361 4362
        if (Q_STATUS_EQUAL(pQuery->status, QUERY_RESBUF_FULL) /*||
            isGroupbyEachTable(pQuery->pGroupbyExpr, pSupporter->pSidSet)*/) {
4363 4364
          break;
        }
4365

4366
      } else {
4367
        // all data in the result buffer are skipped due to the offset, continue to retrieve data from current meter
4368 4369
        if (pQuery->rec.rows == 0) {
          assert(!Q_STATUS_EQUAL(pQuery->status, QUERY_RESBUF_FULL));
4370 4371
          continue;
        } else {
4372 4373 4374
          // buffer is full, wait for the next round to retrieve data from current meter
          assert(Q_STATUS_EQUAL(pQuery->status, QUERY_RESBUF_FULL));
          break;
4375 4376 4377 4378
        }
      }
    }
  }
4379

4380 4381 4382 4383 4384 4385 4386 4387 4388 4389 4390 4391
  /*
   * 1. super table projection query, group-by on normal columns query, ts-comp query
   * 2. point interpolation query, last row query
   *
   * group-by on normal columns query and last_row query do NOT invoke the finalizer here,
   * since the finalize stage will be done at the client side.
   *
   * projection query, point interpolation query do not need the finalizer.
   *
   * Only the ts-comp query requires the finalizer function to be executed here.
   */
  if (isTSCompQuery(pQuery)) {
H
hjxilinx 已提交
4392
    finalizeQueryResult(pRuntimeEnv);
4393
  }
4394

4395 4396 4397
  if (pRuntimeEnv->pTSBuf != NULL) {
    pRuntimeEnv->cur = pRuntimeEnv->pTSBuf->cur;
  }
4398

4399 4400 4401
  // todo refactor
  if (isGroupbyNormalCol(pQuery->pGroupbyExpr)) {
    SWindowResInfo *pWindowResInfo = &pRuntimeEnv->windowResInfo;
4402

4403 4404 4405
    for (int32_t i = 0; i < pWindowResInfo->size; ++i) {
      SWindowStatus *pStatus = &pWindowResInfo->pResult[i].status;
      pStatus->closed = true;  // enable return all results for group by normal columns
4406

4407
      SWindowResult *pResult = &pWindowResInfo->pResult[i];
4408
      for (int32_t j = 0; j < pQuery->numOfOutput; ++j) {
4409 4410 4411
        pResult->numOfRows = MAX(pResult->numOfRows, pResult->resultInfo[j].numOfRes);
      }
    }
4412

4413
    pQInfo->groupIndex = 0;
4414
    pQuery->rec.rows = 0;
4415 4416
    copyFromWindowResToSData(pQInfo, pWindowResInfo->pResult);
  }
4417 4418

  qTrace(
H
Haojun Liao 已提交
4419
      "QInfo %p numOfTables:%d, index:%d, numOfGroups:%d, %d points returned, total:%"PRId64", offset:%" PRId64,
4420 4421
      pQInfo, pQInfo->groupInfo.numOfTables, pQInfo->tableIndex, numOfGroups, pQuery->rec.rows, pQuery->rec.total,
      pQuery->limit.offset);
4422 4423
}

4424 4425 4426 4427
/*
 * Switch the query into reverse-scan mode: flag the runtime environment,
 * swap the query window bounds, invert the scan order, open the secondary
 * query handle over the swapped window, and reconfigure the function contexts
 * for the reverse scan.
 */
static void doSaveContext(SQInfo *pQInfo) {
  SQueryRuntimeEnv *pEnv = &pQInfo->runtimeEnv;
  SQuery           *pQuery = pEnv->pQuery;

  SET_REVERSE_SCAN_FLAG(pEnv);

  // reverse the window and the scan direction
  SWAP(pQuery->window.skey, pQuery->window.ekey, TSKEY);
  SWITCH_ORDER(pQuery->order.order);

  // keep the TS buffer cursor direction in sync with the query order
  if (pEnv->pTSBuf != NULL) {
    pEnv->pTSBuf->cur.order = pQuery->order.order;
  }

  // release a secondary handle left over from an earlier reverse scan
  if (pEnv->pSecQueryHandle != NULL) {
    tsdbCleanupQueryHandle(pEnv->pSecQueryHandle);
  }

  STsdbQueryCond reverseCond = {
      .numOfCols = pQuery->numOfCols,
      .colList   = pQuery->colList,
      .order     = pQuery->order.order,
      .twindow   = pQuery->window,
  };

  pEnv->pSecQueryHandle = tsdbQueryTables(pQInfo->tsdb, &reverseCond, &pQInfo->tableIdGroupInfo);

  setQueryStatus(pQuery, QUERY_NOT_COMPLETED);
  switchCtxOrder(pEnv);
  disableFuncInReverseScan(pQInfo);
}

4455 4456 4457 4458
/*
 * Leave reverse-scan mode: swap the query window bounds back, flip the TS
 * buffer cursor order, switch the function context order, and mark the
 * runtime environment as master scan again.
 *
 * NOTE(review): doSaveContext() also applies SWITCH_ORDER to
 * pQuery->order.order, which is not switched back here - confirm whether
 * that is intentional.
 */
static void doRestoreContext(SQInfo *pQInfo) {
  SQueryRuntimeEnv *pEnv = &pQInfo->runtimeEnv;
  SQuery           *pQuery = pEnv->pQuery;

  SWAP(pQuery->window.skey, pQuery->window.ekey, TSKEY);

  if (pEnv->pTSBuf != NULL) {
    SWITCH_ORDER(pEnv->pTSBuf->cur.order);
  }

  switchCtxOrder(pEnv);
  SET_MASTER_SCAN_FLAG(pEnv);
}

4469 4470 4471
/*
 * Close all time windows once a scan has finished. For an interval query the
 * per-table window result info of every table in every group is closed;
 * otherwise only the runtime environment's own window result info is closed.
 */
static void doCloseAllTimeWindowAfterScan(SQInfo *pQInfo) {
  SQuery *pQuery = pQInfo->runtimeEnv.pQuery;

  if (!isIntervalQuery(pQuery)) {  // close results for group result
    closeAllTimeWindow(&pQInfo->runtimeEnv.windowResInfo);
    return;
  }

  size_t groupCount = taosArrayGetSize(pQInfo->groupInfo.pGroupList);
  for (int32_t g = 0; g < groupCount; ++g) {
    SArray *tables = taosArrayGetP(pQInfo->groupInfo.pGroupList, g);

    size_t tableCount = taosArrayGetSize(tables);
    for (int32_t t = 0; t < tableCount; ++t) {
      SGroupItem* pItem = taosArrayGet(tables, t);
      closeAllTimeWindow(&pItem->info->windowResInfo);
    }
  }
}

/*
 * Execute a query over multiple tables block by block.
 *
 * When pQInfo->groupIndex > 0 the scan has already been done, so only the
 * remaining results are copied to the output buffer. Otherwise the master
 * scan is run, all time windows are closed, an optional reverse scan is run,
 * and the results are merged / copied into the output buffer.
 *
 * The function returns early, without producing output, when the query is
 * killed or pQInfo->code reports an error.
 */
static void multiTableQueryProcess(SQInfo *pQInfo) {
  SQueryRuntimeEnv *pRuntimeEnv = &pQInfo->runtimeEnv;
  SQuery *          pQuery = pRuntimeEnv->pQuery;

  if (pQInfo->groupIndex > 0) {
    /*
     * if the groupIndex > 0, the query process must be completed already, we only need to
     * copy the data into output buffer
     */
    if (isIntervalQuery(pQuery)) {
      copyResToQueryResultBuf(pQInfo, pQuery);

#ifdef _DEBUG_VIEW
      displayInterResult(pQuery->sdata, pRuntimeEnv, pQuery->sdata[0]->num);
#endif
    } else {
      copyFromWindowResToSData(pQInfo, pRuntimeEnv->windowResInfo.pResult);
    }

    // PRId64 plus explicit casts: "%lld" is not guaranteed to match int64_t
    qTrace("QInfo:%p current:%" PRId64 ", total:%" PRId64, pQInfo, (int64_t)pQuery->rec.rows,
           (int64_t)pQuery->rec.total);
    return;
  }

  qTrace("QInfo:%p query start, qrange:%" PRId64 "-%" PRId64 ", order:%d, forward scan start", pQInfo,
         pQuery->window.skey, pQuery->window.ekey, pQuery->order.order);

  // do check all qualified data blocks
  int64_t el = queryOnDataBlocks(pQInfo);
  qTrace("QInfo:%p master scan completed, elapsed time: %" PRId64 "ms, reverse scan start", pQInfo, el);

  // query error occurred or query is killed, abort current execution
  if (pQInfo->code != TSDB_CODE_SUCCESS || isQueryKilled(pQInfo)) {
    qTrace("QInfo:%p query killed or error occurred, code:%d, abort", pQInfo, pQInfo->code);
    return;
  }

  // close all time window results
  doCloseAllTimeWindowAfterScan(pQInfo);

  if (needReverseScan(pQuery)) {
    doSaveContext(pQInfo);

    el = queryOnDataBlocks(pQInfo);
    qTrace("QInfo:%p reversed scan completed, elapsed time: %" PRId64 "ms", pQInfo, el);

    doRestoreContext(pQInfo);
  } else {
    qTrace("QInfo:%p no need to do reversed scan, query completed", pQInfo);
  }

  setQueryStatus(pQuery, QUERY_COMPLETED);

  // the reverse scan may also have failed or been interrupted
  if (pQInfo->code != TSDB_CODE_SUCCESS || isQueryKilled(pQInfo)) {
    qTrace("QInfo:%p query killed or error occurred, code:%d, abort", pQInfo, pQInfo->code);
    return;
  }

  if (isIntervalQuery(pQuery) || isSumAvgRateQuery(pQuery)) {
    if (mergeIntoGroupResult(pQInfo) == TSDB_CODE_SUCCESS) {
      copyResToQueryResultBuf(pQInfo, pQuery);

#ifdef _DEBUG_VIEW
      displayInterResult(pQuery->sdata, pRuntimeEnv, pQuery->sdata[0]->num);
#endif
    }
  } else {  // not a interval query
    copyFromWindowResToSData(pQInfo, pRuntimeEnv->windowResInfo.pResult);
  }

  // handle the limitation of output buffer
  qTrace("QInfo:%p points returned:%" PRId64 ", total:%" PRId64, pQInfo, (int64_t)pQuery->rec.rows,
         (int64_t)(pQuery->rec.total + pQuery->rec.rows));
}

/*
 * in each query, this function will be called only once, no retry for further result.
 *
 * select count(*)/top(field,k)/avg(field name) from table_name [where ts>now-1a];
 * select count(*) from table_name group by status_column;
 */
H
hjxilinx 已提交
4571
static void tableFixedOutputProcess(SQInfo *pQInfo, STableQueryInfo* pTableInfo) {
4572
  SQueryRuntimeEnv *pRuntimeEnv = &pQInfo->runtimeEnv;
H
hjxilinx 已提交
4573 4574
  
  SQuery *pQuery = pRuntimeEnv->pQuery;
H
Haojun Liao 已提交
4575 4576 4577 4578
  if (!isTopBottomQuery(pQuery) && pQuery->limit.offset > 0) {  // no need to execute, since the output will be ignore.
    return;
  }
  
H
hjxilinx 已提交
4579 4580
  pQuery->current = pTableInfo;  // set current query table info
  
H
Haojun Liao 已提交
4581
  scanAllDataBlocks(pRuntimeEnv, pTableInfo->lastKey);
H
hjxilinx 已提交
4582
  finalizeQueryResult(pRuntimeEnv);
4583

4584
  if (isQueryKilled(pQInfo)) {
4585 4586
    return;
  }
4587

H
Haojun Liao 已提交
4588
  // since the numOfRows must be identical for all sql functions that are allowed to be executed simutaneously.
4589
  pQuery->rec.rows = getNumOfResult(pRuntimeEnv);
4590

4591
  skipResults(pRuntimeEnv);
4592
  limitResults(pRuntimeEnv);
4593 4594
}

H
hjxilinx 已提交
4595
/*
 * Single-table handler for queries that may produce many output rows.
 *
 * Blocks are skipped up front according to the offset; then the table is
 * scanned repeatedly until at least one row survives the offset or the query
 * completes. When the query completes, the table's last key is appended to
 * pQInfo->arrTableIdInfo.
 *
 * @param pQInfo      query being executed
 * @param pTableInfo  table to scan; becomes pQuery->current
 */
static void tableMultiOutputProcess(SQInfo *pQInfo, STableQueryInfo* pTableInfo) {
  SQueryRuntimeEnv *pRuntimeEnv = &pQInfo->runtimeEnv;

  SQuery *pQuery = pRuntimeEnv->pQuery;
  pQuery->current = pTableInfo;

  // for ts_comp query, re-initialized is not allowed
  if (!isTSCompQuery(pQuery)) {
    resetCtxOutputBuf(pRuntimeEnv);
  }

  // skip blocks without load the actual data block from file if no filter condition present
  skipBlocks(&pQInfo->runtimeEnv);
  if (pQuery->limit.offset > 0 && pQuery->numOfFilterCols == 0) {
    // offset still positive after skipping blocks and no filter present: finish the query
    setQueryStatus(pQuery, QUERY_COMPLETED);
    return;
  }

  while (1) {
    scanAllDataBlocks(pRuntimeEnv, pQuery->current->lastKey);
    finalizeQueryResult(pRuntimeEnv);

    if (isQueryKilled(pQInfo)) {
      return;
    }

    pQuery->rec.rows = getNumOfResult(pRuntimeEnv);
    if (pQuery->limit.offset > 0 && pQuery->numOfFilterCols > 0 && pQuery->rec.rows > 0) {
      skipResults(pRuntimeEnv);
    }

    // stop once rows are available or the scan is complete; otherwise scan again
    if (pQuery->rec.rows > 0 || Q_STATUS_EQUAL(pQuery->status, QUERY_COMPLETED)) {
      break;
    }

    // the format string now consumes exactly the arguments that are passed
    qTrace("QInfo:%p skip current result, offset:%" PRId64 ", next qrange:%" PRId64 "-%" PRId64,
           pQInfo, pQuery->limit.offset, pQuery->current->lastKey, pQuery->window.ekey);

    resetCtxOutputBuf(pRuntimeEnv);
  }

  limitResults(pRuntimeEnv);
  if (Q_STATUS_EQUAL(pQuery->status, QUERY_RESBUF_FULL)) {
    qTrace("QInfo:%p query paused due to output limitation, next qrange:%" PRId64 "-%" PRId64, pQInfo,
        pQuery->current->lastKey, pQuery->window.ekey);
  } else if (Q_STATUS_EQUAL(pQuery->status, QUERY_COMPLETED)) {
    // record the last key reached on this table
    STableIdInfo tidInfo;
    tidInfo.uid = pQuery->current->id.uid;
    tidInfo.tid = pQuery->current->id.tid;
    tidInfo.key = pQuery->current->lastKey;
    taosArrayPush(pQInfo->arrTableIdInfo, &tidInfo);
  }

  if (!isTSCompQuery(pQuery)) {
    assert(pQuery->rec.rows <= pQuery->rec.capacity);
  }
}

H
Haojun Liao 已提交
4657
/*
 * Scan the table starting from the given key and finalize the results,
 * repeating until the query status is QUERY_COMPLETED or QUERY_RESBUF_FULL.
 * Returns immediately when the query is killed.
 *
 * When a filter or TS buffer is present, an offset is pending and no fill is
 * requested, closed time windows are discarded to consume the offset.
 */
static void tableIntervalProcessImpl(SQueryRuntimeEnv *pRuntimeEnv, TSKEY start) {
  SQuery *pQuery = pRuntimeEnv->pQuery;

  do {
    scanAllDataBlocks(pRuntimeEnv, start);

    if (isQueryKilled(GET_QINFO_ADDR(pRuntimeEnv))) {
      return;
    }

    assert(!Q_STATUS_EQUAL(pQuery->status, QUERY_NOT_COMPLETED));
    finalizeQueryResult(pRuntimeEnv);

    // here we can ignore the records in case of no interpolation
    // todo handle offset, in case of top/bottom interval query
    bool hasFilterOrTsBuf = (pQuery->numOfFilterCols > 0) || (pRuntimeEnv->pTSBuf != NULL);
    if (hasFilterOrTsBuf && pQuery->limit.offset > 0 && pQuery->fillType == TSDB_FILL_NONE) {
      int32_t closedWindows = numOfClosedTimeWindow(&pRuntimeEnv->windowResInfo);

      // drop as many closed windows as the remaining offset allows
      int32_t toDrop = MIN(closedWindows, pQuery->limit.offset);
      clearFirstNTimeWindow(pRuntimeEnv, toDrop);
      pQuery->limit.offset -= toDrop;
    }
  } while (!Q_STATUS_EQUAL(pQuery->status, QUERY_COMPLETED | QUERY_RESBUF_FULL));
}

4688
// handle time interval query on table
H
hjxilinx 已提交
4689
static void tableIntervalProcess(SQInfo *pQInfo, STableQueryInfo* pTableInfo) {
4690 4691
  SQueryRuntimeEnv *pRuntimeEnv = &(pQInfo->runtimeEnv);

H
hjxilinx 已提交
4692 4693
  SQuery *pQuery = pRuntimeEnv->pQuery;
  pQuery->current = pTableInfo;
4694

H
Haojun Liao 已提交
4695 4696 4697
  int32_t numOfInterpo = 0;
  TSKEY newStartKey = TSKEY_INITIAL_VAL;
  
4698
  // skip blocks without load the actual data block from file if no filter condition present
H
Haojun Liao 已提交
4699
  skipTimeInterval(pRuntimeEnv, &newStartKey);
4700
  if (pQuery->limit.offset > 0 && pQuery->numOfFilterCols == 0 && pRuntimeEnv->pFillInfo == NULL) {
4701 4702 4703 4704
    setQueryStatus(pQuery, QUERY_COMPLETED);
    return;
  }

4705
  while (1) {
H
Haojun Liao 已提交
4706
    tableIntervalProcessImpl(pRuntimeEnv, newStartKey);
4707

H
hjxilinx 已提交
4708
    if (isIntervalQuery(pQuery)) {
4709
      pQInfo->groupIndex = 0;  // always start from 0
4710
      pQuery->rec.rows = 0;
4711
      copyFromWindowResToSData(pQInfo, pRuntimeEnv->windowResInfo.pResult);
4712

4713
      clearFirstNTimeWindow(pRuntimeEnv, pQInfo->groupIndex);
4714
    }
4715

4716
    // the offset is handled at prepare stage if no interpolation involved
4717
    if (pQuery->fillType == TSDB_FILL_NONE || pQuery->rec.rows == 0) {
4718
      limitResults(pRuntimeEnv);
4719 4720
      break;
    } else {
4721 4722 4723 4724
      TSKEY ekey = taosGetRevisedEndKey(pQuery->window.ekey, pQuery->order.order, pQuery->slidingTime,
                                        pQuery->slidingTimeUnit, pQuery->precision);
      taosFillSetStartInfo(pRuntimeEnv->pFillInfo, pQuery->rec.rows, ekey);
      taosFillCopyInputDataFromFilePage(pRuntimeEnv->pFillInfo, (tFilePage**) pQuery->sdata);
4725
      numOfInterpo = 0;
4726
      
4727
      pQuery->rec.rows = doFillGapsInResults(pRuntimeEnv, (tFilePage **)pQuery->sdata, pQuery->rec.rows, &numOfInterpo);
4728
      if (pQuery->rec.rows > 0 || Q_STATUS_EQUAL(pQuery->status, QUERY_COMPLETED)) {
4729
        limitResults(pRuntimeEnv);
4730 4731
        break;
      }
4732

4733
      // no result generated yet, continue retrieve data
4734
      pQuery->rec.rows = 0;
4735 4736
    }
  }
4737

4738 4739
  // all data scanned, the group by normal column can return
  if (isGroupbyNormalCol(pQuery->pGroupbyExpr)) {  // todo refactor with merge interval time result
4740
    pQInfo->groupIndex = 0;
4741
    pQuery->rec.rows = 0;
4742
    copyFromWindowResToSData(pQInfo, pRuntimeEnv->windowResInfo.pResult);
4743
    clearFirstNTimeWindow(pRuntimeEnv, pQInfo->groupIndex);
4744
  }
4745

4746 4747 4748
  pQInfo->pointsInterpo += numOfInterpo;
}

4749 4750 4751 4752
/*
 * Run one round of a query that targets exactly one table (asserted below) and leave the number of
 * rows produced by this round in pQuery->rec.rows.
 *
 * Three cases are handled in order:
 *  1. the fill (interpolation) stage still holds rows from a previous round: emit those and return;
 *  2. the query status is QUERY_COMPLETED: flush what is left in the window results (group by
 *     normal column / interval query), or report that the query is over;
 *  3. otherwise dispatch to the interval, fixed-output or multi-output processor.
 */
static void tableQueryImpl(SQInfo *pQInfo) {
  SQueryRuntimeEnv *pRuntimeEnv = &pQInfo->runtimeEnv;
  SQuery *          pQuery = pRuntimeEnv->pQuery;

  if (queryHasRemainResults(pRuntimeEnv)) {
    /*
     * There are remaining results that were not returned because of result interpolation,
     * so keep draining them here instead of launching the retrieve procedure for the next results.
     */
    int32_t numOfInterpo = 0;
    int32_t remain = taosNumOfRemainRows(pRuntimeEnv->pFillInfo);
    pQuery->rec.rows = doFillGapsInResults(pRuntimeEnv, (tFilePage **)pQuery->sdata, remain, &numOfInterpo);

    if (pQuery->rec.rows > 0) {
      limitResults(pRuntimeEnv);
    }

    qTrace("QInfo:%p current:%" PRId64 " returned, total:%" PRId64, pQInfo, pQuery->rec.rows, pQuery->rec.total);
    return;
  }

  // here we have scanned all qualified data in both data file and cache
  if (Q_STATUS_EQUAL(pQuery->status, QUERY_COMPLETED)) {
    // continue to push data from the group result
    if (isGroupbyNormalCol(pQuery->pGroupbyExpr) ||
        ((isIntervalQuery(pQuery) && pQuery->rec.total < pQuery->limit.limit))) {
      // todo limit the output for interval query?
      pQuery->rec.rows = 0;
      pQInfo->groupIndex = 0;  // always start from 0

      if (pRuntimeEnv->windowResInfo.size > 0) {
        // the copy routine reports the copied row count through pQuery->rec.rows; it must not be
        // added to itself afterwards, otherwise twice the copied row count is reported
        copyFromWindowResToSData(pQInfo, pRuntimeEnv->windowResInfo.pResult);

        clearFirstNTimeWindow(pRuntimeEnv, pQInfo->groupIndex);

        if (pQuery->rec.rows > 0) {
          qTrace("QInfo:%p %" PRId64 " rows returned from group results, total:%" PRId64, pQInfo, pQuery->rec.rows,
                 pQuery->rec.total);
          return;
        }
      }
    }

    qTrace("QInfo:%p query over, %" PRId64 " rows are returned", pQInfo, pQuery->rec.total);
    queryCostStatis(pQInfo);
    return;
  }

  // number of points returned during this query
  pQuery->rec.rows = 0;
  int64_t st = taosGetTimestampUs();

  assert(pQInfo->groupInfo.numOfTables == 1);
  SArray* g = taosArrayGetP(pQInfo->groupInfo.pGroupList, 0);
  SGroupItem* item = taosArrayGet(g, 0);

  // group by normal column, sliding window query, interval query are handled by interval query processor
  if (isIntervalQuery(pQuery) || isGroupbyNormalCol(pQuery->pGroupbyExpr)) {  // interval (down sampling operation)
    tableIntervalProcess(pQInfo, item->info);
  } else if (isFixedOutputQuery(pQuery)) {
    tableFixedOutputProcess(pQInfo, item->info);
  } else {  // diff/add/multiply/subtract/division
    assert(pQuery->checkBuffer == 1);
    tableMultiOutputProcess(pQInfo, item->info);
  }

  // record the total elapsed time
  pQInfo->elapsedTime += (taosGetTimestampUs() - st);
  assert(pQInfo->groupInfo.numOfTables == 1);

  /* check if query is killed or not */
  if (isQueryKilled(pQInfo)) {
    qTrace("QInfo:%p query is killed", pQInfo);
  } else {// todo set the table uid and tid in log
    qTrace("QInfo:%p query paused, %" PRId64 " rows returned, numOfTotal:%" PRId64 " rows",
        pQInfo, pQuery->rec.rows, pQuery->rec.total + pQuery->rec.rows);

    if (Q_STATUS_EQUAL(pQuery->status, QUERY_COMPLETED)) {
      queryCostStatis(pQInfo);
    }
  }
}

4832 4833
/*
 * Run one round of a query over the tables held in pQInfo->groupInfo.
 *
 * Interval queries, and fixed-output queries that are neither point-interpolation, group-by-normal-
 * column nor first/last-row queries, go through multiTableQueryProcess(); everything else is
 * handled by sequentialTableProcess(). The time spent is added to pQInfo->elapsedTime.
 */
static void stableQueryImpl(SQInfo *pQInfo) {
  SQuery *pQuery = pQInfo->runtimeEnv.pQuery;
  pQuery->rec.rows = 0;

  int64_t st = taosGetTimestampUs();

  if (isIntervalQuery(pQuery) ||
      (isFixedOutputQuery(pQuery) && (!isPointInterpoQuery(pQuery)) && !isGroupbyNormalCol(pQuery->pGroupbyExpr) &&
      !isFirstLastRowQuery(pQuery))) {
    multiTableQueryProcess(pQInfo);
  } else {
    assert((pQuery->checkBuffer == 1 && pQuery->intervalTime == 0) || isPointInterpoQuery(pQuery) ||
            isFirstLastRowQuery(pQuery) || isGroupbyNormalCol(pQuery->pGroupbyExpr));

    sequentialTableProcess(pQInfo);
  }

  // record the total elapsed time
  pQInfo->elapsedTime += (taosGetTimestampUs() - st);
  //  taosFillSetStartInfo(&pQInfo->runtimeEnv.pFillInfo, pQuery->size, pQInfo->query.fillType);

  if (pQuery->rec.rows == 0) {
    // rec.total is printed with PRId64 elsewhere in this file; "%d" would mismatch the argument type
    qTrace("QInfo:%p over, %d tables queried, %" PRId64 " points are returned", pQInfo, pQInfo->groupInfo.numOfTables,
           pQuery->rec.total);
    //    queryCostStatis(pSupporter);
  }
}

4860
/*
 * Locate the source column referenced by an output expression.
 *
 * When the expression's column flag marks a tag, pTagCols[0 .. numOfTags) is searched by column id;
 * otherwise pQueryMsg->colList[0 .. numOfCols) is searched.
 *
 * @return index of the matching column in the searched list. A missing column is treated as a
 *         programming error (assert); when asserts are compiled out, -1 is returned.
 */
static int32_t getColumnIndexInSource(SQueryTableMsg *pQueryMsg, SSqlFuncMsg *pExprMsg, SColumnInfo* pTagCols) {
  if (TSDB_COL_IS_TAG(pExprMsg->colInfo.flag)) {
    for (int32_t j = 0; j < pQueryMsg->numOfTags; ++j) {
      if (pExprMsg->colInfo.colId == pTagCols[j].colId) {
        return j;
      }
    }
  } else {
    for (int32_t j = 0; j < pQueryMsg->numOfCols; ++j) {
      if (pExprMsg->colInfo.colId == pQueryMsg->colList[j].colId) {
        return j;
      }
    }
  }

  assert(0);
  return -1;  // keeps the function well-defined when assert() is disabled by NDEBUG
}

4885 4886 4887
/*
 * Check that the column referenced by an output expression resolves to a valid index in the list
 * it was looked up in: the tag column list for tag columns, the normal column list otherwise.
 */
bool validateExprColumnInfo(SQueryTableMsg *pQueryMsg, SSqlFuncMsg *pExprMsg, SColumnInfo* pTagCols) {
  int32_t j = getColumnIndexInSource(pQueryMsg, pExprMsg, pTagCols);
  if (j < 0) {
    return false;
  }

  // bound the index by the size of the list that was actually searched
  if (TSDB_COL_IS_TAG(pExprMsg->colInfo.flag)) {
    return j < pQueryMsg->numOfTags;
  }

  return j < pQueryMsg->numOfCols;
}

4890
/*
 * Sanity-check the scalar fields of a (already byte-order converted) query message.
 * The first offending field is logged and makes the whole message invalid.
 */
static bool validateQueryMsg(SQueryTableMsg *pQueryMsg) {
  bool valid = false;

  if (pQueryMsg->intervalTime < 0) {
    qError("qmsg:%p illegal value of interval time %" PRId64, pQueryMsg, pQueryMsg->intervalTime);
  } else if (pQueryMsg->numOfTables <= 0) {
    qError("qmsg:%p illegal value of numOfTables %d", pQueryMsg, pQueryMsg->numOfTables);
  } else if (pQueryMsg->numOfGroupCols < 0) {
    qError("qmsg:%p illegal value of numOfGroupbyCols %d", pQueryMsg, pQueryMsg->numOfGroupCols);
  } else if (pQueryMsg->numOfOutput <= 0 || pQueryMsg->numOfOutput > TSDB_MAX_COLUMNS) {
    qError("qmsg:%p illegal value of output columns %d", pQueryMsg, pQueryMsg->numOfOutput);
  } else {
    valid = true;
  }

  return valid;
}

/*
 * Validate the number of source columns (normal columns + tags) of a query message.
 *
 * A message without any source column is only accepted when every output expression is a
 * TSDB_FUNC_TAGPRJ projection.
 */
static bool validateQuerySourceCols(SQueryTableMsg *pQueryMsg, SSqlFuncMsg** pExprMsg) {
  int32_t numOfTotal = pQueryMsg->numOfCols + pQueryMsg->numOfTags;

  bool outOfRange = (pQueryMsg->numOfCols < 0) || (pQueryMsg->numOfTags < 0) || (numOfTotal > TSDB_MAX_COLUMNS);
  if (outOfRange) {
    qError("qmsg:%p illegal value of numOfCols %d numOfTags:%d", pQueryMsg, pQueryMsg->numOfCols, pQueryMsg->numOfTags);
    return false;
  }

  if (numOfTotal != 0) {
    return true;
  }

  // no source column at all: stop at the first expression that is not a tag projection
  int32_t idx = 0;
  while (idx < pQueryMsg->numOfOutput && pExprMsg[idx]->functionId == TSDB_FUNC_TAGPRJ) {
    ++idx;
  }

  return idx >= pQueryMsg->numOfOutput;
}

4930
/*
 * Build the table id list from the STableIdInfo records stored back to back at pMsg.
 *
 * Each record is byte-swapped in place inside the message buffer and then pushed into a newly
 * created array that is handed back through pTableIdList.
 *
 * @return pointer to the first byte after the last consumed record
 */
static char *createTableIdList(SQueryTableMsg *pQueryMsg, char *pMsg, SArray **pTableIdList) {
  assert(pQueryMsg->numOfTables > 0);

  *pTableIdList = taosArrayInit(pQueryMsg->numOfTables, sizeof(STableIdInfo));

  STableIdInfo *pCursor = (STableIdInfo *)pMsg;
  int32_t       idx = 0;

  while (idx < pQueryMsg->numOfTables) {
    pCursor->tid = htonl(pCursor->tid);
    pCursor->uid = htobe64(pCursor->uid);
    pCursor->key = htobe64(pCursor->key);

    taosArrayPush(*pTableIdList, pCursor);

    ++pCursor;
    ++idx;
  }

  return (char *)pCursor;
}
4948

4949
/**
H
hjxilinx 已提交
4950
 * pQueryMsg->head has been converted before this function is called.
4951
 *
H
hjxilinx 已提交
4952
 * @param pQueryMsg
4953 4954 4955 4956
 * @param pTableIdList
 * @param pExpr
 * @return
 */
4957
static int32_t convertQueryMsg(SQueryTableMsg *pQueryMsg, SArray **pTableIdList, SSqlFuncMsg ***pExpr,
weixin_48148422's avatar
weixin_48148422 已提交
4958
                               char **tagCond, char** tbnameCond, SColIndex **groupbyCols, SColumnInfo** tagCols) {
4959 4960 4961 4962 4963 4964 4965 4966
  pQueryMsg->numOfTables = htonl(pQueryMsg->numOfTables);

  pQueryMsg->window.skey = htobe64(pQueryMsg->window.skey);
  pQueryMsg->window.ekey = htobe64(pQueryMsg->window.ekey);
  pQueryMsg->intervalTime = htobe64(pQueryMsg->intervalTime);
  pQueryMsg->slidingTime = htobe64(pQueryMsg->slidingTime);
  pQueryMsg->limit = htobe64(pQueryMsg->limit);
  pQueryMsg->offset = htobe64(pQueryMsg->offset);
H
hjxilinx 已提交
4967

4968 4969 4970
  pQueryMsg->order = htons(pQueryMsg->order);
  pQueryMsg->orderColId = htons(pQueryMsg->orderColId);
  pQueryMsg->queryType = htons(pQueryMsg->queryType);
weixin_48148422's avatar
weixin_48148422 已提交
4971
  pQueryMsg->tagNameRelType = htons(pQueryMsg->tagNameRelType);
4972 4973

  pQueryMsg->numOfCols = htons(pQueryMsg->numOfCols);
4974
  pQueryMsg->numOfOutput = htons(pQueryMsg->numOfOutput);
H
hjxilinx 已提交
4975
  pQueryMsg->numOfGroupCols = htons(pQueryMsg->numOfGroupCols);
4976 4977 4978
  pQueryMsg->tagCondLen = htons(pQueryMsg->tagCondLen);
  pQueryMsg->tsOffset = htonl(pQueryMsg->tsOffset);
  pQueryMsg->tsLen = htonl(pQueryMsg->tsLen);
H
hjxilinx 已提交
4979
  pQueryMsg->tsNumOfBlocks = htonl(pQueryMsg->tsNumOfBlocks);
4980
  pQueryMsg->tsOrder = htonl(pQueryMsg->tsOrder);
4981
  pQueryMsg->numOfTags = htonl(pQueryMsg->numOfTags);
4982

4983
  // query msg safety check
4984
  if (!validateQueryMsg(pQueryMsg)) {
4985 4986 4987
    return TSDB_CODE_INVALID_QUERY_MSG;
  }

H
hjxilinx 已提交
4988
  char *pMsg = (char *)(pQueryMsg->colList) + sizeof(SColumnInfo) * pQueryMsg->numOfCols;
4989

H
hjxilinx 已提交
4990
  for (int32_t col = 0; col < pQueryMsg->numOfCols; ++col) {
4991 4992
    SColumnInfo *pColInfo = &pQueryMsg->colList[col];

H
hjxilinx 已提交
4993
    pColInfo->colId = htons(pColInfo->colId);
4994
    pColInfo->type = htons(pColInfo->type);
H
hjxilinx 已提交
4995 4996
    pColInfo->bytes = htons(pColInfo->bytes);
    pColInfo->numOfFilters = htons(pColInfo->numOfFilters);
4997

H
hjxilinx 已提交
4998
    assert(pColInfo->type >= TSDB_DATA_TYPE_BOOL && pColInfo->type <= TSDB_DATA_TYPE_NCHAR);
4999

H
hjxilinx 已提交
5000
    int32_t numOfFilters = pColInfo->numOfFilters;
5001
    if (numOfFilters > 0) {
H
hjxilinx 已提交
5002
      pColInfo->filters = calloc(numOfFilters, sizeof(SColumnFilterInfo));
5003 5004 5005
    }

    for (int32_t f = 0; f < numOfFilters; ++f) {
5006 5007 5008 5009
      SColumnFilterInfo *pFilterMsg = (SColumnFilterInfo *)pMsg;
      
      SColumnFilterInfo *pColFilter = &pColInfo->filters[f];
      pColFilter->filterstr = htons(pFilterMsg->filterstr);
5010 5011 5012

      pMsg += sizeof(SColumnFilterInfo);

5013 5014
      if (pColFilter->filterstr) {
        pColFilter->len = htobe64(pFilterMsg->len);
5015

5016
        pColFilter->pz = (int64_t) calloc(1, pColFilter->len + 1 * TSDB_NCHAR_SIZE); // note: null-terminator
5017 5018
        memcpy((void *)pColFilter->pz, pMsg, pColFilter->len);
        pMsg += (pColFilter->len + 1);
5019
      } else {
5020 5021
        pColFilter->lowerBndi = htobe64(pFilterMsg->lowerBndi);
        pColFilter->upperBndi = htobe64(pFilterMsg->upperBndi);
5022 5023
      }

5024 5025
      pColFilter->lowerRelOptr = htons(pFilterMsg->lowerRelOptr);
      pColFilter->upperRelOptr = htons(pFilterMsg->upperRelOptr);
5026 5027 5028
    }
  }

5029 5030
  *pExpr = calloc(pQueryMsg->numOfOutput, POINTER_BYTES);
  SSqlFuncMsg *pExprMsg = (SSqlFuncMsg *)pMsg;
5031

5032
  for (int32_t i = 0; i < pQueryMsg->numOfOutput; ++i) {
5033
    (*pExpr)[i] = pExprMsg;
5034

5035
    pExprMsg->colInfo.colIndex = htons(pExprMsg->colInfo.colIndex);
5036 5037 5038 5039
    pExprMsg->colInfo.colId = htons(pExprMsg->colInfo.colId);
    pExprMsg->colInfo.flag = htons(pExprMsg->colInfo.flag);
    pExprMsg->functionId = htons(pExprMsg->functionId);
    pExprMsg->numOfParams = htons(pExprMsg->numOfParams);
5040

5041
    pMsg += sizeof(SSqlFuncMsg);
5042 5043

    for (int32_t j = 0; j < pExprMsg->numOfParams; ++j) {
5044
      pExprMsg->arg[j].argType = htons(pExprMsg->arg[j].argType);
5045 5046 5047 5048
      pExprMsg->arg[j].argBytes = htons(pExprMsg->arg[j].argBytes);

      if (pExprMsg->arg[j].argType == TSDB_DATA_TYPE_BINARY) {
        pExprMsg->arg[j].argValue.pz = pMsg;
5049
        pMsg += pExprMsg->arg[j].argBytes;  // one more for the string terminated char.
5050 5051 5052 5053 5054
      } else {
        pExprMsg->arg[j].argValue.i64 = htobe64(pExprMsg->arg[j].argValue.i64);
      }
    }

5055
    if (pExprMsg->functionId == TSDB_FUNC_TAG || pExprMsg->functionId == TSDB_FUNC_TAGPRJ ||
5056 5057 5058 5059 5060
               pExprMsg->functionId == TSDB_FUNC_TAG_DUMMY) {
      if (pExprMsg->colInfo.flag != TSDB_COL_TAG) {  // ignore the column  index check for arithmetic expression.
        return TSDB_CODE_INVALID_QUERY_MSG;
      }
    } else {
5061 5062 5063
//      if (!validateExprColumnInfo(pQueryMsg, pExprMsg)) {
//        return TSDB_CODE_INVALID_QUERY_MSG;
//      }
5064 5065
    }

5066
    pExprMsg = (SSqlFuncMsg *)pMsg;
5067
  }
5068

5069 5070
  if (!validateQuerySourceCols(pQueryMsg, *pExpr)) {
    tfree(*pExpr);
5071

5072 5073
    return TSDB_CODE_INVALID_QUERY_MSG;
  }
5074

H
hjxilinx 已提交
5075
  pMsg = createTableIdList(pQueryMsg, pMsg, pTableIdList);
5076

H
hjxilinx 已提交
5077
  if (pQueryMsg->numOfGroupCols > 0) {  // group by tag columns
5078 5079 5080 5081
    *groupbyCols = malloc(pQueryMsg->numOfGroupCols * sizeof(SColIndex));

    for (int32_t i = 0; i < pQueryMsg->numOfGroupCols; ++i) {
      (*groupbyCols)[i].colId = *(int16_t *)pMsg;
5082
      pMsg += sizeof((*groupbyCols)[i].colId);
5083 5084

      (*groupbyCols)[i].colIndex = *(int16_t *)pMsg;
5085 5086
      pMsg += sizeof((*groupbyCols)[i].colIndex);

5087
      (*groupbyCols)[i].flag = *(int16_t *)pMsg;
5088 5089 5090 5091 5092
      pMsg += sizeof((*groupbyCols)[i].flag);

      memcpy((*groupbyCols)[i].name, pMsg, tListLen(groupbyCols[i]->name));
      pMsg += tListLen((*groupbyCols)[i].name);
    }
5093

H
hjxilinx 已提交
5094 5095
    pQueryMsg->orderByIdx = htons(pQueryMsg->orderByIdx);
    pQueryMsg->orderType = htons(pQueryMsg->orderType);
5096 5097
  }

5098 5099
  pQueryMsg->fillType = htons(pQueryMsg->fillType);
  if (pQueryMsg->fillType != TSDB_FILL_NONE) {
5100
    pQueryMsg->fillVal = (uint64_t)(pMsg);
5101 5102

    int64_t *v = (int64_t *)pMsg;
5103
    for (int32_t i = 0; i < pQueryMsg->numOfOutput; ++i) {
5104 5105
      v[i] = htobe64(v[i]);
    }
5106

5107
    pMsg += sizeof(int64_t) * pQueryMsg->numOfOutput;
5108
  }
5109

5110 5111 5112 5113
  if (pQueryMsg->numOfTags > 0) {
    (*tagCols) = calloc(1, sizeof(SColumnInfo) * pQueryMsg->numOfTags);
    for (int32_t i = 0; i < pQueryMsg->numOfTags; ++i) {
      SColumnInfo* pTagCol = (SColumnInfo*) pMsg;
5114

5115 5116 5117 5118
      pTagCol->colId = htons(pTagCol->colId);
      pTagCol->bytes = htons(pTagCol->bytes);
      pTagCol->type  = htons(pTagCol->type);
      pTagCol->numOfFilters = 0;
5119

5120
      (*tagCols)[i] = *pTagCol;
5121
      pMsg += sizeof(SColumnInfo);
5122
    }
H
hjxilinx 已提交
5123
  }
5124

5125 5126 5127 5128 5129 5130
  // the tag query condition expression string is located at the end of query msg
  if (pQueryMsg->tagCondLen > 0) {
    *tagCond = calloc(1, pQueryMsg->tagCondLen);
    memcpy(*tagCond, pMsg, pQueryMsg->tagCondLen);
    pMsg += pQueryMsg->tagCondLen;
  }
5131

weixin_48148422's avatar
weixin_48148422 已提交
5132
  if (*pMsg != 0) {
5133 5134
    size_t len = strlen(pMsg) + 1;
    *tbnameCond = malloc(len);
weixin_48148422's avatar
weixin_48148422 已提交
5135
    strcpy(*tbnameCond, pMsg);
5136
    pMsg += len;
weixin_48148422's avatar
weixin_48148422 已提交
5137
  }
5138

5139 5140
  qTrace("qmsg:%p query %d tables, qrange:%" PRId64 "-%" PRId64 ", numOfGroupbyTagCols:%d, order:%d, "
         "outputCols:%d, numOfCols:%d, interval:%" PRId64 ", fillType:%d, comptsLen:%d, limit:%" PRId64 ", offset:%" PRId64,
5141
         pQueryMsg, pQueryMsg->numOfTables, pQueryMsg->window.skey, pQueryMsg->window.ekey, pQueryMsg->numOfGroupCols,
5142
         pQueryMsg->order, pQueryMsg->numOfOutput, pQueryMsg->numOfCols, pQueryMsg->intervalTime,
5143
         pQueryMsg->fillType, pQueryMsg->tsLen, pQueryMsg->limit, pQueryMsg->offset);
5144 5145 5146 5147

  return 0;
}

H
hjxilinx 已提交
5148
/*
 * Build the arithmetic expression tree of an output column from the serialized expression that is
 * carried in the first argument of the expression message, and store it in pArithExprInfo->pExpr.
 *
 * @return TSDB_CODE_SUCCESS, the code caught from exprTreeFromBinary(), or TSDB_CODE_APP_ERROR when
 *         no tree was produced
 */
static int32_t buildAirthmeticExprFromMsg(SExprInfo *pArithExprInfo, SQueryTableMsg *pQueryMsg) {
  // the format string has a single conversion; the serialized expression is binary and not printed
  qTrace("qmsg:%p create arithmetic expr from binary string", pQueryMsg);

  tExprNode* pExprNode = NULL;
  TRY(32) {
    pExprNode = exprTreeFromBinary(pArithExprInfo->base.arg[0].argValue.pz, pArithExprInfo->base.arg[0].argBytes);
  } CATCH( code ) {
    CLEANUP_EXECUTE();
    return code;
  } END_TRY

  if (pExprNode == NULL) {
    // NOTE(review): the argument is printed with %s although it is handled as length-delimited
    // binary data above -- confirm that it is NUL-terminated.
    qError("qmsg:%p failed to create arithmetic expression string from:%s", pQueryMsg, pArithExprInfo->base.arg[0].argValue.pz);
    return TSDB_CODE_APP_ERROR;
  }

  pArithExprInfo->pExpr = pExprNode;
  return TSDB_CODE_SUCCESS;
}

5168 5169 5170
/*
 * Create the output expression descriptors (SExprInfo) from the expression messages.
 *
 * Pass 1 resolves the source type/size of every expression and derives its result type, result
 * size and intermediate buffer size; the result sizes of the TAG_DUMMY/TS_DUMMY columns are summed.
 * Pass 2 recomputes the result info of top/bottom expressions with that summed length.
 *
 * On success the pExprMsg pointer array is released and the new array is returned through
 * pExprInfo; on failure *pExprInfo stays NULL.
 */
static int32_t createSqlFunctionExprFromMsg(SQueryTableMsg *pQueryMsg, SExprInfo **pExprInfo, SSqlFuncMsg **pExprMsg,
    SColumnInfo* pTagCols) {
  *pExprInfo = NULL;

  SExprInfo *pList = (SExprInfo *)calloc(1, sizeof(SExprInfo) * pQueryMsg->numOfOutput);
  if (pList == NULL) {
    return TSDB_CODE_SERV_OUT_OF_MEMORY;
  }

  bool    isSuperTable = QUERY_IS_STABLE_QUERY(pQueryMsg->queryType);
  int16_t tagLen = 0;

  // pass 1: result type and size of each output column
  for (int32_t i = 0; i < pQueryMsg->numOfOutput; ++i) {
    SExprInfo *pItem = &pList[i];

    pItem->base = *pExprMsg[i];
    pItem->bytes = 0;

    int16_t srcType = 0;
    int16_t srcBytes = 0;

    if (pItem->base.functionId == TSDB_FUNC_ARITHM) {
      // parse the arithmetic expression
      int32_t code = buildAirthmeticExprFromMsg(pItem, pQueryMsg);
      if (code != TSDB_CODE_SUCCESS) {
        tfree(pList);
        return code;
      }

      srcType  = TSDB_DATA_TYPE_DOUBLE;
      srcBytes = tDataTypeDesc[srcType].nSize;
    } else if (pItem->base.colInfo.colId == TSDB_TBNAME_COLUMN_INDEX) {
      // table name pseudo column
      srcType  = TSDB_DATA_TYPE_BINARY;
      srcBytes = TSDB_TABLE_NAME_LEN + VARSTR_HEADER_SIZE;
    } else {
      // normal or tag column: take type and size from the source column
      int32_t j = getColumnIndexInSource(pQueryMsg, &pItem->base, pTagCols);
      assert(j < pQueryMsg->numOfCols || j < pQueryMsg->numOfTags);

      SColumnInfo* pSrcCol = NULL;
      if (TSDB_COL_IS_TAG(pItem->base.colInfo.flag)) {
        pSrcCol = &pTagCols[j];
      } else {
        pSrcCol = &pQueryMsg->colList[j];
      }

      srcType  = pSrcCol->type;
      srcBytes = pSrcCol->bytes;
    }

    int32_t param = pItem->base.arg[0].argValue.i64;
    int32_t status = getResultDataInfo(srcType, srcBytes, pItem->base.functionId, param, &pItem->type, &pItem->bytes,
                                       &pItem->interBytes, 0, isSuperTable);
    if (status != TSDB_CODE_SUCCESS) {
      tfree(pList);
      return TSDB_CODE_INVALID_QUERY_MSG;
    }

    if (pItem->base.functionId == TSDB_FUNC_TAG_DUMMY || pItem->base.functionId == TSDB_FUNC_TS_DUMMY) {
      tagLen += pItem->bytes;
    }

    assert(isValidDataType(pItem->type, pItem->bytes));
  }

  // TODO refactor
  // pass 2: top/bottom results need the accumulated tag length
  for (int32_t i = 0; i < pQueryMsg->numOfOutput; ++i) {
    SExprInfo *pItem = &pList[i];

    pItem->base = *pExprMsg[i];
    int16_t functId = pItem->base.functionId;

    if (functId != TSDB_FUNC_TOP && functId != TSDB_FUNC_BOTTOM) {
      continue;
    }

    int32_t j = getColumnIndexInSource(pQueryMsg, &pItem->base, pTagCols);
    assert(j < pQueryMsg->numOfCols);

    SColumnInfo *pSrcCol = &pQueryMsg->colList[j];

    int32_t ret =
        getResultDataInfo(pSrcCol->type, pSrcCol->bytes, functId, pItem->base.arg[0].argValue.i64,
                          &pItem->type, &pItem->bytes, &pItem->interBytes, tagLen, isSuperTable);
    assert(ret == TSDB_CODE_SUCCESS);
  }

  tfree(pExprMsg);
  *pExprInfo = pList;

  return TSDB_CODE_SUCCESS;
}

5248
/*
 * Create the group-by descriptor of a query from the message and the decoded group-by columns.
 *
 * @return NULL when the query has no group-by column, or when the allocation fails (in which case
 *         *code is set to TSDB_CODE_SERV_OUT_OF_MEMORY); otherwise the new descriptor
 */
static SSqlGroupbyExpr *createGroupbyExprFromMsg(SQueryTableMsg *pQueryMsg, SColIndex *pColIndex, int32_t *code) {
  SSqlGroupbyExpr *pResult = NULL;

  if (pQueryMsg->numOfGroupCols != 0) {
    // using group by tag columns
    pResult = (SSqlGroupbyExpr *)calloc(1, sizeof(SSqlGroupbyExpr));

    if (pResult == NULL) {
      *code = TSDB_CODE_SERV_OUT_OF_MEMORY;
    } else {
      pResult->numOfGroupCols = pQueryMsg->numOfGroupCols;
      pResult->orderType = pQueryMsg->orderType;
      pResult->orderIndex = pQueryMsg->orderByIdx;

      pResult->columnInfo = taosArrayInit(pQueryMsg->numOfGroupCols, sizeof(SColIndex));

      int32_t k = 0;
      while (k < pQueryMsg->numOfGroupCols) {
        taosArrayPush(pResult->columnInfo, &pColIndex[k]);
        k += 1;
      }
    }
  }

  return pResult;
}

5272
/*
 * Build pQuery->pFilterInfo: one entry per column that carries filter conditions, each with one
 * filter element (condition + callback) per condition.
 *
 * A condition with a lower bound (>, >=) and an upper bound (<, <=) uses a range filter callback;
 * any other condition uses the value filter callback selected by its single valid relation.
 *
 * @return TSDB_CODE_SUCCESS, TSDB_CODE_INVALID_QUERY_MSG for an unusable filter condition, or
 *         TSDB_CODE_SERV_OUT_OF_MEMORY if an allocation fails
 */
static int32_t createFilterInfo(void *pQInfo, SQuery *pQuery) {
  for (int32_t i = 0; i < pQuery->numOfCols; ++i) {
    if (pQuery->colList[i].numOfFilters > 0) {
      pQuery->numOfFilterCols++;
    }
  }

  if (pQuery->numOfFilterCols == 0) {
    return TSDB_CODE_SUCCESS;
  }

  pQuery->pFilterInfo = calloc(1, sizeof(SSingleColumnFilterInfo) * pQuery->numOfFilterCols);
  if (pQuery->pFilterInfo == NULL) {
    return TSDB_CODE_SERV_OUT_OF_MEMORY;
  }

  for (int32_t i = 0, j = 0; i < pQuery->numOfCols; ++i) {
    if (pQuery->colList[i].numOfFilters > 0) {
      SSingleColumnFilterInfo *pFilterInfo = &pQuery->pFilterInfo[j];

      // plain struct assignment; a memcpy sized by another struct type could read past colList[i]
      pFilterInfo->info = pQuery->colList[i];

      pFilterInfo->numOfFilters = pQuery->colList[i].numOfFilters;
      pFilterInfo->pFilters = calloc(pFilterInfo->numOfFilters, sizeof(SColumnFilterElem));
      if (pFilterInfo->pFilters == NULL) {
        return TSDB_CODE_SERV_OUT_OF_MEMORY;
      }

      for (int32_t f = 0; f < pFilterInfo->numOfFilters; ++f) {
        SColumnFilterElem *pSingleColFilter = &pFilterInfo->pFilters[f];
        pSingleColFilter->filterInfo = pQuery->colList[i].filters[f];

        int32_t lower = pSingleColFilter->filterInfo.lowerRelOptr;
        int32_t upper = pSingleColFilter->filterInfo.upperRelOptr;

        if (lower == TSDB_RELATION_INVALID && upper == TSDB_RELATION_INVALID) {
          qError("QInfo:%p invalid filter info", pQInfo);
          return TSDB_CODE_INVALID_QUERY_MSG;
        }

        int16_t type  = pQuery->colList[i].type;
        int16_t bytes = pQuery->colList[i].bytes;

        // todo refactor
        __filter_func_t *rangeFilterArray = getRangeFilterFuncArray(type);
        __filter_func_t *filterArray = getValueFilterFuncArray(type);

        if (rangeFilterArray == NULL && filterArray == NULL) {
          qError("QInfo:%p failed to get filter function, invalid data type:%d", pQInfo, type);
          return TSDB_CODE_INVALID_QUERY_MSG;
        }

        if ((lower == TSDB_RELATION_GREATER_EQUAL || lower == TSDB_RELATION_GREATER) &&
            (upper == TSDB_RELATION_LESS_EQUAL || upper == TSDB_RELATION_LESS)) {
          // only one of the two tables may exist for a type; never index a missing one
          if (rangeFilterArray == NULL) {
            qError("QInfo:%p failed to get filter function, invalid data type:%d", pQInfo, type);
            return TSDB_CODE_INVALID_QUERY_MSG;
          }

          if (lower == TSDB_RELATION_GREATER_EQUAL) {
            if (upper == TSDB_RELATION_LESS_EQUAL) {
              pSingleColFilter->fp = rangeFilterArray[4];
            } else {
              pSingleColFilter->fp = rangeFilterArray[2];
            }
          } else {
            if (upper == TSDB_RELATION_LESS_EQUAL) {
              pSingleColFilter->fp = rangeFilterArray[3];
            } else {
              pSingleColFilter->fp = rangeFilterArray[1];
            }
          }
        } else {  // set callback filter function
          if (filterArray == NULL) {
            qError("QInfo:%p failed to get filter function, invalid data type:%d", pQInfo, type);
            return TSDB_CODE_INVALID_QUERY_MSG;
          }

          if (lower != TSDB_RELATION_INVALID) {
            pSingleColFilter->fp = filterArray[lower];

            if (upper != TSDB_RELATION_INVALID) {
              qError("pQInfo:%p failed to get filter function, invalid filter condition", pQInfo);
              return TSDB_CODE_INVALID_QUERY_MSG;
            }
          } else {
            pSingleColFilter->fp = filterArray[upper];
          }
        }
        assert(pSingleColFilter->fp != NULL);
        pSingleColFilter->bytes = bytes;
      }

      j++;
    }
  }

  return TSDB_CODE_SUCCESS;
}

5357
/*
 * Re-resolve the colIndex of every output expression against the query's own column lists:
 * normal columns are matched by id in pQuery->colList, tag columns in pQuery->tagColList.
 * Arithmetic expressions are skipped. A column that cannot be found is a programming error
 * (assert), except for the table name pseudo column among the tags.
 */
static void doUpdateExprColumnIndex(SQuery *pQuery) {
  // test the pointer itself before it is dereferenced
  assert(pQuery != NULL && pQuery->pSelectExpr != NULL);

  for (int32_t k = 0; k < pQuery->numOfOutput; ++k) {
    SSqlFuncMsg *pSqlExprMsg = &pQuery->pSelectExpr[k].base;
    if (pSqlExprMsg->functionId == TSDB_FUNC_ARITHM) {
      continue;
    }

    // todo opt performance
    SColIndex *pColIndex = &pSqlExprMsg->colInfo;
    if (!TSDB_COL_IS_TAG(pColIndex->flag)) {
      int32_t f = 0;
      for (f = 0; f < pQuery->numOfCols; ++f) {
        if (pColIndex->colId == pQuery->colList[f].colId) {
          pColIndex->colIndex = f;
          break;
        }
      }

      assert (f < pQuery->numOfCols);
    } else {
      int32_t f = 0;
      for (f = 0; f < pQuery->numOfTags; ++f) {
        if (pColIndex->colId == pQuery->tagColList[f].colId) {
          pColIndex->colIndex = f;
          break;
        }
      }

      assert(f < pQuery->numOfTags || pColIndex->colId == TSDB_TBNAME_COLUMN_INDEX);
    }
  }
}

weixin_48148422's avatar
weixin_48148422 已提交
5392 5393 5394 5395 5396 5397 5398 5399 5400 5401

/* Three-way comparison of two STableIdInfo records by uid: 1, -1 or 0. */
static int compareTableIdInfo( const void* a, const void* b ) {
  const STableIdInfo* lhs = (const STableIdInfo*)a;
  const STableIdInfo* rhs = (const STableIdInfo*)b;

  if (lhs->uid == rhs->uid) {
    return 0;
  }

  return (lhs->uid > rhs->uid) ? 1 : -1;
}

static SQInfo *createQInfoImpl(SQueryTableMsg *pQueryMsg, SArray* pTableIdList, SSqlGroupbyExpr *pGroupbyExpr, SExprInfo *pExprs,
5402
                               STableGroupInfo *groupInfo, SColumnInfo* pTagCols) {
5403 5404
  SQInfo *pQInfo = (SQInfo *)calloc(1, sizeof(SQInfo));
  if (pQInfo == NULL) {
5405
    return NULL;
5406 5407 5408 5409 5410 5411
  }

  SQuery *pQuery = calloc(1, sizeof(SQuery));
  pQInfo->runtimeEnv.pQuery = pQuery;

  int16_t numOfCols = pQueryMsg->numOfCols;
5412
  int16_t numOfOutput = pQueryMsg->numOfOutput;
5413

5414
  pQuery->numOfCols       = numOfCols;
H
hjxilinx 已提交
5415
  pQuery->numOfOutput     = numOfOutput;
5416 5417 5418
  pQuery->limit.limit     = pQueryMsg->limit;
  pQuery->limit.offset    = pQueryMsg->offset;
  pQuery->order.order     = pQueryMsg->order;
5419
  pQuery->order.orderColId = pQueryMsg->orderColId;
5420 5421 5422 5423
  pQuery->pSelectExpr     = pExprs;
  pQuery->pGroupbyExpr    = pGroupbyExpr;
  pQuery->intervalTime    = pQueryMsg->intervalTime;
  pQuery->slidingTime     = pQueryMsg->slidingTime;
5424
  pQuery->slidingTimeUnit = pQueryMsg->slidingTimeUnit;
5425
  pQuery->fillType        = pQueryMsg->fillType;
5426
  pQuery->numOfTags       = pQueryMsg->numOfTags;
5427
  
5428
  // todo do not allocate ??
5429
  pQuery->colList = calloc(numOfCols, sizeof(SSingleColumnFilterInfo));
5430
  if (pQuery->colList == NULL) {
5431
    goto _cleanup;
5432
  }
5433

H
hjxilinx 已提交
5434
  for (int16_t i = 0; i < numOfCols; ++i) {
5435
    pQuery->colList[i] = pQueryMsg->colList[i];
5436
    pQuery->colList[i].filters = tscFilterInfoClone(pQueryMsg->colList[i].filters, pQuery->colList[i].numOfFilters);
H
hjxilinx 已提交
5437
  }
5438

5439
  pQuery->tagColList = pTagCols;
5440

5441
  // calculate the result row size
5442 5443 5444
  for (int16_t col = 0; col < numOfOutput; ++col) {
    assert(pExprs[col].bytes > 0);
    pQuery->rowSize += pExprs[col].bytes;
5445
  }
5446

5447
  doUpdateExprColumnIndex(pQuery);
5448

5449
  int32_t ret = createFilterInfo(pQInfo, pQuery);
5450
  if (ret != TSDB_CODE_SUCCESS) {
5451
    goto _cleanup;
5452 5453 5454
  }

  // prepare the result buffer
5455
  pQuery->sdata = (tFilePage **)calloc(pQuery->numOfOutput, POINTER_BYTES);
5456
  if (pQuery->sdata == NULL) {
5457
    goto _cleanup;
5458 5459
  }

H
hjxilinx 已提交
5460
  // set the output buffer capacity
H
hjxilinx 已提交
5461
  pQuery->rec.capacity = 4096;
5462
  pQuery->rec.threshold = 4000;
5463

5464
  for (int32_t col = 0; col < pQuery->numOfOutput; ++col) {
5465
    assert(pExprs[col].interBytes >= pExprs[col].bytes);
5466 5467

    // allocate additional memory for interResults that are usually larger then final results
5468 5469
    size_t size = (pQuery->rec.capacity + 1) * pExprs[col].bytes + pExprs[col].interBytes + sizeof(tFilePage);
    pQuery->sdata[col] = (tFilePage *)calloc(1, size);
5470
    if (pQuery->sdata[col] == NULL) {
5471
      goto _cleanup;
5472 5473 5474
    }
  }

5475
  if (pQuery->fillType != TSDB_FILL_NONE) {
5476 5477
    pQuery->fillVal = malloc(sizeof(int64_t) * pQuery->numOfOutput);
    if (pQuery->fillVal == NULL) {
5478
      goto _cleanup;
5479 5480 5481
    }

    // the first column is the timestamp
5482
    memcpy(pQuery->fillVal, (char *)pQueryMsg->fillVal, pQuery->numOfOutput * sizeof(int64_t));
5483 5484 5485
  }

  // to make sure third party won't overwrite this structure
5486
  pQInfo->signature = pQInfo;
5487

H
hjxilinx 已提交
5488 5489
  pQInfo->tableIdGroupInfo = *groupInfo;
  size_t numOfGroups = taosArrayGetSize(groupInfo->pGroupList);
5490

H
hjxilinx 已提交
5491 5492 5493
  pQInfo->groupInfo.pGroupList = taosArrayInit(numOfGroups, POINTER_BYTES);
  pQInfo->groupInfo.numOfTables = groupInfo->numOfTables;
  
weixin_48148422's avatar
weixin_48148422 已提交
5494 5495
  int tableIndex = 0;
  STimeWindow window = pQueryMsg->window;
5496
  taosArraySort(pTableIdList, compareTableIdInfo);
5497

H
hjxilinx 已提交
5498 5499 5500
  for(int32_t i = 0; i < numOfGroups; ++i) {
    SArray* pa = taosArrayGetP(groupInfo->pGroupList, i);
    size_t s = taosArrayGetSize(pa);
5501

H
hjxilinx 已提交
5502
    SArray* p1 = taosArrayInit(s, sizeof(SGroupItem));
5503

H
hjxilinx 已提交
5504
    for(int32_t j = 0; j < s; ++j) {
weixin_48148422's avatar
weixin_48148422 已提交
5505 5506 5507
      STableId id = *(STableId*) taosArrayGet(pa, j);
      SGroupItem item = { .id = id };
      // NOTE: compare STableIdInfo with STableId
5508
      STableIdInfo* pTableId = taosArraySearch( pTableIdList, &id, compareTableIdInfo);
weixin_48148422's avatar
weixin_48148422 已提交
5509 5510 5511
      if (pTableId != NULL ) {
        window.skey = pTableId->key;
      } else {
B
Bomin Zhang 已提交
5512
        window.skey = pQueryMsg->window.skey;
weixin_48148422's avatar
weixin_48148422 已提交
5513 5514
      }
      item.info = createTableQueryInfo(&pQInfo->runtimeEnv, item.id, window);
5515
      item.info->groupIndex = i;
weixin_48148422's avatar
weixin_48148422 已提交
5516
      item.info->tableIndex = tableIndex++;
H
hjxilinx 已提交
5517 5518
      taosArrayPush(p1, &item);
    }
5519

H
hjxilinx 已提交
5520 5521
    taosArrayPush(pQInfo->groupInfo.pGroupList, &p1);
  }
5522

weixin_48148422's avatar
weixin_48148422 已提交
5523 5524
  pQInfo->arrTableIdInfo = taosArrayInit(tableIndex, sizeof(STableIdInfo));

5525
  pQuery->pos = -1;
5526
  pQuery->window = pQueryMsg->window;
5527

5528
  if (sem_init(&pQInfo->dataReady, 0, 0) != 0) {
S
slguan 已提交
5529
    qError("QInfo:%p init dataReady sem failed, reason:%s", pQInfo, strerror(errno));
5530
    goto _cleanup;
5531
  }
5532

5533
  colIdCheck(pQuery);
5534

S
slguan 已提交
5535
  qTrace("qmsg:%p QInfo:%p created", pQueryMsg, pQInfo);
5536 5537
  return pQInfo;

5538
_cleanup:
5539
  tfree(pQuery->fillVal);
5540 5541

  if (pQuery->sdata != NULL) {
5542
    for (int16_t col = 0; col < pQuery->numOfOutput; ++col) {
5543 5544 5545 5546 5547 5548 5549 5550 5551 5552 5553 5554 5555 5556 5557 5558
      tfree(pQuery->sdata[col]);
    }
  }

  tfree(pQuery->sdata);
  tfree(pQuery->pFilterInfo);
  tfree(pQuery->colList);

  tfree(pExprs);
  tfree(pGroupbyExpr);

  tfree(pQInfo);

  return NULL;
}

H
hjxilinx 已提交
5559
static bool isValidQInfo(void *param) {
H
hjxilinx 已提交
5560 5561 5562 5563
  SQInfo *pQInfo = (SQInfo *)param;
  if (pQInfo == NULL) {
    return false;
  }
5564

H
hjxilinx 已提交
5565 5566 5567 5568
  /*
   * pQInfo->signature may be changed by another thread, so we assign value of signature
   * into local variable, then compare by using local variable
   */
5569
  uint64_t sig = (uint64_t)pQInfo->signature;
H
hjxilinx 已提交
5570 5571 5572
  return (sig == (uint64_t)pQInfo);
}

H
hjxilinx 已提交
5573 5574
static void freeQInfo(SQInfo *pQInfo);

5575
static int32_t initQInfo(SQueryTableMsg *pQueryMsg, void *tsdb, int32_t vgId, SQInfo *pQInfo, bool isSTable) {
H
hjxilinx 已提交
5576
  int32_t code = TSDB_CODE_SUCCESS;
5577
  SQuery *pQuery = pQInfo->runtimeEnv.pQuery;
5578

H
hjxilinx 已提交
5579 5580 5581 5582
  STSBuf *pTSBuf = NULL;
  if (pQueryMsg->tsLen > 0) {  // open new file to save the result
    char *tsBlock = (char *)pQueryMsg + pQueryMsg->tsOffset;
    pTSBuf = tsBufCreateFromCompBlocks(tsBlock, pQueryMsg->tsNumOfBlocks, pQueryMsg->tsLen, pQueryMsg->tsOrder);
5583

H
hjxilinx 已提交
5584 5585 5586
    tsBufResetPos(pTSBuf);
    tsBufNextPos(pTSBuf);
  }
5587

5588 5589 5590
  // only the successful complete requries the sem_post/over = 1 operations.
  if ((QUERY_IS_ASC_QUERY(pQuery) && (pQuery->window.skey > pQuery->window.ekey)) ||
      (!QUERY_IS_ASC_QUERY(pQuery) && (pQuery->window.ekey > pQuery->window.skey))) {
S
slguan 已提交
5591
    qTrace("QInfo:%p no result in time range %" PRId64 "-%" PRId64 ", order %d", pQInfo, pQuery->window.skey,
5592
           pQuery->window.ekey, pQuery->order.order);
5593
    setQueryStatus(pQuery, QUERY_COMPLETED);
5594

5595 5596 5597
    sem_post(&pQInfo->dataReady);
    return TSDB_CODE_SUCCESS;
  }
5598 5599 5600 5601 5602 5603 5604 5605
  
  if (pQInfo->groupInfo.numOfTables == 0) {
    qTrace("QInfo:%p no table qualified for tag filter, abort query", pQInfo);
    setQueryStatus(pQuery, QUERY_COMPLETED);
  
    sem_post(&pQInfo->dataReady);
    return TSDB_CODE_SUCCESS;
  }
H
hjxilinx 已提交
5606 5607

  // filter the qualified
5608
  if ((code = doInitQInfo(pQInfo, pTSBuf, tsdb, vgId, isSTable)) != TSDB_CODE_SUCCESS) {
H
hjxilinx 已提交
5609 5610
    goto _error;
  }
H
hjxilinx 已提交
5611
  
H
hjxilinx 已提交
5612 5613 5614 5615
  return code;

_error:
  // table query ref will be decrease during error handling
5616
  freeQInfo(pQInfo);
H
hjxilinx 已提交
5617 5618 5619 5620 5621 5622 5623
  return code;
}

/*
 * Release a query handle and everything reachable from it.
 *
 * The call is a no-op when the handle does not pass isValidQInfo(). Otherwise
 * the query is flagged as killed, the output buffers, the runtime environment,
 * the filter/expression/group-by descriptions and both table group lists are
 * released, and finally the SQInfo object itself is zeroed and freed. Zeroing
 * destroys the signature, so a stale handle fails the validity check afterwards.
 *
 * @param pQInfo  handle to release, may be NULL or already invalid
 */
static void freeQInfo(SQInfo *pQInfo) {
  if (!isValidQInfo(pQInfo)) {
    return;
  }

  SQuery *pQuery = pQInfo->runtimeEnv.pQuery;
  setQueryKilled(pQInfo);

  qTrace("QInfo:%p start to free QInfo", pQInfo);

  // sdata may be missing on a partially built query, same guard as the creation cleanup path
  if (pQuery->sdata != NULL) {
    for (int32_t col = 0; col < pQuery->numOfOutput; ++col) {
      tfree(pQuery->sdata[col]);
    }
  }

  sem_destroy(&(pQInfo->dataReady));
  teardownQueryRuntimeEnv(&pQInfo->runtimeEnv);

  for (int32_t i = 0; i < pQuery->numOfFilterCols; ++i) {
    SSingleColumnFilterInfo *pColFilter = &pQuery->pFilterInfo[i];
    if (pColFilter->numOfFilters > 0) {
      tfree(pColFilter->pFilters);
    }
  }

  if (pQuery->pSelectExpr != NULL) {
    for (int32_t i = 0; i < pQuery->numOfOutput; ++i) {
      SExprInfo *pExprInfo = &pQuery->pSelectExpr[i];

      if (pExprInfo->pExpr != NULL) {
        tExprTreeDestroy(&pExprInfo->pExpr, NULL);
      }
    }

    tfree(pQuery->pSelectExpr);
  }

  tfree(pQuery->fillVal);

  // todo refactor, extract method to destroytableDataInfo
  int32_t numOfGroups = (int32_t)taosArrayGetSize(pQInfo->groupInfo.pGroupList);
  for (int32_t i = 0; i < numOfGroups; ++i) {
    SArray *p = taosArrayGetP(pQInfo->groupInfo.pGroupList, i);

    int32_t num = (int32_t)taosArrayGetSize(p);
    for (int32_t j = 0; j < num; ++j) {
      SGroupItem *item = taosArrayGet(p, j);
      if (item->info != NULL) {
        destroyTableQueryInfo(item->info, pQuery->numOfOutput);
      }
    }

    taosArrayDestroy(p);
  }

  taosArrayDestroy(pQInfo->groupInfo.pGroupList);

  /*
   * Walk the table-id group list with its own element count. Reusing the count
   * of groupInfo.pGroupList reads out of bounds (or leaks groups) whenever the
   * two lists do not have the same number of groups.
   */
  if (pQInfo->tableIdGroupInfo.pGroupList != NULL) {
    int32_t numOfIdGroups = (int32_t)taosArrayGetSize(pQInfo->tableIdGroupInfo.pGroupList);
    for (int32_t i = 0; i < numOfIdGroups; ++i) {
      SArray *p = taosArrayGetP(pQInfo->tableIdGroupInfo.pGroupList, i);
      taosArrayDestroy(p);
    }
  }

  taosArrayDestroy(pQInfo->tableIdGroupInfo.pGroupList);
  taosArrayDestroy(pQInfo->arrTableIdInfo);

  if (pQuery->pGroupbyExpr != NULL) {
    taosArrayDestroy(pQuery->pGroupbyExpr->columnInfo);
    tfree(pQuery->pGroupbyExpr);
  }

  tfree(pQuery->tagColList);
  tfree(pQuery->pFilterInfo);
  tfree(pQuery->colList);
  tfree(pQuery->sdata);

  tfree(pQuery);

  qTrace("QInfo:%p QInfo is freed", pQInfo);

  // destroy signature, in order to avoid the query process pass the object safety check
  memset(pQInfo, 0, sizeof(SQInfo));
  tfree(pQInfo);
}

H
hjxilinx 已提交
5704
/*
 * Compute the number of payload bytes needed for the current result.
 *
 * Regular queries need rowSize * (*numOfRows) bytes. For a ts-comp query that
 * produced rows, the result lives in the file named by sdata[0]->data: the file
 * size is both returned and written back into *numOfRows.
 * TODO handle the case that the file is too large to send back one time
 *
 * @param pQInfo     query handle
 * @param numOfRows  in: number of result rows; out: file size for a ts-comp query
 * @return           payload size in bytes, 0 when the ts-comp file cannot be inspected
 */
static size_t getResultSize(SQInfo *pQInfo, int64_t *numOfRows) {
  SQuery *pQuery = pQInfo->runtimeEnv.pQuery;

  // plain result set: size follows directly from the row count
  if (!isTSCompQuery(pQuery) || (*numOfRows) <= 0) {
    return pQuery->rowSize * (*numOfRows);
  }

  struct stat fileStat;
  if (stat(pQuery->sdata[0]->data, &fileStat) != 0) {
    qError("QInfo:%p failed to get file info, path:%s, reason:%s", pQInfo, pQuery->sdata[0]->data, strerror(errno));
    return 0;
  }

  *numOfRows = fileStat.st_size;
  return fileStat.st_size;
}
5725

H
hjxilinx 已提交
5726 5727 5728
/*
 * Copy the current result of the query into the response payload.
 *
 * For a ts-comp query the result is the content of the temporary file named by
 * sdata[0]->data: the file is read into data and then unlinked. For every other
 * query doCopyQueryResultToMsg() fills the payload. Afterwards the running total
 * is updated and the query is marked QUERY_OVER once the limit is reached.
 *
 * todo if interpolation exists, the result may be dump to client by several rounds
 *
 * @param pQInfo  query handle
 * @param data    destination buffer, sized by the caller
 * @return        always TSDB_CODE_SUCCESS; file errors are only logged
 *                (todo return the error code to client)
 */
static int32_t doDumpQueryResult(SQInfo *pQInfo, char *data) {
  // the remained number of retrieved rows, not the interpolated result
  SQuery *pQuery = pQInfo->runtimeEnv.pQuery;

  // load data from file to msg buffer
  if (isTSCompQuery(pQuery)) {
    int32_t fd = open(pQuery->sdata[0]->data, O_RDONLY);

    // make sure file exist
    if (FD_VALID(fd)) {
      // lseek reports failure with -1: never let that turn into a huge unsigned read length
      off_t fileSize = lseek(fd, 0, SEEK_END);
      if (fileSize < 0 || lseek(fd, 0, SEEK_SET) < 0) {
        qError("QInfo:%p failed to get size of ts-comp tmp file, path:%s, reason:%s", pQInfo, pQuery->sdata[0]->data,
               strerror(errno));
      } else {
        size_t s = (size_t)fileSize;
        qTrace("QInfo:%p ts comp data return, file:%s, size:%zu", pQInfo, pQuery->sdata[0]->data, s);

        // read may return less than requested, keep going until the whole file is loaded
        size_t loaded = 0;
        while (loaded < s) {
          ssize_t n = read(fd, data + loaded, s - loaded);
          if (n > 0) {
            loaded += (size_t)n;
          } else if (n == 0) {
            break;  // unexpected end of file
          } else if (errno != EINTR) {
            qError("QInfo:%p failed to read ts-comp tmp file, path:%s, reason:%s", pQInfo, pQuery->sdata[0]->data,
                   strerror(errno));
            break;
          }
        }
      }

      close(fd);
      unlink(pQuery->sdata[0]->data);
    } else {
      // todo return the error code to client
      qError("QInfo:%p failed to open tmp file to send ts-comp data to client, path:%s, reason:%s", pQInfo,
             pQuery->sdata[0]->data, strerror(errno));
    }

    // all data returned, set query over
    if (Q_STATUS_EQUAL(pQuery->status, QUERY_COMPLETED)) {
      setQueryStatus(pQuery, QUERY_OVER);
    }
  } else {
    doCopyQueryResultToMsg(pQInfo, pQuery->rec.rows, data);
  }

  pQuery->rec.total += pQuery->rec.rows;

  // the row counters are 64-bit values, "%d" would read them with the wrong width
  qTrace("QInfo:%p current numOfRes rows:%" PRId64 ", total:%" PRId64, pQInfo, (int64_t)pQuery->rec.rows,
         (int64_t)pQuery->rec.total);

  if (pQuery->limit.limit > 0 && pQuery->limit.limit == pQuery->rec.total) {
    qTrace("QInfo:%p results limitation reached, limitation:%"PRId64, pQInfo, pQuery->limit.limit);
    setQueryStatus(pQuery, QUERY_OVER);
  }

  return TSDB_CODE_SUCCESS;
}

5771
/*
 * Build a query handle from a query message.
 *
 * The message is unpacked by convertQueryMsg(), the select and group-by
 * expressions are created, the set of tables is resolved according to the
 * query type, and the handle is created by createQInfoImpl() and completed by
 * initQInfo().
 *
 * @param tsdb       storage handle used to resolve tables
 * @param vgId       vnode group id forwarded to initQInfo()
 * @param pQueryMsg  query message, must not be NULL
 * @param pQInfo     out: the new handle on success, NULL on any failure that
 *                   happens after the message was unpacked
 * @return           TSDB_CODE_SUCCESS or an error code
 */
int32_t qCreateQueryInfo(void *tsdb, int32_t vgId, SQueryTableMsg *pQueryMsg, qinfo_t *pQInfo) {
  assert(pQueryMsg != NULL);

  int32_t code = TSDB_CODE_SUCCESS;

  char *        tagCond = NULL, *tbnameCond = NULL;
  SArray *      pTableIdList = NULL;
  SSqlFuncMsg **pExprMsg = NULL;
  SColIndex *   pGroupColIndex = NULL;
  SColumnInfo*  pTagColumnInfo = NULL;

  SExprInfo *      pExprs = NULL;
  SSqlGroupbyExpr *pGroupbyExpr = NULL;
  bool             isSTableQuery = false;
  STableGroupInfo  groupInfo = {0};

  if ((code = convertQueryMsg(pQueryMsg, &pTableIdList, &pExprMsg, &tagCond, &tbnameCond, &pGroupColIndex, &pTagColumnInfo)) !=
         TSDB_CODE_SUCCESS) {
    return code;
  }

  if (pQueryMsg->numOfTables <= 0) {
    qError("Invalid number of tables to query, numOfTables:%d", pQueryMsg->numOfTables);
    code = TSDB_CODE_INVALID_QUERY_MSG;
    goto _over;
  }

  if (pTableIdList == NULL || taosArrayGetSize(pTableIdList) == 0) {
    qError("qmsg:%p, SQueryTableMsg wrong format", pQueryMsg);
    code = TSDB_CODE_INVALID_QUERY_MSG;
    goto _over;
  }

  if ((code = createSqlFunctionExprFromMsg(pQueryMsg, &pExprs, pExprMsg, pTagColumnInfo)) != TSDB_CODE_SUCCESS) {
    goto _over;
  }

  pGroupbyExpr = createGroupbyExprFromMsg(pQueryMsg, pGroupColIndex, &code);
  if ((pGroupbyExpr == NULL && pQueryMsg->numOfGroupCols != 0) || code != TSDB_CODE_SUCCESS) {
    // never report success without a handle
    // NOTE(review): assumes a NULL result without an error code means allocation failure - confirm
    if (code == TSDB_CODE_SUCCESS) {
      code = TSDB_CODE_SERV_OUT_OF_MEMORY;
    }
    goto _over;
  }

  //todo multitable_query??
  if (TSDB_QUERY_HAS_TYPE(pQueryMsg->queryType, TSDB_QUERY_TYPE_MULTITABLE_QUERY|TSDB_QUERY_TYPE_TABLE_QUERY)) {
    isSTableQuery = TSDB_QUERY_HAS_TYPE(pQueryMsg->queryType, TSDB_QUERY_TYPE_MULTITABLE_QUERY);

    STableIdInfo *id = taosArrayGet(pTableIdList, 0);
    qTrace("qmsg:%p query table, uid:%"PRId64", tid:%d", pQueryMsg, id->uid, id->tid);

    if ((code = tsdbGetOneTableGroup(tsdb, id->uid, &groupInfo)) != TSDB_CODE_SUCCESS) {
      goto _over;
    }
  } else if (TSDB_QUERY_HAS_TYPE(pQueryMsg->queryType, TSDB_QUERY_TYPE_STABLE_QUERY)) {
    isSTableQuery = true;
    // TODO: need a macro from TSDB to check if table is super table,
    // also note there's possiblity that only one table in the super table
    if (taosArrayGetSize(pTableIdList) == 1) {
      STableIdInfo *id = taosArrayGet(pTableIdList, 0);
      // if array size is 1 and assert super table

      // group by normal column, do not pass the group by condition to tsdb to group table into different group
      int32_t numOfGroupByCols = pQueryMsg->numOfGroupCols;
      if (pQueryMsg->numOfGroupCols == 1 && !TSDB_COL_IS_TAG(pGroupColIndex->flag)) {
        numOfGroupByCols = 0;
      }

      // a failed tag filtering must abort the query instead of continuing with an unusable table group
      code = tsdbQuerySTableByTagCond(tsdb, id->uid, tagCond, pQueryMsg->tagCondLen, pQueryMsg->tagNameRelType,
                                      tbnameCond, &groupInfo, pGroupColIndex, numOfGroupByCols);
      if (code != TSDB_CODE_SUCCESS) {
        goto _over;
      }
    } else {
      /*
       * The tables are listed explicitly: put all of them into one group.
       * groupInfo starts zeroed, so the table count has to be taken from the
       * id list first; otherwise nothing is copied and the query sees no table.
       */
      groupInfo.numOfTables = taosArrayGetSize(pTableIdList);

      SArray* pTableGroup = taosArrayInit(1, POINTER_BYTES);

      SArray* sa = taosArrayInit(groupInfo.numOfTables, sizeof(STableId));
      for(int32_t i = 0; i < groupInfo.numOfTables; ++i) {
        // NOTE: an STableIdInfo entry is pushed into an array of STableId sized elements
        STableIdInfo* tableId = taosArrayGet(pTableIdList, i);
        taosArrayPush(sa, tableId);
      }
      taosArrayPush(pTableGroup, &sa);
      groupInfo.pGroupList = pTableGroup;
    }
  } else {
    assert(0);
  }

  (*pQInfo) = createQInfoImpl(pQueryMsg, pTableIdList, pGroupbyExpr, pExprs, &groupInfo, pTagColumnInfo);

  // from here on both expression objects belong to createQInfoImpl(): it keeps them in the
  // handle, or releases them in its own cleanup path when it fails
  pExprs = NULL;
  pGroupbyExpr = NULL;

  if ((*pQInfo) == NULL) {
    code = TSDB_CODE_SERV_OUT_OF_MEMORY;
    goto _over;
  }

  code = initQInfo(pQueryMsg, tsdb, vgId, *pQInfo, isSTableQuery);

_over:
  tfree(tagCond);
  tfree(tbnameCond);
  taosArrayDestroy(pTableIdList);

  // only non-NULL when the function bailed out before createQInfoImpl() was called
  if (pGroupbyExpr != NULL) {
    taosArrayDestroy(pGroupbyExpr->columnInfo);
    tfree(pGroupbyExpr);
  }
  tfree(pExprs);

  if (code != TSDB_CODE_SUCCESS) {
    /*
     * Do not free *pQInfo here: initQInfo() already hands the handle to
     * freeQInfo() when it fails, and on every other failure path no handle
     * was created. Freeing it again would be a double free.
     */
    *pQInfo = NULL;
  }

  // if failed to add ref for all meters in this query, abort current query
  return code;
}

H
hjxilinx 已提交
5876
/*
 * Public entry point to dispose of a query handle.
 *
 * Logs the completion of the query and passes the handle to freeQInfo(),
 * which ignores handles that do not pass its validity check.
 *
 * @param pQInfo  handle to dispose of
 */
void qDestroyQueryInfo(qinfo_t pQInfo) {
  qTrace("QInfo:%p query completed", pQInfo);

  SQInfo *pInfo = (SQInfo *)pQInfo;
  freeQInfo(pInfo);
}

H
hjxilinx 已提交
5881
/*
 * Run one round of query execution for the given handle.
 *
 * Nothing is executed when the handle is NULL, does not carry its own address
 * as signature, or has been killed. Otherwise one of three execution paths is
 * taken (tag-only result, super table query, plain table query) and dataReady
 * is posted afterwards.
 *
 * @param qinfo  query handle
 */
void qTableQuery(qinfo_t qinfo) {
  SQInfo *pQInfo = (SQInfo *)qinfo;

  bool released = (pQInfo == NULL) || (pQInfo->signature != pQInfo);
  if (released) {
    qTrace("QInfo:%p has been freed, no need to execute", pQInfo);
    return;
  }

  if (isQueryKilled(pQInfo)) {
    qTrace("QInfo:%p it is already killed, abort", pQInfo);
    return;
  }

  qTrace("QInfo:%p query task is launched", pQInfo);

  SQueryRuntimeEnv *pRuntimeEnv = &pQInfo->runtimeEnv;
  if (onlyQueryTags(pRuntimeEnv->pQuery)) {
    // todo support the limit/offset
    buildTagQueryResult(pQInfo);
  } else if (pRuntimeEnv->stableQuery) {
    stableQueryImpl(pQInfo);
  } else {
    tableQueryImpl(pQInfo);
  }

  // wake up the retrieve side waiting in qRetrieveQueryResultInfo()
  sem_post(&pQInfo->dataReady);
}

H
hjxilinx 已提交
5908
/*
 * Block until the current round of the query has produced its result.
 *
 * @param qinfo  query handle
 * @return       TSDB_CODE_INVALID_QHANDLE for a NULL or invalid handle, otherwise
 *               the code stored in the handle (returned without waiting when the
 *               query has been killed)
 */
int32_t qRetrieveQueryResultInfo(qinfo_t qinfo) {
  SQInfo *pQInfo = (SQInfo *)qinfo;

  if (pQInfo == NULL || !isValidQInfo(pQInfo)) {
    return TSDB_CODE_INVALID_QHANDLE;
  }

  SQuery *pQuery = pQInfo->runtimeEnv.pQuery;
  if (isQueryKilled(pQInfo)) {
    qTrace("QInfo:%p query is killed, code:%d", pQInfo, pQInfo->code);
    return pQInfo->code;
  }

  // a signal may interrupt the wait before the result is posted: retry in that case
  while (sem_wait(&pQInfo->dataReady) != 0) {
    if (errno != EINTR) {
      qError("QInfo:%p failed to wait for the query result, reason:%s", pQInfo, strerror(errno));
      break;
    }
  }

  // rec.rows is a 64-bit counter, "%d" would read it with the wrong width
  qTrace("QInfo:%p retrieve result info, rowsize:%d, rows:%" PRId64 ", code:%d", pQInfo, pQuery->rowSize,
         (int64_t)pQuery->rec.rows, pQInfo->code);

  return pQInfo->code;
}
5927

H
hjxilinx 已提交
5928
/*
 * Tell whether another retrieve request can still deliver rows.
 *
 * @param qinfo  query handle
 * @return       false for a NULL/stale handle, a handle carrying an error code,
 *               or a query in QUERY_OVER state; true when the result buffer is
 *               full or the query completed but its result was not dumped yet
 */
bool qHasMoreResultsToRetrieve(qinfo_t qinfo) {
  SQInfo *pQInfo = (SQInfo *)qinfo;

  if (pQInfo == NULL || pQInfo->signature != pQInfo || pQInfo->code != TSDB_CODE_SUCCESS) {
    return false;
  }

  SQuery *pQuery = pQInfo->runtimeEnv.pQuery;
  if (Q_STATUS_EQUAL(pQuery->status, QUERY_OVER)) {
    return false;
  }

  if (Q_STATUS_EQUAL(pQuery->status, QUERY_RESBUF_FULL) || Q_STATUS_EQUAL(pQuery->status, QUERY_COMPLETED)) {
    return true;
  }

  // no other status is expected here
  assert(0);

  // with assertions compiled out the function must still return a defined value
  return false;
}

5947 5948 5949
/*
 * Allocate a retrieve response and fill it with the current query result.
 *
 * The response is sized for the result rows plus one int32_t and one
 * STableIdInfo per entry of arrTableIdInfo. When there is no row to return or
 * the handle carries an error code, the query is marked QUERY_OVER.
 *
 * @param qinfo    query handle
 * @param pRsp     out: response allocated with rpcMallocCont(), NULL when the
 *                 allocation fails
 * @param contLen  out: length of the response in bytes, 0 when the allocation fails
 * @return         TSDB_CODE_INVALID_QHANDLE for a NULL or invalid handle,
 *                 TSDB_CODE_SERV_OUT_OF_MEMORY when the response cannot be
 *                 allocated, otherwise the result of dumping the rows or the
 *                 code stored in the handle
 */
int32_t qDumpRetrieveResult(qinfo_t qinfo, SRetrieveTableRsp **pRsp, int32_t *contLen) {
  SQInfo *pQInfo = (SQInfo *)qinfo;

  if (pQInfo == NULL || !isValidQInfo(pQInfo)) {
    return TSDB_CODE_INVALID_QHANDLE;
  }

  SQuery *pQuery = pQInfo->runtimeEnv.pQuery;
  size_t  size = getResultSize(pQInfo, &pQuery->rec.rows);
  size += sizeof(int32_t);
  size += sizeof(STableIdInfo) * taosArrayGetSize(pQInfo->arrTableIdInfo);
  *contLen = size + sizeof(SRetrieveTableRsp);

  *pRsp = (SRetrieveTableRsp *)rpcMallocCont(*contLen);
  if (*pRsp == NULL) {
    qError("QInfo:%p failed to allocate %d bytes for the retrieve response", pQInfo, *contLen);
    *contLen = 0;
    return TSDB_CODE_SERV_OUT_OF_MEMORY;
  }

  (*pRsp)->numOfRows = htonl(pQuery->rec.rows);

  int32_t code = pQInfo->code;
  if (code == TSDB_CODE_SUCCESS) {
    (*pRsp)->offset = htobe64(pQuery->limit.offset);
    (*pRsp)->useconds = htobe64(pQInfo->elapsedTime);
  } else {
    (*pRsp)->offset = 0;
    (*pRsp)->useconds = 0;
  }

  (*pRsp)->precision = htons(pQuery->precision);
  if (pQuery->rec.rows > 0 && code == TSDB_CODE_SUCCESS) {
    code = doDumpQueryResult(pQInfo, (*pRsp)->data);
  } else {
    setQueryStatus(pQuery, QUERY_OVER);
    code = pQInfo->code;
  }

  // set the flag in both directions, do not depend on the allocator clearing the buffer
  if (isQueryKilled(pQInfo) || Q_STATUS_EQUAL(pQuery->status, QUERY_OVER)) {
    (*pRsp)->completed = 1;  // notify no more result to client
  } else {
    (*pRsp)->completed = 0;
  }

  return code;
}
H
hjxilinx 已提交
5993 5994 5995 5996

/*
 * Produce the result of a query that only reads tags / table names.
 *
 * The tables of the single table group are visited in order and one output row
 * is written per table into pQuery->sdata:
 *  - TSDB_FUNC_TID_TAG: one var-length column holding uid, tid, vgId and the
 *    value of the requested tag, packed back to back;
 *  - otherwise: one column per select expression, either the table name or the
 *    value of a tag.
 * Finally rec.rows is set to the number of tables and the query is marked
 * QUERY_COMPLETED. When the group list is empty the function returns without
 * touching the query.
 *
 * @param pQInfo  query handle
 */
static void buildTagQueryResult(SQInfo* pQInfo) {
  SQueryRuntimeEnv *pRuntimeEnv = &pQInfo->runtimeEnv;
  SQuery *          pQuery = pRuntimeEnv->pQuery;

  size_t num = taosArrayGetSize(pQInfo->groupInfo.pGroupList);
  assert(num == 0 || num == 1);
  if (num == 0) {
    return;
  }

  SArray* pa = taosArrayGetP(pQInfo->groupInfo.pGroupList, 0);
  num = taosArrayGetSize(pa);

  assert(num == pQInfo->groupInfo.numOfTables);

  int32_t functionId = pQuery->pSelectExpr[0].base.functionId;
  if (functionId == TSDB_FUNC_TID_TAG) { // return the tags & table Id
    assert(pQuery->numOfOutput == 1);
    SExprInfo* pExprInfo = &pQuery->pSelectExpr[0];

    int32_t rsize = pExprInfo->bytes;

    for(size_t i = 0; i < num; ++i) {
      SGroupItem* item = taosArrayGet(pa, i);

      char* output = pQuery->sdata[0]->data + i * rsize;
      varDataSetLen(output, rsize - VARSTR_HEADER_SIZE);

      output = varDataVal(output);

      // the packed fields are not naturally aligned inside the row: copy the bytes
      // instead of storing through a casted pointer
      int64_t uid = item->id.uid;
      memcpy(output, &uid, sizeof(uid));
      output += sizeof(item->id.uid);

      int32_t tid = item->id.tid;
      memcpy(output, &tid, sizeof(tid));
      output += sizeof(item->id.tid);

      int32_t vgId = pQInfo->vgId;
      memcpy(output, &vgId, sizeof(vgId));
      output += sizeof(pQInfo->vgId);

      // reset the out-parameters for every table so a failed lookup is detectable
      int16_t type = 0, bytes = 0;
      char*   data = NULL;
      tsdbGetTableTagVal(pQInfo->tsdb, &item->id, pExprInfo->base.colInfo.colId, &type, &bytes, &data);

      // NOTE(review): assumes a NULL data pointer means "no value available" - the field is left untouched
      if (data != NULL && bytes > 0) {
        memcpy(output, data, bytes);
      }
    }

    qTrace("QInfo:%p create (tableId, tag) info completed, rows:%zu", pQInfo, num);
  } else {  // return only the tags|table name etc.
    SExprInfo* pExprInfo = pQuery->pSelectExpr;

    for(size_t i = 0; i < num; ++i) {
      SGroupItem* item = taosArrayGet(pa, i);

      for(int32_t j = 0; j < pQuery->numOfOutput; ++j) {
        int16_t type = 0, bytes = 0;
        char*   data = NULL;

        // todo check the return value, refactor codes
        if (pExprInfo[j].base.colInfo.colId == TSDB_TBNAME_COLUMN_INDEX) {
          data = tsdbGetTableName(pQInfo->tsdb, &item->id, &bytes);
          if (data == NULL) {
            continue;
          }

          char* dst = pQuery->sdata[j]->data + i * (TSDB_TABLE_NAME_LEN + VARSTR_HEADER_SIZE);
          memcpy(dst, data, varDataTLen(data));
        } else {// todo refactor, return the true length of binary|nchar data
          tsdbGetTableTagVal(pQInfo->tsdb, &item->id, pExprInfo[j].base.colInfo.colId, &type, &bytes, &data);
          if (data == NULL) {
            // NOTE(review): assumes NULL means the tag has no value for this table - confirm
            continue;
          }

          assert(bytes <= pExprInfo[j].bytes && type == pExprInfo[j].type);

          char* dst = pQuery->sdata[j]->data + i * pExprInfo[j].bytes;
          if (type == TSDB_DATA_TYPE_BINARY || type == TSDB_DATA_TYPE_NCHAR) {
            memcpy(dst, data, varDataTLen(data));
          } else {
            memcpy(dst, data, bytes);
          }
        }
      }
    }

    pQInfo->tableIndex = pQInfo->groupInfo.numOfTables;
    qTrace("QInfo:%p create tag values results completed, rows:%zu", pQInfo, num);
  }

  pQuery->rec.rows = num;
  setQueryStatus(pQuery, QUERY_COMPLETED);
}