qExecutor.c 206.9 KB
Newer Older
1 2 3 4 5 6 7 8 9 10 11 12 13 14
/*
 * Copyright (c) 2019 TAOS Data, Inc. <jhtao@taosdata.com>
 *
 * This program is free software: you can use, redistribute, and/or modify
 * it under the terms of the GNU Affero General Public License, version 3
 * or later ("AGPL"), as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.
 *
 * You should have received a copy of the GNU Affero General Public License
 * along with this program. If not, see <http://www.gnu.org/licenses/>.
 */
15
#include "qfill.h"
16 17 18 19
#include "os.h"

#include "hash.h"
#include "hashfunc.h"
20 21
#include "qExecutor.h"
#include "qUtil.h"
H
hjxilinx 已提交
22
#include "qast.h"
23
#include "qresultBuf.h"
H
hjxilinx 已提交
24
#include "query.h"
S
slguan 已提交
25
#include "queryLog.h"
26
#include "taosmsg.h"
27
#include "tdataformat.h"
28
#include "tlosertree.h"
29
#include "tscUtil.h"  // todo move the function to common module
30 31
#include "tscompression.h"
#include "ttime.h"
32 33 34 35 36 37 38 39 40

/**
 * Check whether the primary column is loaded by default; otherwise, the program
 * is forced to load the primary column explicitly.
 */
/* true when at least one bit of s is set in p */
#define Q_STATUS_EQUAL(p, s) (((p) & (s)) != 0)
/* true when the column flag f has the TSDB_COL_TAG bit set */
#define TSDB_COL_IS_TAG(f) (((f)&TSDB_COL_TAG) != 0)
/* true when the order of query q maps to the ascending forward step */
#define QUERY_IS_ASC_QUERY(q) (GET_FORWARD_DIRECTION_FACTOR((q)->order.order) == QUERY_ASC_FORWARD_STEP)

/* test / assign the scanFlag member of a runtime environment */
#define IS_MASTER_SCAN(runtime)        ((runtime)->scanFlag == MASTER_SCAN)
#define IS_REVERSE_SCAN(runtime)       ((runtime)->scanFlag == REVERSE_SCAN)
#define SET_MASTER_SCAN_FLAG(runtime)  ((runtime)->scanFlag = MASTER_SCAN)
#define SET_REVERSE_SCAN_FLAG(runtime) ((runtime)->scanFlag = REVERSE_SCAN)

/* get the enclosing SQInfo address from the address of its runtimeEnv member */
#define GET_QINFO_ADDR(x) ((void *)((char *)(x)-offsetof(SQInfo, runtimeEnv)))

/* position reached from (query)->pos after `index` steps of size `step` */
#define GET_COL_DATA_POS(query, index, step) ((query)->pos + (index) * (step))
/* flip n in place between TSDB_ORDER_ASC and TSDB_ORDER_DESC; n is evaluated more than once */
#define SWITCH_ORDER(n) (((n) = ((n) == TSDB_ORDER_ASC) ? TSDB_ORDER_DESC : TSDB_ORDER_ASC))

/* byte width / data type of the source column referenced by the colidx-th select expression */
#define GET_COLUMN_BYTES(query, colidx) \
  ((query)->colList[(query)->pSelectExpr[colidx].base.colInfo.colIndex].bytes)
#define GET_COLUMN_TYPE(query, colidx) ((query)->colList[(query)->pSelectExpr[colidx].base.colInfo.colIndex].type)
55

56
/*
 * Query execution status flags. Every value is a distinct bit, and callers in this
 * file combine them (e.g. QUERY_COMPLETED | QUERY_RESBUF_FULL).
 */
enum {
  // set when the query starts to execute
  QUERY_NOT_COMPLETED = 0x1u,

  /* The result output buffer is full and the current query is paused.
   * This status only exists for group-by clauses and diff/add/division/multiply queries.
   */
  QUERY_RESBUF_FULL = 0x2u,

  /* The query is over:
   * 1. used by queries that produce a single result row, e.g., count/sum/first/last/avg, etc.
   * 2. also set when all data within the queried time window has been handled
   */
  QUERY_COMPLETED = 0x4u,

  /* Used while the result has not been completely returned to the client;
   * usually seen for interval queries with the interpolation option.
   */
  QUERY_OVER = 0x8u,
};
76 77

/*
 * Result codes for a timestamp-join comparison.
 * NOTE(review): meaning inferred from the constant names only; no user of these
 * values is visible in this part of the file -- confirm against the join code.
 */
enum {
  TS_JOIN_TS_EQUAL       = 0,
  TS_JOIN_TS_NOT_EQUALS  = 1,
  TS_JOIN_TAG_NOT_EQUALS = 2,
};

83
typedef struct {
84 85 86 87 88 89
  int32_t     status;       // query status
  TSKEY       lastKey;      // the lastKey value before query executed
  STimeWindow w;            // whole query time window
  STimeWindow curWindow;    // current query window
  int32_t     windowIndex;  // index of active time window result for interval query
  STSCursor   cur;
90 91
} SQueryStatusInfo;

92
#define CLEAR_QUERY_STATUS(q, st)   ((q)->status &= (~(st)))
93
static void setQueryStatus(SQuery *pQuery, int8_t status);
94

H
hjxilinx 已提交
95
/* A query is treated as an interval query when its intervalTime is positive. */
static bool isIntervalQuery(SQuery *pQuery) {
  return 0 < pQuery->intervalTime;
}
96

H
hjxilinx 已提交
97
// todo move to utility
98
static int32_t mergeIntoGroupResultImpl(SQInfo *pQInfo, SArray *group);
99

H
hjxilinx 已提交
100
static void setWindowResOutputBuf(SQueryRuntimeEnv *pRuntimeEnv, SWindowResult *pResult);
H
Haojun Liao 已提交
101
static void setWindowResOutputBufInitCtx(SQueryRuntimeEnv *pRuntimeEnv, SWindowResult *pResult);
102 103 104
static void resetMergeResultBuf(SQuery *pQuery, SQLFunctionCtx *pCtx, SResultInfo *pResultInfo);
static bool functionNeedToExecute(SQueryRuntimeEnv *pRuntimeEnv, SQLFunctionCtx *pCtx, int32_t functionId);
static void getNextTimeWindow(SQuery *pQuery, STimeWindow *pTimeWindow);
105

106 107 108
static void setExecParams(SQuery *pQuery, SQLFunctionCtx *pCtx, void* inputData, TSKEY *tsCol, SDataBlockInfo* pBlockInfo,
                          SDataStatis *pStatis, void *param, int32_t colIndex);

109
static void initCtxOutputBuf(SQueryRuntimeEnv *pRuntimeEnv);
110
static void destroyTableQueryInfo(STableQueryInfo *pTableQueryInfo, int32_t numOfCols);
111 112
static void resetCtxOutputBuf(SQueryRuntimeEnv *pRuntimeEnv);
static bool hasMainOutput(SQuery *pQuery);
H
hjxilinx 已提交
113
static void buildTagQueryResult(SQInfo *pQInfo);
114

H
hjxilinx 已提交
115
static int32_t setAdditionalInfo(SQInfo *pQInfo, STableId *pTableId, STableQueryInfo *pTableQueryInfo);
116
static int32_t flushFromResultBuf(SQInfo *pQInfo);
117

118
/**
 * Apply all column filters of the query to one row.
 *
 * For every filtered column the element at row elemPos is located in the column
 * data; a NULL element disqualifies the row. Otherwise the row passes for that
 * column as soon as any one of the column's filter elements accepts the value.
 *
 * @param pQuery   query holding numOfFilterCols entries in pFilterInfo
 * @param elemPos  row position inside the currently loaded column data
 * @return         true if every filtered column accepts the row, false otherwise
 *                 (a column with zero filter elements never accepts)
 */
bool doFilterData(SQuery *pQuery, int32_t elemPos) {
  for (int32_t k = 0; k < pQuery->numOfFilterCols; ++k) {
    SSingleColumnFilterInfo *pFilterInfo = &pQuery->pFilterInfo[k];

    char *pElem = pFilterInfo->pData + pFilterInfo->info.bytes * elemPos;
    if (isNull(pElem, pFilterInfo->info.type)) {
      return false;
    }

    bool qualified = false;
    for (int32_t j = 0; j < pFilterInfo->numOfFilters; ++j) {
      SColumnFilterElem *pFilterElem = &pFilterInfo->pFilters[j];

      // the same element is passed as both value arguments of the filter callback
      if (pFilterElem->fp(pFilterElem, pElem, pElem)) {
        qualified = true;
        break;
      }
    }

    if (!qualified) {
      return false;
    }
  }

  return true;
}

/**
 * Get the number of result rows produced so far, i.e. the largest numOfRes
 * among the output function contexts that are taken into account.
 *
 * @param pRuntimeEnv  runtime environment providing the query and its pCtx array
 * @return             the maximum numOfRes found (0 if none)
 */
int64_t getNumOfResult(SQueryRuntimeEnv *pRuntimeEnv) {
  SQuery *pQuery = pRuntimeEnv->pQuery;
  bool    hasMainFunction = hasMainOutput(pQuery);

  int64_t maxOutput = 0;
  for (int32_t j = 0; j < pQuery->numOfOutput; ++j) {
    int32_t functionId = pQuery->pSelectExpr[j].base.functionId;

    /*
     * ts, tag, tagprj function can not decide the output number of current query
     * the number of output result is decided by main output
     */
    if (hasMainFunction &&
        (functionId == TSDB_FUNC_TS || functionId == TSDB_FUNC_TAG || functionId == TSDB_FUNC_TAGPRJ)) {
      continue;
    }

    SResultInfo *pResInfo = GET_RES_INFO(&pRuntimeEnv->pCtx[j]);
    if (pResInfo != NULL && maxOutput < pResInfo->numOfRes) {
      maxOutput = pResInfo->numOfRes;
    }
  }

  assert(maxOutput >= 0);
  return maxOutput;
}

172 173 174 175 176 177 178 179 180
/*
 * The number of results needs to be updated because the offset value was updated.
 *
 * Sets numOfRes of every output context to the given value, except for the
 * ts / tag / tagprj / ts_dummy functions, which are left untouched. The new value
 * must be strictly smaller than the current one (asserted).
 */
void updateNumOfResult(SQueryRuntimeEnv *pRuntimeEnv, int32_t numOfRes) {
  SQuery *pQuery = pRuntimeEnv->pQuery;

  for (int32_t j = 0; j < pQuery->numOfOutput; ++j) {
    SResultInfo *pResInfo = GET_RES_INFO(&pRuntimeEnv->pCtx[j]);

    int16_t functionId = pRuntimeEnv->pCtx[j].functionId;
    if (functionId == TSDB_FUNC_TS || functionId == TSDB_FUNC_TAG || functionId == TSDB_FUNC_TAGPRJ ||
        functionId == TSDB_FUNC_TS_DUMMY) {
      continue;
    }

    assert(pResInfo->numOfRes > numOfRes);
    pResInfo->numOfRes = numOfRes;
  }
}

192 193 194 195 196 197 198 199 200
/* Map a group index to its result id: a fixed base plus a fixed stride per group. */
static int32_t getGroupResultId(int32_t groupIndex) {
  enum { GROUP_RESULT_ID_BASE = 200000, GROUP_RESULT_ID_STRIDE = 10000 };

  int32_t groupOffset = groupIndex * GROUP_RESULT_ID_STRIDE;
  return GROUP_RESULT_ID_BASE + groupOffset;
}

/**
 * Check whether the group-by clause contains a normal (non-tag) column.
 *
 * @param pGroupbyExpr  group-by expression, may be NULL
 * @return              true if any group-by column is flagged TSDB_COL_NORMAL;
 *                      false for a NULL/empty expression or when none is found
 */
bool isGroupbyNormalCol(SSqlGroupbyExpr *pGroupbyExpr) {
  if (pGroupbyExpr == NULL || pGroupbyExpr->numOfGroupCols == 0) {
    return false;
  }

  for (int32_t i = 0; i < pGroupbyExpr->numOfGroupCols; ++i) {
    SColIndex *pColIndex = taosArrayGet(pGroupbyExpr->columnInfo, i);
    if (pColIndex->flag == TSDB_COL_NORMAL) {
      /*
       * make sure the normal column locates at the second position if tbname exists in group by clause
       */
      if (pGroupbyExpr->numOfGroupCols > 1) {
        assert(pColIndex->colIndex > 0);
      }

      return true;
    }
  }

  return false;
}

/**
 * Get the data type of the normal column used in the group-by clause.
 *
 * The first group-by column flagged TSDB_COL_NORMAL supplies the column id, which
 * is then looked up in the column list of the query.
 *
 * @param pQuery        query whose colList is searched
 * @param pGroupbyExpr  group-by expression, must not be NULL
 * @return              the type of the matching column, or TSDB_DATA_TYPE_NULL if
 *                      no column of the query matches
 */
int16_t getGroupbyColumnType(SQuery *pQuery, SSqlGroupbyExpr *pGroupbyExpr) {
  assert(pGroupbyExpr != NULL);

  int32_t colId = -2;  // placeholder id used when no normal group-by column exists
  int16_t type = TSDB_DATA_TYPE_NULL;

  for (int32_t i = 0; i < pGroupbyExpr->numOfGroupCols; ++i) {
    SColIndex *pColIndex = taosArrayGet(pGroupbyExpr->columnInfo, i);
    if (pColIndex->flag == TSDB_COL_NORMAL) {
      colId = pColIndex->colId;
      break;
    }
  }

  for (int32_t i = 0; i < pQuery->numOfCols; ++i) {
    if (colId == pQuery->colList[i].colId) {
      type = pQuery->colList[i].type;
      break;
    }
  }

  return type;
}

/**
 * Check whether the query combines a selectivity function with tag output.
 *
 * @param pQuery  query whose select expressions are inspected
 * @return        true if at least one output is TSDB_FUNC_TAG_DUMMY or
 *                TSDB_FUNC_TS_DUMMY and at least one other output function has
 *                the TSDB_FUNCSTATE_SELECTIVITY status bit
 */
bool isSelectivityWithTagsQuery(SQuery *pQuery) {
  bool    hasTags = false;
  int32_t numOfSelectivity = 0;

  for (int32_t i = 0; i < pQuery->numOfOutput; ++i) {
    int32_t functId = pQuery->pSelectExpr[i].base.functionId;
    if (functId == TSDB_FUNC_TAG_DUMMY || functId == TSDB_FUNC_TS_DUMMY) {
      hasTags = true;
      continue;
    }

    if ((aAggs[functId].nStatus & TSDB_FUNCSTATE_SELECTIVITY) != 0) {
      numOfSelectivity++;
    }
  }

  if (numOfSelectivity > 0 && hasTags) {
    return true;
  }

  return false;
}

266
/* A query is a ts-comp query when its first select expression is TSDB_FUNC_TS_COMP. */
bool isTSCompQuery(SQuery *pQuery) {
  int32_t firstFunctionId = pQuery->pSelectExpr[0].base.functionId;
  return TSDB_FUNC_TS_COMP == firstFunctionId;
}
267

268 269 270 271
/**
 * Enforce the LIMIT clause on the rows produced in the current round.
 *
 * When a positive limit exists and the rows already returned plus the rows of the
 * current round exceed it, the current row count is cut down so that the total
 * equals the limit and the query status is set to QUERY_COMPLETED.
 *
 * @param pRuntimeEnv  runtime environment of the query
 * @return             true if the current result was truncated, false otherwise
 */
static bool limitResults(SQueryRuntimeEnv* pRuntimeEnv) {
  SQInfo* pQInfo = GET_QINFO_ADDR(pRuntimeEnv);
  SQuery* pQuery = pRuntimeEnv->pQuery;

  if ((pQuery->limit.limit > 0) && (pQuery->rec.total + pQuery->rec.rows > pQuery->limit.limit)) {
    pQuery->rec.rows = pQuery->limit.limit - pQuery->rec.total;

    // rec.rows is derived from 64-bit operands; print it as a 64-bit value so the
    // format specifier always matches the argument that is passed
    qTrace("QInfo:%p discard remain data due to result limitation, limit:%"PRId64", current return:%"PRId64", total:%"PRId64,
        pQInfo, pQuery->limit.limit, (int64_t)pQuery->rec.rows, pQuery->rec.total + pQuery->rec.rows);
    assert(pQuery->rec.rows >= 0);
    setQueryStatus(pQuery, QUERY_COMPLETED);
    return true;
  }

  return false;
}

/**
 * Check whether any output function of the query is TSDB_FUNC_TOP or TSDB_FUNC_BOTTOM.
 *
 * @param pQuery  query whose select expressions are inspected
 * @return        true if a top/bottom function is present, false otherwise
 */
static bool isTopBottomQuery(SQuery *pQuery) {
  for (int32_t i = 0; i < pQuery->numOfOutput; ++i) {
    int32_t functionId = pQuery->pSelectExpr[i].base.functionId;
    if (functionId == TSDB_FUNC_TS) {
      continue;
    }

    if (functionId == TSDB_FUNC_TOP || functionId == TSDB_FUNC_BOTTOM) {
      return true;
    }
  }

  return false;
}

H
Haojun Liao 已提交
300
/**
 * Find the block statistics entry that belongs to the column of one select expression.
 *
 * @param pQuery     query providing pSelectExpr
 * @param pStatis    array of statistics entries of the current block
 * @param numOfCols  number of entries in pStatis
 * @param index      index of the select expression
 * @return           the entry whose colId matches the expression's column, or NULL
 *                   for a tag column or when no entry matches
 */
static SDataStatis *getStatisInfo(SQuery *pQuery, SDataStatis *pStatis, int32_t numOfCols, int32_t index) {
  // for a tag column, no corresponding field info
  SColIndex *pColIndex = &pQuery->pSelectExpr[index].base.colInfo;
  if (TSDB_COL_IS_TAG(pColIndex->flag)) {
    return NULL;
  }

  /*
   * Choose the right column field info by field id, since the file block may be out of date,
   * which means the newest table schema is not equal to the schema of this block.
   * TODO: speedup by using bsearch
   */
  for (int32_t i = 0; i < numOfCols; ++i) {
    if (pColIndex->colId == pStatis[i].colId) {
      return &pStatis[i];
    }
  }

  return NULL;
}

321 322 323 324 325 326 327 328
/**
 * @param pQuery
 * @param col
 * @param pDataBlockInfo
 * @param pStatis
 * @param pColStatis
 * @return
 */
H
Haojun Liao 已提交
329
static bool hasNullValue(SQuery *pQuery, int32_t col, int32_t numOfCols, SDataStatis *pStatis, SDataStatis **pColStatis) {
330
  SColIndex *pColIndex = &pQuery->pSelectExpr[col].base.colInfo;
331
  if (TSDB_COL_IS_TAG(pColIndex->flag)) {
332 333
    return false;
  }
334

335 336 337 338
  // query on primary timestamp column, not null value at all
  if (pColIndex->colId == PRIMARYKEY_TIMESTAMP_COL_INDEX) {
    return false;
  }
339

340
  if (pStatis != NULL) {
H
Haojun Liao 已提交
341
    *pColStatis = getStatisInfo(pQuery, pStatis, numOfCols, col);
H
hjxilinx 已提交
342 343
  } else {
    *pColStatis = NULL;
344
  }
345

346 347 348
  if ((*pColStatis) != NULL && (*pColStatis)->numOfNull == 0) {
    return false;
  }
349

350 351 352 353 354 355
  return true;
}

/**
 * Locate the window result that belongs to a key, creating a new slot if the key
 * has not been seen before.
 *
 * The key is looked up in the hash list of the window result info. On a miss the
 * next free slot is assigned to the key; the result array is doubled first when
 * it is full.
 *
 * @param pRuntimeEnv     runtime environment of the query
 * @param pWindowResInfo  window result container; curIndex is updated
 * @param pData           key bytes
 * @param bytes           length of the key in bytes
 * @return                the window result of the key, or NULL if the result array
 *                        could not be enlarged (the container is left unchanged)
 */
static SWindowResult *doSetTimeWindowFromKey(SQueryRuntimeEnv *pRuntimeEnv, SWindowResInfo *pWindowResInfo, char *pData,
                                             int16_t bytes) {
  SQuery *pQuery = pRuntimeEnv->pQuery;

  int32_t *p1 = (int32_t *)taosHashGet(pWindowResInfo->hashList, pData, bytes);
  if (p1 != NULL) {
    pWindowResInfo->curIndex = *p1;
  } else {  // more than the capacity, reallocate the resources
    if (pWindowResInfo->size >= pWindowResInfo->capacity) {
      int64_t newCap = (int64_t)pWindowResInfo->capacity * 2;

      char *t = realloc(pWindowResInfo->pResult, newCap * sizeof(SWindowResult));
      if (t == NULL) {
        // the old array is still valid but too small; initializing the slots
        // beyond the old capacity would write out of bounds, so give up here
        return NULL;
      }

      pWindowResInfo->pResult = (SWindowResult *)t;
      // doubling adds exactly `capacity` new slots behind the old ones
      memset(&pWindowResInfo->pResult[pWindowResInfo->capacity], 0, sizeof(SWindowResult) * pWindowResInfo->capacity);

      for (int32_t i = pWindowResInfo->capacity; i < newCap; ++i) {
        SPosInfo pos = {-1, -1};
        createQueryResultInfo(pQuery, &pWindowResInfo->pResult[i], pRuntimeEnv->stableQuery, &pos);
      }

      pWindowResInfo->capacity = newCap;
    }

    // add a new result set for a new group
    pWindowResInfo->curIndex = pWindowResInfo->size++;
    taosHashPut(pWindowResInfo->hashList, pData, bytes, (char *)&pWindowResInfo->curIndex, sizeof(int32_t));
  }

  return getWindowResult(pWindowResInfo, pWindowResInfo->curIndex);
}

/**
 * Get the correct time window according to the handled timestamp.
 *
 * Starts from the previously stored start key (no active window yet) or from the
 * current window, and shifts it by whole multiples of the sliding time until it
 * covers ts.
 *
 * @param pWindowResInfo  window result container (curIndex / prevSKey are read)
 * @param ts              timestamp that must lie inside the returned window
 * @param pQuery          query providing intervalTime, slidingTime, window and order
 * @return                a window with skey <= ts <= ekey (asserted)
 */
static STimeWindow getActiveTimeWindow(SWindowResInfo *pWindowResInfo, int64_t ts, SQuery *pQuery) {
  STimeWindow w = {0};

  if (pWindowResInfo->curIndex == -1) {  // the first window, from the previous stored value
    w.skey = pWindowResInfo->prevSKey;
    w.ekey = w.skey + pQuery->intervalTime - 1;
  } else {
    int32_t slot = curTimeWindow(pWindowResInfo);
    w = getWindowResult(pWindowResInfo, slot)->window;
  }

  if (w.skey > ts || w.ekey < ts) {
    int64_t st = w.skey;

    // window starts after ts: move back by the rounded-up number of slides
    if (st > ts) {
      st -= ((st - ts + pQuery->slidingTime - 1) / pQuery->slidingTime) * pQuery->slidingTime;
    }

    // window ends before ts: move forward by the rounded-up number of slides
    int64_t et = st + pQuery->intervalTime - 1;
    if (et < ts) {
      st += ((ts - et + pQuery->slidingTime - 1) / pQuery->slidingTime) * pQuery->slidingTime;
    }

    w.skey = st;
    w.ekey = w.skey + pQuery->intervalTime - 1;
  }

  /*
   * query border check, skey should not be bounded by the query time range, since the value skey will
   * be used as the time window index value. So we only change ekey of time window accordingly.
   */
  if (w.ekey > pQuery->window.ekey && QUERY_IS_ASC_QUERY(pQuery)) {
    w.ekey = pQuery->window.ekey;
  }

  assert(ts >= w.skey && ts <= w.ekey);

  return w;
}

/**
 * Assign a row in the disk-based result buffer to a window result that has none yet.
 *
 * The last page of the id list of sid is reused while it holds fewer than
 * numOfRowsPerPage rows; otherwise (or if the list is empty) a new page is requested.
 *
 * @param pWindowRes        window result; pos.pageId / pos.rowId are filled in
 * @param pResultBuf        disk-based result buffer
 * @param sid               id used to select the page list inside the buffer
 * @param numOfRowsPerPage  page capacity in rows
 * @return                  0 on success or when a position is already assigned,
 *                          -1 if no page could be obtained
 */
static int32_t addNewWindowResultBuf(SWindowResult *pWindowRes, SDiskbasedResultBuf *pResultBuf, int32_t sid,
                                     int32_t numOfRowsPerPage) {
  if (pWindowRes->pos.pageId != -1) {
    return 0;
  }

  tFilePage *pData = NULL;

  // in the first scan, new space needed for results
  int32_t pageId = -1;
  SIDList list = getDataBufPagesIdList(pResultBuf, sid);

  if (list.size == 0) {
    pData = getNewDataBuf(pResultBuf, sid, &pageId);
  } else {
    pageId = getLastPageId(&list);
    pData = GET_RES_BUF_PAGE_BY_ID(pResultBuf, pageId);

    if (pData->num >= numOfRowsPerPage) {
      pData = getNewDataBuf(pResultBuf, sid, &pageId);
      if (pData != NULL) {
        assert(pData->num == 0);  // number of elements must be 0 for new allocated buffer
      }
    }
  }

  if (pData == NULL) {
    return -1;
  }

  // set the number of rows in current disk page
  if (pWindowRes->pos.pageId == -1) {  // not allocated yet, allocate new buffer
    pWindowRes->pos.pageId = pageId;
    pWindowRes->pos.rowId = pData->num++;
  }

  return 0;
}

/**
 * Select the window result of a time window as the current output target.
 *
 * The result slot is looked up (or created) by the start key of the window, a
 * result buffer row is assigned if the slot has none, the window is stored in the
 * slot, and the function contexts are pointed at it.
 *
 * @param pRuntimeEnv     runtime environment of the query
 * @param pWindowResInfo  window result container
 * @param sid             id forwarded to the result buffer page allocation
 * @param win             time window, skey must not exceed ekey (asserted)
 * @return                TSDB_CODE_SUCCESS on success, -1 if no slot or no buffer
 *                        row could be obtained
 */
static int32_t setWindowOutputBufByKey(SQueryRuntimeEnv *pRuntimeEnv, SWindowResInfo *pWindowResInfo, int32_t sid,
                                       STimeWindow *win) {
  assert(win->skey <= win->ekey);
  SDiskbasedResultBuf *pResultBuf = pRuntimeEnv->pResultBuf;

  SWindowResult *pWindowRes = doSetTimeWindowFromKey(pRuntimeEnv, pWindowResInfo, (char *)&win->skey, TSDB_KEYSIZE);
  if (pWindowRes == NULL) {
    return -1;
  }

  // not assign result buffer yet, add new result buffer
  if (pWindowRes->pos.pageId == -1) {
    int32_t ret = addNewWindowResultBuf(pWindowRes, pResultBuf, sid, pRuntimeEnv->numOfRowsPerPage);
    if (ret != 0) {
      return -1;
    }
  }

  // set time window for current result
  pWindowRes->window = *win;

  setWindowResOutputBufInitCtx(pRuntimeEnv, pWindowRes);
  return TSDB_CODE_SUCCESS;
}

/* Return the status record of the window result stored in the given slot; the slot must be in [0, size). */
static SWindowStatus *getTimeWindowResStatus(SWindowResInfo *pWindowResInfo, int32_t slot) {
  assert(slot >= 0 && slot < pWindowResInfo->size);

  SWindowResult *pSlotRes = pWindowResInfo->pResult + slot;
  return &pSlotRes->status;
}

H
Haojun Liao 已提交
498
/**
 * Count the rows between a start position and the position that searchFn reports
 * for ekey, in scan direction.
 *
 * @param numOfRows  number of timestamps in pData
 * @param searchFn   search function used to locate ekey in pData
 * @param ekey       end key of the range
 * @param pos        start position inside the block
 * @param order      TSDB_ORDER_ASC or descending order
 * @param pData      timestamp column of the block
 * @return           number of rows to step over; the row at the found position is
 *                   included only if its timestamp equals ekey; 0 if searchFn
 *                   returns a negative position
 */
static int32_t getForwardStepsInBlock(int32_t numOfRows, __block_search_fn_t searchFn, TSKEY ekey, int16_t pos,
                                      int16_t order, int64_t *pData) {
  int32_t endPos = searchFn((char *)pData, numOfRows, ekey, order);
  int32_t forwardStep = 0;

  if (endPos >= 0) {
    forwardStep = (order == TSDB_ORDER_ASC) ? (endPos - pos) : (pos - endPos);
    assert(forwardStep >= 0);

    // endPos data is equal to the key, so we do need to read the element in endPos
    if (pData[endPos] == ekey) {
      forwardStep += 1;
    }
  }

  return forwardStep;
}

/**
 * NOTE: the query status only set for the first scan of master scan.
 */
519
static int32_t doCheckQueryCompleted(SQueryRuntimeEnv *pRuntimeEnv, TSKEY lastKey, SWindowResInfo *pWindowResInfo) {
520 521
  SQuery *pQuery = pRuntimeEnv->pQuery;
  if (pRuntimeEnv->scanFlag != MASTER_SCAN || (!isIntervalQuery(pQuery))) {
522
    return pWindowResInfo->size;
523
  }
524

525
  // no qualified results exist, abort check
526 527
  int32_t numOfClosed = 0;
  
528
  if (pWindowResInfo->size == 0) {
529
    return pWindowResInfo->size;
530
  }
531

532
  // query completed
H
hjxilinx 已提交
533 534
  if ((lastKey >= pQuery->current->win.ekey && QUERY_IS_ASC_QUERY(pQuery)) ||
      (lastKey <= pQuery->current->win.ekey && !QUERY_IS_ASC_QUERY(pQuery))) {
535
    closeAllTimeWindow(pWindowResInfo);
536

537 538 539 540
    pWindowResInfo->curIndex = pWindowResInfo->size - 1;
    setQueryStatus(pQuery, QUERY_COMPLETED | QUERY_RESBUF_FULL);
  } else {  // set the current index to be the last unclosed window
    int32_t i = 0;
541
    int64_t skey = TSKEY_INITIAL_VAL;
542

543 544 545
    for (i = 0; i < pWindowResInfo->size; ++i) {
      SWindowResult *pResult = &pWindowResInfo->pResult[i];
      if (pResult->status.closed) {
546
        numOfClosed += 1;
547 548
        continue;
      }
549

550 551 552 553 554 555 556 557
      if ((pResult->window.ekey <= lastKey && QUERY_IS_ASC_QUERY(pQuery)) ||
          (pResult->window.skey >= lastKey && !QUERY_IS_ASC_QUERY(pQuery))) {
        closeTimeWindow(pWindowResInfo, i);
      } else {
        skey = pResult->window.skey;
        break;
      }
    }
558

559
    // all windows are closed, set the last one to be the skey
560
    if (skey == TSKEY_INITIAL_VAL) {
561 562 563 564 565
      assert(i == pWindowResInfo->size);
      pWindowResInfo->curIndex = pWindowResInfo->size - 1;
    } else {
      pWindowResInfo->curIndex = i;
    }
566

567
    pWindowResInfo->prevSKey = pWindowResInfo->pResult[pWindowResInfo->curIndex].window.skey;
568

569 570 571 572 573
    // the number of completed slots are larger than the threshold, return current generated results to client.
    if (numOfClosed > pWindowResInfo->threshold) {
      qTrace("QInfo:%p total result window:%d closed:%d, reached the output threshold %d, return",
          GET_QINFO_ADDR(pRuntimeEnv), pWindowResInfo->size, numOfClosed, pQuery->rec.threshold);
      
574
      setQueryStatus(pQuery, QUERY_RESBUF_FULL);
575 576 577
    } else {
      qTrace("QInfo:%p total result window:%d already closed:%d", GET_QINFO_ADDR(pRuntimeEnv), pWindowResInfo->size,
             numOfClosed);
578 579
    }
  }
580 581 582 583 584 585 586
  
  // output has reached the limitation, set query completed
  if (pQuery->limit.limit > 0 && (pQuery->limit.limit + pQuery->limit.offset) <= numOfClosed &&
      pRuntimeEnv->scanFlag == MASTER_SCAN) {
    setQueryStatus(pQuery, QUERY_COMPLETED);
  }
  
587
  assert(pWindowResInfo->prevSKey != TSKEY_INITIAL_VAL);
588
  return numOfClosed;
589 590 591
}

/**
 * Count the rows of the current block, starting at startPos and moving in scan
 * direction, that belong to the time window ending at ekey.
 *
 * If the window extends to or beyond the block boundary, all remaining rows are
 * counted; otherwise the end position is searched inside the block.
 *
 * @param pQuery          query providing the order and the current table info
 * @param pDataBlockInfo  info (rows, window) of the current block
 * @param pPrimaryColumn  timestamp column of the block
 * @param startPos        first row to consider, must be inside the block (asserted)
 * @param ekey            end key of the time window in scan direction
 * @param searchFn        search function used to locate ekey in the block
 * @param updateLastKey   when true, lastKey of the current table is advanced to one
 *                        step past the last counted row (not touched if no row qualifies)
 * @return                number of qualified rows (>= 0)
 */
static int32_t getNumOfRowsInTimeWindow(SQuery *pQuery, SDataBlockInfo *pDataBlockInfo, TSKEY *pPrimaryColumn,
                                        int32_t startPos, TSKEY ekey, __block_search_fn_t searchFn, bool updateLastKey) {
  assert(startPos >= 0 && startPos < pDataBlockInfo->rows);

  int32_t num = -1;
  int32_t order = pQuery->order.order;
  int32_t step = GET_FORWARD_DIRECTION_FACTOR(order);

  STableQueryInfo* item = pQuery->current;

  if (QUERY_IS_ASC_QUERY(pQuery)) {
    if (ekey < pDataBlockInfo->window.ekey) {
      num = getForwardStepsInBlock(pDataBlockInfo->rows, searchFn, ekey, startPos, order, pPrimaryColumn);
      if (num == 0) {  // no qualified data in current block, do not update the lastKey value
        assert(ekey < pPrimaryColumn[startPos]);
      } else {
        if (updateLastKey) {
          item->lastKey = pPrimaryColumn[startPos + (num - 1)] + step;
        }
      }
    } else {
      // the window reaches the end of the block: take every row from startPos on
      num = pDataBlockInfo->rows - startPos;
      if (updateLastKey) {
        item->lastKey = pDataBlockInfo->window.ekey + step;
      }
    }
  } else {  // desc
    if (ekey > pDataBlockInfo->window.skey) {
      num = getForwardStepsInBlock(pDataBlockInfo->rows, searchFn, ekey, startPos, order, pPrimaryColumn);
      if (num == 0) {  // no qualified data in current block, do not update the lastKey value
        assert(ekey > pPrimaryColumn[startPos]);
      } else {
        if (updateLastKey) {
          item->lastKey = pPrimaryColumn[startPos - (num - 1)] + step;
        }
      }
    } else {
      // the window reaches the start of the block: take every row down to row 0
      num = startPos + 1;
      if (updateLastKey) {
        item->lastKey = pDataBlockInfo->window.skey + step;
      }
    }
  }

  assert(num >= 0);
  return num;
}

/**
 * Run the block-wise implementation of every output function on a range of rows.
 *
 * Nothing is done unless this is the master scan or the window is already closed.
 *
 * @param pRuntimeEnv  runtime environment providing the query and the pCtx array
 * @param pStatus      status of the window the rows belong to
 * @param pWin         the time window; its skey is stored as nStartQueryTimestamp
 * @param offset       position of the first row in scan direction
 * @param forwardStep  number of rows to process
 * @param tsBuf        timestamp column of the block
 * @param numOfTotal   total number of rows in the block
 */
static void doBlockwiseApplyFunctions(SQueryRuntimeEnv *pRuntimeEnv, SWindowStatus *pStatus, STimeWindow *pWin,
                                      int32_t offset, int32_t forwardStep, TSKEY *tsBuf, int32_t numOfTotal) {
  SQuery *        pQuery = pRuntimeEnv->pQuery;
  SQLFunctionCtx *pCtx = pRuntimeEnv->pCtx;

  if (IS_MASTER_SCAN(pRuntimeEnv) || pStatus->closed) {
    for (int32_t k = 0; k < pQuery->numOfOutput; ++k) {
      int32_t functionId = pQuery->pSelectExpr[k].base.functionId;

      pCtx[k].nStartQueryTimestamp = pWin->skey;
      pCtx[k].size = forwardStep;
      // startOffset is always the lowest row index of the range, whatever the scan order
      pCtx[k].startOffset = (QUERY_IS_ASC_QUERY(pQuery)) ? offset : offset - (forwardStep - 1);

      if ((aAggs[functionId].nStatus & TSDB_FUNCSTATE_SELECTIVITY) != 0) {
        pCtx[k].ptsList = &tsBuf[offset];
      }

      // not a whole block involved in query processing, statistics data can not be used
      if (forwardStep != numOfTotal) {
        pCtx[k].preAggVals.isSet = false;
      }

      if (functionNeedToExecute(pRuntimeEnv, &pCtx[k], functionId)) {
        aAggs[functionId].xFunction(&pCtx[k]);
      }
    }
  }
}

/**
 * Run the row-wise implementation of every output function on a single row.
 *
 * Nothing is done unless this is the master scan or the window is already closed.
 *
 * @param pRuntimeEnv  runtime environment providing the query and the pCtx array
 * @param pStatus      status of the window the row belongs to
 * @param pWin         the time window; its skey is stored as nStartQueryTimestamp
 * @param offset       position of the row, passed on to the function
 */
static void doRowwiseApplyFunctions(SQueryRuntimeEnv *pRuntimeEnv, SWindowStatus *pStatus, STimeWindow *pWin,
                                    int32_t offset) {
  SQuery *        pQuery = pRuntimeEnv->pQuery;
  SQLFunctionCtx *pCtx = pRuntimeEnv->pCtx;

  if (IS_MASTER_SCAN(pRuntimeEnv) || pStatus->closed) {
    for (int32_t k = 0; k < pQuery->numOfOutput; ++k) {
      pCtx[k].nStartQueryTimestamp = pWin->skey;

      int32_t functionId = pQuery->pSelectExpr[k].base.functionId;
      if (functionNeedToExecute(pRuntimeEnv, &pCtx[k], functionId)) {
        aAggs[functionId].xFunctionF(&pCtx[k], offset);
      }
    }
  }
}

static int32_t getNextQualifiedWindow(SQueryRuntimeEnv *pRuntimeEnv, STimeWindow *pNextWin,
686 687
                                      SDataBlockInfo *pDataBlockInfo, TSKEY *primaryKeys,
                                      __block_search_fn_t searchFn) {
688
  SQuery *pQuery = pRuntimeEnv->pQuery;
H
Haojun Liao 已提交
689 690 691 692 693 694 695 696 697 698 699 700 701
  
  // tumbling time window query, a special case of sliding time window query
  if (pQuery->slidingTime == pQuery->intervalTime) {
    // todo opt
  }
  
  getNextTimeWindow(pQuery, pNextWin);
  
  // next time window is not in current block
  if ((pNextWin->skey > pDataBlockInfo->window.ekey && QUERY_IS_ASC_QUERY(pQuery)) ||
      (pNextWin->ekey < pDataBlockInfo->window.skey && !QUERY_IS_ASC_QUERY(pQuery))) {
    return -1;
  }
702

H
Haojun Liao 已提交
703 704 705 706 707
  TSKEY startKey = -1;
  if (QUERY_IS_ASC_QUERY(pQuery)) {
    startKey = pNextWin->skey;
    if (startKey < pQuery->window.skey) {
      startKey = pQuery->window.skey;
708
    }
H
Haojun Liao 已提交
709 710 711 712
  } else {
    startKey = pNextWin->ekey;
    if (startKey > pQuery->window.skey) {
      startKey = pQuery->window.skey;
713
    }
H
Haojun Liao 已提交
714
  }
715

H
Haojun Liao 已提交
716
  int32_t startPos = searchFn((char *)primaryKeys, pDataBlockInfo->rows, startKey, pQuery->order.order);
717

H
Haojun Liao 已提交
718 719 720 721 722 723 724 725 726 727 728
  /*
   * This time window does not cover any data, try next time window,
   * this case may happen when the time window is too small
   */
  if (QUERY_IS_ASC_QUERY(pQuery) && primaryKeys[startPos] > pNextWin->ekey) {
    TSKEY next = primaryKeys[startPos];
    
    pNextWin->ekey += ((next - pNextWin->ekey + pQuery->slidingTime - 1)/pQuery->slidingTime) * pQuery->slidingTime;
    pNextWin->skey = pNextWin->ekey - pQuery->intervalTime + 1;
  } else if ((!QUERY_IS_ASC_QUERY(pQuery)) && primaryKeys[startPos] < pNextWin->skey) {
    TSKEY next = primaryKeys[startPos];
729

H
Haojun Liao 已提交
730 731
    pNextWin->skey -= ((pNextWin->skey - next + pQuery->slidingTime - 1) / pQuery->slidingTime) * pQuery->slidingTime;
    pNextWin->ekey = pNextWin->skey + pQuery->intervalTime - 1;
732
  }
H
Haojun Liao 已提交
733 734
  
  return startPos;
735 736 737 738 739 740 741 742 743 744 745 746 747 748 749
}

/**
 * Get the end key of a time window in scan direction, bounded by the end key of
 * the query time range.
 *
 * @param pQuery   query providing the order and the query time range
 * @param pWindow  time window
 * @return         ascending: min(pWindow->ekey, query ekey);
 *                 descending: max(pWindow->skey, query ekey)
 */
static TSKEY reviseWindowEkey(SQuery *pQuery, STimeWindow *pWindow) {
  TSKEY ekey = -1;
  if (QUERY_IS_ASC_QUERY(pQuery)) {
    ekey = pWindow->ekey;
    if (ekey > pQuery->window.ekey) {
      ekey = pQuery->window.ekey;
    }
  } else {
    ekey = pWindow->skey;
    if (ekey < pQuery->window.ekey) {
      ekey = pQuery->window.ekey;
    }
  }

  return ekey;
}

H
hjxilinx 已提交
754 755 756 757 758 759 760 761 762 763 764 765 766 767 768
/*
 * Return the data pointer of the first column in pDataBlock whose column id
 * equals colId, or NULL if there is no such column.
 * todo: binary search
 */
static void* getDataBlockImpl(SArray* pDataBlock, int32_t colId) {
  int32_t total = taosArrayGetSize(pDataBlock);
  int32_t idx = 0;

  while (idx < total) {
    SColumnInfoData *pColData = taosArrayGet(pDataBlock, idx);
    if (pColData->info.colId == colId) {
      return pColData->pData;
    }

    idx += 1;
  }

  return NULL;
}

static char *getDataBlock(SQueryRuntimeEnv *pRuntimeEnv, SArithmeticSupport *sas, int32_t col, int32_t size,
769
                    SArray *pDataBlock) {
770
  char *dataBlock = NULL;
771
  SQuery *pQuery = pRuntimeEnv->pQuery;
772

773
  SQLFunctionCtx *pCtx = pRuntimeEnv->pCtx;
774

775
  int32_t functionId = pQuery->pSelectExpr[col].base.functionId;
776
  if (functionId == TSDB_FUNC_ARITHM) {
777
    sas->pArithExpr = &pQuery->pSelectExpr[col];
778

779 780 781 782 783 784
    // set the start offset to be the lowest start position, no matter asc/desc query order
    if (QUERY_IS_ASC_QUERY(pQuery)) {
      pCtx->startOffset = pQuery->pos;
    } else {
      pCtx->startOffset = pQuery->pos - (size - 1);
    }
785

786 787 788 789
    sas->offset  = 0;
    sas->colList = pQuery->colList;
    sas->numOfCols = pQuery->numOfCols;
    sas->data    = calloc(pQuery->numOfCols, POINTER_BYTES);
790

791
    // here the pQuery->colList and sas->colList are identical
792
    for (int32_t i = 0; i < pQuery->numOfCols; ++i) {
793
      SColumnInfo *pColMsg = &pQuery->colList[i];
794

795
      int32_t numOfCols = taosArrayGetSize(pDataBlock);
796

797 798 799 800 801 802 803 804
      dataBlock = NULL;
      for (int32_t k = 0; k < numOfCols; ++k) {  //todo refactor
        SColumnInfoData *p = taosArrayGet(pDataBlock, k);
        if (pColMsg->colId == p->info.colId) {
          dataBlock = p->pData;
          break;
        }
      }
805

806
      assert(dataBlock != NULL);
H
Haojun Liao 已提交
807
      sas->data[i] = dataBlock/* + pQuery->colList[i].bytes*/;  // start from the offset
808
    }
809

810
  } else {  // other type of query function
811
    SColIndex *pCol = &pQuery->pSelectExpr[col].base.colInfo;
812
    if (TSDB_COL_IS_TAG(pCol->flag) || pDataBlock == NULL) {
813 814
      dataBlock = NULL;
    } else {
H
hjxilinx 已提交
815
      dataBlock = getDataBlockImpl(pDataBlock, pCol->colId);
816 817
    }
  }
818

819 820 821 822 823 824 825
  return dataBlock;
}

/**
 *
 * @param pRuntimeEnv
 * @param forwardStep
826
 * @param tsCols
827 828 829 830 831
 * @param pFields
 * @param isDiskFileBlock
 * @return                  the incremental number of output value, so it maybe 0 for fixed number of query,
 *                          such as count/min/max etc.
 */
832
static void blockwiseApplyFunctions(SQueryRuntimeEnv *pRuntimeEnv, SDataStatis *pStatis,
833 834
                                       SDataBlockInfo *pDataBlockInfo, SWindowResInfo *pWindowResInfo,
                                       __block_search_fn_t searchFn, SArray *pDataBlock) {
835
  SQLFunctionCtx *pCtx = pRuntimeEnv->pCtx;
836 837 838
  
  SQuery *pQuery = pRuntimeEnv->pQuery;
  TSKEY  *tsCols = NULL;
839
  if (pDataBlock != NULL) {
840
    SColumnInfoData* pColInfo = taosArrayGet(pDataBlock, 0);
841
    tsCols = (TSKEY *)(pColInfo->pData);
842
  }
843

844
  SArithmeticSupport *sasArray = calloc((size_t)pQuery->numOfOutput, sizeof(SArithmeticSupport));
845

846
  for (int32_t k = 0; k < pQuery->numOfOutput; ++k) {
H
hjxilinx 已提交
847
    char *dataBlock = getDataBlock(pRuntimeEnv, &sasArray[k], k, pDataBlockInfo->rows, pDataBlock);
848
    setExecParams(pQuery, &pCtx[k], dataBlock, tsCols, pDataBlockInfo, pStatis, &sasArray[k], k);
849
  }
850

851 852 853
  int32_t step = GET_FORWARD_DIRECTION_FACTOR(pQuery->order.order);
  if (isIntervalQuery(pQuery)) {
    int32_t offset = GET_COL_DATA_POS(pQuery, 0, step);
854
    TSKEY   ts = tsCols[offset];
855

856
    STimeWindow win = getActiveTimeWindow(pWindowResInfo, ts, pQuery);
H
hjxilinx 已提交
857
    if (setWindowOutputBufByKey(pRuntimeEnv, pWindowResInfo, pDataBlockInfo->tid, &win) != TSDB_CODE_SUCCESS) {
H
hjxilinx 已提交
858
      return;
859
    }
860

861 862
    TSKEY   ekey = reviseWindowEkey(pQuery, &win);
    int32_t forwardStep =
863
        getNumOfRowsInTimeWindow(pQuery, pDataBlockInfo, tsCols, pQuery->pos, ekey, searchFn, true);
864

865
    SWindowStatus *pStatus = getTimeWindowResStatus(pWindowResInfo, curTimeWindow(pWindowResInfo));
866
    doBlockwiseApplyFunctions(pRuntimeEnv, pStatus, &win, pQuery->pos, forwardStep, tsCols, pDataBlockInfo->rows);
867

868 869
    int32_t     index = pWindowResInfo->curIndex;
    STimeWindow nextWin = win;
870

871
    while (1) {
872
      int32_t startPos = getNextQualifiedWindow(pRuntimeEnv, &nextWin, pDataBlockInfo, tsCols, searchFn);
873 874 875
      if (startPos < 0) {
        break;
      }
876

877
      // null data, failed to allocate more memory buffer
H
hjxilinx 已提交
878
      if (setWindowOutputBufByKey(pRuntimeEnv, pWindowResInfo, pDataBlockInfo->tid, &nextWin) != TSDB_CODE_SUCCESS) {
879 880
        break;
      }
881

882
      ekey = reviseWindowEkey(pQuery, &nextWin);
883
      forwardStep = getNumOfRowsInTimeWindow(pQuery, pDataBlockInfo, tsCols, startPos, ekey, searchFn, true);
884

885
      pStatus = getTimeWindowResStatus(pWindowResInfo, curTimeWindow(pWindowResInfo));
886
      doBlockwiseApplyFunctions(pRuntimeEnv, pStatus, &nextWin, startPos, forwardStep, tsCols, pDataBlockInfo->rows);
887
    }
888

889 890 891 892 893 894 895
    pWindowResInfo->curIndex = index;
  } else {
    /*
     * the sqlfunctionCtx parameters should be set done before all functions are invoked,
     * since the selectivity + tag_prj query needs all parameters been set done.
     * tag_prj function are changed to be TSDB_FUNC_TAG_DUMMY
     */
896
    for (int32_t k = 0; k < pQuery->numOfOutput; ++k) {
897
      int32_t functionId = pQuery->pSelectExpr[k].base.functionId;
898 899 900 901 902
      if (functionNeedToExecute(pRuntimeEnv, &pCtx[k], functionId)) {
        aAggs[functionId].xFunction(&pCtx[k]);
      }
    }
  }
903

904 905 906 907
  for(int32_t i = 0; i < pQuery->numOfOutput; ++i) {
    if (pQuery->pSelectExpr[i].base.functionId != TSDB_FUNC_ARITHM) {
      continue;
    }
908

909 910
    tfree(sasArray[i].data);
  }
911

912 913 914 915 916 917 918
  tfree(sasArray);
}

/**
 * Select the result buffer that belongs to one group-by key value and bind the
 * function contexts of the runtime environment to it.
 *
 * @param pRuntimeEnv  runtime environment of the query
 * @param pData        pointer to the group-by key value of the current row
 * @param type         data type of the key, used for the NULL check
 * @param bytes        length of the key in bytes
 * @return TSDB_CODE_SUCCESS on success; -1 when the key is NULL, no window result could be
 *         obtained for the key, or no result buffer could be added for a new window result
 */
static int32_t setGroupResultOutputBuf(SQueryRuntimeEnv *pRuntimeEnv, char *pData, int16_t type, int16_t bytes) {
  // rows whose group-by value is NULL do not belong to any group
  if (isNull(pData, type)) {
    return -1;
  }

  const int32_t groupResultId = 1;
  SDiskbasedResultBuf *pBuf = pRuntimeEnv->pResultBuf;

  SWindowResult *pRes = doSetTimeWindowFromKey(pRuntimeEnv, &pRuntimeEnv->windowResInfo, pData, bytes);
  if (pRes == NULL) {
    return -1;
  }

  // a page id of -1 means that no result buffer has been assigned to this window result yet
  if (pRes->pos.pageId == -1 &&
      addNewWindowResultBuf(pRes, pBuf, groupResultId, pRuntimeEnv->numOfRowsPerPage) != 0) {
    return -1;
  }

  setWindowResOutputBuf(pRuntimeEnv, pRes);
  initCtxOutputBuf(pRuntimeEnv);

  return TSDB_CODE_SUCCESS;
}

942
/**
 * Locate the data of the first group-by column that is a normal (non-tag) column and is
 * present in the given data block.
 *
 * @param pQuery      query whose pGroupbyExpr and colList are inspected
 * @param type        [out] data type of the group-by column, taken from pQuery->colList
 * @param bytes       [out] width in bytes of the group-by column, taken from pQuery->colList
 * @param pDataBlock  array of SColumnInfoData of the current data block
 * @return the pData pointer of the matching column, or NULL when no non-tag group-by column is
 *         found in pDataBlock. Note that *type and *bytes are written for every non-tag
 *         group-by column that is examined, even when NULL is returned.
 */
static char *getGroupbyColumnData(SQuery *pQuery, int16_t *type, int16_t *bytes, SArray* pDataBlock) {
  SSqlGroupbyExpr *pGroupbyExpr = pQuery->pGroupbyExpr;

  for (int32_t k = 0; k < pGroupbyExpr->numOfGroupCols; ++k) {
    SColIndex* pColIndex = taosArrayGet(pGroupbyExpr->columnInfo, k);

    // test the tag bit like the other call sites of this file do, an exact comparison
    // misses a tag column whose flag carries additional bits
    if (TSDB_COL_IS_TAG(pColIndex->flag)) {
      continue;
    }

    int32_t colIndex = -1;
    int32_t colId = pColIndex->colId;

    for (int32_t i = 0; i < pQuery->numOfCols; ++i) {
      if (pQuery->colList[i].colId == colId) {
        colIndex = i;
        break;
      }
    }

    assert(colIndex >= 0 && colIndex < pQuery->numOfCols);

    *type = pQuery->colList[colIndex].type;
    *bytes = pQuery->colList[colIndex].bytes;

    /*
     * The column is looked up in the data block by its column id instead of by colIndex,
     * so a block that does not carry the column simply yields no match here.
     */
    int32_t numOfCols = taosArrayGetSize(pDataBlock);

    for (int32_t i = 0; i < numOfCols; ++i) {
      SColumnInfoData *p = taosArrayGet(pDataBlock, i);
      if (pColIndex->colId == p->info.colId) {
        return p->pData;
      }
    }
  }

  return NULL;
}

/**
 * Compare the row at the given offset with the current element of the timestamp buffer.
 *
 * @param pRuntimeEnv  runtime environment; pTSBuf and pCtx[0] are read
 * @param offset       row index into the input buffer of pCtx[0], in units of TSDB_KEYSIZE
 * @return TS_JOIN_TAG_NOT_EQUALS when the tag of pCtx[0] differs from the tag of the element,
 *         TS_JOIN_TS_NOT_EQUALS when the row has not reached the element timestamp yet in scan
 *         order, TS_JOIN_TS_EQUAL otherwise (a row that is already beyond the element trips
 *         an assertion)
 */
static int32_t doTSJoinFilter(SQueryRuntimeEnv *pRuntimeEnv, int32_t offset) {
  SQuery *pQuery = pRuntimeEnv->pQuery;

  STSElem         elem = tsBufGetElem(pRuntimeEnv->pTSBuf);
  SQLFunctionCtx *pFirstCtx = &pRuntimeEnv->pCtx[0];

  // the tag is compared before the timestamp
  if (pFirstCtx->tag.i64Key != elem.tag) {
    return TS_JOIN_TAG_NOT_EQUALS;
  }

  TSKEY key = *(TSKEY *)(pFirstCtx->aInputElemBuf + TSDB_KEYSIZE * offset);

#if defined(_DEBUG_VIEW)
  printf("elem in comp ts file:%" PRId64 ", key:%" PRId64 ", tag:%"PRIu64", query order:%d, ts order:%d, traverse:%d, index:%d\n",
         elem.ts, key, elem.tag, pQuery->order.order, pRuntimeEnv->pTSBuf->tsOrder,
         pRuntimeEnv->pTSBuf->cur.order, pRuntimeEnv->pTSBuf->cur.tsIndex);
#endif

  if (key == elem.ts) {
    return TS_JOIN_TS_EQUAL;
  }

  // "behind" means the row has not reached the element timestamp yet in scan direction
  bool behind = QUERY_IS_ASC_QUERY(pQuery) ? (key < elem.ts) : (key > elem.ts);
  if (behind) {
    return TS_JOIN_TS_NOT_EQUALS;
  }

  // the row is already beyond the element timestamp
  assert(false);
  return TS_JOIN_TS_EQUAL;
}

/**
 * Decide whether a function has to be invoked for the current scan.
 *
 * @param pRuntimeEnv  runtime environment, provides the query order and the scan flag
 * @param pCtx         context of the function; its result info tells whether it is complete
 * @param functionId   id of the function
 * @return false for completed functions and for the tag/ts dummy functions; for last/last_dst
 *         true only in a descending query; for first/first_dst true only in an ascending
 *         query; for every other function true unless the reverse scan is running
 */
static bool functionNeedToExecute(SQueryRuntimeEnv *pRuntimeEnv, SQLFunctionCtx *pCtx, int32_t functionId) {
  SResultInfo *pInfo = GET_RES_INFO(pCtx);
  SQuery      *pQuery = pRuntimeEnv->pQuery;

  bool isDummy = (functionId == TSDB_FUNC_TAG_DUMMY) || (functionId == TSDB_FUNC_TS_DUMMY);
  if (pInfo->complete || isDummy) {
    return false;
  }

  bool isLast = (functionId == TSDB_FUNC_LAST_DST) || (functionId == TSDB_FUNC_LAST);
  if (isLast) {
    return !QUERY_IS_ASC_QUERY(pQuery);
  }

  bool isFirst = (functionId == TSDB_FUNC_FIRST_DST) || (functionId == TSDB_FUNC_FIRST);
  if (isFirst) {
    return QUERY_IS_ASC_QUERY(pQuery);
  }

  // all remaining functions are skipped during the reverse scan
  return !IS_REVERSE_SCAN(pRuntimeEnv);
}

1041 1042
/**
 * Apply the output functions of the query to the current data block one row at a time.
 *
 * tableApplyFunctionsOnBlock() takes this path when the query has column filters, a timestamp
 * buffer (pTSBuf) or a group-by on a normal column. Every row is checked against the timestamp
 * buffer and the column filters before it is handed to the functions, either per time window
 * (interval query) or per group / directly (all other queries).
 *
 * @param pRuntimeEnv     runtime environment that owns the query and the function contexts
 * @param pStatis         block statistics, forwarded unchanged to setExecParams()
 * @param pDataBlockInfo  description of the block (rows, tid, ...)
 * @param pWindowResInfo  time window results; only used by interval queries
 * @param pDataBlock      loaded column data of the block; its first column is read as timestamps
 */
static void rowwiseApplyFunctions(SQueryRuntimeEnv *pRuntimeEnv, SDataStatis *pStatis, SDataBlockInfo *pDataBlockInfo,
    SWindowResInfo *pWindowResInfo, SArray *pDataBlock) {
  SQLFunctionCtx *pCtx = pRuntimeEnv->pCtx;

  SQuery *pQuery = pRuntimeEnv->pQuery;
  STableQueryInfo* item = pQuery->current;

  TSKEY  *tsCols = (TSKEY*) ((SColumnInfoData *)taosArrayGet(pDataBlock, 0))->pData;
  bool    groupbyStateValue = isGroupbyNormalCol(pQuery->pGroupbyExpr);

  // one arithmetic support slot per output column; getDataBlock() fills the slots of arithmetic expressions
  SArithmeticSupport *sasArray = calloc((size_t)pQuery->numOfOutput, sizeof(SArithmeticSupport));

  int16_t type = 0;
  int16_t bytes = 0;

  char *groupbyColumnData = NULL;
  if (groupbyStateValue) {
    groupbyColumnData = getGroupbyColumnData(pQuery, &type, &bytes, pDataBlock);
  }

  for (int32_t k = 0; k < pQuery->numOfOutput; ++k) {
    char *dataBlock = getDataBlock(pRuntimeEnv, &sasArray[k], k, pDataBlockInfo->rows, pDataBlock);
    setExecParams(pQuery, &pCtx[k], dataBlock, tsCols, pDataBlockInfo, pStatis, &sasArray[k], k);
  }

  // set the input column data of every filter column
  for (int32_t k = 0; k < pQuery->numOfFilterCols; ++k) {
    SSingleColumnFilterInfo *pFilterInfo = &pQuery->pFilterInfo[k];
    pFilterInfo->pData = getDataBlockImpl(pDataBlock, pFilterInfo->info.colId);
    assert(pFilterInfo->pData != NULL);
  }

  int32_t step = GET_FORWARD_DIRECTION_FACTOR(pQuery->order.order);

  if (pRuntimeEnv->pTSBuf != NULL) {
    // GET_QINFO_ADDR subtracts offsetof(SQInfo, runtimeEnv), so it must be given the runtime env pointer
    SQInfo *pQInfo = (SQInfo *)GET_QINFO_ADDR(pRuntimeEnv);
    qTrace("QInfo:%p process data rows, numOfRows:%d, query order:%d, ts comp order:%d", pQInfo, pDataBlockInfo->rows,
           pQuery->order.order, pRuntimeEnv->pTSBuf->cur.order);
  }

  int32_t j = 0;
  int32_t offset = -1;

  // rows are visited starting at pQuery->pos, moving by step per iteration
  for (j = 0; j < pDataBlockInfo->rows; ++j) {
    offset = GET_COL_DATA_POS(pQuery, j, step);

    if (pRuntimeEnv->pTSBuf != NULL) {
      int32_t r = doTSJoinFilter(pRuntimeEnv, offset);
      if (r == TS_JOIN_TAG_NOT_EQUALS) {
        break;
      } else if (r == TS_JOIN_TS_NOT_EQUALS) {
        continue;
      } else {
        assert(r == TS_JOIN_TS_EQUAL);
      }
    }

    if (pQuery->numOfFilterCols > 0 && (!doFilterData(pQuery, offset))) {
      continue;
    }

    // interval window query
    if (isIntervalQuery(pQuery)) {
      // decide the time window according to the primary timestamp
      int64_t     ts = tsCols[offset];
      STimeWindow win = getActiveTimeWindow(pWindowResInfo, ts, pQuery);

      int32_t ret = setWindowOutputBufByKey(pRuntimeEnv, pWindowResInfo, pDataBlockInfo->tid, &win);
      if (ret != TSDB_CODE_SUCCESS) {  // null data, too many state code
        continue;
      }

      SWindowStatus *pStatus = getTimeWindowResStatus(pWindowResInfo, curTimeWindow(pWindowResInfo));
      doRowwiseApplyFunctions(pRuntimeEnv, pStatus, &win, offset);

      // the same row is also applied to every following window that still covers its timestamp
      STimeWindow nextWin = win;
      int32_t     index = pWindowResInfo->curIndex;

      while (1) {
        getNextTimeWindow(pQuery, &nextWin);
        if (/*pWindowResInfo->startTime > nextWin.skey ||*/
            (nextWin.skey > pQuery->window.ekey && QUERY_IS_ASC_QUERY(pQuery)) ||
            (nextWin.skey < pQuery->window.ekey && !QUERY_IS_ASC_QUERY(pQuery))) {
          break;
        }

        if (ts < nextWin.skey || ts > nextWin.ekey) {
          break;
        }

        // null data, failed to allocate more memory buffer
        if (setWindowOutputBufByKey(pRuntimeEnv, pWindowResInfo, pDataBlockInfo->tid, &nextWin) != TSDB_CODE_SUCCESS) {
          break;
        }

        pStatus = getTimeWindowResStatus(pWindowResInfo, curTimeWindow(pWindowResInfo));
        doRowwiseApplyFunctions(pRuntimeEnv, pStatus, &nextWin, offset);
      }

      pWindowResInfo->curIndex = index;
    } else {  // other queries
      // decide which group this rows belongs to according to current state value
      if (groupbyStateValue) {
        char *val = groupbyColumnData + bytes * offset;

        int32_t ret = setGroupResultOutputBuf(pRuntimeEnv, val, type, bytes);
        if (ret != TSDB_CODE_SUCCESS) {  // null data, too many state code
          continue;
        }
      }

      for (int32_t k = 0; k < pQuery->numOfOutput; ++k) {
        int32_t functionId = pQuery->pSelectExpr[k].base.functionId;
        if (functionNeedToExecute(pRuntimeEnv, &pCtx[k], functionId)) {
          aAggs[functionId].xFunctionF(&pCtx[k], offset);
        }
      }
    }

    if (pRuntimeEnv->pTSBuf != NULL) {
      // if timestamp filter list is empty, quit current query
      if (!tsBufNextPos(pRuntimeEnv->pTSBuf)) {
        setQueryStatus(pQuery, QUERY_COMPLETED);
        break;
      }
    }
  }

  // offset is still -1 when the block has no rows; tsCols[-1] must not be read in that case
  if (offset >= 0) {
    item->lastKey = tsCols[offset] + step;
  }

  // todo refactor: extract method
  // only arithmetic expressions own a column pointer array that has to be released
  for(int32_t i = 0; i < pQuery->numOfOutput; ++i) {
    if (pQuery->pSelectExpr[i].base.functionId != TSDB_FUNC_ARITHM) {
      continue;
    }

    tfree(sasArray[i].data);
  }

  free(sasArray);
}

static int32_t tableApplyFunctionsOnBlock(SQueryRuntimeEnv *pRuntimeEnv, SDataBlockInfo *pDataBlockInfo,
H
hjxilinx 已提交
1185
                                          SDataStatis *pStatis, __block_search_fn_t searchFn, SArray *pDataBlock) {
H
hjxilinx 已提交
1186
  SQuery *pQuery = pRuntimeEnv->pQuery;
H
hjxilinx 已提交
1187 1188 1189
  
  STableQueryInfo* pTableQInfo = pQuery->current;
  SWindowResInfo*  pWindowResInfo = &pRuntimeEnv->windowResInfo;
H
hjxilinx 已提交
1190
  
1191
  if (pQuery->numOfFilterCols > 0 || pRuntimeEnv->pTSBuf != NULL || isGroupbyNormalCol(pQuery->pGroupbyExpr)) {
1192
    rowwiseApplyFunctions(pRuntimeEnv, pStatis, pDataBlockInfo, pWindowResInfo, pDataBlock);
1193
  } else {
1194
    blockwiseApplyFunctions(pRuntimeEnv, pStatis, pDataBlockInfo, pWindowResInfo, searchFn, pDataBlock);
1195
  }
1196

1197
  // update the lastkey of current table
1198
  TSKEY lastKey = QUERY_IS_ASC_QUERY(pQuery) ? pDataBlockInfo->window.ekey : pDataBlockInfo->window.skey;
H
hjxilinx 已提交
1199
  pTableQInfo->lastKey = lastKey + GET_FORWARD_DIRECTION_FACTOR(pQuery->order.order);
1200

1201
  // interval query with limit applied
1202 1203 1204 1205 1206 1207
  int32_t numOfRes = 0;
  
  if (isIntervalQuery(pQuery)) {
    numOfRes = doCheckQueryCompleted(pRuntimeEnv, lastKey, pWindowResInfo);
  } else {
    numOfRes = getNumOfResult(pRuntimeEnv);
1208

1209 1210 1211 1212
    // update the number of output result
    if (numOfRes > 0 && pQuery->checkBuffer == 1) {
      assert(numOfRes >= pQuery->rec.rows);
      pQuery->rec.rows = numOfRes;
1213

1214 1215 1216
      if (numOfRes >= pQuery->rec.threshold) {
        setQueryStatus(pQuery, QUERY_RESBUF_FULL);
      }
1217

1218 1219 1220
      if ((pQuery->limit.limit >= 0) && (pQuery->limit.limit + pQuery->limit.offset) <= numOfRes) {
        setQueryStatus(pQuery, QUERY_COMPLETED);
      }
H
Haojun Liao 已提交
1221
    }
1222
  }
1223

1224
  return numOfRes;
1225 1226
}

H
Haojun Liao 已提交
1227
void setExecParams(SQuery *pQuery, SQLFunctionCtx *pCtx, void* inputData, TSKEY *tsCol, SDataBlockInfo* pBlockInfo,
1228 1229 1230 1231 1232 1233 1234
                   SDataStatis *pStatis, void *param, int32_t colIndex) {
  
  int32_t functionId = pQuery->pSelectExpr[colIndex].base.functionId;
  int32_t colId = pQuery->pSelectExpr[colIndex].base.colInfo.colId;
  
  SDataStatis *tpField = NULL;
  pCtx->hasNull = hasNullValue(pQuery, colIndex, pBlockInfo->numOfCols, pStatis, &tpField);
1235
  pCtx->aInputElemBuf = inputData;
1236

1237
  if (tpField != NULL) {
H
Haojun Liao 已提交
1238
    pCtx->preAggVals.isSet  = true;
1239 1240
    pCtx->preAggVals.statis = *tpField;
    assert(pCtx->preAggVals.statis.numOfNull <= pBlockInfo->rows);
1241 1242 1243
  } else {
    pCtx->preAggVals.isSet = false;
  }
1244

H
Haojun Liao 已提交
1245 1246 1247
  // limit/offset query will affect this value
  pCtx->startOffset = QUERY_IS_ASC_QUERY(pQuery) ? pQuery->pos:0;
  pCtx->size = QUERY_IS_ASC_QUERY(pQuery) ? pBlockInfo->rows - pQuery->pos : pQuery->pos + 1;
1248

1249 1250
  uint32_t status = aAggs[functionId].nStatus;
  if (((status & (TSDB_FUNCSTATE_SELECTIVITY | TSDB_FUNCSTATE_NEED_TS)) != 0) && (tsCol != NULL)) {
H
Haojun Liao 已提交
1251
    pCtx->ptsList = tsCol;
1252
  }
1253

1254 1255 1256 1257 1258
  if (functionId >= TSDB_FUNC_FIRST_DST && functionId <= TSDB_FUNC_LAST_DST) {
    // last_dist or first_dist function
    // store the first&last timestamp into the intermediate buffer [1], the true
    // value may be null but timestamp will never be null
  } else if (functionId == TSDB_FUNC_TOP || functionId == TSDB_FUNC_BOTTOM || functionId == TSDB_FUNC_TWA ||
1259
             functionId == TSDB_FUNC_DIFF || (functionId >= TSDB_FUNC_RATE && functionId <= TSDB_FUNC_AVG_IRATE)) {
1260
    /*
H
Haojun Liao 已提交
1261
     * least squares function needs two columns of input, currently, the x value of linear equation is set to
1262 1263 1264 1265 1266 1267 1268 1269 1270 1271
     * timestamp column, and the y-value is the column specified in pQuery->pSelectExpr[i].colIdxInBuffer
     *
     * top/bottom function needs timestamp to indicate when the
     * top/bottom values emerge, so does diff function
     */
    if (functionId == TSDB_FUNC_TWA) {
      STwaInfo *pTWAInfo = GET_RES_INFO(pCtx)->interResultBuf;
      pTWAInfo->SKey = pQuery->window.skey;
      pTWAInfo->EKey = pQuery->window.ekey;
    }
1272

1273 1274
  } else if (functionId == TSDB_FUNC_ARITHM) {
    pCtx->param[1].pz = param;
H
Haojun Liao 已提交
1275 1276 1277 1278 1279 1280
  } else if (functionId == TSDB_FUNC_SPREAD) {  // set the statistics data for primary time stamp column
    if (colId == PRIMARYKEY_TIMESTAMP_COL_INDEX) {
      pCtx->preAggVals.isSet  = true;
      pCtx->preAggVals.statis.min = pBlockInfo->window.skey;
      pCtx->preAggVals.statis.max = pBlockInfo->window.ekey;
    }
1281 1282 1283 1284 1285 1286 1287 1288 1289 1290 1291 1292 1293
  } else if (functionId == TSDB_FUNC_INTERP) {
    SInterpInfoDetail *pInterpInfo = GET_RES_INFO(pCtx)->interResultBuf;
    pInterpInfo->type = pQuery->fillType;
    pInterpInfo->ts = pQuery->window.skey;
    pInterpInfo->primaryCol = (colId == PRIMARYKEY_TIMESTAMP_COL_INDEX);
  
    if (pQuery->fillVal != NULL) {
      if (isNull((const char*) &pQuery->fillVal[colIndex], pCtx->inputType)) {
        pCtx->param[1].nType = TSDB_DATA_TYPE_NULL;
      } else { // todo refactor, tVariantCreateFromBinary should handle the NULL value
        tVariantCreateFromBinary(&pCtx->param[1], (char*) &pQuery->fillVal[colIndex], pCtx->inputBytes, pCtx->inputType);
      }
    }
1294
  }
1295

1296 1297 1298 1299 1300 1301
#if defined(_DEBUG_VIEW)
  //  int64_t *tsList = (int64_t *)primaryColumnData;
//  int64_t  s = tsList[0];
//  int64_t  e = tsList[size - 1];

//    if (IS_DATA_BLOCK_LOADED(blockStatus)) {
S
slguan 已提交
1302
//        qTrace("QInfo:%p query ts:%lld-%lld, offset:%d, rows:%d, bstatus:%d,
1303 1304 1305
//        functId:%d", GET_QINFO_ADDR(pQuery),
//               s, e, startOffset, size, blockStatus, functionId);
//    } else {
S
slguan 已提交
1306
//        qTrace("QInfo:%p block not loaded, bstatus:%d",
1307 1308 1309 1310 1311 1312 1313 1314
//        GET_QINFO_ADDR(pQuery), blockStatus);
//    }
#endif
}

/**
 * Set up the tag output information for a selectivity + tag query.
 *
 * The contexts of all tag/ts dummy functions are collected in a newly allocated list which is
 * attached, together with their count and total output length, to the context of the
 * selectivity function (the last one found, if there are several). The list is owned by that
 * context afterwards. Nothing is done for other kinds of queries.
 *
 * @param pQuery  query description
 * @param pCtx    array of pQuery->numOfOutput function contexts
 */
static void setCtxTagColumnInfo(SQuery *pQuery, SQLFunctionCtx *pCtx) {
  if (!isSelectivityWithTagsQuery(pQuery)) {
    return;
  }

  int32_t num = 0;
  int16_t tagLen = 0;

  SQLFunctionCtx *p = NULL;
  SQLFunctionCtx **pTagCtx = calloc(pQuery->numOfOutput, POINTER_BYTES);

  for (int32_t i = 0; i < pQuery->numOfOutput; ++i) {
    SSqlFuncMsg *pSqlFuncMsg = &pQuery->pSelectExpr[i].base;

    if (pSqlFuncMsg->functionId == TSDB_FUNC_TAG_DUMMY || pSqlFuncMsg->functionId == TSDB_FUNC_TS_DUMMY) {
      tagLen += pCtx[i].outputBytes;
      pTagCtx[num++] = &pCtx[i];
    } else if ((aAggs[pSqlFuncMsg->functionId].nStatus & TSDB_FUNCSTATE_SELECTIVITY) != 0) {
      p = &pCtx[i];
    } else if (pSqlFuncMsg->functionId == TSDB_FUNC_TS || pSqlFuncMsg->functionId == TSDB_FUNC_TAG) {
      // tag function may be the group by tag column
      // ts may be the required primary timestamp column
      continue;
    } else {
      // the column may be the normal column, group by normal_column, the functionId is TSDB_FUNC_PRJ
    }
  }

  if (p == NULL) {
    // no selectivity function found: there is no context that could take ownership of the list
    free(pTagCtx);
    return;
  }

  p->tagInfo.pTagCtxList = pTagCtx;
  p->tagInfo.numOfTagCols = num;
  p->tagInfo.tagsLen = tagLen;
}

/**
 * Set up the intermediate result buffer of every output expression.
 *
 * @param pResultInfo    array with one result info per output expression
 * @param pQuery         query description, provides interBytes of each expression
 * @param isStableQuery  forwarded unchanged to setResultInfoBuf()
 */
static void setWindowResultInfo(SResultInfo *pResultInfo, SQuery *pQuery, bool isStableQuery) {
  int32_t col = 0;

  while (col < pQuery->numOfOutput) {
    // the intermediate buffer of one expression must not exceed the default page size
    assert(pQuery->pSelectExpr[col].interBytes <= DEFAULT_INTERN_BUF_PAGE_SIZE);

    setResultInfoBuf(pResultInfo + col, pQuery->pSelectExpr[col].interBytes, isStableQuery);
    ++col;
  }
}

1352
/**
 * Allocate and initialize the per-output state of the runtime environment: the result infos,
 * the function contexts (input/output type and size, order, function id, parameters) and the
 * output offset of each column.
 *
 * @param pRuntimeEnv  runtime environment to set up; pQuery must already be assigned
 * @param order        stored in param[2] of top/bottom/diff functions
 * @return TSDB_CODE_SUCCESS, or TSDB_CODE_SERV_OUT_OF_MEMORY when the result infos or the
 *         function contexts could not be allocated (both are released in that case)
 */
static int32_t setupQueryRuntimeEnv(SQueryRuntimeEnv *pRuntimeEnv, int16_t order) {
  qTrace("QInfo:%p setup runtime env", GET_QINFO_ADDR(pRuntimeEnv));
  SQuery *pQuery = pRuntimeEnv->pQuery;

  pRuntimeEnv->resultInfo = calloc(pQuery->numOfOutput, sizeof(SResultInfo));
  pRuntimeEnv->pCtx = (SQLFunctionCtx *)calloc(pQuery->numOfOutput, sizeof(SQLFunctionCtx));

  if (pRuntimeEnv->resultInfo == NULL || pRuntimeEnv->pCtx == NULL) {
    goto _clean;
  }

  pRuntimeEnv->offset[0] = 0;
  for (int32_t i = 0; i < pQuery->numOfOutput; ++i) {
    SSqlFuncMsg *pSqlFuncMsg = &pQuery->pSelectExpr[i].base;

    SQLFunctionCtx *pCtx = &pRuntimeEnv->pCtx[i];
    SColIndex* pIndex = &pSqlFuncMsg->colInfo;

    // the input type/size comes from the tag column list or the normal column list
    int32_t index = pSqlFuncMsg->colInfo.colIndex;
    if (TSDB_COL_IS_TAG(pIndex->flag)) {
      if (pIndex->colId == TSDB_TBNAME_COLUMN_INDEX) {  // todo refactor
        pCtx->inputBytes = TSDB_TABLE_NAME_LEN + VARSTR_HEADER_SIZE;
        pCtx->inputType = TSDB_DATA_TYPE_BINARY;
      } else {
        pCtx->inputBytes = pQuery->tagColList[index].bytes;
        pCtx->inputType = pQuery->tagColList[index].type;
      }
    } else {
      pCtx->inputBytes = pQuery->colList[index].bytes;
      pCtx->inputType = pQuery->colList[index].type;
    }

    assert(isValidDataType(pCtx->inputType, pCtx->inputBytes));
    pCtx->ptsOutputBuf = NULL;

    pCtx->outputBytes = pQuery->pSelectExpr[i].bytes;
    pCtx->outputType = pQuery->pSelectExpr[i].type;

    pCtx->order = pQuery->order.order;
    pCtx->functionId = pSqlFuncMsg->functionId;

    pCtx->numOfParams = pSqlFuncMsg->numOfParams;
    for (int32_t j = 0; j < pCtx->numOfParams; ++j) {
      int16_t type = pSqlFuncMsg->arg[j].argType;
      int16_t bytes = pSqlFuncMsg->arg[j].argBytes;
      if (type == TSDB_DATA_TYPE_BINARY || type == TSDB_DATA_TYPE_NCHAR) {
        // every parameter is built from its own argument (arg[j]), not from the first one
        tVariantCreateFromBinary(&pCtx->param[j], pSqlFuncMsg->arg[j].argValue.pz, bytes, type);
      } else {
        tVariantCreateFromBinary(&pCtx->param[j], (char *)&pSqlFuncMsg->arg[j].argValue.i64, bytes, type);
      }
    }

    // set the order information for top/bottom query
    int32_t functionId = pCtx->functionId;

    if (functionId == TSDB_FUNC_TOP || functionId == TSDB_FUNC_BOTTOM || functionId == TSDB_FUNC_DIFF) {
      // these functions expect the first output expression to be the ts or ts_dummy function
      int32_t f = pQuery->pSelectExpr[0].base.functionId;
      assert(f == TSDB_FUNC_TS || f == TSDB_FUNC_TS_DUMMY);

      pCtx->param[2].i64Key = order;
      pCtx->param[2].nType = TSDB_DATA_TYPE_BIGINT;
      pCtx->param[3].i64Key = functionId;
      pCtx->param[3].nType = TSDB_DATA_TYPE_BIGINT;

      pCtx->param[1].i64Key = pQuery->order.orderColId;
    }

    // the output offset of a column is the sum of the output sizes of all preceding columns
    if (i > 0) {
      pRuntimeEnv->offset[i] = pRuntimeEnv->offset[i - 1] + pRuntimeEnv->pCtx[i - 1].outputBytes;
    }
  }

  // set the intermediate result output buffer
  setWindowResultInfo(pRuntimeEnv->resultInfo, pQuery, pRuntimeEnv->stableQuery);

  // if it is group by normal column, do not set output buffer, the output buffer is pResult
  if (!isGroupbyNormalCol(pQuery->pGroupbyExpr) && !pRuntimeEnv->stableQuery) {
    resetCtxOutputBuf(pRuntimeEnv);
  }

  setCtxTagColumnInfo(pQuery, pRuntimeEnv->pCtx);
  return TSDB_CODE_SUCCESS;

_clean:
  tfree(pRuntimeEnv->resultInfo);
  tfree(pRuntimeEnv->pCtx);

  return TSDB_CODE_SERV_OUT_OF_MEMORY;
}

/**
 * Release everything the runtime environment holds: the time window info, the function
 * contexts with their parameters and intermediate result buffers, the fill info, the result
 * buffer, both query handles and the timestamp buffer.
 *
 * Nothing is done when no query is attached to the environment.
 */
static void teardownQueryRuntimeEnv(SQueryRuntimeEnv *pRuntimeEnv) {
  SQuery *pQuery = pRuntimeEnv->pQuery;
  if (pQuery == NULL) {
    return;
  }

  SQInfo *pOwner = (SQInfo *)GET_QINFO_ADDR(pRuntimeEnv);
  qTrace("QInfo:%p teardown runtime env", pOwner);

  cleanupTimeWindowInfo(&pRuntimeEnv->windowResInfo, pQuery->numOfOutput);

  if (pRuntimeEnv->pCtx != NULL) {
    int32_t col = 0;

    while (col < pQuery->numOfOutput) {
      SQLFunctionCtx *pFuncCtx = pRuntimeEnv->pCtx + col;

      int32_t argIdx = 0;
      while (argIdx < pFuncCtx->numOfParams) {
        tVariantDestroy(&pFuncCtx->param[argIdx]);
        ++argIdx;
      }

      tVariantDestroy(&pFuncCtx->tag);
      tfree(pFuncCtx->tagInfo.pTagCtxList);
      tfree(pRuntimeEnv->resultInfo[col].interResultBuf);

      ++col;
    }

    tfree(pRuntimeEnv->resultInfo);
    tfree(pRuntimeEnv->pCtx);
  }

  taosDestoryFillInfo(pRuntimeEnv->pFillInfo);
  destroyResultBuf(pRuntimeEnv->pResultBuf, pOwner);

  tsdbCleanupQueryHandle(pRuntimeEnv->pQueryHandle);
  tsdbCleanupQueryHandle(pRuntimeEnv->pSecQueryHandle);

  // the return value of tsBufDestory() replaces the stored buffer pointer
  pRuntimeEnv->pTSBuf = tsBufDestory(pRuntimeEnv->pTSBuf);
}

1480 1481
/* Returns true when the result code of the query is TSDB_CODE_QUERY_CANCELLED. */
static bool isQueryKilled(SQInfo *pQInfo) {
  bool cancelled = (pQInfo->code == TSDB_CODE_QUERY_CANCELLED);
  return cancelled;
}

1497
/* Marks the query as cancelled by storing TSDB_CODE_QUERY_CANCELLED in its result code. */
static void setQueryKilled(SQInfo *pQInfo) {
  pQInfo->code = TSDB_CODE_QUERY_CANCELLED;
}
H
hjxilinx 已提交
1498

H
hjxilinx 已提交
1499
/**
 * Tell whether the query is a fixed output query.
 *
 * An interval query never is. A top/bottom query and a group-by on a normal column always
 * are. Otherwise the query is a fixed output query as soon as one output function, apart from
 * the ts_comp projection in the first position and the ts/ts_dummy functions, is not a
 * multi-output function.
 */
static bool isFixedOutputQuery(SQuery *pQuery) {
  if (pQuery->intervalTime != 0) {
    return false;
  }

  // Note:top/bottom query is fixed output query
  if (isTopBottomQuery(pQuery) || isGroupbyNormalCol(pQuery->pGroupbyExpr)) {
    return true;
  }

  for (int32_t idx = 0; idx < pQuery->numOfOutput; ++idx) {
    SSqlFuncMsg *pFunc = &pQuery->pSelectExpr[idx].base;

    // the ts_comp function: a projection of the primary timestamp column in the first position
    bool isTsComp = (idx == 0) && (pFunc->functionId == TSDB_FUNC_PRJ) && (pFunc->numOfParams == 1) &&
                    (pFunc->colInfo.colIndex == PRIMARYKEY_TIMESTAMP_COL_INDEX);
    bool isTsFunc = (pFunc->functionId == TSDB_FUNC_TS) || (pFunc->functionId == TSDB_FUNC_TS_DUMMY);

    if (isTsComp || isTsFunc) {
      continue;
    }

    if (!IS_MULTIOUTPUT(aAggs[pFunc->functionId].nStatus)) {
      return true;
    }
  }

  return false;
}

1530
// todo refactor with isLastRowQuery
H
hjxilinx 已提交
1531
static bool isPointInterpoQuery(SQuery *pQuery) {
1532
  for (int32_t i = 0; i < pQuery->numOfOutput; ++i) {
1533
    int32_t functionID = pQuery->pSelectExpr[i].base.functionId;
1534
    if (functionID == TSDB_FUNC_INTERP) {
1535 1536 1537
      return true;
    }
  }
1538

1539 1540 1541 1542
  return false;
}

// TODO REFACTOR:MERGE WITH CLIENT-SIDE FUNCTION
H
hjxilinx 已提交
1543
static bool isSumAvgRateQuery(SQuery *pQuery) {
1544
  for (int32_t i = 0; i < pQuery->numOfOutput; ++i) {
1545
    int32_t functionId = pQuery->pSelectExpr[i].base.functionId;
1546 1547 1548
    if (functionId == TSDB_FUNC_TS) {
      continue;
    }
1549

1550 1551 1552 1553 1554
    if (functionId == TSDB_FUNC_SUM_RATE || functionId == TSDB_FUNC_SUM_IRATE || functionId == TSDB_FUNC_AVG_RATE ||
        functionId == TSDB_FUNC_AVG_IRATE) {
      return true;
    }
  }
1555

1556 1557 1558
  return false;
}

H
hjxilinx 已提交
1559
/* Returns true when at least one output expression uses TSDB_FUNC_LAST_ROW. */
static bool isFirstLastRowQuery(SQuery *pQuery) {
  bool found = false;

  for (int32_t idx = 0; idx < pQuery->numOfOutput && !found; ++idx) {
    found = (pQuery->pSelectExpr[idx].base.functionId == TSDB_FUNC_LAST_ROW);
  }

  return found;
}

H
hjxilinx 已提交
1570
/*
 * Returns true when the query window spans the whole range [0, INT64_MAX] in scan direction:
 * skey == 0 and ekey == INT64_MAX for an ascending query, the reverse for a descending one.
 */
static UNUSED_FUNC bool notHasQueryTimeRange(SQuery *pQuery) {
  if (QUERY_IS_ASC_QUERY(pQuery)) {
    return pQuery->window.skey == 0 && pQuery->window.ekey == INT64_MAX;
  }

  return pQuery->window.skey == INT64_MAX && pQuery->window.ekey == 0;
}

H
hjxilinx 已提交
1575
/*
 * Returns true when the query contains a last/last_dst function in an ascending query or a
 * first/first_dst function in a descending query. The ts, ts_dummy and tag functions are
 * skipped.
 */
static bool needReverseScan(SQuery *pQuery) {
  bool ascending = QUERY_IS_ASC_QUERY(pQuery);

  for (int32_t idx = 0; idx < pQuery->numOfOutput; ++idx) {
    int32_t fid = pQuery->pSelectExpr[idx].base.functionId;

    if (fid == TSDB_FUNC_TS || fid == TSDB_FUNC_TS_DUMMY || fid == TSDB_FUNC_TAG) {
      continue;
    }

    bool isLast = (fid == TSDB_FUNC_LAST) || (fid == TSDB_FUNC_LAST_DST);
    bool isFirst = (fid == TSDB_FUNC_FIRST) || (fid == TSDB_FUNC_FIRST_DST);

    if ((isLast && ascending) || (isFirst && !ascending)) {
      return true;
    }
  }

  return false;
}
H
hjxilinx 已提交
1590 1591 1592 1593

/*
 * Returns true when every output expression is a TSDB_FUNC_TAGPRJ or TSDB_FUNC_TID_TAG
 * function (also true for a query without output expressions).
 */
static bool onlyQueryTags(SQuery* pQuery) {
  int32_t idx = 0;

  while (idx < pQuery->numOfOutput) {
    int32_t fid = pQuery->pSelectExpr[idx].base.functionId;

    bool isTagFunc = (fid == TSDB_FUNC_TAGPRJ) || (fid == TSDB_FUNC_TID_TAG);
    if (!isTagFunc) {
      return false;
    }

    ++idx;
  }

  return true;
}

1602 1603
/////////////////////////////////////////////////////////////////////////////////////////////

H
Haojun Liao 已提交
1604
/*
 * Compute the aligned time window that contains `key`, together with the part
 * of that window that lies inside [keyFirst, keyLast].
 *
 * @param pQuery    supplies slidingTime/slidingTimeUnit/precision/intervalTime
 * @param key       timestamp to align; must be within [keyFirst, keyLast]
 * @param keyFirst  lower bound of the queried range
 * @param keyLast   upper bound of the queried range
 * @param realWin   [out] aligned window clipped to [keyFirst, keyLast]
 * @param win       [out] aligned window; skey comes from
 *                  taosGetIntervalStartTimestamp(), ekey is skey + intervalTime - 1
 *                  (or INT64_MAX in the near-overflow case below)
 */
void getAlignQueryTimeWindow(SQuery *pQuery, int64_t key, int64_t keyFirst, int64_t keyLast, STimeWindow *realWin, STimeWindow *win) {
  assert(key >= keyFirst && key <= keyLast && pQuery->slidingTime <= pQuery->intervalTime);

  win->skey = taosGetIntervalStartTimestamp(key, pQuery->slidingTime, pQuery->slidingTimeUnit, pQuery->precision);

  const bool nearUpperLimit = (keyFirst > (INT64_MAX - pQuery->intervalTime));
  if (!nearUpperLimit) {
    win->ekey = win->skey + pQuery->intervalTime - 1;

    // clip the aligned window to the queried range
    realWin->skey = (keyFirst > win->skey) ? keyFirst : win->skey;
    realWin->ekey = (keyLast > win->ekey) ? win->ekey : keyLast;
    return;
  }

  /*
   * keyFirst is larger than INT64_MAX - intervalTime, so the queried range
   * [keyFirst, keyLast] must be shorter than one interval. The range is used
   * as is, and the window end is pinned to INT64_MAX.
   */
  assert(keyLast - keyFirst < pQuery->intervalTime);

  realWin->skey = keyFirst;
  realWin->ekey = keyLast;

  win->ekey = INT64_MAX;
}

/*
 * Set pQuery->checkBuffer.
 *
 * The flag is cleared for top/bottom queries and for group-by-normal-column
 * queries. Otherwise the output expressions are scanned (ts and ts_dummy are
 * skipped) and the scan stops at the first function that is not multi-output;
 * the flag ends up as 1 only if the last examined function is multi-output.
 *
 * @param pQuery  query to update
 */
static void setScanLimitationByResultBuffer(SQuery *pQuery) {
  if (isTopBottomQuery(pQuery) || isGroupbyNormalCol(pQuery->pGroupbyExpr)) {
    pQuery->checkBuffer = 0;
    return;
  }

  bool multiOutput = false;

  for (int32_t idx = 0; idx < pQuery->numOfOutput; ++idx) {
    SSqlFuncMsg *pFunc = &pQuery->pSelectExpr[idx].base;

    const bool isTsFunc = (pFunc->functionId == TSDB_FUNC_TS || pFunc->functionId == TSDB_FUNC_TS_DUMMY);
    if (isTsFunc) {
      continue;
    }

    multiOutput = IS_MULTIOUTPUT(aAggs[pFunc->functionId].nStatus);
    if (!multiOutput) {
      break;
    }
  }

  if (multiOutput) {
    pQuery->checkBuffer = 1;
  } else {
    pQuery->checkBuffer = 0;
  }
}

/*
 * todo add more parameters to check soon..
 */
1655
bool colIdCheck(SQuery *pQuery) {
1656 1657
  // load data column information is incorrect
  for (int32_t i = 0; i < pQuery->numOfCols - 1; ++i) {
1658
    if (pQuery->colList[i].colId == pQuery->colList[i + 1].colId) {
S
slguan 已提交
1659
      qError("QInfo:%p invalid data load column for query", GET_QINFO_ADDR(pQuery));
1660 1661 1662
      return false;
    }
  }
1663
  
1664 1665 1666 1667 1668 1669
  return true;
}

/*
 * Check whether all output expressions, apart from ts/ts_dummy/tag/tag_dummy,
 * use one of the two given function ids.
 *
 * todo: ignore the avg/sum/min/max/count/stddev/top/bottom functions, for
 * which the scan order does not matter.
 *
 * @param pQuery      query whose output expressions are inspected
 * @param functId     first accepted function id
 * @param functIdDst  second accepted function id
 * @return false if any other function id is present, true otherwise
 */
static bool onlyOneQueryType(SQuery *pQuery, int32_t functId, int32_t functIdDst) {
  for (int32_t idx = 0; idx < pQuery->numOfOutput; ++idx) {
    const int32_t fid = pQuery->pSelectExpr[idx].base.functionId;

    const bool auxiliary = (fid == TSDB_FUNC_TS || fid == TSDB_FUNC_TS_DUMMY || fid == TSDB_FUNC_TAG ||
                            fid == TSDB_FUNC_TAG_DUMMY);
    if (auxiliary) {
      continue;
    }

    if (!(fid == functId || fid == functIdDst)) {
      return false;
    }
  }

  return true;
}

/* True if, apart from ts/tag helper functions, the query only uses first/first_dst. */
static bool onlyFirstQuery(SQuery *pQuery) {
  return onlyOneQueryType(pQuery, TSDB_FUNC_FIRST, TSDB_FUNC_FIRST_DST);
}

/* True if, apart from ts/tag helper functions, the query only uses last/last_dst. */
static bool onlyLastQuery(SQuery *pQuery) {
  return onlyOneQueryType(pQuery, TSDB_FUNC_LAST, TSDB_FUNC_LAST_DST);
}

1690
/*
 * Force the scan order (and, if needed, exchange the query range) for query
 * types that require a particular direction:
 *   - last_row query:                       descending, window stored as [max, min]
 *   - point interpolation, no interval:     ascending
 *   - only first/first_dst:                 ascending
 *   - only last/last_dst:                   descending
 * For interval queries the first/last rule is applied only when stableQuery
 * is true. Any other query is left untouched.
 *
 * @param pQuery       query whose order.order and window may be rewritten
 * @param stableQuery  enables the first/last rule for interval queries
 */
static void changeExecuteScanOrder(SQuery *pQuery, bool stableQuery) {
  // trace format shared by every branch that exchanges the query range
  char msg[] = "QInfo:%p scan order changed for %s query, old:%d, new:%d, qrange exchanged, old qrange:%" PRId64
               "-%" PRId64 ", new qrange:%" PRId64 "-%" PRId64;

  // todo handle the case where order-irrelevant query types are mixed with order-critical ones
  // last_row query is always executed in descending order
  if (isFirstLastRowQuery(pQuery)) {
    qTrace("QInfo:%p scan order changed for last_row query, old:%d, new:%d", GET_QINFO_ADDR(pQuery),
           pQuery->order.order, TSDB_ORDER_DESC);

    int64_t lower = MIN(pQuery->window.skey, pQuery->window.ekey);
    int64_t upper = MAX(pQuery->window.skey, pQuery->window.ekey);

    pQuery->order.order = TSDB_ORDER_DESC;
    pQuery->window.skey = upper;
    pQuery->window.ekey = lower;
    return;
  }

  int32_t     newOrder = TSDB_ORDER_ASC;
  const char *queryType = NULL;

  if (isPointInterpoQuery(pQuery) && pQuery->intervalTime == 0) {
    // point-interpolation query always scans in ascending order
    queryType = "interp";
    newOrder = TSDB_ORDER_ASC;
  } else if (pQuery->intervalTime != 0 && !stableQuery) {
    // interval query that is not flagged as stableQuery: nothing to adjust
    return;
  } else {
    const bool intervalQuery = (pQuery->intervalTime != 0);

    if (onlyFirstQuery(pQuery)) {
      queryType = intervalQuery ? "only-first stable" : "only-first";
      newOrder = TSDB_ORDER_ASC;
    } else if (onlyLastQuery(pQuery)) {
      queryType = intervalQuery ? "only-last stable" : "only-last";
      newOrder = TSDB_ORDER_DESC;
    } else {
      return;
    }
  }

  // the range is exchanged only when the current direction differs from the required one
  const bool ascendingNow = QUERY_IS_ASC_QUERY(pQuery);
  const bool directionChanges = (newOrder == TSDB_ORDER_ASC) ? !ascendingNow : ascendingNow;

  if (directionChanges) {
    qTrace(msg, GET_QINFO_ADDR(pQuery), queryType, pQuery->order.order, newOrder, pQuery->window.skey,
           pQuery->window.ekey, pQuery->window.ekey, pQuery->window.skey);

    SWAP(pQuery->window.skey, pQuery->window.ekey, TSKEY);
  }

  pQuery->order.order = newOrder;
}

H
hjxilinx 已提交
1769
/*
 * Store an interpolation value in pCtx->param[index] as a binary blob laid out
 * as: [timestamp (TSDB_KEYSIZE bytes)][value].
 *
 * The value part is a double for FLOAT/DOUBLE, an int64_t for the integer,
 * bool and timestamp types, and a copy of the string for BINARY/NCHAR (the
 * buffer is zero-filled by calloc, so the copy stays terminated).
 *
 * @param pCtx   function context; param[index].pz must be NULL on entry
 * @param ts     timestamp stored in front of the value
 * @param type   TSDB_DATA_TYPE_* of the value referenced by data
 * @param index  slot in pCtx->param to fill
 * @param data   source value; NUL-terminated for BINARY, wide string for NCHAR
 *
 * If the buffer cannot be allocated, an error is logged and the parameter is
 * left unchanged (pz stays NULL) instead of writing through a NULL pointer.
 */
static UNUSED_FUNC void doSetInterpVal(SQLFunctionCtx *pCtx, TSKEY ts, int16_t type, int32_t index, char *data) {
  assert(pCtx->param[index].pz == NULL);

  int32_t len = 0;
  size_t  t = 0;

  if (type == TSDB_DATA_TYPE_BINARY) {
    t = strlen(data);

    len = t + 1 + TSDB_KEYSIZE;
    pCtx->param[index].pz = calloc(1, len);
  } else if (type == TSDB_DATA_TYPE_NCHAR) {
    t = wcslen((const wchar_t *)data);

    len = (t + 1) * TSDB_NCHAR_SIZE + TSDB_KEYSIZE;
    pCtx->param[index].pz = calloc(1, len);
  } else {
    len = TSDB_KEYSIZE * 2;
    pCtx->param[index].pz = malloc(len);
  }

  // allocation failure: do not touch the (NULL) buffer
  if (pCtx->param[index].pz == NULL) {
    qError("failed to allocate %d bytes for interpolation value", len);
    return;
  }

  pCtx->param[index].nType = TSDB_DATA_TYPE_BINARY;

  // timestamp first, the value right after it
  char *z = pCtx->param[index].pz;
  *(TSKEY *)z = ts;
  z += TSDB_KEYSIZE;

  switch (type) {
    case TSDB_DATA_TYPE_FLOAT:
      *(double *)z = GET_FLOAT_VAL(data);
      break;
    case TSDB_DATA_TYPE_DOUBLE:
      *(double *)z = GET_DOUBLE_VAL(data);
      break;
    case TSDB_DATA_TYPE_INT:
    case TSDB_DATA_TYPE_BOOL:
    case TSDB_DATA_TYPE_BIGINT:
    case TSDB_DATA_TYPE_TINYINT:
    case TSDB_DATA_TYPE_SMALLINT:
    case TSDB_DATA_TYPE_TIMESTAMP:
      // NOTE(review): every integer type is read through GET_INT64_VAL; this
      // presumably expects data to hold a widened 8-byte value - confirm with callers
      *(int64_t *)z = GET_INT64_VAL(data);
      break;
    case TSDB_DATA_TYPE_BINARY:
      strncpy(z, data, t);
      break;
    case TSDB_DATA_TYPE_NCHAR: {
      wcsncpy((wchar_t *)z, (const wchar_t *)data, t);
    } break;
    default:
      assert(0);
  }

  pCtx->param[index].nLen = len;
}

/*
 * Pick the initial page count for the query:
 *   - group by normal column: 128
 *   - interval (time window) query: max(number of tables, 16)
 *   - otherwise: 1
 *
 * @param pQInfo  query info; runtimeEnv.pQuery and groupInfo.numOfTables are read
 * @return the chosen count (always > 0)
 */
static int32_t getInitialPageNum(SQInfo *pQInfo) {
  SQuery *pQuery = pQInfo->runtimeEnv.pQuery;
  int32_t minPagesForInterval = 16;

  int32_t pages = 0;

  if (isGroupbyNormalCol(pQuery->pGroupbyExpr)) {
    pages = 128;
  } else if (!isIntervalQuery(pQuery)) {
    // for super table query, one page for each subset
    pages = 1;  // pQInfo->pSidSet->numOfSubSet;
  } else {
    // time window query, allocate one page for each table
    size_t numOfTables = pQInfo->groupInfo.numOfTables;
    pages = MAX(numOfTables, minPagesForInterval);
  }

  assert(pages > 0);
  return pages;
}

H
Haojun Liao 已提交
1843
// Row multiplier for multi-output results: the i64 argument of the second select
// expression when tbq is true and sq is false, otherwise 1. Every macro argument
// is parenthesized so that compound expressions (e.g. "a || b") are negated as a whole.
#define GET_ROW_PARAM_FOR_MULTIOUTPUT(_q, tbq, sq) (((tbq) && !(sq)) ? (_q)->pSelectExpr[1].base.arg->argValue.i64 : 1)
1844

H
Haojun Liao 已提交
1845 1846
/*
 * Number of result rows that fit into one internal buffer page.
 *
 * The usable page size is DEFAULT_INTERN_BUF_PAGE_SIZE minus the tFilePage
 * header; the size of one result is pQuery->rowSize multiplied by
 * GET_ROW_PARAM_FOR_MULTIOUTPUT for the given flags.
 *
 * @param pQuery         query supplying rowSize and the select expressions
 * @param topBotQuery    forwarded to GET_ROW_PARAM_FOR_MULTIOUTPUT
 * @param isSTableQuery  forwarded to GET_ROW_PARAM_FOR_MULTIOUTPUT
 */
static FORCE_INLINE int32_t getNumOfRowsInResultPage(SQuery *pQuery, bool topBotQuery, bool isSTableQuery) {
  int32_t bytesPerResult = pQuery->rowSize * GET_ROW_PARAM_FOR_MULTIOUTPUT(pQuery, topBotQuery, isSTableQuery);

  return (DEFAULT_INTERN_BUF_PAGE_SIZE - sizeof(tFilePage)) / bytesPerResult;
}

/*
 * Locate the output cell of one column for a window result inside its result page.
 *
 * The page is looked up by pResult->pos.pageId. Within the page, the column
 * starts at offset[columnIndex] * numOfRowsPerPage bytes, and the row is found
 * by stepping pSelectExpr[columnIndex].bytes per row, where the row id is
 * scaled by GET_ROW_PARAM_FOR_MULTIOUTPUT.
 *
 * @param pRuntimeEnv  runtime environment (must not be NULL)
 * @param columnIndex  index of the output column
 * @param pResult      window result whose position is resolved (must not be NULL)
 * @return pointer into the page data
 */
char *getPosInResultPage(SQueryRuntimeEnv *pRuntimeEnv, int32_t columnIndex, SWindowResult *pResult) {
  assert(pResult != NULL && pRuntimeEnv != NULL);

  SQuery    *pQuery = pRuntimeEnv->pQuery;
  tFilePage *pPage = GET_RES_BUF_PAGE_BY_ID(pRuntimeEnv->pResultBuf, pResult->pos.pageId);

  int32_t scaledRowId = pResult->pos.rowId * GET_ROW_PARAM_FOR_MULTIOUTPUT(pQuery, pRuntimeEnv->topBotQuery, pRuntimeEnv->stableQuery);

  char *columnStart = ((char *)pPage->data) + pRuntimeEnv->offset[columnIndex] * pRuntimeEnv->numOfRowsPerPage;
  return columnStart + pQuery->pSelectExpr[columnIndex].bytes * scaledRowId;
}

/**
 * decrease the refcount for each table involved in this query
 * @param pQInfo
 */
1865
UNUSED_FUNC void vnodeDecMeterRefcnt(SQInfo *pQInfo) {
1866
  if (pQInfo != NULL) {
1867
    //    assert(taosHashGetSize(pQInfo->groupInfo) >= 1);
1868 1869 1870
  }

#if 0
1871
  if (pQInfo == NULL || pQInfo->groupInfo.numOfTables == 1) {
1872
    atomic_fetch_sub_32(&pQInfo->pObj->numOfQueries, 1);
S
slguan 已提交
1873
    qTrace("QInfo:%p vid:%d sid:%d meterId:%s, query is over, numOfQueries:%d", pQInfo, pQInfo->pObj->vnode,
1874 1875 1876
           pQInfo->pObj->sid, pQInfo->pObj->meterId, pQInfo->pObj->numOfQueries);
  } else {
    int32_t num = 0;
1877 1878
    for (int32_t i = 0; i < pQInfo->groupInfo.numOfTables; ++i) {
      SMeterObj *pMeter = getMeterObj(pQInfo->groupInfo, pQInfo->pSidSet->pTableIdList[i]->sid);
1879
      atomic_fetch_sub_32(&(pMeter->numOfQueries), 1);
1880

1881
      if (pMeter->numOfQueries > 0) {
S
slguan 已提交
1882
        qTrace("QInfo:%p vid:%d sid:%d meterId:%s, query is over, numOfQueries:%d", pQInfo, pMeter->vnode, pMeter->sid,
1883 1884 1885 1886
               pMeter->meterId, pMeter->numOfQueries);
        num++;
      }
    }
1887

1888 1889 1890 1891
    /*
     * in order to reduce log output, for all meters of which numOfQueries count are 0,
     * we do not output corresponding information
     */
1892
    num = pQInfo->groupInfo.numOfTables - num;
S
slguan 已提交
1893
    qTrace("QInfo:%p metric query is over, dec query ref for %d meters, numOfQueries on %d meters are 0", pQInfo,
1894
           pQInfo->groupInfo.numOfTables, num);
1895 1896 1897 1898 1899 1900 1901 1902 1903 1904 1905 1906 1907
  }
#endif
}

/*
 * Decide whether a data block has to be loaded, based on its statistics.
 *
 * The statistics-based pre-filter is currently compiled out (#if 0), so this
 * function always returns true.
 *
 * @param pQuery            query (only used by the disabled pre-filter)
 * @param pDataStatis       block statistics; NULL means "no statistics, load the block"
 * @param pCtx              function contexts (only used by the disabled code)
 * @param numOfTotalPoints  number of rows in the block (only used by the disabled code)
 * @return true
 */
static bool needToLoadDataBlock(SQuery *pQuery, SDataStatis *pDataStatis, SQLFunctionCtx *pCtx,
                                int32_t numOfTotalPoints) {
  // without statistics nothing can be filtered out
  if (pDataStatis == NULL) {
    return true;
  }

#if 0
  for (int32_t k = 0; k < pQuery->numOfFilterCols; ++k) {
    SSingleColumnFilterInfo *pFilterInfo = &pQuery->pFilterInfo[k];
    int32_t                  colIndex = pFilterInfo->info.colIndex;

    // this column not valid in current data block
    if (colIndex < 0 || pDataStatis[colIndex].colId != pFilterInfo->info.data.colId) {
      continue;
    }

    // not support pre-filter operation on binary/nchar data type
    if (!vnodeSupportPrefilter(pFilterInfo->info.data.type)) {
      continue;
    }

    // all points in current column are NULL, no need to check its boundary value
    if (pDataStatis[colIndex].numOfNull == numOfTotalPoints) {
      continue;
    }

    if (pFilterInfo->info.info.type == TSDB_DATA_TYPE_FLOAT) {
      float minval = *(double *)(&pDataStatis[colIndex].min);
      float maxval = *(double *)(&pDataStatis[colIndex].max);

      for (int32_t i = 0; i < pFilterInfo->numOfFilters; ++i) {
        if (pFilterInfo->pFilters[i].fp(&pFilterInfo->pFilters[i], (char *)&minval, (char *)&maxval)) {
          return true;
        }
      }
    } else {
      for (int32_t i = 0; i < pFilterInfo->numOfFilters; ++i) {
        if (pFilterInfo->pFilters[i].fp(&pFilterInfo->pFilters[i], (char *)&pDataStatis[colIndex].min,
                                        (char *)&pDataStatis[colIndex].max)) {
          return true;
        }
      }
    }
  }

  // todo disable this opt code block temporarily
  //  for (int32_t i = 0; i < pQuery->numOfOutput; ++i) {
  //    int32_t functId = pQuery->pSelectExpr[i].base.functionId;
  //    if (functId == TSDB_FUNC_TOP || functId == TSDB_FUNC_BOTTOM) {
  //      return top_bot_datablock_filter(&pCtx[i], functId, (char *)&pField[i].min, (char *)&pField[i].max);
  //    }
  //  }

#endif
  return true;
}

/*
 * Advance a time window by one sliding step in the scan direction and reset
 * its length to pQuery->intervalTime.
 * The previous time window may not be of the same size as pQuery->intervalTime.
 *
 * @param pQuery       supplies order, slidingTime and intervalTime
 * @param pTimeWindow  [in/out] window to move
 */
static void getNextTimeWindow(SQuery *pQuery, STimeWindow *pTimeWindow) {
  int32_t direction = GET_FORWARD_DIRECTION_FACTOR(pQuery->order.order);

  pTimeWindow->skey += (pQuery->slidingTime * direction);

  // the end key is always derived from the new start key
  pTimeWindow->ekey = pTimeWindow->skey + (pQuery->intervalTime - 1);
}

H
hjxilinx 已提交
1964
/*
 * Load the current data block, or only its statistics, depending on what the
 * query functions require.
 *
 * The requirement is BLK_DATA_ALL_NEEDED when filter columns exist; otherwise
 * it is the OR of every function's dataReqFunc() result, raised to
 * BLK_DATA_ALL_NEEDED when a ts buffer exists or the query is an interval query.
 *
 *   - BLK_DATA_NO_NEEDED:     the block is discarded, nothing is loaded
 *   - BLK_DATA_STATIS_NEEDED: the statistics are loaded; the block itself is
 *                             loaded only when no statistics are available
 *   - BLK_DATA_ALL_NEEDED:    both the statistics and the block are loaded
 *
 * If retrieving the statistics fails, *pStatis is reset to NULL so that stale
 * or partially filled statistics are never used; the code then behaves as if
 * the block had no statistics.
 *
 * @param pRuntimeEnv   runtime environment (query, function contexts, cost summary)
 * @param pQueryHandle  tsdb query handle positioned on the current block
 * @param pBlockInfo    information of the current block (window, rows)
 * @param pStatis       [out] block statistics, may be set to NULL
 * @return the loaded data block, or NULL if the block was not loaded
 */
SArray *loadDataBlockOnDemand(SQueryRuntimeEnv *pRuntimeEnv, void* pQueryHandle, SDataBlockInfo* pBlockInfo, SDataStatis **pStatis) {
  SQuery *pQuery = pRuntimeEnv->pQuery;

  uint32_t r = 0;
  SArray * pDataBlock = NULL;

  if (pQuery->numOfFilterCols > 0) {
    r = BLK_DATA_ALL_NEEDED;
  } else {
    // check if this data block is required to load
    for (int32_t i = 0; i < pQuery->numOfOutput; ++i) {
      SSqlFuncMsg* pSqlFunc = &pQuery->pSelectExpr[i].base;

      int32_t functionId = pSqlFunc->functionId;
      int32_t colId = pSqlFunc->colInfo.colId;
      r |= aAggs[functionId].dataReqFunc(&pRuntimeEnv->pCtx[i], pQuery->window.skey, pQuery->window.ekey, colId);
    }

    // pTSBuf is a pointer: test it against NULL instead of an ordered comparison with 0
    if (pRuntimeEnv->pTSBuf != NULL || isIntervalQuery(pQuery)) {
      r |= BLK_DATA_ALL_NEEDED;
    }
  }

  if (r == BLK_DATA_NO_NEEDED) {
    qTrace("QInfo:%p data block discard, brange:%" PRId64 "-%" PRId64 ", rows:%d", GET_QINFO_ADDR(pRuntimeEnv),
           pBlockInfo->window.skey, pBlockInfo->window.ekey, pBlockInfo->rows);
    pRuntimeEnv->summary.discardBlocks += 1;
  } else if (r == BLK_DATA_STATIS_NEEDED) {
    if (tsdbRetrieveDataBlockStatisInfo(pQueryHandle, pStatis) != TSDB_CODE_SUCCESS) {
      // statistics are unusable: fall back to loading the data block below
      *pStatis = NULL;
    }

    pRuntimeEnv->summary.loadBlockStatis += 1;

    if (*pStatis == NULL) { // data block statistics does not exist, load data block
      pDataBlock = tsdbRetrieveDataBlock(pQueryHandle, NULL);
      pRuntimeEnv->summary.totalCheckedRows += pBlockInfo->rows;
    }
  } else {
    assert(r == BLK_DATA_ALL_NEEDED);

    // load the data block statistics to perform further filter
    pRuntimeEnv->summary.loadBlockStatis +=1;
    if (tsdbRetrieveDataBlockStatisInfo(pQueryHandle, pStatis) != TSDB_CODE_SUCCESS) {
      // statistics are unusable: skip the statistics-based filter
      *pStatis = NULL;
    }

    if (!needToLoadDataBlock(pQuery,*pStatis, pRuntimeEnv->pCtx, pBlockInfo->rows)) {
#if defined(_DEBUG_VIEW)
      qTrace("QInfo:%p block discarded by per-filter", GET_QINFO_ADDR(pRuntimeEnv));
#endif
      // current block has been discard due to filter applied
      // NOTE: the block is only counted as discarded here; it is still loaded below
      pRuntimeEnv->summary.discardBlocks += 1;
      //        return DISK_DATA_DISCARDED;
    }

    pRuntimeEnv->summary.totalCheckedRows += pBlockInfo->rows;
    pRuntimeEnv->summary.loadBlocks += 1;
    pDataBlock = tsdbRetrieveDataBlock(pQueryHandle, NULL);
  }

  return pDataBlock;
}

H
hjxilinx 已提交
2027
/*
 * Binary search in an ascending array of timestamps.
 *
 * @param pValue  array of TSKEY values, sorted in ascending order
 * @param num     number of elements in the array
 * @param key     timestamp to look for
 * @param order   TSDB_ORDER_ASC or TSDB_ORDER_DESC
 * @return for TSDB_ORDER_DESC: index of the last element that is <= key, or -1
 *         when key is smaller than the first element;
 *         for TSDB_ORDER_ASC: index of the first element that is >= key, or -1
 *         when key is larger than the last element;
 *         -1 when num <= 0.
 */
int32_t binarySearchForKey(char *pValue, int num, TSKEY key, int order) {
  if (num <= 0) {
    return -1;
  }

  assert(order == TSDB_ORDER_ASC || order == TSDB_ORDER_DESC);

  TSKEY * keys = (TSKEY *)pValue;
  int32_t lo = 0;
  int32_t hi = num - 1;

  if (order == TSDB_ORDER_DESC) {
    // find the last position whose key is not larger than the given key
    for (;;) {
      if (key >= keys[hi]) return hi;
      if (key == keys[lo]) return lo;
      if (key < keys[lo]) return lo - 1;

      int32_t mid = lo + ((hi - lo + 1) >> 1);

      if (key < keys[mid]) {
        hi = mid - 1;
      } else if (key > keys[mid]) {
        lo = mid + 1;
      } else {
        return mid;
      }
    }
  }

  // find the first position whose key is not smaller than the given key
  for (;;) {
    if (key <= keys[lo]) return lo;
    if (key == keys[hi]) return hi;

    if (key > keys[hi]) {
      // the answer, if any, is the element right after the current upper bound
      int32_t next = hi + 1;
      return (next >= num) ? -1 : next;
    }

    int32_t mid = lo + ((hi - lo + 1) >> 1);

    if (key < keys[mid]) {
      hi = mid - 1;
    } else if (key > keys[mid]) {
      lo = mid + 1;
    } else {
      return mid;
    }
  }
}

2090 2091 2092 2093 2094 2095 2096 2097 2098 2099 2100 2101 2102 2103 2104 2105 2106 2107 2108 2109 2110 2111 2112 2113 2114 2115 2116 2117 2118 2119 2120 2121 2122 2123 2124 2125 2126
/*
 * In case of prj/diff query, make sure the output buffers can accommodate the
 * results of the current block.
 *
 * Nothing is done for interval queries, group-by-normal-column queries and
 * fixed-output queries, or when the free space (capacity - rows) already
 * covers pBlockInfo->rows. Otherwise every output buffer is reallocated and
 * the output positions of the function contexts are refreshed.
 *
 * @param pRuntimeEnv  runtime environment (query and function contexts)
 * @param pBlockInfo   information of the block about to be processed
 */
static void ensureOutputBuffer(SQueryRuntimeEnv* pRuntimeEnv, SDataBlockInfo* pBlockInfo) {
  SQuery* pQuery = pRuntimeEnv->pQuery;

  if (isIntervalQuery(pQuery) || isGroupbyNormalCol(pQuery->pGroupbyExpr) || isFixedOutputQuery(pQuery)) {
    return;
  }

  SResultRec *pRec = &pQuery->rec;
  if (pRec->capacity - pRec->rows >= pBlockInfo->rows) {
    return;  // enough room left
  }

  int32_t freeRows = pRec->capacity - pRec->rows;
  int32_t newCapacity = pRec->capacity + (pBlockInfo->rows - freeRows);

  for (int32_t col = 0; col < pQuery->numOfOutput; ++col) {
    int32_t colBytes = pQuery->pSelectExpr[col].bytes;

    char *pNewBuf = realloc(pQuery->sdata[col], colBytes * newCapacity + sizeof(tFilePage));
    if (pNewBuf != NULL) {
      pQuery->sdata[col] = (tFilePage *)pNewBuf;
    } else {  // todo handle the oom
      assert(0);
    }

    // set the pCtx output buffer position
    pRuntimeEnv->pCtx[col].aOutputBuf = pQuery->sdata[col]->data + pRec->rows * colBytes;

    // top/bottom/diff write their timestamps into the output buffer of the first column
    int32_t fid = pQuery->pSelectExpr[col].base.functionId;
    if (fid == TSDB_FUNC_TOP || fid == TSDB_FUNC_BOTTOM || fid == TSDB_FUNC_DIFF) {
      pRuntimeEnv->pCtx[col].ptsOutputBuf = pRuntimeEnv->pCtx[0].aOutputBuf;
    }
  }

  qTrace("QInfo:%p realloc output buffer, new size: %d rows, old:%d, remain:%d", GET_QINFO_ADDR(pRuntimeEnv),
         newCapacity, pRec->capacity, newCapacity - pRec->rows);

  pRec->capacity = newCapacity;
}

2127 2128
/*
 * Iterate over the data blocks of the current scan (master or secondary query
 * handle, depending on the scan flag) and apply the query functions on each.
 *
 * The loop stops when no block is left, when the query is killed, or when the
 * query status contains QUERY_RESBUF_FULL or QUERY_COMPLETED. If the result
 * buffer is not full afterwards, the query is marked as completed. For a
 * completed interval query in the master scan, all time windows are closed and
 * the redundant ones are removed.
 *
 * @param pRuntimeEnv  runtime environment of the query
 * @return always 0
 */
static int64_t doScanAllDataBlocks(SQueryRuntimeEnv *pRuntimeEnv) {
  SQuery          *pQuery = pRuntimeEnv->pQuery;
  STableQueryInfo *pTableQueryInfo = pQuery->current;
  SQueryCostInfo  *pCost = &pRuntimeEnv->summary;

  qTrace("QInfo:%p query start, qrange:%" PRId64 "-%" PRId64 ", lastkey:%" PRId64 ", order:%d",
         GET_QINFO_ADDR(pRuntimeEnv), pTableQueryInfo->win.skey, pTableQueryInfo->win.ekey, pTableQueryInfo->lastKey,
         pQuery->order.order);

  TsdbQueryHandleT pHandle;
  if (IS_MASTER_SCAN(pRuntimeEnv)) {
    pHandle = pRuntimeEnv->pQueryHandle;
  } else {
    pHandle = pRuntimeEnv->pSecQueryHandle;
  }

  while (tsdbNextDataBlock(pHandle)) {
    pCost->totalBlocks += 1;
    if (isQueryKilled(GET_QINFO_ADDR(pRuntimeEnv))) {
      return 0;
    }

    SDataBlockInfo blockInfo = tsdbRetrieveDataBlockInfo(pHandle);

    // todo extract methods
    // the first block of an interval query determines the first time window
    if (isIntervalQuery(pQuery) && pRuntimeEnv->windowResInfo.prevSKey == TSKEY_INITIAL_VAL) {
      STimeWindow     clippedWin = TSWINDOW_INITIALIZER;
      STimeWindow     alignedWin = TSWINDOW_INITIALIZER;
      SWindowResInfo *pWinInfo = &pRuntimeEnv->windowResInfo;

      if (!QUERY_IS_ASC_QUERY(pQuery)) {
        // the start position of the first time window in the endpoint that spreads beyond the queried last timestamp
        getAlignQueryTimeWindow(pQuery, blockInfo.window.ekey, pQuery->window.ekey, blockInfo.window.ekey, &clippedWin, &alignedWin);

        pWinInfo->startTime = pQuery->window.skey;
      } else {
        getAlignQueryTimeWindow(pQuery, blockInfo.window.skey, blockInfo.window.skey, pQuery->window.ekey, &clippedWin, &alignedWin);

        pWinInfo->startTime = alignedWin.skey;
      }

      pWinInfo->prevSKey = alignedWin.skey;

      if (pRuntimeEnv->pFillInfo != NULL) {
        pRuntimeEnv->pFillInfo->start = alignedWin.skey;
      }
    }

    // in case of prj/diff query, ensure the output buffer is sufficient to accommodate the results of current block
    ensureOutputBuffer(pRuntimeEnv, &blockInfo);

    SDataStatis *pBlockStatis = NULL;
    SArray      *pBlockData = loadDataBlockOnDemand(pRuntimeEnv, pHandle, &blockInfo, &pBlockStatis);

    // query start position can not move into tableApplyFunctionsOnBlock due to limit/offset condition
    if (QUERY_IS_ASC_QUERY(pQuery)) {
      pQuery->pos = 0;
    } else {
      pQuery->pos = blockInfo.rows - 1;
    }

    int32_t numOfResults = tableApplyFunctionsOnBlock(pRuntimeEnv, &blockInfo, pBlockStatis, binarySearchForKey, pBlockData);

    pCost->totalRows += blockInfo.rows;
    qTrace("QInfo:%p check data block, brange:%" PRId64 "-%" PRId64 ", numOfRows:%d, numOfRes:%d, lastKey:%"PRId64, GET_QINFO_ADDR(pRuntimeEnv),
           blockInfo.window.skey, blockInfo.window.ekey, blockInfo.rows, numOfResults, pQuery->current->lastKey);

    // while the output buffer is full or limit/offset is applied, query may be paused here
    if (Q_STATUS_EQUAL(pQuery->status, QUERY_RESBUF_FULL | QUERY_COMPLETED)) {
      break;
    }
  }

  // if the result buffer is not full, set the query complete
  if (!Q_STATUS_EQUAL(pQuery->status, QUERY_RESBUF_FULL)) {
    setQueryStatus(pQuery, QUERY_COMPLETED);
  }

  // the window bookkeeping below only applies to interval queries in the master scan
  if (!isIntervalQuery(pQuery) || !IS_MASTER_SCAN(pRuntimeEnv)) {
    return 0;
  }

  if (!Q_STATUS_EQUAL(pQuery->status, QUERY_COMPLETED)) {
    assert(Q_STATUS_EQUAL(pQuery->status, QUERY_RESBUF_FULL));
    return 0;
  }

  int32_t step = QUERY_IS_ASC_QUERY(pQuery) ? QUERY_ASC_FORWARD_STEP : QUERY_DESC_FORWARD_STEP;

  closeAllTimeWindow(&pRuntimeEnv->windowResInfo);
  removeRedundantWindow(&pRuntimeEnv->windowResInfo, pTableQueryInfo->lastKey - step, step);
  pRuntimeEnv->windowResInfo.curIndex = pRuntimeEnv->windowResInfo.size - 1;  // point to the last time window

  return 0;
}

/*
 * set tag value in SQLFunctionCtx
 * e.g.,tag information into input buffer
 */
H
[td-90]  
Haojun Liao 已提交
2211 2212 2213
static void doSetTagValueInParam(void *tsdb, STableId* pTableId, int32_t tagColId, tVariant *tag, int16_t type,
    int16_t bytes) {
  tVariantDestroy(tag);
2214

2215
  if (tagColId == TSDB_TBNAME_COLUMN_INDEX) {
H
[td-90]  
Haojun Liao 已提交
2216 2217 2218 2219
    char* val = tsdbGetTableName(tsdb, pTableId);
    assert(val != NULL);
    
    tVariantCreateFromBinary(tag, varDataVal(val), varDataLen(val), TSDB_DATA_TYPE_BINARY);
2220
  } else {
H
[td-90]  
Haojun Liao 已提交
2221 2222 2223 2224 2225
    char* val = tsdbGetTableTagVal(tsdb, pTableId, tagColId, type, bytes);
    if (val == NULL) {
      tag->nType = TSDB_DATA_TYPE_NULL;
      return;
    }
H
hjxilinx 已提交
2226 2227
    
    if (type == TSDB_DATA_TYPE_BINARY || type == TSDB_DATA_TYPE_NCHAR) {
H
[td-90]  
Haojun Liao 已提交
2228
      tVariantCreateFromBinary(tag, varDataVal(val), varDataLen(val), type);
H
hjxilinx 已提交
2229
    } else {
H
[td-90]  
Haojun Liao 已提交
2230
      tVariantCreateFromBinary(tag, val, bytes, type);
H
hjxilinx 已提交
2231
    }
2232
  }
2233 2234
}

H
hjxilinx 已提交
2235
/*
 * Fill the tag values of the function contexts for the given table.
 *
 * For a single ts_comp expression, the tag column id is taken from the first
 * function argument and stored in the context of that expression. Otherwise
 * the tag of every expression whose column is flagged as a tag column is set.
 *
 * @param pRuntimeEnv  runtime environment (query and function contexts)
 * @param pTableId     id of the table whose tags are read
 * @param tsdb         tsdb handle used for the lookups
 */
void setTagVal(SQueryRuntimeEnv *pRuntimeEnv, STableId* pTableId, void *tsdb) {
  SQuery    *pQuery = pRuntimeEnv->pQuery;
  SExprInfo *pFirstExpr = &pQuery->pSelectExpr[0];

  if (pQuery->numOfOutput == 1 && pFirstExpr->base.functionId == TSDB_FUNC_TS_COMP) {
    assert(pFirstExpr->base.numOfParams == 1);
    doSetTagValueInParam(tsdb, pTableId, pFirstExpr->base.arg->argValue.i64, &pRuntimeEnv->pCtx[0].tag,
                         pFirstExpr->type, pFirstExpr->bytes);
    return;
  }

  // set tag value, by which the results are aggregated.
  for (int32_t col = 0; col < pQuery->numOfOutput; ++col) {
    SExprInfo* pExpr = &pQuery->pSelectExpr[col];

    // ts_comp column required the tag value for join filter
    if (TSDB_COL_IS_TAG(pExpr->base.colInfo.flag)) {
      // todo use tag column index to optimize performance
      doSetTagValueInParam(tsdb, pTableId, pExpr->base.colInfo.colId, &pRuntimeEnv->pCtx[col].tag,
          pExpr->type, pExpr->bytes);
    }
  }

  // set the join tag for first column
  SSqlFuncMsg *pFirstFunc = &pFirstExpr->base;
  if (pFirstFunc->functionId == TSDB_FUNC_TS && pFirstFunc->colInfo.colIndex == PRIMARYKEY_TIMESTAMP_COL_INDEX &&
      pRuntimeEnv->pTSBuf != NULL) {
    assert(pFirstFunc->numOfParams == 1);
    assert(0);  // to do fix me
    //      doSetTagValueInParam(pTagSchema, pFuncMsg->arg->argValue.i64, pMeterSidInfo, &pRuntimeEnv->pCtx[0].tag);
  }
}

/*
 * Merge one window result into the output of every function context.
 *
 * First pass, per output column: when mergeFlag is false, the output position
 * is advanced by one result and the function is re-initialized; then the input
 * is pointed at the column's cell of pWindowRes, and for tag/tag_dummy columns
 * the tag variant is rebuilt from that cell.
 * Second pass: distMergeFunc is invoked for every column except tag_dummy.
 *
 * @param pRuntimeEnv  runtime environment (query and function contexts)
 * @param timestamp    stored as nStartQueryTimestamp of every context
 * @param pWindowRes   window result providing the input data
 * @param mergeFlag    false to start a new output row, true to keep merging
 *                     into the current one
 */
static void doMerge(SQueryRuntimeEnv *pRuntimeEnv, int64_t timestamp, SWindowResult *pWindowRes, bool mergeFlag) {
  SQuery         *pQuery = pRuntimeEnv->pQuery;
  SQLFunctionCtx *pCtxList = pRuntimeEnv->pCtx;

  for (int32_t col = 0; col < pQuery->numOfOutput; ++col) {
    SQLFunctionCtx *pCur = &pCtxList[col];
    int32_t         fid = pQuery->pSelectExpr[col].base.functionId;

    if (!mergeFlag) {
      // move on to the next output row and start from a clean state
      pCur->aOutputBuf += pCur->outputBytes;
      pCur->currentStage = FIRST_STAGE_MERGE;

      resetResultInfo(pCur->resultInfo);
      aAggs[fid].init(pCur);
    }

    pCur->hasNull = true;
    pCur->nStartQueryTimestamp = timestamp;
    pCur->aInputElemBuf = getPosInResultPage(pRuntimeEnv, col, pWindowRes);

    // in case of tag column, the tag information should be extracted from input buffer
    const bool isTagColumn = (fid == TSDB_FUNC_TAG_DUMMY || fid == TSDB_FUNC_TAG);
    if (!isTagColumn) {
      continue;
    }

    tVariantDestroy(&pCur->tag);

    int32_t outType = pCur->outputType;
    if (outType == TSDB_DATA_TYPE_BINARY || outType == TSDB_DATA_TYPE_NCHAR) {
      tVariantCreateFromBinary(&pCur->tag, varDataVal(pCur->aInputElemBuf), varDataLen(pCur->aInputElemBuf), outType);
    } else {
      tVariantCreateFromBinary(&pCur->tag, pCur->aInputElemBuf, pCur->inputBytes, pCur->inputType);
    }
  }

  for (int32_t col = 0; col < pQuery->numOfOutput; ++col) {
    int32_t fid = pQuery->pSelectExpr[col].base.functionId;

    // tag_dummy columns are not merged
    if (fid != TSDB_FUNC_TAG_DUMMY) {
      aAggs[fid].distMergeFunc(&pCtxList[col]);
    }
  }
}

2312
static UNUSED_FUNC void printBinaryData(int32_t functionId, char *data, int32_t srcDataType) {
2313 2314 2315 2316 2317 2318 2319 2320 2321 2322 2323 2324 2325 2326 2327 2328 2329 2330 2331 2332 2333 2334 2335 2336 2337 2338 2339 2340 2341 2342 2343 2344 2345 2346 2347 2348 2349 2350 2351 2352 2353 2354 2355 2356 2357 2358 2359 2360 2361 2362 2363 2364 2365 2366 2367 2368 2369 2370 2371 2372 2373 2374 2375 2376 2377 2378 2379 2380
  if (functionId == TSDB_FUNC_FIRST_DST || functionId == TSDB_FUNC_LAST_DST) {
    switch (srcDataType) {
      case TSDB_DATA_TYPE_BINARY:
        printf("%" PRId64 ",%s\t", *(TSKEY *)data, (data + TSDB_KEYSIZE + 1));
        break;
      case TSDB_DATA_TYPE_TINYINT:
      case TSDB_DATA_TYPE_BOOL:
        printf("%" PRId64 ",%d\t", *(TSKEY *)data, *(int8_t *)(data + TSDB_KEYSIZE + 1));
        break;
      case TSDB_DATA_TYPE_SMALLINT:
        printf("%" PRId64 ",%d\t", *(TSKEY *)data, *(int16_t *)(data + TSDB_KEYSIZE + 1));
        break;
      case TSDB_DATA_TYPE_BIGINT:
      case TSDB_DATA_TYPE_TIMESTAMP:
        printf("%" PRId64 ",%" PRId64 "\t", *(TSKEY *)data, *(TSKEY *)(data + TSDB_KEYSIZE + 1));
        break;
      case TSDB_DATA_TYPE_INT:
        printf("%" PRId64 ",%d\t", *(TSKEY *)data, *(int32_t *)(data + TSDB_KEYSIZE + 1));
        break;
      case TSDB_DATA_TYPE_FLOAT:
        printf("%" PRId64 ",%f\t", *(TSKEY *)data, *(float *)(data + TSDB_KEYSIZE + 1));
        break;
      case TSDB_DATA_TYPE_DOUBLE:
        printf("%" PRId64 ",%lf\t", *(TSKEY *)data, *(double *)(data + TSDB_KEYSIZE + 1));
        break;
    }
  } else if (functionId == TSDB_FUNC_AVG) {
    printf("%lf,%d\t", *(double *)data, *(int32_t *)(data + sizeof(double)));
  } else if (functionId == TSDB_FUNC_SPREAD) {
    printf("%lf,%lf\t", *(double *)data, *(double *)(data + sizeof(double)));
  } else if (functionId == TSDB_FUNC_TWA) {
    data += 1;
    printf("%lf,%" PRId64 ",%" PRId64 ",%" PRId64 "\t", *(double *)data, *(int64_t *)(data + 8),
           *(int64_t *)(data + 16), *(int64_t *)(data + 24));
  } else if (functionId == TSDB_FUNC_MIN || functionId == TSDB_FUNC_MAX) {
    switch (srcDataType) {
      case TSDB_DATA_TYPE_TINYINT:
      case TSDB_DATA_TYPE_BOOL:
        printf("%d\t", *(int8_t *)data);
        break;
      case TSDB_DATA_TYPE_SMALLINT:
        printf("%d\t", *(int16_t *)data);
        break;
      case TSDB_DATA_TYPE_BIGINT:
      case TSDB_DATA_TYPE_TIMESTAMP:
        printf("%" PRId64 "\t", *(int64_t *)data);
        break;
      case TSDB_DATA_TYPE_INT:
        printf("%d\t", *(int *)data);
        break;
      case TSDB_DATA_TYPE_FLOAT:
        printf("%f\t", *(float *)data);
        break;
      case TSDB_DATA_TYPE_DOUBLE:
        printf("%f\t", *(float *)data);
        break;
    }
  } else if (functionId == TSDB_FUNC_SUM) {
    if (srcDataType == TSDB_DATA_TYPE_FLOAT || srcDataType == TSDB_DATA_TYPE_DOUBLE) {
      printf("%lf\t", *(float *)data);
    } else {
      printf("%" PRId64 "\t", *(int64_t *)data);
    }
  } else {
    printf("%s\t", data);
  }
}

2381
/*
 * Debug helper: dump the column-wise intermediate result of a super table query
 * to stdout, one line per row.
 *
 * pdata        one page per output column; values of a column are stored back to back
 * pRuntimeEnv  runtime environment that owns the query description
 * numOfRows    number of rows to print
 *
 * Columns whose type is not handled below are skipped silently.
 */
void UNUSED_FUNC displayInterResult(tFilePage **pdata, SQueryRuntimeEnv* pRuntimeEnv, int32_t numOfRows) {
  SQuery *pQuery = pRuntimeEnv->pQuery;
  int32_t numOfCols = pQuery->numOfOutput;

  printf("super table query intermediate result, total:%d\n", numOfRows);

  for (int32_t row = 0; row < numOfRows; ++row) {
    for (int32_t col = 0; col < numOfCols; ++col) {
      int32_t type = pQuery->pSelectExpr[col].type;
      char *  cell = pdata[col]->data + pQuery->pSelectExpr[col].bytes * row;

      if (type == TSDB_DATA_TYPE_BINARY) {
        // binary columns carry an encoded intermediate result of the function
        printBinaryData(pQuery->pSelectExpr[col].base.functionId, cell, type);
      } else if (type == TSDB_DATA_TYPE_TIMESTAMP || type == TSDB_DATA_TYPE_BIGINT) {
        printf("%" PRId64 "\t", *(int64_t *)cell);
      } else if (type == TSDB_DATA_TYPE_INT) {
        printf("%d\t", *(int32_t *)cell);
      } else if (type == TSDB_DATA_TYPE_FLOAT) {
        printf("%f\t", *(float *)cell);
      } else if (type == TSDB_DATA_TYPE_DOUBLE) {
        printf("%lf\t", *(double *)cell);
      }
    }

    printf("\n");
  }
}

typedef struct SCompSupporter {
H
hjxilinx 已提交
2420 2421 2422
  STableQueryInfo **pTableQueryInfo;
  int32_t *         position;
  SQInfo *          pQInfo;
2423 2424 2425 2426 2427
} SCompSupporter;

int32_t tableResultComparFn(const void *pLeft, const void *pRight, void *param) {
  int32_t left = *(int32_t *)pLeft;
  int32_t right = *(int32_t *)pRight;
2428

2429 2430
  SCompSupporter *  supporter = (SCompSupporter *)param;
  SQueryRuntimeEnv *pRuntimeEnv = &supporter->pQInfo->runtimeEnv;
2431

2432 2433
  int32_t leftPos = supporter->position[left];
  int32_t rightPos = supporter->position[right];
2434

2435 2436 2437 2438
  /* left source is exhausted */
  if (leftPos == -1) {
    return 1;
  }
2439

2440 2441 2442 2443
  /* right source is exhausted*/
  if (rightPos == -1) {
    return -1;
  }
2444

H
hjxilinx 已提交
2445
  SWindowResInfo *pWindowResInfo1 = &supporter->pTableQueryInfo[left]->windowResInfo;
2446
  SWindowResult * pWindowRes1 = getWindowResult(pWindowResInfo1, leftPos);
2447

2448 2449
  char *b1 = getPosInResultPage(pRuntimeEnv, PRIMARYKEY_TIMESTAMP_COL_INDEX, pWindowRes1);
  TSKEY leftTimestamp = GET_INT64_VAL(b1);
2450

H
hjxilinx 已提交
2451
  SWindowResInfo *pWindowResInfo2 = &supporter->pTableQueryInfo[right]->windowResInfo;
2452
  SWindowResult * pWindowRes2 = getWindowResult(pWindowResInfo2, rightPos);
2453

2454 2455
  char *b2 = getPosInResultPage(pRuntimeEnv, PRIMARYKEY_TIMESTAMP_COL_INDEX, pWindowRes2);
  TSKEY rightTimestamp = GET_INT64_VAL(b2);
2456

2457 2458 2459
  if (leftTimestamp == rightTimestamp) {
    return 0;
  }
2460

2461 2462 2463
  return leftTimestamp > rightTimestamp ? 1 : -1;
}

2464
/*
 * Merge table results group by group, starting at pQInfo->groupIndex, until a
 * group produces at least one result or every group has been visited.
 *
 * pQInfo->groupIndex is advanced past each group that was merged successfully.
 *
 * Returns TSDB_CODE_SUCCESS, or -1 as soon as merging a group fails.
 */
int32_t mergeIntoGroupResult(SQInfo *pQInfo) {
  int64_t st = taosGetTimestampMs();
  int32_t numOfGroups = taosArrayGetSize(pQInfo->groupInfo.pGroupList);

  while (pQInfo->groupIndex < numOfGroups) {
    SArray *group = taosArrayGetP(pQInfo->groupInfo.pGroupList, pQInfo->groupIndex);

    int32_t ret = mergeIntoGroupResultImpl(pQInfo, group);
    if (ret < 0) {  // not enough disk space to save the data into disk
      return -1;
    }

    pQInfo->groupIndex += 1;

    // this group generates at least one result, return results
    if (ret > 0) {
      break;
    }

    assert(pQInfo->numOfGroupResultPages == 0);
    qTrace("QInfo:%p no result in group %d, continue", pQInfo, pQInfo->groupIndex - 1);
  }

  // the elapsed time is a 64-bit value: print it with PRId64 rather than %lld
  qTrace("QInfo:%p merge res data into group, index:%d, total group:%d, elapsed time:%" PRId64 "ms", pQInfo,
         pQInfo->groupIndex - 1, numOfGroups, (int64_t)(taosGetTimestampMs() - st));

  return TSDB_CODE_SUCCESS;
}

/*
 * Copy the next batch of merged group results into the query output columns
 * (pQuery->sdata) and record the number of copied rows in pQuery->rec.rows.
 *
 * When every batch of the current group has been consumed, the next group is
 * merged first. If no group is left, pQInfo->tableIndex is set to the number of
 * tables to mark the query as completed and nothing is copied.
 */
void copyResToQueryResultBuf(SQInfo *pQInfo, SQuery *pQuery) {
  if (pQInfo->offset == pQInfo->numOfGroupResultPages) {
    // current results of group has been sent to client, try next group
    pQInfo->numOfGroupResultPages = 0;

    if (mergeIntoGroupResult(pQInfo) != TSDB_CODE_SUCCESS) {
      return;  // failed to save data in the disk
    }

    // check if all results has been sent to client
    int32_t numOfGroup = taosArrayGetSize(pQInfo->groupInfo.pGroupList);
    if (pQInfo->numOfGroupResultPages == 0 && pQInfo->groupIndex == numOfGroup) {
      pQInfo->tableIndex = pQInfo->groupInfo.numOfTables;  // set query completed
      return;
    }
  }

  SQueryRuntimeEnv *   pRuntimeEnv = &pQInfo->runtimeEnv;
  SDiskbasedResultBuf *pResultBuf = pRuntimeEnv->pResultBuf;

  int32_t groupId = getGroupResultId(pQInfo->groupIndex - 1);
  SIDList pages = getDataBufPagesIdList(pResultBuf, pQInfo->offset + groupId);

  // rows copied so far; doubles as the row offset inside the destination columns
  int32_t copied = 0;

  for (int32_t p = 0; p < pages.size; ++p) {
    tFilePage *page = GET_RES_BUF_PAGE_BY_ID(pResultBuf, pages.pData[p]);

    for (int32_t col = 0; col < pQuery->numOfOutput; ++col) {
      int32_t width = pRuntimeEnv->pCtx[col].outputBytes;
      char *  dst = pQuery->sdata[col]->data;

      memcpy(dst + copied * width, page->data + pRuntimeEnv->offset[col] * page->num, width * page->num);
    }

    copied += page->num;
  }

  assert(pQuery->rec.rows == 0);

  pQuery->rec.rows += copied;
  pQInfo->offset += 1;
}

H
Haojun Liao 已提交
2546 2547
/*
 * Return the number of results held by a window result.
 *
 * The ts, tag and tagprj functions cannot decide the number of output rows, so
 * they are ignored; the first remaining column that reports a positive
 * numOfRes determines the answer. Returns 0 when no such column exists.
 */
int64_t getNumOfResultWindowRes(SQuery *pQuery, SWindowResult *pWindowRes) {
  for (int32_t col = 0; col < pQuery->numOfOutput; ++col) {
    int32_t funcId = pQuery->pSelectExpr[col].base.functionId;

    bool decidesRowCount =
        (funcId != TSDB_FUNC_TS) && (funcId != TSDB_FUNC_TAG) && (funcId != TSDB_FUNC_TAGPRJ);
    if (!decidesRowCount) {
      continue;
    }

    SResultInfo *info = &pWindowRes->resultInfo[col];
    assert(info != NULL);

    if (info->numOfRes > 0) {
      return info->numOfRes;
    }
  }

  return 0;
}

2579
/*
 * Merge the window results of all tables of one group, ordered by window start
 * timestamp, into the query output buffer. The buffer is flushed into the
 * disk-based result buffer whenever it is full and once more at the end.
 *
 * pQInfo  query being executed
 * pGroup  array of SGroupItem, one per table of the group
 *
 * Returns -1 on failure (out of memory or flush failure), 0 when no table of
 * the group holds any result, otherwise pQInfo->numOfGroupResultPages.
 *
 * Every temporary allocation is released through the single cleanup path at
 * the end, including on the error paths.
 */
int32_t mergeIntoGroupResultImpl(SQInfo *pQInfo, SArray *pGroup) {
  SQueryRuntimeEnv *pRuntimeEnv = &pQInfo->runtimeEnv;
  SQuery *          pQuery = pRuntimeEnv->pQuery;
  tFilePage **      buffer = pQuery->sdata;
  size_t            size = taosArrayGetSize(pGroup);

  int32_t         code = -1;
  int32_t         numOfTables = 0;
  int64_t         lastTimestamp = -1;
  int64_t         startt = 0;
  int64_t         endt = 0;
  SLoserTreeInfo *pTree = NULL;
  SResultInfo *   pResultInfo = NULL;

  int32_t *         posList = calloc(size, sizeof(int32_t));
  STableQueryInfo **pTableList = malloc(POINTER_BYTES * size);
  SCompSupporter    cs = {pTableList, posList, pQInfo};

  // a zero-sized request may legitimately yield NULL, so only a non-empty group can fail here
  if (size > 0 && (posList == NULL || pTableList == NULL)) {
    qError("QInfo:%p failed to allocate memory to merge group results, abort query", pQInfo);
    goto _cleanup;
  }

  // todo opt for the case of one table per group
  // collect the tables that own result pages and at least one window result
  for (size_t i = 0; i < size; ++i) {
    SGroupItem *     item = taosArrayGet(pGroup, i);
    STableQueryInfo *pInfo = item->info;

    SIDList list = getDataBufPagesIdList(pRuntimeEnv->pResultBuf, pInfo->id.tid);
    if (list.size > 0 && pInfo->windowResInfo.size > 0) {
      pTableList[numOfTables] = pInfo;
      numOfTables += 1;
    }
  }

  if (numOfTables == 0) {
    assert(pQInfo->numOfGroupResultPages == 0);
    code = 0;
    goto _cleanup;
  }

  tLoserTreeCreate(&pTree, numOfTables, &cs, tableResultComparFn);
  pResultInfo = calloc(pQuery->numOfOutput, sizeof(SResultInfo));
  if (pTree == NULL || pResultInfo == NULL) {
    qError("QInfo:%p failed to allocate memory to merge group results, abort query", pQInfo);
    goto _cleanup;
  }

  setWindowResultInfo(pResultInfo, pQuery, pRuntimeEnv->stableQuery);
  resetMergeResultBuf(pQuery, pRuntimeEnv->pCtx, pResultInfo);

  startt = taosGetTimestampMs();

  while (1) {
    // the loser tree root identifies the source with the smallest current timestamp
    int32_t pos = pTree->pNode[0].index;

    SWindowResInfo *pWindowResInfo = &pTableList[pos]->windowResInfo;
    SWindowResult * pWindowRes = getWindowResult(pWindowResInfo, cs.position[pos]);

    char *b = getPosInResultPage(pRuntimeEnv, PRIMARYKEY_TIMESTAMP_COL_INDEX, pWindowRes);
    TSKEY ts = GET_INT64_VAL(b);

    assert(ts == pWindowRes->window.skey);

    int64_t num = getNumOfResultWindowRes(pQuery, pWindowRes);
    if (num > 0) {
      if (ts == lastTimestamp) {  // merge with the last one
        doMerge(pRuntimeEnv, ts, pWindowRes, true);
      } else {  // start a new output row, flushing the buffer to disk first when it is full
        if (buffer[0]->num == pQuery->rec.capacity) {
          if (flushFromResultBuf(pQInfo) != TSDB_CODE_SUCCESS) {
            qError("QInfo:%p failed to flush data into temp file, abort query", pQInfo);
            goto _cleanup;
          }

          resetMergeResultBuf(pQuery, pRuntimeEnv->pCtx, pResultInfo);
        }

        doMerge(pRuntimeEnv, ts, pWindowRes, false);
        buffer[0]->num += 1;
      }

      lastTimestamp = ts;
    }

    // advance the cursor of this source, empty window results are simply skipped
    cs.position[pos] += 1;
    if (cs.position[pos] >= pWindowResInfo->size) {
      cs.position[pos] = -1;

      // all input sources are exhausted
      if (--numOfTables == 0) {
        break;
      }
    }

    tLoserTreeAdjust(pTree, pos + pTree->numOfEntries);
  }

  if (buffer[0]->num != 0) {  // there are data in buffer
    if (flushFromResultBuf(pQInfo) != TSDB_CODE_SUCCESS) {
      qError("QInfo:%p failed to flush data into temp file, abort query", pQInfo);
      goto _cleanup;
    }
  }

  endt = taosGetTimestampMs();

#ifdef _DEBUG_VIEW
  displayInterResult(pQuery->sdata, pRuntimeEnv, pQuery->sdata[0]->num);
#endif

  qTrace("QInfo:%p result merge completed for group:%d, elapsed time:%" PRId64 " ms", pQInfo, pQInfo->groupIndex, endt - startt);

  pQInfo->offset = 0;
  code = pQInfo->numOfGroupResultPages;

_cleanup:
  if (pResultInfo != NULL) {
    for (int32_t i = 0; i < pQuery->numOfOutput; ++i) {
      tfree(pResultInfo[i].interResultBuf);
    }
  }

  tfree(pResultInfo);
  tfree(pTree);
  tfree(pTableList);
  tfree(posList);

  return code;
}

/*
 * Copy the rows currently held in the query output columns (pQuery->sdata)
 * into newly allocated pages of the disk-based result buffer. All pages
 * written by one call are registered under the same group result id; the
 * counter pQInfo->numOfGroupResultPages is increased by one per call.
 *
 * Returns TSDB_CODE_SUCCESS, or -1 when a page cannot hold a single row or a
 * new page cannot be obtained.
 */
int32_t flushFromResultBuf(SQInfo *pQInfo) {
  SQueryRuntimeEnv *   pRuntimeEnv = &pQInfo->runtimeEnv;
  SQuery *             pQuery = pRuntimeEnv->pQuery;
  SDiskbasedResultBuf *pResultBuf = pRuntimeEnv->pResultBuf;

  // number of rows that fit into one page
  int32_t capacity = (DEFAULT_INTERN_BUF_PAGE_SIZE - sizeof(tFilePage)) / pQuery->rowSize;
  if (capacity <= 0) {
    // without this guard the loop below would never make progress
    qError("QInfo:%p result row is too large to fit into one buffer page", pQInfo);
    return -1;
  }

  int32_t pageId = -1;  // passed to getNewDataBuf by address; not used afterwards
  int32_t remain = pQuery->sdata[0]->num;
  int32_t offset = 0;

  while (remain > 0) {
    int32_t r = (remain > capacity) ? capacity : remain;

    int32_t    id = getGroupResultId(pQInfo->groupIndex) + pQInfo->numOfGroupResultPages;
    tFilePage *buf = getNewDataBuf(pResultBuf, id, &pageId);
    if (buf == NULL) {
      // NOTE(review): assumes getNewDataBuf reports failure with NULL -- confirm
      qError("QInfo:%p failed to get a new page from the result buffer", pQInfo);
      return -1;
    }

    buf->num = r;

    // pagewise copy to dest buffer
    for (int32_t i = 0; i < pQuery->numOfOutput; ++i) {
      int32_t bytes = pRuntimeEnv->pCtx[i].outputBytes;

      memcpy(buf->data + pRuntimeEnv->offset[i] * buf->num, ((char *)pQuery->sdata[i]->data) + offset * bytes,
             buf->num * bytes);
    }

    offset += r;
    remain -= r;
  }

  pQInfo->numOfGroupResultPages += 1;
  return TSDB_CODE_SUCCESS;
}

/*
 * Rewind the merge output: mark every output column as empty and bind each
 * function context to its entry of pResultInfo.
 *
 * The output pointer is deliberately placed one row *before* the start of the
 * column data, since doMerge advances it by one row before writing a new row.
 */
void resetMergeResultBuf(SQuery *pQuery, SQLFunctionCtx *pCtx, SResultInfo *pResultInfo) {
  for (int32_t col = 0; col < pQuery->numOfOutput; ++col) {
    SQLFunctionCtx *ctx = &pCtx[col];
    tFilePage *     page = pQuery->sdata[col];

    page->num = 0;

    ctx->aOutputBuf = page->data - ctx->outputBytes;
    ctx->resultInfo = &pResultInfo[col];
    ctx->startOffset = 0;
    ctx->size = 1;
  }
}

2763 2764 2765 2766 2767 2768 2769
/*
 * Prepare the scan state of one table for the reverse scan.
 *
 * The new window starts one step past the last visited key and ends at the
 * previous window start; lastKey is moved to the new start. The cursor order
 * is switched and its vgroup index is reset to -1. A NULL table is ignored.
 *
 * The query order must already have been switched when this is called.
 */
static void updateTableQueryInfoForReverseScan(SQuery *pQuery, STableQueryInfo *pTableQueryInfo) {
  if (pTableQueryInfo == NULL) {
    return;
  }

  // order has change already!
  int32_t step = GET_FORWARD_DIRECTION_FACTOR(pQuery->order.order);

  // TODO validate the relation between win.ekey and lastKey + step for both scan orders

  TSKEY reverseStart = pTableQueryInfo->lastKey + step;
  TSKEY reverseEnd = pTableQueryInfo->win.skey;

  pTableQueryInfo->win.skey = reverseStart;
  pTableQueryInfo->win.ekey = reverseEnd;
  pTableQueryInfo->lastKey = reverseStart;

  SWITCH_ORDER(pTableQueryInfo->cur.order);
  pTableQueryInfo->cur.vgroupIndex = -1;
}

/*
 * For every closed window, decide which functions still have work to do in the
 * reverse scan.
 *
 * first/first_dst in an ascending query and last/last_dst in a descending
 * query are marked incomplete; every other function except ts and tag is
 * marked complete. ts and tag are left untouched.
 */
static void disableFuncInReverseScanImpl(SQInfo* pQInfo, SWindowResInfo *pWindowResInfo, int32_t order) {
  SQuery *pQuery = pQInfo->runtimeEnv.pQuery;

  for (int32_t w = 0; w < pWindowResInfo->size; ++w) {
    SWindowStatus *pStatus = getTimeWindowResStatus(pWindowResInfo, w);
    if (pStatus->closed) {
      SWindowResult *pRes = getWindowResult(pWindowResInfo, w);

      // open/close the specified query for each group result
      for (int32_t col = 0; col < pQuery->numOfOutput; ++col) {
        int32_t funcId = pQuery->pSelectExpr[col].base.functionId;

        bool isFirst = (funcId == TSDB_FUNC_FIRST || funcId == TSDB_FUNC_FIRST_DST);
        bool isLast = (funcId == TSDB_FUNC_LAST || funcId == TSDB_FUNC_LAST_DST);

        if ((isFirst && order == TSDB_ORDER_ASC) || (isLast && order == TSDB_ORDER_DESC)) {
          pRes->resultInfo[col].complete = false;
        } else if (funcId != TSDB_FUNC_TS && funcId != TSDB_FUNC_TAG) {
          pRes->resultInfo[col].complete = true;
        }
      }
    }
  }
}

2812 2813
/*
 * Select the functions that take part in the reverse scan and move the scan
 * window of every table of every group to the reverse direction.
 *
 * Group-by-column and interval queries are handled per closed window; for
 * other queries the flag is set directly on the result info of each function
 * context (contexts without a result info are skipped).
 */
void disableFuncInReverseScan(SQInfo *pQInfo) {
  SQueryRuntimeEnv *pRuntimeEnv = &pQInfo->runtimeEnv;
  SQuery *          pQuery = pRuntimeEnv->pQuery;
  int32_t           order = pQuery->order.order;

  if (isGroupbyNormalCol(pQuery->pGroupbyExpr) || isIntervalQuery(pQuery)) {
    // group by normal columns and interval query on normal table
    disableFuncInReverseScanImpl(pQInfo, &pRuntimeEnv->windowResInfo, order);
  } else {
    // for simple result of table query
    for (int32_t col = 0; col < pQuery->numOfOutput; ++col) {  // todo refactor
      int32_t      funcId = pQuery->pSelectExpr[col].base.functionId;
      SResultInfo *pResInfo = pRuntimeEnv->pCtx[col].resultInfo;

      if (pResInfo == NULL) {
        continue;  // resultInfo is NULL, means no data checked in previous scan
      }

      bool isFirst = (funcId == TSDB_FUNC_FIRST || funcId == TSDB_FUNC_FIRST_DST);
      bool isLast = (funcId == TSDB_FUNC_LAST || funcId == TSDB_FUNC_LAST_DST);

      if ((isFirst && order == TSDB_ORDER_ASC) || (isLast && order == TSDB_ORDER_DESC)) {
        pResInfo->complete = false;
      } else if (funcId != TSDB_FUNC_TS && funcId != TSDB_FUNC_TAG) {
        pResInfo->complete = true;
      }
    }
  }

  // update the scan range of every table in every group
  int32_t numOfGroups = taosArrayGetSize(pQInfo->groupInfo.pGroupList);
  for (int32_t g = 0; g < numOfGroups; ++g) {
    SArray *tables = taosArrayGetP(pQInfo->groupInfo.pGroupList, g);

    size_t numOfTables = taosArrayGetSize(tables);
    for (int32_t t = 0; t < numOfTables; ++t) {
      SGroupItem *item = taosArrayGet(tables, t);
      updateTableQueryInfoForReverseScan(pQuery, item->info);
    }
  }
}

2852
/* Switch the scan order stored in the function context of every output column. */
void switchCtxOrder(SQueryRuntimeEnv *pRuntimeEnv) {
  SQuery *pQuery = pRuntimeEnv->pQuery;

  int32_t col = 0;
  while (col < pQuery->numOfOutput) {
    SWITCH_ORDER(pRuntimeEnv->pCtx[col].order);
    col += 1;
  }
}

/*
 * Initialise a window result row: store its position and allocate one
 * zero-filled SResultInfo per output column, then let setWindowResultInfo set
 * up the intermediate result output buffer.
 */
void createQueryResultInfo(SQuery *pQuery, SWindowResult *pResultRow, bool isSTableQuery, SPosInfo *posInfo) {
  size_t numOfOutput = (size_t)pQuery->numOfOutput;

  pResultRow->pos = *posInfo;
  pResultRow->resultInfo = calloc(numOfOutput, sizeof(SResultInfo));

  // set the intermediate result output buffer
  setWindowResultInfo(pResultRow->resultInfo, pQuery, isSTableQuery);
}

/*
 * Rewind every function context to the start of its output column, reset and
 * rebind its result info, zero the column buffer, and finally run the
 * aggregate initialisers through initCtxOutputBuf.
 */
void resetCtxOutputBuf(SQueryRuntimeEnv *pRuntimeEnv) {
  SQuery *pQuery = pRuntimeEnv->pQuery;

  for (int32_t col = 0; col < pQuery->numOfOutput; ++col) {
    SQLFunctionCtx *ctx = &pRuntimeEnv->pCtx[col];
    SResultInfo *   resInfo = &pRuntimeEnv->resultInfo[col];
    int32_t         funcId = pQuery->pSelectExpr[col].base.functionId;

    ctx->aOutputBuf = pQuery->sdata[col]->data;

    /*
     * set the output buffer information and intermediate buffer
     * not all queries require the interResultBuf, such as COUNT/TAGPRJ/PRJ/TAG etc.
     */
    resetResultInfo(resInfo);
    ctx->resultInfo = resInfo;

    // top/bottom/diff write their timestamps into the output buffer of the first column
    bool writesTimestamp = (funcId == TSDB_FUNC_TOP || funcId == TSDB_FUNC_BOTTOM || funcId == TSDB_FUNC_DIFF);
    if (writesTimestamp) {
      ctx->ptsOutputBuf = pRuntimeEnv->pCtx[0].aOutputBuf;
    }

    memset(pQuery->sdata[col]->data, 0, (size_t)pQuery->pSelectExpr[col].bytes * pQuery->rec.capacity);
  }

  initCtxOutputBuf(pRuntimeEnv);
}

/*
 * Move the output position of the function contexts forward by `output` rows
 * and reset their result info. Must not be used with the diff function.
 */
void forwardCtxOutputBuf(SQueryRuntimeEnv *pRuntimeEnv, int64_t output) {
  SQuery *pQuery = pRuntimeEnv->pQuery;

  // reset the execution contexts
  for (int32_t col = 0; col < pQuery->numOfOutput; ++col) {
    SQLFunctionCtx *ctx = &pRuntimeEnv->pCtx[col];
    int32_t         funcId = pQuery->pSelectExpr[col].base.functionId;

    assert(funcId != TSDB_FUNC_DIFF);

    // set next output position
    if (IS_OUTER_FORWARD(aAggs[funcId].nStatus)) {
      ctx->aOutputBuf += ctx->outputBytes * output;
    }

    /*
     * NOTE: for top/bottom query, the value of first column of output (timestamp) are assigned
     * in the procedure of top/bottom routine
     * the output buffer in top/bottom routine is ptsOutputBuf, so we need to forward the output buffer
     *
     * diff function is handled in multi-output function
     */
    if (funcId == TSDB_FUNC_TOP || funcId == TSDB_FUNC_BOTTOM) {
      ctx->ptsOutputBuf += TSDB_KEYSIZE * output;
    }

    resetResultInfo(ctx->resultInfo);
  }
}

/*
 * Set the stage of every function context to 0 and run the aggregate's init
 * routine for each context whose result info is not initialised yet.
 */
void initCtxOutputBuf(SQueryRuntimeEnv *pRuntimeEnv) {
  SQuery *pQuery = pRuntimeEnv->pQuery;

  for (int32_t col = 0; col < pQuery->numOfOutput; ++col) {
    int32_t funcId = pQuery->pSelectExpr[col].base.functionId;
    pRuntimeEnv->pCtx[col].currentStage = 0;

    SResultInfo *pResInfo = GET_RES_INFO(&pRuntimeEnv->pCtx[col]);
    if (!pResInfo->initialized) {
      aAggs[funcId].init(&pRuntimeEnv->pCtx[col]);
    }
  }
}

2939
/*
 * Apply the remaining OFFSET of the query to the rows currently held in the
 * output buffer.
 *
 * If the offset covers all buffered rows, they are all discarded, the offset is
 * reduced accordingly, the output buffers are reset and the "result buffer
 * full" status is cleared. Otherwise the first `offset` rows are dropped, the
 * remaining rows are moved to the front of every output column and the output
 * pointers are placed right behind them.
 *
 * Nothing happens when there is no buffered row or no offset left.
 */
void skipResults(SQueryRuntimeEnv *pRuntimeEnv) {
  SQuery *pQuery = pRuntimeEnv->pQuery;
  if (pQuery->rec.rows == 0 || pQuery->limit.offset == 0) {
    return;
  }

  if (pQuery->rec.rows <= pQuery->limit.offset) {
    // the values are cast explicitly so the arguments always match the 64-bit format specifiers
    qTrace("QInfo:%p skip rows:%" PRId64 ", new offset:%" PRId64, GET_QINFO_ADDR(pRuntimeEnv),
           (int64_t)pQuery->rec.rows, (int64_t)(pQuery->limit.offset - pQuery->rec.rows));

    pQuery->limit.offset -= pQuery->rec.rows;
    pQuery->rec.rows = 0;

    resetCtxOutputBuf(pRuntimeEnv);

    // clear the buffer full flag if exists
    CLEAR_QUERY_STATUS(pQuery, QUERY_RESBUF_FULL);
  } else {
    int64_t numOfSkip = pQuery->limit.offset;
    pQuery->rec.rows -= numOfSkip;
    pQuery->limit.offset = 0;

    qTrace("QInfo:%p skip row:%" PRId64 ", new offset:%d, numOfRows remain:%" PRId64, GET_QINFO_ADDR(pRuntimeEnv),
           numOfSkip, 0, (int64_t)pQuery->rec.rows);

    for (int32_t i = 0; i < pQuery->numOfOutput; ++i) {
      int32_t functionId = pQuery->pSelectExpr[i].base.functionId;
      int32_t bytes = pRuntimeEnv->pCtx[i].outputBytes;

      // source and destination overlap, hence memmove
      memmove(pQuery->sdata[i]->data, (char *)pQuery->sdata[i]->data + bytes * numOfSkip, pQuery->rec.rows * bytes);
      pRuntimeEnv->pCtx[i].aOutputBuf = ((char *)pQuery->sdata[i]->data) + pQuery->rec.rows * bytes;

      // these functions write their timestamps through the output position of the first column
      if (functionId == TSDB_FUNC_DIFF || functionId == TSDB_FUNC_TOP || functionId == TSDB_FUNC_BOTTOM) {
        pRuntimeEnv->pCtx[i].ptsOutputBuf = pRuntimeEnv->pCtx[0].aOutputBuf;
      }
    }

    updateNumOfResult(pRuntimeEnv, pQuery->rec.rows);
  }
}

/*
 * Update the status of a query.
 *
 * QUERY_NOT_COMPLETED replaces the whole status. Any other value is OR-ed into
 * the status after the QUERY_NOT_COMPLETED flag has been cleared, because that
 * flag is not compatible with any other status.
 */
void setQueryStatus(SQuery *pQuery, int8_t status) {
  if (status != QUERY_NOT_COMPLETED) {
    CLEAR_QUERY_STATUS(pQuery, QUERY_NOT_COMPLETED);
    pQuery->status |= status;
    return;
  }

  pQuery->status = status;
}

/*
 * Invoke xNextStep on every output function except ts, using the currently
 * bound output buffers. Returns true when at least one of those functions
 * reports that its result is not complete yet.
 */
static bool runNextStepOnAllFuncs(SQueryRuntimeEnv *pRuntimeEnv) {
  SQuery *pQuery = pRuntimeEnv->pQuery;
  bool    anyIncomplete = false;

  for (int32_t col = 0; col < pQuery->numOfOutput; ++col) {
    int16_t funcId = pQuery->pSelectExpr[col].base.functionId;
    if (funcId == TSDB_FUNC_TS) {
      continue;
    }

    aAggs[funcId].xNextStep(&pRuntimeEnv->pCtx[col]);

    SResultInfo *pResInfo = GET_RES_INFO(&pRuntimeEnv->pCtx[col]);
    if (!pResInfo->complete) {
      anyIncomplete = true;
    }
  }

  return anyIncomplete;
}

/*
 * Decide whether the data blocks have to be scanned once more.
 *
 * Every function is moved to its next step; another scan is required as soon
 * as one function is not complete. For group-by-column and interval queries
 * this is done for each closed window result, otherwise once for the single
 * result of the query.
 */
bool needScanDataBlocksAgain(SQueryRuntimeEnv *pRuntimeEnv) {
  SQuery *pQuery = pRuntimeEnv->pQuery;

  if (!(isGroupbyNormalCol(pQuery->pGroupbyExpr) || isIntervalQuery(pQuery))) {
    return runNextStepOnAllFuncs(pRuntimeEnv);
  }

  bool            scanAgain = false;
  SWindowResInfo *pWindowResInfo = &pRuntimeEnv->windowResInfo;

  for (int32_t w = 0; w < pWindowResInfo->size; ++w) {
    SWindowResult *pResult = getWindowResult(pWindowResInfo, w);
    if (pResult->status.closed) {
      // bind the output buffers of this window before stepping its functions
      setWindowResOutputBuf(pRuntimeEnv, pResult);

      if (runNextStepOnAllFuncs(pRuntimeEnv)) {
        scanAgain = true;
      }
    }
  }

  return scanAgain;
}

H
Haojun Liao 已提交
3036
/*
 * Capture the state that is needed to resume the master scan later: query
 * status, current window index, the query window, and a scan window running
 * from `start` to the end key of the current table.
 *
 * `start` must not lie beyond the last key of the current table with respect
 * to the scan direction.
 */
static SQueryStatusInfo getQueryStatusInfo(SQueryRuntimeEnv *pRuntimeEnv, TSKEY start) {
  SQuery *         pQuery = pRuntimeEnv->pQuery;
  STableQueryInfo *pTableQueryInfo = pQuery->current;

  if (QUERY_IS_ASC_QUERY(pQuery)) {
    assert(start <= pTableQueryInfo->lastKey);
  } else {
    assert(start >= pTableQueryInfo->lastKey);
  }

  SQueryStatusInfo snapshot = {
      .status      = pQuery->status,
      .windowIndex = pRuntimeEnv->windowResInfo.curIndex,
      .lastKey     = start,
      .w           = pQuery->window,
      .curWindow   = {.skey = start, .ekey = pTableQueryInfo->win.ekey},
  };

  return snapshot;
}

3054 3055 3056 3057
/*
 * Switch the runtime environment from the master scan to the reverse scan.
 *
 * The ts-buffer cursor is saved into pStatus, the query window becomes the
 * reversed window of the finished master scan, the query order is switched,
 * and a new secondary query handle is opened for that window (an existing one
 * is cleaned up first). Finally the function contexts and the per-table scan
 * ranges are prepared for the reverse direction.
 */
static void setEnvBeforeReverseScan(SQueryRuntimeEnv *pRuntimeEnv, SQueryStatusInfo *pStatus) {
  SQInfo *pQInfo = GET_QINFO_ADDR(pRuntimeEnv);
  SQuery *pQuery = pRuntimeEnv->pQuery;

  // save the cursor
  pStatus->cur = tsBufGetCursor(pRuntimeEnv->pTSBuf);
  if (pRuntimeEnv->pTSBuf != NULL) {
    SWITCH_ORDER(pRuntimeEnv->pTSBuf->cur.order);
    tsBufNextPos(pRuntimeEnv->pTSBuf);
  }

  // reverse order time range
  pQuery->window = pStatus->curWindow;

  TSKEY oldStart = pQuery->window.skey;
  pQuery->window.skey = pQuery->window.ekey;
  pQuery->window.ekey = oldStart;

  SWITCH_ORDER(pQuery->order.order);
  pRuntimeEnv->scanFlag = REVERSE_SCAN;

  STsdbQueryCond cond = {
      .twindow   = pQuery->window,
      .order     = pQuery->order.order,
      .colList   = pQuery->colList,
      .numOfCols = pQuery->numOfCols,
  };

  // clean unused handle
  if (pRuntimeEnv->pSecQueryHandle != NULL) {
    tsdbCleanupQueryHandle(pRuntimeEnv->pSecQueryHandle);
  }

  pRuntimeEnv->pSecQueryHandle = tsdbQueryTables(pQInfo->tsdb, &cond, &pQInfo->tableIdGroupInfo, pQInfo);

  setQueryStatus(pQuery, QUERY_NOT_COMPLETED);
  switchCtxOrder(pRuntimeEnv);
  disableFuncInReverseScan(pQInfo);
}

3090 3091
/*
 * Undo setEnvBeforeReverseScan: switch the query and context order back,
 * restore the saved ts-buffer cursor, return to the master scan flag and
 * restore last key, query status and query window from pStatus.
 */
static void clearEnvAfterReverseScan(SQueryRuntimeEnv *pRuntimeEnv, SQueryStatusInfo *pStatus) {
  SQuery *         pQuery = pRuntimeEnv->pQuery;
  STableQueryInfo *pCurrent = pQuery->current;

  SWITCH_ORDER(pQuery->order.order);
  switchCtxOrder(pRuntimeEnv);

  tsBufSetCursor(pRuntimeEnv->pTSBuf, &pStatus->cur);
  if (pRuntimeEnv->pTSBuf != NULL) {
    pRuntimeEnv->pTSBuf->cur.order = pQuery->order.order;
  }

  pRuntimeEnv->scanFlag = MASTER_SCAN;

  // update the pQuery->window.skey and pQuery->window.ekey to limit the scan scope of sliding query
  // during reverse scan
  pQuery->status = pStatus->status;
  pCurrent->lastKey = pStatus->lastKey;

  pCurrent->win = pStatus->w;
  pQuery->window = pCurrent->win;
}

H
Haojun Liao 已提交
3113
/**
 * Scan the data blocks for the current table: one master scan, then as many repeat scans as
 * needScanDataBlocksAgain() asks for, and finally one reverse scan when needReverseScan()
 * requires it.
 *
 * @param pRuntimeEnv  runtime environment of the query
 * @param start        forwarded to getQueryStatusInfo() when the initial status is captured
 */
void scanAllDataBlocks(SQueryRuntimeEnv *pRuntimeEnv, TSKEY start) {
  SQInfo          *pQInfo = (SQInfo *)GET_QINFO_ADDR(pRuntimeEnv);
  SQuery          *pQuery = pRuntimeEnv->pQuery;
  STableQueryInfo *pCurrent = pQuery->current;

  setQueryStatus(pQuery, QUERY_NOT_COMPLETED);

  // remember the start query position; refreshed below after every master scan
  SQueryStatusInfo saved = getQueryStatusInfo(pRuntimeEnv, start);

  SET_MASTER_SCAN_FLAG(pRuntimeEnv);
  int32_t step = GET_FORWARD_DIRECTION_FACTOR(pQuery->order.order);

  for (;;) {
    doScanAllDataBlocks(pRuntimeEnv);

    if (IS_MASTER_SCAN(pRuntimeEnv)) {
      saved.status = pQuery->status;
      saved.curWindow.ekey = pCurrent->lastKey - step;
      saved.lastKey = pCurrent->lastKey;
    }

    if (!needScanDataBlocksAgain(pRuntimeEnv)) {
      // a repeat scan overwrote the status code: put the master-scan value back before leaving
      if (pRuntimeEnv->scanFlag == REPEAT_SCAN) {
        pQuery->status = saved.status;
      }

      break;
    }

    // another pass is required: open a fresh secondary handle over the window scanned so far
    STsdbQueryCond cond = {
        .twindow = saved.curWindow,
        .order   = pQuery->order.order,
        .colList = pQuery->colList,
        .numOfCols = pQuery->numOfCols,
    };

    if (pRuntimeEnv->pSecQueryHandle != NULL) {
      tsdbCleanupQueryHandle(pRuntimeEnv->pSecQueryHandle);
    }

    pRuntimeEnv->pSecQueryHandle = tsdbQueryTables(pQInfo->tsdb, &cond, &pQInfo->tableIdGroupInfo, pQInfo);
    pRuntimeEnv->windowResInfo.curIndex = saved.windowIndex;

    setQueryStatus(pQuery, QUERY_NOT_COMPLETED);
    pRuntimeEnv->scanFlag = REPEAT_SCAN;

    qTrace("QInfo:%p start to repeat scan data blocks due to query func required, qrange:%"PRId64"-%"PRId64, pQInfo,
        cond.twindow.skey, cond.twindow.ekey);

    // stop right away if the query has been killed
    if (isQueryKilled(pQInfo)) {
      return;
    }
  }

  if (needReverseScan(pQuery)) {
    setEnvBeforeReverseScan(pRuntimeEnv, &saved);

    // reverse scan from current position
    qTrace("QInfo:%p start to reverse scan", pQInfo);
    doScanAllDataBlocks(pRuntimeEnv);

    clearEnvAfterReverseScan(pRuntimeEnv, &saved);
  }
}

H
hjxilinx 已提交
3183
/**
 * Invoke the xFinalize callback of every output function.
 *
 * For group-by-normal-column and interval queries this is done once per closed window result
 * (after binding the output buffers to that result), and the number of rows of the result is
 * recorded. For all other queries the callbacks are invoked exactly once.
 *
 * @param pRuntimeEnv  runtime environment of the query
 */
void finalizeQueryResult(SQueryRuntimeEnv *pRuntimeEnv) {
  SQuery *pQuery = pRuntimeEnv->pQuery;
  bool    groupbyCol = isGroupbyNormalCol(pQuery->pGroupbyExpr);

  if (!groupbyCol && !isIntervalQuery(pQuery)) {
    for (int32_t k = 0; k < pQuery->numOfOutput; ++k) {
      aAggs[pQuery->pSelectExpr[k].base.functionId].xFinalize(&pRuntimeEnv->pCtx[k]);
    }
    return;
  }

  SWindowResInfo *pInfo = &pRuntimeEnv->windowResInfo;
  if (groupbyCol) {
    closeAllTimeWindow(pInfo);
  }

  // finalize each closed group/window result; results that are still open are left untouched
  for (int32_t idx = 0; idx < pInfo->size; ++idx) {
    if (isWindowResClosed(pInfo, idx)) {
      SWindowResult *pRes = &pInfo->pResult[idx];
      setWindowResOutputBuf(pRuntimeEnv, pRes);

      for (int32_t k = 0; k < pQuery->numOfOutput; ++k) {
        aAggs[pQuery->pSelectExpr[k].base.functionId].xFinalize(&pRuntimeEnv->pCtx[k]);
      }

      /*
       * record the number of output rows of this result; it is usually 1 except for
       * the top and bottom query
       */
      pRes->numOfRows = getNumOfResult(pRuntimeEnv);
    }
  }
}

/**
 * Check whether the select clause holds at least one function other than
 * TSDB_FUNC_TS, TSDB_FUNC_TAG and TSDB_FUNC_TAGPRJ.
 *
 * @param pQuery  query whose output expressions are inspected
 * @return true as soon as such a function is found, false if there is none
 */
static bool hasMainOutput(SQuery *pQuery) {
  int32_t idx = 0;

  while (idx < pQuery->numOfOutput) {
    int32_t fid = pQuery->pSelectExpr[idx].base.functionId;
    bool    auxiliary = (fid == TSDB_FUNC_TS) || (fid == TSDB_FUNC_TAG) || (fid == TSDB_FUNC_TAGPRJ);

    if (!auxiliary) {
      return true;
    }

    ++idx;
  }

  return false;
}

weixin_48148422's avatar
weixin_48148422 已提交
3231 3232 3233 3234 3235
/**
 * Allocate and initialize the per-table query state.
 *
 * The object is zero-filled, its time window is set to win, lastKey starts at win.skey and
 * the ts-buffer cursor is marked as unset (vgroupIndex == -1). The embedded window result
 * info is initialized through initWindowResInfo().
 *
 * @param pRuntimeEnv  runtime environment, forwarded to initWindowResInfo()
 * @param tableId      id stored in the new object
 * @param win          query time window of the table
 * @return the new object, or NULL if the allocation fails; release it with
 *         destroyTableQueryInfo()
 */
static STableQueryInfo *createTableQueryInfo(
  SQueryRuntimeEnv *pRuntimeEnv,
  STableId tableId,
  STimeWindow win
) {
  STableQueryInfo *pTableQueryInfo = calloc(1, sizeof(STableQueryInfo));
  if (pTableQueryInfo == NULL) {
    // out of memory: report it to the caller instead of writing through a NULL pointer
    return NULL;
  }

  pTableQueryInfo->win = win;
  pTableQueryInfo->lastKey = win.skey;

  pTableQueryInfo->id = tableId;
  pTableQueryInfo->cur.vgroupIndex = -1;

  initWindowResInfo(&pTableQueryInfo->windowResInfo, pRuntimeEnv, 100, 100, TSDB_DATA_TYPE_INT);
  return pTableQueryInfo;
}

3248
/**
 * Release a per-table query state object together with its window result info.
 *
 * @param pTableQueryInfo  object to release; NULL is accepted and ignored
 * @param numOfCols        forwarded to cleanupTimeWindowInfo()
 */
void destroyTableQueryInfo(STableQueryInfo *pTableQueryInfo, int32_t numOfCols) {
  if (pTableQueryInfo != NULL) {
    cleanupTimeWindowInfo(&pTableQueryInfo->windowResInfo, numOfCols);
    free(pTableQueryInfo);
  }
}

3257
/**
 * Make pTableQueryInfo the table the query is currently working on.
 *
 * In debug builds it is asserted that lastKey has not moved behind win.skey with respect to
 * the scan direction (>= for ascending queries, <= for descending ones).
 *
 * @param pRuntimeEnv      runtime environment of the query
 * @param pTableQueryInfo  table state to install as pQuery->current
 */
void setCurrentQueryTable(SQueryRuntimeEnv *pRuntimeEnv, STableQueryInfo *pTableQueryInfo) {
  SQuery *pQuery = pRuntimeEnv->pQuery;
  pQuery->current = pTableQueryInfo;

  if (QUERY_IS_ASC_QUERY(pQuery)) {
    assert(pTableQueryInfo->lastKey >= pTableQueryInfo->win.skey);
  } else {
    assert(pTableQueryInfo->lastKey <= pTableQueryInfo->win.skey);
  }
}

/**
 * set output buffer for different group
 * @param pRuntimeEnv
3268
 * @param pDataBlockInfo
3269
 */
3270
void setExecutionContext(SQInfo *pQInfo, STableId* pTableId, int32_t groupIndex, TSKEY nextKey) {
3271
  SQueryRuntimeEnv *pRuntimeEnv = &pQInfo->runtimeEnv;
H
hjxilinx 已提交
3272 3273
  STableQueryInfo *pTableQueryInfo = pRuntimeEnv->pQuery->current;
  
3274 3275
  SWindowResInfo *  pWindowResInfo = &pRuntimeEnv->windowResInfo;
  int32_t           GROUPRESULTID = 1;
3276

3277
  SWindowResult *pWindowRes = doSetTimeWindowFromKey(pRuntimeEnv, pWindowResInfo, (char *)&groupIndex, sizeof(groupIndex));
3278 3279 3280
  if (pWindowRes == NULL) {
    return;
  }
3281

3282 3283 3284 3285 3286 3287 3288 3289 3290 3291
  /*
   * not assign result buffer yet, add new result buffer
   * all group belong to one result set, and each group result has different group id so set the id to be one
   */
  if (pWindowRes->pos.pageId == -1) {
    if (addNewWindowResultBuf(pWindowRes, pRuntimeEnv->pResultBuf, GROUPRESULTID, pRuntimeEnv->numOfRowsPerPage) !=
        TSDB_CODE_SUCCESS) {
      return;
    }
  }
3292

3293 3294
  setWindowResOutputBuf(pRuntimeEnv, pWindowRes);
  initCtxOutputBuf(pRuntimeEnv);
3295

3296
  pTableQueryInfo->lastKey = nextKey;
H
hjxilinx 已提交
3297
  setAdditionalInfo(pQInfo, pTableId, pTableQueryInfo);
3298 3299
}

H
Haojun Liao 已提交
3300
/**
 * Bind every function context to its slot inside the given window result.
 *
 * For each output column the output buffer is pointed at the column's position in the result
 * page and the result info is pointed at the matching entry of pResult. TOP, BOTTOM and DIFF
 * additionally get the output buffer of column 0 as their timestamp output buffer.
 *
 * @param pRuntimeEnv  runtime environment owning the function contexts
 * @param pResult      window result the contexts are bound to
 */
void setWindowResOutputBuf(SQueryRuntimeEnv *pRuntimeEnv, SWindowResult *pResult) {
  SQuery *pQuery = pRuntimeEnv->pQuery;

  // Note: pResult->pos[i]->num == 0, there is only fixed number of results for each group
  for (int32_t col = 0; col < pQuery->numOfOutput; ++col) {
    SQLFunctionCtx *pCtx = &pRuntimeEnv->pCtx[col];
    pCtx->aOutputBuf = getPosInResultPage(pRuntimeEnv, col, pResult);

    int32_t fid = pQuery->pSelectExpr[col].base.functionId;
    bool    needTsOutput = (fid == TSDB_FUNC_TOP) || (fid == TSDB_FUNC_BOTTOM) || (fid == TSDB_FUNC_DIFF);
    if (needTsOutput) {
      pCtx->ptsOutputBuf = pRuntimeEnv->pCtx[0].aOutputBuf;
    }

    // attach the result info (and with it the intermediate buffer); not every function
    // needs the intermediate buffer, COUNT for instance does not
    pCtx->resultInfo = &pResult->resultInfo[col];

    // propagate the super table query flag
    SResultInfo *pResInfo = GET_RES_INFO(pCtx);
    pResInfo->superTableQ = pRuntimeEnv->stableQuery;
  }
}

H
Haojun Liao 已提交
3325 3326 3327 3328 3329 3330 3331 3332 3333 3334 3335 3336 3337 3338 3339 3340 3341 3342 3343 3344 3345 3346 3347 3348 3349 3350 3351 3352 3353 3354 3355 3356
/**
 * Bind every function context to its slot inside the given window result and run the init
 * callback of the function where the result info has not been initialized yet.
 *
 * Contexts whose result info is already marked complete only get their resultInfo pointer
 * updated; nothing else is touched for them.
 *
 * @param pRuntimeEnv  runtime environment owning the function contexts
 * @param pResult      window result the contexts are bound to
 */
void setWindowResOutputBufInitCtx(SQueryRuntimeEnv *pRuntimeEnv, SWindowResult *pResult) {
  SQuery *pQuery = pRuntimeEnv->pQuery;

  // Note: pResult->pos[i]->num == 0, there is only fixed number of results for each group
  for (int32_t col = 0; col < pQuery->numOfOutput; ++col) {
    SQLFunctionCtx *pCtx = &pRuntimeEnv->pCtx[col];
    pCtx->resultInfo = &pResult->resultInfo[col];

    if (!pCtx->resultInfo->complete) {
      pCtx->aOutputBuf = getPosInResultPage(pRuntimeEnv, col, pResult);
      pCtx->currentStage = 0;

      int32_t fid = pCtx->functionId;
      bool    needTsOutput = (fid == TSDB_FUNC_TOP) || (fid == TSDB_FUNC_BOTTOM) || (fid == TSDB_FUNC_DIFF);
      if (needTsOutput) {
        pCtx->ptsOutputBuf = pRuntimeEnv->pCtx[0].aOutputBuf;
      }

      // propagate the super table query flag
      pCtx->resultInfo->superTableQ = pRuntimeEnv->stableQuery;

      if (!pCtx->resultInfo->initialized) {
        aAggs[fid].init(pCtx);
      }
    }
  }
}

H
hjxilinx 已提交
3357
/**
 * Set the tag values for the given table and, if a ts buffer is in use, position the ts-buffer
 * cursor for it.
 *
 * On the first visit of a table (cur.vgroupIndex == -1) the start position is looked up by
 * the table's tag and the resulting cursor is remembered in pTableQueryInfo; on later visits
 * the remembered cursor is restored.
 *
 * @param pQInfo           query handle
 * @param pTableId         id of the table, forwarded to setTagVal()
 * @param pTableQueryInfo  per-table query state holding the remembered cursor
 * @return always 0
 */
int32_t setAdditionalInfo(SQInfo *pQInfo, STableId* pTableId, STableQueryInfo *pTableQueryInfo) {
  SQueryRuntimeEnv *pRuntimeEnv = &pQInfo->runtimeEnv;
  assert(pTableQueryInfo->lastKey >= TSKEY_INITIAL_VAL);

  setTagVal(pRuntimeEnv, pTableId, pQInfo->tsdb);

  // both the master and supplement scan needs to set the correct ts comp start position
  if (pRuntimeEnv->pTSBuf == NULL) {
    return 0;
  }

  if (pTableQueryInfo->cur.vgroupIndex != -1) {
    tsBufSetCursor(pRuntimeEnv->pTSBuf, &pTableQueryInfo->cur);
    return 0;
  }

  pTableQueryInfo->tag = pRuntimeEnv->pCtx[0].tag.i64Key;
  tsBufGetElemStartPos(pRuntimeEnv->pTSBuf, 0, pTableQueryInfo->tag);

  // keep the cursor info of the current table
  pTableQueryInfo->cur = pRuntimeEnv->pTSBuf->cur;
  return 0;
}

/*
 * There are two cases to handle:
 *
 * 1. Query range is not set yet (queryRangeSet = 0). we need to set the query range info, including pQuery->lastKey,
 *    pQuery->window.skey, and pQuery->eKey.
 * 2. Query range is set and query is in progress. There may be another result with the same query ranges to be
 *    merged during merge stage. In this case, we need the pTableQueryInfo->lastResRows to decide if there
 *    is a previous result generated or not.
 */
H
hjxilinx 已提交
3389
void setIntervalQueryRange(SQInfo *pQInfo, TSKEY key) {
3390 3391
  SQueryRuntimeEnv *pRuntimeEnv = &pQInfo->runtimeEnv;
  SQuery *          pQuery = pRuntimeEnv->pQuery;
H
hjxilinx 已提交
3392 3393
  STableQueryInfo *pTableQueryInfo = pQuery->current;
  
3394 3395 3396
  if (pTableQueryInfo->queryRangeSet) {
    pTableQueryInfo->lastKey = key;
  } else {
3397
    pTableQueryInfo->win.skey = key;
3398
    STimeWindow win = {.skey = key, .ekey = pQuery->window.ekey};
3399

3400 3401 3402 3403 3404
    // for too small query range, no data in this interval.
    if ((QUERY_IS_ASC_QUERY(pQuery) && (pQuery->window.ekey < pQuery->window.skey)) ||
        (!QUERY_IS_ASC_QUERY(pQuery) && (pQuery->window.skey < pQuery->window.ekey))) {
      return;
    }
3405

3406 3407 3408 3409 3410 3411
    /**
     * In handling the both ascending and descending order super table query, we need to find the first qualified
     * timestamp of this table, and then set the first qualified start timestamp.
     * In ascending query, key is the first qualified timestamp. However, in the descending order query, additional
     * operations involve.
     */
H
Haojun Liao 已提交
3412
    STimeWindow     w = TSWINDOW_INITIALIZER, realWin = TSWINDOW_INITIALIZER;
3413
    SWindowResInfo *pWindowResInfo = &pTableQueryInfo->windowResInfo;
3414

H
Haojun Liao 已提交
3415 3416
    TSKEY sk = MIN(win.skey, win.ekey);
    TSKEY ek = MAX(win.skey, win.ekey);
H
Haojun Liao 已提交
3417
    getAlignQueryTimeWindow(pQuery, win.skey, sk, ek, &realWin, &w);
3418
    pWindowResInfo->startTime = pTableQueryInfo->win.skey;  // windowSKey may be 0 in case of 1970 timestamp
3419

3420 3421
    if (pWindowResInfo->prevSKey == TSKEY_INITIAL_VAL) {
      if (!QUERY_IS_ASC_QUERY(pQuery)) {
H
Haojun Liao 已提交
3422
        assert(win.ekey == pQuery->window.ekey);
3423
      }
3424 3425
      
      pWindowResInfo->prevSKey = w.skey;
3426
    }
3427

3428
    pTableQueryInfo->queryRangeSet = 1;
3429
    pTableQueryInfo->lastKey = pTableQueryInfo->win.skey;
3430 3431 3432 3433
  }
}

/**
 * Check whether any output function of the query carries the TSDB_FUNCSTATE_NEED_TS flag.
 *
 * @param pQuery  query whose output expressions are inspected
 * @return true if at least one function has the flag set, false otherwise
 */
bool requireTimestamp(SQuery *pQuery) {
  bool    needTs = false;
  int32_t idx = 0;

  while (!needTs && idx < pQuery->numOfOutput) {
    int32_t fid = pQuery->pSelectExpr[idx].base.functionId;
    needTs = (aAggs[fid].nStatus & TSDB_FUNCSTATE_NEED_TS) != 0;
    ++idx;
  }

  return needTs;
}

/**
 * Decide whether the primary timestamp column of a data block has to be loaded.
 *
 * It is required when
 * 1. lastKey of the current table or the end key of the query window falls inside the time
 *    range of the block, so the precise position has to be located, or
 * 2. one of the output functions needs the timestamp (see requireTimestamp()).
 *
 * @param pQuery          query being executed
 * @param pDataBlockInfo  description of the data block, including its time window
 * @return true if the timestamp column must be loaded
 */
bool needPrimaryTimestampCol(SQuery *pQuery, SDataBlockInfo *pDataBlockInfo) {
  const STimeWindow *pBlockWin = &pDataBlockInfo->window;
  STableQueryInfo   *pCurrent = pQuery->current;

  if (pCurrent->lastKey >= pBlockWin->skey && pCurrent->lastKey <= pBlockWin->ekey) {
    return true;
  }

  if (pQuery->window.ekey >= pBlockWin->skey && pQuery->window.ekey <= pBlockWin->ekey) {
    return true;
  }

  return requireTimestamp(pQuery);
}

/**
 * Number of result subsets of the query: the number of closed time windows for
 * group-by-normal-column and interval queries, otherwise the size of the group list.
 *
 * @param pQInfo  query handle
 * @return the number of subsets
 */
static int32_t getNumOfSubset(SQInfo *pQInfo) {
  SQuery *pQuery = pQInfo->runtimeEnv.pQuery;

  bool windowed = isGroupbyNormalCol(pQuery->pGroupbyExpr) || isIntervalQuery(pQuery);
  if (windowed) {
    int32_t numOfClosed = numOfClosedTimeWindow(&pQInfo->runtimeEnv.windowResInfo);
    return numOfClosed;
  }

  int32_t numOfGroups = taosArrayGetSize(pQInfo->groupInfo.pGroupList);
  return numOfGroups;
}

/**
 * Copy rows from the window results into the output buffers of the query (pQuery->sdata).
 *
 * Copying resumes at pQInfo->groupIndex / pQInfo->offset and stops when the output buffers are
 * full or all subsets have been visited; both fields are updated so that the next call
 * continues where this one stopped.
 *
 * @param pQInfo     query handle
 * @param result     array of window results to copy from
 * @param orderType  TSDB_ORDER_ASC walks the results forwards, anything else backwards
 * @return number of rows copied by this call
 */
static int32_t doCopyToSData(SQInfo *pQInfo, SWindowResult *result, int32_t orderType) {
  SQueryRuntimeEnv *pRuntimeEnv = &pQInfo->runtimeEnv;
  SQuery           *pQuery = pRuntimeEnv->pQuery;

  qTrace("QInfo:%p start to copy data from windowResInfo to query buf", pQInfo);
  int32_t totalSubset = getNumOfSubset(pQInfo);

  int32_t first = 0;
  int32_t step = -1;
  if (orderType == TSDB_ORDER_ASC) {
    first = pQInfo->groupIndex;
    step = 1;
  } else {  // descending: start from the mirrored position and walk backwards
    first = totalSubset - pQInfo->groupIndex - 1;
  }

  int32_t copied = 0;
  for (int32_t i = first; (i >= 0) && (i < totalSubset); i += step) {
    SWindowResult *pRes = &result[i];

    // nothing to copy from an empty result, move on to the next group
    if (pRes->numOfRows == 0) {
      pQInfo->offset = 0;
      pQInfo->groupIndex += 1;
      continue;
    }

    assert(pRes->numOfRows >= 0 && pQInfo->offset <= 1);

    int32_t oldOffset = pQInfo->offset;
    int32_t numOfRowsToCopy = pRes->numOfRows - pQInfo->offset;

    if (numOfRowsToCopy <= pQuery->rec.capacity - copied) {
      // the rest of this group fits: the group is done
      pQInfo->offset = 0;
      pQInfo->groupIndex += 1;
    } else {
      // not enough output space for the whole group: copy what fits and remember the position
      numOfRowsToCopy = pQuery->rec.capacity - copied;
      pQInfo->offset += numOfRowsToCopy;
    }

    for (int32_t col = 0; col < pQuery->numOfOutput; ++col) {
      int32_t size = pRuntimeEnv->pCtx[col].outputBytes;

      char *src = getPosInResultPage(pRuntimeEnv, col, pRes);
      char *dst = pQuery->sdata[col]->data + copied * size;
      memcpy(dst, src + oldOffset * size, size * numOfRowsToCopy);
    }

    copied += numOfRowsToCopy;
    if (copied == pQuery->rec.capacity) {
      break;
    }
  }

  qTrace("QInfo:%p copy data to query buf completed", pQInfo);

#ifdef _DEBUG_VIEW
  displayInterResult(pQuery->sdata, pRuntimeEnv, copied);
#endif
  return copied;
}

/**
 * Copy window results into the output buffers of the query and add the copied rows to
 * pQuery->rec.rows.
 *
 * The copy runs in the order given by the group-by expression; without a group-by expression
 * (interval queries of both super table and table) it runs in ascending order, since the
 * output results are ordered in SWindowResult already. For group-by queries all group
 * results are complete by the time this function is called.
 *
 * @param pQInfo  query handle
 * @param result  array of window results to copy from
 */
void copyFromWindowResToSData(SQInfo *pQInfo, SWindowResult *result) {
  SQuery *pQuery = pQInfo->runtimeEnv.pQuery;

  int32_t orderType = TSDB_ORDER_ASC;
  if (pQuery->pGroupbyExpr != NULL) {
    orderType = pQuery->pGroupbyExpr->orderType;
  }

  int32_t copied = doCopyToSData(pQInfo, result, orderType);
  pQuery->rec.rows += copied;

  assert(pQuery->rec.rows <= pQuery->rec.capacity);
}

H
hjxilinx 已提交
3555
/**
 * For queries without a time interval, record the number of result rows in the window result
 * that belongs to the group of the given table. The value is only written while the window
 * result still reports zero rows.
 *
 * @param pRuntimeEnv      runtime environment of the query
 * @param pTableQueryInfo  table whose groupIndex selects the window result
 */
static void updateWindowResNumOfRes(SQueryRuntimeEnv *pRuntimeEnv, STableQueryInfo *pTableQueryInfo) {
  SQuery *pQuery = pRuntimeEnv->pQuery;

  // update the number of result for each, only update the number of rows for the corresponding window result.
  if (pQuery->intervalTime == 0) {
    int32_t g = pTableQueryInfo->groupIndex;
    assert(pRuntimeEnv->windowResInfo.size > 0);

    SWindowResult *pWindowRes = doSetTimeWindowFromKey(pRuntimeEnv, &pRuntimeEnv->windowResInfo, (char *)&g, sizeof(g));
    if (pWindowRes == NULL) {
      // the lookup can fail (setExecutionContext() guards against the same case); nothing to update
      return;
    }

    if (pWindowRes->numOfRows == 0) {
      pWindowRes->numOfRows = getNumOfResult(pRuntimeEnv);
    }
  }
}

3570 3571
/**
 * Apply the query functions to one data block of the current table in a super table query.
 *
 * Rows are processed one by one when filter columns exist or a ts buffer is in use, otherwise
 * the block is processed as a whole. Afterwards the row count of the matching window result
 * is refreshed through updateWindowResNumOfRes().
 *
 * @param pRuntimeEnv     runtime environment of the query
 * @param pDataBlockInfo  description of the data block
 * @param pStatis         block statistics, forwarded to the apply functions
 * @param pDataBlock      column data of the block
 * @param searchFn        search function, used by the block-wise path only
 */
void stableApplyFunctionsOnBlock(SQueryRuntimeEnv *pRuntimeEnv, SDataBlockInfo *pDataBlockInfo, SDataStatis *pStatis,
    SArray *pDataBlock, __block_search_fn_t searchFn) {
  SQuery          *pQuery = pRuntimeEnv->pQuery;
  STableQueryInfo *pCurrent = pQuery->current;
  SWindowResInfo  *pWindowResInfo = &pCurrent->windowResInfo;

  // start at the first row for ascending queries and at the last row for descending ones
  if (QUERY_IS_ASC_QUERY(pQuery)) {
    pQuery->pos = 0;
  } else {
    pQuery->pos = pDataBlockInfo->rows - 1;
  }

  bool blockwise = (pQuery->numOfFilterCols <= 0) && (pRuntimeEnv->pTSBuf == NULL);
  if (blockwise) {
    blockwiseApplyFunctions(pRuntimeEnv, pStatis, pDataBlockInfo, pWindowResInfo, searchFn, pDataBlock);
  } else {
    rowwiseApplyFunctions(pRuntimeEnv, pStatis, pDataBlockInfo, pWindowResInfo, pDataBlock);
  }

  updateWindowResNumOfRes(pRuntimeEnv, pCurrent);
}

3587 3588 3589 3590
/**
 * Tell whether the fill operation still has results to hand out.
 *
 * @param pRuntimeEnv  runtime environment of the query
 * @return false when no fill is applied (fill type NONE or point interpolation query) or the
 *         limit has been reached; true when the fill info still holds rows; once the query
 *         is completed, true if filling up to the revised end key produces any rows;
 *         false otherwise
 */
bool queryHasRemainResults(SQueryRuntimeEnv* pRuntimeEnv) {
  SQuery    *pQuery = pRuntimeEnv->pQuery;
  SFillInfo *pFillInfo = pRuntimeEnv->pFillInfo;

  // todo refactor
  if (pQuery->fillType == TSDB_FILL_NONE || isPointInterpoQuery(pQuery)) {
    assert(pFillInfo == NULL);
    return false;
  }

  bool limitReached = (pQuery->limit.limit > 0) && (pQuery->rec.rows >= pQuery->limit.limit);
  if (limitReached) {
    return false;
  }

  // rows that have not been returned to the client yet must be filled and returned first
  int32_t remain = taosNumOfRemainRows(pFillInfo);
  if (remain > 0) {
    return true;
  }

  /*
   * Nothing is pending at this point. While the query is still running, the gap between two
   * result blocks is handled after the next data block has been retrieved from TSDB, so
   * there is nothing to report now.
   *
   * NOTE: If the result set is not the first block, the gap in front of the result set will
   * be filled. If the result set is the FIRST result block, the gap between the start time
   * of the query time window and the timestamp of the first result row is not filled.
   */
  if (!Q_STATUS_EQUAL(pQuery->status, QUERY_COMPLETED)) {
    return false;
  }

  TSKEY ekey = taosGetRevisedEndKey(pQuery->window.ekey, pQuery->order.order, pQuery->slidingTime,
                                    pQuery->slidingTimeUnit, pQuery->precision);
  int32_t numOfTotal = taosGetNumOfResultWithFill(pFillInfo, remain, ekey, pQuery->rec.capacity);
  return numOfTotal > 0;
}

/**
 * Serialize the query result into the response buffer and update the query status.
 *
 * Layout written to data:
 *   1. the data of every output column, back to back (bytes * numOfRows each);
 *   2. the number of table id entries as a 32-bit integer in network byte order;
 *   3. one STableIdInfo per entry, with uid and key converted by htobe64() and tid by htonl().
 *
 * The write position behind the column data has no alignment guarantee, so the integer and
 * the structs are assembled in local variables and copied with memcpy() instead of being
 * stored through a cast pointer.
 *
 * When the query is completed, its status is moved to QUERY_OVER once all tables have been
 * processed (super table query) or no fill results remain (other queries).
 *
 * @param pQInfo     query handle
 * @param numOfRows  number of result rows to copy per column
 * @param data       destination buffer; the caller must provide enough space
 */
static void doCopyQueryResultToMsg(SQInfo *pQInfo, int32_t numOfRows, char *data) {
  SQuery *pQuery = pQInfo->runtimeEnv.pQuery;
  for (int32_t col = 0; col < pQuery->numOfOutput; ++col) {
    int32_t bytes = pQuery->pSelectExpr[col].bytes;

    memmove(data, pQuery->sdata[col]->data, bytes * numOfRows);
    data += bytes * numOfRows;
  }

  int32_t  numOfTables = (int32_t)taosArrayGetSize(pQInfo->arrTableIdInfo);
  uint32_t netNumOfTables = htonl((uint32_t)numOfTables);
  memcpy(data, &netNumOfTables, sizeof(int32_t));
  data += sizeof(int32_t);

  for(int32_t i = 0; i < numOfTables; i++) {
    STableIdInfo* pSrc = taosArrayGet(pQInfo->arrTableIdInfo, i);

    // start from a copy of the source so every byte of the entry is defined, then convert
    STableIdInfo dst = *pSrc;
    dst.uid = htobe64(pSrc->uid);
    dst.tid = htonl(pSrc->tid);
    dst.key = htobe64(pSrc->key);

    memcpy(data, &dst, sizeof(STableIdInfo));
    data += sizeof(STableIdInfo);
  }

  // all data returned, set query over
  if (Q_STATUS_EQUAL(pQuery->status, QUERY_COMPLETED)) {
    if (pQInfo->runtimeEnv.stableQuery) {
      if (pQInfo->tableIndex >= pQInfo->groupInfo.numOfTables) {
        setQueryStatus(pQuery, QUERY_OVER);
      }
    } else {
      if (!queryHasRemainResults(&pQInfo->runtimeEnv)) {
        setQueryStatus(pQuery, QUERY_OVER);
      }
    }
  }
}

3662
/**
 * Generate filled result rows and apply the remaining OFFSET of the query to them.
 *
 * Blocks are generated into pQuery->sdata until rows survive the offset or no more results
 * remain. Rows consumed by the offset are dropped from the front of pDst and
 * pQuery->limit.offset is reduced accordingly.
 *
 * The offset values are cast to int64_t and printed with PRId64 in the trace messages:
 * skipBlocks() prints the same pQuery->limit.offset field as a 64-bit value, so "%d" does
 * not match the argument type.
 *
 * @param pRuntimeEnv   runtime environment of the query
 * @param pDst          output pages; rows skipped by the offset are removed from their front
 * @param numOfRows     not used by this function
 * @param numOfInterpo  not used by this function
 * @return number of rows available in the output after the offset has been applied
 */
int32_t doFillGapsInResults(SQueryRuntimeEnv* pRuntimeEnv, tFilePage **pDst, int32_t numOfRows, int32_t *numOfInterpo) {
  SQInfo* pQInfo = GET_QINFO_ADDR(pRuntimeEnv);
  SQuery *pQuery = pRuntimeEnv->pQuery;
  SFillInfo* pFillInfo = pRuntimeEnv->pFillInfo;

  while (1) {
    int32_t ret = taosGenerateDataBlock(pFillInfo, (tFilePage**) pQuery->sdata, pQuery->rec.capacity);

    // todo apply limit output function
    /* reached the start position of according to offset value, return immediately */
    if (pQuery->limit.offset == 0) {
      qTrace("QInfo:%p initial numOfRows:%d, generate filled result:%d rows", pQInfo, pFillInfo->numOfRows, ret);
      return ret;
    }

    if (pQuery->limit.offset < ret) {
      qTrace("QInfo:%p initial numOfRows:%d, generate filled result:%d rows, offset:%" PRId64
             ". Discard due to offset, remain:%" PRId64 ", new offset:%d",
             pQInfo, pFillInfo->numOfRows, ret, (int64_t)pQuery->limit.offset,
             (int64_t)(ret - pQuery->limit.offset), 0);

      ret -= pQuery->limit.offset;
      // todo !!!!there exactly number of interpo is not valid.
      // todo refactor move to the beginning of buffer
      for (int32_t i = 0; i < pQuery->numOfOutput; ++i) {
        memmove(pDst[i]->data, pDst[i]->data + pQuery->pSelectExpr[i].bytes * pQuery->limit.offset,
                ret * pQuery->pSelectExpr[i].bytes);
      }

      pQuery->limit.offset = 0;
      return ret;
    } else {
      qTrace("QInfo:%p initial numOfRows:%d, generate filled result:%d rows, offset:%" PRId64
             ". Discard due to offset, remain:%d, new offset:%" PRId64,
             pQInfo, pFillInfo->numOfRows, ret, (int64_t)pQuery->limit.offset, 0,
             (int64_t)(pQuery->limit.offset - ret));

      // the whole block is consumed by the offset
      pQuery->limit.offset -= ret;
      pQuery->rec.rows = 0;
      ret = 0;
    }

    if (!queryHasRemainResults(pRuntimeEnv)) {
      return ret;
    }
  }
}

3707
/**
 * Write the cost summary of a query to the trace log: elapsed time, number of blocks, how
 * many blocks were served from block statistics or loaded as block data, and the total and
 * checked row counts.
 *
 * Only this summary line is reported; a finer breakdown (comp info, field info, file blocks,
 * temp buffer size, file/table/seek counts, io vs. computing time) is not produced.
 *
 * @param pQInfo  query handle whose runtime summary is logged
 */
static void queryCostStatis(SQInfo *pQInfo) {
  SQueryCostInfo *pCost = &pQInfo->runtimeEnv.summary;

  // todo add the intermediate result save cost!!
  qTrace("QInfo:%p :cost summary: elpased time:%"PRId64" us, total blocks:%d, use block statis:%d, use block data:%d, "
         "total rows:%"PRId64 ", check rows:%"PRId64, pQInfo, pCost->elapsedTime, pCost->totalBlocks,
         pCost->loadBlockStatis, pCost->loadBlocks, pCost->totalRows, pCost->totalCheckedRows);
}

3751 3752
/**
 * Consume the remaining OFFSET of the query inside the given data block.
 *
 * If the offset equals the number of rows of the block, the whole block is skipped and lastKey
 * is moved one step past the block. Otherwise the start position inside the block is
 * computed, lastKey is set to the timestamp at that position, and the query functions are
 * applied to the block from there. In both cases the offset ends up as 0.
 *
 * @param pRuntimeEnv  runtime environment of the query
 * @param pBlockInfo   description of the block the offset ends in
 */
static void updateOffsetVal(SQueryRuntimeEnv *pRuntimeEnv, SDataBlockInfo *pBlockInfo) {
  SQuery          *pQuery = pRuntimeEnv->pQuery;
  STableQueryInfo *pCurrent = pQuery->current;

  int32_t step = GET_FORWARD_DIRECTION_FACTOR(pQuery->order.order);

  // the offset swallows the block completely
  if (pQuery->limit.offset == pBlockInfo->rows) {
    if (QUERY_IS_ASC_QUERY(pQuery)) {
      pCurrent->lastKey = pBlockInfo->window.ekey + step;
    } else {
      pCurrent->lastKey = pBlockInfo->window.skey + step;
    }

    pQuery->limit.offset = 0;
    return;
  }

  // first row to process: counted from the front for ascending, from the back for descending
  if (!QUERY_IS_ASC_QUERY(pQuery)) {
    pQuery->pos = pBlockInfo->rows - pQuery->limit.offset - 1;
  } else {
    pQuery->pos = pQuery->limit.offset;
  }

  assert(pQuery->pos >= 0 && pQuery->pos <= pBlockInfo->rows - 1);

  // column 0 of the block is read as the array of timestamps
  SArray          *pDataBlock = tsdbRetrieveDataBlock(pRuntimeEnv->pQueryHandle, NULL);
  SColumnInfoData *pTsCol = taosArrayGet(pDataBlock, 0);
  TSKEY           *tsList = (TSKEY *)pTsCol->pData;

  pCurrent->lastKey = tsList[pQuery->pos];
  pQuery->limit.offset = 0;

  int32_t numOfRes = tableApplyFunctionsOnBlock(pRuntimeEnv, pBlockInfo, NULL, binarySearchForKey, pDataBlock);

  qTrace("QInfo:%p check data block, brange:%" PRId64 "-%" PRId64 ", numOfRows:%d, numOfRes:%d, lastKey:%"PRId64, GET_QINFO_ADDR(pRuntimeEnv),
         pBlockInfo->window.skey, pBlockInfo->window.ekey, pBlockInfo->rows, numOfRes, pQuery->current->lastKey);
}
3786

3787 3788 3789 3790 3791
/**
 * Skip whole data blocks to honor the OFFSET of the query.
 *
 * Nothing is done when there is no positive offset or when filter columns exist. Otherwise
 * blocks are consumed while the offset is larger than the block's row count; the block in
 * which the offset ends is handed to updateOffsetVal(). The loop also ends when the query is
 * killed or no more blocks are available.
 *
 * @param pRuntimeEnv  runtime environment of the query
 */
void skipBlocks(SQueryRuntimeEnv *pRuntimeEnv) {
  SQuery *pQuery = pRuntimeEnv->pQuery;

  bool nothingToSkip = (pQuery->limit.offset <= 0) || (pQuery->numOfFilterCols > 0);
  if (nothingToSkip) {
    return;
  }

  pQuery->pos = 0;
  int32_t step = GET_FORWARD_DIRECTION_FACTOR(pQuery->order.order);

  STableQueryInfo *pCurrent = pQuery->current;
  TsdbQueryHandleT handle = pRuntimeEnv->pQueryHandle;

  while (tsdbNextDataBlock(handle)) {
    if (isQueryKilled(GET_QINFO_ADDR(pRuntimeEnv))) {
      return;
    }

    SDataBlockInfo blockInfo = tsdbRetrieveDataBlockInfo(handle);

    // the offset ends inside (or exactly at the end of) this block: locate the start position
    if (pQuery->limit.offset <= blockInfo.rows) {
      updateOffsetVal(pRuntimeEnv, &blockInfo);
      break;
    }

    // the whole block is skipped; continue one step past its far edge
    pQuery->limit.offset -= blockInfo.rows;
    pCurrent->lastKey = (QUERY_IS_ASC_QUERY(pQuery)) ? blockInfo.window.ekey : blockInfo.window.skey;
    pCurrent->lastKey += step;

    qTrace("QInfo:%p skip rows:%d, offset:%" PRId64, GET_QINFO_ADDR(pRuntimeEnv), blockInfo.rows,
           pQuery->limit.offset);
  }
}
3820

H
Haojun Liao 已提交
3821
/*
 * Forward the start position of an interval query by pQuery->limit.offset time windows.
 *
 * On entry *start is set to the last key of the current table. When the offset can be
 * consumed, *start, pQuery->window.skey and the previous-window start key kept in
 * pRuntimeEnv->windowResInfo are moved to the first window that has to be reported.
 *
 * For interval queries without interpolation the offset is consumed one window at a time,
 * because holes may exist in the data and intervalTime * offset is therefore not a valid
 * shortcut.
 *
 * The function returns true on every path.
 */
static bool skipTimeInterval(SQueryRuntimeEnv *pRuntimeEnv, TSKEY* start) {
  SQuery *pQuery = pRuntimeEnv->pQuery;
  *start = pQuery->current->lastKey;

  // if queried with value filter, do NOT forward query start position
  if (pQuery->limit.offset <= 0 || pQuery->numOfFilterCols > 0 || pRuntimeEnv->pTSBuf != NULL ||
      pRuntimeEnv->pFillInfo != NULL) {
    return true;
  }

  assert(pRuntimeEnv->windowResInfo.prevSKey == TSKEY_INITIAL_VAL);

  STimeWindow alignedWin = TSWINDOW_INITIALIZER;
  STimeWindow realWin = TSWINDOW_INITIALIZER;

  SWindowResInfo  *pWinInfo = &pRuntimeEnv->windowResInfo;
  STableQueryInfo *pCurrent = pQuery->current;
  const bool       ascending = QUERY_IS_ASC_QUERY(pQuery);

  while (tsdbNextDataBlock(pRuntimeEnv->pQueryHandle)) {
    SDataBlockInfo blockInfo = tsdbRetrieveDataBlockInfo(pRuntimeEnv->pQueryHandle);

    if (!ascending) {
      getAlignQueryTimeWindow(pQuery, blockInfo.window.ekey, pQuery->window.ekey, blockInfo.window.ekey, &realWin,
                              &alignedWin);

      pWinInfo->startTime = pQuery->window.skey;
      pWinInfo->prevSKey = alignedWin.skey;
    } else if (pWinInfo->prevSKey == TSKEY_INITIAL_VAL) {
      // ascending scan: align only once, on the first block
      getAlignQueryTimeWindow(pQuery, blockInfo.window.skey, blockInfo.window.skey, pQuery->window.ekey, &realWin,
                              &alignedWin);
      pWinInfo->startTime = alignedWin.skey;
      pWinInfo->prevSKey = alignedWin.skey;
    }

    // the first time window
    STimeWindow win = getActiveTimeWindow(pWinInfo, pWinInfo->prevSKey, pQuery);

    while (pQuery->limit.offset > 0) {
      // the active window is closed by this block: one window of the offset is consumed
      bool closedInBlock = ascending ? (win.ekey <= blockInfo.window.ekey) : (win.ekey >= blockInfo.window.skey);
      if (closedInBlock) {
        pQuery->limit.offset -= 1;
        pWinInfo->prevSKey = win.skey;
      }

      STimeWindow tw = win;
      getNextTimeWindow(pQuery, &tw);

      // does the following window still overlap the current block?
      bool nextInBlock = ascending ? (tw.skey <= blockInfo.window.ekey) : (tw.ekey >= blockInfo.window.skey);

      if (!nextInBlock) {
        if (pQuery->limit.offset != 0) {
          break;  // offset is not 0, and next time window begins or ends in the next block.
        }

        // offset exhausted exactly at the block boundary: start from the next window
        *start = tw.skey;
        pQuery->window.skey = tw.skey;
        pWinInfo->prevSKey = tw.skey;
        return true;
      }

      /*
       * The next time window still starts from the current data block: load the block and
       * find the start position of the next queried time window. Only the primary timestamp
       * column (column 0) is inspected here.
       * TODO: Optimize for this case. Data blocks need to be loaded only if the first actually
       * required time window resides in the current data block.
       */
      SArray          *pDataBlock = tsdbRetrieveDataBlock(pRuntimeEnv->pQueryHandle, NULL);
      SColumnInfoData *pTsCol = taosArrayGet(pDataBlock, 0);

      tw = win;
      int32_t startPos = getNextQualifiedWindow(pRuntimeEnv, &tw, &blockInfo, pTsCol->pData, binarySearchForKey);
      assert(startPos >= 0);

      // set the abort info
      pQuery->pos = startPos;

      if (pQuery->limit.offset == 0) {
        // reset the query start timestamp
        pCurrent->win.skey = ((TSKEY *)pTsCol->pData)[startPos];
        pQuery->window.skey = pCurrent->win.skey;
        *start = pCurrent->win.skey;

        pWinInfo->prevSKey = tw.skey;

        // check the data remaining in the current block, then restore the window index
        int32_t savedIndex = pRuntimeEnv->windowResInfo.curIndex;
        int32_t numOfRes = tableApplyFunctionsOnBlock(pRuntimeEnv, &blockInfo, NULL, binarySearchForKey, pDataBlock);
        pRuntimeEnv->windowResInfo.curIndex = savedIndex;

        qTrace("QInfo:%p check data block, brange:%" PRId64 "-%" PRId64 ", numOfRows:%d, numOfRes:%d, lastKey:%"PRId64,
               GET_QINFO_ADDR(pRuntimeEnv), blockInfo.window.skey, blockInfo.window.ekey, blockInfo.rows, numOfRes,
               pQuery->current->lastKey);

        return true;
      }

      // offset not exhausted yet: continue with the next qualified window of this block
      pCurrent->lastKey = ((TSKEY *)pTsCol->pData)[startPos];
      pWinInfo->prevSKey = tw.skey;
      win = tw;
    }
  }

  return true;
}

B
Bomin Zhang 已提交
3941 3942
/*
 * Create the primary tsdb query handle of pQInfo and store it in the runtime environment.
 *
 * No handle is created for tag-only queries, nor for super table queries that are neither
 * interval queries nor fixed-output queries.
 */
static void setupQueryHandle(void* tsdb, SQInfo* pQInfo, bool isSTableQuery) {
  SQueryRuntimeEnv *pRuntimeEnv = &pQInfo->runtimeEnv;
  SQuery           *pQuery = pQInfo->runtimeEnv.pQuery;

  if (onlyQueryTags(pQuery) ||
      (isSTableQuery && (!isIntervalQuery(pQuery)) && (!isFixedOutputQuery(pQuery)))) {
    return;
  }

  STsdbQueryCond cond = {
    .twindow   = pQuery->window,
    .order     = pQuery->order.order,
    .colList   = pQuery->colList,
    .numOfCols = pQuery->numOfCols,
  };

  // plain ascending scan of a single normal table: use the time window kept for that table
  bool useTableWindow = !isSTableQuery
                     && (pQInfo->groupInfo.numOfTables == 1)
                     && (cond.order == TSDB_ORDER_ASC)
                     && (!isIntervalQuery(pQuery))
                     && (!isGroupbyNormalCol(pQuery->pGroupbyExpr))
                     && (!isFixedOutputQuery(pQuery));
  if (useTableWindow) {
    SArray     *firstGroup = taosArrayGetP(pQInfo->groupInfo.pGroupList, 0);
    SGroupItem *firstItem = taosArrayGet(firstGroup, 0);
    cond.twindow = firstItem->info->win;
  }

  // choose the handle constructor that matches the query type
  if (isFirstLastRowQuery(pQuery)) {
    pRuntimeEnv->pQueryHandle = tsdbQueryLastRow(tsdb, &cond, &pQInfo->tableIdGroupInfo, pQInfo);
  } else if (isPointInterpoQuery(pQuery)) {
    pRuntimeEnv->pQueryHandle = tsdbQueryRowsInExternalWindow(tsdb, &cond, &pQInfo->tableIdGroupInfo, pQInfo);
  } else {
    pRuntimeEnv->pQueryHandle = tsdbQueryTables(tsdb, &cond, &pQInfo->tableIdGroupInfo, pQInfo);
  }
}

3981 3982 3983 3984 3985 3986 3987 3988 3989 3990 3991 3992 3993
/*
 * Build the fill-column description array for the output columns of pQuery.
 *
 * One SFillColInfo is produced per output expression; the column offsets are the
 * running sum of the byte widths of the preceding output columns.
 *
 * @param pQuery  query whose pSelectExpr/fillVal arrays hold numOfOutput entries
 * @return        array allocated with calloc() (released by the caller or whoever the
 *                caller hands it to), or NULL if the allocation fails
 */
static SFillColInfo* taosCreateFillColInfo(SQuery* pQuery) {
  int32_t numOfCols = pQuery->numOfOutput;
  int32_t offset = 0;

  SFillColInfo* pFillCol = calloc(numOfCols, sizeof(SFillColInfo));
  if (pFillCol == NULL) {  // out of memory: do not write through a NULL pointer
    return NULL;
  }

  for(int32_t i = 0; i < numOfCols; ++i) {
    SExprInfo* pExprInfo = &pQuery->pSelectExpr[i];

    pFillCol[i].col.bytes  = pExprInfo->bytes;
    pFillCol[i].col.type   = pExprInfo->type;
    pFillCol[i].col.offset = offset;
    pFillCol[i].flag       = TSDB_COL_NORMAL;    // always a normal column for table query
    pFillCol[i].functionId = pExprInfo->base.functionId;
    pFillCol[i].fillVal.i = pQuery->fillVal[i];

    offset += pExprInfo->bytes;
  }

  return pFillCol;
}

4002
/*
 * Prepare pQInfo for execution: create the tsdb query handle, the runtime environment,
 * the result buffer / window result info required by the query type, and the fill info.
 *
 * @param pQInfo         query info to initialize
 * @param param          timestamp buffer of the query, stored as pRuntimeEnv->pTSBuf; may be NULL
 * @param tsdb           tsdb handle the query runs against
 * @param vgId           id stored in pQInfo->vgId
 * @param isSTableQuery  true if this is a super table query
 * @return TSDB_CODE_SUCCESS, or the error code reported by setupQueryRuntimeEnv() or
 *         createDiskbasedResultBuffer()
 */
int32_t doInitQInfo(SQInfo *pQInfo, void *param, void *tsdb, int32_t vgId, bool isSTableQuery) {
  int32_t code = TSDB_CODE_SUCCESS;

  SQueryRuntimeEnv *pRuntimeEnv = &pQInfo->runtimeEnv;

  SQuery *pQuery = pQInfo->runtimeEnv.pQuery;
  pQuery->precision = tsdbGetCfg(tsdb)->precision;

  setScanLimitationByResultBuffer(pQuery);
  changeExecuteScanOrder(pQuery, false);
  setupQueryHandle(tsdb, pQInfo, isSTableQuery);

  pQInfo->tsdb = tsdb;
  pQInfo->vgId = vgId;

  pRuntimeEnv->pQuery = pQuery;
  pRuntimeEnv->pTSBuf = param;
  pRuntimeEnv->cur.vgroupIndex = -1;
  pRuntimeEnv->stableQuery = isSTableQuery;

  if (param != NULL) {
    // traverse the ts buffer forwards only if it is stored in the same order as the query
    int16_t order = (pQuery->order.order == pRuntimeEnv->pTSBuf->tsOrder) ? TSDB_ORDER_ASC : TSDB_ORDER_DESC;
    tsBufSetTraverseOrder(pRuntimeEnv->pTSBuf, order);
  }

  // create runtime environment
  code = setupQueryRuntimeEnv(pRuntimeEnv, pQuery->order.order);
  if (code != TSDB_CODE_SUCCESS) {
    return code;
  }

  // the flag must be known before it is used to size the result page below
  pRuntimeEnv->topBotQuery = isTopBottomQuery(pQuery);
  pRuntimeEnv->numOfRowsPerPage = getNumOfRowsInResultPage(pQuery, pRuntimeEnv->topBotQuery, isSTableQuery);

  if (isSTableQuery) {
    int32_t rows = getInitialPageNum(pQInfo);
    code = createDiskbasedResultBuffer(&pRuntimeEnv->pResultBuf, rows, pQuery->rowSize, pQInfo);
    if (code != TSDB_CODE_SUCCESS) {
      return code;
    }

    if (pQuery->intervalTime == 0) {
      int16_t type = TSDB_DATA_TYPE_NULL;

      if (isGroupbyNormalCol(pQuery->pGroupbyExpr)) {  // group by columns not tags;
        type = getGroupbyColumnType(pQuery, pQuery->pGroupbyExpr);
      } else {
        type = TSDB_DATA_TYPE_INT;  // group id
      }

      initWindowResInfo(&pRuntimeEnv->windowResInfo, pRuntimeEnv, 512, 4096, type);
    }

  } else if (isGroupbyNormalCol(pQuery->pGroupbyExpr) || isIntervalQuery(pQuery)) {
    int32_t rows = getInitialPageNum(pQInfo);
    code = createDiskbasedResultBuffer(&pRuntimeEnv->pResultBuf, rows, pQuery->rowSize, pQInfo);
    if (code != TSDB_CODE_SUCCESS) {
      return code;
    }

    int16_t type = TSDB_DATA_TYPE_NULL;
    if (isGroupbyNormalCol(pQuery->pGroupbyExpr)) {
      type = getGroupbyColumnType(pQuery, pQuery->pGroupbyExpr);
    } else {
      type = TSDB_DATA_TYPE_TIMESTAMP;
    }

    initWindowResInfo(&pRuntimeEnv->windowResInfo, pRuntimeEnv, rows, 4096, type);
  }

  setQueryStatus(pQuery, QUERY_NOT_COMPLETED);

  // NOTE: the query range of a last_row query without an explicit range is not adjusted here;
  // the initial query time range is kept unchanged.

  if (pQuery->fillType != TSDB_FILL_NONE && !isPointInterpoQuery(pQuery)) {
    SFillColInfo* pColInfo = taosCreateFillColInfo(pQuery);
    pRuntimeEnv->pFillInfo = taosInitFillInfo(pQuery->order.order, 0, 0, pQuery->rec.capacity, pQuery->numOfOutput,
                                              pQuery->slidingTime, pQuery->fillType, pColInfo);
  }

  return TSDB_CODE_SUCCESS;
}

4095
/*
 * Check whether at least one of the group-by columns is a tag column.
 *
 * Returns false when pGroupbyExpr is NULL or lists no group-by column.
 * pSidset is not used.
 */
static UNUSED_FUNC bool isGroupbyEachTable(SSqlGroupbyExpr *pGroupbyExpr, STableGroupInfo *pSidset) {
  bool hasTagColumn = false;

  if (pGroupbyExpr != NULL && pGroupbyExpr->numOfGroupCols != 0) {
    int32_t k = 0;

    while (k < pGroupbyExpr->numOfGroupCols && !hasTagColumn) {
      SColIndex *pCol = taosArrayGet(pGroupbyExpr->columnInfo, k);
      hasTagColumn = (pCol->flag == TSDB_COL_TAG);
      k += 1;
    }
  }

  return hasTagColumn;
}

4110
/*
 * Clear the "complete" flag of the result info of every output function context,
 * so that the functions can be executed again on the next table.
 */
static void enableExecutionForNextTable(SQueryRuntimeEnv *pRuntimeEnv) {
  SQuery *pQuery = pRuntimeEnv->pQuery;

  int32_t col = 0;
  while (col < pQuery->numOfOutput) {
    SResultInfo *pInfo = GET_RES_INFO(&pRuntimeEnv->pCtx[col]);
    col += 1;

    if (pInfo == NULL) {
      continue;  // nothing to reset for this output column
    }

    pInfo->complete = false;
  }
}

H
Haojun Liao 已提交
4121
/*
 * Scan all data blocks delivered by the query handle of the current scan (the primary handle
 * during the master scan, the secondary handle otherwise) and apply the query functions to
 * every block on behalf of the table the block belongs to.
 *
 * The scan stops early when the query is killed.
 *
 * @return elapsed time, as the difference of two taosGetTimestampMs() readings
 */
static int64_t scanMultiTableDataBlocks(SQInfo *pQInfo) {
  SQueryRuntimeEnv *pRuntimeEnv = &pQInfo->runtimeEnv;
  SQuery           *pQuery = pRuntimeEnv->pQuery;
  SQueryCostInfo   *pCost = &pRuntimeEnv->summary;

  int64_t startTs = taosGetTimestampMs();

  TsdbQueryHandleT handle = IS_MASTER_SCAN(pRuntimeEnv)? pRuntimeEnv->pQueryHandle : pRuntimeEnv->pSecQueryHandle;
  while (tsdbNextDataBlock(handle)) {
    pCost->totalBlocks += 1;
    if (isQueryKilled(pQInfo)) {
      break;
    }

    SDataBlockInfo blockInfo = tsdbRetrieveDataBlockInfo(handle);

    // locate the query info of the table that owns this block
    // todo opt performance using hash table
    STableQueryInfo *pOwner = NULL;

    size_t numOfGroup = taosArrayGetSize(pQInfo->groupInfo.pGroupList);
    for (int32_t g = 0; g < numOfGroup && pOwner == NULL; ++g) {
      SArray *tables = taosArrayGetP(pQInfo->groupInfo.pGroupList, g);

      size_t numOfTables = taosArrayGetSize(tables);
      for (int32_t t = 0; t < numOfTables && pOwner == NULL; ++t) {
        SGroupItem      *pItem = taosArrayGet(tables, t);
        STableQueryInfo *pCandidate = pItem->info;

        if (pCandidate->id.tid == blockInfo.tid) {
          assert(pCandidate->id.uid == blockInfo.uid);
          pOwner = pItem->info;
        }
      }
    }

    assert(pOwner != NULL);
    setCurrentQueryTable(pRuntimeEnv, pOwner);

    SDataStatis *pStatis = NULL;
    SArray      *pDataBlock = loadDataBlockOnDemand(pRuntimeEnv, handle, &blockInfo, &pStatis);

    if (isIntervalQuery(pQuery)) {
      TSKEY nextKey = blockInfo.window.skey;
      setIntervalQueryRange(pQInfo, nextKey);
      /*int32_t ret = */setAdditionalInfo(pQInfo, &pOwner->id, pOwner);
    } else {
      // non-interval query: the key passed on is one step beyond the end key of the block
      int32_t step = QUERY_IS_ASC_QUERY(pQuery)? 1:-1;
      setExecutionContext(pQInfo, &pOwner->id, pOwner->groupIndex, blockInfo.window.ekey + step);
    }

    pCost->totalRows += blockInfo.rows;
    stableApplyFunctionsOnBlock(pRuntimeEnv, &blockInfo, pStatis, pDataBlock, binarySearchForKey);

    qTrace("QInfo:%p check data block, uid:%"PRId64", tid:%d, brange:%" PRId64 "-%" PRId64 ", numOfRows:%d, lastKey:%" PRId64,
           GET_QINFO_ADDR(pRuntimeEnv), blockInfo.uid, blockInfo.tid, blockInfo.window.skey, blockInfo.window.ekey,
           blockInfo.rows, pQuery->current->lastKey);
  }

  return taosGetTimestampMs() - startTs;
}

4188 4189
/*
 * Prepare the runtime environment to query a single table of a multi-table query.
 *
 * The table is the index-th item of the first table group. Its tag values are loaded,
 * the primary query handle is replaced by one that covers only this table, starting
 * from the last key reached on it, and the output buffers are initialized.
 *
 * @return false if a ts buffer exists, no cursor is saved yet, and the buffer holds no
 *         data for the tag value of this table; true otherwise
 */
static bool multiTableMultioutputHelper(SQInfo *pQInfo, int32_t index) {
  SQueryRuntimeEnv *pRuntimeEnv = &pQInfo->runtimeEnv;
  SQuery           *pQuery = pRuntimeEnv->pQuery;

  setQueryStatus(pQuery, QUERY_NOT_COMPLETED);

  SArray     *firstGroup = taosArrayGetP(pQInfo->groupInfo.pGroupList, 0);
  SGroupItem *pItem = taosArrayGet(firstGroup, index);

  setTagVal(pRuntimeEnv, &pItem->id, pQInfo->tsdb);

  qTrace("QInfo:%p query on (%d): uid:%" PRIu64 ", tid:%d, qrange:%" PRId64 "-%" PRId64, pQInfo, index,
         pItem->id.uid, pItem->id.tid, pItem->info->lastKey, pItem->info->win.ekey);

  STsdbQueryCond cond = {
      .twindow   = {pItem->info->lastKey, pItem->info->win.ekey},
      .order     = pQuery->order.order,
      .colList   = pQuery->colList,
      .numOfCols = pQuery->numOfCols,
  };

  // todo refactor
  // temporary group list that includes only the current table
  SArray *groupList = taosArrayInit(1, POINTER_BYTES);
  SArray *tableIds = taosArrayInit(1, sizeof(STableId));

  taosArrayPush(tableIds, &pItem->info->id);
  taosArrayPush(groupList, &tableIds);
  STableGroupInfo singleTable = {.numOfTables = 1, .pGroupList = groupList};

  // drop the handle left from the previous table, if any
  if (pRuntimeEnv->pQueryHandle != NULL) {
    tsdbCleanupQueryHandle(pRuntimeEnv->pQueryHandle);
    pRuntimeEnv->pQueryHandle = NULL;
  }

  pRuntimeEnv->pQueryHandle = tsdbQueryTables(pQInfo->tsdb, &cond, &singleTable, pQInfo);
  taosArrayDestroy(tableIds);
  taosArrayDestroy(groupList);

  if (pRuntimeEnv->pTSBuf != NULL) {
    if (pRuntimeEnv->cur.vgroupIndex != -1) {
      // a cursor was saved earlier: continue from it
      tsBufSetCursor(pRuntimeEnv->pTSBuf, &pRuntimeEnv->cur);
    } else {
      int64_t tagValue = pRuntimeEnv->pCtx[0].tag.i64Key;
      STSElem first = tsBufGetElemStartPos(pRuntimeEnv->pTSBuf, 0, tagValue);

      // failed to find data with the specified tag value
      if (first.vnode < 0) {
        return false;
      }
    }
  }

  initCtxOutputBuf(pRuntimeEnv);
  return true;
}

/**
 * super table query handler
 * 1. super table projection query, group-by on normal columns query, ts-comp query
 * 2. point interpolation query, last row query
 *
 * @param pQInfo
 */
4251
static void sequentialTableProcess(SQInfo *pQInfo) {
4252
  SQueryRuntimeEnv *pRuntimeEnv = &pQInfo->runtimeEnv;
4253
  SQuery *          pQuery = pRuntimeEnv->pQuery;
4254
  setQueryStatus(pQuery, QUERY_COMPLETED);
4255

4256
  size_t numOfGroups = taosArrayGetSize(pQInfo->groupInfo.pGroupList);
4257

H
Haojun Liao 已提交
4258
  if (isPointInterpoQuery(pQuery) || isFirstLastRowQuery(pQuery)) {
4259 4260
    resetCtxOutputBuf(pRuntimeEnv);
    assert(pQuery->limit.offset == 0 && pQuery->limit.limit != 0);
4261

4262 4263
    while (pQInfo->groupIndex < numOfGroups) {
      SArray* group = taosArrayGetP(pQInfo->groupInfo.pGroupList, pQInfo->groupIndex);
4264

H
Haojun Liao 已提交
4265 4266 4267 4268 4269 4270 4271 4272 4273 4274 4275 4276 4277 4278 4279 4280 4281 4282 4283 4284 4285 4286
      qTrace("QInfo:%p last_row query on group:%d, total group:%d, current group:%d", pQInfo, pQInfo->groupIndex,
             numOfGroups);

      STsdbQueryCond cond = {
          .twindow = pQuery->window,
          .colList = pQuery->colList,
          .order   = pQuery->order.order,
          .numOfCols = pQuery->numOfCols,
      };

      SArray *g1 = taosArrayInit(1, POINTER_BYTES);
      SArray *tx = taosArrayClone(group);
      taosArrayPush(g1, &tx);
      
      STableGroupInfo gp = {.numOfTables = taosArrayGetSize(tx), .pGroupList = g1};

      // include only current table
      if (pRuntimeEnv->pQueryHandle != NULL) {
        tsdbCleanupQueryHandle(pRuntimeEnv->pQueryHandle);
        pRuntimeEnv->pQueryHandle = NULL;
      }
      
4287
      if (isFirstLastRowQuery(pQuery)) {
H
Haojun Liao 已提交
4288
        pRuntimeEnv->pQueryHandle = tsdbQueryLastRow(pQInfo->tsdb, &cond, &gp, pQInfo);
H
Haojun Liao 已提交
4289
      } else {
H
Haojun Liao 已提交
4290
        pRuntimeEnv->pQueryHandle = tsdbQueryRowsInExternalWindow(pQInfo->tsdb, &cond, &gp, pQInfo);
4291
      }
H
Haojun Liao 已提交
4292 4293
      
      initCtxOutputBuf(pRuntimeEnv);
4294 4295 4296 4297 4298 4299 4300 4301 4302
      
      SArray* s = tsdbGetQueriedTableIdList(pRuntimeEnv->pQueryHandle);
      assert(taosArrayGetSize(s) >= 1);
      
      setTagVal(pRuntimeEnv, (STableId*) taosArrayGet(s, 0), pQInfo->tsdb);
      
      if (isFirstLastRowQuery(pQuery)) {
        assert(taosArrayGetSize(s) == 1);
      }
H
Haojun Liao 已提交
4303 4304 4305 4306 4307 4308 4309 4310 4311 4312 4313 4314 4315 4316 4317 4318
      
      // here we simply set the first table as current table
      pQuery->current = ((SGroupItem*) taosArrayGet(group, 0))->info;
      scanAllDataBlocks(pRuntimeEnv, pQuery->current->lastKey);
      
      int64_t numOfRes = getNumOfResult(pRuntimeEnv);
      if (numOfRes > 0) {
        pQuery->rec.rows += numOfRes;
        forwardCtxOutputBuf(pRuntimeEnv, numOfRes);
      }
      
      skipResults(pRuntimeEnv);
      pQInfo->groupIndex += 1;

      // enable execution for next table, when handling the projection query
      enableExecutionForNextTable(pRuntimeEnv);
4319 4320 4321
    }
  } else {
    /*
4322
     * 1. super table projection query, 2. group-by on normal columns query, 3. ts-comp query
4323 4324 4325
     * if the subgroup index is larger than 0, results generated by group by tbname,k is existed.
     * we need to return it to client in the first place.
     */
4326
    if (pQInfo->groupIndex > 0) {
4327
      copyFromWindowResToSData(pQInfo, pRuntimeEnv->windowResInfo.pResult);
4328
      pQuery->rec.total += pQuery->rec.rows;
4329

4330
      if (pQuery->rec.rows > 0) {
4331 4332 4333
        return;
      }
    }
4334

4335 4336
    // all data have returned already
    if (pQInfo->tableIndex >= pQInfo->groupInfo.numOfTables) {
4337 4338
      return;
    }
4339

4340 4341
    resetCtxOutputBuf(pRuntimeEnv);
    resetTimeWindowInfo(pRuntimeEnv, &pRuntimeEnv->windowResInfo);
4342 4343 4344 4345 4346

    SArray *group = taosArrayGetP(pQInfo->groupInfo.pGroupList, 0);
    assert(taosArrayGetSize(group) == pQInfo->groupInfo.numOfTables &&
           1 == taosArrayGetSize(pQInfo->groupInfo.pGroupList));

4347
    while (pQInfo->tableIndex < pQInfo->groupInfo.numOfTables) {
4348
      if (isQueryKilled(pQInfo)) {
4349 4350
        return;
      }
4351

H
hjxilinx 已提交
4352
      SGroupItem *item = taosArrayGet(group, pQInfo->tableIndex);
H
hjxilinx 已提交
4353
      pQuery->current = item->info;
H
hjxilinx 已提交
4354
      
4355
      if (!multiTableMultioutputHelper(pQInfo, pQInfo->tableIndex)) {
4356
        pQInfo->tableIndex++;
4357 4358
        continue;
      }
4359

H
hjxilinx 已提交
4360
      // TODO handle the limit offset problem
4361
      if (pQuery->numOfFilterCols == 0 && pQuery->limit.offset > 0) {
4362
        //        skipBlocks(pRuntimeEnv);
4363 4364
        if (Q_STATUS_EQUAL(pQuery->status, QUERY_COMPLETED)) {
          pQInfo->tableIndex++;
4365 4366 4367
          continue;
        }
      }
4368

H
Haojun Liao 已提交
4369
      scanAllDataBlocks(pRuntimeEnv, pQuery->current->lastKey);
4370
      skipResults(pRuntimeEnv);
4371

4372
      // the limitation of output result is reached, set the query completed
4373
      if (limitResults(pRuntimeEnv)) {
4374
        pQInfo->tableIndex = pQInfo->groupInfo.numOfTables;
4375 4376
        break;
      }
4377

4378 4379
      // enable execution for next table, when handling the projection query
      enableExecutionForNextTable(pRuntimeEnv);
4380

4381
      if (Q_STATUS_EQUAL(pQuery->status, QUERY_COMPLETED)) {
4382 4383 4384 4385 4386 4387
        /*
         * query range is identical in terms of all meters involved in query,
         * so we need to restore them at the *beginning* of query on each meter,
         * not the consecutive query on meter on which is aborted due to buffer limitation
         * to ensure that, we can reset the query range once query on a meter is completed.
         */
4388
        pQInfo->tableIndex++;
weixin_48148422's avatar
weixin_48148422 已提交
4389 4390 4391 4392

        STableIdInfo tidInfo;
        tidInfo.uid = item->id.uid;
        tidInfo.tid = item->id.tid;
weixin_48148422's avatar
weixin_48148422 已提交
4393
        tidInfo.key = pQuery->current->lastKey;
weixin_48148422's avatar
weixin_48148422 已提交
4394 4395
        taosArrayPush(pQInfo->arrTableIdInfo, &tidInfo);

4396
        // if the buffer is full or group by each table, we need to jump out of the loop
4397 4398
        if (Q_STATUS_EQUAL(pQuery->status, QUERY_RESBUF_FULL) /*||
            isGroupbyEachTable(pQuery->pGroupbyExpr, pSupporter->pSidSet)*/) {
4399 4400
          break;
        }
4401

4402
      } else {
4403
        // all data in the result buffer are skipped due to the offset, continue to retrieve data from current meter
4404 4405
        if (pQuery->rec.rows == 0) {
          assert(!Q_STATUS_EQUAL(pQuery->status, QUERY_RESBUF_FULL));
4406 4407
          continue;
        } else {
4408 4409 4410
          // buffer is full, wait for the next round to retrieve data from current meter
          assert(Q_STATUS_EQUAL(pQuery->status, QUERY_RESBUF_FULL));
          break;
4411 4412 4413 4414
        }
      }
    }
  }
4415

4416 4417 4418 4419 4420 4421 4422 4423 4424 4425 4426 4427
  /*
   * 1. super table projection query, group-by on normal columns query, ts-comp query
   * 2. point interpolation query, last row query
   *
   * group-by on normal columns query and last_row query do NOT invoke the finalizer here,
   * since the finalize stage will be done at the client side.
   *
   * projection query, point interpolation query do not need the finalizer.
   *
   * Only the ts-comp query requires the finalizer function to be executed here.
   */
  if (isTSCompQuery(pQuery)) {
H
hjxilinx 已提交
4428
    finalizeQueryResult(pRuntimeEnv);
4429
  }
4430

4431 4432 4433
  if (pRuntimeEnv->pTSBuf != NULL) {
    pRuntimeEnv->cur = pRuntimeEnv->pTSBuf->cur;
  }
4434

4435 4436 4437
  // todo refactor
  if (isGroupbyNormalCol(pQuery->pGroupbyExpr)) {
    SWindowResInfo *pWindowResInfo = &pRuntimeEnv->windowResInfo;
4438

4439 4440 4441
    for (int32_t i = 0; i < pWindowResInfo->size; ++i) {
      SWindowStatus *pStatus = &pWindowResInfo->pResult[i].status;
      pStatus->closed = true;  // enable return all results for group by normal columns
4442

4443
      SWindowResult *pResult = &pWindowResInfo->pResult[i];
4444
      for (int32_t j = 0; j < pQuery->numOfOutput; ++j) {
4445 4446 4447
        pResult->numOfRows = MAX(pResult->numOfRows, pResult->resultInfo[j].numOfRes);
      }
    }
4448

4449
    pQInfo->groupIndex = 0;
4450
    pQuery->rec.rows = 0;
4451 4452
    copyFromWindowResToSData(pQInfo, pWindowResInfo->pResult);
  }
4453 4454

  qTrace(
H
Haojun Liao 已提交
4455
      "QInfo %p numOfTables:%d, index:%d, numOfGroups:%d, %d points returned, total:%"PRId64", offset:%" PRId64,
4456 4457
      pQInfo, pQInfo->groupInfo.numOfTables, pQInfo->tableIndex, numOfGroups, pQuery->rec.rows, pQuery->rec.total,
      pQuery->limit.offset);
4458 4459
}

4460 4461 4462 4463
/*
 * Switch the query into reverse-scan mode: mark the runtime environment as reverse scan,
 * swap the bounds of the query window, flip the scan order, and create the secondary
 * query handle for the swapped window.
 */
static void doSaveContext(SQInfo *pQInfo) {
  SQueryRuntimeEnv *pEnv = &pQInfo->runtimeEnv;
  SQuery           *pQuery = pEnv->pQuery;

  SET_REVERSE_SCAN_FLAG(pEnv);
  SWAP(pQuery->window.skey, pQuery->window.ekey, TSKEY);
  SWITCH_ORDER(pQuery->order.order);

  // the ts buffer cursor follows the new scan order
  if (pEnv->pTSBuf != NULL) {
    pEnv->pTSBuf->cur.order = pQuery->order.order;
  }

  // query condition for the reversed window/order
  STsdbQueryCond reverseCond = {
      .numOfCols = pQuery->numOfCols,
      .colList = pQuery->colList,
      .order   = pQuery->order.order,
      .twindow = pQuery->window,
  };

  // clean unused handle
  if (pEnv->pSecQueryHandle != NULL) {
    tsdbCleanupQueryHandle(pEnv->pSecQueryHandle);
  }

  pEnv->pSecQueryHandle = tsdbQueryTables(pQInfo->tsdb, &reverseCond, &pQInfo->tableIdGroupInfo, pQInfo);

  setQueryStatus(pQuery, QUERY_NOT_COMPLETED);
  switchCtxOrder(pEnv);
  disableFuncInReverseScan(pQInfo);
}

4491 4492 4493 4494
/*
 * Leave reverse-scan mode: swap the bounds of the query window back, flip the order of the
 * ts buffer cursor (if a ts buffer exists), switch the order of the function contexts and
 * mark the runtime environment as master scan again.
 */
static void doRestoreContext(SQInfo *pQInfo) {
  SQueryRuntimeEnv *pEnv = &pQInfo->runtimeEnv;
  SQuery           *pQuery = pEnv->pQuery;

  SWAP(pQuery->window.skey, pQuery->window.ekey, TSKEY);

  if (pEnv->pTSBuf != NULL) {
    SWITCH_ORDER(pEnv->pTSBuf->cur.order);
  }

  switchCtxOrder(pEnv);
  SET_MASTER_SCAN_FLAG(pEnv);
}

4505 4506 4507
/*
 * Close all time windows once a scan is finished.
 *
 * For an interval query the window result info of every table in every group is closed;
 * otherwise only the window result info of the runtime environment (group results) is closed.
 */
static void doCloseAllTimeWindowAfterScan(SQInfo *pQInfo) {
  SQuery *pQuery = pQInfo->runtimeEnv.pQuery;

  if (!isIntervalQuery(pQuery)) {
    // close results for group result
    closeAllTimeWindow(&pQInfo->runtimeEnv.windowResInfo);
    return;
  }

  size_t numOfGroup = taosArrayGetSize(pQInfo->groupInfo.pGroupList);
  for (int32_t g = 0; g < numOfGroup; ++g) {
    SArray *tables = taosArrayGetP(pQInfo->groupInfo.pGroupList, g);

    size_t numOfTables = taosArrayGetSize(tables);
    for (int32_t t = 0; t < numOfTables; ++t) {
      SGroupItem *pItem = taosArrayGet(tables, t);
      closeAllTimeWindow(&pItem->info->windowResInfo);
    }
  }
}

/*
 * Execute a multi-table query whose results are merged over all tables.
 *
 * If pQInfo->groupIndex > 0 the scan has already been done by a previous invocation and only
 * the remaining results are copied into the output buffer. Otherwise all qualified data blocks
 * are scanned (followed by a reverse scan if the query needs one), all time windows are closed
 * and the results are merged/copied into the output buffer.
 *
 * The function returns early, without producing results, if the query is killed or
 * pQInfo->code reports an error.
 */
static void multiTableQueryProcess(SQInfo *pQInfo) {
  SQueryRuntimeEnv *pRuntimeEnv = &pQInfo->runtimeEnv;
  SQuery *          pQuery = pRuntimeEnv->pQuery;

  if (pQInfo->groupIndex > 0) {
    /*
     * if the groupIndex > 0, the query process must be completed yet, we only need to
     * copy the data into output buffer
     */
    if (isIntervalQuery(pQuery)) {
      copyResToQueryResultBuf(pQInfo, pQuery);

#ifdef _DEBUG_VIEW
      displayInterResult(pQuery->sdata, pRuntimeEnv, pQuery->sdata[0]->num);
#endif
    } else {
      copyFromWindowResToSData(pQInfo, pRuntimeEnv->windowResInfo.pResult);
    }

    qTrace("QInfo:%p current:%" PRId64 ", total:%" PRId64, pQInfo, (int64_t)pQuery->rec.rows,
           (int64_t)pQuery->rec.total);
    return;
  }

  qTrace("QInfo:%p query start, qrange:%" PRId64 "-%" PRId64 ", order:%d, forward scan start", pQInfo,
         pQuery->window.skey, pQuery->window.ekey, pQuery->order.order);

  // do check all qualified data blocks
  int64_t el = scanMultiTableDataBlocks(pQInfo);
  qTrace("QInfo:%p master scan completed, elapsed time: %" PRId64 "ms, reverse scan start", pQInfo, el);

  // query error occurred or query is killed, abort current execution
  if (pQInfo->code != TSDB_CODE_SUCCESS || isQueryKilled(pQInfo)) {
    qTrace("QInfo:%p query killed or error occurred, code:%d, abort", pQInfo, pQInfo->code);
    return;
  }

  // close all time window results
  doCloseAllTimeWindowAfterScan(pQInfo);

  if (needReverseScan(pQuery)) {
    doSaveContext(pQInfo);

    el = scanMultiTableDataBlocks(pQInfo);
    qTrace("QInfo:%p reversed scan completed, elapsed time: %" PRId64 "ms", pQInfo, el);

    doRestoreContext(pQInfo);
  } else {
    qTrace("QInfo:%p no need to do reversed scan, query completed", pQInfo);
  }

  setQueryStatus(pQuery, QUERY_COMPLETED);

  if (pQInfo->code != TSDB_CODE_SUCCESS || isQueryKilled(pQInfo)) {
    qTrace("QInfo:%p query killed or error occurred, code:%d, abort", pQInfo, pQInfo->code);
    return;
  }

  if (isIntervalQuery(pQuery) || isSumAvgRateQuery(pQuery)) {
    if (mergeIntoGroupResult(pQInfo) == TSDB_CODE_SUCCESS) {
      copyResToQueryResultBuf(pQInfo, pQuery);

#ifdef _DEBUG_VIEW
      displayInterResult(pQuery->sdata, pRuntimeEnv, pQuery->sdata[0]->num);
#endif
    }
  } else {  // not a interval query
    copyFromWindowResToSData(pQInfo, pRuntimeEnv->windowResInfo.pResult);
  }

  // handle the limitation of output buffer
  // the row counters are printed as 64-bit values, as in the other traces of this file
  qTrace("QInfo:%p points returned:%" PRId64 ", total:%" PRId64, pQInfo, (int64_t)pQuery->rec.rows,
         (int64_t)(pQuery->rec.total + pQuery->rec.rows));
}

/*
 * in each query, this function will be called only once, no retry for further result.
 *
 * select count(*)/top(field,k)/avg(field name) from table_name [where ts>now-1a];
 * select count(*) from table_name group by status_column;
 */
H
hjxilinx 已提交
4603
static void tableFixedOutputProcess(SQInfo *pQInfo, STableQueryInfo* pTableInfo) {
4604
  SQueryRuntimeEnv *pRuntimeEnv = &pQInfo->runtimeEnv;
H
hjxilinx 已提交
4605 4606
  
  SQuery *pQuery = pRuntimeEnv->pQuery;
H
Haojun Liao 已提交
4607 4608 4609 4610
  if (!isTopBottomQuery(pQuery) && pQuery->limit.offset > 0) {  // no need to execute, since the output will be ignore.
    return;
  }
  
H
hjxilinx 已提交
4611 4612
  pQuery->current = pTableInfo;  // set current query table info
  
H
Haojun Liao 已提交
4613
  scanAllDataBlocks(pRuntimeEnv, pTableInfo->lastKey);
H
hjxilinx 已提交
4614
  finalizeQueryResult(pRuntimeEnv);
4615

4616
  if (isQueryKilled(pQInfo)) {
4617 4618
    return;
  }
4619

H
Haojun Liao 已提交
4620
  // since the numOfRows must be identical for all sql functions that are allowed to be executed simutaneously.
4621
  pQuery->rec.rows = getNumOfResult(pRuntimeEnv);
4622

4623
  skipResults(pRuntimeEnv);
4624
  limitResults(pRuntimeEnv);
4625 4626
}

H
hjxilinx 已提交
4627
/*
 * Execute a multi-row output query on one table.
 *
 * The table is scanned repeatedly from pQuery->current->lastKey until either some rows are
 * available (pQuery->rec.rows > 0) or the query status is QUERY_COMPLETED. Afterwards the
 * limit is applied. When the query is completed, the (uid, tid, lastKey) of the table is
 * appended to pQInfo->arrTableIdInfo.
 *
 * The function returns early, without applying the limit, if the query is killed.
 */
static void tableMultiOutputProcess(SQInfo *pQInfo, STableQueryInfo* pTableInfo) {
  SQueryRuntimeEnv *pRuntimeEnv = &pQInfo->runtimeEnv;

  SQuery *pQuery = pRuntimeEnv->pQuery;
  pQuery->current = pTableInfo;

  // for ts_comp query, re-initialization is not allowed
  if (!isTSCompQuery(pQuery)) {
    resetCtxOutputBuf(pRuntimeEnv);
  }

  // skip blocks without loading the actual data block from file if no filter condition is present
  skipBlocks(&pQInfo->runtimeEnv);
  if (pQuery->limit.offset > 0 && pQuery->numOfFilterCols == 0) {
    setQueryStatus(pQuery, QUERY_COMPLETED);
    return;
  }

  while (1) {
    scanAllDataBlocks(pRuntimeEnv, pQuery->current->lastKey);
    finalizeQueryResult(pRuntimeEnv);

    if (isQueryKilled(pQInfo)) {
      return;
    }

    pQuery->rec.rows = getNumOfResult(pRuntimeEnv);
    if (pQuery->limit.offset > 0 && pQuery->numOfFilterCols > 0 && pQuery->rec.rows > 0) {
      skipResults(pRuntimeEnv);
    }

    // stop as soon as there is something to return or the scan is completed; otherwise
    // no row is left for this round (presumably all consumed by the offset) and another
    // round is required
    if (pQuery->rec.rows > 0 || Q_STATUS_EQUAL(pQuery->status, QUERY_COMPLETED)) {
      break;
    }

    // the format string must consume exactly the arguments that are passed: the original
    // message carried "vid:%d sid:%d id:%s" conversions without matching arguments
    qTrace("QInfo:%p skip current result, offset:%" PRId64 ", next qrange:%" PRId64 "-%" PRId64,
           pQInfo, pQuery->limit.offset, pQuery->current->lastKey, pQuery->window.ekey);

    resetCtxOutputBuf(pRuntimeEnv);
  }

  limitResults(pRuntimeEnv);
  if (Q_STATUS_EQUAL(pQuery->status, QUERY_RESBUF_FULL)) {
    qTrace("QInfo:%p query paused due to output limitation, next qrange:%" PRId64 "-%" PRId64, pQInfo,
        pQuery->current->lastKey, pQuery->window.ekey);
  } else if (Q_STATUS_EQUAL(pQuery->status, QUERY_COMPLETED)) {
    // remember the last key of this table
    STableIdInfo tidInfo;
    tidInfo.uid = pQuery->current->id.uid;
    tidInfo.tid = pQuery->current->id.tid;
    tidInfo.key = pQuery->current->lastKey;
    taosArrayPush(pQInfo->arrTableIdInfo, &tidInfo);
  }

  if (!isTSCompQuery(pQuery)) {
    assert(pQuery->rec.rows <= pQuery->rec.capacity);
  }
}

H
Haojun Liao 已提交
4689
/*
 * Scan the data blocks starting from "start" and finalize the result, repeating until the
 * query status is QUERY_COMPLETED or QUERY_RESBUF_FULL. Returns immediately when the query
 * is killed.
 */
static void tableIntervalProcessImpl(SQueryRuntimeEnv *pRuntimeEnv, TSKEY start) {
  SQuery *pQ = pRuntimeEnv->pQuery;

  do {
    scanAllDataBlocks(pRuntimeEnv, start);

    if (isQueryKilled(GET_QINFO_ADDR(pRuntimeEnv))) {
      return;
    }

    assert(!Q_STATUS_EQUAL(pQ->status, QUERY_NOT_COMPLETED));
    finalizeQueryResult(pRuntimeEnv);

    // here we can ignore the records in case of no interpolation
    // todo handle offset, in case of top/bottom interval query
    bool filterOrTsBuf = (pQ->numOfFilterCols > 0 || pRuntimeEnv->pTSBuf != NULL);
    if (filterOrTsBuf && pQ->limit.offset > 0 && pQ->fillType == TSDB_FILL_NONE) {
      // drop as many closed time windows as the remaining offset allows
      int32_t numOfClosed = numOfClosedTimeWindow(&pRuntimeEnv->windowResInfo);
      int32_t numToDrop = MIN(numOfClosed, pQ->limit.offset);

      clearFirstNTimeWindow(pRuntimeEnv, numToDrop);
      pQ->limit.offset -= numToDrop;
    }
  } while (!Q_STATUS_EQUAL(pQ->status, QUERY_COMPLETED | QUERY_RESBUF_FULL));
}

4720
// handle time interval query on table
H
hjxilinx 已提交
4721
static void tableIntervalProcess(SQInfo *pQInfo, STableQueryInfo* pTableInfo) {
4722 4723
  SQueryRuntimeEnv *pRuntimeEnv = &(pQInfo->runtimeEnv);

H
hjxilinx 已提交
4724 4725
  SQuery *pQuery = pRuntimeEnv->pQuery;
  pQuery->current = pTableInfo;
4726

H
Haojun Liao 已提交
4727 4728 4729
  int32_t numOfInterpo = 0;
  TSKEY newStartKey = TSKEY_INITIAL_VAL;
  
4730
  // skip blocks without load the actual data block from file if no filter condition present
H
Haojun Liao 已提交
4731
  skipTimeInterval(pRuntimeEnv, &newStartKey);
4732
  if (pQuery->limit.offset > 0 && pQuery->numOfFilterCols == 0 && pRuntimeEnv->pFillInfo == NULL) {
4733 4734 4735 4736
    setQueryStatus(pQuery, QUERY_COMPLETED);
    return;
  }

4737
  while (1) {
H
Haojun Liao 已提交
4738
    tableIntervalProcessImpl(pRuntimeEnv, newStartKey);
4739

H
hjxilinx 已提交
4740
    if (isIntervalQuery(pQuery)) {
4741
      pQInfo->groupIndex = 0;  // always start from 0
4742
      pQuery->rec.rows = 0;
4743
      copyFromWindowResToSData(pQInfo, pRuntimeEnv->windowResInfo.pResult);
4744

4745
      clearFirstNTimeWindow(pRuntimeEnv, pQInfo->groupIndex);
4746
    }
4747

4748
    // the offset is handled at prepare stage if no interpolation involved
4749
    if (pQuery->fillType == TSDB_FILL_NONE || pQuery->rec.rows == 0) {
4750
      limitResults(pRuntimeEnv);
4751 4752
      break;
    } else {
4753 4754 4755 4756
      TSKEY ekey = taosGetRevisedEndKey(pQuery->window.ekey, pQuery->order.order, pQuery->slidingTime,
                                        pQuery->slidingTimeUnit, pQuery->precision);
      taosFillSetStartInfo(pRuntimeEnv->pFillInfo, pQuery->rec.rows, ekey);
      taosFillCopyInputDataFromFilePage(pRuntimeEnv->pFillInfo, (tFilePage**) pQuery->sdata);
4757
      numOfInterpo = 0;
4758
      
4759
      pQuery->rec.rows = doFillGapsInResults(pRuntimeEnv, (tFilePage **)pQuery->sdata, pQuery->rec.rows, &numOfInterpo);
4760
      if (pQuery->rec.rows > 0 || Q_STATUS_EQUAL(pQuery->status, QUERY_COMPLETED)) {
4761
        limitResults(pRuntimeEnv);
4762 4763
        break;
      }
4764

4765
      // no result generated yet, continue retrieve data
4766
      pQuery->rec.rows = 0;
4767 4768
    }
  }
4769

4770 4771
  // all data scanned, the group by normal column can return
  if (isGroupbyNormalCol(pQuery->pGroupbyExpr)) {  // todo refactor with merge interval time result
4772
    pQInfo->groupIndex = 0;
4773
    pQuery->rec.rows = 0;
4774
    copyFromWindowResToSData(pQInfo, pRuntimeEnv->windowResInfo.pResult);
4775
    clearFirstNTimeWindow(pRuntimeEnv, pQInfo->groupIndex);
4776
  }
4777

4778 4779 4780
  pQInfo->pointsInterpo += numOfInterpo;
}

4781 4782 4783 4784
/*
 * Execute one round of a query on a single table and leave the rows of this round in
 * pQuery->rec.rows.
 *
 * Three cases are handled in order:
 *  1. results remain from a previous fill (interpolation) round: only those are produced;
 *  2. the scan is already completed: remaining group/interval results, if any, are copied
 *     out of the window result buffer;
 *  3. otherwise the query is dispatched to the interval, fixed-output or multi-output
 *     processor, and the elapsed time is accumulated in the runtime summary.
 */
static void tableQueryImpl(SQInfo *pQInfo) {
  SQueryRuntimeEnv *pRuntimeEnv = &pQInfo->runtimeEnv;
  SQuery *          pQuery = pRuntimeEnv->pQuery;

  if (queryHasRemainResults(pRuntimeEnv)) {
    /*
     * There are remain results that are not returned due to result interpolation
     * So, we do keep in this procedure instead of launching retrieve procedure for next results.
     */
    int32_t numOfInterpo = 0;
    int32_t remain = taosNumOfRemainRows(pRuntimeEnv->pFillInfo);
    pQuery->rec.rows = doFillGapsInResults(pRuntimeEnv, (tFilePage **)pQuery->sdata, remain, &numOfInterpo);

    if (pQuery->rec.rows > 0) {
      limitResults(pRuntimeEnv);
    }

    // rec.rows/rec.total are printed with PRId64 elsewhere in this function, "%d" does not match them
    qTrace("QInfo:%p current:%" PRId64 " returned, total:%" PRId64, pQInfo, pQuery->rec.rows, pQuery->rec.total);
    return;
  }

  // here we have scan all qualified data in both data file and cache
  if (Q_STATUS_EQUAL(pQuery->status, QUERY_COMPLETED)) {
    // continue to get push data from the group result
    if (isGroupbyNormalCol(pQuery->pGroupbyExpr) ||
        ((isIntervalQuery(pQuery) && pQuery->rec.total < pQuery->limit.limit))) {
      // todo limit the output for interval query?
      pQuery->rec.rows = 0;
      pQInfo->groupIndex = 0;  // always start from 0

      if (pRuntimeEnv->windowResInfo.size > 0) {
        // the copy routine accumulates the copied rows into pQuery->rec.rows (see its use in
        // tableIntervalProcess); adding rec.rows to itself afterwards would double the count
        copyFromWindowResToSData(pQInfo, pRuntimeEnv->windowResInfo.pResult);

        clearFirstNTimeWindow(pRuntimeEnv, pQInfo->groupIndex);

        if (pQuery->rec.rows > 0) {
          qTrace("QInfo:%p %" PRId64 " rows returned from group results, total:%" PRId64, pQInfo, pQuery->rec.rows,
                 pQuery->rec.total);
          return;
        }
      }
    }

    qTrace("QInfo:%p query over, %" PRId64 " rows are returned", pQInfo, pQuery->rec.total);
    return;
  }

  // number of points returned during this query
  pQuery->rec.rows = 0;
  int64_t st = taosGetTimestampUs();

  // this routine only serves a query on exactly one table: the first item of the first group
  assert(pQInfo->groupInfo.numOfTables == 1);
  SArray* g = taosArrayGetP(pQInfo->groupInfo.pGroupList, 0);
  SGroupItem* item = taosArrayGet(g, 0);

  // group by normal column, sliding window query, interval query are handled by interval query processor
  if (isIntervalQuery(pQuery) || isGroupbyNormalCol(pQuery->pGroupbyExpr)) {  // interval (down sampling operation)
    tableIntervalProcess(pQInfo, item->info);
  } else if (isFixedOutputQuery(pQuery)) {
    tableFixedOutputProcess(pQInfo, item->info);
  } else {  // diff/add/multiply/subtract/division
    assert(pQuery->checkBuffer == 1);
    tableMultiOutputProcess(pQInfo, item->info);
  }

  // record the total elapsed time
  pRuntimeEnv->summary.elapsedTime += (taosGetTimestampUs() - st);
  assert(pQInfo->groupInfo.numOfTables == 1);

  /* check if query is killed or not */
  if (isQueryKilled(pQInfo)) {
    qTrace("QInfo:%p query is killed", pQInfo);
  } else {
    qTrace("QInfo:%p query paused, %" PRId64 " rows returned, numOfTotal:%" PRId64 " rows",
        pQInfo, pQuery->rec.rows, pQuery->rec.total + pQuery->rec.rows);
  }
}

4859 4860
/*
 * Execute one round of a query on a super table. pQuery->rec.rows is reset first.
 *
 * Interval queries, and fixed output queries that are neither point interpolation,
 * group-by-normal-column nor first/last row queries, are handled by
 * multiTableQueryProcess(); all other queries are processed table by table through
 * sequentialTableProcess(). The elapsed time is accumulated in the runtime summary.
 */
static void stableQueryImpl(SQInfo *pQInfo) {
  SQuery *pQuery = pQInfo->runtimeEnv.pQuery;
  pQuery->rec.rows = 0;

  int64_t st = taosGetTimestampUs();

  if (isIntervalQuery(pQuery) ||
      (isFixedOutputQuery(pQuery) && (!isPointInterpoQuery(pQuery)) && !isGroupbyNormalCol(pQuery->pGroupbyExpr) &&
      !isFirstLastRowQuery(pQuery))) {
    multiTableQueryProcess(pQInfo);
  } else {
    assert((pQuery->checkBuffer == 1 && pQuery->intervalTime == 0) || isPointInterpoQuery(pQuery) ||
            isFirstLastRowQuery(pQuery) || isGroupbyNormalCol(pQuery->pGroupbyExpr));

    sequentialTableProcess(pQInfo);
  }

  // record the total elapsed time
  pQInfo->runtimeEnv.summary.elapsedTime += (taosGetTimestampUs() - st);

  if (pQuery->rec.rows == 0) {
    // rec.total is a 64-bit counter (printed with PRId64 elsewhere); the table count is
    // converted explicitly so that it always matches "%d"
    qTrace("QInfo:%p over, %d tables queried, %" PRId64 " rows are returned", pQInfo,
           (int32_t)pQInfo->groupInfo.numOfTables, pQuery->rec.total);
  }
}

4884
/**
 * Find the position of the column referenced by an expression in its source column list.
 *
 * A tag column (TSDB_COL_IS_TAG on the expression flag) is looked up by column id in
 * pTagCols (pQueryMsg->numOfTags entries); any other column is looked up in
 * pQueryMsg->colList (pQueryMsg->numOfCols entries).
 *
 * @param pQueryMsg query message that carries the column list and both counters
 * @param pExprMsg  expression whose colInfo.colId/colInfo.flag identify the column
 * @param pTagCols  tag column list
 * @return index of the matching entry. A missing column is a programming error and is
 *         asserted; when assertions are compiled out INT32_MAX is returned, which is
 *         never smaller than either counter, so the range checks done by the callers fail.
 */
static int32_t getColumnIndexInSource(SQueryTableMsg *pQueryMsg, SSqlFuncMsg *pExprMsg, SColumnInfo* pTagCols) {
  if (TSDB_COL_IS_TAG(pExprMsg->colInfo.flag)) {
    for (int32_t j = 0; j < pQueryMsg->numOfTags; ++j) {
      if (pExprMsg->colInfo.colId == pTagCols[j].colId) {
        return j;
      }
    }
  } else {
    for (int32_t j = 0; j < pQueryMsg->numOfCols; ++j) {
      if (pExprMsg->colInfo.colId == pQueryMsg->colList[j].colId) {
        return j;
      }
    }
  }

  assert(0);

  // without this statement the function would fall off its end when NDEBUG is defined
  return INT32_MAX;
}

4909 4910 4911
/**
 * Check that the column referenced by an expression resolves to an index that is smaller
 * than the number of columns or smaller than the number of tags of the query message.
 */
bool validateExprColumnInfo(SQueryTableMsg *pQueryMsg, SSqlFuncMsg *pExprMsg, SColumnInfo* pTagCols) {
  int32_t colPos = getColumnIndexInSource(pQueryMsg, pExprMsg, pTagCols);

  if (colPos < pQueryMsg->numOfCols) {
    return true;
  }

  return colPos < pQueryMsg->numOfTags;
}

4914
/**
 * Sanity check of the scalar fields of a query message.
 *
 * The checks are applied in order and only the first violation is logged:
 * interval time must not be negative, the number of tables must be positive, the number
 * of group-by columns must not be negative, and the number of output columns must be in
 * the range [1, TSDB_MAX_COLUMNS].
 *
 * @return true if all checks pass, false otherwise
 */
static bool validateQueryMsg(SQueryTableMsg *pQueryMsg) {
  bool valid = false;

  if (pQueryMsg->intervalTime < 0) {
    qError("qmsg:%p illegal value of interval time %" PRId64, pQueryMsg, pQueryMsg->intervalTime);
  } else if (pQueryMsg->numOfTables <= 0) {
    qError("qmsg:%p illegal value of numOfTables %d", pQueryMsg, pQueryMsg->numOfTables);
  } else if (pQueryMsg->numOfGroupCols < 0) {
    qError("qmsg:%p illegal value of numOfGroupbyCols %d", pQueryMsg, pQueryMsg->numOfGroupCols);
  } else if (pQueryMsg->numOfOutput <= 0 || pQueryMsg->numOfOutput > TSDB_MAX_COLUMNS) {
    qError("qmsg:%p illegal value of output columns %d", pQueryMsg, pQueryMsg->numOfOutput);
  } else {
    valid = true;
  }

  return valid;
}

/**
 * Validate the number of source columns and tags of a query message.
 *
 * Negative counters, or a total above TSDB_MAX_COLUMNS, are rejected and logged. When the
 * message references neither columns nor tags, every output expression has to be either
 * a TSDB_FUNC_TAGPRJ or a TSDB_FUNC_TID_TAG on the table name column.
 *
 * @param pQueryMsg query message (counters already converted to host order)
 * @param pExprMsg  array of pQueryMsg->numOfOutput expression pointers
 * @return true if the message is acceptable
 */
static bool validateQuerySourceCols(SQueryTableMsg *pQueryMsg, SSqlFuncMsg** pExprMsg) {
  int32_t total = pQueryMsg->numOfCols + pQueryMsg->numOfTags;

  if (pQueryMsg->numOfCols < 0 || pQueryMsg->numOfTags < 0 || total > TSDB_MAX_COLUMNS) {
    qError("qmsg:%p illegal value of numOfCols %d numOfTags:%d", pQueryMsg, pQueryMsg->numOfCols, pQueryMsg->numOfTags);
    return false;
  }

  if (total != 0) {
    return true;
  }

  // no source column and no tag: only tag projection / table name output is allowed
  int32_t i = 0;
  while (i < pQueryMsg->numOfOutput) {
    SSqlFuncMsg *pFunc = pExprMsg[i];

    bool allowed = (pFunc->functionId == TSDB_FUNC_TAGPRJ) ||
                   (pFunc->functionId == TSDB_FUNC_TID_TAG && pFunc->colInfo.colId == TSDB_TBNAME_COLUMN_INDEX);
    if (!allowed) {
      return false;
    }

    ++i;
  }

  return true;
}

4957
/**
 * Build the table id list from the table id section of a query message.
 *
 * Each STableIdInfo entry is converted in place (tid with htonl, uid and key with htobe64)
 * and then appended to a newly created array that is returned through pTableIdList.
 *
 * @param pQueryMsg    query message, numOfTables must be positive
 * @param pMsg         start of the table id section
 * @param pTableIdList output, the created array
 * @return pointer to the first byte after the table id section
 */
static char *createTableIdList(SQueryTableMsg *pQueryMsg, char *pMsg, SArray **pTableIdList) {
  assert(pQueryMsg->numOfTables > 0);

  *pTableIdList = taosArrayInit(pQueryMsg->numOfTables, sizeof(STableIdInfo));

  STableIdInfo *pEntry = (STableIdInfo *)pMsg;
  for (int32_t j = 0; j < pQueryMsg->numOfTables; ++j, ++pEntry) {
    pEntry->tid = htonl(pEntry->tid);
    pEntry->uid = htobe64(pEntry->uid);
    pEntry->key = htobe64(pEntry->key);

    taosArrayPush(*pTableIdList, pEntry);
  }

  // pEntry now points right behind the last converted entry
  return (char *)pEntry;
}
4975

4976
/**
H
hjxilinx 已提交
4977
 * pQueryMsg->head has been converted before this function is called.
4978
 *
H
hjxilinx 已提交
4979
 * @param pQueryMsg
4980 4981 4982 4983
 * @param pTableIdList
 * @param pExpr
 * @return
 */
4984
static int32_t convertQueryMsg(SQueryTableMsg *pQueryMsg, SArray **pTableIdList, SSqlFuncMsg ***pExpr,
weixin_48148422's avatar
weixin_48148422 已提交
4985
                               char **tagCond, char** tbnameCond, SColIndex **groupbyCols, SColumnInfo** tagCols) {
4986 4987 4988 4989 4990 4991 4992 4993
  pQueryMsg->numOfTables = htonl(pQueryMsg->numOfTables);

  pQueryMsg->window.skey = htobe64(pQueryMsg->window.skey);
  pQueryMsg->window.ekey = htobe64(pQueryMsg->window.ekey);
  pQueryMsg->intervalTime = htobe64(pQueryMsg->intervalTime);
  pQueryMsg->slidingTime = htobe64(pQueryMsg->slidingTime);
  pQueryMsg->limit = htobe64(pQueryMsg->limit);
  pQueryMsg->offset = htobe64(pQueryMsg->offset);
H
hjxilinx 已提交
4994

4995 4996 4997
  pQueryMsg->order = htons(pQueryMsg->order);
  pQueryMsg->orderColId = htons(pQueryMsg->orderColId);
  pQueryMsg->queryType = htons(pQueryMsg->queryType);
weixin_48148422's avatar
weixin_48148422 已提交
4998
  pQueryMsg->tagNameRelType = htons(pQueryMsg->tagNameRelType);
4999 5000

  pQueryMsg->numOfCols = htons(pQueryMsg->numOfCols);
5001
  pQueryMsg->numOfOutput = htons(pQueryMsg->numOfOutput);
H
hjxilinx 已提交
5002
  pQueryMsg->numOfGroupCols = htons(pQueryMsg->numOfGroupCols);
5003 5004 5005
  pQueryMsg->tagCondLen = htons(pQueryMsg->tagCondLen);
  pQueryMsg->tsOffset = htonl(pQueryMsg->tsOffset);
  pQueryMsg->tsLen = htonl(pQueryMsg->tsLen);
H
hjxilinx 已提交
5006
  pQueryMsg->tsNumOfBlocks = htonl(pQueryMsg->tsNumOfBlocks);
5007
  pQueryMsg->tsOrder = htonl(pQueryMsg->tsOrder);
5008
  pQueryMsg->numOfTags = htonl(pQueryMsg->numOfTags);
5009

5010
  // query msg safety check
5011
  if (!validateQueryMsg(pQueryMsg)) {
5012 5013 5014
    return TSDB_CODE_INVALID_QUERY_MSG;
  }

H
hjxilinx 已提交
5015
  char *pMsg = (char *)(pQueryMsg->colList) + sizeof(SColumnInfo) * pQueryMsg->numOfCols;
5016

H
hjxilinx 已提交
5017
  for (int32_t col = 0; col < pQueryMsg->numOfCols; ++col) {
5018 5019
    SColumnInfo *pColInfo = &pQueryMsg->colList[col];

H
hjxilinx 已提交
5020
    pColInfo->colId = htons(pColInfo->colId);
5021
    pColInfo->type = htons(pColInfo->type);
H
hjxilinx 已提交
5022 5023
    pColInfo->bytes = htons(pColInfo->bytes);
    pColInfo->numOfFilters = htons(pColInfo->numOfFilters);
5024

H
hjxilinx 已提交
5025
    assert(pColInfo->type >= TSDB_DATA_TYPE_BOOL && pColInfo->type <= TSDB_DATA_TYPE_NCHAR);
5026

H
hjxilinx 已提交
5027
    int32_t numOfFilters = pColInfo->numOfFilters;
5028
    if (numOfFilters > 0) {
H
hjxilinx 已提交
5029
      pColInfo->filters = calloc(numOfFilters, sizeof(SColumnFilterInfo));
5030 5031 5032
    }

    for (int32_t f = 0; f < numOfFilters; ++f) {
5033 5034 5035 5036
      SColumnFilterInfo *pFilterMsg = (SColumnFilterInfo *)pMsg;
      
      SColumnFilterInfo *pColFilter = &pColInfo->filters[f];
      pColFilter->filterstr = htons(pFilterMsg->filterstr);
5037 5038 5039

      pMsg += sizeof(SColumnFilterInfo);

5040 5041
      if (pColFilter->filterstr) {
        pColFilter->len = htobe64(pFilterMsg->len);
5042

5043
        pColFilter->pz = (int64_t) calloc(1, pColFilter->len + 1 * TSDB_NCHAR_SIZE); // note: null-terminator
5044 5045
        memcpy((void *)pColFilter->pz, pMsg, pColFilter->len);
        pMsg += (pColFilter->len + 1);
5046
      } else {
5047 5048
        pColFilter->lowerBndi = htobe64(pFilterMsg->lowerBndi);
        pColFilter->upperBndi = htobe64(pFilterMsg->upperBndi);
5049 5050
      }

5051 5052
      pColFilter->lowerRelOptr = htons(pFilterMsg->lowerRelOptr);
      pColFilter->upperRelOptr = htons(pFilterMsg->upperRelOptr);
5053 5054 5055
    }
  }

5056 5057
  *pExpr = calloc(pQueryMsg->numOfOutput, POINTER_BYTES);
  SSqlFuncMsg *pExprMsg = (SSqlFuncMsg *)pMsg;
5058

5059
  for (int32_t i = 0; i < pQueryMsg->numOfOutput; ++i) {
5060
    (*pExpr)[i] = pExprMsg;
5061

5062
    pExprMsg->colInfo.colIndex = htons(pExprMsg->colInfo.colIndex);
5063 5064 5065 5066
    pExprMsg->colInfo.colId = htons(pExprMsg->colInfo.colId);
    pExprMsg->colInfo.flag = htons(pExprMsg->colInfo.flag);
    pExprMsg->functionId = htons(pExprMsg->functionId);
    pExprMsg->numOfParams = htons(pExprMsg->numOfParams);
5067

5068
    pMsg += sizeof(SSqlFuncMsg);
5069 5070

    for (int32_t j = 0; j < pExprMsg->numOfParams; ++j) {
5071
      pExprMsg->arg[j].argType = htons(pExprMsg->arg[j].argType);
5072 5073 5074 5075
      pExprMsg->arg[j].argBytes = htons(pExprMsg->arg[j].argBytes);

      if (pExprMsg->arg[j].argType == TSDB_DATA_TYPE_BINARY) {
        pExprMsg->arg[j].argValue.pz = pMsg;
5076
        pMsg += pExprMsg->arg[j].argBytes;  // one more for the string terminated char.
5077 5078 5079 5080 5081
      } else {
        pExprMsg->arg[j].argValue.i64 = htobe64(pExprMsg->arg[j].argValue.i64);
      }
    }

5082
    if (pExprMsg->functionId == TSDB_FUNC_TAG || pExprMsg->functionId == TSDB_FUNC_TAGPRJ ||
5083 5084 5085 5086 5087
               pExprMsg->functionId == TSDB_FUNC_TAG_DUMMY) {
      if (pExprMsg->colInfo.flag != TSDB_COL_TAG) {  // ignore the column  index check for arithmetic expression.
        return TSDB_CODE_INVALID_QUERY_MSG;
      }
    } else {
5088 5089 5090
//      if (!validateExprColumnInfo(pQueryMsg, pExprMsg)) {
//        return TSDB_CODE_INVALID_QUERY_MSG;
//      }
5091 5092
    }

5093
    pExprMsg = (SSqlFuncMsg *)pMsg;
5094
  }
5095

5096 5097
  if (!validateQuerySourceCols(pQueryMsg, *pExpr)) {
    tfree(*pExpr);
5098

5099 5100
    return TSDB_CODE_INVALID_QUERY_MSG;
  }
5101

H
hjxilinx 已提交
5102
  pMsg = createTableIdList(pQueryMsg, pMsg, pTableIdList);
5103

H
hjxilinx 已提交
5104
  if (pQueryMsg->numOfGroupCols > 0) {  // group by tag columns
5105 5106 5107 5108
    *groupbyCols = malloc(pQueryMsg->numOfGroupCols * sizeof(SColIndex));

    for (int32_t i = 0; i < pQueryMsg->numOfGroupCols; ++i) {
      (*groupbyCols)[i].colId = *(int16_t *)pMsg;
5109
      pMsg += sizeof((*groupbyCols)[i].colId);
5110 5111

      (*groupbyCols)[i].colIndex = *(int16_t *)pMsg;
5112 5113
      pMsg += sizeof((*groupbyCols)[i].colIndex);

5114
      (*groupbyCols)[i].flag = *(int16_t *)pMsg;
5115 5116 5117 5118 5119
      pMsg += sizeof((*groupbyCols)[i].flag);

      memcpy((*groupbyCols)[i].name, pMsg, tListLen(groupbyCols[i]->name));
      pMsg += tListLen((*groupbyCols)[i].name);
    }
5120

H
hjxilinx 已提交
5121 5122
    pQueryMsg->orderByIdx = htons(pQueryMsg->orderByIdx);
    pQueryMsg->orderType = htons(pQueryMsg->orderType);
5123 5124
  }

5125 5126
  pQueryMsg->fillType = htons(pQueryMsg->fillType);
  if (pQueryMsg->fillType != TSDB_FILL_NONE) {
5127
    pQueryMsg->fillVal = (uint64_t)(pMsg);
5128 5129

    int64_t *v = (int64_t *)pMsg;
5130
    for (int32_t i = 0; i < pQueryMsg->numOfOutput; ++i) {
5131 5132
      v[i] = htobe64(v[i]);
    }
5133

5134
    pMsg += sizeof(int64_t) * pQueryMsg->numOfOutput;
5135
  }
5136

5137 5138 5139 5140
  if (pQueryMsg->numOfTags > 0) {
    (*tagCols) = calloc(1, sizeof(SColumnInfo) * pQueryMsg->numOfTags);
    for (int32_t i = 0; i < pQueryMsg->numOfTags; ++i) {
      SColumnInfo* pTagCol = (SColumnInfo*) pMsg;
5141

5142 5143 5144 5145
      pTagCol->colId = htons(pTagCol->colId);
      pTagCol->bytes = htons(pTagCol->bytes);
      pTagCol->type  = htons(pTagCol->type);
      pTagCol->numOfFilters = 0;
5146

5147
      (*tagCols)[i] = *pTagCol;
5148
      pMsg += sizeof(SColumnInfo);
5149
    }
H
hjxilinx 已提交
5150
  }
5151

5152 5153 5154 5155 5156 5157
  // the tag query condition expression string is located at the end of query msg
  if (pQueryMsg->tagCondLen > 0) {
    *tagCond = calloc(1, pQueryMsg->tagCondLen);
    memcpy(*tagCond, pMsg, pQueryMsg->tagCondLen);
    pMsg += pQueryMsg->tagCondLen;
  }
5158

weixin_48148422's avatar
weixin_48148422 已提交
5159
  if (*pMsg != 0) {
5160 5161
    size_t len = strlen(pMsg) + 1;
    *tbnameCond = malloc(len);
weixin_48148422's avatar
weixin_48148422 已提交
5162
    strcpy(*tbnameCond, pMsg);
5163
    pMsg += len;
weixin_48148422's avatar
weixin_48148422 已提交
5164
  }
5165

5166 5167
  qTrace("qmsg:%p query %d tables, qrange:%" PRId64 "-%" PRId64 ", numOfGroupbyTagCols:%d, order:%d, "
         "outputCols:%d, numOfCols:%d, interval:%" PRId64 ", fillType:%d, comptsLen:%d, limit:%" PRId64 ", offset:%" PRId64,
5168
         pQueryMsg, pQueryMsg->numOfTables, pQueryMsg->window.skey, pQueryMsg->window.ekey, pQueryMsg->numOfGroupCols,
5169
         pQueryMsg->order, pQueryMsg->numOfOutput, pQueryMsg->numOfCols, pQueryMsg->intervalTime,
5170
         pQueryMsg->fillType, pQueryMsg->tsLen, pQueryMsg->limit, pQueryMsg->offset);
5171 5172 5173 5174

  return 0;
}

H
hjxilinx 已提交
5175
/**
 * Build the expression tree of an arithmetic expression from its serialized form.
 *
 * The serialized expression is taken from the first argument of pArithExprInfo->base
 * (argValue.pz with argBytes bytes) and handed to exprTreeFromBinary(). The resulting
 * tree is stored in pArithExprInfo->pExpr.
 *
 * @param pArithExprInfo expression info, pExpr is set on success
 * @param pQueryMsg      query message, only used as identifier in the log
 * @return TSDB_CODE_SUCCESS, the code caught from exprTreeFromBinary(), or
 *         TSDB_CODE_APP_ERROR if no tree was created
 */
static int32_t buildAirthmeticExprFromMsg(SExprInfo *pArithExprInfo, SQueryTableMsg *pQueryMsg) {
  // the format has a single conversion, so only the message pointer is passed
  qTrace("qmsg:%p create arithmetic expr from binary string", pQueryMsg);

  tExprNode* pExprNode = NULL;
  TRY(32) {
    pExprNode = exprTreeFromBinary(pArithExprInfo->base.arg[0].argValue.pz, pArithExprInfo->base.arg[0].argBytes);
  } CATCH( code ) {
    CLEANUP_EXECUTE();
    return code;
  } END_TRY

  if (pExprNode == NULL) {
    // NOTE(review): the argument is the serialized (binary) expression; printing it with %s
    // presumes it is null-terminated - confirm
    qError("qmsg:%p failed to create arithmetic expression string from:%s", pQueryMsg, pArithExprInfo->base.arg[0].argValue.pz);
    return TSDB_CODE_APP_ERROR;
  }

  pArithExprInfo->pExpr = pExprNode;
  return TSDB_CODE_SUCCESS;
}

5195 5196 5197
/**
 * Create the expression info array of a query from the expression messages.
 *
 * First pass: every expression is copied, its input type/bytes are determined (double for
 * arithmetic expressions, binary for the table name column, otherwise taken from the
 * referenced source column) and the output type/bytes/intermediate bytes are computed.
 * The output bytes of TSDB_FUNC_TAG_DUMMY/TSDB_FUNC_TS_DUMMY expressions are summed up.
 * Second pass: top/bottom expressions are computed again with that sum as extra length.
 *
 * On success the pointer array pExprMsg is released and the result is returned through
 * pExprInfo. On failure *pExprInfo stays NULL and pExprMsg is not released.
 *
 * @param pQueryMsg query message
 * @param pExprInfo output, array of pQueryMsg->numOfOutput expression infos
 * @param pExprMsg  array of pointers to the expression messages
 * @param pTagCols  tag column list
 * @return TSDB_CODE_SUCCESS, TSDB_CODE_SERV_OUT_OF_MEMORY, TSDB_CODE_INVALID_QUERY_MSG or
 *         the error code of the arithmetic expression builder
 */
static int32_t createSqlFunctionExprFromMsg(SQueryTableMsg *pQueryMsg, SExprInfo **pExprInfo, SSqlFuncMsg **pExprMsg,
    SColumnInfo* pTagCols) {
  *pExprInfo = NULL;
  int32_t code = TSDB_CODE_SUCCESS;

  SExprInfo *pExprs = (SExprInfo *)calloc(1, sizeof(SExprInfo) * pQueryMsg->numOfOutput);
  if (pExprs == NULL) {
    return TSDB_CODE_SERV_OUT_OF_MEMORY;
  }

  bool    isSuperTable = QUERY_IS_STABLE_QUERY(pQueryMsg->queryType);
  int16_t tagLen = 0;

  for (int32_t i = 0; i < pQueryMsg->numOfOutput; ++i) {
    pExprs[i].base = *pExprMsg[i];
    pExprs[i].bytes = 0;

    int16_t type = 0;
    int16_t bytes = 0;

    // parse the arithmetic expression
    if (pExprs[i].base.functionId == TSDB_FUNC_ARITHM) {
      code = buildAirthmeticExprFromMsg(&pExprs[i], pQueryMsg);

      if (code != TSDB_CODE_SUCCESS) {
        tfree(pExprs);
        return code;
      }

      type  = TSDB_DATA_TYPE_DOUBLE;
      bytes = tDataTypeDesc[type].nSize;
    } else if (pExprs[i].base.colInfo.colId == TSDB_TBNAME_COLUMN_INDEX) {  // table name column
      type  = TSDB_DATA_TYPE_BINARY;
      bytes = TSDB_TABLE_NAME_LEN + VARSTR_HEADER_SIZE;
    } else {  // normal column or tag
      int32_t j = getColumnIndexInSource(pQueryMsg, &pExprs[i].base, pTagCols);
      assert(j < pQueryMsg->numOfCols || j < pQueryMsg->numOfTags);

      SColumnInfo* pCol = (TSDB_COL_IS_TAG(pExprs[i].base.colInfo.flag))? &pTagCols[j]:&pQueryMsg->colList[j];
      type = pCol->type;
      bytes = pCol->bytes;
    }

    int32_t param = pExprs[i].base.arg[0].argValue.i64;
    if (getResultDataInfo(type, bytes, pExprs[i].base.functionId, param, &pExprs[i].type, &pExprs[i].bytes,
                          &pExprs[i].interBytes, 0, isSuperTable) != TSDB_CODE_SUCCESS) {
      tfree(pExprs);
      return TSDB_CODE_INVALID_QUERY_MSG;
    }

    if (pExprs[i].base.functionId == TSDB_FUNC_TAG_DUMMY || pExprs[i].base.functionId == TSDB_FUNC_TS_DUMMY) {
      tagLen += pExprs[i].bytes;
    }
    assert(isValidDataType(pExprs[i].type, pExprs[i].bytes));
  }

  // TODO refactor
  for (int32_t i = 0; i < pQueryMsg->numOfOutput; ++i) {
    pExprs[i].base = *pExprMsg[i];
    int16_t functId = pExprs[i].base.functionId;

    if (functId == TSDB_FUNC_TOP || functId == TSDB_FUNC_BOTTOM) {
      int32_t j = getColumnIndexInSource(pQueryMsg, &pExprs[i].base, pTagCols);
      assert(j < pQueryMsg->numOfCols);

      SColumnInfo *pCol = &pQueryMsg->colList[j];

      // the total length of the dummy tag/ts outputs is only known after the first pass
      int32_t ret =
          getResultDataInfo(pCol->type, pCol->bytes, functId, pExprs[i].base.arg[0].argValue.i64,
                            &pExprs[i].type, &pExprs[i].bytes, &pExprs[i].interBytes, tagLen, isSuperTable);

      // handled in the same way as in the first pass instead of an assert only, which
      // would silently ignore the failure when assertions are compiled out
      if (ret != TSDB_CODE_SUCCESS) {
        tfree(pExprs);
        return TSDB_CODE_INVALID_QUERY_MSG;
      }
    }
  }

  tfree(pExprMsg);
  *pExprInfo = pExprs;

  return TSDB_CODE_SUCCESS;
}

5275
/**
 * Create the group-by expression of a query.
 *
 * @param pQueryMsg query message, provides numOfGroupCols, orderType and orderByIdx
 * @param pColIndex array of pQueryMsg->numOfGroupCols group-by columns, copied into the result
 * @param code      output, set to TSDB_CODE_SERV_OUT_OF_MEMORY if the allocation of the
 *                  expression fails; not modified otherwise
 * @return the created expression, or NULL if there is no group-by column or on failure
 */
static SSqlGroupbyExpr *createGroupbyExprFromMsg(SQueryTableMsg *pQueryMsg, SColIndex *pColIndex, int32_t *code) {
  if (pQueryMsg->numOfGroupCols == 0) {
    return NULL;
  }

  // using group by tag columns
  SSqlGroupbyExpr *pResult = (SSqlGroupbyExpr *)calloc(1, sizeof(SSqlGroupbyExpr));
  if (pResult == NULL) {
    *code = TSDB_CODE_SERV_OUT_OF_MEMORY;
    return NULL;
  }

  pResult->numOfGroupCols = pQueryMsg->numOfGroupCols;
  pResult->orderType = pQueryMsg->orderType;
  pResult->orderIndex = pQueryMsg->orderByIdx;
  pResult->columnInfo = taosArrayInit(pQueryMsg->numOfGroupCols, sizeof(SColIndex));

  int32_t k = 0;
  while (k < pQueryMsg->numOfGroupCols) {
    taosArrayPush(pResult->columnInfo, &pColIndex[k]);
    k += 1;
  }

  return pResult;
}

5299
/**
 * Create the filter information of all columns of a query that carry filter conditions.
 *
 * pQuery->numOfFilterCols is increased by the number of such columns and
 * pQuery->pFilterInfo is allocated with one entry per column. For every single filter a
 * filter function is selected: a range filter function if both a lower (>, >=) and an
 * upper (<, <=) relation are given, otherwise the value filter function of the one
 * relation that is set.
 *
 * @param pQInfo query handle, only used as identifier in the log
 * @param pQuery query whose colList is inspected
 * @return TSDB_CODE_SUCCESS, TSDB_CODE_INVALID_QUERY_MSG for an invalid filter or data
 *         type, TSDB_CODE_SERV_OUT_OF_MEMORY if an allocation fails
 */
static int32_t createFilterInfo(void *pQInfo, SQuery *pQuery) {
  for (int32_t i = 0; i < pQuery->numOfCols; ++i) {
    if (pQuery->colList[i].numOfFilters > 0) {
      pQuery->numOfFilterCols++;
    }
  }

  if (pQuery->numOfFilterCols == 0) {
    return TSDB_CODE_SUCCESS;
  }

  pQuery->pFilterInfo = calloc(1, sizeof(SSingleColumnFilterInfo) * pQuery->numOfFilterCols);
  if (pQuery->pFilterInfo == NULL) {
    return TSDB_CODE_SERV_OUT_OF_MEMORY;
  }

  for (int32_t i = 0, j = 0; i < pQuery->numOfCols; ++i) {
    if (pQuery->colList[i].numOfFilters > 0) {
      SSingleColumnFilterInfo *pFilterInfo = &pQuery->pFilterInfo[j];

      // a plain assignment copies the column info; the additional memcpy with the size of
      // another type (SColumnInfoData) that used to precede it has been dropped
      pFilterInfo->info = pQuery->colList[i];

      pFilterInfo->numOfFilters = pQuery->colList[i].numOfFilters;
      pFilterInfo->pFilters = calloc(pFilterInfo->numOfFilters, sizeof(SColumnFilterElem));
      if (pFilterInfo->pFilters == NULL) {
        return TSDB_CODE_SERV_OUT_OF_MEMORY;
      }

      for (int32_t f = 0; f < pFilterInfo->numOfFilters; ++f) {
        SColumnFilterElem *pSingleColFilter = &pFilterInfo->pFilters[f];
        pSingleColFilter->filterInfo = pQuery->colList[i].filters[f];

        int32_t lower = pSingleColFilter->filterInfo.lowerRelOptr;
        int32_t upper = pSingleColFilter->filterInfo.upperRelOptr;

        if (lower == TSDB_RELATION_INVALID && upper == TSDB_RELATION_INVALID) {
          qError("QInfo:%p invalid filter info", pQInfo);
          return TSDB_CODE_INVALID_QUERY_MSG;
        }

        int16_t type  = pQuery->colList[i].type;
        int16_t bytes = pQuery->colList[i].bytes;

        // todo refactor
        __filter_func_t *rangeFilterArray = getRangeFilterFuncArray(type);
        __filter_func_t *filterArray = getValueFilterFuncArray(type);

        if (rangeFilterArray == NULL && filterArray == NULL) {
          qError("QInfo:%p failed to get filter function, invalid data type:%d", pQInfo, type);
          return TSDB_CODE_INVALID_QUERY_MSG;
        }

        if ((lower == TSDB_RELATION_GREATER_EQUAL || lower == TSDB_RELATION_GREATER) &&
            (upper == TSDB_RELATION_LESS_EQUAL || upper == TSDB_RELATION_LESS)) {
          // only one of the two arrays may be available, check the one that is indexed here
          if (rangeFilterArray == NULL) {
            qError("QInfo:%p failed to get filter function, invalid data type:%d", pQInfo, type);
            return TSDB_CODE_INVALID_QUERY_MSG;
          }

          // range filter index: 1: (l, u), 2: [l, u), 3: (l, u], 4: [l, u]
          if (lower == TSDB_RELATION_GREATER_EQUAL) {
            pSingleColFilter->fp = (upper == TSDB_RELATION_LESS_EQUAL) ? rangeFilterArray[4] : rangeFilterArray[2];
          } else {
            pSingleColFilter->fp = (upper == TSDB_RELATION_LESS_EQUAL) ? rangeFilterArray[3] : rangeFilterArray[1];
          }
        } else {  // set callback filter function
          if (filterArray == NULL) {
            qError("QInfo:%p failed to get filter function, invalid data type:%d", pQInfo, type);
            return TSDB_CODE_INVALID_QUERY_MSG;
          }

          if (lower != TSDB_RELATION_INVALID) {
            pSingleColFilter->fp = filterArray[lower];

            if (upper != TSDB_RELATION_INVALID) {
              qError("pQInfo:%p failed to get filter function, invalid filter condition", pQInfo);
              return TSDB_CODE_INVALID_QUERY_MSG;
            }
          } else {
            pSingleColFilter->fp = filterArray[upper];
          }
        }

        assert(pSingleColFilter->fp != NULL);
        pSingleColFilter->bytes = bytes;
      }

      j++;
    }
  }

  return TSDB_CODE_SUCCESS;
}

5384
/*
 * Resolve the position of the source column of every output expression.
 *
 * For each entry of pQuery->pSelectExpr (arithmetic expressions are skipped)
 * the column id carried by the expression is looked up in pQuery->colList for
 * ordinary columns, or in pQuery->tagColList for tag columns, and the matching
 * position is stored in colInfo.colIndex.
 *
 * A column id that cannot be found trips an assertion; the only tolerated
 * miss is the table-name pseudo column among the tags.
 */
static void doUpdateExprColumnIndex(SQuery *pQuery) {
  // check the pointer itself before looking through it
  assert(pQuery != NULL && pQuery->pSelectExpr != NULL);

  for (int32_t k = 0; k < pQuery->numOfOutput; ++k) {
    SSqlFuncMsg *pSqlExprMsg = &pQuery->pSelectExpr[k].base;
    if (pSqlExprMsg->functionId == TSDB_FUNC_ARITHM) {
      continue;
    }

    // todo opt performance
    SColIndex *pColIndex = &pSqlExprMsg->colInfo;
    if (!TSDB_COL_IS_TAG(pColIndex->flag)) {
      int32_t f = 0;
      for (f = 0; f < pQuery->numOfCols; ++f) {
        if (pColIndex->colId == pQuery->colList[f].colId) {
          pColIndex->colIndex = f;
          break;
        }
      }

      assert(f < pQuery->numOfCols);
    } else {
      int32_t f = 0;
      for (f = 0; f < pQuery->numOfTags; ++f) {
        if (pColIndex->colId == pQuery->tagColList[f].colId) {
          pColIndex->colIndex = f;
          break;
        }
      }

      // the table name is not stored in the tag list, so it may legitimately be absent
      assert(f < pQuery->numOfTags || pColIndex->colId == TSDB_TBNAME_COLUMN_INDEX);
    }
  }
}

weixin_48148422's avatar
weixin_48148422 已提交
5419 5420 5421 5422 5423 5424 5425 5426 5427 5428

/*
 * Three-way comparison of two STableIdInfo entries by their uid field.
 * Returns 1, -1 or 0 when the first uid is greater than, less than or equal
 * to the second one.
 */
static int compareTableIdInfo( const void* a, const void* b ) {
  const STableIdInfo* lhs = (const STableIdInfo*)a;
  const STableIdInfo* rhs = (const STableIdInfo*)b;

  if (lhs->uid == rhs->uid) {
    return 0;
  }

  return (lhs->uid > rhs->uid) ? 1 : -1;
}

static SQInfo *createQInfoImpl(SQueryTableMsg *pQueryMsg, SArray* pTableIdList, SSqlGroupbyExpr *pGroupbyExpr, SExprInfo *pExprs,
5429
                               STableGroupInfo *groupInfo, SColumnInfo* pTagCols) {
5430 5431
  SQInfo *pQInfo = (SQInfo *)calloc(1, sizeof(SQInfo));
  if (pQInfo == NULL) {
5432
    return NULL;
5433 5434 5435 5436 5437 5438
  }

  SQuery *pQuery = calloc(1, sizeof(SQuery));
  pQInfo->runtimeEnv.pQuery = pQuery;

  int16_t numOfCols = pQueryMsg->numOfCols;
5439
  int16_t numOfOutput = pQueryMsg->numOfOutput;
5440

5441
  pQuery->numOfCols       = numOfCols;
H
hjxilinx 已提交
5442
  pQuery->numOfOutput     = numOfOutput;
5443 5444 5445
  pQuery->limit.limit     = pQueryMsg->limit;
  pQuery->limit.offset    = pQueryMsg->offset;
  pQuery->order.order     = pQueryMsg->order;
5446
  pQuery->order.orderColId = pQueryMsg->orderColId;
5447 5448 5449 5450
  pQuery->pSelectExpr     = pExprs;
  pQuery->pGroupbyExpr    = pGroupbyExpr;
  pQuery->intervalTime    = pQueryMsg->intervalTime;
  pQuery->slidingTime     = pQueryMsg->slidingTime;
5451
  pQuery->slidingTimeUnit = pQueryMsg->slidingTimeUnit;
5452
  pQuery->fillType        = pQueryMsg->fillType;
5453
  pQuery->numOfTags       = pQueryMsg->numOfTags;
5454
  
5455
  // todo do not allocate ??
5456
  pQuery->colList = calloc(numOfCols, sizeof(SSingleColumnFilterInfo));
5457
  if (pQuery->colList == NULL) {
5458
    goto _cleanup;
5459
  }
5460

H
hjxilinx 已提交
5461
  for (int16_t i = 0; i < numOfCols; ++i) {
5462
    pQuery->colList[i] = pQueryMsg->colList[i];
5463
    pQuery->colList[i].filters = tscFilterInfoClone(pQueryMsg->colList[i].filters, pQuery->colList[i].numOfFilters);
H
hjxilinx 已提交
5464
  }
5465

5466
  pQuery->tagColList = pTagCols;
5467

5468
  // calculate the result row size
5469 5470 5471
  for (int16_t col = 0; col < numOfOutput; ++col) {
    assert(pExprs[col].bytes > 0);
    pQuery->rowSize += pExprs[col].bytes;
5472
  }
5473

5474
  doUpdateExprColumnIndex(pQuery);
5475

5476
  int32_t ret = createFilterInfo(pQInfo, pQuery);
5477
  if (ret != TSDB_CODE_SUCCESS) {
5478
    goto _cleanup;
5479 5480 5481
  }

  // prepare the result buffer
5482
  pQuery->sdata = (tFilePage **)calloc(pQuery->numOfOutput, POINTER_BYTES);
5483
  if (pQuery->sdata == NULL) {
5484
    goto _cleanup;
5485 5486
  }

H
hjxilinx 已提交
5487
  // set the output buffer capacity
H
hjxilinx 已提交
5488
  pQuery->rec.capacity = 4096;
5489
  pQuery->rec.threshold = 4000;
5490

5491
  for (int32_t col = 0; col < pQuery->numOfOutput; ++col) {
5492
    assert(pExprs[col].interBytes >= pExprs[col].bytes);
5493 5494

    // allocate additional memory for interResults that are usually larger then final results
5495 5496
    size_t size = (pQuery->rec.capacity + 1) * pExprs[col].bytes + pExprs[col].interBytes + sizeof(tFilePage);
    pQuery->sdata[col] = (tFilePage *)calloc(1, size);
5497
    if (pQuery->sdata[col] == NULL) {
5498
      goto _cleanup;
5499 5500 5501
    }
  }

5502
  if (pQuery->fillType != TSDB_FILL_NONE) {
5503 5504
    pQuery->fillVal = malloc(sizeof(int64_t) * pQuery->numOfOutput);
    if (pQuery->fillVal == NULL) {
5505
      goto _cleanup;
5506 5507 5508
    }

    // the first column is the timestamp
5509
    memcpy(pQuery->fillVal, (char *)pQueryMsg->fillVal, pQuery->numOfOutput * sizeof(int64_t));
5510 5511 5512
  }

  // to make sure third party won't overwrite this structure
5513
  pQInfo->signature = pQInfo;
5514

H
hjxilinx 已提交
5515 5516
  pQInfo->tableIdGroupInfo = *groupInfo;
  size_t numOfGroups = taosArrayGetSize(groupInfo->pGroupList);
5517

H
hjxilinx 已提交
5518 5519 5520
  pQInfo->groupInfo.pGroupList = taosArrayInit(numOfGroups, POINTER_BYTES);
  pQInfo->groupInfo.numOfTables = groupInfo->numOfTables;
  
weixin_48148422's avatar
weixin_48148422 已提交
5521 5522
  int tableIndex = 0;
  STimeWindow window = pQueryMsg->window;
5523
  taosArraySort(pTableIdList, compareTableIdInfo);
5524

H
hjxilinx 已提交
5525 5526 5527
  for(int32_t i = 0; i < numOfGroups; ++i) {
    SArray* pa = taosArrayGetP(groupInfo->pGroupList, i);
    size_t s = taosArrayGetSize(pa);
5528

H
hjxilinx 已提交
5529
    SArray* p1 = taosArrayInit(s, sizeof(SGroupItem));
5530

H
hjxilinx 已提交
5531
    for(int32_t j = 0; j < s; ++j) {
weixin_48148422's avatar
weixin_48148422 已提交
5532 5533 5534
      STableId id = *(STableId*) taosArrayGet(pa, j);
      SGroupItem item = { .id = id };
      // NOTE: compare STableIdInfo with STableId
5535
      STableIdInfo* pTableId = taosArraySearch( pTableIdList, &id, compareTableIdInfo);
weixin_48148422's avatar
weixin_48148422 已提交
5536 5537 5538
      if (pTableId != NULL ) {
        window.skey = pTableId->key;
      } else {
B
Bomin Zhang 已提交
5539
        window.skey = pQueryMsg->window.skey;
weixin_48148422's avatar
weixin_48148422 已提交
5540 5541
      }
      item.info = createTableQueryInfo(&pQInfo->runtimeEnv, item.id, window);
5542
      item.info->groupIndex = i;
weixin_48148422's avatar
weixin_48148422 已提交
5543
      item.info->tableIndex = tableIndex++;
H
hjxilinx 已提交
5544 5545
      taosArrayPush(p1, &item);
    }
5546

H
hjxilinx 已提交
5547 5548
    taosArrayPush(pQInfo->groupInfo.pGroupList, &p1);
  }
5549

weixin_48148422's avatar
weixin_48148422 已提交
5550 5551
  pQInfo->arrTableIdInfo = taosArrayInit(tableIndex, sizeof(STableIdInfo));

5552
  pQuery->pos = -1;
5553
  pQuery->window = pQueryMsg->window;
5554

5555
  if (sem_init(&pQInfo->dataReady, 0, 0) != 0) {
S
slguan 已提交
5556
    qError("QInfo:%p init dataReady sem failed, reason:%s", pQInfo, strerror(errno));
5557
    goto _cleanup;
5558
  }
5559

5560
  colIdCheck(pQuery);
5561

S
slguan 已提交
5562
  qTrace("qmsg:%p QInfo:%p created", pQueryMsg, pQInfo);
5563 5564
  return pQInfo;

5565
_cleanup:
5566
  tfree(pQuery->fillVal);
5567 5568

  if (pQuery->sdata != NULL) {
5569
    for (int16_t col = 0; col < pQuery->numOfOutput; ++col) {
5570 5571 5572 5573 5574 5575 5576 5577 5578 5579 5580 5581 5582 5583 5584 5585
      tfree(pQuery->sdata[col]);
    }
  }

  tfree(pQuery->sdata);
  tfree(pQuery->pFilterInfo);
  tfree(pQuery->colList);

  tfree(pExprs);
  tfree(pGroupbyExpr);

  tfree(pQInfo);

  return NULL;
}

H
hjxilinx 已提交
5586
static bool isValidQInfo(void *param) {
H
hjxilinx 已提交
5587 5588 5589 5590
  SQInfo *pQInfo = (SQInfo *)param;
  if (pQInfo == NULL) {
    return false;
  }
5591

H
hjxilinx 已提交
5592 5593 5594 5595
  /*
   * pQInfo->signature may be changed by another thread, so we assign value of signature
   * into local variable, then compare by using local variable
   */
5596
  uint64_t sig = (uint64_t)pQInfo->signature;
H
hjxilinx 已提交
5597 5598 5599
  return (sig == (uint64_t)pQInfo);
}

H
hjxilinx 已提交
5600 5601
static void freeQInfo(SQInfo *pQInfo);

5602
/*
 * Finish the set-up of a freshly created SQInfo.
 *
 * Two situations complete the query immediately with TSDB_CODE_SUCCESS: a time
 * window that is empty for the requested scan order, and a query without any
 * table. In both cases the status becomes QUERY_COMPLETED and dataReady is
 * posted.
 *
 * Otherwise the optional timestamp buffer is built from the message and handed
 * to doInitQInfo() together with the remaining arguments. When doInitQInfo()
 * fails, pQInfo is released through freeQInfo() and must not be used by the
 * caller afterwards.
 *
 * Returns TSDB_CODE_SUCCESS or the error code reported by doInitQInfo().
 */
static int32_t initQInfo(SQueryTableMsg *pQueryMsg, void *tsdb, int32_t vgId, SQInfo *pQInfo, bool isSTable) {
  int32_t code = TSDB_CODE_SUCCESS;
  SQuery *pQuery = pQInfo->runtimeEnv.pQuery;

  // The early exits are evaluated before the timestamp buffer is created, so that the buffer
  // is never allocated on a path that returns without handing it over.

  // only the successful complete requires the sem_post/over = 1 operations.
  if ((QUERY_IS_ASC_QUERY(pQuery) && (pQuery->window.skey > pQuery->window.ekey)) ||
      (!QUERY_IS_ASC_QUERY(pQuery) && (pQuery->window.ekey > pQuery->window.skey))) {
    qTrace("QInfo:%p no result in time range %" PRId64 "-%" PRId64 ", order %d", pQInfo, pQuery->window.skey,
           pQuery->window.ekey, pQuery->order.order);
    setQueryStatus(pQuery, QUERY_COMPLETED);

    sem_post(&pQInfo->dataReady);
    return TSDB_CODE_SUCCESS;
  }

  if (pQInfo->groupInfo.numOfTables == 0) {
    qTrace("QInfo:%p no table qualified for tag filter, abort query", pQInfo);
    setQueryStatus(pQuery, QUERY_COMPLETED);

    sem_post(&pQInfo->dataReady);
    return TSDB_CODE_SUCCESS;
  }

  STSBuf *pTSBuf = NULL;
  if (pQueryMsg->tsLen > 0) {  // open new file to save the result
    char *tsBlock = (char *)pQueryMsg + pQueryMsg->tsOffset;
    pTSBuf = tsBufCreateFromCompBlocks(tsBlock, pQueryMsg->tsNumOfBlocks, pQueryMsg->tsLen, pQueryMsg->tsOrder);

    tsBufResetPos(pTSBuf);
    tsBufNextPos(pTSBuf);
  }

  // filter the qualified
  code = doInitQInfo(pQInfo, pTSBuf, tsdb, vgId, isSTable);
  if (code != TSDB_CODE_SUCCESS) {
    // table query ref will be decrease during error handling
    freeQInfo(pQInfo);
  }

  return code;
}

/*
 * Release a SQInfo and everything reachable from it: output buffers, the
 * semaphore, the runtime environment, filter and expression data, fill values,
 * the table groups, the SQuery and finally the SQInfo itself.
 *
 * Nothing is done when pQInfo does not pass isValidQInfo(). The object is
 * zeroed before it is freed so that its signature no longer validates.
 */
static void freeQInfo(SQInfo *pQInfo) {
  if (!isValidQInfo(pQInfo)) {
    return;
  }

  SQuery *pQuery = pQInfo->runtimeEnv.pQuery;
  setQueryKilled(pQInfo);

  qTrace("QInfo:%p start to free QInfo", pQInfo);

  // per-column output buffers
  for (int32_t k = 0; k < pQuery->numOfOutput; ++k) {
    tfree(pQuery->sdata[k]);
  }

  sem_destroy(&(pQInfo->dataReady));
  teardownQueryRuntimeEnv(&pQInfo->runtimeEnv);

  // filter functions attached to each filtered column
  for (int32_t k = 0; k < pQuery->numOfFilterCols; ++k) {
    SSingleColumnFilterInfo *pFilterCol = &pQuery->pFilterInfo[k];
    if (pFilterCol->numOfFilters <= 0) {
      continue;
    }

    tfree(pFilterCol->pFilters);
  }

  // expression trees, then the expression array itself
  if (pQuery->pSelectExpr != NULL) {
    for (int32_t k = 0; k < pQuery->numOfOutput; ++k) {
      SExprInfo* pOneExpr = &pQuery->pSelectExpr[k];
      if (pOneExpr->pExpr == NULL) {
        continue;
      }

      tExprTreeDestroy(&pOneExpr->pExpr, NULL);
    }

    tfree(pQuery->pSelectExpr);
  }

  tfree(pQuery->fillVal);

  // todo refactor, extract method to destroytableDataInfo
  int32_t numOfGroups = taosArrayGetSize(pQInfo->groupInfo.pGroupList);
  for (int32_t g = 0; g < numOfGroups; ++g) {
    SArray *pGroup = taosArrayGetP(pQInfo->groupInfo.pGroupList, g);

    size_t numOfItems = taosArrayGetSize(pGroup);
    for(int32_t t = 0; t < numOfItems; ++t) {
      SGroupItem* pItem = taosArrayGet(pGroup, t);
      if (pItem->info == NULL) {
        continue;
      }

      destroyTableQueryInfo(pItem->info, pQuery->numOfOutput);
    }

    taosArrayDestroy(pGroup);
  }

  taosArrayDestroy(pQInfo->groupInfo.pGroupList);

  // the table id groups are walked with the same group count as above
  for(int32_t g = 0; g < numOfGroups; ++g) {
    SArray* pIdGroup = taosArrayGetP(pQInfo->tableIdGroupInfo.pGroupList, g);
    taosArrayDestroy(pIdGroup);
  }

  taosArrayDestroy(pQInfo->tableIdGroupInfo.pGroupList);
  taosArrayDestroy(pQInfo->arrTableIdInfo);

  if (pQuery->pGroupbyExpr != NULL) {
    taosArrayDestroy(pQuery->pGroupbyExpr->columnInfo);
    tfree(pQuery->pGroupbyExpr);
  }

  tfree(pQuery->tagColList);
  tfree(pQuery->pFilterInfo);
  tfree(pQuery->colList);
  tfree(pQuery->sdata);

  tfree(pQuery);

  qTrace("QInfo:%p QInfo is freed", pQInfo);

  // wipe the signature so that a stale handle fails the object safety check
  memset(pQInfo, 0, sizeof(SQInfo));
  tfree(pQInfo);
}

H
hjxilinx 已提交
5731
/*
 * Compute the number of payload bytes of the current result.
 *
 * Ordinary queries: rowSize multiplied by *numOfRows.
 * ts-comp queries with a positive row count: the size of the file whose path
 * is stored in sdata[0]->data; *numOfRows is overwritten with that size as
 * well. When the file cannot be inspected the error is logged and 0 is
 * returned, leaving *numOfRows untouched.
 *
 * TODO handle the case that the file is too large to send back one time
 */
static size_t getResultSize(SQInfo *pQInfo, int64_t *numOfRows) {
  SQuery *pQuery = pQInfo->runtimeEnv.pQuery;

  if (!isTSCompQuery(pQuery) || (*numOfRows) <= 0) {
    return pQuery->rowSize * (*numOfRows);
  }

  // for a ts-comp query the returned row size equals 1, so the file size doubles as the row count
  struct stat fileInfo;
  if (stat(pQuery->sdata[0]->data, &fileInfo) != 0) {
    qError("QInfo:%p failed to get file info, path:%s, reason:%s", pQInfo, pQuery->sdata[0]->data, strerror(errno));
    return 0;
  }

  *numOfRows = fileInfo.st_size;
  return fileInfo.st_size;
}
5752

H
hjxilinx 已提交
5753 5754 5755
/*
 * Copy the current result of the query into the response buffer `data`.
 *
 * ts-comp queries: the temporary file named by sdata[0]->data is read
 * completely into `data` and removed afterwards; a COMPLETED query is then
 * marked as OVER. Failures to open, measure or read the file are logged.
 * Other queries: the rows are copied by doCopyQueryResultToMsg().
 *
 * rec.total is advanced by rec.rows, and the query is marked as OVER once the
 * total equals a positive limit.
 *
 * Always returns TSDB_CODE_SUCCESS.
 * todo return the error code to client
 * todo if interpolation exists, the result may be dump to client by several rounds
 */
static int32_t doDumpQueryResult(SQInfo *pQInfo, char *data) {
  // the remained number of retrieved rows, not the interpolated result
  SQuery *pQuery = pQInfo->runtimeEnv.pQuery;

  // load data from file to msg buffer
  if (isTSCompQuery(pQuery)) {
    int32_t fd = open(pQuery->sdata[0]->data, O_RDONLY, 0666);

    // make sure file exist
    if (FD_VALID(fd)) {
      // keep the offset signed: a failing lseek() reports -1, which must not become a huge length
      int64_t len = lseek(fd, 0, SEEK_END);
      if (len < 0 || lseek(fd, 0, SEEK_SET) < 0) {
        qError("QInfo:%p failed to get size of ts-comp tmp file, path:%s, reason:%s", pQInfo,
               pQuery->sdata[0]->data, strerror(errno));
      } else {
        size_t s = (size_t)len;
        qTrace("QInfo:%p ts comp data return, file:%s, size:%zu", pQInfo, pQuery->sdata[0]->data, s);

        // read() may deliver fewer bytes than requested, so continue until the file is consumed
        size_t done = 0;
        while (done < s) {
          int64_t n = read(fd, data + done, s - done);
          if (n < 0 && errno == EINTR) {
            continue;
          }

          if (n <= 0) {
            qError("QInfo:%p failed to read ts-comp tmp file, path:%s, read:%zu, expect:%zu, reason:%s", pQInfo,
                   pQuery->sdata[0]->data, done, s, strerror(errno));
            break;
          }

          done += (size_t)n;
        }
      }

      close(fd);
      unlink(pQuery->sdata[0]->data);
    } else {
      // todo return the error code to client
      qError("QInfo:%p failed to open tmp file to send ts-comp data to client, path:%s, reason:%s", pQInfo,
             pQuery->sdata[0]->data, strerror(errno));
    }

    // all data returned, set query over
    if (Q_STATUS_EQUAL(pQuery->status, QUERY_COMPLETED)) {
      setQueryStatus(pQuery, QUERY_OVER);
    }
  } else {
    doCopyQueryResultToMsg(pQInfo, pQuery->rec.rows, data);
  }

  pQuery->rec.total += pQuery->rec.rows;

  // the counters are printed as 64-bit values; the casts keep the arguments in step with the format
  qTrace("QInfo:%p current numOfRes rows:%" PRId64 ", total:%" PRId64, pQInfo, (int64_t)pQuery->rec.rows,
         (int64_t)pQuery->rec.total);

  if (pQuery->limit.limit > 0 && pQuery->limit.limit == pQuery->rec.total) {
    qTrace("QInfo:%p results limitation reached, limitation:%"PRId64, pQInfo, pQuery->limit.limit);
    setQueryStatus(pQuery, QUERY_OVER);
  }

  return TSDB_CODE_SUCCESS;
}

5798
/*
 * Build a query handle from a query message.
 *
 * The message is decoded, the output and group-by expressions are created, the
 * tables to scan are collected into groups, and finally the SQInfo is created
 * and initialised.
 *
 * tsdb       storage handle passed on to the tsdb lookup functions
 * vgId       vnode group id passed on to initQInfo()
 * pQueryMsg  query message, must not be NULL
 * pQInfo     receives the new handle; set to NULL whenever an error is returned
 *
 * Returns TSDB_CODE_SUCCESS or an error code.
 */
int32_t qCreateQueryInfo(void *tsdb, int32_t vgId, SQueryTableMsg *pQueryMsg, qinfo_t *pQInfo) {
  assert(pQueryMsg != NULL);

  int32_t code = TSDB_CODE_SUCCESS;

  char *        tagCond = NULL, *tbnameCond = NULL;
  SArray *      pTableIdList = NULL;
  SSqlFuncMsg **pExprMsg = NULL;
  SColIndex *   pGroupColIndex = NULL;
  SColumnInfo*  pTagColumnInfo = NULL;

  if ((code = convertQueryMsg(pQueryMsg, &pTableIdList, &pExprMsg, &tagCond, &tbnameCond, &pGroupColIndex, &pTagColumnInfo)) !=
         TSDB_CODE_SUCCESS) {
    return code;
  }

  if (pQueryMsg->numOfTables <= 0) {
    qError("Invalid number of tables to query, numOfTables:%d", pQueryMsg->numOfTables);
    code = TSDB_CODE_INVALID_QUERY_MSG;
    goto _over;
  }

  if (pTableIdList == NULL || taosArrayGetSize(pTableIdList) == 0) {
    qError("qmsg:%p, SQueryTableMsg wrong format", pQueryMsg);
    code = TSDB_CODE_INVALID_QUERY_MSG;
    goto _over;
  }

  SExprInfo *pExprs = NULL;
  if ((code = createSqlFunctionExprFromMsg(pQueryMsg, &pExprs, pExprMsg, pTagColumnInfo)) != TSDB_CODE_SUCCESS) {
    goto _over;
  }

  SSqlGroupbyExpr *pGroupbyExpr = createGroupbyExprFromMsg(pQueryMsg, pGroupColIndex, &code);
  if ((pGroupbyExpr == NULL && pQueryMsg->numOfGroupCols != 0) || code != TSDB_CODE_SUCCESS) {
    goto _over;
  }

  bool isSTableQuery = false;
  STableGroupInfo groupInfo = {0};

  //todo multitable_query??
  if (TSDB_QUERY_HAS_TYPE(pQueryMsg->queryType, TSDB_QUERY_TYPE_MULTITABLE_QUERY|TSDB_QUERY_TYPE_TABLE_QUERY)) {
    isSTableQuery = TSDB_QUERY_HAS_TYPE(pQueryMsg->queryType, TSDB_QUERY_TYPE_MULTITABLE_QUERY);

    STableIdInfo *id = taosArrayGet(pTableIdList, 0);
    qTrace("qmsg:%p query table, uid:%"PRId64", tid:%d", pQueryMsg, id->uid, id->tid);

    if ((code = tsdbGetOneTableGroup(tsdb, id->uid, &groupInfo)) != TSDB_CODE_SUCCESS) {
      goto _over;
    }
  } else if (TSDB_QUERY_HAS_TYPE(pQueryMsg->queryType, TSDB_QUERY_TYPE_STABLE_QUERY)) {
    isSTableQuery = true;
    // TODO: need a macro from TSDB to check if table is super table,
    // also note there's possibility that only one table in the super table
    if (taosArrayGetSize(pTableIdList) == 1) {
      STableIdInfo *id = taosArrayGet(pTableIdList, 0);
      // if array size is 1 and assert super table

      // group by normal column, do not pass the group by condition to tsdb to group table into different group
      int32_t numOfGroupByCols = pQueryMsg->numOfGroupCols;
      if (pQueryMsg->numOfGroupCols == 1 && !TSDB_COL_IS_TAG(pGroupColIndex->flag)) {
        numOfGroupByCols = 0;
      }

      code = tsdbQuerySTableByTagCond(tsdb, id->uid, tagCond, pQueryMsg->tagCondLen, pQueryMsg->tagNameRelType, tbnameCond, &groupInfo, pGroupColIndex,
                                          numOfGroupByCols);
      if (code != TSDB_CODE_SUCCESS) {
        goto _over;
      }
    } else {
      // groupInfo was zero-initialised above, so the table count has to be taken from the id list;
      // without it the group stays empty and the query is treated as having no table at all
      groupInfo.numOfTables = taosArrayGetSize(pTableIdList);

      SArray* pTableGroup = taosArrayInit(1, POINTER_BYTES);

      // NOTE(review): STableIdInfo entries are pushed into an array sized for STableId, which
      // presumably relies on STableId being the leading part of STableIdInfo - confirm
      SArray* sa = taosArrayInit(groupInfo.numOfTables, sizeof(STableId));
      for(int32_t i = 0; i < groupInfo.numOfTables; ++i) {
        STableIdInfo* tableId = taosArrayGet(pTableIdList, i);
        taosArrayPush(sa, tableId);
      }
      taosArrayPush(pTableGroup, &sa);
      groupInfo.pGroupList = pTableGroup;
    }
  } else {
    assert(0);
  }

  (*pQInfo) = createQInfoImpl(pQueryMsg, pTableIdList, pGroupbyExpr, pExprs, &groupInfo, pTagColumnInfo);
  if ((*pQInfo) == NULL) {
    code = TSDB_CODE_SERV_OUT_OF_MEMORY;
    goto _over;
  }

  code = initQInfo(pQueryMsg, tsdb, vgId, *pQInfo, isSTableQuery);

_over:
  tfree(tagCond);
  tfree(tbnameCond);
  taosArrayDestroy(pTableIdList);

  if (code != TSDB_CODE_SUCCESS) {
    // pQInfo is already freed in initQInfo, but *pQInfo may still hold the stale address
    *pQInfo = NULL;
  }

  // if failed to add ref for all meters in this query, abort current query
  return code;
}

H
hjxilinx 已提交
5905
/*
 * Public entry point that ends the life of a query handle: the cost summary
 * is reported first, then the handle is released through freeQInfo().
 */
void qDestroyQueryInfo(qinfo_t pQInfo) {
  SQInfo *pInfo = (SQInfo *)pQInfo;
  qTrace("QInfo:%p query completed", pInfo);

  // the summary has to be printed while the handle is still intact
  queryCostStatis(pInfo);

  freeQInfo(pInfo);
}

H
hjxilinx 已提交
5913
/*
 * Execute one round of the query behind the handle.
 *
 * A handle that is NULL, carries a foreign signature or is already killed is
 * only logged. Otherwise the matching implementation is run (tag-only query,
 * super-table query or plain table query) and dataReady is posted afterwards.
 */
void qTableQuery(qinfo_t qinfo) {
  SQInfo *pQInfo = (SQInfo *)qinfo;

  bool usable = (pQInfo != NULL) && (pQInfo->signature == pQInfo);
  if (!usable) {
    qTrace("QInfo:%p has been freed, no need to execute", pQInfo);
    return;
  }

  if (isQueryKilled(pQInfo)) {
    qTrace("QInfo:%p it is already killed, abort", pQInfo);
    return;
  }

  qTrace("QInfo:%p query task is launched", pQInfo);

  SQueryRuntimeEnv *pEnv = &pQInfo->runtimeEnv;
  if (onlyQueryTags(pEnv->pQuery)) {
    // todo support the limit/offset
    buildTagQueryResult(pQInfo);
  } else {
    if (pEnv->stableQuery) {
      stableQueryImpl(pQInfo);
    } else {
      tableQueryImpl(pQInfo);
    }
  }

  // let the retrieving side know that this round has finished
  sem_post(&pQInfo->dataReady);
}

H
hjxilinx 已提交
5940
/*
 * Wait until a result of the query is available.
 *
 * Returns TSDB_CODE_INVALID_QHANDLE for a handle that does not pass
 * isValidQInfo(). For a killed query pQInfo->code is returned immediately.
 * Otherwise the call blocks on dataReady and then returns pQInfo->code.
 */
int32_t qRetrieveQueryResultInfo(qinfo_t qinfo) {
  SQInfo *pQInfo = (SQInfo *)qinfo;

  // isValidQInfo() rejects NULL on its own
  if (!isValidQInfo(pQInfo)) {
    return TSDB_CODE_INVALID_QHANDLE;
  }

  SQuery *pQuery = pQInfo->runtimeEnv.pQuery;
  if (isQueryKilled(pQInfo)) {
    qTrace("QInfo:%p query is killed, code:%d", pQInfo, pQInfo->code);
    return pQInfo->code;
  }

  sem_wait(&pQInfo->dataReady);

  // rec.rows is a 64-bit counter, so it is printed with PRId64 instead of %d
  qTrace("QInfo:%p retrieve result info, rowsize:%d, rows:%" PRId64 ", code:%d", pQInfo, pQuery->rowSize,
         (int64_t)pQuery->rec.rows, pQInfo->code);

  return pQInfo->code;
}
5959

H
hjxilinx 已提交
5960
/*
 * Tell whether another retrieve request can deliver data for this query.
 *
 * Returns false for a NULL handle, a handle with a foreign signature, a query
 * whose code is not TSDB_CODE_SUCCESS, and a query in status QUERY_OVER.
 * Returns true while the status is QUERY_RESBUF_FULL or QUERY_COMPLETED.
 * Any other status is unexpected: it trips an assertion and yields false.
 */
bool qHasMoreResultsToRetrieve(qinfo_t qinfo) {
  SQInfo *pQInfo = (SQInfo *)qinfo;

  if (pQInfo == NULL || pQInfo->signature != pQInfo || pQInfo->code != TSDB_CODE_SUCCESS) {
    return false;
  }

  SQuery *pQuery = pQInfo->runtimeEnv.pQuery;
  if (Q_STATUS_EQUAL(pQuery->status, QUERY_OVER)) {
    return false;
  }

  if (Q_STATUS_EQUAL(pQuery->status, QUERY_RESBUF_FULL) || Q_STATUS_EQUAL(pQuery->status, QUERY_COMPLETED)) {
    return true;
  }

  assert(0);

  // reached only when assertions are compiled out; a bool function must still return a value
  return false;
}

5979 5980 5981
/*
 * Build the retrieve response for the current result of the query.
 *
 * qinfo    query handle
 * pRsp     receives the response allocated with rpcMallocCont()
 * contLen  receives the response length: header, result payload, one int32_t
 *          and one STableIdInfo per entry of pQInfo->arrTableIdInfo
 *
 * Returns TSDB_CODE_INVALID_QHANDLE for a handle that does not pass
 * isValidQInfo(), TSDB_CODE_SERV_OUT_OF_MEMORY when the response cannot be
 * allocated (*pRsp is NULL and *contLen is 0 in that case), otherwise the code
 * of the query or of doDumpQueryResult().
 */
int32_t qDumpRetrieveResult(qinfo_t qinfo, SRetrieveTableRsp **pRsp, int32_t *contLen) {
  SQInfo *pQInfo = (SQInfo *)qinfo;

  if (pQInfo == NULL || !isValidQInfo(pQInfo)) {
    return TSDB_CODE_INVALID_QHANDLE;
  }

  SQueryRuntimeEnv* pRuntimeEnv = &pQInfo->runtimeEnv;
  SQuery *pQuery = pQInfo->runtimeEnv.pQuery;
  size_t  size = getResultSize(pQInfo, &pQuery->rec.rows);
  size += sizeof(int32_t);
  size += sizeof(STableIdInfo) * taosArrayGetSize(pQInfo->arrTableIdInfo);
  *contLen = size + sizeof(SRetrieveTableRsp);

  *pRsp = (SRetrieveTableRsp *)rpcMallocCont(*contLen);
  if (*pRsp == NULL) {
    // the response is filled in right below, so a failed allocation has to stop here
    qError("QInfo:%p failed to allocate retrieve response, size:%d", pQInfo, *contLen);
    *contLen = 0;
    return TSDB_CODE_SERV_OUT_OF_MEMORY;
  }

  (*pRsp)->numOfRows = htonl(pQuery->rec.rows);

  int32_t code = pQInfo->code;
  if (code == TSDB_CODE_SUCCESS) {
    (*pRsp)->offset = htobe64(pQuery->limit.offset);
    (*pRsp)->useconds = htobe64(pRuntimeEnv->summary.elapsedTime);
  } else {
    (*pRsp)->offset = 0;
    (*pRsp)->useconds = 0;
  }

  (*pRsp)->precision = htons(pQuery->precision);
  if (pQuery->rec.rows > 0 && code == TSDB_CODE_SUCCESS) {
    code = doDumpQueryResult(pQInfo, (*pRsp)->data);
  } else {
    // nothing to send, or the query failed: no further retrieve is expected
    setQueryStatus(pQuery, QUERY_OVER);
    code = pQInfo->code;
  }

  if (isQueryKilled(pQInfo) || Q_STATUS_EQUAL(pQuery->status, QUERY_OVER)) {
    (*pRsp)->completed = 1;  // notify no more result to client
  }

  return code;
}
H
hjxilinx 已提交
6026 6027 6028 6029

/*
 * Produce the result of a query that reads only tags and/or table names.
 *
 * At most one table group is expected. Nothing is produced when there is no
 * group. Otherwise one row is written per table of the group:
 *
 *  - TSDB_FUNC_TID_TAG as first function: a single var-length output column.
 *    Each row holds uid, tid and vgId followed by the table name or the value
 *    of the requested tag.
 *  - any other function: for every output column, the table name or the tag
 *    value, written to the column buffer sdata[j].
 *
 * Finally rec.rows is set to the number of tables and the query status becomes
 * QUERY_COMPLETED.
 */
static void buildTagQueryResult(SQInfo* pQInfo) {
  SQueryRuntimeEnv *pRuntimeEnv = &pQInfo->runtimeEnv;
  SQuery *          pQuery = pRuntimeEnv->pQuery;

  size_t num = taosArrayGetSize(pQInfo->groupInfo.pGroupList);
  assert(num == 0 || num == 1);
  if (num == 0) {
    return;
  }

  SArray* pa = taosArrayGetP(pQInfo->groupInfo.pGroupList, 0);
  num = taosArrayGetSize(pa);

  assert(num == pQInfo->groupInfo.numOfTables);
  int32_t functionId = pQuery->pSelectExpr[0].base.functionId;
  if (functionId == TSDB_FUNC_TID_TAG) { // return the tags & table Id
    assert(pQuery->numOfOutput == 1);

    SExprInfo* pExprInfo = &pQuery->pSelectExpr[0];
    int32_t rsize = pExprInfo->bytes;

    for(int32_t i = 0; i < num; ++i) {
      SGroupItem *item = taosArrayGet(pa, i);

      char *output = pQuery->sdata[0]->data + i * rsize;
      varDataSetLen(output, rsize - VARSTR_HEADER_SIZE);

      output = varDataVal(output);

      // The three ids are copied byte-wise: the destination sits at an arbitrary offset inside a
      // char buffer and may not be aligned for a direct integer store.
      int64_t uid = item->id.uid;
      memcpy(output, &uid, sizeof(uid));
      output += sizeof(item->id.uid);

      int32_t tid = item->id.tid;
      memcpy(output, &tid, sizeof(tid));
      output += sizeof(item->id.tid);

      int32_t vgId = pQInfo->vgId;
      memcpy(output, &vgId, sizeof(vgId));
      output += sizeof(pQInfo->vgId);

      int16_t bytes = pExprInfo->bytes;
      int16_t type = pExprInfo->type;

      if (pExprInfo->base.colInfo.colId == TSDB_TBNAME_COLUMN_INDEX) {
        char *data = tsdbGetTableName(pQInfo->tsdb, &item->id);
        memcpy(output, data, varDataTLen(data));
      } else {
        char *val = tsdbGetTableTagVal(pQInfo->tsdb, &item->id, pExprInfo->base.colInfo.colId, type, bytes);

        // todo refactor
        if (type == TSDB_DATA_TYPE_BINARY || type == TSDB_DATA_TYPE_NCHAR) {
          if (val == NULL) {
            setVardataNull(output, type);
          } else {
            memcpy(output, val, varDataTLen(val));
          }
        } else {
          if (val == NULL) {
            setNull(output, type, bytes);
          } else {
            memcpy(output, val, bytes);
          }
        }
      }
    }

    pQInfo->tableIndex = pQInfo->groupInfo.numOfTables;
    qTrace("QInfo:%p create (tableId, tag) info completed, rows:%zu", pQInfo, num);

  } else {  // return only the tags|table name etc.
    for(int32_t i = 0; i < num; ++i) {
      SExprInfo* pExprInfo = pQuery->pSelectExpr;
      SGroupItem* item = taosArrayGet(pa, i);

      for(int32_t j = 0; j < pQuery->numOfOutput; ++j) {
        // todo check the return value, refactor codes
        if (pExprInfo[j].base.colInfo.colId == TSDB_TBNAME_COLUMN_INDEX) {
          char* data = tsdbGetTableName(pQInfo->tsdb, &item->id);

          char* dst = pQuery->sdata[j]->data + i * (TSDB_TABLE_NAME_LEN + VARSTR_HEADER_SIZE);
          memcpy(dst, data, varDataTLen(data));
        } else {// todo refactor
          int16_t type = pExprInfo[j].type;
          int16_t bytes = pExprInfo[j].bytes;

          char* data = tsdbGetTableTagVal(pQInfo->tsdb, &item->id, pExprInfo[j].base.colInfo.colId, type, bytes);

          char* dst = pQuery->sdata[j]->data + i * pExprInfo[j].bytes;
          if (type == TSDB_DATA_TYPE_BINARY || type == TSDB_DATA_TYPE_NCHAR) {
            if (data == NULL) {
              setVardataNull(dst, type);
            } else {
              memcpy(dst, data, varDataTLen(data));
            }
          } else {
            if (data == NULL) {
              setNull(dst, type, bytes);
            } else {
              memcpy(dst, data, pExprInfo[j].bytes);
            }
          }
        }
      }
    }

    pQInfo->tableIndex = pQInfo->groupInfo.numOfTables;
    qTrace("QInfo:%p create tag values results completed, rows:%zu", pQInfo, num);
  }

  pQuery->rec.rows = num;
  setQueryStatus(pQuery, QUERY_COMPLETED);
}