qExecutor.c 221.7 KB
Newer Older
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15
/*
 * Copyright (c) 2019 TAOS Data, Inc. <jhtao@taosdata.com>
 *
 * This program is free software: you can use, redistribute, and/or modify
 * it under the terms of the GNU Affero General Public License, version 3
 * or later ("AGPL"), as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.
 *
 * You should have received a copy of the GNU Affero General Public License
 * along with this program. If not, see <http://www.gnu.org/licenses/>.
 */
#include "os.h"
16 17
#include "tcache.h"
#include "tglobal.h"
H
Haojun Liao 已提交
18
#include "qfill.h"
19
#include "taosmsg.h"
20

H
Haojun Liao 已提交
21
#include "exception.h"
22
#include "hash.h"
23 24
#include "qExecutor.h"
#include "qUtil.h"
25
#include "qresultBuf.h"
H
hjxilinx 已提交
26
#include "query.h"
S
slguan 已提交
27
#include "queryLog.h"
H
Haojun Liao 已提交
28
#include "qast.h"
29 30 31
#include "tlosertree.h"
#include "tscompression.h"
#include "ttime.h"
32 33 34 35 36

/**
 * Check whether the primary column is loaded by default; otherwise the program
 * is forced to load the primary column explicitly.
 */
37 38
/* Bit-mask tests: non-zero when any bit of s is set in p / when the tag flag is set in f. */
#define Q_STATUS_EQUAL(p, s)  (((p) & (s)) != 0)
#define TSDB_COL_IS_TAG(f)    (((f)&TSDB_COL_TAG) != 0)

/* True when the direction factor of the query order equals the ascending forward step. */
#define QUERY_IS_ASC_QUERY(q) (GET_FORWARD_DIRECTION_FACTOR((q)->order.order) == QUERY_ASC_FORWARD_STEP)

/* Test or set the scanFlag member of a runtime environment. */
#define IS_MASTER_SCAN(runtime)        ((runtime)->scanFlag == MASTER_SCAN)
#define IS_REVERSE_SCAN(runtime)       ((runtime)->scanFlag == REVERSE_SCAN)
#define SET_MASTER_SCAN_FLAG(runtime)  ((runtime)->scanFlag = MASTER_SCAN)
#define SET_REVERSE_SCAN_FLAG(runtime) ((runtime)->scanFlag = REVERSE_SCAN)

/* Recover the address of the enclosing SQInfo from the address of its runtimeEnv member. */
#define GET_QINFO_ADDR(x) ((SQInfo *)((char *)(x)-offsetof(SQInfo, runtimeEnv)))

/* Row position reached after moving `index` steps of size `step` from (query)->pos. */
#define GET_COL_DATA_POS(query, index, step) ((query)->pos + (index) * (step))

/* Flip n between TSDB_ORDER_ASC and TSDB_ORDER_DESC in place (n is evaluated more than once). */
#define SWITCH_ORDER(n) (((n) = ((n) == TSDB_ORDER_ASC) ? TSDB_ORDER_DESC : TSDB_ORDER_ASC))

#define SDATA_BLOCK_INITIALIZER (SDataBlockInfo) {{0}, 0}

/* Byte width / data type of the colList entry referenced by the colidx-th select expression. */
#define GET_COLUMN_BYTES(query, colidx) \
  ((query)->colList[(query)->pSelectExpr[colidx].base.colInfo.colIndex].bytes)
#define GET_COLUMN_TYPE(query, colidx) ((query)->colList[(query)->pSelectExpr[colidx].base.colInfo.colIndex].type)
57

58
/* Query status flags; each value is a distinct bit so they can be OR-ed together. */
enum {
  // set when the query starts to execute
  QUERY_NOT_COMPLETED = 0x1u,

  /* The result output buffer is full and the current query is paused.
   * This status only exists for group-by clauses and diff/add/division/multiply queries.
   */
  QUERY_RESBUF_FULL = 0x2u,

  /* The query is over.
   * 1. Used by one-row result queries, e.g. count/sum/first/last/avg, etc.
   * 2. Also set once all data within the queried time window has been handled.
   */
  QUERY_COMPLETED = 0x4u,

  /* Used when the result has not been completely returned to the client; usually
   * seen with interval queries that use the interpolation option.
   */
  QUERY_OVER = 0x8u,
};
78 79

/* Outcome codes for comparing timestamps/tags in a join. */
enum {
  TS_JOIN_TS_EQUAL       = 0,
  TS_JOIN_TS_NOT_EQUALS  = 1,
  TS_JOIN_TAG_NOT_EQUALS = 2,
};

85
typedef struct {
86 87 88 89 90 91
  int32_t     status;       // query status
  TSKEY       lastKey;      // the lastKey value before query executed
  STimeWindow w;            // whole query time window
  STimeWindow curWindow;    // current query window
  int32_t     windowIndex;  // index of active time window result for interval query
  STSCursor   cur;
92 93
} SQueryStatusInfo;

H
Haojun Liao 已提交
94
#if 0
/*
 * Fault-injection allocators, compiled out by default. Each wrapper returns NULL
 * whenever rand() % 5 <= 1 and otherwise forwards to the real allocator. The
 * #defines at the end redirect later malloc/calloc calls in this file to them.
 */
static UNUSED_FUNC void *u_malloc(size_t size) {
  uint32_t v = rand();
  if (v % 5 <= 1) {
    return NULL;
  } else {
    return malloc(size);
  }
}

static UNUSED_FUNC void *u_calloc(size_t num, size_t size) {
  uint32_t v = rand();
  if (v % 5 <= 1) {
    return NULL;
  } else {
    return calloc(num, size);
  }
}

#define calloc  u_calloc
#define malloc  u_malloc
#endif
H
Haojun Liao 已提交
116

117
/* Clear the bits in st from (q)->status. */
#define CLEAR_QUERY_STATUS(q, st)   ((q)->status &= (~(st)))

/* Number of table groups / the _index-th group list stored in (q)->tableqinfoGroupInfo.pGroupList. */
#define GET_NUM_OF_TABLEGROUP(q)    taosArrayGetSize((q)->tableqinfoGroupInfo.pGroupList)
#define GET_TABLEGROUP(q, _index)   ((SArray*) taosArrayGetP((q)->tableqinfoGroupInfo.pGroupList, (_index)))

121
static void setQueryStatus(SQuery *pQuery, int8_t status);
H
Haojun Liao 已提交
122
static void finalizeQueryResult(SQueryRuntimeEnv *pRuntimeEnv);
123

H
Haojun Liao 已提交
124
/* A query is an interval query when its intervalTime is positive. */
#define QUERY_IS_INTERVAL_QUERY(_q) ((_q)->intervalTime > 0)

/*
 * Advance the time window tw by one sliding step in the scan direction and reset
 * its length to intervalTime.
 * The previous time window may not be of the same size as pQuery->intervalTime.
 */
#define GET_NEXT_TIMEWINDOW(_q, tw)                                   \
  do {                                                                \
    int32_t factor = GET_FORWARD_DIRECTION_FACTOR((_q)->order.order); \
    (tw)->skey += ((_q)->slidingTime * factor);                       \
    (tw)->ekey = (tw)->skey + ((_q)->intervalTime - 1);               \
  } while (0)

H
hjxilinx 已提交
134
// todo move to utility
135
static int32_t mergeIntoGroupResultImpl(SQInfo *pQInfo, SArray *group);
136

H
hjxilinx 已提交
137
static void setWindowResOutputBuf(SQueryRuntimeEnv *pRuntimeEnv, SWindowResult *pResult);
H
Haojun Liao 已提交
138
static void setWindowResOutputBufInitCtx(SQueryRuntimeEnv *pRuntimeEnv, SWindowResult *pResult);
139 140
static void resetMergeResultBuf(SQuery *pQuery, SQLFunctionCtx *pCtx, SResultInfo *pResultInfo);
static bool functionNeedToExecute(SQueryRuntimeEnv *pRuntimeEnv, SQLFunctionCtx *pCtx, int32_t functionId);
141

142 143 144
static void setExecParams(SQuery *pQuery, SQLFunctionCtx *pCtx, void* inputData, TSKEY *tsCol, SDataBlockInfo* pBlockInfo,
                          SDataStatis *pStatis, void *param, int32_t colIndex);

145
static void initCtxOutputBuf(SQueryRuntimeEnv *pRuntimeEnv);
H
Haojun Liao 已提交
146
static void destroyTableQueryInfo(STableQueryInfo *pTableQueryInfo);
147 148
static void resetCtxOutputBuf(SQueryRuntimeEnv *pRuntimeEnv);
static bool hasMainOutput(SQuery *pQuery);
H
hjxilinx 已提交
149
static void buildTagQueryResult(SQInfo *pQInfo);
150

151
static int32_t setAdditionalInfo(SQInfo *pQInfo, void *pTable, STableQueryInfo *pTableQueryInfo);
152
static int32_t flushFromResultBuf(SQInfo *pQInfo);
153

154
/**
 * Check whether the row at elemPos passes every column filter of the query.
 *
 * A row qualifies when, for each filtered column, at least one of that column's
 * filter elements accepts the value. A NULL value in any filtered column
 * disqualifies the row immediately.
 *
 * @param pQuery   query holding numOfFilterCols entries in pFilterInfo
 * @param elemPos  row index into each filter's pData buffer
 * @return true if all column filters are satisfied, false otherwise
 */
bool doFilterData(SQuery *pQuery, int32_t elemPos) {
  for (int32_t k = 0; k < pQuery->numOfFilterCols; ++k) {
    SSingleColumnFilterInfo *pFilterInfo = &pQuery->pFilterInfo[k];

    char *pElem = pFilterInfo->pData + pFilterInfo->info.bytes * elemPos;
    if (isNull(pElem, pFilterInfo->info.type)) {
      return false;
    }

    bool qualified = false;
    for (int32_t j = 0; j < pFilterInfo->numOfFilters; ++j) {
      SColumnFilterElem *pFilterElem = &pFilterInfo->pFilters[j];

      // the same element is passed for both value arguments of the filter callback
      if (pFilterElem->fp(pFilterElem, pElem, pElem)) {
        qualified = true;
        break;
      }
    }

    if (!qualified) {
      return false;
    }
  }

  return true;
}

/**
 * Return the largest numOfRes among the output columns of the query.
 *
 * @param pRuntimeEnv  runtime environment providing pQuery and the per-output pCtx array
 * @return the maximum number of results over the considered outputs (0 if none)
 */
int64_t getNumOfResult(SQueryRuntimeEnv *pRuntimeEnv) {
  SQuery *pQuery = pRuntimeEnv->pQuery;
  bool    hasMainFunction = hasMainOutput(pQuery);

  int64_t maxOutput = 0;
  for (int32_t j = 0; j < pQuery->numOfOutput; ++j) {
    int32_t functionId = pQuery->pSelectExpr[j].base.functionId;

    /*
     * ts, tag, tagprj function can not decide the output number of current query
     * the number of output result is decided by main output
     */
    if (hasMainFunction &&
        (functionId == TSDB_FUNC_TS || functionId == TSDB_FUNC_TAG || functionId == TSDB_FUNC_TAGPRJ)) {
      continue;
    }

    SResultInfo *pResInfo = GET_RES_INFO(&pRuntimeEnv->pCtx[j]);
    if (pResInfo != NULL && maxOutput < pResInfo->numOfRes) {
      maxOutput = pResInfo->numOfRes;
    }
  }

  assert(maxOutput >= 0);
  return maxOutput;
}

208 209 210 211 212 213 214 215 216
/*
 * The number of results needs to be updated because the offset value was updated.
 *
 * Every output except the ts/tag/tagprj/ts_dummy functions has its numOfRes
 * overwritten with numOfRes; the previous value is asserted to be larger.
 */
void updateNumOfResult(SQueryRuntimeEnv *pRuntimeEnv, int32_t numOfRes) {
  SQuery *pQuery = pRuntimeEnv->pQuery;

  for (int32_t j = 0; j < pQuery->numOfOutput; ++j) {
    SResultInfo *pResInfo = GET_RES_INFO(&pRuntimeEnv->pCtx[j]);

    int16_t functionId = pRuntimeEnv->pCtx[j].functionId;
    if (functionId == TSDB_FUNC_TS || functionId == TSDB_FUNC_TAG || functionId == TSDB_FUNC_TAGPRJ ||
        functionId == TSDB_FUNC_TS_DUMMY) {
      continue;
    }

    assert(pResInfo->numOfRes > numOfRes);
    pResInfo->numOfRes = numOfRes;
  }
}

228 229 230 231 232 233 234 235 236
/* Map a group index onto its result id: ids start at 200000 and advance by 10000 per group. */
static int32_t getGroupResultId(int32_t groupIndex) {
  const int32_t firstId = 200000;
  const int32_t stride = 10000;

  return firstId + (groupIndex * stride);
}

/**
 * Tell whether the group-by expression contains a normal (non-tag) column.
 *
 * @param pGroupbyExpr  group-by description; NULL or an empty column list yields false
 * @return true if any group-by column has flag TSDB_COL_NORMAL
 */
bool isGroupbyNormalCol(SSqlGroupbyExpr *pGroupbyExpr) {
  if (pGroupbyExpr == NULL || pGroupbyExpr->numOfGroupCols == 0) {
    return false;
  }

  for (int32_t i = 0; i < pGroupbyExpr->numOfGroupCols; ++i) {
    SColIndex *pColIndex = taosArrayGet(pGroupbyExpr->columnInfo, i);
    if (pColIndex->flag == TSDB_COL_NORMAL) {
      /*
       * make sure the normal column locates at the second position if tbname exists in group by clause
       */
      if (pGroupbyExpr->numOfGroupCols > 1) {
        assert(pColIndex->colIndex > 0);
      }

      return true;
    }
  }

  return false;
}

/**
 * Look up the data type of the first normal column in the group-by clause.
 *
 * @param pQuery        query whose colList is searched by column id
 * @param pGroupbyExpr  group-by description; must not be NULL
 * @return the column's type, or TSDB_DATA_TYPE_NULL when no normal group-by column
 *         exists or its id is not found in pQuery->colList
 */
int16_t getGroupbyColumnType(SQuery *pQuery, SSqlGroupbyExpr *pGroupbyExpr) {
  assert(pGroupbyExpr != NULL);

  int32_t colId = -2;  // placeholder used when no normal group-by column is found
  int16_t type = TSDB_DATA_TYPE_NULL;

  for (int32_t i = 0; i < pGroupbyExpr->numOfGroupCols; ++i) {
    SColIndex *pColIndex = taosArrayGet(pGroupbyExpr->columnInfo, i);
    if (pColIndex->flag == TSDB_COL_NORMAL) {
      colId = pColIndex->colId;
      break;
    }
  }

  for (int32_t i = 0; i < pQuery->numOfCols; ++i) {
    if (colId == pQuery->colList[i].colId) {
      type = pQuery->colList[i].type;
      break;
    }
  }

  return type;
}

/**
 * Tell whether the query combines at least one selectivity function with a
 * TAG_DUMMY/TS_DUMMY output.
 *
 * @param pQuery  query whose select expressions are inspected
 * @return true when both kinds of output are present
 */
bool isSelectivityWithTagsQuery(SQuery *pQuery) {
  bool    hasTags = false;
  int32_t numOfSelectivity = 0;

  for (int32_t i = 0; i < pQuery->numOfOutput; ++i) {
    int32_t functId = pQuery->pSelectExpr[i].base.functionId;
    if (functId == TSDB_FUNC_TAG_DUMMY || functId == TSDB_FUNC_TS_DUMMY) {
      hasTags = true;
      continue;
    }

    if ((aAggs[functId].nStatus & TSDB_FUNCSTATE_SELECTIVITY) != 0) {
      numOfSelectivity++;
    }
  }

  if (numOfSelectivity > 0 && hasTags) {
    return true;
  }

  return false;
}

302
/* A ts-comp query is recognised by its first select expression being TSDB_FUNC_TS_COMP. */
bool isTSCompQuery(SQuery *pQuery) {
  SExprInfo *pFirstExpr = &pQuery->pSelectExpr[0];
  return pFirstExpr->base.functionId == TSDB_FUNC_TS_COMP;
}
303

304 305 306 307
/**
 * Apply the LIMIT clause to the rows produced so far.
 *
 * When a positive limit is set and rec.total + rec.rows exceeds it, rec.rows is
 * cut down so the total equals the limit and the query is marked QUERY_COMPLETED.
 *
 * @param pRuntimeEnv  runtime environment of the running query
 * @return true if the result was truncated, false otherwise
 */
static bool limitResults(SQueryRuntimeEnv* pRuntimeEnv) {
  SQInfo* pQInfo = GET_QINFO_ADDR(pRuntimeEnv);
  SQuery* pQuery = pRuntimeEnv->pQuery;

  if ((pQuery->limit.limit > 0) && (pQuery->rec.total + pQuery->rec.rows > pQuery->limit.limit)) {
    pQuery->rec.rows = pQuery->limit.limit - pQuery->rec.total;

    qDebug("QInfo:%p discard remain data due to result limitation, limit:%"PRId64", current return:%" PRId64 ", total:%"PRId64,
        pQInfo, pQuery->limit.limit, pQuery->rec.rows, pQuery->rec.total + pQuery->rec.rows);
    assert(pQuery->rec.rows >= 0);
    setQueryStatus(pQuery, QUERY_COMPLETED);
    return true;
  }

  return false;
}

/* Return true if any select expression of the query is a TOP or BOTTOM function. */
static bool isTopBottomQuery(SQuery *pQuery) {
  for (int32_t i = 0; i < pQuery->numOfOutput; ++i) {
    int32_t functionId = pQuery->pSelectExpr[i].base.functionId;
    if (functionId == TSDB_FUNC_TS) {
      continue;
    }

    if (functionId == TSDB_FUNC_TOP || functionId == TSDB_FUNC_BOTTOM) {
      return true;
    }
  }

  return false;
}

H
Haojun Liao 已提交
336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353
/*
 * Return true when the query needs tag values in its output: either it is a
 * single-output TS_COMP query, or one of its select expressions refers to a tag column.
 */
static bool hasTagValOutput(SQuery* pQuery) {
  SExprInfo *pFirst = &pQuery->pSelectExpr[0];

  // ts_comp column required the tag value for join filter
  if (pQuery->numOfOutput == 1 && pFirst->base.functionId == TSDB_FUNC_TS_COMP) {
    return true;
  }

  // set tag value, by which the results are aggregated.
  int32_t i = 0;
  while (i < pQuery->numOfOutput) {
    SExprInfo *pExpr = &pQuery->pSelectExpr[i];
    if (TSDB_COL_IS_TAG(pExpr->base.colInfo.flag)) {
      return true;
    }

    ++i;
  }

  return false;
}

354 355 356 357 358 359 360 361
/**
 * @param pQuery
 * @param col
 * @param pDataBlockInfo
 * @param pStatis
 * @param pColStatis
 * @return
 */
H
Haojun Liao 已提交
362
static bool hasNullValue(SColIndex* pColIndex, SDataStatis *pStatis, SDataStatis **pColStatis) {
H
Haojun Liao 已提交
363
  if (pStatis != NULL && !TSDB_COL_IS_TAG(pColIndex->flag)) {
H
Haojun Liao 已提交
364 365
    *pColStatis = &pStatis[pColIndex->colIndex];
    assert((*pColStatis)->colId == pColIndex->colId);
H
hjxilinx 已提交
366 367
  } else {
    *pColStatis = NULL;
368
  }
369

H
Haojun Liao 已提交
370 371 372 373
  if (TSDB_COL_IS_TAG(pColIndex->flag) || pColIndex->colId == PRIMARYKEY_TIMESTAMP_COL_INDEX) {
    return false;
  }

374 375 376
  if ((*pColStatis) != NULL && (*pColStatis)->numOfNull == 0) {
    return false;
  }
377

378 379 380 381
  return true;
}

/**
 * Locate the window result stored under the given key, or create one.
 *
 * The key is looked up in pWindowResInfo->hashList. On a miss a new slot is
 * appended (only during the master scan), growing the result array first if it
 * is full, and the key -> slot index mapping is added to the hash.
 *
 * @param pRuntimeEnv     runtime environment of the running query
 * @param pWindowResInfo  window result container that is searched and extended
 * @param pData           key bytes
 * @param bytes           key length in bytes
 * @param masterscan      false forbids the creation of new window results
 * @return the window result for the key, or NULL if the key is unknown and
 *         masterscan is false. Does not return on allocation failure: it jumps to
 *         pRuntimeEnv->env with TSDB_CODE_QRY_OUT_OF_MEMORY.
 */
static SWindowResult *doSetTimeWindowFromKey(SQueryRuntimeEnv *pRuntimeEnv, SWindowResInfo *pWindowResInfo, char *pData,
                                             int16_t bytes, bool masterscan) {
  SQuery *pQuery = pRuntimeEnv->pQuery;

  int32_t *p1 = (int32_t *) taosHashGet(pWindowResInfo->hashList, pData, bytes);
  if (p1 != NULL) {
    pWindowResInfo->curIndex = *p1;
  } else {
    if (!masterscan) {  // not master scan, do not add new timewindow
      return NULL;
    }

    // more than the capacity, reallocate the resources
    if (pWindowResInfo->size >= pWindowResInfo->capacity) {
      int64_t newCap = pWindowResInfo->capacity * 1.5;
      if (newCap <= pWindowResInfo->capacity) {
        // a capacity of 0 or 1 does not grow when scaled by 1.5; force at least one new slot
        newCap = (int64_t)pWindowResInfo->capacity + 1;
      }

      char *t = realloc(pWindowResInfo->pResult, newCap * sizeof(SWindowResult));
      if (t == NULL) {
        // The old array is untouched and still owned by pWindowResInfo. Continuing would
        // write past its end, so abort with the same error code and jump target that
        // getDataBlock() uses for its allocation failure.
        longjmp(pRuntimeEnv->env, TSDB_CODE_QRY_OUT_OF_MEMORY);
      }

      pWindowResInfo->pResult = (SWindowResult *)t;

      // zero the newly added tail before initialising each new slot
      int32_t inc = newCap - pWindowResInfo->capacity;
      memset(&pWindowResInfo->pResult[pWindowResInfo->capacity], 0, sizeof(SWindowResult) * inc);

      for (int32_t i = pWindowResInfo->capacity; i < newCap; ++i) {
        createQueryResultInfo(pQuery, &pWindowResInfo->pResult[i], pRuntimeEnv->stableQuery, pRuntimeEnv->interBufSize);
      }

      pWindowResInfo->capacity = newCap;
    }

    // add a new result set for a new group
    pWindowResInfo->curIndex = pWindowResInfo->size++;
    taosHashPut(pWindowResInfo->hashList, pData, bytes, (char *)&pWindowResInfo->curIndex, sizeof(int32_t));
  }

  return getWindowResult(pWindowResInfo, pWindowResInfo->curIndex);
}

/**
 * Get the time window that contains the timestamp ts.
 *
 * Starts from the current window (or, when no window is active yet, from
 * prevSKey) and, if ts falls outside of it, shifts the window start by whole
 * multiples of slidingTime until the window covers ts.
 *
 * @param pWindowResInfo  window result container supplying the current window
 * @param ts              timestamp being handled
 * @param pQuery          query supplying intervalTime, slidingTime and the query range
 * @return the window covering ts; for ascending queries its ekey is capped at
 *         pQuery->window.ekey
 */
static STimeWindow getActiveTimeWindow(SWindowResInfo *pWindowResInfo, int64_t ts, SQuery *pQuery) {
  STimeWindow w = {0};

  if (pWindowResInfo->curIndex == -1) {  // the first window, from the previous stored value
    w.skey = pWindowResInfo->prevSKey;
    w.ekey = w.skey + pQuery->intervalTime - 1;
  } else {
    int32_t slot = curTimeWindow(pWindowResInfo);
    w = getWindowResult(pWindowResInfo, slot)->window;
  }

  if (w.skey > ts || w.ekey < ts) {
    int64_t st = w.skey;

    // window starts after ts: step backwards by a rounded-up number of sliding steps
    if (st > ts) {
      st -= ((st - ts + pQuery->slidingTime - 1) / pQuery->slidingTime) * pQuery->slidingTime;
    }

    // window ends before ts: step forwards by a rounded-up number of sliding steps
    int64_t et = st + pQuery->intervalTime - 1;
    if (et < ts) {
      st += ((ts - et + pQuery->slidingTime - 1) / pQuery->slidingTime) * pQuery->slidingTime;
    }

    w.skey = st;
    w.ekey = w.skey + pQuery->intervalTime - 1;
  }

  /*
   * query border check, skey should not be bounded by the query time range, since the value skey will
   * be used as the time window index value. So we only change ekey of time window accordingly.
   */
  if (w.ekey > pQuery->window.ekey && QUERY_IS_ASC_QUERY(pQuery)) {
    w.ekey = pQuery->window.ekey;
  }

  assert(ts >= w.skey && ts <= w.ekey);

  return w;
}

/**
 * Assign a (page, row) position in the disk-based result buffer to a window
 * result that does not have one yet.
 *
 * The last page of the group identified by sid is reused while it holds fewer
 * than numOfRowsPerPage rows; otherwise a new page is requested.
 *
 * @param pWindowRes        window result; left untouched if pos.pageId is already set
 * @param pResultBuf        disk-based result buffer
 * @param sid               id used to look up / allocate the pages
 * @param numOfRowsPerPage  maximum number of rows stored in one page
 * @return 0 on success (including the already-assigned case), -1 if no page could be obtained
 */
static int32_t addNewWindowResultBuf(SWindowResult *pWindowRes, SDiskbasedResultBuf *pResultBuf, int32_t sid,
                                     int32_t numOfRowsPerPage) {
  if (pWindowRes->pos.pageId != -1) {
    return 0;
  }

  tFilePage *pData = NULL;

  // in the first scan, new space needed for results
  int32_t pageId = -1;
  SIDList list = getDataBufPagesIdList(pResultBuf, sid);

  if (taosArrayGetSize(list) == 0) {
    pData = getNewDataBuf(pResultBuf, sid, &pageId);
  } else {
    pageId = getLastPageId(list);
    pData = GET_RES_BUF_PAGE_BY_ID(pResultBuf, pageId);

    if (pData->num >= numOfRowsPerPage) {
      pData = getNewDataBuf(pResultBuf, sid, &pageId);
      if (pData != NULL) {
        assert(pData->num == 0);  // number of elements must be 0 for new allocated buffer
      }
    }
  }

  if (pData == NULL) {
    return -1;
  }

  // pos.pageId is still -1 here (checked on entry): take the next free row of the page
  pWindowRes->pos.pageId = pageId;
  pWindowRes->pos.rowId = pData->num++;

  return 0;
}

static int32_t setWindowOutputBufByKey(SQueryRuntimeEnv *pRuntimeEnv, SWindowResInfo *pWindowResInfo, int32_t sid,
502
                                       STimeWindow *win, bool masterscan, bool* newWind) {
503 504
  assert(win->skey <= win->ekey);
  SDiskbasedResultBuf *pResultBuf = pRuntimeEnv->pResultBuf;
505

506 507
  SWindowResult *pWindowRes = doSetTimeWindowFromKey(pRuntimeEnv, pWindowResInfo, (char *)&win->skey,
      TSDB_KEYSIZE, masterscan);
508
  if (pWindowRes == NULL) {
509 510 511
    *newWind = false;

    return masterscan? -1:0;
512
  }
513

514
  *newWind = true;
H
Haojun Liao 已提交
515

516 517 518
  // not assign result buffer yet, add new result buffer
  if (pWindowRes->pos.pageId == -1) {
    int32_t ret = addNewWindowResultBuf(pWindowRes, pResultBuf, sid, pRuntimeEnv->numOfRowsPerPage);
H
Haojun Liao 已提交
519
    if (ret != TSDB_CODE_SUCCESS) {
520 521 522
      return -1;
    }
  }
523

524 525
  // set time window for current result
  pWindowRes->window = *win;
526

H
Haojun Liao 已提交
527
  setWindowResOutputBufInitCtx(pRuntimeEnv, pWindowRes);
528 529 530 531 532 533 534 535
  return TSDB_CODE_SUCCESS;
}

/* Return the status record of the window result stored in the given slot (slot must be in [0, size)). */
static SWindowStatus *getTimeWindowResStatus(SWindowResInfo *pWindowResInfo, int32_t slot) {
  assert(slot >= 0 && slot < pWindowResInfo->size);

  SWindowResult *pSlotRes = &pWindowResInfo->pResult[slot];
  return &pSlotRes->status;
}

H
Haojun Liao 已提交
536
/**
 * Count how many rows, starting at pos and moving in scan order, belong to the
 * window that ends at ekey.
 *
 * @param numOfRows  number of timestamps in pData
 * @param searchFn   block search function used to locate ekey
 * @param ekey       end key of the window
 * @param pos        start position inside pData
 * @param order      TSDB_ORDER_ASC scans pData[pos..numOfRows-1]; any other value scans pData[0..pos]
 * @param pData      timestamp column of the block
 * @return the number of rows to step over; asserted to be positive
 */
static FORCE_INLINE int32_t getForwardStepsInBlock(int32_t numOfRows, __block_search_fn_t searchFn, TSKEY ekey, int16_t pos,
                                      int16_t order, int64_t *pData) {
  int32_t forwardStep = 0;

  if (order == TSDB_ORDER_ASC) {
    // `end` is relative to pData[pos]
    int32_t end = searchFn((char*) &pData[pos], numOfRows - pos, ekey, order);
    if (end >= 0) {
      forwardStep = end;

      // include the row whose timestamp equals ekey
      if (pData[end + pos] == ekey) {
        forwardStep += 1;
      }
    }
  } else {
    // `end` is an absolute index into pData
    int32_t end = searchFn((char *)pData, pos + 1, ekey, order);
    if (end >= 0) {
      forwardStep = pos - end;

      if (pData[end] == ekey) {
        forwardStep += 1;
      }
    }
  }

  assert(forwardStep > 0);
  return forwardStep;
}

/**
 * NOTE: the query status only set for the first scan of master scan.
 */
567
static int32_t doCheckQueryCompleted(SQueryRuntimeEnv *pRuntimeEnv, TSKEY lastKey, SWindowResInfo *pWindowResInfo) {
568
  SQuery *pQuery = pRuntimeEnv->pQuery;
H
Haojun Liao 已提交
569
  if (pRuntimeEnv->scanFlag != MASTER_SCAN || (!QUERY_IS_INTERVAL_QUERY(pQuery))) {
570
    return pWindowResInfo->size;
571
  }
572

573
  // no qualified results exist, abort check
574 575
  int32_t numOfClosed = 0;
  
576
  if (pWindowResInfo->size == 0) {
577
    return pWindowResInfo->size;
578
  }
579

580
  // query completed
H
hjxilinx 已提交
581 582
  if ((lastKey >= pQuery->current->win.ekey && QUERY_IS_ASC_QUERY(pQuery)) ||
      (lastKey <= pQuery->current->win.ekey && !QUERY_IS_ASC_QUERY(pQuery))) {
583
    closeAllTimeWindow(pWindowResInfo);
584

585 586 587 588
    pWindowResInfo->curIndex = pWindowResInfo->size - 1;
    setQueryStatus(pQuery, QUERY_COMPLETED | QUERY_RESBUF_FULL);
  } else {  // set the current index to be the last unclosed window
    int32_t i = 0;
589
    int64_t skey = TSKEY_INITIAL_VAL;
590

591 592 593
    for (i = 0; i < pWindowResInfo->size; ++i) {
      SWindowResult *pResult = &pWindowResInfo->pResult[i];
      if (pResult->status.closed) {
594
        numOfClosed += 1;
595 596
        continue;
      }
597

598 599 600 601 602 603 604 605
      if ((pResult->window.ekey <= lastKey && QUERY_IS_ASC_QUERY(pQuery)) ||
          (pResult->window.skey >= lastKey && !QUERY_IS_ASC_QUERY(pQuery))) {
        closeTimeWindow(pWindowResInfo, i);
      } else {
        skey = pResult->window.skey;
        break;
      }
    }
606

607
    // all windows are closed, set the last one to be the skey
608
    if (skey == TSKEY_INITIAL_VAL) {
609 610 611 612 613
      assert(i == pWindowResInfo->size);
      pWindowResInfo->curIndex = pWindowResInfo->size - 1;
    } else {
      pWindowResInfo->curIndex = i;
    }
614

615
    pWindowResInfo->prevSKey = pWindowResInfo->pResult[pWindowResInfo->curIndex].window.skey;
616

617 618
    // the number of completed slots are larger than the threshold, return current generated results to client.
    if (numOfClosed > pWindowResInfo->threshold) {
619
      qDebug("QInfo:%p total result window:%d closed:%d, reached the output threshold %d, return",
620 621
          GET_QINFO_ADDR(pRuntimeEnv), pWindowResInfo->size, numOfClosed, pQuery->rec.threshold);
      
622
      setQueryStatus(pQuery, QUERY_RESBUF_FULL);
623
    } else {
624
      qDebug("QInfo:%p total result window:%d already closed:%d", GET_QINFO_ADDR(pRuntimeEnv), pWindowResInfo->size,
625
             numOfClosed);
626 627
    }
  }
628 629 630 631 632 633 634
  
  // output has reached the limitation, set query completed
  if (pQuery->limit.limit > 0 && (pQuery->limit.limit + pQuery->limit.offset) <= numOfClosed &&
      pRuntimeEnv->scanFlag == MASTER_SCAN) {
    setQueryStatus(pQuery, QUERY_COMPLETED);
  }
  
635
  assert(pWindowResInfo->prevSKey != TSKEY_INITIAL_VAL);
636
  return numOfClosed;
637 638 639
}

/**
 * Count the rows of the current block, starting at startPos and moving in scan
 * order, that fall into the time window ending at ekey.
 *
 * @param pQuery          query supplying the scan order and the current table info
 * @param pDataBlockInfo  info of the block (row count and covered time window)
 * @param pPrimaryColumn  timestamp column of the block
 * @param startPos        first row to consider; must lie inside the block
 * @param ekey            end key of the time window
 * @param searchFn        block search function used when the window ends inside the block
 * @param updateLastKey   when true, pQuery->current->lastKey is moved one step past
 *                        the last counted row (or past the block boundary)
 * @return the number of rows; asserted to be positive
 */
static int32_t getNumOfRowsInTimeWindow(SQuery *pQuery, SDataBlockInfo *pDataBlockInfo, TSKEY *pPrimaryColumn,
                                        int32_t startPos, TSKEY ekey, __block_search_fn_t searchFn, bool updateLastKey) {
  assert(startPos >= 0 && startPos < pDataBlockInfo->rows);

  int32_t num = -1;
  int32_t order = pQuery->order.order;
  int32_t step = GET_FORWARD_DIRECTION_FACTOR(order);

  STableQueryInfo* item = pQuery->current;

  if (QUERY_IS_ASC_QUERY(pQuery)) {
    if (ekey < pDataBlockInfo->window.ekey) {
      // the window ends inside this block
      num = getForwardStepsInBlock(pDataBlockInfo->rows, searchFn, ekey, startPos, order, pPrimaryColumn);
      if (updateLastKey) { // update the last key
        item->lastKey = pPrimaryColumn[startPos + (num - 1)] + step;
      }
    } else {
      // the rest of the block belongs to the window
      num = pDataBlockInfo->rows - startPos;
      if (updateLastKey) {
        item->lastKey = pDataBlockInfo->window.ekey + step;
      }
    }
  } else {  // desc
    if (ekey > pDataBlockInfo->window.skey) {
      num = getForwardStepsInBlock(pDataBlockInfo->rows, searchFn, ekey, startPos, order, pPrimaryColumn);
      if (updateLastKey) {  // update the last key
        item->lastKey = pPrimaryColumn[startPos - (num - 1)] + step;
      }
    } else {
      num = startPos + 1;
      if (updateLastKey) {
        item->lastKey = pDataBlockInfo->window.skey + step;
      }
    }
  }

  assert(num > 0);
  return num;
}

/**
 * Apply every output function of the query to a run of rows in one call each.
 *
 * Nothing is done unless this is the master scan or the window is already closed.
 *
 * @param pRuntimeEnv  runtime environment providing pQuery and the pCtx array
 * @param pStatus      status of the target window
 * @param pWin         target window; its skey becomes nStartQueryTimestamp
 * @param offset       position of the first row in scan order
 * @param forwardStep  number of rows to process
 * @param tsBuf        timestamp column of the block
 * @param numOfTotal   total number of rows in the block
 */
static void doBlockwiseApplyFunctions(SQueryRuntimeEnv *pRuntimeEnv, SWindowStatus *pStatus, STimeWindow *pWin,
                                      int32_t offset, int32_t forwardStep, TSKEY *tsBuf, int32_t numOfTotal) {
  SQuery *        pQuery = pRuntimeEnv->pQuery;
  SQLFunctionCtx *pCtx = pRuntimeEnv->pCtx;

  if (IS_MASTER_SCAN(pRuntimeEnv) || pStatus->closed) {
    for (int32_t k = 0; k < pQuery->numOfOutput; ++k) {
      int32_t functionId = pQuery->pSelectExpr[k].base.functionId;

      pCtx[k].nStartQueryTimestamp = pWin->skey;
      pCtx[k].size = forwardStep;
      // startOffset is always the lowest row index of the run, whatever the scan order
      pCtx[k].startOffset = (QUERY_IS_ASC_QUERY(pQuery)) ? offset : offset - (forwardStep - 1);

      if ((aAggs[functionId].nStatus & TSDB_FUNCSTATE_SELECTIVITY) != 0) {
        pCtx[k].ptsList = &tsBuf[offset];
      }

      // not a whole block involved in query processing, statistics data can not be used
      if (forwardStep != numOfTotal) {
        pCtx[k].preAggVals.isSet = false;
      }

      if (functionNeedToExecute(pRuntimeEnv, &pCtx[k], functionId)) {
        aAggs[functionId].xFunction(&pCtx[k]);
      }
    }
  }
}

/**
 * Apply every output function of the query to the single row at offset.
 *
 * Nothing is done unless this is the master scan or the window is already closed.
 *
 * @param pRuntimeEnv  runtime environment providing pQuery and the pCtx array
 * @param pStatus      status of the target window
 * @param pWin         target window; its skey becomes nStartQueryTimestamp
 * @param offset       row position passed to each function's xFunctionF
 */
static void doRowwiseApplyFunctions(SQueryRuntimeEnv *pRuntimeEnv, SWindowStatus *pStatus, STimeWindow *pWin,
                                    int32_t offset) {
  SQuery *        pQuery = pRuntimeEnv->pQuery;
  SQLFunctionCtx *pCtx = pRuntimeEnv->pCtx;

  if (IS_MASTER_SCAN(pRuntimeEnv) || pStatus->closed) {
    for (int32_t k = 0; k < pQuery->numOfOutput; ++k) {
      pCtx[k].nStartQueryTimestamp = pWin->skey;

      int32_t functionId = pQuery->pSelectExpr[k].base.functionId;
      if (functionNeedToExecute(pRuntimeEnv, &pCtx[k], functionId)) {
        aAggs[functionId].xFunctionF(&pCtx[k], offset);
      }
    }
  }
}

H
Haojun Liao 已提交
725 726
/**
 * Advance pNext to the next time window and find where it starts in the current block.
 *
 * @param pRuntimeEnv     runtime environment providing pQuery
 * @param pNext           [in/out] current window; moved forward by one sliding step and,
 *                        if that window holds no data, moved further to cover the row found
 * @param pDataBlockInfo  info of the current block (row count and covered time window)
 * @param primaryKeys     timestamp column of the block
 * @param searchFn        block search function
 * @param prevPosition    last row position of the previous window, or -1 if unknown
 * @return the start position of the window inside the block, or -1 if the next
 *         window lies outside the current block
 */
static int32_t getNextQualifiedWindow(SQueryRuntimeEnv *pRuntimeEnv, STimeWindow *pNext, SDataBlockInfo *pDataBlockInfo,
    TSKEY *primaryKeys, __block_search_fn_t searchFn, int32_t prevPosition) {
  SQuery *pQuery = pRuntimeEnv->pQuery;

  GET_NEXT_TIMEWINDOW(pQuery, pNext);

  // next time window is not in current block
  if ((pNext->skey > pDataBlockInfo->window.ekey && QUERY_IS_ASC_QUERY(pQuery)) ||
      (pNext->ekey < pDataBlockInfo->window.skey && !QUERY_IS_ASC_QUERY(pQuery))) {
    return -1;
  }

  // key to search for: the window edge in scan direction, clamped by pQuery->window.skey
  TSKEY startKey = -1;
  if (QUERY_IS_ASC_QUERY(pQuery)) {
    startKey = pNext->skey;
    if (startKey < pQuery->window.skey) {
      startKey = pQuery->window.skey;
    }
  } else {
    startKey = pNext->ekey;
    if (startKey > pQuery->window.skey) {
      startKey = pQuery->window.skey;
    }
  }

  int32_t startPos = 0;
  // tumbling time window query, a special case of sliding time window query
  if (pQuery->slidingTime == pQuery->intervalTime && prevPosition != -1) {
    int32_t factor = GET_FORWARD_DIRECTION_FACTOR(pQuery->order.order);
    startPos = prevPosition + factor;
  } else {
    startPos = searchFn((char *)primaryKeys, pDataBlockInfo->rows, startKey, pQuery->order.order);
  }

  /*
   * This time window does not cover any data, try next time window,
   * this case may happen when the time window is too small
   */
  // NOTE(review): startPos is used as an index without a range check -- confirm that
  // searchFn / prevPosition + factor cannot produce a position outside the block here.
  if (QUERY_IS_ASC_QUERY(pQuery) && primaryKeys[startPos] > pNext->ekey) {
    TSKEY next = primaryKeys[startPos];

    pNext->ekey += ((next - pNext->ekey + pQuery->slidingTime - 1)/pQuery->slidingTime) * pQuery->slidingTime;
    pNext->skey = pNext->ekey - pQuery->intervalTime + 1;
  } else if ((!QUERY_IS_ASC_QUERY(pQuery)) && primaryKeys[startPos] < pNext->skey) {
    TSKEY next = primaryKeys[startPos];

    pNext->skey -= ((pNext->skey - next + pQuery->slidingTime - 1) / pQuery->slidingTime) * pQuery->slidingTime;
    pNext->ekey = pNext->skey + pQuery->intervalTime - 1;
  }

  return startPos;
}

H
Haojun Liao 已提交
778
/**
 * Return the end key of a window in scan direction, clamped to the query range.
 *
 * Ascending: pWindow->ekey, but not greater than pQuery->window.ekey.
 * Descending: pWindow->skey, but not less than pQuery->window.ekey.
 */
static FORCE_INLINE TSKEY reviseWindowEkey(SQuery *pQuery, STimeWindow *pWindow) {
  TSKEY ekey = -1;
  if (QUERY_IS_ASC_QUERY(pQuery)) {
    ekey = pWindow->ekey;
    if (ekey > pQuery->window.ekey) {
      ekey = pQuery->window.ekey;
    }
  } else {
    ekey = pWindow->skey;
    if (ekey < pQuery->window.ekey) {
      ekey = pQuery->window.ekey;
    }
  }

  return ekey;
}

H
hjxilinx 已提交
795 796 797 798 799 800 801 802 803 804 805 806 807 808 809
// todo: replace the linear scan with a binary search
/* Return the pData buffer of the column whose id equals colId, or NULL if the block has no such column. */
static void* getDataBlockImpl(SArray* pDataBlock, int32_t colId) {
  int32_t total = taosArrayGetSize(pDataBlock);

  int32_t idx = 0;
  while (idx < total) {
    SColumnInfoData *pCol = taosArrayGet(pDataBlock, idx);
    if (pCol->info.colId == colId) {
      return pCol->pData;
    }

    ++idx;
  }

  return NULL;
}

static char *getDataBlock(SQueryRuntimeEnv *pRuntimeEnv, SArithmeticSupport *sas, int32_t col, int32_t size,
810
                    SArray *pDataBlock) {
dengyihao's avatar
dengyihao 已提交
811 812 813
  if (pDataBlock == NULL) {
    return NULL;
  }
814

H
Haojun Liao 已提交
815
  char *dataBlock = NULL;
H
Haojun Liao 已提交
816
  SQuery *pQuery = pRuntimeEnv->pQuery;
817
  SQLFunctionCtx *pCtx = pRuntimeEnv->pCtx;
818

819
  int32_t functionId = pQuery->pSelectExpr[col].base.functionId;
820
  if (functionId == TSDB_FUNC_ARITHM) {
821
    sas->pArithExpr = &pQuery->pSelectExpr[col];
822

823 824 825 826 827 828
    // set the start offset to be the lowest start position, no matter asc/desc query order
    if (QUERY_IS_ASC_QUERY(pQuery)) {
      pCtx->startOffset = pQuery->pos;
    } else {
      pCtx->startOffset = pQuery->pos - (size - 1);
    }
829

830 831 832 833
    sas->offset  = 0;
    sas->colList = pQuery->colList;
    sas->numOfCols = pQuery->numOfCols;
    sas->data    = calloc(pQuery->numOfCols, POINTER_BYTES);
834

H
Haojun Liao 已提交
835
    if (sas->data == NULL) {
H
Haojun Liao 已提交
836
      finalizeQueryResult(pRuntimeEnv); // clean up allocated resource during query
H
Haojun Liao 已提交
837 838 839
      longjmp(pRuntimeEnv->env, TSDB_CODE_QRY_OUT_OF_MEMORY);
    }

840
    // here the pQuery->colList and sas->colList are identical
H
Haojun Liao 已提交
841
    int32_t numOfCols = taosArrayGetSize(pDataBlock);
842
    for (int32_t i = 0; i < pQuery->numOfCols; ++i) {
843
      SColumnInfo *pColMsg = &pQuery->colList[i];
844

845 846 847 848 849 850 851 852
      dataBlock = NULL;
      for (int32_t k = 0; k < numOfCols; ++k) {  //todo refactor
        SColumnInfoData *p = taosArrayGet(pDataBlock, k);
        if (pColMsg->colId == p->info.colId) {
          dataBlock = p->pData;
          break;
        }
      }
853

854
      assert(dataBlock != NULL);
H
Haojun Liao 已提交
855
      sas->data[i] = dataBlock/* + pQuery->colList[i].bytes*/;  // start from the offset
856
    }
857

858
  } else {  // other type of query function
859
    SColIndex *pCol = &pQuery->pSelectExpr[col].base.colInfo;
H
Haojun Liao 已提交
860
    if (TSDB_COL_IS_TAG(pCol->flag)) {
861 862
      dataBlock = NULL;
    } else {
H
Haojun Liao 已提交
863 864 865 866 867
      SColIndex* pColIndex = &pQuery->pSelectExpr[col].base.colInfo;
      SColumnInfoData *p = taosArrayGet(pDataBlock, pColIndex->colIndex);
      assert(p->info.colId == pColIndex->colId);

      dataBlock = p->pData;
868 869
    }
  }
870

871 872 873 874
  return dataBlock;
}

/**
H
Haojun Liao 已提交
875
 * todo set the last value for pQueryTableInfo as in rowwiseapplyfunctions
876 877
 * @param pRuntimeEnv
 * @param forwardStep
878
 * @param tsCols
879 880 881 882 883
 * @param pFields
 * @param isDiskFileBlock
 * @return                  the incremental number of output value, so it maybe 0 for fixed number of query,
 *                          such as count/min/max etc.
 */
884
static void blockwiseApplyFunctions(SQueryRuntimeEnv *pRuntimeEnv, SDataStatis *pStatis,
885 886
                                       SDataBlockInfo *pDataBlockInfo, SWindowResInfo *pWindowResInfo,
                                       __block_search_fn_t searchFn, SArray *pDataBlock) {
887
  SQLFunctionCtx *pCtx = pRuntimeEnv->pCtx;
888 889
  bool masterScan = IS_MASTER_SCAN(pRuntimeEnv);

890 891
  SQuery *pQuery = pRuntimeEnv->pQuery;
  TSKEY  *tsCols = NULL;
892
  if (pDataBlock != NULL) {
893
    SColumnInfoData* pColInfo = taosArrayGet(pDataBlock, 0);
894
    tsCols = (TSKEY *)(pColInfo->pData);
895
  }
896

897
  SArithmeticSupport *sasArray = calloc((size_t)pQuery->numOfOutput, sizeof(SArithmeticSupport));
H
Haojun Liao 已提交
898
  if (sasArray == NULL) {
H
Haojun Liao 已提交
899
    finalizeQueryResult(pRuntimeEnv); // clean up allocated resource during query
H
Haojun Liao 已提交
900 901
    longjmp(pRuntimeEnv->env, TSDB_CODE_QRY_OUT_OF_MEMORY);
  }
902

903
  for (int32_t k = 0; k < pQuery->numOfOutput; ++k) {
H
hjxilinx 已提交
904
    char *dataBlock = getDataBlock(pRuntimeEnv, &sasArray[k], k, pDataBlockInfo->rows, pDataBlock);
905
    setExecParams(pQuery, &pCtx[k], dataBlock, tsCols, pDataBlockInfo, pStatis, &sasArray[k], k);
906
  }
907

908
  int32_t step = GET_FORWARD_DIRECTION_FACTOR(pQuery->order.order);
H
Haojun Liao 已提交
909 910
  if (QUERY_IS_INTERVAL_QUERY(pQuery)/* && tsCols != NULL*/) {
    TSKEY ts = TSKEY_INITIAL_VAL;
911

H
Haojun Liao 已提交
912 913 914 915 916 917 918 919
    if (tsCols == NULL) {
      ts = QUERY_IS_ASC_QUERY(pQuery)? pDataBlockInfo->window.skey:pDataBlockInfo->window.ekey;
    } else {
      int32_t offset = GET_COL_DATA_POS(pQuery, 0, step);
      ts = tsCols[offset];
    }

    bool        hasTimeWindow = false;
920
    STimeWindow win = getActiveTimeWindow(pWindowResInfo, ts, pQuery);
H
Haojun Liao 已提交
921 922
    if (setWindowOutputBufByKey(pRuntimeEnv, pWindowResInfo, pDataBlockInfo->tid, &win, masterScan, &hasTimeWindow) !=
        TSDB_CODE_SUCCESS) {
dengyihao's avatar
dengyihao 已提交
923
      tfree(sasArray);
H
hjxilinx 已提交
924
      return;
925
    }
926

H
Haojun Liao 已提交
927 928 929
    int32_t forwardStep = 0;
    int32_t startPos = pQuery->pos;

930
    if (hasTimeWindow) {
H
Haojun Liao 已提交
931
      TSKEY ekey = reviseWindowEkey(pQuery, &win);
H
Haojun Liao 已提交
932
      forwardStep = getNumOfRowsInTimeWindow(pQuery, pDataBlockInfo, tsCols, pQuery->pos, ekey, searchFn, true);
933

934
      SWindowStatus *pStatus = getTimeWindowResStatus(pWindowResInfo, curTimeWindow(pWindowResInfo));
H
Haojun Liao 已提交
935
      doBlockwiseApplyFunctions(pRuntimeEnv, pStatus, &win, startPos, forwardStep, tsCols, pDataBlockInfo->rows);
936
    }
937

938 939
    int32_t     index = pWindowResInfo->curIndex;
    STimeWindow nextWin = win;
940

941
    while (1) {
H
Haojun Liao 已提交
942 943
      int32_t prevEndPos = (forwardStep - 1) * step + startPos;
      startPos = getNextQualifiedWindow(pRuntimeEnv, &nextWin, pDataBlockInfo, tsCols, searchFn, prevEndPos);
944 945 946
      if (startPos < 0) {
        break;
      }
947

948
      // null data, failed to allocate more memory buffer
H
Haojun Liao 已提交
949
      hasTimeWindow = false;
H
Haojun Liao 已提交
950 951
      if (setWindowOutputBufByKey(pRuntimeEnv, pWindowResInfo, pDataBlockInfo->tid, &nextWin, masterScan,
                                  &hasTimeWindow) != TSDB_CODE_SUCCESS) {
952 953
        break;
      }
954

955 956 957 958 959
      if (!hasTimeWindow) {
        continue;
      }

      TSKEY ekey = reviseWindowEkey(pQuery, &nextWin);
H
Haojun Liao 已提交
960
      forwardStep = getNumOfRowsInTimeWindow(pQuery, pDataBlockInfo, tsCols, startPos, ekey, searchFn, true);
961

H
Haojun Liao 已提交
962
      SWindowStatus *pStatus = getTimeWindowResStatus(pWindowResInfo, curTimeWindow(pWindowResInfo));
963
      doBlockwiseApplyFunctions(pRuntimeEnv, pStatus, &nextWin, startPos, forwardStep, tsCols, pDataBlockInfo->rows);
964
    }
965

966 967 968 969 970 971 972
    pWindowResInfo->curIndex = index;
  } else {
    /*
     * the sqlfunctionCtx parameters should be set done before all functions are invoked,
     * since the selectivity + tag_prj query needs all parameters been set done.
     * tag_prj function are changed to be TSDB_FUNC_TAG_DUMMY
     */
973
    for (int32_t k = 0; k < pQuery->numOfOutput; ++k) {
974
      int32_t functionId = pQuery->pSelectExpr[k].base.functionId;
975 976 977 978 979
      if (functionNeedToExecute(pRuntimeEnv, &pCtx[k], functionId)) {
        aAggs[functionId].xFunction(&pCtx[k]);
      }
    }
  }
980

981 982 983 984
  for(int32_t i = 0; i < pQuery->numOfOutput; ++i) {
    if (pQuery->pSelectExpr[i].base.functionId != TSDB_FUNC_ARITHM) {
      continue;
    }
985

986 987
    tfree(sasArray[i].data);
  }
988

989 990 991 992 993 994 995
  tfree(sasArray);
}

/*
 * Select (creating it if necessary) the result slot of the group identified by
 * the group-by column value pData and make it the current output buffer.
 *
 * For BOOL/TINYINT/SMALLINT/INT/BIGINT values the numeric value is stored as
 * both skey and ekey of the slot's window; for every other type -1 is stored.
 *
 * Returns TSDB_CODE_SUCCESS, or -1 when the value is null, the slot cannot be
 * obtained, or a new result buffer cannot be added for it.
 */
static int32_t setGroupResultOutputBuf(SQueryRuntimeEnv *pRuntimeEnv, char *pData, int16_t type, int16_t bytes) {
  // null group keys are ignored
  if (isNull(pData, type)) {
    return -1;
  }

  int32_t              groupResultId = 1;
  SDiskbasedResultBuf *pBuf = pRuntimeEnv->pResultBuf;

  int64_t groupKey = -1;
  if (type == TSDB_DATA_TYPE_BOOL || type == TSDB_DATA_TYPE_TINYINT) {
    groupKey = GET_INT8_VAL(pData);
  } else if (type == TSDB_DATA_TYPE_SMALLINT) {
    groupKey = GET_INT16_VAL(pData);
  } else if (type == TSDB_DATA_TYPE_INT) {
    groupKey = GET_INT32_VAL(pData);
  } else if (type == TSDB_DATA_TYPE_BIGINT) {
    groupKey = GET_INT64_VAL(pData);
  }

  SWindowResult *pRes = doSetTimeWindowFromKey(pRuntimeEnv, &pRuntimeEnv->windowResInfo, pData, bytes, true);
  if (pRes == NULL) {
    return -1;
  }

  pRes->window.skey = groupKey;
  pRes->window.ekey = groupKey;

  // no result buffer assigned to this group yet, add a new one
  if (pRes->pos.pageId == -1 &&
      addNewWindowResultBuf(pRes, pBuf, groupResultId, pRuntimeEnv->numOfRowsPerPage) != 0) {
    return -1;
  }

  setWindowResOutputBuf(pRuntimeEnv, pRes);
  initCtxOutputBuf(pRuntimeEnv);
  return TSDB_CODE_SUCCESS;
}

1032
/*
 * Find the data of the (non-tag) group-by column inside pDataBlock.
 *
 * Group-by entries whose flag equals TSDB_COL_TAG are skipped. For every other
 * entry *type and *bytes are set from the matching entry of pQuery->colList,
 * and the block is searched for a column with the same id; the first hit is
 * returned. NULL is returned if no group-by column is present in the block.
 */
static char *getGroupbyColumnData(SQuery *pQuery, int16_t *type, int16_t *bytes, SArray* pDataBlock) {
  SSqlGroupbyExpr *pGroupbyExpr = pQuery->pGroupbyExpr;

  for (int32_t g = 0; g < pGroupbyExpr->numOfGroupCols; ++g) {
    SColIndex* pGroupCol = taosArrayGet(pGroupbyExpr->columnInfo, g);
    if (pGroupCol->flag == TSDB_COL_TAG) {
      continue;
    }

    // locate the group-by column in the query column list
    int32_t targetId = pGroupCol->colId;
    int16_t slot = -1;

    for (int32_t c = 0; c < pQuery->numOfCols && slot < 0; ++c) {
      if (pQuery->colList[c].colId == targetId) {
        slot = c;
      }
    }

    assert(slot >= 0 && slot < pQuery->numOfCols);

    *type = pQuery->colList[slot].type;
    *bytes = pQuery->colList[slot].bytes;

    /*
     * The column index was acquired from the first table of all qualified tables in this vnode during the
     * query prepare stage; the remaining tables may not actually have the required column loaded. Hence the
     * block is searched by column id instead of trusting the index.
     */
    int32_t numOfBlockCols = taosArrayGetSize(pDataBlock);
    int32_t b = 0;

    while (b < numOfBlockCols) {
      SColumnInfoData *pColData = taosArrayGet(pDataBlock, b);
      if (pGroupCol->colId == pColData->info.colId) {
        return pColData->pData;
      }

      ++b;
    }
  }

  return NULL;
}

/*
 * Compare the row at `offset` of the first function context's input buffer
 * with the current element of the timestamp buffer (pRuntimeEnv->pTSBuf).
 *
 * Returns
 *   TS_JOIN_TAG_NOT_EQUALS  the tag of context 0 differs from the element's tag
 *   TS_JOIN_TS_NOT_EQUALS   the row has not yet reached the element's timestamp
 *                           (smaller in ascending, larger in descending order)
 *   TS_JOIN_TS_EQUAL        otherwise; a row that is already past the element's
 *                           timestamp is not expected and trips an assertion
 */
static int32_t doTSJoinFilter(SQueryRuntimeEnv *pRuntimeEnv, int32_t offset) {
  SQuery         *pQuery = pRuntimeEnv->pQuery;
  SQLFunctionCtx *pCtx = pRuntimeEnv->pCtx;
  STSElem         elem = tsBufGetElem(pRuntimeEnv->pTSBuf);

  // compare tag first
  if (pCtx[0].tag.i64Key != elem.tag) {
    return TS_JOIN_TAG_NOT_EQUALS;
  }

  TSKEY key = *(TSKEY *)(pCtx[0].aInputElemBuf + TSDB_KEYSIZE * offset);

#if defined(_DEBUG_VIEW)
  printf("elem in comp ts file:%" PRId64 ", key:%" PRId64 ", tag:%"PRIu64", query order:%d, ts order:%d, traverse:%d, index:%d\n",
         elem.ts, key, elem.tag, pQuery->order.order, pRuntimeEnv->pTSBuf->tsOrder,
         pRuntimeEnv->pTSBuf->cur.order, pRuntimeEnv->pTSBuf->cur.tsIndex);
#endif

  bool behind = QUERY_IS_ASC_QUERY(pQuery) ? (key < elem.ts) : (key > elem.ts);
  if (behind) {
    return TS_JOIN_TS_NOT_EQUALS;
  }

  if (key != elem.ts) {
    assert(false);
  }

  return TS_JOIN_TS_EQUAL;
}

/*
 * Decide whether the function `functionId` has to be invoked on the current
 * input. The checks are applied in this order:
 *
 *   1. TSDB_FUNC_TS always runs.
 *   2. A context whose result is already complete, and the TAG_DUMMY/TS_DUMMY
 *      placeholders, never run.
 *   3. first/first_dst run only for ascending queries.
 *   4. last/last_dst run only when param[0] equals the query order.
 *   5. Everything else runs unless this is the reverse scan.
 */
static bool functionNeedToExecute(SQueryRuntimeEnv *pRuntimeEnv, SQLFunctionCtx *pCtx, int32_t functionId) {
  SQuery* pQuery = pRuntimeEnv->pQuery;

  // in case of timestamp column, always generated results.
  if (functionId == TSDB_FUNC_TS) {
    return true;
  }

  if (GET_RES_INFO(pCtx)->complete) {
    return false;
  }

  if (functionId == TSDB_FUNC_TAG_DUMMY || functionId == TSDB_FUNC_TS_DUMMY) {
    return false;
  }

  bool isFirst = (functionId == TSDB_FUNC_FIRST_DST || functionId == TSDB_FUNC_FIRST);
  if (isFirst) {
    return QUERY_IS_ASC_QUERY(pQuery);
  }

  // todo add comments
  bool isLast = (functionId == TSDB_FUNC_LAST_DST || functionId == TSDB_FUNC_LAST);
  if (isLast) {
    return pCtx->param[0].i64Key == pQuery->order.order;
  }

  // none of the remaining functions is executed during the supplementary (reverse) scan
  return IS_REVERSE_SCAN(pRuntimeEnv) ? false : true;
}

1139 1140
/*
 * Apply all output functions of the query to the current data block one row at
 * a time.
 *
 * For every row (visited in scan direction starting at pQuery->pos):
 *   - if a timestamp buffer is attached, the row must pass doTSJoinFilter;
 *     a tag mismatch ends the scan of this block;
 *   - if column filters exist, the row must pass doFilterData;
 *   - interval queries feed the row into every time window that contains its
 *     timestamp; other queries (optionally after selecting the group of the
 *     group-by column value) invoke the per-row entry point of each function.
 *
 * Afterwards the last key of the current table is advanced past the last row
 * position that was visited.
 *
 * If the per-column support array cannot be allocated, the query result is
 * finalized and control leaves through longjmp(pRuntimeEnv->env, ...).
 */
static void rowwiseApplyFunctions(SQueryRuntimeEnv *pRuntimeEnv, SDataStatis *pStatis, SDataBlockInfo *pDataBlockInfo,
    SWindowResInfo *pWindowResInfo, SArray *pDataBlock) {
  SQLFunctionCtx *pCtx = pRuntimeEnv->pCtx;
  bool            masterScan = IS_MASTER_SCAN(pRuntimeEnv);

  SQuery          *pQuery = pRuntimeEnv->pQuery;
  STableQueryInfo *pTableInfo = pQuery->current;

  // the first column only serves as timestamp column if it actually has the timestamp type
  SColumnInfoData *pFirstCol = (SColumnInfoData *)taosArrayGet(pDataBlock, 0);
  TSKEY           *tsCols = NULL;
  if (pFirstCol->info.type == TSDB_DATA_TYPE_TIMESTAMP) {
    tsCols = (TSKEY*) pFirstCol->pData;
  }

  bool groupByCol = pRuntimeEnv->groupbyNormalCol;

  SArithmeticSupport *sasArray = calloc((size_t)pQuery->numOfOutput, sizeof(SArithmeticSupport));
  if (sasArray == NULL) {
    finalizeQueryResult(pRuntimeEnv); // clean up allocated resource during query
    longjmp(pRuntimeEnv->env, TSDB_CODE_QRY_OUT_OF_MEMORY);
  }

  int16_t type = 0;
  int16_t bytes = 0;
  char   *groupColData = NULL;
  if (groupByCol) {
    groupColData = getGroupbyColumnData(pQuery, &type, &bytes, pDataBlock);
  }

  for (int32_t k = 0; k < pQuery->numOfOutput; ++k) {
    char *input = getDataBlock(pRuntimeEnv, &sasArray[k], k, pDataBlockInfo->rows, pDataBlock);
    setExecParams(pQuery, &pCtx[k], input, tsCols, pDataBlockInfo, pStatis, &sasArray[k], k);
  }

  // bind the input column data of every filter
  for (int32_t f = 0; f < pQuery->numOfFilterCols; ++f) {
    SSingleColumnFilterInfo *pFilter = &pQuery->pFilterInfo[f];
    pFilter->pData = getDataBlockImpl(pDataBlock, pFilter->info.colId);
    assert(pFilter->pData != NULL);
  }

  int32_t step = GET_FORWARD_DIRECTION_FACTOR(pQuery->order.order);

  // from top to bottom in desc
  // from bottom to top in asc order
  if (pRuntimeEnv->pTSBuf != NULL) {
    SQInfo *pQInfo = (SQInfo *)GET_QINFO_ADDR(pRuntimeEnv);
    qDebug("QInfo:%p process data rows, numOfRows:%d, query order:%d, ts comp order:%d", pQInfo, pDataBlockInfo->rows,
           pQuery->order.order, pRuntimeEnv->pTSBuf->cur.order);
  }

  int32_t rowPos = -1;

  for (int32_t row = 0; row < pDataBlockInfo->rows; ++row) {
    rowPos = GET_COL_DATA_POS(pQuery, row, step);

    if (pRuntimeEnv->pTSBuf != NULL) {
      int32_t joinRes = doTSJoinFilter(pRuntimeEnv, rowPos);
      if (joinRes == TS_JOIN_TAG_NOT_EQUALS) {
        break;
      }

      if (joinRes == TS_JOIN_TS_NOT_EQUALS) {
        continue;
      }

      assert(joinRes == TS_JOIN_TS_EQUAL);
    }

    if (pQuery->numOfFilterCols > 0 && (!doFilterData(pQuery, rowPos))) {
      continue;
    }

    if (QUERY_IS_INTERVAL_QUERY(pQuery)) {
      // interval window query: the primary timestamp decides the time window
      int64_t     ts = tsCols[rowPos];
      STimeWindow win = getActiveTimeWindow(pWindowResInfo, ts, pQuery);

      bool    hasTimeWindow = false;
      int32_t ret = setWindowOutputBufByKey(pRuntimeEnv, pWindowResInfo, pDataBlockInfo->tid, &win, masterScan, &hasTimeWindow);

      // null data, too many state code, or no window available for this row
      if (ret != TSDB_CODE_SUCCESS || !hasTimeWindow) {
        continue;
      }

      SWindowStatus *pStatus = getTimeWindowResStatus(pWindowResInfo, curTimeWindow(pWindowResInfo));
      doRowwiseApplyFunctions(pRuntimeEnv, pStatus, &win, rowPos);

      // the row may also belong to following windows; restore the current window index afterwards
      STimeWindow nextWin = win;
      int32_t     savedIndex = pWindowResInfo->curIndex;

      for (;;) {
        GET_NEXT_TIMEWINDOW(pQuery, &nextWin);

        bool outOfQueryRange = QUERY_IS_ASC_QUERY(pQuery) ? (nextWin.skey > pQuery->window.ekey)
                                                          : (nextWin.skey < pQuery->window.ekey);
        if (outOfQueryRange) {
          break;
        }

        bool covered = (ts >= nextWin.skey && ts <= nextWin.ekey);
        if (!covered) {
          break;
        }

        // null data, failed to allocate more memory buffer
        hasTimeWindow = false;
        if (setWindowOutputBufByKey(pRuntimeEnv, pWindowResInfo, pDataBlockInfo->tid, &nextWin, masterScan, &hasTimeWindow) != TSDB_CODE_SUCCESS) {
          break;
        }

        if (hasTimeWindow) {
          pStatus = getTimeWindowResStatus(pWindowResInfo, curTimeWindow(pWindowResInfo));
          doRowwiseApplyFunctions(pRuntimeEnv, pStatus, &nextWin, rowPos);
        }
      }

      pWindowResInfo->curIndex = savedIndex;
    } else {
      // other queries: first pick the group this row belongs to according to its group-by column value
      if (groupByCol) {
        char *groupVal = groupColData + bytes * rowPos;

        // null data, too many state code
        if (setGroupResultOutputBuf(pRuntimeEnv, groupVal, type, bytes) != TSDB_CODE_SUCCESS) {
          continue;
        }
      }

      for (int32_t k = 0; k < pQuery->numOfOutput; ++k) {
        int32_t functionId = pQuery->pSelectExpr[k].base.functionId;
        if (!functionNeedToExecute(pRuntimeEnv, &pCtx[k], functionId)) {
          continue;
        }

        aAggs[functionId].xFunctionF(&pCtx[k], rowPos);
      }
    }

    // if timestamp filter list is exhausted, quit current query
    if (pRuntimeEnv->pTSBuf != NULL && !tsBufNextPos(pRuntimeEnv->pTSBuf)) {
      setQueryStatus(pQuery, QUERY_COMPLETED);
      break;
    }
  }

  assert(rowPos >= 0);
  if (tsCols == NULL) {
    TSKEY blockEnd = QUERY_IS_ASC_QUERY(pQuery)? pDataBlockInfo->window.ekey:pDataBlockInfo->window.skey;
    pTableInfo->lastKey = blockEnd + step;
  } else {
    pTableInfo->lastKey = tsCols[rowPos] + step;
  }

  // todo refactor: extract method
  // only arithmetic expressions own a column pointer array
  for(int32_t i = 0; i < pQuery->numOfOutput; ++i) {
    if (pQuery->pSelectExpr[i].base.functionId == TSDB_FUNC_ARITHM) {
      tfree(sasArray[i].data);
    }
  }

  free(sasArray);
}

static int32_t tableApplyFunctionsOnBlock(SQueryRuntimeEnv *pRuntimeEnv, SDataBlockInfo *pDataBlockInfo,
H
hjxilinx 已提交
1304
                                          SDataStatis *pStatis, __block_search_fn_t searchFn, SArray *pDataBlock) {
H
hjxilinx 已提交
1305
  SQuery *pQuery = pRuntimeEnv->pQuery;
H
hjxilinx 已提交
1306 1307 1308
  
  STableQueryInfo* pTableQInfo = pQuery->current;
  SWindowResInfo*  pWindowResInfo = &pRuntimeEnv->windowResInfo;
H
hjxilinx 已提交
1309
  
H
Haojun Liao 已提交
1310
  if (pQuery->numOfFilterCols > 0 || pRuntimeEnv->pTSBuf != NULL || pRuntimeEnv->groupbyNormalCol) {
1311
    rowwiseApplyFunctions(pRuntimeEnv, pStatis, pDataBlockInfo, pWindowResInfo, pDataBlock);
1312
  } else {
1313
    blockwiseApplyFunctions(pRuntimeEnv, pStatis, pDataBlockInfo, pWindowResInfo, searchFn, pDataBlock);
1314
  }
1315

1316
  // update the lastkey of current table
1317
  TSKEY lastKey = QUERY_IS_ASC_QUERY(pQuery) ? pDataBlockInfo->window.ekey : pDataBlockInfo->window.skey;
H
hjxilinx 已提交
1318
  pTableQInfo->lastKey = lastKey + GET_FORWARD_DIRECTION_FACTOR(pQuery->order.order);
1319

1320
  // interval query with limit applied
1321
  int32_t numOfRes = 0;
H
Haojun Liao 已提交
1322
  if (QUERY_IS_INTERVAL_QUERY(pQuery)) {
1323 1324 1325
    numOfRes = doCheckQueryCompleted(pRuntimeEnv, lastKey, pWindowResInfo);
  } else {
    numOfRes = getNumOfResult(pRuntimeEnv);
1326

1327 1328 1329 1330
    // update the number of output result
    if (numOfRes > 0 && pQuery->checkBuffer == 1) {
      assert(numOfRes >= pQuery->rec.rows);
      pQuery->rec.rows = numOfRes;
1331

1332 1333 1334
      if (numOfRes >= pQuery->rec.threshold) {
        setQueryStatus(pQuery, QUERY_RESBUF_FULL);
      }
1335

1336 1337 1338
      if ((pQuery->limit.limit >= 0) && (pQuery->limit.limit + pQuery->limit.offset) <= numOfRes) {
        setQueryStatus(pQuery, QUERY_COMPLETED);
      }
H
Haojun Liao 已提交
1339
    }
1340
  }
1341

1342
  return numOfRes;
1343 1344
}

H
Haojun Liao 已提交
1345
void setExecParams(SQuery *pQuery, SQLFunctionCtx *pCtx, void* inputData, TSKEY *tsCol, SDataBlockInfo* pBlockInfo,
1346 1347 1348 1349 1350 1351
                   SDataStatis *pStatis, void *param, int32_t colIndex) {
  
  int32_t functionId = pQuery->pSelectExpr[colIndex].base.functionId;
  int32_t colId = pQuery->pSelectExpr[colIndex].base.colInfo.colId;
  
  SDataStatis *tpField = NULL;
H
Haojun Liao 已提交
1352
  pCtx->hasNull = hasNullValue(&pQuery->pSelectExpr[colIndex].base.colInfo, pStatis, &tpField);
1353
  pCtx->aInputElemBuf = inputData;
1354

1355
  if (tpField != NULL) {
H
Haojun Liao 已提交
1356
    pCtx->preAggVals.isSet  = true;
1357 1358
    pCtx->preAggVals.statis = *tpField;
    assert(pCtx->preAggVals.statis.numOfNull <= pBlockInfo->rows);
1359 1360 1361
  } else {
    pCtx->preAggVals.isSet = false;
  }
1362

H
Haojun Liao 已提交
1363 1364
  pCtx->preAggVals.dataBlockLoaded = (inputData != NULL);

H
Haojun Liao 已提交
1365 1366 1367
  // limit/offset query will affect this value
  pCtx->startOffset = QUERY_IS_ASC_QUERY(pQuery) ? pQuery->pos:0;
  pCtx->size = QUERY_IS_ASC_QUERY(pQuery) ? pBlockInfo->rows - pQuery->pos : pQuery->pos + 1;
1368

1369 1370
  uint32_t status = aAggs[functionId].nStatus;
  if (((status & (TSDB_FUNCSTATE_SELECTIVITY | TSDB_FUNCSTATE_NEED_TS)) != 0) && (tsCol != NULL)) {
H
Haojun Liao 已提交
1371
    pCtx->ptsList = tsCol;
1372
  }
1373

1374 1375 1376 1377 1378
  if (functionId >= TSDB_FUNC_FIRST_DST && functionId <= TSDB_FUNC_LAST_DST) {
    // last_dist or first_dist function
    // store the first&last timestamp into the intermediate buffer [1], the true
    // value may be null but timestamp will never be null
  } else if (functionId == TSDB_FUNC_TOP || functionId == TSDB_FUNC_BOTTOM || functionId == TSDB_FUNC_TWA ||
1379
             functionId == TSDB_FUNC_DIFF || (functionId >= TSDB_FUNC_RATE && functionId <= TSDB_FUNC_AVG_IRATE)) {
1380
    /*
H
Haojun Liao 已提交
1381
     * least squares function needs two columns of input, currently, the x value of linear equation is set to
1382 1383 1384 1385 1386 1387 1388 1389 1390 1391
     * timestamp column, and the y-value is the column specified in pQuery->pSelectExpr[i].colIdxInBuffer
     *
     * top/bottom function needs timestamp to indicate when the
     * top/bottom values emerge, so does diff function
     */
    if (functionId == TSDB_FUNC_TWA) {
      STwaInfo *pTWAInfo = GET_RES_INFO(pCtx)->interResultBuf;
      pTWAInfo->SKey = pQuery->window.skey;
      pTWAInfo->EKey = pQuery->window.ekey;
    }
1392

1393 1394
  } else if (functionId == TSDB_FUNC_ARITHM) {
    pCtx->param[1].pz = param;
H
Haojun Liao 已提交
1395 1396 1397 1398 1399 1400
  } else if (functionId == TSDB_FUNC_SPREAD) {  // set the statistics data for primary time stamp column
    if (colId == PRIMARYKEY_TIMESTAMP_COL_INDEX) {
      pCtx->preAggVals.isSet  = true;
      pCtx->preAggVals.statis.min = pBlockInfo->window.skey;
      pCtx->preAggVals.statis.max = pBlockInfo->window.ekey;
    }
1401 1402 1403 1404 1405 1406 1407 1408 1409 1410 1411 1412 1413
  } else if (functionId == TSDB_FUNC_INTERP) {
    SInterpInfoDetail *pInterpInfo = GET_RES_INFO(pCtx)->interResultBuf;
    pInterpInfo->type = pQuery->fillType;
    pInterpInfo->ts = pQuery->window.skey;
    pInterpInfo->primaryCol = (colId == PRIMARYKEY_TIMESTAMP_COL_INDEX);
  
    if (pQuery->fillVal != NULL) {
      if (isNull((const char*) &pQuery->fillVal[colIndex], pCtx->inputType)) {
        pCtx->param[1].nType = TSDB_DATA_TYPE_NULL;
      } else { // todo refactor, tVariantCreateFromBinary should handle the NULL value
        tVariantCreateFromBinary(&pCtx->param[1], (char*) &pQuery->fillVal[colIndex], pCtx->inputBytes, pCtx->inputType);
      }
    }
1414
  }
1415

1416 1417 1418 1419 1420 1421
#if defined(_DEBUG_VIEW)
  //  int64_t *tsList = (int64_t *)primaryColumnData;
//  int64_t  s = tsList[0];
//  int64_t  e = tsList[size - 1];

//    if (IS_DATA_BLOCK_LOADED(blockStatus)) {
1422
//        qDebug("QInfo:%p query ts:%lld-%lld, offset:%d, rows:%d, bstatus:%d,
1423 1424 1425
//        functId:%d", GET_QINFO_ADDR(pQuery),
//               s, e, startOffset, size, blockStatus, functionId);
//    } else {
1426
//        qDebug("QInfo:%p block not loaded, bstatus:%d",
1427 1428 1429 1430 1431 1432
//        GET_QINFO_ADDR(pQuery), blockStatus);
//    }
#endif
}

// set the output buffer for the selectivity + tag query
H
Haojun Liao 已提交
1433 1434 1435
static void setCtxTagColumnInfo(SQueryRuntimeEnv *pRuntimeEnv, SQLFunctionCtx *pCtx) {
  SQuery* pQuery = pRuntimeEnv->pQuery;

1436
  if (isSelectivityWithTagsQuery(pQuery)) {
1437
    int32_t num = 0;
1438
    int16_t tagLen = 0;
1439 1440
    
    SQLFunctionCtx *p = NULL;
1441
    SQLFunctionCtx **pTagCtx = calloc(pQuery->numOfOutput, POINTER_BYTES);
H
Haojun Liao 已提交
1442

1443
    for (int32_t i = 0; i < pQuery->numOfOutput; ++i) {
1444
      SSqlFuncMsg *pSqlFuncMsg = &pQuery->pSelectExpr[i].base;
1445
      
1446 1447 1448 1449 1450 1451 1452 1453 1454 1455 1456 1457 1458
      if (pSqlFuncMsg->functionId == TSDB_FUNC_TAG_DUMMY || pSqlFuncMsg->functionId == TSDB_FUNC_TS_DUMMY) {
        tagLen += pCtx[i].outputBytes;
        pTagCtx[num++] = &pCtx[i];
      } else if ((aAggs[pSqlFuncMsg->functionId].nStatus & TSDB_FUNCSTATE_SELECTIVITY) != 0) {
        p = &pCtx[i];
      } else if (pSqlFuncMsg->functionId == TSDB_FUNC_TS || pSqlFuncMsg->functionId == TSDB_FUNC_TAG) {
        // tag function may be the group by tag column
        // ts may be the required primary timestamp column
        continue;
      } else {
        // the column may be the normal column, group by normal_column, the functionId is TSDB_FUNC_PRJ
      }
    }
dengyihao's avatar
dengyihao 已提交
1459 1460 1461 1462 1463 1464 1465
    if (p != NULL) {
      p->tagInfo.pTagCtxList = pTagCtx;
      p->tagInfo.numOfTagCols = num;
      p->tagInfo.tagsLen = tagLen;
    } else {
      tfree(pTagCtx); 
    }
1466 1467 1468
  }
}

H
Haojun Liao 已提交
1469 1470
/*
 * Hand every result descriptor its slice of the shared intermediate buffer.
 *
 * buf is carved up sequentially: descriptor i receives interBytes of output
 * column i, starting right after the slice of descriptor i - 1.
 */
static FORCE_INLINE void setWindowResultInfo(SResultInfo *pResultInfo, SQuery *pQuery, bool isStableQuery, char* buf) {
  char   *cursor = buf;
  int32_t col = 0;

  while (col < pQuery->numOfOutput) {
    int32_t sliceLen = pQuery->pSelectExpr[col].interBytes;
    setResultInfoBuf(&pResultInfo[col], sliceLen, isStableQuery, cursor);

    cursor += sliceLen;
    ++col;
  }
}

1479
/*
 * Allocate and initialise the function contexts and the result descriptors of
 * the runtime environment.
 *
 * For every output column the context receives its input/output type and
 * length, the query order, the function id and a copy of the function
 * parameters. The output offset table (pRuntimeEnv->offset) is filled with the
 * running sum of the output lengths. The intermediate result buffer is laid out
 * directly behind the SResultInfo array in a single allocation.
 *
 * @param pRuntimeEnv  runtime environment; pRuntimeEnv->pQuery is read here
 * @param order        stored in param[2] of top/bottom/diff contexts
 * @return TSDB_CODE_SUCCESS, or TSDB_CODE_QRY_OUT_OF_MEMORY when one of the two
 *         allocations fails (both are released in that case)
 */
static int32_t setupQueryRuntimeEnv(SQueryRuntimeEnv *pRuntimeEnv, int16_t order) {
  qDebug("QInfo:%p setup runtime env", GET_QINFO_ADDR(pRuntimeEnv));
  SQuery *pQuery = pRuntimeEnv->pQuery;

  // one block: SResultInfo[numOfOutput] followed by the intermediate result buffers
  size_t size = pRuntimeEnv->interBufSize + pQuery->numOfOutput * sizeof(SResultInfo);

  pRuntimeEnv->resultInfo = calloc(1, size);
  pRuntimeEnv->pCtx = (SQLFunctionCtx *)calloc(pQuery->numOfOutput, sizeof(SQLFunctionCtx));

  if (pRuntimeEnv->resultInfo == NULL || pRuntimeEnv->pCtx == NULL) {
    goto _clean;
  }

  pRuntimeEnv->offset[0] = 0;
  for (int32_t i = 0; i < pQuery->numOfOutput; ++i) {
    SSqlFuncMsg *pSqlFuncMsg = &pQuery->pSelectExpr[i].base;

    SQLFunctionCtx *pCtx = &pRuntimeEnv->pCtx[i];
    SColIndex* pIndex = &pSqlFuncMsg->colInfo;

    int32_t index = pSqlFuncMsg->colInfo.colIndex;
    if (TSDB_COL_IS_TAG(pIndex->flag)) {
      if (pIndex->colId == TSDB_TBNAME_COLUMN_INDEX) {  // todo refactor
        SSchema s = tGetTableNameColumnSchema();

        pCtx->inputBytes = s.bytes;
        pCtx->inputType = s.type;
      } else {
        pCtx->inputBytes = pQuery->tagColList[index].bytes;
        pCtx->inputType = pQuery->tagColList[index].type;
      }

    } else {
      pCtx->inputBytes = pQuery->colList[index].bytes;
      pCtx->inputType = pQuery->colList[index].type;
    }

    assert(isValidDataType(pCtx->inputType));
    pCtx->ptsOutputBuf = NULL;

    pCtx->outputBytes = pQuery->pSelectExpr[i].bytes;
    pCtx->outputType = pQuery->pSelectExpr[i].type;

    pCtx->order = pQuery->order.order;
    pCtx->functionId = pSqlFuncMsg->functionId;

    pCtx->numOfParams = pSqlFuncMsg->numOfParams;
    for (int32_t j = 0; j < pCtx->numOfParams; ++j) {
      int16_t type = pSqlFuncMsg->arg[j].argType;
      int16_t bytes = pSqlFuncMsg->arg[j].argBytes;
      if (type == TSDB_DATA_TYPE_BINARY || type == TSDB_DATA_TYPE_NCHAR) {
        // the payload must come from the j-th argument, matching the type and length read above
        tVariantCreateFromBinary(&pCtx->param[j], pSqlFuncMsg->arg[j].argValue.pz, bytes, type);
      } else {
        tVariantCreateFromBinary(&pCtx->param[j], (char *)&pSqlFuncMsg->arg[j].argValue.i64, bytes, type);
      }
    }

    // set the order information for top/bottom query
    int32_t functionId = pCtx->functionId;

    if (functionId == TSDB_FUNC_TOP || functionId == TSDB_FUNC_BOTTOM || functionId == TSDB_FUNC_DIFF) {
      // these functions expect the timestamp as the first output column
      int32_t f = pQuery->pSelectExpr[0].base.functionId;
      assert(f == TSDB_FUNC_TS || f == TSDB_FUNC_TS_DUMMY);

      pCtx->param[2].i64Key = order;
      pCtx->param[2].nType = TSDB_DATA_TYPE_BIGINT;
      pCtx->param[3].i64Key = functionId;
      pCtx->param[3].nType = TSDB_DATA_TYPE_BIGINT;

      pCtx->param[1].i64Key = pQuery->order.orderColId;
    }

    if (i > 0) {
      pRuntimeEnv->offset[i] = pRuntimeEnv->offset[i - 1] + pRuntimeEnv->pCtx[i - 1].outputBytes;
    }
  }

  // the intermediate buffers start right behind the SResultInfo array
  char* buf = (char*) pRuntimeEnv->resultInfo + sizeof(SResultInfo) * pQuery->numOfOutput;

  // set the intermediate result output buffer
  setWindowResultInfo(pRuntimeEnv->resultInfo, pQuery, pRuntimeEnv->stableQuery, buf);

  // if it is group by normal column, do not set output buffer, the output buffer is pResult
  if (!isGroupbyNormalCol(pQuery->pGroupbyExpr) && !pRuntimeEnv->stableQuery) {
    resetCtxOutputBuf(pRuntimeEnv);
  }

  setCtxTagColumnInfo(pRuntimeEnv, pRuntimeEnv->pCtx);
  return TSDB_CODE_SUCCESS;

_clean:
  tfree(pRuntimeEnv->resultInfo);
  tfree(pRuntimeEnv->pCtx);

  return TSDB_CODE_QRY_OUT_OF_MEMORY;
}

/*
 * Release the resources held by a query runtime environment.
 *
 * Nothing is done when no query is attached (pRuntimeEnv->pQuery == NULL).
 * Otherwise the time-window result info is cleaned up, every output function
 * context has its parameters, tag value and tag context list released, and
 * the result info array and context array are freed. Finally the fill info,
 * the result buffer, both tsdb query handles and the timestamp buffer are
 * destroyed.
 */
static void teardownQueryRuntimeEnv(SQueryRuntimeEnv *pRuntimeEnv) {
  SQuery *pQuery = pRuntimeEnv->pQuery;
  if (pQuery == NULL) {
    return;
  }

  SQInfo *pQInfo = GET_QINFO_ADDR(pRuntimeEnv);
  qDebug("QInfo:%p teardown runtime env", pQInfo);

  cleanupTimeWindowInfo(&pRuntimeEnv->windowResInfo);

  if (pRuntimeEnv->pCtx != NULL) {
    for (int32_t k = 0; k < pQuery->numOfOutput; ++k) {
      SQLFunctionCtx *pFuncCtx = &pRuntimeEnv->pCtx[k];

      // release the variant parameters attached to this output function
      for (int32_t p = 0; p < pFuncCtx->numOfParams; ++p) {
        tVariantDestroy(&pFuncCtx->param[p]);
      }

      tVariantDestroy(&pFuncCtx->tag);
      tfree(pFuncCtx->tagInfo.pTagCtxList);
    }

    tfree(pRuntimeEnv->resultInfo);
    tfree(pRuntimeEnv->pCtx);
  }

  // the destroy helpers return the value to keep in the owning field
  pRuntimeEnv->pFillInfo = taosDestoryFillInfo(pRuntimeEnv->pFillInfo);

  destroyResultBuf(pRuntimeEnv->pResultBuf, pQInfo);
  tsdbCleanupQueryHandle(pRuntimeEnv->pQueryHandle);
  tsdbCleanupQueryHandle(pRuntimeEnv->pSecQueryHandle);

  pRuntimeEnv->pTSBuf = tsBufDestroy(pRuntimeEnv->pTSBuf);
}

H
Haojun Liao 已提交
1612
#define IS_QUERY_KILLED(_q) ((_q)->code == TSDB_CODE_TSC_QUERY_CANCELLED)
1613

H
Haojun Liao 已提交
1614
/* Mark the query as cancelled by storing the cancellation code in pQInfo->code. */
static void setQueryKilled(SQInfo *pQInfo) {
  pQInfo->code = TSDB_CODE_TSC_QUERY_CANCELLED;
}
H
hjxilinx 已提交
1615

H
Haojun Liao 已提交
1616 1617 1618
/*
 * Decide whether the query is a fixed output query.
 *
 * Interval queries are never fixed output. Top/bottom queries and group-by
 * normal column queries always are. Otherwise the select expressions are
 * inspected: the leading ts_comp projection and the ts/ts_dummy functions are
 * ignored, and the first remaining function that is not flagged multi-output
 * makes the query a fixed output one.
 */
static bool isFixedOutputQuery(SQueryRuntimeEnv* pRuntimeEnv) {
  SQuery* pQuery = pRuntimeEnv->pQuery;

  if (QUERY_IS_INTERVAL_QUERY(pQuery)) {
    return false;
  }

  // Note: top/bottom query is fixed output query
  if (pRuntimeEnv->topBotQuery || pRuntimeEnv->groupbyNormalCol) {
    return true;
  }

  for (int32_t idx = 0; idx < pQuery->numOfOutput; ++idx) {
    SSqlFuncMsg *pFunc = &pQuery->pSelectExpr[idx].base;

    // the ts_comp function: a one-parameter projection of the primary timestamp column in the first slot
    bool isTsComp = (idx == 0) && (pFunc->functionId == TSDB_FUNC_PRJ) && (pFunc->numOfParams == 1) &&
                    (pFunc->colInfo.colIndex == PRIMARYKEY_TIMESTAMP_COL_INDEX);
    if (isTsComp) {
      continue;
    }

    bool isTsFunc = (pFunc->functionId == TSDB_FUNC_TS) || (pFunc->functionId == TSDB_FUNC_TS_DUMMY);
    if (isTsFunc) {
      continue;
    }

    if (!IS_MULTIOUTPUT(aAggs[pFunc->functionId].nStatus)) {
      return true;
    }
  }

  return false;
}

1648
// todo refactor with isLastRowQuery
H
hjxilinx 已提交
1649
/* Return true when at least one select expression uses the interp function. */
static bool isPointInterpoQuery(SQuery *pQuery) {
  int32_t k = 0;

  while (k < pQuery->numOfOutput) {
    if (pQuery->pSelectExpr[k].base.functionId == TSDB_FUNC_INTERP) {
      return true;
    }

    ++k;
  }

  return false;
}

// TODO REFACTOR:MERGE WITH CLIENT-SIDE FUNCTION
H
hjxilinx 已提交
1661
/*
 * Return true when any select expression (other than the ts function, which is
 * skipped) is one of sum_rate, sum_irate, avg_rate or avg_irate.
 */
static bool isSumAvgRateQuery(SQuery *pQuery) {
  for (int32_t k = 0; k < pQuery->numOfOutput; ++k) {
    int32_t fid = pQuery->pSelectExpr[k].base.functionId;

    if (fid == TSDB_FUNC_TS) {
      continue;
    }

    bool isRate = (fid == TSDB_FUNC_SUM_RATE) || (fid == TSDB_FUNC_SUM_IRATE) || (fid == TSDB_FUNC_AVG_RATE) ||
                  (fid == TSDB_FUNC_AVG_IRATE);
    if (isRate) {
      return true;
    }
  }

  return false;
}

H
hjxilinx 已提交
1677
/* Return true when at least one select expression uses the last_row function. */
static bool isFirstLastRowQuery(SQuery *pQuery) {
  int32_t k = 0;

  while (k < pQuery->numOfOutput) {
    if (pQuery->pSelectExpr[k].base.functionId == TSDB_FUNC_LAST_ROW) {
      return true;
    }

    ++k;
  }

  return false;
}

H
hjxilinx 已提交
1688
/*
 * Decide whether a reverse scan is required.
 *
 * ts, ts_dummy and tag expressions are skipped. A first/first_dst function
 * requires a reverse scan when the query is not ascending. The first
 * last/last_dst function encountered decides the result on its own: a reverse
 * scan is needed when the order stored in its first argument differs from the
 * query order; later expressions are not inspected in that case.
 */
static bool needReverseScan(SQuery *pQuery) {
  for (int32_t k = 0; k < pQuery->numOfOutput; ++k) {
    SSqlFuncMsg *pFunc = &pQuery->pSelectExpr[k].base;
    int32_t      fid = pFunc->functionId;

    bool skipped = (fid == TSDB_FUNC_TS) || (fid == TSDB_FUNC_TS_DUMMY) || (fid == TSDB_FUNC_TAG);
    if (skipped) {
      continue;
    }

    bool isFirst = (fid == TSDB_FUNC_FIRST) || (fid == TSDB_FUNC_FIRST_DST);
    if (isFirst && !QUERY_IS_ASC_QUERY(pQuery)) {
      return true;
    }

    bool isLast = (fid == TSDB_FUNC_LAST) || (fid == TSDB_FUNC_LAST_DST);
    if (isLast) {
      int32_t order = pFunc->arg->argValue.i64;
      return order != pQuery->order.order;
    }
  }

  return false;
}
H
hjxilinx 已提交
1707 1708 1709

/*
 * Return true when every select expression is a tag-only expression: a tag
 * projection, a tid_tag function, or a count applied to the table name
 * pseudo column.
 */
static bool onlyQueryTags(SQuery* pQuery) {
  for (int32_t k = 0; k < pQuery->numOfOutput; ++k) {
    SExprInfo* pExpr = &pQuery->pSelectExpr[k];
    int32_t    fid = pExpr->base.functionId;

    bool tagOnly = (fid == TSDB_FUNC_TAGPRJ) || (fid == TSDB_FUNC_TID_TAG) ||
                   (fid == TSDB_FUNC_COUNT && pExpr->base.colInfo.colId == TSDB_TBNAME_COLUMN_INDEX);
    if (!tagOnly) {
      return false;
    }
  }

  return true;
}

1722 1723
/////////////////////////////////////////////////////////////////////////////////////////////

H
Haojun Liao 已提交
1724
/*
 * Compute the aligned time window that contains the given key.
 *
 * pQuery   supplies the sliding time, interval time, sliding unit and precision
 * key      timestamp to align; must lie within [keyFirst, keyLast]
 * keyFirst lower bound of the queried range
 * keyLast  upper bound of the queried range
 * win      output: win->skey is the aligned start, win->ekey the window end
 */
void getAlignQueryTimeWindow(SQuery *pQuery, int64_t key, int64_t keyFirst, int64_t keyLast, STimeWindow *win) {
  assert(key >= keyFirst && key <= keyLast && pQuery->slidingTime <= pQuery->intervalTime);

  win->skey = taosGetIntervalStartTimestamp(key, pQuery->slidingTime, pQuery->intervalTime, pQuery->slidingTimeUnit,
                                            pQuery->precision);

  // common case: the window end is the start plus one interval, inclusive
  if (keyFirst <= (INT64_MAX - pQuery->intervalTime)) {
    win->ekey = win->skey + pQuery->intervalTime - 1;
    return;
  }

  /*
   * keyFirst > INT64_MAX - intervalTime: the queried range must be shorter than
   * one interval, so the window end is simply clamped to INT64_MAX.
   */
  assert(keyLast - keyFirst < pQuery->intervalTime);
  win->ekey = INT64_MAX;
}

/*
 * Set pQuery->checkBuffer.
 *
 * The flag is cleared for top/bottom queries and for group-by normal column
 * queries. Otherwise the select expressions are walked (ts and ts_dummy are
 * skipped) until the first function that is not multi-output; the flag is 1
 * only when the walk ended on a multi-output function.
 */
static void setScanLimitationByResultBuffer(SQuery *pQuery) {
  if (isTopBottomQuery(pQuery) || isGroupbyNormalCol(pQuery->pGroupbyExpr)) {
    pQuery->checkBuffer = 0;
    return;
  }

  bool multiOutput = false;

  for (int32_t k = 0; k < pQuery->numOfOutput; ++k) {
    SSqlFuncMsg *pFunc = &pQuery->pSelectExpr[k].base;

    bool isTsFunc = (pFunc->functionId == TSDB_FUNC_TS) || (pFunc->functionId == TSDB_FUNC_TS_DUMMY);
    if (isTsFunc) {
      continue;
    }

    multiOutput = IS_MULTIOUTPUT(aAggs[pFunc->functionId].nStatus);
    if (!multiOutput) {
      break;
    }
  }

  pQuery->checkBuffer = multiOutput ? 1 : 0;
}

/*
 * todo add more parameters to check soon..
 */
1767
/*
 * Validate the list of columns to load: two adjacent entries of
 * pQuery->colList must not carry the same column id.
 *
 * Returns false (after logging an error) on the first adjacent duplicate,
 * true otherwise.
 */
bool colIdCheck(SQuery *pQuery) {
  int32_t lastPair = pQuery->numOfCols - 1;

  for (int32_t k = 0; k < lastPair; ++k) {
    SColumnInfo *pCur = &pQuery->colList[k];
    SColumnInfo *pNext = &pQuery->colList[k + 1];

    if (pCur->colId != pNext->colId) {
      continue;
    }

    // NOTE(review): GET_QINFO_ADDR subtracts offsetof(SQInfo, runtimeEnv) but is given an SQuery pointer
    // here, so the logged address is probably not the SQInfo address - confirm and fix at the call site.
    qError("QInfo:%p invalid data load column for query", GET_QINFO_ADDR(pQuery));
    return false;
  }

  return true;
}

// todo ignore the avg/sum/min/max/count/stddev/top/bottom functions, for which
// the scan order does not matter
/*
 * Return true when every select expression, apart from the ts, ts_dummy, tag
 * and tag_dummy helpers, is either functId or functIdDst.
 */
static bool onlyOneQueryType(SQuery *pQuery, int32_t functId, int32_t functIdDst) {
  for (int32_t k = 0; k < pQuery->numOfOutput; ++k) {
    int32_t fid = pQuery->pSelectExpr[k].base.functionId;

    bool helper = (fid == TSDB_FUNC_TS) || (fid == TSDB_FUNC_TS_DUMMY) || (fid == TSDB_FUNC_TAG) ||
                  (fid == TSDB_FUNC_TAG_DUMMY);
    if (helper) {
      continue;
    }

    bool expected = (fid == functId) || (fid == functIdDst);
    if (!expected) {
      return false;
    }
  }

  return true;
}

/* True when the query consists only of first/first_dst functions (see onlyOneQueryType). */
static bool onlyFirstQuery(SQuery *pQuery) {
  return onlyOneQueryType(pQuery, TSDB_FUNC_FIRST, TSDB_FUNC_FIRST_DST);
}

/* True when the query consists only of last/last_dst functions (see onlyOneQueryType). */
static bool onlyLastQuery(SQuery *pQuery) {
  return onlyOneQueryType(pQuery, TSDB_FUNC_LAST, TSDB_FUNC_LAST_DST);
}

H
Haojun Liao 已提交
1802 1803 1804 1805 1806 1807 1808 1809 1810 1811 1812 1813 1814 1815
// todo refactor, add iterator
/*
 * Swap the start and end key of the query window of every table in every
 * table group of the query.
 *
 * The group count and the per-group table count are size_t values, so the
 * loop counters are size_t as well to avoid signed/unsigned comparisons.
 */
static void doExchangeTimeWindow(SQInfo* pQInfo) {
  size_t numOfGroups = GET_NUM_OF_TABLEGROUP(pQInfo);

  for (size_t i = 0; i < numOfGroups; ++i) {
    SArray* pGroup = GET_TABLEGROUP(pQInfo, i);

    size_t numOfTables = taosArrayGetSize(pGroup);
    for (size_t j = 0; j < numOfTables; ++j) {
      STableQueryInfo* pTableQueryInfo = (STableQueryInfo*) taosArrayGetP(pGroup, j);
      SWAP(pTableQueryInfo->win.skey, pTableQueryInfo->win.ekey, TSKEY);
    }
  }
}

H
Haojun Liao 已提交
1816 1817 1818
/*
 * Adjust the scan order (and, when the order flips, the query time range) to
 * the order that suits the functions in the query.
 *
 *  - last_row query:           forced to descending order, window set to [max, min]
 *  - interp without interval:  forced to ascending order
 *  - no interval, only first:  forced to ascending order
 *  - no interval, only last:   forced to descending order
 *  - interval on super table:  same first/last rules; the per-table windows
 *                              are left untouched in this case
 *
 * For the non-interval first/last cases the window of every table is swapped
 * together with the query window (doExchangeTimeWindow).
 *
 * pQInfo       query info; also used as the address printed in the log
 * stableQuery  true when the query runs against a super table
 *
 * The log lines print pQInfo directly. GET_QINFO_ADDR() is only valid for a
 * pointer to the runtimeEnv member, so applying it to pQuery would log a
 * wrong address.
 */
static void changeExecuteScanOrder(SQInfo *pQInfo, bool stableQuery) {
  SQuery* pQuery = pQInfo->runtimeEnv.pQuery;

  char msg[] = "QInfo:%p scan order changed for %s query, old:%d, new:%d, qrange exchanged, old qrange:%" PRId64
               "-%" PRId64 ", new qrange:%" PRId64 "-%" PRId64;

  // todo handle the case the the order irrelevant query type mixed up with order critical query type
  // descending order query for last_row query
  if (isFirstLastRowQuery(pQuery)) {
    qDebug("QInfo:%p scan order changed for last_row query, old:%d, new:%d", pQInfo,
           pQuery->order.order, TSDB_ORDER_DESC);

    pQuery->order.order = TSDB_ORDER_DESC;

    int64_t skey = MIN(pQuery->window.skey, pQuery->window.ekey);
    int64_t ekey = MAX(pQuery->window.skey, pQuery->window.ekey);

    // descending scan: start from the larger key and end at the smaller one
    pQuery->window.skey = ekey;
    pQuery->window.ekey = skey;

    return;
  }

  // in case of point-interpolation query, use asc order scan
  if (isPointInterpoQuery(pQuery) && pQuery->intervalTime == 0) {
    if (!QUERY_IS_ASC_QUERY(pQuery)) {
      qDebug(msg, pQInfo, "interp", pQuery->order.order, TSDB_ORDER_ASC, pQuery->window.skey,
             pQuery->window.ekey, pQuery->window.ekey, pQuery->window.skey);
      SWAP(pQuery->window.skey, pQuery->window.ekey, TSKEY);
    }

    pQuery->order.order = TSDB_ORDER_ASC;
    return;
  }

  if (pQuery->intervalTime == 0) {
    if (onlyFirstQuery(pQuery)) {
      if (!QUERY_IS_ASC_QUERY(pQuery)) {
        qDebug(msg, pQInfo, "only-first", pQuery->order.order, TSDB_ORDER_ASC, pQuery->window.skey,
               pQuery->window.ekey, pQuery->window.ekey, pQuery->window.skey);

        SWAP(pQuery->window.skey, pQuery->window.ekey, TSKEY);
        doExchangeTimeWindow(pQInfo);
      }

      pQuery->order.order = TSDB_ORDER_ASC;
    } else if (onlyLastQuery(pQuery)) {
      if (QUERY_IS_ASC_QUERY(pQuery)) {
        qDebug(msg, pQInfo, "only-last", pQuery->order.order, TSDB_ORDER_DESC, pQuery->window.skey,
               pQuery->window.ekey, pQuery->window.ekey, pQuery->window.skey);

        SWAP(pQuery->window.skey, pQuery->window.ekey, TSKEY);
        doExchangeTimeWindow(pQInfo);
      }

      pQuery->order.order = TSDB_ORDER_DESC;
    }

  } else {  // interval query
    if (stableQuery) {
      if (onlyFirstQuery(pQuery)) {
        if (!QUERY_IS_ASC_QUERY(pQuery)) {
          qDebug(msg, pQInfo, "only-first stable", pQuery->order.order, TSDB_ORDER_ASC,
                 pQuery->window.skey, pQuery->window.ekey, pQuery->window.ekey, pQuery->window.skey);

          SWAP(pQuery->window.skey, pQuery->window.ekey, TSKEY);
        }

        pQuery->order.order = TSDB_ORDER_ASC;
      } else if (onlyLastQuery(pQuery)) {
        if (QUERY_IS_ASC_QUERY(pQuery)) {
          qDebug(msg, pQInfo, "only-last stable", pQuery->order.order, TSDB_ORDER_DESC,
                 pQuery->window.skey, pQuery->window.ekey, pQuery->window.ekey, pQuery->window.skey);

          SWAP(pQuery->window.skey, pQuery->window.ekey, TSKEY);
        }

        pQuery->order.order = TSDB_ORDER_DESC;
      }
    }
  }
}

/*
 * Return the initial number of result pages for the query:
 *  - 128 for a group-by normal column query
 *  - for a time window query, one page for each table, but at least 16
 *  - 1 otherwise
 */
static int32_t getInitialPageNum(SQInfo *pQInfo) {
  int32_t minPagesForInterval = 16;
  SQuery *pQuery = pQInfo->runtimeEnv.pQuery;

  int32_t numOfPages;

  if (isGroupbyNormalCol(pQuery->pGroupbyExpr)) {
    numOfPages = 128;
  } else if (QUERY_IS_INTERVAL_QUERY(pQuery)) {
    // time window query, allocate one page for each table
    size_t numOfTables = pQInfo->tableqinfoGroupInfo.numOfTables;
    numOfPages = MAX(numOfTables, minPagesForInterval);
  } else {
    // for super table query, one page for each subset
    numOfPages = 1;
  }

  assert(numOfPages > 0);
  return numOfPages;
}

H
Haojun Liao 已提交
1918 1919
/*
 * Number of result rows that fit into one internal buffer page: the page size
 * minus the tFilePage header, divided by the row size scaled with the
 * multi-output row parameter of the query.
 */
static FORCE_INLINE int32_t getNumOfRowsInResultPage(SQuery *pQuery, bool topBotQuery, bool isSTableQuery) {
  int32_t bytesPerRow = pQuery->rowSize * GET_ROW_PARAM_FOR_MULTIOUTPUT(pQuery, topBotQuery, isSTableQuery);

  return (DEFAULT_INTERN_BUF_PAGE_SIZE - sizeof(tFilePage)) / bytesPerRow;
}

H
Haojun Liao 已提交
1923
#define IS_PREFILTER_TYPE(_t) ((_t) != TSDB_DATA_TYPE_BINARY && (_t) != TSDB_DATA_TYPE_NCHAR)
1924

H
Haojun Liao 已提交
1925 1926 1927 1928
/*
 * Use the pre-computed block statistics to decide whether the data block has
 * to be loaded.
 *
 * pRuntimeEnv  runtime environment; supplies the query and the topBotQuery flag
 * pDataStatis  statistics array of the block, may be NULL
 * pCtx         function contexts, indexed like the select expressions
 * numOfRows    number of rows in the block
 *
 * Returns true when the block must be loaded:
 *  - no statistics are available, or there is neither a filter column nor a
 *    top/bottom function to check against them;
 *  - a filter column has no statistics entry or is of binary/nchar type;
 *  - any filter of a column accepts the [min, max] range of that column.
 * Columns whose values are all NULL are skipped. When no filter accepted the
 * block and the query is a top/bottom query, the decision of the first
 * top/bottom expression is returned. Otherwise false is returned.
 */
static bool needToLoadDataBlock(SQueryRuntimeEnv* pRuntimeEnv, SDataStatis *pDataStatis, SQLFunctionCtx *pCtx,
    int32_t numOfRows) {
  SQuery* pQuery = pRuntimeEnv->pQuery;

  if (pDataStatis == NULL) {
    return true;
  }

  if (pQuery->numOfFilterCols == 0 && (!pRuntimeEnv->topBotQuery)) {
    return true;
  }

  for (int32_t k = 0; k < pQuery->numOfFilterCols; ++k) {
    SSingleColumnFilterInfo *pColFilter = &pQuery->pFilterInfo[k];

    // locate the statistics entry of the filtered column
    int32_t statIdx = -1;
    for (int32_t c = 0; c < pQuery->numOfCols; ++c) {
      if (pDataStatis[c].colId == pColFilter->info.colId) {
        statIdx = c;
        break;
      }
    }

    // no statistics data
    if (statIdx == -1) {
      return true;
    }

    // not support pre-filter operation on binary/nchar data type
    if (!IS_PREFILTER_TYPE(pColFilter->info.type)) {
      return true;
    }

    SDataStatis* pColStatis = &pDataStatis[statIdx];

    // all points in current column are NULL, no need to check its boundary value
    if (pColStatis->numOfNull == numOfRows) {
      continue;
    }

    // float columns: the stored min/max are read as double and narrowed to float before filtering
    float minval = 0;
    float maxval = 0;

    char *pMin = (char *)&pColStatis->min;
    char *pMax = (char *)&pColStatis->max;

    if (pColFilter->info.type == TSDB_DATA_TYPE_FLOAT) {
      minval = *(double *)(&pColStatis->min);
      maxval = *(double *)(&pColStatis->max);

      pMin = (char *)&minval;
      pMax = (char *)&maxval;
    }

    for (int32_t f = 0; f < pColFilter->numOfFilters; ++f) {
      if (pColFilter->pFilters[f].fp(&pColFilter->pFilters[f], pMin, pMax)) {
        return true;
      }
    }
  }

  if (pRuntimeEnv->topBotQuery) {
    for (int32_t k = 0; k < pQuery->numOfOutput; ++k) {
      int32_t fid = pQuery->pSelectExpr[k].base.functionId;
      if (fid != TSDB_FUNC_TOP && fid != TSDB_FUNC_BOTTOM) {
        continue;
      }

      return topbot_datablock_filter(&pCtx[k], fid, (char *)&pDataStatis[k].min, (char *)&pDataStatis[k].max);
    }
  }

  return false;
}

H
Haojun Liao 已提交
1990 1991 1992 1993 1994 1995 1996 1997 1998 1999 2000 2001 2002 2003 2004 2005 2006 2007 2008 2009 2010 2011 2012 2013 2014 2015 2016 2017 2018 2019 2020 2021 2022 2023 2024 2025 2026 2027 2028 2029 2030 2031 2032 2033 2034 2035 2036
#define PT_IN_WINDOW(_p, _w)  ((_p) > (_w).skey && (_p) < (_w).ekey)

/*
 * Check whether a boundary of one of the query time windows falls strictly
 * inside the time range of the data block.
 *
 * The first window is the aligned window at the block start key (ascending
 * query) or at the block end key (descending query). The following windows are
 * produced with GET_NEXT_TIMEWINDOW until the window start passes the block
 * start key (ascending) or the window end drops below the block start key
 * (descending).
 *
 * Returns true as soon as a window start or end lies strictly between the
 * block start and end keys, false otherwise.
 */
static bool overlapWithTimeWindow(SQuery* pQuery, SDataBlockInfo* pBlockInfo) {
  STimeWindow w = {0};

  TSKEY lower = MIN(pQuery->window.skey, pQuery->window.ekey);
  TSKEY upper = MAX(pQuery->window.skey, pQuery->window.ekey);

  if (!QUERY_IS_ASC_QUERY(pQuery)) {
    getAlignQueryTimeWindow(pQuery, pBlockInfo->window.ekey, lower, upper, &w);
    if (PT_IN_WINDOW(w.skey, pBlockInfo->window)) {
      return true;
    }

    for (;;) {
      GET_NEXT_TIMEWINDOW(pQuery, &w);
      if (w.ekey < pBlockInfo->window.skey) {
        return false;
      }

      if (PT_IN_WINDOW(w.skey, pBlockInfo->window) || PT_IN_WINDOW(w.ekey, pBlockInfo->window)) {
        return true;
      }
    }
  }

  getAlignQueryTimeWindow(pQuery, pBlockInfo->window.skey, lower, upper, &w);
  if (PT_IN_WINDOW(w.ekey, pBlockInfo->window)) {
    return true;
  }

  for (;;) {
    GET_NEXT_TIMEWINDOW(pQuery, &w);
    if (w.skey > pBlockInfo->window.skey) {
      return false;
    }

    if (PT_IN_WINDOW(w.skey, pBlockInfo->window) || PT_IN_WINDOW(w.ekey, pBlockInfo->window)) {
      return true;
    }
  }
}

H
Haojun Liao 已提交
2037
/*
 * Load only as much of the current data block as the query needs.
 *
 * pRuntimeEnv   runtime environment (query, function contexts, cost summary)
 * pQueryHandle  tsdb query handle positioned on the block
 * pBlockInfo    description of the block (time range, number of rows)
 * pStatis       output: block statistics, when they were retrieved
 * pDataBlock    output: column data, when the block was loaded
 *
 * The required level is BLK_DATA_ALL_NEEDED when the query has filter columns
 * or a timestamp buffer, or when it is an interval query and a window boundary
 * falls inside the block. Otherwise it is the union of what the dataReqFunc of
 * every select expression asks for.
 *
 *  - nothing needed:     the block is counted as discarded, nothing is loaded
 *  - statistics needed:  statistics are retrieved; the data is loaded only
 *                        when no statistics exist for the block
 *  - all needed:         statistics are retrieved and checked by
 *                        needToLoadDataBlock(); the data is loaded unless that
 *                        check rejects the block
 *
 * Returns BLK_DATA_DISCARD when the block was rejected by the statistics
 * check, TSDB_CODE_SUCCESS in every other case.
 */
int32_t loadDataBlockOnDemand(SQueryRuntimeEnv *pRuntimeEnv, void* pQueryHandle, SDataBlockInfo* pBlockInfo, SDataStatis **pStatis, SArray** pDataBlock) {
  SQuery *pQuery = pRuntimeEnv->pQuery;

  uint32_t status = 0;
  if (pQuery->numOfFilterCols > 0 || pRuntimeEnv->pTSBuf != NULL) {
    status = BLK_DATA_ALL_NEEDED;
  } else { // check if this data block is required to load

    // Calculate all time windows that are overlapping or contain current data block.
    // If current data block is contained by all possible time window, loading current
    // data block is not needed.
    if (QUERY_IS_INTERVAL_QUERY(pQuery) && overlapWithTimeWindow(pQuery, pBlockInfo)) {
      status = BLK_DATA_ALL_NEEDED;
    }

    if (status != BLK_DATA_ALL_NEEDED) {
      for (int32_t i = 0; i < pQuery->numOfOutput; ++i) {
        SSqlFuncMsg* pSqlFunc = &pQuery->pSelectExpr[i].base;

        int32_t functionId = pSqlFunc->functionId;
        int32_t colId = pSqlFunc->colInfo.colId;

        // accumulate the requirement of every function, stop once the whole block is needed
        status |= aAggs[functionId].dataReqFunc(&pRuntimeEnv->pCtx[i], pBlockInfo->window.skey, pBlockInfo->window.ekey, colId);
        if ((status & BLK_DATA_ALL_NEEDED) == BLK_DATA_ALL_NEEDED) {
          break;
        }
      }
    }
  }

  if (status == BLK_DATA_NO_NEEDED) {
    qDebug("QInfo:%p data block discard, brange:%"PRId64 "-%"PRId64", rows:%d", GET_QINFO_ADDR(pRuntimeEnv),
           pBlockInfo->window.skey, pBlockInfo->window.ekey, pBlockInfo->rows);
    pRuntimeEnv->summary.discardBlocks += 1;
  } else if (status == BLK_DATA_STATIS_NEEDED) {
    if (tsdbRetrieveDataBlockStatisInfo(pQueryHandle, pStatis) != TSDB_CODE_SUCCESS) {
      // NOTE(review): the failure is ignored on purpose here; presumably *pStatis stays NULL in that
      // case and the block data is loaded below instead - confirm against tsdbRetrieveDataBlockStatisInfo.
    }

    pRuntimeEnv->summary.loadBlockStatis += 1;

    if (*pStatis == NULL) { // data block statistics does not exist, load data block
      *pDataBlock = tsdbRetrieveDataBlock(pQueryHandle, NULL);
      pRuntimeEnv->summary.totalCheckedRows += pBlockInfo->rows;
    }
  } else {
    assert(status == BLK_DATA_ALL_NEEDED);

    // load the data block statistics to perform further filter
    pRuntimeEnv->summary.loadBlockStatis += 1;
    if (tsdbRetrieveDataBlockStatisInfo(pQueryHandle, pStatis) != TSDB_CODE_SUCCESS) {
      // failure is ignored: needToLoadDataBlock() asks for the block when it is given no statistics
    }

    if (!needToLoadDataBlock(pRuntimeEnv, *pStatis, pRuntimeEnv->pCtx, pBlockInfo->rows)) {
#if defined(_DEBUG_VIEW)
      qDebug("QInfo:%p block discarded by per-filter", GET_QINFO_ADDR(pRuntimeEnv));
#endif
      // current block has been discard due to filter applied
      pRuntimeEnv->summary.discardBlocks += 1;
      qDebug("QInfo:%p data block discard, brange:%"PRId64 "-%"PRId64", rows:%d", GET_QINFO_ADDR(pRuntimeEnv),
          pBlockInfo->window.skey, pBlockInfo->window.ekey, pBlockInfo->rows);
      return BLK_DATA_DISCARD;
    }

    pRuntimeEnv->summary.totalCheckedRows += pBlockInfo->rows;
    pRuntimeEnv->summary.loadBlocks += 1;
    *pDataBlock = tsdbRetrieveDataBlock(pQueryHandle, NULL);
  }

  return TSDB_CODE_SUCCESS;
}

H
hjxilinx 已提交
2109
/*
 * Binary search in an array of timestamps.
 *
 * pValue  array of num TSKEY values
 * num     number of entries; -1 is returned when num <= 0
 * key     timestamp to look for
 * order   TSDB_ORDER_ASC or TSDB_ORDER_DESC
 *
 * Descending order: returns the last index when key is not smaller than the
 * last entry, firstPos - 1 (which is -1 on the first round) when key is
 * smaller than the first entry of the current range, and the matching index
 * when key is found.
 * Ascending order: returns the first index when key is not larger than the
 * first entry of the current range, the index following the current range
 * (or -1 when that index is outside the array) when key is larger than its
 * last entry, and the matching index when key is found.
 */
int32_t binarySearchForKey(char *pValue, int num, TSKEY key, int order) {
  if (num <= 0) {
    return -1;
  }

  assert(order == TSDB_ORDER_ASC || order == TSDB_ORDER_DESC);

  TSKEY * keys = (TSKEY *)pValue;
  int32_t lo = 0;
  int32_t hi = num - 1;
  int32_t mid = -1;

  if (order == TSDB_ORDER_DESC) {
    // find the first position which is smaller than the key
    for (;;) {
      if (key >= keys[hi]) {
        return hi;
      }

      if (key == keys[lo]) {
        return lo;
      }

      if (key < keys[lo]) {
        return lo - 1;
      }

      int32_t span = hi - lo + 1;
      mid = (span >> 1) + lo;

      if (key < keys[mid]) {
        hi = mid - 1;
      } else if (key > keys[mid]) {
        lo = mid + 1;
      } else {
        return mid;
      }
    }
  }

  // find the first position which is bigger than the key
  for (;;) {
    if (key <= keys[lo]) {
      return lo;
    }

    if (key == keys[hi]) {
      return hi;
    }

    if (key > keys[hi]) {
      hi = hi + 1;
      return (hi >= num) ? -1 : hi;
    }

    int32_t span = hi - lo + 1;
    mid = (span >> 1) + lo;

    if (key < keys[mid]) {
      hi = mid - 1;
    } else if (key > keys[mid]) {
      lo = mid + 1;
    } else {
      return mid;
    }
  }
}

2172 2173 2174 2175 2176 2177 2178 2179 2180 2181 2182 2183 2184 2185 2186 2187 2188 2189 2190 2191 2192 2193
/*
 * Resize the output buffer of every select expression to hold capacity rows
 * and point each function context at the start of its buffer.
 *
 * Nothing is done when capacity is smaller than the current capacity. The new
 * capacity is recorded in pQuery->rec.capacity.
 */
static void ensureOutputBufferSimple(SQueryRuntimeEnv* pRuntimeEnv, int32_t capacity) {
  SQuery* pQuery = pRuntimeEnv->pQuery;

  if (capacity < pQuery->rec.capacity) {
    return;
  }

  for (int32_t k = 0; k < pQuery->numOfOutput; ++k) {
    int32_t width = pQuery->pSelectExpr[k].bytes;
    assert(width > 0 && capacity > 0);

    char *pNew = realloc(pQuery->sdata[k], width * capacity + sizeof(tFilePage));
    if (pNew != NULL) {
      pQuery->sdata[k] = (tFilePage *)pNew;
    } else {
      // todo handle the oom
      // NOTE(review): when assert() is compiled out the old, smaller buffer stays in use - confirm callers
      assert(0);
    }

    // set the pCtx output buffer position
    pRuntimeEnv->pCtx[k].aOutputBuf = pQuery->sdata[k]->data;
  }

  qDebug("QInfo:%p realloc output buffer to inc output buffer from: %" PRId64 " rows to:%d rows", GET_QINFO_ADDR(pRuntimeEnv),
         pQuery->rec.capacity, capacity);

  pQuery->rec.capacity = capacity;
}

2200 2201 2202
/*
 * In case of prj/diff query, ensure the output buffer is sufficient to
 * accommodate the results of the current block.
 *
 * Only applies to queries that are neither interval, group-by normal column
 * nor fixed output queries. When the free room in the output buffer is smaller
 * than the number of rows of the block, every output column buffer is grown so
 * that the block fits, the newly usable area is zeroed, and each function
 * context is pointed at the first free row. For top/bottom/diff functions the
 * timestamp output buffer is set to the output buffer of the first context.
 *
 * On allocation failure the function does not return: it jumps to
 * pRuntimeEnv->env with TSDB_CODE_QRY_OUT_OF_MEMORY, the same error path used
 * by doScanAllDataBlocks. The previous buffers stay valid and the recorded
 * capacity is left unchanged in that case.
 */
static void ensureOutputBuffer(SQueryRuntimeEnv* pRuntimeEnv, SDataBlockInfo* pBlockInfo) {
  SQuery* pQuery = pRuntimeEnv->pQuery;

  if (QUERY_IS_INTERVAL_QUERY(pQuery) || pRuntimeEnv->groupbyNormalCol || isFixedOutputQuery(pRuntimeEnv)) {
    return;
  }

  SResultRec *pRec = &pQuery->rec;

  // enough free rows left for the whole block
  if (pRec->capacity - pRec->rows >= pBlockInfo->rows) {
    return;
  }

  int32_t remain = pRec->capacity - pRec->rows;
  int32_t newSize = pRec->capacity + (pBlockInfo->rows - remain);

  for (int32_t i = 0; i < pQuery->numOfOutput; ++i) {
    int32_t bytes = pQuery->pSelectExpr[i].bytes;
    assert(bytes > 0 && newSize > 0);

    char *tmp = realloc(pQuery->sdata[i], bytes * newSize + sizeof(tFilePage));
    if (tmp == NULL) {
      // realloc leaves the old buffer untouched; abort the query instead of writing past it
      longjmp(pRuntimeEnv->env, TSDB_CODE_QRY_OUT_OF_MEMORY);
    }

    // clear everything behind the rows that already hold results
    memset(tmp + sizeof(tFilePage) + bytes * pRec->rows, 0, (newSize - pRec->rows) * bytes);
    pQuery->sdata[i] = (tFilePage *)tmp;

    // set the pCtx output buffer position
    pRuntimeEnv->pCtx[i].aOutputBuf = pQuery->sdata[i]->data + pRec->rows * bytes;

    int32_t functionId = pQuery->pSelectExpr[i].base.functionId;
    if (functionId == TSDB_FUNC_TOP || functionId == TSDB_FUNC_BOTTOM || functionId == TSDB_FUNC_DIFF) {
      pRuntimeEnv->pCtx[i].ptsOutputBuf = pRuntimeEnv->pCtx[0].aOutputBuf;
    }
  }

  qDebug("QInfo:%p realloc output buffer, new size: %d rows, old:%" PRId64 ", remain:%" PRId64, GET_QINFO_ADDR(pRuntimeEnv),
         newSize, pRec->capacity, newSize - pRec->rows);

  pRec->capacity = newSize;
}

2239 2240
/*
 * Scan the data blocks of the current table and apply the query functions to
 * each of them.
 *
 * The master scan uses pRuntimeEnv->pQueryHandle, any other scan uses
 * pRuntimeEnv->pSecQueryHandle. For every block: the block counter is
 * updated, a cancelled query is aborted, the first time window of an interval
 * query is initialised when needed, the output buffer is enlarged when
 * required, the block is loaded on demand and, unless it was discarded, the
 * functions are applied to it. The scan stops when the handle has no more
 * blocks or when the query status reports a full result buffer or completion.
 *
 * Errors reported through terrno and query cancellation leave the function
 * through longjmp(pRuntimeEnv->env, code).
 *
 * Always returns 0.
 */
static int64_t doScanAllDataBlocks(SQueryRuntimeEnv *pRuntimeEnv) {
  SQuery          *pQuery = pRuntimeEnv->pQuery;
  SQInfo          *pQInfo = GET_QINFO_ADDR(pRuntimeEnv);
  STableQueryInfo *pTableQueryInfo = pQuery->current;
  SQueryCostInfo  *pCost = &pRuntimeEnv->summary;

  qDebug("QInfo:%p query start, qrange:%" PRId64 "-%" PRId64 ", lastkey:%" PRId64 ", order:%d",
         pQInfo, pTableQueryInfo->win.skey, pTableQueryInfo->win.ekey, pTableQueryInfo->lastKey,
         pQuery->order.order);

  TsdbQueryHandleT pQueryHandle = pRuntimeEnv->pSecQueryHandle;
  if (IS_MASTER_SCAN(pRuntimeEnv)) {
    pQueryHandle = pRuntimeEnv->pQueryHandle;
  }

  int32_t step = GET_FORWARD_DIRECTION_FACTOR(pQuery->order.order);

  SDataBlockInfo blockInfo = SDATA_BLOCK_INITIALIZER;
  for (;;) {
    if (!tsdbNextDataBlock(pQueryHandle)) {
      // no further block: either the scan is finished or tsdb reported an error
      if (terrno != TSDB_CODE_SUCCESS) {
        longjmp(pRuntimeEnv->env, terrno);
      }

      break;
    }

    pCost->totalBlocks += 1;

    if (IS_QUERY_KILLED(pQInfo)) {
      longjmp(pRuntimeEnv->env, TSDB_CODE_TSC_QUERY_CANCELLED);
    }

    tsdbRetrieveDataBlockInfo(pQueryHandle, &blockInfo);

    // todo extract methods
    if (QUERY_IS_INTERVAL_QUERY(pQuery) && pRuntimeEnv->windowResInfo.prevSKey == TSKEY_INITIAL_VAL) {
      SWindowResInfo *pWindowResInfo = &pRuntimeEnv->windowResInfo;
      STimeWindow     w = TSWINDOW_INITIALIZER;

      if (QUERY_IS_ASC_QUERY(pQuery)) {
        getAlignQueryTimeWindow(pQuery, blockInfo.window.skey, blockInfo.window.skey, pQuery->window.ekey, &w);
        pWindowResInfo->startTime = w.skey;
      } else {
        // the start position of the first time window in the endpoint that spreads beyond the queried last timestamp
        getAlignQueryTimeWindow(pQuery, blockInfo.window.ekey, pQuery->window.ekey, blockInfo.window.ekey, &w);
        pWindowResInfo->startTime = pQuery->window.skey;
      }

      pWindowResInfo->prevSKey = w.skey;
    }

    // in case of prj/diff query, ensure the output buffer is sufficient to accommodate the results of current block
    ensureOutputBuffer(pRuntimeEnv, &blockInfo);

    SDataStatis *pStatis = NULL;
    SArray      *pDataBlock = NULL;

    int32_t loadStatus = loadDataBlockOnDemand(pRuntimeEnv, pQueryHandle, &blockInfo, &pStatis, &pDataBlock);
    if (loadStatus == BLK_DATA_DISCARD) {
      // the block is skipped: move the last key just past it in scan direction
      if (QUERY_IS_ASC_QUERY(pQuery)) {
        pQuery->current->lastKey = blockInfo.window.ekey + step;
      } else {
        pQuery->current->lastKey = blockInfo.window.skey + step;
      }

      continue;
    }

    // query start position can not move into tableApplyFunctionsOnBlock due to limit/offset condition
    if (QUERY_IS_ASC_QUERY(pQuery)) {
      pQuery->pos = 0;
    } else {
      pQuery->pos = blockInfo.rows - 1;
    }

    int32_t numOfRes = tableApplyFunctionsOnBlock(pRuntimeEnv, &blockInfo, pStatis, binarySearchForKey, pDataBlock);

    pCost->totalRows += blockInfo.rows;
    qDebug("QInfo:%p check data block, brange:%" PRId64 "-%" PRId64 ", numOfRows:%d, numOfRes:%d, lastKey:%"PRId64, pQInfo,
           blockInfo.window.skey, blockInfo.window.ekey, blockInfo.rows, numOfRes, pQuery->current->lastKey);

    // while the output buffer is full or limit/offset is applied, query may be paused here
    if (Q_STATUS_EQUAL(pQuery->status, QUERY_RESBUF_FULL | QUERY_COMPLETED)) {
      break;
    }
  }

  // if the result buffer is not full, set the query complete
  if (!Q_STATUS_EQUAL(pQuery->status, QUERY_RESBUF_FULL)) {
    setQueryStatus(pQuery, QUERY_COMPLETED);
  }

  if (QUERY_IS_INTERVAL_QUERY(pQuery) && IS_MASTER_SCAN(pRuntimeEnv)) {
    if (!Q_STATUS_EQUAL(pQuery->status, QUERY_COMPLETED)) {
      assert(Q_STATUS_EQUAL(pQuery->status, QUERY_RESBUF_FULL));
    } else {
      closeAllTimeWindow(&pRuntimeEnv->windowResInfo);
      pRuntimeEnv->windowResInfo.curIndex = pRuntimeEnv->windowResInfo.size - 1;  // point to the last time window
    }
  }

  return 0;
}

/*
 * set tag value in SQLFunctionCtx
 * e.g.,tag information into input buffer
 */
2331
static void doSetTagValueInParam(void *tsdb, void* pTable, int32_t tagColId, tVariant *tag, int16_t type, int16_t bytes) {
H
[td-90]  
Haojun Liao 已提交
2332
  tVariantDestroy(tag);
2333

2334
  if (tagColId == TSDB_TBNAME_COLUMN_INDEX) {
2335
    char* val = tsdbGetTableName(pTable);
H
[td-90]  
Haojun Liao 已提交
2336 2337 2338
    assert(val != NULL);
    
    tVariantCreateFromBinary(tag, varDataVal(val), varDataLen(val), TSDB_DATA_TYPE_BINARY);
2339
  } else {
2340
    char* val = tsdbGetTableTagVal(pTable, tagColId, type, bytes);
H
[td-90]  
Haojun Liao 已提交
2341 2342 2343 2344
    if (val == NULL) {
      tag->nType = TSDB_DATA_TYPE_NULL;
      return;
    }
H
hjxilinx 已提交
2345 2346
    
    if (type == TSDB_DATA_TYPE_BINARY || type == TSDB_DATA_TYPE_NCHAR) {
H
Hongze Cheng 已提交
2347
      if (isNull(val, type)) {
H
Haojun Liao 已提交
2348 2349 2350 2351
        tag->nType = TSDB_DATA_TYPE_NULL;
        return;
      }

H
[td-90]  
Haojun Liao 已提交
2352
      tVariantCreateFromBinary(tag, varDataVal(val), varDataLen(val), type);
H
hjxilinx 已提交
2353
    } else {
H
Haojun Liao 已提交
2354 2355 2356 2357 2358
      if (isNull(val, type)) {
        tag->nType = TSDB_DATA_TYPE_NULL;
        return;
      }

H
[td-90]  
Haojun Liao 已提交
2359
      tVariantCreateFromBinary(tag, val, bytes, type);
H
hjxilinx 已提交
2360
    }
2361
  }
2362 2363
}

2364
void setTagVal(SQueryRuntimeEnv *pRuntimeEnv, void *pTable, void *tsdb) {
2365
  SQuery *pQuery = pRuntimeEnv->pQuery;
H
Haojun Liao 已提交
2366
  SQInfo* pQInfo = GET_QINFO_ADDR(pRuntimeEnv);
2367

H
[td-90]  
Haojun Liao 已提交
2368 2369 2370
  SExprInfo *pExprInfo = &pQuery->pSelectExpr[0];
  if (pQuery->numOfOutput == 1 && pExprInfo->base.functionId == TSDB_FUNC_TS_COMP) {
    assert(pExprInfo->base.numOfParams == 1);
H
Haojun Liao 已提交
2371 2372 2373 2374 2375 2376 2377 2378 2379 2380

    // todo refactor extract function.
    int16_t type = -1, bytes = -1;
    for(int32_t i = 0; i < pQuery->numOfTags; ++i) {
      if (pQuery->tagColList[i].colId == pExprInfo->base.arg->argValue.i64) {
        type = pQuery->tagColList[i].type;
        bytes = pQuery->tagColList[i].bytes;
      }
    }

2381
    doSetTagValueInParam(tsdb, pTable, pExprInfo->base.arg->argValue.i64, &pRuntimeEnv->pCtx[0].tag, type, bytes);
2382 2383
  } else {
    // set tag value, by which the results are aggregated.
2384
    for (int32_t idx = 0; idx < pQuery->numOfOutput; ++idx) {
H
Haojun Liao 已提交
2385
      SExprInfo* pLocalExprInfo = &pQuery->pSelectExpr[idx];
H
[td-90]  
Haojun Liao 已提交
2386
  
2387
      // ts_comp column required the tag value for join filter
H
Haojun Liao 已提交
2388
      if (!TSDB_COL_IS_TAG(pLocalExprInfo->base.colInfo.flag)) {
2389 2390
        continue;
      }
2391

2392
      // todo use tag column index to optimize performance
2393
      doSetTagValueInParam(tsdb, pTable, pLocalExprInfo->base.colInfo.colId, &pRuntimeEnv->pCtx[idx].tag,
H
Haojun Liao 已提交
2394
                           pLocalExprInfo->type, pLocalExprInfo->bytes);
2395
    }
2396

2397
    // set the join tag for first column
H
[td-90]  
Haojun Liao 已提交
2398
    SSqlFuncMsg *pFuncMsg = &pExprInfo->base;
H
Haojun Liao 已提交
2399
    if ((pFuncMsg->functionId == TSDB_FUNC_TS || pFuncMsg->functionId == TSDB_FUNC_PRJ) && pFuncMsg->colInfo.colIndex == PRIMARYKEY_TIMESTAMP_COL_INDEX &&
2400 2401
        pRuntimeEnv->pTSBuf != NULL) {
      assert(pFuncMsg->numOfParams == 1);
H
Haojun Liao 已提交
2402 2403 2404 2405 2406 2407 2408 2409 2410 2411

      // todo refactor
      int16_t type = -1, bytes = -1;
      for(int32_t i = 0; i < pQuery->numOfTags; ++i) {
        if (pQuery->tagColList[i].colId == pExprInfo->base.arg->argValue.i64) {
          type = pQuery->tagColList[i].type;
          bytes = pQuery->tagColList[i].bytes;
        }
      }

2412
      doSetTagValueInParam(tsdb, pTable, pExprInfo->base.arg->argValue.i64, &pRuntimeEnv->pCtx[0].tag, type, bytes);
2413
      qDebug("QInfo:%p set tag value for join comparison, colId:%" PRId64 ", val:%"PRId64, pQInfo, pExprInfo->base.arg->argValue.i64,
B
Bomin Zhang 已提交
2414
          pRuntimeEnv->pCtx[0].tag.i64Key)
2415 2416 2417 2418 2419 2420 2421
    }
  }
}

/*
 * Feed one window result into the merge (second stage) of every output function.
 *
 * When mergeFlag is false a new output row is started: each context advances
 * its output buffer by one row, is switched to FIRST_STAGE_MERGE, and has its
 * result info reset and the function re-initialized. When mergeFlag is true the
 * window result is merged into the current output row.
 */
static void doMerge(SQueryRuntimeEnv *pRuntimeEnv, int64_t timestamp, SWindowResult *pWindowRes, bool mergeFlag) {
  SQuery *        pQuery = pRuntimeEnv->pQuery;
  SQLFunctionCtx *pCtx = pRuntimeEnv->pCtx;

  // pass 1: prepare the input/output of every function context
  for (int32_t i = 0; i < pQuery->numOfOutput; ++i) {
    SQLFunctionCtx *ctx = &pCtx[i];
    int32_t         funcId = pQuery->pSelectExpr[i].base.functionId;

    if (!mergeFlag) {
      ctx->aOutputBuf += ctx->outputBytes;
      ctx->currentStage = FIRST_STAGE_MERGE;

      resetResultInfo(ctx->resultInfo);
      aAggs[funcId].init(ctx);
    }

    ctx->hasNull = true;
    ctx->nStartQueryTimestamp = timestamp;
    ctx->aInputElemBuf = getPosInResultPage(pRuntimeEnv, i, pWindowRes);

    if (funcId != TSDB_FUNC_TAG_DUMMY && funcId != TSDB_FUNC_TAG) {
      continue;
    }

    // tag functions: the tag value is rebuilt from the input buffer
    tVariantDestroy(&ctx->tag);

    int32_t outType = ctx->outputType;
    if (outType == TSDB_DATA_TYPE_BINARY || outType == TSDB_DATA_TYPE_NCHAR) {
      tVariantCreateFromBinary(&ctx->tag, varDataVal(ctx->aInputElemBuf), varDataLen(ctx->aInputElemBuf), outType);
    } else {
      tVariantCreateFromBinary(&ctx->tag, ctx->aInputElemBuf, ctx->inputBytes, ctx->inputType);
    }
  }

  // pass 2: run the merge function of everything except TAG_DUMMY
  for (int32_t i = 0; i < pQuery->numOfOutput; ++i) {
    int32_t funcId = pQuery->pSelectExpr[i].base.functionId;
    if (funcId != TSDB_FUNC_TAG_DUMMY) {
      aAggs[funcId].distMergeFunc(&pCtx[i]);
    }
  }
}

2461
static UNUSED_FUNC void printBinaryData(int32_t functionId, char *data, int32_t srcDataType) {
2462 2463 2464 2465 2466 2467 2468 2469 2470 2471 2472 2473 2474 2475 2476 2477 2478 2479 2480 2481 2482 2483 2484 2485 2486 2487 2488 2489 2490 2491 2492 2493 2494 2495 2496 2497 2498 2499 2500 2501 2502 2503 2504 2505 2506 2507 2508 2509 2510 2511 2512 2513 2514 2515 2516 2517 2518 2519 2520 2521 2522 2523 2524 2525 2526 2527 2528 2529
  if (functionId == TSDB_FUNC_FIRST_DST || functionId == TSDB_FUNC_LAST_DST) {
    switch (srcDataType) {
      case TSDB_DATA_TYPE_BINARY:
        printf("%" PRId64 ",%s\t", *(TSKEY *)data, (data + TSDB_KEYSIZE + 1));
        break;
      case TSDB_DATA_TYPE_TINYINT:
      case TSDB_DATA_TYPE_BOOL:
        printf("%" PRId64 ",%d\t", *(TSKEY *)data, *(int8_t *)(data + TSDB_KEYSIZE + 1));
        break;
      case TSDB_DATA_TYPE_SMALLINT:
        printf("%" PRId64 ",%d\t", *(TSKEY *)data, *(int16_t *)(data + TSDB_KEYSIZE + 1));
        break;
      case TSDB_DATA_TYPE_BIGINT:
      case TSDB_DATA_TYPE_TIMESTAMP:
        printf("%" PRId64 ",%" PRId64 "\t", *(TSKEY *)data, *(TSKEY *)(data + TSDB_KEYSIZE + 1));
        break;
      case TSDB_DATA_TYPE_INT:
        printf("%" PRId64 ",%d\t", *(TSKEY *)data, *(int32_t *)(data + TSDB_KEYSIZE + 1));
        break;
      case TSDB_DATA_TYPE_FLOAT:
        printf("%" PRId64 ",%f\t", *(TSKEY *)data, *(float *)(data + TSDB_KEYSIZE + 1));
        break;
      case TSDB_DATA_TYPE_DOUBLE:
        printf("%" PRId64 ",%lf\t", *(TSKEY *)data, *(double *)(data + TSDB_KEYSIZE + 1));
        break;
    }
  } else if (functionId == TSDB_FUNC_AVG) {
    printf("%lf,%d\t", *(double *)data, *(int32_t *)(data + sizeof(double)));
  } else if (functionId == TSDB_FUNC_SPREAD) {
    printf("%lf,%lf\t", *(double *)data, *(double *)(data + sizeof(double)));
  } else if (functionId == TSDB_FUNC_TWA) {
    data += 1;
    printf("%lf,%" PRId64 ",%" PRId64 ",%" PRId64 "\t", *(double *)data, *(int64_t *)(data + 8),
           *(int64_t *)(data + 16), *(int64_t *)(data + 24));
  } else if (functionId == TSDB_FUNC_MIN || functionId == TSDB_FUNC_MAX) {
    switch (srcDataType) {
      case TSDB_DATA_TYPE_TINYINT:
      case TSDB_DATA_TYPE_BOOL:
        printf("%d\t", *(int8_t *)data);
        break;
      case TSDB_DATA_TYPE_SMALLINT:
        printf("%d\t", *(int16_t *)data);
        break;
      case TSDB_DATA_TYPE_BIGINT:
      case TSDB_DATA_TYPE_TIMESTAMP:
        printf("%" PRId64 "\t", *(int64_t *)data);
        break;
      case TSDB_DATA_TYPE_INT:
        printf("%d\t", *(int *)data);
        break;
      case TSDB_DATA_TYPE_FLOAT:
        printf("%f\t", *(float *)data);
        break;
      case TSDB_DATA_TYPE_DOUBLE:
        printf("%f\t", *(float *)data);
        break;
    }
  } else if (functionId == TSDB_FUNC_SUM) {
    if (srcDataType == TSDB_DATA_TYPE_FLOAT || srcDataType == TSDB_DATA_TYPE_DOUBLE) {
      printf("%lf\t", *(float *)data);
    } else {
      printf("%" PRId64 "\t", *(int64_t *)data);
    }
  } else {
    printf("%s\t", data);
  }
}

2530
/*
 * Debug helper: dump the intermediate result of a super table query to stdout,
 * one line per row. Each output column is read from its own page pdata[i],
 * where row j is located at offset bytes * j. Columns whose type is not one of
 * BINARY/TIMESTAMP/BIGINT/INT/FLOAT/DOUBLE are not printed.
 */
void UNUSED_FUNC displayInterResult(tFilePage **pdata, SQueryRuntimeEnv* pRuntimeEnv, int32_t numOfRows) {
  SQuery* pQuery = pRuntimeEnv->pQuery;
  int32_t numOfCols = pQuery->numOfOutput;
  printf("super table query intermediate result, total:%d\n", numOfRows);

  for (int32_t row = 0; row < numOfRows; ++row) {
    for (int32_t col = 0; col < numOfCols; ++col) {
      SExprInfo *pExpr = &pQuery->pSelectExpr[col];
      int32_t    colType = pExpr->type;

      bool printable = (colType == TSDB_DATA_TYPE_BINARY) || (colType == TSDB_DATA_TYPE_TIMESTAMP) ||
                       (colType == TSDB_DATA_TYPE_BIGINT) || (colType == TSDB_DATA_TYPE_INT) ||
                       (colType == TSDB_DATA_TYPE_FLOAT) || (colType == TSDB_DATA_TYPE_DOUBLE);
      if (!printable) {
        continue;
      }

      char *cell = pdata[col]->data + pExpr->bytes * row;

      if (colType == TSDB_DATA_TYPE_BINARY) {
        printBinaryData(pExpr->base.functionId, cell, colType);
      } else if (colType == TSDB_DATA_TYPE_TIMESTAMP || colType == TSDB_DATA_TYPE_BIGINT) {
        printf("%" PRId64 "\t", *(int64_t *)cell);
      } else if (colType == TSDB_DATA_TYPE_INT) {
        printf("%d\t", *(int32_t *)cell);
      } else if (colType == TSDB_DATA_TYPE_FLOAT) {
        printf("%f\t", *(float *)cell);
      } else {  // TSDB_DATA_TYPE_DOUBLE
        printf("%lf\t", *(double *)cell);
      }
    }

    printf("\n");
  }
}

typedef struct SCompSupporter {
H
hjxilinx 已提交
2565 2566 2567
  STableQueryInfo **pTableQueryInfo;
  int32_t *         position;
  SQInfo *          pQInfo;
2568 2569 2570 2571 2572
} SCompSupporter;

int32_t tableResultComparFn(const void *pLeft, const void *pRight, void *param) {
  int32_t left = *(int32_t *)pLeft;
  int32_t right = *(int32_t *)pRight;
2573

2574 2575
  SCompSupporter *  supporter = (SCompSupporter *)param;
  SQueryRuntimeEnv *pRuntimeEnv = &supporter->pQInfo->runtimeEnv;
2576

2577 2578
  int32_t leftPos = supporter->position[left];
  int32_t rightPos = supporter->position[right];
2579

2580 2581 2582 2583
  /* left source is exhausted */
  if (leftPos == -1) {
    return 1;
  }
2584

2585 2586 2587 2588
  /* right source is exhausted*/
  if (rightPos == -1) {
    return -1;
  }
2589

H
hjxilinx 已提交
2590
  SWindowResInfo *pWindowResInfo1 = &supporter->pTableQueryInfo[left]->windowResInfo;
2591
  SWindowResult * pWindowRes1 = getWindowResult(pWindowResInfo1, leftPos);
2592

2593 2594
  char *b1 = getPosInResultPage(pRuntimeEnv, PRIMARYKEY_TIMESTAMP_COL_INDEX, pWindowRes1);
  TSKEY leftTimestamp = GET_INT64_VAL(b1);
2595

H
hjxilinx 已提交
2596
  SWindowResInfo *pWindowResInfo2 = &supporter->pTableQueryInfo[right]->windowResInfo;
2597
  SWindowResult * pWindowRes2 = getWindowResult(pWindowResInfo2, rightPos);
2598

2599 2600
  char *b2 = getPosInResultPage(pRuntimeEnv, PRIMARYKEY_TIMESTAMP_COL_INDEX, pWindowRes2);
  TSKEY rightTimestamp = GET_INT64_VAL(b2);
2601

2602 2603 2604
  if (leftTimestamp == rightTimestamp) {
    return 0;
  }
2605

2606 2607 2608
  return leftTimestamp > rightTimestamp ? 1 : -1;
}

2609
/*
 * Merge the per-table results group by group, starting at pQInfo->groupIndex,
 * and stop after the first group that produces at least one result page.
 * groupIndex is advanced past every group that has been processed.
 *
 * Returns -1 when merging a group fails, TSDB_CODE_SUCCESS otherwise.
 */
int32_t mergeIntoGroupResult(SQInfo *pQInfo) {
  int64_t startMs = taosGetTimestampMs();
  int32_t numOfGroups = GET_NUM_OF_TABLEGROUP(pQInfo);

  while (pQInfo->groupIndex < numOfGroups) {
    SArray *pGroup = GET_TABLEGROUP(pQInfo, pQInfo->groupIndex);

    int32_t code = mergeIntoGroupResultImpl(pQInfo, pGroup);
    if (code < 0) {  // not enough disk space to save the data into disk
      return -1;
    }

    pQInfo->groupIndex += 1;

    if (code > 0) {  // this group generates at least one result, return results
      break;
    }

    assert(pQInfo->numOfGroupResultPages == 0);
    qDebug("QInfo:%p no result in group %d, continue", pQInfo, pQInfo->groupIndex - 1);
  }

  qDebug("QInfo:%p merge res data into group, index:%d, total group:%d, elapsed time:%" PRId64 "ms", pQInfo,
         pQInfo->groupIndex - 1, numOfGroups, taosGetTimestampMs() - startMs);

  return TSDB_CODE_SUCCESS;
}

/*
 * Copy the next result page set of the current group into the output buffers
 * (pQuery->sdata) and record the number of copied rows in pQuery->rec.rows.
 *
 * When all pages of the current group have been consumed, the next group is
 * merged first; if nothing is left at all, pQInfo->tableIndex is moved to the
 * number of tables to mark the query as completed.
 */
void copyResToQueryResultBuf(SQInfo *pQInfo, SQuery *pQuery) {
  bool groupDrained = (pQInfo->offset == pQInfo->numOfGroupResultPages);
  if (groupDrained) {
    pQInfo->numOfGroupResultPages = 0;

    // current results of group has been sent to client, try next group
    if (mergeIntoGroupResult(pQInfo) != TSDB_CODE_SUCCESS) {
      return;  // failed to save data in the disk
    }

    // check if all results has been sent to client
    int32_t numOfGroup = GET_NUM_OF_TABLEGROUP(pQInfo);
    if (pQInfo->numOfGroupResultPages == 0 && pQInfo->groupIndex == numOfGroup) {
      pQInfo->tableIndex = pQInfo->tableqinfoGroupInfo.numOfTables;  // set query completed
      return;
    }
  }

  SQueryRuntimeEnv *   pRuntimeEnv = &pQInfo->runtimeEnv;
  SDiskbasedResultBuf *pResultBuf = pRuntimeEnv->pResultBuf;

  int32_t id = getGroupResultId(pQInfo->groupIndex - 1);
  SIDList list = getDataBufPagesIdList(pResultBuf, pQInfo->offset + id);

  // copy page by page; `copied` is both the destination row offset and the running total
  int32_t copied = 0;
  int32_t numOfPages = taosArrayGetSize(list);
  for (int32_t p = 0; p < numOfPages; ++p) {
    int32_t *  pgId = taosArrayGet(list, p);
    tFilePage *pData = GET_RES_BUF_PAGE_BY_ID(pResultBuf, *pgId);

    for (int32_t col = 0; col < pQuery->numOfOutput; ++col) {
      int32_t bytes = pRuntimeEnv->pCtx[col].outputBytes;
      char *  pDest = pQuery->sdata[col]->data;

      memcpy(pDest + copied * bytes, pData->data + pRuntimeEnv->offset[col] * pData->num,
             bytes * pData->num);
    }

    copied += pData->num;
  }

  assert(pQuery->rec.rows == 0);

  pQuery->rec.rows += copied;
  pQInfo->offset += 1;
}

H
Haojun Liao 已提交
2694
/*
 * Return the number of results held by a window result: the numOfRes of the
 * first output function that has produced anything, or 0 when none has.
 *
 * ts, tag and tagprj functions are ignored, because they can not decide the
 * number of output rows; that number is decided by the main output.
 */
int64_t getNumOfResultWindowRes(SQuery *pQuery, SWindowResult *pWindowRes) {
  for (int32_t col = 0; col < pQuery->numOfOutput; ++col) {
    int32_t funcId = pQuery->pSelectExpr[col].base.functionId;

    bool decidesRowCount =
        (funcId != TSDB_FUNC_TS) && (funcId != TSDB_FUNC_TAG) && (funcId != TSDB_FUNC_TAGPRJ);
    if (decidesRowCount) {
      SResultInfo *pInfo = &pWindowRes->resultInfo[col];
      assert(pInfo != NULL);

      if (pInfo->numOfRes > 0) {
        return pInfo->numOfRes;
      }
    }
  }

  return 0;
}

2717
/*
 * Merge the window results of all tables of one group into the group result.
 *
 * The tables that own at least one result page and one window result are the
 * merge sources. A loser tree ordered by window start timestamp picks the next
 * window; windows with the same timestamp are merged into one output row,
 * otherwise a new output row is started. Whenever the output buffer reaches
 * pQuery->rec.capacity rows it is flushed via flushFromResultBuf.
 *
 * Returns
 *   -1  when flushing the output buffer fails,
 *    0  when no table of the group has any result,
 *   otherwise pQInfo->numOfGroupResultPages.
 *
 * On allocation failure all temporary memory is released and control is
 * transferred with longjmp(pRuntimeEnv->env, TSDB_CODE_QRY_OUT_OF_MEMORY).
 */
int32_t mergeIntoGroupResultImpl(SQInfo *pQInfo, SArray *pGroup) {
  SQueryRuntimeEnv *pRuntimeEnv = &pQInfo->runtimeEnv;
  SQuery *          pQuery = pRuntimeEnv->pQuery;

  size_t      size = taosArrayGetSize(pGroup);
  tFilePage **buffer = pQuery->sdata;

  int32_t *         posList = calloc(size, sizeof(int32_t));
  STableQueryInfo **pTableList = malloc(POINTER_BYTES * size);

  if (pTableList == NULL || posList == NULL) {
    tfree(posList);
    tfree(pTableList);

    qError("QInfo:%p failed alloc memory", pQInfo);
    longjmp(pRuntimeEnv->env, TSDB_CODE_QRY_OUT_OF_MEMORY);
  }

  // collect the tables that actually have results to merge
  // todo opt for the case of one table per group
  int32_t numOfTables = 0;
  for (size_t i = 0; i < size; ++i) {
    STableQueryInfo *item = taosArrayGetP(pGroup, i);

    SIDList list = getDataBufPagesIdList(pRuntimeEnv->pResultBuf, TSDB_TABLEID(item->pTable)->tid);
    if (taosArrayGetSize(list) > 0 && item->windowResInfo.size > 0) {
      pTableList[numOfTables] = item;
      numOfTables += 1;
    }
  }

  if (numOfTables == 0) {
    tfree(posList);
    tfree(pTableList);

    assert(pQInfo->numOfGroupResultPages == 0);
    return 0;
  }

  SCompSupporter cs = {pTableList, posList, pQInfo};

  SLoserTreeInfo *pTree = NULL;
  tLoserTreeCreate(&pTree, numOfTables, &cs, tableResultComparFn);

  SResultInfo *pResultInfo = calloc(pQuery->numOfOutput, sizeof(SResultInfo));
  char *       buf = calloc(1, pRuntimeEnv->interBufSize);

  // calloc() may legitimately return NULL for a zero-sized request, so only a
  // non-empty intermediate buffer counts as an allocation failure
  bool bufFailed = (buf == NULL && pRuntimeEnv->interBufSize > 0);
  if (pTree == NULL || pResultInfo == NULL || bufFailed) {
    tfree(pTree);
    tfree(pTableList);
    tfree(posList);
    tfree(pResultInfo);
    tfree(buf);

    qError("QInfo:%p failed alloc memory", pQInfo);
    longjmp(pRuntimeEnv->env, TSDB_CODE_QRY_OUT_OF_MEMORY);
  }

  setWindowResultInfo(pResultInfo, pQuery, pRuntimeEnv->stableQuery, buf);
  resetMergeResultBuf(pQuery, pRuntimeEnv->pCtx, pResultInfo);

  int64_t lastTimestamp = -1;
  int64_t startt = taosGetTimestampMs();
  int32_t code = TSDB_CODE_SUCCESS;

  while (1) {
    int32_t pos = pTree->pNode[0].index;

    SWindowResInfo *pWindowResInfo = &pTableList[pos]->windowResInfo;
    SWindowResult * pWindowRes = getWindowResult(pWindowResInfo, cs.position[pos]);

    char *b = getPosInResultPage(pRuntimeEnv, PRIMARYKEY_TIMESTAMP_COL_INDEX, pWindowRes);
    TSKEY ts = GET_INT64_VAL(b);

    assert(ts == pWindowRes->window.skey);
    int64_t num = getNumOfResultWindowRes(pQuery, pWindowRes);
    if (num > 0) {
      if (ts == lastTimestamp) {  // merge with the last one
        doMerge(pRuntimeEnv, ts, pWindowRes, true);
      } else {  // start a new output row, flush the buffer first if it is full
        if (buffer[0]->num == pQuery->rec.capacity) {
          if (flushFromResultBuf(pQInfo) != TSDB_CODE_SUCCESS) {
            code = -1;
            break;
          }

          resetMergeResultBuf(pQuery, pRuntimeEnv->pCtx, pResultInfo);
        }

        doMerge(pRuntimeEnv, ts, pWindowRes, false);
        buffer[0]->num += 1;
      }

      lastTimestamp = ts;
    }

    // move this source to its next window, empty windows are simply skipped
    cs.position[pos] += 1;
    if (cs.position[pos] >= pWindowResInfo->size) {
      cs.position[pos] = -1;

      // all input sources are exhausted
      if (--numOfTables == 0) {
        break;
      }
    }

    tLoserTreeAdjust(pTree, pos + pTree->numOfEntries);
  }

  // flush the rows that are still in the buffer
  if (code == TSDB_CODE_SUCCESS && buffer[0]->num != 0) {
    if (flushFromResultBuf(pQInfo) != TSDB_CODE_SUCCESS) {
      code = -1;
    }
  }

  if (code != TSDB_CODE_SUCCESS) {
    qError("QInfo:%p failed to flush data into temp file, abort query", pQInfo);

    tfree(pTree);
    tfree(pTableList);
    tfree(posList);
    tfree(pResultInfo);
    tfree(buf);

    return -1;
  }

  int64_t endt = taosGetTimestampMs();

#ifdef _DEBUG_VIEW
  displayInterResult(pQuery->sdata, pRuntimeEnv, pQuery->sdata[0]->num);
#endif

  qDebug("QInfo:%p result merge completed for group:%d, elapsed time:%" PRId64 " ms", pQInfo, pQInfo->groupIndex, endt - startt);

  tfree(pTableList);
  tfree(posList);
  tfree(pTree);

  pQInfo->offset = 0;

  tfree(pResultInfo);
  tfree(buf);
  return pQInfo->numOfGroupResultPages;
}

/*
 * Flush the rows held in the output buffers (pQuery->sdata) into the
 * disk-based result buffer.
 *
 * The rows are split into chunks of at most `capacity` rows; each chunk gets a
 * new page that is registered under the same id
 * (getGroupResultId(groupIndex) + numOfGroupResultPages). Inside a page the
 * data of output column i starts at pRuntimeEnv->offset[i] * <rows in page>.
 * numOfGroupResultPages is increased by one per call, not per page.
 *
 * Returns TSDB_CODE_SUCCESS, or TSDB_CODE_QRY_OUT_OF_MEMORY when no new page
 * can be obtained from the result buffer.
 */
int32_t flushFromResultBuf(SQInfo *pQInfo) {
  SQueryRuntimeEnv *pRuntimeEnv = &pQInfo->runtimeEnv;
  SQuery *          pQuery = pRuntimeEnv->pQuery;

  SDiskbasedResultBuf *pResultBuf = pRuntimeEnv->pResultBuf;
  int32_t              capacity = (DEFAULT_INTERN_BUF_PAGE_SIZE - sizeof(tFilePage)) / pQuery->rowSize;

  int32_t pageId = -1;

  int32_t remain = pQuery->sdata[0]->num;
  int32_t offset = 0;

  // all pages written by one flush share the same id
  int32_t id = getGroupResultId(pQInfo->groupIndex) + pQInfo->numOfGroupResultPages;

  while (remain > 0) {
    int32_t r = (remain > capacity) ? capacity : remain;

    tFilePage *buf = getNewDataBuf(pResultBuf, id, &pageId);
    if (buf == NULL) {
      // NOTE(review): the exact failure reason is not visible here; callers only test for != TSDB_CODE_SUCCESS
      qError("QInfo:%p failed to get new page from result buffer", pQInfo);
      return TSDB_CODE_QRY_OUT_OF_MEMORY;
    }

    buf->num = r;

    // pagewise copy to dest buffer
    for (int32_t i = 0; i < pQuery->numOfOutput; ++i) {
      int32_t bytes = pRuntimeEnv->pCtx[i].outputBytes;

      memcpy(buf->data + pRuntimeEnv->offset[i] * buf->num, ((char *)pQuery->sdata[i]->data) + offset * bytes,
             buf->num * bytes);
    }

    offset += r;
    remain -= r;
  }

  pQInfo->numOfGroupResultPages += 1;
  return TSDB_CODE_SUCCESS;
}

/*
 * Rewind the merge output: every output page is emptied and every function
 * context is bound to pResultInfo[k]. The output pointer is placed one row
 * BEFORE the start of the page, because doMerge advances it by one row before
 * a new output row is written.
 */
void resetMergeResultBuf(SQuery *pQuery, SQLFunctionCtx *pCtx, SResultInfo *pResultInfo) {
  int32_t k = 0;

  while (k < pQuery->numOfOutput) {
    SQLFunctionCtx *ctx = &pCtx[k];
    tFilePage *     page = pQuery->sdata[k];

    page->num = 0;

    ctx->aOutputBuf = page->data - ctx->outputBytes;
    ctx->size = 1;
    ctx->startOffset = 0;
    ctx->resultInfo = &pResultInfo[k];

    ++k;
  }
}

2910 2911 2912 2913 2914 2915 2916
/*
 * Prepare one table for the reverse scan: the new query window starts one step
 * after the key reached by the previous scan and ends at the old window start.
 * The cursor order is switched and the window result index is moved to the
 * last window. A NULL pTableQueryInfo is ignored.
 *
 * Note: pQuery->order has already been switched when this is called.
 */
static void updateTableQueryInfoForReverseScan(SQuery *pQuery, STableQueryInfo *pTableQueryInfo) {
  if (pTableQueryInfo != NULL) {
    // order has change already!
    int32_t step = GET_FORWARD_DIRECTION_FACTOR(pQuery->order.order);

    // TODO validate that (lastKey + step) lies on the expected side of the old win.ekey
    TSKEY reached = pTableQueryInfo->lastKey + step;

    // swap the window direction: [reached, old skey]
    pTableQueryInfo->win.ekey = pTableQueryInfo->win.skey;
    pTableQueryInfo->win.skey = reached;
    pTableQueryInfo->lastKey = reached;

    SWITCH_ORDER(pTableQueryInfo->cur.order);
    pTableQueryInfo->cur.vgroupIndex = -1;

    // set the index at the end of time window
    pTableQueryInfo->windowResInfo.curIndex = pTableQueryInfo->windowResInfo.size - 1;
  }
}

/*
 * For every closed window, decide which functions take part in the reverse
 * scan: first/first_dst on an ascending query and last/last_dst on a descending
 * query are re-opened (complete = false); all other functions except ts and
 * tag are marked complete. ts and tag are left untouched.
 */
static void disableFuncInReverseScanImpl(SQInfo* pQInfo, SWindowResInfo *pWindowResInfo, int32_t order) {
  SQuery* pQuery = pQInfo->runtimeEnv.pQuery;

  for (int32_t w = 0; w < pWindowResInfo->size; ++w) {
    SWindowStatus *pStatus = getTimeWindowResStatus(pWindowResInfo, w);
    if (pStatus->closed) {
      SWindowResult *pRes = getWindowResult(pWindowResInfo, w);

      // open/close the specified query for each group result
      for (int32_t col = 0; col < pQuery->numOfOutput; ++col) {
        int32_t fid = pQuery->pSelectExpr[col].base.functionId;

        bool isFirst = (fid == TSDB_FUNC_FIRST || fid == TSDB_FUNC_FIRST_DST);
        bool isLast = (fid == TSDB_FUNC_LAST || fid == TSDB_FUNC_LAST_DST);

        if ((isFirst && order == TSDB_ORDER_ASC) || (isLast && order == TSDB_ORDER_DESC)) {
          pRes->resultInfo[col].complete = false;
        } else if (fid != TSDB_FUNC_TS && fid != TSDB_FUNC_TAG) {
          pRes->resultInfo[col].complete = true;
        }
      }
    }
  }
}

2962 2963
/*
 * Prepare the query for the reverse scan.
 *
 * 1. Select the functions that run in the reverse scan: for group-by-column and
 *    interval queries this is done per closed window, otherwise directly on the
 *    function contexts (contexts without a resultInfo are skipped).
 * 2. Update the query window/cursor of every table of every group.
 */
void disableFuncInReverseScan(SQInfo *pQInfo) {
  SQueryRuntimeEnv* pRuntimeEnv = &pQInfo->runtimeEnv;
  SQuery *pQuery = pRuntimeEnv->pQuery;
  int32_t order = pQuery->order.order;

  // group by normal columns and interval query on normal table
  SWindowResInfo *pWindowResInfo = &pRuntimeEnv->windowResInfo;
  if (pRuntimeEnv->groupbyNormalCol || QUERY_IS_INTERVAL_QUERY(pQuery)) {
    disableFuncInReverseScanImpl(pQInfo, pWindowResInfo, order);
  } else {  // for simple result of table query,
    for (int32_t col = 0; col < pQuery->numOfOutput; ++col) {  // todo refactor
      int32_t fid = pQuery->pSelectExpr[col].base.functionId;

      SResultInfo *pResInfo = pRuntimeEnv->pCtx[col].resultInfo;
      if (pResInfo == NULL) {
        continue;  // resultInfo is NULL, means no data checked in previous scan
      }

      bool isFirst = (fid == TSDB_FUNC_FIRST || fid == TSDB_FUNC_FIRST_DST);
      bool isLast = (fid == TSDB_FUNC_LAST || fid == TSDB_FUNC_LAST_DST);

      if ((isFirst && order == TSDB_ORDER_ASC) || (isLast && order == TSDB_ORDER_DESC)) {
        pResInfo->complete = false;
      } else if (fid != TSDB_FUNC_TS && fid != TSDB_FUNC_TAG) {
        pResInfo->complete = true;
      }
    }
  }

  int32_t numOfGroups = GET_NUM_OF_TABLEGROUP(pQInfo);

  for (int32_t g = 0; g < numOfGroups; ++g) {
    SArray *pGroup = GET_TABLEGROUP(pQInfo, g);

    size_t numOfTables = taosArrayGetSize(pGroup);
    for (int32_t t = 0; t < numOfTables; ++t) {
      STableQueryInfo *pTableInfo = taosArrayGetP(pGroup, t);
      updateTableQueryInfoForReverseScan(pQuery, pTableInfo);
    }
  }
}

3002
/* Switch the scan order stored in the context of every output function. */
void switchCtxOrder(SQueryRuntimeEnv *pRuntimeEnv) {
  SQLFunctionCtx *pCtx = pRuntimeEnv->pCtx;
  int32_t         numOfOutput = pRuntimeEnv->pQuery->numOfOutput;

  for (int32_t col = 0; col < numOfOutput; ++col) {
    SWITCH_ORDER(pCtx[col].order);
  }
}

H
Haojun Liao 已提交
3009
/*
 * Allocate the result info of one result row.
 *
 * A single zeroed allocation holds numOfOutput SResultInfo entries followed by
 * interBufSize bytes of intermediate buffer; setWindowResultInfo then wires the
 * entries to that trailing buffer. The row position is reset to {-1, -1}.
 *
 * Returns TSDB_CODE_SUCCESS, or TSDB_CODE_QRY_OUT_OF_MEMORY when the
 * allocation fails.
 */
int32_t createQueryResultInfo(SQuery *pQuery, SWindowResult *pResultRow, bool isSTableQuery, size_t interBufSize) {
  int32_t numOfCols = pQuery->numOfOutput;
  size_t  infoBytes = numOfCols * sizeof(SResultInfo);

  pResultRow->resultInfo = calloc(1, infoBytes + interBufSize);
  if (pResultRow->resultInfo == NULL) {
    return TSDB_CODE_QRY_OUT_OF_MEMORY;
  }

  pResultRow->pos = (SPosInfo) {-1, -1};

  // the intermediate result buffer lives right behind the SResultInfo array
  char* interBuf = (char*) pResultRow->resultInfo + infoBytes;
  setWindowResultInfo(pResultRow->resultInfo, pQuery, isSTableQuery, interBuf);

  return TSDB_CODE_SUCCESS;
}

/*
 * Rewind the output of every function to the start of its output page, clear
 * the page, bind the context to the runtime result info and finally
 * re-initialize the functions via initCtxOutputBuf.
 */
void resetCtxOutputBuf(SQueryRuntimeEnv *pRuntimeEnv) {
  SQuery *pQuery = pRuntimeEnv->pQuery;

  for (int32_t col = 0; col < pQuery->numOfOutput; ++col) {
    SQLFunctionCtx *pCtx = &pRuntimeEnv->pCtx[col];
    SExprInfo *     pExpr = &pQuery->pSelectExpr[col];
    tFilePage *     page = pQuery->sdata[col];

    pCtx->aOutputBuf = page->data;

    /*
     * set the output buffer information and intermediate buffer
     * not all queries require the interResultBuf, such as COUNT/TAGPRJ/PRJ/TAG etc.
     */
    SResultInfo *pResInfo = &pRuntimeEnv->resultInfo[col];
    resetResultInfo(pResInfo);
    pCtx->resultInfo = pResInfo;

    // top/bottom/diff write their timestamps into the output buffer of the first column
    int32_t fid = pExpr->base.functionId;
    if (fid == TSDB_FUNC_TOP || fid == TSDB_FUNC_BOTTOM || fid == TSDB_FUNC_DIFF) {
      pCtx->ptsOutputBuf = pRuntimeEnv->pCtx[0].aOutputBuf;
    }

    memset(page->data, 0, (size_t)pExpr->bytes * pQuery->rec.capacity);
  }

  initCtxOutputBuf(pRuntimeEnv);
}

/*
 * Move the output position of the function contexts forward by `output` rows
 * and reset their result info. Must not be used with the diff function.
 */
void forwardCtxOutputBuf(SQueryRuntimeEnv *pRuntimeEnv, int64_t output) {
  SQuery *pQuery = pRuntimeEnv->pQuery;

  // reset the execution contexts
  for (int32_t col = 0; col < pQuery->numOfOutput; ++col) {
    SQLFunctionCtx *pCtx = &pRuntimeEnv->pCtx[col];

    int32_t fid = pQuery->pSelectExpr[col].base.functionId;
    assert(fid != TSDB_FUNC_DIFF);

    // set next output position
    if (IS_OUTER_FORWARD(aAggs[fid].nStatus)) {
      pCtx->aOutputBuf += pCtx->outputBytes * output;
    }

    /*
     * NOTE: top/bottom assign the first output column (timestamp) themselves and
     * write it through ptsOutputBuf, so that buffer has to be forwarded as well.
     * diff function is handled in multi-output function.
     */
    bool isTopBottom = (fid == TSDB_FUNC_TOP) || (fid == TSDB_FUNC_BOTTOM);
    if (isTopBottom) {
      pCtx->ptsOutputBuf += TSDB_KEYSIZE * output;
    }

    resetResultInfo(pCtx->resultInfo);
  }
}

/*
 * Reset the stage of every function context and run the init routine of each
 * function whose result info has not been initialized yet.
 */
void initCtxOutputBuf(SQueryRuntimeEnv *pRuntimeEnv) {
  SQuery *pQuery = pRuntimeEnv->pQuery;

  for (int32_t col = 0; col < pQuery->numOfOutput; ++col) {
    SQLFunctionCtx *pCtx = &pRuntimeEnv->pCtx[col];
    int32_t         fid = pQuery->pSelectExpr[col].base.functionId;

    pCtx->currentStage = 0;

    SResultInfo* pResInfo = GET_RES_INFO(pCtx);
    if (!pResInfo->initialized) {
      aAggs[fid].init(pCtx);
    }
  }
}

3097
/*
 * Apply the remaining OFFSET of the query to the rows in the output buffer.
 *
 * - Nothing happens when there are no rows or no offset is left.
 * - When more rows than the offset are available, the first `offset` rows are
 *   dropped, the remaining rows are moved to the front of each output page and
 *   the output pointers are placed behind them.
 * - Otherwise all rows are discarded, the offset is reduced accordingly, the
 *   output buffers are reset and the buffer-full flag is cleared.
 */
void skipResults(SQueryRuntimeEnv *pRuntimeEnv) {
  SQuery *pQuery = pRuntimeEnv->pQuery;

  bool nothingToSkip = (pQuery->rec.rows == 0) || (pQuery->limit.offset == 0);
  if (nothingToSkip) {
    return;
  }

  if (pQuery->rec.rows > pQuery->limit.offset) {
    // partial skip: some rows survive
    int64_t numOfSkip = pQuery->limit.offset;
    pQuery->rec.rows -= numOfSkip;
    pQuery->limit.offset = 0;

    qDebug("QInfo:%p skip row:%"PRId64", new offset:%d, numOfRows remain:%" PRIu64, GET_QINFO_ADDR(pRuntimeEnv), numOfSkip,
           0, pQuery->rec.rows);

    for (int32_t col = 0; col < pQuery->numOfOutput; ++col) {
      int32_t fid = pQuery->pSelectExpr[col].base.functionId;
      int32_t bytes = pRuntimeEnv->pCtx[col].outputBytes;

      memmove(pQuery->sdata[col]->data, (char*) pQuery->sdata[col]->data + bytes * numOfSkip, pQuery->rec.rows * bytes);
      pRuntimeEnv->pCtx[col].aOutputBuf = ((char*) pQuery->sdata[col]->data) + pQuery->rec.rows * bytes;

      if (fid == TSDB_FUNC_DIFF || fid == TSDB_FUNC_TOP || fid == TSDB_FUNC_BOTTOM) {
        pRuntimeEnv->pCtx[col].ptsOutputBuf = pRuntimeEnv->pCtx[0].aOutputBuf;
      }
    }

    updateNumOfResult(pRuntimeEnv, pQuery->rec.rows);
    return;
  }

  // full skip: every row in the buffer is consumed by the offset
  qDebug("QInfo:%p skip rows:%" PRId64 ", new offset:%" PRIu64, GET_QINFO_ADDR(pRuntimeEnv), pQuery->rec.rows,
      pQuery->limit.offset - pQuery->rec.rows);

  pQuery->limit.offset -= pQuery->rec.rows;
  pQuery->rec.rows = 0;

  resetCtxOutputBuf(pRuntimeEnv);

  // clear the buffer full flag if exists
  CLEAR_QUERY_STATUS(pQuery, QUERY_RESBUF_FULL);
}

/*
 * Update the status bits of the query.
 *
 * QUERY_NOT_COMPLETED replaces the whole status value; any other status is OR-ed into the current value
 * after the QUERY_NOT_COMPLETED bit has been cleared.
 */
void setQueryStatus(SQuery *pQuery, int8_t status) {
  if (status != QUERY_NOT_COMPLETED) {
    // QUERY_NOT_COMPLETED is not compatible with any other status, so clear its position first
    CLEAR_QUERY_STATUS(pQuery, QUERY_NOT_COMPLETED);
    pQuery->status |= status;
    return;
  }

  pQuery->status = status;
}

/*
 * Invoke xNextStep on every output function context except TSDB_FUNC_TS.
 * Returns true if at least one of the visited result infos is not complete afterwards.
 */
static bool doNextStepOnOutputCtx(SQueryRuntimeEnv *pRuntimeEnv) {
  SQuery *pQuery = pRuntimeEnv->pQuery;
  bool    incomplete = false;

  for (int32_t j = 0; j < pQuery->numOfOutput; ++j) {
    int16_t functId = pQuery->pSelectExpr[j].base.functionId;
    if (functId == TSDB_FUNC_TS) {
      continue;
    }

    aAggs[functId].xNextStep(&pRuntimeEnv->pCtx[j]);
    SResultInfo *pResInfo = GET_RES_INFO(&pRuntimeEnv->pCtx[j]);

    if (!pResInfo->complete) {
      incomplete = true;
    }
  }

  return incomplete;
}

/*
 * Advance every output function to its next step and report whether any of them is still incomplete,
 * i.e. whether the data blocks have to be scanned once more.
 *
 * For group-by-normal-column and interval queries, this is done for each closed window result;
 * otherwise it is done once on the current contexts.
 */
bool needScanDataBlocksAgain(SQueryRuntimeEnv *pRuntimeEnv) {
  SQuery *pQuery = pRuntimeEnv->pQuery;

  if (!pRuntimeEnv->groupbyNormalCol && !QUERY_IS_INTERVAL_QUERY(pQuery)) {
    return doNextStepOnOutputCtx(pRuntimeEnv);
  }

  SWindowResInfo *pWindowResInfo = &pRuntimeEnv->windowResInfo;
  bool            toContinue = false;

  for (int32_t i = 0; i < pWindowResInfo->size; ++i) {
    SWindowResult *pResult = getWindowResult(pWindowResInfo, i);
    if (pResult->status.closed) {
      // bind the contexts to this window before stepping the functions
      setWindowResOutputBuf(pRuntimeEnv, pResult);

      if (doNextStepOnOutputCtx(pRuntimeEnv)) {
        toContinue = true;
      }
    }
  }

  return toContinue;
}

H
Haojun Liao 已提交
3193
/*
 * Take a snapshot of the current query state (status, current window index, query window) together with
 * the given start key. The snapshot window spans from start to the end key of the current table's window.
 */
static SQueryStatusInfo getQueryStatusInfo(SQueryRuntimeEnv *pRuntimeEnv, TSKEY start) {
  SQuery          *pQuery = pRuntimeEnv->pQuery;
  STableQueryInfo *pTableQueryInfo = pQuery->current;

  // start must not be beyond lastKey with respect to the scan direction
  assert(QUERY_IS_ASC_QUERY(pQuery) ? (start <= pTableQueryInfo->lastKey) : (start >= pTableQueryInfo->lastKey));

  SQueryStatusInfo info = {
      .status      = pQuery->status,
      .windowIndex = pRuntimeEnv->windowResInfo.curIndex,
      .lastKey     = start,
      .w           = pQuery->window,
      .curWindow   = {.skey = start, .ekey = pTableQueryInfo->win.ekey},
  };

  return info;
}

3211 3212 3213 3214
/*
 * Prepare the runtime environment for the reverse scan: save and flip the ts-buffer cursor, swap the
 * boundaries of the window that has just been scanned, switch the scan order, and open a new secondary
 * query handle on the reversed window. Jumps to pRuntimeEnv->env with terrno if the handle cannot be created.
 */
static void setEnvBeforeReverseScan(SQueryRuntimeEnv *pRuntimeEnv, SQueryStatusInfo *pStatus) {
  SQuery *pQuery = pRuntimeEnv->pQuery;
  SQInfo *pQInfo = GET_QINFO_ADDR(pRuntimeEnv);

  // save the cursor, it is restored after the reverse scan
  pStatus->cur = tsBufGetCursor(pRuntimeEnv->pTSBuf);
  if (pRuntimeEnv->pTSBuf) {
    SWITCH_ORDER(pRuntimeEnv->pTSBuf->cur.order);
    tsBufNextPos(pRuntimeEnv->pTSBuf);
  }

  SWITCH_ORDER(pQuery->order.order);
  SET_REVERSE_SCAN_FLAG(pRuntimeEnv);

  // reverse order time range
  pQuery->window = pStatus->curWindow;
  SWAP(pQuery->window.skey, pQuery->window.ekey, TSKEY);

  STsdbQueryCond cond = {
      .twindow   = pQuery->window,
      .order     = pQuery->order.order,
      .colList   = pQuery->colList,
      .numOfCols = pQuery->numOfCols,
  };

  // clean unused handle
  if (pRuntimeEnv->pSecQueryHandle != NULL) {
    tsdbCleanupQueryHandle(pRuntimeEnv->pSecQueryHandle);
  }

  // add ref for table
  pRuntimeEnv->pSecQueryHandle = tsdbQueryTables(pQInfo->tsdb, &cond, &pQInfo->tableGroupInfo, pQInfo);
  if (pRuntimeEnv->pSecQueryHandle == NULL) {
    longjmp(pRuntimeEnv->env, terrno);
  }

  setQueryStatus(pQuery, QUERY_NOT_COMPLETED);
  switchCtxOrder(pRuntimeEnv);
  disableFuncInReverseScan(pQInfo);
}

3251 3252
/*
 * Undo the changes made for the reverse scan: switch the scan order back, restore the ts-buffer cursor,
 * set the master scan flag, and restore lastKey, status and the query window from the saved snapshot.
 */
static void clearEnvAfterReverseScan(SQueryRuntimeEnv *pRuntimeEnv, SQueryStatusInfo *pStatus) {
  SQuery          *pQuery = pRuntimeEnv->pQuery;
  STableQueryInfo *pTableQueryInfo = pQuery->current;

  SWITCH_ORDER(pQuery->order.order);
  switchCtxOrder(pRuntimeEnv);

  tsBufSetCursor(pRuntimeEnv->pTSBuf, &pStatus->cur);
  if (pRuntimeEnv->pTSBuf) {
    pRuntimeEnv->pTSBuf->cur.order = pQuery->order.order;
  }

  SET_MASTER_SCAN_FLAG(pRuntimeEnv);

  // restore the status and the last key recorded before the reverse scan
  pQuery->status = pStatus->status;
  pTableQueryInfo->lastKey = pStatus->lastKey;

  // restore the original time window for both the table and the query
  pTableQueryInfo->win = pStatus->w;
  pQuery->window = pTableQueryInfo->win;
}

3274
/*
 * Scan the data blocks of the current table, starting from the given key.
 *
 * The master scan is executed first. While needScanDataBlocksAgain() reports that some function is not
 * complete, the range covered by the master scan is scanned again (REPEAT_SCAN) through a new secondary
 * query handle. Finally, if needReverseScan() holds, one more scan is performed in reverse order.
 *
 * The function leaves through longjmp(pRuntimeEnv->env, ...) when a query handle cannot be created or
 * when the query has been killed.
 */
void scanOneTableDataBlocks(SQueryRuntimeEnv *pRuntimeEnv, TSKEY start) {
  SQuery          *pQuery = pRuntimeEnv->pQuery;
  SQInfo          *pQInfo = (SQInfo *) GET_QINFO_ADDR(pRuntimeEnv);
  STableQueryInfo *pTableQueryInfo = pQuery->current;

  setQueryStatus(pQuery, QUERY_NOT_COMPLETED);

  // store the start query position
  SQueryStatusInfo qstatus = getQueryStatusInfo(pRuntimeEnv, start);

  SET_MASTER_SCAN_FLAG(pRuntimeEnv);
  int32_t step = GET_FORWARD_DIRECTION_FACTOR(pQuery->order.order);

  for (;;) {
    doScanAllDataBlocks(pRuntimeEnv);

    // remember where the master scan stopped, the repeat/reverse scans are limited to that range
    if (IS_MASTER_SCAN(pRuntimeEnv)) {
      qstatus.status = pQuery->status;
      qstatus.curWindow.ekey = pTableQueryInfo->lastKey - step;
      qstatus.lastKey = pTableQueryInfo->lastKey;
    }

    if (!needScanDataBlocksAgain(pRuntimeEnv)) {
      // restore the status code and jump out of loop
      if (pRuntimeEnv->scanFlag == REPEAT_SCAN) {
        pQuery->status = qstatus.status;
      }

      break;
    }

    STsdbQueryCond cond = {
        .twindow   = qstatus.curWindow,
        .order     = pQuery->order.order,
        .colList   = pQuery->colList,
        .numOfCols = pQuery->numOfCols,
    };

    // release the handle of the previous round before creating a new one
    if (pRuntimeEnv->pSecQueryHandle != NULL) {
      tsdbCleanupQueryHandle(pRuntimeEnv->pSecQueryHandle);
    }

    pRuntimeEnv->pSecQueryHandle = tsdbQueryTables(pQInfo->tsdb, &cond, &pQInfo->tableGroupInfo, pQInfo);
    if (pRuntimeEnv->pSecQueryHandle == NULL) {
      longjmp(pRuntimeEnv->env, terrno);
    }

    pRuntimeEnv->windowResInfo.curIndex = qstatus.windowIndex;

    setQueryStatus(pQuery, QUERY_NOT_COMPLETED);
    pRuntimeEnv->scanFlag = REPEAT_SCAN;

    qDebug("QInfo:%p start to repeat scan data blocks due to query func required, qrange:%"PRId64"-%"PRId64, pQInfo,
        cond.twindow.skey, cond.twindow.ekey);

    // check if query is killed or not
    if (IS_QUERY_KILLED(pQInfo)) {
      finalizeQueryResult(pRuntimeEnv); // clean up allocated resource during query
      longjmp(pRuntimeEnv->env, TSDB_CODE_TSC_QUERY_CANCELLED);
    }
  }

  if (needReverseScan(pQuery)) {
    setEnvBeforeReverseScan(pRuntimeEnv, &qstatus);

    // reverse scan from current position
    qDebug("QInfo:%p start to reverse scan", pQInfo);
    doScanAllDataBlocks(pRuntimeEnv);

    clearEnvAfterReverseScan(pRuntimeEnv, &qstatus);
  }
}

H
hjxilinx 已提交
3348
/*
 * Call the xFinalize routine of every output function.
 *
 * For group-by-normal-column and interval queries the routine is applied to each closed window result
 * (all windows are closed first for group-by-normal-column queries), and the number of rows of each
 * window result is updated. For other queries it is applied once to the current contexts.
 */
void finalizeQueryResult(SQueryRuntimeEnv *pRuntimeEnv) {
  SQuery *pQuery = pRuntimeEnv->pQuery;

  if (!pRuntimeEnv->groupbyNormalCol && !QUERY_IS_INTERVAL_QUERY(pQuery)) {
    for (int32_t j = 0; j < pQuery->numOfOutput; ++j) {
      aAggs[pQuery->pSelectExpr[j].base.functionId].xFinalize(&pRuntimeEnv->pCtx[j]);
    }

    return;
  }

  // for each group result, call the finalize function for each column
  SWindowResInfo *pWindowResInfo = &pRuntimeEnv->windowResInfo;
  if (pRuntimeEnv->groupbyNormalCol) {
    closeAllTimeWindow(pWindowResInfo);
  }

  for (int32_t i = 0; i < pWindowResInfo->size; ++i) {
    if (!isWindowResClosed(pWindowResInfo, i)) {
      continue;
    }

    SWindowResult *pWindowRes = &pWindowResInfo->pResult[i];
    setWindowResOutputBuf(pRuntimeEnv, pWindowRes);

    for (int32_t j = 0; j < pQuery->numOfOutput; ++j) {
      aAggs[pQuery->pSelectExpr[j].base.functionId].xFinalize(&pRuntimeEnv->pCtx[j]);
    }

    /*
     * set the number of output results for group by normal columns, the number of output rows usually is 1 except
     * the top and bottom query
     */
    pWindowRes->numOfRows = getNumOfResult(pRuntimeEnv);
  }
}

/*
 * Returns true if at least one output expression is something other than the ts, tag or tag-projection
 * function.
 */
static bool hasMainOutput(SQuery *pQuery) {
  for (int32_t i = 0; i < pQuery->numOfOutput; ++i) {
    switch (pQuery->pSelectExpr[i].base.functionId) {
      case TSDB_FUNC_TS:
      case TSDB_FUNC_TAG:
      case TSDB_FUNC_TAGPRJ:
        break;  // auxiliary output, keep looking
      default:
        return true;
    }
  }

  return false;
}

H
Haojun Liao 已提交
3396
/*
 * Initialize the STableQueryInfo located in the caller-provided buffer.
 *
 * The query window, the last key (the start key of the window), the table handle and the vgroup index of
 * the cursor (-1) are set. For interval and group-by-normal-column queries the window result info is
 * initialized as well.
 *
 * Returns buf as STableQueryInfo*, or NULL if the window result info cannot be initialized.
 */
static STableQueryInfo *createTableQueryInfo(SQueryRuntimeEnv *pRuntimeEnv, void* pTable, STimeWindow win, void* buf) {
  SQuery          *pQuery = pRuntimeEnv->pQuery;
  STableQueryInfo *pTableQueryInfo = buf;

  pTableQueryInfo->pTable = pTable;
  pTableQueryInfo->win = win;
  pTableQueryInfo->lastKey = win.skey;
  pTableQueryInfo->cur.vgroupIndex = -1;

  // in other aggregate query, do not initialize the windowResInfo
  if (!QUERY_IS_INTERVAL_QUERY(pQuery) && !pRuntimeEnv->groupbyNormalCol) {
    return pTableQueryInfo;
  }

  // set more initial size of interval/groupby query
  int32_t initialSize = 16;
  int32_t initialThreshold = 100;

  int32_t code = initWindowResInfo(&pTableQueryInfo->windowResInfo, pRuntimeEnv, initialSize, initialThreshold, TSDB_DATA_TYPE_INT);
  return (code == TSDB_CODE_SUCCESS) ? pTableQueryInfo : NULL;
}

H
Haojun Liao 已提交
3421
/*
 * Clean up the window result info held by the given table query info. A NULL argument is ignored.
 */
void destroyTableQueryInfo(STableQueryInfo *pTableQueryInfo) {
  if (pTableQueryInfo != NULL) {
    cleanupTimeWindowInfo(&pTableQueryInfo->windowResInfo);
  }
}

H
Haojun Liao 已提交
3429 3430 3431 3432
/*
 * Assert that lastKey of the table has not moved in front of the start key of its window:
 * lastKey >= win.skey for an ascending query, lastKey <= win.skey otherwise.
 */
#define CHECK_QUERY_TIME_RANGE(_q, _tableInfo)                                \
  do {                                                                        \
    assert(QUERY_IS_ASC_QUERY(_q)                                             \
               ? ((_tableInfo)->lastKey >= (_tableInfo)->win.skey)            \
               : ((_tableInfo)->lastKey <= (_tableInfo)->win.skey));          \
  } while (0)
3434 3435 3436 3437

/**
 * set output buffer for different group
 * @param pRuntimeEnv
3438
 * @param pDataBlockInfo
3439
 */
H
Haojun Liao 已提交
3440
void setExecutionContext(SQInfo *pQInfo, int32_t groupIndex, TSKEY nextKey) {
3441
  SQueryRuntimeEnv *pRuntimeEnv = &pQInfo->runtimeEnv;
H
Haojun Liao 已提交
3442 3443 3444
  STableQueryInfo  *pTableQueryInfo = pRuntimeEnv->pQuery->current;
  SWindowResInfo   *pWindowResInfo = &pRuntimeEnv->windowResInfo;

H
Haojun Liao 已提交
3445 3446
  // lastKey needs to be updated
  pTableQueryInfo->lastKey = nextKey;
H
Haojun Liao 已提交
3447 3448 3449 3450

  if (pRuntimeEnv->hasTagResults || pRuntimeEnv->pTSBuf != NULL) {
    setAdditionalInfo(pQInfo, pTableQueryInfo->pTable, pTableQueryInfo);
  }
H
Haojun Liao 已提交
3451

H
Haojun Liao 已提交
3452 3453 3454
  if (pRuntimeEnv->prevGroupId != INT32_MIN && pRuntimeEnv->prevGroupId == groupIndex) {
    return;
  }
3455

H
Haojun Liao 已提交
3456
  int32_t GROUPRESULTID = 1;
3457 3458
  SWindowResult *pWindowRes = doSetTimeWindowFromKey(pRuntimeEnv, pWindowResInfo, (char *)&groupIndex,
      sizeof(groupIndex), true);
3459 3460 3461
  if (pWindowRes == NULL) {
    return;
  }
3462

3463 3464 3465 3466 3467 3468 3469 3470 3471 3472
  /*
   * not assign result buffer yet, add new result buffer
   * all group belong to one result set, and each group result has different group id so set the id to be one
   */
  if (pWindowRes->pos.pageId == -1) {
    if (addNewWindowResultBuf(pWindowRes, pRuntimeEnv->pResultBuf, GROUPRESULTID, pRuntimeEnv->numOfRowsPerPage) !=
        TSDB_CODE_SUCCESS) {
      return;
    }
  }
3473

H
Haojun Liao 已提交
3474 3475
  // record the current active group id
  pRuntimeEnv->prevGroupId = groupIndex;
3476 3477 3478 3479
  setWindowResOutputBuf(pRuntimeEnv, pWindowRes);
  initCtxOutputBuf(pRuntimeEnv);
}

H
Haojun Liao 已提交
3480
/*
 * Bind every output function context to the given window result: the output buffer is set to the position
 * of the result inside the result page, and the result info is set to the one kept by the window result.
 */
void setWindowResOutputBuf(SQueryRuntimeEnv *pRuntimeEnv, SWindowResult *pResult) {
  SQuery *pQuery = pRuntimeEnv->pQuery;

  // Note: pResult->pos[i]->num == 0, there is only fixed number of results for each group
  for (int32_t i = 0; i < pQuery->numOfOutput; ++i) {
    SQLFunctionCtx *pCtx = &pRuntimeEnv->pCtx[i];
    int32_t         functionId = pQuery->pSelectExpr[i].base.functionId;

    pCtx->aOutputBuf = getPosInResultPage(pRuntimeEnv, i, pResult);

    // these functions additionally use the output buffer of the first column
    bool needTsOutput = (functionId == TSDB_FUNC_TOP || functionId == TSDB_FUNC_BOTTOM || functionId == TSDB_FUNC_DIFF);
    if (needTsOutput) {
      pCtx->ptsOutputBuf = pRuntimeEnv->pCtx[0].aOutputBuf;
    }

    /*
     * set the output buffer information and intermediate buffer
     * not all queries require the interResultBuf, such as COUNT
     */
    pCtx->resultInfo = &pResult->resultInfo[i];

    // set super table query flag
    SResultInfo *pResInfo = GET_RES_INFO(pCtx);
    pResInfo->superTableQ = pRuntimeEnv->stableQuery;
  }
}

H
Haojun Liao 已提交
3505 3506
/*
 * Bind every output function context to the given window result and initialize the result info of the
 * functions that have not been initialized yet. Contexts whose result info is both initialized and
 * complete only get their result info pointer updated.
 */
void setWindowResOutputBufInitCtx(SQueryRuntimeEnv *pRuntimeEnv, SWindowResult *pResult) {
  SQuery *pQuery = pRuntimeEnv->pQuery;

  // Note: pResult->pos[i]->num == 0, there is only fixed number of results for each group
  for (int32_t i = 0; i < pQuery->numOfOutput; ++i) {
    SQLFunctionCtx *pCtx = &pRuntimeEnv->pCtx[i];
    pCtx->resultInfo = &pResult->resultInfo[i];

    bool finished = pCtx->resultInfo->initialized && pCtx->resultInfo->complete;
    if (!finished) {
      pCtx->aOutputBuf = getPosInResultPage(pRuntimeEnv, i, pResult);
      pCtx->currentStage = 0;

      int32_t functionId = pCtx->functionId;
      if (functionId == TSDB_FUNC_TOP || functionId == TSDB_FUNC_BOTTOM || functionId == TSDB_FUNC_DIFF) {
        pCtx->ptsOutputBuf = pRuntimeEnv->pCtx[0].aOutputBuf;
      }

      /*
       * set the output buffer information and intermediate buffer
       * not all queries require the interResultBuf, such as COUNT
       */
      pCtx->resultInfo->superTableQ = pRuntimeEnv->stableQuery;     // set super table query flag

      if (!pCtx->resultInfo->initialized) {
        aAggs[functionId].init(pCtx);
      }
    }
  }
}

3537
/*
 * Set the tag values of the given table and, if a ts buffer exists, position its cursor for the table:
 * on the first visit (cur.vgroupIndex == -1) the start position is looked up by the tag value and the
 * resulting cursor is saved in the table query info; afterwards the saved cursor is restored.
 *
 * Always returns 0.
 */
int32_t setAdditionalInfo(SQInfo *pQInfo, void* pTable, STableQueryInfo *pTableQueryInfo) {
  SQueryRuntimeEnv *pRuntimeEnv = &pQInfo->runtimeEnv;

  setTagVal(pRuntimeEnv, pTable, pQInfo->tsdb);

  // both the master and supplement scan needs to set the correct ts comp start position
  if (pRuntimeEnv->pTSBuf == NULL) {
    return 0;
  }

  if (pTableQueryInfo->cur.vgroupIndex != -1) {
    tsBufSetCursor(pRuntimeEnv->pTSBuf, &pTableQueryInfo->cur);
    return 0;
  }

  pTableQueryInfo->tag = pRuntimeEnv->pCtx[0].tag.i64Key;
  tsBufGetElemStartPos(pRuntimeEnv->pTSBuf, 0, pTableQueryInfo->tag);

  // keep the cursor info of current meter
  pTableQueryInfo->cur = pRuntimeEnv->pTSBuf->cur;
  return 0;
}

/*
 * There are two cases to handle:
 *
 * 1. Query range is not set yet (queryRangeSet = 0). we need to set the query range info, including pQuery->lastKey,
 *    pQuery->window.skey, and pQuery->eKey.
 * 2. Query range is set and query is in progress. There may be another result with the same query ranges to be
 *    merged during merge stage. In this case, we need the pTableQueryInfo->lastResRows to decide if there
 *    is a previous result generated or not.
 */
H
hjxilinx 已提交
3568
void setIntervalQueryRange(SQInfo *pQInfo, TSKEY key) {
3569 3570
  SQueryRuntimeEnv *pRuntimeEnv = &pQInfo->runtimeEnv;
  SQuery *          pQuery = pRuntimeEnv->pQuery;
H
hjxilinx 已提交
3571 3572
  STableQueryInfo *pTableQueryInfo = pQuery->current;
  
3573 3574 3575
  if (pTableQueryInfo->queryRangeSet) {
    pTableQueryInfo->lastKey = key;
  } else {
3576
    pTableQueryInfo->win.skey = key;
3577
    STimeWindow win = {.skey = key, .ekey = pQuery->window.ekey};
3578

3579 3580 3581 3582 3583
    // for too small query range, no data in this interval.
    if ((QUERY_IS_ASC_QUERY(pQuery) && (pQuery->window.ekey < pQuery->window.skey)) ||
        (!QUERY_IS_ASC_QUERY(pQuery) && (pQuery->window.skey < pQuery->window.ekey))) {
      return;
    }
3584

3585 3586 3587 3588 3589 3590
    /**
     * In handling the both ascending and descending order super table query, we need to find the first qualified
     * timestamp of this table, and then set the first qualified start timestamp.
     * In ascending query, key is the first qualified timestamp. However, in the descending order query, additional
     * operations involve.
     */
H
Haojun Liao 已提交
3591
    STimeWindow     w = TSWINDOW_INITIALIZER;
3592
    SWindowResInfo *pWindowResInfo = &pTableQueryInfo->windowResInfo;
3593

H
Haojun Liao 已提交
3594 3595
    TSKEY sk = MIN(win.skey, win.ekey);
    TSKEY ek = MAX(win.skey, win.ekey);
H
Haojun Liao 已提交
3596
    getAlignQueryTimeWindow(pQuery, win.skey, sk, ek, &w);
3597
    pWindowResInfo->startTime = pTableQueryInfo->win.skey;  // windowSKey may be 0 in case of 1970 timestamp
3598

3599 3600
    if (pWindowResInfo->prevSKey == TSKEY_INITIAL_VAL) {
      if (!QUERY_IS_ASC_QUERY(pQuery)) {
H
Haojun Liao 已提交
3601
        assert(win.ekey == pQuery->window.ekey);
3602
      }
3603 3604
      
      pWindowResInfo->prevSKey = w.skey;
3605
    }
3606

3607
    pTableQueryInfo->queryRangeSet = 1;
3608
    pTableQueryInfo->lastKey = pTableQueryInfo->win.skey;
3609 3610 3611 3612
  }
}

/*
 * Returns true if any output function of the query has the TSDB_FUNCSTATE_NEED_TS bit set in its status.
 */
bool requireTimestamp(SQuery *pQuery) {
  bool required = false;

  for (int32_t i = 0; i < pQuery->numOfOutput && !required; i++) {
    int32_t functionId = pQuery->pSelectExpr[i].base.functionId;
    required = ((aAggs[functionId].nStatus & TSDB_FUNCSTATE_NEED_TS) != 0);
  }

  return required;
}

/*
 * Decide whether the primary timestamp column of the given data block is needed.
 *
 * 1. if the last key of the current table or the end key of the query window locates in this block, the
 *    timestamp column is needed to decide the precise position
 * 2. if any function requires the timestamp (see requireTimestamp), it is needed in any cases
 */
bool needPrimaryTimestampCol(SQuery *pQuery, SDataBlockInfo *pDataBlockInfo) {
  STimeWindow     *w = &pDataBlockInfo->window;
  STableQueryInfo *pTableQueryInfo = pQuery->current;

  if (pTableQueryInfo->lastKey >= w->skey && pTableQueryInfo->lastKey <= w->ekey) {
    return true;
  }

  if (pQuery->window.ekey >= w->skey && pQuery->window.ekey <= w->ekey) {
    return true;
  }

  return requireTimestamp(pQuery);
}

3636
/*
 * Copy the rows of the closed window results into the output buffers of the query (pQuery->sdata),
 * until either all closed results are visited or the output capacity (pQuery->rec.capacity) is reached.
 *
 * pQInfo->groupIndex and pQInfo->offset record the progress: groupIndex is advanced whenever a window
 * result has been handled completely, offset keeps the number of rows already copied from a partially
 * copied one. For TSDB_ORDER_ASC the results are visited from groupIndex upwards, otherwise downwards
 * starting from (number of closed windows - groupIndex - 1).
 *
 * Returns the number of rows copied.
 */
static int32_t doCopyToSData(SQInfo *pQInfo, SWindowResInfo *pResultInfo, int32_t orderType) {
  SQueryRuntimeEnv *pRuntimeEnv = &pQInfo->runtimeEnv;
  SQuery           *pQuery = pRuntimeEnv->pQuery;

  qDebug("QInfo:%p start to copy data from windowResInfo to query buf", pQInfo);

  int32_t        totalSet = numOfClosedTimeWindow(pResultInfo);
  SWindowResult *result = pResultInfo->pResult;

  int32_t startIdx = 0;
  int32_t step = 0;
  if (orderType != TSDB_ORDER_ASC) {  // desc order copy all data
    startIdx = totalSet - pQInfo->groupIndex - 1;
    step = -1;
  } else {
    startIdx = pQInfo->groupIndex;
    step = 1;
  }

  int32_t numOfResult = 0;
  for (int32_t i = startIdx; (i >= 0) && (i < totalSet); i += step) {
    // empty result, move on to the next one
    if (result[i].numOfRows == 0) {
      pQInfo->groupIndex += 1;
      pQInfo->offset = 0;
      continue;
    }

    assert(pQInfo->offset <= 1);

    int32_t oldOffset = pQInfo->offset;
    int32_t numOfRowsToCopy = result[i].numOfRows - pQInfo->offset;

    /*
     * current output space is not enough to keep all the result data of this group, only copy partial results
     * to SQuery object's result buffer
     */
    if (numOfRowsToCopy > pQuery->rec.capacity - numOfResult) {
      numOfRowsToCopy = pQuery->rec.capacity - numOfResult;
      pQInfo->offset += numOfRowsToCopy;
    } else {
      pQInfo->groupIndex += 1;
      pQInfo->offset = 0;
    }

    for (int32_t j = 0; j < pQuery->numOfOutput; ++j) {
      int32_t size = pRuntimeEnv->pCtx[j].outputBytes;

      char *in = getPosInResultPage(pRuntimeEnv, j, &result[i]);
      char *out = pQuery->sdata[j]->data + numOfResult * size;
      memcpy(out, in + oldOffset * size, size * numOfRowsToCopy);
    }

    numOfResult += numOfRowsToCopy;
    if (numOfResult == pQuery->rec.capacity) {
      break;
    }
  }

  qDebug("QInfo:%p copy data to query buf completed", pQInfo);

#ifdef _DEBUG_VIEW
  displayInterResult(pQuery->sdata, pRuntimeEnv, numOfResult);
#endif
  return numOfResult;
}

/**
 * copyFromWindowResToSData support copy data in ascending/descending order
 * For interval query of both super table and table, copy the data in ascending order, since the output results are
 * ordered in SWindowResutl already. While handling the group by query for both table and super table,
 * all group result are completed already.
 *
 * @param pQInfo
 * @param result
 */
3711
void copyFromWindowResToSData(SQInfo *pQInfo, SWindowResInfo *pResultInfo) {
3712
  SQuery *pQuery = pQInfo->runtimeEnv.pQuery;
3713

3714
  int32_t orderType = (pQuery->pGroupbyExpr != NULL) ? pQuery->pGroupbyExpr->orderType : TSDB_ORDER_ASC;
3715
  int32_t numOfResult = doCopyToSData(pQInfo, pResultInfo, orderType);
3716

3717
  pQuery->rec.rows += numOfResult;
3718

3719
  assert(pQuery->rec.rows <= pQuery->rec.capacity);
3720 3721
}

H
Haojun Liao 已提交
3722
/*
 * For queries that are not interval queries, raise the number of rows of every window result to the
 * largest number of results among its output functions (ts, tag and tag-projection functions are ignored).
 */
static void updateWindowResNumOfRes(SQueryRuntimeEnv *pRuntimeEnv) {
  SQuery *pQuery = pRuntimeEnv->pQuery;

  // update the number of result for each, only update the number of rows for the corresponding window result.
  if (QUERY_IS_INTERVAL_QUERY(pQuery)) {
    return;
  }

  for (int32_t i = 0; i < pRuntimeEnv->windowResInfo.size; ++i) {
    SWindowResult *pResult = &pRuntimeEnv->windowResInfo.pResult[i];

    for (int32_t j = 0; j < pQuery->numOfOutput; ++j) {
      int32_t functionId = pRuntimeEnv->pCtx[j].functionId;

      bool auxiliary = (functionId == TSDB_FUNC_TS || functionId == TSDB_FUNC_TAG || functionId == TSDB_FUNC_TAGPRJ);
      if (!auxiliary) {
        pResult->numOfRows = MAX(pResult->numOfRows, pResult->resultInfo[j].numOfRes);
      }
    }
  }
}

H
Haojun Liao 已提交
3744
static void stableApplyFunctionsOnBlock(SQueryRuntimeEnv *pRuntimeEnv, SDataBlockInfo *pDataBlockInfo, SDataStatis *pStatis,
3745
    SArray *pDataBlock, __block_search_fn_t searchFn) {
3746
  SQuery *         pQuery = pRuntimeEnv->pQuery;
3747 3748
  STableQueryInfo* pTableQueryInfo = pQuery->current;
  
3749
  SWindowResInfo * pWindowResInfo = &pTableQueryInfo->windowResInfo;
H
hjxilinx 已提交
3750
  pQuery->pos = QUERY_IS_ASC_QUERY(pQuery)? 0 : pDataBlockInfo->rows - 1;
3751

H
Haojun Liao 已提交
3752
  if (pQuery->numOfFilterCols > 0 || pRuntimeEnv->pTSBuf != NULL || pRuntimeEnv->groupbyNormalCol) {
3753
    rowwiseApplyFunctions(pRuntimeEnv, pStatis, pDataBlockInfo, pWindowResInfo, pDataBlock);
3754
  } else {
3755
    blockwiseApplyFunctions(pRuntimeEnv, pStatis, pDataBlockInfo, pWindowResInfo, searchFn, pDataBlock);
3756 3757 3758
  }
}

3759 3760 3761
/*
 * Check whether there are results that still have to be returned to the client.
 *
 * Returns false once the LIMIT has been reached. For fill queries (except point interpolation) the
 * remaining rows of the fill info are checked, and, if the query is completed, the number of rows that can
 * still be filled up to the end of the query window. For other queries, results remain if the query is
 * completed and a group-by-normal-column/interval query still holds window results.
 */
bool queryHasRemainResults(SQueryRuntimeEnv* pRuntimeEnv) {
  SQuery    *pQuery = pRuntimeEnv->pQuery;
  SFillInfo *pFillInfo = pRuntimeEnv->pFillInfo;

  if (pQuery->limit.limit > 0 && pQuery->rec.total >= pQuery->limit.limit) {
    return false;
  }

  bool fillRequired = (pQuery->fillType != TSDB_FILL_NONE && !isPointInterpoQuery(pQuery));
  if (!fillRequired) {
    // there are results waiting for returned to client.
    return Q_STATUS_EQUAL(pQuery->status, QUERY_COMPLETED) &&
           (pRuntimeEnv->groupbyNormalCol || QUERY_IS_INTERVAL_QUERY(pQuery)) &&
           (pRuntimeEnv->windowResInfo.size > 0);
  }

  // There are results not returned to client yet, so filling operation applied to the remain result is required
  // in the first place.
  int32_t remain = taosNumOfRemainRows(pFillInfo);
  if (remain > 0) {
    return true;
  }

  /*
   * While the code reaches here, there are no results remains now.
   * If query is not completed yet, the gaps between two results blocks need to be handled after next data block
   * is retrieved from TSDB.
   *
   * NOTE: If the result set is not the first block, the gap in front of the result set will be filled. If the result
   * set is the FIRST result block, the gap between the start time of query time window and the timestamp of the
   * first result row in the actual result set will fill nothing.
   */
  if (!Q_STATUS_EQUAL(pQuery->status, QUERY_COMPLETED)) {
    return false;
  }

  int32_t numOfTotal = getFilledNumOfRes(pFillInfo, pQuery->window.ekey, pQuery->rec.capacity);
  return numOfTotal > 0;
}

/*
 * Serialize the query result into the response buffer.
 *
 * Layout written to data:
 *   1. for every output column, numOfRows values of pSelectExpr[col].bytes bytes each, column by column
 *   2. the number of entries of pQInfo->arrTableIdInfo as a 32-bit integer converted with htonl
 *   3. one STableIdInfo per entry, with uid/key converted with htobe64 and tid with htonl
 *
 * After the copy, the query is marked as QUERY_OVER if it is completed and nothing remains to be returned
 * (for a super table query: all tables have been handled).
 *
 * @param pQInfo     query info holding the results
 * @param numOfRows  number of rows to copy from each output column
 * @param data       destination buffer; its size is not checked here, the caller must provide enough room
 */
static void doCopyQueryResultToMsg(SQInfo *pQInfo, int32_t numOfRows, char *data) {
  SQuery *pQuery = pQInfo->runtimeEnv.pQuery;

  for (int32_t col = 0; col < pQuery->numOfOutput; ++col) {
    // compute the length in size_t so that bytes * numOfRows cannot overflow a 32-bit int
    size_t len = (size_t)pQuery->pSelectExpr[col].bytes * (size_t)numOfRows;

    memmove(data, pQuery->sdata[col]->data, len);
    data += len;
  }

  /*
   * The write position behind the column data has no guaranteed alignment, so the trailing fields are
   * assembled in properly aligned locals and copied byte-wise instead of being stored through casted pointers.
   */
  int32_t  numOfTables = (int32_t)taosArrayGetSize(pQInfo->arrTableIdInfo);
  uint32_t numOfTablesNet = htonl(numOfTables);
  memcpy(data, &numOfTablesNet, sizeof(numOfTablesNet));
  data += sizeof(int32_t);

  for (int32_t i = 0; i < numOfTables; i++) {
    STableIdInfo *pSrc = taosArrayGet(pQInfo->arrTableIdInfo, i);

    STableIdInfo dst;
    memset(&dst, 0, sizeof(dst));  // no uninitialized bytes are sent over the wire
    dst.uid = htobe64(pSrc->uid);
    dst.tid = htonl(pSrc->tid);
    dst.key = htobe64(pSrc->key);

    memcpy(data, &dst, sizeof(dst));
    data += sizeof(STableIdInfo);
  }

  // all data returned, set query over
  if (Q_STATUS_EQUAL(pQuery->status, QUERY_COMPLETED)) {
    if (pQInfo->runtimeEnv.stableQuery) {
      if (pQInfo->tableIndex >= pQInfo->tableqinfoGroupInfo.numOfTables) {
        setQueryStatus(pQuery, QUERY_OVER);
      }
    } else {
      if (!queryHasRemainResults(&pQInfo->runtimeEnv)) {
        setQueryStatus(pQuery, QUERY_OVER);
      }
    }
  }
}

H
Haojun Liao 已提交
3837
/*
 * Generate filled result blocks into pQuery->sdata and apply the pending OFFSET to them.
 *
 * Blocks are generated until one of the following holds:
 *  - no offset is pending: the generated number of rows is returned as is
 *  - the block holds more rows than the offset: the first limit.offset rows are dropped by moving the rest
 *    to the head of each pDst column, and the number of remaining rows is returned
 *  - the whole block is consumed by the offset and no more results remain: 0 is returned
 */
int32_t doFillGapsInResults(SQueryRuntimeEnv* pRuntimeEnv, tFilePage **pDst, int32_t *numOfFilled) {
  SQuery    *pQuery = pRuntimeEnv->pQuery;
  SQInfo    *pQInfo = GET_QINFO_ADDR(pRuntimeEnv);
  SFillInfo *pFillInfo = pRuntimeEnv->pFillInfo;

  for (;;) {
    int32_t ret = taosGenerateDataBlock(pFillInfo, (tFilePage**) pQuery->sdata, pQuery->rec.capacity);

    // todo apply limit output function
    /* reached the start position of according to offset value, return immediately */
    if (pQuery->limit.offset == 0) {
      qDebug("QInfo:%p initial numOfRows:%d, generate filled result:%d rows", pQInfo, pFillInfo->numOfRows, ret);
      return ret;
    }

    if (pQuery->limit.offset < ret) {
      qDebug("QInfo:%p initial numOfRows:%d, generate filled result:%d rows, offset:%" PRId64 ". Discard due to offset, remain:%" PRId64 ", new offset:%d",
             pQInfo, pFillInfo->numOfRows, ret, pQuery->limit.offset, ret - pQuery->limit.offset, 0);

      ret -= pQuery->limit.offset;

      // todo !!!!there exactly number of interpo is not valid.
      // todo refactor move to the beginning of buffer
      for (int32_t i = 0; i < pQuery->numOfOutput; ++i) {
        memmove(pDst[i]->data, pDst[i]->data + pQuery->pSelectExpr[i].bytes * pQuery->limit.offset,
                ret * pQuery->pSelectExpr[i].bytes);
      }

      pQuery->limit.offset = 0;
      return ret;
    }

    // the complete block is consumed by the offset
    qDebug("QInfo:%p initial numOfRows:%d, generate filled result:%d rows, offset:%" PRId64 ". Discard due to offset, "
           "remain:%d, new offset:%" PRId64, pQInfo, pFillInfo->numOfRows, ret, pQuery->limit.offset, 0,
        pQuery->limit.offset - ret);

    pQuery->limit.offset -= ret;
    pQuery->rec.rows = 0;
    ret = 0;

    if (!queryHasRemainResults(pRuntimeEnv)) {
      return ret;
    }
  }
}

3882
/*
 * Write the cost summary of the query (elapsed time, number of blocks and rows) to the debug log.
 */
static void queryCostStatis(SQInfo *pQInfo) {
  SQueryCostInfo *pSummary = &pQInfo->runtimeEnv.summary;

  qDebug("QInfo:%p :cost summary: elapsed time:%"PRId64" us, total blocks:%d, load block statis:%d,"
         " load data block:%d, total rows:%"PRId64 ", check rows:%"PRId64,
         pQInfo,
         pSummary->elapsedTime,
         pSummary->totalBlocks,
         pSummary->loadBlockStatis,
         pSummary->loadBlocks,
         pSummary->totalRows,
         pSummary->totalCheckedRows);
}

3892 3893
/*
 * Consume the remaining OFFSET inside the data block described by pBlockInfo.
 *
 * When the offset equals the number of rows in the block, the whole block is skipped:
 * only the last key of the current table is moved past the block boundary.
 * Otherwise the start position inside the block is derived from the offset, the last key
 * is set to the timestamp found at that position, and the query functions are applied to
 * the block. In both cases limit.offset is reset to 0.
 *
 * @param pRuntimeEnv  runtime environment of the running query
 * @param pBlockInfo   description (row count, time window) of the current data block
 */
static void updateOffsetVal(SQueryRuntimeEnv *pRuntimeEnv, SDataBlockInfo *pBlockInfo) {
  SQuery          *pQuery = pRuntimeEnv->pQuery;
  STableQueryInfo *pCurrent = pQuery->current;

  int32_t step = GET_FORWARD_DIRECTION_FACTOR(pQuery->order.order);
  bool    ascending = QUERY_IS_ASC_QUERY(pQuery);

  // the offset swallows the block completely: step over it without loading any data
  if (pQuery->limit.offset == pBlockInfo->rows) {
    pCurrent->lastKey = (ascending ? pBlockInfo->window.ekey : pBlockInfo->window.skey) + step;
    pQuery->limit.offset = 0;
    return;
  }

  // first row to process, counted from the scan direction's start of the block
  pQuery->pos = ascending ? pQuery->limit.offset : (pBlockInfo->rows - pQuery->limit.offset - 1);
  assert(pQuery->pos >= 0 && pQuery->pos <= pBlockInfo->rows - 1);

  SArray          *pDataBlock = tsdbRetrieveDataBlock(pRuntimeEnv->pQueryHandle, NULL);
  SColumnInfoData *pTsCol = taosArrayGet(pDataBlock, 0);

  // column 0 is read as the timestamp column; the key at the start position becomes the last key
  TSKEY *tsList = (TSKEY *) pTsCol->pData;
  pCurrent->lastKey = tsList[pQuery->pos];
  pQuery->limit.offset = 0;

  int32_t numOfRes = tableApplyFunctionsOnBlock(pRuntimeEnv, pBlockInfo, NULL, binarySearchForKey, pDataBlock);

  qDebug("QInfo:%p check data block, brange:%" PRId64 "-%" PRId64 ", numOfRows:%d, numOfRes:%d, lastKey:%"PRId64, GET_QINFO_ADDR(pRuntimeEnv),
         pBlockInfo->window.skey, pBlockInfo->window.ekey, pBlockInfo->rows, numOfRes, pQuery->current->lastKey);
}
3927

3928 3929 3930 3931 3932
/*
 * Apply the query OFFSET by walking over data blocks.
 *
 * Returns immediately when there is no pending offset or when filter columns exist.
 * A block holding fewer rows than the remaining offset is dropped as a whole (the offset
 * and the last key of the current table are adjusted); the first block that can absorb
 * the remaining offset is handed over to updateOffsetVal() and the walk ends.
 *
 * Leaves through longjmp(pRuntimeEnv->env, ...) when the block iterator reports an error
 * in terrno or when the query has been killed.
 */
void skipBlocks(SQueryRuntimeEnv *pRuntimeEnv) {
  SQuery *pQuery = pRuntimeEnv->pQuery;
  if (pQuery->limit.offset <= 0 || pQuery->numOfFilterCols > 0) {
    return;
  }

  pQuery->pos = 0;

  int32_t          step = GET_FORWARD_DIRECTION_FACTOR(pQuery->order.order);
  STableQueryInfo *pCurrent = pQuery->current;
  TsdbQueryHandleT handle = pRuntimeEnv->pQueryHandle;
  SDataBlockInfo   blockInfo = SDATA_BLOCK_INITIALIZER;

  while (tsdbNextDataBlock(handle)) {
    if (IS_QUERY_KILLED(GET_QINFO_ADDR(pRuntimeEnv))) {
      finalizeQueryResult(pRuntimeEnv); // clean up allocated resource during query
      longjmp(pRuntimeEnv->env, TSDB_CODE_TSC_QUERY_CANCELLED);
    }

    tsdbRetrieveDataBlockInfo(handle, &blockInfo);

    // the remaining offset ends inside this block: find the start position and stop
    if (pQuery->limit.offset <= blockInfo.rows) {
      updateOffsetVal(pRuntimeEnv, &blockInfo);
      return;
    }

    // the whole block is covered by the offset, skip it
    pQuery->limit.offset -= blockInfo.rows;
    pCurrent->lastKey = (QUERY_IS_ASC_QUERY(pQuery)) ? blockInfo.window.ekey : blockInfo.window.skey;
    pCurrent->lastKey += step;

    qDebug("QInfo:%p skip rows:%d, offset:%" PRId64, GET_QINFO_ADDR(pRuntimeEnv), blockInfo.rows,
           pQuery->limit.offset);
  }

  // the iterator stopped: either no more blocks, or an error was raised
  if (terrno != TSDB_CODE_SUCCESS) {
    longjmp(pRuntimeEnv->env, terrno);
  }
}
3970

H
Haojun Liao 已提交
3971
/*
 * Apply the query OFFSET to an interval query by skipping whole time windows.
 *
 * *start is initialised with the last key of the current table. The offset is not applied
 * (and true is returned right away) when there is no pending offset, when filter columns
 * exist, or when a ts buffer or fill info is attached to the runtime environment.
 *
 * For every data block the windows covered by the block are counted against the offset.
 * Once the offset reaches zero, the query start (pQuery->window.skey, *start and the
 * previous window start key) is moved to the first window that has to be reported; if that
 * window begins inside the current block, the block is loaded and processed immediately.
 *
 * Leaves through longjmp(pRuntimeEnv->env, terrno) when the block iterator fails.
 *
 * @param pRuntimeEnv  runtime environment of the running query
 * @param start        [out] timestamp from which the scan should continue
 * @return always true in the visible code paths
 */
static bool skipTimeInterval(SQueryRuntimeEnv *pRuntimeEnv, TSKEY* start) {
  SQuery *pQuery = pRuntimeEnv->pQuery;
  *start = pQuery->current->lastKey;

  // if queried with value filter, do NOT forward query start position
  if (pQuery->limit.offset <= 0 || pQuery->numOfFilterCols > 0 || pRuntimeEnv->pTSBuf != NULL || pRuntimeEnv->pFillInfo != NULL) {
    return true;
  }

  /*
   * 1. for interval without interpolation query we forward pQuery->intervalTime at a time for
   *    pQuery->limit.offset times. Since hole exists, pQuery->intervalTime*pQuery->limit.offset value is
   *    not valid. otherwise, we only forward pQuery->limit.offset number of points
   */
  assert(pRuntimeEnv->windowResInfo.prevSKey == TSKEY_INITIAL_VAL);

  SWindowResInfo  *pResInfo = &pRuntimeEnv->windowResInfo;
  STableQueryInfo *pCurrent = pQuery->current;
  STimeWindow      aligned = TSWINDOW_INITIALIZER;
  SDataBlockInfo   blockInfo = SDATA_BLOCK_INITIALIZER;

  while (tsdbNextDataBlock(pRuntimeEnv->pQueryHandle)) {
    tsdbRetrieveDataBlockInfo(pRuntimeEnv->pQueryHandle, &blockInfo);

    if (!QUERY_IS_ASC_QUERY(pQuery)) {
      // descending scan: re-align on the end key of every block
      getAlignQueryTimeWindow(pQuery, blockInfo.window.ekey, pQuery->window.ekey, blockInfo.window.ekey, &aligned);

      pResInfo->startTime = pQuery->window.skey;
      pResInfo->prevSKey = aligned.skey;
    } else if (pResInfo->prevSKey == TSKEY_INITIAL_VAL) {
      // ascending scan: align only once, on the first block
      getAlignQueryTimeWindow(pQuery, blockInfo.window.skey, blockInfo.window.skey, pQuery->window.ekey, &aligned);
      pResInfo->startTime = aligned.skey;
      pResInfo->prevSKey = aligned.skey;
    }

    // the first time window
    STimeWindow win = getActiveTimeWindow(pResInfo, pResInfo->prevSKey, pQuery);

    while (pQuery->limit.offset > 0) {
      // the current window ends inside this block: it counts as one skipped window
      bool winEndsInBlock = QUERY_IS_ASC_QUERY(pQuery) ? (win.ekey <= blockInfo.window.ekey)
                                                       : (win.ekey >= blockInfo.window.skey);
      if (winEndsInBlock) {
        pQuery->limit.offset -= 1;
        pResInfo->prevSKey = win.skey;
      }

      STimeWindow tw = win;
      GET_NEXT_TIMEWINDOW(pQuery, &tw);

      // does the following window still overlap the current block?
      bool nextInBlock = QUERY_IS_ASC_QUERY(pQuery) ? (tw.skey <= blockInfo.window.ekey)
                                                    : (tw.ekey >= blockInfo.window.skey);

      if (pQuery->limit.offset == 0) {
        if (!nextInBlock) {
          // the first reported window starts in a later block, only move the start keys
          *start = tw.skey;
          pQuery->window.skey = tw.skey;
          pResInfo->prevSKey = tw.skey;
          return true;
        }

        // load the data block and check data remaining in current data block
        // TODO optimize performance
        SArray          *pDataBlock = tsdbRetrieveDataBlock(pRuntimeEnv->pQueryHandle, NULL);
        SColumnInfoData *pTsCol = taosArrayGet(pDataBlock, 0);

        tw = win;
        int32_t startPos =
            getNextQualifiedWindow(pRuntimeEnv, &tw, &blockInfo, pTsCol->pData, binarySearchForKey, -1);
        assert(startPos >= 0);

        // set the abort info
        pQuery->pos = startPos;

        // reset the query start timestamp
        pCurrent->win.skey = ((TSKEY *)pTsCol->pData)[startPos];
        pQuery->window.skey = pCurrent->win.skey;
        *start = pCurrent->win.skey;

        pResInfo->prevSKey = tw.skey;
        int32_t index = pRuntimeEnv->windowResInfo.curIndex;

        int32_t numOfRes = tableApplyFunctionsOnBlock(pRuntimeEnv, &blockInfo, NULL, binarySearchForKey, pDataBlock);
        pRuntimeEnv->windowResInfo.curIndex = index;  // restore the window index

        qDebug("QInfo:%p check data block, brange:%" PRId64 "-%" PRId64 ", numOfRows:%d, numOfRes:%d, lastKey:%"PRId64,
               GET_QINFO_ADDR(pRuntimeEnv), blockInfo.window.skey, blockInfo.window.ekey, blockInfo.rows, numOfRes, pQuery->current->lastKey);

        return true;
      }

      if (!nextInBlock) {
        break;  // offset is not 0, and next time window begins or ends in the next block.
      }

      /*
       * If the next time window still starts from current data block,
       * load the primary timestamp column first, and then find the start position for the next queried time window.
       * Note that only the primary timestamp column is required.
       * TODO: Optimize for this cases. All data blocks are not needed to be loaded, only if the first actually required
       * time window resides in current data block.
       */
      SArray          *pDataBlock = tsdbRetrieveDataBlock(pRuntimeEnv->pQueryHandle, NULL);
      SColumnInfoData *pTsCol = taosArrayGet(pDataBlock, 0);

      tw = win;
      int32_t startPos =
          getNextQualifiedWindow(pRuntimeEnv, &tw, &blockInfo, pTsCol->pData, binarySearchForKey, -1);
      assert(startPos >= 0);

      // set the abort info
      pQuery->pos = startPos;
      pCurrent->lastKey = ((TSKEY *)pTsCol->pData)[startPos];
      pResInfo->prevSKey = tw.skey;
      win = tw;
    }
  }

  // the iterator stopped: either no more blocks, or an error was raised
  if (terrno != TSDB_CODE_SUCCESS) {
    longjmp(pRuntimeEnv->env, terrno);
  }

  return true;
}

B
Bomin Zhang 已提交
4099
/*
 * Create the primary tsdb query handle (pRuntimeEnv->pQueryHandle) for a query.
 *
 * No handle is created, and TSDB_CODE_SUCCESS is returned, for tag-only queries and for
 * super table queries that are neither interval queries nor fixed-output queries.
 *
 * @param tsdb           storage handle passed through to the tsdbQuery* constructors
 * @param pQInfo         query to set up
 * @param isSTableQuery  true when the query runs against a super table
 * @return the value of terrno after the handle constructor returned
 */
static int32_t setupQueryHandle(void* tsdb, SQInfo* pQInfo, bool isSTableQuery) {
  SQueryRuntimeEnv *pRuntimeEnv = &pQInfo->runtimeEnv;
  SQuery           *pQuery = pRuntimeEnv->pQuery;

  if (onlyQueryTags(pQuery)) {
    return TSDB_CODE_SUCCESS;
  }

  if (isSTableQuery && (!QUERY_IS_INTERVAL_QUERY(pQuery)) && (!isFixedOutputQuery(pRuntimeEnv))) {
    return TSDB_CODE_SUCCESS;
  }

  STsdbQueryCond cond = {
    .twindow   = pQuery->window,
    .order     = pQuery->order.order,
    .colList   = pQuery->colList,
    .numOfCols = pQuery->numOfCols,
  };

  // for this kind of single-table ascending query, use the time window kept in the table's own query info
  bool useTableWindow = !isSTableQuery
                     && (pQInfo->tableqinfoGroupInfo.numOfTables == 1)
                     && (cond.order == TSDB_ORDER_ASC)
                     && (!QUERY_IS_INTERVAL_QUERY(pQuery))
                     && (!isGroupbyNormalCol(pQuery->pGroupbyExpr))
                     && (!isFixedOutputQuery(pRuntimeEnv));
  if (useTableWindow) {
    SArray          *pGroup = GET_TABLEGROUP(pQInfo, 0);
    STableQueryInfo *pTableInfo = taosArrayGetP(pGroup, 0);
    cond.twindow = pTableInfo->win;
  }

  // the constructors report failures through terrno
  terrno = TSDB_CODE_SUCCESS;

  TsdbQueryHandleT handle;
  if (isFirstLastRowQuery(pQuery)) {
    handle = tsdbQueryLastRow(tsdb, &cond, &pQInfo->tableGroupInfo, pQInfo);
  } else if (isPointInterpoQuery(pQuery)) {
    handle = tsdbQueryRowsInExternalWindow(tsdb, &cond, &pQInfo->tableGroupInfo, pQInfo);
  } else {
    handle = tsdbQueryTables(tsdb, &cond, &pQInfo->tableGroupInfo, pQInfo);
  }

  pRuntimeEnv->pQueryHandle = handle;
  return terrno;
}

4141 4142 4143 4144 4145 4146 4147 4148 4149 4150 4151 4152 4153
/*
 * Build the per-output-column description array used to initialise the fill info.
 *
 * One SFillColInfo entry is produced for every output expression of the query. Each entry
 * records the byte width, data type, function id and fill value of the column, together
 * with the byte offset of the column inside an output row (the running sum of the widths
 * of all preceding columns).
 *
 * @param pQuery  query whose output expressions (pSelectExpr, fillVal) are described
 * @return heap-allocated array with pQuery->numOfOutput entries, or NULL when the
 *         allocation fails
 */
static SFillColInfo* taosCreateFillColInfo(SQuery* pQuery) {
  int32_t numOfCols = pQuery->numOfOutput;
  int32_t offset = 0;

  SFillColInfo* pFillCol = calloc(numOfCols, sizeof(SFillColInfo));
  if (pFillCol == NULL) {  // do not write through a failed allocation
    return NULL;
  }

  for(int32_t i = 0; i < numOfCols; ++i) {
    SExprInfo* pExprInfo = &pQuery->pSelectExpr[i];

    pFillCol[i].col.bytes  = pExprInfo->bytes;
    pFillCol[i].col.type   = pExprInfo->type;
    pFillCol[i].col.offset = offset;
    pFillCol[i].flag       = TSDB_COL_NORMAL;    // always a normal column for table query
    pFillCol[i].functionId = pExprInfo->base.functionId;
    pFillCol[i].fillVal.i = pQuery->fillVal[i];

    offset += pExprInfo->bytes;
  }

  return pFillCol;
}

4162
/*
 * Initialise the runtime environment of a query before it is executed.
 *
 * Steps, in order: read the timestamp precision, classify the query, create the tsdb query
 * handle, fill in the runtime environment fields, set up the function contexts, create the
 * disk based result buffer and the window result info when the query type needs them, and
 * finally create the fill info for fill queries that are not point interpolation queries.
 *
 * @param pQInfo         query to initialise
 * @param pTsBuf         optional ts buffer, may be NULL
 * @param tsdb           storage handle
 * @param vgId           id stored in pQInfo->vgId
 * @param isSTableQuery  true when the query runs against a super table
 * @return TSDB_CODE_SUCCESS, or the first error code reported by one of the setup steps
 */
int32_t doInitQInfo(SQInfo *pQInfo, STSBuf *pTsBuf, void *tsdb, int32_t vgId, bool isSTableQuery) {
  SQueryRuntimeEnv *pRuntimeEnv = &pQInfo->runtimeEnv;
  SQuery           *pQuery = pRuntimeEnv->pQuery;

  pQuery->precision = tsdbGetCfg(tsdb)->precision;
  pRuntimeEnv->topBotQuery = isTopBottomQuery(pQuery);
  pRuntimeEnv->hasTagResults = hasTagValOutput(pQuery);

  setScanLimitationByResultBuffer(pQuery);
  changeExecuteScanOrder(pQInfo, false);

  int32_t code = setupQueryHandle(tsdb, pQInfo, isSTableQuery);
  if (code != TSDB_CODE_SUCCESS) {
    return code;
  }

  pQInfo->tsdb = tsdb;
  pQInfo->vgId = vgId;

  pRuntimeEnv->pQuery = pQuery;
  pRuntimeEnv->pTSBuf = pTsBuf;
  pRuntimeEnv->cur.vgroupIndex = -1;
  pRuntimeEnv->stableQuery = isSTableQuery;
  pRuntimeEnv->prevGroupId = INT32_MIN;
  pRuntimeEnv->groupbyNormalCol = isGroupbyNormalCol(pQuery->pGroupbyExpr);

  if (pTsBuf != NULL) {
    // traverse the ts buffer forward when its order matches the query order, backward otherwise
    int16_t order = (pQuery->order.order == pRuntimeEnv->pTSBuf->tsOrder) ? TSDB_ORDER_ASC : TSDB_ORDER_DESC;
    tsBufSetTraverseOrder(pRuntimeEnv->pTSBuf, order);
  }

  // create runtime environment
  code = setupQueryRuntimeEnv(pRuntimeEnv, pQuery->order.order);
  if (code != TSDB_CODE_SUCCESS) {
    return code;
  }

  pRuntimeEnv->numOfRowsPerPage = getNumOfRowsInResultPage(pQuery, pRuntimeEnv->topBotQuery, isSTableQuery);

  if (isSTableQuery && !onlyQueryTags(pRuntimeEnv->pQuery)) {
    int32_t numOfPages = getInitialPageNum(pQInfo);
    code = createDiskbasedResultBuffer(&pRuntimeEnv->pResultBuf, numOfPages, pQuery->rowSize, pQInfo);
    if (code != TSDB_CODE_SUCCESS) {
      return code;
    }

    if (!QUERY_IS_INTERVAL_QUERY(pQuery)) {
      // key type of the window results: the group-by column type, or an int group id
      int16_t keyType = TSDB_DATA_TYPE_NULL;
      if (pRuntimeEnv->groupbyNormalCol) {  // group by columns not tags;
        keyType = getGroupbyColumnType(pQuery, pQuery->pGroupbyExpr);
      } else {
        keyType = TSDB_DATA_TYPE_INT;  // group id
      }

      code = initWindowResInfo(&pRuntimeEnv->windowResInfo, pRuntimeEnv, 32, 4096, keyType);
      if (code != TSDB_CODE_SUCCESS) {
        return code;
      }
    }
  } else if (pRuntimeEnv->groupbyNormalCol || QUERY_IS_INTERVAL_QUERY(pQuery)) {
    int32_t numOfPages = getInitialPageNum(pQInfo);
    code = createDiskbasedResultBuffer(&pRuntimeEnv->pResultBuf, numOfPages, pQuery->rowSize, pQInfo);
    if (code != TSDB_CODE_SUCCESS) {
      return code;
    }

    // key type of the window results: the group-by column type, or the window timestamp
    int16_t keyType = TSDB_DATA_TYPE_NULL;
    if (pRuntimeEnv->groupbyNormalCol) {
      keyType = getGroupbyColumnType(pQuery, pQuery->pGroupbyExpr);
    } else {
      keyType = TSDB_DATA_TYPE_TIMESTAMP;
    }

    code = initWindowResInfo(&pRuntimeEnv->windowResInfo, pRuntimeEnv, numOfPages, 4096, keyType);
    if (code != TSDB_CODE_SUCCESS) {
      return code;
    }
  }

  if (pQuery->fillType != TSDB_FILL_NONE && !isPointInterpoQuery(pQuery)) {
    SFillColInfo *pColInfo = taosCreateFillColInfo(pQuery);
    STimeWindow   alignedWin = TSWINDOW_INITIALIZER;

    // the query window may be stored in either direction, order its bounds first
    TSKEY lower = MIN(pQuery->window.skey, pQuery->window.ekey);
    TSKEY upper = MAX(pQuery->window.skey, pQuery->window.ekey);
    getAlignQueryTimeWindow(pQuery, pQuery->window.skey, lower, upper, &alignedWin);

    pRuntimeEnv->pFillInfo = taosInitFillInfo(pQuery->order.order, alignedWin.skey, 0, pQuery->rec.capacity,
                                              pQuery->numOfOutput, pQuery->slidingTime, pQuery->slidingTimeUnit,
                                              pQuery->precision, pQuery->fillType, pColInfo);
  }

  setQueryStatus(pQuery, QUERY_NOT_COMPLETED);
  return TSDB_CODE_SUCCESS;
}

4262
/*
 * Clear the "complete" flag in the result info of every output function context, so the
 * functions can be executed again on the next table.
 */
static void enableExecutionForNextTable(SQueryRuntimeEnv *pRuntimeEnv) {
  SQuery *pQuery = pRuntimeEnv->pQuery;

  for (int32_t col = 0; col < pQuery->numOfOutput; ++col) {
    SResultInfo *pInfo = GET_RES_INFO(&pRuntimeEnv->pCtx[col]);
    if (pInfo == NULL) {
      continue;  // this context has no result info attached
    }

    pInfo->complete = false;
  }
}

H
Haojun Liao 已提交
4273 4274 4275 4276 4277 4278 4279 4280 4281 4282 4283 4284 4285 4286 4287 4288 4289
/*
 * Prepare the execution environment before a data block of the given table is processed.
 *
 * Interval queries get their query range set from the start key of the block, plus the
 * additional per-table info when tag results or a ts buffer are involved. All other
 * queries get the execution context of the table's group, keyed by the block end key
 * advanced by one step in scan direction.
 */
static FORCE_INLINE void setEnvForEachBlock(SQInfo* pQInfo, STableQueryInfo* pTableQueryInfo, SDataBlockInfo* pBlockInfo) {
  SQueryRuntimeEnv* pRuntimeEnv = &pQInfo->runtimeEnv;
  SQuery*           pQuery = pRuntimeEnv->pQuery;

  if (QUERY_IS_INTERVAL_QUERY(pQuery)) {
    TSKEY blockStart = pBlockInfo->window.skey;
    setIntervalQueryRange(pQInfo, blockStart);

    if (pRuntimeEnv->hasTagResults || pRuntimeEnv->pTSBuf != NULL) {
      setAdditionalInfo(pQInfo, pTableQueryInfo->pTable, pTableQueryInfo);
    }

    return;
  }

  int32_t step = GET_FORWARD_DIRECTION_FACTOR(pQuery->order.order);
  setExecutionContext(pQInfo, pTableQueryInfo->groupIndex, pBlockInfo->window.ekey + step);
}

H
Haojun Liao 已提交
4290
/*
 * Scan all data blocks delivered by the active query handle and apply the query functions
 * to them, block by block, for a query that covers several tables.
 *
 * The master scan reads from pQueryHandle, any other scan from pSecQueryHandle. The scan
 * stops when the iterator is exhausted or when a block belongs to a table that is not
 * found in the table query info map. Blocks discarded by loadDataBlockOnDemand() only move
 * the last key of their table past the block.
 *
 * Leaves through longjmp(pRuntimeEnv->env, ...) on iterator errors or when the query is killed.
 *
 * @return difference between the two taosGetTimestampMs() readings taken around the scan
 */
static int64_t scanMultiTableDataBlocks(SQInfo *pQInfo) {
  SQueryRuntimeEnv *pRuntimeEnv = &pQInfo->runtimeEnv;
  SQuery           *pQuery = pRuntimeEnv->pQuery;
  SQueryCostInfo   *pCost = &pRuntimeEnv->summary;

  int64_t startTs = taosGetTimestampMs();

  TsdbQueryHandleT handle = IS_MASTER_SCAN(pRuntimeEnv)? pRuntimeEnv->pQueryHandle : pRuntimeEnv->pSecQueryHandle;
  SDataBlockInfo   blockInfo = SDATA_BLOCK_INITIALIZER;
  int32_t          step = GET_FORWARD_DIRECTION_FACTOR(pQuery->order.order);

  for (;;) {
    if (!tsdbNextDataBlock(handle)) {
      if (terrno != TSDB_CODE_SUCCESS) {
        longjmp(pRuntimeEnv->env, terrno);
      }
      break;
    }

    pCost->totalBlocks += 1;

    if (IS_QUERY_KILLED(pQInfo)) {
      longjmp(pRuntimeEnv->env, TSDB_CODE_TSC_QUERY_CANCELLED);
    }

    tsdbRetrieveDataBlockInfo(handle, &blockInfo);

    // locate the query info of the table that owns this block
    STableQueryInfo **ppTableInfo =
        (STableQueryInfo**) taosHashGet(pQInfo->tableqinfoGroupInfo.map, &blockInfo.tid, sizeof(blockInfo.tid));
    if (ppTableInfo == NULL) {
      break;
    }

    pQuery->current = *ppTableInfo;
    CHECK_QUERY_TIME_RANGE(pQuery, *ppTableInfo);

    if (!pRuntimeEnv->groupbyNormalCol) {
      setEnvForEachBlock(pQInfo, *ppTableInfo, &blockInfo);
    }

    SDataStatis *pStatis = NULL;
    SArray      *pDataBlock = NULL;
    if (loadDataBlockOnDemand(pRuntimeEnv, handle, &blockInfo, &pStatis, &pDataBlock) == BLK_DATA_DISCARD) {
      // the block is not needed: just move the last key past it
      pQuery->current->lastKey = QUERY_IS_ASC_QUERY(pQuery)? blockInfo.window.ekey + step:blockInfo.window.skey + step;
      continue;
    }

    pCost->totalRows += blockInfo.rows;
    stableApplyFunctionsOnBlock(pRuntimeEnv, &blockInfo, pStatis, pDataBlock, binarySearchForKey);

    qDebug("QInfo:%p check data block, uid:%"PRId64", tid:%d, brange:%" PRId64 "-%" PRId64 ", numOfRows:%d, lastKey:%" PRId64,
           pQInfo, blockInfo.uid, blockInfo.tid, blockInfo.window.skey, blockInfo.window.ekey, blockInfo.rows, pQuery->current->lastKey);
  }

  updateWindowResNumOfRes(pRuntimeEnv);

  int64_t endTs = taosGetTimestampMs();
  return endTs - startTs;
}

4349 4350
/*
 * Prepare the runtime environment for scanning one single table of the first table group.
 *
 * The query status is reset to QUERY_NOT_COMPLETED, the tag values of the table are loaded
 * when needed, and the primary query handle is replaced by a new one that covers only this
 * table, from the table's last key to the end of its window. When a ts buffer is in use its
 * cursor is positioned as well.
 *
 * Leaves through longjmp(pRuntimeEnv->env, terrno) when the query handle cannot be created.
 *
 * @param pQInfo  running query
 * @param index   position of the table inside table group 0
 * @return false when the ts buffer holds no data for the tag value of this table,
 *         true otherwise
 */
static bool multiTableMultioutputHelper(SQInfo *pQInfo, int32_t index) {
  SQueryRuntimeEnv *pRuntimeEnv = &pQInfo->runtimeEnv;
  SQuery           *pQuery = pRuntimeEnv->pQuery;

  setQueryStatus(pQuery, QUERY_NOT_COMPLETED);

  SArray          *pGroup = GET_TABLEGROUP(pQInfo, 0);
  STableQueryInfo *pTableInfo = taosArrayGetP(pGroup, index);

  if (pRuntimeEnv->hasTagResults || pRuntimeEnv->pTSBuf != NULL) {
    setTagVal(pRuntimeEnv, pTableInfo->pTable, pQInfo->tsdb);
  }

  STableId *pId = TSDB_TABLEID(pTableInfo->pTable);
  qDebug("QInfo:%p query on (%d): uid:%" PRIu64 ", tid:%d, qrange:%" PRId64 "-%" PRId64, pQInfo, index,
         pId->uid, pId->tid, pTableInfo->lastKey, pTableInfo->win.ekey);

  // continue from the last key reached so far on this table
  STsdbQueryCond cond = {
      .twindow   = {pTableInfo->lastKey, pTableInfo->win.ekey},
      .order     = pQuery->order.order,
      .colList   = pQuery->colList,
      .numOfCols = pQuery->numOfCols,
  };

  // todo refactor
  // temporary group list holding one group with only the current table
  SArray *pGroupList = taosArrayInit(1, POINTER_BYTES);
  SArray *pTableList = taosArrayInit(1, POINTER_BYTES);

  taosArrayPush(pTableList, &pTableInfo->pTable);
  taosArrayPush(pGroupList, &pTableList);
  STableGroupInfo singleTable = {.numOfTables = 1, .pGroupList = pGroupList};

  // release the handle of the previous table before opening the new one
  if (pRuntimeEnv->pQueryHandle != NULL) {
    tsdbCleanupQueryHandle(pRuntimeEnv->pQueryHandle);
    pRuntimeEnv->pQueryHandle = NULL;
  }

  pRuntimeEnv->pQueryHandle = tsdbQueryTables(pQInfo->tsdb, &cond, &singleTable, pQInfo);
  taosArrayDestroy(pTableList);
  taosArrayDestroy(pGroupList);
  if (pRuntimeEnv->pQueryHandle == NULL) {
    longjmp(pRuntimeEnv->env, terrno);
  }

  if (pRuntimeEnv->pTSBuf != NULL) {
    if (pRuntimeEnv->cur.vgroupIndex != -1) {
      // a cursor position was saved earlier, resume from it
      tsBufSetCursor(pRuntimeEnv->pTSBuf, &pRuntimeEnv->cur);
    } else {
      int64_t tag = pRuntimeEnv->pCtx[0].tag.i64Key;
      STSElem elem = tsBufGetElemStartPos(pRuntimeEnv->pTSBuf, 0, tag);

      // failed to find data with the specified tag value
      if (elem.vnode < 0) {
        return false;
      }
    }
  }

  initCtxOutputBuf(pRuntimeEnv);
  return true;
}

/**
 * super table query handler
 * 1. super table projection query, group-by on normal columns query, ts-comp query
 * 2. point interpolation query, last row query
 *
 * @param pQInfo
 */
4418
static void sequentialTableProcess(SQInfo *pQInfo) {
4419
  SQueryRuntimeEnv *pRuntimeEnv = &pQInfo->runtimeEnv;
4420
  SQuery *          pQuery = pRuntimeEnv->pQuery;
4421
  setQueryStatus(pQuery, QUERY_COMPLETED);
4422

H
Haojun Liao 已提交
4423
  size_t numOfGroups = GET_NUM_OF_TABLEGROUP(pQInfo);
4424

H
Haojun Liao 已提交
4425
  if (isPointInterpoQuery(pQuery) || isFirstLastRowQuery(pQuery)) {
4426 4427
    resetCtxOutputBuf(pRuntimeEnv);
    assert(pQuery->limit.offset == 0 && pQuery->limit.limit != 0);
4428

4429
    while (pQInfo->groupIndex < numOfGroups) {
H
Haojun Liao 已提交
4430
      SArray* group = taosArrayGetP(pQInfo->tableGroupInfo.pGroupList, pQInfo->groupIndex);
4431

4432
      qDebug("QInfo:%p last_row query on group:%d, total group:%zu, current group:%p", pQInfo, pQInfo->groupIndex,
dengyihao's avatar
dengyihao 已提交
4433
             numOfGroups, group);
H
Haojun Liao 已提交
4434 4435 4436 4437 4438 4439 4440 4441 4442 4443 4444 4445 4446 4447 4448 4449 4450 4451 4452 4453

      STsdbQueryCond cond = {
          .twindow = pQuery->window,
          .colList = pQuery->colList,
          .order   = pQuery->order.order,
          .numOfCols = pQuery->numOfCols,
      };

      SArray *g1 = taosArrayInit(1, POINTER_BYTES);
      SArray *tx = taosArrayClone(group);
      taosArrayPush(g1, &tx);
      
      STableGroupInfo gp = {.numOfTables = taosArrayGetSize(tx), .pGroupList = g1};

      // include only current table
      if (pRuntimeEnv->pQueryHandle != NULL) {
        tsdbCleanupQueryHandle(pRuntimeEnv->pQueryHandle);
        pRuntimeEnv->pQueryHandle = NULL;
      }
      
4454
      if (isFirstLastRowQuery(pQuery)) {
H
Haojun Liao 已提交
4455
        pRuntimeEnv->pQueryHandle = tsdbQueryLastRow(pQInfo->tsdb, &cond, &gp, pQInfo);
H
Haojun Liao 已提交
4456
      } else {
H
Haojun Liao 已提交
4457
        pRuntimeEnv->pQueryHandle = tsdbQueryRowsInExternalWindow(pQInfo->tsdb, &cond, &gp, pQInfo);
4458
      }
B
Bomin Zhang 已提交
4459 4460 4461 4462 4463 4464

      taosArrayDestroy(tx);
      taosArrayDestroy(g1);
      if (pRuntimeEnv->pQueryHandle == NULL) {
        longjmp(pRuntimeEnv->env, terrno);
      }
H
Haojun Liao 已提交
4465

H
Haojun Liao 已提交
4466
      initCtxOutputBuf(pRuntimeEnv);
4467
      
4468
      SArray* s = tsdbGetQueriedTableList(pRuntimeEnv->pQueryHandle);
4469 4470
      assert(taosArrayGetSize(s) >= 1);
      
4471
      setTagVal(pRuntimeEnv, taosArrayGetP(s, 0), pQInfo->tsdb);
4472 4473 4474
      if (isFirstLastRowQuery(pQuery)) {
        assert(taosArrayGetSize(s) == 1);
      }
H
Haojun Liao 已提交
4475

dengyihao's avatar
dengyihao 已提交
4476
      taosArrayDestroy(s);
H
Haojun Liao 已提交
4477

H
Haojun Liao 已提交
4478
      // here we simply set the first table as current table
4479 4480 4481
      SArray* first = GET_TABLEGROUP(pQInfo, pQInfo->groupIndex);
      pQuery->current = taosArrayGetP(first, 0);

4482
      scanOneTableDataBlocks(pRuntimeEnv, pQuery->current->lastKey);
H
Haojun Liao 已提交
4483 4484 4485 4486 4487 4488 4489 4490 4491 4492 4493 4494
      
      int64_t numOfRes = getNumOfResult(pRuntimeEnv);
      if (numOfRes > 0) {
        pQuery->rec.rows += numOfRes;
        forwardCtxOutputBuf(pRuntimeEnv, numOfRes);
      }
      
      skipResults(pRuntimeEnv);
      pQInfo->groupIndex += 1;

      // enable execution for next table, when handling the projection query
      enableExecutionForNextTable(pRuntimeEnv);
4495 4496 4497 4498 4499 4500

      if (pQuery->rec.rows >= pQuery->rec.capacity) {
        setQueryStatus(pQuery, QUERY_RESBUF_FULL);
        break;
      }
    }
H
Haojun Liao 已提交
4501
  } else if (pRuntimeEnv->groupbyNormalCol) { // group-by on normal columns query
4502
    while (pQInfo->groupIndex < numOfGroups) {
H
Haojun Liao 已提交
4503
      SArray* group = taosArrayGetP(pQInfo->tableGroupInfo.pGroupList, pQInfo->groupIndex);
4504

4505
      qDebug("QInfo:%p group by normal columns group:%d, total group:%zu", pQInfo, pQInfo->groupIndex, numOfGroups);
4506 4507 4508 4509 4510 4511 4512 4513 4514 4515 4516 4517 4518 4519 4520 4521 4522 4523 4524 4525 4526

      STsdbQueryCond cond = {
          .twindow = pQuery->window,
          .colList = pQuery->colList,
          .order   = pQuery->order.order,
          .numOfCols = pQuery->numOfCols,
      };

      SArray *g1 = taosArrayInit(1, POINTER_BYTES);
      SArray *tx = taosArrayClone(group);
      taosArrayPush(g1, &tx);

      STableGroupInfo gp = {.numOfTables = taosArrayGetSize(tx), .pGroupList = g1};

      // include only current table
      if (pRuntimeEnv->pQueryHandle != NULL) {
        tsdbCleanupQueryHandle(pRuntimeEnv->pQueryHandle);
        pRuntimeEnv->pQueryHandle = NULL;
      }

      pRuntimeEnv->pQueryHandle = tsdbQueryTables(pQInfo->tsdb, &cond, &gp, pQInfo);
B
Bomin Zhang 已提交
4527 4528
      taosArrayDestroy(g1);
      taosArrayDestroy(tx);
B
Bomin Zhang 已提交
4529 4530 4531
      if (pRuntimeEnv->pQueryHandle == NULL) {
        longjmp(pRuntimeEnv->env, terrno);
      }
4532

4533
      SArray* s = tsdbGetQueriedTableList(pRuntimeEnv->pQueryHandle);
4534 4535
      assert(taosArrayGetSize(s) >= 1);

4536
      setTagVal(pRuntimeEnv, taosArrayGetP(s, 0), pQInfo->tsdb);
4537 4538 4539 4540 4541 4542 4543 4544

      // here we simply set the first table as current table
      scanMultiTableDataBlocks(pQInfo);
      pQInfo->groupIndex += 1;

      SWindowResInfo *pWindowResInfo = &pRuntimeEnv->windowResInfo;

        // no results generated for current group, continue to try the next group
dengyihao's avatar
dengyihao 已提交
4545
      taosArrayDestroy(s); 
4546 4547 4548 4549 4550 4551 4552 4553 4554 4555 4556 4557 4558 4559
      if (pWindowResInfo->size <= 0) {
        continue;
      }

      for (int32_t i = 0; i < pWindowResInfo->size; ++i) {
        SWindowStatus *pStatus = &pWindowResInfo->pResult[i].status;
        pStatus->closed = true;  // enable return all results for group by normal columns

        SWindowResult *pResult = &pWindowResInfo->pResult[i];
        for (int32_t j = 0; j < pQuery->numOfOutput; ++j) {
          pResult->numOfRows = MAX(pResult->numOfRows, pResult->resultInfo[j].numOfRes);
        }
      }

4560
      qDebug("QInfo:%p generated groupby columns results %d rows for group %d completed", pQInfo, pWindowResInfo->size,
4561 4562 4563 4564 4565 4566 4567
          pQInfo->groupIndex);
      int32_t currentGroupIndex = pQInfo->groupIndex;

      pQuery->rec.rows = 0;
      pQInfo->groupIndex = 0;

      ensureOutputBufferSimple(pRuntimeEnv, pWindowResInfo->size);
4568
      copyFromWindowResToSData(pQInfo, pWindowResInfo);
4569 4570 4571 4572 4573 4574

      pQInfo->groupIndex = currentGroupIndex;  //restore the group index
      assert(pQuery->rec.rows == pWindowResInfo->size);

      clearClosedTimeWindow(pRuntimeEnv);
      break;
4575 4576 4577
    }
  } else {
    /*
4578
     * 1. super table projection query, 2. ts-comp query
4579 4580 4581
     * if the subgroup index is larger than 0, results generated by group by tbname,k is existed.
     * we need to return it to client in the first place.
     */
4582
    if (pQInfo->groupIndex > 0) {
4583
      copyFromWindowResToSData(pQInfo, &pRuntimeEnv->windowResInfo);
4584
      pQuery->rec.total += pQuery->rec.rows;
4585

4586
      if (pQuery->rec.rows > 0) {
4587 4588 4589
        return;
      }
    }
4590

4591
    // all data have returned already
4592
    if (pQInfo->tableIndex >= pQInfo->tableqinfoGroupInfo.numOfTables) {
4593 4594
      return;
    }
4595

4596 4597
    resetCtxOutputBuf(pRuntimeEnv);
    resetTimeWindowInfo(pRuntimeEnv, &pRuntimeEnv->windowResInfo);
4598

H
Haojun Liao 已提交
4599
    SArray *group = GET_TABLEGROUP(pQInfo, 0);
4600 4601
    assert(taosArrayGetSize(group) == pQInfo->tableqinfoGroupInfo.numOfTables &&
           1 == taosArrayGetSize(pQInfo->tableqinfoGroupInfo.pGroupList));
4602

4603
    while (pQInfo->tableIndex < pQInfo->tableqinfoGroupInfo.numOfTables) {
H
Haojun Liao 已提交
4604
      if (IS_QUERY_KILLED(pQInfo)) {
H
Haojun Liao 已提交
4605
        longjmp(pRuntimeEnv->env, TSDB_CODE_TSC_QUERY_CANCELLED);
4606
      }
4607

4608
      pQuery->current = taosArrayGetP(group, pQInfo->tableIndex);
4609
      if (!multiTableMultioutputHelper(pQInfo, pQInfo->tableIndex)) {
4610
        pQInfo->tableIndex++;
4611 4612
        continue;
      }
4613

H
hjxilinx 已提交
4614
      // TODO handle the limit offset problem
4615
      if (pQuery->numOfFilterCols == 0 && pQuery->limit.offset > 0) {
4616 4617
        if (Q_STATUS_EQUAL(pQuery->status, QUERY_COMPLETED)) {
          pQInfo->tableIndex++;
4618 4619 4620
          continue;
        }
      }
4621

4622
      scanOneTableDataBlocks(pRuntimeEnv, pQuery->current->lastKey);
4623
      skipResults(pRuntimeEnv);
4624

4625
      // the limitation of output result is reached, set the query completed
4626
      if (limitResults(pRuntimeEnv)) {
4627
        pQInfo->tableIndex = pQInfo->tableqinfoGroupInfo.numOfTables;
4628 4629
        break;
      }
4630

4631 4632
      // enable execution for next table, when handling the projection query
      enableExecutionForNextTable(pRuntimeEnv);
4633

4634
      if (Q_STATUS_EQUAL(pQuery->status, QUERY_COMPLETED)) {
4635 4636 4637 4638 4639 4640
        /*
         * query range is identical in terms of all meters involved in query,
         * so we need to restore them at the *beginning* of query on each meter,
         * not the consecutive query on meter on which is aborted due to buffer limitation
         * to ensure that, we can reset the query range once query on a meter is completed.
         */
4641
        pQInfo->tableIndex++;
weixin_48148422's avatar
weixin_48148422 已提交
4642

H
Haojun Liao 已提交
4643
        STableIdInfo tidInfo = {0};
4644

H
Haojun Liao 已提交
4645 4646 4647
        STableId* id = TSDB_TABLEID(pQuery->current->pTable);
        tidInfo.uid = id->uid;
        tidInfo.tid = id->tid;
weixin_48148422's avatar
weixin_48148422 已提交
4648
        tidInfo.key = pQuery->current->lastKey;
weixin_48148422's avatar
weixin_48148422 已提交
4649 4650
        taosArrayPush(pQInfo->arrTableIdInfo, &tidInfo);

4651
        // if the buffer is full or group by each table, we need to jump out of the loop
4652 4653
        if (Q_STATUS_EQUAL(pQuery->status, QUERY_RESBUF_FULL) /*||
            isGroupbyEachTable(pQuery->pGroupbyExpr, pSupporter->pSidSet)*/) {
4654 4655
          break;
        }
4656

4657
      } else {
4658
        // all data in the result buffer are skipped due to the offset, continue to retrieve data from current meter
4659 4660
        if (pQuery->rec.rows == 0) {
          assert(!Q_STATUS_EQUAL(pQuery->status, QUERY_RESBUF_FULL));
4661 4662
          continue;
        } else {
4663 4664 4665
          // buffer is full, wait for the next round to retrieve data from current meter
          assert(Q_STATUS_EQUAL(pQuery->status, QUERY_RESBUF_FULL));
          break;
4666 4667 4668
        }
      }
    }
H
Haojun Liao 已提交
4669

4670
    if (pQInfo->tableIndex >= pQInfo->tableqinfoGroupInfo.numOfTables) {
H
Haojun Liao 已提交
4671 4672
      setQueryStatus(pQuery, QUERY_COMPLETED);
    }
4673
  }
4674

4675 4676 4677 4678 4679 4680 4681 4682 4683 4684 4685 4686
  /*
   * 1. super table projection query, group-by on normal columns query, ts-comp query
   * 2. point interpolation query, last row query
   *
   * group-by on normal columns query and last_row query do NOT invoke the finalizer here,
   * since the finalize stage will be done at the client side.
   *
   * projection query, point interpolation query do not need the finalizer.
   *
   * Only the ts-comp query requires the finalizer function to be executed here.
   */
  if (isTSCompQuery(pQuery)) {
H
hjxilinx 已提交
4687
    finalizeQueryResult(pRuntimeEnv);
4688
  }
4689

4690 4691 4692
  if (pRuntimeEnv->pTSBuf != NULL) {
    pRuntimeEnv->cur = pRuntimeEnv->pTSBuf->cur;
  }
4693

4694
  qDebug(
B
Bomin Zhang 已提交
4695
      "QInfo %p numOfTables:%"PRIu64", index:%d, numOfGroups:%zu, %"PRId64" points returned, total:%"PRId64", offset:%" PRId64,
4696
      pQInfo, pQInfo->tableqinfoGroupInfo.numOfTables, pQInfo->tableIndex, numOfGroups, pQuery->rec.rows, pQuery->rec.total,
4697
      pQuery->limit.offset);
4698 4699
}

4700 4701 4702 4703
/*
 * Switch the runtime environment over to the reverse scan: flip the scan flag, the query
 * window and the scan order, open a fresh secondary tsdb query handle for the flipped
 * window, and reset the query status so the scan can run again.
 *
 * Leaves via longjmp(pRuntimeEnv->env, terrno) when the secondary handle cannot be created.
 */
static void doSaveContext(SQInfo *pQInfo) {
  SQueryRuntimeEnv *env = &pQInfo->runtimeEnv;
  SQuery *          query = env->pQuery;

  // reverse the direction of the scan and the time window boundaries
  SET_REVERSE_SCAN_FLAG(env);
  SWAP(query->window.skey, query->window.ekey, TSKEY);
  SWITCH_ORDER(query->order.order);

  // keep the ts buffer cursor aligned with the new scan order
  if (env->pTSBuf != NULL) {
    env->pTSBuf->cur.order = query->order.order;
  }

  // the condition is built from the already flipped window/order
  STsdbQueryCond cond = {
      .twindow   = query->window,
      .order     = query->order.order,
      .colList   = query->colList,
      .numOfCols = query->numOfCols,
  };

  // release the handle left over from a previous round before creating a new one
  if (env->pSecQueryHandle != NULL) {
    tsdbCleanupQueryHandle(env->pSecQueryHandle);
  }

  env->prevGroupId = INT32_MIN;
  env->pSecQueryHandle = tsdbQueryTables(pQInfo->tsdb, &cond, &pQInfo->tableGroupInfo, pQInfo);
  if (env->pSecQueryHandle == NULL) {
    longjmp(env->env, terrno);
  }

  setQueryStatus(query, QUERY_NOT_COMPLETED);
  switchCtxOrder(env);
  disableFuncInReverseScan(pQInfo);
}

4735 4736 4737 4738
/*
 * Undo the switch made for the reverse scan: swap the query window back, flip the ts buffer
 * cursor order and the per-context order again, and mark the runtime as a master scan.
 *
 * NOTE(review): pQuery->order.order, which doSaveContext flips, is not flipped back here -
 * confirm that this is intended.
 */
static void doRestoreContext(SQInfo *pQInfo) {
  SQueryRuntimeEnv *env = &pQInfo->runtimeEnv;
  SQuery *          query = env->pQuery;

  SWAP(query->window.skey, query->window.ekey, TSKEY);

  if (env->pTSBuf != NULL) {
    SWITCH_ORDER(env->pTSBuf->cur.order);
  }

  switchCtxOrder(env);
  SET_MASTER_SCAN_FLAG(env);
}

4749 4750 4751
/*
 * Close every time window once a scan is finished.
 *
 * For an interval query the window results of each table in each table group are closed;
 * otherwise the window results held directly by the runtime environment are closed.
 */
static void doCloseAllTimeWindowAfterScan(SQInfo *pQInfo) {
  SQuery *pQuery = pQInfo->runtimeEnv.pQuery;

  if (QUERY_IS_INTERVAL_QUERY(pQuery)) {
    size_t groupCount = GET_NUM_OF_TABLEGROUP(pQInfo);

    for (int32_t g = 0; g < groupCount; ++g) {
      SArray *tables = GET_TABLEGROUP(pQInfo, g);
      size_t  tableCount = taosArrayGetSize(tables);

      for (int32_t t = 0; t < tableCount; ++t) {
        STableQueryInfo *tableInfo = taosArrayGetP(tables, t);
        closeAllTimeWindow(&tableInfo->windowResInfo);
      }
    }

    return;
  }

  // close results for group result
  closeAllTimeWindow(&pQInfo->runtimeEnv.windowResInfo);
}

/*
 * Run a query over multiple tables: forward scan, optional reverse scan, then copy the
 * results into the output buffer.
 *
 * When pQInfo->groupIndex > 0 no scan is performed; the remaining results are only copied
 * into the output buffer. If the query is killed or pQInfo->code reports an error after a
 * scan, the results are finalized and the function leaves via
 * longjmp(pRuntimeEnv->env, TSDB_CODE_TSC_QUERY_CANCELLED).
 */
static void multiTableQueryProcess(SQInfo *pQInfo) {
  SQueryRuntimeEnv *env = &pQInfo->runtimeEnv;
  SQuery *          query = env->pQuery;

  if (pQInfo->groupIndex > 0) {
    /*
     * groupIndex > 0 means the query itself has already been completed; only the
     * remaining data has to be copied into the output buffer.
     */
    if (QUERY_IS_INTERVAL_QUERY(query)) {
      copyResToQueryResultBuf(pQInfo, query);
#ifdef _DEBUG_VIEW
      displayInterResult(query->sdata, env, query->sdata[0]->num);
#endif
    } else {
      copyFromWindowResToSData(pQInfo, &env->windowResInfo);
    }

    qDebug("QInfo:%p current:%"PRId64", total:%"PRId64"", pQInfo, query->rec.rows, query->rec.total);
    return;
  }

  qDebug("QInfo:%p query start, qrange:%" PRId64 "-%" PRId64 ", order:%d, forward scan start", pQInfo,
         query->window.skey, query->window.ekey, query->order.order);

  // forward (master) scan over all qualified data blocks
  int64_t elapsed = scanMultiTableDataBlocks(pQInfo);
  qDebug("QInfo:%p master scan completed, elapsed time: %" PRId64 "ms, reverse scan start", pQInfo, elapsed);

  // an error occurred or the query was killed: abort the current execution
  if (pQInfo->code != TSDB_CODE_SUCCESS || IS_QUERY_KILLED(pQInfo)) {
    qDebug("QInfo:%p query killed or error occurred, code:%s, abort", pQInfo, tstrerror(pQInfo->code));
    finalizeQueryResult(env);  // clean up allocated resource during query
    longjmp(env->env, TSDB_CODE_TSC_QUERY_CANCELLED);
  }

  // close all time window results produced by the forward scan
  doCloseAllTimeWindowAfterScan(pQInfo);

  if (!needReverseScan(query)) {
    qDebug("QInfo:%p no need to do reversed scan, query completed", pQInfo);
  } else {
    doSaveContext(pQInfo);

    elapsed = scanMultiTableDataBlocks(pQInfo);
    qDebug("QInfo:%p reversed scan completed, elapsed time: %" PRId64 "ms", pQInfo, elapsed);

    doRestoreContext(pQInfo);
  }

  setQueryStatus(query, QUERY_COMPLETED);

  // same abort check as above, this time covering the reverse scan
  if (pQInfo->code != TSDB_CODE_SUCCESS || IS_QUERY_KILLED(pQInfo)) {
    qDebug("QInfo:%p query killed or error occurred, code:%s, abort", pQInfo, tstrerror(pQInfo->code));
    finalizeQueryResult(env);  // clean up allocated resource during query
    longjmp(env->env, TSDB_CODE_TSC_QUERY_CANCELLED);
  }

  if (QUERY_IS_INTERVAL_QUERY(query) || isSumAvgRateQuery(query)) {
    // results are copied only when the merge into group results succeeds
    if (mergeIntoGroupResult(pQInfo) == TSDB_CODE_SUCCESS) {
      copyResToQueryResultBuf(pQInfo, query);

#ifdef _DEBUG_VIEW
      displayInterResult(query->sdata, env, query->sdata[0]->num);
#endif
    }
  } else {  // not an interval query
    copyFromWindowResToSData(pQInfo, &env->windowResInfo);
  }

  // handle the limitation of output buffer
  qDebug("QInfo:%p points returned:%" PRId64 ", total:%" PRId64, pQInfo, query->rec.rows, query->rec.total + query->rec.rows);
}

/*
 * in each query, this function will be called only once, no retry for further result.
 *
 * select count(*)/top(field,k)/avg(field name) from table_name [where ts>now-1a];
 * select count(*) from table_name group by status_column;
 */
H
hjxilinx 已提交
4849
static void tableFixedOutputProcess(SQInfo *pQInfo, STableQueryInfo* pTableInfo) {
4850
  SQueryRuntimeEnv *pRuntimeEnv = &pQInfo->runtimeEnv;
H
hjxilinx 已提交
4851 4852
  
  SQuery *pQuery = pRuntimeEnv->pQuery;
H
Haojun Liao 已提交
4853
  if (!pRuntimeEnv->topBotQuery && pQuery->limit.offset > 0) {  // no need to execute, since the output will be ignore.
H
Haojun Liao 已提交
4854 4855 4856
    return;
  }
  
H
hjxilinx 已提交
4857 4858
  pQuery->current = pTableInfo;  // set current query table info
  
4859
  scanOneTableDataBlocks(pRuntimeEnv, pTableInfo->lastKey);
H
hjxilinx 已提交
4860
  finalizeQueryResult(pRuntimeEnv);
4861

H
Haojun Liao 已提交
4862
  if (IS_QUERY_KILLED(pQInfo)) {
H
Haojun Liao 已提交
4863 4864
    finalizeQueryResult(pRuntimeEnv); // clean up allocated resource during query
    longjmp(pRuntimeEnv->env, TSDB_CODE_TSC_QUERY_CANCELLED);
4865
  }
4866

H
Haojun Liao 已提交
4867
  // since the numOfRows must be identical for all sql functions that are allowed to be executed simutaneously.
4868
  pQuery->rec.rows = getNumOfResult(pRuntimeEnv);
4869

4870
  skipResults(pRuntimeEnv);
4871
  limitResults(pRuntimeEnv);
4872 4873
}

H
hjxilinx 已提交
4874
/*
 * Process a single-table query whose functions can produce multiple output rows.
 *
 * The table is scanned repeatedly until some rows survive the offset handling or the query
 * is completed. When the query completes, the table's id and last key are appended to
 * pQInfo->arrTableIdInfo.
 */
static void tableMultiOutputProcess(SQInfo *pQInfo, STableQueryInfo* pTableInfo) {
  SQueryRuntimeEnv *env = &pQInfo->runtimeEnv;
  SQuery *          query = env->pQuery;

  query->current = pTableInfo;

  // for ts_comp query, re-initialization is not allowed
  if (!isTSCompQuery(query)) {
    resetCtxOutputBuf(env);
  }

  // skip blocks without loading the actual data block from file if no filter condition is present
  skipBlocks(env);
  if (query->limit.offset > 0 && query->numOfFilterCols == 0) {
    setQueryStatus(query, QUERY_COMPLETED);
    return;
  }

  for (;;) {
    scanOneTableDataBlocks(env, query->current->lastKey);
    finalizeQueryResult(env);

    query->rec.rows = getNumOfResult(env);

    // with filter columns the offset has to be applied on the produced rows
    if (query->limit.offset > 0 && query->numOfFilterCols > 0 && query->rec.rows > 0) {
      skipResults(env);
    }

    /*
     * 1. if no rows are left, limit.offset >= 0 and the scan still needs to continue
     * 2. if rows are left, limit.offset must be 0
     */
    if (query->rec.rows > 0 || Q_STATUS_EQUAL(query->status, QUERY_COMPLETED)) {
      break;
    }

    qDebug("QInfo:%p skip current result, offset:%" PRId64 ", next qrange:%" PRId64 "-%" PRId64,
           pQInfo, query->limit.offset, query->current->lastKey, query->current->win.ekey);

    resetCtxOutputBuf(env);
  }

  limitResults(env);

  if (Q_STATUS_EQUAL(query->status, QUERY_RESBUF_FULL)) {
    qDebug("QInfo:%p query paused due to output limitation, next qrange:%" PRId64 "-%" PRId64, pQInfo,
        query->current->lastKey, query->window.ekey);
  } else if (Q_STATUS_EQUAL(query->status, QUERY_COMPLETED)) {
    // record how far this table has been read
    STableId *   id = TSDB_TABLEID(query->current);
    STableIdInfo tidInfo;

    tidInfo.uid = id->uid;
    tidInfo.tid = id->tid;
    tidInfo.key = query->current->lastKey;
    taosArrayPush(pQInfo->arrTableIdInfo, &tidInfo);
  }

  if (!isTSCompQuery(query)) {
    assert(query->rec.rows <= query->rec.capacity);
  }
}

H
Haojun Liao 已提交
4934
/*
 * Scan one table starting at `start` and finalize the results, repeating until the query
 * status reports QUERY_COMPLETED or QUERY_RESBUF_FULL.
 *
 * When there is no fill and either filter columns or a ts buffer are present, a pending
 * limit.offset is consumed by dropping closed time windows after each scan.
 */
static void tableIntervalProcessImpl(SQueryRuntimeEnv *pRuntimeEnv, TSKEY start) {
  SQuery *query = pRuntimeEnv->pQuery;

  do {
    scanOneTableDataBlocks(pRuntimeEnv, start);

    assert(!Q_STATUS_EQUAL(query->status, QUERY_NOT_COMPLETED));
    finalizeQueryResult(pRuntimeEnv);

    // here we can ignore the records in case of no interpolation
    // todo handle offset, in case of top/bottom interval query
    if ((query->numOfFilterCols > 0 || pRuntimeEnv->pTSBuf != NULL) && query->limit.offset > 0 &&
        query->fillType == TSDB_FILL_NONE) {
      // drop at most as many closed windows as the remaining offset
      int32_t closedWindows = numOfClosedTimeWindow(&pRuntimeEnv->windowResInfo);
      int32_t toSkip = MIN(closedWindows, query->limit.offset);

      clearFirstNTimeWindow(pRuntimeEnv, toSkip);
      query->limit.offset -= toSkip;
    }
  } while (!Q_STATUS_EQUAL(query->status, QUERY_COMPLETED | QUERY_RESBUF_FULL));
}

4961
// handle time interval query on table
H
hjxilinx 已提交
4962
static void tableIntervalProcess(SQInfo *pQInfo, STableQueryInfo* pTableInfo) {
4963 4964
  SQueryRuntimeEnv *pRuntimeEnv = &(pQInfo->runtimeEnv);

H
hjxilinx 已提交
4965 4966
  SQuery *pQuery = pRuntimeEnv->pQuery;
  pQuery->current = pTableInfo;
4967

H
Haojun Liao 已提交
4968
  int32_t numOfFilled = 0;
H
Haojun Liao 已提交
4969 4970
  TSKEY newStartKey = TSKEY_INITIAL_VAL;
  
4971
  // skip blocks without load the actual data block from file if no filter condition present
H
Haojun Liao 已提交
4972
  skipTimeInterval(pRuntimeEnv, &newStartKey);
4973
  if (pQuery->limit.offset > 0 && pQuery->numOfFilterCols == 0 && pRuntimeEnv->pFillInfo == NULL) {
4974 4975 4976 4977
    setQueryStatus(pQuery, QUERY_COMPLETED);
    return;
  }

4978
  while (1) {
H
Haojun Liao 已提交
4979
    tableIntervalProcessImpl(pRuntimeEnv, newStartKey);
4980

H
Haojun Liao 已提交
4981
    if (QUERY_IS_INTERVAL_QUERY(pQuery)) {
4982
      pQInfo->groupIndex = 0;  // always start from 0
4983
      pQuery->rec.rows = 0;
4984
      copyFromWindowResToSData(pQInfo, &pRuntimeEnv->windowResInfo);
4985

4986
      clearFirstNTimeWindow(pRuntimeEnv, pQInfo->groupIndex);
4987
    }
4988

4989
    // the offset is handled at prepare stage if no interpolation involved
4990
    if (pQuery->fillType == TSDB_FILL_NONE || pQuery->rec.rows == 0) {
4991
      limitResults(pRuntimeEnv);
4992 4993
      break;
    } else {
H
Haojun Liao 已提交
4994
      taosFillSetStartInfo(pRuntimeEnv->pFillInfo, pQuery->rec.rows, pQuery->window.ekey);
4995
      taosFillCopyInputDataFromFilePage(pRuntimeEnv->pFillInfo, (tFilePage**) pQuery->sdata);
H
Haojun Liao 已提交
4996
      numOfFilled = 0;
4997
      
H
Haojun Liao 已提交
4998
      pQuery->rec.rows = doFillGapsInResults(pRuntimeEnv, (tFilePage **)pQuery->sdata, &numOfFilled);
4999
      if (pQuery->rec.rows > 0 || Q_STATUS_EQUAL(pQuery->status, QUERY_COMPLETED)) {
5000
        limitResults(pRuntimeEnv);
5001 5002
        break;
      }
5003

5004
      // no result generated yet, continue retrieve data
5005
      pQuery->rec.rows = 0;
5006 5007
    }
  }
5008

5009
  // all data scanned, the group by normal column can return
H
Haojun Liao 已提交
5010
  if (pRuntimeEnv->groupbyNormalCol) {  // todo refactor with merge interval time result
5011
    pQInfo->groupIndex = 0;
5012
    pQuery->rec.rows = 0;
5013
    copyFromWindowResToSData(pQInfo, &pRuntimeEnv->windowResInfo);
5014
    clearFirstNTimeWindow(pRuntimeEnv, pQInfo->groupIndex);
5015
  }
5016

H
Haojun Liao 已提交
5017
  pQInfo->pointsInterpo += numOfFilled;
5018 5019
}

5020 5021 5022 5023
/*
 * Entry point for a query on a single table.
 *
 * If results remain from a previous round they are returned first (through the fill
 * procedure, or from the buffered window results). Otherwise the query is dispatched to the
 * interval, fixed-output or multi-output processor, and the elapsed time is added to the
 * runtime summary.
 */
static void tableQueryImpl(SQInfo *pQInfo) {
  SQueryRuntimeEnv *env = &pQInfo->runtimeEnv;
  SQuery *          query = env->pQuery;

  if (queryHasRemainResults(env)) {
    if (query->fillType != TSDB_FILL_NONE) {
      /*
       * Some results were not returned because of the result interpolation, so stay in
       * this procedure instead of launching the retrieve procedure for the next results.
       */
      int32_t numOfFilled = 0;
      query->rec.rows = doFillGapsInResults(env, (tFilePage **)query->sdata, &numOfFilled);

      if (query->rec.rows > 0) {
        limitResults(env);
      }

      qDebug("QInfo:%p current:%" PRId64 " returned, total:%" PRId64, pQInfo, query->rec.rows, query->rec.total);
      return;
    }

    query->rec.rows = 0;
    pQInfo->groupIndex = 0;  // always start from 0

    if (env->windowResInfo.size > 0) {
      copyFromWindowResToSData(pQInfo, &env->windowResInfo);
      clearFirstNTimeWindow(env, pQInfo->groupIndex);

      if (query->rec.rows > 0) {
        qDebug("QInfo:%p %"PRId64" rows returned from group results, total:%"PRId64"", pQInfo, query->rec.rows, query->rec.total);

        // no buffered window results remain
        if (env->windowResInfo.size <= 0) {
          qDebug("QInfo:%p query over, %"PRId64" rows are returned", pQInfo, query->rec.total);
        }

        return;
      }
    }
  }

  // number of points returned during this query
  query->rec.rows = 0;
  int64_t startUs = taosGetTimestampUs();

  assert(pQInfo->tableqinfoGroupInfo.numOfTables == 1);
  SArray *         group = GET_TABLEGROUP(pQInfo, 0);
  STableQueryInfo *tableInfo = taosArrayGetP(group, 0);

  // group by normal column, sliding window query, interval query are handled by interval query processor
  if (QUERY_IS_INTERVAL_QUERY(query) || env->groupbyNormalCol) {  // interval (down sampling operation)
    tableIntervalProcess(pQInfo, tableInfo);
  } else if (isFixedOutputQuery(env)) {
    tableFixedOutputProcess(pQInfo, tableInfo);
  } else {  // diff/add/multiply/subtract/division
    assert(query->checkBuffer == 1);
    tableMultiOutputProcess(pQInfo, tableInfo);
  }

  // record the total elapsed time
  env->summary.elapsedTime += (taosGetTimestampUs() - startUs);
  assert(pQInfo->tableqinfoGroupInfo.numOfTables == 1);
}

5085
/*
 * Entry point for a super table query.
 *
 * Interval queries, and fixed-output queries that are neither point interpolation,
 * group-by-normal-column nor first/last row queries, go through multiTableQueryProcess;
 * everything else is handled by sequentialTableProcess. The elapsed time is added to the
 * runtime summary.
 */
static void stableQueryImpl(SQInfo *pQInfo) {
  SQueryRuntimeEnv* env = &pQInfo->runtimeEnv;
  SQuery *          query = env->pQuery;

  query->rec.rows = 0;

  int64_t startUs = taosGetTimestampUs();

  bool useMultiTableProcess =
      QUERY_IS_INTERVAL_QUERY(query) ||
      (isFixedOutputQuery(env) && (!isPointInterpoQuery(query)) && !env->groupbyNormalCol &&
       !isFirstLastRowQuery(query));

  if (useMultiTableProcess) {
    multiTableQueryProcess(pQInfo);
  } else {
    assert((query->checkBuffer == 1 && query->intervalTime == 0) || isPointInterpoQuery(query) ||
            isFirstLastRowQuery(query) || env->groupbyNormalCol);

    sequentialTableProcess(pQInfo);
  }

  // record the total elapsed time
  pQInfo->runtimeEnv.summary.elapsedTime += (taosGetTimestampUs() - startUs);
}

5108
/*
 * Locate the column referenced by an expression in the source column lists of the query.
 *
 * @param pQueryMsg  query message; numOfTags/numOfCols and colList are read
 * @param pExprMsg   expression whose colInfo (flag, colId) identifies the column
 * @param pTagCols   tag column list, searched when the expression refers to a tag
 * @return index into pTagCols for a tag column, index into pQueryMsg->colList for a
 *         normal column, or -1 when the tag column id is TSDB_TBNAME_COLUMN_INDEX
 *
 * A column that is found in neither list trips the assert. When asserts are compiled out
 * (NDEBUG) the function used to run off its end without returning a value; it now returns
 * -1 in that case so that the caller never reads an undefined result.
 */
static int32_t getColumnIndexInSource(SQueryTableMsg *pQueryMsg, SSqlFuncMsg *pExprMsg, SColumnInfo* pTagCols) {
  if (TSDB_COL_IS_TAG(pExprMsg->colInfo.flag)) {
    if (pExprMsg->colInfo.colId == TSDB_TBNAME_COLUMN_INDEX) {
      return -1;
    }

    for (int32_t j = 0; j < pQueryMsg->numOfTags; ++j) {
      if (pExprMsg->colInfo.colId == pTagCols[j].colId) {
        return j;
      }
    }
  } else {
    for (int32_t j = 0; j < pQueryMsg->numOfCols; ++j) {
      if (pExprMsg->colInfo.colId == pQueryMsg->colList[j].colId) {
        return j;
      }
    }
  }

  assert(0);
  return -1;  // only reached when assert() is disabled
}

5137 5138 5139
/*
 * Check that the index returned by getColumnIndexInSource for the expression is smaller
 * than the number of source columns or the number of tag columns.
 */
bool validateExprColumnInfo(SQueryTableMsg *pQueryMsg, SSqlFuncMsg *pExprMsg, SColumnInfo* pTagCols) {
  int32_t index = getColumnIndexInSource(pQueryMsg, pExprMsg, pTagCols);

  if (index < pQueryMsg->numOfCols) {
    return true;
  }

  return index < pQueryMsg->numOfTags;
}

5142
/*
 * Sanity check of the scalar fields of a query message.
 *
 * Rejects a negative interval time, a non-positive table count, a negative group-by column
 * count and an output column count outside (0, TSDB_MAX_COLUMNS]. Each rejection is logged.
 *
 * @return true when all checks pass, false otherwise
 */
static bool validateQueryMsg(SQueryTableMsg *pQueryMsg) {
  if (pQueryMsg->intervalTime < 0) {
    qError("qmsg:%p illegal value of interval time %" PRId64, pQueryMsg, pQueryMsg->intervalTime);
    return false;
  }

  if (pQueryMsg->numOfTables <= 0) {
    qError("qmsg:%p illegal value of numOfTables %d", pQueryMsg, pQueryMsg->numOfTables);
    return false;
  }

  if (pQueryMsg->numOfGroupCols < 0) {
    qError("qmsg:%p illegal value of numOfGroupbyCols %d", pQueryMsg, pQueryMsg->numOfGroupCols);
    return false;
  }

  if (pQueryMsg->numOfOutput <= 0 || pQueryMsg->numOfOutput > TSDB_MAX_COLUMNS) {
    qError("qmsg:%p illegal value of output columns %d", pQueryMsg, pQueryMsg->numOfOutput);
    return false;
  }

  return true;
}

/*
 * Validate the number of source columns and tags of a query message.
 *
 * Negative counts, or a total above TSDB_MAX_COLUMNS, are rejected. When the message
 * carries no source column and no tag at all, every output expression must be one of:
 * TSDB_FUNC_TAGPRJ, or TSDB_FUNC_TID_TAG / TSDB_FUNC_COUNT applied to the column id
 * TSDB_TBNAME_COLUMN_INDEX.
 *
 * @return true when the message is acceptable, false otherwise
 */
static bool validateQuerySourceCols(SQueryTableMsg *pQueryMsg, SSqlFuncMsg** pExprMsg) {
  int32_t numOfTotal = pQueryMsg->numOfCols + pQueryMsg->numOfTags;

  if (pQueryMsg->numOfCols < 0 || pQueryMsg->numOfTags < 0 || numOfTotal > TSDB_MAX_COLUMNS) {
    qError("qmsg:%p illegal value of numOfCols %d numOfTags:%d", pQueryMsg, pQueryMsg->numOfCols, pQueryMsg->numOfTags);
    return false;
  }

  if (numOfTotal != 0) {
    return true;
  }

  // no source columns at all: only expressions that need none are allowed
  for (int32_t i = 0; i < pQueryMsg->numOfOutput; ++i) {
    SSqlFuncMsg* pFuncMsg = pExprMsg[i];

    bool needsNoSourceCol =
        (pFuncMsg->functionId == TSDB_FUNC_TAGPRJ) ||
        (pFuncMsg->functionId == TSDB_FUNC_TID_TAG && pFuncMsg->colInfo.colId == TSDB_TBNAME_COLUMN_INDEX) ||
        (pFuncMsg->functionId == TSDB_FUNC_COUNT && pFuncMsg->colInfo.colId == TSDB_TBNAME_COLUMN_INDEX);

    if (!needsNoSourceCol) {
      return false;
    }
  }

  return true;
}

5188
/*
 * Build the table id list from the STableIdInfo entries stored back to back at pMsg.
 *
 * Each entry is byte-order converted in place inside the message buffer and then pushed
 * into a newly created array that is handed back through pTableIdList.
 *
 * @param pQueryMsg     query message; numOfTables gives the number of entries
 * @param pMsg          position of the first entry in the message buffer
 * @param pTableIdList  receives the new array
 * @return position in the message buffer right after the last entry
 */
static char *createTableIdList(SQueryTableMsg *pQueryMsg, char *pMsg, SArray **pTableIdList) {
  assert(pQueryMsg->numOfTables > 0);

  *pTableIdList = taosArrayInit(pQueryMsg->numOfTables, sizeof(STableIdInfo));

  STableIdInfo *entry = (STableIdInfo *)pMsg;
  for (int32_t j = 0; j < pQueryMsg->numOfTables; ++j, ++entry) {
    entry->tid = htonl(entry->tid);
    entry->uid = htobe64(entry->uid);
    entry->key = htobe64(entry->key);

    taosArrayPush(*pTableIdList, entry);
  }

  return (char *)entry;
}
5206

5207
/**
H
hjxilinx 已提交
5208
 * pQueryMsg->head has been converted before this function is called.
5209
 *
H
hjxilinx 已提交
5210
 * @param pQueryMsg
5211 5212 5213 5214
 * @param pTableIdList
 * @param pExpr
 * @return
 */
5215
static int32_t convertQueryMsg(SQueryTableMsg *pQueryMsg, SArray **pTableIdList, SSqlFuncMsg ***pExpr,
weixin_48148422's avatar
weixin_48148422 已提交
5216
                               char **tagCond, char** tbnameCond, SColIndex **groupbyCols, SColumnInfo** tagCols) {
5217 5218
  int32_t code = TSDB_CODE_SUCCESS;

5219 5220 5221 5222 5223 5224 5225 5226
  pQueryMsg->numOfTables = htonl(pQueryMsg->numOfTables);

  pQueryMsg->window.skey = htobe64(pQueryMsg->window.skey);
  pQueryMsg->window.ekey = htobe64(pQueryMsg->window.ekey);
  pQueryMsg->intervalTime = htobe64(pQueryMsg->intervalTime);
  pQueryMsg->slidingTime = htobe64(pQueryMsg->slidingTime);
  pQueryMsg->limit = htobe64(pQueryMsg->limit);
  pQueryMsg->offset = htobe64(pQueryMsg->offset);
H
hjxilinx 已提交
5227

5228 5229
  pQueryMsg->order = htons(pQueryMsg->order);
  pQueryMsg->orderColId = htons(pQueryMsg->orderColId);
H
Haojun Liao 已提交
5230
  pQueryMsg->queryType = htonl(pQueryMsg->queryType);
weixin_48148422's avatar
weixin_48148422 已提交
5231
  pQueryMsg->tagNameRelType = htons(pQueryMsg->tagNameRelType);
5232 5233

  pQueryMsg->numOfCols = htons(pQueryMsg->numOfCols);
5234
  pQueryMsg->numOfOutput = htons(pQueryMsg->numOfOutput);
H
hjxilinx 已提交
5235
  pQueryMsg->numOfGroupCols = htons(pQueryMsg->numOfGroupCols);
5236 5237 5238
  pQueryMsg->tagCondLen = htons(pQueryMsg->tagCondLen);
  pQueryMsg->tsOffset = htonl(pQueryMsg->tsOffset);
  pQueryMsg->tsLen = htonl(pQueryMsg->tsLen);
H
hjxilinx 已提交
5239
  pQueryMsg->tsNumOfBlocks = htonl(pQueryMsg->tsNumOfBlocks);
5240
  pQueryMsg->tsOrder = htonl(pQueryMsg->tsOrder);
5241
  pQueryMsg->numOfTags = htonl(pQueryMsg->numOfTags);
5242

5243
  // query msg safety check
5244
  if (!validateQueryMsg(pQueryMsg)) {
5245 5246
    code = TSDB_CODE_QRY_INVALID_MSG;
    goto _cleanup;
5247 5248
  }

H
hjxilinx 已提交
5249 5250
  char *pMsg = (char *)(pQueryMsg->colList) + sizeof(SColumnInfo) * pQueryMsg->numOfCols;
  for (int32_t col = 0; col < pQueryMsg->numOfCols; ++col) {
5251 5252
    SColumnInfo *pColInfo = &pQueryMsg->colList[col];

H
hjxilinx 已提交
5253
    pColInfo->colId = htons(pColInfo->colId);
5254
    pColInfo->type = htons(pColInfo->type);
H
hjxilinx 已提交
5255 5256
    pColInfo->bytes = htons(pColInfo->bytes);
    pColInfo->numOfFilters = htons(pColInfo->numOfFilters);
5257

H
hjxilinx 已提交
5258
    assert(pColInfo->type >= TSDB_DATA_TYPE_BOOL && pColInfo->type <= TSDB_DATA_TYPE_NCHAR);
5259

H
hjxilinx 已提交
5260
    int32_t numOfFilters = pColInfo->numOfFilters;
5261
    if (numOfFilters > 0) {
H
hjxilinx 已提交
5262
      pColInfo->filters = calloc(numOfFilters, sizeof(SColumnFilterInfo));
5263 5264 5265
    }

    for (int32_t f = 0; f < numOfFilters; ++f) {
5266 5267 5268 5269
      SColumnFilterInfo *pFilterMsg = (SColumnFilterInfo *)pMsg;
      
      SColumnFilterInfo *pColFilter = &pColInfo->filters[f];
      pColFilter->filterstr = htons(pFilterMsg->filterstr);
5270 5271 5272

      pMsg += sizeof(SColumnFilterInfo);

5273 5274
      if (pColFilter->filterstr) {
        pColFilter->len = htobe64(pFilterMsg->len);
5275

5276
        pColFilter->pz = (int64_t) calloc(1, pColFilter->len + 1 * TSDB_NCHAR_SIZE); // note: null-terminator
5277 5278
        memcpy((void *)pColFilter->pz, pMsg, pColFilter->len);
        pMsg += (pColFilter->len + 1);
5279
      } else {
5280 5281
        pColFilter->lowerBndi = htobe64(pFilterMsg->lowerBndi);
        pColFilter->upperBndi = htobe64(pFilterMsg->upperBndi);
5282 5283
      }

5284 5285
      pColFilter->lowerRelOptr = htons(pFilterMsg->lowerRelOptr);
      pColFilter->upperRelOptr = htons(pFilterMsg->upperRelOptr);
5286 5287 5288
    }
  }

5289 5290
  *pExpr = calloc(pQueryMsg->numOfOutput, POINTER_BYTES);
  SSqlFuncMsg *pExprMsg = (SSqlFuncMsg *)pMsg;
5291

5292
  for (int32_t i = 0; i < pQueryMsg->numOfOutput; ++i) {
5293
    (*pExpr)[i] = pExprMsg;
5294

5295
    pExprMsg->colInfo.colIndex = htons(pExprMsg->colInfo.colIndex);
5296 5297 5298 5299
    pExprMsg->colInfo.colId = htons(pExprMsg->colInfo.colId);
    pExprMsg->colInfo.flag = htons(pExprMsg->colInfo.flag);
    pExprMsg->functionId = htons(pExprMsg->functionId);
    pExprMsg->numOfParams = htons(pExprMsg->numOfParams);
5300

5301
    pMsg += sizeof(SSqlFuncMsg);
5302 5303

    for (int32_t j = 0; j < pExprMsg->numOfParams; ++j) {
5304
      pExprMsg->arg[j].argType = htons(pExprMsg->arg[j].argType);
5305 5306 5307 5308
      pExprMsg->arg[j].argBytes = htons(pExprMsg->arg[j].argBytes);

      if (pExprMsg->arg[j].argType == TSDB_DATA_TYPE_BINARY) {
        pExprMsg->arg[j].argValue.pz = pMsg;
5309
        pMsg += pExprMsg->arg[j].argBytes;  // one more for the string terminated char.
5310 5311 5312 5313 5314
      } else {
        pExprMsg->arg[j].argValue.i64 = htobe64(pExprMsg->arg[j].argValue.i64);
      }
    }

H
Haojun Liao 已提交
5315 5316
    int16_t functionId = pExprMsg->functionId;
    if (functionId == TSDB_FUNC_TAG || functionId == TSDB_FUNC_TAGPRJ || functionId == TSDB_FUNC_TAG_DUMMY) {
5317
      if (pExprMsg->colInfo.flag != TSDB_COL_TAG) {  // ignore the column  index check for arithmetic expression.
5318 5319
        code = TSDB_CODE_QRY_INVALID_MSG;
        goto _cleanup;
5320 5321
      }
    } else {
5322
//      if (!validateExprColumnInfo(pQueryMsg, pExprMsg)) {
5323
//        return TSDB_CODE_QRY_INVALID_MSG;
5324
//      }
5325 5326
    }

5327
    pExprMsg = (SSqlFuncMsg *)pMsg;
5328
  }
5329

5330
  if (!validateQuerySourceCols(pQueryMsg, *pExpr)) {
5331
    code = TSDB_CODE_QRY_INVALID_MSG;
dengyihao's avatar
dengyihao 已提交
5332
    goto _cleanup;
5333
  }
5334

H
hjxilinx 已提交
5335
  pMsg = createTableIdList(pQueryMsg, pMsg, pTableIdList);
5336

H
hjxilinx 已提交
5337
  if (pQueryMsg->numOfGroupCols > 0) {  // group by tag columns
5338
    *groupbyCols = malloc(pQueryMsg->numOfGroupCols * sizeof(SColIndex));
5339 5340 5341 5342
    if (*groupbyCols == NULL) {
      code = TSDB_CODE_QRY_OUT_OF_MEMORY;
      goto _cleanup;
    }
5343 5344 5345

    for (int32_t i = 0; i < pQueryMsg->numOfGroupCols; ++i) {
      (*groupbyCols)[i].colId = *(int16_t *)pMsg;
5346
      pMsg += sizeof((*groupbyCols)[i].colId);
5347 5348

      (*groupbyCols)[i].colIndex = *(int16_t *)pMsg;
5349 5350
      pMsg += sizeof((*groupbyCols)[i].colIndex);

5351
      (*groupbyCols)[i].flag = *(int16_t *)pMsg;
5352 5353 5354 5355 5356
      pMsg += sizeof((*groupbyCols)[i].flag);

      memcpy((*groupbyCols)[i].name, pMsg, tListLen(groupbyCols[i]->name));
      pMsg += tListLen((*groupbyCols)[i].name);
    }
5357

H
hjxilinx 已提交
5358 5359
    pQueryMsg->orderByIdx = htons(pQueryMsg->orderByIdx);
    pQueryMsg->orderType = htons(pQueryMsg->orderType);
5360 5361
  }

5362 5363
  pQueryMsg->fillType = htons(pQueryMsg->fillType);
  if (pQueryMsg->fillType != TSDB_FILL_NONE) {
5364
    pQueryMsg->fillVal = (uint64_t)(pMsg);
5365 5366

    int64_t *v = (int64_t *)pMsg;
5367
    for (int32_t i = 0; i < pQueryMsg->numOfOutput; ++i) {
5368 5369
      v[i] = htobe64(v[i]);
    }
5370

5371
    pMsg += sizeof(int64_t) * pQueryMsg->numOfOutput;
5372
  }
5373

5374 5375 5376 5377
  if (pQueryMsg->numOfTags > 0) {
    (*tagCols) = calloc(1, sizeof(SColumnInfo) * pQueryMsg->numOfTags);
    for (int32_t i = 0; i < pQueryMsg->numOfTags; ++i) {
      SColumnInfo* pTagCol = (SColumnInfo*) pMsg;
5378

5379 5380 5381 5382
      pTagCol->colId = htons(pTagCol->colId);
      pTagCol->bytes = htons(pTagCol->bytes);
      pTagCol->type  = htons(pTagCol->type);
      pTagCol->numOfFilters = 0;
5383

5384
      (*tagCols)[i] = *pTagCol;
5385
      pMsg += sizeof(SColumnInfo);
5386
    }
H
hjxilinx 已提交
5387
  }
5388

5389 5390 5391 5392 5393 5394
  // the tag query condition expression string is located at the end of query msg
  if (pQueryMsg->tagCondLen > 0) {
    *tagCond = calloc(1, pQueryMsg->tagCondLen);
    memcpy(*tagCond, pMsg, pQueryMsg->tagCondLen);
    pMsg += pQueryMsg->tagCondLen;
  }
5395

weixin_48148422's avatar
weixin_48148422 已提交
5396
  if (*pMsg != 0) {
5397
    size_t len = strlen(pMsg) + 1;
5398

5399
    *tbnameCond = malloc(len);
5400 5401 5402 5403 5404
    if (*tbnameCond == NULL) {
      code = TSDB_CODE_QRY_OUT_OF_MEMORY;
      goto _cleanup;
    }

weixin_48148422's avatar
weixin_48148422 已提交
5405
    strcpy(*tbnameCond, pMsg);
5406
    pMsg += len;
weixin_48148422's avatar
weixin_48148422 已提交
5407
  }
5408

5409
  qDebug("qmsg:%p query %d tables, type:%d, qrange:%" PRId64 "-%" PRId64 ", numOfGroupbyTagCols:%d, order:%d, "
H
Haojun Liao 已提交
5410 5411
         "outputCols:%d, numOfCols:%d, interval:%" PRId64 ", fillType:%d, comptsLen:%d, compNumOfBlocks:%d, limit:%" PRId64 ", offset:%" PRId64,
         pQueryMsg, pQueryMsg->numOfTables, pQueryMsg->queryType, pQueryMsg->window.skey, pQueryMsg->window.ekey, pQueryMsg->numOfGroupCols,
5412
         pQueryMsg->order, pQueryMsg->numOfOutput, pQueryMsg->numOfCols, pQueryMsg->intervalTime,
H
Haojun Liao 已提交
5413
         pQueryMsg->fillType, pQueryMsg->tsLen, pQueryMsg->tsNumOfBlocks, pQueryMsg->limit, pQueryMsg->offset);
5414 5415

  return TSDB_CODE_SUCCESS;
dengyihao's avatar
dengyihao 已提交
5416 5417 5418 5419 5420 5421 5422 5423 5424

_cleanup:
  tfree(*pExpr);
  taosArrayDestroy(*pTableIdList);
  *pTableIdList = NULL;
  tfree(*tbnameCond);
  tfree(*groupbyCols);
  tfree(*tagCols);
  tfree(*tagCond);
5425 5426

  return code;
5427 5428
}

H
hjxilinx 已提交
5429
/*
 * Build the expression tree of an arithmetic expression from the binary string stored in
 * the first argument of the expression and attach it to pArithExprInfo->pExpr.
 *
 * @param pArithExprInfo  expression info; base.arg[0] holds the binary string and its size
 * @param pQueryMsg       only used to tag the log messages
 * @return TSDB_CODE_SUCCESS on success, the code caught from the TRY block when building
 *         the tree fails, or TSDB_CODE_QRY_APP_ERROR when no tree is produced
 */
static int32_t buildAirthmeticExprFromMsg(SExprInfo *pArithExprInfo, SQueryTableMsg *pQueryMsg) {
  qDebug("qmsg:%p create arithmetic expr from binary string: %s", pQueryMsg, pArithExprInfo->base.arg[0].argValue.pz);

  tExprNode* pRoot = NULL;

  TRY(TSDB_MAX_TAGS) {
    pRoot = exprTreeFromBinary(pArithExprInfo->base.arg[0].argValue.pz, pArithExprInfo->base.arg[0].argBytes);
  } CATCH( code ) {
    CLEANUP_EXECUTE();
    qError("qmsg:%p failed to create arithmetic expression string from:%s, reason: %s", pQueryMsg, pArithExprInfo->base.arg[0].argValue.pz, tstrerror(code));
    return code;
  } END_TRY

  if (pRoot != NULL) {
    pArithExprInfo->pExpr = pRoot;
    return TSDB_CODE_SUCCESS;
  }

  qError("qmsg:%p failed to create arithmetic expression string from:%s", pQueryMsg, pArithExprInfo->base.arg[0].argValue.pz);
  return TSDB_CODE_QRY_APP_ERROR;
}

H
Haojun Liao 已提交
5450
static int32_t createQFunctionExprFromMsg(SQueryTableMsg *pQueryMsg, SExprInfo **pExprInfo, SSqlFuncMsg **pExprMsg,
5451 5452
    SColumnInfo* pTagCols) {
  *pExprInfo = NULL;
H
hjxilinx 已提交
5453
  int32_t code = TSDB_CODE_SUCCESS;
5454

H
Haojun Liao 已提交
5455
  SExprInfo *pExprs = (SExprInfo *)calloc(pQueryMsg->numOfOutput, sizeof(SExprInfo));
5456
  if (pExprs == NULL) {
5457
    return TSDB_CODE_QRY_OUT_OF_MEMORY;
5458 5459 5460 5461 5462
  }

  bool    isSuperTable = QUERY_IS_STABLE_QUERY(pQueryMsg->queryType);
  int16_t tagLen = 0;

5463
  for (int32_t i = 0; i < pQueryMsg->numOfOutput; ++i) {
5464
    pExprs[i].base = *pExprMsg[i];
5465
    pExprs[i].bytes = 0;
5466 5467 5468 5469

    int16_t type = 0;
    int16_t bytes = 0;

5470
    // parse the arithmetic expression
5471
    if (pExprs[i].base.functionId == TSDB_FUNC_ARITHM) {
5472
      code = buildAirthmeticExprFromMsg(&pExprs[i], pQueryMsg);
5473

5474 5475 5476
      if (code != TSDB_CODE_SUCCESS) {
        tfree(pExprs);
        return code;
5477 5478
      }

5479
      type  = TSDB_DATA_TYPE_DOUBLE;
5480
      bytes = tDataTypeDesc[type].nSize;
H
Haojun Liao 已提交
5481
    } else if (pExprs[i].base.colInfo.colId == TSDB_TBNAME_COLUMN_INDEX && pExprs[i].base.functionId == TSDB_FUNC_TAGPRJ) {  // parse the normal column
H
Haojun Liao 已提交
5482 5483 5484
      SSchema s = tGetTableNameColumnSchema();
      type  = s.type;
      bytes = s.bytes;
B
Bomin Zhang 已提交
5485
    } else{
5486
      int32_t j = getColumnIndexInSource(pQueryMsg, &pExprs[i].base, pTagCols);
dengyihao's avatar
dengyihao 已提交
5487
      assert(j < pQueryMsg->numOfCols || j < pQueryMsg->numOfTags);
H
Haojun Liao 已提交
5488

dengyihao's avatar
dengyihao 已提交
5489
      if (pExprs[i].base.colInfo.colId != TSDB_TBNAME_COLUMN_INDEX && j >= 0) {
H
Haojun Liao 已提交
5490 5491 5492 5493
        SColumnInfo* pCol = (TSDB_COL_IS_TAG(pExprs[i].base.colInfo.flag))? &pTagCols[j]:&pQueryMsg->colList[j];
        type = pCol->type;
        bytes = pCol->bytes;
      } else {
H
Haojun Liao 已提交
5494
        SSchema s = tGetTableNameColumnSchema();
H
hjxilinx 已提交
5495

H
Haojun Liao 已提交
5496 5497 5498
        type  = s.type;
        bytes = s.bytes;
      }
5499 5500
    }

5501 5502
    int32_t param = pExprs[i].base.arg[0].argValue.i64;
    if (getResultDataInfo(type, bytes, pExprs[i].base.functionId, param, &pExprs[i].type, &pExprs[i].bytes,
5503
                          &pExprs[i].interBytes, 0, isSuperTable) != TSDB_CODE_SUCCESS) {
5504
      tfree(pExprs);
5505
      return TSDB_CODE_QRY_INVALID_MSG;
5506 5507
    }

5508
    if (pExprs[i].base.functionId == TSDB_FUNC_TAG_DUMMY || pExprs[i].base.functionId == TSDB_FUNC_TS_DUMMY) {
5509
      tagLen += pExprs[i].bytes;
5510
    }
5511
    assert(isValidDataType(pExprs[i].type));
5512 5513 5514
  }

  // TODO refactor
5515
  for (int32_t i = 0; i < pQueryMsg->numOfOutput; ++i) {
5516 5517
    pExprs[i].base = *pExprMsg[i];
    int16_t functId = pExprs[i].base.functionId;
5518

5519
    if (functId == TSDB_FUNC_TOP || functId == TSDB_FUNC_BOTTOM) {
5520
      int32_t j = getColumnIndexInSource(pQueryMsg, &pExprs[i].base, pTagCols);
5521 5522 5523 5524 5525
      assert(j < pQueryMsg->numOfCols);

      SColumnInfo *pCol = &pQueryMsg->colList[j];

      int32_t ret =
5526
          getResultDataInfo(pCol->type, pCol->bytes, functId, pExprs[i].base.arg[0].argValue.i64,
5527
                            &pExprs[i].type, &pExprs[i].bytes, &pExprs[i].interBytes, tagLen, isSuperTable);
5528 5529 5530
      assert(ret == TSDB_CODE_SUCCESS);
    }
  }
5531
  *pExprInfo = pExprs;
5532 5533 5534 5535

  return TSDB_CODE_SUCCESS;
}

5536
/*
 * Create the group-by descriptor from the query message.
 *
 * pQueryMsg  query message; numOfGroupCols, orderType and orderByIdx are read
 * pColIndex  array of numOfGroupCols column indexes, copied into the result
 * code       [out] set to TSDB_CODE_QRY_OUT_OF_MEMORY when an allocation fails;
 *            left untouched otherwise
 *
 * Returns NULL when the query has no group-by columns or when an allocation
 * fails (the latter is distinguished by *code); otherwise a heap allocated
 * SSqlGroupbyExpr owned by the caller.
 */
static SSqlGroupbyExpr *createGroupbyExprFromMsg(SQueryTableMsg *pQueryMsg, SColIndex *pColIndex, int32_t *code) {
  if (pQueryMsg->numOfGroupCols == 0) {
    return NULL;
  }

  // using group by tag columns
  SSqlGroupbyExpr *pGroupbyExpr = (SSqlGroupbyExpr *)calloc(1, sizeof(SSqlGroupbyExpr));
  if (pGroupbyExpr == NULL) {
    *code = TSDB_CODE_QRY_OUT_OF_MEMORY;
    return NULL;
  }

  pGroupbyExpr->numOfGroupCols = pQueryMsg->numOfGroupCols;
  pGroupbyExpr->orderType = pQueryMsg->orderType;
  pGroupbyExpr->orderIndex = pQueryMsg->orderByIdx;

  pGroupbyExpr->columnInfo = taosArrayInit(pQueryMsg->numOfGroupCols, sizeof(SColIndex));
  if (pGroupbyExpr->columnInfo == NULL) {
    // do not push into a missing array; report the failure to the caller instead
    tfree(pGroupbyExpr);
    *code = TSDB_CODE_QRY_OUT_OF_MEMORY;
    return NULL;
  }

  for(int32_t i = 0; i < pQueryMsg->numOfGroupCols; ++i) {
    taosArrayPush(pGroupbyExpr->columnInfo, &pColIndex[i]);
  }

  return pGroupbyExpr;
}

5560
/*
 * Build pQuery->pFilterInfo: one SSingleColumnFilterInfo per column of
 * pQuery->colList that carries at least one filter, each with an array of
 * SColumnFilterElem whose callback (fp) is chosen from the range/value filter
 * function tables of the column type.
 *
 * pQInfo  only used as an identifier in log messages
 * pQuery  query whose colList is inspected; numOfFilterCols and pFilterInfo
 *         are written
 *
 * Returns TSDB_CODE_SUCCESS, TSDB_CODE_QRY_OUT_OF_MEMORY when an allocation
 * fails, or TSDB_CODE_QRY_INVALID_MSG when a filter is malformed. On failure
 * the partially built pFilterInfo stays attached to pQuery, in a state where
 * counters and pointers agree, so that the owner can release it.
 */
static int32_t createFilterInfo(void *pQInfo, SQuery *pQuery) {
  for (int32_t i = 0; i < pQuery->numOfCols; ++i) {
    if (pQuery->colList[i].numOfFilters > 0) {
      pQuery->numOfFilterCols++;
    }
  }

  if (pQuery->numOfFilterCols == 0) {
    return TSDB_CODE_SUCCESS;
  }

  pQuery->pFilterInfo = calloc(pQuery->numOfFilterCols, sizeof(SSingleColumnFilterInfo));
  if (pQuery->pFilterInfo == NULL) {
    // keep the counter consistent with the (missing) array so that cleanup code does not walk it
    pQuery->numOfFilterCols = 0;
    return TSDB_CODE_QRY_OUT_OF_MEMORY;
  }

  for (int32_t i = 0, j = 0; i < pQuery->numOfCols; ++i) {
    if (pQuery->colList[i].numOfFilters <= 0) {
      continue;
    }

    SSingleColumnFilterInfo *pFilterInfo = &pQuery->pFilterInfo[j];

    pFilterInfo->info = pQuery->colList[i];

    pFilterInfo->numOfFilters = pQuery->colList[i].numOfFilters;
    pFilterInfo->pFilters = calloc(pFilterInfo->numOfFilters, sizeof(SColumnFilterElem));
    if (pFilterInfo->pFilters == NULL) {
      pFilterInfo->numOfFilters = 0;
      return TSDB_CODE_QRY_OUT_OF_MEMORY;
    }

    for (int32_t f = 0; f < pFilterInfo->numOfFilters; ++f) {
      SColumnFilterElem *pSingleColFilter = &pFilterInfo->pFilters[f];
      pSingleColFilter->filterInfo = pQuery->colList[i].filters[f];

      int32_t lower = pSingleColFilter->filterInfo.lowerRelOptr;
      int32_t upper = pSingleColFilter->filterInfo.upperRelOptr;

      if (lower == TSDB_RELATION_INVALID && upper == TSDB_RELATION_INVALID) {
        qError("QInfo:%p invalid filter info", pQInfo);
        return TSDB_CODE_QRY_INVALID_MSG;
      }

      int16_t type  = pQuery->colList[i].type;
      int16_t bytes = pQuery->colList[i].bytes;

      // todo refactor
      __filter_func_t *rangeFilterArray = getRangeFilterFuncArray(type);
      __filter_func_t *filterArray = getValueFilterFuncArray(type);

      if (rangeFilterArray == NULL && filterArray == NULL) {
        qError("QInfo:%p failed to get filter function, invalid data type:%d", pQInfo, type);
        return TSDB_CODE_QRY_INVALID_MSG;
      }

      if ((lower == TSDB_RELATION_GREATER_EQUAL || lower == TSDB_RELATION_GREATER) &&
          (upper == TSDB_RELATION_LESS_EQUAL || upper == TSDB_RELATION_LESS)) {
        // two-sided range: slot 1..4 of the range table is selected by the open/closed combination
        if (rangeFilterArray == NULL) {
          qError("QInfo:%p failed to get filter function, invalid data type:%d", pQInfo, type);
          return TSDB_CODE_QRY_INVALID_MSG;
        }

        if (lower == TSDB_RELATION_GREATER_EQUAL) {
          if (upper == TSDB_RELATION_LESS_EQUAL) {
            pSingleColFilter->fp = rangeFilterArray[4];
          } else {
            pSingleColFilter->fp = rangeFilterArray[2];
          }
        } else {
          if (upper == TSDB_RELATION_LESS_EQUAL) {
            pSingleColFilter->fp = rangeFilterArray[3];
          } else {
            pSingleColFilter->fp = rangeFilterArray[1];
          }
        }
      } else {  // set callback filter function
        if (filterArray == NULL) {
          qError("QInfo:%p failed to get filter function, invalid data type:%d", pQInfo, type);
          return TSDB_CODE_QRY_INVALID_MSG;
        }

        if (lower != TSDB_RELATION_INVALID) {
          pSingleColFilter->fp = filterArray[lower];

          if (upper != TSDB_RELATION_INVALID) {
            qError("pQInfo:%p failed to get filter function, invalid filter condition: %d", pQInfo, type);
            return TSDB_CODE_QRY_INVALID_MSG;
          }
        } else {
          pSingleColFilter->fp = filterArray[upper];
        }
      }

      assert(pSingleColFilter->fp != NULL);
      pSingleColFilter->bytes = bytes;
    }

    j++;
  }

  return TSDB_CODE_SUCCESS;
}

5647
/*
 * For every output expression except arithmetic ones, translate the column id
 * referenced by the expression into a position (colIndex) inside
 * pQuery->colList for normal columns, or inside pQuery->tagColList for tag
 * columns. The position is written back into the expression's colInfo.
 */
static void doUpdateExprColumnIndex(SQuery *pQuery) {
  // test the pointer itself before looking through it
  assert(pQuery != NULL && pQuery->pSelectExpr != NULL);

  for (int32_t k = 0; k < pQuery->numOfOutput; ++k) {
    SSqlFuncMsg *pSqlExprMsg = &pQuery->pSelectExpr[k].base;
    if (pSqlExprMsg->functionId == TSDB_FUNC_ARITHM) {
      continue;
    }

    // todo opt performance
    SColIndex *pColIndex = &pSqlExprMsg->colInfo;
    if (!TSDB_COL_IS_TAG(pColIndex->flag)) {
      int32_t f = 0;
      for (f = 0; f < pQuery->numOfCols; ++f) {
        if (pColIndex->colId == pQuery->colList[f].colId) {
          pColIndex->colIndex = f;
          break;
        }
      }

      // a normal column must always be found in the column list
      assert (f < pQuery->numOfCols);
    } else {
      int32_t f = 0;
      for (f = 0; f < pQuery->numOfTags; ++f) {
        if (pColIndex->colId == pQuery->tagColList[f].colId) {
          pColIndex->colIndex = f;
          break;
        }
      }

      // the table name pseudo column is flagged as a tag but is not part of tagColList
      assert(f < pQuery->numOfTags || pColIndex->colId == TSDB_TBNAME_COLUMN_INDEX);
    }
  }
}

weixin_48148422's avatar
weixin_48148422 已提交
5682

5683
/*
 * Three-way comparison of two STableIdInfo entries by uid, in the form
 * expected by sort/search callbacks: 1 if a > b, -1 if a < b, 0 if equal.
 */
static int compareTableIdInfo(const void* a, const void* b) {
  const STableIdInfo* lhs = (const STableIdInfo*)a;
  const STableIdInfo* rhs = (const STableIdInfo*)b;

  // each comparison yields 0 or 1, so the difference is exactly -1, 0 or 1
  return (lhs->uid > rhs->uid) - (lhs->uid < rhs->uid);
}

dengyihao's avatar
dengyihao 已提交
5691 5692
// forward declaration: freeQInfo is defined further down but is already needed by the
// error paths of createQInfoImpl and initQInfo
static void freeQInfo(SQInfo *pQInfo);

H
Haojun Liao 已提交
5693 5694 5695 5696 5697 5698 5699 5700 5701 5702 5703 5704 5705 5706
/*
 * Derive the capacity (in rows) of the result buffer and the fill threshold
 * from the size of one result row: enough rows to fill RESULT_MSG_MIN_SIZE
 * bytes, but never fewer than RESULT_MSG_MIN_ROWS. The threshold is
 * RESULT_THRESHOLD_RATIO of the capacity.
 *
 * Writes pQuery->rec.capacity and pQuery->rec.threshold.
 */
static void calResultBufSize(SQuery* pQuery) {
  const int32_t RESULT_MSG_MIN_SIZE  = 1024 * (1024 + 512);  // bytes
  const int32_t RESULT_MSG_MIN_ROWS  = 8192;
  const float RESULT_THRESHOLD_RATIO = 0.85;

  int32_t numOfRes = RESULT_MSG_MIN_ROWS;

  // a row size of zero (no output columns) must not reach the division below
  if (pQuery->rowSize > 0) {
    int32_t rowsInMinMsg = RESULT_MSG_MIN_SIZE / pQuery->rowSize;
    if (rowsInMinMsg > numOfRes) {
      numOfRes = rowsInMinMsg;
    }
  }

  pQuery->rec.capacity = numOfRes;
  pQuery->rec.threshold = numOfRes * RESULT_THRESHOLD_RATIO;
}

weixin_48148422's avatar
weixin_48148422 已提交
5707
static SQInfo *createQInfoImpl(SQueryTableMsg *pQueryMsg, SArray* pTableIdList, SSqlGroupbyExpr *pGroupbyExpr, SExprInfo *pExprs,
5708
                               STableGroupInfo *pTableGroupInfo, SColumnInfo* pTagCols) {
B
Bomin Zhang 已提交
5709 5710 5711
  int16_t numOfCols = pQueryMsg->numOfCols;
  int16_t numOfOutput = pQueryMsg->numOfOutput;

5712 5713
  SQInfo *pQInfo = (SQInfo *)calloc(1, sizeof(SQInfo));
  if (pQInfo == NULL) {
B
Bomin Zhang 已提交
5714
    goto _cleanup_qinfo;
5715
  }
B
Bomin Zhang 已提交
5716 5717 5718
  // to make sure third party won't overwrite this structure
  pQInfo->signature = pQInfo;
  pQInfo->tableGroupInfo = *pTableGroupInfo;
5719 5720

  SQuery *pQuery = calloc(1, sizeof(SQuery));
B
Bomin Zhang 已提交
5721 5722 5723
  if (pQuery == NULL) {
    goto _cleanup_query;
  }
5724 5725
  pQInfo->runtimeEnv.pQuery = pQuery;

5726
  pQuery->numOfCols       = numOfCols;
H
hjxilinx 已提交
5727
  pQuery->numOfOutput     = numOfOutput;
5728 5729 5730
  pQuery->limit.limit     = pQueryMsg->limit;
  pQuery->limit.offset    = pQueryMsg->offset;
  pQuery->order.order     = pQueryMsg->order;
5731
  pQuery->order.orderColId = pQueryMsg->orderColId;
5732 5733 5734 5735
  pQuery->pSelectExpr     = pExprs;
  pQuery->pGroupbyExpr    = pGroupbyExpr;
  pQuery->intervalTime    = pQueryMsg->intervalTime;
  pQuery->slidingTime     = pQueryMsg->slidingTime;
5736
  pQuery->slidingTimeUnit = pQueryMsg->slidingTimeUnit;
5737
  pQuery->fillType        = pQueryMsg->fillType;
5738
  pQuery->numOfTags       = pQueryMsg->numOfTags;
B
Bomin Zhang 已提交
5739
  pQuery->tagColList      = pTagCols;
H
Haojun Liao 已提交
5740

5741
  pQuery->colList = calloc(numOfCols, sizeof(SSingleColumnFilterInfo));
5742
  if (pQuery->colList == NULL) {
5743
    goto _cleanup;
5744
  }
5745

H
hjxilinx 已提交
5746
  for (int16_t i = 0; i < numOfCols; ++i) {
5747
    pQuery->colList[i] = pQueryMsg->colList[i];
5748
    pQuery->colList[i].filters = tscFilterInfoClone(pQueryMsg->colList[i].filters, pQuery->colList[i].numOfFilters);
H
hjxilinx 已提交
5749
  }
5750

5751
  // calculate the result row size
5752 5753 5754
  for (int16_t col = 0; col < numOfOutput; ++col) {
    assert(pExprs[col].bytes > 0);
    pQuery->rowSize += pExprs[col].bytes;
5755
  }
5756

5757
  doUpdateExprColumnIndex(pQuery);
5758

5759
  int32_t ret = createFilterInfo(pQInfo, pQuery);
5760
  if (ret != TSDB_CODE_SUCCESS) {
5761
    goto _cleanup;
5762 5763 5764
  }

  // prepare the result buffer
5765
  pQuery->sdata = (tFilePage **)calloc(pQuery->numOfOutput, POINTER_BYTES);
5766
  if (pQuery->sdata == NULL) {
5767
    goto _cleanup;
5768 5769
  }

H
Haojun Liao 已提交
5770
  calResultBufSize(pQuery);
5771

5772
  for (int32_t col = 0; col < pQuery->numOfOutput; ++col) {
5773
    assert(pExprs[col].interBytes >= pExprs[col].bytes);
5774 5775

    // allocate additional memory for interResults that are usually larger then final results
5776 5777
    size_t size = (pQuery->rec.capacity + 1) * pExprs[col].bytes + pExprs[col].interBytes + sizeof(tFilePage);
    pQuery->sdata[col] = (tFilePage *)calloc(1, size);
5778
    if (pQuery->sdata[col] == NULL) {
5779
      goto _cleanup;
5780 5781 5782
    }
  }

5783
  if (pQuery->fillType != TSDB_FILL_NONE) {
5784 5785
    pQuery->fillVal = malloc(sizeof(int64_t) * pQuery->numOfOutput);
    if (pQuery->fillVal == NULL) {
5786
      goto _cleanup;
5787 5788 5789
    }

    // the first column is the timestamp
5790
    memcpy(pQuery->fillVal, (char *)pQueryMsg->fillVal, pQuery->numOfOutput * sizeof(int64_t));
5791 5792
  }

dengyihao's avatar
dengyihao 已提交
5793 5794 5795 5796 5797 5798
  size_t numOfGroups = 0;
  if (pTableGroupInfo->pGroupList != NULL) {
    numOfGroups = taosArrayGetSize(pTableGroupInfo->pGroupList);

    pQInfo->tableqinfoGroupInfo.pGroupList = taosArrayInit(numOfGroups, POINTER_BYTES);
    pQInfo->tableqinfoGroupInfo.numOfTables = pTableGroupInfo->numOfTables;
H
Haojun Liao 已提交
5799 5800 5801
    pQInfo->tableqinfoGroupInfo.map = taosHashInit(pTableGroupInfo->numOfTables,
                                                   taosGetDefaultHashFunction(TSDB_DATA_TYPE_INT), false);
  }
5802

weixin_48148422's avatar
weixin_48148422 已提交
5803 5804
  int tableIndex = 0;
  STimeWindow window = pQueryMsg->window;
5805
  taosArraySort(pTableIdList, compareTableIdInfo);
5806

H
Haojun Liao 已提交
5807
  pQInfo->runtimeEnv.interBufSize = getOutputInterResultBufSize(pQuery);
H
Haojun Liao 已提交
5808 5809 5810
  pQInfo->pBuf = calloc(pTableGroupInfo->numOfTables, sizeof(STableQueryInfo));
  int32_t index = 0;

H
hjxilinx 已提交
5811
  for(int32_t i = 0; i < numOfGroups; ++i) {
5812
    SArray* pa = taosArrayGetP(pTableGroupInfo->pGroupList, i);
5813

H
Haojun Liao 已提交
5814
    size_t s = taosArrayGetSize(pa);
5815
    SArray* p1 = taosArrayInit(s, POINTER_BYTES);
B
Bomin Zhang 已提交
5816 5817 5818
    if (p1 == NULL) {
      goto _cleanup;
    }
5819

H
hjxilinx 已提交
5820
    for(int32_t j = 0; j < s; ++j) {
5821
      void* pTable = taosArrayGetP(pa, j);
H
Haojun Liao 已提交
5822
      STableId* id = TSDB_TABLEID(pTable);
5823

H
Haojun Liao 已提交
5824
      STableIdInfo* pTableId = taosArraySearch(pTableIdList, id, compareTableIdInfo);
weixin_48148422's avatar
weixin_48148422 已提交
5825 5826 5827
      if (pTableId != NULL ) {
        window.skey = pTableId->key;
      } else {
B
Bomin Zhang 已提交
5828
        window.skey = pQueryMsg->window.skey;
weixin_48148422's avatar
weixin_48148422 已提交
5829
      }
5830

H
Haojun Liao 已提交
5831 5832
      void* buf = pQInfo->pBuf + index * sizeof(STableQueryInfo);
      STableQueryInfo* item = createTableQueryInfo(&pQInfo->runtimeEnv, pTable, window, buf);
B
Bomin Zhang 已提交
5833 5834 5835
      if (item == NULL) {
        goto _cleanup;
      }
5836
      item->groupIndex = i;
H
hjxilinx 已提交
5837
      taosArrayPush(p1, &item);
H
Haojun Liao 已提交
5838 5839
      taosHashPut(pQInfo->tableqinfoGroupInfo.map, &id->tid, sizeof(id->tid), &item, POINTER_BYTES);
      index += 1;
H
hjxilinx 已提交
5840
    }
5841

5842
    taosArrayPush(pQInfo->tableqinfoGroupInfo.pGroupList, &p1);
H
hjxilinx 已提交
5843
  }
5844

weixin_48148422's avatar
weixin_48148422 已提交
5845 5846
  pQInfo->arrTableIdInfo = taosArrayInit(tableIndex, sizeof(STableIdInfo));

5847
  pQuery->pos = -1;
5848
  pQuery->window = pQueryMsg->window;
5849

5850
  if (sem_init(&pQInfo->dataReady, 0, 0) != 0) {
5851 5852
    int32_t code = TAOS_SYSTEM_ERROR(errno);
    qError("QInfo:%p init dataReady sem failed, reason:%s", pQInfo, tstrerror(code));
5853
    goto _cleanup;
5854
  }
5855

5856
  colIdCheck(pQuery);
5857

5858
  qDebug("qmsg:%p QInfo:%p created", pQueryMsg, pQInfo);
5859 5860
  return pQInfo;

B
Bomin Zhang 已提交
5861
_cleanup_qinfo:
H
Haojun Liao 已提交
5862
  tsdbDestroyTableGroup(pTableGroupInfo);
B
Bomin Zhang 已提交
5863 5864 5865 5866 5867 5868 5869 5870 5871 5872 5873 5874 5875

_cleanup_query:
  taosArrayDestroy(pGroupbyExpr->columnInfo);
  tfree(pGroupbyExpr);
  tfree(pTagCols);
  for (int32_t i = 0; i < numOfOutput; ++i) {
    SExprInfo* pExprInfo = &pExprs[i];
    if (pExprInfo->pExpr != NULL) {
      tExprTreeDestroy(&pExprInfo->pExpr, NULL);
    }
  }
  tfree(pExprs);

5876
_cleanup:
dengyihao's avatar
dengyihao 已提交
5877
  freeQInfo(pQInfo);
5878 5879 5880
  return NULL;
}

H
hjxilinx 已提交
5881
static bool isValidQInfo(void *param) {
H
hjxilinx 已提交
5882 5883 5884 5885
  SQInfo *pQInfo = (SQInfo *)param;
  if (pQInfo == NULL) {
    return false;
  }
5886

H
hjxilinx 已提交
5887 5888 5889 5890
  /*
   * pQInfo->signature may be changed by another thread, so we assign value of signature
   * into local variable, then compare by using local variable
   */
5891
  uint64_t sig = (uint64_t)pQInfo->signature;
H
hjxilinx 已提交
5892 5893 5894
  return (sig == (uint64_t)pQInfo);
}

H
Haojun Liao 已提交
5895
/*
 * Second stage of query setup: decide whether the query can produce any result
 * at all and, if so, initialise the runtime environment through doInitQInfo.
 *
 * pQueryMsg  query message; the optional ts-comp block (tsLen/tsOffset/...) is read
 * tsdb       storage handle passed through to doInitQInfo
 * vgId       vgroup id passed through to doInitQInfo
 * pQInfo     query object created by createQInfoImpl
 * isSTable   passed through to doInitQInfo
 * param      opaque value stored in pQInfo->param
 *
 * Returns TSDB_CODE_SUCCESS when the query is ready, or already completed
 * because the time range is empty or no table qualified (dataReady is posted
 * in both of those cases). If doInitQInfo fails, pQInfo is released with
 * freeQInfo and the error code is returned; the caller must not use pQInfo
 * afterwards.
 */
static int32_t initQInfo(SQueryTableMsg *pQueryMsg, void *tsdb, int32_t vgId, SQInfo *pQInfo, bool isSTable, void* param) {
  int32_t code = TSDB_CODE_SUCCESS;
  SQuery *pQuery = pQInfo->runtimeEnv.pQuery;

  if ((QUERY_IS_ASC_QUERY(pQuery) && (pQuery->window.skey > pQuery->window.ekey)) ||
      (!QUERY_IS_ASC_QUERY(pQuery) && (pQuery->window.ekey > pQuery->window.skey))) {
    qDebug("QInfo:%p no result in time range %" PRId64 "-%" PRId64 ", order %d", pQInfo, pQuery->window.skey,
           pQuery->window.ekey, pQuery->order.order);
    setQueryStatus(pQuery, QUERY_COMPLETED);
    pQInfo->tableqinfoGroupInfo.numOfTables = 0;
    sem_post(&pQInfo->dataReady);
    return TSDB_CODE_SUCCESS;
  }

  pQInfo->param = param;

  if (pQInfo->tableqinfoGroupInfo.numOfTables == 0) {
    qDebug("QInfo:%p no table qualified for tag filter, abort query", pQInfo);
    setQueryStatus(pQuery, QUERY_COMPLETED);

    sem_post(&pQInfo->dataReady);
    return TSDB_CODE_SUCCESS;
  }

  // The ts buffer is only consumed by doInitQInfo, so it is created after the early exits
  // above; creating it first left it without an owner whenever one of them was taken.
  STSBuf *pTSBuf = NULL;
  if (pQueryMsg->tsLen > 0) {  // open new file to save the result
    char *tsBlock = (char *) pQueryMsg + pQueryMsg->tsOffset;
    pTSBuf = tsBufCreateFromCompBlocks(tsBlock, pQueryMsg->tsNumOfBlocks, pQueryMsg->tsLen, pQueryMsg->tsOrder);

    tsBufResetPos(pTSBuf);
    bool ret = tsBufNextPos(pTSBuf);
    UNUSED(ret);
  }

  // filter the qualified
  code = doInitQInfo(pQInfo, pTSBuf, tsdb, vgId, isSTable);
  if (code != TSDB_CODE_SUCCESS) {
    // table query ref will be decrease during error handling
    freeQInfo(pQInfo);
  }

  return code;
}

B
Bomin Zhang 已提交
5942 5943 5944 5945 5946 5947 5948 5949 5950 5951 5952 5953
/*
 * Release an array of numOfFilters column filters: for every entry flagged as
 * a string filter (filterstr) the buffer referenced by pz is freed, then the
 * array itself. A NULL array is accepted and ignored.
 */
static void freeColumnFilterInfo(SColumnFilterInfo* pFilter, int32_t numOfFilters) {
    if (pFilter != NULL) {
      int32_t idx = 0;
      while (idx < numOfFilters) {
        SColumnFilterInfo* pItem = &pFilter[idx];
        if (pItem->filterstr) {
          free((void*)(pItem->pz));
        }
        ++idx;
      }

      free(pFilter);
    }
}

H
hjxilinx 已提交
5954 5955 5956 5957
/*
 * Release a SQInfo and everything it owns: result buffers, the runtime
 * environment, filter descriptors, output expressions (including arithmetic
 * expression trees), fill values, per-table query info, table groups, the
 * group-by descriptor, tag/column lists and finally the SQuery and the SQInfo
 * itself.
 *
 * The function is also used on objects whose construction failed half way, so
 * members that may not have been set up yet (the SQuery, its sdata table, its
 * filter array) are checked before they are walked. Objects that do not pass
 * isValidQInfo are ignored.
 */
static void freeQInfo(SQInfo *pQInfo) {
  if (!isValidQInfo(pQInfo)) {
    return;
  }

  SQuery *pQuery = pQInfo->runtimeEnv.pQuery;
  setQueryKilled(pQInfo);

  qDebug("QInfo:%p start to free QInfo", pQInfo);
  if (pQuery != NULL && pQuery->sdata != NULL) {
    for (int32_t col = 0; col < pQuery->numOfOutput; ++col) {
      tfree(pQuery->sdata[col]);
    }
  }

  sem_destroy(&(pQInfo->dataReady));

  if (pQuery != NULL) {
    // NOTE(review): teardown is skipped when no SQuery exists, on the assumption that the
    // runtime environment cannot have been initialised without one -- confirm
    teardownQueryRuntimeEnv(&pQInfo->runtimeEnv);

    if (pQuery->pFilterInfo != NULL) {
      for (int32_t i = 0; i < pQuery->numOfFilterCols; ++i) {
        SSingleColumnFilterInfo *pColFilter = &pQuery->pFilterInfo[i];
        if (pColFilter->numOfFilters > 0) {
          tfree(pColFilter->pFilters);
        }
      }
    }

    if (pQuery->pSelectExpr != NULL) {
      for (int32_t i = 0; i < pQuery->numOfOutput; ++i) {
        SExprInfo* pExprInfo = &pQuery->pSelectExpr[i];

        if (pExprInfo->pExpr != NULL) {
          tExprTreeDestroy(&pExprInfo->pExpr, NULL);
        }
      }

      tfree(pQuery->pSelectExpr);
    }

    if (pQuery->fillVal != NULL) {
      tfree(pQuery->fillVal);
    }
  }

  // todo refactor, extract method to destroytableDataInfo
  if (pQInfo->tableqinfoGroupInfo.pGroupList != NULL) {
    int32_t numOfGroups = GET_NUM_OF_TABLEGROUP(pQInfo);
    for (int32_t i = 0; i < numOfGroups; ++i) {
      SArray *p = GET_TABLEGROUP(pQInfo, i);

      size_t num = taosArrayGetSize(p);
      for(int32_t j = 0; j < num; ++j) {
        STableQueryInfo* item = taosArrayGetP(p, j);
        destroyTableQueryInfo(item);
      }

      taosArrayDestroy(p);
    }
  }

  tfree(pQInfo->pBuf);
  taosArrayDestroy(pQInfo->tableqinfoGroupInfo.pGroupList);
  taosHashCleanup(pQInfo->tableqinfoGroupInfo.map);
  tsdbDestroyTableGroup(&pQInfo->tableGroupInfo);
  taosArrayDestroy(pQInfo->arrTableIdInfo);

  if (pQuery != NULL) {
    if (pQuery->pGroupbyExpr != NULL) {
      taosArrayDestroy(pQuery->pGroupbyExpr->columnInfo);
      tfree(pQuery->pGroupbyExpr);
    }

    tfree(pQuery->tagColList);
    tfree(pQuery->pFilterInfo);

    if (pQuery->colList != NULL) {
      for (int32_t i = 0; i < pQuery->numOfCols; i++) {
        SColumnInfo* column = pQuery->colList + i;
        freeColumnFilterInfo(column->filters, column->numOfFilters);
      }
      tfree(pQuery->colList);
    }

    tfree(pQuery->sdata);
    tfree(pQuery);
  }

  qDebug("QInfo:%p QInfo is freed", pQInfo);

  // destroy signature, in order to avoid the query process pass the object safety check
  memset(pQInfo, 0, sizeof(SQInfo));
  tfree(pQInfo);
}

H
hjxilinx 已提交
6041
/*
 * Compute the number of bytes to send back for *numOfRows result rows.
 *
 * For ordinary queries this is rowSize * (*numOfRows). For a ts-comp query
 * with at least one row the result is a file whose path is stored in
 * sdata[0]->data: the file size is returned and also written to *numOfRows.
 * Returns 0 when that file cannot be inspected.
 *
 * TODO handle the case that the file is too large to send back one time
 */
static size_t getResultSize(SQInfo *pQInfo, int64_t *numOfRows) {
  SQuery *pQuery = pQInfo->runtimeEnv.pQuery;

  // the common case: fixed-size rows held in memory
  if (!isTSCompQuery(pQuery) || (*numOfRows) <= 0) {
    return pQuery->rowSize * (*numOfRows);
  }

  struct stat fileStat;
  if (stat(pQuery->sdata[0]->data, &fileStat) != 0) {
    qError("QInfo:%p failed to get file info, path:%s, reason:%s", pQInfo, pQuery->sdata[0]->data, strerror(errno));
    return 0;
  }

  // for a ts-comp query the "row count" reported to the caller is the file size
  *numOfRows = fileStat.st_size;
  return fileStat.st_size;
}
6062

H
hjxilinx 已提交
6063 6064 6065
/*
 * Copy the results of the current round into the response buffer `data` and
 * update the query's progress counters and status.
 *
 * For a ts-comp query the result is a temporary file whose path is stored in
 * sdata[0]->data: its content is read into `data` and the file is removed.
 * Otherwise the in-memory result rows are copied with doCopyQueryResultToMsg.
 *
 * I/O failures on the temporary file are logged; the function still returns
 * TSDB_CODE_SUCCESS in every case.
 * TODO return the error code to the client.
 */
static int32_t doDumpQueryResult(SQInfo *pQInfo, char *data) {
  // the remained number of retrieved rows, not the interpolated result
  SQuery *pQuery = pQInfo->runtimeEnv.pQuery;

  // load data from file to msg buffer
  if (isTSCompQuery(pQuery)) {
    int32_t fd = open(pQuery->sdata[0]->data, O_RDONLY, 0666);

    // make sure file exist
    if (FD_VALID(fd)) {
      int32_t s = lseek(fd, 0, SEEK_END);

      if (s < 0 || lseek(fd, 0, SEEK_SET) < 0) {
        // never hand a negative length to read(): it would be converted to a huge size
        qError("QInfo:%p failed to seek in tmp file of ts-comp data, path:%s, reason:%s", pQInfo,
               pQuery->sdata[0]->data, strerror(errno));
      } else {
        qDebug("QInfo:%p ts comp data return, file:%s, size:%d", pQInfo, pQuery->sdata[0]->data, s);

        int64_t sz = read(fd, data, s);  // signed, so that the -1 error result stays visible
        if (sz < 0) {
          qError("QInfo:%p failed to read ts-comp data, path:%s, reason:%s", pQInfo, pQuery->sdata[0]->data,
                 strerror(errno));
        } else if (sz != s) {
          qError("QInfo:%p incomplete read of ts-comp data, path:%s, expect:%d, read:%" PRId64, pQInfo,
                 pQuery->sdata[0]->data, s, sz);
        }
      }

      close(fd);
      unlink(pQuery->sdata[0]->data);
    } else {
      // todo return the error code to client and handle invalid fd
      qError("QInfo:%p failed to open tmp file to send ts-comp data to client, path:%s, reason:%s", pQInfo,
             pQuery->sdata[0]->data, strerror(errno));
      if (fd != -1) {
        close(fd);
      }
    }

    // all data returned, set query over
    if (Q_STATUS_EQUAL(pQuery->status, QUERY_COMPLETED)) {
      setQueryStatus(pQuery, QUERY_OVER);
    }
  } else {
    doCopyQueryResultToMsg(pQInfo, pQuery->rec.rows, data);
  }

  pQuery->rec.total += pQuery->rec.rows;
  qDebug("QInfo:%p current numOfRes rows:%" PRId64 ", total:%" PRId64, pQInfo, pQuery->rec.rows, pQuery->rec.total);

  if (pQuery->limit.limit > 0 && pQuery->limit.limit == pQuery->rec.total) {
    qDebug("QInfo:%p results limitation reached, limitation:%"PRId64, pQInfo, pQuery->limit.limit);
    setQueryStatus(pQuery, QUERY_OVER);
  }

  return TSDB_CODE_SUCCESS;
}

6113 6114 6115 6116 6117 6118 6119
/*
 * Management state for a pool of query handles.
 * NOTE(review): the roles of vgId, closed and lock below are inferred from their
 * names only -- confirm against the code that uses this struct.
 */
typedef struct SQueryMgmt {
  SCacheObj      *qinfoPool;      // query handle pool
  int32_t         vgId;           // presumably the id of the vgroup this manager belongs to -- confirm
  bool            closed;         // presumably set once the manager has been shut down -- confirm
  pthread_mutex_t lock;           // presumably serializes access to this struct -- confirm
} SQueryMgmt;

H
Haojun Liao 已提交
6120
/*
 * Create a query object from a query message received from a client.
 *
 * tsdb       storage handle used to resolve the tables addressed by the query
 * vgId       vgroup id, passed on to initQInfo
 * pQueryMsg  raw query message; it is decoded in place by convertQueryMsg and
 *            the filters of its column list are released before returning
 * param      opaque value stored in the query object by initQInfo
 * pQInfo     [out] receives the new query handle; set to NULL on any failure
 *
 * Returns TSDB_CODE_SUCCESS or an error code. All intermediate objects decoded
 * from the message are released here unless createQInfoImpl has taken them over.
 */
int32_t qCreateQueryInfo(void* tsdb, int32_t vgId, SQueryTableMsg* pQueryMsg, void* param, qinfo_t* pQInfo) {
  assert(pQueryMsg != NULL && tsdb != NULL);

  int32_t code = TSDB_CODE_SUCCESS;

  char *        tagCond = NULL, *tbnameCond = NULL;
  SArray *      pTableIdList = NULL;
  SSqlFuncMsg **pExprMsg = NULL;
  SColIndex *   pGroupColIndex = NULL;
  SColumnInfo*  pTagColumnInfo = NULL;
  SExprInfo     *pExprs = NULL;
  SSqlGroupbyExpr *pGroupbyExpr = NULL;

  if ((code = convertQueryMsg(pQueryMsg, &pTableIdList, &pExprMsg, &tagCond, &tbnameCond, &pGroupColIndex, &pTagColumnInfo)) !=
         TSDB_CODE_SUCCESS) {
    goto _over;
  }

  if (pQueryMsg->numOfTables <= 0) {
    qError("Invalid number of tables to query, numOfTables:%d", pQueryMsg->numOfTables);
    code = TSDB_CODE_QRY_INVALID_MSG;
    goto _over;
  }

  if (pTableIdList == NULL || taosArrayGetSize(pTableIdList) == 0) {
    qError("qmsg:%p, SQueryTableMsg wrong format", pQueryMsg);
    code = TSDB_CODE_QRY_INVALID_MSG;
    goto _over;
  }

  if ((code = createQFunctionExprFromMsg(pQueryMsg, &pExprs, pExprMsg, pTagColumnInfo)) != TSDB_CODE_SUCCESS) {
    goto _over;
  }

  pGroupbyExpr = createGroupbyExprFromMsg(pQueryMsg, pGroupColIndex, &code);
  if ((pGroupbyExpr == NULL && pQueryMsg->numOfGroupCols != 0) || code != TSDB_CODE_SUCCESS) {
    goto _over;
  }

  bool isSTableQuery = false;
  STableGroupInfo tableGroupInfo = {0};

  if (TSDB_QUERY_HAS_TYPE(pQueryMsg->queryType, TSDB_QUERY_TYPE_TABLE_QUERY)) {
    STableIdInfo *id = taosArrayGet(pTableIdList, 0);

    qDebug("qmsg:%p query normal table, uid:%"PRId64", tid:%d", pQueryMsg, id->uid, id->tid);
    if ((code = tsdbGetOneTableGroup(tsdb, id->uid, &tableGroupInfo)) != TSDB_CODE_SUCCESS) {
      goto _over;
    }
  } else if (TSDB_QUERY_HAS_TYPE(pQueryMsg->queryType, TSDB_QUERY_TYPE_MULTITABLE_QUERY|TSDB_QUERY_TYPE_STABLE_QUERY)) {
    isSTableQuery = true;
    // TODO: need a macro from TSDB to check if table is super table

    // also note there's possibility that only one table in the super table
    if (!TSDB_QUERY_HAS_TYPE(pQueryMsg->queryType, TSDB_QUERY_TYPE_MULTITABLE_QUERY)) {
      STableIdInfo *id = taosArrayGet(pTableIdList, 0);

      // group by normal column, do not pass the group by condition to tsdb to group table into different group
      int32_t numOfGroupByCols = pQueryMsg->numOfGroupCols;
      if (pQueryMsg->numOfGroupCols == 1 && !TSDB_COL_IS_TAG(pGroupColIndex->flag)) {
        numOfGroupByCols = 0;
      }

      code = tsdbQuerySTableByTagCond(tsdb, id->uid, tagCond, pQueryMsg->tagCondLen, pQueryMsg->tagNameRelType, tbnameCond, &tableGroupInfo, pGroupColIndex,
                                          numOfGroupByCols);
      if (code != TSDB_CODE_SUCCESS) {
        qError("qmsg:%p failed to QueryStable, reason: %s", pQueryMsg, tstrerror(code));
        goto _over;
      }
    } else {
      code = tsdbGetTableGroupFromIdList(tsdb, pTableIdList, &tableGroupInfo);
      if (code != TSDB_CODE_SUCCESS) {
        goto _over;
      }

      qDebug("qmsg:%p query on %zu tables in one group from client", pQueryMsg, tableGroupInfo.numOfTables);
    }
  } else {
    // neither a table query nor a (multi/super) table query: reject the message instead of
    // continuing with an empty table group when asserts are compiled out
    qError("qmsg:%p unsupported query type", pQueryMsg);
    code = TSDB_CODE_QRY_INVALID_MSG;
    goto _over;
  }

  (*pQInfo) = createQInfoImpl(pQueryMsg, pTableIdList, pGroupbyExpr, pExprs, &tableGroupInfo, pTagColumnInfo);

  // createQInfoImpl has taken over these objects, whether it succeeded or not
  pExprs = NULL;
  pGroupbyExpr = NULL;
  pTagColumnInfo = NULL;

  if ((*pQInfo) == NULL) {
    code = TSDB_CODE_QRY_OUT_OF_MEMORY;
    goto _over;
  }

  code = initQInfo(pQueryMsg, tsdb, vgId, *pQInfo, isSTableQuery, param);

_over:
  free(tagCond);
  free(tbnameCond);
  free(pGroupColIndex);
  if (pGroupbyExpr != NULL) {
    taosArrayDestroy(pGroupbyExpr->columnInfo);
    free(pGroupbyExpr);
  }
  free(pTagColumnInfo);

  if (pExprs != NULL) {
    // still owned here: release the decoded arithmetic expression trees together with the array
    for (int32_t i = 0; i < pQueryMsg->numOfOutput; ++i) {
      if (pExprs[i].pExpr != NULL) {
        tExprTreeDestroy(&pExprs[i].pExpr, NULL);
      }
    }
    free(pExprs);
  }

  free(pExprMsg);
  taosArrayDestroy(pTableIdList);

  for (int32_t i = 0; i < pQueryMsg->numOfCols; i++) {
    SColumnInfo* column = pQueryMsg->colList + i;
    freeColumnFilterInfo(column->filters, column->numOfFilters);
  }

  // the query object has already been freed in initQInfo on failure, but *pQInfo may still
  // hold the stale pointer
  if (code != TSDB_CODE_SUCCESS) {
    *pQInfo = NULL;
  }

  // if failed to add ref for all meters in this query, abort current query
  return code;
}

H
Haojun Liao 已提交
6240
/**
 * Tear down a query handle.
 *
 * Handles rejected by isValidQInfo() are ignored. For a valid handle the
 * completion is logged, the query cost summary is printed and the SQInfo is
 * released through freeQInfo().
 *
 * @param qHandle opaque query handle (an SQInfo pointer)
 */
void qDestroyQueryInfo(qinfo_t qHandle) {
  SQInfo* pInfo = (SQInfo*) qHandle;

  if (isValidQInfo(pInfo)) {
    qDebug("QInfo:%p query completed", pInfo);
    queryCostStatis(pInfo);  // print the query cost summary
    freeQInfo(pInfo);
  }
}

6251
/**
 * Execute one round of the query identified by qinfo.
 *
 * The function returns silently when the handle is NULL or its signature no
 * longer matches. On every other exit path (query killed, no table to scan,
 * error reported through longjmp, or normal completion) the dataReady
 * semaphore is posted exactly once.
 *
 * @param qinfo opaque query handle (an SQInfo pointer)
 */
void qTableQuery(qinfo_t qinfo) {
  SQInfo *pInfo = (SQInfo *)qinfo;

  if (pInfo == NULL || pInfo->signature != pInfo) {
    qDebug("QInfo:%p has been freed, no need to execute", pInfo);
    return;
  }

  if (IS_QUERY_KILLED(pInfo)) {
    qDebug("QInfo:%p it is already killed, abort", pInfo);
    sem_post(&pInfo->dataReady);
    return;
  }

  if (pInfo->tableqinfoGroupInfo.numOfTables == 0) {
    qDebug("QInfo:%p no table exists for query, abort", pInfo);
    sem_post(&pInfo->dataReady);
    return;
  }

  // a non-zero value means we got here through longjmp: record the error code
  // and hand it back to the client
  int32_t ret = setjmp(pInfo->runtimeEnv.env);
  if (ret != TSDB_CODE_SUCCESS) {
    pInfo->code = ret;
    qDebug("QInfo:%p query abort due to error/cancel occurs, code:%s", pInfo, tstrerror(pInfo->code));
    sem_post(&pInfo->dataReady);
    return;
  }

  qDebug("QInfo:%p query task is launched", pInfo);

  SQueryRuntimeEnv* pEnv = &pInfo->runtimeEnv;

  // dispatch to the matching executor: tag-only, super table, or normal table
  if (onlyQueryTags(pEnv->pQuery)) {
    assert(pEnv->pQueryHandle == NULL);
    buildTagQueryResult(pInfo);  // todo support the limit/offset
  } else if (pEnv->stableQuery) {
    stableQueryImpl(pInfo);
  } else {
    tableQueryImpl(pInfo);
  }

  // report the outcome of this round
  SQuery* pQ = pEnv->pQuery;
  if (IS_QUERY_KILLED(pInfo)) {
    qDebug("QInfo:%p query is killed", pInfo);
  } else if (pQ->rec.rows == 0) {
    qDebug("QInfo:%p over, %zu tables queried, %"PRId64" rows are returned", pInfo, pInfo->tableqinfoGroupInfo.numOfTables, pQ->rec.total);
  } else {
    qDebug("QInfo:%p query paused, %" PRId64 " rows returned, numOfTotal:%" PRId64 " rows",
           pInfo, pQ->rec.rows, pQ->rec.total + pQ->rec.rows);
  }

  sem_post(&pInfo->dataReady);
}

H
hjxilinx 已提交
6305
/**
 * Wait until the current round of the query has produced a result and return
 * the query's status code.
 *
 * @param qinfo opaque query handle (an SQInfo pointer)
 * @return TSDB_CODE_QRY_INVALID_QHANDLE for a NULL/invalid handle; otherwise
 *         the code stored in the handle. When the query is already killed the
 *         code is returned immediately without waiting on dataReady.
 */
int32_t qRetrieveQueryResultInfo(qinfo_t qinfo) {
  SQInfo *pInfo = (SQInfo *)qinfo;
  if (pInfo == NULL || !isValidQInfo(pInfo)) {
    return TSDB_CODE_QRY_INVALID_QHANDLE;
  }

  SQuery *pQ = pInfo->runtimeEnv.pQuery;

  if (IS_QUERY_KILLED(pInfo)) {
    qDebug("QInfo:%p query is killed, code:%d", pInfo, pInfo->code);
    return pInfo->code;
  }

  // block until the executor (or a kill request) posts the semaphore
  sem_wait(&pInfo->dataReady);

  qDebug("QInfo:%p retrieve result info, rowsize:%d, rows:%"PRId64", code:%d", pInfo, pQ->rowSize, pQ->rec.rows,
         pInfo->code);

  return pInfo->code;
}
6324

H
hjxilinx 已提交
6325
/**
 * Tell whether the client should issue another retrieve for this query.
 *
 * @param qinfo opaque query handle (an SQInfo pointer)
 * @return false when the handle is NULL/invalid, when the query carries an
 *         error code, or when its status contains QUERY_OVER; true when the
 *         status contains QUERY_RESBUF_FULL or QUERY_COMPLETED.
 *
 * The handle check is done separately from the error-code check so that
 * pQInfo->code is never read through a NULL or invalid handle (the previous
 * combined check dereferenced the handle inside the log statement).
 */
bool qHasMoreResultsToRetrieve(qinfo_t qinfo) {
  SQInfo *pQInfo = (SQInfo *)qinfo;

  if (pQInfo == NULL || !isValidQInfo(pQInfo)) {
    // do not touch any field of the handle here: it is not safe to dereference
    qDebug("QInfo:%p invalid qhandle or error occurs, abort query, code:%x", pQInfo,
           (uint32_t) TSDB_CODE_QRY_INVALID_QHANDLE);
    return false;
  }

  if (pQInfo->code != TSDB_CODE_SUCCESS) {
    qDebug("QInfo:%p invalid qhandle or error occurs, abort query, code:%x", pQInfo, pQInfo->code);
    return false;
  }

  SQuery *pQuery = pQInfo->runtimeEnv.pQuery;
  bool ret = false;

  if (Q_STATUS_EQUAL(pQuery->status, QUERY_OVER)) {
    ret = false;
  } else if (Q_STATUS_EQUAL(pQuery->status, QUERY_RESBUF_FULL) || Q_STATUS_EQUAL(pQuery->status, QUERY_COMPLETED)) {
    ret = true;
  } else {
    assert(0);  // no other status is expected at this point
  }

  if (ret) {
    qDebug("QInfo:%p has more results waits for client retrieve", pQInfo);
  }

  return ret;
}

6352 6353 6354
/**
 * Build the retrieve response for the client.
 *
 * The response buffer is sized as: result payload (getResultSize) + one
 * int32_t + one STableIdInfo per entry of arrTableIdInfo + the
 * SRetrieveTableRsp header. Header fields are stored in network byte order.
 *
 * @param qinfo   opaque query handle (an SQInfo pointer)
 * @param pRsp    out: response buffer obtained from rpcMallocCont(); set to
 *                NULL when the allocation fails
 * @param contLen out: total length of the response; set to 0 when the
 *                allocation fails
 * @return TSDB_CODE_QRY_INVALID_QHANDLE for a NULL/invalid handle,
 *         TSDB_CODE_QRY_OUT_OF_MEMORY when the response cannot be allocated,
 *         otherwise the result of doDumpQueryResult() or the query's own code.
 */
int32_t qDumpRetrieveResult(qinfo_t qinfo, SRetrieveTableRsp **pRsp, int32_t *contLen) {
  SQInfo *pQInfo = (SQInfo *)qinfo;

  if (pQInfo == NULL || !isValidQInfo(pQInfo)) {
    return TSDB_CODE_QRY_INVALID_QHANDLE;
  }

  SQueryRuntimeEnv* pRuntimeEnv = &pQInfo->runtimeEnv;
  SQuery *pQuery = pQInfo->runtimeEnv.pQuery;

  size_t size = getResultSize(pQInfo, &pQuery->rec.rows);
  size += sizeof(int32_t);
  size += sizeof(STableIdInfo) * taosArrayGetSize(pQInfo->arrTableIdInfo);
  *contLen = (int32_t)(size + sizeof(SRetrieveTableRsp));

  *pRsp = (SRetrieveTableRsp *)rpcMallocCont(*contLen);
  if (*pRsp == NULL) {
    // nothing can be sent back; do not advertise a payload length
    *contLen = 0;
    return TSDB_CODE_QRY_OUT_OF_MEMORY;
  }

  (*pRsp)->numOfRows = htonl(pQuery->rec.rows);

  int32_t code = pQInfo->code;
  if (code == TSDB_CODE_SUCCESS) {
    (*pRsp)->offset = htobe64(pQuery->limit.offset);
    (*pRsp)->useconds = htobe64(pRuntimeEnv->summary.elapsedTime);
  } else {
    (*pRsp)->offset = 0;
    (*pRsp)->useconds = 0;
  }

  (*pRsp)->precision = htons(pQuery->precision);
  if (pQuery->rec.rows > 0 && code == TSDB_CODE_SUCCESS) {
    code = doDumpQueryResult(pQInfo, (*pRsp)->data);
  } else {
    // no rows or an error: the query is finished from the client's view
    setQueryStatus(pQuery, QUERY_OVER);
    code = pQInfo->code;
  }

  if (IS_QUERY_KILLED(pQInfo) || Q_STATUS_EQUAL(pQuery->status, QUERY_OVER)) {
    (*pRsp)->completed = 1;  // notify no more result to client
  }

  return code;
}
H
hjxilinx 已提交
6393

H
Haojun Liao 已提交
6394
/**
 * Request cancellation of a query.
 *
 * For a valid handle the dataReady semaphore is posted and then the query is
 * flagged as killed through setQueryKilled().
 *
 * @param qinfo opaque query handle (an SQInfo pointer)
 * @return TSDB_CODE_SUCCESS, or TSDB_CODE_QRY_INVALID_QHANDLE when the handle
 *         is NULL or fails isValidQInfo().
 */
int32_t qKillQuery(qinfo_t qinfo) {
  SQInfo *pInfo = (SQInfo *)qinfo;

  if (pInfo != NULL && isValidQInfo(pInfo)) {
    sem_post(&pInfo->dataReady);
    setQueryKilled(pInfo);
    return TSDB_CODE_SUCCESS;
  }

  return TSDB_CODE_QRY_INVALID_QHANDLE;
}

H
hjxilinx 已提交
6406 6407 6408
/**
 * Produce the result of a query that touches only tags / table names.
 *
 * Three shapes are handled, selected by the function id of the first output
 * expression:
 *   - TSDB_FUNC_TID_TAG: one var-length row per table holding uid, tid, vgId
 *     followed by the table name or the requested tag value;
 *   - TSDB_FUNC_COUNT:   a single row with the number of tables;
 *   - anything else:     one row per table with the selected tags / tbname.
 *
 * Row-per-table results are paged: each call resumes at pQInfo->tableIndex
 * and emits at most pQuery->rec.capacity rows. On return rec.rows holds the
 * number of rows produced by this call and the status is QUERY_COMPLETED.
 * Nothing is changed when the query has no table group.
 *
 * Fix: in the TID_TAG branch the output offset is now derived from the number
 * of rows emitted in this call (count), as the generic branch already does.
 * It used to be derived from the absolute table index, which moves the write
 * position past the rows of the current batch on every call after the first.
 * The uid/tid/vgId fields are also stored with memcpy because the destination
 * follows a var-string header and is not naturally aligned.
 */
static void buildTagQueryResult(SQInfo* pQInfo) {
  SQueryRuntimeEnv *pRuntimeEnv = &pQInfo->runtimeEnv;
  SQuery *          pQuery = pRuntimeEnv->pQuery;

  size_t numOfGroup = GET_NUM_OF_TABLEGROUP(pQInfo);
  assert(numOfGroup == 0 || numOfGroup == 1);

  if (numOfGroup == 0) {
    return;
  }

  SArray* pa = GET_TABLEGROUP(pQInfo, 0);

  size_t num = taosArrayGetSize(pa);
  assert(num == pQInfo->tableqinfoGroupInfo.numOfTables);

  int32_t count = 0;
  int32_t functionId = pQuery->pSelectExpr[0].base.functionId;
  if (functionId == TSDB_FUNC_TID_TAG) { // return the tags & table Id
    assert(pQuery->numOfOutput == 1);

    SExprInfo* pExprInfo = &pQuery->pSelectExpr[0];
    int32_t rsize = pExprInfo->bytes;

    // default to the expression's type/size, then prefer the tag column schema when the column id matches
    int16_t bytes = pExprInfo->bytes;
    int16_t type = pExprInfo->type;

    for(int32_t i = 0; i < pQuery->numOfTags; ++i) {
      if (pQuery->tagColList[i].colId == pExprInfo->base.colInfo.colId) {
        bytes = pQuery->tagColList[i].bytes;
        type = pQuery->tagColList[i].type;
        break;
      }
    }

    while(pQInfo->tableIndex < num && count < pQuery->rec.capacity) {
      int32_t i = pQInfo->tableIndex++;
      STableQueryInfo *item = taosArrayGetP(pa, i);

      // rows are packed from the start of the buffer on every call
      char *output = pQuery->sdata[0]->data + count * rsize;
      varDataSetLen(output, rsize - VARSTR_HEADER_SIZE);

      output = varDataVal(output);
      STableId* id = TSDB_TABLEID(item->pTable);

      // the payload starts right after the var-string header, so use memcpy instead of typed stores
      int64_t uid = id->uid;
      memcpy(output, &uid, sizeof(uid));
      output += sizeof(id->uid);

      int32_t tid = id->tid;
      memcpy(output, &tid, sizeof(tid));
      output += sizeof(id->tid);

      int32_t vgId = pQInfo->vgId;
      memcpy(output, &vgId, sizeof(vgId));
      output += sizeof(pQInfo->vgId);

      if (pExprInfo->base.colInfo.colId == TSDB_TBNAME_COLUMN_INDEX) {
        char *data = tsdbGetTableName(item->pTable);
        memcpy(output, data, varDataTLen(data));
      } else {
        char *val = tsdbGetTableTagVal(item->pTable, pExprInfo->base.colInfo.colId, type, bytes);

        // todo refactor
        if (type == TSDB_DATA_TYPE_BINARY || type == TSDB_DATA_TYPE_NCHAR) {
          if (val == NULL) {
            setVardataNull(output, type);
          } else {
            memcpy(output, val, varDataTLen(val));
          }
        } else {
          if (val == NULL) {
            setNull(output, type, bytes);
          } else {  // todo here stop will cause client crash
            memcpy(output, val, bytes);
          }
        }
      }

      count += 1;
    }

    qDebug("QInfo:%p create (tableId, tag) info completed, rows:%d", pQInfo, count);

  } else if (functionId == TSDB_FUNC_COUNT) {// handle the "count(tbname)" query
    *(int64_t*) pQuery->sdata[0]->data = num;

    count = 1;
    pQInfo->tableIndex = num;  //set query completed
    qDebug("QInfo:%p create count(tbname) query, res:%d rows:1", pQInfo, count);
  } else {  // return only the tags|table name etc.
    SSchema tbnameSchema = tGetTableNameColumnSchema();

    while(pQInfo->tableIndex < num && count < pQuery->rec.capacity) {
      int32_t i = pQInfo->tableIndex++;

      SExprInfo* pExprInfo = pQuery->pSelectExpr;
      STableQueryInfo* item = taosArrayGetP(pa, i);

      for(int32_t j = 0; j < pQuery->numOfOutput; ++j) {
        if (pExprInfo[j].base.colInfo.colId == TSDB_TBNAME_COLUMN_INDEX) {
          char* data = tsdbGetTableName(item->pTable);
          char* dst = pQuery->sdata[j]->data + count * tbnameSchema.bytes;
          memcpy(dst, data, varDataTLen(data));
        } else {// todo refactor
          int16_t type = pExprInfo[j].type;
          int16_t bytes = pExprInfo[j].bytes;

          char* data = tsdbGetTableTagVal(item->pTable, pExprInfo[j].base.colInfo.colId, type, bytes);
          char* dst = pQuery->sdata[j]->data + count * pExprInfo[j].bytes;

          if (type == TSDB_DATA_TYPE_BINARY || type == TSDB_DATA_TYPE_NCHAR) {
            if (data == NULL) {
              setVardataNull(dst, type);
            } else {
              memcpy(dst, data, varDataTLen(data));
            }
          } else {
            if (data == NULL) {
              setNull(dst, type, bytes);
            } else {
              memcpy(dst, data, pExprInfo[j].bytes);
            }
          }
        }
      }

      count += 1;
    }

    qDebug("QInfo:%p create tag values results completed, rows:%d", pQInfo, count);
  }

  pQuery->rec.rows = count;
  setQueryStatus(pQuery, QUERY_COMPLETED);
}

6540 6541 6542 6543 6544 6545 6546
/**
 * Release callback for the query-handle cache (it is passed to taosCacheInit
 * by qOpenQueryMgmt).
 *
 * @param qhandle address of a stored query handle; a NULL address or a NULL
 *                stored handle is ignored. Otherwise the query is killed and
 *                then destroyed.
 */
void freeqinfoFn(void *qhandle) {
  void** ppInfo = qhandle;

  if (ppInfo != NULL && *ppInfo != NULL) {
    qKillQuery(*ppInfo);
    qDestroyQueryInfo(*ppInfo);
  }
}

void* qOpenQueryMgmt(int32_t vgId) {
  const int32_t REFRESH_HANDLE_INTERVAL = 2; // every 2 seconds, refresh handle pool

  char cacheName[128] = {0};
  sprintf(cacheName, "qhandle_%d", vgId);

  SQueryMgmt* pQueryHandle = calloc(1, sizeof(SQueryMgmt));

  pQueryHandle->qinfoPool = taosCacheInit(TSDB_DATA_TYPE_BIGINT, REFRESH_HANDLE_INTERVAL, true, freeqinfoFn, cacheName);
  pQueryHandle->closed    = false;
  pthread_mutex_init(&pQueryHandle->lock, NULL);

  qDebug("vgId:%d, open querymgmt success", vgId);
  return pQueryHandle;
}

H
Haojun Liao 已提交
6566
/**
 * Per-entry callback handed to taosCacheRefresh() when the manager is being
 * closed: kills the query stored in the cache entry.
 *
 * @param handle address of a stored query handle; a NULL address is ignored,
 *               matching the guard in freeqinfoFn. A NULL stored handle is
 *               rejected by qKillQuery itself.
 */
static void queryMgmtKillQueryFn(void* handle) {
  void** fp = (void**)handle;
  if (fp == NULL) {
    return;
  }

  qKillQuery(*fp);
}

void qQueryMgmtNotifyClosed(void* pQMgmt) {
6572 6573 6574 6575 6576 6577 6578 6579 6580 6581 6582
  if (pQMgmt == NULL) {
    return;
  }

  SQueryMgmt* pQueryMgmt = pQMgmt;
  qDebug("vgId:%d, set querymgmt closed, wait for all queries cancelled", pQueryMgmt->vgId);

  pthread_mutex_lock(&pQueryMgmt->lock);
  pQueryMgmt->closed = true;
  pthread_mutex_unlock(&pQueryMgmt->lock);

H
Haojun Liao 已提交
6583
  taosCacheRefresh(pQueryMgmt->qinfoPool, queryMgmtKillQueryFn);
6584 6585 6586 6587 6588 6589 6590 6591 6592 6593 6594 6595 6596 6597 6598 6599 6600 6601 6602 6603 6604 6605
}

/**
 * Destroy a query manager that has already been marked closed.
 *
 * The cache pointer is detached from the manager before the cache is cleaned
 * up; then the mutex is destroyed and the manager itself is freed.
 *
 * @param pQMgmt query manager (SQueryMgmt pointer); NULL is ignored. The
 *               manager must be closed (asserted).
 */
void qCleanupQueryMgmt(void* pQMgmt) {
  SQueryMgmt* pMgmt = pQMgmt;
  if (pMgmt == NULL) {
    return;
  }

  // remember the id for the final log line, the manager is gone by then
  int32_t vgId = pMgmt->vgId;

  assert(pMgmt->closed);

  SCacheObj* pPool = pMgmt->qinfoPool;
  pMgmt->qinfoPool = NULL;
  taosCacheCleanup(pPool);

  pthread_mutex_destroy(&pMgmt->lock);
  tfree(pMgmt);

  qDebug("vgId:%d querymgmt cleanup completed", vgId);
}

6606
void** qRegisterQInfo(void* pMgmt, uint64_t qInfo) {
6607 6608 6609 6610
  if (pMgmt == NULL) {
    return NULL;
  }

6611 6612
  const int32_t DEFAULT_QHANDLE_LIFE_SPAN = tsShellActivityTimer * 2;

6613 6614
  SQueryMgmt *pQueryMgmt = pMgmt;
  if (pQueryMgmt->qinfoPool == NULL) {
6615
    qError("QInfo:%p failed to add qhandle into qMgmt, since qMgmt is closed", (void *)qInfo);
6616 6617 6618 6619 6620 6621
    return NULL;
  }

  pthread_mutex_lock(&pQueryMgmt->lock);
  if (pQueryMgmt->closed) {
    pthread_mutex_unlock(&pQueryMgmt->lock);
6622
    qError("QInfo:%p failed to add qhandle into cache, since qMgmt is colsing", (void *)qInfo);
6623 6624
    return NULL;
  } else {
6625 6626 6627
    uint64_t handleVal = (uint64_t) qInfo;

    void** handle = taosCachePut(pQueryMgmt->qinfoPool, &handleVal, sizeof(int64_t), &qInfo, POINTER_BYTES, DEFAULT_QHANDLE_LIFE_SPAN);
6628 6629 6630 6631 6632 6633
    pthread_mutex_unlock(&pQueryMgmt->lock);

    return handle;
  }
}

6634
void** qAcquireQInfo(void* pMgmt, uint64_t key) {
6635 6636 6637 6638 6639 6640
  SQueryMgmt *pQueryMgmt = pMgmt;

  if (pQueryMgmt->qinfoPool == NULL || pQueryMgmt->closed) {
    return NULL;
  }

6641
  void** handle = taosCacheAcquireByKey(pQueryMgmt->qinfoPool, &key, sizeof(uint64_t));
6642 6643 6644 6645 6646 6647 6648 6649 6650 6651 6652 6653 6654 6655 6656 6657 6658 6659
  if (handle == NULL || *handle == NULL) {
    return NULL;
  } else {
    return handle;
  }
}

/**
 * Give a previously acquired cache entry back to the manager's cache.
 *
 * @param pMgmt    query manager (SQueryMgmt pointer)
 * @param pQInfo   cache entry to release; passed through to taosCacheRelease()
 * @param needFree passed through to taosCacheRelease()
 * @return always NULL; nothing is done when the manager is NULL or has no
 *         cache
 */
void** qReleaseQInfo(void* pMgmt, void* pQInfo, bool needFree) {
  SQueryMgmt *pQueryMgmt = pMgmt;

  if (pQueryMgmt == NULL || pQueryMgmt->qinfoPool == NULL) {
    return NULL;
  }

  taosCacheRelease(pQueryMgmt->qinfoPool, pQInfo, needFree);
  return NULL;
}