qExecutor.c 219.9 KB
Newer Older
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15
/*
 * Copyright (c) 2019 TAOS Data, Inc. <jhtao@taosdata.com>
 *
 * This program is free software: you can use, redistribute, and/or modify
 * it under the terms of the GNU Affero General Public License, version 3
 * or later ("AGPL"), as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.
 *
 * You should have received a copy of the GNU Affero General Public License
 * along with this program. If not, see <http://www.gnu.org/licenses/>.
 */
#include "os.h"
16 17
#include "tcache.h"
#include "tglobal.h"
H
Haojun Liao 已提交
18
#include "qfill.h"
19
#include "taosmsg.h"
20

H
Haojun Liao 已提交
21
#include "exception.h"
22
#include "hash.h"
23 24
#include "qExecutor.h"
#include "qUtil.h"
25
#include "qresultBuf.h"
H
hjxilinx 已提交
26
#include "query.h"
S
slguan 已提交
27
#include "queryLog.h"
H
Haojun Liao 已提交
28 29
#include "qast.h"
#include "tfile.h"
30 31 32
#include "tlosertree.h"
#include "tscompression.h"
#include "ttime.h"
33 34 35 36 37 38 39 40 41

/**
 * Check whether the primary column is loaded by default; otherwise the program
 * is forced to load the primary column explicitly.
 *
 * NOTE(review): this sentence does not appear to describe the three macros
 * that follow it - confirm whether it belongs to code that was moved.
 */

/* Non-zero when p and s share at least one set bit (a bit-overlap test, not an equality test). */
#define Q_STATUS_EQUAL(p, s) (((p) & (s)) != 0)
/* Non-zero when the TSDB_COL_TAG bit is set in the column flag f. */
#define TSDB_COL_IS_TAG(f) (((f)&TSDB_COL_TAG) != 0)
/* True when the direction factor derived from (q)->order.order equals QUERY_ASC_FORWARD_STEP. */
#define QUERY_IS_ASC_QUERY(q) (GET_FORWARD_DIRECTION_FACTOR((q)->order.order) == QUERY_ASC_FORWARD_STEP)

42
/* Test / assign the scanFlag member of a runtime environment. */
#define IS_MASTER_SCAN(runtime)        ((runtime)->scanFlag == MASTER_SCAN)
#define IS_REVERSE_SCAN(runtime)       ((runtime)->scanFlag == REVERSE_SCAN)
#define SET_MASTER_SCAN_FLAG(runtime)  ((runtime)->scanFlag = MASTER_SCAN)
#define SET_REVERSE_SCAN_FLAG(runtime) ((runtime)->scanFlag = REVERSE_SCAN)

/* Get the enclosing SQInfo address from the address of its runtimeEnv member. */
#define GET_QINFO_ADDR(x) ((SQInfo *)((char *)(x)-offsetof(SQInfo, runtimeEnv)))

/* Position reached from (query)->pos after `index` moves of `step` rows. */
#define GET_COL_DATA_POS(query, index, step) ((query)->pos + (index) * (step))

/* Toggle n in place: TSDB_ORDER_ASC becomes TSDB_ORDER_DESC, any other value becomes TSDB_ORDER_ASC. */
#define SWITCH_ORDER(n) (((n) = ((n) == TSDB_ORDER_ASC) ? TSDB_ORDER_DESC : TSDB_ORDER_ASC))

/* Zero-valued SDataBlockInfo compound literal. */
#define SDATA_BLOCK_INITIALIZER (SDataBlockInfo) {{0}, 0}

/* Byte width and type of the column referenced by the colidx-th select expression. */
#define GET_COLUMN_BYTES(query, colidx) \
  ((query)->colList[(query)->pSelectExpr[colidx].base.colInfo.colIndex].bytes)
#define GET_COLUMN_TYPE(query, colidx) \
  ((query)->colList[(query)->pSelectExpr[colidx].base.colInfo.colIndex].type)
58

59
/* Query status bits; each constant occupies its own bit. */
enum {
  /* Set when the query starts to execute. */
  QUERY_NOT_COMPLETED = (1u << 0),

  /*
   * The result output buffer is full and the current query is paused.
   * This status only exists for group-by clauses and
   * diff/add/division/multiply queries.
   */
  QUERY_RESBUF_FULL = (1u << 1),

  /*
   * The query is over.
   * 1. Used by one-row-result queries, e.g. count/sum/first/last/avg.
   * 2. Also set once all data within the queried time window has been handled.
   */
  QUERY_COMPLETED = (1u << 2),

  /*
   * Used when the result has not been completely returned to the client,
   * usually for interval queries with the interpolation option.
   */
  QUERY_OVER = (1u << 3),
};
79 80

/* Comparison outcomes for timestamp join; presumably produced while matching rows of joined tables - confirm against users. */
enum {
  TS_JOIN_TS_EQUAL = 0,       /* timestamps are equal */
  TS_JOIN_TS_NOT_EQUALS = 1,  /* timestamps differ */
  TS_JOIN_TAG_NOT_EQUALS = 2, /* tags differ */
};

86
typedef struct {
87 88 89 90 91 92
  int32_t     status;       // query status
  TSKEY       lastKey;      // the lastKey value before query executed
  STimeWindow w;            // whole query time window
  STimeWindow curWindow;    // current query window
  int32_t     windowIndex;  // index of active time window result for interval query
  STSCursor   cur;
93 94
} SQueryStatusInfo;

H
Haojun Liao 已提交
95
#if 0
/*
 * Disabled allocation wrappers that return NULL whenever rand() % 5 is 0 or 1
 * and otherwise forward to the real allocator. The #define lines come after
 * the function bodies so the wrappers themselves still call malloc/calloc.
 */
static UNUSED_FUNC void *u_malloc(size_t size) {
  uint32_t r = rand();
  return (r % 5 <= 1) ? NULL : malloc(size);
}

static UNUSED_FUNC void *u_calloc(size_t num, size_t size) {
  uint32_t r = rand();
  return (r % 5 <= 1) ? NULL : calloc(num, size);
}

#define calloc  u_calloc
#define malloc  u_malloc
#endif
H
Haojun Liao 已提交
117

118
#define CLEAR_QUERY_STATUS(q, st)   ((q)->status &= (~(st)))
H
Haojun Liao 已提交
119 120 121
#define GET_NUM_OF_TABLEGROUP(q)    taosArrayGetSize((q)->tableqinfoGroupInfo.pGroupList)
#define GET_TABLEGROUP(q, _index)   ((SArray*) taosArrayGetP((q)->tableqinfoGroupInfo.pGroupList, (_index)))

122
static void setQueryStatus(SQuery *pQuery, int8_t status);
H
Haojun Liao 已提交
123
static void finalizeQueryResult(SQueryRuntimeEnv *pRuntimeEnv);
124

H
Haojun Liao 已提交
125
/* True when (_q)->intervalTime is strictly positive. */
#define QUERY_IS_INTERVAL_QUERY(_q) ((_q)->intervalTime > 0)
126

H
Haojun Liao 已提交
127 128 129 130 131 132 133 134
/*
 * Advance the time window *tw in place to the next window of query _q:
 * skey moves by slidingTime multiplied by the direction factor of the query
 * order, and ekey is recomputed as skey + intervalTime - 1.
 *
 * The previous time window may not be of the same size as pQuery->intervalTime,
 * which is why ekey is rebuilt from skey instead of being shifted.
 *
 * Both arguments are evaluated more than once; pass side-effect-free expressions.
 */
#define GET_NEXT_TIMEWINDOW(_q, tw)                                   \
  do {                                                                \
    int32_t factor = GET_FORWARD_DIRECTION_FACTOR((_q)->order.order); \
    (tw)->skey += ((_q)->slidingTime * factor);                       \
    (tw)->ekey = (tw)->skey + ((_q)->intervalTime - 1);               \
  } while (0)

H
hjxilinx 已提交
135
// todo move to utility
136
static int32_t mergeIntoGroupResultImpl(SQInfo *pQInfo, SArray *group);
137

H
hjxilinx 已提交
138
static void setWindowResOutputBuf(SQueryRuntimeEnv *pRuntimeEnv, SWindowResult *pResult);
H
Haojun Liao 已提交
139
static void setWindowResOutputBufInitCtx(SQueryRuntimeEnv *pRuntimeEnv, SWindowResult *pResult);
140 141
static void resetMergeResultBuf(SQuery *pQuery, SQLFunctionCtx *pCtx, SResultInfo *pResultInfo);
static bool functionNeedToExecute(SQueryRuntimeEnv *pRuntimeEnv, SQLFunctionCtx *pCtx, int32_t functionId);
142

143 144 145
static void setExecParams(SQuery *pQuery, SQLFunctionCtx *pCtx, void* inputData, TSKEY *tsCol, SDataBlockInfo* pBlockInfo,
                          SDataStatis *pStatis, void *param, int32_t colIndex);

146
static void initCtxOutputBuf(SQueryRuntimeEnv *pRuntimeEnv);
147
static void destroyTableQueryInfo(STableQueryInfo *pTableQueryInfo, int32_t numOfCols);
148 149
static void resetCtxOutputBuf(SQueryRuntimeEnv *pRuntimeEnv);
static bool hasMainOutput(SQuery *pQuery);
H
hjxilinx 已提交
150
static void buildTagQueryResult(SQInfo *pQInfo);
151

152
static int32_t setAdditionalInfo(SQInfo *pQInfo, void *pTable, STableQueryInfo *pTableQueryInfo);
153
static int32_t flushFromResultBuf(SQInfo *pQInfo);
154

155
/**
 * Check the element at elemPos against the filters of every filtered column.
 *
 * @param pQuery   query holding numOfFilterCols entries in pFilterInfo
 * @param elemPos  element position inside each filter column's pData
 * @return true when, for every filtered column, the element is not NULL and at
 *         least one of that column's filters accepts it; false otherwise
 */
bool doFilterData(SQuery *pQuery, int32_t elemPos) {
  for (int32_t col = 0; col < pQuery->numOfFilterCols; ++col) {
    SSingleColumnFilterInfo *pInfo = &pQuery->pFilterInfo[col];
    char *pValue = pInfo->pData + pInfo->info.bytes * elemPos;

    // a NULL value never passes
    if (isNull(pValue, pInfo->info.type)) {
      return false;
    }

    // stop at the first filter that accepts the value; the value is passed twice to the callback
    int32_t f = 0;
    for (; f < pInfo->numOfFilters; ++f) {
      SColumnFilterElem *pFilter = &pInfo->pFilters[f];
      if (pFilter->fp(pFilter, pValue, pValue)) {
        break;
      }
    }

    // loop ran to completion (or never ran): no filter accepted the value
    if (f >= pInfo->numOfFilters) {
      return false;
    }
  }

  return true;
}

/**
 * Return the largest numOfRes found among the output function contexts.
 *
 * When the query has a main output (hasMainOutput), the ts, tag and tagprj
 * functions are ignored: they cannot decide the number of output rows, which
 * is decided by the main output.
 *
 * @param pRuntimeEnv runtime environment holding the query and its contexts
 * @return the maximum number of results, 0 when nothing qualifies
 */
int64_t getNumOfResult(SQueryRuntimeEnv *pRuntimeEnv) {
  SQuery *pQuery = pRuntimeEnv->pQuery;
  bool    mainOutputExists = hasMainOutput(pQuery);
  int64_t numOfRows = 0;

  for (int32_t idx = 0; idx < pQuery->numOfOutput; ++idx) {
    int32_t fid = pQuery->pSelectExpr[idx].base.functionId;
    bool    auxiliary = (fid == TSDB_FUNC_TS || fid == TSDB_FUNC_TAG || fid == TSDB_FUNC_TAGPRJ);

    if (mainOutputExists && auxiliary) {
      continue;
    }

    SResultInfo *pInfo = GET_RES_INFO(&pRuntimeEnv->pCtx[idx]);
    if (pInfo == NULL) {
      continue;
    }

    if (pInfo->numOfRes > numOfRows) {
      numOfRows = pInfo->numOfRes;
    }
  }

  assert(numOfRows >= 0);
  return numOfRows;
}

209 210 211 212 213 214 215 216 217
/*
 * The number of results needs to be updated after the offset value has been updated.
 *
 * Every output context except ts, tag, tagprj and ts_dummy gets its numOfRes
 * overwritten with numOfRes; the previous value is asserted to be larger.
 */
void updateNumOfResult(SQueryRuntimeEnv *pRuntimeEnv, int32_t numOfRes) {
  SQuery *pQuery = pRuntimeEnv->pQuery;

  for (int32_t idx = 0; idx < pQuery->numOfOutput; ++idx) {
    SQLFunctionCtx *pOneCtx = &pRuntimeEnv->pCtx[idx];
    SResultInfo    *pInfo = GET_RES_INFO(pOneCtx);

    switch (pOneCtx->functionId) {
      case TSDB_FUNC_TS:
      case TSDB_FUNC_TAG:
      case TSDB_FUNC_TAGPRJ:
      case TSDB_FUNC_TS_DUMMY:
        continue;  // next output column
      default:
        break;
    }

    assert(pInfo->numOfRes > numOfRes);
    pInfo->numOfRes = numOfRes;
  }
}

229 230 231 232 233 234 235 236 237
/* Map a group index to its result id: 200000 plus 10000 per group index. */
static int32_t getGroupResultId(int32_t groupIndex) {
  enum { GROUP_RESULT_ID_BASE = 200000, GROUP_RESULT_ID_STRIDE = 10000 };
  return GROUP_RESULT_ID_BASE + (groupIndex * GROUP_RESULT_ID_STRIDE);
}

/**
 * Tell whether the group-by expression contains a normal column.
 *
 * @param pGroupbyExpr group-by expression, may be NULL
 * @return true when a column flagged TSDB_COL_NORMAL is found; false for a
 *         NULL or empty expression or when no such column exists
 */
bool isGroupbyNormalCol(SSqlGroupbyExpr *pGroupbyExpr) {
  if (pGroupbyExpr == NULL || pGroupbyExpr->numOfGroupCols == 0) {
    return false;
  }

  for (int32_t idx = 0; idx < pGroupbyExpr->numOfGroupCols; ++idx) {
    SColIndex *pCol = taosArrayGet(pGroupbyExpr->columnInfo, idx);
    if (pCol->flag != TSDB_COL_NORMAL) {
      continue;
    }

    // make sure the normal column locates at the second position if tbname exists in the group by clause
    assert(pGroupbyExpr->numOfGroupCols <= 1 || pCol->colIndex > 0);
    return true;
  }

  return false;
}

/**
 * Look up the data type of the first normal column in the group-by expression.
 *
 * @param pQuery       query whose colList is searched by column id
 * @param pGroupbyExpr group-by expression, must not be NULL
 * @return the type of the matching entry in pQuery->colList, or
 *         TSDB_DATA_TYPE_NULL when no normal column or no matching entry exists
 */
int16_t getGroupbyColumnType(SQuery *pQuery, SSqlGroupbyExpr *pGroupbyExpr) {
  assert(pGroupbyExpr != NULL);

  // -2 acts as the "no normal column found" marker
  int32_t targetColId = -2;

  for (int32_t g = 0; g < pGroupbyExpr->numOfGroupCols; ++g) {
    SColIndex *pCol = taosArrayGet(pGroupbyExpr->columnInfo, g);
    if (pCol->flag == TSDB_COL_NORMAL) {
      targetColId = pCol->colId;
      break;
    }
  }

  int16_t colType = TSDB_DATA_TYPE_NULL;
  for (int32_t c = 0; c < pQuery->numOfCols; ++c) {
    if (pQuery->colList[c].colId == targetColId) {
      colType = pQuery->colList[c].type;
      break;
    }
  }

  return colType;
}

/**
 * Tell whether the query combines at least one selectivity function with a
 * tag_dummy or ts_dummy output.
 *
 * @return true when both kinds of output are present, false otherwise
 */
bool isSelectivityWithTagsQuery(SQuery *pQuery) {
  int32_t selectivityCount = 0;
  bool    tagSeen = false;

  for (int32_t idx = 0; idx < pQuery->numOfOutput; ++idx) {
    int32_t fid = pQuery->pSelectExpr[idx].base.functionId;

    if (fid == TSDB_FUNC_TAG_DUMMY || fid == TSDB_FUNC_TS_DUMMY) {
      tagSeen = true;
    } else if ((aAggs[fid].nStatus & TSDB_FUNCSTATE_SELECTIVITY) != 0) {
      selectivityCount++;
    }
  }

  return (selectivityCount > 0) && tagSeen;
}

303
/* True when the first select expression of the query is the ts_comp function. */
bool isTSCompQuery(SQuery *pQuery) {
  SExprInfo *pFirstExpr = &pQuery->pSelectExpr[0];
  return pFirstExpr->base.functionId == TSDB_FUNC_TS_COMP;
}
304

305 306 307 308
/**
 * Apply the LIMIT clause to the rows produced in the current round.
 *
 * When a positive limit is set and rec.total + rec.rows exceeds it, rec.rows is
 * cut down to the rows still allowed and the query is marked QUERY_COMPLETED.
 *
 * @return true when rows were discarded because of the limit, false otherwise
 */
static bool limitResults(SQueryRuntimeEnv* pRuntimeEnv) {
  SQuery* pQuery = pRuntimeEnv->pQuery;
  SQInfo* pQInfo = GET_QINFO_ADDR(pRuntimeEnv);

  bool overLimit = (pQuery->limit.limit > 0) && (pQuery->rec.total + pQuery->rec.rows > pQuery->limit.limit);
  if (!overLimit) {
    return false;
  }

  // keep only the rows that still fit under the limit
  pQuery->rec.rows = pQuery->limit.limit - pQuery->rec.total;

  qDebug("QInfo:%p discard remain data due to result limitation, limit:%"PRId64", current return:%" PRId64 ", total:%"PRId64,
      pQInfo, pQuery->limit.limit, pQuery->rec.rows, pQuery->rec.total + pQuery->rec.rows);
  assert(pQuery->rec.rows >= 0);

  setQueryStatus(pQuery, QUERY_COMPLETED);
  return true;
}

/* True when any output function other than ts is top or bottom. */
static bool isTopBottomQuery(SQuery *pQuery) {
  int32_t idx = 0;

  while (idx < pQuery->numOfOutput) {
    int32_t fid = pQuery->pSelectExpr[idx++].base.functionId;

    // ts outputs are skipped and never decide the answer
    if (fid != TSDB_FUNC_TS && (fid == TSDB_FUNC_TOP || fid == TSDB_FUNC_BOTTOM)) {
      return true;
    }
  }

  return false;
}

H
Haojun Liao 已提交
337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354
/**
 * Tell whether the query outputs a tag value.
 *
 * @return true when the only output is the ts_comp function (the ts_comp column
 *         requires the tag value for the join filter), or when any select
 *         expression refers to a column flagged as a tag; false otherwise
 */
static bool hasTagValOutput(SQuery* pQuery) {
  if (pQuery->numOfOutput == 1 && pQuery->pSelectExpr[0].base.functionId == TSDB_FUNC_TS_COMP) {
    return true;
  }

  // look for a tag column, by which the results are aggregated
  for (int32_t i = 0; i < pQuery->numOfOutput; ++i) {
    if (TSDB_COL_IS_TAG(pQuery->pSelectExpr[i].base.colInfo.flag)) {
      return true;
    }
  }

  return false;
}

355 356 357 358 359 360 361 362
/**
 * @param pQuery
 * @param col
 * @param pDataBlockInfo
 * @param pStatis
 * @param pColStatis
 * @return
 */
H
Haojun Liao 已提交
363 364
static bool hasNullValue(SColIndex* pColIndex, SDataStatis *pStatis, SDataStatis **pColStatis) {
  if (TSDB_COL_IS_TAG(pColIndex->flag) || pColIndex->colId == PRIMARYKEY_TIMESTAMP_COL_INDEX) {
365 366
    return false;
  }
367

368
  if (pStatis != NULL) {
H
Haojun Liao 已提交
369 370
    *pColStatis = &pStatis[pColIndex->colIndex];
    assert((*pColStatis)->colId == pColIndex->colId);
H
hjxilinx 已提交
371 372
  } else {
    *pColStatis = NULL;
373
  }
374

375 376 377
  if ((*pColStatis) != NULL && (*pColStatis)->numOfNull == 0) {
    return false;
  }
378

379 380 381 382
  return true;
}

/**
 * Find the window result registered under the given key, creating it during
 * the master scan when it does not exist yet.
 *
 * The key is looked up in pWindowResInfo->hashList. On a hit, curIndex is set to
 * the stored slot. On a miss during the master scan a new slot is appended
 * (doubling the result array when it is full) and registered in the hash.
 *
 * @param pRuntimeEnv    runtime environment of the query
 * @param pWindowResInfo window result container to search and extend
 * @param pData          key bytes
 * @param bytes          key length
 * @param masterscan     whether new window results may be created
 * @return the window result for the key, or NULL when the key is unknown and
 *         masterscan is false, or when the result array cannot be enlarged
 *         (out of memory). In the out-of-memory case pWindowResInfo is left
 *         unchanged.
 */
static SWindowResult *doSetTimeWindowFromKey(SQueryRuntimeEnv *pRuntimeEnv, SWindowResInfo *pWindowResInfo, char *pData,
                                             int16_t bytes, bool masterscan) {
  SQuery *pQuery = pRuntimeEnv->pQuery;

  int32_t *p1 = (int32_t *) taosHashGet(pWindowResInfo->hashList, pData, bytes);
  if (p1 != NULL) {
    pWindowResInfo->curIndex = *p1;
    return getWindowResult(pWindowResInfo, pWindowResInfo->curIndex);
  }

  // unknown key: new window results are only created in the master scan
  if (!masterscan) {
    return NULL;
  }

  // more than the capacity, reallocate the resources
  if (pWindowResInfo->size >= pWindowResInfo->capacity) {
    int64_t newCap = pWindowResInfo->capacity * 2;

    SWindowResult *t = realloc(pWindowResInfo->pResult, newCap * sizeof(SWindowResult));
    if (t == NULL) {
      /*
       * realloc failed: the old array is still valid and still owned by
       * pWindowResInfo. Bail out instead of initializing and indexing slots
       * beyond the old capacity, which would write past the end of the buffer.
       */
      return NULL;
    }

    pWindowResInfo->pResult = t;

    // zero the newly added upper half before the slots are initialized
    memset(&pWindowResInfo->pResult[pWindowResInfo->capacity], 0, sizeof(SWindowResult) * pWindowResInfo->capacity);

    for (int32_t i = pWindowResInfo->capacity; i < newCap; ++i) {
      SPosInfo pos = {-1, -1};
      createQueryResultInfo(pQuery, &pWindowResInfo->pResult[i], pRuntimeEnv->stableQuery, &pos, pRuntimeEnv->interBufSize);
    }

    pWindowResInfo->capacity = newCap;
  }

  // add a new result set for a new group
  pWindowResInfo->curIndex = pWindowResInfo->size++;
  taosHashPut(pWindowResInfo->hashList, pData, bytes, (char *)&pWindowResInfo->curIndex, sizeof(int32_t));

  return getWindowResult(pWindowResInfo, pWindowResInfo->curIndex);
}

/**
 * Get the time window that covers the timestamp being handled.
 *
 * The starting candidate is the window of the current result slot or, when no
 * slot is active yet (curIndex == -1), the window starting at prevSKey. If the
 * candidate does not cover ts it is moved in whole multiples of slidingTime
 * until it does.
 *
 * @param pWindowResInfo window result container
 * @param ts             timestamp to cover
 * @param pQuery         query supplying intervalTime, slidingTime, order and window
 * @return a window with skey <= ts <= ekey
 */
static STimeWindow getActiveTimeWindow(SWindowResInfo *pWindowResInfo, int64_t ts, SQuery *pQuery) {
  STimeWindow win = {0};

  if (pWindowResInfo->curIndex != -1) {
    int32_t slot = curTimeWindow(pWindowResInfo);
    win = getWindowResult(pWindowResInfo, slot)->window;
  } else {  // the first window, from the previous stored value
    win.skey = pWindowResInfo->prevSKey;
    win.ekey = win.skey + pQuery->intervalTime - 1;
  }

  bool covered = (win.skey <= ts) && (win.ekey >= ts);
  if (!covered) {
    int64_t start = win.skey;

    // candidate starts after ts: step backwards
    if (start > ts) {
      start -= ((start - ts + pQuery->slidingTime - 1) / pQuery->slidingTime) * pQuery->slidingTime;
    }

    // candidate ends before ts: step forwards
    int64_t end = start + pQuery->intervalTime - 1;
    if (end < ts) {
      start += ((ts - end + pQuery->slidingTime - 1) / pQuery->slidingTime) * pQuery->slidingTime;
    }

    win.skey = start;
    win.ekey = win.skey + pQuery->intervalTime - 1;
  }

  /*
   * Query border check: skey must not be bounded by the query time range since
   * it is used as the time window index value, so only ekey is adjusted.
   */
  if (QUERY_IS_ASC_QUERY(pQuery) && win.ekey > pQuery->window.ekey) {
    win.ekey = pQuery->window.ekey;
  }

  assert(ts >= win.skey && ts <= win.ekey);

  return win;
}

/**
 * Assign a (page, row) position in the result buffer to a window result that
 * does not have one yet.
 *
 * The last page registered for sid is reused while it holds fewer than
 * numOfRowsPerPage rows; otherwise, or when sid has no page yet, a new page is
 * requested.
 *
 * @return 0 on success or when a position was already assigned, -1 when no
 *         page could be obtained
 */
static int32_t addNewWindowResultBuf(SWindowResult *pWindowRes, SDiskbasedResultBuf *pResultBuf, int32_t sid,
                                     int32_t numOfRowsPerPage) {
  if (pWindowRes->pos.pageId != -1) {
    return 0;
  }

  // in the first scan, new space is needed for results
  int32_t    pageId = -1;
  tFilePage *pPage = NULL;
  SIDList    pages = getDataBufPagesIdList(pResultBuf, sid);

  if (pages.size != 0) {
    pageId = getLastPageId(&pages);
    pPage = GET_RES_BUF_PAGE_BY_ID(pResultBuf, pageId);

    if (pPage->num >= numOfRowsPerPage) {
      pPage = getNewDataBuf(pResultBuf, sid, &pageId);
      // number of elements must be 0 for a newly allocated buffer
      assert(pPage == NULL || pPage->num == 0);
    }
  } else {
    pPage = getNewDataBuf(pResultBuf, sid, &pageId);
  }

  if (pPage == NULL) {
    return -1;
  }

  // take the next free row of the page
  if (pWindowRes->pos.pageId == -1) {
    pWindowRes->pos.pageId = pageId;
    pWindowRes->pos.rowId = pPage->num++;
  }

  return 0;
}

static int32_t setWindowOutputBufByKey(SQueryRuntimeEnv *pRuntimeEnv, SWindowResInfo *pWindowResInfo, int32_t sid,
501
                                       STimeWindow *win, bool masterscan, bool* newWind) {
502 503
  assert(win->skey <= win->ekey);
  SDiskbasedResultBuf *pResultBuf = pRuntimeEnv->pResultBuf;
504

505 506
  SWindowResult *pWindowRes = doSetTimeWindowFromKey(pRuntimeEnv, pWindowResInfo, (char *)&win->skey,
      TSDB_KEYSIZE, masterscan);
507
  if (pWindowRes == NULL) {
508 509 510
    *newWind = false;

    return masterscan? -1:0;
511
  }
512

513
  *newWind = true;
514 515 516 517 518 519 520
  // not assign result buffer yet, add new result buffer
  if (pWindowRes->pos.pageId == -1) {
    int32_t ret = addNewWindowResultBuf(pWindowRes, pResultBuf, sid, pRuntimeEnv->numOfRowsPerPage);
    if (ret != 0) {
      return -1;
    }
  }
521

522 523
  // set time window for current result
  pWindowRes->window = *win;
524

H
Haojun Liao 已提交
525
  setWindowResOutputBufInitCtx(pRuntimeEnv, pWindowRes);
526 527 528 529 530 531 532 533
  return TSDB_CODE_SUCCESS;
}

/* Return the status record of the window result stored in the given slot; slot must lie in [0, size). */
static SWindowStatus *getTimeWindowResStatus(SWindowResInfo *pWindowResInfo, int32_t slot) {
  assert(slot >= 0 && slot < pWindowResInfo->size);

  SWindowResult *pRes = &pWindowResInfo->pResult[slot];
  return &pRes->status;
}

H
Haojun Liao 已提交
534
/**
 * Count the rows to move forward from pos, in scan order, based on where
 * searchFn locates ekey.
 *
 * Ascending order searches pData[pos .. numOfRows-1]; descending order searches
 * pData[0 .. pos]. When the located row equals ekey it is included in the count.
 *
 * @param numOfRows number of timestamps in pData
 * @param searchFn  search routine; a negative result means nothing was found
 * @param ekey      end key of the range
 * @param pos       start position in pData
 * @param order     TSDB_ORDER_ASC, anything else is handled as descending
 * @param pData     timestamp column
 * @return the number of rows, asserted to be positive
 */
static int32_t getForwardStepsInBlock(int32_t numOfRows, __block_search_fn_t searchFn, TSKEY ekey, int16_t pos,
                                      int16_t order, int64_t *pData) {
  int32_t steps = 0;

  if (order != TSDB_ORDER_ASC) {
    int32_t hit = searchFn((char *)pData, pos + 1, ekey, order);
    if (hit >= 0) {
      steps = pos - hit;
      if (pData[hit] == ekey) {
        steps += 1;
      }
    }
  } else {
    // the search starts at pos, so the result is relative to pos
    int32_t hit = searchFn((char*) &pData[pos], numOfRows - pos, ekey, order);
    if (hit >= 0) {
      steps = (pData[hit + pos] == ekey) ? (hit + 1) : hit;
    }
  }

  assert(steps > 0);
  return steps;
}

/**
 * NOTE: the query status only set for the first scan of master scan.
 */
565
static int32_t doCheckQueryCompleted(SQueryRuntimeEnv *pRuntimeEnv, TSKEY lastKey, SWindowResInfo *pWindowResInfo) {
566
  SQuery *pQuery = pRuntimeEnv->pQuery;
H
Haojun Liao 已提交
567
  if (pRuntimeEnv->scanFlag != MASTER_SCAN || (!QUERY_IS_INTERVAL_QUERY(pQuery))) {
568
    return pWindowResInfo->size;
569
  }
570

571
  // no qualified results exist, abort check
572 573
  int32_t numOfClosed = 0;
  
574
  if (pWindowResInfo->size == 0) {
575
    return pWindowResInfo->size;
576
  }
577

578
  // query completed
H
hjxilinx 已提交
579 580
  if ((lastKey >= pQuery->current->win.ekey && QUERY_IS_ASC_QUERY(pQuery)) ||
      (lastKey <= pQuery->current->win.ekey && !QUERY_IS_ASC_QUERY(pQuery))) {
581
    closeAllTimeWindow(pWindowResInfo);
582

583 584 585 586
    pWindowResInfo->curIndex = pWindowResInfo->size - 1;
    setQueryStatus(pQuery, QUERY_COMPLETED | QUERY_RESBUF_FULL);
  } else {  // set the current index to be the last unclosed window
    int32_t i = 0;
587
    int64_t skey = TSKEY_INITIAL_VAL;
588

589 590 591
    for (i = 0; i < pWindowResInfo->size; ++i) {
      SWindowResult *pResult = &pWindowResInfo->pResult[i];
      if (pResult->status.closed) {
592
        numOfClosed += 1;
593 594
        continue;
      }
595

596 597 598 599 600 601 602 603
      if ((pResult->window.ekey <= lastKey && QUERY_IS_ASC_QUERY(pQuery)) ||
          (pResult->window.skey >= lastKey && !QUERY_IS_ASC_QUERY(pQuery))) {
        closeTimeWindow(pWindowResInfo, i);
      } else {
        skey = pResult->window.skey;
        break;
      }
    }
604

605
    // all windows are closed, set the last one to be the skey
606
    if (skey == TSKEY_INITIAL_VAL) {
607 608 609 610 611
      assert(i == pWindowResInfo->size);
      pWindowResInfo->curIndex = pWindowResInfo->size - 1;
    } else {
      pWindowResInfo->curIndex = i;
    }
612

613
    pWindowResInfo->prevSKey = pWindowResInfo->pResult[pWindowResInfo->curIndex].window.skey;
614

615 616
    // the number of completed slots are larger than the threshold, return current generated results to client.
    if (numOfClosed > pWindowResInfo->threshold) {
617
      qDebug("QInfo:%p total result window:%d closed:%d, reached the output threshold %d, return",
618 619
          GET_QINFO_ADDR(pRuntimeEnv), pWindowResInfo->size, numOfClosed, pQuery->rec.threshold);
      
620
      setQueryStatus(pQuery, QUERY_RESBUF_FULL);
621
    } else {
622
      qDebug("QInfo:%p total result window:%d already closed:%d", GET_QINFO_ADDR(pRuntimeEnv), pWindowResInfo->size,
623
             numOfClosed);
624 625
    }
  }
626 627 628 629 630 631 632
  
  // output has reached the limitation, set query completed
  if (pQuery->limit.limit > 0 && (pQuery->limit.limit + pQuery->limit.offset) <= numOfClosed &&
      pRuntimeEnv->scanFlag == MASTER_SCAN) {
    setQueryStatus(pQuery, QUERY_COMPLETED);
  }
  
633
  assert(pWindowResInfo->prevSKey != TSKEY_INITIAL_VAL);
634
  return numOfClosed;
635 636 637
}

/**
 * Count the rows of the data block, starting at startPos and going in query
 * order, that fall into the time window ending at ekey.
 *
 * When the window ends inside the block the count comes from
 * getForwardStepsInBlock; otherwise every remaining row in scan direction is
 * counted. With updateLastKey set, the lastKey of the current table
 * (pQuery->current) is moved one step past the last counted row.
 *
 * @return the number of rows, asserted to be positive
 */
static int32_t getNumOfRowsInTimeWindow(SQuery *pQuery, SDataBlockInfo *pDataBlockInfo, TSKEY *pPrimaryColumn,
                                        int32_t startPos, TSKEY ekey, __block_search_fn_t searchFn, bool updateLastKey) {
  assert(startPos >= 0 && startPos < pDataBlockInfo->rows);

  int32_t order = pQuery->order.order;
  int32_t step = GET_FORWARD_DIRECTION_FACTOR(order);
  int32_t num = -1;

  STableQueryInfo* pTable = pQuery->current;

  bool asc = QUERY_IS_ASC_QUERY(pQuery);
  bool endsInsideBlock = asc ? (ekey < pDataBlockInfo->window.ekey) : (ekey > pDataBlockInfo->window.skey);

  if (endsInsideBlock) {
    num = getForwardStepsInBlock(pDataBlockInfo->rows, searchFn, ekey, startPos, order, pPrimaryColumn);

    if (num == 0) {  // no qualified data in current block, do not update the lastKey value
      if (asc) {
        assert(ekey < pPrimaryColumn[startPos]);
      } else {
        assert(ekey > pPrimaryColumn[startPos]);
      }
    } else if (updateLastKey) {
      int32_t lastRow = asc ? (startPos + (num - 1)) : (startPos - (num - 1));
      pTable->lastKey = pPrimaryColumn[lastRow] + step;
    }
  } else {
    // the window reaches to (or beyond) the block border: take all remaining rows
    num = asc ? (pDataBlockInfo->rows - startPos) : (startPos + 1);

    if (updateLastKey) {
      TSKEY border = asc ? pDataBlockInfo->window.ekey : pDataBlockInfo->window.skey;
      pTable->lastKey = border + step;
    }
  }

  assert(num > 0);
  return num;
}

/**
 * Run every output function of the query over a run of rows of the current block.
 *
 * Nothing is done unless this is the master scan or the window is closed.
 *
 * @param pStatus     status of the window being processed
 * @param pWin        window whose skey becomes nStartQueryTimestamp
 * @param offset      position of the first row in scan order
 * @param forwardStep number of rows to process
 * @param tsBuf       timestamp column of the block
 * @param numOfTotal  number of rows in the block
 */
static void doBlockwiseApplyFunctions(SQueryRuntimeEnv *pRuntimeEnv, SWindowStatus *pStatus, STimeWindow *pWin,
                                      int32_t offset, int32_t forwardStep, TSKEY *tsBuf, int32_t numOfTotal) {
  SQuery         *pQuery = pRuntimeEnv->pQuery;
  SQLFunctionCtx *pCtx = pRuntimeEnv->pCtx;

  if (!IS_MASTER_SCAN(pRuntimeEnv) && !pStatus->closed) {
    return;
  }

  for (int32_t idx = 0; idx < pQuery->numOfOutput; ++idx) {
    SQLFunctionCtx *pOne = &pCtx[idx];
    int32_t         functionId = pQuery->pSelectExpr[idx].base.functionId;

    pOne->nStartQueryTimestamp = pWin->skey;
    pOne->size = forwardStep;

    // startOffset is always the lowest row index of the run
    if (QUERY_IS_ASC_QUERY(pQuery)) {
      pOne->startOffset = offset;
    } else {
      pOne->startOffset = offset - (forwardStep - 1);
    }

    if ((aAggs[functionId].nStatus & TSDB_FUNCSTATE_SELECTIVITY) != 0) {
      pOne->ptsList = &tsBuf[offset];
    }

    // not a whole block involved in query processing, statistics data can not be used
    if (forwardStep != numOfTotal) {
      pOne->preAggVals.isSet = false;
    }

    if (functionNeedToExecute(pRuntimeEnv, pOne, functionId)) {
      aAggs[functionId].xFunction(pOne);
    }
  }
}

/**
 * Run every output function of the query on the single row at offset.
 *
 * Nothing is done unless this is the master scan or the window is closed.
 *
 * @param pStatus status of the window being processed
 * @param pWin    window whose skey becomes nStartQueryTimestamp
 * @param offset  row position handed to each function
 */
static void doRowwiseApplyFunctions(SQueryRuntimeEnv *pRuntimeEnv, SWindowStatus *pStatus, STimeWindow *pWin,
                                    int32_t offset) {
  SQuery         *pQuery = pRuntimeEnv->pQuery;
  SQLFunctionCtx *pCtx = pRuntimeEnv->pCtx;

  if (!IS_MASTER_SCAN(pRuntimeEnv) && !pStatus->closed) {
    return;
  }

  for (int32_t idx = 0; idx < pQuery->numOfOutput; ++idx) {
    SQLFunctionCtx *pOne = &pCtx[idx];
    pOne->nStartQueryTimestamp = pWin->skey;

    int32_t functionId = pQuery->pSelectExpr[idx].base.functionId;
    if (!functionNeedToExecute(pRuntimeEnv, pOne, functionId)) {
      continue;
    }

    aAggs[functionId].xFunctionF(pOne, offset);
  }
}

H
Haojun Liao 已提交
731 732
/**
 * Advance *pNext to the next time window and locate its first row in the
 * current data block.
 *
 * @param pNext          in/out: window to advance; moved further when the first
 *                       candidate window covers no data
 * @param pDataBlockInfo block whose time range is checked
 * @param primaryKeys    timestamp column of the block
 * @param searchFn       search routine used to locate the start key
 * @param prevPosition   last row position of the previous window, or -1
 * @return the start position inside the block, or -1 when the next window lies
 *         outside the block
 */
static int32_t getNextQualifiedWindow(SQueryRuntimeEnv *pRuntimeEnv, STimeWindow *pNext, SDataBlockInfo *pDataBlockInfo,
    TSKEY *primaryKeys, __block_search_fn_t searchFn, int32_t prevPosition) {
  SQuery *pQuery = pRuntimeEnv->pQuery;

  GET_NEXT_TIMEWINDOW(pQuery, pNext);

  bool asc = QUERY_IS_ASC_QUERY(pQuery);

  // next time window is not in current block
  bool outsideBlock = asc ? (pNext->skey > pDataBlockInfo->window.ekey) : (pNext->ekey < pDataBlockInfo->window.skey);
  if (outsideBlock) {
    return -1;
  }

  // the search key is the window border facing the scan direction, clamped by the query start key
  TSKEY startKey = -1;
  if (asc) {
    startKey = (pNext->skey < pQuery->window.skey) ? pQuery->window.skey : pNext->skey;
  } else {
    startKey = (pNext->ekey > pQuery->window.skey) ? pQuery->window.skey : pNext->ekey;
  }

  int32_t startPos = 0;
  bool    tumbling = (pQuery->slidingTime == pQuery->intervalTime) && (prevPosition != -1);

  if (tumbling) {
    // tumbling time window query, a special case of sliding time window query:
    // the next window starts right after the previous position
    startPos = prevPosition + GET_FORWARD_DIRECTION_FACTOR(pQuery->order.order);
  } else {
    startPos = searchFn((char *)primaryKeys, pDataBlockInfo->rows, startKey, pQuery->order.order);
  }

  /*
   * This time window does not cover any data, try the next time window;
   * this case may happen when the time window is too small.
   */
  TSKEY firstKey = primaryKeys[startPos];

  if (asc) {
    if (firstKey > pNext->ekey) {
      pNext->ekey += ((firstKey - pNext->ekey + pQuery->slidingTime - 1)/pQuery->slidingTime) * pQuery->slidingTime;
      pNext->skey = pNext->ekey - pQuery->intervalTime + 1;
    }
  } else if (firstKey < pNext->skey) {
    pNext->skey -= ((pNext->skey - firstKey + pQuery->slidingTime - 1) / pQuery->slidingTime) * pQuery->slidingTime;
    pNext->ekey = pNext->skey + pQuery->intervalTime - 1;
  }

  return startPos;
}

H
Haojun Liao 已提交
784
/**
 * Return the end key of a window in scan direction, clamped by the end key of
 * the query range: min(window ekey, query ekey) for ascending queries and
 * max(window skey, query ekey) otherwise.
 */
static FORCE_INLINE TSKEY reviseWindowEkey(SQuery *pQuery, STimeWindow *pWindow) {
  TSKEY bound = pQuery->window.ekey;

  if (QUERY_IS_ASC_QUERY(pQuery)) {
    return (pWindow->ekey > bound) ? bound : pWindow->ekey;
  }

  return (pWindow->skey < bound) ? bound : pWindow->skey;
}

H
hjxilinx 已提交
801 802 803 804 805 806 807 808 809 810 811 812 813 814 815
/*
 * Return the pData pointer of the column with the given id, or NULL when the
 * block holds no such column.
 * todo: binary search
 */
static void* getDataBlockImpl(SArray* pDataBlock, int32_t colId) {
  int32_t total = taosArrayGetSize(pDataBlock);
  int32_t idx = 0;

  while (idx < total) {
    SColumnInfoData *pCol = taosArrayGet(pDataBlock, idx++);
    if (pCol->info.colId == colId) {
      return pCol->pData;
    }
  }

  return NULL;
}

static char *getDataBlock(SQueryRuntimeEnv *pRuntimeEnv, SArithmeticSupport *sas, int32_t col, int32_t size,
816
                    SArray *pDataBlock) {
dengyihao's avatar
dengyihao 已提交
817 818 819
  if (pDataBlock == NULL) {
    return NULL;
  }
820

H
Haojun Liao 已提交
821
  char *dataBlock = NULL;
H
Haojun Liao 已提交
822
  SQuery *pQuery = pRuntimeEnv->pQuery;
823
  SQLFunctionCtx *pCtx = pRuntimeEnv->pCtx;
824

825
  int32_t functionId = pQuery->pSelectExpr[col].base.functionId;
826
  if (functionId == TSDB_FUNC_ARITHM) {
827
    sas->pArithExpr = &pQuery->pSelectExpr[col];
828

829 830 831 832 833 834
    // set the start offset to be the lowest start position, no matter asc/desc query order
    if (QUERY_IS_ASC_QUERY(pQuery)) {
      pCtx->startOffset = pQuery->pos;
    } else {
      pCtx->startOffset = pQuery->pos - (size - 1);
    }
835

836 837 838 839
    sas->offset  = 0;
    sas->colList = pQuery->colList;
    sas->numOfCols = pQuery->numOfCols;
    sas->data    = calloc(pQuery->numOfCols, POINTER_BYTES);
840

H
Haojun Liao 已提交
841
    if (sas->data == NULL) {
H
Haojun Liao 已提交
842
      finalizeQueryResult(pRuntimeEnv); // clean up allocated resource during query
H
Haojun Liao 已提交
843 844 845
      longjmp(pRuntimeEnv->env, TSDB_CODE_QRY_OUT_OF_MEMORY);
    }

846
    // here the pQuery->colList and sas->colList are identical
H
Haojun Liao 已提交
847
    int32_t numOfCols = taosArrayGetSize(pDataBlock);
848
    for (int32_t i = 0; i < pQuery->numOfCols; ++i) {
849
      SColumnInfo *pColMsg = &pQuery->colList[i];
850

851 852 853 854 855 856 857 858
      dataBlock = NULL;
      for (int32_t k = 0; k < numOfCols; ++k) {  //todo refactor
        SColumnInfoData *p = taosArrayGet(pDataBlock, k);
        if (pColMsg->colId == p->info.colId) {
          dataBlock = p->pData;
          break;
        }
      }
859

860
      assert(dataBlock != NULL);
H
Haojun Liao 已提交
861
      sas->data[i] = dataBlock/* + pQuery->colList[i].bytes*/;  // start from the offset
862
    }
863

864
  } else {  // other type of query function
865
    SColIndex *pCol = &pQuery->pSelectExpr[col].base.colInfo;
H
Haojun Liao 已提交
866
    if (TSDB_COL_IS_TAG(pCol->flag)) {
867 868
      dataBlock = NULL;
    } else {
H
Haojun Liao 已提交
869 870 871 872 873
      SColIndex* pColIndex = &pQuery->pSelectExpr[col].base.colInfo;
      SColumnInfoData *p = taosArrayGet(pDataBlock, pColIndex->colIndex);
      assert(p->info.colId == pColIndex->colId);

      dataBlock = p->pData;
874 875
    }
  }
876

877 878 879 880
  return dataBlock;
}

/**
H
Haojun Liao 已提交
881
 * todo set the last value for pQueryTableInfo as in rowwiseapplyfunctions
882 883
 * @param pRuntimeEnv
 * @param forwardStep
884
 * @param tsCols
885 886 887 888 889
 * @param pFields
 * @param isDiskFileBlock
 * @return                  the incremental number of output value, so it maybe 0 for fixed number of query,
 *                          such as count/min/max etc.
 */
890
static void blockwiseApplyFunctions(SQueryRuntimeEnv *pRuntimeEnv, SDataStatis *pStatis,
891 892
                                       SDataBlockInfo *pDataBlockInfo, SWindowResInfo *pWindowResInfo,
                                       __block_search_fn_t searchFn, SArray *pDataBlock) {
893
  SQLFunctionCtx *pCtx = pRuntimeEnv->pCtx;
894 895
  bool masterScan = IS_MASTER_SCAN(pRuntimeEnv);

896 897
  SQuery *pQuery = pRuntimeEnv->pQuery;
  TSKEY  *tsCols = NULL;
898
  if (pDataBlock != NULL) {
899
    SColumnInfoData* pColInfo = taosArrayGet(pDataBlock, 0);
900
    tsCols = (TSKEY *)(pColInfo->pData);
901
  }
902

903
  SArithmeticSupport *sasArray = calloc((size_t)pQuery->numOfOutput, sizeof(SArithmeticSupport));
H
Haojun Liao 已提交
904
  if (sasArray == NULL) {
H
Haojun Liao 已提交
905
    finalizeQueryResult(pRuntimeEnv); // clean up allocated resource during query
H
Haojun Liao 已提交
906 907
    longjmp(pRuntimeEnv->env, TSDB_CODE_QRY_OUT_OF_MEMORY);
  }
908

909
  for (int32_t k = 0; k < pQuery->numOfOutput; ++k) {
H
hjxilinx 已提交
910
    char *dataBlock = getDataBlock(pRuntimeEnv, &sasArray[k], k, pDataBlockInfo->rows, pDataBlock);
911
    setExecParams(pQuery, &pCtx[k], dataBlock, tsCols, pDataBlockInfo, pStatis, &sasArray[k], k);
912
  }
913

914
  int32_t step = GET_FORWARD_DIRECTION_FACTOR(pQuery->order.order);
H
Haojun Liao 已提交
915
  if (QUERY_IS_INTERVAL_QUERY(pQuery) && tsCols != NULL) {
916
    int32_t offset = GET_COL_DATA_POS(pQuery, 0, step);
917
    TSKEY   ts = tsCols[offset];
918

919
    bool hasTimeWindow = false;
920
    STimeWindow win = getActiveTimeWindow(pWindowResInfo, ts, pQuery);
921
    if (setWindowOutputBufByKey(pRuntimeEnv, pWindowResInfo, pDataBlockInfo->tid, &win, masterScan, &hasTimeWindow) != TSDB_CODE_SUCCESS) {
dengyihao's avatar
dengyihao 已提交
922
      tfree(sasArray);
H
hjxilinx 已提交
923
      return;
924
    }
925

H
Haojun Liao 已提交
926 927 928
    int32_t forwardStep = 0;
    int32_t startPos = pQuery->pos;

929 930
    if (hasTimeWindow) {
      TSKEY   ekey = reviseWindowEkey(pQuery, &win);
H
Haojun Liao 已提交
931
      forwardStep = getNumOfRowsInTimeWindow(pQuery, pDataBlockInfo, tsCols, pQuery->pos, ekey, searchFn, true);
932

933
      SWindowStatus *pStatus = getTimeWindowResStatus(pWindowResInfo, curTimeWindow(pWindowResInfo));
H
Haojun Liao 已提交
934
      doBlockwiseApplyFunctions(pRuntimeEnv, pStatus, &win, startPos, forwardStep, tsCols, pDataBlockInfo->rows);
935
    }
936

937 938
    int32_t     index = pWindowResInfo->curIndex;
    STimeWindow nextWin = win;
939

940
    while (1) {
H
Haojun Liao 已提交
941 942
      int32_t prevEndPos = (forwardStep - 1) * step + startPos;
      startPos = getNextQualifiedWindow(pRuntimeEnv, &nextWin, pDataBlockInfo, tsCols, searchFn, prevEndPos);
943 944 945
      if (startPos < 0) {
        break;
      }
946

947
      // null data, failed to allocate more memory buffer
H
Haojun Liao 已提交
948
      hasTimeWindow = false;
949
      if (setWindowOutputBufByKey(pRuntimeEnv, pWindowResInfo, pDataBlockInfo->tid, &nextWin, masterScan, &hasTimeWindow) != TSDB_CODE_SUCCESS) {
950 951
        break;
      }
952

953 954 955 956 957
      if (!hasTimeWindow) {
        continue;
      }

      TSKEY ekey = reviseWindowEkey(pQuery, &nextWin);
H
Haojun Liao 已提交
958
      forwardStep = getNumOfRowsInTimeWindow(pQuery, pDataBlockInfo, tsCols, startPos, ekey, searchFn, true);
959

960
      SWindowStatus* pStatus = getTimeWindowResStatus(pWindowResInfo, curTimeWindow(pWindowResInfo));
961
      doBlockwiseApplyFunctions(pRuntimeEnv, pStatus, &nextWin, startPos, forwardStep, tsCols, pDataBlockInfo->rows);
962
    }
963

964 965 966 967 968 969 970
    pWindowResInfo->curIndex = index;
  } else {
    /*
     * the sqlfunctionCtx parameters should be set done before all functions are invoked,
     * since the selectivity + tag_prj query needs all parameters been set done.
     * tag_prj function are changed to be TSDB_FUNC_TAG_DUMMY
     */
971
    for (int32_t k = 0; k < pQuery->numOfOutput; ++k) {
972
      int32_t functionId = pQuery->pSelectExpr[k].base.functionId;
973 974 975 976 977
      if (functionNeedToExecute(pRuntimeEnv, &pCtx[k], functionId)) {
        aAggs[functionId].xFunction(&pCtx[k]);
      }
    }
  }
978

979 980 981 982
  for(int32_t i = 0; i < pQuery->numOfOutput; ++i) {
    if (pQuery->pSelectExpr[i].base.functionId != TSDB_FUNC_ARITHM) {
      continue;
    }
983

984 985
    tfree(sasArray[i].data);
  }
986

987 988 989 990 991 992 993
  tfree(sasArray);
}

/*
 * Select (creating it if necessary) the result window of the group identified
 * by the group-by column value at pData, and make it the current output buffer.
 *
 * Returns TSDB_CODE_SUCCESS on success and -1 when the value is NULL, the
 * window cannot be obtained, or no result page can be added for it.
 */
static int32_t setGroupResultOutputBuf(SQueryRuntimeEnv *pRuntimeEnv, char *pData, int16_t type, int16_t bytes) {
  // rows whose group-by value is NULL do not belong to any group
  if (isNull(pData, type)) {
    return -1;
  }

  const int32_t groupResultId = 1;

  // integer view of the group key; stays -1 for types not listed below
  int64_t groupKey = -1;
  if (type == TSDB_DATA_TYPE_BOOL || type == TSDB_DATA_TYPE_TINYINT) {
    groupKey = GET_INT8_VAL(pData);
  } else if (type == TSDB_DATA_TYPE_SMALLINT) {
    groupKey = GET_INT16_VAL(pData);
  } else if (type == TSDB_DATA_TYPE_INT) {
    groupKey = GET_INT32_VAL(pData);
  } else if (type == TSDB_DATA_TYPE_BIGINT) {
    groupKey = GET_INT64_VAL(pData);
  }

  SWindowResult *pGroupRes = doSetTimeWindowFromKey(pRuntimeEnv, &pRuntimeEnv->windowResInfo, pData, bytes, true);
  if (pGroupRes == NULL) {
    return -1;
  }

  pGroupRes->window.skey = groupKey;
  pGroupRes->window.ekey = groupKey;

  // not assign result buffer yet, add new result buffer
  if (pGroupRes->pos.pageId == -1 &&
      addNewWindowResultBuf(pGroupRes, pRuntimeEnv->pResultBuf, groupResultId, pRuntimeEnv->numOfRowsPerPage) != 0) {
    return -1;
  }

  setWindowResOutputBuf(pRuntimeEnv, pGroupRes);
  initCtxOutputBuf(pRuntimeEnv);
  return TSDB_CODE_SUCCESS;
}

1030
/*
 * Return the data buffer of the first non-tag group-by column that is present
 * in pDataBlock, and report that column's type and width through *type and
 * *bytes. Returns NULL when no such column is found in the block.
 */
static char *getGroupbyColumnData(SQuery *pQuery, int16_t *type, int16_t *bytes, SArray* pDataBlock) {
  SSqlGroupbyExpr *pGroupbyExpr = pQuery->pGroupbyExpr;

  int32_t k = 0;
  for (; k < pGroupbyExpr->numOfGroupCols; ++k) {
    SColIndex *pGroupCol = taosArrayGet(pGroupbyExpr->columnInfo, k);
    if (pGroupCol->flag != TSDB_COL_TAG) {
      int32_t colId = pGroupCol->colId;

      // position of the group-by column in the query column list
      int16_t colIndex = -1;
      int32_t i = 0;
      while (i < pQuery->numOfCols) {
        if (pQuery->colList[i].colId == colId) {
          colIndex = i;
          break;
        }
        ++i;
      }

      assert(colIndex >= 0 && colIndex < pQuery->numOfCols);

      *type = pQuery->colList[colIndex].type;
      *bytes = pQuery->colList[colIndex].bytes;

      /*
       * the colIndex is acquired from the first meter of all qualified meters in this vnode during query prepare
       * stage, the remain meter may not have the required column in cache actually. So the column is looked up
       * again, by id, in the block that has actually been loaded.
       */
      int32_t numOfLoaded = taosArrayGetSize(pDataBlock);
      for (int32_t n = 0; n < numOfLoaded; ++n) {
        SColumnInfoData *pLoaded = taosArrayGet(pDataBlock, n);
        if (pLoaded->info.colId == pGroupCol->colId) {
          return pLoaded->pData;
        }
      }
    }
  }

  return NULL;
}

/*
 * Compare the row at the given offset of the current input with the current
 * element of the timestamp buffer (pRuntimeEnv->pTSBuf).
 *
 * Returns TS_JOIN_TAG_NOT_EQUALS when the tag of the first context differs
 * from the element's tag, TS_JOIN_TS_NOT_EQUALS when the row timestamp has not
 * reached the element's timestamp yet in scan order, and TS_JOIN_TS_EQUAL
 * otherwise. A row timestamp that has already passed the element's timestamp
 * trips an assertion.
 */
static int32_t doTSJoinFilter(SQueryRuntimeEnv *pRuntimeEnv, int32_t offset) {
  SQuery *pQuery = pRuntimeEnv->pQuery;

  STSElem         elem = tsBufGetElem(pRuntimeEnv->pTSBuf);
  SQLFunctionCtx *pFirstCtx = pRuntimeEnv->pCtx;

  // compare tag first
  if (pFirstCtx[0].tag.i64Key != elem.tag) {
    return TS_JOIN_TAG_NOT_EQUALS;
  }

  TSKEY key = *(TSKEY *)(pFirstCtx[0].aInputElemBuf + TSDB_KEYSIZE * offset);

#if defined(_DEBUG_VIEW)
  printf("elem in comp ts file:%" PRId64 ", key:%" PRId64 ", tag:%"PRIu64", query order:%d, ts order:%d, traverse:%d, index:%d\n",
         elem.ts, key, elem.tag, pQuery->order.order, pRuntimeEnv->pTSBuf->tsOrder,
         pRuntimeEnv->pTSBuf->cur.order, pRuntimeEnv->pTSBuf->cur.tsIndex);
#endif

  if (key == elem.ts) {
    return TS_JOIN_TS_EQUAL;
  }

  // "behind" means the row has not reached the buffered timestamp in scan direction
  bool behind = QUERY_IS_ASC_QUERY(pQuery) ? (key < elem.ts) : (key > elem.ts);
  if (behind) {
    return TS_JOIN_TS_NOT_EQUALS;
  }

  // the row must never run ahead of the timestamp buffer
  assert(false);
  return TS_JOIN_TS_EQUAL;
}

/*
 * Decide whether the function with the given id has to be invoked for the
 * current scan.
 *
 *  - TSDB_FUNC_TS always runs.
 *  - a completed result, TSDB_FUNC_TAG_DUMMY and TSDB_FUNC_TS_DUMMY never run.
 *  - first/first_dst run only for an ascending query.
 *  - last/last_dst run only when param[0] equals the query order.
 *  - everything else runs unless this is the reverse scan.
 */
static bool functionNeedToExecute(SQueryRuntimeEnv *pRuntimeEnv, SQLFunctionCtx *pCtx, int32_t functionId) {
  SResultInfo *pResInfo = GET_RES_INFO(pCtx);
  SQuery      *pQuery = pRuntimeEnv->pQuery;

  // in case of timestamp column, always generated results.
  if (functionId == TSDB_FUNC_TS) {
    return true;
  }

  if (pResInfo->complete) {
    return false;
  }

  switch (functionId) {
    case TSDB_FUNC_TAG_DUMMY:
    case TSDB_FUNC_TS_DUMMY:
      return false;

    case TSDB_FUNC_FIRST_DST:
    case TSDB_FUNC_FIRST:
      return QUERY_IS_ASC_QUERY(pQuery);

    // todo add comments
    case TSDB_FUNC_LAST_DST:
    case TSDB_FUNC_LAST:
      return pCtx->param[0].i64Key == pQuery->order.order;

    default:
      break;
  }

  // in the supplementary scan, none of the remaining functions is executed
  return !IS_REVERSE_SCAN(pRuntimeEnv);
}

1137 1138
/*
 * Apply every output function of the query to a data block one row at a time.
 * tableApplyFunctionsOnBlock takes this path when column filters, a timestamp
 * join buffer, or group-by on a normal column are present.
 *
 * Per row: the timestamp-join check and the column filters are evaluated
 * first; for an interval query the row is then applied to every time window
 * that covers its timestamp, otherwise it is applied to the (optionally
 * group specific) output buffer. On return the lastKey of the current table
 * has been advanced and the arithmetic support buffers have been released.
 */
static void rowwiseApplyFunctions(SQueryRuntimeEnv *pRuntimeEnv, SDataStatis *pStatis, SDataBlockInfo *pDataBlockInfo,
    SWindowResInfo *pWindowResInfo, SArray *pDataBlock) {
  SQLFunctionCtx *pCtx = pRuntimeEnv->pCtx;
  bool            masterScan = IS_MASTER_SCAN(pRuntimeEnv);

  SQuery          *pQuery = pRuntimeEnv->pQuery;
  STableQueryInfo *pTableInfo = pQuery->current;

  // the first column is treated as the timestamp list only if its type says so
  SColumnInfoData *pFirstCol = (SColumnInfoData *)taosArrayGet(pDataBlock, 0);
  TSKEY           *tsCols = NULL;
  if (pFirstCol->info.type == TSDB_DATA_TYPE_TIMESTAMP) {
    tsCols = (TSKEY *)pFirstCol->pData;
  }

  bool groupByColumn = pRuntimeEnv->groupbyNormalCol;

  SArithmeticSupport *sasArray = calloc((size_t)pQuery->numOfOutput, sizeof(SArithmeticSupport));
  if (sasArray == NULL) {
    finalizeQueryResult(pRuntimeEnv); // clean up allocated resource during query
    longjmp(pRuntimeEnv->env, TSDB_CODE_QRY_OUT_OF_MEMORY);
  }

  int16_t type = 0;
  int16_t bytes = 0;
  char   *groupbyColumnData = NULL;
  if (groupByColumn) {
    groupbyColumnData = getGroupbyColumnData(pQuery, &type, &bytes, pDataBlock);
  }

  for (int32_t k = 0; k < pQuery->numOfOutput; ++k) {
    char *input = getDataBlock(pRuntimeEnv, &sasArray[k], k, pDataBlockInfo->rows, pDataBlock);
    setExecParams(pQuery, &pCtx[k], input, tsCols, pDataBlockInfo, pStatis, &sasArray[k], k);
  }

  // set the input column data
  for (int32_t k = 0; k < pQuery->numOfFilterCols; ++k) {
    SSingleColumnFilterInfo *pFilter = &pQuery->pFilterInfo[k];
    pFilter->pData = getDataBlockImpl(pDataBlock, pFilter->info.colId);
    assert(pFilter->pData != NULL);
  }

  int32_t step = GET_FORWARD_DIRECTION_FACTOR(pQuery->order.order);

  // from top to bottom in desc
  // from bottom to top in asc order
  if (pRuntimeEnv->pTSBuf != NULL) {
    SQInfo *pQInfo = (SQInfo *)GET_QINFO_ADDR(pRuntimeEnv);
    qDebug("QInfo:%p process data rows, numOfRows:%d, query order:%d, ts comp order:%d", pQInfo, pDataBlockInfo->rows,
           pQuery->order.order, pRuntimeEnv->pTSBuf->cur.order);
  }

  int32_t offset = -1;

  for (int32_t j = 0; j < pDataBlockInfo->rows; ++j) {
    offset = GET_COL_DATA_POS(pQuery, j, step);

    if (pRuntimeEnv->pTSBuf != NULL) {
      int32_t r = doTSJoinFilter(pRuntimeEnv, offset);
      if (r == TS_JOIN_TAG_NOT_EQUALS) {
        break;
      }

      if (r == TS_JOIN_TS_NOT_EQUALS) {
        continue;
      }

      assert(r == TS_JOIN_TS_EQUAL);
    }

    if (pQuery->numOfFilterCols > 0 && (!doFilterData(pQuery, offset))) {
      continue;
    }

    if (QUERY_IS_INTERVAL_QUERY(pQuery)) {  // interval window query
      // decide the time window according to the primary timestamp
      int64_t     ts = tsCols[offset];
      STimeWindow win = getActiveTimeWindow(pWindowResInfo, ts, pQuery);

      bool    hasTimeWindow = false;
      int32_t ret = setWindowOutputBufByKey(pRuntimeEnv, pWindowResInfo, pDataBlockInfo->tid, &win, masterScan, &hasTimeWindow);

      // null data, too many state code, or no window available for this row
      if (ret != TSDB_CODE_SUCCESS || !hasTimeWindow) {
        continue;
      }

      SWindowStatus *pStatus = getTimeWindowResStatus(pWindowResInfo, curTimeWindow(pWindowResInfo));
      doRowwiseApplyFunctions(pRuntimeEnv, pStatus, &win, offset);

      // walk the following windows that also cover this timestamp, then restore curIndex
      STimeWindow nextWin = win;
      int32_t     savedIndex = pWindowResInfo->curIndex;

      for (;;) {
        GET_NEXT_TIMEWINDOW(pQuery, &nextWin);

        bool outOfQueryRange = (nextWin.skey > pQuery->window.ekey && QUERY_IS_ASC_QUERY(pQuery)) ||
                               (nextWin.skey < pQuery->window.ekey && !QUERY_IS_ASC_QUERY(pQuery));
        if (outOfQueryRange || ts < nextWin.skey || ts > nextWin.ekey) {
          break;
        }

        // null data, failed to allocate more memory buffer
        hasTimeWindow = false;
        if (setWindowOutputBufByKey(pRuntimeEnv, pWindowResInfo, pDataBlockInfo->tid, &nextWin, masterScan, &hasTimeWindow) != TSDB_CODE_SUCCESS) {
          break;
        }

        if (hasTimeWindow) {
          pStatus = getTimeWindowResStatus(pWindowResInfo, curTimeWindow(pWindowResInfo));
          doRowwiseApplyFunctions(pRuntimeEnv, pStatus, &nextWin, offset);
        }
      }

      pWindowResInfo->curIndex = savedIndex;
    } else {  // other queries
      // decide which group this rows belongs to according to current state value
      if (groupByColumn) {
        char *val = groupbyColumnData + bytes * offset;

        if (setGroupResultOutputBuf(pRuntimeEnv, val, type, bytes) != TSDB_CODE_SUCCESS) {  // null data, too many state code
          continue;
        }
      }

      for (int32_t k = 0; k < pQuery->numOfOutput; ++k) {
        int32_t functionId = pQuery->pSelectExpr[k].base.functionId;
        if (functionNeedToExecute(pRuntimeEnv, &pCtx[k], functionId)) {
          aAggs[functionId].xFunctionF(&pCtx[k], offset);
        }
      }
    }

    // if timestamp filter list is empty, quit current query
    if (pRuntimeEnv->pTSBuf != NULL && !tsBufNextPos(pRuntimeEnv->pTSBuf)) {
      setQueryStatus(pQuery, QUERY_COMPLETED);
      break;
    }
  }

  assert(offset >= 0);
  if (tsCols != NULL) {
    pTableInfo->lastKey = tsCols[offset] + step;
  } else if (QUERY_IS_ASC_QUERY(pQuery)) {
    pTableInfo->lastKey = pDataBlockInfo->window.ekey + step;
  } else {
    pTableInfo->lastKey = pDataBlockInfo->window.skey + step;
  }

  // todo refactor: extract method
  // only arithmetic expressions own a data pointer array (allocated in getDataBlock)
  for (int32_t i = 0; i < pQuery->numOfOutput; ++i) {
    if (pQuery->pSelectExpr[i].base.functionId == TSDB_FUNC_ARITHM) {
      tfree(sasArray[i].data);
    }
  }

  free(sasArray);
}

static int32_t tableApplyFunctionsOnBlock(SQueryRuntimeEnv *pRuntimeEnv, SDataBlockInfo *pDataBlockInfo,
H
hjxilinx 已提交
1302
                                          SDataStatis *pStatis, __block_search_fn_t searchFn, SArray *pDataBlock) {
H
hjxilinx 已提交
1303
  SQuery *pQuery = pRuntimeEnv->pQuery;
H
hjxilinx 已提交
1304 1305 1306
  
  STableQueryInfo* pTableQInfo = pQuery->current;
  SWindowResInfo*  pWindowResInfo = &pRuntimeEnv->windowResInfo;
H
hjxilinx 已提交
1307
  
H
Haojun Liao 已提交
1308
  if (pQuery->numOfFilterCols > 0 || pRuntimeEnv->pTSBuf != NULL || pRuntimeEnv->groupbyNormalCol) {
1309
    rowwiseApplyFunctions(pRuntimeEnv, pStatis, pDataBlockInfo, pWindowResInfo, pDataBlock);
1310
  } else {
1311
    blockwiseApplyFunctions(pRuntimeEnv, pStatis, pDataBlockInfo, pWindowResInfo, searchFn, pDataBlock);
1312
  }
1313

1314
  // update the lastkey of current table
1315
  TSKEY lastKey = QUERY_IS_ASC_QUERY(pQuery) ? pDataBlockInfo->window.ekey : pDataBlockInfo->window.skey;
H
hjxilinx 已提交
1316
  pTableQInfo->lastKey = lastKey + GET_FORWARD_DIRECTION_FACTOR(pQuery->order.order);
1317

1318
  // interval query with limit applied
1319
  int32_t numOfRes = 0;
H
Haojun Liao 已提交
1320
  if (QUERY_IS_INTERVAL_QUERY(pQuery)) {
1321 1322 1323
    numOfRes = doCheckQueryCompleted(pRuntimeEnv, lastKey, pWindowResInfo);
  } else {
    numOfRes = getNumOfResult(pRuntimeEnv);
1324

1325 1326 1327 1328
    // update the number of output result
    if (numOfRes > 0 && pQuery->checkBuffer == 1) {
      assert(numOfRes >= pQuery->rec.rows);
      pQuery->rec.rows = numOfRes;
1329

1330 1331 1332
      if (numOfRes >= pQuery->rec.threshold) {
        setQueryStatus(pQuery, QUERY_RESBUF_FULL);
      }
1333

1334 1335 1336
      if ((pQuery->limit.limit >= 0) && (pQuery->limit.limit + pQuery->limit.offset) <= numOfRes) {
        setQueryStatus(pQuery, QUERY_COMPLETED);
      }
H
Haojun Liao 已提交
1337
    }
1338
  }
1339

1340
  return numOfRes;
1341 1342
}

H
Haojun Liao 已提交
1343
void setExecParams(SQuery *pQuery, SQLFunctionCtx *pCtx, void* inputData, TSKEY *tsCol, SDataBlockInfo* pBlockInfo,
1344 1345 1346 1347 1348 1349
                   SDataStatis *pStatis, void *param, int32_t colIndex) {
  
  int32_t functionId = pQuery->pSelectExpr[colIndex].base.functionId;
  int32_t colId = pQuery->pSelectExpr[colIndex].base.colInfo.colId;
  
  SDataStatis *tpField = NULL;
H
Haojun Liao 已提交
1350
  pCtx->hasNull = hasNullValue(&pQuery->pSelectExpr[colIndex].base.colInfo, pStatis, &tpField);
1351
  pCtx->aInputElemBuf = inputData;
1352

1353
  if (tpField != NULL) {
H
Haojun Liao 已提交
1354
    pCtx->preAggVals.isSet  = true;
1355 1356
    pCtx->preAggVals.statis = *tpField;
    assert(pCtx->preAggVals.statis.numOfNull <= pBlockInfo->rows);
1357 1358 1359
  } else {
    pCtx->preAggVals.isSet = false;
  }
1360

H
Haojun Liao 已提交
1361 1362 1363
  // limit/offset query will affect this value
  pCtx->startOffset = QUERY_IS_ASC_QUERY(pQuery) ? pQuery->pos:0;
  pCtx->size = QUERY_IS_ASC_QUERY(pQuery) ? pBlockInfo->rows - pQuery->pos : pQuery->pos + 1;
1364

1365 1366
  uint32_t status = aAggs[functionId].nStatus;
  if (((status & (TSDB_FUNCSTATE_SELECTIVITY | TSDB_FUNCSTATE_NEED_TS)) != 0) && (tsCol != NULL)) {
H
Haojun Liao 已提交
1367
    pCtx->ptsList = tsCol;
1368
  }
1369

1370 1371 1372 1373 1374
  if (functionId >= TSDB_FUNC_FIRST_DST && functionId <= TSDB_FUNC_LAST_DST) {
    // last_dist or first_dist function
    // store the first&last timestamp into the intermediate buffer [1], the true
    // value may be null but timestamp will never be null
  } else if (functionId == TSDB_FUNC_TOP || functionId == TSDB_FUNC_BOTTOM || functionId == TSDB_FUNC_TWA ||
1375
             functionId == TSDB_FUNC_DIFF || (functionId >= TSDB_FUNC_RATE && functionId <= TSDB_FUNC_AVG_IRATE)) {
1376
    /*
H
Haojun Liao 已提交
1377
     * least squares function needs two columns of input, currently, the x value of linear equation is set to
1378 1379 1380 1381 1382 1383 1384 1385 1386 1387
     * timestamp column, and the y-value is the column specified in pQuery->pSelectExpr[i].colIdxInBuffer
     *
     * top/bottom function needs timestamp to indicate when the
     * top/bottom values emerge, so does diff function
     */
    if (functionId == TSDB_FUNC_TWA) {
      STwaInfo *pTWAInfo = GET_RES_INFO(pCtx)->interResultBuf;
      pTWAInfo->SKey = pQuery->window.skey;
      pTWAInfo->EKey = pQuery->window.ekey;
    }
1388

1389 1390
  } else if (functionId == TSDB_FUNC_ARITHM) {
    pCtx->param[1].pz = param;
H
Haojun Liao 已提交
1391 1392 1393 1394 1395 1396
  } else if (functionId == TSDB_FUNC_SPREAD) {  // set the statistics data for primary time stamp column
    if (colId == PRIMARYKEY_TIMESTAMP_COL_INDEX) {
      pCtx->preAggVals.isSet  = true;
      pCtx->preAggVals.statis.min = pBlockInfo->window.skey;
      pCtx->preAggVals.statis.max = pBlockInfo->window.ekey;
    }
1397 1398 1399 1400 1401 1402 1403 1404 1405 1406 1407 1408 1409
  } else if (functionId == TSDB_FUNC_INTERP) {
    SInterpInfoDetail *pInterpInfo = GET_RES_INFO(pCtx)->interResultBuf;
    pInterpInfo->type = pQuery->fillType;
    pInterpInfo->ts = pQuery->window.skey;
    pInterpInfo->primaryCol = (colId == PRIMARYKEY_TIMESTAMP_COL_INDEX);
  
    if (pQuery->fillVal != NULL) {
      if (isNull((const char*) &pQuery->fillVal[colIndex], pCtx->inputType)) {
        pCtx->param[1].nType = TSDB_DATA_TYPE_NULL;
      } else { // todo refactor, tVariantCreateFromBinary should handle the NULL value
        tVariantCreateFromBinary(&pCtx->param[1], (char*) &pQuery->fillVal[colIndex], pCtx->inputBytes, pCtx->inputType);
      }
    }
1410
  }
1411

1412 1413 1414 1415 1416 1417
#if defined(_DEBUG_VIEW)
  //  int64_t *tsList = (int64_t *)primaryColumnData;
//  int64_t  s = tsList[0];
//  int64_t  e = tsList[size - 1];

//    if (IS_DATA_BLOCK_LOADED(blockStatus)) {
1418
//        qDebug("QInfo:%p query ts:%lld-%lld, offset:%d, rows:%d, bstatus:%d,
1419 1420 1421
//        functId:%d", GET_QINFO_ADDR(pQuery),
//               s, e, startOffset, size, blockStatus, functionId);
//    } else {
1422
//        qDebug("QInfo:%p block not loaded, bstatus:%d",
1423 1424 1425 1426 1427 1428
//        GET_QINFO_ADDR(pQuery), blockStatus);
//    }
#endif
}

// set the output buffer for the selectivity + tag query
H
Haojun Liao 已提交
1429 1430 1431
static void setCtxTagColumnInfo(SQueryRuntimeEnv *pRuntimeEnv, SQLFunctionCtx *pCtx) {
  SQuery* pQuery = pRuntimeEnv->pQuery;

1432
  if (isSelectivityWithTagsQuery(pQuery)) {
1433
    int32_t num = 0;
1434
    int16_t tagLen = 0;
1435 1436
    
    SQLFunctionCtx *p = NULL;
1437
    SQLFunctionCtx **pTagCtx = calloc(pQuery->numOfOutput, POINTER_BYTES);
H
Haojun Liao 已提交
1438

1439
    for (int32_t i = 0; i < pQuery->numOfOutput; ++i) {
1440
      SSqlFuncMsg *pSqlFuncMsg = &pQuery->pSelectExpr[i].base;
1441
      
1442 1443 1444 1445 1446 1447 1448 1449 1450 1451 1452 1453 1454
      if (pSqlFuncMsg->functionId == TSDB_FUNC_TAG_DUMMY || pSqlFuncMsg->functionId == TSDB_FUNC_TS_DUMMY) {
        tagLen += pCtx[i].outputBytes;
        pTagCtx[num++] = &pCtx[i];
      } else if ((aAggs[pSqlFuncMsg->functionId].nStatus & TSDB_FUNCSTATE_SELECTIVITY) != 0) {
        p = &pCtx[i];
      } else if (pSqlFuncMsg->functionId == TSDB_FUNC_TS || pSqlFuncMsg->functionId == TSDB_FUNC_TAG) {
        // tag function may be the group by tag column
        // ts may be the required primary timestamp column
        continue;
      } else {
        // the column may be the normal column, group by normal_column, the functionId is TSDB_FUNC_PRJ
      }
    }
dengyihao's avatar
dengyihao 已提交
1455 1456 1457 1458 1459 1460 1461
    if (p != NULL) {
      p->tagInfo.pTagCtxList = pTagCtx;
      p->tagInfo.numOfTagCols = num;
      p->tagInfo.tagsLen = tagLen;
    } else {
      tfree(pTagCtx); 
    }
1462 1463 1464
  }
}

H
Haojun Liao 已提交
1465 1466
/*
 * Hand out consecutive slices of buf to the result info of each output
 * expression; slice i is pQuery->pSelectExpr[i].interBytes wide.
 */
static FORCE_INLINE void setWindowResultInfo(SResultInfo *pResultInfo, SQuery *pQuery, bool isStableQuery, char* buf) {
  char   *cursor = buf;
  int32_t i = 0;

  while (i < pQuery->numOfOutput) {
    int32_t width = pQuery->pSelectExpr[i].interBytes;
    setResultInfoBuf(pResultInfo + i, width, isStableQuery, cursor);

    cursor += width;
    ++i;
  }
}

1475
/*
 * Allocate and initialise the per-expression state of the runtime environment:
 * one SResultInfo and one SQLFunctionCtx per output expression, the output
 * offsets, the function parameters, and the shared intermediate result buffer.
 *
 * @param pRuntimeEnv  runtime environment; pQuery must already be set
 * @param order        order value stored in param[2] of top/bottom/diff functions
 * @return TSDB_CODE_SUCCESS, or TSDB_CODE_QRY_OUT_OF_MEMORY when an allocation
 *         fails, in which case resultInfo and pCtx have been released again
 */
static int32_t setupQueryRuntimeEnv(SQueryRuntimeEnv *pRuntimeEnv, int16_t order) {
  qDebug("QInfo:%p setup runtime env", GET_QINFO_ADDR(pRuntimeEnv));
  SQuery *pQuery = pRuntimeEnv->pQuery;

  pRuntimeEnv->resultInfo = calloc(pQuery->numOfOutput, sizeof(SResultInfo));
  pRuntimeEnv->pCtx = (SQLFunctionCtx *)calloc(pQuery->numOfOutput, sizeof(SQLFunctionCtx));

  if (pRuntimeEnv->resultInfo == NULL || pRuntimeEnv->pCtx == NULL) {
    goto _clean;
  }

  pRuntimeEnv->offset[0] = 0;
  for (int32_t i = 0; i < pQuery->numOfOutput; ++i) {
    SSqlFuncMsg *pSqlFuncMsg = &pQuery->pSelectExpr[i].base;

    SQLFunctionCtx *pCtx = &pRuntimeEnv->pCtx[i];
    SColIndex* pIndex = &pSqlFuncMsg->colInfo;

    // input type/width come from the tag schema, the table name pseudo column, or the query column list
    int32_t index = pIndex->colIndex;
    if (TSDB_COL_IS_TAG(pIndex->flag)) {
      if (pIndex->colId == TSDB_TBNAME_COLUMN_INDEX) {  // todo refactor
        SSchema s = tGetTableNameColumnSchema();

        pCtx->inputBytes = s.bytes;
        pCtx->inputType = s.type;
      } else {
        pCtx->inputBytes = pQuery->tagColList[index].bytes;
        pCtx->inputType = pQuery->tagColList[index].type;
      }
    } else {
      pCtx->inputBytes = pQuery->colList[index].bytes;
      pCtx->inputType = pQuery->colList[index].type;
    }

    assert(isValidDataType(pCtx->inputType));
    pCtx->ptsOutputBuf = NULL;

    pCtx->outputBytes = pQuery->pSelectExpr[i].bytes;
    pCtx->outputType = pQuery->pSelectExpr[i].type;

    pCtx->order = pQuery->order.order;
    pCtx->functionId = pSqlFuncMsg->functionId;

    pCtx->numOfParams = pSqlFuncMsg->numOfParams;
    for (int32_t j = 0; j < pCtx->numOfParams; ++j) {
      int16_t type = pSqlFuncMsg->arg[j].argType;
      int16_t bytes = pSqlFuncMsg->arg[j].argBytes;
      if (type == TSDB_DATA_TYPE_BINARY || type == TSDB_DATA_TYPE_NCHAR) {
        // read the j-th argument; using arg[0] here would copy the first
        // argument's bytes into every binary/nchar parameter
        tVariantCreateFromBinary(&pCtx->param[j], pSqlFuncMsg->arg[j].argValue.pz, bytes, type);
      } else {
        tVariantCreateFromBinary(&pCtx->param[j], (char *)&pSqlFuncMsg->arg[j].argValue.i64, bytes, type);
      }
    }

    // set the order information for top/bottom query
    int32_t functionId = pCtx->functionId;

    if (functionId == TSDB_FUNC_TOP || functionId == TSDB_FUNC_BOTTOM || functionId == TSDB_FUNC_DIFF) {
      int32_t f = pQuery->pSelectExpr[0].base.functionId;
      assert(f == TSDB_FUNC_TS || f == TSDB_FUNC_TS_DUMMY);

      pCtx->param[2].i64Key = order;
      pCtx->param[2].nType = TSDB_DATA_TYPE_BIGINT;
      pCtx->param[3].i64Key = functionId;
      pCtx->param[3].nType = TSDB_DATA_TYPE_BIGINT;

      pCtx->param[1].i64Key = pQuery->order.orderColId;
    }

    // each output column starts where the previous one ends
    if (i > 0) {
      pRuntimeEnv->offset[i] = pRuntimeEnv->offset[i - 1] + pRuntimeEnv->pCtx[i - 1].outputBytes;
    }
  }

  char* buf = calloc(1, pRuntimeEnv->interBufSize);
  if (buf == NULL && pRuntimeEnv->interBufSize > 0) {
    // the contexts are about to be released: drop the parameter variants created above first
    for (int32_t i = 0; i < pQuery->numOfOutput; ++i) {
      SQLFunctionCtx *pCtx = &pRuntimeEnv->pCtx[i];
      for (int32_t j = 0; j < pCtx->numOfParams; ++j) {
        tVariantDestroy(&pCtx->param[j]);
      }
    }

    goto _clean;
  }

  // set the intermediate result output buffer
  setWindowResultInfo(pRuntimeEnv->resultInfo, pQuery, pRuntimeEnv->stableQuery, buf);

  // if it is group by normal column, do not set output buffer, the output buffer is pResult
  if (!isGroupbyNormalCol(pQuery->pGroupbyExpr) && !pRuntimeEnv->stableQuery) {
    resetCtxOutputBuf(pRuntimeEnv);
  }

  setCtxTagColumnInfo(pRuntimeEnv, pRuntimeEnv->pCtx);
  return TSDB_CODE_SUCCESS;

_clean:
  tfree(pRuntimeEnv->resultInfo);
  tfree(pRuntimeEnv->pCtx);

  return TSDB_CODE_QRY_OUT_OF_MEMORY;
}

/*
 * Release everything owned by the query runtime environment: the time window
 * info, the per-output function contexts (parameters, tag value, tag context
 * list), the shared intermediate result buffer, the fill info, the result
 * buffer, both tsdb query handles and the timestamp buffer.
 *
 * Nothing is done when no query has been attached to the environment.
 */
static void teardownQueryRuntimeEnv(SQueryRuntimeEnv *pRuntimeEnv) {
  SQuery *pQuery = pRuntimeEnv->pQuery;
  if (pQuery == NULL) {
    return;
  }

  SQInfo *pQInfo = (SQInfo *)GET_QINFO_ADDR(pRuntimeEnv);
  qDebug("QInfo:%p teardown runtime env", pQInfo);

  cleanupTimeWindowInfo(&pRuntimeEnv->windowResInfo, pQuery->numOfOutput);

  if (pRuntimeEnv->pCtx != NULL) {
    for (int32_t k = 0; k < pQuery->numOfOutput; ++k) {
      SQLFunctionCtx *pFuncCtx = &pRuntimeEnv->pCtx[k];

      // destroy every parameter variant of this function context
      int32_t p = 0;
      while (p < pFuncCtx->numOfParams) {
        tVariantDestroy(&pFuncCtx->param[p]);
        p += 1;
      }

      tVariantDestroy(&pFuncCtx->tag);
      tfree(pFuncCtx->tagInfo.pTagCtxList);
    }

    // the intermediate buffer is released through the first result info entry
    tfree(pRuntimeEnv->resultInfo[0].interResultBuf);
    tfree(pRuntimeEnv->resultInfo);
    tfree(pRuntimeEnv->pCtx);
  }

  pRuntimeEnv->pFillInfo = taosDestoryFillInfo(pRuntimeEnv->pFillInfo);

  destroyResultBuf(pRuntimeEnv->pResultBuf, pQInfo);
  tsdbCleanupQueryHandle(pRuntimeEnv->pQueryHandle);
  tsdbCleanupQueryHandle(pRuntimeEnv->pSecQueryHandle);

  pRuntimeEnv->pTSBuf = tsBufDestroy(pRuntimeEnv->pTSBuf);
}

H
Haojun Liao 已提交
1607
#define IS_QUERY_KILLED(_q) ((_q)->code == TSDB_CODE_TSC_QUERY_CANCELLED)
1608

1609
static void setQueryKilled(SQInfo *pQInfo) { pQInfo->code = TSDB_CODE_TSC_QUERY_CANCELLED; }
H
hjxilinx 已提交
1610

H
Haojun Liao 已提交
1611 1612 1613
/*
 * Check whether the query is a fixed output query.
 *
 * Interval queries are never fixed output queries; top/bottom queries and
 * group-by-normal-column queries always are. Otherwise the query is a fixed
 * output query as soon as one output function (ignoring the leading ts_comp
 * projection and the ts/ts_dummy functions) is not a multi-output function.
 */
static bool isFixedOutputQuery(SQueryRuntimeEnv* pRuntimeEnv) {
  SQuery* pQuery = pRuntimeEnv->pQuery;

  if (QUERY_IS_INTERVAL_QUERY(pQuery)) {
    return false;
  }

  // Note:top/bottom query is fixed output query
  if (pRuntimeEnv->topBotQuery || pRuntimeEnv->groupbyNormalCol) {
    return true;
  }

  for (int32_t idx = 0; idx < pQuery->numOfOutput; ++idx) {
    SSqlFuncMsg *pFunc = &pQuery->pSelectExpr[idx].base;
    int32_t      fid = pFunc->functionId;

    // the ts_comp function: a projection of the primary timestamp column in the first output
    bool tsComp = (idx == 0) && (fid == TSDB_FUNC_PRJ) && (pFunc->numOfParams == 1) &&
                  (pFunc->colInfo.colIndex == PRIMARYKEY_TIMESTAMP_COL_INDEX);
    bool tsFunc = (fid == TSDB_FUNC_TS) || (fid == TSDB_FUNC_TS_DUMMY);

    if (tsComp || tsFunc) {
      continue;
    }

    if (!IS_MULTIOUTPUT(aAggs[fid].nStatus)) {
      return true;
    }
  }

  return false;
}

1643
// todo refactor with isLastRowQuery
H
hjxilinx 已提交
1644
// return true if any output expression of the query is the TSDB_FUNC_INTERP function
static bool isPointInterpoQuery(SQuery *pQuery) {
  int32_t idx = 0;

  while (idx < pQuery->numOfOutput) {
    if (pQuery->pSelectExpr[idx].base.functionId == TSDB_FUNC_INTERP) {
      return true;
    }

    idx += 1;
  }

  return false;
}

// TODO REFACTOR:MERGE WITH CLIENT-SIDE FUNCTION
H
hjxilinx 已提交
1656
// return true if any output expression is one of sum_rate/sum_irate/avg_rate/avg_irate
static bool isSumAvgRateQuery(SQuery *pQuery) {
  for (int32_t idx = 0; idx < pQuery->numOfOutput; ++idx) {
    int32_t fid = pQuery->pSelectExpr[idx].base.functionId;

    // the timestamp function is not a rate function, skip it
    if (fid == TSDB_FUNC_TS) {
      continue;
    }

    bool sumRate = (fid == TSDB_FUNC_SUM_RATE) || (fid == TSDB_FUNC_SUM_IRATE);
    bool avgRate = (fid == TSDB_FUNC_AVG_RATE) || (fid == TSDB_FUNC_AVG_IRATE);
    if (sumRate || avgRate) {
      return true;
    }
  }

  return false;
}

H
hjxilinx 已提交
1672
// return true if any output expression of the query is the TSDB_FUNC_LAST_ROW function
static bool isFirstLastRowQuery(SQuery *pQuery) {
  int32_t idx = 0;

  while (idx < pQuery->numOfOutput) {
    if (pQuery->pSelectExpr[idx].base.functionId == TSDB_FUNC_LAST_ROW) {
      return true;
    }

    idx += 1;
  }

  return false;
}

H
hjxilinx 已提交
1683
/*
 * Decide whether a reverse scan is required.
 *
 * ts/ts_dummy/tag functions are ignored. A first/first_dst function in a
 * non-ascending query requires the reverse scan. The first last/last_dst
 * function found decides the result: a reverse scan is required when the order
 * stored in its first argument differs from the order of the query.
 */
static bool needReverseScan(SQuery *pQuery) {
  for (int32_t idx = 0; idx < pQuery->numOfOutput; ++idx) {
    SSqlFuncMsg *pFunc = &pQuery->pSelectExpr[idx].base;
    int32_t      fid = pFunc->functionId;

    if (fid == TSDB_FUNC_TS || fid == TSDB_FUNC_TS_DUMMY || fid == TSDB_FUNC_TAG) {
      continue;
    }

    bool isFirst = (fid == TSDB_FUNC_FIRST) || (fid == TSDB_FUNC_FIRST_DST);
    if (isFirst && !QUERY_IS_ASC_QUERY(pQuery)) {
      return true;
    }

    bool isLast = (fid == TSDB_FUNC_LAST) || (fid == TSDB_FUNC_LAST_DST);
    if (isLast) {
      int32_t order = pFunc->arg[0].argValue.i64;
      return order != pQuery->order.order;
    }
  }

  return false;
}
H
hjxilinx 已提交
1702 1703 1704

/*
 * Return true if every output expression is a tag related one: a tag
 * projection, a tid_tag function, or a count() on the table name column.
 */
static bool onlyQueryTags(SQuery* pQuery) {
  for (int32_t idx = 0; idx < pQuery->numOfOutput; ++idx) {
    SExprInfo* pExpr = &pQuery->pSelectExpr[idx];
    int32_t    fid = pExpr->base.functionId;

    bool countOnTbname = (fid == TSDB_FUNC_COUNT) && (pExpr->base.colInfo.colId == TSDB_TBNAME_COLUMN_INDEX);
    bool tagExpr = (fid == TSDB_FUNC_TAGPRJ) || (fid == TSDB_FUNC_TID_TAG) || countOnTbname;

    if (!tagExpr) {
      return false;
    }
  }

  return true;
}

1717 1718
/////////////////////////////////////////////////////////////////////////////////////////////

H
Haojun Liao 已提交
1719
/*
 * Compute the time window that contains the given key.
 *
 * win->skey is the interval start timestamp produced by
 * taosGetIntervalStartTimestamp for the key; win->ekey is skey + intervalTime - 1,
 * or INT64_MAX when keyFirst is larger than INT64_MAX - intervalTime.
 */
void getAlignQueryTimeWindow(SQuery *pQuery, int64_t key, int64_t keyFirst, int64_t keyLast, STimeWindow *win) {
  assert(key >= keyFirst && key <= keyLast && pQuery->slidingTime <= pQuery->intervalTime);

  win->skey = taosGetIntervalStartTimestamp(key, pQuery->slidingTime, pQuery->intervalTime, pQuery->slidingTimeUnit,
                                            pQuery->precision);

  // regular case: the end of the window can be represented without exceeding INT64_MAX
  if (keyFirst <= (INT64_MAX - pQuery->intervalTime)) {
    win->ekey = win->skey + pQuery->intervalTime - 1;
    return;
  }

  /*
   * keyFirst > INT64_MAX - pQuery->intervalTime: the duration between keyFirst and keyLast
   * must be less than one interval, so the window is simply closed at INT64_MAX.
   */
  assert(keyLast - keyFirst < pQuery->intervalTime);
  win->ekey = INT64_MAX;
}

/*
 * Set pQuery->checkBuffer.
 *
 * The flag is 0 for top/bottom queries and group-by-normal-column queries.
 * Otherwise it is 1 only when the output functions (ts/ts_dummy ignored) are
 * all multi-output functions and at least one such function exists.
 */
static void setScanLimitationByResultBuffer(SQuery *pQuery) {
  if (isTopBottomQuery(pQuery) || isGroupbyNormalCol(pQuery->pGroupbyExpr)) {
    pQuery->checkBuffer = 0;
    return;
  }

  bool multiOutput = false;

  for (int32_t idx = 0; idx < pQuery->numOfOutput; ++idx) {
    int32_t fid = pQuery->pSelectExpr[idx].base.functionId;
    if (fid == TSDB_FUNC_TS || fid == TSDB_FUNC_TS_DUMMY) {
      continue;
    }

    // stop at the first function that does not generate multiple output rows
    multiOutput = IS_MULTIOUTPUT(aAggs[fid].nStatus);
    if (!multiOutput) {
      break;
    }
  }

  if (multiOutput) {
    pQuery->checkBuffer = 1;
  } else {
    pQuery->checkBuffer = 0;
  }
}

/*
 * todo add more parameters to check soon..
 */
1762
/*
 * Validate the list of columns to load: two adjacent entries of colList must
 * not carry the same column id. Returns false (after logging an error) when
 * such a pair is found, true otherwise.
 */
bool colIdCheck(SQuery *pQuery) {
  // load data column information is incorrect
  for (int32_t next = 1; next < pQuery->numOfCols; ++next) {
    int32_t prev = next - 1;

    if (pQuery->colList[prev].colId != pQuery->colList[next].colId) {
      continue;
    }

    // NOTE(review): GET_QINFO_ADDR is applied to the SQuery pointer here, while the macro
    // subtracts the offset of SQInfo.runtimeEnv - the logged address looks unreliable, confirm
    qError("QInfo:%p invalid data load column for query", GET_QINFO_ADDR(pQuery));
    return false;
  }

  return true;
}

// todo ignore the avg/sum/min/max/count/stddev/top/bottom functions, for which
// the scan order does not matter
/*
 * Return true if every output function, apart from the ts/ts_dummy/tag/tag_dummy
 * functions, is either functId or functIdDst.
 */
static bool onlyOneQueryType(SQuery *pQuery, int32_t functId, int32_t functIdDst) {
  for (int32_t idx = 0; idx < pQuery->numOfOutput; ++idx) {
    int32_t fid = pQuery->pSelectExpr[idx].base.functionId;

    bool auxiliary = (fid == TSDB_FUNC_TS) || (fid == TSDB_FUNC_TS_DUMMY) || (fid == TSDB_FUNC_TAG) ||
                     (fid == TSDB_FUNC_TAG_DUMMY);
    if (auxiliary) {
      continue;
    }

    bool matched = (fid == functId) || (fid == functIdDst);
    if (!matched) {
      return false;
    }
  }

  return true;
}

// true if all non-auxiliary output functions are first/first_dst
static bool onlyFirstQuery(SQuery *pQuery) {
  return onlyOneQueryType(pQuery, TSDB_FUNC_FIRST, TSDB_FUNC_FIRST_DST);
}

// true if all non-auxiliary output functions are last/last_dst
static bool onlyLastQuery(SQuery *pQuery) {
  return onlyOneQueryType(pQuery, TSDB_FUNC_LAST, TSDB_FUNC_LAST_DST);
}

H
Haojun Liao 已提交
1797 1798 1799 1800 1801 1802 1803 1804 1805 1806 1807 1808 1809 1810
// todo refactor, add iterator
// swap the start key and the end key of the query window of every table in every table group
static void doExchangeTimeWindow(SQInfo* pQInfo) {
  size_t numOfGroups = GET_NUM_OF_TABLEGROUP(pQInfo);

  for (int32_t g = 0; g < numOfGroups; ++g) {
    SArray* pGroup = GET_TABLEGROUP(pQInfo, g);
    size_t  numOfTables = taosArrayGetSize(pGroup);

    for (int32_t k = 0; k < numOfTables; ++k) {
      STableQueryInfo* pInfo = (STableQueryInfo*) taosArrayGetP(pGroup, k);

      TSKEY oldSkey = pInfo->win.skey;
      pInfo->win.skey = pInfo->win.ekey;
      pInfo->win.ekey = oldSkey;
    }
  }
}

H
Haojun Liao 已提交
1811 1812 1813
/*
 * Adjust the scan order (and swap the query time range accordingly) for the
 * query types whose result depends on a particular scan direction:
 *   - last_row query: always scanned in descending order;
 *   - point interpolation query without interval: always ascending order;
 *   - query without interval made of first (or last) functions only:
 *     ascending (or descending) order, the window of each table is swapped too;
 *   - interval query on super table made of first (or last) functions only:
 *     ascending (or descending) order.
 *
 * pQInfo is used directly as the address reported in the log messages:
 * GET_QINFO_ADDR expects the address of the runtime environment and must not
 * be applied to the SQuery pointer.
 */
static void changeExecuteScanOrder(SQInfo *pQInfo, bool stableQuery) {
  SQuery* pQuery = pQInfo->runtimeEnv.pQuery;

  // in case of point-interpolation query, use asc order scan
  char msg[] = "QInfo:%p scan order changed for %s query, old:%d, new:%d, qrange exchanged, old qrange:%" PRId64
               "-%" PRId64 ", new qrange:%" PRId64 "-%" PRId64;

  // todo handle the case the the order irrelevant query type mixed up with order critical query type
  // descending order query for last_row query
  if (isFirstLastRowQuery(pQuery)) {
    qDebug("QInfo:%p scan order changed for last_row query, old:%d, new:%d", pQInfo,
           pQuery->order.order, TSDB_ORDER_DESC);

    pQuery->order.order = TSDB_ORDER_DESC;

    // descending scan: start from the larger key and stop at the smaller one
    int64_t skey = MIN(pQuery->window.skey, pQuery->window.ekey);
    int64_t ekey = MAX(pQuery->window.skey, pQuery->window.ekey);

    pQuery->window.skey = ekey;
    pQuery->window.ekey = skey;

    return;
  }

  if (isPointInterpoQuery(pQuery) && pQuery->intervalTime == 0) {
    if (!QUERY_IS_ASC_QUERY(pQuery)) {
      qDebug(msg, pQInfo, "interp", pQuery->order.order, TSDB_ORDER_ASC, pQuery->window.skey,
             pQuery->window.ekey, pQuery->window.ekey, pQuery->window.skey);
      SWAP(pQuery->window.skey, pQuery->window.ekey, TSKEY);
    }

    pQuery->order.order = TSDB_ORDER_ASC;
    return;
  }

  if (pQuery->intervalTime == 0) {
    if (onlyFirstQuery(pQuery)) {
      if (!QUERY_IS_ASC_QUERY(pQuery)) {
        qDebug(msg, pQInfo, "only-first", pQuery->order.order, TSDB_ORDER_ASC, pQuery->window.skey,
               pQuery->window.ekey, pQuery->window.ekey, pQuery->window.skey);

        SWAP(pQuery->window.skey, pQuery->window.ekey, TSKEY);
        doExchangeTimeWindow(pQInfo);
      }

      pQuery->order.order = TSDB_ORDER_ASC;
    } else if (onlyLastQuery(pQuery)) {
      if (QUERY_IS_ASC_QUERY(pQuery)) {
        qDebug(msg, pQInfo, "only-last", pQuery->order.order, TSDB_ORDER_DESC, pQuery->window.skey,
               pQuery->window.ekey, pQuery->window.ekey, pQuery->window.skey);

        SWAP(pQuery->window.skey, pQuery->window.ekey, TSKEY);
        doExchangeTimeWindow(pQInfo);
      }

      pQuery->order.order = TSDB_ORDER_DESC;
    }

  } else {  // interval query
    if (stableQuery) {
      if (onlyFirstQuery(pQuery)) {
        if (!QUERY_IS_ASC_QUERY(pQuery)) {
          qDebug(msg, pQInfo, "only-first stable", pQuery->order.order, TSDB_ORDER_ASC,
                 pQuery->window.skey, pQuery->window.ekey, pQuery->window.ekey, pQuery->window.skey);

          SWAP(pQuery->window.skey, pQuery->window.ekey, TSKEY);
        }

        pQuery->order.order = TSDB_ORDER_ASC;
      } else if (onlyLastQuery(pQuery)) {
        if (QUERY_IS_ASC_QUERY(pQuery)) {
          qDebug(msg, pQInfo, "only-last stable", pQuery->order.order, TSDB_ORDER_DESC,
                 pQuery->window.skey, pQuery->window.ekey, pQuery->window.ekey, pQuery->window.skey);

          SWAP(pQuery->window.skey, pQuery->window.ekey, TSKEY);
        }

        pQuery->order.order = TSDB_ORDER_DESC;
      }
    }
  }
}

/*
 * Number of result pages to start with: 128 for a group-by-normal-column
 * query, max(number of tables, 16) for an interval query, 1 otherwise.
 */
static int32_t getInitialPageNum(SQInfo *pQInfo) {
  SQuery *pQuery = pQInfo->runtimeEnv.pQuery;
  int32_t INITIAL_RESULT_ROWS_VALUE = 16;

  // for super table query, one page for each subset
  int32_t numOfPages = 1;  // pQInfo->pSidSet->numOfSubSet;

  if (isGroupbyNormalCol(pQuery->pGroupbyExpr)) {
    numOfPages = 128;
  } else if (QUERY_IS_INTERVAL_QUERY(pQuery)) {
    // time window query, allocate one page for each table
    size_t numOfTables = pQInfo->tableqinfoGroupInfo.numOfTables;
    numOfPages = MAX(numOfTables, INITIAL_RESULT_ROWS_VALUE);
  }

  assert(numOfPages > 0);
  return numOfPages;
}

H
Haojun Liao 已提交
1913
#define GET_ROW_PARAM_FOR_MULTIOUTPUT(_q, tbq, sq) (((tbq) && (!sq))? (_q)->pSelectExpr[1].base.arg->argValue.i64:1)
1914

H
Haojun Liao 已提交
1915 1916
static FORCE_INLINE int32_t getNumOfRowsInResultPage(SQuery *pQuery, bool topBotQuery, bool isSTableQuery) {
  int32_t rowSize = pQuery->rowSize * GET_ROW_PARAM_FOR_MULTIOUTPUT(pQuery, topBotQuery, isSTableQuery);
1917
  return (DEFAULT_INTERN_BUF_PAGE_SIZE - sizeof(tFilePage)) / rowSize;
1918 1919 1920 1921
}

/*
 * Locate the output position of one column of a window result inside its
 * result buffer page: the start of the column area of the page
 * (offset[columnIndex] * numOfRowsPerPage) plus the byte offset of the row.
 */
char *getPosInResultPage(SQueryRuntimeEnv *pRuntimeEnv, int32_t columnIndex, SWindowResult *pResult) {
  assert(pResult != NULL && pRuntimeEnv != NULL);

  SQuery    *pQuery = pRuntimeEnv->pQuery;
  tFilePage *pPage = GET_RES_BUF_PAGE_BY_ID(pRuntimeEnv->pResultBuf, pResult->pos.pageId);

  // the row id is scaled by the row multiplier of the query
  int32_t rowIndex =
      pResult->pos.rowId * GET_ROW_PARAM_FOR_MULTIOUTPUT(pQuery, pRuntimeEnv->topBotQuery, pRuntimeEnv->stableQuery);

  char *pColumnStart = ((char *)pPage->data) + pRuntimeEnv->offset[columnIndex] * pRuntimeEnv->numOfRowsPerPage;
  return pColumnStart + pQuery->pSelectExpr[columnIndex].bytes * rowIndex;
}

H
Haojun Liao 已提交
1931
#define IS_PREFILTER_TYPE(_t) ((_t) != TSDB_DATA_TYPE_DOUBLE && (_t) != TSDB_DATA_TYPE_FLOAT)
1932

H
Haojun Liao 已提交
1933 1934 1935 1936
static bool needToLoadDataBlock(SQueryRuntimeEnv* pRuntimeEnv, SDataStatis *pDataStatis, SQLFunctionCtx *pCtx,
    int32_t numOfRows) {
  SQuery* pQuery = pRuntimeEnv->pQuery;
  if (pDataStatis == NULL || (pQuery->numOfFilterCols == 0 && (!pRuntimeEnv->topBotQuery))) {
1937 1938 1939 1940 1941
    return true;
  }

  for (int32_t k = 0; k < pQuery->numOfFilterCols; ++k) {
    SSingleColumnFilterInfo *pFilterInfo = &pQuery->pFilterInfo[k];
1942

H
Haojun Liao 已提交
1943 1944 1945 1946 1947 1948 1949 1950 1951
    int32_t index = -1;
    for(int32_t i = 0; i < pQuery->numOfCols; ++i) {
      if (pDataStatis[i].colId == pFilterInfo->info.colId) {
        index = i;
        break;
      }
    }

    if (index == -1) {
1952 1953
      continue;
    }
1954

1955
    // not support pre-filter operation on binary/nchar data type
H
Haojun Liao 已提交
1956
    if (!IS_PREFILTER_TYPE(pFilterInfo->info.type)) {
1957 1958
      continue;
    }
1959

1960
    // all points in current column are NULL, no need to check its boundary value
H
Haojun Liao 已提交
1961
    if (pDataStatis[index].numOfNull == numOfRows) {
1962 1963
      continue;
    }
1964

H
Haojun Liao 已提交
1965 1966 1967 1968 1969
    SDataStatis* pDataBlockst = &pDataStatis[index];

    if (pFilterInfo->info.type == TSDB_DATA_TYPE_FLOAT) {
      float minval = *(double *)(&pDataBlockst->min);
      float maxval = *(double *)(&pDataBlockst->max);
1970

1971 1972 1973 1974 1975 1976 1977
      for (int32_t i = 0; i < pFilterInfo->numOfFilters; ++i) {
        if (pFilterInfo->pFilters[i].fp(&pFilterInfo->pFilters[i], (char *)&minval, (char *)&maxval)) {
          return true;
        }
      }
    } else {
      for (int32_t i = 0; i < pFilterInfo->numOfFilters; ++i) {
H
Haojun Liao 已提交
1978
        if (pFilterInfo->pFilters[i].fp(&pFilterInfo->pFilters[i], (char *)&pDataBlockst->min, (char *)&pDataBlockst->max)) {
1979 1980 1981 1982 1983
          return true;
        }
      }
    }
  }
1984

H
Haojun Liao 已提交
1985 1986 1987 1988 1989 1990 1991 1992
  if (pRuntimeEnv->topBotQuery) {
    for (int32_t i = 0; i < pQuery->numOfOutput; ++i) {
      int32_t functionId = pQuery->pSelectExpr[i].base.functionId;
      if (functionId == TSDB_FUNC_TOP || functionId == TSDB_FUNC_BOTTOM) {
        return topbot_datablock_filter(&pCtx[i], functionId, (char *)&pDataStatis[i].min, (char *)&pDataStatis[i].max);
      }
    }
  }
1993

H
Haojun Liao 已提交
1994
  return false;
1995 1996
}

H
Haojun Liao 已提交
1997
/*
 * Load the statistics and/or the data of the current data block, depending on
 * what the query requires.
 *
 * With filter columns, or with a timestamp buffer, or for an interval query,
 * the whole block is required; otherwise the requirement is the union of the
 * requirements reported by the dataReqFunc of every output function.
 *
 *   - nothing required: the block is counted as discarded, nothing is loaded;
 *   - statistics required: the statistics are loaded, the data block is loaded
 *     only when no statistics are available;
 *   - all required: the statistics are loaded and checked by needToLoadDataBlock,
 *     then the data block is loaded unless it is filtered out.
 *
 * When the statistics can not be retrieved, *pStatis is reset to NULL so that
 * the block is handled as a block without statistics.
 *
 * Returns BLK_DATA_DISCARD when the block is filtered out by its statistics,
 * TSDB_CODE_SUCCESS otherwise.
 */
int32_t loadDataBlockOnDemand(SQueryRuntimeEnv *pRuntimeEnv, void* pQueryHandle, SDataBlockInfo* pBlockInfo, SDataStatis **pStatis, SArray** pDataBlock) {
  SQuery *pQuery = pRuntimeEnv->pQuery;

  uint32_t status = 0;
  if (pQuery->numOfFilterCols > 0) {
    status = BLK_DATA_ALL_NEEDED;
  } else { // check if this data block is required to load
    for (int32_t i = 0; i < pQuery->numOfOutput; ++i) {
      SSqlFuncMsg* pSqlFunc = &pQuery->pSelectExpr[i].base;

      int32_t functionId = pSqlFunc->functionId;
      int32_t colId = pSqlFunc->colInfo.colId;
      status |= aAggs[functionId].dataReqFunc(&pRuntimeEnv->pCtx[i], pBlockInfo->window.skey, pBlockInfo->window.ekey, colId);
    }

    if (pRuntimeEnv->pTSBuf != NULL || QUERY_IS_INTERVAL_QUERY(pQuery)) {
      status |= BLK_DATA_ALL_NEEDED;
    }
  }

  if (status == BLK_DATA_NO_NEEDED) {
    qDebug("QInfo:%p data block discard, brange:%"PRId64 "-%"PRId64", rows:%d", GET_QINFO_ADDR(pRuntimeEnv),
           pBlockInfo->window.skey, pBlockInfo->window.ekey, pBlockInfo->rows);
    pRuntimeEnv->summary.discardBlocks += 1;
  } else if (status == BLK_DATA_STATIS_NEEDED) {
    if (tsdbRetrieveDataBlockStatisInfo(pQueryHandle, pStatis) != TSDB_CODE_SUCCESS) {
      // statistics are not usable, fall back to loading the data block below
      *pStatis = NULL;
    }

    pRuntimeEnv->summary.loadBlockStatis += 1;

    if (*pStatis == NULL) { // data block statistics does not exist, load data block
      *pDataBlock = tsdbRetrieveDataBlock(pQueryHandle, NULL);
      pRuntimeEnv->summary.totalCheckedRows += pBlockInfo->rows;
    }
  } else {
    assert(status == BLK_DATA_ALL_NEEDED);

    // load the data block statistics to perform further filter
    pRuntimeEnv->summary.loadBlockStatis += 1;
    if (tsdbRetrieveDataBlockStatisInfo(pQueryHandle, pStatis) != TSDB_CODE_SUCCESS) {
      // without statistics no pre-filter is possible, needToLoadDataBlock returns true for NULL
      *pStatis = NULL;
    }

    if (!needToLoadDataBlock(pRuntimeEnv, *pStatis, pRuntimeEnv->pCtx, pBlockInfo->rows)) {
#if defined(_DEBUG_VIEW)
      qDebug("QInfo:%p block discarded by per-filter", GET_QINFO_ADDR(pRuntimeEnv));
#endif
      // current block has been discard due to filter applied
      pRuntimeEnv->summary.discardBlocks += 1;
      qDebug("QInfo:%p data block discard, brange:%"PRId64 "-%"PRId64", rows:%d", GET_QINFO_ADDR(pRuntimeEnv),
          pBlockInfo->window.skey, pBlockInfo->window.ekey, pBlockInfo->rows);
      return BLK_DATA_DISCARD;
    }

    pRuntimeEnv->summary.totalCheckedRows += pBlockInfo->rows;
    pRuntimeEnv->summary.loadBlocks += 1;
    *pDataBlock = tsdbRetrieveDataBlock(pQueryHandle, NULL);
  }

  return TSDB_CODE_SUCCESS;
}

H
hjxilinx 已提交
2059
/*
 * Binary search for a key in an ascending list of timestamps.
 *
 * pValue  the list of timestamps (TSKEY)
 * num     number of timestamps in the list
 * key     the timestamp to look for
 * order   TSDB_ORDER_ASC:  position of the first element not smaller than the key,
 *                          -1 if every element is smaller than the key;
 *         TSDB_ORDER_DESC: position of the last element not larger than the key,
 *                          -1 if every element is larger than the key.
 *
 * Returns -1 when num is not positive.
 */
int32_t binarySearchForKey(char *pValue, int num, TSKEY key, int order) {
  if (num <= 0) {
    return -1;
  }

  assert(order == TSDB_ORDER_ASC || order == TSDB_ORDER_DESC);

  TSKEY * keyList = (TSKEY *)pValue;
  int32_t lower = 0;
  int32_t upper = num - 1;
  int32_t middle = -1;

  if (order == TSDB_ORDER_DESC) {
    // find the first position which is smaller than the key
    for (;;) {
      if (key >= keyList[upper]) {
        return upper;
      }

      if (key == keyList[lower]) {
        return lower;
      }

      if (key < keyList[lower]) {
        return lower - 1;
      }

      int32_t span = upper - lower + 1;
      middle = (span >> 1) + lower;

      if (key < keyList[middle]) {
        upper = middle - 1;
      } else if (key > keyList[middle]) {
        lower = middle + 1;
      } else {
        break;  // exact match
      }
    }
  } else {
    // find the first position which is bigger than the key
    for (;;) {
      if (key <= keyList[lower]) {
        return lower;
      }

      if (key == keyList[upper]) {
        return upper;
      }

      if (key > keyList[upper]) {
        // the answer is the element next to the current upper bound, if there is one
        int32_t next = upper + 1;
        return (next >= num) ? -1 : next;
      }

      int32_t span = upper - lower + 1;
      middle = (span >> 1) + lower;

      if (key < keyList[middle]) {
        upper = middle - 1;
      } else if (key > keyList[middle]) {
        lower = middle + 1;
      } else {
        break;  // exact match
      }
    }
  }

  return middle;
}

2122 2123 2124 2125 2126 2127 2128 2129 2130 2131 2132 2133 2134 2135 2136 2137 2138 2139 2140 2141 2142 2143
/*
 * Resize the output buffer of every output column to hold `capacity` rows.
 * Nothing is done when `capacity` is smaller than the current capacity.
 *
 * The output buffer position of each function context is reset to the start
 * of the (possibly moved) buffer.
 *
 * On allocation failure the function does not return: it jumps to
 * pRuntimeEnv->env with TSDB_CODE_QRY_OUT_OF_MEMORY. The buffers resized so
 * far stay valid and the recorded capacity is left unchanged.
 */
static void ensureOutputBufferSimple(SQueryRuntimeEnv* pRuntimeEnv, int32_t capacity) {
  SQuery* pQuery = pRuntimeEnv->pQuery;

  if (capacity < pQuery->rec.capacity) {
    return;
  }

  for (int32_t i = 0; i < pQuery->numOfOutput; ++i) {
    int32_t bytes = pQuery->pSelectExpr[i].bytes;
    assert(bytes > 0 && capacity > 0);

    // compute the size in size_t to avoid an int32_t overflow of bytes * capacity
    char *tmp = realloc(pQuery->sdata[i], (size_t)bytes * capacity + sizeof(tFilePage));
    if (tmp == NULL) {
      // the old buffer is still owned by pQuery->sdata[i]; abort the query instead of going on
      longjmp(pRuntimeEnv->env, TSDB_CODE_QRY_OUT_OF_MEMORY);
    }

    pQuery->sdata[i] = (tFilePage *)tmp;

    // set the pCtx output buffer position
    pRuntimeEnv->pCtx[i].aOutputBuf = pQuery->sdata[i]->data;
  }

  qDebug("QInfo:%p realloc output buffer to inc output buffer from: %" PRId64 " rows to:%d rows", GET_QINFO_ADDR(pRuntimeEnv),
         pQuery->rec.capacity, capacity);

  pQuery->rec.capacity = capacity;
}

2150 2151 2152
/*
 * In case of prj/diff query (neither interval, nor group by normal column, nor
 * fixed output query), make sure the output buffer can accommodate all rows of
 * the current data block in addition to the rows it already holds.
 *
 * The buffers are enlarged by the number of missing rows, the new area behind
 * the existing rows is zeroed, and the output position of every function
 * context is moved behind the existing rows.
 *
 * On allocation failure the function does not return: it jumps to
 * pRuntimeEnv->env with TSDB_CODE_QRY_OUT_OF_MEMORY.
 */
static void ensureOutputBuffer(SQueryRuntimeEnv* pRuntimeEnv, SDataBlockInfo* pBlockInfo) {
  SQuery* pQuery = pRuntimeEnv->pQuery;
  if (!QUERY_IS_INTERVAL_QUERY(pQuery) && !pRuntimeEnv->groupbyNormalCol && !isFixedOutputQuery(pRuntimeEnv)) {
    SResultRec *pRec = &pQuery->rec;

    if (pQuery->rec.capacity - pQuery->rec.rows < pBlockInfo->rows) {
      int32_t remain = pRec->capacity - pRec->rows;
      int32_t newSize = pRec->capacity + (pBlockInfo->rows - remain);

      for (int32_t i = 0; i < pQuery->numOfOutput; ++i) {
        int32_t bytes = pQuery->pSelectExpr[i].bytes;
        assert(bytes > 0 && newSize > 0);

        // compute the size in size_t to avoid an int32_t overflow of bytes * newSize
        char *tmp = realloc(pQuery->sdata[i], (size_t)bytes * newSize + sizeof(tFilePage));
        if (tmp == NULL) {
          // the old buffer is still owned by pQuery->sdata[i]; abort the query instead of going on
          longjmp(pRuntimeEnv->env, TSDB_CODE_QRY_OUT_OF_MEMORY);
        }

        // clear the area behind the rows that are already in the buffer
        memset(tmp + sizeof(tFilePage) + bytes * pRec->rows, 0, (newSize - pRec->rows) * bytes);
        pQuery->sdata[i] = (tFilePage *)tmp;

        // set the pCtx output buffer position
        pRuntimeEnv->pCtx[i].aOutputBuf = pQuery->sdata[i]->data + pRec->rows * bytes;

        int32_t functionId = pQuery->pSelectExpr[i].base.functionId;
        if (functionId == TSDB_FUNC_TOP || functionId == TSDB_FUNC_BOTTOM || functionId == TSDB_FUNC_DIFF) {
          // these functions write their timestamps into the output buffer of the first column
          pRuntimeEnv->pCtx[i].ptsOutputBuf = pRuntimeEnv->pCtx[0].aOutputBuf;
        }
      }

      qDebug("QInfo:%p realloc output buffer, new size: %d rows, old:%" PRId64 ", remain:%" PRId64, GET_QINFO_ADDR(pRuntimeEnv),
             newSize, pRec->capacity, newSize - pRec->rows);

      pRec->capacity = newSize;
    }
  }
}

2189 2190
/*
 * Scan the data blocks delivered by the query handle of the current scan
 * (master scan: pQueryHandle, otherwise pSecQueryHandle) and apply the query
 * functions on each of them.
 *
 * The scan stops when there is no more data block, or when the query status
 * reports a full result buffer or a completed query. A killed query finalizes
 * its result and leaves through longjmp(pRuntimeEnv->env, ...).
 *
 * If the result buffer is not full at the end, the query is set to completed;
 * for the master scan of an interval query all time windows are closed then.
 *
 * Always returns 0.
 */
static int64_t doScanAllDataBlocks(SQueryRuntimeEnv *pRuntimeEnv) {
  SQuery          *pQuery = pRuntimeEnv->pQuery;
  STableQueryInfo *pTableQueryInfo = pQuery->current;
  SQueryCostInfo  *pCost = &pRuntimeEnv->summary;

  qDebug("QInfo:%p query start, qrange:%" PRId64 "-%" PRId64 ", lastkey:%" PRId64 ", order:%d",
         GET_QINFO_ADDR(pRuntimeEnv), pTableQueryInfo->win.skey, pTableQueryInfo->win.ekey, pTableQueryInfo->lastKey,
         pQuery->order.order);

  TsdbQueryHandleT pHandle = pRuntimeEnv->pSecQueryHandle;
  if (IS_MASTER_SCAN(pRuntimeEnv)) {
    pHandle = pRuntimeEnv->pQueryHandle;
  }

  int32_t step = GET_FORWARD_DIRECTION_FACTOR(pQuery->order.order);

  SDataBlockInfo blockInfo = SDATA_BLOCK_INITIALIZER;
  while (tsdbNextDataBlock(pHandle)) {
    pCost->totalBlocks += 1;

    if (IS_QUERY_KILLED(GET_QINFO_ADDR(pRuntimeEnv))) {
      finalizeQueryResult(pRuntimeEnv); // clean up allocated resource during query
      longjmp(pRuntimeEnv->env, TSDB_CODE_TSC_QUERY_CANCELLED);
    }

    tsdbRetrieveDataBlockInfo(pHandle, &blockInfo);

    // todo extract methods
    // the first block of an interval query decides the start of the first time window
    if (QUERY_IS_INTERVAL_QUERY(pQuery) && pRuntimeEnv->windowResInfo.prevSKey == TSKEY_INITIAL_VAL) {
      STimeWindow     firstWin = TSWINDOW_INITIALIZER;
      SWindowResInfo *pWinInfo = &pRuntimeEnv->windowResInfo;

      if (QUERY_IS_ASC_QUERY(pQuery)) {
        getAlignQueryTimeWindow(pQuery, blockInfo.window.skey, blockInfo.window.skey, pQuery->window.ekey, &firstWin);
        pWinInfo->startTime = firstWin.skey;
        pWinInfo->prevSKey = firstWin.skey;
      } else {
        // the start position of the first time window in the endpoint that spreads beyond the queried last timestamp
        getAlignQueryTimeWindow(pQuery, blockInfo.window.ekey, pQuery->window.ekey, blockInfo.window.ekey, &firstWin);

        pWinInfo->startTime = pQuery->window.skey;
        pWinInfo->prevSKey = firstWin.skey;
      }
    }

    // in case of prj/diff query, ensure the output buffer is sufficient to accommodate the results of current block
    ensureOutputBuffer(pRuntimeEnv, &blockInfo);

    SDataStatis *pStatis = NULL;
    SArray      *pDataBlock = NULL;

    int32_t loadStatus = loadDataBlockOnDemand(pRuntimeEnv, pHandle, &blockInfo, &pStatis, &pDataBlock);
    if (loadStatus == BLK_DATA_DISCARD) {
      // the block is skipped, move the last key behind it in scan direction
      if (QUERY_IS_ASC_QUERY(pQuery)) {
        pQuery->current->lastKey = blockInfo.window.ekey + step;
      } else {
        pQuery->current->lastKey = blockInfo.window.skey + step;
      }

      continue;
    }

    // query start position can not move into tableApplyFunctionsOnBlock due to limit/offset condition
    if (QUERY_IS_ASC_QUERY(pQuery)) {
      pQuery->pos = 0;
    } else {
      pQuery->pos = blockInfo.rows - 1;
    }

    int32_t numOfRes = tableApplyFunctionsOnBlock(pRuntimeEnv, &blockInfo, pStatis, binarySearchForKey, pDataBlock);

    pCost->totalRows += blockInfo.rows;
    qDebug("QInfo:%p check data block, brange:%" PRId64 "-%" PRId64 ", numOfRows:%d, numOfRes:%d, lastKey:%"PRId64, GET_QINFO_ADDR(pRuntimeEnv),
           blockInfo.window.skey, blockInfo.window.ekey, blockInfo.rows, numOfRes, pQuery->current->lastKey);

    // while the output buffer is full or limit/offset is applied, query may be paused here
    if (Q_STATUS_EQUAL(pQuery->status, QUERY_RESBUF_FULL | QUERY_COMPLETED)) {
      break;
    }
  }

  // if the result buffer is not full, set the query complete
  if (!Q_STATUS_EQUAL(pQuery->status, QUERY_RESBUF_FULL)) {
    setQueryStatus(pQuery, QUERY_COMPLETED);
  }

  if (QUERY_IS_INTERVAL_QUERY(pQuery) && IS_MASTER_SCAN(pRuntimeEnv)) {
    if (!Q_STATUS_EQUAL(pQuery->status, QUERY_COMPLETED)) {
      assert(Q_STATUS_EQUAL(pQuery->status, QUERY_RESBUF_FULL));
    } else {
      closeAllTimeWindow(&pRuntimeEnv->windowResInfo);
//      removeRedundantWindow(&pRuntimeEnv->windowResInfo, pTableQueryInfo->lastKey - step, step);
      pRuntimeEnv->windowResInfo.curIndex = pRuntimeEnv->windowResInfo.size - 1;  // point to the last time window
    }
  }

  return 0;
}

/*
 * set tag value in SQLFunctionCtx
 * e.g.,tag information into input buffer
 */
2276
/*
 * Replace the content of `tag` with the value of one tag of the table.
 *
 * The previous content of `tag` is destroyed first. For the table name column
 * the table name is stored as a binary value. For any other tag the value is
 * fetched by tsdbGetTableTagVal; a missing value or a NULL value turns `tag`
 * into a TSDB_DATA_TYPE_NULL variant.
 */
static void doSetTagValueInParam(void *tsdb, void* pTable, int32_t tagColId, tVariant *tag, int16_t type, int16_t bytes) {
  tVariantDestroy(tag);

  if (tagColId == TSDB_TBNAME_COLUMN_INDEX) {
    char* name = tsdbGetTableName(pTable);
    assert(name != NULL);

    tVariantCreateFromBinary(tag, varDataVal(name), varDataLen(name), TSDB_DATA_TYPE_BINARY);
    return;
  }

  char* val = tsdbGetTableTagVal(pTable, tagColId, type, bytes);
  if (val == NULL) {
    tag->nType = TSDB_DATA_TYPE_NULL;
    return;
  }

  // the NULL check is the same for variable-length and fixed-length types
  if (isNull(val, type)) {
    tag->nType = TSDB_DATA_TYPE_NULL;
    return;
  }

  bool varLenType = (type == TSDB_DATA_TYPE_BINARY) || (type == TSDB_DATA_TYPE_NCHAR);
  if (varLenType) {
    // variable-length value: payload and length are read through the varData accessors
    tVariantCreateFromBinary(tag, varDataVal(val), varDataLen(val), type);
  } else {
    tVariantCreateFromBinary(tag, val, bytes, type);
  }
}

2309
void setTagVal(SQueryRuntimeEnv *pRuntimeEnv, void *pTable, void *tsdb) {
2310
  SQuery *pQuery = pRuntimeEnv->pQuery;
H
Haojun Liao 已提交
2311
  SQInfo* pQInfo = GET_QINFO_ADDR(pRuntimeEnv);
2312

H
[td-90]  
Haojun Liao 已提交
2313 2314 2315
  SExprInfo *pExprInfo = &pQuery->pSelectExpr[0];
  if (pQuery->numOfOutput == 1 && pExprInfo->base.functionId == TSDB_FUNC_TS_COMP) {
    assert(pExprInfo->base.numOfParams == 1);
H
Haojun Liao 已提交
2316 2317 2318 2319 2320 2321 2322 2323 2324 2325

    // todo refactor extract function.
    int16_t type = -1, bytes = -1;
    for(int32_t i = 0; i < pQuery->numOfTags; ++i) {
      if (pQuery->tagColList[i].colId == pExprInfo->base.arg->argValue.i64) {
        type = pQuery->tagColList[i].type;
        bytes = pQuery->tagColList[i].bytes;
      }
    }

2326
    doSetTagValueInParam(tsdb, pTable, pExprInfo->base.arg->argValue.i64, &pRuntimeEnv->pCtx[0].tag, type, bytes);
2327 2328
  } else {
    // set tag value, by which the results are aggregated.
2329
    for (int32_t idx = 0; idx < pQuery->numOfOutput; ++idx) {
H
Haojun Liao 已提交
2330
      SExprInfo* pLocalExprInfo = &pQuery->pSelectExpr[idx];
H
[td-90]  
Haojun Liao 已提交
2331
  
2332
      // ts_comp column required the tag value for join filter
H
Haojun Liao 已提交
2333
      if (!TSDB_COL_IS_TAG(pLocalExprInfo->base.colInfo.flag)) {
2334 2335
        continue;
      }
2336

2337
      // todo use tag column index to optimize performance
2338
      doSetTagValueInParam(tsdb, pTable, pLocalExprInfo->base.colInfo.colId, &pRuntimeEnv->pCtx[idx].tag,
H
Haojun Liao 已提交
2339
                           pLocalExprInfo->type, pLocalExprInfo->bytes);
2340
    }
2341

2342
    // set the join tag for first column
H
[td-90]  
Haojun Liao 已提交
2343
    SSqlFuncMsg *pFuncMsg = &pExprInfo->base;
H
Haojun Liao 已提交
2344
    if ((pFuncMsg->functionId == TSDB_FUNC_TS || pFuncMsg->functionId == TSDB_FUNC_PRJ) && pFuncMsg->colInfo.colIndex == PRIMARYKEY_TIMESTAMP_COL_INDEX &&
2345 2346
        pRuntimeEnv->pTSBuf != NULL) {
      assert(pFuncMsg->numOfParams == 1);
H
Haojun Liao 已提交
2347 2348 2349 2350 2351 2352 2353 2354 2355 2356

      // todo refactor
      int16_t type = -1, bytes = -1;
      for(int32_t i = 0; i < pQuery->numOfTags; ++i) {
        if (pQuery->tagColList[i].colId == pExprInfo->base.arg->argValue.i64) {
          type = pQuery->tagColList[i].type;
          bytes = pQuery->tagColList[i].bytes;
        }
      }

2357
      doSetTagValueInParam(tsdb, pTable, pExprInfo->base.arg->argValue.i64, &pRuntimeEnv->pCtx[0].tag, type, bytes);
2358
      qDebug("QInfo:%p set tag value for join comparison, colId:%" PRId64 ", val:%"PRId64, pQInfo, pExprInfo->base.arg->argValue.i64,
B
Bomin Zhang 已提交
2359
          pRuntimeEnv->pCtx[0].tag.i64Key)
2360 2361 2362 2363 2364 2365 2366
    }
  }
}

/*
 * Feed one window result into the merge contexts and run the distributed
 * merge function of every output column.
 *
 * mergeFlag == false: a new output row is started; each context's output
 *                     cursor is advanced by one row and its function state is
 *                     reset and re-initialized before merging.
 * mergeFlag == true:  the window result is merged into the current output row.
 */
static void doMerge(SQueryRuntimeEnv *pRuntimeEnv, int64_t timestamp, SWindowResult *pWindowRes, bool mergeFlag) {
  SQuery *pQuery = pRuntimeEnv->pQuery;

  // pass 1: prepare the input/output state of every context
  for (int32_t col = 0; col < pQuery->numOfOutput; ++col) {
    SQLFunctionCtx *pOne = &pRuntimeEnv->pCtx[col];
    int32_t         funcId = pQuery->pSelectExpr[col].base.functionId;

    if (!mergeFlag) {
      pOne->aOutputBuf += pOne->outputBytes;
      pOne->currentStage = FIRST_STAGE_MERGE;

      resetResultInfo(pOne->resultInfo);
      aAggs[funcId].init(pOne);
    }

    pOne->hasNull = true;
    pOne->nStartQueryTimestamp = timestamp;
    pOne->aInputElemBuf = getPosInResultPage(pRuntimeEnv, col, pWindowRes);

    if (funcId != TSDB_FUNC_TAG_DUMMY && funcId != TSDB_FUNC_TAG) {
      continue;
    }

    // in case of tag column, the tag information should be extracted from input buffer
    tVariantDestroy(&pOne->tag);

    int32_t outType = pOne->outputType;
    if (outType == TSDB_DATA_TYPE_BINARY || outType == TSDB_DATA_TYPE_NCHAR) {
      tVariantCreateFromBinary(&pOne->tag, varDataVal(pOne->aInputElemBuf), varDataLen(pOne->aInputElemBuf), outType);
    } else {
      tVariantCreateFromBinary(&pOne->tag, pOne->aInputElemBuf, pOne->inputBytes, pOne->inputType);
    }
  }

  // pass 2: invoke the merge function; TSDB_FUNC_TAG_DUMMY columns are skipped
  for (int32_t col = 0; col < pQuery->numOfOutput; ++col) {
    int32_t funcId = pQuery->pSelectExpr[col].base.functionId;
    if (funcId != TSDB_FUNC_TAG_DUMMY) {
      aAggs[funcId].distMergeFunc(&pRuntimeEnv->pCtx[col]);
    }
  }
}

2406
static UNUSED_FUNC void printBinaryData(int32_t functionId, char *data, int32_t srcDataType) {
2407 2408 2409 2410 2411 2412 2413 2414 2415 2416 2417 2418 2419 2420 2421 2422 2423 2424 2425 2426 2427 2428 2429 2430 2431 2432 2433 2434 2435 2436 2437 2438 2439 2440 2441 2442 2443 2444 2445 2446 2447 2448 2449 2450 2451 2452 2453 2454 2455 2456 2457 2458 2459 2460 2461 2462 2463 2464 2465 2466 2467 2468 2469 2470 2471 2472 2473 2474
  if (functionId == TSDB_FUNC_FIRST_DST || functionId == TSDB_FUNC_LAST_DST) {
    switch (srcDataType) {
      case TSDB_DATA_TYPE_BINARY:
        printf("%" PRId64 ",%s\t", *(TSKEY *)data, (data + TSDB_KEYSIZE + 1));
        break;
      case TSDB_DATA_TYPE_TINYINT:
      case TSDB_DATA_TYPE_BOOL:
        printf("%" PRId64 ",%d\t", *(TSKEY *)data, *(int8_t *)(data + TSDB_KEYSIZE + 1));
        break;
      case TSDB_DATA_TYPE_SMALLINT:
        printf("%" PRId64 ",%d\t", *(TSKEY *)data, *(int16_t *)(data + TSDB_KEYSIZE + 1));
        break;
      case TSDB_DATA_TYPE_BIGINT:
      case TSDB_DATA_TYPE_TIMESTAMP:
        printf("%" PRId64 ",%" PRId64 "\t", *(TSKEY *)data, *(TSKEY *)(data + TSDB_KEYSIZE + 1));
        break;
      case TSDB_DATA_TYPE_INT:
        printf("%" PRId64 ",%d\t", *(TSKEY *)data, *(int32_t *)(data + TSDB_KEYSIZE + 1));
        break;
      case TSDB_DATA_TYPE_FLOAT:
        printf("%" PRId64 ",%f\t", *(TSKEY *)data, *(float *)(data + TSDB_KEYSIZE + 1));
        break;
      case TSDB_DATA_TYPE_DOUBLE:
        printf("%" PRId64 ",%lf\t", *(TSKEY *)data, *(double *)(data + TSDB_KEYSIZE + 1));
        break;
    }
  } else if (functionId == TSDB_FUNC_AVG) {
    printf("%lf,%d\t", *(double *)data, *(int32_t *)(data + sizeof(double)));
  } else if (functionId == TSDB_FUNC_SPREAD) {
    printf("%lf,%lf\t", *(double *)data, *(double *)(data + sizeof(double)));
  } else if (functionId == TSDB_FUNC_TWA) {
    data += 1;
    printf("%lf,%" PRId64 ",%" PRId64 ",%" PRId64 "\t", *(double *)data, *(int64_t *)(data + 8),
           *(int64_t *)(data + 16), *(int64_t *)(data + 24));
  } else if (functionId == TSDB_FUNC_MIN || functionId == TSDB_FUNC_MAX) {
    switch (srcDataType) {
      case TSDB_DATA_TYPE_TINYINT:
      case TSDB_DATA_TYPE_BOOL:
        printf("%d\t", *(int8_t *)data);
        break;
      case TSDB_DATA_TYPE_SMALLINT:
        printf("%d\t", *(int16_t *)data);
        break;
      case TSDB_DATA_TYPE_BIGINT:
      case TSDB_DATA_TYPE_TIMESTAMP:
        printf("%" PRId64 "\t", *(int64_t *)data);
        break;
      case TSDB_DATA_TYPE_INT:
        printf("%d\t", *(int *)data);
        break;
      case TSDB_DATA_TYPE_FLOAT:
        printf("%f\t", *(float *)data);
        break;
      case TSDB_DATA_TYPE_DOUBLE:
        printf("%f\t", *(float *)data);
        break;
    }
  } else if (functionId == TSDB_FUNC_SUM) {
    if (srcDataType == TSDB_DATA_TYPE_FLOAT || srcDataType == TSDB_DATA_TYPE_DOUBLE) {
      printf("%lf\t", *(float *)data);
    } else {
      printf("%" PRId64 "\t", *(int64_t *)data);
    }
  } else {
    printf("%s\t", data);
  }
}

2475
/*
 * Debug helper: dump numOfRows rows of the intermediate result to stdout,
 * one row per line. pdata[i] holds the cells of output column i, laid out
 * back to back with a stride of pSelectExpr[i].bytes.
 * Columns whose type is not handled below print nothing.
 */
void UNUSED_FUNC displayInterResult(tFilePage **pdata, SQueryRuntimeEnv* pRuntimeEnv, int32_t numOfRows) {
  SQuery* pQuery = pRuntimeEnv->pQuery;
  int32_t numOfCols = pQuery->numOfOutput;
  printf("super table query intermediate result, total:%d\n", numOfRows);

  for (int32_t row = 0; row < numOfRows; ++row) {
    for (int32_t col = 0; col < numOfCols; ++col) {
      SExprInfo *pExpr = &pQuery->pSelectExpr[col];
      char *     cell = pdata[col]->data + pExpr->bytes * row;
      int32_t    type = pExpr->type;

      switch (type) {
        case TSDB_DATA_TYPE_BINARY:
          // binary cells hold function specific intermediate data
          printBinaryData(pExpr->base.functionId, cell, type);
          break;
        case TSDB_DATA_TYPE_TIMESTAMP:
        case TSDB_DATA_TYPE_BIGINT:
          printf("%" PRId64 "\t", *(int64_t *)cell);
          break;
        case TSDB_DATA_TYPE_INT:
          printf("%d\t", *(int32_t *)cell);
          break;
        case TSDB_DATA_TYPE_FLOAT:
          printf("%f\t", *(float *)cell);
          break;
        case TSDB_DATA_TYPE_DOUBLE:
          printf("%lf\t", *(double *)cell);
          break;
      }
    }

    printf("\n");
  }
}

typedef struct SCompSupporter {
H
hjxilinx 已提交
2514 2515 2516
  STableQueryInfo **pTableQueryInfo;
  int32_t *         position;
  SQInfo *          pQInfo;
2517 2518 2519 2520 2521
} SCompSupporter;

int32_t tableResultComparFn(const void *pLeft, const void *pRight, void *param) {
  int32_t left = *(int32_t *)pLeft;
  int32_t right = *(int32_t *)pRight;
2522

2523 2524
  SCompSupporter *  supporter = (SCompSupporter *)param;
  SQueryRuntimeEnv *pRuntimeEnv = &supporter->pQInfo->runtimeEnv;
2525

2526 2527
  int32_t leftPos = supporter->position[left];
  int32_t rightPos = supporter->position[right];
2528

2529 2530 2531 2532
  /* left source is exhausted */
  if (leftPos == -1) {
    return 1;
  }
2533

2534 2535 2536 2537
  /* right source is exhausted*/
  if (rightPos == -1) {
    return -1;
  }
2538

H
hjxilinx 已提交
2539
  SWindowResInfo *pWindowResInfo1 = &supporter->pTableQueryInfo[left]->windowResInfo;
2540
  SWindowResult * pWindowRes1 = getWindowResult(pWindowResInfo1, leftPos);
2541

2542 2543
  char *b1 = getPosInResultPage(pRuntimeEnv, PRIMARYKEY_TIMESTAMP_COL_INDEX, pWindowRes1);
  TSKEY leftTimestamp = GET_INT64_VAL(b1);
2544

H
hjxilinx 已提交
2545
  SWindowResInfo *pWindowResInfo2 = &supporter->pTableQueryInfo[right]->windowResInfo;
2546
  SWindowResult * pWindowRes2 = getWindowResult(pWindowResInfo2, rightPos);
2547

2548 2549
  char *b2 = getPosInResultPage(pRuntimeEnv, PRIMARYKEY_TIMESTAMP_COL_INDEX, pWindowRes2);
  TSKEY rightTimestamp = GET_INT64_VAL(b2);
2550

2551 2552 2553
  if (leftTimestamp == rightTimestamp) {
    return 0;
  }
2554

2555 2556 2557
  return leftTimestamp > rightTimestamp ? 1 : -1;
}

2558
/*
 * Merge the results of table groups, starting at pQInfo->groupIndex, until a
 * group yields at least one result or all groups have been visited.
 * pQInfo->groupIndex is advanced past every group that was merged.
 *
 * Returns TSDB_CODE_SUCCESS, or -1 when merging a group failed.
 */
int32_t mergeIntoGroupResult(SQInfo *pQInfo) {
  int32_t numOfGroups = GET_NUM_OF_TABLEGROUP(pQInfo);
  int64_t st = taosGetTimestampMs();

  for (;;) {
    if (pQInfo->groupIndex >= numOfGroups) {
      break;
    }

    SArray *group = GET_TABLEGROUP(pQInfo, pQInfo->groupIndex);
    int32_t ret = mergeIntoGroupResultImpl(pQInfo, group);
    if (ret < 0) {  // not enough disk space to save the data into disk
      return -1;
    }

    pQInfo->groupIndex += 1;

    // this group generates at least one result, return results
    if (ret > 0) {
      break;
    }

    assert(pQInfo->numOfGroupResultPages == 0);
    qDebug("QInfo:%p no result in group %d, continue", pQInfo, pQInfo->groupIndex - 1);
  }

  qDebug("QInfo:%p merge res data into group, index:%d, total group:%d, elapsed time:%" PRId64 "ms", pQInfo,
         pQInfo->groupIndex - 1, numOfGroups, taosGetTimestampMs() - st);

  return TSDB_CODE_SUCCESS;
}

/*
 * Copy the next chunk of merged group results from the disk based result
 * buffer into the output buffers pQuery->sdata[] and account the rows in
 * pQuery->rec.rows.
 *
 * When every chunk of the current group has been consumed, the next group is
 * merged first. If no group is left, pQInfo->tableIndex is set to the number
 * of tables to mark the query as completed and nothing is copied.
 */
void copyResToQueryResultBuf(SQInfo *pQInfo, SQuery *pQuery) {
  bool groupConsumed = (pQInfo->offset == pQInfo->numOfGroupResultPages);
  if (groupConsumed) {
    pQInfo->numOfGroupResultPages = 0;

    // current results of group has been sent to client, try next group
    if (mergeIntoGroupResult(pQInfo) != TSDB_CODE_SUCCESS) {
      return;  // failed to save data in the disk
    }

    // check if all results has been sent to client
    int32_t numOfGroup = GET_NUM_OF_TABLEGROUP(pQInfo);
    if (pQInfo->numOfGroupResultPages == 0 && pQInfo->groupIndex == numOfGroup) {
      pQInfo->tableIndex = pQInfo->tableqinfoGroupInfo.numOfTables;  // set query completed
      return;
    }
  }

  SQueryRuntimeEnv *   pRuntimeEnv = &pQInfo->runtimeEnv;
  SDiskbasedResultBuf *pResultBuf = pRuntimeEnv->pResultBuf;

  int32_t baseId = getGroupResultId(pQInfo->groupIndex - 1);
  SIDList pages = getDataBufPagesIdList(pResultBuf, pQInfo->offset + baseId);

  // rows copied so far; it is also the row offset of the next page in the output buffers
  int32_t copied = 0;
  for (int32_t p = 0; p < pages.size; ++p) {
    tFilePage *pPage = GET_RES_BUF_PAGE_BY_ID(pResultBuf, pages.pData[p]);

    for (int32_t col = 0; col < pQuery->numOfOutput; ++col) {
      int32_t bytes = pRuntimeEnv->pCtx[col].outputBytes;
      char *  pDest = pQuery->sdata[col]->data;

      // within a page the cells of one column are stored contiguously
      memcpy(pDest + copied * bytes, pPage->data + pRuntimeEnv->offset[col] * pPage->num,
             bytes * pPage->num);
    }

    copied += pPage->num;
  }

  assert(pQuery->rec.rows == 0);

  pQuery->rec.rows += copied;
  pQInfo->offset += 1;
}

H
Haojun Liao 已提交
2640
/*
 * Return the number of results held by a window result: the numOfRes of the
 * first output column that is not a ts/tag/tagprj function and has
 * numOfRes > 0. Returns 0 when there is no such column.
 */
int64_t getNumOfResultWindowRes(SQuery *pQuery, SWindowResult *pWindowRes) {
  for (int32_t col = 0; col < pQuery->numOfOutput; ++col) {
    int32_t funcId = pQuery->pSelectExpr[col].base.functionId;

    /*
     * ts, tag, tagprj function can not decide the output number of current query
     * the number of output result is decided by main output
     */
    bool isAuxiliary = (funcId == TSDB_FUNC_TS) || (funcId == TSDB_FUNC_TAG) || (funcId == TSDB_FUNC_TAGPRJ);
    if (isAuxiliary) {
      continue;
    }

    SResultInfo *pInfo = &pWindowRes->resultInfo[col];
    assert(pInfo != NULL);

    if (pInfo->numOfRes > 0) {
      return pInfo->numOfRes;
    }
  }

  return 0;
}

2663
/*
 * Merge the window results of all tables in pGroup, ordered by window start
 * timestamp, and flush the merged rows into the disk based result buffer.
 * Window results with the same timestamp are merged into one output row.
 *
 * Returns the number of result pages produced for the group
 * (pQInfo->numOfGroupResultPages, 0 when the group has no result), or -1
 * when flushing the merged rows failed. On memory allocation failure the
 * function does not return: it releases its resources and longjmp()s to
 * pRuntimeEnv->env with TSDB_CODE_QRY_OUT_OF_MEMORY.
 *
 * All temporary resources are released on every exit path.
 */
int32_t mergeIntoGroupResultImpl(SQInfo *pQInfo, SArray *pGroup) {
  SQueryRuntimeEnv *pRuntimeEnv = &pQInfo->runtimeEnv;
  SQuery *          pQuery = pRuntimeEnv->pQuery;

  size_t      size = taosArrayGetSize(pGroup);
  tFilePage **buffer = pQuery->sdata;

  // an empty group has nothing to merge; do not let a zero sized allocation look like OOM
  if (size == 0) {
    assert(pQInfo->numOfGroupResultPages == 0);
    return 0;
  }

  int32_t ret = -1;

  SLoserTreeInfo *pTree = NULL;
  SResultInfo *   pResultInfo = NULL;
  char *          buf = NULL;

  int32_t *         posList = calloc(size, sizeof(int32_t));
  STableQueryInfo **pTableList = malloc(POINTER_BYTES * size);

  if (pTableList == NULL || posList == NULL) {
    tfree(posList);
    tfree(pTableList);

    qError("QInfo:%p failed alloc memory", pQInfo);
    longjmp(pRuntimeEnv->env, TSDB_CODE_QRY_OUT_OF_MEMORY);
  }

  // todo opt for the case of one table per group
  // collect the tables that own at least one result page and one window result
  int32_t numOfTables = 0;
  for (size_t i = 0; i < size; ++i) {
    STableQueryInfo *item = taosArrayGetP(pGroup, i);

    SIDList list = getDataBufPagesIdList(pRuntimeEnv->pResultBuf, TSDB_TABLEID(item->pTable)->tid);
    if (list.size > 0 && item->windowResInfo.size > 0) {
      pTableList[numOfTables] = item;
      numOfTables += 1;
    }
  }

  if (numOfTables == 0) {
    tfree(posList);
    tfree(pTableList);

    assert(pQInfo->numOfGroupResultPages == 0);
    return 0;
  }

  SCompSupporter cs = {pTableList, posList, pQInfo};

  tLoserTreeCreate(&pTree, numOfTables, &cs, tableResultComparFn);
  pResultInfo = calloc(pQuery->numOfOutput, sizeof(SResultInfo));
  buf = calloc(1, pRuntimeEnv->interBufSize);

  // calloc may legally return NULL for a zero sized request, which is not an error
  bool noInterBuf = (buf == NULL && pRuntimeEnv->interBufSize > 0);
  if (pTree == NULL || pResultInfo == NULL || noInterBuf) {
    tfree(pTree);
    tfree(pTableList);
    tfree(posList);
    tfree(pResultInfo);
    tfree(buf);

    qError("QInfo:%p failed alloc memory", pQInfo);
    longjmp(pRuntimeEnv->env, TSDB_CODE_QRY_OUT_OF_MEMORY);
  }

  setWindowResultInfo(pResultInfo, pQuery, pRuntimeEnv->stableQuery, buf);
  resetMergeResultBuf(pQuery, pRuntimeEnv->pCtx, pResultInfo);

  int64_t lastTimestamp = -1;
  int64_t startt = taosGetTimestampMs();

  while (1) {
    // the root of the loser tree identifies the source with the smallest timestamp
    int32_t pos = pTree->pNode[0].index;

    SWindowResInfo *pWindowResInfo = &pTableList[pos]->windowResInfo;
    SWindowResult * pWindowRes = getWindowResult(pWindowResInfo, cs.position[pos]);

    char *b = getPosInResultPage(pRuntimeEnv, PRIMARYKEY_TIMESTAMP_COL_INDEX, pWindowRes);
    TSKEY ts = GET_INT64_VAL(b);

    assert(ts == pWindowRes->window.skey);
    int64_t num = getNumOfResultWindowRes(pQuery, pWindowRes);
    if (num > 0) {
      if (ts == lastTimestamp) {  // merge with the last one
        doMerge(pRuntimeEnv, ts, pWindowRes, true);
      } else {  // copy data to disk buffer
        if (buffer[0]->num == pQuery->rec.capacity) {
          if (flushFromResultBuf(pQInfo) != TSDB_CODE_SUCCESS) {
            qError("QInfo:%p failed to flush data into temp file, abort query", pQInfo);
            goto _cleanup;
          }

          resetMergeResultBuf(pQuery, pRuntimeEnv->pCtx, pResultInfo);
        }

        doMerge(pRuntimeEnv, ts, pWindowRes, false);
        buffer[0]->num += 1;
      }

      lastTimestamp = ts;
    }

    // move this source to its next window result (empty window results are simply skipped)
    cs.position[pos] += 1;
    if (cs.position[pos] >= pWindowResInfo->size) {
      cs.position[pos] = -1;

      // all input sources are exhausted
      if (--numOfTables == 0) {
        break;
      }
    }

    tLoserTreeAdjust(pTree, pos + pTree->numOfEntries);
  }

  if (buffer[0]->num != 0) {  // there are data in buffer
    if (flushFromResultBuf(pQInfo) != TSDB_CODE_SUCCESS) {
      qError("QInfo:%p failed to flush data into temp file, abort query", pQInfo);
      goto _cleanup;
    }
  }

  int64_t endt = taosGetTimestampMs();

#ifdef _DEBUG_VIEW
  displayInterResult(pQuery->sdata, pRuntimeEnv, pQuery->sdata[0]->num);
#endif

  qDebug("QInfo:%p result merge completed for group:%d, elapsed time:%" PRId64 " ms", pQInfo, pQInfo->groupIndex, endt - startt);

  pQInfo->offset = 0;
  ret = pQInfo->numOfGroupResultPages;

_cleanup:
  tfree(pTableList);
  tfree(posList);
  tfree(pTree);
  tfree(pResultInfo);
  tfree(buf);

  return ret;
}

/*
 * Move the rows currently held in pQuery->sdata[] into pages of the disk
 * based result buffer. Every page receives at most `capacity` rows; inside a
 * page the cells of column i start at pRuntimeEnv->offset[i] * <rows in page>.
 * All pages written by one call are registered under the same id, and
 * pQInfo->numOfGroupResultPages is incremented once per call.
 *
 * Returns TSDB_CODE_SUCCESS, or -1 when no new buffer page could be obtained.
 */
int32_t flushFromResultBuf(SQInfo *pQInfo) {
  SQueryRuntimeEnv *pRuntimeEnv = &pQInfo->runtimeEnv;
  SQuery *          pQuery = pRuntimeEnv->pQuery;

  SDiskbasedResultBuf *pResultBuf = pRuntimeEnv->pResultBuf;
  int32_t              capacity = (DEFAULT_INTERN_BUF_PAGE_SIZE - sizeof(tFilePage)) / pQuery->rowSize;

  int32_t pageId = -1;

  int32_t remain = pQuery->sdata[0]->num;
  int32_t offset = 0;

  while (remain > 0) {
    int32_t r = (remain > capacity) ? capacity : remain;

    int32_t    id = getGroupResultId(pQInfo->groupIndex) + pQInfo->numOfGroupResultPages;
    tFilePage *buf = getNewDataBuf(pResultBuf, id, &pageId);
    if (buf == NULL) {
      // callers compare the result against TSDB_CODE_SUCCESS, so a failure must be reported
      qError("QInfo:%p failed to get new result buffer page, group:%d", pQInfo, pQInfo->groupIndex);
      return -1;
    }

    // the row count is needed to lay out the columns, set it before copying
    buf->num = r;

    // pagewise copy to dest buffer
    for (int32_t i = 0; i < pQuery->numOfOutput; ++i) {
      int32_t bytes = pRuntimeEnv->pCtx[i].outputBytes;

      memcpy(buf->data + pRuntimeEnv->offset[i] * buf->num, ((char *)pQuery->sdata[i]->data) + offset * bytes,
             buf->num * bytes);
    }

    offset += r;
    remain -= r;
  }

  pQInfo->numOfGroupResultPages += 1;
  return TSDB_CODE_SUCCESS;
}

/*
 * Reset the merge state of every output column: the row counter of the
 * output page is cleared and the function context is bound to its entry in
 * pResultInfo.
 *
 * The output cursor is deliberately placed one row BEFORE the first row of
 * the output page: doMerge() advances it by outputBytes before a new row is
 * written.
 */
void resetMergeResultBuf(SQuery *pQuery, SQLFunctionCtx *pCtx, SResultInfo *pResultInfo) {
  int32_t numOfOutput = pQuery->numOfOutput;

  for (int32_t col = 0; col < numOfOutput; ++col) {
    SQLFunctionCtx *pOne = &pCtx[col];
    tFilePage *     pPage = pQuery->sdata[col];

    pOne->aOutputBuf = pPage->data - pOne->outputBytes;
    pOne->size = 1;
    pOne->startOffset = 0;
    pOne->resultInfo = &pResultInfo[col];

    pPage->num = 0;
  }
}

2856 2857 2858 2859 2860 2861 2862
/*
 * Prepare the per-table query state for a scan in the opposite direction.
 * The new scan window starts one step past the last visited key and ends at
 * the start of the previous window; lastKey is moved to the new start, the
 * cursor order is switched and the window result cursor is set to the last
 * window. A NULL pTableQueryInfo is ignored.
 */
static void updateTableQueryInfoForReverseScan(SQuery *pQuery, STableQueryInfo *pTableQueryInfo) {
  if (pTableQueryInfo == NULL) {
    return;
  }

  // order has change already!
  int32_t step = GET_FORWARD_DIRECTION_FACTOR(pQuery->order.order);

  // TODO validate the relation between win.ekey and lastKey + step (an assertion on it is currently disabled)
  TSKEY prevStart = pTableQueryInfo->win.skey;
  TSKEY newStart = pTableQueryInfo->lastKey + step;

  pTableQueryInfo->win.skey = newStart;
  pTableQueryInfo->win.ekey = prevStart;
  pTableQueryInfo->lastKey = newStart;

  SWITCH_ORDER(pTableQueryInfo->cur.order);
  pTableQueryInfo->cur.vgroupIndex = -1;

  // set the index at the end of time window
  pTableQueryInfo->windowResInfo.curIndex = pTableQueryInfo->windowResInfo.size - 1;
}

/*
 * For every closed window result, decide per output column whether the
 * function still has to run in the coming scan:
 *  - first/first_dst with ascending order and last/last_dst with descending
 *    order are marked as not complete,
 *  - ts and tag columns are left untouched,
 *  - every other function is marked as complete.
 * Windows that are not closed are skipped.
 */
static void disableFuncInReverseScanImpl(SQInfo* pQInfo, SWindowResInfo *pWindowResInfo, int32_t order) {
  SQuery* pQuery = pQInfo->runtimeEnv.pQuery;

  for (int32_t w = 0; w < pWindowResInfo->size; ++w) {
    SWindowStatus *pStatus = getTimeWindowResStatus(pWindowResInfo, w);
    if (!pStatus->closed) {
      continue;
    }

    SWindowResult *pRes = getWindowResult(pWindowResInfo, w);

    // open/close the specified query for each group result
    for (int32_t col = 0; col < pQuery->numOfOutput; ++col) {
      int32_t funcId = pQuery->pSelectExpr[col].base.functionId;

      bool isFirst = (funcId == TSDB_FUNC_FIRST || funcId == TSDB_FUNC_FIRST_DST);
      bool isLast = (funcId == TSDB_FUNC_LAST || funcId == TSDB_FUNC_LAST_DST);

      if ((isFirst && order == TSDB_ORDER_ASC) || (isLast && order == TSDB_ORDER_DESC)) {
        pRes->resultInfo[col].complete = false;
      } else if (funcId != TSDB_FUNC_TS && funcId != TSDB_FUNC_TAG) {
        pRes->resultInfo[col].complete = true;
      }
    }
  }
}

2908 2909
/*
 * Prepare the function state and the per-table scan windows for a scan in
 * the opposite direction.
 *
 * Function state: first/first_dst (ascending order) and last/last_dst
 * (descending order) are marked as not complete, ts/tag columns are left
 * untouched, all other functions are marked as complete. For group-by-column
 * and interval queries this is applied to the window results, otherwise to
 * the result info of each function context.
 *
 * Afterwards the query state of every table in every group is updated.
 */
void disableFuncInReverseScan(SQInfo *pQInfo) {
  SQueryRuntimeEnv* pRuntimeEnv = &pQInfo->runtimeEnv;
  SQuery *pQuery = pRuntimeEnv->pQuery;
  int32_t order = pQuery->order.order;

  if (pRuntimeEnv->groupbyNormalCol || QUERY_IS_INTERVAL_QUERY(pQuery)) {
    // group by normal columns and interval query on normal table
    disableFuncInReverseScanImpl(pQInfo, &pRuntimeEnv->windowResInfo, order);
  } else {
    // for simple result of table query
    for (int32_t col = 0; col < pQuery->numOfOutput; ++col) {  // todo refactor
      SResultInfo *pInfo = pRuntimeEnv->pCtx[col].resultInfo;
      if (pInfo == NULL) {
        continue;  // resultInfo is NULL, means no data checked in previous scan
      }

      int32_t funcId = pQuery->pSelectExpr[col].base.functionId;

      bool isFirst = (funcId == TSDB_FUNC_FIRST || funcId == TSDB_FUNC_FIRST_DST);
      bool isLast = (funcId == TSDB_FUNC_LAST || funcId == TSDB_FUNC_LAST_DST);

      if ((isFirst && order == TSDB_ORDER_ASC) || (isLast && order == TSDB_ORDER_DESC)) {
        pInfo->complete = false;
      } else if (funcId != TSDB_FUNC_TS && funcId != TSDB_FUNC_TAG) {
        pInfo->complete = true;
      }
    }
  }

  int32_t numOfGroups = GET_NUM_OF_TABLEGROUP(pQInfo);
  for (int32_t g = 0; g < numOfGroups; ++g) {
    SArray *group = GET_TABLEGROUP(pQInfo, g);

    size_t numOfTables = taosArrayGetSize(group);
    for (int32_t k = 0; k < numOfTables; ++k) {
      STableQueryInfo *pTableInfo = taosArrayGetP(group, k);
      updateTableQueryInfoForReverseScan(pQuery, pTableInfo);
    }
  }
}

2948
/* Apply SWITCH_ORDER to the order field of every output function context. */
void switchCtxOrder(SQueryRuntimeEnv *pRuntimeEnv) {
  SQLFunctionCtx *pCtx = pRuntimeEnv->pCtx;
  int32_t         numOfOutput = pRuntimeEnv->pQuery->numOfOutput;

  for (int32_t col = 0; col < numOfOutput; ++col) {
    SWITCH_ORDER(pCtx[col].order);
  }
}

H
Haojun Liao 已提交
2955
/*
 * Initialize a window result row: allocate one zeroed SResultInfo per output
 * column plus a zeroed intermediate buffer of interBufSize bytes, record the
 * position of the row and wire the buffer into the result info array.
 *
 * NOTE(review): neither allocation is checked and the function has no way to
 * report a failure - confirm how callers are expected to handle OOM here.
 */
void createQueryResultInfo(SQuery *pQuery, SWindowResult *pResultRow, bool isSTableQuery, SPosInfo *posInfo, size_t interBufSize) {
  size_t numOfCols = (size_t)pQuery->numOfOutput;

  pResultRow->pos = *posInfo;
  pResultRow->resultInfo = calloc(numOfCols, sizeof(SResultInfo));

  // set the intermediate result output buffer
  char *interBuf = calloc(1, interBufSize);
  setWindowResultInfo(pResultRow->resultInfo, pQuery, isSTableQuery, interBuf);
}

/*
 * Rewind every function context to the start of its output buffer, reset and
 * rebind its result info, zero the output buffer and finally (re)initialize
 * the functions through initCtxOutputBuf().
 */
void resetCtxOutputBuf(SQueryRuntimeEnv *pRuntimeEnv) {
  SQuery *pQuery = pRuntimeEnv->pQuery;

  for (int32_t col = 0; col < pQuery->numOfOutput; ++col) {
    SQLFunctionCtx *pCtx = &pRuntimeEnv->pCtx[col];
    tFilePage *     pPage = pQuery->sdata[col];
    SExprInfo *     pExpr = &pQuery->pSelectExpr[col];

    pCtx->aOutputBuf = pPage->data;

    /*
     * set the output buffer information and intermediate buffer
     * not all queries require the interResultBuf, such as COUNT/TAGPRJ/PRJ/TAG etc.
     */
    SResultInfo *pInfo = &pRuntimeEnv->resultInfo[col];
    resetResultInfo(pInfo);
    pCtx->resultInfo = pInfo;

    // top/bottom/diff write their timestamps into the output buffer of the first column
    switch (pExpr->base.functionId) {
      case TSDB_FUNC_TOP:
      case TSDB_FUNC_BOTTOM:
      case TSDB_FUNC_DIFF:
        pCtx->ptsOutputBuf = pRuntimeEnv->pCtx[0].aOutputBuf;
        break;
      default:
        break;
    }

    memset(pPage->data, 0, (size_t)pExpr->bytes * pQuery->rec.capacity);
  }

  initCtxOutputBuf(pRuntimeEnv);
}

/*
 * Advance the output cursors of the function contexts by `output` rows and
 * reset their result info. The diff function must not be part of the query.
 */
void forwardCtxOutputBuf(SQueryRuntimeEnv *pRuntimeEnv, int64_t output) {
  SQuery *pQuery = pRuntimeEnv->pQuery;

  // reset the execution contexts
  for (int32_t col = 0; col < pQuery->numOfOutput; ++col) {
    SQLFunctionCtx *pCtx = &pRuntimeEnv->pCtx[col];
    int32_t         funcId = pQuery->pSelectExpr[col].base.functionId;
    assert(funcId != TSDB_FUNC_DIFF);

    // set next output position
    if (IS_OUTER_FORWARD(aAggs[funcId].nStatus)) {
      pCtx->aOutputBuf += pCtx->outputBytes * output;
    }

    /*
     * NOTE: for top/bottom query, the value of first column of output (timestamp) are assigned
     * in the procedure of top/bottom routine
     * the output buffer in top/bottom routine is ptsOutputBuf, so we need to forward the output buffer
     *
     * diff function is handled in multi-output function
     */
    bool isTopBottom = (funcId == TSDB_FUNC_TOP) || (funcId == TSDB_FUNC_BOTTOM);
    if (isTopBottom) {
      pCtx->ptsOutputBuf += TSDB_KEYSIZE * output;
    }

    resetResultInfo(pCtx->resultInfo);
  }
}

/*
 * Clear the stage of every function context and run the init routine of the
 * function, unless its result info is already flagged as initialized.
 */
void initCtxOutputBuf(SQueryRuntimeEnv *pRuntimeEnv) {
  SQuery *pQuery = pRuntimeEnv->pQuery;

  for (int32_t col = 0; col < pQuery->numOfOutput; ++col) {
    SQLFunctionCtx *pCtx = &pRuntimeEnv->pCtx[col];
    pCtx->currentStage = 0;

    SResultInfo *pInfo = GET_RES_INFO(pCtx);
    if (!pInfo->initialized) {
      int32_t funcId = pQuery->pSelectExpr[col].base.functionId;
      aAggs[funcId].init(pCtx);
    }
  }
}

3037
/*
 * Apply the remaining OFFSET of the query to the rows in the output buffers.
 *
 * - rows <= offset: all rows are dropped, the offset is reduced by the number
 *   of rows, the output buffers are reset and the "result buffer full" flag
 *   is cleared.
 * - rows  > offset: the first `offset` rows are removed by moving the
 *   remaining rows to the front of each column buffer; the output cursors are
 *   placed right behind the remaining rows and the offset becomes 0.
 *
 * Nothing happens when there are no rows or no offset is left.
 */
void skipResults(SQueryRuntimeEnv *pRuntimeEnv) {
  SQuery *pQuery = pRuntimeEnv->pQuery;
  if (pQuery->rec.rows == 0 || pQuery->limit.offset == 0) {
    return;
  }

  if (pQuery->rec.rows <= pQuery->limit.offset) {
    qDebug("QInfo:%p skip rows:%" PRId64 ", new offset:%" PRIu64, GET_QINFO_ADDR(pRuntimeEnv), pQuery->rec.rows,
        pQuery->limit.offset - pQuery->rec.rows);

    pQuery->limit.offset -= pQuery->rec.rows;
    pQuery->rec.rows = 0;

    resetCtxOutputBuf(pRuntimeEnv);

    // clear the buffer full flag if exists
    CLEAR_QUERY_STATUS(pQuery, QUERY_RESBUF_FULL);
    return;
  }

  int64_t numOfSkip = pQuery->limit.offset;
  pQuery->rec.rows -= numOfSkip;
  pQuery->limit.offset = 0;

  qDebug("QInfo:%p skip row:%"PRId64", new offset:%d, numOfRows remain:%" PRIu64, GET_QINFO_ADDR(pRuntimeEnv), numOfSkip,
         0, pQuery->rec.rows);

  for (int32_t col = 0; col < pQuery->numOfOutput; ++col) {
    SQLFunctionCtx *pCtx = &pRuntimeEnv->pCtx[col];
    char *          base = (char *)pQuery->sdata[col]->data;
    int32_t         bytes = pCtx->outputBytes;

    // source and destination may overlap, hence memmove
    memmove(base, base + bytes * numOfSkip, pQuery->rec.rows * bytes);
    pCtx->aOutputBuf = base + pQuery->rec.rows * bytes;

    int32_t funcId = pQuery->pSelectExpr[col].base.functionId;
    if (funcId == TSDB_FUNC_DIFF || funcId == TSDB_FUNC_TOP || funcId == TSDB_FUNC_BOTTOM) {
      pCtx->ptsOutputBuf = pRuntimeEnv->pCtx[0].aOutputBuf;
    }
  }

  updateNumOfResult(pRuntimeEnv, pQuery->rec.rows);
}

/*
 * Update the status flags of the query.
 * QUERY_NOT_COMPLETED replaces the whole status; any other status first
 * clears QUERY_NOT_COMPLETED and is then OR-ed into the current status.
 */
void setQueryStatus(SQuery *pQuery, int8_t status) {
  if (status != QUERY_NOT_COMPLETED) {
    // QUERY_NOT_COMPLETED is not compatible with any other status, so clear its position first
    CLEAR_QUERY_STATUS(pQuery, QUERY_NOT_COMPLETED);
    pQuery->status |= status;
    return;
  }

  pQuery->status = status;
}

/*
 * Invoke xNextStep on every output function except TSDB_FUNC_TS, using the output buffers that are
 * currently bound to the function contexts.
 *
 * Returns true if at least one of the visited functions reports a result that is not complete.
 */
static bool advanceOutputCtxToNextStep(SQueryRuntimeEnv *pRuntimeEnv) {
  SQuery *pQuery = pRuntimeEnv->pQuery;
  bool    incomplete = false;

  for (int32_t col = 0; col < pQuery->numOfOutput; ++col) {
    int16_t funcId = pQuery->pSelectExpr[col].base.functionId;
    if (funcId == TSDB_FUNC_TS) {
      continue;
    }

    SQLFunctionCtx *pCtx = &pRuntimeEnv->pCtx[col];
    aAggs[funcId].xNextStep(pCtx);

    if (!GET_RES_INFO(pCtx)->complete) {
      incomplete = true;
    }
  }

  return incomplete;
}

/*
 * Decide whether the data blocks have to be scanned one more time.
 *
 * For group-by-normal-column and interval queries every closed window result is bound to the
 * function contexts and advanced to its next step; otherwise the contexts are advanced directly.
 * Returns true when any function result is still incomplete afterwards.
 */
bool needScanDataBlocksAgain(SQueryRuntimeEnv *pRuntimeEnv) {
  SQuery *pQuery = pRuntimeEnv->pQuery;

  if (!pRuntimeEnv->groupbyNormalCol && !QUERY_IS_INTERVAL_QUERY(pQuery)) {
    return advanceOutputCtxToNextStep(pRuntimeEnv);
  }

  bool            scanAgain = false;
  SWindowResInfo *pWindowResInfo = &pRuntimeEnv->windowResInfo;

  for (int32_t i = 0; i < pWindowResInfo->size; ++i) {
    SWindowResult *pWindowRes = getWindowResult(pWindowResInfo, i);
    if (!pWindowRes->status.closed) {
      continue;  // only closed windows are advanced
    }

    setWindowResOutputBuf(pRuntimeEnv, pWindowRes);

    // every closed window must be advanced, so the helper is always called (no short-circuit)
    if (advanceOutputCtxToNextStep(pRuntimeEnv)) {
      scanAgain = true;
    }
  }

  return scanAgain;
}

H
Haojun Liao 已提交
3133
/*
 * Capture the current scan state (query status, active window index, query time window) together
 * with the given start key, so it can be restored after repeated/reverse scans.
 *
 * The captured current window spans from `start` to the end key of the current table's window.
 */
static SQueryStatusInfo getQueryStatusInfo(SQueryRuntimeEnv *pRuntimeEnv, TSKEY start) {
  SQuery          *pQuery = pRuntimeEnv->pQuery;
  STableQueryInfo *pCurrent = pQuery->current;

  // the start key may not lie beyond the table's lastKey with respect to the scan direction
  assert(QUERY_IS_ASC_QUERY(pQuery) ? (start <= pCurrent->lastKey) : (start >= pCurrent->lastKey));

  SQueryStatusInfo snapshot = {0};

  snapshot.status         = pQuery->status;
  snapshot.windowIndex    = pRuntimeEnv->windowResInfo.curIndex;
  snapshot.lastKey        = start;
  snapshot.w              = pQuery->window;
  snapshot.curWindow.skey = start;
  snapshot.curWindow.ekey = pCurrent->win.ekey;

  return snapshot;
}

3151 3152 3153 3154
/*
 * Prepare the runtime environment for a reverse scan over the window recorded in pStatus.
 *
 * Saves the ts-buffer cursor into pStatus, flips the scan order of the ts buffer and of the query,
 * swaps the query time window, opens a fresh secondary query handle for the reversed window and
 * resets the query status to QUERY_NOT_COMPLETED.
 */
static void setEnvBeforeReverseScan(SQueryRuntimeEnv *pRuntimeEnv, SQueryStatusInfo *pStatus) {
  SQuery *pQuery = pRuntimeEnv->pQuery;
  SQInfo *pQInfo = GET_QINFO_ADDR(pRuntimeEnv);

  // remember where the ts buffer cursor was, it is restored once the reverse scan is done
  pStatus->cur = tsBufGetCursor(pRuntimeEnv->pTSBuf);
  if (pRuntimeEnv->pTSBuf != NULL) {
    SWITCH_ORDER(pRuntimeEnv->pTSBuf->cur.order);
    tsBufNextPos(pRuntimeEnv->pTSBuf);
  }

  // the reverse scan covers the current window with start and end key exchanged
  STimeWindow reversed = pStatus->curWindow;
  SWAP(reversed.skey, reversed.ekey, TSKEY);
  pQuery->window = reversed;

  SWITCH_ORDER(pQuery->order.order);
  SET_REVERSE_SCAN_FLAG(pRuntimeEnv);

  STsdbQueryCond cond = {
      .numOfCols = pQuery->numOfCols,
      .colList   = pQuery->colList,
      .order     = pQuery->order.order,
      .twindow   = pQuery->window,
  };

  // release the previous secondary handle before it is replaced
  if (pRuntimeEnv->pSecQueryHandle != NULL) {
    tsdbCleanupQueryHandle(pRuntimeEnv->pSecQueryHandle);
  }

  // NOTE(review): the returned handle is not checked for NULL here - confirm tsdbQueryTables cannot fail
  pRuntimeEnv->pSecQueryHandle = tsdbQueryTables(pQInfo->tsdb, &cond, &pQInfo->tableGroupInfo, pQInfo);

  setQueryStatus(pQuery, QUERY_NOT_COMPLETED);
  switchCtxOrder(pRuntimeEnv);
  disableFuncInReverseScan(pQInfo);
}

3188 3189
/*
 * Undo the changes made for the reverse scan: flip the scan order back, restore the ts-buffer
 * cursor, switch back to master scan and restore status, lastKey and time window from pStatus.
 */
static void clearEnvAfterReverseScan(SQueryRuntimeEnv *pRuntimeEnv, SQueryStatusInfo *pStatus) {
  SQuery          *pQuery = pRuntimeEnv->pQuery;
  STableQueryInfo *pCurrent = pQuery->current;

  SWITCH_ORDER(pQuery->order.order);
  switchCtxOrder(pRuntimeEnv);

  tsBufSetCursor(pRuntimeEnv->pTSBuf, &pStatus->cur);
  if (pRuntimeEnv->pTSBuf != NULL) {
    pRuntimeEnv->pTSBuf->cur.order = pQuery->order.order;
  }

  SET_MASTER_SCAN_FLAG(pRuntimeEnv);

  // bring back the state that was captured before the reverse scan started
  pQuery->status    = pStatus->status;
  pCurrent->lastKey = pStatus->lastKey;
  pCurrent->win     = pStatus->w;
  pQuery->window    = pCurrent->win;
}

3211
/*
 * Scan the data blocks of the current table, starting from `start`.
 *
 * The master scan is followed by repeated scans over the same range for as long as
 * needScanDataBlocksAgain() reports incomplete function results. If the query requires it, one
 * reverse scan is executed afterwards and the environment is restored.
 *
 * When the query is found to be killed, results are finalized and control leaves via longjmp with
 * TSDB_CODE_TSC_QUERY_CANCELLED.
 */
void scanOneTableDataBlocks(SQueryRuntimeEnv *pRuntimeEnv, TSKEY start) {
  SQInfo          *pQInfo = GET_QINFO_ADDR(pRuntimeEnv);
  SQuery          *pQuery = pRuntimeEnv->pQuery;
  STableQueryInfo *pCurrent = pQuery->current;

  setQueryStatus(pQuery, QUERY_NOT_COMPLETED);

  // snapshot of the start position, taken after the status has been reset
  SQueryStatusInfo saved = getQueryStatusInfo(pRuntimeEnv, start);

  SET_MASTER_SCAN_FLAG(pRuntimeEnv);
  const int32_t step = GET_FORWARD_DIRECTION_FACTOR(pQuery->order.order);

  for (;;) {
    doScanAllDataBlocks(pRuntimeEnv);

    // only the master scan defines the range and status that later scans have to reproduce
    if (IS_MASTER_SCAN(pRuntimeEnv)) {
      saved.status         = pQuery->status;
      saved.lastKey        = pCurrent->lastKey;
      saved.curWindow.ekey = pCurrent->lastKey - step;
    }

    if (!needScanDataBlocksAgain(pRuntimeEnv)) {
      // a repeated scan must leave the status the master scan ended with
      if (pRuntimeEnv->scanFlag == REPEAT_SCAN) {
        pQuery->status = saved.status;
      }

      break;
    }

    STsdbQueryCond cond = {
        .numOfCols = pQuery->numOfCols,
        .colList   = pQuery->colList,
        .order     = pQuery->order.order,
        .twindow   = saved.curWindow,
    };

    if (pRuntimeEnv->pSecQueryHandle != NULL) {
      tsdbCleanupQueryHandle(pRuntimeEnv->pSecQueryHandle);
    }

    // NOTE(review): the returned handle is not checked for NULL here - confirm tsdbQueryTables cannot fail
    pRuntimeEnv->pSecQueryHandle = tsdbQueryTables(pQInfo->tsdb, &cond, &pQInfo->tableGroupInfo, pQInfo);
    pRuntimeEnv->windowResInfo.curIndex = saved.windowIndex;

    setQueryStatus(pQuery, QUERY_NOT_COMPLETED);
    pRuntimeEnv->scanFlag = REPEAT_SCAN;

    qDebug("QInfo:%p start to repeat scan data blocks due to query func required, qrange:%"PRId64"-%"PRId64, pQInfo,
        cond.twindow.skey, cond.twindow.ekey);

    // abort the whole query if it has been killed in the meantime
    if (IS_QUERY_KILLED(pQInfo)) {
      finalizeQueryResult(pRuntimeEnv);  // clean up resources allocated during the query
      longjmp(pRuntimeEnv->env, TSDB_CODE_TSC_QUERY_CANCELLED);
    }
  }

  if (!needReverseScan(pQuery)) {
    return;
  }

  setEnvBeforeReverseScan(pRuntimeEnv, &saved);

  // reverse scan from the current position
  qDebug("QInfo:%p start to reverse scan", pQInfo);
  doScanAllDataBlocks(pRuntimeEnv);

  clearEnvAfterReverseScan(pRuntimeEnv, &saved);
}

H
hjxilinx 已提交
3282
/*
 * Run xFinalize of every output function.
 *
 * For group-by-normal-column and interval queries this is done once per closed window result (all
 * windows are closed first in the group-by case) and the number of rows of the window is recorded.
 * For any other query the functions are finalized once on the current contexts.
 */
void finalizeQueryResult(SQueryRuntimeEnv *pRuntimeEnv) {
  SQuery *pQuery = pRuntimeEnv->pQuery;

  if (!pRuntimeEnv->groupbyNormalCol && !QUERY_IS_INTERVAL_QUERY(pQuery)) {
    for (int32_t col = 0; col < pQuery->numOfOutput; ++col) {
      aAggs[pQuery->pSelectExpr[col].base.functionId].xFinalize(&pRuntimeEnv->pCtx[col]);
    }

    return;
  }

  SWindowResInfo *pWindowResInfo = &pRuntimeEnv->windowResInfo;
  if (pRuntimeEnv->groupbyNormalCol) {
    closeAllTimeWindow(pWindowResInfo);
  }

  for (int32_t i = 0; i < pWindowResInfo->size; ++i) {
    if (!isWindowResClosed(pWindowResInfo, i)) {
      continue;
    }

    SWindowResult *pWindowRes = &pWindowResInfo->pResult[i];
    setWindowResOutputBuf(pRuntimeEnv, pWindowRes);

    for (int32_t col = 0; col < pQuery->numOfOutput; ++col) {
      aAggs[pQuery->pSelectExpr[col].base.functionId].xFinalize(&pRuntimeEnv->pCtx[col]);
    }

    /*
     * record the number of output rows of this window; it is usually 1, except for
     * the top and bottom query
     */
    pWindowRes->numOfRows = getNumOfResult(pRuntimeEnv);
  }
}

/*
 * Returns true if the select list contains at least one function other than
 * TSDB_FUNC_TS, TSDB_FUNC_TAG and TSDB_FUNC_TAGPRJ.
 */
static bool hasMainOutput(SQuery *pQuery) {
  for (int32_t col = 0; col < pQuery->numOfOutput; ++col) {
    int32_t funcId = pQuery->pSelectExpr[col].base.functionId;

    bool auxiliary = (funcId == TSDB_FUNC_TS) || (funcId == TSDB_FUNC_TAG) || (funcId == TSDB_FUNC_TAGPRJ);
    if (!auxiliary) {
      return true;
    }
  }

  return false;
}

H
Haojun Liao 已提交
3330
/*
 * Initialize a STableQueryInfo inside the caller supplied buffer `buf` and return it.
 *
 * The time window is set to `win`, lastKey to its start key and the cursor's vgroupIndex to -1.
 * The window result info is only initialized for interval and group-by-normal-column queries.
 */
static STableQueryInfo *createTableQueryInfo(SQueryRuntimeEnv *pRuntimeEnv, void* pTable, STimeWindow win, void* buf) {
  SQuery          *pQuery = pRuntimeEnv->pQuery;
  STableQueryInfo *pInfo = buf;

  pInfo->pTable  = pTable;
  pInfo->win     = win;
  pInfo->lastKey = win.skey;

  pInfo->cur.vgroupIndex = -1;

  // other aggregate queries do not use the per-table windowResInfo, so it is left untouched for them
  if (QUERY_IS_INTERVAL_QUERY(pQuery) || pRuntimeEnv->groupbyNormalCol) {
    const int32_t initialSize = 16;
    const int32_t initialThreshold = 100;
    initWindowResInfo(&pInfo->windowResInfo, pRuntimeEnv, initialSize, initialThreshold, TSDB_DATA_TYPE_INT);
  }

  return pInfo;
}

3352
/*
 * Release the window result info owned by pTableQueryInfo. A NULL pointer is ignored.
 * The STableQueryInfo object itself is not freed here.
 */
void destroyTableQueryInfo(STableQueryInfo *pTableQueryInfo, int32_t numOfCols) {
  if (pTableQueryInfo != NULL) {
    cleanupTimeWindowInfo(&pTableQueryInfo->windowResInfo, numOfCols);
  }
}

H
Haojun Liao 已提交
3360 3361 3362 3363 3364 3365 3366
/*
 * Make _tableInfo the table that the query of _runtime is currently working on and check that its
 * lastKey is not behind the start key of its window with respect to the scan order.
 *
 * Both arguments are evaluated exactly once, so expressions with side effects are safe to pass.
 * _tableInfo must be (convertible to) a STableQueryInfo pointer.
 */
#define SET_CURRENT_QUERY_TABLE_INFO(_runtime, _tableInfo)                                \
  do {                                                                                    \
    SQuery          *_query = (_runtime)->pQuery;                                         \
    STableQueryInfo *_current = (_tableInfo);                                             \
    _query->current = _current;                                                           \
    assert(((_current->lastKey >= _current->win.skey) && QUERY_IS_ASC_QUERY(_query)) ||   \
           ((_current->lastKey <= _current->win.skey) && !QUERY_IS_ASC_QUERY(_query)));   \
  } while (0)
3367 3368 3369 3370

/**
 * set output buffer for different group
 * @param pRuntimeEnv
3371
 * @param pDataBlockInfo
3372
 */
H
Haojun Liao 已提交
3373
void setExecutionContext(SQInfo *pQInfo, int32_t groupIndex, TSKEY nextKey) {
3374
  SQueryRuntimeEnv *pRuntimeEnv = &pQInfo->runtimeEnv;
H
Haojun Liao 已提交
3375 3376 3377
  STableQueryInfo  *pTableQueryInfo = pRuntimeEnv->pQuery->current;
  SWindowResInfo   *pWindowResInfo = &pRuntimeEnv->windowResInfo;

H
Haojun Liao 已提交
3378 3379
  // lastKey needs to be updated
  pTableQueryInfo->lastKey = nextKey;
H
Haojun Liao 已提交
3380 3381 3382 3383

  if (pRuntimeEnv->hasTagResults || pRuntimeEnv->pTSBuf != NULL) {
    setAdditionalInfo(pQInfo, pTableQueryInfo->pTable, pTableQueryInfo);
  }
H
Haojun Liao 已提交
3384

H
Haojun Liao 已提交
3385 3386 3387
  if (pRuntimeEnv->prevGroupId != INT32_MIN && pRuntimeEnv->prevGroupId == groupIndex) {
    return;
  }
3388

H
Haojun Liao 已提交
3389
  int32_t GROUPRESULTID = 1;
3390 3391
  SWindowResult *pWindowRes = doSetTimeWindowFromKey(pRuntimeEnv, pWindowResInfo, (char *)&groupIndex,
      sizeof(groupIndex), true);
3392 3393 3394
  if (pWindowRes == NULL) {
    return;
  }
3395

3396 3397 3398 3399 3400 3401 3402 3403 3404 3405
  /*
   * not assign result buffer yet, add new result buffer
   * all group belong to one result set, and each group result has different group id so set the id to be one
   */
  if (pWindowRes->pos.pageId == -1) {
    if (addNewWindowResultBuf(pWindowRes, pRuntimeEnv->pResultBuf, GROUPRESULTID, pRuntimeEnv->numOfRowsPerPage) !=
        TSDB_CODE_SUCCESS) {
      return;
    }
  }
3406

H
Haojun Liao 已提交
3407 3408
  // record the current active group id
  pRuntimeEnv->prevGroupId = groupIndex;
3409 3410 3411 3412
  setWindowResOutputBuf(pRuntimeEnv, pWindowRes);
  initCtxOutputBuf(pRuntimeEnv);
}

H
Haojun Liao 已提交
3413
/*
 * Point the output buffer and the result info of every function context to the storage of the
 * given window result, and copy the super-table flag into each result info.
 */
void setWindowResOutputBuf(SQueryRuntimeEnv *pRuntimeEnv, SWindowResult *pResult) {
  SQuery *pQuery = pRuntimeEnv->pQuery;

  // Note: pResult->pos[i]->num == 0, there is only fixed number of results for each group
  for (int32_t col = 0; col < pQuery->numOfOutput; ++col) {
    SQLFunctionCtx *pCtx = &pRuntimeEnv->pCtx[col];
    int32_t         funcId = pQuery->pSelectExpr[col].base.functionId;

    pCtx->aOutputBuf = getPosInResultPage(pRuntimeEnv, col, pResult);

    // top/bottom/diff write their timestamps into the output buffer of the first column
    bool usesTsOutput = (funcId == TSDB_FUNC_TOP) || (funcId == TSDB_FUNC_BOTTOM) || (funcId == TSDB_FUNC_DIFF);
    if (usesTsOutput) {
      pCtx->ptsOutputBuf = pRuntimeEnv->pCtx[0].aOutputBuf;
    }

    /*
     * set the output buffer information and intermediate buffer
     * not all queries require the interResultBuf, such as COUNT
     */
    pCtx->resultInfo = &pResult->resultInfo[col];

    // set super table query flag
    GET_RES_INFO(pCtx)->superTableQ = pRuntimeEnv->stableQuery;
  }
}

H
Haojun Liao 已提交
3438 3439
/*
 * Bind the function contexts to the given window result and initialize the ones that have not
 * been initialized yet.
 *
 * A context whose result info is already initialized and complete only gets its result info
 * pointer updated; its output buffer and stage are left as they are.
 */
void setWindowResOutputBufInitCtx(SQueryRuntimeEnv *pRuntimeEnv, SWindowResult *pResult) {
  SQuery *pQuery = pRuntimeEnv->pQuery;

  // Note: pResult->pos[i]->num == 0, there is only fixed number of results for each group
  for (int32_t col = 0; col < pQuery->numOfOutput; ++col) {
    SQLFunctionCtx *pCtx = &pRuntimeEnv->pCtx[col];
    pCtx->resultInfo = &pResult->resultInfo[col];

    bool finished = pCtx->resultInfo->initialized && pCtx->resultInfo->complete;
    if (finished) {
      continue;
    }

    pCtx->aOutputBuf = getPosInResultPage(pRuntimeEnv, col, pResult);
    pCtx->currentStage = 0;

    int32_t funcId = pCtx->functionId;
    bool    usesTsOutput = (funcId == TSDB_FUNC_TOP) || (funcId == TSDB_FUNC_BOTTOM) || (funcId == TSDB_FUNC_DIFF);
    if (usesTsOutput) {
      pCtx->ptsOutputBuf = pRuntimeEnv->pCtx[0].aOutputBuf;
    }

    /*
     * set the output buffer information and intermediate buffer
     * not all queries require the interResultBuf, such as COUNT
     */
    pCtx->resultInfo->superTableQ = pRuntimeEnv->stableQuery;  // set super table query flag

    if (!pCtx->resultInfo->initialized) {
      aAggs[funcId].init(pCtx);
    }
  }
}

3470
/*
 * Set the tag values of pTable in the runtime environment and, when a ts buffer is in use,
 * position it for this table.
 *
 * On the first call for a table (cursor vgroupIndex == -1) the start position is looked up by the
 * tag value of the first context and the resulting cursor is stored in pTableQueryInfo; on later
 * calls the stored cursor is restored. Always returns 0.
 */
int32_t setAdditionalInfo(SQInfo *pQInfo, void* pTable, STableQueryInfo *pTableQueryInfo) {
  SQueryRuntimeEnv *pRuntimeEnv = &pQInfo->runtimeEnv;

  setTagVal(pRuntimeEnv, pTable, pQInfo->tsdb);

  // both the master and supplement scan need the correct ts comp start position
  if (pRuntimeEnv->pTSBuf == NULL) {
    return 0;
  }

  if (pTableQueryInfo->cur.vgroupIndex != -1) {
    tsBufSetCursor(pRuntimeEnv->pTSBuf, &pTableQueryInfo->cur);
    return 0;
  }

  pTableQueryInfo->tag = pRuntimeEnv->pCtx[0].tag.i64Key;
  tsBufGetElemStartPos(pRuntimeEnv->pTSBuf, 0, pTableQueryInfo->tag);

  // keep the cursor info of the current table
  pTableQueryInfo->cur = pRuntimeEnv->pTSBuf->cur;
  return 0;
}

/*
 * There are two cases to handle:
 *
 * 1. Query range is not set yet (queryRangeSet = 0). we need to set the query range info, including pQuery->lastKey,
 *    pQuery->window.skey, and pQuery->eKey.
 * 2. Query range is set and query is in progress. There may be another result with the same query ranges to be
 *    merged during merge stage. In this case, we need the pTableQueryInfo->lastResRows to decide if there
 *    is a previous result generated or not.
 */
H
hjxilinx 已提交
3501
void setIntervalQueryRange(SQInfo *pQInfo, TSKEY key) {
3502 3503
  SQueryRuntimeEnv *pRuntimeEnv = &pQInfo->runtimeEnv;
  SQuery *          pQuery = pRuntimeEnv->pQuery;
H
hjxilinx 已提交
3504 3505
  STableQueryInfo *pTableQueryInfo = pQuery->current;
  
3506 3507 3508
  if (pTableQueryInfo->queryRangeSet) {
    pTableQueryInfo->lastKey = key;
  } else {
3509
    pTableQueryInfo->win.skey = key;
3510
    STimeWindow win = {.skey = key, .ekey = pQuery->window.ekey};
3511

3512 3513 3514 3515 3516
    // for too small query range, no data in this interval.
    if ((QUERY_IS_ASC_QUERY(pQuery) && (pQuery->window.ekey < pQuery->window.skey)) ||
        (!QUERY_IS_ASC_QUERY(pQuery) && (pQuery->window.skey < pQuery->window.ekey))) {
      return;
    }
3517

3518 3519 3520 3521 3522 3523
    /**
     * In handling the both ascending and descending order super table query, we need to find the first qualified
     * timestamp of this table, and then set the first qualified start timestamp.
     * In ascending query, key is the first qualified timestamp. However, in the descending order query, additional
     * operations involve.
     */
H
Haojun Liao 已提交
3524
    STimeWindow     w = TSWINDOW_INITIALIZER;
3525
    SWindowResInfo *pWindowResInfo = &pTableQueryInfo->windowResInfo;
3526

H
Haojun Liao 已提交
3527 3528
    TSKEY sk = MIN(win.skey, win.ekey);
    TSKEY ek = MAX(win.skey, win.ekey);
H
Haojun Liao 已提交
3529
    getAlignQueryTimeWindow(pQuery, win.skey, sk, ek, &w);
3530
    pWindowResInfo->startTime = pTableQueryInfo->win.skey;  // windowSKey may be 0 in case of 1970 timestamp
3531

3532 3533
    if (pWindowResInfo->prevSKey == TSKEY_INITIAL_VAL) {
      if (!QUERY_IS_ASC_QUERY(pQuery)) {
H
Haojun Liao 已提交
3534
        assert(win.ekey == pQuery->window.ekey);
3535
      }
3536 3537
      
      pWindowResInfo->prevSKey = w.skey;
3538
    }
3539

3540
    pTableQueryInfo->queryRangeSet = 1;
3541
    pTableQueryInfo->lastKey = pTableQueryInfo->win.skey;
3542 3543 3544 3545
  }
}

/*
 * Returns true if at least one function in the select list has the
 * TSDB_FUNCSTATE_NEED_TS bit set in its nStatus.
 */
bool requireTimestamp(SQuery *pQuery) {
  for (int32_t col = 0; col < pQuery->numOfOutput; ++col) {
    int32_t funcId = pQuery->pSelectExpr[col].base.functionId;

    if (aAggs[funcId].nStatus & TSDB_FUNCSTATE_NEED_TS) {
      return true;
    }
  }

  return false;
}

/*
 * Decide whether the primary timestamp column of a data block has to be loaded.
 *
 * 1. if the lastKey of the current table or the end key of the query window lies inside the block,
 *    the timestamp column is needed to find the precise position;
 * 2. if any function in the select list requires timestamps, it is needed in any case.
 */
bool needPrimaryTimestampCol(SQuery *pQuery, SDataBlockInfo *pDataBlockInfo) {
  STimeWindow     *pBlockWin = &pDataBlockInfo->window;
  STableQueryInfo *pCurrent = pQuery->current;

  if (pCurrent->lastKey >= pBlockWin->skey && pCurrent->lastKey <= pBlockWin->ekey) {
    return true;
  }

  if (pQuery->window.ekey >= pBlockWin->skey && pQuery->window.ekey <= pBlockWin->ekey) {
    return true;
  }

  return requireTimestamp(pQuery);
}

3569
/*
 * Copy rows of closed window results into the output buffers of the query (pQuery->sdata).
 *
 * Copying resumes at pQInfo->groupIndex / pQInfo->offset and both are advanced as results are
 * consumed. With TSDB_ORDER_ASC the windows are visited front to back, otherwise back to front.
 * Copying stops when the output capacity is reached. Returns the number of rows copied.
 */
static int32_t doCopyToSData(SQInfo *pQInfo, SWindowResInfo *pResultInfo, int32_t orderType) {
  SQueryRuntimeEnv *pRuntimeEnv = &pQInfo->runtimeEnv;
  SQuery           *pQuery = pRuntimeEnv->pQuery;

  qDebug("QInfo:%p start to copy data from windowResInfo to query buf", pQInfo);

  int32_t        totalSet = numOfClosedTimeWindow(pResultInfo);
  SWindowResult *pWindows = pResultInfo->pResult;

  const bool ascending = (orderType == TSDB_ORDER_ASC);
  int32_t    step = ascending ? 1 : -1;
  int32_t    startIdx = ascending ? pQInfo->groupIndex : totalSet - pQInfo->groupIndex - 1;

  int32_t numOfResult = 0;

  for (int32_t i = startIdx; (i >= 0) && (i < totalSet); i += step) {
    SWindowResult *pWindowRes = &pWindows[i];

    // empty window: nothing to copy, move on to the next group
    if (pWindowRes->numOfRows == 0) {
      pQInfo->groupIndex += 1;
      pQInfo->offset = 0;
      continue;
    }

    assert(pQInfo->offset <= 1);

    int32_t oldOffset = pQInfo->offset;
    int32_t numOfRowsToCopy = pWindowRes->numOfRows - pQInfo->offset;

    /*
     * current output space is not enough to keep all the result data of this group, only copy partial results
     * to SQuery object's result buffer
     */
    if (numOfRowsToCopy > pQuery->rec.capacity - numOfResult) {
      numOfRowsToCopy = pQuery->rec.capacity - numOfResult;
      pQInfo->offset += numOfRowsToCopy;
    } else {
      pQInfo->groupIndex += 1;
      pQInfo->offset = 0;
    }

    for (int32_t col = 0; col < pQuery->numOfOutput; ++col) {
      int32_t bytes = pRuntimeEnv->pCtx[col].outputBytes;

      char *src = getPosInResultPage(pRuntimeEnv, col, pWindowRes);
      char *dst = pQuery->sdata[col]->data + numOfResult * bytes;
      memcpy(dst, src + oldOffset * bytes, bytes * numOfRowsToCopy);
    }

    numOfResult += numOfRowsToCopy;
    if (numOfResult == pQuery->rec.capacity) {
      break;  // output buffer is full
    }
  }

  qDebug("QInfo:%p copy data to query buf completed", pQInfo);

#ifdef _DEBUG_VIEW
  displayInterResult(pQuery->sdata, pRuntimeEnv, numOfResult);
#endif
  return numOfResult;
}

/**
 * copyFromWindowResToSData support copy data in ascending/descending order
 * For interval query of both super table and table, copy the data in ascending order, since the output results are
 * ordered in SWindowResutl already. While handling the group by query for both table and super table,
 * all group result are completed already.
 *
 * @param pQInfo
 * @param result
 */
3644
void copyFromWindowResToSData(SQInfo *pQInfo, SWindowResInfo *pResultInfo) {
3645
  SQuery *pQuery = pQInfo->runtimeEnv.pQuery;
3646

3647
  int32_t orderType = (pQuery->pGroupbyExpr != NULL) ? pQuery->pGroupbyExpr->orderType : TSDB_ORDER_ASC;
3648
  int32_t numOfResult = doCopyToSData(pQInfo, pResultInfo, orderType);
3649

3650
  pQuery->rec.rows += numOfResult;
3651

3652
  assert(pQuery->rec.rows <= pQuery->rec.capacity);
3653 3654
}

H
Haojun Liao 已提交
3655
/*
 * For non-interval queries, raise the row count of every window result to the largest number of
 * results produced by any of its functions. TSDB_FUNC_TS, TSDB_FUNC_TAG and TSDB_FUNC_TAGPRJ are
 * not taken into account. Interval queries are left untouched.
 */
static void updateWindowResNumOfRes(SQueryRuntimeEnv *pRuntimeEnv) {
  SQuery *pQuery = pRuntimeEnv->pQuery;

  if (QUERY_IS_INTERVAL_QUERY(pQuery)) {
    return;
  }

  SWindowResInfo *pWindowResInfo = &pRuntimeEnv->windowResInfo;

  for (int32_t i = 0; i < pWindowResInfo->size; ++i) {
    SWindowResult *pWindowRes = &pWindowResInfo->pResult[i];

    for (int32_t col = 0; col < pQuery->numOfOutput; ++col) {
      int32_t funcId = pRuntimeEnv->pCtx[col].functionId;

      bool auxiliary = (funcId == TSDB_FUNC_TS) || (funcId == TSDB_FUNC_TAG) || (funcId == TSDB_FUNC_TAGPRJ);
      if (!auxiliary) {
        pWindowRes->numOfRows = MAX(pWindowRes->numOfRows, pWindowRes->resultInfo[col].numOfRes);
      }
    }
  }
}

H
Haojun Liao 已提交
3677
static void stableApplyFunctionsOnBlock(SQueryRuntimeEnv *pRuntimeEnv, SDataBlockInfo *pDataBlockInfo, SDataStatis *pStatis,
3678
    SArray *pDataBlock, __block_search_fn_t searchFn) {
3679
  SQuery *         pQuery = pRuntimeEnv->pQuery;
3680 3681
  STableQueryInfo* pTableQueryInfo = pQuery->current;
  
3682
  SWindowResInfo * pWindowResInfo = &pTableQueryInfo->windowResInfo;
H
hjxilinx 已提交
3683
  pQuery->pos = QUERY_IS_ASC_QUERY(pQuery)? 0 : pDataBlockInfo->rows - 1;
3684

H
Haojun Liao 已提交
3685
  if (pQuery->numOfFilterCols > 0 || pRuntimeEnv->pTSBuf != NULL || pRuntimeEnv->groupbyNormalCol) {
3686
    rowwiseApplyFunctions(pRuntimeEnv, pStatis, pDataBlockInfo, pWindowResInfo, pDataBlock);
3687
  } else {
3688
    blockwiseApplyFunctions(pRuntimeEnv, pStatis, pDataBlockInfo, pWindowResInfo, searchFn, pDataBlock);
3689 3690 3691
  }
}

3692 3693 3694
/*
 * Tell whether there are still results that have not been returned to the client.
 *
 * Returns false once a positive limit has been reached. For fill queries (except point
 * interpolation) the decision is based on the fill info; for other queries it is true only when
 * the query is completed and a group-by/interval query still holds window results.
 */
bool queryHasRemainResults(SQueryRuntimeEnv* pRuntimeEnv) {
  SQuery *pQuery = pRuntimeEnv->pQuery;

  if (pQuery->limit.limit > 0 && pQuery->rec.total >= pQuery->limit.limit) {
    return false;
  }

  const bool completed = Q_STATUS_EQUAL(pQuery->status, QUERY_COMPLETED);

  if (pQuery->fillType != TSDB_FILL_NONE && !isPointInterpoQuery(pQuery)) {
    SFillInfo *pFillInfo = pRuntimeEnv->pFillInfo;

    // There are results not returned to client yet, so filling operation applied to the remain result is required
    // in the first place.
    if (taosNumOfRemainRows(pFillInfo) > 0) {
      return true;
    }

    /*
     * While the code reaches here, there are no results remains now.
     * If query is not completed yet, the gaps between two results blocks need to be handled after next data block
     * is retrieved from TSDB.
     *
     * NOTE: If the result set is not the first block, the gap in front of the result set will be filled. If the result
     * set is the FIRST result block, the gap between the start time of query time window and the timestamp of the
     * first result row in the actual result set will fill nothing.
     */
    if (completed) {
      return getFilledNumOfRes(pFillInfo, pQuery->window.ekey, pQuery->rec.capacity) > 0;
    }

    return false;
  }

  // there are results waiting to be returned to the client.
  const bool windowed = pRuntimeEnv->groupbyNormalCol || QUERY_IS_INTERVAL_QUERY(pQuery);
  if (completed && windowed && (pRuntimeEnv->windowResInfo.size > 0)) {
    return true;
  }

  return false;
}

/*
 * Serialize the query result into the response buffer `data`.
 *
 * Layout written: the rows of every output column (column by column), then the number of table id
 * entries in network byte order, then one STableIdInfo per entry with its fields converted to
 * big-endian. If the query is completed and nothing remains to be returned, the status is set to
 * QUERY_OVER.
 */
static void doCopyQueryResultToMsg(SQInfo *pQInfo, int32_t numOfRows, char *data) {
  SQuery *pQuery = pQInfo->runtimeEnv.pQuery;
  char   *pos = data;

  for (int32_t col = 0; col < pQuery->numOfOutput; ++col) {
    int32_t bytes = pQuery->pSelectExpr[col].bytes;

    memmove(pos, pQuery->sdata[col]->data, bytes * numOfRows);
    pos += bytes * numOfRows;
  }

  // NOTE(review): the stores below go through casted pointers into the byte buffer; the offset is
  // not guaranteed to be suitably aligned - confirm this is acceptable on all target platforms
  int32_t numOfTables = (int32_t)taosArrayGetSize(pQInfo->arrTableIdInfo);
  *(int32_t*)pos = htonl(numOfTables);
  pos += sizeof(int32_t);

  for (int32_t i = 0; i < numOfTables; i++) {
    STableIdInfo* pSrc = taosArrayGet(pQInfo->arrTableIdInfo, i);
    STableIdInfo* pDst = (STableIdInfo*)pos;

    pDst->uid = htobe64(pSrc->uid);
    pDst->tid = htonl(pSrc->tid);
    pDst->key = htobe64(pSrc->key);

    pos += sizeof(STableIdInfo);
  }

  // all data returned, set query over
  if (!Q_STATUS_EQUAL(pQuery->status, QUERY_COMPLETED)) {
    return;
  }

  if (pQInfo->runtimeEnv.stableQuery) {
    if (pQInfo->tableIndex >= pQInfo->tableqinfoGroupInfo.numOfTables) {
      setQueryStatus(pQuery, QUERY_OVER);
    }
  } else if (!queryHasRemainResults(&pQInfo->runtimeEnv)) {
    setQueryStatus(pQuery, QUERY_OVER);
  }
}

H
Haojun Liao 已提交
3770
/*
 * Generate filled result rows and apply the remaining query offset to them.
 *
 * Batches are generated into pQuery->sdata until either rows survive the offset or no results
 * remain. A batch that is not larger than the offset is discarded entirely and the offset reduced;
 * a larger batch has its leading `offset` rows removed by shifting the data in pDst.
 * Returns the number of rows available after the offset has been applied.
 *
 * NOTE(review): numOfFilled is not used by this function.
 */
int32_t doFillGapsInResults(SQueryRuntimeEnv* pRuntimeEnv, tFilePage **pDst, int32_t *numOfFilled) {
  SQInfo    *pQInfo = GET_QINFO_ADDR(pRuntimeEnv);
  SQuery    *pQuery = pRuntimeEnv->pQuery;
  SFillInfo *pFillInfo = pRuntimeEnv->pFillInfo;

  for (;;) {
    int32_t ret = taosGenerateDataBlock(pFillInfo, (tFilePage**) pQuery->sdata, pQuery->rec.capacity);

    // todo apply limit output function
    /* reached the start position according to the offset value, return immediately */
    if (pQuery->limit.offset == 0) {
      qDebug("QInfo:%p initial numOfRows:%d, generate filled result:%d rows", pQInfo, pFillInfo->numOfRows, ret);
      return ret;
    }

    if (pQuery->limit.offset >= ret) {
      // the whole batch is consumed by the offset
      qDebug("QInfo:%p initial numOfRows:%d, generate filled result:%d rows, offset:%" PRId64 ". Discard due to offset, "
             "remain:%d, new offset:%" PRId64, pQInfo, pFillInfo->numOfRows, ret, pQuery->limit.offset, 0,
          pQuery->limit.offset - ret);

      pQuery->limit.offset -= ret;
      pQuery->rec.rows = 0;

      if (!queryHasRemainResults(pRuntimeEnv)) {
        return 0;
      }

      continue;
    }

    // only the leading part of the batch is skipped
    qDebug("QInfo:%p initial numOfRows:%d, generate filled result:%d rows, offset:%" PRId64 ". Discard due to offset, remain:%" PRId64 ", new offset:%d",
           pQInfo, pFillInfo->numOfRows, ret, pQuery->limit.offset, ret - pQuery->limit.offset, 0);

    ret -= pQuery->limit.offset;

    // todo !!!!there exactly number of interpo is not valid.
    // todo refactor move to the beginning of buffer
    for (int32_t col = 0; col < pQuery->numOfOutput; ++col) {
      memmove(pDst[col]->data, pDst[col]->data + pQuery->pSelectExpr[col].bytes * pQuery->limit.offset,
              ret * pQuery->pSelectExpr[col].bytes);
    }

    pQuery->limit.offset = 0;
    return ret;
  }
}

3815
/*
 * Write the cost summary of the query (elapsed and io time, block and row counters) to the
 * debug log.
 */
static void queryCostStatis(SQInfo *pQInfo) {
  SQueryCostInfo *pSummary = &pQInfo->runtimeEnv.summary;

  qDebug("QInfo:%p :cost summary: elapsed time:%"PRId64" us, io time:%"PRId64" us, total blocks:%d, load block statis:%d,"
         " load data block:%d, total rows:%"PRId64 ", check rows:%"PRId64,
         pQInfo, pSummary->elapsedTime, pSummary->ioTime, pSummary->totalBlocks, pSummary->loadBlockStatis,
         pSummary->loadBlocks, pSummary->totalRows, pSummary->totalCheckedRows);
}

3859 3860
/*
 * Consume the remaining limit.offset inside one data block.
 *
 * If the offset equals the number of rows in the block, the block is skipped
 * entirely: lastKey of the current table is moved one step past the block
 * boundary (in scan direction) and nothing is loaded. Otherwise the start
 * position inside the block is computed, the block is loaded, lastKey is set to
 * the timestamp found at that position (column 0 is read as TSKEY values), and
 * the query functions are applied on the block.
 *
 * In both cases limit.offset is reset to 0.
 */
static void updateOffsetVal(SQueryRuntimeEnv *pRuntimeEnv, SDataBlockInfo *pBlockInfo) {
  SQuery *         pQuery = pRuntimeEnv->pQuery;
  STableQueryInfo *pCurrent = pQuery->current;

  int32_t step = GET_FORWARD_DIRECTION_FACTOR(pQuery->order.order);

  // the whole block is covered by the offset, no need to load it
  if (pQuery->limit.offset == pBlockInfo->rows) {
    TSKEY boundary = QUERY_IS_ASC_QUERY(pQuery) ? pBlockInfo->window.ekey : pBlockInfo->window.skey;

    pCurrent->lastKey = boundary + step;
    pQuery->limit.offset = 0;
    return;
  }

  // first row that has to be returned, counted from the scan start of the block
  pQuery->pos = QUERY_IS_ASC_QUERY(pQuery) ? pQuery->limit.offset : (pBlockInfo->rows - pQuery->limit.offset - 1);
  assert(pQuery->pos >= 0 && pQuery->pos <= pBlockInfo->rows - 1);

  SArray *         pBlockData = tsdbRetrieveDataBlock(pRuntimeEnv->pQueryHandle, NULL);
  SColumnInfoData *pTsColumn = taosArrayGet(pBlockData, 0);
  TSKEY *          tsList = (TSKEY *) pTsColumn->pData;

  // resume from the located row and mark the offset as fully consumed
  pCurrent->lastKey = tsList[pQuery->pos];
  pQuery->limit.offset = 0;

  int32_t numOfRes = tableApplyFunctionsOnBlock(pRuntimeEnv, pBlockInfo, NULL, binarySearchForKey, pBlockData);

  qDebug("QInfo:%p check data block, brange:%" PRId64 "-%" PRId64 ", numOfRows:%d, numOfRes:%d, lastKey:%"PRId64, GET_QINFO_ADDR(pRuntimeEnv),
         pBlockInfo->window.skey, pBlockInfo->window.ekey, pBlockInfo->rows, numOfRes, pQuery->current->lastKey);
}
3894

3895 3896 3897 3898 3899
/*
 * Skip whole data blocks while the remaining limit.offset is larger than the
 * number of rows in the block; the first block that is not larger than the
 * remaining offset is handed to updateOffsetVal().
 *
 * Nothing is done when there is no positive offset or when filter columns
 * exist. If the query is killed while iterating, the query result is finalized
 * and control leaves through longjmp() with TSDB_CODE_TSC_QUERY_CANCELLED.
 */
void skipBlocks(SQueryRuntimeEnv *pRuntimeEnv) {
  SQuery *pQuery = pRuntimeEnv->pQuery;

  bool needSkip = (pQuery->limit.offset > 0) && (pQuery->numOfFilterCols <= 0);
  if (!needSkip) {
    return;
  }

  pQuery->pos = 0;

  int32_t          step = GET_FORWARD_DIRECTION_FACTOR(pQuery->order.order);
  STableQueryInfo *pCurrent = pQuery->current;
  TsdbQueryHandleT handle = pRuntimeEnv->pQueryHandle;
  SDataBlockInfo   blockInfo = SDATA_BLOCK_INITIALIZER;

  while (tsdbNextDataBlock(handle)) {
    if (IS_QUERY_KILLED(GET_QINFO_ADDR(pRuntimeEnv))) {
      finalizeQueryResult(pRuntimeEnv); // clean up allocated resource during query
      longjmp(pRuntimeEnv->env, TSDB_CODE_TSC_QUERY_CANCELLED);
    }

    tsdbRetrieveDataBlockInfo(handle, &blockInfo);

    // the start position is located inside the current block
    if (pQuery->limit.offset <= blockInfo.rows) {
      updateOffsetVal(pRuntimeEnv, &blockInfo);
      break;
    }

    // the complete block is skipped, continue right behind it in scan direction
    pQuery->limit.offset -= blockInfo.rows;

    TSKEY boundary = (QUERY_IS_ASC_QUERY(pQuery)) ? blockInfo.window.ekey : blockInfo.window.skey;
    pCurrent->lastKey = boundary + step;

    qDebug("QInfo:%p skip rows:%d, offset:%" PRId64, GET_QINFO_ADDR(pRuntimeEnv), blockInfo.rows,
           pQuery->limit.offset);
  }
}
3930

H
Haojun Liao 已提交
3931
static bool skipTimeInterval(SQueryRuntimeEnv *pRuntimeEnv, TSKEY* start) {
3932
  SQuery *pQuery = pRuntimeEnv->pQuery;
H
Haojun Liao 已提交
3933
  *start = pQuery->current->lastKey;
3934

3935
  // if queried with value filter, do NOT forward query start position
3936
  if (pQuery->limit.offset <= 0 || pQuery->numOfFilterCols > 0 || pRuntimeEnv->pTSBuf != NULL || pRuntimeEnv->pFillInfo != NULL) {
3937
    return true;
3938
  }
3939

3940 3941 3942 3943 3944
  /*
   * 1. for interval without interpolation query we forward pQuery->intervalTime at a time for
   *    pQuery->limit.offset times. Since hole exists, pQuery->intervalTime*pQuery->limit.offset value is
   *    not valid. otherwise, we only forward pQuery->limit.offset number of points
   */
3945
  assert(pRuntimeEnv->windowResInfo.prevSKey == TSKEY_INITIAL_VAL);
3946

H
Haojun Liao 已提交
3947
  STimeWindow w = TSWINDOW_INITIALIZER;
3948
  
3949
  SWindowResInfo *pWindowResInfo = &pRuntimeEnv->windowResInfo;
H
hjxilinx 已提交
3950
  STableQueryInfo *pTableQueryInfo = pQuery->current;
3951

H
Haojun Liao 已提交
3952
  SDataBlockInfo blockInfo = SDATA_BLOCK_INITIALIZER;
3953
  while (tsdbNextDataBlock(pRuntimeEnv->pQueryHandle)) {
H
Haojun Liao 已提交
3954
    tsdbRetrieveDataBlockInfo(pRuntimeEnv->pQueryHandle, &blockInfo);
3955

H
Haojun Liao 已提交
3956 3957
    if (QUERY_IS_ASC_QUERY(pQuery)) {
      if (pWindowResInfo->prevSKey == TSKEY_INITIAL_VAL) {
H
Haojun Liao 已提交
3958
        getAlignQueryTimeWindow(pQuery, blockInfo.window.skey, blockInfo.window.skey, pQuery->window.ekey, &w);
H
Haojun Liao 已提交
3959 3960 3961
        pWindowResInfo->startTime = w.skey;
        pWindowResInfo->prevSKey = w.skey;
      }
3962
    } else {
H
Haojun Liao 已提交
3963
      getAlignQueryTimeWindow(pQuery, blockInfo.window.ekey, pQuery->window.ekey, blockInfo.window.ekey, &w);
3964

3965 3966 3967
      pWindowResInfo->startTime = pQuery->window.skey;
      pWindowResInfo->prevSKey = w.skey;
    }
3968

3969 3970
    // the first time window
    STimeWindow win = getActiveTimeWindow(pWindowResInfo, pWindowResInfo->prevSKey, pQuery);
3971

3972 3973 3974 3975 3976 3977
    while (pQuery->limit.offset > 0) {
      if ((win.ekey <= blockInfo.window.ekey && QUERY_IS_ASC_QUERY(pQuery)) ||
          (win.ekey >= blockInfo.window.skey && !QUERY_IS_ASC_QUERY(pQuery))) {
        pQuery->limit.offset -= 1;
        pWindowResInfo->prevSKey = win.skey;
      }
3978

3979
      STimeWindow tw = win;
H
Haojun Liao 已提交
3980
      GET_NEXT_TIMEWINDOW(pQuery, &tw);
3981

3982
      if (pQuery->limit.offset == 0) {
3983 3984
        if ((tw.skey <= blockInfo.window.ekey && QUERY_IS_ASC_QUERY(pQuery)) ||
            (tw.ekey >= blockInfo.window.skey && !QUERY_IS_ASC_QUERY(pQuery))) {
H
Haojun Liao 已提交
3985 3986
          // load the data block and check data remaining in current data block
          // TODO optimize performance
3987 3988 3989
          SArray *         pDataBlock = tsdbRetrieveDataBlock(pRuntimeEnv->pQueryHandle, NULL);
          SColumnInfoData *pColInfoData = taosArrayGet(pDataBlock, 0);

3990 3991
          tw = win;
          int32_t startPos =
H
Haojun Liao 已提交
3992
              getNextQualifiedWindow(pRuntimeEnv, &tw, &blockInfo, pColInfoData->pData, binarySearchForKey, -1);
3993 3994 3995 3996
          assert(startPos >= 0);

          // set the abort info
          pQuery->pos = startPos;
H
Haojun Liao 已提交
3997 3998 3999 4000 4001 4002
          
          // reset the query start timestamp
          pTableQueryInfo->win.skey = ((TSKEY *)pColInfoData->pData)[startPos];
          pQuery->window.skey = pTableQueryInfo->win.skey;
          *start = pTableQueryInfo->win.skey;
          
4003
          pWindowResInfo->prevSKey = tw.skey;
H
Haojun Liao 已提交
4004 4005
          int32_t index = pRuntimeEnv->windowResInfo.curIndex;
          
H
hjxilinx 已提交
4006
          int32_t numOfRes = tableApplyFunctionsOnBlock(pRuntimeEnv, &blockInfo, NULL, binarySearchForKey, pDataBlock);
H
Haojun Liao 已提交
4007 4008
          pRuntimeEnv->windowResInfo.curIndex = index;  // restore the window index
          
4009
          qDebug("QInfo:%p check data block, brange:%" PRId64 "-%" PRId64 ", numOfRows:%d, numOfRes:%d, lastKey:%"PRId64,
4010 4011
                 GET_QINFO_ADDR(pRuntimeEnv), blockInfo.window.skey, blockInfo.window.ekey, blockInfo.rows, numOfRes, pQuery->current->lastKey);
          
4012
          return true;
H
Haojun Liao 已提交
4013 4014 4015 4016
        } else { // do nothing
          *start = tw.skey;
          pQuery->window.skey = tw.skey;
          pWindowResInfo->prevSKey = tw.skey;
4017
          return true;
4018 4019 4020
        }
      }

H
Haojun Liao 已提交
4021 4022 4023 4024 4025 4026 4027
      /*
       * If the next time window still starts from current data block,
       * load the primary timestamp column first, and then find the start position for the next queried time window.
       * Note that only the primary timestamp column is required.
       * TODO: Optimize for this cases. All data blocks are not needed to be loaded, only if the first actually required
       * time window resides in current data block.
       */
4028 4029 4030 4031 4032 4033 4034
      if ((tw.skey <= blockInfo.window.ekey && QUERY_IS_ASC_QUERY(pQuery)) ||
          (tw.ekey >= blockInfo.window.skey && !QUERY_IS_ASC_QUERY(pQuery))) {
        SArray *         pDataBlock = tsdbRetrieveDataBlock(pRuntimeEnv->pQueryHandle, NULL);
        SColumnInfoData *pColInfoData = taosArrayGet(pDataBlock, 0);

        tw = win;
        int32_t startPos =
H
Haojun Liao 已提交
4035
            getNextQualifiedWindow(pRuntimeEnv, &tw, &blockInfo, pColInfoData->pData, binarySearchForKey, -1);
4036 4037 4038 4039
        assert(startPos >= 0);

        // set the abort info
        pQuery->pos = startPos;
H
hjxilinx 已提交
4040
        pTableQueryInfo->lastKey = ((TSKEY *)pColInfoData->pData)[startPos];
4041 4042
        pWindowResInfo->prevSKey = tw.skey;
        win = tw;
4043
      } else {
H
Haojun Liao 已提交
4044
        break;  // offset is not 0, and next time window begins or ends in the next block.
4045 4046 4047
      }
    }
  }
4048

4049 4050 4051
  return true;
}

B
Bomin Zhang 已提交
4052 4053
/*
 * Create the primary tsdb query handle of the query and store it in
 * pRuntimeEnv->pQueryHandle.
 *
 * No handle is created for a tag-only query, nor for a super table query that
 * is neither an interval query nor a fixed-output query.
 *
 * tsdb          storage handle passed through to the tsdbQuery* functions
 * pQInfo        query object owning the runtime environment
 * isSTableQuery true if the query is executed on a super table
 */
static void setupQueryHandle(void* tsdb, SQInfo* pQInfo, bool isSTableQuery) {
  SQueryRuntimeEnv *pRuntimeEnv = &pQInfo->runtimeEnv;
  SQuery *          pQuery = pQInfo->runtimeEnv.pQuery;

  if (onlyQueryTags(pQuery) ||
      (isSTableQuery && (!QUERY_IS_INTERVAL_QUERY(pQuery)) && (!isFixedOutputQuery(pRuntimeEnv)))) {
    return;
  }

  STsdbQueryCond cond = {0};
  cond.twindow   = pQuery->window;
  cond.order     = pQuery->order.order;
  cond.colList   = pQuery->colList;
  cond.numOfCols = pQuery->numOfCols;

  // under the conditions below the time window of the only table replaces the query window
  bool useTableWindow = (!isSTableQuery) && (pQInfo->tableqinfoGroupInfo.numOfTables == 1) &&
                        (cond.order == TSDB_ORDER_ASC) && (!QUERY_IS_INTERVAL_QUERY(pQuery)) &&
                        (!isGroupbyNormalCol(pQuery->pGroupbyExpr)) && (!isFixedOutputQuery(pRuntimeEnv));
  if (useTableWindow) {
    SArray *         pTables = GET_TABLEGROUP(pQInfo, 0);
    STableQueryInfo *pOnlyTable = taosArrayGetP(pTables, 0);
    cond.twindow = pOnlyTable->win;
  }

  // choose the tsdb entry point according to the query type
  if (isFirstLastRowQuery(pQuery)) {
    pRuntimeEnv->pQueryHandle = tsdbQueryLastRow(tsdb, &cond, &pQInfo->tableGroupInfo, pQInfo);
  } else if (isPointInterpoQuery(pQuery)) {
    pRuntimeEnv->pQueryHandle = tsdbQueryRowsInExternalWindow(tsdb, &cond, &pQInfo->tableGroupInfo, pQInfo);
  } else {
    pRuntimeEnv->pQueryHandle = tsdbQueryTables(tsdb, &cond, &pQInfo->tableGroupInfo, pQInfo);
  }
}

4092 4093 4094 4095 4096 4097 4098 4099 4100 4101 4102 4103 4104
/*
 * Build the fill column description array for the output columns of a query.
 *
 * One SFillColInfo is created per output expression (pQuery->numOfOutput). The
 * type, size and function id are taken from pQuery->pSelectExpr[i], the fill
 * value from pQuery->fillVal[i]; the column offset is the sum of the sizes of
 * all preceding output columns.
 *
 * Returns a calloc()'ed array, or NULL if the allocation fails. The array is
 * handed to the caller.
 */
static SFillColInfo* taosCreateFillColInfo(SQuery* pQuery) {
  int32_t numOfCols = pQuery->numOfOutput;
  int32_t offset = 0;

  SFillColInfo* pFillCol = calloc(numOfCols, sizeof(SFillColInfo));
  if (pFillCol == NULL) {  // out of memory: report it instead of dereferencing a NULL pointer below
    return NULL;
  }

  for(int32_t i = 0; i < numOfCols; ++i) {
    SExprInfo* pExprInfo = &pQuery->pSelectExpr[i];

    pFillCol[i].col.bytes  = pExprInfo->bytes;
    pFillCol[i].col.type   = pExprInfo->type;
    pFillCol[i].col.offset = offset;
    pFillCol[i].flag       = TSDB_COL_NORMAL;    // always be a normal column for table query
    pFillCol[i].functionId = pExprInfo->base.functionId;
    pFillCol[i].fillVal.i = pQuery->fillVal[i];

    offset += pExprInfo->bytes;
  }

  return pFillCol;
}

4113
/*
 * Initialise the runtime environment of a query before execution.
 *
 * The steps are, in order: cache query properties in the runtime environment,
 * create the tsdb query handle, attach the ts buffer, set up the function
 * runtime contexts, create the disk-based result buffer and the window result
 * info where needed, and create the fill info for fill queries other than
 * point interpolation queries.
 *
 * pQInfo        query object to initialise
 * pTsBuf        ts buffer attached to the runtime environment, may be NULL
 * tsdb          storage handle, stored in pQInfo->tsdb
 * vgId          stored in pQInfo->vgId
 * isSTableQuery true if the query is executed on a super table
 *
 * Returns TSDB_CODE_SUCCESS, or the error code reported by setupQueryRuntimeEnv()
 * or createDiskbasedResultBuffer().
 */
int32_t doInitQInfo(SQInfo *pQInfo, STSBuf *pTsBuf, void *tsdb, int32_t vgId, bool isSTableQuery) {
  SQueryRuntimeEnv *pRuntimeEnv = &pQInfo->runtimeEnv;
  SQuery *          pQuery = pQInfo->runtimeEnv.pQuery;

  pQuery->precision = tsdbGetCfg(tsdb)->precision;
  pRuntimeEnv->topBotQuery = isTopBottomQuery(pQuery);
  pRuntimeEnv->hasTagResults = hasTagValOutput(pQuery);

  setScanLimitationByResultBuffer(pQuery);
  changeExecuteScanOrder(pQInfo, false);
  setupQueryHandle(tsdb, pQInfo, isSTableQuery);

  pQInfo->tsdb = tsdb;
  pQInfo->vgId = vgId;

  pRuntimeEnv->pQuery = pQuery;
  pRuntimeEnv->pTSBuf = pTsBuf;
  pRuntimeEnv->cur.vgroupIndex = -1;
  pRuntimeEnv->stableQuery = isSTableQuery;
  pRuntimeEnv->prevGroupId = INT32_MIN;
  pRuntimeEnv->groupbyNormalCol = isGroupbyNormalCol(pQuery->pGroupbyExpr);

  if (pTsBuf != NULL) {
    // traverse the ts buffer in ascending order only if it is stored in the order of the query
    int16_t order = (pQuery->order.order == pRuntimeEnv->pTSBuf->tsOrder) ? TSDB_ORDER_ASC : TSDB_ORDER_DESC;
    tsBufSetTraverseOrder(pRuntimeEnv->pTSBuf, order);
  }

  // create runtime environment
  int32_t code = setupQueryRuntimeEnv(pRuntimeEnv, pQuery->order.order);
  if (code != TSDB_CODE_SUCCESS) {
    return code;
  }

  pRuntimeEnv->numOfRowsPerPage = getNumOfRowsInResultPage(pQuery, pRuntimeEnv->topBotQuery, isSTableQuery);

  // super table queries, group by normal column queries and interval queries use the disk-based result buffer
  if (isSTableQuery || pRuntimeEnv->groupbyNormalCol || QUERY_IS_INTERVAL_QUERY(pQuery)) {
    int32_t rows = getInitialPageNum(pQInfo);

    code = createDiskbasedResultBuffer(&pRuntimeEnv->pResultBuf, rows, pQuery->rowSize, pQInfo);
    if (code != TSDB_CODE_SUCCESS) {
      return code;
    }

    if (!isSTableQuery) {
      // key of the window results: the group by column, otherwise the timestamp
      int16_t type = pRuntimeEnv->groupbyNormalCol ? getGroupbyColumnType(pQuery, pQuery->pGroupbyExpr)
                                                    : TSDB_DATA_TYPE_TIMESTAMP;
      initWindowResInfo(&pRuntimeEnv->windowResInfo, pRuntimeEnv, rows, 4096, type);
    } else if (pQuery->intervalTime == 0) {
      // key of the window results: the group by column (not tags), otherwise the group id
      int16_t type = pRuntimeEnv->groupbyNormalCol ? getGroupbyColumnType(pQuery, pQuery->pGroupbyExpr)
                                                    : TSDB_DATA_TYPE_INT;
      initWindowResInfo(&pRuntimeEnv->windowResInfo, pRuntimeEnv, 32, 4096, type);
    }
  }

  if (pQuery->fillType != TSDB_FILL_NONE && !isPointInterpoQuery(pQuery)) {
    SFillColInfo *pFillCols = taosCreateFillColInfo(pQuery);
    STimeWindow   aligned = TSWINDOW_INITIALIZER;

    // the query window may be reversed, order its boundaries before aligning
    TSKEY minKey = MIN(pQuery->window.skey, pQuery->window.ekey);
    TSKEY maxKey = MAX(pQuery->window.skey, pQuery->window.ekey);
    getAlignQueryTimeWindow(pQuery, pQuery->window.skey, minKey, maxKey, &aligned);

    pRuntimeEnv->pFillInfo = taosInitFillInfo(pQuery->order.order, aligned.skey, 0, pQuery->rec.capacity, pQuery->numOfOutput,
                                              pQuery->slidingTime, pQuery->slidingTimeUnit, pQuery->precision,
                                              pQuery->fillType, pFillCols);
  }

  setQueryStatus(pQuery, QUERY_NOT_COMPLETED);
  return TSDB_CODE_SUCCESS;
}

4204
/*
 * Clear the 'complete' flag in the result info of every output function
 * context, skipping contexts without a result info.
 */
static void enableExecutionForNextTable(SQueryRuntimeEnv *pRuntimeEnv) {
  SQuery *pQuery = pRuntimeEnv->pQuery;

  for (int32_t k = 0; k < pQuery->numOfOutput; ++k) {
    SResultInfo *pInfo = GET_RES_INFO(&pRuntimeEnv->pCtx[k]);
    if (pInfo == NULL) {
      continue;
    }

    pInfo->complete = false;
  }
}

H
Haojun Liao 已提交
4215
/*
 * Scan all data blocks delivered by the active query handle (the primary handle
 * during the master scan, the secondary handle otherwise) and apply the query
 * functions on every block of every table.
 *
 * For each block the owning table is looked up by tid in
 * tableqinfoGroupInfo.map and made the current table; the scan stops if the
 * lookup fails. If the query is killed, control leaves through longjmp() with
 * TSDB_CODE_TSC_QUERY_CANCELLED.
 *
 * Returns the difference of two taosGetTimestampMs() readings taken at the
 * start and at the end of the scan.
 */
static int64_t scanMultiTableDataBlocks(SQInfo *pQInfo) {
  SQueryRuntimeEnv *pRuntimeEnv = &pQInfo->runtimeEnv;
  SQuery *          pQuery = pRuntimeEnv->pQuery;
  SQueryCostInfo *  pCost = &pRuntimeEnv->summary;

  int64_t startTs = taosGetTimestampMs();

  TsdbQueryHandleT handle = IS_MASTER_SCAN(pRuntimeEnv)? pRuntimeEnv->pQueryHandle : pRuntimeEnv->pSecQueryHandle;
  SDataBlockInfo   blockInfo = SDATA_BLOCK_INITIALIZER;

  int32_t step = GET_FORWARD_DIRECTION_FACTOR(pQuery->order.order);

  while (tsdbNextDataBlock(handle)) {
    pCost->totalBlocks += 1;

    if (IS_QUERY_KILLED(pQInfo)) {
      longjmp(pRuntimeEnv->env, TSDB_CODE_TSC_QUERY_CANCELLED);
    }

    tsdbRetrieveDataBlockInfo(handle, &blockInfo);

    STableQueryInfo **ppTableInfo = (STableQueryInfo**) taosHashGet(pQInfo->tableqinfoGroupInfo.map, &blockInfo.tid, sizeof(blockInfo.tid));
    if (ppTableInfo == NULL) {
      break;
    }

    assert(*ppTableInfo != NULL);
    SET_CURRENT_QUERY_TABLE_INFO(pRuntimeEnv, *ppTableInfo);

    if (!pRuntimeEnv->groupbyNormalCol) {
      if (QUERY_IS_INTERVAL_QUERY(pQuery)) {
        // interval query: the range is set from the start key of the block
        setIntervalQueryRange(pQInfo, blockInfo.window.skey);

        if (pRuntimeEnv->hasTagResults || pRuntimeEnv->pTSBuf != NULL) {
          setAdditionalInfo(pQInfo, (*ppTableInfo)->pTable, *ppTableInfo);
        }
      } else {
        setExecutionContext(pQInfo, (*ppTableInfo)->groupIndex, blockInfo.window.ekey + step);
      }
    }

    SDataStatis *pStatis = NULL;
    SArray *     pBlockData = NULL;

    if (loadDataBlockOnDemand(pRuntimeEnv, handle, &blockInfo, &pStatis, &pBlockData) == BLK_DATA_DISCARD) {
      // the block is discarded, lastKey moves one step behind it in scan direction
      TSKEY boundary = QUERY_IS_ASC_QUERY(pQuery)? blockInfo.window.ekey : blockInfo.window.skey;
      pQuery->current->lastKey = boundary + step;
      continue;
    }

    pCost->totalRows += blockInfo.rows;
    stableApplyFunctionsOnBlock(pRuntimeEnv, &blockInfo, pStatis, pBlockData, binarySearchForKey);

    qDebug("QInfo:%p check data block, uid:%"PRId64", tid:%d, brange:%" PRId64 "-%" PRId64 ", numOfRows:%d, lastKey:%" PRId64,
           pQInfo, blockInfo.uid, blockInfo.tid, blockInfo.window.skey, blockInfo.window.ekey, blockInfo.rows, pQuery->current->lastKey);
  }

  updateWindowResNumOfRes(pRuntimeEnv);

  int64_t endTs = taosGetTimestampMs();
  return endTs - startTs;
}

4275 4276
/*
 * Prepare the runtime environment for querying a single table of group 0.
 *
 * The query status is reset to QUERY_NOT_COMPLETED, the tag values are set if
 * required, the current query handle is replaced by a new handle that covers
 * only the selected table in the range [lastKey, win.ekey] of that table, the
 * ts buffer (if any) is positioned, and the output buffers are initialised.
 *
 * index  position of the table inside table group 0
 *
 * Returns false if a ts buffer exists, no cursor was saved before
 * (cur.vgroupIndex == -1), and the buffer holds no data for the tag value of
 * the table; true otherwise.
 */
static bool multiTableMultioutputHelper(SQInfo *pQInfo, int32_t index) {
  SQueryRuntimeEnv *pRuntimeEnv = &pQInfo->runtimeEnv;
  SQuery *          pQuery = pRuntimeEnv->pQuery;

  setQueryStatus(pQuery, QUERY_NOT_COMPLETED);

  SArray *         pTables = GET_TABLEGROUP(pQInfo, 0);
  STableQueryInfo *pTableInfo = taosArrayGetP(pTables, index);

  if (pRuntimeEnv->hasTagResults || pRuntimeEnv->pTSBuf != NULL) {
    setTagVal(pRuntimeEnv, pTableInfo->pTable, pQInfo->tsdb);
  }

  STableId* id = TSDB_TABLEID(pTableInfo->pTable);
  qDebug("QInfo:%p query on (%d): uid:%" PRIu64 ", tid:%d, qrange:%" PRId64 "-%" PRId64, pQInfo, index,
         id->uid, id->tid, pTableInfo->lastKey, pTableInfo->win.ekey);

  STsdbQueryCond cond = {0};
  cond.twindow.skey = pTableInfo->lastKey;
  cond.twindow.ekey = pTableInfo->win.ekey;
  cond.order        = pQuery->order.order;
  cond.colList      = pQuery->colList;
  cond.numOfCols    = pQuery->numOfCols;

  // todo refactor
  // temporary group list that includes only the current table
  SArray *pGroupList = taosArrayInit(1, POINTER_BYTES);
  SArray *pOneTable = taosArrayInit(1, POINTER_BYTES);

  taosArrayPush(pOneTable, &pTableInfo->pTable);
  taosArrayPush(pGroupList, &pOneTable);
  STableGroupInfo gp = {.numOfTables = 1, .pGroupList = pGroupList};

  // release the handle of the previous table before a new one is created
  if (pRuntimeEnv->pQueryHandle != NULL) {
    tsdbCleanupQueryHandle(pRuntimeEnv->pQueryHandle);
    pRuntimeEnv->pQueryHandle = NULL;
  }

  pRuntimeEnv->pQueryHandle = tsdbQueryTables(pQInfo->tsdb, &cond, &gp, pQInfo);
  taosArrayDestroy(pOneTable);
  taosArrayDestroy(pGroupList);

  if (pRuntimeEnv->pTSBuf != NULL) {
    if (pRuntimeEnv->cur.vgroupIndex != -1) {
      tsBufSetCursor(pRuntimeEnv->pTSBuf, &pRuntimeEnv->cur);
    } else {
      int64_t tag = pRuntimeEnv->pCtx[0].tag.i64Key;
      STSElem elem = tsBufGetElemStartPos(pRuntimeEnv->pTSBuf, 0, tag);

      // failed to find data with the specified tag value
      if (elem.vnode < 0) {
        return false;
      }
    }
  }

  initCtxOutputBuf(pRuntimeEnv);
  return true;
}

/**
 * super table query handler
 * 1. super table projection query, group-by on normal columns query, ts-comp query
 * 2. point interpolation query, last row query
 *
 * @param pQInfo
 */
4341
static void sequentialTableProcess(SQInfo *pQInfo) {
4342
  SQueryRuntimeEnv *pRuntimeEnv = &pQInfo->runtimeEnv;
4343
  SQuery *          pQuery = pRuntimeEnv->pQuery;
4344
  setQueryStatus(pQuery, QUERY_COMPLETED);
4345

H
Haojun Liao 已提交
4346
  size_t numOfGroups = GET_NUM_OF_TABLEGROUP(pQInfo);
4347

H
Haojun Liao 已提交
4348
  if (isPointInterpoQuery(pQuery) || isFirstLastRowQuery(pQuery)) {
4349 4350
    resetCtxOutputBuf(pRuntimeEnv);
    assert(pQuery->limit.offset == 0 && pQuery->limit.limit != 0);
4351

4352
    while (pQInfo->groupIndex < numOfGroups) {
H
Haojun Liao 已提交
4353
      SArray* group = taosArrayGetP(pQInfo->tableGroupInfo.pGroupList, pQInfo->groupIndex);
4354

4355
      qDebug("QInfo:%p last_row query on group:%d, total group:%zu, current group:%p", pQInfo, pQInfo->groupIndex,
dengyihao's avatar
dengyihao 已提交
4356
             numOfGroups, group);
H
Haojun Liao 已提交
4357 4358 4359 4360 4361 4362 4363 4364 4365 4366 4367 4368 4369 4370 4371 4372 4373 4374 4375 4376

      STsdbQueryCond cond = {
          .twindow = pQuery->window,
          .colList = pQuery->colList,
          .order   = pQuery->order.order,
          .numOfCols = pQuery->numOfCols,
      };

      SArray *g1 = taosArrayInit(1, POINTER_BYTES);
      SArray *tx = taosArrayClone(group);
      taosArrayPush(g1, &tx);
      
      STableGroupInfo gp = {.numOfTables = taosArrayGetSize(tx), .pGroupList = g1};

      // include only current table
      if (pRuntimeEnv->pQueryHandle != NULL) {
        tsdbCleanupQueryHandle(pRuntimeEnv->pQueryHandle);
        pRuntimeEnv->pQueryHandle = NULL;
      }
      
4377
      if (isFirstLastRowQuery(pQuery)) {
H
Haojun Liao 已提交
4378
        pRuntimeEnv->pQueryHandle = tsdbQueryLastRow(pQInfo->tsdb, &cond, &gp, pQInfo);
H
Haojun Liao 已提交
4379
      } else {
H
Haojun Liao 已提交
4380
        pRuntimeEnv->pQueryHandle = tsdbQueryRowsInExternalWindow(pQInfo->tsdb, &cond, &gp, pQInfo);
4381
      }
H
Haojun Liao 已提交
4382 4383
      
      initCtxOutputBuf(pRuntimeEnv);
4384
      
4385
      SArray* s = tsdbGetQueriedTableList(pRuntimeEnv->pQueryHandle);
4386 4387
      assert(taosArrayGetSize(s) >= 1);
      
4388
      setTagVal(pRuntimeEnv, taosArrayGetP(s, 0), pQInfo->tsdb);
4389 4390 4391
      if (isFirstLastRowQuery(pQuery)) {
        assert(taosArrayGetSize(s) == 1);
      }
H
Haojun Liao 已提交
4392

dengyihao's avatar
dengyihao 已提交
4393
      taosArrayDestroy(s);
H
Haojun Liao 已提交
4394

H
Haojun Liao 已提交
4395
      // here we simply set the first table as current table
4396 4397 4398
      SArray* first = GET_TABLEGROUP(pQInfo, pQInfo->groupIndex);
      pQuery->current = taosArrayGetP(first, 0);

4399
      scanOneTableDataBlocks(pRuntimeEnv, pQuery->current->lastKey);
H
Haojun Liao 已提交
4400 4401 4402 4403 4404 4405 4406 4407 4408 4409 4410 4411
      
      int64_t numOfRes = getNumOfResult(pRuntimeEnv);
      if (numOfRes > 0) {
        pQuery->rec.rows += numOfRes;
        forwardCtxOutputBuf(pRuntimeEnv, numOfRes);
      }
      
      skipResults(pRuntimeEnv);
      pQInfo->groupIndex += 1;

      // enable execution for next table, when handling the projection query
      enableExecutionForNextTable(pRuntimeEnv);
4412 4413 4414 4415 4416 4417

      if (pQuery->rec.rows >= pQuery->rec.capacity) {
        setQueryStatus(pQuery, QUERY_RESBUF_FULL);
        break;
      }
    }
H
Haojun Liao 已提交
4418
  } else if (pRuntimeEnv->groupbyNormalCol) { // group-by on normal columns query
4419
    while (pQInfo->groupIndex < numOfGroups) {
H
Haojun Liao 已提交
4420
      SArray* group = taosArrayGetP(pQInfo->tableGroupInfo.pGroupList, pQInfo->groupIndex);
4421

4422
      qDebug("QInfo:%p group by normal columns group:%d, total group:%zu", pQInfo, pQInfo->groupIndex, numOfGroups);
4423 4424 4425 4426 4427 4428 4429 4430 4431 4432 4433 4434 4435 4436 4437 4438 4439 4440 4441 4442 4443

      STsdbQueryCond cond = {
          .twindow = pQuery->window,
          .colList = pQuery->colList,
          .order   = pQuery->order.order,
          .numOfCols = pQuery->numOfCols,
      };

      SArray *g1 = taosArrayInit(1, POINTER_BYTES);
      SArray *tx = taosArrayClone(group);
      taosArrayPush(g1, &tx);

      STableGroupInfo gp = {.numOfTables = taosArrayGetSize(tx), .pGroupList = g1};

      // include only current table
      if (pRuntimeEnv->pQueryHandle != NULL) {
        tsdbCleanupQueryHandle(pRuntimeEnv->pQueryHandle);
        pRuntimeEnv->pQueryHandle = NULL;
      }

      pRuntimeEnv->pQueryHandle = tsdbQueryTables(pQInfo->tsdb, &cond, &gp, pQInfo);
B
Bomin Zhang 已提交
4444 4445
      taosArrayDestroy(g1);
      taosArrayDestroy(tx);
4446

4447
      SArray* s = tsdbGetQueriedTableList(pRuntimeEnv->pQueryHandle);
4448 4449
      assert(taosArrayGetSize(s) >= 1);

4450
      setTagVal(pRuntimeEnv, taosArrayGetP(s, 0), pQInfo->tsdb);
4451 4452 4453 4454 4455 4456 4457 4458

      // here we simply set the first table as current table
      scanMultiTableDataBlocks(pQInfo);
      pQInfo->groupIndex += 1;

      SWindowResInfo *pWindowResInfo = &pRuntimeEnv->windowResInfo;

        // no results generated for current group, continue to try the next group
dengyihao's avatar
dengyihao 已提交
4459
      taosArrayDestroy(s); 
4460 4461 4462 4463 4464 4465 4466 4467 4468 4469 4470 4471 4472 4473
      if (pWindowResInfo->size <= 0) {
        continue;
      }

      for (int32_t i = 0; i < pWindowResInfo->size; ++i) {
        SWindowStatus *pStatus = &pWindowResInfo->pResult[i].status;
        pStatus->closed = true;  // enable return all results for group by normal columns

        SWindowResult *pResult = &pWindowResInfo->pResult[i];
        for (int32_t j = 0; j < pQuery->numOfOutput; ++j) {
          pResult->numOfRows = MAX(pResult->numOfRows, pResult->resultInfo[j].numOfRes);
        }
      }

4474
      qDebug("QInfo:%p generated groupby columns results %d rows for group %d completed", pQInfo, pWindowResInfo->size,
4475 4476 4477 4478 4479 4480 4481
          pQInfo->groupIndex);
      int32_t currentGroupIndex = pQInfo->groupIndex;

      pQuery->rec.rows = 0;
      pQInfo->groupIndex = 0;

      ensureOutputBufferSimple(pRuntimeEnv, pWindowResInfo->size);
4482
      copyFromWindowResToSData(pQInfo, pWindowResInfo);
4483 4484 4485 4486 4487 4488

      pQInfo->groupIndex = currentGroupIndex;  //restore the group index
      assert(pQuery->rec.rows == pWindowResInfo->size);

      clearClosedTimeWindow(pRuntimeEnv);
      break;
4489 4490 4491
    }
  } else {
    /*
4492
     * 1. super table projection query, 2. ts-comp query
4493 4494 4495
     * if the subgroup index is larger than 0, results generated by group by tbname,k is existed.
     * we need to return it to client in the first place.
     */
4496
    if (pQInfo->groupIndex > 0) {
4497
      copyFromWindowResToSData(pQInfo, &pRuntimeEnv->windowResInfo);
4498
      pQuery->rec.total += pQuery->rec.rows;
4499

4500
      if (pQuery->rec.rows > 0) {
4501 4502 4503
        return;
      }
    }
4504

4505
    // all data have returned already
4506
    if (pQInfo->tableIndex >= pQInfo->tableqinfoGroupInfo.numOfTables) {
4507 4508
      return;
    }
4509

4510 4511
    resetCtxOutputBuf(pRuntimeEnv);
    resetTimeWindowInfo(pRuntimeEnv, &pRuntimeEnv->windowResInfo);
4512

H
Haojun Liao 已提交
4513
    SArray *group = GET_TABLEGROUP(pQInfo, 0);
4514 4515
    assert(taosArrayGetSize(group) == pQInfo->tableqinfoGroupInfo.numOfTables &&
           1 == taosArrayGetSize(pQInfo->tableqinfoGroupInfo.pGroupList));
4516

4517
    while (pQInfo->tableIndex < pQInfo->tableqinfoGroupInfo.numOfTables) {
H
Haojun Liao 已提交
4518
      if (IS_QUERY_KILLED(pQInfo)) {
H
Haojun Liao 已提交
4519 4520
        finalizeQueryResult(pRuntimeEnv); // clean up allocated resource during query
        longjmp(pRuntimeEnv->env, TSDB_CODE_TSC_QUERY_CANCELLED);
4521
      }
4522

4523
      pQuery->current = taosArrayGetP(group, pQInfo->tableIndex);
4524
      if (!multiTableMultioutputHelper(pQInfo, pQInfo->tableIndex)) {
4525
        pQInfo->tableIndex++;
4526 4527
        continue;
      }
4528

H
hjxilinx 已提交
4529
      // TODO handle the limit offset problem
4530
      if (pQuery->numOfFilterCols == 0 && pQuery->limit.offset > 0) {
4531
        //        skipBlocks(pRuntimeEnv);
4532 4533
        if (Q_STATUS_EQUAL(pQuery->status, QUERY_COMPLETED)) {
          pQInfo->tableIndex++;
4534 4535 4536
          continue;
        }
      }
4537

4538
      scanOneTableDataBlocks(pRuntimeEnv, pQuery->current->lastKey);
4539
      skipResults(pRuntimeEnv);
4540

4541
      // the limitation of output result is reached, set the query completed
4542
      if (limitResults(pRuntimeEnv)) {
4543
        pQInfo->tableIndex = pQInfo->tableqinfoGroupInfo.numOfTables;
4544 4545
        break;
      }
4546

4547 4548
      // enable execution for next table, when handling the projection query
      enableExecutionForNextTable(pRuntimeEnv);
4549

4550
      if (Q_STATUS_EQUAL(pQuery->status, QUERY_COMPLETED)) {
4551 4552 4553 4554 4555 4556
        /*
         * query range is identical in terms of all meters involved in query,
         * so we need to restore them at the *beginning* of query on each meter,
         * not the consecutive query on meter on which is aborted due to buffer limitation
         * to ensure that, we can reset the query range once query on a meter is completed.
         */
4557
        pQInfo->tableIndex++;
weixin_48148422's avatar
weixin_48148422 已提交
4558

H
Haojun Liao 已提交
4559
        STableIdInfo tidInfo = {0};
4560

H
Haojun Liao 已提交
4561 4562 4563
        STableId* id = TSDB_TABLEID(pQuery->current->pTable);
        tidInfo.uid = id->uid;
        tidInfo.tid = id->tid;
weixin_48148422's avatar
weixin_48148422 已提交
4564
        tidInfo.key = pQuery->current->lastKey;
weixin_48148422's avatar
weixin_48148422 已提交
4565 4566
        taosArrayPush(pQInfo->arrTableIdInfo, &tidInfo);

4567
        // if the buffer is full or group by each table, we need to jump out of the loop
4568 4569
        if (Q_STATUS_EQUAL(pQuery->status, QUERY_RESBUF_FULL) /*||
            isGroupbyEachTable(pQuery->pGroupbyExpr, pSupporter->pSidSet)*/) {
4570 4571
          break;
        }
4572

4573
      } else {
4574
        // all data in the result buffer are skipped due to the offset, continue to retrieve data from current meter
4575 4576
        if (pQuery->rec.rows == 0) {
          assert(!Q_STATUS_EQUAL(pQuery->status, QUERY_RESBUF_FULL));
4577 4578
          continue;
        } else {
4579 4580 4581
          // buffer is full, wait for the next round to retrieve data from current meter
          assert(Q_STATUS_EQUAL(pQuery->status, QUERY_RESBUF_FULL));
          break;
4582 4583 4584
        }
      }
    }
H
Haojun Liao 已提交
4585

4586
    if (pQInfo->tableIndex >= pQInfo->tableqinfoGroupInfo.numOfTables) {
H
Haojun Liao 已提交
4587 4588
      setQueryStatus(pQuery, QUERY_COMPLETED);
    }
4589
  }
4590

4591 4592 4593 4594 4595 4596 4597 4598 4599 4600 4601 4602
  /*
   * 1. super table projection query, group-by on normal columns query, ts-comp query
   * 2. point interpolation query, last row query
   *
   * group-by on normal columns query and last_row query do NOT invoke the finalizer here,
   * since the finalize stage will be done at the client side.
   *
   * projection query, point interpolation query do not need the finalizer.
   *
   * Only the ts-comp query requires the finalizer function to be executed here.
   */
  if (isTSCompQuery(pQuery)) {
H
hjxilinx 已提交
4603
    finalizeQueryResult(pRuntimeEnv);
4604
  }
4605

4606 4607 4608
  if (pRuntimeEnv->pTSBuf != NULL) {
    pRuntimeEnv->cur = pRuntimeEnv->pTSBuf->cur;
  }
4609

4610
  qDebug(
B
Bomin Zhang 已提交
4611
      "QInfo %p numOfTables:%"PRIu64", index:%d, numOfGroups:%zu, %"PRId64" points returned, total:%"PRId64", offset:%" PRId64,
4612
      pQInfo, pQInfo->tableqinfoGroupInfo.numOfTables, pQInfo->tableIndex, numOfGroups, pQuery->rec.rows, pQuery->rec.total,
4613
      pQuery->limit.offset);
4614 4615
}

4616 4617 4618 4619
/*
 * Switch the runtime environment over to the reverse scan: mark the scan as
 * reversed, swap the query window boundaries, flip the scan order and open a
 * new secondary tsdb query handle for the reversed window.
 */
static void doSaveContext(SQInfo *pQInfo) {
  SQueryRuntimeEnv *pEnv = &pQInfo->runtimeEnv;
  SQuery           *pQ = pEnv->pQuery;

  SET_REVERSE_SCAN_FLAG(pEnv);
  SWAP(pQ->window.skey, pQ->window.ekey, TSKEY);
  SWITCH_ORDER(pQ->order.order);

  // keep the ts buffer cursor in step with the flipped query order
  if (pEnv->pTSBuf != NULL) {
    pEnv->pTSBuf->cur.order = pQ->order.order;
  }

  // query condition describing the reversed window
  STsdbQueryCond reverseCond = {
      .twindow   = pQ->window,
      .order     = pQ->order.order,
      .colList   = pQ->colList,
      .numOfCols = pQ->numOfCols,
  };

  // release the secondary handle left over from a previous round, if any
  if (pEnv->pSecQueryHandle != NULL) {
    tsdbCleanupQueryHandle(pEnv->pSecQueryHandle);
  }

  pEnv->prevGroupId = INT32_MIN;
  pEnv->pSecQueryHandle = tsdbQueryTables(pQInfo->tsdb, &reverseCond, &pQInfo->tableGroupInfo, pQInfo);

  setQueryStatus(pQ, QUERY_NOT_COMPLETED);
  switchCtxOrder(pEnv);
  disableFuncInReverseScan(pQInfo);
}

4648 4649 4650 4651
/*
 * Leave the reverse scan: swap the query window boundaries back, flip the
 * order of the ts buffer cursor, switch the function context order and mark
 * the runtime environment as master scan again.
 *
 * NOTE(review): doSaveContext() also flips pQuery->order.order, which is not
 * flipped back here -- confirm whether that is intentional.
 */
static void doRestoreContext(SQInfo *pQInfo) {
  SQueryRuntimeEnv *pEnv = &pQInfo->runtimeEnv;
  SQuery           *pQ = pEnv->pQuery;

  SWAP(pQ->window.skey, pQ->window.ekey, TSKEY);

  if (pEnv->pTSBuf != NULL) {
    SWITCH_ORDER(pEnv->pTSBuf->cur.order);
  }

  switchCtxOrder(pEnv);
  SET_MASTER_SCAN_FLAG(pEnv);
}

4662 4663 4664
/*
 * Close every time window once a scan is done. For an interval query the
 * windows are kept per table, so each table of each group is visited; for all
 * other queries only the shared window result set of the runtime environment
 * is closed.
 */
static void doCloseAllTimeWindowAfterScan(SQInfo *pQInfo) {
  SQuery *pQ = pQInfo->runtimeEnv.pQuery;

  if (!QUERY_IS_INTERVAL_QUERY(pQ)) {
    // close results for group result
    closeAllTimeWindow(&pQInfo->runtimeEnv.windowResInfo);
    return;
  }

  size_t groupCount = GET_NUM_OF_TABLEGROUP(pQInfo);
  for (size_t g = 0; g < groupCount; ++g) {
    SArray *pTables = GET_TABLEGROUP(pQInfo, g);

    size_t tableCount = taosArrayGetSize(pTables);
    for (size_t t = 0; t < tableCount; ++t) {
      STableQueryInfo *pTableQInfo = taosArrayGetP(pTables, t);
      closeAllTimeWindow(&pTableQInfo->windowResInfo);
    }
  }
}

/*
 * Execute a query over all qualified tables at once: master scan, optional
 * reverse scan, then merge/copy the results into the output buffer.
 *
 * If the query is killed or an error code is set, the allocated results are
 * finalized and control leaves through longjmp(pRuntimeEnv->env, ...).
 */
static void multiTableQueryProcess(SQInfo *pQInfo) {
  SQueryRuntimeEnv *pEnv = &pQInfo->runtimeEnv;
  SQuery           *pQ = pEnv->pQuery;

  /*
   * groupIndex > 0 means the scan itself has already been completed; only the
   * remaining data has to be copied into the output buffer.
   */
  if (pQInfo->groupIndex > 0) {
    if (QUERY_IS_INTERVAL_QUERY(pQ)) {
      copyResToQueryResultBuf(pQInfo, pQ);
#ifdef _DEBUG_VIEW
      displayInterResult(pQ->sdata, pEnv, pQ->sdata[0]->num);
#endif
    } else {
      copyFromWindowResToSData(pQInfo, &pEnv->windowResInfo);
    }

    qDebug("QInfo:%p current:%"PRId64", total:%"PRId64"", pQInfo, pQ->rec.rows, pQ->rec.total);
    return;
  }

  qDebug("QInfo:%p query start, qrange:%" PRId64 "-%" PRId64 ", order:%d, forward scan start", pQInfo,
         pQ->window.skey, pQ->window.ekey, pQ->order.order);

  // master scan: check all qualified data blocks
  int64_t elapsed = scanMultiTableDataBlocks(pQInfo);
  qDebug("QInfo:%p master scan completed, elapsed time: %" PRId64 "ms, reverse scan start", pQInfo, elapsed);

  // an error occurred or the query was killed: abort the current execution
  if (pQInfo->code != TSDB_CODE_SUCCESS || IS_QUERY_KILLED(pQInfo)) {
    qDebug("QInfo:%p query killed or error occurred, code:%s, abort", pQInfo, tstrerror(pQInfo->code));
    finalizeQueryResult(pEnv);  // release the resources allocated during the query
    longjmp(pEnv->env, TSDB_CODE_TSC_QUERY_CANCELLED);
  }

  // close all time window results
  doCloseAllTimeWindowAfterScan(pQInfo);

  if (!needReverseScan(pQ)) {
    qDebug("QInfo:%p no need to do reversed scan, query completed", pQInfo);
  } else {
    doSaveContext(pQInfo);

    elapsed = scanMultiTableDataBlocks(pQInfo);
    qDebug("QInfo:%p reversed scan completed, elapsed time: %" PRId64 "ms", pQInfo, elapsed);

    doRestoreContext(pQInfo);
  }

  setQueryStatus(pQ, QUERY_COMPLETED);

  // same abort check again, this time covering the reverse scan
  if (pQInfo->code != TSDB_CODE_SUCCESS || IS_QUERY_KILLED(pQInfo)) {
    qDebug("QInfo:%p query killed or error occurred, code:%s, abort", pQInfo, tstrerror(pQInfo->code));
    finalizeQueryResult(pEnv);  // release the resources allocated during the query
    longjmp(pEnv->env, TSDB_CODE_TSC_QUERY_CANCELLED);
  }

  if (QUERY_IS_INTERVAL_QUERY(pQ) || isSumAvgRateQuery(pQ)) {
    if (mergeIntoGroupResult(pQInfo) == TSDB_CODE_SUCCESS) {
      copyResToQueryResultBuf(pQInfo, pQ);

#ifdef _DEBUG_VIEW
      displayInterResult(pQ->sdata, pEnv, pQ->sdata[0]->num);
#endif
    }
  } else {
    // not an interval query
    copyFromWindowResToSData(pQInfo, &pEnv->windowResInfo);
  }

  // handle the limitation of output buffer
  qDebug("QInfo:%p points returned:%" PRId64 ", total:%" PRId64, pQInfo, pQ->rec.rows, pQ->rec.total + pQ->rec.rows);
}

/*
 * in each query, this function will be called only once, no retry for further result.
 *
 * select count(*)/top(field,k)/avg(field name) from table_name [where ts>now-1a];
 * select count(*) from table_name group by status_column;
 */
H
hjxilinx 已提交
4765
static void tableFixedOutputProcess(SQInfo *pQInfo, STableQueryInfo* pTableInfo) {
4766
  SQueryRuntimeEnv *pRuntimeEnv = &pQInfo->runtimeEnv;
H
hjxilinx 已提交
4767 4768
  
  SQuery *pQuery = pRuntimeEnv->pQuery;
H
Haojun Liao 已提交
4769 4770 4771 4772
  if (!isTopBottomQuery(pQuery) && pQuery->limit.offset > 0) {  // no need to execute, since the output will be ignore.
    return;
  }
  
H
hjxilinx 已提交
4773 4774
  pQuery->current = pTableInfo;  // set current query table info
  
4775
  scanOneTableDataBlocks(pRuntimeEnv, pTableInfo->lastKey);
H
hjxilinx 已提交
4776
  finalizeQueryResult(pRuntimeEnv);
4777

H
Haojun Liao 已提交
4778
  if (IS_QUERY_KILLED(pQInfo)) {
H
Haojun Liao 已提交
4779 4780
    finalizeQueryResult(pRuntimeEnv); // clean up allocated resource during query
    longjmp(pRuntimeEnv->env, TSDB_CODE_TSC_QUERY_CANCELLED);
4781
  }
4782

H
Haojun Liao 已提交
4783
  // since the numOfRows must be identical for all sql functions that are allowed to be executed simutaneously.
4784
  pQuery->rec.rows = getNumOfResult(pRuntimeEnv);
4785

4786
  skipResults(pRuntimeEnv);
4787
  limitResults(pRuntimeEnv);
4788 4789
}

H
hjxilinx 已提交
4790
/*
 * Handle a query on a single table that may produce many output rows. The
 * table is scanned repeatedly until at least one row survives the offset or
 * the query is completed; afterwards the limit is applied. When the query is
 * completed, the table id together with its last key is appended to
 * pQInfo->arrTableIdInfo.
 */
static void tableMultiOutputProcess(SQInfo *pQInfo, STableQueryInfo* pTableInfo) {
  SQueryRuntimeEnv *pEnv = &pQInfo->runtimeEnv;
  SQuery           *pQ = pEnv->pQuery;

  pQ->current = pTableInfo;

  // the output buffer must not be re-initialized for a ts_comp query
  if (!isTSCompQuery(pQ)) {
    resetCtxOutputBuf(pEnv);
  }

  // without filter conditions whole blocks can be skipped without loading their data
  skipBlocks(&pQInfo->runtimeEnv);
  if (pQ->limit.offset > 0 && pQ->numOfFilterCols == 0) {
    setQueryStatus(pQ, QUERY_COMPLETED);
    return;
  }

  for (;;) {
    scanOneTableDataBlocks(pEnv, pQ->current->lastKey);
    finalizeQueryResult(pEnv);

    pQ->rec.rows = getNumOfResult(pEnv);
    if (pQ->limit.offset > 0 && pQ->numOfFilterCols > 0 && pQ->rec.rows > 0) {
      skipResults(pEnv);
    }

    /*
     * 1. if no rows are left, limit.offset may still be >= 0 and more data has to be checked
     * 2. if rows are left, limit.offset must be 0
     */
    if (pQ->rec.rows > 0 || Q_STATUS_EQUAL(pQ->status, QUERY_COMPLETED)) {
      break;
    }

    qDebug("QInfo:%p skip current result, offset:%" PRId64 ", next qrange:%" PRId64 "-%" PRId64,
           pQInfo, pQ->limit.offset, pQ->current->lastKey, pQ->current->win.ekey);

    resetCtxOutputBuf(pEnv);
  }

  limitResults(pEnv);

  if (Q_STATUS_EQUAL(pQ->status, QUERY_RESBUF_FULL)) {
    qDebug("QInfo:%p query paused due to output limitation, next qrange:%" PRId64 "-%" PRId64, pQInfo,
        pQ->current->lastKey, pQ->window.ekey);
  } else if (Q_STATUS_EQUAL(pQ->status, QUERY_COMPLETED)) {
    STableId *pId = TSDB_TABLEID(pQ->current);

    STableIdInfo lastPos = {
        .uid = pId->uid,
        .tid = pId->tid,
        .key = pQ->current->lastKey,
    };
    taosArrayPush(pQInfo->arrTableIdInfo, &lastPos);
  }

  if (!isTSCompQuery(pQ)) {
    assert(pQ->rec.rows <= pQ->rec.capacity);
  }
}

H
Haojun Liao 已提交
4850
/*
 * Scan one table for an interval query, starting at `start`, and repeat the
 * scan until the query status is either QUERY_COMPLETED or QUERY_RESBUF_FULL.
 * While an offset is pending (and no fill is requested), closed time windows
 * are dropped from the front and subtracted from the offset.
 */
static void tableIntervalProcessImpl(SQueryRuntimeEnv *pRuntimeEnv, TSKEY start) {
  SQuery *pQ = pRuntimeEnv->pQuery;

  do {
    scanOneTableDataBlocks(pRuntimeEnv, start);

    assert(!Q_STATUS_EQUAL(pQ->status, QUERY_NOT_COMPLETED));
    finalizeQueryResult(pRuntimeEnv);

    // the skipped records can be ignored here since there is no interpolation
    // todo handle offset, in case of top/bottom interval query
    if ((pQ->numOfFilterCols > 0 || pRuntimeEnv->pTSBuf != NULL) && pQ->limit.offset > 0 &&
        pQ->fillType == TSDB_FILL_NONE) {
      int32_t closedWindows = numOfClosedTimeWindow(&pRuntimeEnv->windowResInfo);

      int32_t toDrop = MIN(closedWindows, pQ->limit.offset);
      clearFirstNTimeWindow(pRuntimeEnv, toDrop);
      pQ->limit.offset -= toDrop;
    }
  } while (!Q_STATUS_EQUAL(pQ->status, QUERY_COMPLETED | QUERY_RESBUF_FULL));
}

4877
// handle time interval query on table
H
hjxilinx 已提交
4878
static void tableIntervalProcess(SQInfo *pQInfo, STableQueryInfo* pTableInfo) {
4879 4880
  SQueryRuntimeEnv *pRuntimeEnv = &(pQInfo->runtimeEnv);

H
hjxilinx 已提交
4881 4882
  SQuery *pQuery = pRuntimeEnv->pQuery;
  pQuery->current = pTableInfo;
4883

H
Haojun Liao 已提交
4884
  int32_t numOfFilled = 0;
H
Haojun Liao 已提交
4885 4886
  TSKEY newStartKey = TSKEY_INITIAL_VAL;
  
4887
  // skip blocks without load the actual data block from file if no filter condition present
H
Haojun Liao 已提交
4888
  skipTimeInterval(pRuntimeEnv, &newStartKey);
4889
  if (pQuery->limit.offset > 0 && pQuery->numOfFilterCols == 0 && pRuntimeEnv->pFillInfo == NULL) {
4890 4891 4892 4893
    setQueryStatus(pQuery, QUERY_COMPLETED);
    return;
  }

4894
  while (1) {
H
Haojun Liao 已提交
4895
    tableIntervalProcessImpl(pRuntimeEnv, newStartKey);
4896

H
Haojun Liao 已提交
4897
    if (QUERY_IS_INTERVAL_QUERY(pQuery)) {
4898
      pQInfo->groupIndex = 0;  // always start from 0
4899
      pQuery->rec.rows = 0;
4900
      copyFromWindowResToSData(pQInfo, &pRuntimeEnv->windowResInfo);
4901

4902
      clearFirstNTimeWindow(pRuntimeEnv, pQInfo->groupIndex);
4903
    }
4904

4905
    // the offset is handled at prepare stage if no interpolation involved
4906
    if (pQuery->fillType == TSDB_FILL_NONE || pQuery->rec.rows == 0) {
4907
      limitResults(pRuntimeEnv);
4908 4909
      break;
    } else {
H
Haojun Liao 已提交
4910
      taosFillSetStartInfo(pRuntimeEnv->pFillInfo, pQuery->rec.rows, pQuery->window.ekey);
4911
      taosFillCopyInputDataFromFilePage(pRuntimeEnv->pFillInfo, (tFilePage**) pQuery->sdata);
H
Haojun Liao 已提交
4912
      numOfFilled = 0;
4913
      
H
Haojun Liao 已提交
4914
      pQuery->rec.rows = doFillGapsInResults(pRuntimeEnv, (tFilePage **)pQuery->sdata, &numOfFilled);
4915
      if (pQuery->rec.rows > 0 || Q_STATUS_EQUAL(pQuery->status, QUERY_COMPLETED)) {
4916
        limitResults(pRuntimeEnv);
4917 4918
        break;
      }
4919

4920
      // no result generated yet, continue retrieve data
4921
      pQuery->rec.rows = 0;
4922 4923
    }
  }
4924

4925
  // all data scanned, the group by normal column can return
H
Haojun Liao 已提交
4926
  if (pRuntimeEnv->groupbyNormalCol) {  // todo refactor with merge interval time result
4927
    pQInfo->groupIndex = 0;
4928
    pQuery->rec.rows = 0;
4929
    copyFromWindowResToSData(pQInfo, &pRuntimeEnv->windowResInfo);
4930
    clearFirstNTimeWindow(pRuntimeEnv, pQInfo->groupIndex);
4931
  }
4932

H
Haojun Liao 已提交
4933
  pQInfo->pointsInterpo += numOfFilled;
4934 4935
}

4936 4937 4938 4939
/*
 * Entry point for a query on exactly one table. Results left over from a
 * previous round (pending fill results or remaining window results) are
 * returned first; otherwise the query is dispatched to the interval, fixed
 * output or multi output processor, and the elapsed time is accumulated in
 * the runtime summary.
 */
static void tableQueryImpl(SQInfo *pQInfo) {
  SQueryRuntimeEnv *pEnv = &pQInfo->runtimeEnv;
  SQuery           *pQ = pEnv->pQuery;

  if (queryHasRemainResults(pEnv)) {
    if (pQ->fillType != TSDB_FILL_NONE) {
      /*
       * Some results have not been returned yet because of the result interpolation,
       * so stay in this procedure instead of launching the retrieve procedure for the next results.
       */
      int32_t filledPoints = 0;
      pQ->rec.rows = doFillGapsInResults(pEnv, (tFilePage **)pQ->sdata, &filledPoints);

      if (pQ->rec.rows > 0) {
        limitResults(pEnv);
      }

      qDebug("QInfo:%p current:%" PRId64 " returned, total:%" PRId64, pQInfo, pQ->rec.rows, pQ->rec.total);
      return;
    }

    pQ->rec.rows = 0;
    pQInfo->groupIndex = 0;  // always start from 0

    if (pEnv->windowResInfo.size > 0) {
      copyFromWindowResToSData(pQInfo, &pEnv->windowResInfo);
      clearFirstNTimeWindow(pEnv, pQInfo->groupIndex);

      if (pQ->rec.rows > 0) {
        qDebug("QInfo:%p %"PRId64" rows returned from group results, total:%"PRId64"", pQInfo, pQ->rec.rows, pQ->rec.total);

        // no data remains
        if (pEnv->windowResInfo.size <= 0) {
          qDebug("QInfo:%p query over, %"PRId64" rows are returned", pQInfo, pQ->rec.total);
        }

        return;
      }
    }
  }

  // number of points returned during this query
  pQ->rec.rows = 0;
  int64_t startUs = taosGetTimestampUs();

  assert(pQInfo->tableqinfoGroupInfo.numOfTables == 1);
  SArray          *pGroup = GET_TABLEGROUP(pQInfo, 0);
  STableQueryInfo *pTableQInfo = taosArrayGetP(pGroup, 0);

  // group by normal column, sliding window query, interval query are handled by the interval query processor
  if (QUERY_IS_INTERVAL_QUERY(pQ) || pEnv->groupbyNormalCol) {  // interval (down sampling operation)
    tableIntervalProcess(pQInfo, pTableQInfo);
  } else if (isFixedOutputQuery(pEnv)) {
    tableFixedOutputProcess(pQInfo, pTableQInfo);
  } else {  // diff/add/multiply/subtract/division
    assert(pQ->checkBuffer == 1);
    tableMultiOutputProcess(pQInfo, pTableQInfo);
  }

  // record the total elapsed time
  pEnv->summary.elapsedTime += (taosGetTimestampUs() - startUs);
  assert(pQInfo->tableqinfoGroupInfo.numOfTables == 1);
}

5001
/*
 * Entry point for a query on a super table. Interval queries and plain fixed
 * output queries are processed over all tables at once; every other query is
 * processed table by table. The elapsed time is accumulated in the runtime
 * summary.
 */
static void stableQueryImpl(SQInfo *pQInfo) {
  SQueryRuntimeEnv *pEnv = &pQInfo->runtimeEnv;
  SQuery           *pQ = pEnv->pQuery;

  pQ->rec.rows = 0;

  int64_t startUs = taosGetTimestampUs();

  bool processAllTablesTogether =
      QUERY_IS_INTERVAL_QUERY(pQ) ||
      (isFixedOutputQuery(pEnv) && (!isPointInterpoQuery(pQ)) && !pEnv->groupbyNormalCol &&
       !isFirstLastRowQuery(pQ));

  if (processAllTablesTogether) {
    multiTableQueryProcess(pQInfo);
  } else {
    assert((pQ->checkBuffer == 1 && pQ->intervalTime == 0) || isPointInterpoQuery(pQ) ||
            isFirstLastRowQuery(pQ) || pEnv->groupbyNormalCol);

    sequentialTableProcess(pQInfo);
  }

  // record the total elapsed time
  pEnv->summary.elapsedTime += (taosGetTimestampUs() - startUs);
}

5023
/*
 * Find the position of the column referenced by an expression in its source
 * list.
 *
 * @param pQueryMsg  query message; supplies numOfTags, numOfCols and colList
 * @param pExprMsg   expression whose colInfo (flag, colId) is looked up
 * @param pTagCols   tag column list, searched when the expression refers to a tag
 * @return index into pTagCols (tag column) or pQueryMsg->colList (normal
 *         column); -1 for the table name pseudo column, and also -1 when the
 *         column id is not found at all
 *
 * A column id that cannot be found is a broken invariant and trips the
 * assertion. The explicit return after it keeps the function well defined when
 * assertions are compiled out (NDEBUG); without it the function would fall off
 * the end of a non-void function and the caller would read an indeterminate
 * value.
 */
static int32_t getColumnIndexInSource(SQueryTableMsg *pQueryMsg, SSqlFuncMsg *pExprMsg, SColumnInfo* pTagCols) {
  if (TSDB_COL_IS_TAG(pExprMsg->colInfo.flag)) {
    if (pExprMsg->colInfo.colId == TSDB_TBNAME_COLUMN_INDEX) {
      return -1;
    }

    for (int32_t j = 0; j < pQueryMsg->numOfTags; ++j) {
      if (pExprMsg->colInfo.colId == pTagCols[j].colId) {
        return j;
      }
    }
  } else {
    for (int32_t j = 0; j < pQueryMsg->numOfCols; ++j) {
      if (pExprMsg->colInfo.colId == pQueryMsg->colList[j].colId) {
        return j;
      }
    }
  }

  assert(0);
  return -1;
}

5052 5053 5054
/*
 * Check that the column referenced by an expression resolves to an index that
 * is below the number of normal columns or below the number of tag columns.
 */
bool validateExprColumnInfo(SQueryTableMsg *pQueryMsg, SSqlFuncMsg *pExprMsg, SColumnInfo* pTagCols) {
  int32_t idx = getColumnIndexInSource(pQueryMsg, pExprMsg, pTagCols);

  if (idx < pQueryMsg->numOfCols) {
    return true;
  }

  return idx < pQueryMsg->numOfTags;
}

5057
/*
 * Sanity check of the scalar fields of a query message: interval time, number
 * of tables, number of group-by columns and number of output columns. The
 * first violated rule is logged and makes the function return false.
 */
static bool validateQueryMsg(SQueryTableMsg *pQueryMsg) {
  // a negative interval is never valid
  if (pQueryMsg->intervalTime < 0) {
    qError("qmsg:%p illegal value of interval time %" PRId64, pQueryMsg, pQueryMsg->intervalTime);
    return false;
  }

  // at least one table has to be queried
  if (pQueryMsg->numOfTables <= 0) {
    qError("qmsg:%p illegal value of numOfTables %d", pQueryMsg, pQueryMsg->numOfTables);
    return false;
  }

  // group-by columns are optional, but their count can not be negative
  if (pQueryMsg->numOfGroupCols < 0) {
    qError("qmsg:%p illegal value of numOfGroupbyCols %d", pQueryMsg, pQueryMsg->numOfGroupCols);
    return false;
  }

  // the number of output columns has to be within (0, TSDB_MAX_COLUMNS]
  if (pQueryMsg->numOfOutput <= 0 || pQueryMsg->numOfOutput > TSDB_MAX_COLUMNS) {
    qError("qmsg:%p illegal value of output columns %d", pQueryMsg, pQueryMsg->numOfOutput);
    return false;
  }

  return true;
}

/*
 * Validate the number of source columns (normal columns plus tags) of a query
 * message. When the message carries no source column at all, every output
 * expression has to be one of the functions that can work without one: a tag
 * projection, or tid_tag/count on the table name pseudo column.
 */
static bool validateQuerySourceCols(SQueryTableMsg *pQueryMsg, SSqlFuncMsg** pExprMsg) {
  int32_t totalCols = pQueryMsg->numOfCols + pQueryMsg->numOfTags;

  if (pQueryMsg->numOfCols < 0 || pQueryMsg->numOfTags < 0 || totalCols > TSDB_MAX_COLUMNS) {
    qError("qmsg:%p illegal value of numOfCols %d numOfTags:%d", pQueryMsg, pQueryMsg->numOfCols, pQueryMsg->numOfTags);
    return false;
  }

  if (totalCols != 0) {
    return true;
  }

  for (int32_t k = 0; k < pQueryMsg->numOfOutput; ++k) {
    SSqlFuncMsg *pFunc = pExprMsg[k];

    bool onTbname = (pFunc->colInfo.colId == TSDB_TBNAME_COLUMN_INDEX);
    bool allowed = (pFunc->functionId == TSDB_FUNC_TAGPRJ) ||
                   (pFunc->functionId == TSDB_FUNC_TID_TAG && onTbname) ||
                   (pFunc->functionId == TSDB_FUNC_COUNT && onTbname);

    if (!allowed) {
      return false;
    }
  }

  return true;
}

5103
/*
 * Read pQueryMsg->numOfTables STableIdInfo records starting at pMsg, convert
 * their tid/uid/key fields in place from network byte order and append each
 * record to a newly created array stored in *pTableIdList.
 *
 * @return pointer to the first byte after the last record
 */
static char *createTableIdList(SQueryTableMsg *pQueryMsg, char *pMsg, SArray **pTableIdList) {
  assert(pQueryMsg->numOfTables > 0);

  *pTableIdList = taosArrayInit(pQueryMsg->numOfTables, sizeof(STableIdInfo));

  STableIdInfo *pEntry = (STableIdInfo *)pMsg;
  for (int32_t k = 0; k < pQueryMsg->numOfTables; ++k, ++pEntry) {
    pEntry->tid = htonl(pEntry->tid);
    pEntry->uid = htobe64(pEntry->uid);
    pEntry->key = htobe64(pEntry->key);

    taosArrayPush(*pTableIdList, pEntry);
  }

  return (char *)pEntry;
}
5121

5122
/**
H
hjxilinx 已提交
5123
 * pQueryMsg->head has been converted before this function is called.
5124
 *
H
hjxilinx 已提交
5125
 * @param pQueryMsg
5126 5127 5128 5129
 * @param pTableIdList
 * @param pExpr
 * @return
 */
5130
static int32_t convertQueryMsg(SQueryTableMsg *pQueryMsg, SArray **pTableIdList, SSqlFuncMsg ***pExpr,
weixin_48148422's avatar
weixin_48148422 已提交
5131
                               char **tagCond, char** tbnameCond, SColIndex **groupbyCols, SColumnInfo** tagCols) {
5132 5133
  int32_t code = TSDB_CODE_SUCCESS;

5134 5135 5136 5137 5138 5139 5140 5141
  pQueryMsg->numOfTables = htonl(pQueryMsg->numOfTables);

  pQueryMsg->window.skey = htobe64(pQueryMsg->window.skey);
  pQueryMsg->window.ekey = htobe64(pQueryMsg->window.ekey);
  pQueryMsg->intervalTime = htobe64(pQueryMsg->intervalTime);
  pQueryMsg->slidingTime = htobe64(pQueryMsg->slidingTime);
  pQueryMsg->limit = htobe64(pQueryMsg->limit);
  pQueryMsg->offset = htobe64(pQueryMsg->offset);
H
hjxilinx 已提交
5142

5143 5144
  pQueryMsg->order = htons(pQueryMsg->order);
  pQueryMsg->orderColId = htons(pQueryMsg->orderColId);
H
Haojun Liao 已提交
5145
  pQueryMsg->queryType = htonl(pQueryMsg->queryType);
weixin_48148422's avatar
weixin_48148422 已提交
5146
  pQueryMsg->tagNameRelType = htons(pQueryMsg->tagNameRelType);
5147 5148

  pQueryMsg->numOfCols = htons(pQueryMsg->numOfCols);
5149
  pQueryMsg->numOfOutput = htons(pQueryMsg->numOfOutput);
H
hjxilinx 已提交
5150
  pQueryMsg->numOfGroupCols = htons(pQueryMsg->numOfGroupCols);
5151 5152 5153
  pQueryMsg->tagCondLen = htons(pQueryMsg->tagCondLen);
  pQueryMsg->tsOffset = htonl(pQueryMsg->tsOffset);
  pQueryMsg->tsLen = htonl(pQueryMsg->tsLen);
H
hjxilinx 已提交
5154
  pQueryMsg->tsNumOfBlocks = htonl(pQueryMsg->tsNumOfBlocks);
5155
  pQueryMsg->tsOrder = htonl(pQueryMsg->tsOrder);
5156
  pQueryMsg->numOfTags = htonl(pQueryMsg->numOfTags);
5157

5158
  // query msg safety check
5159
  if (!validateQueryMsg(pQueryMsg)) {
5160 5161
    code = TSDB_CODE_QRY_INVALID_MSG;
    goto _cleanup;
5162 5163
  }

H
hjxilinx 已提交
5164 5165
  char *pMsg = (char *)(pQueryMsg->colList) + sizeof(SColumnInfo) * pQueryMsg->numOfCols;
  for (int32_t col = 0; col < pQueryMsg->numOfCols; ++col) {
5166 5167
    SColumnInfo *pColInfo = &pQueryMsg->colList[col];

H
hjxilinx 已提交
5168
    pColInfo->colId = htons(pColInfo->colId);
5169
    pColInfo->type = htons(pColInfo->type);
H
hjxilinx 已提交
5170 5171
    pColInfo->bytes = htons(pColInfo->bytes);
    pColInfo->numOfFilters = htons(pColInfo->numOfFilters);
5172

H
hjxilinx 已提交
5173
    assert(pColInfo->type >= TSDB_DATA_TYPE_BOOL && pColInfo->type <= TSDB_DATA_TYPE_NCHAR);
5174

H
hjxilinx 已提交
5175
    int32_t numOfFilters = pColInfo->numOfFilters;
5176
    if (numOfFilters > 0) {
H
hjxilinx 已提交
5177
      pColInfo->filters = calloc(numOfFilters, sizeof(SColumnFilterInfo));
5178 5179 5180
    }

    for (int32_t f = 0; f < numOfFilters; ++f) {
5181 5182 5183 5184
      SColumnFilterInfo *pFilterMsg = (SColumnFilterInfo *)pMsg;
      
      SColumnFilterInfo *pColFilter = &pColInfo->filters[f];
      pColFilter->filterstr = htons(pFilterMsg->filterstr);
5185 5186 5187

      pMsg += sizeof(SColumnFilterInfo);

5188 5189
      if (pColFilter->filterstr) {
        pColFilter->len = htobe64(pFilterMsg->len);
5190

5191
        pColFilter->pz = (int64_t) calloc(1, pColFilter->len + 1 * TSDB_NCHAR_SIZE); // note: null-terminator
5192 5193
        memcpy((void *)pColFilter->pz, pMsg, pColFilter->len);
        pMsg += (pColFilter->len + 1);
5194
      } else {
5195 5196
        pColFilter->lowerBndi = htobe64(pFilterMsg->lowerBndi);
        pColFilter->upperBndi = htobe64(pFilterMsg->upperBndi);
5197 5198
      }

5199 5200
      pColFilter->lowerRelOptr = htons(pFilterMsg->lowerRelOptr);
      pColFilter->upperRelOptr = htons(pFilterMsg->upperRelOptr);
5201 5202 5203
    }
  }

5204 5205
  *pExpr = calloc(pQueryMsg->numOfOutput, POINTER_BYTES);
  SSqlFuncMsg *pExprMsg = (SSqlFuncMsg *)pMsg;
5206

5207
  for (int32_t i = 0; i < pQueryMsg->numOfOutput; ++i) {
5208
    (*pExpr)[i] = pExprMsg;
5209

5210
    pExprMsg->colInfo.colIndex = htons(pExprMsg->colInfo.colIndex);
5211 5212 5213 5214
    pExprMsg->colInfo.colId = htons(pExprMsg->colInfo.colId);
    pExprMsg->colInfo.flag = htons(pExprMsg->colInfo.flag);
    pExprMsg->functionId = htons(pExprMsg->functionId);
    pExprMsg->numOfParams = htons(pExprMsg->numOfParams);
5215

5216
    pMsg += sizeof(SSqlFuncMsg);
5217 5218

    for (int32_t j = 0; j < pExprMsg->numOfParams; ++j) {
5219
      pExprMsg->arg[j].argType = htons(pExprMsg->arg[j].argType);
5220 5221 5222 5223
      pExprMsg->arg[j].argBytes = htons(pExprMsg->arg[j].argBytes);

      if (pExprMsg->arg[j].argType == TSDB_DATA_TYPE_BINARY) {
        pExprMsg->arg[j].argValue.pz = pMsg;
5224
        pMsg += pExprMsg->arg[j].argBytes;  // one more for the string terminated char.
5225 5226 5227 5228 5229
      } else {
        pExprMsg->arg[j].argValue.i64 = htobe64(pExprMsg->arg[j].argValue.i64);
      }
    }

H
Haojun Liao 已提交
5230 5231
    int16_t functionId = pExprMsg->functionId;
    if (functionId == TSDB_FUNC_TAG || functionId == TSDB_FUNC_TAGPRJ || functionId == TSDB_FUNC_TAG_DUMMY) {
5232
      if (pExprMsg->colInfo.flag != TSDB_COL_TAG) {  // ignore the column  index check for arithmetic expression.
5233 5234
        code = TSDB_CODE_QRY_INVALID_MSG;
        goto _cleanup;
5235 5236
      }
    } else {
5237
//      if (!validateExprColumnInfo(pQueryMsg, pExprMsg)) {
5238
//        return TSDB_CODE_QRY_INVALID_MSG;
5239
//      }
5240 5241
    }

5242
    pExprMsg = (SSqlFuncMsg *)pMsg;
5243
  }
5244

5245
  if (!validateQuerySourceCols(pQueryMsg, *pExpr)) {
5246
    code = TSDB_CODE_QRY_INVALID_MSG;
dengyihao's avatar
dengyihao 已提交
5247
    goto _cleanup;
5248
  }
5249

H
hjxilinx 已提交
5250
  pMsg = createTableIdList(pQueryMsg, pMsg, pTableIdList);
5251

H
hjxilinx 已提交
5252
  if (pQueryMsg->numOfGroupCols > 0) {  // group by tag columns
5253
    *groupbyCols = malloc(pQueryMsg->numOfGroupCols * sizeof(SColIndex));
5254 5255 5256 5257
    if (*groupbyCols == NULL) {
      code = TSDB_CODE_QRY_OUT_OF_MEMORY;
      goto _cleanup;
    }
5258 5259 5260

    for (int32_t i = 0; i < pQueryMsg->numOfGroupCols; ++i) {
      (*groupbyCols)[i].colId = *(int16_t *)pMsg;
5261
      pMsg += sizeof((*groupbyCols)[i].colId);
5262 5263

      (*groupbyCols)[i].colIndex = *(int16_t *)pMsg;
5264 5265
      pMsg += sizeof((*groupbyCols)[i].colIndex);

5266
      (*groupbyCols)[i].flag = *(int16_t *)pMsg;
5267 5268 5269 5270 5271
      pMsg += sizeof((*groupbyCols)[i].flag);

      memcpy((*groupbyCols)[i].name, pMsg, tListLen(groupbyCols[i]->name));
      pMsg += tListLen((*groupbyCols)[i].name);
    }
5272

H
hjxilinx 已提交
5273 5274
    pQueryMsg->orderByIdx = htons(pQueryMsg->orderByIdx);
    pQueryMsg->orderType = htons(pQueryMsg->orderType);
5275 5276
  }

5277 5278
  pQueryMsg->fillType = htons(pQueryMsg->fillType);
  if (pQueryMsg->fillType != TSDB_FILL_NONE) {
5279
    pQueryMsg->fillVal = (uint64_t)(pMsg);
5280 5281

    int64_t *v = (int64_t *)pMsg;
5282
    for (int32_t i = 0; i < pQueryMsg->numOfOutput; ++i) {
5283 5284
      v[i] = htobe64(v[i]);
    }
5285

5286
    pMsg += sizeof(int64_t) * pQueryMsg->numOfOutput;
5287
  }
5288

5289 5290 5291 5292
  if (pQueryMsg->numOfTags > 0) {
    (*tagCols) = calloc(1, sizeof(SColumnInfo) * pQueryMsg->numOfTags);
    for (int32_t i = 0; i < pQueryMsg->numOfTags; ++i) {
      SColumnInfo* pTagCol = (SColumnInfo*) pMsg;
5293

5294 5295 5296 5297
      pTagCol->colId = htons(pTagCol->colId);
      pTagCol->bytes = htons(pTagCol->bytes);
      pTagCol->type  = htons(pTagCol->type);
      pTagCol->numOfFilters = 0;
5298

5299
      (*tagCols)[i] = *pTagCol;
5300
      pMsg += sizeof(SColumnInfo);
5301
    }
H
hjxilinx 已提交
5302
  }
5303

5304 5305 5306 5307 5308 5309
  // the tag query condition expression string is located at the end of query msg
  if (pQueryMsg->tagCondLen > 0) {
    *tagCond = calloc(1, pQueryMsg->tagCondLen);
    memcpy(*tagCond, pMsg, pQueryMsg->tagCondLen);
    pMsg += pQueryMsg->tagCondLen;
  }
5310

weixin_48148422's avatar
weixin_48148422 已提交
5311
  if (*pMsg != 0) {
5312
    size_t len = strlen(pMsg) + 1;
5313

5314
    *tbnameCond = malloc(len);
5315 5316 5317 5318 5319
    if (*tbnameCond == NULL) {
      code = TSDB_CODE_QRY_OUT_OF_MEMORY;
      goto _cleanup;
    }

weixin_48148422's avatar
weixin_48148422 已提交
5320
    strcpy(*tbnameCond, pMsg);
5321
    pMsg += len;
weixin_48148422's avatar
weixin_48148422 已提交
5322
  }
5323

5324
  qDebug("qmsg:%p query %d tables, type:%d, qrange:%" PRId64 "-%" PRId64 ", numOfGroupbyTagCols:%d, order:%d, "
H
Haojun Liao 已提交
5325 5326
         "outputCols:%d, numOfCols:%d, interval:%" PRId64 ", fillType:%d, comptsLen:%d, compNumOfBlocks:%d, limit:%" PRId64 ", offset:%" PRId64,
         pQueryMsg, pQueryMsg->numOfTables, pQueryMsg->queryType, pQueryMsg->window.skey, pQueryMsg->window.ekey, pQueryMsg->numOfGroupCols,
5327
         pQueryMsg->order, pQueryMsg->numOfOutput, pQueryMsg->numOfCols, pQueryMsg->intervalTime,
H
Haojun Liao 已提交
5328
         pQueryMsg->fillType, pQueryMsg->tsLen, pQueryMsg->tsNumOfBlocks, pQueryMsg->limit, pQueryMsg->offset);
5329 5330

  return TSDB_CODE_SUCCESS;
dengyihao's avatar
dengyihao 已提交
5331 5332 5333 5334 5335 5336 5337 5338 5339

_cleanup:
  tfree(*pExpr);
  taosArrayDestroy(*pTableIdList);
  *pTableIdList = NULL;
  tfree(*tbnameCond);
  tfree(*groupbyCols);
  tfree(*tagCols);
  tfree(*tagCond);
5340 5341

  return code;
5342 5343
}

H
hjxilinx 已提交
5344
/*
 * Build the expression tree of an arithmetic expression from the binary string
 * carried in the first argument of the expression message and attach it to
 * pArithExprInfo->pExpr.
 *
 * @return TSDB_CODE_SUCCESS, the code caught from exprTreeFromBinary(), or
 *         TSDB_CODE_QRY_APP_ERROR when no tree was produced
 */
static int32_t buildAirthmeticExprFromMsg(SExprInfo *pArithExprInfo, SQueryTableMsg *pQueryMsg) {
  qDebug("qmsg:%p create arithmetic expr from binary string: %s", pQueryMsg, pArithExprInfo->base.arg[0].argValue.pz);

  tExprNode *pRoot = NULL;
  TRY(32) {
    pRoot = exprTreeFromBinary(pArithExprInfo->base.arg[0].argValue.pz, pArithExprInfo->base.arg[0].argBytes);
  } CATCH( code ) {
    CLEANUP_EXECUTE();
    return code;
  } END_TRY

  if (pRoot != NULL) {
    pArithExprInfo->pExpr = pRoot;
    return TSDB_CODE_SUCCESS;
  }

  qError("qmsg:%p failed to create arithmetic expression string from:%s", pQueryMsg, pArithExprInfo->base.arg[0].argValue.pz);
  return TSDB_CODE_QRY_APP_ERROR;
}

H
Haojun Liao 已提交
5364
static int32_t createQFunctionExprFromMsg(SQueryTableMsg *pQueryMsg, SExprInfo **pExprInfo, SSqlFuncMsg **pExprMsg,
5365 5366
    SColumnInfo* pTagCols) {
  *pExprInfo = NULL;
H
hjxilinx 已提交
5367
  int32_t code = TSDB_CODE_SUCCESS;
5368

H
Haojun Liao 已提交
5369
  SExprInfo *pExprs = (SExprInfo *)calloc(pQueryMsg->numOfOutput, sizeof(SExprInfo));
5370
  if (pExprs == NULL) {
5371
    return TSDB_CODE_QRY_OUT_OF_MEMORY;
5372 5373 5374 5375 5376
  }

  bool    isSuperTable = QUERY_IS_STABLE_QUERY(pQueryMsg->queryType);
  int16_t tagLen = 0;

5377
  for (int32_t i = 0; i < pQueryMsg->numOfOutput; ++i) {
5378
    pExprs[i].base = *pExprMsg[i];
5379
    pExprs[i].bytes = 0;
5380 5381 5382 5383

    int16_t type = 0;
    int16_t bytes = 0;

5384
    // parse the arithmetic expression
5385
    if (pExprs[i].base.functionId == TSDB_FUNC_ARITHM) {
5386
      code = buildAirthmeticExprFromMsg(&pExprs[i], pQueryMsg);
5387

5388 5389 5390
      if (code != TSDB_CODE_SUCCESS) {
        tfree(pExprs);
        return code;
5391 5392
      }

5393
      type  = TSDB_DATA_TYPE_DOUBLE;
5394
      bytes = tDataTypeDesc[type].nSize;
H
Haojun Liao 已提交
5395
    } else if (pExprs[i].base.colInfo.colId == TSDB_TBNAME_COLUMN_INDEX && pExprs[i].base.functionId == TSDB_FUNC_TAGPRJ) {  // parse the normal column
H
Haojun Liao 已提交
5396 5397 5398
      SSchema s = tGetTableNameColumnSchema();
      type  = s.type;
      bytes = s.bytes;
B
Bomin Zhang 已提交
5399
    } else{
5400
      int32_t j = getColumnIndexInSource(pQueryMsg, &pExprs[i].base, pTagCols);
dengyihao's avatar
dengyihao 已提交
5401
      assert(j < pQueryMsg->numOfCols || j < pQueryMsg->numOfTags);
H
Haojun Liao 已提交
5402

dengyihao's avatar
dengyihao 已提交
5403
      if (pExprs[i].base.colInfo.colId != TSDB_TBNAME_COLUMN_INDEX && j >= 0) {
H
Haojun Liao 已提交
5404 5405 5406 5407
        SColumnInfo* pCol = (TSDB_COL_IS_TAG(pExprs[i].base.colInfo.flag))? &pTagCols[j]:&pQueryMsg->colList[j];
        type = pCol->type;
        bytes = pCol->bytes;
      } else {
H
Haojun Liao 已提交
5408
        SSchema s = tGetTableNameColumnSchema();
H
hjxilinx 已提交
5409

H
Haojun Liao 已提交
5410 5411 5412
        type  = s.type;
        bytes = s.bytes;
      }
5413 5414
    }

5415 5416
    int32_t param = pExprs[i].base.arg[0].argValue.i64;
    if (getResultDataInfo(type, bytes, pExprs[i].base.functionId, param, &pExprs[i].type, &pExprs[i].bytes,
5417
                          &pExprs[i].interBytes, 0, isSuperTable) != TSDB_CODE_SUCCESS) {
5418
      tfree(pExprs);
5419
      return TSDB_CODE_QRY_INVALID_MSG;
5420 5421
    }

5422
    if (pExprs[i].base.functionId == TSDB_FUNC_TAG_DUMMY || pExprs[i].base.functionId == TSDB_FUNC_TS_DUMMY) {
5423
      tagLen += pExprs[i].bytes;
5424
    }
5425
    assert(isValidDataType(pExprs[i].type));
5426 5427 5428
  }

  // TODO refactor
5429
  for (int32_t i = 0; i < pQueryMsg->numOfOutput; ++i) {
5430 5431
    pExprs[i].base = *pExprMsg[i];
    int16_t functId = pExprs[i].base.functionId;
5432

5433
    if (functId == TSDB_FUNC_TOP || functId == TSDB_FUNC_BOTTOM) {
5434
      int32_t j = getColumnIndexInSource(pQueryMsg, &pExprs[i].base, pTagCols);
5435 5436 5437 5438 5439
      assert(j < pQueryMsg->numOfCols);

      SColumnInfo *pCol = &pQueryMsg->colList[j];

      int32_t ret =
5440
          getResultDataInfo(pCol->type, pCol->bytes, functId, pExprs[i].base.arg[0].argValue.i64,
5441
                            &pExprs[i].type, &pExprs[i].bytes, &pExprs[i].interBytes, tagLen, isSuperTable);
5442 5443 5444
      assert(ret == TSDB_CODE_SUCCESS);
    }
  }
5445
  *pExprInfo = pExprs;
5446 5447 5448 5449

  return TSDB_CODE_SUCCESS;
}

5450
/**
 * Create the group-by descriptor from the query message.
 *
 * @param pQueryMsg  query message (numOfGroupCols, orderType, orderByIdx are read)
 * @param pColIndex  array of numOfGroupCols group-by column indexes, copied into the result
 * @param code       [out] set to TSDB_CODE_QRY_OUT_OF_MEMORY when an allocation fails;
 *                   left untouched otherwise
 * @return the new descriptor, or NULL when there is no group-by column or on failure
 */
static SSqlGroupbyExpr *createGroupbyExprFromMsg(SQueryTableMsg *pQueryMsg, SColIndex *pColIndex, int32_t *code) {
  if (pQueryMsg->numOfGroupCols == 0) {
    return NULL;
  }

  // using group by tag columns
  SSqlGroupbyExpr *pGroupbyExpr = (SSqlGroupbyExpr *)calloc(1, sizeof(SSqlGroupbyExpr));
  if (pGroupbyExpr == NULL) {
    *code = TSDB_CODE_QRY_OUT_OF_MEMORY;
    return NULL;
  }

  pGroupbyExpr->numOfGroupCols = pQueryMsg->numOfGroupCols;
  pGroupbyExpr->orderType = pQueryMsg->orderType;
  pGroupbyExpr->orderIndex = pQueryMsg->orderByIdx;

  pGroupbyExpr->columnInfo = taosArrayInit(pQueryMsg->numOfGroupCols, sizeof(SColIndex));
  if (pGroupbyExpr->columnInfo == NULL) {
    // do not push into a missing array; report the failure like the allocation above
    free(pGroupbyExpr);
    *code = TSDB_CODE_QRY_OUT_OF_MEMORY;
    return NULL;
  }

  for(int32_t i = 0; i < pQueryMsg->numOfGroupCols; ++i) {
    taosArrayPush(pGroupbyExpr->columnInfo, &pColIndex[i]);
  }

  return pGroupbyExpr;
}

5474
/**
 * Build pQuery->pFilterInfo: one entry per column that carries at least one filter, each
 * entry holding a copy of the column info and one filter element (condition + callback)
 * per filter of that column.
 *
 * @param pQInfo  query handle, only used as a tag in the log output
 * @param pQuery  query object; colList/numOfCols are read, numOfFilterCols and pFilterInfo
 *                are written
 * @return TSDB_CODE_SUCCESS, TSDB_CODE_QRY_OUT_OF_MEMORY when an allocation fails, or
 *         TSDB_CODE_QRY_INVALID_MSG for an invalid filter condition or data type
 */
static int32_t createFilterInfo(void *pQInfo, SQuery *pQuery) {
  for (int32_t i = 0; i < pQuery->numOfCols; ++i) {
    if (pQuery->colList[i].numOfFilters > 0) {
      pQuery->numOfFilterCols++;
    }
  }

  if (pQuery->numOfFilterCols == 0) {
    return TSDB_CODE_SUCCESS;
  }

  pQuery->pFilterInfo = calloc(pQuery->numOfFilterCols, sizeof(SSingleColumnFilterInfo));
  if (pQuery->pFilterInfo == NULL) {
    // keep the counter consistent with the missing array so that nobody walks a NULL array
    pQuery->numOfFilterCols = 0;
    return TSDB_CODE_QRY_OUT_OF_MEMORY;
  }

  for (int32_t i = 0, j = 0; i < pQuery->numOfCols; ++i) {
    if (pQuery->colList[i].numOfFilters > 0) {
      SSingleColumnFilterInfo *pFilterInfo = &pQuery->pFilterInfo[j];

      pFilterInfo->info = pQuery->colList[i];

      pFilterInfo->numOfFilters = pQuery->colList[i].numOfFilters;
      pFilterInfo->pFilters = calloc(pFilterInfo->numOfFilters, sizeof(SColumnFilterElem));
      if (pFilterInfo->pFilters == NULL) {
        return TSDB_CODE_QRY_OUT_OF_MEMORY;
      }

      for (int32_t f = 0; f < pFilterInfo->numOfFilters; ++f) {
        SColumnFilterElem *pSingleColFilter = &pFilterInfo->pFilters[f];
        pSingleColFilter->filterInfo = pQuery->colList[i].filters[f];

        int32_t lower = pSingleColFilter->filterInfo.lowerRelOptr;
        int32_t upper = pSingleColFilter->filterInfo.upperRelOptr;

        if (lower == TSDB_RELATION_INVALID && upper == TSDB_RELATION_INVALID) {
          qError("QInfo:%p invalid filter info", pQInfo);
          return TSDB_CODE_QRY_INVALID_MSG;
        }

        int16_t type  = pQuery->colList[i].type;
        int16_t bytes = pQuery->colList[i].bytes;

        // todo refactor
        __filter_func_t *rangeFilterArray = getRangeFilterFuncArray(type);
        __filter_func_t *filterArray = getValueFilterFuncArray(type);

        if (rangeFilterArray == NULL && filterArray == NULL) {
          qError("QInfo:%p failed to get filter function, invalid data type:%d", pQInfo, type);
          return TSDB_CODE_QRY_INVALID_MSG;
        }

        if ((lower == TSDB_RELATION_GREATER_EQUAL || lower == TSDB_RELATION_GREATER) &&
            (upper == TSDB_RELATION_LESS_EQUAL || upper == TSDB_RELATION_LESS)) {
          // two-sided range: the slot depends on whether each bound is inclusive
          assert(rangeFilterArray != NULL);
          if (lower == TSDB_RELATION_GREATER_EQUAL) {
            if (upper == TSDB_RELATION_LESS_EQUAL) {
              pSingleColFilter->fp = rangeFilterArray[4];
            } else {
              pSingleColFilter->fp = rangeFilterArray[2];
            }
          } else {
            if (upper == TSDB_RELATION_LESS_EQUAL) {
              pSingleColFilter->fp = rangeFilterArray[3];
            } else {
              pSingleColFilter->fp = rangeFilterArray[1];
            }
          }
        } else {  // set callback filter function
          assert(filterArray != NULL);
          if (lower != TSDB_RELATION_INVALID) {
            pSingleColFilter->fp = filterArray[lower];

            if (upper != TSDB_RELATION_INVALID) {
              qError("pQInfo:%p failed to get filter function, invalid filter condition: %d", pQInfo, type);
              return TSDB_CODE_QRY_INVALID_MSG;
            }
          } else {
            pSingleColFilter->fp = filterArray[upper];
          }
        }
        assert(pSingleColFilter->fp != NULL);
        pSingleColFilter->bytes = bytes;
      }

      j++;
    }
  }

  return TSDB_CODE_SUCCESS;
}

5561
/**
 * Resolve, for every non-arithmetic output expression, the position of its source column:
 * colInfo.colIndex is set to the index of the entry with the same colId in
 * pQuery->colList (normal columns) or pQuery->tagColList (tag columns).
 *
 * @param pQuery  query object; pSelectExpr[*].base.colInfo.colIndex is updated in place
 */
static void doUpdateExprColumnIndex(SQuery *pQuery) {
  // test the pointer itself before looking through it
  assert(pQuery != NULL && pQuery->pSelectExpr != NULL);

  for (int32_t k = 0; k < pQuery->numOfOutput; ++k) {
    SSqlFuncMsg *pSqlExprMsg = &pQuery->pSelectExpr[k].base;
    if (pSqlExprMsg->functionId == TSDB_FUNC_ARITHM) {
      continue;
    }

    // todo opt performance
    SColIndex *pColIndex = &pSqlExprMsg->colInfo;
    if (!TSDB_COL_IS_TAG(pColIndex->flag)) {
      int32_t f = 0;
      for (f = 0; f < pQuery->numOfCols; ++f) {
        if (pColIndex->colId == pQuery->colList[f].colId) {
          pColIndex->colIndex = f;
          break;
        }
      }

      // every normal column referenced by an expression must be part of the column list
      assert (f < pQuery->numOfCols);
    } else {
      int32_t f = 0;
      for (f = 0; f < pQuery->numOfTags; ++f) {
        if (pColIndex->colId == pQuery->tagColList[f].colId) {
          pColIndex->colIndex = f;
          break;
        }
      }

      // the table name pseudo column is the only tag-flagged column allowed to be absent
      assert(f < pQuery->numOfTags || pColIndex->colId == TSDB_TBNAME_COLUMN_INDEX);
    }
  }
}

weixin_48148422's avatar
weixin_48148422 已提交
5596

5597
/**
 * Three-way comparison of two STableIdInfo objects by uid, usable as a sort/search
 * comparator.
 *
 * @return 1 when a's uid is greater, -1 when it is smaller, 0 when both are equal
 */
static int compareTableIdInfo(const void* a, const void* b) {
  const STableIdInfo* lhs = (const STableIdInfo*)a;
  const STableIdInfo* rhs = (const STableIdInfo*)b;

  // (greater) - (smaller) yields exactly 1, -1 or 0
  return (lhs->uid > rhs->uid) - (lhs->uid < rhs->uid);
}

dengyihao's avatar
dengyihao 已提交
5605 5606
// forward declaration: freeQInfo is defined further down in this file but is already
// needed by the error path of createQInfoImpl
static void freeQInfo(SQInfo *pQInfo);

weixin_48148422's avatar
weixin_48148422 已提交
5607
static SQInfo *createQInfoImpl(SQueryTableMsg *pQueryMsg, SArray* pTableIdList, SSqlGroupbyExpr *pGroupbyExpr, SExprInfo *pExprs,
5608
                               STableGroupInfo *pTableGroupInfo, SColumnInfo* pTagCols) {
5609 5610
  SQInfo *pQInfo = (SQInfo *)calloc(1, sizeof(SQInfo));
  if (pQInfo == NULL) {
5611
    return NULL;
5612 5613 5614 5615 5616 5617
  }

  SQuery *pQuery = calloc(1, sizeof(SQuery));
  pQInfo->runtimeEnv.pQuery = pQuery;

  int16_t numOfCols = pQueryMsg->numOfCols;
5618
  int16_t numOfOutput = pQueryMsg->numOfOutput;
5619

5620
  pQuery->numOfCols       = numOfCols;
H
hjxilinx 已提交
5621
  pQuery->numOfOutput     = numOfOutput;
5622 5623 5624
  pQuery->limit.limit     = pQueryMsg->limit;
  pQuery->limit.offset    = pQueryMsg->offset;
  pQuery->order.order     = pQueryMsg->order;
5625
  pQuery->order.orderColId = pQueryMsg->orderColId;
5626 5627 5628 5629
  pQuery->pSelectExpr     = pExprs;
  pQuery->pGroupbyExpr    = pGroupbyExpr;
  pQuery->intervalTime    = pQueryMsg->intervalTime;
  pQuery->slidingTime     = pQueryMsg->slidingTime;
5630
  pQuery->slidingTimeUnit = pQueryMsg->slidingTimeUnit;
5631
  pQuery->fillType        = pQueryMsg->fillType;
5632
  pQuery->numOfTags       = pQueryMsg->numOfTags;
5633
  
5634
  // todo do not allocate ??
5635
  pQuery->colList = calloc(numOfCols, sizeof(SSingleColumnFilterInfo));
5636
  if (pQuery->colList == NULL) {
5637
    goto _cleanup;
5638
  }
5639

H
hjxilinx 已提交
5640
  for (int16_t i = 0; i < numOfCols; ++i) {
5641
    pQuery->colList[i] = pQueryMsg->colList[i];
5642
    pQuery->colList[i].filters = tscFilterInfoClone(pQueryMsg->colList[i].filters, pQuery->colList[i].numOfFilters);
H
hjxilinx 已提交
5643
  }
5644

5645
  pQuery->tagColList = pTagCols;
5646

5647
  // calculate the result row size
5648 5649 5650
  for (int16_t col = 0; col < numOfOutput; ++col) {
    assert(pExprs[col].bytes > 0);
    pQuery->rowSize += pExprs[col].bytes;
5651
  }
5652

5653
  doUpdateExprColumnIndex(pQuery);
5654

5655
  int32_t ret = createFilterInfo(pQInfo, pQuery);
5656
  if (ret != TSDB_CODE_SUCCESS) {
5657
    goto _cleanup;
5658 5659 5660
  }

  // prepare the result buffer
5661
  pQuery->sdata = (tFilePage **)calloc(pQuery->numOfOutput, POINTER_BYTES);
5662
  if (pQuery->sdata == NULL) {
5663
    goto _cleanup;
5664 5665
  }

H
hjxilinx 已提交
5666
  // set the output buffer capacity
H
hjxilinx 已提交
5667
  pQuery->rec.capacity = 4096;
5668
  pQuery->rec.threshold = 4000;
5669

5670
  for (int32_t col = 0; col < pQuery->numOfOutput; ++col) {
5671
    assert(pExprs[col].interBytes >= pExprs[col].bytes);
5672 5673

    // allocate additional memory for interResults that are usually larger then final results
5674 5675
    size_t size = (pQuery->rec.capacity + 1) * pExprs[col].bytes + pExprs[col].interBytes + sizeof(tFilePage);
    pQuery->sdata[col] = (tFilePage *)calloc(1, size);
5676
    if (pQuery->sdata[col] == NULL) {
5677
      goto _cleanup;
5678 5679 5680
    }
  }

5681
  if (pQuery->fillType != TSDB_FILL_NONE) {
5682 5683
    pQuery->fillVal = malloc(sizeof(int64_t) * pQuery->numOfOutput);
    if (pQuery->fillVal == NULL) {
5684
      goto _cleanup;
5685 5686 5687
    }

    // the first column is the timestamp
5688
    memcpy(pQuery->fillVal, (char *)pQueryMsg->fillVal, pQuery->numOfOutput * sizeof(int64_t));
5689 5690 5691
  }

  // to make sure third party won't overwrite this structure
5692
  pQInfo->signature = pQInfo;
5693

5694
  pQInfo->tableGroupInfo = *pTableGroupInfo;
dengyihao's avatar
dengyihao 已提交
5695 5696 5697 5698 5699 5700
  size_t numOfGroups = 0;
  if (pTableGroupInfo->pGroupList != NULL) {
    numOfGroups = taosArrayGetSize(pTableGroupInfo->pGroupList);

    pQInfo->tableqinfoGroupInfo.pGroupList = taosArrayInit(numOfGroups, POINTER_BYTES);
    pQInfo->tableqinfoGroupInfo.numOfTables = pTableGroupInfo->numOfTables;
H
Haojun Liao 已提交
5701 5702 5703
    pQInfo->tableqinfoGroupInfo.map = taosHashInit(pTableGroupInfo->numOfTables,
                                                   taosGetDefaultHashFunction(TSDB_DATA_TYPE_INT), false);
  }
5704

weixin_48148422's avatar
weixin_48148422 已提交
5705 5706
  int tableIndex = 0;
  STimeWindow window = pQueryMsg->window;
5707
  taosArraySort(pTableIdList, compareTableIdInfo);
5708

H
Haojun Liao 已提交
5709
  pQInfo->runtimeEnv.interBufSize = getOutputInterResultBufSize(pQuery);
H
Haojun Liao 已提交
5710 5711 5712
  pQInfo->pBuf = calloc(pTableGroupInfo->numOfTables, sizeof(STableQueryInfo));
  int32_t index = 0;

H
hjxilinx 已提交
5713
  for(int32_t i = 0; i < numOfGroups; ++i) {
5714
    SArray* pa = taosArrayGetP(pTableGroupInfo->pGroupList, i);
5715

H
Haojun Liao 已提交
5716
    size_t s = taosArrayGetSize(pa);
5717
    SArray* p1 = taosArrayInit(s, POINTER_BYTES);
5718

H
hjxilinx 已提交
5719
    for(int32_t j = 0; j < s; ++j) {
5720
      void* pTable = taosArrayGetP(pa, j);
H
Haojun Liao 已提交
5721
      STableId* id = TSDB_TABLEID(pTable);
5722

H
Haojun Liao 已提交
5723
      STableIdInfo* pTableId = taosArraySearch(pTableIdList, id, compareTableIdInfo);
weixin_48148422's avatar
weixin_48148422 已提交
5724 5725 5726
      if (pTableId != NULL ) {
        window.skey = pTableId->key;
      } else {
B
Bomin Zhang 已提交
5727
        window.skey = pQueryMsg->window.skey;
weixin_48148422's avatar
weixin_48148422 已提交
5728
      }
5729

H
Haojun Liao 已提交
5730 5731
      void* buf = pQInfo->pBuf + index * sizeof(STableQueryInfo);
      STableQueryInfo* item = createTableQueryInfo(&pQInfo->runtimeEnv, pTable, window, buf);
5732
      item->groupIndex = i;
H
hjxilinx 已提交
5733
      taosArrayPush(p1, &item);
H
Haojun Liao 已提交
5734 5735
      taosHashPut(pQInfo->tableqinfoGroupInfo.map, &id->tid, sizeof(id->tid), &item, POINTER_BYTES);
      index += 1;
H
hjxilinx 已提交
5736
    }
5737

5738
    taosArrayPush(pQInfo->tableqinfoGroupInfo.pGroupList, &p1);
H
hjxilinx 已提交
5739
  }
5740

weixin_48148422's avatar
weixin_48148422 已提交
5741 5742
  pQInfo->arrTableIdInfo = taosArrayInit(tableIndex, sizeof(STableIdInfo));

5743
  pQuery->pos = -1;
5744
  pQuery->window = pQueryMsg->window;
5745

5746
  if (sem_init(&pQInfo->dataReady, 0, 0) != 0) {
5747 5748
    int32_t code = TAOS_SYSTEM_ERROR(errno);
    qError("QInfo:%p init dataReady sem failed, reason:%s", pQInfo, tstrerror(code));
5749
    goto _cleanup;
5750
  }
5751

5752
  colIdCheck(pQuery);
5753

5754
  qDebug("qmsg:%p QInfo:%p created", pQueryMsg, pQInfo);
5755 5756
  return pQInfo;

5757
_cleanup:
dengyihao's avatar
dengyihao 已提交
5758
  freeQInfo(pQInfo);
5759 5760 5761
  return NULL;
}

H
hjxilinx 已提交
5762
static bool isValidQInfo(void *param) {
H
hjxilinx 已提交
5763 5764 5765 5766
  SQInfo *pQInfo = (SQInfo *)param;
  if (pQInfo == NULL) {
    return false;
  }
5767

H
hjxilinx 已提交
5768 5769 5770 5771
  /*
   * pQInfo->signature may be changed by another thread, so we assign value of signature
   * into local variable, then compare by using local variable
   */
5772
  uint64_t sig = (uint64_t)pQInfo->signature;
H
hjxilinx 已提交
5773 5774 5775
  return (sig == (uint64_t)pQInfo);
}

H
Haojun Liao 已提交
5776
/**
 * Finish the initialization of a query handle created by createQInfoImpl().
 *
 * The query is completed right away (dataReady is posted, TSDB_CODE_SUCCESS is returned)
 * when the time window is empty for the scan order or when no table qualifies.  Otherwise
 * the optional timestamp buffer is built from the message and doInitQInfo() is called.
 *
 * @param pQueryMsg  query message (tsLen, tsOffset, tsNumOfBlocks, tsOrder are read)
 * @param tsdb       storage handle passed on to doInitQInfo()
 * @param vgId       id passed on to doInitQInfo()
 * @param pQInfo     query handle; it is freed when doInitQInfo() fails
 * @param isSTable   passed on to doInitQInfo()
 * @param param      opaque value stored in pQInfo->param
 * @return TSDB_CODE_SUCCESS or the code returned by doInitQInfo()
 */
static int32_t initQInfo(SQueryTableMsg *pQueryMsg, void *tsdb, int32_t vgId, SQInfo *pQInfo, bool isSTable, void* param) {
  int32_t code = TSDB_CODE_SUCCESS;
  SQuery *pQuery = pQInfo->runtimeEnv.pQuery;

  if ((QUERY_IS_ASC_QUERY(pQuery) && (pQuery->window.skey > pQuery->window.ekey)) ||
      (!QUERY_IS_ASC_QUERY(pQuery) && (pQuery->window.ekey > pQuery->window.skey))) {
    qDebug("QInfo:%p no result in time range %" PRId64 "-%" PRId64 ", order %d", pQInfo, pQuery->window.skey,
           pQuery->window.ekey, pQuery->order.order);
    setQueryStatus(pQuery, QUERY_COMPLETED);
    pQInfo->tableqinfoGroupInfo.numOfTables = 0;
    sem_post(&pQInfo->dataReady);
    return TSDB_CODE_SUCCESS;
  }

  pQInfo->param = param;

  if (pQInfo->tableqinfoGroupInfo.numOfTables == 0) {
    qDebug("QInfo:%p no table qualified for tag filter, abort query", pQInfo);
    setQueryStatus(pQuery, QUERY_COMPLETED);

    sem_post(&pQInfo->dataReady);
    return TSDB_CODE_SUCCESS;
  }

  // The timestamp buffer is only consumed by doInitQInfo(); it is created after the early
  // exits above so that those paths do not drop a freshly created buffer.
  STSBuf *pTSBuf = NULL;
  if (pQueryMsg->tsLen > 0) {  // open new file to save the result
    char *tsBlock = (char *) pQueryMsg + pQueryMsg->tsOffset;
    pTSBuf = tsBufCreateFromCompBlocks(tsBlock, pQueryMsg->tsNumOfBlocks, pQueryMsg->tsLen, pQueryMsg->tsOrder);

    tsBufResetPos(pTSBuf);
    bool ret = tsBufNextPos(pTSBuf);
    UNUSED(ret);
  }

  // filter the qualified
  if ((code = doInitQInfo(pQInfo, pTSBuf, tsdb, vgId, isSTable)) != TSDB_CODE_SUCCESS) {
    goto _error;
  }

  return code;

_error:
  // table query ref will be decrease during error handling
  freeQInfo(pQInfo);
  return code;
}

B
Bomin Zhang 已提交
5823 5824 5825 5826 5827 5828 5829 5830 5831 5832 5833 5834
/**
 * Release an array of column filters: the string buffer (pz) of every entry flagged as a
 * string filter (filterstr), then the array itself.  A NULL array is ignored.
 *
 * @param pFilter       filter array, may be NULL
 * @param numOfFilters  number of entries in the array
 */
static void freeColumnFilterInfo(SColumnFilterInfo* pFilter, int32_t numOfFilters) {
  if (pFilter != NULL) {
    int32_t idx = 0;
    while (idx < numOfFilters) {
      SColumnFilterInfo* pOne = &pFilter[idx];
      if (pOne->filterstr) {
        free((void*)(pOne->pz));
      }
      idx += 1;
    }

    free(pFilter);
  }
}

H
hjxilinx 已提交
5835 5836 5837 5838
/**
 * Release a query handle and everything attached to it: output buffers, semaphore,
 * runtime environment, filter info, output expressions, fill values, table query info
 * objects and groups, group-by descriptor, tag/column lists and finally the query object
 * and the handle itself.  Handles that fail isValidQInfo() are ignored.
 */
static void freeQInfo(SQInfo *pQInfo) {
  if (!isValidQInfo(pQInfo)) {
    return;
  }

  SQuery *pQuery = pQInfo->runtimeEnv.pQuery;
  setQueryKilled(pQInfo);

  qDebug("QInfo:%p start to free QInfo", pQInfo);

  // per-column output buffers
  int32_t col = 0;
  while (col < pQuery->numOfOutput) {
    tfree(pQuery->sdata[col]);
    col += 1;
  }

  sem_destroy(&(pQInfo->dataReady));
  teardownQueryRuntimeEnv(&pQInfo->runtimeEnv);

  // filter elements of every filtered column
  for (int32_t k = 0; k < pQuery->numOfFilterCols; ++k) {
    if (pQuery->pFilterInfo[k].numOfFilters > 0) {
      tfree(pQuery->pFilterInfo[k].pFilters);
    }
  }

  // output expressions and their expression trees
  if (pQuery->pSelectExpr != NULL) {
    for (int32_t k = 0; k < pQuery->numOfOutput; ++k) {
      if (pQuery->pSelectExpr[k].pExpr == NULL) {
        continue;
      }

      tExprTreeDestroy(&pQuery->pSelectExpr[k].pExpr, NULL);
    }

    tfree(pQuery->pSelectExpr);
  }

  if (pQuery->fillVal != NULL) {
    tfree(pQuery->fillVal);
  }

  // todo refactor, extract method to destroytableDataInfo
  int32_t numOfGroups = GET_NUM_OF_TABLEGROUP(pQInfo);
  for (int32_t g = 0; g < numOfGroups; ++g) {
    SArray *pGroup = GET_TABLEGROUP(pQInfo, g);

    size_t numOfTables = taosArrayGetSize(pGroup);
    for(int32_t t = 0; t < numOfTables; ++t) {
      STableQueryInfo* pTableQueryInfo = taosArrayGetP(pGroup, t);
      if (pTableQueryInfo == NULL) {
        continue;
      }

      destroyTableQueryInfo(pTableQueryInfo, pQuery->numOfOutput);
    }

    taosArrayDestroy(pGroup);
  }

  tfree(pQInfo->pBuf);
  taosArrayDestroy(pQInfo->tableqinfoGroupInfo.pGroupList);
  taosHashCleanup(pQInfo->tableqinfoGroupInfo.map);
  tsdbDestoryTableGroup(&pQInfo->tableGroupInfo);
  taosArrayDestroy(pQInfo->arrTableIdInfo);

  if (pQuery->pGroupbyExpr != NULL) {
    taosArrayDestroy(pQuery->pGroupbyExpr->columnInfo);
    tfree(pQuery->pGroupbyExpr);
  }

  tfree(pQuery->tagColList);
  tfree(pQuery->pFilterInfo);

  // column list together with the filters owned by every column
  if (pQuery->colList != NULL) {
    for (int32_t k = 0; k < pQuery->numOfCols; k++) {
      freeColumnFilterInfo(pQuery->colList[k].filters, pQuery->colList[k].numOfFilters);
    }

    tfree(pQuery->colList);
  }

  tfree(pQuery->sdata);

  tfree(pQuery);

  qDebug("QInfo:%p QInfo is freed", pQInfo);

  // destroy signature, in order to avoid the query process pass the object safety check
  memset(pQInfo, 0, sizeof(SQInfo));
  tfree(pQInfo);
}

H
hjxilinx 已提交
5923
/**
 * Compute the size in bytes of the result that is going to be returned.
 *
 * For a ts-comp query with at least one row the result is the content of the file whose
 * path is stored in the first output buffer: the file size is returned and also written
 * back to *numOfRows.  For any other query the size is rowSize * (*numOfRows).
 * TODO handle the case that the file is too large to send back one time
 *
 * @param pQInfo     query handle
 * @param numOfRows  [in/out] number of rows; replaced by the file size for ts-comp queries
 * @return size in bytes, 0 when the ts-comp file cannot be inspected
 */
static size_t getResultSize(SQInfo *pQInfo, int64_t *numOfRows) {
  SQuery *pQuery = pQInfo->runtimeEnv.pQuery;

  if (!isTSCompQuery(pQuery) || (*numOfRows) <= 0) {
    return pQuery->rowSize * (*numOfRows);
  }

  struct stat fileStat;
  if (stat(pQuery->sdata[0]->data, &fileStat) != 0) {
    qError("QInfo:%p failed to get file info, path:%s, reason:%s", pQInfo, pQuery->sdata[0]->data, strerror(errno));
    return 0;
  }

  *numOfRows = fileStat.st_size;
  return fileStat.st_size;
}
5944

H
hjxilinx 已提交
5945 5946 5947
/**
 * Copy the current result of the query into the response buffer and update the query
 * status.
 *
 * For a ts-comp query the result is the content of the temporary file whose path is stored
 * in the first output buffer: the file is read into data and then removed.  For any other
 * query doCopyQueryResultToMsg() copies the result rows.  Afterwards the total row counter
 * is advanced and the query is marked QUERY_OVER once the limit is reached.
 *
 * @param pQInfo  query handle
 * @param data    destination buffer; NOTE(review): presumably sized by the caller from
 *                getResultSize() - confirm
 * @return always TSDB_CODE_SUCCESS; I/O failures are only logged
 *         (todo: return the error code to client)
 */
static int32_t doDumpQueryResult(SQInfo *pQInfo, char *data) {
  // the remained number of retrieved rows, not the interpolated result
  SQuery *pQuery = pQInfo->runtimeEnv.pQuery;

  // load data from file to msg buffer
  if (isTSCompQuery(pQuery)) {
    int32_t fd = open(pQuery->sdata[0]->data, O_RDONLY, 0666);

    // make sure file exist
    if (FD_VALID(fd)) {
      int32_t s = lseek(fd, 0, SEEK_END);

      if (s < 0 || lseek(fd, 0, SEEK_SET) < 0) {
        // never hand a failed (negative) size to read() as the byte count
        qError("QInfo:%p failed to seek ts-comp tmp file, path:%s, reason:%s", pQInfo, pQuery->sdata[0]->data,
               strerror(errno));
      } else {
        qDebug("QInfo:%p ts comp data return, file:%s, size:%d", pQInfo, pQuery->sdata[0]->data, s);

        // read() may deliver fewer bytes than requested, keep going until the file is consumed
        int32_t done = 0;
        while (done < s) {
          int64_t n = read(fd, data + done, (size_t)(s - done));
          if (n < 0) {
            if (errno == EINTR) {
              continue;
            }

            qError("QInfo:%p failed to read ts-comp tmp file, path:%s, reason:%s", pQInfo, pQuery->sdata[0]->data,
                   strerror(errno));
            break;
          }

          if (n == 0) {  // file is shorter than reported
            break;
          }

          done += (int32_t)n;
        }
      }

      close(fd);
      unlink(pQuery->sdata[0]->data);
    } else {
      // todo return the error code to client and handle invalid fd
      qError("QInfo:%p failed to open tmp file to send ts-comp data to client, path:%s, reason:%s", pQInfo,
             pQuery->sdata[0]->data, strerror(errno));
      if (fd != -1) {
        close(fd);
      }
    }

    // all data returned, set query over
    if (Q_STATUS_EQUAL(pQuery->status, QUERY_COMPLETED)) {
      setQueryStatus(pQuery, QUERY_OVER);
    }
  } else {
    doCopyQueryResultToMsg(pQInfo, pQuery->rec.rows, data);
  }

  pQuery->rec.total += pQuery->rec.rows;
  qDebug("QInfo:%p current numOfRes rows:%" PRId64 ", total:%" PRId64, pQInfo, pQuery->rec.rows, pQuery->rec.total);

  if (pQuery->limit.limit > 0 && pQuery->limit.limit == pQuery->rec.total) {
    qDebug("QInfo:%p results limitation reached, limitation:%"PRId64, pQInfo, pQuery->limit.limit);
    setQueryStatus(pQuery, QUERY_OVER);
  }

  return TSDB_CODE_SUCCESS;
}

5995 5996 5997 5998 5999 6000 6001
// Query management state: a pool of query handles together with an id, a closed flag and
// a mutex.  NOTE(review): the code using this structure is outside this part of the file;
// the field notes marked "presumably" are assumptions to be confirmed.
typedef struct SQueryMgmt {
  SCacheObj      *qinfoPool;      // query handle pool
  int32_t         vgId;           // presumably the same id that is passed around as vgId elsewhere in this file - TODO confirm
  bool            closed;         // presumably set once the manager has been shut down - TODO confirm
  pthread_mutex_t lock;           // presumably serializes access to the fields above - TODO confirm
} SQueryMgmt;

H
Haojun Liao 已提交
6002
/**
 * Create a query handle from a query message.
 *
 * The message is converted and validated, the output expressions and the group-by
 * descriptor are built, the tables to query are resolved from the storage engine
 * (single table, super table by tag condition, or an explicit table list) and finally the
 * handle is created and initialized.
 *
 * @param tsdb       storage handle, must not be NULL
 * @param vgId       id passed on to initQInfo()
 * @param pQueryMsg  query message, must not be NULL; the filters attached to its column
 *                   list are released before returning
 * @param param      opaque value passed on to initQInfo()
 * @param pQInfo     [out] the new handle on success, NULL on failure
 * @return TSDB_CODE_SUCCESS or an error code
 */
int32_t qCreateQueryInfo(void* tsdb, int32_t vgId, SQueryTableMsg* pQueryMsg, void* param, qinfo_t* pQInfo) {
  assert(pQueryMsg != NULL && tsdb != NULL);

  int32_t code = TSDB_CODE_SUCCESS;

  char *        tagCond = NULL, *tbnameCond = NULL;
  SArray *      pTableIdList = NULL;
  SSqlFuncMsg **pExprMsg = NULL;
  SColIndex *   pGroupColIndex = NULL;
  SColumnInfo*  pTagColumnInfo = NULL;
  SExprInfo     *pExprs = NULL;
  SSqlGroupbyExpr *pGroupbyExpr = NULL;

  if ((code = convertQueryMsg(pQueryMsg, &pTableIdList, &pExprMsg, &tagCond, &tbnameCond, &pGroupColIndex, &pTagColumnInfo)) !=
         TSDB_CODE_SUCCESS) {
    goto _over;
  }

  if (pQueryMsg->numOfTables <= 0) {
    qError("Invalid number of tables to query, numOfTables:%d", pQueryMsg->numOfTables);
    code = TSDB_CODE_QRY_INVALID_MSG;
    goto _over;
  }

  if (pTableIdList == NULL || taosArrayGetSize(pTableIdList) == 0) {
    qError("qmsg:%p, SQueryTableMsg wrong format", pQueryMsg);
    code = TSDB_CODE_QRY_INVALID_MSG;
    goto _over;
  }

  if ((code = createQFunctionExprFromMsg(pQueryMsg, &pExprs, pExprMsg, pTagColumnInfo)) != TSDB_CODE_SUCCESS) {
    goto _over;
  }

  pGroupbyExpr = createGroupbyExprFromMsg(pQueryMsg, pGroupColIndex, &code);
  if ((pGroupbyExpr == NULL && pQueryMsg->numOfGroupCols != 0) || code != TSDB_CODE_SUCCESS) {
    goto _over;
  }

  bool isSTableQuery = false;
  STableGroupInfo tableGroupInfo = {0};

  if (TSDB_QUERY_HAS_TYPE(pQueryMsg->queryType, TSDB_QUERY_TYPE_TABLE_QUERY)) {
    STableIdInfo *id = taosArrayGet(pTableIdList, 0);

    qDebug("qmsg:%p query normal table, uid:%"PRId64", tid:%d", pQueryMsg, id->uid, id->tid);
    if ((code = tsdbGetOneTableGroup(tsdb, id->uid, &tableGroupInfo)) != TSDB_CODE_SUCCESS) {
      goto _over;
    }
  } else if (TSDB_QUERY_HAS_TYPE(pQueryMsg->queryType, TSDB_QUERY_TYPE_MULTITABLE_QUERY|TSDB_QUERY_TYPE_STABLE_QUERY)) {
    isSTableQuery = true;
    // TODO: need a macro from TSDB to check if table is super table

    // also note there's possibility that only one table in the super table
    if (!TSDB_QUERY_HAS_TYPE(pQueryMsg->queryType, TSDB_QUERY_TYPE_MULTITABLE_QUERY)) {
      STableIdInfo *id = taosArrayGet(pTableIdList, 0);

      // group by normal column, do not pass the group by condition to tsdb to group table into different group
      int32_t numOfGroupByCols = pQueryMsg->numOfGroupCols;
      if (pQueryMsg->numOfGroupCols == 1 && !TSDB_COL_IS_TAG(pGroupColIndex->flag)) {
        numOfGroupByCols = 0;
      }

      code = tsdbQuerySTableByTagCond(tsdb, id->uid, tagCond, pQueryMsg->tagCondLen, pQueryMsg->tagNameRelType, tbnameCond, &tableGroupInfo, pGroupColIndex,
                                          numOfGroupByCols);
      if (code != TSDB_CODE_SUCCESS) {
        goto _over;
      }
    } else {
      code = tsdbGetTableGroupFromIdList(tsdb, pTableIdList, &tableGroupInfo);
      if (code != TSDB_CODE_SUCCESS) {
        goto _over;
      }

      qDebug("qmsg:%p query on %zu tables in one group from client", pQueryMsg, tableGroupInfo.numOfTables);
    }
  } else {
    // unknown query type: stop here even when assertions are compiled out, instead of
    // continuing with an empty table group
    assert(0);
    code = TSDB_CODE_QRY_INVALID_MSG;
    goto _over;
  }

  (*pQInfo) = createQInfoImpl(pQueryMsg, pTableIdList, pGroupbyExpr, pExprs, &tableGroupInfo, pTagColumnInfo);

  // these objects have been handed over to createQInfoImpl(); do not release them below
  pExprs = NULL;
  pGroupbyExpr = NULL;
  pTagColumnInfo = NULL;

  if ((*pQInfo) == NULL) {
    code = TSDB_CODE_QRY_OUT_OF_MEMORY;
    goto _over;
  }

  code = initQInfo(pQueryMsg, tsdb, vgId, *pQInfo, isSTableQuery, param);

_over:
  free(tagCond);
  free(tbnameCond);
  free(pGroupColIndex);
  if (pGroupbyExpr != NULL) {
    taosArrayDestroy(pGroupbyExpr->columnInfo);
    free(pGroupbyExpr);
  }
  free(pTagColumnInfo);

  // pExprs is only non-NULL here when it was built but never handed over; its arithmetic
  // expression trees have to be released together with the array
  if (pExprs != NULL) {
    for (int32_t i = 0; i < pQueryMsg->numOfOutput; ++i) {
      if (pExprs[i].pExpr != NULL) {
        tExprTreeDestroy(&pExprs[i].pExpr, NULL);
      }
    }

    free(pExprs);
  }

  free(pExprMsg);
  taosArrayDestroy(pTableIdList);

  for (int32_t i = 0; i < pQueryMsg->numOfCols; i++) {
    SColumnInfo* column = pQueryMsg->colList + i;
    freeColumnFilterInfo(column->filters, column->numOfFilters);
  }

  //pQInfo already freed in initQInfo, but *pQInfo may not pointer to null;
  if (code != TSDB_CODE_SUCCESS) {
    *pQInfo = NULL;
  }

  // if failed to add ref for all meters in this query, abort current query
  return code;
}

H
Haojun Liao 已提交
6126
/**
 * Destroy a query handle.
 *
 * Logs the completion, prints the query cost summary and frees the SQInfo.
 * A handle rejected by isValidQInfo() is left untouched.
 *
 * @param qHandle opaque query handle (an SQInfo pointer)
 */
void qDestroyQueryInfo(qinfo_t qHandle) {
  SQInfo* pInfo = (SQInfo*) qHandle;

  if (isValidQInfo(pInfo)) {
    qDebug("QInfo:%p query completed", pInfo);
    queryCostStatis(pInfo);   // print the query cost summary
    freeQInfo(pInfo);
  }
}

6137
/**
 * Run one execution round of a query and signal the waiting retriever.
 *
 * Every exit path taken after the handle has been validated posts the
 * dataReady semaphore, so that qRetrieveQueryResultInfo() is released.
 *
 * @param qinfo opaque query handle (an SQInfo pointer)
 */
void qTableQuery(qinfo_t qinfo) {
  SQInfo *pInfo = (SQInfo *)qinfo;

  // the signature field points back at the object while it is alive
  if (pInfo == NULL || pInfo->signature != pInfo) {
    qDebug("QInfo:%p has been freed, no need to execute", pInfo);
    return;
  }

  if (IS_QUERY_KILLED(pInfo)) {
    qDebug("QInfo:%p it is already killed, abort", pInfo);
    sem_post(&pInfo->dataReady);
    return;
  }

  if (pInfo->tableqinfoGroupInfo.numOfTables == 0) {
    qDebug("QInfo:%p no table exists for query, abort", pInfo);
    sem_post(&pInfo->dataReady);
    return;
  }

  // A non-zero value means a longjmp landed here: record the error code so
  // it can be returned to the client. The jump buffer must be armed in this
  // frame, which stays alive for the whole execution below.
  int32_t jmpCode = setjmp(pInfo->runtimeEnv.env);
  if (jmpCode != TSDB_CODE_SUCCESS) {
    pInfo->code = jmpCode;
    qDebug("QInfo:%p query abort due to error/cancel occurs, code:%s", pInfo, tstrerror(pInfo->code));
    sem_post(&pInfo->dataReady);
    return;
  }

  qDebug("QInfo:%p query task is launched", pInfo);

  SQueryRuntimeEnv* pEnv = &pInfo->runtimeEnv;

  // pick the executor that matches the kind of query
  if (onlyQueryTags(pEnv->pQuery)) {
    assert(pEnv->pQueryHandle == NULL);
    buildTagQueryResult(pInfo);   // todo support the limit/offset
  } else if (pEnv->stableQuery) {
    stableQueryImpl(pInfo);
  } else {
    tableQueryImpl(pInfo);
  }

  SQuery* pCurQuery = pEnv->pQuery;

  if (IS_QUERY_KILLED(pInfo)) {
    qDebug("QInfo:%p query is killed", pInfo);
  } else if (pCurQuery->rec.rows == 0) {
    qDebug("QInfo:%p over, %zu tables queried, %"PRId64" rows are returned", pInfo,
           pInfo->tableqinfoGroupInfo.numOfTables, pCurQuery->rec.total);
  } else {
    qDebug("QInfo:%p query paused, %" PRId64 " rows returned, numOfTotal:%" PRId64 " rows",
           pInfo, pCurQuery->rec.rows, pCurQuery->rec.total + pCurQuery->rec.rows);
  }

  sem_post(&pInfo->dataReady);
}

H
hjxilinx 已提交
6191
/**
 * Wait until the current execution round has produced its result and report
 * the query status code.
 *
 * @param qinfo opaque query handle (an SQInfo pointer)
 * @return TSDB_CODE_QRY_INVALID_QHANDLE for a NULL/invalid handle, otherwise
 *         the code stored in the query info. A killed query returns its code
 *         immediately, without waiting on the dataReady semaphore.
 */
int32_t qRetrieveQueryResultInfo(qinfo_t qinfo) {
  SQInfo *pInfo = (SQInfo *)qinfo;

  if (pInfo == NULL || !isValidQInfo(pInfo)) {
    return TSDB_CODE_QRY_INVALID_QHANDLE;
  }

  SQuery *pCurQuery = pInfo->runtimeEnv.pQuery;

  if (!IS_QUERY_KILLED(pInfo)) {
    // blocks until qTableQuery() posts the semaphore
    sem_wait(&pInfo->dataReady);
    qDebug("QInfo:%p retrieve result info, rowsize:%d, rows:%"PRId64", code:%d", pInfo, pCurQuery->rowSize,
           pCurQuery->rec.rows, pInfo->code);
  } else {
    qDebug("QInfo:%p query is killed, code:%d", pInfo, pInfo->code);
  }

  return pInfo->code;
}
6210

H
hjxilinx 已提交
6211
/**
 * Tell whether the client still has results to fetch for this query.
 *
 * @param qinfo opaque query handle (an SQInfo pointer)
 * @return false for a NULL/invalid handle, for a query whose code is not
 *         TSDB_CODE_SUCCESS, or once the status contains QUERY_OVER; true
 *         when the status contains QUERY_RESBUF_FULL or QUERY_COMPLETED.
 */
bool qHasMoreResultsToRetrieve(qinfo_t qinfo) {
  SQInfo *pQInfo = (SQInfo *)qinfo;

  // An invalid handle must not be dereferenced, not even for logging.
  if (pQInfo == NULL || !isValidQInfo(pQInfo)) {
    qDebug("QInfo:%p invalid qhandle, abort query", pQInfo);
    return false;
  }

  if (pQInfo->code != TSDB_CODE_SUCCESS) {
    qDebug("QInfo:%p invalid qhandle or error occurs, abort query, code:%x", pQInfo, pQInfo->code);
    return false;
  }

  SQuery *pQuery = pQInfo->runtimeEnv.pQuery;
  bool ret = false;

  // QUERY_OVER is tested first, so it wins when several status bits are set
  if (Q_STATUS_EQUAL(pQuery->status, QUERY_OVER)) {
    ret = false;
  } else if (Q_STATUS_EQUAL(pQuery->status, QUERY_RESBUF_FULL)) {
    ret = true;
  } else if (Q_STATUS_EQUAL(pQuery->status, QUERY_COMPLETED)) {
    ret = true;
  } else {
    assert(0);
  }

  if (ret) {
    qDebug("QInfo:%p has more results waits for client retrieve", pQInfo);
  }

  return ret;
}

6239 6240 6241
/**
 * Build the retrieve response message for the current result batch.
 *
 * The response buffer is sized for the result rows, one extra int32_t and
 * one STableIdInfo per entry of pQInfo->arrTableIdInfo, plus the
 * SRetrieveTableRsp header.
 *
 * @param qinfo   opaque query handle (an SQInfo pointer)
 * @param pRsp    out: buffer obtained from rpcMallocCont(); NULL when the
 *                allocation fails
 * @param contLen out: length of the buffer in bytes; 0 when the allocation
 *                fails
 * @return TSDB_CODE_QRY_INVALID_QHANDLE for a NULL/invalid handle,
 *         TSDB_CODE_QRY_OUT_OF_MEMORY when the buffer cannot be allocated,
 *         otherwise the query code or the result of doDumpQueryResult().
 */
int32_t qDumpRetrieveResult(qinfo_t qinfo, SRetrieveTableRsp **pRsp, int32_t *contLen) {
  SQInfo *pQInfo = (SQInfo *)qinfo;

  if (pQInfo == NULL || !isValidQInfo(pQInfo)) {
    return TSDB_CODE_QRY_INVALID_QHANDLE;
  }

  SQueryRuntimeEnv* pRuntimeEnv = &pQInfo->runtimeEnv;
  SQuery *pQuery = pQInfo->runtimeEnv.pQuery;

  size_t size = getResultSize(pQInfo, &pQuery->rec.rows);
  size += sizeof(int32_t);
  size += sizeof(STableIdInfo) * taosArrayGetSize(pQInfo->arrTableIdInfo);
  *contLen = (int32_t)(size + sizeof(SRetrieveTableRsp));

  *pRsp = (SRetrieveTableRsp *)rpcMallocCont(*contLen);
  if (*pRsp == NULL) {
    // nothing to fill in: report the failure instead of dereferencing NULL
    qDebug("QInfo:%p failed to allocate %d bytes for retrieve response", pQInfo, *contLen);
    *contLen = 0;
    return TSDB_CODE_QRY_OUT_OF_MEMORY;
  }

  (*pRsp)->numOfRows = htonl(pQuery->rec.rows);

  int32_t code = pQInfo->code;
  if (code == TSDB_CODE_SUCCESS) {
    (*pRsp)->offset = htobe64(pQuery->limit.offset);
    (*pRsp)->useconds = htobe64(pRuntimeEnv->summary.elapsedTime);
  } else {
    (*pRsp)->offset = 0;
    (*pRsp)->useconds = 0;
  }

  (*pRsp)->precision = htons(pQuery->precision);
  if (pQuery->rec.rows > 0 && code == TSDB_CODE_SUCCESS) {
    code = doDumpQueryResult(pQInfo, (*pRsp)->data);
  } else {
    // no rows or an earlier error: this query produces nothing more
    setQueryStatus(pQuery, QUERY_OVER);
    code = pQInfo->code;
  }

  if (IS_QUERY_KILLED(pQInfo) || Q_STATUS_EQUAL(pQuery->status, QUERY_OVER)) {
    (*pRsp)->completed = 1;  // notify no more result to client
  }

  return code;
}
H
hjxilinx 已提交
6280

H
Haojun Liao 已提交
6281
/**
 * Mark a query as killed.
 *
 * @param qinfo opaque query handle (an SQInfo pointer)
 * @return TSDB_CODE_SUCCESS when the kill flag was set through
 *         setQueryKilled(), TSDB_CODE_QRY_INVALID_QHANDLE for a NULL or
 *         invalid handle.
 */
int32_t qKillQuery(qinfo_t qinfo) {
  SQInfo *pInfo = (SQInfo *)qinfo;

  if (pInfo != NULL && isValidQInfo(pInfo)) {
    setQueryKilled(pInfo);
    return TSDB_CODE_SUCCESS;
  }

  return TSDB_CODE_QRY_INVALID_QHANDLE;
}

H
hjxilinx 已提交
6292 6293 6294
static void buildTagQueryResult(SQInfo* pQInfo) {
  SQueryRuntimeEnv *pRuntimeEnv = &pQInfo->runtimeEnv;
  SQuery *          pQuery = pRuntimeEnv->pQuery;
6295

H
Haojun Liao 已提交
6296
  size_t numOfGroup = GET_NUM_OF_TABLEGROUP(pQInfo);
H
Haojun Liao 已提交
6297
  assert(numOfGroup == 0 || numOfGroup == 1);
6298

H
Haojun Liao 已提交
6299
  if (numOfGroup == 0) {
6300 6301
    return;
  }
H
hjxilinx 已提交
6302
  
H
Haojun Liao 已提交
6303
  SArray* pa = GET_TABLEGROUP(pQInfo, 0);
6304

H
Haojun Liao 已提交
6305
  size_t num = taosArrayGetSize(pa);
6306
  assert(num == pQInfo->tableqinfoGroupInfo.numOfTables);
6307

H
Haojun Liao 已提交
6308
  int32_t count = 0;
6309 6310 6311
  int32_t functionId = pQuery->pSelectExpr[0].base.functionId;
  if (functionId == TSDB_FUNC_TID_TAG) { // return the tags & table Id
    assert(pQuery->numOfOutput == 1);
6312

6313 6314
    SExprInfo* pExprInfo = &pQuery->pSelectExpr[0];
    int32_t rsize = pExprInfo->bytes;
H
Haojun Liao 已提交
6315
    count = 0;
6316

H
Haojun Liao 已提交
6317 6318 6319 6320 6321 6322 6323 6324 6325 6326 6327
    int16_t bytes = pExprInfo->bytes;
    int16_t type = pExprInfo->type;

    for(int32_t i = 0; i < pQuery->numOfTags; ++i) {
      if (pQuery->tagColList[i].colId == pExprInfo->base.colInfo.colId) {
        bytes = pQuery->tagColList[i].bytes;
        type = pQuery->tagColList[i].type;
        break;
      }
    }

H
Haojun Liao 已提交
6328 6329
    while(pQInfo->tableIndex < num && count < pQuery->rec.capacity) {
      int32_t i = pQInfo->tableIndex++;
6330
      STableQueryInfo *item = taosArrayGetP(pa, i);
6331

6332
      char *output = pQuery->sdata[0]->data + i * rsize;
6333
      varDataSetLen(output, rsize - VARSTR_HEADER_SIZE);
6334

6335
      output = varDataVal(output);
H
Haojun Liao 已提交
6336
      STableId* id = TSDB_TABLEID(item->pTable);
6337

H
Haojun Liao 已提交
6338 6339
      *(int64_t *)output = id->uid;  // memory align problem, todo serialize
      output += sizeof(id->uid);
6340

H
Haojun Liao 已提交
6341 6342
      *(int32_t *)output = id->tid;
      output += sizeof(id->tid);
6343

6344
      *(int32_t *)output = pQInfo->vgId;
6345
      output += sizeof(pQInfo->vgId);
6346

6347
      if (pExprInfo->base.colInfo.colId == TSDB_TBNAME_COLUMN_INDEX) {
6348
        char *data = tsdbGetTableName(item->pTable);
6349
        memcpy(output, data, varDataTLen(data));
H
[td-90]  
Haojun Liao 已提交
6350
      } else {
6351
        char *val = tsdbGetTableTagVal(item->pTable, pExprInfo->base.colInfo.colId, type, bytes);
6352 6353 6354 6355 6356 6357 6358 6359

        // todo refactor
        if (type == TSDB_DATA_TYPE_BINARY || type == TSDB_DATA_TYPE_NCHAR) {
          if (val == NULL) {
            setVardataNull(output, type);
          } else {
            memcpy(output, val, varDataTLen(val));
          }
H
[td-90]  
Haojun Liao 已提交
6360
        } else {
6361 6362
          if (val == NULL) {
            setNull(output, type, bytes);
H
Haojun Liao 已提交
6363
          } else {  // todo here stop will cause client crash
6364 6365
            memcpy(output, val, bytes);
          }
H
[td-90]  
Haojun Liao 已提交
6366 6367
        }
      }
6368

H
Haojun Liao 已提交
6369
      count += 1;
6370
    }
6371

6372
    qDebug("QInfo:%p create (tableId, tag) info completed, rows:%d", pQInfo, count);
6373

H
Haojun Liao 已提交
6374 6375 6376 6377 6378
  } else if (functionId == TSDB_FUNC_COUNT) {// handle the "count(tbname)" query
    *(int64_t*) pQuery->sdata[0]->data = num;

    count = 1;
    pQInfo->tableIndex = num;  //set query completed
6379
    qDebug("QInfo:%p create count(tbname) query, res:%d rows:1", pQInfo, count);
6380
  } else {  // return only the tags|table name etc.
H
Haojun Liao 已提交
6381
    count = 0;
H
Haojun Liao 已提交
6382
    SSchema tbnameSchema = tGetTableNameColumnSchema();
H
Haojun Liao 已提交
6383 6384
    while(pQInfo->tableIndex < num && count < pQuery->rec.capacity) {
      int32_t i = pQInfo->tableIndex++;
6385

6386
      SExprInfo* pExprInfo = pQuery->pSelectExpr;
6387
      STableQueryInfo* item = taosArrayGetP(pa, i);
6388

6389 6390
      for(int32_t j = 0; j < pQuery->numOfOutput; ++j) {
        if (pExprInfo[j].base.colInfo.colId == TSDB_TBNAME_COLUMN_INDEX) {
6391
          char* data = tsdbGetTableName(item->pTable);
H
Haojun Liao 已提交
6392
          char* dst = pQuery->sdata[j]->data + count * tbnameSchema.bytes;
H
hjxilinx 已提交
6393
          memcpy(dst, data, varDataTLen(data));
H
[td-90]  
Haojun Liao 已提交
6394 6395 6396 6397
        } else {// todo refactor
          int16_t type = pExprInfo[j].type;
          int16_t bytes = pExprInfo[j].bytes;
          
6398
          char* data = tsdbGetTableTagVal(item->pTable, pExprInfo[j].base.colInfo.colId, type, bytes);
H
Haojun Liao 已提交
6399
          char* dst = pQuery->sdata[j]->data + count * pExprInfo[j].bytes;
6400

H
hjxilinx 已提交
6401
          if (type == TSDB_DATA_TYPE_BINARY || type == TSDB_DATA_TYPE_NCHAR) {
H
[td-90]  
Haojun Liao 已提交
6402 6403 6404 6405 6406
            if (data == NULL) {
              setVardataNull(dst, type);
            } else {
              memcpy(dst, data, varDataTLen(data));
            }
H
hjxilinx 已提交
6407
          } else {
H
[td-90]  
Haojun Liao 已提交
6408 6409 6410 6411 6412
            if (data == NULL) {
              setNull(dst, type, bytes);
            } else {
              memcpy(dst, data, pExprInfo[j].bytes);
            }
H
hjxilinx 已提交
6413
          }
6414
        }
H
hjxilinx 已提交
6415
      }
H
Haojun Liao 已提交
6416
      count += 1;
H
hjxilinx 已提交
6417
    }
6418

6419
    qDebug("QInfo:%p create tag values results completed, rows:%d", pQInfo, count);
H
hjxilinx 已提交
6420
  }
6421

H
Haojun Liao 已提交
6422
  pQuery->rec.rows = count;
H
hjxilinx 已提交
6423
  setQueryStatus(pQuery, QUERY_COMPLETED);
H
hjxilinx 已提交
6424 6425
}

6426 6427 6428 6429 6430 6431 6432
/**
 * Release callback handed to taosCacheInit() by qOpenQueryMgmt().
 *
 * @param qhandle address of a slot that stores the query handle; a NULL
 *                address or an empty slot is ignored. Otherwise the query is
 *                killed and then destroyed.
 */
void freeqinfoFn(void *qhandle) {
  void** pSlot = qhandle;

  if (pSlot != NULL && *pSlot != NULL) {
    qKillQuery(*pSlot);
    qDestroyQueryInfo(*pSlot);
  }
}

void* qOpenQueryMgmt(int32_t vgId) {
  const int32_t REFRESH_HANDLE_INTERVAL = 2; // every 2 seconds, refresh handle pool

  char cacheName[128] = {0};
  sprintf(cacheName, "qhandle_%d", vgId);

  SQueryMgmt* pQueryHandle = calloc(1, sizeof(SQueryMgmt));

  pQueryHandle->qinfoPool = taosCacheInit(TSDB_DATA_TYPE_BIGINT, REFRESH_HANDLE_INTERVAL, true, freeqinfoFn, cacheName);
  pQueryHandle->closed    = false;
  pthread_mutex_init(&pQueryHandle->lock, NULL);

  qDebug("vgId:%d, open querymgmt success", vgId);
  return pQueryHandle;
}

H
Haojun Liao 已提交
6452 6453 6454 6455 6456
/**
 * Callback given to taosCacheRefresh(): kill the query stored in a cache entry.
 *
 * Cache entries hold the query handle itself (see qRegisterQInfo(), which
 * stores POINTER_BYTES of it), and the other cache consumers in this file
 * (freeqinfoFn(), qAcquireQInfo()) receive the address of that slot. The
 * slot is therefore dereferenced here as well before calling qKillQuery().
 * NOTE(review): assumes taosCacheRefresh() passes the entry's data address,
 * like the free callback does -- confirm against tcache.
 */
static void queryMgmtKillQueryFn(void* handle) {
  void** pSlot = (void**) handle;
  if (pSlot == NULL || *pSlot == NULL) {
    return;
  }

  qKillQuery(*pSlot);
}

void qQueryMgmtNotifyClosed(void* pQMgmt) {
6457 6458 6459 6460 6461 6462 6463 6464 6465 6466 6467
  if (pQMgmt == NULL) {
    return;
  }

  SQueryMgmt* pQueryMgmt = pQMgmt;
  qDebug("vgId:%d, set querymgmt closed, wait for all queries cancelled", pQueryMgmt->vgId);

  pthread_mutex_lock(&pQueryMgmt->lock);
  pQueryMgmt->closed = true;
  pthread_mutex_unlock(&pQueryMgmt->lock);

H
Haojun Liao 已提交
6468
  taosCacheRefresh(pQueryMgmt->qinfoPool, queryMgmtKillQueryFn);
6469 6470 6471 6472 6473 6474 6475 6476 6477 6478 6479 6480 6481 6482 6483 6484 6485 6486 6487 6488 6489 6490
}

/**
 * Tear down a query manager: clean up its handle cache, destroy its lock and
 * free the manager itself.
 *
 * The manager must already be marked closed (asserted).
 *
 * @param pQMgmt manager returned by qOpenQueryMgmt(); NULL is ignored
 */
void qCleanupQueryMgmt(void* pQMgmt) {
  SQueryMgmt* pMgmtObj = pQMgmt;

  if (pMgmtObj != NULL) {
    // keep the id: the manager is gone by the time the final message is logged
    int32_t id = pMgmtObj->vgId;

    assert(pMgmtObj->closed);

    // detach the pool from the manager before cleaning it up
    SCacheObj* pPool = pMgmtObj->qinfoPool;
    pMgmtObj->qinfoPool = NULL;
    taosCacheCleanup(pPool);

    pthread_mutex_destroy(&pMgmtObj->lock);
    tfree(pMgmtObj);

    qDebug("vgId:%d querymgmt cleanup completed", id);
  }
}

6491
void** qRegisterQInfo(void* pMgmt, uint64_t qInfo) {
6492 6493 6494 6495
  if (pMgmt == NULL) {
    return NULL;
  }

6496 6497
  const int32_t DEFAULT_QHANDLE_LIFE_SPAN = tsShellActivityTimer * 2;

6498 6499 6500 6501 6502 6503 6504 6505 6506 6507 6508
  SQueryMgmt *pQueryMgmt = pMgmt;
  if (pQueryMgmt->qinfoPool == NULL) {
    return NULL;
  }

  pthread_mutex_lock(&pQueryMgmt->lock);
  if (pQueryMgmt->closed) {
    pthread_mutex_unlock(&pQueryMgmt->lock);

    return NULL;
  } else {
6509 6510 6511
    uint64_t handleVal = (uint64_t) qInfo;

    void** handle = taosCachePut(pQueryMgmt->qinfoPool, &handleVal, sizeof(int64_t), &qInfo, POINTER_BYTES, DEFAULT_QHANDLE_LIFE_SPAN);
6512 6513 6514 6515 6516 6517
    pthread_mutex_unlock(&pQueryMgmt->lock);

    return handle;
  }
}

6518
void** qAcquireQInfo(void* pMgmt, uint64_t key) {
6519 6520 6521 6522 6523 6524
  SQueryMgmt *pQueryMgmt = pMgmt;

  if (pQueryMgmt->qinfoPool == NULL || pQueryMgmt->closed) {
    return NULL;
  }

6525
  void** handle = taosCacheAcquireByKey(pQueryMgmt->qinfoPool, &key, sizeof(uint64_t));
6526 6527 6528 6529 6530 6531 6532 6533 6534 6535 6536 6537 6538 6539 6540 6541 6542 6543
  if (handle == NULL || *handle == NULL) {
    return NULL;
  } else {
    return handle;
  }
}

/**
 * Hand a handle obtained from qRegisterQInfo()/qAcquireQInfo() back to the
 * manager's cache.
 *
 * @param pMgmt    manager returned by qOpenQueryMgmt()
 * @param pQInfo   value forwarded to taosCacheRelease()
 * @param needFree forwarded to taosCacheRelease()
 * @return always NULL
 */
void** qReleaseQInfo(void* pMgmt, void* pQInfo, bool needFree) {
  SQueryMgmt *pQueryMgmt = pMgmt;

  // same NULL-manager guard as qRegisterQInfo()
  if (pQueryMgmt == NULL || pQueryMgmt->qinfoPool == NULL) {
    return NULL;
  }

  taosCacheRelease(pQueryMgmt->qinfoPool, pQInfo, needFree);
  return NULL;
}