qExecutor.c 219.7 KB
Newer Older
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15
/*
 * Copyright (c) 2019 TAOS Data, Inc. <jhtao@taosdata.com>
 *
 * This program is free software: you can use, redistribute, and/or modify
 * it under the terms of the GNU Affero General Public License, version 3
 * or later ("AGPL"), as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.
 *
 * You should have received a copy of the GNU Affero General Public License
 * along with this program. If not, see <http://www.gnu.org/licenses/>.
 */
#include "os.h"
16 17
#include "tcache.h"
#include "tglobal.h"
H
Haojun Liao 已提交
18
#include "qfill.h"
19
#include "taosmsg.h"
20

H
Haojun Liao 已提交
21
#include "exception.h"
22
#include "hash.h"
23 24
#include "qExecutor.h"
#include "qUtil.h"
25
#include "qresultBuf.h"
H
hjxilinx 已提交
26
#include "query.h"
S
slguan 已提交
27
#include "queryLog.h"
H
Haojun Liao 已提交
28 29
#include "qast.h"
#include "tfile.h"
30 31 32
#include "tlosertree.h"
#include "tscompression.h"
#include "ttime.h"
33 34 35 36 37 38 39 40 41

/**
 * Check whether the primary column is loaded by default; otherwise, the program is
 * forced to load the primary column explicitly.
 */
#define Q_STATUS_EQUAL(p, s) (((p) & (s)) != 0)
#define TSDB_COL_IS_TAG(f) (((f)&TSDB_COL_TAG) != 0)
#define QUERY_IS_ASC_QUERY(q) (GET_FORWARD_DIRECTION_FACTOR((q)->order.order) == QUERY_ASC_FORWARD_STEP)

42
#define IS_MASTER_SCAN(runtime)        ((runtime)->scanFlag == MASTER_SCAN)
H
hjxilinx 已提交
43
#define IS_REVERSE_SCAN(runtime)       ((runtime)->scanFlag == REVERSE_SCAN)
44
#define SET_MASTER_SCAN_FLAG(runtime)  ((runtime)->scanFlag = MASTER_SCAN)
H
hjxilinx 已提交
45
#define SET_REVERSE_SCAN_FLAG(runtime) ((runtime)->scanFlag = REVERSE_SCAN)
46

47
#define GET_QINFO_ADDR(x) ((void *)((char *)(x)-offsetof(SQInfo, runtimeEnv)))
48

49
#define GET_COL_DATA_POS(query, index, step) ((query)->pos + (index) * (step))
50
#define SWITCH_ORDER(n) (((n) = ((n) == TSDB_ORDER_ASC) ? TSDB_ORDER_DESC : TSDB_ORDER_ASC))
51

H
Haojun Liao 已提交
52 53
#define SDATA_BLOCK_INITIALIZER (SDataBlockInfo) {{0}, 0}

54 55
/* get the byte width / data type of the table column referenced by the colidx-th select expression */
#define GET_COLUMN_BYTES(query, colidx) \
56 57
  ((query)->colList[(query)->pSelectExpr[colidx].base.colInfo.colIndex].bytes)
#define GET_COLUMN_TYPE(query, colidx) ((query)->colList[(query)->pSelectExpr[colidx].base.colInfo.colIndex].type)
58

59
enum {
  /* set as soon as the query starts to execute */
  QUERY_NOT_COMPLETED = 1u << 0,

  /*
   * The result output buffer is full and the current query is paused.
   * This status only exists for group-by clauses and diff/add/division/multiply queries.
   */
  QUERY_RESBUF_FULL = 1u << 1,

  /*
   * The query is over:
   * 1. used by one-row-result queries, e.g., count/sum/first/last/avg, etc.
   * 2. also used once all data within the queried time window has been handled
   */
  QUERY_COMPLETED = 1u << 2,

  /*
   * The result has not been completely returned to the client yet; this status is
   * usually used for interval queries with the interpolation option.
   */
  QUERY_OVER = 1u << 3,
};
79 80

/* outcomes of comparing two timestamp join cursors */
enum {
  TS_JOIN_TS_EQUAL = 0,    // first enumerator pinned to 0, the rest count up from it
  TS_JOIN_TS_NOT_EQUALS,   // == 1
  TS_JOIN_TAG_NOT_EQUALS,  // == 2
};

86
typedef struct {
87 88 89 90 91 92
  int32_t     status;       // query status
  TSKEY       lastKey;      // the lastKey value before query executed
  STimeWindow w;            // whole query time window
  STimeWindow curWindow;    // current query window
  int32_t     windowIndex;  // index of active time window result for interval query
  STSCursor   cur;
93 94
} SQueryStatusInfo;

H
Haojun Liao 已提交
95
#if 0
/*
 * Disabled fault-injection allocators: each call fails (returns NULL) whenever
 * rand() % 5 is 0 or 1 and otherwise forwards to the real allocator. The two
 * #define lines below the functions reroute later malloc/calloc calls to them.
 */
static UNUSED_FUNC void *u_malloc(size_t size) {
  uint32_t dice = rand();
  return (dice % 5 <= 1) ? NULL : malloc(size);
}

static UNUSED_FUNC void *u_calloc(size_t num, size_t size) {
  uint32_t dice = rand();
  return (dice % 5 <= 1) ? NULL : calloc(num, size);
}

#define calloc  u_calloc
#define malloc  u_malloc
#endif
H
Haojun Liao 已提交
117

118
#define CLEAR_QUERY_STATUS(q, st)   ((q)->status &= (~(st)))
H
Haojun Liao 已提交
119 120 121
#define GET_NUM_OF_TABLEGROUP(q)    taosArrayGetSize((q)->tableqinfoGroupInfo.pGroupList)
#define GET_TABLEGROUP(q, _index)   ((SArray*) taosArrayGetP((q)->tableqinfoGroupInfo.pGroupList, (_index)))

122
static void setQueryStatus(SQuery *pQuery, int8_t status);
123

H
Haojun Liao 已提交
124
#define QUERY_IS_INTERVAL_QUERY(_q) ((_q)->intervalTime > 0)
125

H
hjxilinx 已提交
126
// todo move to utility
127
static int32_t mergeIntoGroupResultImpl(SQInfo *pQInfo, SArray *group);
128

H
hjxilinx 已提交
129
static void setWindowResOutputBuf(SQueryRuntimeEnv *pRuntimeEnv, SWindowResult *pResult);
H
Haojun Liao 已提交
130
static void setWindowResOutputBufInitCtx(SQueryRuntimeEnv *pRuntimeEnv, SWindowResult *pResult);
131 132 133
static void resetMergeResultBuf(SQuery *pQuery, SQLFunctionCtx *pCtx, SResultInfo *pResultInfo);
static bool functionNeedToExecute(SQueryRuntimeEnv *pRuntimeEnv, SQLFunctionCtx *pCtx, int32_t functionId);
static void getNextTimeWindow(SQuery *pQuery, STimeWindow *pTimeWindow);
134

135 136 137
static void setExecParams(SQuery *pQuery, SQLFunctionCtx *pCtx, void* inputData, TSKEY *tsCol, SDataBlockInfo* pBlockInfo,
                          SDataStatis *pStatis, void *param, int32_t colIndex);

138
static void initCtxOutputBuf(SQueryRuntimeEnv *pRuntimeEnv);
139
static void destroyTableQueryInfo(STableQueryInfo *pTableQueryInfo, int32_t numOfCols);
140 141
static void resetCtxOutputBuf(SQueryRuntimeEnv *pRuntimeEnv);
static bool hasMainOutput(SQuery *pQuery);
H
hjxilinx 已提交
142
static void buildTagQueryResult(SQInfo *pQInfo);
143

144
static int32_t setAdditionalInfo(SQInfo *pQInfo, void *pTable, STableQueryInfo *pTableQueryInfo);
145
static int32_t flushFromResultBuf(SQInfo *pQInfo);
146

147
/*
 * Evaluate the column filters of the query against the row at elemPos.
 *
 * Every filtered column must pass: a column passes as soon as one of its
 * filter elements accepts the value. A NULL value rejects the row.
 *
 * Returns true if the row passes all column filters, false otherwise.
 */
bool doFilterData(SQuery *pQuery, int32_t elemPos) {
  for (int32_t col = 0; col < pQuery->numOfFilterCols; ++col) {
    SSingleColumnFilterInfo *pInfo = &pQuery->pFilterInfo[col];
    char *pVal = pInfo->pData + pInfo->info.bytes * elemPos;

    // NULL values never qualify
    if (isNull(pVal, pInfo->info.type)) {
      return false;
    }

    // stop at the first filter element that accepts the value
    bool matched = false;
    for (int32_t f = 0; f < pInfo->numOfFilters && !matched; ++f) {
      SColumnFilterElem *pElem = &pInfo->pFilters[f];
      if (pElem->fp(pElem, pVal, pVal)) {
        matched = true;
      }
    }

    if (!matched) {
      return false;
    }
  }

  return true;
}

/*
 * Return the largest numOfRes found among the output contexts of the query.
 *
 * When the query has a main output (hasMainOutput), the ts/tag/tagprj functions
 * are ignored, since they cannot decide the number of output rows themselves.
 */
int64_t getNumOfResult(SQueryRuntimeEnv *pRuntimeEnv) {
  SQuery *pQuery = pRuntimeEnv->pQuery;
  bool    mainOutputExists = hasMainOutput(pQuery);
  int64_t numOfRes = 0;

  for (int32_t i = 0; i < pQuery->numOfOutput; ++i) {
    int32_t fid = pQuery->pSelectExpr[i].base.functionId;
    bool    auxiliary = (fid == TSDB_FUNC_TS) || (fid == TSDB_FUNC_TAG) || (fid == TSDB_FUNC_TAGPRJ);

    if (mainOutputExists && auxiliary) {
      continue;  // output count is decided by the main output
    }

    SResultInfo *pInfo = GET_RES_INFO(&pRuntimeEnv->pCtx[i]);
    if (pInfo != NULL && numOfRes < pInfo->numOfRes) {
      numOfRes = pInfo->numOfRes;
    }
  }

  assert(numOfRes >= 0);
  return numOfRes;
}

201 202 203 204 205 206 207 208 209
/*
 * The number of results needs to be updated after the offset value has been updated.
 *
 * Overwrites numOfRes of every output context with the given value, except for the
 * ts/tag/tagprj/ts_dummy functions. The previous value must be larger than numOfRes.
 */
void updateNumOfResult(SQueryRuntimeEnv *pRuntimeEnv, int32_t numOfRes) {
  SQuery *pQuery = pRuntimeEnv->pQuery;

  for (int32_t i = 0; i < pQuery->numOfOutput; ++i) {
    SQLFunctionCtx *pCtx = &pRuntimeEnv->pCtx[i];
    SResultInfo    *pInfo = GET_RES_INFO(pCtx);

    int16_t fid = pCtx->functionId;
    bool    untouched = (fid == TSDB_FUNC_TS) || (fid == TSDB_FUNC_TAG) || (fid == TSDB_FUNC_TAGPRJ) ||
                        (fid == TSDB_FUNC_TS_DUMMY);
    if (untouched) {
      continue;
    }

    assert(pInfo->numOfRes > numOfRes);
    pInfo->numOfRes = numOfRes;
  }
}

221 222 223 224 225 226 227 228 229
/* Map a group index to its result id: ids start at 200000 and are spaced 10000 apart. */
static int32_t getGroupResultId(int32_t groupIndex) {
  return 200000 + (groupIndex * 10000);
}

/*
 * Return true if the group-by expression contains a column flagged TSDB_COL_NORMAL.
 * A NULL expression or one without group columns yields false.
 */
bool isGroupbyNormalCol(SSqlGroupbyExpr *pGroupbyExpr) {
  if (pGroupbyExpr == NULL || pGroupbyExpr->numOfGroupCols == 0) {
    return false;
  }

  for (int32_t idx = 0; idx < pGroupbyExpr->numOfGroupCols; ++idx) {
    SColIndex *pCol = taosArrayGet(pGroupbyExpr->columnInfo, idx);
    if (pCol->flag != TSDB_COL_NORMAL) {
      continue;
    }

    /*
     * make sure the normal column is located at the second position if tbname
     * exists in the group by clause
     */
    assert(pGroupbyExpr->numOfGroupCols <= 1 || pCol->colIndex > 0);
    return true;
  }

  return false;
}

/*
 * Return the data type of the first TSDB_COL_NORMAL column of the group-by
 * expression, looked up by column id in pQuery->colList.
 * Returns TSDB_DATA_TYPE_NULL when no such column is found.
 */
int16_t getGroupbyColumnType(SQuery *pQuery, SSqlGroupbyExpr *pGroupbyExpr) {
  assert(pGroupbyExpr != NULL);

  // -2 is kept as the "no normal column found" marker
  int32_t targetColId = -2;

  for (int32_t g = 0; g < pGroupbyExpr->numOfGroupCols; ++g) {
    SColIndex *pCol = taosArrayGet(pGroupbyExpr->columnInfo, g);
    if (pCol->flag == TSDB_COL_NORMAL) {
      targetColId = pCol->colId;
      break;
    }
  }

  for (int32_t c = 0; c < pQuery->numOfCols; ++c) {
    if (pQuery->colList[c].colId == targetColId) {
      return pQuery->colList[c].type;
    }
  }

  return TSDB_DATA_TYPE_NULL;
}

/*
 * Return true if the select list has at least one tag_dummy/ts_dummy output
 * together with at least one function carrying TSDB_FUNCSTATE_SELECTIVITY.
 */
bool isSelectivityWithTagsQuery(SQuery *pQuery) {
  bool    tagSeen = false;
  int32_t selectivityCount = 0;

  for (int32_t i = 0; i < pQuery->numOfOutput; ++i) {
    int32_t fid = pQuery->pSelectExpr[i].base.functionId;

    if (fid == TSDB_FUNC_TAG_DUMMY || fid == TSDB_FUNC_TS_DUMMY) {
      tagSeen = true;
    } else if ((aAggs[fid].nStatus & TSDB_FUNCSTATE_SELECTIVITY) != 0) {
      ++selectivityCount;
    }
  }

  return (selectivityCount > 0) && tagSeen;
}

295
/* Return true if the first select expression is the TSDB_FUNC_TS_COMP function. */
bool isTSCompQuery(SQuery *pQuery) {
  int32_t firstFuncId = pQuery->pSelectExpr[0].base.functionId;
  return firstFuncId == TSDB_FUNC_TS_COMP;
}
296

297 298 299 300
/*
 * Apply the LIMIT clause to the rows produced in the current round.
 *
 * If a positive limit is set and rec.total + rec.rows exceeds it, rec.rows is cut
 * down so that the total equals the limit, the query is marked QUERY_COMPLETED and
 * true is returned. Otherwise nothing is changed and false is returned.
 */
static bool limitResults(SQueryRuntimeEnv* pRuntimeEnv) {
  SQInfo* pQInfo = GET_QINFO_ADDR(pRuntimeEnv);
  SQuery* pQuery = pRuntimeEnv->pQuery;

  // no limit set, or the limit is not exceeded yet
  if ((pQuery->limit.limit <= 0) || (pQuery->rec.total + pQuery->rec.rows <= pQuery->limit.limit)) {
    return false;
  }

  pQuery->rec.rows = pQuery->limit.limit - pQuery->rec.total;

  qDebug("QInfo:%p discard remain data due to result limitation, limit:%"PRId64", current return:%" PRId64 ", total:%"PRId64,
      pQInfo, pQuery->limit.limit, pQuery->rec.rows, pQuery->rec.total + pQuery->rec.rows);
  assert(pQuery->rec.rows >= 0);

  setQueryStatus(pQuery, QUERY_COMPLETED);
  return true;
}

/* Return true if any select expression is a TSDB_FUNC_TOP or TSDB_FUNC_BOTTOM function. */
static bool isTopBottomQuery(SQuery *pQuery) {
  for (int32_t i = 0; i < pQuery->numOfOutput; ++i) {
    switch (pQuery->pSelectExpr[i].base.functionId) {
      case TSDB_FUNC_TOP:
      case TSDB_FUNC_BOTTOM:
        return true;
      default:  // ts and every other function: keep looking
        break;
    }
  }

  return false;
}

H
Haojun Liao 已提交
329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346
/*
 * Return true if the query is a lone ts_comp query, or if any select expression
 * reads a column flagged as a tag.
 */
static bool hasTagValOutput(SQuery* pQuery) {
  // a single ts_comp output: the ts_comp column requires the tag value for the join filter
  if (pQuery->numOfOutput == 1 && pQuery->pSelectExpr[0].base.functionId == TSDB_FUNC_TS_COMP) {
    return true;
  }

  // otherwise look for a tag column, by which the results are aggregated
  for (int32_t i = 0; i < pQuery->numOfOutput; ++i) {
    if (TSDB_COL_IS_TAG(pQuery->pSelectExpr[i].base.colInfo.flag)) {
      return true;
    }
  }

  return false;
}

H
Haojun Liao 已提交
347
/*
 * Find the statistics entry that belongs to the column of select expression `index`.
 *
 * The entry is chosen by column id rather than by position, since the file block may
 * be out of date, i.e. the newest table schema may differ from the schema of the block.
 * Returns NULL for tag columns (no corresponding field info) and when no entry matches.
 *
 * TODO: speedup by using bsearch
 */
static SDataStatis *getStatisInfo(SQuery *pQuery, SDataStatis *pStatis, int32_t numOfCols, int32_t index) {
  SColIndex   *pCol = &pQuery->pSelectExpr[index].base.colInfo;
  SDataStatis *pFound = NULL;

  if (!TSDB_COL_IS_TAG(pCol->flag)) {
    for (int32_t k = 0; k < numOfCols && pFound == NULL; ++k) {
      if (pStatis[k].colId == pCol->colId) {
        pFound = &pStatis[k];
      }
    }
  }

  return pFound;
}

368 369 370 371 372 373 374 375
/**
 * @param pQuery
 * @param col
 * @param pDataBlockInfo
 * @param pStatis
 * @param pColStatis
 * @return
 */
H
Haojun Liao 已提交
376
static bool hasNullValue(SQuery *pQuery, int32_t col, int32_t numOfCols, SDataStatis *pStatis, SDataStatis **pColStatis) {
377
  SColIndex *pColIndex = &pQuery->pSelectExpr[col].base.colInfo;
378
  if (TSDB_COL_IS_TAG(pColIndex->flag)) {
379 380
    return false;
  }
381

382 383 384 385
  // query on primary timestamp column, not null value at all
  if (pColIndex->colId == PRIMARYKEY_TIMESTAMP_COL_INDEX) {
    return false;
  }
386

387
  if (pStatis != NULL) {
H
Haojun Liao 已提交
388
    *pColStatis = getStatisInfo(pQuery, pStatis, numOfCols, col);
H
hjxilinx 已提交
389 390
  } else {
    *pColStatis = NULL;
391
  }
392

393 394 395
  if ((*pColStatis) != NULL && (*pColStatis)->numOfNull == 0) {
    return false;
  }
396

397 398 399 400
  return true;
}

/*
 * Locate, or create during the master scan, the window result keyed by pData.
 *
 * The key is looked up in pWindowResInfo->hashList. On a hit, curIndex is set to
 * the stored slot. On a miss during the master scan a new slot is appended (the
 * result array is doubled first when it is full) and registered in the hash list.
 * On a miss in any other scan NULL is returned.
 *
 * If the result array cannot be enlarged, the function does not return: it leaves
 * through longjmp(pRuntimeEnv->env, TSDB_CODE_QRY_OUT_OF_MEMORY), the same
 * out-of-memory exit getDataBlock uses in this file.
 *
 * @param pRuntimeEnv     runtime environment of the query
 * @param pWindowResInfo  window result set to search and extend
 * @param pData           key bytes
 * @param bytes           length of the key in bytes
 * @param masterscan      true if new window results may be created
 * @return the window result at curIndex, or NULL if the key is unknown and
 *         masterscan is false
 */
static SWindowResult *doSetTimeWindowFromKey(SQueryRuntimeEnv *pRuntimeEnv, SWindowResInfo *pWindowResInfo, char *pData,
                                             int16_t bytes, bool masterscan) {
  SQuery *pQuery = pRuntimeEnv->pQuery;

  int32_t *p1 = (int32_t *) taosHashGet(pWindowResInfo->hashList, pData, bytes);
  if (p1 != NULL) {
    pWindowResInfo->curIndex = *p1;
  } else {
    if (!masterscan) {
      return NULL;
    }

    // more than the capacity, reallocate the resources
    if (pWindowResInfo->size >= pWindowResInfo->capacity) {
      int64_t newCap = pWindowResInfo->capacity * 2;

      char *t = realloc(pWindowResInfo->pResult, newCap * sizeof(SWindowResult));
      if (t == NULL) {
        /*
         * The old buffer is still valid and still owned by pWindowResInfo, but the
         * slots beyond the old capacity do not exist. Initializing them below would
         * write past the end of the buffer, so abort the query instead.
         */
        longjmp(pRuntimeEnv->env, TSDB_CODE_QRY_OUT_OF_MEMORY);
      }

      pWindowResInfo->pResult = (SWindowResult *)t;
      memset(&pWindowResInfo->pResult[pWindowResInfo->capacity], 0, sizeof(SWindowResult) * pWindowResInfo->capacity);

      // initialize only the newly added slots
      for (int32_t i = pWindowResInfo->capacity; i < newCap; ++i) {
        SPosInfo pos = {-1, -1};
        createQueryResultInfo(pQuery, &pWindowResInfo->pResult[i], pRuntimeEnv->stableQuery, &pos);
      }
      pWindowResInfo->capacity = newCap;
    }

    // add a new result set for a new group
    pWindowResInfo->curIndex = pWindowResInfo->size++;
    taosHashPut(pWindowResInfo->hashList, pData, bytes, (char *)&pWindowResInfo->curIndex, sizeof(int32_t));
  }

  return getWindowResult(pWindowResInfo, pWindowResInfo->curIndex);
}

/*
 * Get the time window that covers the handled timestamp ts.
 *
 * The starting point is the current window of pWindowResInfo, or, if there is none
 * yet (curIndex == -1), the window beginning at the stored prevSKey. If ts lies
 * outside of it, the window is moved in multiples of slidingTime until it covers ts.
 */
static STimeWindow getActiveTimeWindow(SWindowResInfo *pWindowResInfo, int64_t ts, SQuery *pQuery) {
  STimeWindow win = {0};

  if (pWindowResInfo->curIndex != -1) {
    int32_t slot = curTimeWindow(pWindowResInfo);
    win = getWindowResult(pWindowResInfo, slot)->window;
  } else {  // the first window, from the previous stored value
    win.skey = pWindowResInfo->prevSKey;
    win.ekey = win.skey + pQuery->intervalTime - 1;
  }

  bool covered = (win.skey <= ts) && (win.ekey >= ts);
  if (!covered) {
    int64_t start = win.skey;

    // window begins after ts: slide backwards
    if (start > ts) {
      start -= ((start - ts + pQuery->slidingTime - 1) / pQuery->slidingTime) * pQuery->slidingTime;
    }

    // window ends before ts: slide forwards
    int64_t end = start + pQuery->intervalTime - 1;
    if (end < ts) {
      start += ((ts - end + pQuery->slidingTime - 1) / pQuery->slidingTime) * pQuery->slidingTime;
    }

    win.skey = start;
    win.ekey = win.skey + pQuery->intervalTime - 1;
  }

  /*
   * Query border check: skey must not be bounded by the query time range, since it is
   * used as the time window index value. Only ekey is adjusted accordingly.
   */
  if (QUERY_IS_ASC_QUERY(pQuery) && win.ekey > pQuery->window.ekey) {
    win.ekey = pQuery->window.ekey;
  }

  assert(ts >= win.skey && ts <= win.ekey);

  return win;
}

/*
 * Assign a position (page id + row id) in the result buffer to a window result
 * that does not have one yet.
 *
 * The last page of group `sid` is reused while it holds fewer than numOfRowsPerPage
 * rows; otherwise a new page is requested.
 *
 * Returns 0 on success or if a position was already assigned, -1 if no page is available.
 */
static int32_t addNewWindowResultBuf(SWindowResult *pWindowRes, SDiskbasedResultBuf *pResultBuf, int32_t sid,
                                     int32_t numOfRowsPerPage) {
  if (pWindowRes->pos.pageId != -1) {
    return 0;
  }

  // in the first scan, new space is needed for the results
  tFilePage *pPage = NULL;
  int32_t    pageId = -1;
  SIDList    list = getDataBufPagesIdList(pResultBuf, sid);

  if (list.size != 0) {
    pageId = getLastPageId(&list);
    pPage = GET_RES_BUF_PAGE_BY_ID(pResultBuf, pageId);

    // the last page is full, move on to a fresh one
    if (pPage->num >= numOfRowsPerPage) {
      pPage = getNewDataBuf(pResultBuf, sid, &pageId);
      if (pPage != NULL) {
        assert(pPage->num == 0);  // number of elements must be 0 for a newly allocated buffer
      }
    }
  } else {
    pPage = getNewDataBuf(pResultBuf, sid, &pageId);
  }

  if (pPage == NULL) {
    return -1;
  }

  // not allocated yet: take the next row of the page, which also bumps its row count
  if (pWindowRes->pos.pageId == -1) {
    pWindowRes->pos.pageId = pageId;
    pWindowRes->pos.rowId = pPage->num++;
  }

  return 0;
}

static int32_t setWindowOutputBufByKey(SQueryRuntimeEnv *pRuntimeEnv, SWindowResInfo *pWindowResInfo, int32_t sid,
519
                                       STimeWindow *win, bool masterscan, bool* newWind) {
520 521
  assert(win->skey <= win->ekey);
  SDiskbasedResultBuf *pResultBuf = pRuntimeEnv->pResultBuf;
522

523 524
  SWindowResult *pWindowRes = doSetTimeWindowFromKey(pRuntimeEnv, pWindowResInfo, (char *)&win->skey,
      TSDB_KEYSIZE, masterscan);
525
  if (pWindowRes == NULL) {
526 527 528
    *newWind = false;

    return masterscan? -1:0;
529
  }
530

531
  *newWind = true;
532 533 534 535 536 537 538
  // not assign result buffer yet, add new result buffer
  if (pWindowRes->pos.pageId == -1) {
    int32_t ret = addNewWindowResultBuf(pWindowRes, pResultBuf, sid, pRuntimeEnv->numOfRowsPerPage);
    if (ret != 0) {
      return -1;
    }
  }
539

540 541
  // set time window for current result
  pWindowRes->window = *win;
542

H
Haojun Liao 已提交
543
  setWindowResOutputBufInitCtx(pRuntimeEnv, pWindowRes);
544 545 546 547 548 549 550 551
  return TSDB_CODE_SUCCESS;
}

/* Return the status of the window result stored in the given slot; slot must be in [0, size). */
static SWindowStatus *getTimeWindowResStatus(SWindowResInfo *pWindowResInfo, int32_t slot) {
  assert(slot >= 0 && slot < pWindowResInfo->size);

  SWindowResult *pRes = &pWindowResInfo->pResult[slot];
  return &pRes->status;
}

H
Haojun Liao 已提交
552
/*
 * Compute how many rows lie between pos and the position searchFn reports for ekey,
 * following the scan order.
 *
 * Returns 0 when searchFn reports a negative position. Otherwise the distance between
 * pos and that position is returned, plus one if the element found there equals ekey.
 */
static int32_t getForwardStepsInBlock(int32_t numOfRows, __block_search_fn_t searchFn, TSKEY ekey, int16_t pos,
                                      int16_t order, int64_t *pData) {
  int32_t endPos = searchFn((char *)pData, numOfRows, ekey, order);
  if (endPos < 0) {
    return 0;
  }

  int32_t steps = (order == TSDB_ORDER_ASC) ? (endPos - pos) : (pos - endPos);
  assert(steps >= 0);

  // the data at endPos equals the key, so the element at endPos has to be read as well
  return (pData[endPos] == ekey) ? (steps + 1) : steps;
}

/**
 * NOTE: the query status only set for the first scan of master scan.
 */
573
static int32_t doCheckQueryCompleted(SQueryRuntimeEnv *pRuntimeEnv, TSKEY lastKey, SWindowResInfo *pWindowResInfo) {
574
  SQuery *pQuery = pRuntimeEnv->pQuery;
H
Haojun Liao 已提交
575
  if (pRuntimeEnv->scanFlag != MASTER_SCAN || (!QUERY_IS_INTERVAL_QUERY(pQuery))) {
576
    return pWindowResInfo->size;
577
  }
578

579
  // no qualified results exist, abort check
580 581
  int32_t numOfClosed = 0;
  
582
  if (pWindowResInfo->size == 0) {
583
    return pWindowResInfo->size;
584
  }
585

586
  // query completed
H
hjxilinx 已提交
587 588
  if ((lastKey >= pQuery->current->win.ekey && QUERY_IS_ASC_QUERY(pQuery)) ||
      (lastKey <= pQuery->current->win.ekey && !QUERY_IS_ASC_QUERY(pQuery))) {
589
    closeAllTimeWindow(pWindowResInfo);
590

591 592 593 594
    pWindowResInfo->curIndex = pWindowResInfo->size - 1;
    setQueryStatus(pQuery, QUERY_COMPLETED | QUERY_RESBUF_FULL);
  } else {  // set the current index to be the last unclosed window
    int32_t i = 0;
595
    int64_t skey = TSKEY_INITIAL_VAL;
596

597 598 599
    for (i = 0; i < pWindowResInfo->size; ++i) {
      SWindowResult *pResult = &pWindowResInfo->pResult[i];
      if (pResult->status.closed) {
600
        numOfClosed += 1;
601 602
        continue;
      }
603

604 605 606 607 608 609 610 611
      if ((pResult->window.ekey <= lastKey && QUERY_IS_ASC_QUERY(pQuery)) ||
          (pResult->window.skey >= lastKey && !QUERY_IS_ASC_QUERY(pQuery))) {
        closeTimeWindow(pWindowResInfo, i);
      } else {
        skey = pResult->window.skey;
        break;
      }
    }
612

613
    // all windows are closed, set the last one to be the skey
614
    if (skey == TSKEY_INITIAL_VAL) {
615 616 617 618 619
      assert(i == pWindowResInfo->size);
      pWindowResInfo->curIndex = pWindowResInfo->size - 1;
    } else {
      pWindowResInfo->curIndex = i;
    }
620

621
    pWindowResInfo->prevSKey = pWindowResInfo->pResult[pWindowResInfo->curIndex].window.skey;
622

623 624
    // the number of completed slots are larger than the threshold, return current generated results to client.
    if (numOfClosed > pWindowResInfo->threshold) {
625
      qDebug("QInfo:%p total result window:%d closed:%d, reached the output threshold %d, return",
626 627
          GET_QINFO_ADDR(pRuntimeEnv), pWindowResInfo->size, numOfClosed, pQuery->rec.threshold);
      
628
      setQueryStatus(pQuery, QUERY_RESBUF_FULL);
629
    } else {
630
      qDebug("QInfo:%p total result window:%d already closed:%d", GET_QINFO_ADDR(pRuntimeEnv), pWindowResInfo->size,
631
             numOfClosed);
632 633
    }
  }
634 635 636 637 638 639 640
  
  // output has reached the limitation, set query completed
  if (pQuery->limit.limit > 0 && (pQuery->limit.limit + pQuery->limit.offset) <= numOfClosed &&
      pRuntimeEnv->scanFlag == MASTER_SCAN) {
    setQueryStatus(pQuery, QUERY_COMPLETED);
  }
  
641
  assert(pWindowResInfo->prevSKey != TSKEY_INITIAL_VAL);
642
  return numOfClosed;
643 644 645
}

/*
 * Compute the number of rows of the data block, starting from startPos in scan
 * direction, to be handled for a time window whose end key is ekey.
 *
 * If ekey lies inside the block, the count comes from getForwardStepsInBlock;
 * otherwise all remaining rows of the block in scan direction are counted.
 * With updateLastKey set, lastKey of the current table (pQuery->current) is moved
 * one step beyond the last counted row; it is left alone when no row qualifies.
 */
static int32_t getNumOfRowsInTimeWindow(SQuery *pQuery, SDataBlockInfo *pDataBlockInfo, TSKEY *pPrimaryColumn,
                                        int32_t startPos, TSKEY ekey, __block_search_fn_t searchFn, bool updateLastKey) {
  assert(startPos >= 0 && startPos < pDataBlockInfo->rows);

  int32_t num = -1;
  int32_t order = pQuery->order.order;
  int32_t step = GET_FORWARD_DIRECTION_FACTOR(order);
  bool    asc = QUERY_IS_ASC_QUERY(pQuery);

  STableQueryInfo* pTableInfo = pQuery->current;

  // does the window end before the block does (in scan direction)?
  bool endsInBlock = asc ? (ekey < pDataBlockInfo->window.ekey) : (ekey > pDataBlockInfo->window.skey);

  if (endsInBlock) {
    num = getForwardStepsInBlock(pDataBlockInfo->rows, searchFn, ekey, startPos, order, pPrimaryColumn);
    if (num == 0) {  // no qualified data in current block, do not update the lastKey value
      if (asc) {
        assert(ekey < pPrimaryColumn[startPos]);
      } else {
        assert(ekey > pPrimaryColumn[startPos]);
      }
    } else if (updateLastKey) {
      int32_t lastPos = asc ? (startPos + (num - 1)) : (startPos - (num - 1));
      pTableInfo->lastKey = pPrimaryColumn[lastPos] + step;
    }
  } else if (asc) {
    num = pDataBlockInfo->rows - startPos;
    if (updateLastKey) {
      pTableInfo->lastKey = pDataBlockInfo->window.ekey + step;
    }
  } else {
    num = startPos + 1;
    if (updateLastKey) {
      pTableInfo->lastKey = pDataBlockInfo->window.skey + step;
    }
  }

  assert(num >= 0);
  return num;
}

/*
 * Run every output function of the query over a range of rows at once.
 *
 * Nothing is done unless this is the master scan or the window is already closed.
 * For each output the context receives the window start key, the number of rows
 * (forwardStep) and the lowest row offset of the range. Selectivity functions also
 * get the timestamp list starting at `offset`. Pre-aggregated values are disabled
 * when the range does not cover all numOfTotal rows.
 */
static void doBlockwiseApplyFunctions(SQueryRuntimeEnv *pRuntimeEnv, SWindowStatus *pStatus, STimeWindow *pWin,
                                      int32_t offset, int32_t forwardStep, TSKEY *tsBuf, int32_t numOfTotal) {
  SQuery *        pQuery = pRuntimeEnv->pQuery;
  SQLFunctionCtx *pCtx = pRuntimeEnv->pCtx;

  if (!IS_MASTER_SCAN(pRuntimeEnv) && !pStatus->closed) {
    return;
  }

  for (int32_t i = 0; i < pQuery->numOfOutput; ++i) {
    SQLFunctionCtx *pCur = &pCtx[i];
    int32_t         fid = pQuery->pSelectExpr[i].base.functionId;

    pCur->nStartQueryTimestamp = pWin->skey;
    pCur->size = forwardStep;

    if (QUERY_IS_ASC_QUERY(pQuery)) {
      pCur->startOffset = offset;
    } else {
      pCur->startOffset = offset - (forwardStep - 1);
    }

    if ((aAggs[fid].nStatus & TSDB_FUNCSTATE_SELECTIVITY) != 0) {
      pCur->ptsList = &tsBuf[offset];
    }

    // not a whole block involved in query processing, statistics data can not be used
    if (forwardStep != numOfTotal) {
      pCur->preAggVals.isSet = false;
    }

    if (functionNeedToExecute(pRuntimeEnv, pCur, fid)) {
      aAggs[fid].xFunction(pCur);
    }
  }
}

/*
 * Run every output function of the query on the single row at `offset`.
 *
 * Nothing is done unless this is the master scan or the window is already closed.
 */
static void doRowwiseApplyFunctions(SQueryRuntimeEnv *pRuntimeEnv, SWindowStatus *pStatus, STimeWindow *pWin,
                                    int32_t offset) {
  SQuery *        pQuery = pRuntimeEnv->pQuery;
  SQLFunctionCtx *pCtx = pRuntimeEnv->pCtx;

  if (!IS_MASTER_SCAN(pRuntimeEnv) && !pStatus->closed) {
    return;
  }

  for (int32_t i = 0; i < pQuery->numOfOutput; ++i) {
    SQLFunctionCtx *pCur = &pCtx[i];
    pCur->nStartQueryTimestamp = pWin->skey;

    int32_t fid = pQuery->pSelectExpr[i].base.functionId;
    if (functionNeedToExecute(pRuntimeEnv, pCur, fid)) {
      aAggs[fid].xFunctionF(pCur, offset);
    }
  }
}

static int32_t getNextQualifiedWindow(SQueryRuntimeEnv *pRuntimeEnv, STimeWindow *pNextWin,
740 741
                                      SDataBlockInfo *pDataBlockInfo, TSKEY *primaryKeys,
                                      __block_search_fn_t searchFn) {
742
  SQuery *pQuery = pRuntimeEnv->pQuery;
743

H
Haojun Liao 已提交
744 745 746 747
  // tumbling time window query, a special case of sliding time window query
  if (pQuery->slidingTime == pQuery->intervalTime) {
    // todo opt
  }
748

H
Haojun Liao 已提交
749
  getNextTimeWindow(pQuery, pNextWin);
750

H
Haojun Liao 已提交
751 752 753 754 755
  // next time window is not in current block
  if ((pNextWin->skey > pDataBlockInfo->window.ekey && QUERY_IS_ASC_QUERY(pQuery)) ||
      (pNextWin->ekey < pDataBlockInfo->window.skey && !QUERY_IS_ASC_QUERY(pQuery))) {
    return -1;
  }
756

H
Haojun Liao 已提交
757 758 759 760 761
  TSKEY startKey = -1;
  if (QUERY_IS_ASC_QUERY(pQuery)) {
    startKey = pNextWin->skey;
    if (startKey < pQuery->window.skey) {
      startKey = pQuery->window.skey;
762
    }
H
Haojun Liao 已提交
763 764 765 766
  } else {
    startKey = pNextWin->ekey;
    if (startKey > pQuery->window.skey) {
      startKey = pQuery->window.skey;
767
    }
H
Haojun Liao 已提交
768
  }
769

H
Haojun Liao 已提交
770
  int32_t startPos = searchFn((char *)primaryKeys, pDataBlockInfo->rows, startKey, pQuery->order.order);
771

H
Haojun Liao 已提交
772 773 774 775 776 777
  /*
   * This time window does not cover any data, try next time window,
   * this case may happen when the time window is too small
   */
  if (QUERY_IS_ASC_QUERY(pQuery) && primaryKeys[startPos] > pNextWin->ekey) {
    TSKEY next = primaryKeys[startPos];
778

H
Haojun Liao 已提交
779 780 781 782
    pNextWin->ekey += ((next - pNextWin->ekey + pQuery->slidingTime - 1)/pQuery->slidingTime) * pQuery->slidingTime;
    pNextWin->skey = pNextWin->ekey - pQuery->intervalTime + 1;
  } else if ((!QUERY_IS_ASC_QUERY(pQuery)) && primaryKeys[startPos] < pNextWin->skey) {
    TSKEY next = primaryKeys[startPos];
783

H
Haojun Liao 已提交
784 785
    pNextWin->skey -= ((pNextWin->skey - next + pQuery->slidingTime - 1) / pQuery->slidingTime) * pQuery->slidingTime;
    pNextWin->ekey = pNextWin->skey + pQuery->intervalTime - 1;
786
  }
787

H
Haojun Liao 已提交
788
  return startPos;
789 790 791 792 793 794 795 796 797 798 799 800 801 802 803
}

/*
 * Return the end key of the window in scan direction, bounded by the end key of the
 * query range: the window's ekey capped at pQuery->window.ekey for ascending queries,
 * the window's skey floored at pQuery->window.ekey for descending ones.
 */
static TSKEY reviseWindowEkey(SQuery *pQuery, STimeWindow *pWindow) {
  TSKEY bound = pQuery->window.ekey;

  if (QUERY_IS_ASC_QUERY(pQuery)) {
    return (pWindow->ekey > bound) ? bound : pWindow->ekey;
  }

  return (pWindow->skey < bound) ? bound : pWindow->skey;
}

H
hjxilinx 已提交
808 809 810 811 812 813 814 815 816 817 818 819 820 821 822
/*
 * Return the data pointer of the column with the given id in the data block,
 * or NULL if the block holds no such column.
 *
 * todo: binary search
 */
static void* getDataBlockImpl(SArray* pDataBlock, int32_t colId) {
  int32_t total = taosArrayGetSize(pDataBlock);

  int32_t idx = 0;
  while (idx < total) {
    SColumnInfoData *pColData = taosArrayGet(pDataBlock, idx);
    if (pColData->info.colId == colId) {
      return pColData->pData;
    }
    ++idx;
  }

  return NULL;
}

static char *getDataBlock(SQueryRuntimeEnv *pRuntimeEnv, SArithmeticSupport *sas, int32_t col, int32_t size,
823
                    SArray *pDataBlock) {
dengyihao's avatar
dengyihao 已提交
824 825 826
  if (pDataBlock == NULL) {
    return NULL;
  }
827
  char *dataBlock = NULL;
828

H
Haojun Liao 已提交
829
  SQuery *pQuery = pRuntimeEnv->pQuery;
830
  SQLFunctionCtx *pCtx = pRuntimeEnv->pCtx;
831

832
  int32_t functionId = pQuery->pSelectExpr[col].base.functionId;
833
  if (functionId == TSDB_FUNC_ARITHM) {
834
    sas->pArithExpr = &pQuery->pSelectExpr[col];
835

836 837 838 839 840 841
    // set the start offset to be the lowest start position, no matter asc/desc query order
    if (QUERY_IS_ASC_QUERY(pQuery)) {
      pCtx->startOffset = pQuery->pos;
    } else {
      pCtx->startOffset = pQuery->pos - (size - 1);
    }
842

843 844 845 846
    sas->offset  = 0;
    sas->colList = pQuery->colList;
    sas->numOfCols = pQuery->numOfCols;
    sas->data    = calloc(pQuery->numOfCols, POINTER_BYTES);
847

H
Haojun Liao 已提交
848 849 850 851
    if (sas->data == NULL) {
      longjmp(pRuntimeEnv->env, TSDB_CODE_QRY_OUT_OF_MEMORY);
    }

852
    // here the pQuery->colList and sas->colList are identical
H
Haojun Liao 已提交
853
    int32_t numOfCols = taosArrayGetSize(pDataBlock);
854
    for (int32_t i = 0; i < pQuery->numOfCols; ++i) {
855
      SColumnInfo *pColMsg = &pQuery->colList[i];
856

857 858 859 860 861 862 863 864
      dataBlock = NULL;
      for (int32_t k = 0; k < numOfCols; ++k) {  //todo refactor
        SColumnInfoData *p = taosArrayGet(pDataBlock, k);
        if (pColMsg->colId == p->info.colId) {
          dataBlock = p->pData;
          break;
        }
      }
865

866
      assert(dataBlock != NULL);
H
Haojun Liao 已提交
867
      sas->data[i] = dataBlock/* + pQuery->colList[i].bytes*/;  // start from the offset
868
    }
869

870
  } else {  // other type of query function
871
    SColIndex *pCol = &pQuery->pSelectExpr[col].base.colInfo;
872
    if (TSDB_COL_IS_TAG(pCol->flag) || pDataBlock == NULL) {
873 874
      dataBlock = NULL;
    } else {
H
hjxilinx 已提交
875
      dataBlock = getDataBlockImpl(pDataBlock, pCol->colId);
876 877
    }
  }
878

879 880 881 882
  return dataBlock;
}

/**
H
Haojun Liao 已提交
883
 * todo set the last value for pQueryTableInfo as in rowwiseapplyfunctions
884 885
 * @param pRuntimeEnv
 * @param forwardStep
886
 * @param tsCols
887 888 889 890 891
 * @param pFields
 * @param isDiskFileBlock
 * @return                  the incremental number of output value, so it maybe 0 for fixed number of query,
 *                          such as count/min/max etc.
 */
892
static void blockwiseApplyFunctions(SQueryRuntimeEnv *pRuntimeEnv, SDataStatis *pStatis,
893 894
                                       SDataBlockInfo *pDataBlockInfo, SWindowResInfo *pWindowResInfo,
                                       __block_search_fn_t searchFn, SArray *pDataBlock) {
895
  SQLFunctionCtx *pCtx = pRuntimeEnv->pCtx;
896 897
  bool masterScan = IS_MASTER_SCAN(pRuntimeEnv);

898 899
  SQuery *pQuery = pRuntimeEnv->pQuery;
  TSKEY  *tsCols = NULL;
900
  if (pDataBlock != NULL) {
901
    SColumnInfoData* pColInfo = taosArrayGet(pDataBlock, 0);
902
    tsCols = (TSKEY *)(pColInfo->pData);
903
  }
904

905
  SArithmeticSupport *sasArray = calloc((size_t)pQuery->numOfOutput, sizeof(SArithmeticSupport));
H
Haojun Liao 已提交
906 907 908
  if (sasArray == NULL) {
    longjmp(pRuntimeEnv->env, TSDB_CODE_QRY_OUT_OF_MEMORY);
  }
909

910
  for (int32_t k = 0; k < pQuery->numOfOutput; ++k) {
H
hjxilinx 已提交
911
    char *dataBlock = getDataBlock(pRuntimeEnv, &sasArray[k], k, pDataBlockInfo->rows, pDataBlock);
912
    setExecParams(pQuery, &pCtx[k], dataBlock, tsCols, pDataBlockInfo, pStatis, &sasArray[k], k);
913
  }
914

915
  int32_t step = GET_FORWARD_DIRECTION_FACTOR(pQuery->order.order);
H
Haojun Liao 已提交
916
  if (QUERY_IS_INTERVAL_QUERY(pQuery) && tsCols != NULL) {
917
    int32_t offset = GET_COL_DATA_POS(pQuery, 0, step);
918
    TSKEY   ts = tsCols[offset];
919

920
    bool hasTimeWindow = false;
921
    STimeWindow win = getActiveTimeWindow(pWindowResInfo, ts, pQuery);
922
    if (setWindowOutputBufByKey(pRuntimeEnv, pWindowResInfo, pDataBlockInfo->tid, &win, masterScan, &hasTimeWindow) != TSDB_CODE_SUCCESS) {
dengyihao's avatar
dengyihao 已提交
923
      tfree(sasArray);
H
hjxilinx 已提交
924
      return;
925
    }
926

927 928 929 930
    if (hasTimeWindow) {
      TSKEY   ekey = reviseWindowEkey(pQuery, &win);
      int32_t forwardStep =
          getNumOfRowsInTimeWindow(pQuery, pDataBlockInfo, tsCols, pQuery->pos, ekey, searchFn, true);
931

932 933 934
      SWindowStatus *pStatus = getTimeWindowResStatus(pWindowResInfo, curTimeWindow(pWindowResInfo));
      doBlockwiseApplyFunctions(pRuntimeEnv, pStatus, &win, pQuery->pos, forwardStep, tsCols, pDataBlockInfo->rows);
    }
935

936 937
    int32_t     index = pWindowResInfo->curIndex;
    STimeWindow nextWin = win;
938

939
    while (1) {
940
      int32_t startPos = getNextQualifiedWindow(pRuntimeEnv, &nextWin, pDataBlockInfo, tsCols, searchFn);
941 942 943
      if (startPos < 0) {
        break;
      }
944

945
      // null data, failed to allocate more memory buffer
H
Haojun Liao 已提交
946
      hasTimeWindow = false;
947
      if (setWindowOutputBufByKey(pRuntimeEnv, pWindowResInfo, pDataBlockInfo->tid, &nextWin, masterScan, &hasTimeWindow) != TSDB_CODE_SUCCESS) {
948 949
        break;
      }
950

951 952 953 954 955 956
      if (!hasTimeWindow) {
        continue;
      }

      TSKEY ekey = reviseWindowEkey(pQuery, &nextWin);
      int32_t forwardStep = getNumOfRowsInTimeWindow(pQuery, pDataBlockInfo, tsCols, startPos, ekey, searchFn, true);
957

958
      SWindowStatus* pStatus = getTimeWindowResStatus(pWindowResInfo, curTimeWindow(pWindowResInfo));
959
      doBlockwiseApplyFunctions(pRuntimeEnv, pStatus, &nextWin, startPos, forwardStep, tsCols, pDataBlockInfo->rows);
960
    }
961

962 963 964 965 966 967 968
    pWindowResInfo->curIndex = index;
  } else {
    /*
     * the sqlfunctionCtx parameters should be set done before all functions are invoked,
     * since the selectivity + tag_prj query needs all parameters been set done.
     * tag_prj function are changed to be TSDB_FUNC_TAG_DUMMY
     */
969
    for (int32_t k = 0; k < pQuery->numOfOutput; ++k) {
970
      int32_t functionId = pQuery->pSelectExpr[k].base.functionId;
971 972 973 974 975
      if (functionNeedToExecute(pRuntimeEnv, &pCtx[k], functionId)) {
        aAggs[functionId].xFunction(&pCtx[k]);
      }
    }
  }
976

977 978 979 980
  for(int32_t i = 0; i < pQuery->numOfOutput; ++i) {
    if (pQuery->pSelectExpr[i].base.functionId != TSDB_FUNC_ARITHM) {
      continue;
    }
981

982 983
    tfree(sasArray[i].data);
  }
984

985 986 987 988 989 990 991
  tfree(sasArray);
}

/*
 * Select (and create if needed) the result buffer of the group identified by the
 * group-by column value pData, and make it the current output buffer.
 *
 * Returns TSDB_CODE_SUCCESS on success. Returns -1 when the value is NULL, when no
 * window result can be obtained for the key, or when no result page can be added.
 */
static int32_t setGroupResultOutputBuf(SQueryRuntimeEnv *pRuntimeEnv, char *pData, int16_t type, int16_t bytes) {
  // rows with a null group-by value are ignored
  if (isNull(pData, type)) {
    return -1;
  }

  const int32_t        groupResultId = 1;
  SDiskbasedResultBuf *pBuf = pRuntimeEnv->pResultBuf;

  // integer view of the group key; stays -1 for every non-integer type
  int64_t keyVal = -1;
  if (type == TSDB_DATA_TYPE_BOOL || type == TSDB_DATA_TYPE_TINYINT) {
    keyVal = GET_INT8_VAL(pData);
  } else if (type == TSDB_DATA_TYPE_SMALLINT) {
    keyVal = GET_INT16_VAL(pData);
  } else if (type == TSDB_DATA_TYPE_INT) {
    keyVal = GET_INT32_VAL(pData);
  } else if (type == TSDB_DATA_TYPE_BIGINT) {
    keyVal = GET_INT64_VAL(pData);
  }

  SWindowResult *pGroupRes = doSetTimeWindowFromKey(pRuntimeEnv, &pRuntimeEnv->windowResInfo, pData, bytes, true);
  if (pGroupRes == NULL) {
    return -1;
  }

  pGroupRes->window.skey = keyVal;
  pGroupRes->window.ekey = keyVal;

  // no result page assigned to this group yet, add a new one
  if (pGroupRes->pos.pageId == -1 &&
      addNewWindowResultBuf(pGroupRes, pBuf, groupResultId, pRuntimeEnv->numOfRowsPerPage) != 0) {
    return -1;
  }

  setWindowResOutputBuf(pRuntimeEnv, pGroupRes);
  initCtxOutputBuf(pRuntimeEnv);
  return TSDB_CODE_SUCCESS;
}

1028
/*
 * Locate the data of the normal (non-tag) group-by column inside the data block.
 *
 * For each non-tag group-by column, *type and *bytes are set from the matching
 * entry in pQuery->colList. The data pointer of the first such column found in
 * pDataBlock is returned; NULL is returned when none is present in the block.
 */
static char *getGroupbyColumnData(SQuery *pQuery, int16_t *type, int16_t *bytes, SArray* pDataBlock) {
  SSqlGroupbyExpr *pGroupby = pQuery->pGroupbyExpr;

  for (int32_t g = 0; g < pGroupby->numOfGroupCols; ++g) {
    SColIndex* pGroupCol = taosArrayGet(pGroupby->columnInfo, g);
    if (pGroupCol->flag == TSDB_COL_TAG) {
      continue;
    }

    // find the slot of this column in the query column list
    int32_t wantedId = pGroupCol->colId;
    int16_t slot = -1;
    for (int32_t c = 0; c < pQuery->numOfCols; ++c) {
      if (pQuery->colList[c].colId != wantedId) {
        continue;
      }

      slot = c;
      break;
    }

    assert(slot >= 0 && slot < pQuery->numOfCols);

    *type = pQuery->colList[slot].type;
    *bytes = pQuery->colList[slot].bytes;

    /*
     * The slot comes from the query column list, but the block at hand is not
     * guaranteed to carry the column, so the block is searched by column id
     * rather than indexed by slot.
     */
    int32_t blockCols = taosArrayGetSize(pDataBlock);
    for (int32_t b = 0; b < blockCols; ++b) {
      SColumnInfoData *pColData = taosArrayGet(pDataBlock, b);
      if (pColData->info.colId == pGroupCol->colId) {
        return pColData->pData;
      }
    }
  }

  return NULL;
}

/*
 * Compare the row at `offset` of the current input with the current element of
 * the timestamp buffer (pRuntimeEnv->pTSBuf).
 *
 * Returns TS_JOIN_TAG_NOT_EQUALS when the tags differ, TS_JOIN_TS_NOT_EQUALS when
 * the row has not yet reached the buffered timestamp in scan direction, and
 * TS_JOIN_TS_EQUAL when both timestamps match. A row that is already past the
 * buffered timestamp is treated as a broken invariant (assert).
 */
static int32_t doTSJoinFilter(SQueryRuntimeEnv *pRuntimeEnv, int32_t offset) {
  SQuery         *pQuery = pRuntimeEnv->pQuery;
  SQLFunctionCtx *pCtx = pRuntimeEnv->pCtx;
  STSElem         elem = tsBufGetElem(pRuntimeEnv->pTSBuf);

  // the tag has to match before the timestamps are worth comparing
  if (pCtx[0].tag.i64Key != elem.tag) {
    return TS_JOIN_TAG_NOT_EQUALS;
  }

  TSKEY key = *(TSKEY *)(pCtx[0].aInputElemBuf + TSDB_KEYSIZE * offset);

#if defined(_DEBUG_VIEW)
  printf("elem in comp ts file:%" PRId64 ", key:%" PRId64 ", tag:%"PRIu64", query order:%d, ts order:%d, traverse:%d, index:%d\n",
         elem.ts, key, elem.tag, pQuery->order.order, pRuntimeEnv->pTSBuf->tsOrder,
         pRuntimeEnv->pTSBuf->cur.order, pRuntimeEnv->pTSBuf->cur.tsIndex);
#endif

  if (key == elem.ts) {
    return TS_JOIN_TS_EQUAL;
  }

  // "behind" means the scan has not reached the buffered timestamp yet
  bool behind = QUERY_IS_ASC_QUERY(pQuery) ? (key < elem.ts) : (key > elem.ts);
  if (behind) {
    return TS_JOIN_TS_NOT_EQUALS;
  }

  // the scan must never overtake the timestamp buffer
  assert(false);
  return TS_JOIN_TS_EQUAL;
}

/*
 * Decide whether the function `functionId` has to be invoked for the current
 * scan of the data.
 */
static bool functionNeedToExecute(SQueryRuntimeEnv *pRuntimeEnv, SQLFunctionCtx *pCtx, int32_t functionId) {
  SQuery      *pQuery = pRuntimeEnv->pQuery;
  SResultInfo *pResInfo = GET_RES_INFO(pCtx);

  // the timestamp column always generates results
  if (functionId == TSDB_FUNC_TS) {
    return true;
  }

  // finished results are not touched again
  if (pResInfo->complete) {
    return false;
  }

  // dummy functions never run on their own
  if (functionId == TSDB_FUNC_TAG_DUMMY || functionId == TSDB_FUNC_TS_DUMMY) {
    return false;
  }

  // first/first_dst only run while scanning in ascending order
  if (functionId == TSDB_FUNC_FIRST_DST || functionId == TSDB_FUNC_FIRST) {
    return QUERY_IS_ASC_QUERY(pQuery);
  }

  // last/last_dst only run when the order stored in their first parameter equals the query order
  // todo add comments
  if (functionId == TSDB_FUNC_LAST_DST || functionId == TSDB_FUNC_LAST) {
    return pCtx->param[0].i64Key == pQuery->order.order;
  }

  // every remaining function is skipped during the reverse (supplementary) scan
  return !IS_REVERSE_SCAN(pRuntimeEnv);
}

1135 1136
/*
 * Apply all output functions to a data block one row at a time. This path is
 * used when rows have to be inspected individually: column filters, timestamp
 * join against pRuntimeEnv->pTSBuf, or group by a normal column.
 *
 * After the scan the last key of the current table is set to one step past the
 * last visited row (or past the block boundary when no timestamp column is loaded).
 *
 * If the support array cannot be allocated the function longjmp()s to
 * pRuntimeEnv->env with TSDB_CODE_QRY_OUT_OF_MEMORY instead of returning.
 */
static void rowwiseApplyFunctions(SQueryRuntimeEnv *pRuntimeEnv, SDataStatis *pStatis, SDataBlockInfo *pDataBlockInfo,
    SWindowResInfo *pWindowResInfo, SArray *pDataBlock) {
  SQuery          *pQuery = pRuntimeEnv->pQuery;
  SQLFunctionCtx  *pCtx = pRuntimeEnv->pCtx;
  STableQueryInfo *pTableInfo = pQuery->current;
  bool             isMasterScan = IS_MASTER_SCAN(pRuntimeEnv);
  bool             groupByColumn = pRuntimeEnv->groupbyNormalCol;

  // the first column only serves as timestamp list when it really has the timestamp type
  SColumnInfoData *pFirstCol = (SColumnInfoData *)taosArrayGet(pDataBlock, 0);
  TSKEY           *tsCols = NULL;
  if (pFirstCol->info.type == TSDB_DATA_TYPE_TIMESTAMP) {
    tsCols = (TSKEY *)pFirstCol->pData;
  }

  SArithmeticSupport *sasArray = calloc((size_t)pQuery->numOfOutput, sizeof(SArithmeticSupport));
  if (sasArray == NULL) {
    longjmp(pRuntimeEnv->env, TSDB_CODE_QRY_OUT_OF_MEMORY);
  }

  // type/width of the group-by column, only meaningful for group by normal column
  int16_t type = 0;
  int16_t bytes = 0;
  char   *groupbyColumnData = NULL;
  if (groupByColumn) {
    groupbyColumnData = getGroupbyColumnData(pQuery, &type, &bytes, pDataBlock);
  }

  // bind the input of every output expression
  for (int32_t k = 0; k < pQuery->numOfOutput; ++k) {
    char *input = getDataBlock(pRuntimeEnv, &sasArray[k], k, pDataBlockInfo->rows, pDataBlock);
    setExecParams(pQuery, &pCtx[k], input, tsCols, pDataBlockInfo, pStatis, &sasArray[k], k);
  }

  // bind the input of every filter column
  for (int32_t k = 0; k < pQuery->numOfFilterCols; ++k) {
    SSingleColumnFilterInfo *pFilter = &pQuery->pFilterInfo[k];
    pFilter->pData = getDataBlockImpl(pDataBlock, pFilter->info.colId);
    assert(pFilter->pData != NULL);
  }

  int32_t step = GET_FORWARD_DIRECTION_FACTOR(pQuery->order.order);

  if (pRuntimeEnv->pTSBuf != NULL) {
    SQInfo *pQInfo = (SQInfo *)GET_QINFO_ADDR(pRuntimeEnv);
    qDebug("QInfo:%p process data rows, numOfRows:%d, query order:%d, ts comp order:%d", pQInfo, pDataBlockInfo->rows,
           pQuery->order.order, pRuntimeEnv->pTSBuf->cur.order);
  }

  // position of the most recently visited row, -1 while no row has been visited
  int32_t offset = -1;

  for (int32_t row = 0; row < pDataBlockInfo->rows; ++row) {
    offset = GET_COL_DATA_POS(pQuery, row, step);

    if (pRuntimeEnv->pTSBuf != NULL) {
      int32_t joinRes = doTSJoinFilter(pRuntimeEnv, offset);
      if (joinRes == TS_JOIN_TAG_NOT_EQUALS) {
        break;
      }

      if (joinRes == TS_JOIN_TS_NOT_EQUALS) {
        continue;
      }

      assert(joinRes == TS_JOIN_TS_EQUAL);
    }

    if (pQuery->numOfFilterCols > 0 && (!doFilterData(pQuery, offset))) {
      continue;
    }

    if (QUERY_IS_INTERVAL_QUERY(pQuery)) {
      // the time window is decided by the primary timestamp of the row
      // NOTE(review): tsCols is dereferenced without a NULL check here - confirm interval
      // queries always load the timestamp as the first column
      int64_t     ts = tsCols[offset];
      STimeWindow win = getActiveTimeWindow(pWindowResInfo, ts, pQuery);

      bool    inWindow = false;
      int32_t code = setWindowOutputBufByKey(pRuntimeEnv, pWindowResInfo, pDataBlockInfo->tid, &win, isMasterScan, &inWindow);
      if (code != TSDB_CODE_SUCCESS || !inWindow) {  // null data, too many state code, or no window for this row
        continue;
      }

      SWindowStatus *pStatus = getTimeWindowResStatus(pWindowResInfo, curTimeWindow(pWindowResInfo));
      doRowwiseApplyFunctions(pRuntimeEnv, pStatus, &win, offset);

      // the row may also belong to following windows; curIndex is restored afterwards
      int32_t     savedIndex = pWindowResInfo->curIndex;
      STimeWindow nextWin = win;

      for (;;) {
        getNextTimeWindow(pQuery, &nextWin);

        bool outOfQueryRange = QUERY_IS_ASC_QUERY(pQuery) ? (nextWin.skey > pQuery->window.ekey)
                                                          : (nextWin.skey < pQuery->window.ekey);
        if (outOfQueryRange) {
          break;
        }

        if (ts < nextWin.skey || ts > nextWin.ekey) {
          break;
        }

        // null data, failed to allocate more memory buffer
        bool inNextWindow = false;
        if (setWindowOutputBufByKey(pRuntimeEnv, pWindowResInfo, pDataBlockInfo->tid, &nextWin, isMasterScan, &inNextWindow) != TSDB_CODE_SUCCESS) {
          break;
        }

        if (inNextWindow) {
          pStatus = getTimeWindowResStatus(pWindowResInfo, curTimeWindow(pWindowResInfo));
          doRowwiseApplyFunctions(pRuntimeEnv, pStatus, &nextWin, offset);
        }
      }

      pWindowResInfo->curIndex = savedIndex;
    } else {  // other queries
      // the group of this row is decided by the value of the group-by column
      if (groupByColumn) {
        char *val = groupbyColumnData + bytes * offset;
        if (setGroupResultOutputBuf(pRuntimeEnv, val, type, bytes) != TSDB_CODE_SUCCESS) {  // null data, too many state code
          continue;
        }
      }

      for (int32_t k = 0; k < pQuery->numOfOutput; ++k) {
        int32_t functionId = pQuery->pSelectExpr[k].base.functionId;
        if (functionNeedToExecute(pRuntimeEnv, &pCtx[k], functionId)) {
          aAggs[functionId].xFunctionF(&pCtx[k], offset);
        }
      }
    }

    // the timestamp buffer only advances for rows that were fully processed;
    // once it is exhausted the query is complete
    if (pRuntimeEnv->pTSBuf != NULL && !tsBufNextPos(pRuntimeEnv->pTSBuf)) {
      setQueryStatus(pQuery, QUERY_COMPLETED);
      break;
    }
  }

  // NOTE(review): this assertion fires for a block with zero rows - confirm callers never pass one
  assert(offset >= 0);
  if (tsCols != NULL) {
    pTableInfo->lastKey = tsCols[offset] + step;
  } else {
    TSKEY blockEdge = QUERY_IS_ASC_QUERY(pQuery) ? pDataBlockInfo->window.ekey : pDataBlockInfo->window.skey;
    pTableInfo->lastKey = blockEdge + step;
  }

  // todo refactor: extract method
  // only arithmetic expressions own a data pointer array (allocated in getDataBlock)
  for (int32_t i = 0; i < pQuery->numOfOutput; ++i) {
    if (pQuery->pSelectExpr[i].base.functionId == TSDB_FUNC_ARITHM) {
      tfree(sasArray[i].data);
    }
  }

  free(sasArray);
}

static int32_t tableApplyFunctionsOnBlock(SQueryRuntimeEnv *pRuntimeEnv, SDataBlockInfo *pDataBlockInfo,
H
hjxilinx 已提交
1299
                                          SDataStatis *pStatis, __block_search_fn_t searchFn, SArray *pDataBlock) {
H
hjxilinx 已提交
1300
  SQuery *pQuery = pRuntimeEnv->pQuery;
H
hjxilinx 已提交
1301 1302 1303
  
  STableQueryInfo* pTableQInfo = pQuery->current;
  SWindowResInfo*  pWindowResInfo = &pRuntimeEnv->windowResInfo;
H
hjxilinx 已提交
1304
  
H
Haojun Liao 已提交
1305
  if (pQuery->numOfFilterCols > 0 || pRuntimeEnv->pTSBuf != NULL || pRuntimeEnv->groupbyNormalCol) {
1306
    rowwiseApplyFunctions(pRuntimeEnv, pStatis, pDataBlockInfo, pWindowResInfo, pDataBlock);
1307
  } else {
1308
    blockwiseApplyFunctions(pRuntimeEnv, pStatis, pDataBlockInfo, pWindowResInfo, searchFn, pDataBlock);
1309
  }
1310

1311
  // update the lastkey of current table
1312
  TSKEY lastKey = QUERY_IS_ASC_QUERY(pQuery) ? pDataBlockInfo->window.ekey : pDataBlockInfo->window.skey;
H
hjxilinx 已提交
1313
  pTableQInfo->lastKey = lastKey + GET_FORWARD_DIRECTION_FACTOR(pQuery->order.order);
1314

1315
  // interval query with limit applied
1316
  int32_t numOfRes = 0;
H
Haojun Liao 已提交
1317
  if (QUERY_IS_INTERVAL_QUERY(pQuery)) {
1318 1319 1320
    numOfRes = doCheckQueryCompleted(pRuntimeEnv, lastKey, pWindowResInfo);
  } else {
    numOfRes = getNumOfResult(pRuntimeEnv);
1321

1322 1323 1324 1325
    // update the number of output result
    if (numOfRes > 0 && pQuery->checkBuffer == 1) {
      assert(numOfRes >= pQuery->rec.rows);
      pQuery->rec.rows = numOfRes;
1326

1327 1328 1329
      if (numOfRes >= pQuery->rec.threshold) {
        setQueryStatus(pQuery, QUERY_RESBUF_FULL);
      }
1330

1331 1332 1333
      if ((pQuery->limit.limit >= 0) && (pQuery->limit.limit + pQuery->limit.offset) <= numOfRes) {
        setQueryStatus(pQuery, QUERY_COMPLETED);
      }
H
Haojun Liao 已提交
1334
    }
1335
  }
1336

1337
  return numOfRes;
1338 1339
}

H
Haojun Liao 已提交
1340
void setExecParams(SQuery *pQuery, SQLFunctionCtx *pCtx, void* inputData, TSKEY *tsCol, SDataBlockInfo* pBlockInfo,
1341 1342 1343 1344 1345 1346 1347
                   SDataStatis *pStatis, void *param, int32_t colIndex) {
  
  int32_t functionId = pQuery->pSelectExpr[colIndex].base.functionId;
  int32_t colId = pQuery->pSelectExpr[colIndex].base.colInfo.colId;
  
  SDataStatis *tpField = NULL;
  pCtx->hasNull = hasNullValue(pQuery, colIndex, pBlockInfo->numOfCols, pStatis, &tpField);
1348
  pCtx->aInputElemBuf = inputData;
1349

1350
  if (tpField != NULL) {
H
Haojun Liao 已提交
1351
    pCtx->preAggVals.isSet  = true;
1352 1353
    pCtx->preAggVals.statis = *tpField;
    assert(pCtx->preAggVals.statis.numOfNull <= pBlockInfo->rows);
1354 1355 1356
  } else {
    pCtx->preAggVals.isSet = false;
  }
1357

H
Haojun Liao 已提交
1358 1359 1360
  // limit/offset query will affect this value
  pCtx->startOffset = QUERY_IS_ASC_QUERY(pQuery) ? pQuery->pos:0;
  pCtx->size = QUERY_IS_ASC_QUERY(pQuery) ? pBlockInfo->rows - pQuery->pos : pQuery->pos + 1;
1361

1362 1363
  uint32_t status = aAggs[functionId].nStatus;
  if (((status & (TSDB_FUNCSTATE_SELECTIVITY | TSDB_FUNCSTATE_NEED_TS)) != 0) && (tsCol != NULL)) {
H
Haojun Liao 已提交
1364
    pCtx->ptsList = tsCol;
1365
  }
1366

1367 1368 1369 1370 1371
  if (functionId >= TSDB_FUNC_FIRST_DST && functionId <= TSDB_FUNC_LAST_DST) {
    // last_dist or first_dist function
    // store the first&last timestamp into the intermediate buffer [1], the true
    // value may be null but timestamp will never be null
  } else if (functionId == TSDB_FUNC_TOP || functionId == TSDB_FUNC_BOTTOM || functionId == TSDB_FUNC_TWA ||
1372
             functionId == TSDB_FUNC_DIFF || (functionId >= TSDB_FUNC_RATE && functionId <= TSDB_FUNC_AVG_IRATE)) {
1373
    /*
H
Haojun Liao 已提交
1374
     * least squares function needs two columns of input, currently, the x value of linear equation is set to
1375 1376 1377 1378 1379 1380 1381 1382 1383 1384
     * timestamp column, and the y-value is the column specified in pQuery->pSelectExpr[i].colIdxInBuffer
     *
     * top/bottom function needs timestamp to indicate when the
     * top/bottom values emerge, so does diff function
     */
    if (functionId == TSDB_FUNC_TWA) {
      STwaInfo *pTWAInfo = GET_RES_INFO(pCtx)->interResultBuf;
      pTWAInfo->SKey = pQuery->window.skey;
      pTWAInfo->EKey = pQuery->window.ekey;
    }
1385

1386 1387
  } else if (functionId == TSDB_FUNC_ARITHM) {
    pCtx->param[1].pz = param;
H
Haojun Liao 已提交
1388 1389 1390 1391 1392 1393
  } else if (functionId == TSDB_FUNC_SPREAD) {  // set the statistics data for primary time stamp column
    if (colId == PRIMARYKEY_TIMESTAMP_COL_INDEX) {
      pCtx->preAggVals.isSet  = true;
      pCtx->preAggVals.statis.min = pBlockInfo->window.skey;
      pCtx->preAggVals.statis.max = pBlockInfo->window.ekey;
    }
1394 1395 1396 1397 1398 1399 1400 1401 1402 1403 1404 1405 1406
  } else if (functionId == TSDB_FUNC_INTERP) {
    SInterpInfoDetail *pInterpInfo = GET_RES_INFO(pCtx)->interResultBuf;
    pInterpInfo->type = pQuery->fillType;
    pInterpInfo->ts = pQuery->window.skey;
    pInterpInfo->primaryCol = (colId == PRIMARYKEY_TIMESTAMP_COL_INDEX);
  
    if (pQuery->fillVal != NULL) {
      if (isNull((const char*) &pQuery->fillVal[colIndex], pCtx->inputType)) {
        pCtx->param[1].nType = TSDB_DATA_TYPE_NULL;
      } else { // todo refactor, tVariantCreateFromBinary should handle the NULL value
        tVariantCreateFromBinary(&pCtx->param[1], (char*) &pQuery->fillVal[colIndex], pCtx->inputBytes, pCtx->inputType);
      }
    }
1407
  }
1408

1409 1410 1411 1412 1413 1414
#if defined(_DEBUG_VIEW)
  //  int64_t *tsList = (int64_t *)primaryColumnData;
//  int64_t  s = tsList[0];
//  int64_t  e = tsList[size - 1];

//    if (IS_DATA_BLOCK_LOADED(blockStatus)) {
1415
//        qDebug("QInfo:%p query ts:%lld-%lld, offset:%d, rows:%d, bstatus:%d,
1416 1417 1418
//        functId:%d", GET_QINFO_ADDR(pQuery),
//               s, e, startOffset, size, blockStatus, functionId);
//    } else {
1419
//        qDebug("QInfo:%p block not loaded, bstatus:%d",
1420 1421 1422 1423 1424 1425
//        GET_QINFO_ADDR(pQuery), blockStatus);
//    }
#endif
}

// set the output buffer for the selectivity + tag query
H
Haojun Liao 已提交
1426 1427 1428
static void setCtxTagColumnInfo(SQueryRuntimeEnv *pRuntimeEnv, SQLFunctionCtx *pCtx) {
  SQuery* pQuery = pRuntimeEnv->pQuery;

1429
  if (isSelectivityWithTagsQuery(pQuery)) {
1430
    int32_t num = 0;
1431
    int16_t tagLen = 0;
1432 1433
    
    SQLFunctionCtx *p = NULL;
1434
    SQLFunctionCtx **pTagCtx = calloc(pQuery->numOfOutput, POINTER_BYTES);
H
Haojun Liao 已提交
1435

1436
    for (int32_t i = 0; i < pQuery->numOfOutput; ++i) {
1437
      SSqlFuncMsg *pSqlFuncMsg = &pQuery->pSelectExpr[i].base;
1438
      
1439 1440 1441 1442 1443 1444 1445 1446 1447 1448 1449 1450 1451
      if (pSqlFuncMsg->functionId == TSDB_FUNC_TAG_DUMMY || pSqlFuncMsg->functionId == TSDB_FUNC_TS_DUMMY) {
        tagLen += pCtx[i].outputBytes;
        pTagCtx[num++] = &pCtx[i];
      } else if ((aAggs[pSqlFuncMsg->functionId].nStatus & TSDB_FUNCSTATE_SELECTIVITY) != 0) {
        p = &pCtx[i];
      } else if (pSqlFuncMsg->functionId == TSDB_FUNC_TS || pSqlFuncMsg->functionId == TSDB_FUNC_TAG) {
        // tag function may be the group by tag column
        // ts may be the required primary timestamp column
        continue;
      } else {
        // the column may be the normal column, group by normal_column, the functionId is TSDB_FUNC_PRJ
      }
    }
dengyihao's avatar
dengyihao 已提交
1452 1453 1454 1455 1456 1457 1458
    if (p != NULL) {
      p->tagInfo.pTagCtxList = pTagCtx;
      p->tagInfo.numOfTagCols = num;
      p->tagInfo.tagsLen = tagLen;
    } else {
      tfree(pTagCtx); 
    }
1459 1460 1461
  }
}

H
Haojun Liao 已提交
1462
/*
 * Set up the intermediate result buffer of every output expression; the size of
 * each buffer is the interBytes value of the matching select expression.
 */
static FORCE_INLINE void setWindowResultInfo(SResultInfo *pResultInfo, SQuery *pQuery, bool isStableQuery) {
  int32_t idx = 0;

  while (idx < pQuery->numOfOutput) {
    // an intermediate buffer must fit into one internal buffer page
    assert(pQuery->pSelectExpr[idx].interBytes <= DEFAULT_INTERN_BUF_PAGE_SIZE);

    SResultInfo *pInfo = &pResultInfo[idx];
    setResultInfoBuf(pInfo, pQuery->pSelectExpr[idx].interBytes, isStableQuery);

    idx += 1;
  }
}

1470
/*
 * Allocate and initialize the per-expression runtime state of a query: the
 * result info array and one function context per output expression (input and
 * output type/width, parameters, order), plus the output offset table.
 *
 * @param pRuntimeEnv  runtime environment to set up; pRuntimeEnv->pQuery must be set
 * @param order        order value stored as param[2] of top/bottom/diff functions
 * @return TSDB_CODE_SUCCESS, or TSDB_CODE_QRY_OUT_OF_MEMORY when the result info or the
 *         context array cannot be allocated (both are released again in that case)
 */
static int32_t setupQueryRuntimeEnv(SQueryRuntimeEnv *pRuntimeEnv, int16_t order) {
  qDebug("QInfo:%p setup runtime env", GET_QINFO_ADDR(pRuntimeEnv));
  SQuery *pQuery = pRuntimeEnv->pQuery;

  pRuntimeEnv->resultInfo = calloc(pQuery->numOfOutput, sizeof(SResultInfo));
  pRuntimeEnv->pCtx = (SQLFunctionCtx *)calloc(pQuery->numOfOutput, sizeof(SQLFunctionCtx));

  if (pRuntimeEnv->resultInfo == NULL || pRuntimeEnv->pCtx == NULL) {
    goto _clean;
  }

  pRuntimeEnv->offset[0] = 0;
  for (int32_t i = 0; i < pQuery->numOfOutput; ++i) {
    SSqlFuncMsg *pSqlFuncMsg = &pQuery->pSelectExpr[i].base;

    SQLFunctionCtx *pCtx = &pRuntimeEnv->pCtx[i];
    SColIndex* pIndex = &pSqlFuncMsg->colInfo;

    // input type/width come from the tag list, the table name schema or the column list
    int32_t index = pSqlFuncMsg->colInfo.colIndex;
    if (TSDB_COL_IS_TAG(pIndex->flag)) {
      if (pIndex->colId == TSDB_TBNAME_COLUMN_INDEX) {  // todo refactor
        SSchema s = tGetTableNameColumnSchema();

        pCtx->inputBytes = s.bytes;
        pCtx->inputType = s.type;
      } else {
        pCtx->inputBytes = pQuery->tagColList[index].bytes;
        pCtx->inputType = pQuery->tagColList[index].type;
      }
    } else {
      pCtx->inputBytes = pQuery->colList[index].bytes;
      pCtx->inputType = pQuery->colList[index].type;
    }

    assert(isValidDataType(pCtx->inputType));
    pCtx->ptsOutputBuf = NULL;

    pCtx->outputBytes = pQuery->pSelectExpr[i].bytes;
    pCtx->outputType = pQuery->pSelectExpr[i].type;

    pCtx->order = pQuery->order.order;
    pCtx->functionId = pSqlFuncMsg->functionId;

    pCtx->numOfParams = pSqlFuncMsg->numOfParams;
    for (int32_t j = 0; j < pCtx->numOfParams; ++j) {
      int16_t type = pSqlFuncMsg->arg[j].argType;
      int16_t bytes = pSqlFuncMsg->arg[j].argBytes;
      if (type == TSDB_DATA_TYPE_BINARY || type == TSDB_DATA_TYPE_NCHAR) {
        // string parameters are passed by pointer; read the j-th argument, not always the first one
        tVariantCreateFromBinary(&pCtx->param[j], pSqlFuncMsg->arg[j].argValue.pz, bytes, type);
      } else {
        tVariantCreateFromBinary(&pCtx->param[j], (char *)&pSqlFuncMsg->arg[j].argValue.i64, bytes, type);
      }
    }

    // set the order information for top/bottom query
    int32_t functionId = pCtx->functionId;

    if (functionId == TSDB_FUNC_TOP || functionId == TSDB_FUNC_BOTTOM || functionId == TSDB_FUNC_DIFF) {
      // these functions expect the first output expression to be the timestamp
      int32_t f = pQuery->pSelectExpr[0].base.functionId;
      assert(f == TSDB_FUNC_TS || f == TSDB_FUNC_TS_DUMMY);

      pCtx->param[2].i64Key = order;
      pCtx->param[2].nType = TSDB_DATA_TYPE_BIGINT;
      pCtx->param[3].i64Key = functionId;
      pCtx->param[3].nType = TSDB_DATA_TYPE_BIGINT;

      pCtx->param[1].i64Key = pQuery->order.orderColId;
    }

    // the output offset of an expression is the sum of the output widths before it
    if (i > 0) {
      pRuntimeEnv->offset[i] = pRuntimeEnv->offset[i - 1] + pRuntimeEnv->pCtx[i - 1].outputBytes;
    }
  }

  // set the intermediate result output buffer
  setWindowResultInfo(pRuntimeEnv->resultInfo, pQuery, pRuntimeEnv->stableQuery);

  // if it is group by normal column, do not set output buffer, the output buffer is pResult
  if (!isGroupbyNormalCol(pQuery->pGroupbyExpr) && !pRuntimeEnv->stableQuery) {
    resetCtxOutputBuf(pRuntimeEnv);
  }

  setCtxTagColumnInfo(pRuntimeEnv, pRuntimeEnv->pCtx);
  return TSDB_CODE_SUCCESS;

_clean:
  tfree(pRuntimeEnv->resultInfo);
  tfree(pRuntimeEnv->pCtx);

  return TSDB_CODE_QRY_OUT_OF_MEMORY;
}

/*
 * Release everything owned by the runtime environment: time window results,
 * function contexts with their parameters and intermediate buffers, fill info,
 * the result buffer, both query handles and the timestamp buffer.
 * Does nothing when the environment has no query attached.
 */
static void teardownQueryRuntimeEnv(SQueryRuntimeEnv *pRuntimeEnv) {
  SQuery *pQuery = pRuntimeEnv->pQuery;
  if (pQuery == NULL) {
    return;
  }

  SQInfo* pQInfo = (SQInfo*) GET_QINFO_ADDR(pRuntimeEnv);
  qDebug("QInfo:%p teardown runtime env", pQInfo);

  cleanupTimeWindowInfo(&pRuntimeEnv->windowResInfo, pQuery->numOfOutput);

  if (pRuntimeEnv->pCtx != NULL) {
    for (int32_t i = 0; i < pQuery->numOfOutput; ++i) {
      SQLFunctionCtx *pFuncCtx = &pRuntimeEnv->pCtx[i];

      int32_t p = 0;
      while (p < pFuncCtx->numOfParams) {
        tVariantDestroy(&pFuncCtx->param[p]);
        p += 1;
      }

      tVariantDestroy(&pFuncCtx->tag);
      tfree(pFuncCtx->tagInfo.pTagCtxList);
      tfree(pRuntimeEnv->resultInfo[i].interResultBuf);
    }

    tfree(pRuntimeEnv->resultInfo);
    tfree(pRuntimeEnv->pCtx);
  }

  // the destroy functions hand back the value to store in the released slot
  pRuntimeEnv->pFillInfo = taosDestoryFillInfo(pRuntimeEnv->pFillInfo);

  destroyResultBuf(pRuntimeEnv->pResultBuf, pQInfo);
  tsdbCleanupQueryHandle(pRuntimeEnv->pQueryHandle);
  tsdbCleanupQueryHandle(pRuntimeEnv->pSecQueryHandle);

  pRuntimeEnv->pTSBuf = tsBufDestory(pRuntimeEnv->pTSBuf);
}

1600
/* Tell whether the query code equals TSDB_CODE_TSC_QUERY_CANCELLED. */
static bool isQueryKilled(SQInfo *pQInfo) {
  bool cancelled = (pQInfo->code == TSDB_CODE_TSC_QUERY_CANCELLED);
  return cancelled;
}

1604
/* Flag the query as cancelled by storing TSDB_CODE_TSC_QUERY_CANCELLED in its code. */
static void setQueryKilled(SQInfo *pQInfo) {
  pQInfo->code = TSDB_CODE_TSC_QUERY_CANCELLED;
}
H
hjxilinx 已提交
1605

H
hjxilinx 已提交
1606
/*
 * Decide whether the query is treated as a "fixed output" query.
 *
 * Interval queries never are. Top/bottom and group-by-normal-column queries
 * always are. Otherwise the query is fixed output as soon as one output
 * function, other than the skipped timestamp helpers, is not multi-output.
 */
static bool isFixedOutputQuery(SQuery *pQuery) {
  if (pQuery->intervalTime != 0) {
    return false;
  }

  // Note: top/bottom query is fixed output query
  if (isTopBottomQuery(pQuery) || isGroupbyNormalCol(pQuery->pGroupbyExpr)) {
    return true;
  }

  for (int32_t idx = 0; idx < pQuery->numOfOutput; ++idx) {
    SSqlFuncMsg *pFunc = &pQuery->pSelectExpr[idx].base;

    // the ts_comp function: a leading projection of the primary timestamp column
    bool leadingTsProjection = (idx == 0) && (pFunc->functionId == TSDB_FUNC_PRJ) && (pFunc->numOfParams == 1) &&
                               (pFunc->colInfo.colIndex == PRIMARYKEY_TIMESTAMP_COL_INDEX);
    bool tsFunction = (pFunc->functionId == TSDB_FUNC_TS) || (pFunc->functionId == TSDB_FUNC_TS_DUMMY);

    if (leadingTsProjection || tsFunction) {
      continue;
    }

    if (!IS_MULTIOUTPUT(aAggs[pFunc->functionId].nStatus)) {
      return true;
    }
  }

  return false;
}

1637
// todo refactor with isLastRowQuery
H
hjxilinx 已提交
1638
static bool isPointInterpoQuery(SQuery *pQuery) {
1639
  for (int32_t i = 0; i < pQuery->numOfOutput; ++i) {
1640
    int32_t functionID = pQuery->pSelectExpr[i].base.functionId;
1641
    if (functionID == TSDB_FUNC_INTERP) {
1642 1643 1644
      return true;
    }
  }
1645

1646 1647 1648 1649
  return false;
}

// TODO REFACTOR:MERGE WITH CLIENT-SIDE FUNCTION
H
hjxilinx 已提交
1650
static bool isSumAvgRateQuery(SQuery *pQuery) {
1651
  for (int32_t i = 0; i < pQuery->numOfOutput; ++i) {
1652
    int32_t functionId = pQuery->pSelectExpr[i].base.functionId;
1653 1654 1655
    if (functionId == TSDB_FUNC_TS) {
      continue;
    }
1656

1657 1658 1659 1660 1661
    if (functionId == TSDB_FUNC_SUM_RATE || functionId == TSDB_FUNC_SUM_IRATE || functionId == TSDB_FUNC_AVG_RATE ||
        functionId == TSDB_FUNC_AVG_IRATE) {
      return true;
    }
  }
1662

1663 1664 1665
  return false;
}

H
hjxilinx 已提交
1666
/* Return true when any output expression uses TSDB_FUNC_LAST_ROW. */
static bool isFirstLastRowQuery(SQuery *pQuery) {
  int32_t idx = 0;

  while (idx < pQuery->numOfOutput) {
    if (pQuery->pSelectExpr[idx].base.functionId == TSDB_FUNC_LAST_ROW) {
      return true;
    }
    ++idx;
  }

  return false;
}

H
hjxilinx 已提交
1677
/*
 * Decide whether a reverse scan is required.
 *
 * Timestamp and tag helper functions are ignored. A first/first_dst function
 * in a non-ascending query requires one. For the first last/last_dst function
 * met, the answer is whether the order stored in its argument differs from
 * the query order.
 */
static bool needReverseScan(SQuery *pQuery) {
  for (int32_t idx = 0; idx < pQuery->numOfOutput; ++idx) {
    int32_t fid = pQuery->pSelectExpr[idx].base.functionId;

    bool skipped = (fid == TSDB_FUNC_TS) || (fid == TSDB_FUNC_TS_DUMMY) || (fid == TSDB_FUNC_TAG);
    if (skipped) {
      continue;
    }

    bool isFirst = (fid == TSDB_FUNC_FIRST) || (fid == TSDB_FUNC_FIRST_DST);
    if (isFirst && !QUERY_IS_ASC_QUERY(pQuery)) {
      return true;
    }

    bool isLast = (fid == TSDB_FUNC_LAST) || (fid == TSDB_FUNC_LAST_DST);
    if (isLast) {
      // the scan order requested for this function is carried in its argument
      int32_t requestedOrder = pQuery->pSelectExpr[idx].base.arg->argValue.i64;
      return requestedOrder != pQuery->order.order;
    }
  }

  return false;
}
H
hjxilinx 已提交
1696 1697 1698

/*
 * Return true when every output expression is a tag projection, a tid_tag
 * function, or a count() on the table-name pseudo column.
 */
static bool onlyQueryTags(SQuery* pQuery) {
  for (int32_t idx = 0; idx < pQuery->numOfOutput; ++idx) {
    SExprInfo *pExpr = &pQuery->pSelectExpr[idx];
    int32_t    fid = pExpr->base.functionId;

    bool countOnTbname = (fid == TSDB_FUNC_COUNT) && (pExpr->base.colInfo.colId == TSDB_TBNAME_COLUMN_INDEX);
    bool tagOnlyExpr = (fid == TSDB_FUNC_TAGPRJ) || (fid == TSDB_FUNC_TID_TAG) || countOnTbname;

    if (!tagOnlyExpr) {
      return false;
    }
  }

  return true;
}

1711 1712
/////////////////////////////////////////////////////////////////////////////////////////////

H
Haojun Liao 已提交
1713
/*
 * Compute the aligned time window that contains `key`.
 *
 * win->skey is the interval start returned by taosGetIntervalStartTimestamp().
 * win->ekey is skey + intervalTime - 1, unless keyFirst is so close to
 * INT64_MAX that adding an interval could not be represented; in that case
 * the window is clamped to INT64_MAX.
 */
void getAlignQueryTimeWindow(SQuery *pQuery, int64_t key, int64_t keyFirst, int64_t keyLast, STimeWindow *win) {
  assert(key >= keyFirst && key <= keyLast && pQuery->slidingTime <= pQuery->intervalTime);
  win->skey = taosGetIntervalStartTimestamp(key, pQuery->slidingTime, pQuery->intervalTime, pQuery->slidingTimeUnit, pQuery->precision);

  // common case: the window end can be computed directly
  if (keyFirst <= (INT64_MAX - pQuery->intervalTime)) {
    win->ekey = win->skey + pQuery->intervalTime - 1;
    return;
  }

  /*
   * keyFirst > INT64_MAX - intervalTime: the range between keyFirst and keyLast
   * must be shorter than one interval, so the query range needs no adjustment.
   */
  assert(keyLast - keyFirst < pQuery->intervalTime);
  win->ekey = INT64_MAX;
}

/*
 * Set pQuery->checkBuffer.
 *
 * It is 0 for top/bottom and group-by-normal-column queries. Otherwise it is
 * 1 only when at least one non-timestamp output function exists and every
 * such function is multi-output.
 */
static void setScanLimitationByResultBuffer(SQuery *pQuery) {
  if (isTopBottomQuery(pQuery) || isGroupbyNormalCol(pQuery->pGroupbyExpr)) {
    pQuery->checkBuffer = 0;
    return;
  }

  bool allMultiOutput = false;

  for (int32_t idx = 0; idx < pQuery->numOfOutput; ++idx) {
    SSqlFuncMsg *pFunc = &pQuery->pSelectExpr[idx].base;

    bool tsFunction = (pFunc->functionId == TSDB_FUNC_TS) || (pFunc->functionId == TSDB_FUNC_TS_DUMMY);
    if (tsFunction) {
      continue;
    }

    allMultiOutput = IS_MULTIOUTPUT(aAggs[pFunc->functionId].nStatus);
    if (!allMultiOutput) {
      break;  // a single non multi-output function settles the answer
    }
  }

  if (allMultiOutput) {
    pQuery->checkBuffer = 1;
  } else {
    pQuery->checkBuffer = 0;
  }
}

/*
 * todo add more parameters to check soon..
 */
1756
bool colIdCheck(SQuery *pQuery) {
1757 1758
  // load data column information is incorrect
  for (int32_t i = 0; i < pQuery->numOfCols - 1; ++i) {
1759
    if (pQuery->colList[i].colId == pQuery->colList[i + 1].colId) {
S
slguan 已提交
1760
      qError("QInfo:%p invalid data load column for query", GET_QINFO_ADDR(pQuery));
1761 1762 1763
      return false;
    }
  }
1764
  
1765 1766 1767 1768 1769 1770
  return true;
}

// todo ignore the avg/sum/min/max/count/stddev/top/bottom functions, for which
// the scan order does not matter
/*
 * Return true when every output function, apart from the timestamp and tag
 * helper functions, is either functId or functIdDst.
 */
static bool onlyOneQueryType(SQuery *pQuery, int32_t functId, int32_t functIdDst) {
  for (int32_t idx = 0; idx < pQuery->numOfOutput; ++idx) {
    int32_t fid = pQuery->pSelectExpr[idx].base.functionId;

    bool helper = (fid == TSDB_FUNC_TS) || (fid == TSDB_FUNC_TS_DUMMY) || (fid == TSDB_FUNC_TAG) ||
                  (fid == TSDB_FUNC_TAG_DUMMY);
    bool wanted = (fid == functId) || (fid == functIdDst);

    if (!helper && !wanted) {
      return false;
    }
  }

  return true;
}

/* True when the query only contains first/first_dst functions (helper functions aside). */
static bool onlyFirstQuery(SQuery *pQuery) {
  return onlyOneQueryType(pQuery, TSDB_FUNC_FIRST, TSDB_FUNC_FIRST_DST);
}

/* True when the query only contains last/last_dst functions (helper functions aside). */
static bool onlyLastQuery(SQuery *pQuery) {
  return onlyOneQueryType(pQuery, TSDB_FUNC_LAST, TSDB_FUNC_LAST_DST);
}

H
Haojun Liao 已提交
1791 1792 1793 1794 1795 1796 1797 1798 1799 1800 1801 1802 1803 1804
// todo refactor, add iterator
/* Swap skey and ekey of the time window of every table in every table group of the query. */
static void doExchangeTimeWindow(SQInfo* pQInfo) {
  size_t numOfGroups = GET_NUM_OF_TABLEGROUP(pQInfo);

  for (int32_t g = 0; g < numOfGroups; ++g) {
    SArray *pGroup = GET_TABLEGROUP(pQInfo, g);
    size_t  numOfTables = taosArrayGetSize(pGroup);

    for (int32_t t = 0; t < numOfTables; ++t) {
      STableQueryInfo *pInfo = (STableQueryInfo *)taosArrayGetP(pGroup, t);
      SWAP(pInfo->win.skey, pInfo->win.ekey, TSKEY);
    }
  }
}

H
Haojun Liao 已提交
1805 1806 1807
/*
 * Force the scan order of the query to newOrder (TSDB_ORDER_ASC or
 * TSDB_ORDER_DESC). When this reverses the current direction, the change is
 * logged, the query window boundaries are swapped and, if exchangeTableWindow
 * is set, the window of every table is swapped as well.
 */
static void setScanOrderAndExchangeWindow(SQInfo *pQInfo, const char *queryType, int32_t newOrder,
                                          bool exchangeTableWindow) {
  SQuery *pQuery = pQInfo->runtimeEnv.pQuery;

  bool isAsc = QUERY_IS_ASC_QUERY(pQuery);
  bool directionChanged = (newOrder == TSDB_ORDER_ASC) ? !isAsc : isAsc;

  if (directionChanged) {
    qDebug("QInfo:%p scan order changed for %s query, old:%d, new:%d, qrange exchanged, old qrange:%" PRId64
           "-%" PRId64 ", new qrange:%" PRId64 "-%" PRId64,
           pQInfo, queryType, pQuery->order.order, newOrder, pQuery->window.skey, pQuery->window.ekey,
           pQuery->window.ekey, pQuery->window.skey);

    SWAP(pQuery->window.skey, pQuery->window.ekey, TSKEY);
    if (exchangeTableWindow) {
      doExchangeTimeWindow(pQInfo);
    }
  }

  pQuery->order.order = newOrder;
}

/*
 * Adjust the scan order (and the query window accordingly) for query types
 * whose result depends on it:
 *  - last_row query: always descending, window set to [max, min];
 *  - point interpolation without interval: always ascending;
 *  - only-first / only-last query without interval: ascending / descending,
 *    per-table windows are exchanged too;
 *  - only-first / only-last interval query on a super table: ascending /
 *    descending, per-table windows are left untouched.
 * Any other query is left unchanged.
 *
 * The QInfo address written to the log is the pQInfo argument itself; deriving
 * it from pQuery through GET_QINFO_ADDR() is wrong because that macro expects
 * the address of the runtimeEnv member.
 */
static void changeExecuteScanOrder(SQInfo *pQInfo, bool stableQuery) {
  SQuery* pQuery = pQInfo->runtimeEnv.pQuery;

  // todo handle the case that an order-irrelevant query type is mixed up with an order-critical query type
  // descending order query for last_row query
  if (isFirstLastRowQuery(pQuery)) {
    qDebug("QInfo:%p scan order changed for last_row query, old:%d, new:%d", pQInfo,
           pQuery->order.order, TSDB_ORDER_DESC);

    pQuery->order.order = TSDB_ORDER_DESC;

    int64_t skey = MIN(pQuery->window.skey, pQuery->window.ekey);
    int64_t ekey = MAX(pQuery->window.skey, pQuery->window.ekey);

    pQuery->window.skey = ekey;
    pQuery->window.ekey = skey;

    return;
  }

  // in case of point-interpolation query, use asc order scan
  if (isPointInterpoQuery(pQuery) && pQuery->intervalTime == 0) {
    setScanOrderAndExchangeWindow(pQInfo, "interp", TSDB_ORDER_ASC, false);
    return;
  }

  bool intervalQuery = (pQuery->intervalTime != 0);
  if (intervalQuery && !stableQuery) {
    return;  // interval query on a normal table: scan order is kept
  }

  // per-table windows are exchanged only for the non-interval case
  bool exchangeTableWindow = !intervalQuery;

  if (onlyFirstQuery(pQuery)) {
    setScanOrderAndExchangeWindow(pQInfo, intervalQuery ? "only-first stable" : "only-first", TSDB_ORDER_ASC,
                                  exchangeTableWindow);
  } else if (onlyLastQuery(pQuery)) {
    setScanOrderAndExchangeWindow(pQInfo, intervalQuery ? "only-last stable" : "only-last", TSDB_ORDER_DESC,
                                  exchangeTableWindow);
  }
}

/*
 * Pick the initial number of result pages: 128 for a group-by-normal-column
 * query, max(number of tables, 16) for a time window query, 1 otherwise.
 */
static int32_t getInitialPageNum(SQInfo *pQInfo) {
  SQuery *pQuery = pQInfo->runtimeEnv.pQuery;
  int32_t INITIAL_RESULT_ROWS_VALUE = 16;

  int32_t pages;

  if (isGroupbyNormalCol(pQuery->pGroupbyExpr)) {
    pages = 128;
  } else if (!QUERY_IS_INTERVAL_QUERY(pQuery)) {
    // for super table query, one page for each subset
    pages = 1;  // pQInfo->pSidSet->numOfSubSet;
  } else {
    // time window query, allocate one page for each table
    size_t numOfTables = pQInfo->tableqinfoGroupInfo.numOfTables;
    pages = MAX(numOfTables, INITIAL_RESULT_ROWS_VALUE);
  }

  assert(pages > 0);
  return pages;
}

H
Haojun Liao 已提交
1907
#define GET_ROW_PARAM_FOR_MULTIOUTPUT(_q, tbq, sq) (((tbq) && (!sq))? (_q)->pSelectExpr[1].base.arg->argValue.i64:1)
1908

H
Haojun Liao 已提交
1909 1910
static FORCE_INLINE int32_t getNumOfRowsInResultPage(SQuery *pQuery, bool topBotQuery, bool isSTableQuery) {
  int32_t rowSize = pQuery->rowSize * GET_ROW_PARAM_FOR_MULTIOUTPUT(pQuery, topBotQuery, isSTableQuery);
1911
  return (DEFAULT_INTERN_BUF_PAGE_SIZE - sizeof(tFilePage)) / rowSize;
1912 1913 1914 1915
}

/*
 * Return the address, inside the result buffer page of pResult, where the
 * value of output column columnIndex is stored for this result row.
 */
char *getPosInResultPage(SQueryRuntimeEnv *pRuntimeEnv, int32_t columnIndex, SWindowResult *pResult) {
  assert(pResult != NULL && pRuntimeEnv != NULL);

  SQuery    *pQuery = pRuntimeEnv->pQuery;
  tFilePage *page = GET_RES_BUF_PAGE_BY_ID(pRuntimeEnv->pResultBuf, pResult->pos.pageId);

  // one logical row may span several physical rows (see GET_ROW_PARAM_FOR_MULTIOUTPUT)
  int32_t realRowId = pResult->pos.rowId * GET_ROW_PARAM_FOR_MULTIOUTPUT(pQuery, pRuntimeEnv->topBotQuery, pRuntimeEnv->stableQuery);

  // start of the column area inside the page, then the row inside that area
  char *columnStart = ((char *)page->data) + pRuntimeEnv->offset[columnIndex] * pRuntimeEnv->numOfRowsPerPage;
  return columnStart + pQuery->pSelectExpr[columnIndex].bytes * realRowId;
}

/**
 * decrease the refcount for each table involved in this query
 * @param pQInfo
 */
1929
UNUSED_FUNC void vnodeDecMeterRefcnt(SQInfo *pQInfo) {
1930
  if (pQInfo != NULL) {
1931
    //    assert(taosHashGetSize(pQInfo->tableqinfoGroupInfo) >= 1);
1932 1933 1934
  }

#if 0
1935
  if (pQInfo == NULL || pQInfo->tableqinfoGroupInfo.numOfTables == 1) {
1936
    atomic_fetch_sub_32(&pQInfo->pObj->numOfQueries, 1);
1937
    qDebug("QInfo:%p vid:%d sid:%d meterId:%s, query is over, numOfQueries:%d", pQInfo, pQInfo->pObj->vnode,
1938 1939 1940
           pQInfo->pObj->sid, pQInfo->pObj->meterId, pQInfo->pObj->numOfQueries);
  } else {
    int32_t num = 0;
1941 1942
    for (int32_t i = 0; i < pQInfo->tableqinfoGroupInfo.numOfTables; ++i) {
      SMeterObj *pMeter = getMeterObj(pQInfo->tableqinfoGroupInfo, pQInfo->pSidSet->pTableIdList[i]->sid);
1943
      atomic_fetch_sub_32(&(pMeter->numOfQueries), 1);
1944

1945
      if (pMeter->numOfQueries > 0) {
1946
        qDebug("QInfo:%p vid:%d sid:%d meterId:%s, query is over, numOfQueries:%d", pQInfo, pMeter->vnode, pMeter->sid,
1947 1948 1949 1950
               pMeter->meterId, pMeter->numOfQueries);
        num++;
      }
    }
1951

1952 1953 1954 1955
    /*
     * in order to reduce log output, for all meters of which numOfQueries count are 0,
     * we do not output corresponding information
     */
1956
    num = pQInfo->tableqinfoGroupInfo.numOfTables - num;
1957
    qDebug("QInfo:%p metric query is over, dec query ref for %d meters, numOfQueries on %d meters are 0", pQInfo,
1958
           pQInfo->tableqinfoGroupInfo.numOfTables, num);
1959 1960 1961 1962 1963 1964 1965 1966 1967 1968 1969 1970 1971
  }
#endif
}

/*
 * Decide from the block statistics whether a data block has to be loaded.
 *
 * The statistics based pre-filter is compiled out (#if 0), so the function
 * currently returns true for every block, with or without statistics.
 */
static bool needToLoadDataBlock(SQuery *pQuery, SDataStatis *pDataStatis, SQLFunctionCtx *pCtx,
                                int32_t numOfTotalPoints) {
  bool noStatis = (pDataStatis == NULL);
  if (noStatis) {
    return true;
  }

#if 0
  for (int32_t k = 0; k < pQuery->numOfFilterCols; ++k) {
    SSingleColumnFilterInfo *pFilterInfo = &pQuery->pFilterInfo[k];
    int32_t                  colIndex = pFilterInfo->info.colIndex;

    // this column not valid in current data block
    if (colIndex < 0 || pDataStatis[colIndex].colId != pFilterInfo->info.data.colId) {
      continue;
    }

    // not support pre-filter operation on binary/nchar data type
    if (!vnodeSupportPrefilter(pFilterInfo->info.data.type)) {
      continue;
    }

    // all points in current column are NULL, no need to check its boundary value
    if (pDataStatis[colIndex].numOfNull == numOfTotalPoints) {
      continue;
    }

    if (pFilterInfo->info.info.type == TSDB_DATA_TYPE_FLOAT) {
      float minval = *(double *)(&pDataStatis[colIndex].min);
      float maxval = *(double *)(&pDataStatis[colIndex].max);

      for (int32_t i = 0; i < pFilterInfo->numOfFilters; ++i) {
        if (pFilterInfo->pFilters[i].fp(&pFilterInfo->pFilters[i], (char *)&minval, (char *)&maxval)) {
          return true;
        }
      }
    } else {
      for (int32_t i = 0; i < pFilterInfo->numOfFilters; ++i) {
        if (pFilterInfo->pFilters[i].fp(&pFilterInfo->pFilters[i], (char *)&pDataStatis[colIndex].min,
                                        (char *)&pDataStatis[colIndex].max)) {
          return true;
        }
      }
    }
  }

  // todo disable this opt code block temporarily
  //  for (int32_t i = 0; i < pQuery->numOfOutput; ++i) {
  //    int32_t functId = pQuery->pSelectExpr[i].base.functionId;
  //    if (functId == TSDB_FUNC_TOP || functId == TSDB_FUNC_BOTTOM) {
  //      return top_bot_datablock_filter(&pCtx[i], functId, (char *)&pField[i].min, (char *)&pField[i].max);
  //    }
  //  }

#endif
  return true;
}

/*
 * Move pTimeWindow to the next window in scan direction: the start key is
 * shifted by one sliding step (negative for a descending scan) and the end key
 * is recomputed from the interval length.
 * Note: the previous time window may not be of the same size as pQuery->intervalTime.
 */
static void getNextTimeWindow(SQuery *pQuery, STimeWindow *pTimeWindow) {
  int32_t direction = GET_FORWARD_DIRECTION_FACTOR(pQuery->order.order);

  pTimeWindow->skey = pTimeWindow->skey + (pQuery->slidingTime * direction);
  pTimeWindow->ekey = pTimeWindow->skey + (pQuery->intervalTime - 1);
}

H
hjxilinx 已提交
2028
/*
 * Load the current data block of pQueryHandle only as far as the query needs it.
 *
 * The requirement is BLK_DATA_ALL_NEEDED when filter columns exist, a
 * timestamp buffer is attached or the query is an interval query; otherwise
 * it is the OR of what every output function reports through dataReqFunc.
 *
 *  - BLK_DATA_NO_NEEDED:     the block is discarded, NULL is returned.
 *  - BLK_DATA_STATIS_NEEDED: only the statistics are fetched into *pStatis;
 *                            the block itself is loaded when none is available.
 *  - BLK_DATA_ALL_NEEDED:    statistics are fetched and the block is loaded.
 *
 * @param pRuntimeEnv  runtime environment; its summary counters are updated
 * @param pQueryHandle tsdb query handle positioned on the block
 * @param pBlockInfo   information of the block (row count, key range)
 * @param pStatis      out: block statistics, NULL when they could not be retrieved
 * @return the loaded column data, or NULL when the block data was not loaded
 */
SArray *loadDataBlockOnDemand(SQueryRuntimeEnv *pRuntimeEnv, void* pQueryHandle, SDataBlockInfo* pBlockInfo, SDataStatis **pStatis) {
  SQuery *pQuery = pRuntimeEnv->pQuery;

  uint32_t r = 0;
  SArray * pDataBlock = NULL;

  if (pQuery->numOfFilterCols > 0) {
    r = BLK_DATA_ALL_NEEDED;
  } else {
    // check if this data block is required to load
    for (int32_t i = 0; i < pQuery->numOfOutput; ++i) {
      SSqlFuncMsg* pSqlFunc = &pQuery->pSelectExpr[i].base;

      int32_t functionId = pSqlFunc->functionId;
      int32_t colId = pSqlFunc->colInfo.colId;
      r |= aAggs[functionId].dataReqFunc(&pRuntimeEnv->pCtx[i], pQuery->window.skey, pQuery->window.ekey, colId);
    }

    // pTSBuf is a pointer: test it against NULL instead of using a relational comparison with 0
    if (pRuntimeEnv->pTSBuf != NULL || QUERY_IS_INTERVAL_QUERY(pQuery)) {
      r |= BLK_DATA_ALL_NEEDED;
    }
  }

  if (r == BLK_DATA_NO_NEEDED) {
    qDebug("QInfo:%p data block discard, brange:%" PRId64 "-%" PRId64 ", rows:%d", GET_QINFO_ADDR(pRuntimeEnv),
           pBlockInfo->window.skey, pBlockInfo->window.ekey, pBlockInfo->rows);
    pRuntimeEnv->summary.discardBlocks += 1;
  } else if (r == BLK_DATA_STATIS_NEEDED) {
    if (tsdbRetrieveDataBlockStatisInfo(pQueryHandle, pStatis) != TSDB_CODE_SUCCESS) {
      // statistics are unusable after a failed retrieval: fall back to loading the block below
      *pStatis = NULL;
    }

    pRuntimeEnv->summary.loadBlockStatis += 1;

    if (*pStatis == NULL) { // data block statistics does not exist, load data block
      pDataBlock = tsdbRetrieveDataBlock(pQueryHandle, NULL);
      pRuntimeEnv->summary.totalCheckedRows += pBlockInfo->rows;
    }
  } else {
    assert(r == BLK_DATA_ALL_NEEDED);

    // load the data block statistics to perform further filter
    pRuntimeEnv->summary.loadBlockStatis += 1;
    if (tsdbRetrieveDataBlockStatisInfo(pQueryHandle, pStatis) != TSDB_CODE_SUCCESS) {
      // do not hand possibly invalid statistics to the filter or to the caller
      *pStatis = NULL;
    }

    if (!needToLoadDataBlock(pQuery, *pStatis, pRuntimeEnv->pCtx, pBlockInfo->rows)) {
#if defined(_DEBUG_VIEW)
      qDebug("QInfo:%p block discarded by per-filter", GET_QINFO_ADDR(pRuntimeEnv));
#endif
      // the block is only counted as discarded; it is still loaded below
      pRuntimeEnv->summary.discardBlocks += 1;
    }

    pRuntimeEnv->summary.totalCheckedRows += pBlockInfo->rows;
    pRuntimeEnv->summary.loadBlocks += 1;
    pDataBlock = tsdbRetrieveDataBlock(pQueryHandle, NULL);
  }

  return pDataBlock;
}

H
hjxilinx 已提交
2091
/*
 * Binary search of `key` in the ascending TSKEY array stored at pValue.
 *
 * TSDB_ORDER_DESC: returns the position of the last element that is <= key,
 *                  or -1 when key is smaller than every element.
 * otherwise (ASC): returns the position of the first element that is >= key,
 *                  or -1 when key is larger than every element.
 * Returns -1 when num <= 0.
 */
int32_t binarySearchForKey(char *pValue, int num, TSKEY key, int order) {
  if (num <= 0) {
    return -1;
  }

  assert(order == TSDB_ORDER_ASC || order == TSDB_ORDER_DESC);

  TSKEY  *keys = (TSKEY *)pValue;
  int32_t lo = 0;
  int32_t hi = num - 1;
  int32_t mid = -1;

  if (order == TSDB_ORDER_DESC) {
    // find the last position whose key is not bigger than the searched key
    for (;;) {
      if (key >= keys[hi]) return hi;
      if (key == keys[lo]) return lo;
      if (key < keys[lo]) return lo - 1;

      mid = ((hi - lo + 1) >> 1) + lo;

      if (key < keys[mid]) {
        hi = mid - 1;
      } else if (key > keys[mid]) {
        lo = mid + 1;
      } else {
        return mid;
      }
    }
  }

  // find the first position whose key is not smaller than the searched key
  for (;;) {
    if (key <= keys[lo]) return lo;
    if (key == keys[hi]) return hi;

    if (key > keys[hi]) {
      hi = hi + 1;
      return (hi >= num) ? -1 : hi;
    }

    mid = ((hi - lo + 1) >> 1) + lo;

    if (key < keys[mid]) {
      hi = mid - 1;
    } else if (key > keys[mid]) {
      lo = mid + 1;
    } else {
      return mid;
    }
  }
}

2154 2155 2156 2157 2158 2159 2160 2161 2162 2163 2164 2165 2166 2167 2168 2169 2170 2171 2172 2173 2174 2175
/*
 * Grow every output buffer of the query so that it can hold `capacity` rows.
 * Nothing is done when `capacity` is smaller than the current capacity.
 * After the reallocation each function context is pointed at the start of its
 * (possibly moved) buffer and pQuery->rec.capacity is updated.
 *
 * The allocation size is computed in size_t so that bytes * capacity cannot
 * overflow 32-bit arithmetic.
 */
static void ensureOutputBufferSimple(SQueryRuntimeEnv* pRuntimeEnv, int32_t capacity) {
  SQuery* pQuery = pRuntimeEnv->pQuery;

  if (capacity < pQuery->rec.capacity) {
    return;
  }

  for (int32_t i = 0; i < pQuery->numOfOutput; ++i) {
    int32_t bytes = pQuery->pSelectExpr[i].bytes;
    assert(bytes > 0 && capacity > 0);

    size_t allocSize = (size_t)bytes * (size_t)capacity + sizeof(tFilePage);

    char *tmp = realloc(pQuery->sdata[i], allocSize);
    if (tmp == NULL) {  // todo handle the oom
      assert(0);
    } else {
      pQuery->sdata[i] = (tFilePage *)tmp;
    }

    // set the pCtx output buffer position
    pRuntimeEnv->pCtx[i].aOutputBuf = pQuery->sdata[i]->data;
  }

  qDebug("QInfo:%p realloc output buffer to inc output buffer from: %" PRId64 " rows to:%d rows", GET_QINFO_ADDR(pRuntimeEnv),
         pQuery->rec.capacity, capacity);

  pQuery->rec.capacity = capacity;
}

2182 2183 2184
/*
 * In case of prj/diff query, ensure the output buffer is sufficient to
 * accommodate the results of the current block.
 *
 * Only applies when the query is neither an interval query, a
 * group-by-normal-column query nor a fixed output query. When fewer free rows
 * remain than the block holds, every output buffer is enlarged by the missing
 * amount, the newly usable area is zeroed, and the function contexts are
 * re-pointed behind the rows already produced.
 *
 * The allocation size is computed in size_t so that bytes * newSize cannot
 * overflow 32-bit arithmetic.
 */
static void ensureOutputBuffer(SQueryRuntimeEnv* pRuntimeEnv, SDataBlockInfo* pBlockInfo) {
  SQuery* pQuery = pRuntimeEnv->pQuery;
  if (QUERY_IS_INTERVAL_QUERY(pQuery) || pRuntimeEnv->groupbyNormalCol || isFixedOutputQuery(pQuery)) {
    return;
  }

  SResultRec *pRec = &pQuery->rec;
  if (pRec->capacity - pRec->rows >= pBlockInfo->rows) {
    return;  // enough room left for the whole block
  }

  int32_t remain = pRec->capacity - pRec->rows;
  int32_t newSize = pRec->capacity + (pBlockInfo->rows - remain);

  for (int32_t i = 0; i < pQuery->numOfOutput; ++i) {
    int32_t bytes = pQuery->pSelectExpr[i].bytes;
    assert(bytes > 0 && newSize > 0);

    size_t allocSize = (size_t)bytes * (size_t)newSize + sizeof(tFilePage);

    char *tmp = realloc(pQuery->sdata[i], allocSize);
    if (tmp == NULL) {  // todo handle the oom
      assert(0);
    } else {
      // clear everything behind the rows that already hold results
      memset(tmp + sizeof(tFilePage) + bytes * pRec->rows, 0, (newSize - pRec->rows) * bytes);
      pQuery->sdata[i] = (tFilePage *)tmp;
    }

    // set the pCtx output buffer position
    pRuntimeEnv->pCtx[i].aOutputBuf = pQuery->sdata[i]->data + pRec->rows * bytes;

    // these functions write their timestamps into the buffer of the first output column
    int32_t functionId = pQuery->pSelectExpr[i].base.functionId;
    if (functionId == TSDB_FUNC_TOP || functionId == TSDB_FUNC_BOTTOM || functionId == TSDB_FUNC_DIFF) {
      pRuntimeEnv->pCtx[i].ptsOutputBuf = pRuntimeEnv->pCtx[0].aOutputBuf;
    }
  }

  qDebug("QInfo:%p realloc output buffer, new size: %d rows, old:%" PRId64 ", remain:%" PRId64, GET_QINFO_ADDR(pRuntimeEnv),
         newSize, pRec->capacity, newSize - pRec->rows);

  pRec->capacity = newSize;
}

2221 2222
/*
 * Iterate over the data blocks of the current scan (master or secondary query
 * handle) and apply the query functions to each of them.
 *
 * The scan stops when the query is killed, when no block is left, or when the
 * status becomes QUERY_RESBUF_FULL or QUERY_COMPLETED. If the result buffer is
 * not full afterwards the query is marked completed; for an interval query in
 * the master scan all time windows are then closed. Always returns 0.
 */
static int64_t doScanAllDataBlocks(SQueryRuntimeEnv *pRuntimeEnv) {
  SQuery          *pQuery = pRuntimeEnv->pQuery;
  STableQueryInfo *pTableQueryInfo = pQuery->current;
  SQueryCostInfo  *summary = &pRuntimeEnv->summary;
  SWindowResInfo  *pWindowResInfo = &pRuntimeEnv->windowResInfo;

  qDebug("QInfo:%p query start, qrange:%" PRId64 "-%" PRId64 ", lastkey:%" PRId64 ", order:%d",
         GET_QINFO_ADDR(pRuntimeEnv), pTableQueryInfo->win.skey, pTableQueryInfo->win.ekey, pTableQueryInfo->lastKey,
         pQuery->order.order);

  TsdbQueryHandleT pQueryHandle = pRuntimeEnv->pSecQueryHandle;
  if (IS_MASTER_SCAN(pRuntimeEnv)) {
    pQueryHandle = pRuntimeEnv->pQueryHandle;
  }

  SDataBlockInfo blockInfo = SDATA_BLOCK_INITIALIZER;
  while (tsdbNextDataBlock(pQueryHandle)) {
    summary->totalBlocks += 1;
    if (isQueryKilled(GET_QINFO_ADDR(pRuntimeEnv))) {
      return 0;
    }

    tsdbRetrieveDataBlockInfo(pQueryHandle, &blockInfo);

    // todo extract methods
    // first block of an interval query: initialize the start of the first time window
    if (QUERY_IS_INTERVAL_QUERY(pQuery) && pWindowResInfo->prevSKey == TSKEY_INITIAL_VAL) {
      STimeWindow w = TSWINDOW_INITIALIZER;

      if (QUERY_IS_ASC_QUERY(pQuery)) {
        getAlignQueryTimeWindow(pQuery, blockInfo.window.skey, blockInfo.window.skey, pQuery->window.ekey, &w);
        pWindowResInfo->startTime = w.skey;
      } else {
        // the start position of the first time window in the endpoint that spreads beyond the queried last timestamp
        getAlignQueryTimeWindow(pQuery, blockInfo.window.ekey, pQuery->window.ekey, blockInfo.window.ekey, &w);
        pWindowResInfo->startTime = pQuery->window.skey;
      }

      pWindowResInfo->prevSKey = w.skey;
    }

    // in case of prj/diff query, ensure the output buffer is sufficient to accommodate the results of current block
    ensureOutputBuffer(pRuntimeEnv, &blockInfo);

    SDataStatis *pStatis = NULL;
    SArray      *pDataBlock = loadDataBlockOnDemand(pRuntimeEnv, pQueryHandle, &blockInfo, &pStatis);

    // query start position can not move into tableApplyFunctionsOnBlock due to limit/offset condition
    if (QUERY_IS_ASC_QUERY(pQuery)) {
      pQuery->pos = 0;
    } else {
      pQuery->pos = blockInfo.rows - 1;
    }

    int32_t numOfRes = tableApplyFunctionsOnBlock(pRuntimeEnv, &blockInfo, pStatis, binarySearchForKey, pDataBlock);

    summary->totalRows += blockInfo.rows;
    qDebug("QInfo:%p check data block, brange:%" PRId64 "-%" PRId64 ", numOfRows:%d, numOfRes:%d, lastKey:%"PRId64, GET_QINFO_ADDR(pRuntimeEnv),
           blockInfo.window.skey, blockInfo.window.ekey, blockInfo.rows, numOfRes, pQuery->current->lastKey);

    // while the output buffer is full or limit/offset is applied, query may be paused here
    if (Q_STATUS_EQUAL(pQuery->status, QUERY_RESBUF_FULL | QUERY_COMPLETED)) {
      break;
    }
  }

  // if the result buffer is not full, set the query complete
  if (!Q_STATUS_EQUAL(pQuery->status, QUERY_RESBUF_FULL)) {
    setQueryStatus(pQuery, QUERY_COMPLETED);
  }

  if (QUERY_IS_INTERVAL_QUERY(pQuery) && IS_MASTER_SCAN(pRuntimeEnv)) {
    if (!Q_STATUS_EQUAL(pQuery->status, QUERY_COMPLETED)) {
      assert(Q_STATUS_EQUAL(pQuery->status, QUERY_RESBUF_FULL));
    } else {
      // note: removing redundant windows is disabled here; only close the windows
      closeAllTimeWindow(pWindowResInfo);
      pWindowResInfo->curIndex = pWindowResInfo->size - 1;  // point to the last time window
    }
  }

  return 0;
}

/*
 * set tag value in SQLFunctionCtx
 * e.g.,tag information into input buffer
 */
2303
static void doSetTagValueInParam(void *tsdb, void* pTable, int32_t tagColId, tVariant *tag, int16_t type, int16_t bytes) {
H
[td-90]  
Haojun Liao 已提交
2304
  tVariantDestroy(tag);
2305

2306
  if (tagColId == TSDB_TBNAME_COLUMN_INDEX) {
2307
    char* val = tsdbGetTableName(pTable);
H
[td-90]  
Haojun Liao 已提交
2308 2309 2310
    assert(val != NULL);
    
    tVariantCreateFromBinary(tag, varDataVal(val), varDataLen(val), TSDB_DATA_TYPE_BINARY);
2311
  } else {
2312
    char* val = tsdbGetTableTagVal(pTable, tagColId, type, bytes);
H
[td-90]  
Haojun Liao 已提交
2313 2314 2315 2316
    if (val == NULL) {
      tag->nType = TSDB_DATA_TYPE_NULL;
      return;
    }
H
hjxilinx 已提交
2317 2318
    
    if (type == TSDB_DATA_TYPE_BINARY || type == TSDB_DATA_TYPE_NCHAR) {
H
Hongze Cheng 已提交
2319
      if (isNull(val, type)) {
H
Haojun Liao 已提交
2320 2321 2322 2323
        tag->nType = TSDB_DATA_TYPE_NULL;
        return;
      }

H
[td-90]  
Haojun Liao 已提交
2324
      tVariantCreateFromBinary(tag, varDataVal(val), varDataLen(val), type);
H
hjxilinx 已提交
2325
    } else {
H
Haojun Liao 已提交
2326 2327 2328 2329 2330
      if (isNull(val, type)) {
        tag->nType = TSDB_DATA_TYPE_NULL;
        return;
      }

H
[td-90]  
Haojun Liao 已提交
2331
      tVariantCreateFromBinary(tag, val, bytes, type);
H
hjxilinx 已提交
2332
    }
2333
  }
2334 2335
}

2336
void setTagVal(SQueryRuntimeEnv *pRuntimeEnv, void *pTable, void *tsdb) {
2337
  SQuery *pQuery = pRuntimeEnv->pQuery;
H
Haojun Liao 已提交
2338
  SQInfo* pQInfo = GET_QINFO_ADDR(pRuntimeEnv);
2339

H
[td-90]  
Haojun Liao 已提交
2340 2341 2342
  SExprInfo *pExprInfo = &pQuery->pSelectExpr[0];
  if (pQuery->numOfOutput == 1 && pExprInfo->base.functionId == TSDB_FUNC_TS_COMP) {
    assert(pExprInfo->base.numOfParams == 1);
H
Haojun Liao 已提交
2343 2344 2345 2346 2347 2348 2349 2350 2351 2352

    // todo refactor extract function.
    int16_t type = -1, bytes = -1;
    for(int32_t i = 0; i < pQuery->numOfTags; ++i) {
      if (pQuery->tagColList[i].colId == pExprInfo->base.arg->argValue.i64) {
        type = pQuery->tagColList[i].type;
        bytes = pQuery->tagColList[i].bytes;
      }
    }

2353
    doSetTagValueInParam(tsdb, pTable, pExprInfo->base.arg->argValue.i64, &pRuntimeEnv->pCtx[0].tag, type, bytes);
2354 2355
  } else {
    // set tag value, by which the results are aggregated.
2356
    for (int32_t idx = 0; idx < pQuery->numOfOutput; ++idx) {
H
Haojun Liao 已提交
2357
      SExprInfo* pLocalExprInfo = &pQuery->pSelectExpr[idx];
H
[td-90]  
Haojun Liao 已提交
2358
  
2359
      // ts_comp column required the tag value for join filter
H
Haojun Liao 已提交
2360
      if (!TSDB_COL_IS_TAG(pLocalExprInfo->base.colInfo.flag)) {
2361 2362
        continue;
      }
2363

2364
      // todo use tag column index to optimize performance
2365
      doSetTagValueInParam(tsdb, pTable, pLocalExprInfo->base.colInfo.colId, &pRuntimeEnv->pCtx[idx].tag,
H
Haojun Liao 已提交
2366
                           pLocalExprInfo->type, pLocalExprInfo->bytes);
2367
    }
2368

2369
    // set the join tag for first column
H
[td-90]  
Haojun Liao 已提交
2370
    SSqlFuncMsg *pFuncMsg = &pExprInfo->base;
H
Haojun Liao 已提交
2371
    if ((pFuncMsg->functionId == TSDB_FUNC_TS || pFuncMsg->functionId == TSDB_FUNC_PRJ) && pFuncMsg->colInfo.colIndex == PRIMARYKEY_TIMESTAMP_COL_INDEX &&
2372 2373
        pRuntimeEnv->pTSBuf != NULL) {
      assert(pFuncMsg->numOfParams == 1);
H
Haojun Liao 已提交
2374 2375 2376 2377 2378 2379 2380 2381 2382 2383

      // todo refactor
      int16_t type = -1, bytes = -1;
      for(int32_t i = 0; i < pQuery->numOfTags; ++i) {
        if (pQuery->tagColList[i].colId == pExprInfo->base.arg->argValue.i64) {
          type = pQuery->tagColList[i].type;
          bytes = pQuery->tagColList[i].bytes;
        }
      }

2384
      doSetTagValueInParam(tsdb, pTable, pExprInfo->base.arg->argValue.i64, &pRuntimeEnv->pCtx[0].tag, type, bytes);
2385
      qDebug("QInfo:%p set tag value for join comparison, colId:%" PRId64 ", val:%"PRId64, pQInfo, pExprInfo->base.arg->argValue.i64,
B
Bomin Zhang 已提交
2386
          pRuntimeEnv->pCtx[0].tag.i64Key)
2387 2388 2389 2390 2391 2392 2393
    }
  }
}

/*
 * Merge one window result into the current output row of the merge buffer.
 *
 * mergeFlag == false starts a new output row: every context's output pointer is advanced by one
 * row, its result info is reset and the aggregate is re-initialised before merging.
 * mergeFlag == true merges pWindowRes into the row that is already in progress.
 *
 * All contexts are prepared in a first pass; the distMergeFunc of each function runs in a second
 * pass (TSDB_FUNC_TAG_DUMMY is skipped there).
 */
static void doMerge(SQueryRuntimeEnv *pRuntimeEnv, int64_t timestamp, SWindowResult *pWindowRes, bool mergeFlag) {
  SQuery *        pQuery = pRuntimeEnv->pQuery;
  SQLFunctionCtx *pCtxList = pRuntimeEnv->pCtx;

  // pass 1: point every context at its input and, for a new row, at a fresh output slot
  for (int32_t col = 0; col < pQuery->numOfOutput; ++col) {
    SQLFunctionCtx *pCtx = &pCtxList[col];
    int32_t         funcId = pQuery->pSelectExpr[col].base.functionId;

    if (!mergeFlag) {
      pCtx->aOutputBuf += pCtx->outputBytes;
      pCtx->currentStage = FIRST_STAGE_MERGE;

      resetResultInfo(pCtx->resultInfo);
      aAggs[funcId].init(pCtx);
    }

    pCtx->hasNull = true;
    pCtx->nStartQueryTimestamp = timestamp;
    pCtx->aInputElemBuf = getPosInResultPage(pRuntimeEnv, col, pWindowRes);

    // in case of tag column, the tag information should be extracted from input buffer
    if (funcId != TSDB_FUNC_TAG_DUMMY && funcId != TSDB_FUNC_TAG) {
      continue;
    }

    tVariantDestroy(&pCtx->tag);

    int32_t outType = pCtx->outputType;
    if (outType == TSDB_DATA_TYPE_BINARY || outType == TSDB_DATA_TYPE_NCHAR) {
      tVariantCreateFromBinary(&pCtx->tag, varDataVal(pCtx->aInputElemBuf), varDataLen(pCtx->aInputElemBuf), outType);
    } else {
      tVariantCreateFromBinary(&pCtx->tag, pCtx->aInputElemBuf, pCtx->inputBytes, pCtx->inputType);
    }
  }

  // pass 2: run the merge routine of every function except the dummy tag function
  for (int32_t col = 0; col < pQuery->numOfOutput; ++col) {
    int32_t funcId = pQuery->pSelectExpr[col].base.functionId;
    if (funcId != TSDB_FUNC_TAG_DUMMY) {
      aAggs[funcId].distMergeFunc(&pCtxList[col]);
    }
  }
}

2433
static UNUSED_FUNC void printBinaryData(int32_t functionId, char *data, int32_t srcDataType) {
2434 2435 2436 2437 2438 2439 2440 2441 2442 2443 2444 2445 2446 2447 2448 2449 2450 2451 2452 2453 2454 2455 2456 2457 2458 2459 2460 2461 2462 2463 2464 2465 2466 2467 2468 2469 2470 2471 2472 2473 2474 2475 2476 2477 2478 2479 2480 2481 2482 2483 2484 2485 2486 2487 2488 2489 2490 2491 2492 2493 2494 2495 2496 2497 2498 2499 2500 2501
  if (functionId == TSDB_FUNC_FIRST_DST || functionId == TSDB_FUNC_LAST_DST) {
    switch (srcDataType) {
      case TSDB_DATA_TYPE_BINARY:
        printf("%" PRId64 ",%s\t", *(TSKEY *)data, (data + TSDB_KEYSIZE + 1));
        break;
      case TSDB_DATA_TYPE_TINYINT:
      case TSDB_DATA_TYPE_BOOL:
        printf("%" PRId64 ",%d\t", *(TSKEY *)data, *(int8_t *)(data + TSDB_KEYSIZE + 1));
        break;
      case TSDB_DATA_TYPE_SMALLINT:
        printf("%" PRId64 ",%d\t", *(TSKEY *)data, *(int16_t *)(data + TSDB_KEYSIZE + 1));
        break;
      case TSDB_DATA_TYPE_BIGINT:
      case TSDB_DATA_TYPE_TIMESTAMP:
        printf("%" PRId64 ",%" PRId64 "\t", *(TSKEY *)data, *(TSKEY *)(data + TSDB_KEYSIZE + 1));
        break;
      case TSDB_DATA_TYPE_INT:
        printf("%" PRId64 ",%d\t", *(TSKEY *)data, *(int32_t *)(data + TSDB_KEYSIZE + 1));
        break;
      case TSDB_DATA_TYPE_FLOAT:
        printf("%" PRId64 ",%f\t", *(TSKEY *)data, *(float *)(data + TSDB_KEYSIZE + 1));
        break;
      case TSDB_DATA_TYPE_DOUBLE:
        printf("%" PRId64 ",%lf\t", *(TSKEY *)data, *(double *)(data + TSDB_KEYSIZE + 1));
        break;
    }
  } else if (functionId == TSDB_FUNC_AVG) {
    printf("%lf,%d\t", *(double *)data, *(int32_t *)(data + sizeof(double)));
  } else if (functionId == TSDB_FUNC_SPREAD) {
    printf("%lf,%lf\t", *(double *)data, *(double *)(data + sizeof(double)));
  } else if (functionId == TSDB_FUNC_TWA) {
    data += 1;
    printf("%lf,%" PRId64 ",%" PRId64 ",%" PRId64 "\t", *(double *)data, *(int64_t *)(data + 8),
           *(int64_t *)(data + 16), *(int64_t *)(data + 24));
  } else if (functionId == TSDB_FUNC_MIN || functionId == TSDB_FUNC_MAX) {
    switch (srcDataType) {
      case TSDB_DATA_TYPE_TINYINT:
      case TSDB_DATA_TYPE_BOOL:
        printf("%d\t", *(int8_t *)data);
        break;
      case TSDB_DATA_TYPE_SMALLINT:
        printf("%d\t", *(int16_t *)data);
        break;
      case TSDB_DATA_TYPE_BIGINT:
      case TSDB_DATA_TYPE_TIMESTAMP:
        printf("%" PRId64 "\t", *(int64_t *)data);
        break;
      case TSDB_DATA_TYPE_INT:
        printf("%d\t", *(int *)data);
        break;
      case TSDB_DATA_TYPE_FLOAT:
        printf("%f\t", *(float *)data);
        break;
      case TSDB_DATA_TYPE_DOUBLE:
        printf("%f\t", *(float *)data);
        break;
    }
  } else if (functionId == TSDB_FUNC_SUM) {
    if (srcDataType == TSDB_DATA_TYPE_FLOAT || srcDataType == TSDB_DATA_TYPE_DOUBLE) {
      printf("%lf\t", *(float *)data);
    } else {
      printf("%" PRId64 "\t", *(int64_t *)data);
    }
  } else {
    printf("%s\t", data);
  }
}

2502
/*
 * Debug helper: dump the intermediate result of a super table query to stdout, one line per row.
 *
 * pdata holds one page per output column; the value of row j in column i is read at
 * pdata[i]->data + bytes(i) * j. Columns whose output type is not handled below print nothing.
 */
void UNUSED_FUNC displayInterResult(tFilePage **pdata, SQueryRuntimeEnv* pRuntimeEnv, int32_t numOfRows) {
  SQuery *pQuery = pRuntimeEnv->pQuery;
  int32_t numOfCols = pQuery->numOfOutput;

  printf("super table query intermediate result, total:%d\n", numOfRows);

  for (int32_t row = 0; row < numOfRows; ++row) {
    for (int32_t col = 0; col < numOfCols; ++col) {
      SExprInfo *pExpr = &pQuery->pSelectExpr[col];
      char *     pCell = pdata[col]->data + pExpr->bytes * row;

      switch (pExpr->type) {
        case TSDB_DATA_TYPE_BINARY:
          // binary output carries a function specific layout, decoded by printBinaryData
          printBinaryData(pExpr->base.functionId, pCell, pExpr->type);
          break;
        case TSDB_DATA_TYPE_TIMESTAMP:
        case TSDB_DATA_TYPE_BIGINT:
          printf("%" PRId64 "\t", *(int64_t *)pCell);
          break;
        case TSDB_DATA_TYPE_INT:
          printf("%d\t", *(int32_t *)pCell);
          break;
        case TSDB_DATA_TYPE_FLOAT:
          printf("%f\t", *(float *)pCell);
          break;
        case TSDB_DATA_TYPE_DOUBLE:
          printf("%lf\t", *(double *)pCell);
          break;
      }
    }

    printf("\n");
  }
}

typedef struct SCompSupporter {
H
hjxilinx 已提交
2541 2542 2543
  STableQueryInfo **pTableQueryInfo;
  int32_t *         position;
  SQInfo *          pQInfo;
2544 2545 2546 2547 2548
} SCompSupporter;

int32_t tableResultComparFn(const void *pLeft, const void *pRight, void *param) {
  int32_t left = *(int32_t *)pLeft;
  int32_t right = *(int32_t *)pRight;
2549

2550 2551
  SCompSupporter *  supporter = (SCompSupporter *)param;
  SQueryRuntimeEnv *pRuntimeEnv = &supporter->pQInfo->runtimeEnv;
2552

2553 2554
  int32_t leftPos = supporter->position[left];
  int32_t rightPos = supporter->position[right];
2555

2556 2557 2558 2559
  /* left source is exhausted */
  if (leftPos == -1) {
    return 1;
  }
2560

2561 2562 2563 2564
  /* right source is exhausted*/
  if (rightPos == -1) {
    return -1;
  }
2565

H
hjxilinx 已提交
2566
  SWindowResInfo *pWindowResInfo1 = &supporter->pTableQueryInfo[left]->windowResInfo;
2567
  SWindowResult * pWindowRes1 = getWindowResult(pWindowResInfo1, leftPos);
2568

2569 2570
  char *b1 = getPosInResultPage(pRuntimeEnv, PRIMARYKEY_TIMESTAMP_COL_INDEX, pWindowRes1);
  TSKEY leftTimestamp = GET_INT64_VAL(b1);
2571

H
hjxilinx 已提交
2572
  SWindowResInfo *pWindowResInfo2 = &supporter->pTableQueryInfo[right]->windowResInfo;
2573
  SWindowResult * pWindowRes2 = getWindowResult(pWindowResInfo2, rightPos);
2574

2575 2576
  char *b2 = getPosInResultPage(pRuntimeEnv, PRIMARYKEY_TIMESTAMP_COL_INDEX, pWindowRes2);
  TSKEY rightTimestamp = GET_INT64_VAL(b2);
2577

2578 2579 2580
  if (leftTimestamp == rightTimestamp) {
    return 0;
  }
2581

2582 2583 2584
  return leftTimestamp > rightTimestamp ? 1 : -1;
}

2585
/*
 * Merge table groups, starting at pQInfo->groupIndex, until one group produces at least one
 * result page or all groups have been processed. pQInfo->groupIndex is advanced past every group
 * that was merged.
 *
 * Returns -1 when merging a group fails, TSDB_CODE_SUCCESS otherwise.
 */
int32_t mergeIntoGroupResult(SQInfo *pQInfo) {
  int64_t startMs = taosGetTimestampMs();
  int32_t totalGroups = GET_NUM_OF_TABLEGROUP(pQInfo);

  while (pQInfo->groupIndex < totalGroups) {
    SArray *pGroup = GET_TABLEGROUP(pQInfo, pQInfo->groupIndex);

    int32_t numOfPages = mergeIntoGroupResultImpl(pQInfo, pGroup);
    if (numOfPages < 0) {  // not enough disk space to save the data into disk
      return -1;
    }

    pQInfo->groupIndex += 1;

    // this group generates at least one result, return results
    if (numOfPages > 0) {
      break;
    }

    assert(pQInfo->numOfGroupResultPages == 0);
    qDebug("QInfo:%p no result in group %d, continue", pQInfo, pQInfo->groupIndex - 1);
  }

  qDebug("QInfo:%p merge res data into group, index:%d, total group:%d, elapsed time:%" PRId64 "ms", pQInfo,
         pQInfo->groupIndex - 1, totalGroups, taosGetTimestampMs() - startMs);

  return TSDB_CODE_SUCCESS;
}

/*
 * Copy the next set of merged group result pages into the output buffers pQuery->sdata.
 *
 * When all result pages of the current group have been consumed (pQInfo->offset equals
 * pQInfo->numOfGroupResultPages) the next group is merged first. If that fails the function
 * returns without copying; if no group is left, pQInfo->tableIndex is set to the number of tables
 * to mark the query as completed.
 *
 * On a successful copy pQuery->rec.rows receives the number of rows copied and pQInfo->offset is
 * advanced by one.
 */
void copyResToQueryResultBuf(SQInfo *pQInfo, SQuery *pQuery) {
  bool groupConsumed = (pQInfo->offset == pQInfo->numOfGroupResultPages);
  if (groupConsumed) {
    pQInfo->numOfGroupResultPages = 0;

    // current results of group has been sent to client, try next group
    if (mergeIntoGroupResult(pQInfo) != TSDB_CODE_SUCCESS) {
      return;  // failed to save data in the disk
    }

    // check if all results has been sent to client
    int32_t numOfGroup = GET_NUM_OF_TABLEGROUP(pQInfo);
    if (pQInfo->numOfGroupResultPages == 0 && pQInfo->groupIndex == numOfGroup) {
      pQInfo->tableIndex = pQInfo->tableqinfoGroupInfo.numOfTables;  // set query completed
      return;
    }
  }

  SQueryRuntimeEnv *   pRuntimeEnv = &pQInfo->runtimeEnv;
  SDiskbasedResultBuf *pResultBuf = pRuntimeEnv->pResultBuf;

  int32_t groupResId = getGroupResultId(pQInfo->groupIndex - 1);
  SIDList pageIds = getDataBufPagesIdList(pResultBuf, pQInfo->offset + groupResId);

  // first pass: count the rows held by all pages of this result set
  int32_t totalRows = 0;
  for (int32_t p = 0; p < pageIds.size; ++p) {
    tFilePage *pPage = GET_RES_BUF_PAGE_BY_ID(pResultBuf, pageIds.pData[p]);
    totalRows += pPage->num;
  }

  // second pass: append every page, column by column, to the output buffers
  int32_t copiedRows = 0;
  for (int32_t p = 0; p < pageIds.size; ++p) {
    tFilePage *pPage = GET_RES_BUF_PAGE_BY_ID(pResultBuf, pageIds.pData[p]);

    for (int32_t col = 0; col < pQuery->numOfOutput; ++col) {
      int32_t colBytes = pRuntimeEnv->pCtx[col].outputBytes;
      char *  pDest = pQuery->sdata[col]->data;

      // inside a page, column data starts at offset[col] * (rows in the page)
      memcpy(pDest + copiedRows * colBytes, pPage->data + pRuntimeEnv->offset[col] * pPage->num,
             colBytes * pPage->num);
    }

    copiedRows += pPage->num;
  }

  assert(pQuery->rec.rows == 0);

  pQuery->rec.rows += totalRows;
  pQInfo->offset += 1;
}

H
Haojun Liao 已提交
2667
/*
 * Return the number of result rows held by a window result.
 *
 * The count is taken from the first output column that is not a ts, tag or tagprj function and
 * reports a positive numOfRes; those three functions cannot decide the output number of the
 * query. Returns 0 when no such column has a result.
 */
int64_t getNumOfResultWindowRes(SQuery *pQuery, SWindowResult *pWindowRes) {
  for (int32_t col = 0; col < pQuery->numOfOutput; ++col) {
    int32_t funcId = pQuery->pSelectExpr[col].base.functionId;

    bool followsMainOutput = (funcId == TSDB_FUNC_TS || funcId == TSDB_FUNC_TAG || funcId == TSDB_FUNC_TAGPRJ);
    if (!followsMainOutput) {
      SResultInfo *pInfo = &pWindowRes->resultInfo[col];
      assert(pInfo != NULL);

      if (pInfo->numOfRes > 0) {
        return pInfo->numOfRes;
      }
    }
  }

  return 0;
}

2690
/*
 * Merge the window results of all tables of one group, ordered by window start timestamp, into
 * the output buffers (pQuery->sdata) and flush them into the disk based result buffer.
 *
 * Tables that own no result page or no window result are ignored. The remaining tables are merged
 * with a loser tree: window results with the same timestamp are merged into one output row,
 * a new timestamp starts a new row. Whenever the output buffer is full it is flushed.
 *
 * Returns the number of result pages generated for the group (pQInfo->numOfGroupResultPages),
 * 0 when no table of the group has any result, or -1 when flushing fails.
 * On allocation failure the function does not return: it longjmp()s to pRuntimeEnv->env with
 * TSDB_CODE_QRY_OUT_OF_MEMORY.
 *
 * Every exit path releases all memory allocated here, including the intermediate result buffers
 * attached to pResultInfo.
 */
int32_t mergeIntoGroupResultImpl(SQInfo *pQInfo, SArray *pGroup) {
  SQueryRuntimeEnv *pRuntimeEnv = &pQInfo->runtimeEnv;
  SQuery *          pQuery = pRuntimeEnv->pQuery;

  size_t      size = taosArrayGetSize(pGroup);
  tFilePage **buffer = pQuery->sdata;

  // everything that must be released in the cleanup section is declared up front
  SLoserTreeInfo *pTree = NULL;
  SResultInfo *   pResultInfo = NULL;
  int32_t         code = 0;
  int64_t         lastTimestamp = -1;
  int64_t         startt = 0;
  int64_t         endt = 0;

  int32_t *         posList = calloc(size, sizeof(int32_t));
  STableQueryInfo **pTableList = malloc(POINTER_BYTES * size);

  if (pTableList == NULL || posList == NULL) {
    tfree(posList);
    tfree(pTableList);

    qError("QInfo:%p failed alloc memory", pQInfo);
    longjmp(pRuntimeEnv->env, TSDB_CODE_QRY_OUT_OF_MEMORY);
  }

  // todo opt for the case of one table per group
  // collect the tables that actually own results; only those take part in the merge
  int32_t numOfTables = 0;
  for (size_t i = 0; i < size; ++i) {
    STableQueryInfo *item = taosArrayGetP(pGroup, i);

    SIDList list = getDataBufPagesIdList(pRuntimeEnv->pResultBuf, TSDB_TABLEID(item->pTable)->tid);
    if (list.size > 0 && item->windowResInfo.size > 0) {
      pTableList[numOfTables] = item;
      numOfTables += 1;
    }
  }

  if (numOfTables == 0) {
    tfree(posList);
    tfree(pTableList);

    assert(pQInfo->numOfGroupResultPages == 0);
    return 0;
  }

  SCompSupporter cs = {pTableList, posList, pQInfo};

  tLoserTreeCreate(&pTree, numOfTables, &cs, tableResultComparFn);
  pResultInfo = calloc(pQuery->numOfOutput, sizeof(SResultInfo));

  // pTree is still NULL if the tree could not be created; release everything before the
  // non-local jump, since no code after this point will run
  if (pTree == NULL || pResultInfo == NULL) {
    tfree(pTree);
    tfree(pResultInfo);
    tfree(pTableList);
    tfree(posList);

    qError("QInfo:%p failed alloc memory", pQInfo);
    longjmp(pRuntimeEnv->env, TSDB_CODE_QRY_OUT_OF_MEMORY);
  }

  setWindowResultInfo(pResultInfo, pQuery, pRuntimeEnv->stableQuery);
  resetMergeResultBuf(pQuery, pRuntimeEnv->pCtx, pResultInfo);

  startt = taosGetTimestampMs();

  while (1) {
    // the winner of the loser tree is the source with the smallest current timestamp
    int32_t pos = pTree->pNode[0].index;

    SWindowResInfo *pWindowResInfo = &pTableList[pos]->windowResInfo;
    SWindowResult * pWindowRes = getWindowResult(pWindowResInfo, cs.position[pos]);

    char *b = getPosInResultPage(pRuntimeEnv, PRIMARYKEY_TIMESTAMP_COL_INDEX, pWindowRes);
    TSKEY ts = GET_INT64_VAL(b);

    assert(ts == pWindowRes->window.skey);
    int64_t num = getNumOfResultWindowRes(pQuery, pWindowRes);

    if (num > 0) {
      if (ts == lastTimestamp) {  // merge with the last one
        doMerge(pRuntimeEnv, ts, pWindowRes, true);
      } else {  // copy data to disk buffer
        if (buffer[0]->num == pQuery->rec.capacity) {
          if (flushFromResultBuf(pQInfo) != TSDB_CODE_SUCCESS) {
            qError("QInfo:%p failed to flush data into temp file, abort query", pQInfo);
            code = -1;
            goto cleanup;
          }

          resetMergeResultBuf(pQuery, pRuntimeEnv->pCtx, pResultInfo);
        }

        doMerge(pRuntimeEnv, ts, pWindowRes, false);
        buffer[0]->num += 1;
      }

      lastTimestamp = ts;
    }

    // move this source to its next window result, whether or not the current one had rows
    cs.position[pos] += 1;
    if (cs.position[pos] >= pWindowResInfo->size) {
      cs.position[pos] = -1;

      // all input sources are exhausted
      if (--numOfTables == 0) {
        break;
      }
    }

    tLoserTreeAdjust(pTree, pos + pTree->numOfEntries);
  }

  if (buffer[0]->num != 0) {  // there are data in buffer
    if (flushFromResultBuf(pQInfo) != TSDB_CODE_SUCCESS) {
      qError("QInfo:%p failed to flush data into temp file, abort query", pQInfo);
      code = -1;
      goto cleanup;
    }
  }

  endt = taosGetTimestampMs();

#ifdef _DEBUG_VIEW
  displayInterResult(pQuery->sdata, pRuntimeEnv, pQuery->sdata[0]->num);
#endif

  qDebug("QInfo:%p result merge completed for group:%d, elapsed time:%" PRId64 " ms", pQInfo, pQInfo->groupIndex, endt - startt);

  pQInfo->offset = 0;
  code = pQInfo->numOfGroupResultPages;

cleanup:
  // pResultInfo is non-NULL on every path that reaches this label
  for (int32_t i = 0; i < pQuery->numOfOutput; ++i) {
    tfree(pResultInfo[i].interResultBuf);
  }

  tfree(pResultInfo);
  tfree(pTree);
  tfree(pTableList);
  tfree(posList);

  return code;
}

/*
 * Flush the rows held in the output buffers (pQuery->sdata) into the disk based result buffer.
 *
 * The rows are written as one or more pages that all share the same list id:
 * getGroupResultId(groupIndex) + numOfGroupResultPages. In each page the data of column i starts
 * at pRuntimeEnv->offset[i] * (rows in the page). pQInfo->numOfGroupResultPages is increased by
 * one per call, not per page.
 *
 * Returns TSDB_CODE_SUCCESS, or TSDB_CODE_QRY_OUT_OF_MEMORY when no new page can be obtained from
 * the result buffer (the page pointer used to be dereferenced unchecked, so the failure that the
 * callers test for could never be reported).
 */
int32_t flushFromResultBuf(SQInfo *pQInfo) {
  SQueryRuntimeEnv *pRuntimeEnv = &pQInfo->runtimeEnv;
  SQuery *          pQuery = pRuntimeEnv->pQuery;

  SDiskbasedResultBuf *pResultBuf = pRuntimeEnv->pResultBuf;

  // number of rows that fit into one page
  int32_t capacity = (DEFAULT_INTERN_BUF_PAGE_SIZE - sizeof(tFilePage)) / pQuery->rowSize;

  // the base value for group result, since the maximum number of table for each vnode will not exceed 100,000.
  int32_t pageId = -1;

  int32_t remain = pQuery->sdata[0]->num;
  int32_t offset = 0;

  while (remain > 0) {
    int32_t r = (remain > capacity) ? capacity : remain;

    int32_t    id = getGroupResultId(pQInfo->groupIndex) + pQInfo->numOfGroupResultPages;
    tFilePage *buf = getNewDataBuf(pResultBuf, id, &pageId);
    if (buf == NULL) {
      qError("QInfo:%p failed to get a new page from the result buffer", pQInfo);
      return TSDB_CODE_QRY_OUT_OF_MEMORY;
    }

    buf->num = r;

    // pagewise copy to dest buffer
    for (int32_t i = 0; i < pQuery->numOfOutput; ++i) {
      int32_t bytes = pRuntimeEnv->pCtx[i].outputBytes;

      memcpy(buf->data + pRuntimeEnv->offset[i] * buf->num, ((char *)pQuery->sdata[i]->data) + offset * bytes,
             buf->num * bytes);
    }

    offset += r;
    remain -= r;
  }

  pQInfo->numOfGroupResultPages += 1;
  return TSDB_CODE_SUCCESS;
}

/*
 * Rewind the merge output: empty every output page of pQuery->sdata and re-attach each function
 * context to its result info in pResultInfo.
 *
 * The output pointer of each context is placed one row before the start of its page, because
 * doMerge() advances it by outputBytes before the first row of a new timestamp is written.
 */
void resetMergeResultBuf(SQuery *pQuery, SQLFunctionCtx *pCtx, SResultInfo *pResultInfo) {
  for (int32_t col = 0; col < pQuery->numOfOutput; ++col) {
    SQLFunctionCtx *pCur = &pCtx[col];
    tFilePage *     pPage = pQuery->sdata[col];

    pCur->aOutputBuf = pPage->data - pCur->outputBytes;
    pCur->size = 1;
    pCur->startOffset = 0;
    pCur->resultInfo = &pResultInfo[col];

    pPage->num = 0;
  }
}

2884 2885 2886 2887 2888 2889 2890
/*
 * Prepare the query info of one table for the reverse scan. A NULL pTableQueryInfo is ignored.
 *
 * The new query window starts at lastKey + step (step is the direction factor of the current,
 * already switched, query order) and ends at the previous window start; lastKey is reset to the
 * new window start. The cursor order is switched, its vgroup index is invalidated and the window
 * index is placed on the last time window.
 */
static void updateTableQueryInfoForReverseScan(SQuery *pQuery, STableQueryInfo *pTableQueryInfo) {
  if (pTableQueryInfo == NULL) {
    return;
  }

  // order has change already!
  int32_t step = GET_FORWARD_DIRECTION_FACTOR(pQuery->order.order);

  // TODO validate the relation between win.ekey and lastKey + step for both scan orders

  TSKEY reverseStart = pTableQueryInfo->lastKey + step;
  TSKEY reverseEnd = pTableQueryInfo->win.skey;

  pTableQueryInfo->win.skey = reverseStart;
  pTableQueryInfo->win.ekey = reverseEnd;
  pTableQueryInfo->lastKey = reverseStart;

  SWITCH_ORDER(pTableQueryInfo->cur.order);
  pTableQueryInfo->cur.vgroupIndex = -1;

  // set the index at the end of time window
  pTableQueryInfo->windowResInfo.curIndex = pTableQueryInfo->windowResInfo.size - 1;
}

/*
 * For every closed window of pWindowResInfo, decide per output column whether the function still
 * has work to do in the reverse scan.
 *
 * first/first_dst in an ascending query and last/last_dst in a descending query are marked as not
 * complete; every other function except ts and tag is marked complete. ts and tag are untouched.
 */
static void disableFuncInReverseScanImpl(SQInfo* pQInfo, SWindowResInfo *pWindowResInfo, int32_t order) {
  SQuery *pQuery = pQInfo->runtimeEnv.pQuery;

  for (int32_t w = 0; w < pWindowResInfo->size; ++w) {
    if (!getTimeWindowResStatus(pWindowResInfo, w)->closed) {
      continue;
    }

    SWindowResult *pWinRes = getWindowResult(pWindowResInfo, w);

    // open/close the specified query for each group result
    for (int32_t col = 0; col < pQuery->numOfOutput; ++col) {
      int32_t funcId = pQuery->pSelectExpr[col].base.functionId;

      bool isFirst = (funcId == TSDB_FUNC_FIRST || funcId == TSDB_FUNC_FIRST_DST);
      bool isLast = (funcId == TSDB_FUNC_LAST || funcId == TSDB_FUNC_LAST_DST);

      if ((isFirst && order == TSDB_ORDER_ASC) || (isLast && order == TSDB_ORDER_DESC)) {
        pWinRes->resultInfo[col].complete = false;
      } else if (funcId != TSDB_FUNC_TS && funcId != TSDB_FUNC_TAG) {
        pWinRes->resultInfo[col].complete = true;
      }
    }
  }
}

2936 2937
/*
 * Prepare the query for the reverse scan.
 *
 * 1. Mark which functions still need to run: for group-by-normal-column and interval queries this
 *    is done per closed window, otherwise directly on the result info of each function context
 *    (contexts without a result info are skipped).
 * 2. Update the query window, key and cursor of every table in every table group.
 */
void disableFuncInReverseScan(SQInfo *pQInfo) {
  SQueryRuntimeEnv *pRuntimeEnv = &pQInfo->runtimeEnv;
  SQuery *          pQuery = pRuntimeEnv->pQuery;
  int32_t           order = pQuery->order.order;

  if (pRuntimeEnv->groupbyNormalCol || QUERY_IS_INTERVAL_QUERY(pQuery)) {
    // group by normal columns and interval query on normal table
    disableFuncInReverseScanImpl(pQInfo, &pRuntimeEnv->windowResInfo, order);
  } else {
    // for simple result of table query,
    for (int32_t col = 0; col < pQuery->numOfOutput; ++col) {  // todo refactor
      int32_t      funcId = pQuery->pSelectExpr[col].base.functionId;
      SResultInfo *pResInfo = pRuntimeEnv->pCtx[col].resultInfo;

      if (pResInfo == NULL) {
        continue;  // resultInfo is NULL, means no data checked in previous scan
      }

      bool isFirst = (funcId == TSDB_FUNC_FIRST || funcId == TSDB_FUNC_FIRST_DST);
      bool isLast = (funcId == TSDB_FUNC_LAST || funcId == TSDB_FUNC_LAST_DST);

      if ((isFirst && order == TSDB_ORDER_ASC) || (isLast && order == TSDB_ORDER_DESC)) {
        pResInfo->complete = false;
      } else if (funcId != TSDB_FUNC_TS && funcId != TSDB_FUNC_TAG) {
        pResInfo->complete = true;
      }
    }
  }

  int32_t numOfGroups = GET_NUM_OF_TABLEGROUP(pQInfo);
  for (int32_t g = 0; g < numOfGroups; ++g) {
    SArray *pGroup = GET_TABLEGROUP(pQInfo, g);

    size_t numOfTables = taosArrayGetSize(pGroup);
    for (size_t t = 0; t < numOfTables; ++t) {
      STableQueryInfo *pTableQueryInfo = taosArrayGetP(pGroup, t);
      updateTableQueryInfoForReverseScan(pQuery, pTableQueryInfo);
    }
  }
}

2976
/* Switch the scan order stored in the function context of every output column. */
void switchCtxOrder(SQueryRuntimeEnv *pRuntimeEnv) {
  SQLFunctionCtx *pCtx = pRuntimeEnv->pCtx;
  int32_t         numOfOutput = pRuntimeEnv->pQuery->numOfOutput;

  for (int32_t col = 0; col < numOfOutput; ++col) {
    SWITCH_ORDER(pCtx[col].order);
  }
}

/*
 * Initialise a window result row: record its position info and allocate one zeroed SResultInfo
 * per output column, then let setWindowResultInfo() set up the intermediate result buffers.
 *
 * NOTE(review): the calloc() result is passed on unchecked - confirm that setWindowResultInfo()
 * or the callers cope with an allocation failure.
 */
void createQueryResultInfo(SQuery *pQuery, SWindowResult *pResultRow, bool isSTableQuery, SPosInfo *posInfo) {
  pResultRow->pos = *posInfo;
  pResultRow->resultInfo = calloc((size_t)pQuery->numOfOutput, sizeof(SResultInfo));

  // set the intermediate result output buffer
  setWindowResultInfo(pResultRow->resultInfo, pQuery, isSTableQuery);
}

/*
 * Point every function context back at the start of its output buffer, reset and re-attach the
 * runtime result info of each column, clear the output buffers and finally re-initialise the
 * contexts through initCtxOutputBuf().
 */
void resetCtxOutputBuf(SQueryRuntimeEnv *pRuntimeEnv) {
  SQuery *pQuery = pRuntimeEnv->pQuery;

  for (int32_t col = 0; col < pQuery->numOfOutput; ++col) {
    SQLFunctionCtx *pCtx = &pRuntimeEnv->pCtx[col];
    SResultInfo *   pResInfo = &pRuntimeEnv->resultInfo[col];
    char *          pOutput = pQuery->sdata[col]->data;

    pCtx->aOutputBuf = pOutput;

    /*
     * set the output buffer information and intermediate buffer
     * not all queries require the interResultBuf, such as COUNT/TAGPRJ/PRJ/TAG etc.
     */
    resetResultInfo(pResInfo);
    pCtx->resultInfo = pResInfo;

    // set the timestamp output buffer for top/bottom/diff query
    int32_t funcId = pQuery->pSelectExpr[col].base.functionId;
    bool    writesTs = (funcId == TSDB_FUNC_TOP || funcId == TSDB_FUNC_BOTTOM || funcId == TSDB_FUNC_DIFF);
    if (writesTs) {
      pCtx->ptsOutputBuf = pRuntimeEnv->pCtx[0].aOutputBuf;
    }

    memset(pOutput, 0, (size_t)pQuery->pSelectExpr[col].bytes * pQuery->rec.capacity);
  }

  initCtxOutputBuf(pRuntimeEnv);
}

/*
 * Move the output position of the function contexts forward by `output` rows and reset their
 * result info. Must not be used with the diff function (asserted).
 */
void forwardCtxOutputBuf(SQueryRuntimeEnv *pRuntimeEnv, int64_t output) {
  SQuery *pQuery = pRuntimeEnv->pQuery;

  // reset the execution contexts
  for (int32_t col = 0; col < pQuery->numOfOutput; ++col) {
    SQLFunctionCtx *pCtx = &pRuntimeEnv->pCtx[col];
    int32_t         funcId = pQuery->pSelectExpr[col].base.functionId;
    assert(funcId != TSDB_FUNC_DIFF);

    // set next output position
    if (IS_OUTER_FORWARD(aAggs[funcId].nStatus)) {
      pCtx->aOutputBuf += pCtx->outputBytes * output;
    }

    /*
     * NOTE: for top/bottom query, the value of first column of output (timestamp) are assigned
     * in the procedure of top/bottom routine
     * the output buffer in top/bottom routine is ptsOutputBuf, so we need to forward the output buffer
     *
     * diff function is handled in multi-output function
     */
    if (funcId == TSDB_FUNC_TOP || funcId == TSDB_FUNC_BOTTOM) {
      pCtx->ptsOutputBuf += TSDB_KEYSIZE * output;
    }

    resetResultInfo(pCtx->resultInfo);
  }
}

/*
 * Reset the stage of every function context to 0 and run the init routine of its aggregate
 * function, unless the result info of that context is already initialised.
 */
void initCtxOutputBuf(SQueryRuntimeEnv *pRuntimeEnv) {
  SQuery *pQuery = pRuntimeEnv->pQuery;

  for (int32_t col = 0; col < pQuery->numOfOutput; ++col) {
    SQLFunctionCtx *pCtx = &pRuntimeEnv->pCtx[col];
    int32_t         funcId = pQuery->pSelectExpr[col].base.functionId;

    pCtx->currentStage = 0;

    SResultInfo *pResInfo = GET_RES_INFO(pCtx);
    if (!pResInfo->initialized) {
      aAggs[funcId].init(pCtx);
    }
  }
}

3063
/*
 * Apply the remaining limit.offset to the rows currently held in the output buffers.
 *
 * - Nothing happens when there are no rows or no offset is left.
 * - If the offset covers all rows, they are all dropped: the offset is reduced by the row count,
 *   the output buffers are reset and the QUERY_RESBUF_FULL status is cleared.
 * - Otherwise the first limit.offset rows are removed by moving the remaining rows to the start
 *   of each output buffer, and the output pointers are placed right after them.
 */
void skipResults(SQueryRuntimeEnv *pRuntimeEnv) {
  SQuery *pQuery = pRuntimeEnv->pQuery;

  if (pQuery->rec.rows == 0 || pQuery->limit.offset == 0) {
    return;
  }

  if (pQuery->rec.rows <= pQuery->limit.offset) {
    qDebug("QInfo:%p skip rows:%" PRId64 ", new offset:%" PRIu64, GET_QINFO_ADDR(pRuntimeEnv), pQuery->rec.rows,
        pQuery->limit.offset - pQuery->rec.rows);

    pQuery->limit.offset -= pQuery->rec.rows;
    pQuery->rec.rows = 0;

    resetCtxOutputBuf(pRuntimeEnv);

    // clear the buffer full flag if exists
    CLEAR_QUERY_STATUS(pQuery, QUERY_RESBUF_FULL);
    return;
  }

  int64_t numOfSkip = pQuery->limit.offset;
  pQuery->rec.rows -= numOfSkip;
  pQuery->limit.offset = 0;

  qDebug("QInfo:%p skip row:%"PRId64", new offset:%d, numOfRows remain:%" PRIu64, GET_QINFO_ADDR(pRuntimeEnv), numOfSkip,
         0, pQuery->rec.rows);

  for (int32_t col = 0; col < pQuery->numOfOutput; ++col) {
    SQLFunctionCtx *pCtx = &pRuntimeEnv->pCtx[col];
    int32_t         funcId = pQuery->pSelectExpr[col].base.functionId;
    int32_t         colBytes = pCtx->outputBytes;
    char *          pColData = (char *)pQuery->sdata[col]->data;

    // source and destination overlap, hence memmove
    memmove(pColData, pColData + colBytes * numOfSkip, pQuery->rec.rows * colBytes);
    pCtx->aOutputBuf = pColData + pQuery->rec.rows * colBytes;

    if (funcId == TSDB_FUNC_DIFF || funcId == TSDB_FUNC_TOP || funcId == TSDB_FUNC_BOTTOM) {
      pCtx->ptsOutputBuf = pRuntimeEnv->pCtx[0].aOutputBuf;
    }
  }

  updateNumOfResult(pRuntimeEnv, pQuery->rec.rows);
}

/*
 * Update the status of a query.
 *
 * QUERY_NOT_COMPLETED replaces the whole status. Any other status is added to the current status
 * bits after the QUERY_NOT_COMPLETED bit has been cleared, since it is not compatible with any
 * other status.
 */
void setQueryStatus(SQuery *pQuery, int8_t status) {
  if (status != QUERY_NOT_COMPLETED) {
    CLEAR_QUERY_STATUS(pQuery, QUERY_NOT_COMPLETED);
    pQuery->status |= status;
    return;
  }

  pQuery->status = status;
}

/*
 * Call xNextStep for every output function except TSDB_FUNC_TS, using the function
 * contexts as they are currently bound in pRuntimeEnv.
 *
 * Every function is visited (no early exit). Returns true when at least one of the
 * visited result infos is not marked complete.
 */
static bool doMoveFunctionsToNextStep(SQueryRuntimeEnv *pRuntimeEnv) {
  SQuery *pQuery = pRuntimeEnv->pQuery;
  bool    hasIncomplete = false;

  for (int32_t k = 0; k < pQuery->numOfOutput; ++k) {
    int16_t fid = pQuery->pSelectExpr[k].base.functionId;
    if (fid == TSDB_FUNC_TS) {
      continue;
    }

    aAggs[fid].xNextStep(&pRuntimeEnv->pCtx[k]);

    SResultInfo *pInfo = GET_RES_INFO(&pRuntimeEnv->pCtx[k]);
    if (!pInfo->complete) {
      hasIncomplete = true;
    }
  }

  return hasIncomplete;
}

/*
 * Advance all query functions to their next step and report whether any of them
 * still has an incomplete result.
 *
 * For group-by-normal-column and interval queries this is done once per closed
 * window result (after binding the output buffers to that window); otherwise it is
 * done once on the current contexts.
 */
bool needScanDataBlocksAgain(SQueryRuntimeEnv *pRuntimeEnv) {
  SQuery *pQuery = pRuntimeEnv->pQuery;

  if (!(pRuntimeEnv->groupbyNormalCol || QUERY_IS_INTERVAL_QUERY(pQuery))) {
    return doMoveFunctionsToNextStep(pRuntimeEnv);
  }

  bool            again = false;
  SWindowResInfo *pInfo = &pRuntimeEnv->windowResInfo;

  for (int32_t w = 0; w < pInfo->size; ++w) {
    SWindowResult *pWindowRes = getWindowResult(pInfo, w);
    if (pWindowRes->status.closed) {
      setWindowResOutputBuf(pRuntimeEnv, pWindowRes);

      // always run the step for this window, even if an earlier window already asked for another scan
      if (doMoveFunctionsToNextStep(pRuntimeEnv)) {
        again = true;
      }
    }
  }

  return again;
}

H
Haojun Liao 已提交
3159
/*
 * Build a snapshot of the current query state, taken with `start` as the scan start key.
 *
 * The snapshot records the query status, the current window index, the query window,
 * and a window that begins at `start` and ends at the ekey of the current table's window.
 * `start` must not be past the current table's lastKey in scan direction (asserted).
 */
static SQueryStatusInfo getQueryStatusInfo(SQueryRuntimeEnv *pRuntimeEnv, TSKEY start) {
  SQuery          *pQuery = pRuntimeEnv->pQuery;
  STableQueryInfo *pCurrent = pQuery->current;

  assert(QUERY_IS_ASC_QUERY(pQuery) ? (start <= pCurrent->lastKey) : (start >= pCurrent->lastKey));

  STimeWindow scanRange = {.skey = start, .ekey = pCurrent->win.ekey};

  SQueryStatusInfo snapshot = {
      .curWindow   = scanRange,
      .w           = pQuery->window,
      .lastKey     = start,
      .windowIndex = pRuntimeEnv->windowResInfo.curIndex,
      .status      = pQuery->status,
  };

  return snapshot;
}

3177 3178 3179 3180
/*
 * Prepare the runtime environment for a scan in the opposite direction.
 *
 * Saves the ts-buffer cursor into pStatus, flips the ts-buffer order and the query
 * order, sets the query window to the swapped pStatus->curWindow, replaces the
 * secondary query handle with a new one for that window, and resets the query
 * status to QUERY_NOT_COMPLETED.
 */
static void setEnvBeforeReverseScan(SQueryRuntimeEnv *pRuntimeEnv, SQueryStatusInfo *pStatus) {
  SQuery *pQuery = pRuntimeEnv->pQuery;
  SQInfo *pQInfo = GET_QINFO_ADDR(pRuntimeEnv);

  // keep the cursor so that clearEnvAfterReverseScan can restore it
  pStatus->cur = tsBufGetCursor(pRuntimeEnv->pTSBuf);
  if (pRuntimeEnv->pTSBuf != NULL) {
    SWITCH_ORDER(pRuntimeEnv->pTSBuf->cur.order);
    tsBufNextPos(pRuntimeEnv->pTSBuf);
  }

  // the reverse scan covers the same range with start and end exchanged
  pQuery->window = pStatus->curWindow;
  SWAP(pQuery->window.skey, pQuery->window.ekey, TSKEY);

  SWITCH_ORDER(pQuery->order.order);
  SET_REVERSE_SCAN_FLAG(pRuntimeEnv);

  STsdbQueryCond reverseCond = {
      .numOfCols = pQuery->numOfCols,
      .colList   = pQuery->colList,
      .order     = pQuery->order.order,
      .twindow   = pQuery->window,
  };

  // release the handle left over from a previous secondary scan, if any
  if (pRuntimeEnv->pSecQueryHandle != NULL) {
    tsdbCleanupQueryHandle(pRuntimeEnv->pSecQueryHandle);
  }

  pRuntimeEnv->pSecQueryHandle = tsdbQueryTables(pQInfo->tsdb, &reverseCond, &pQInfo->tableGroupInfo, pQInfo);

  setQueryStatus(pQuery, QUERY_NOT_COMPLETED);
  switchCtxOrder(pRuntimeEnv);
  disableFuncInReverseScan(pQInfo);
}

3214 3215
/*
 * Undo the changes made for the reverse scan.
 *
 * Flips the query order back, restores the ts-buffer cursor saved in pStatus,
 * switches back to the master scan flag, and restores the query status, the
 * current table's lastKey and both time windows from pStatus.
 */
static void clearEnvAfterReverseScan(SQueryRuntimeEnv *pRuntimeEnv, SQueryStatusInfo *pStatus) {
  SQuery          *pQuery = pRuntimeEnv->pQuery;
  STableQueryInfo *pCurrent = pQuery->current;

  SWITCH_ORDER(pQuery->order.order);
  switchCtxOrder(pRuntimeEnv);

  tsBufSetCursor(pRuntimeEnv->pTSBuf, &pStatus->cur);
  if (pRuntimeEnv->pTSBuf != NULL) {
    pRuntimeEnv->pTSBuf->cur.order = pQuery->order.order;
  }

  SET_MASTER_SCAN_FLAG(pRuntimeEnv);

  // bring back the state recorded before the reverse scan
  pQuery->status = pStatus->status;
  pCurrent->lastKey = pStatus->lastKey;

  pCurrent->win = pStatus->w;
  pQuery->window = pCurrent->win;
}

3237
/*
 * Scan the data blocks of the current table, starting from `start`.
 *
 * The master scan runs first. While needScanDataBlocksAgain() reports that a
 * function needs more passes, the range covered by the master scan is scanned
 * again with a fresh secondary query handle (REPEAT_SCAN). Finally, if the query
 * needs it, one reverse scan is executed and the environment is restored.
 * Returns early when the query is found to be killed between repeat scans.
 */
void scanOneTableDataBlocks(SQueryRuntimeEnv *pRuntimeEnv, TSKEY start) {
  SQInfo          *pQInfo = (SQInfo *)GET_QINFO_ADDR(pRuntimeEnv);
  SQuery          *pQuery = pRuntimeEnv->pQuery;
  STableQueryInfo *pCurrent = pQuery->current;

  setQueryStatus(pQuery, QUERY_NOT_COMPLETED);

  // remember where the scan started
  SQueryStatusInfo saved = getQueryStatusInfo(pRuntimeEnv, start);

  SET_MASTER_SCAN_FLAG(pRuntimeEnv);
  int32_t direction = GET_FORWARD_DIRECTION_FACTOR(pQuery->order.order);

  for (;;) {
    doScanAllDataBlocks(pRuntimeEnv);

    if (IS_MASTER_SCAN(pRuntimeEnv)) {
      // record how far the master scan got; repeat/reverse scans are limited to this range
      saved.status = pQuery->status;
      saved.curWindow.ekey = pCurrent->lastKey - direction;
      saved.lastKey = pCurrent->lastKey;
    }

    if (!needScanDataBlocksAgain(pRuntimeEnv)) {
      // leaving the loop: a repeat scan must hand back the status of the master scan
      if (pRuntimeEnv->scanFlag == REPEAT_SCAN) {
        pQuery->status = saved.status;
      }

      break;
    }

    STsdbQueryCond repeatCond = {
        .numOfCols = pQuery->numOfCols,
        .colList   = pQuery->colList,
        .order     = pQuery->order.order,
        .twindow   = saved.curWindow,
    };

    if (pRuntimeEnv->pSecQueryHandle != NULL) {
      tsdbCleanupQueryHandle(pRuntimeEnv->pSecQueryHandle);
    }

    pRuntimeEnv->pSecQueryHandle = tsdbQueryTables(pQInfo->tsdb, &repeatCond, &pQInfo->tableGroupInfo, pQInfo);
    pRuntimeEnv->windowResInfo.curIndex = saved.windowIndex;

    setQueryStatus(pQuery, QUERY_NOT_COMPLETED);
    pRuntimeEnv->scanFlag = REPEAT_SCAN;

    qDebug("QInfo:%p start to repeat scan data blocks due to query func required, qrange:%"PRId64"-%"PRId64, pQInfo,
        repeatCond.twindow.skey, repeatCond.twindow.ekey);

    // stop right away if the query has been killed
    if (isQueryKilled(pQInfo)) {
      return;
    }
  }

  if (needReverseScan(pQuery)) {
    setEnvBeforeReverseScan(pRuntimeEnv, &saved);

    // reverse scan from current position
    qDebug("QInfo:%p start to reverse scan", pQInfo);
    doScanAllDataBlocks(pRuntimeEnv);

    clearEnvAfterReverseScan(pRuntimeEnv, &saved);
  }
}

H
hjxilinx 已提交
3307
/*
 * Run the xFinalize callback of every output function.
 *
 * For group-by-normal-column and interval queries the callbacks run once per closed
 * window result, and the number of rows of that window is then taken from
 * getNumOfResult(). For group-by-normal-column queries all windows are closed first.
 * For every other query the callbacks run once on the current contexts.
 */
void finalizeQueryResult(SQueryRuntimeEnv *pRuntimeEnv) {
  SQuery *pQuery = pRuntimeEnv->pQuery;

  bool perWindow = pRuntimeEnv->groupbyNormalCol || QUERY_IS_INTERVAL_QUERY(pQuery);
  if (!perWindow) {
    for (int32_t k = 0; k < pQuery->numOfOutput; ++k) {
      aAggs[pQuery->pSelectExpr[k].base.functionId].xFinalize(&pRuntimeEnv->pCtx[k]);
    }

    return;
  }

  SWindowResInfo *pInfo = &pRuntimeEnv->windowResInfo;
  if (pRuntimeEnv->groupbyNormalCol) {
    closeAllTimeWindow(pInfo);
  }

  for (int32_t w = 0; w < pInfo->size; ++w) {
    if (isWindowResClosed(pInfo, w)) {
      SWindowResult *pWindowRes = &pInfo->pResult[w];
      setWindowResOutputBuf(pRuntimeEnv, pWindowRes);

      for (int32_t k = 0; k < pQuery->numOfOutput; ++k) {
        aAggs[pQuery->pSelectExpr[k].base.functionId].xFinalize(&pRuntimeEnv->pCtx[k]);
      }

      // usually one row per group; top/bottom queries may produce more
      pWindowRes->numOfRows = getNumOfResult(pRuntimeEnv);
    }
  }
}

/*
 * Returns true if at least one output column is produced by a function other than
 * TSDB_FUNC_TS, TSDB_FUNC_TAG and TSDB_FUNC_TAGPRJ.
 */
static bool hasMainOutput(SQuery *pQuery) {
  for (int32_t col = 0; col < pQuery->numOfOutput; ++col) {
    int32_t fid = pQuery->pSelectExpr[col].base.functionId;

    bool auxiliary = (fid == TSDB_FUNC_TS) || (fid == TSDB_FUNC_TAG) || (fid == TSDB_FUNC_TAGPRJ);
    if (!auxiliary) {
      return true;
    }
  }

  return false;
}

H
Haojun Liao 已提交
3355
/*
 * Initialize a STableQueryInfo inside the caller-provided memory `buf`.
 *
 * The table window is set to `win`, lastKey to win.skey, and the cursor's
 * vgroupIndex to -1. The per-table windowResInfo is initialized only for interval
 * and group-by-normal-column queries. Returns `buf` as a STableQueryInfo pointer.
 */
static STableQueryInfo *createTableQueryInfo(SQueryRuntimeEnv *pRuntimeEnv, void* pTable, STimeWindow win, void* buf) {
  STableQueryInfo *pInfo = buf;

  pInfo->pTable = pTable;
  pInfo->win = win;
  pInfo->lastKey = win.skey;
  pInfo->cur.vgroupIndex = -1;

  // other aggregate queries do not use the per-table windowResInfo, so it is left untouched for them
  SQuery *pQuery = pRuntimeEnv->pQuery;
  if (QUERY_IS_INTERVAL_QUERY(pQuery) || pRuntimeEnv->groupbyNormalCol) {
    const int32_t initialSize = 20;
    const int32_t initialThreshold = 100;
    initWindowResInfo(&pInfo->windowResInfo, pRuntimeEnv, initialSize, initialThreshold, TSDB_DATA_TYPE_INT);
  }

  return pInfo;
}

3377
/*
 * Release the window result info held by a table query info.
 * A NULL pTableQueryInfo is accepted and ignored. The structure itself is not freed here.
 */
void destroyTableQueryInfo(STableQueryInfo *pTableQueryInfo, int32_t numOfCols) {
  if (pTableQueryInfo != NULL) {
    cleanupTimeWindowInfo(&pTableQueryInfo->windowResInfo, numOfCols);
  }
}

H
Haojun Liao 已提交
3385 3386 3387 3388 3389 3390 3391
/*
 * Make _tableInfo the current table of the query owned by _runtime and, when
 * asserts are enabled, check that the table's lastKey has not moved behind the
 * start key of its window with respect to the scan direction.
 *
 * Both arguments are evaluated exactly once, so expressions with side effects
 * behave the same with and without NDEBUG.
 */
#define SET_CURRENT_QUERY_TABLE_INFO(_runtime, _tableInfo)                                \
  do {                                                                                    \
    SQuery          *_query = (_runtime)->pQuery;                                         \
    STableQueryInfo *_tqinfo = (_tableInfo);                                              \
    _query->current = _tqinfo;                                                            \
    assert(((_tqinfo->lastKey >= _tqinfo->win.skey) && QUERY_IS_ASC_QUERY(_query)) ||     \
           ((_tqinfo->lastKey <= _tqinfo->win.skey) && !QUERY_IS_ASC_QUERY(_query)));     \
  } while (0)
3392 3393 3394 3395

/**
 * set output buffer for different group
 * @param pRuntimeEnv
3396
 * @param pDataBlockInfo
3397
 */
H
Haojun Liao 已提交
3398
void setExecutionContext(SQInfo *pQInfo, int32_t groupIndex, TSKEY nextKey) {
3399
  SQueryRuntimeEnv *pRuntimeEnv = &pQInfo->runtimeEnv;
H
Haojun Liao 已提交
3400 3401 3402
  STableQueryInfo  *pTableQueryInfo = pRuntimeEnv->pQuery->current;
  SWindowResInfo   *pWindowResInfo = &pRuntimeEnv->windowResInfo;

H
Haojun Liao 已提交
3403 3404
  // lastKey needs to be updated
  pTableQueryInfo->lastKey = nextKey;
H
Haojun Liao 已提交
3405 3406 3407 3408

  if (pRuntimeEnv->hasTagResults || pRuntimeEnv->pTSBuf != NULL) {
    setAdditionalInfo(pQInfo, pTableQueryInfo->pTable, pTableQueryInfo);
  }
H
Haojun Liao 已提交
3409

H
Haojun Liao 已提交
3410 3411 3412
  if (pRuntimeEnv->prevGroupId != INT32_MIN && pRuntimeEnv->prevGroupId == groupIndex) {
    return;
  }
3413

H
Haojun Liao 已提交
3414
  int32_t GROUPRESULTID = 1;
3415 3416
  SWindowResult *pWindowRes = doSetTimeWindowFromKey(pRuntimeEnv, pWindowResInfo, (char *)&groupIndex,
      sizeof(groupIndex), true);
3417 3418 3419
  if (pWindowRes == NULL) {
    return;
  }
3420

3421 3422 3423 3424 3425 3426 3427 3428 3429 3430
  /*
   * not assign result buffer yet, add new result buffer
   * all group belong to one result set, and each group result has different group id so set the id to be one
   */
  if (pWindowRes->pos.pageId == -1) {
    if (addNewWindowResultBuf(pWindowRes, pRuntimeEnv->pResultBuf, GROUPRESULTID, pRuntimeEnv->numOfRowsPerPage) !=
        TSDB_CODE_SUCCESS) {
      return;
    }
  }
3431

H
Haojun Liao 已提交
3432 3433
  // record the current active group id
  pRuntimeEnv->prevGroupId = groupIndex;
3434 3435 3436 3437
  setWindowResOutputBuf(pRuntimeEnv, pWindowRes);
  initCtxOutputBuf(pRuntimeEnv);
}

H
Haojun Liao 已提交
3438
/*
 * Point every function context at the given window result.
 *
 * For each output column the output buffer is set to the column's position in the
 * result page, the result info is set to the window's result info for that column,
 * and the super-table flag is copied from the runtime environment. TOP, BOTTOM and
 * DIFF additionally take the output buffer of column 0 as their timestamp output.
 */
void setWindowResOutputBuf(SQueryRuntimeEnv *pRuntimeEnv, SWindowResult *pResult) {
  SQuery *pQuery = pRuntimeEnv->pQuery;

  // Note: pResult->pos[i]->num == 0, there is only fixed number of results for each group
  for (int32_t col = 0; col < pQuery->numOfOutput; ++col) {
    SQLFunctionCtx *pColCtx = &pRuntimeEnv->pCtx[col];
    pColCtx->aOutputBuf = getPosInResultPage(pRuntimeEnv, col, pResult);

    int32_t fid = pQuery->pSelectExpr[col].base.functionId;
    bool    needTsOutput = (fid == TSDB_FUNC_TOP) || (fid == TSDB_FUNC_BOTTOM) || (fid == TSDB_FUNC_DIFF);
    if (needTsOutput) {
      pColCtx->ptsOutputBuf = pRuntimeEnv->pCtx[0].aOutputBuf;
    }

    // attach the result info (and the intermediate buffer it carries) of this window
    pColCtx->resultInfo = &pResult->resultInfo[col];

    // set super table query flag
    SResultInfo *pColResInfo = GET_RES_INFO(pColCtx);
    pColResInfo->superTableQ = pRuntimeEnv->stableQuery;
  }
}

H
Haojun Liao 已提交
3463 3464
/*
 * Point every function context at the given window result and initialize the
 * functions that have not been initialized for this window yet.
 *
 * Columns whose result info is both initialized and complete only get their
 * result info pointer updated; everything else is skipped for them.
 */
void setWindowResOutputBufInitCtx(SQueryRuntimeEnv *pRuntimeEnv, SWindowResult *pResult) {
  SQuery *pQuery = pRuntimeEnv->pQuery;

  // Note: pResult->pos[i]->num == 0, there is only fixed number of results for each group
  for (int32_t col = 0; col < pQuery->numOfOutput; ++col) {
    SQLFunctionCtx *pColCtx = &pRuntimeEnv->pCtx[col];
    pColCtx->resultInfo = &pResult->resultInfo[col];

    bool finished = pColCtx->resultInfo->initialized && pColCtx->resultInfo->complete;
    if (finished) {
      continue;
    }

    pColCtx->aOutputBuf = getPosInResultPage(pRuntimeEnv, col, pResult);
    pColCtx->currentStage = 0;

    int32_t fid = pColCtx->functionId;
    if (fid == TSDB_FUNC_TOP || fid == TSDB_FUNC_BOTTOM || fid == TSDB_FUNC_DIFF) {
      // these functions write their timestamps into the output buffer of column 0
      pColCtx->ptsOutputBuf = pRuntimeEnv->pCtx[0].aOutputBuf;
    }

    // set super table query flag
    pColCtx->resultInfo->superTableQ = pRuntimeEnv->stableQuery;

    if (!pColCtx->resultInfo->initialized) {
      aAggs[fid].init(pColCtx);
    }
  }
}

3495
/*
 * Set the tag values for the given table and, when a ts buffer is in use,
 * position the ts buffer for this table.
 *
 * On the first visit of a table (cursor vgroupIndex == -1) the start position is
 * looked up by the table's tag and the resulting cursor is stored in the table
 * info; on later visits the stored cursor is restored. Always returns 0.
 */
int32_t setAdditionalInfo(SQInfo *pQInfo, void* pTable, STableQueryInfo *pTableQueryInfo) {
  SQueryRuntimeEnv *pRuntimeEnv = &pQInfo->runtimeEnv;

  setTagVal(pRuntimeEnv, pTable, pQInfo->tsdb);

  if (pRuntimeEnv->pTSBuf == NULL) {
    return 0;
  }

  // both the master and supplement scan needs to set the correct ts comp start position
  if (pTableQueryInfo->cur.vgroupIndex != -1) {
    tsBufSetCursor(pRuntimeEnv->pTSBuf, &pTableQueryInfo->cur);
    return 0;
  }

  pTableQueryInfo->tag = pRuntimeEnv->pCtx[0].tag.i64Key;
  tsBufGetElemStartPos(pRuntimeEnv->pTSBuf, 0, pTableQueryInfo->tag);

  // keep the cursor info of the current table
  pTableQueryInfo->cur = pRuntimeEnv->pTSBuf->cur;
  return 0;
}

/*
 * There are two cases to handle:
 *
 * 1. Query range is not set yet (queryRangeSet = 0). we need to set the query range info, including pQuery->lastKey,
 *    pQuery->window.skey, and pQuery->eKey.
 * 2. Query range is set and query is in progress. There may be another result with the same query ranges to be
 *    merged during merge stage. In this case, we need the pTableQueryInfo->lastResRows to decide if there
 *    is a previous result generated or not.
 */
H
hjxilinx 已提交
3526
void setIntervalQueryRange(SQInfo *pQInfo, TSKEY key) {
3527 3528
  SQueryRuntimeEnv *pRuntimeEnv = &pQInfo->runtimeEnv;
  SQuery *          pQuery = pRuntimeEnv->pQuery;
H
hjxilinx 已提交
3529 3530
  STableQueryInfo *pTableQueryInfo = pQuery->current;
  
3531 3532 3533
  if (pTableQueryInfo->queryRangeSet) {
    pTableQueryInfo->lastKey = key;
  } else {
3534
    pTableQueryInfo->win.skey = key;
3535
    STimeWindow win = {.skey = key, .ekey = pQuery->window.ekey};
3536

3537 3538 3539 3540 3541
    // for too small query range, no data in this interval.
    if ((QUERY_IS_ASC_QUERY(pQuery) && (pQuery->window.ekey < pQuery->window.skey)) ||
        (!QUERY_IS_ASC_QUERY(pQuery) && (pQuery->window.skey < pQuery->window.ekey))) {
      return;
    }
3542

3543 3544 3545 3546 3547 3548
    /**
     * In handling the both ascending and descending order super table query, we need to find the first qualified
     * timestamp of this table, and then set the first qualified start timestamp.
     * In ascending query, key is the first qualified timestamp. However, in the descending order query, additional
     * operations involve.
     */
H
Haojun Liao 已提交
3549
    STimeWindow     w = TSWINDOW_INITIALIZER;
3550
    SWindowResInfo *pWindowResInfo = &pTableQueryInfo->windowResInfo;
3551

H
Haojun Liao 已提交
3552 3553
    TSKEY sk = MIN(win.skey, win.ekey);
    TSKEY ek = MAX(win.skey, win.ekey);
H
Haojun Liao 已提交
3554
    getAlignQueryTimeWindow(pQuery, win.skey, sk, ek, &w);
3555
    pWindowResInfo->startTime = pTableQueryInfo->win.skey;  // windowSKey may be 0 in case of 1970 timestamp
3556

3557 3558
    if (pWindowResInfo->prevSKey == TSKEY_INITIAL_VAL) {
      if (!QUERY_IS_ASC_QUERY(pQuery)) {
H
Haojun Liao 已提交
3559
        assert(win.ekey == pQuery->window.ekey);
3560
      }
3561 3562
      
      pWindowResInfo->prevSKey = w.skey;
3563
    }
3564

3565
    pTableQueryInfo->queryRangeSet = 1;
3566
    pTableQueryInfo->lastKey = pTableQueryInfo->win.skey;
3567 3568 3569 3570
  }
}

/*
 * Returns true if any output function of the query has the
 * TSDB_FUNCSTATE_NEED_TS bit set in its status.
 */
bool requireTimestamp(SQuery *pQuery) {
  int32_t col = 0;

  while (col < pQuery->numOfOutput) {
    int32_t fid = pQuery->pSelectExpr[col].base.functionId;
    if (aAggs[fid].nStatus & TSDB_FUNCSTATE_NEED_TS) {
      return true;
    }

    ++col;
  }

  return false;
}

/*
 * Decide whether the primary timestamp column of a data block must be loaded.
 *
 * It is required when
 * 1. the lastKey of the current table or the end key of the query window lies inside
 *    the time range of the block, so the precise position has to be located, or
 * 2. at least one query function needs timestamps (see requireTimestamp).
 */
bool needPrimaryTimestampCol(SQuery *pQuery, SDataBlockInfo *pDataBlockInfo) {
  STimeWindow     *pBlockWin = &pDataBlockInfo->window;
  STableQueryInfo *pCurrent = pQuery->current;

  if (pCurrent->lastKey >= pBlockWin->skey && pCurrent->lastKey <= pBlockWin->ekey) {
    return true;
  }

  if (pQuery->window.ekey >= pBlockWin->skey && pQuery->window.ekey <= pBlockWin->ekey) {
    return true;
  }

  return requireTimestamp(pQuery);
}

/*
 * Number of result subsets of the query: the number of closed time windows for
 * interval and group-by-normal-column queries, the number of table groups otherwise.
 */
static int32_t getNumOfSubset(SQInfo *pQInfo) {
  SQuery *pQuery = pQInfo->runtimeEnv.pQuery;

  if (pQInfo->runtimeEnv.groupbyNormalCol || (QUERY_IS_INTERVAL_QUERY(pQuery))) {
    int32_t numOfClosed = numOfClosedTimeWindow(&pQInfo->runtimeEnv.windowResInfo);
    return numOfClosed;
  }

  int32_t numOfGroups = GET_NUM_OF_TABLEGROUP(pQInfo);
  return numOfGroups;
}

/*
 * Copy window results into the output buffer (pQuery->sdata) of the query.
 *
 * Copying resumes at the group recorded in pQInfo->groupIndex and at the row offset
 * recorded in pQInfo->offset, walks the results forward for TSDB_ORDER_ASC and
 * backward otherwise, and stops when the output buffer capacity is reached. Both
 * pQInfo->groupIndex and pQInfo->offset are updated so that the next call continues
 * where this one stopped.
 *
 * Returns the number of rows written to the output buffer.
 */
static int32_t doCopyToSData(SQInfo *pQInfo, SWindowResult *result, int32_t orderType) {
  SQueryRuntimeEnv *pRuntimeEnv = &pQInfo->runtimeEnv;
  SQuery           *pQuery = pRuntimeEnv->pQuery;

  qDebug("QInfo:%p start to copy data from windowResInfo to query buf", pQInfo);
  int32_t totalSubset = getNumOfSubset(pQInfo);

  // descending order by default, switched below for ascending copies
  int32_t step = -1;
  int32_t idx = totalSubset - pQInfo->groupIndex - 1;
  if (orderType == TSDB_ORDER_ASC) {
    step = 1;
    idx = pQInfo->groupIndex;
  }

  int32_t copied = 0;

  while ((idx < totalSubset) && (idx >= 0)) {
    SWindowResult *pGroup = &result[idx];
    idx += step;

    // nothing to copy from an empty group, move on to the next one
    if (pGroup->numOfRows == 0) {
      pQInfo->offset = 0;
      pQInfo->groupIndex += 1;
      continue;
    }

    assert(pQInfo->offset <= 1);

    int32_t skipped = pQInfo->offset;
    int32_t numOfRowsToCopy = pGroup->numOfRows - pQInfo->offset;

    if (numOfRowsToCopy > pQuery->rec.capacity - copied) {
      // not enough room for the whole group: copy what fits and remember where to resume
      numOfRowsToCopy = pQuery->rec.capacity - copied;
      pQInfo->offset += numOfRowsToCopy;
    } else {
      pQInfo->offset = 0;
      pQInfo->groupIndex += 1;
    }

    for (int32_t col = 0; col < pQuery->numOfOutput; ++col) {
      int32_t size = pRuntimeEnv->pCtx[col].outputBytes;

      char *dst = pQuery->sdata[col]->data + copied * size;
      char *src = getPosInResultPage(pRuntimeEnv, col, pGroup);
      memcpy(dst, src + skipped * size, size * numOfRowsToCopy);
    }

    copied += numOfRowsToCopy;
    if (copied == pQuery->rec.capacity) {
      break;
    }
  }

  qDebug("QInfo:%p copy data to query buf completed", pQInfo);

#ifdef _DEBUG_VIEW
  displayInterResult(pQuery->sdata, pRuntimeEnv, copied);
#endif
  return copied;
}

/**
 * Copy window results into the output buffer of the query and account for the
 * copied rows in pQuery->rec.rows.
 *
 * The copy direction is the order type of the group-by expression when the query
 * has one, and ascending otherwise.
 *
 * @param pQInfo  query handle
 * @param result  array of window results to copy from
 */
void copyFromWindowResToSData(SQInfo *pQInfo, SWindowResult *result) {
  SQuery *pQuery = pQInfo->runtimeEnv.pQuery;

  int32_t orderType = TSDB_ORDER_ASC;
  if (pQuery->pGroupbyExpr != NULL) {
    orderType = pQuery->pGroupbyExpr->orderType;
  }

  int32_t numOfCopied = doCopyToSData(pQInfo, result, orderType);
  pQuery->rec.rows += numOfCopied;

  assert(pQuery->rec.rows <= pQuery->rec.capacity);
}

H
Haojun Liao 已提交
3692
/*
 * For non-interval queries, raise the row count of every window result in the
 * runtime environment to the largest numOfRes of its functions. TS, TAG and TAGPRJ
 * columns are not taken into account. Interval queries are left untouched.
 *
 * pTableQueryInfo is not used by this function.
 */
static void updateWindowResNumOfRes(SQueryRuntimeEnv *pRuntimeEnv, STableQueryInfo *pTableQueryInfo) {
  SQuery *pQuery = pRuntimeEnv->pQuery;

  if (QUERY_IS_INTERVAL_QUERY(pQuery)) {
    return;
  }

  for (int32_t w = 0; w < pRuntimeEnv->windowResInfo.size; ++w) {
    SWindowResult *pWindowRes = &pRuntimeEnv->windowResInfo.pResult[w];

    for (int32_t col = 0; col < pQuery->numOfOutput; ++col) {
      int32_t fid = pRuntimeEnv->pCtx[col].functionId;
      bool    auxiliary = (fid == TSDB_FUNC_TS) || (fid == TSDB_FUNC_TAG) || (fid == TSDB_FUNC_TAGPRJ);

      if (!auxiliary) {
        pWindowRes->numOfRows = MAX(pWindowRes->numOfRows, pWindowRes->resultInfo[col].numOfRes);
      }
    }
  }
}

3713 3714
/*
 * Apply the query functions to one data block of the current table, using the
 * table's own window result info.
 *
 * Rows are processed one by one when the query has filter columns, a ts buffer or a
 * group-by on a normal column; otherwise the block is processed as a whole. The
 * window result row counts are refreshed afterwards.
 */
void stableApplyFunctionsOnBlock(SQueryRuntimeEnv *pRuntimeEnv, SDataBlockInfo *pDataBlockInfo, SDataStatis *pStatis,
    SArray *pDataBlock, __block_search_fn_t searchFn) {
  SQuery          *pQuery = pRuntimeEnv->pQuery;
  STableQueryInfo *pCurrent = pQuery->current;
  SWindowResInfo  *pTableWindowInfo = &pCurrent->windowResInfo;

  // start at the first row for ascending scans and at the last row otherwise
  if (QUERY_IS_ASC_QUERY(pQuery)) {
    pQuery->pos = 0;
  } else {
    pQuery->pos = pDataBlockInfo->rows - 1;
  }

  bool rowByRow = pQuery->numOfFilterCols > 0 || pRuntimeEnv->pTSBuf != NULL || pRuntimeEnv->groupbyNormalCol;
  if (rowByRow) {
    rowwiseApplyFunctions(pRuntimeEnv, pStatis, pDataBlockInfo, pTableWindowInfo, pDataBlock);
  } else {
    blockwiseApplyFunctions(pRuntimeEnv, pStatis, pDataBlockInfo, pTableWindowInfo, searchFn, pDataBlock);
  }

  updateWindowResNumOfRes(pRuntimeEnv, pCurrent);
}

3730 3731 3732
/*
 * Tell whether the query still has results that have not been returned yet.
 *
 * Returns false as soon as the LIMIT of the query has been reached. For fill queries
 * (other than point interpolation) it checks the rows remaining in the fill info
 * and, once the query is completed, the rows the fill would still generate up to the
 * end of the query window. For other queries it reports true only for a completed
 * interval/group-by query that still holds window results.
 */
bool queryHasRemainResults(SQueryRuntimeEnv* pRuntimeEnv) {
  SQuery    *pQuery = pRuntimeEnv->pQuery;
  SFillInfo *pFillInfo = pRuntimeEnv->pFillInfo;

  if (pQuery->limit.limit > 0 && pQuery->rec.total >= pQuery->limit.limit) {
    return false;
  }

  if (pQuery->fillType == TSDB_FILL_NONE || isPointInterpoQuery(pQuery)) {
    // there are results waiting for returned to client.
    return Q_STATUS_EQUAL(pQuery->status, QUERY_COMPLETED) &&
           (pRuntimeEnv->groupbyNormalCol || QUERY_IS_INTERVAL_QUERY(pQuery)) &&
           (pRuntimeEnv->windowResInfo.size > 0);
  }

  // Results not returned to the client yet have to go through the fill operation first.
  int32_t remain = taosNumOfRemainRows(pFillInfo);
  if (remain > 0) {
    return true;
  }

  /*
   * No buffered rows are left at this point. If the query is not completed yet, the gap
   * between two result blocks is handled after the next data block is retrieved from TSDB.
   *
   * NOTE: If the result set is not the first block, the gap in front of the result set will
   * be filled. If the result set is the FIRST result block, the gap between the start time of
   * the query time window and the timestamp of the first result row fills nothing.
   */
  if (!Q_STATUS_EQUAL(pQuery->status, QUERY_COMPLETED)) {
    return false;
  }

  int32_t numOfTotal = getFilledNumOfRes(pFillInfo, pQuery->window.ekey, pQuery->rec.capacity);
  return numOfTotal > 0;
}

/*
 * Serialize the current result set into the response buffer `data`.
 *
 * Layout written to `data`:
 *   1. for every output column, numOfRows values of pSelectExpr[col].bytes bytes each;
 *   2. the number of entries of pQInfo->arrTableIdInfo as a 32-bit value (htonl);
 *   3. one STableIdInfo per entry, with uid/key converted by htobe64 and tid by htonl.
 *
 * The write position after the column data has no alignment guarantee, so the
 * multi-byte values are prepared in local variables and copied with memcpy instead
 * of being stored through a casted pointer. Bytes of a STableIdInfo that do not
 * belong to uid, tid or key are written as zero.
 *
 * Afterwards, when the query is completed, its status is moved to QUERY_OVER once
 * all tables have been processed (super table query) or no results remain.
 *
 * @param pQInfo     query handle
 * @param numOfRows  number of rows to copy from every output column
 * @param data       destination buffer; the caller must provide enough room
 */
static void doCopyQueryResultToMsg(SQInfo *pQInfo, int32_t numOfRows, char *data) {
  SQuery *pQuery = pQInfo->runtimeEnv.pQuery;

  for (int32_t col = 0; col < pQuery->numOfOutput; ++col) {
    int32_t bytes = pQuery->pSelectExpr[col].bytes;

    memmove(data, pQuery->sdata[col]->data, bytes * numOfRows);
    data += bytes * numOfRows;
  }

  int32_t numOfTables = (int32_t)taosArrayGetSize(pQInfo->arrTableIdInfo);
  int32_t numOfTablesNet = htonl(numOfTables);
  memcpy(data, &numOfTablesNet, sizeof(numOfTablesNet));
  data += sizeof(int32_t);

  for(int32_t i = 0; i < numOfTables; i++) {
    STableIdInfo* pSrc = taosArrayGet(pQInfo->arrTableIdInfo, i);

    STableIdInfo dst;
    memset(&dst, 0, sizeof(dst));
    dst.uid = htobe64(pSrc->uid);
    dst.tid = htonl(pSrc->tid);
    dst.key = htobe64(pSrc->key);

    memcpy(data, &dst, sizeof(dst));
    data += sizeof(STableIdInfo);
  }

  // all data returned, set query over
  if (Q_STATUS_EQUAL(pQuery->status, QUERY_COMPLETED)) {
    if (pQInfo->runtimeEnv.stableQuery) {
      if (pQInfo->tableIndex >= pQInfo->tableqinfoGroupInfo.numOfTables) {
        setQueryStatus(pQuery, QUERY_OVER);
      }
    } else {
      if (!queryHasRemainResults(&pQInfo->runtimeEnv)) {
        setQueryStatus(pQuery, QUERY_OVER);
      }
    }
  }
}

H
Haojun Liao 已提交
3808
/*
 * Generate filled result rows into pQuery->sdata and apply the OFFSET of the query.
 *
 * Each round generates one block of filled rows:
 *  - with no offset left, the number of generated rows is returned directly;
 *  - if the block holds more rows than the offset, the skipped rows are dropped by
 *    moving the rest to the beginning of every pDst column, the offset becomes 0 and
 *    the number of remaining rows is returned;
 *  - otherwise the whole block is discarded, the offset is reduced accordingly and
 *    another round starts as long as the query has remaining results.
 *
 * Returns the number of rows available in the output, 0 if everything was consumed
 * by the offset. numOfFilled is not used by this function.
 */
int32_t doFillGapsInResults(SQueryRuntimeEnv* pRuntimeEnv, tFilePage **pDst, int32_t *numOfFilled) {
  SQInfo    *pQInfo = GET_QINFO_ADDR(pRuntimeEnv);
  SQuery    *pQuery = pRuntimeEnv->pQuery;
  SFillInfo *pFillInfo = pRuntimeEnv->pFillInfo;

  do {
    int32_t numOfRows = taosGenerateDataBlock(pFillInfo, (tFilePage**) pQuery->sdata, pQuery->rec.capacity);

    // todo apply limit output function
    /* reached the start position of according to offset value, return immediately */
    if (pQuery->limit.offset == 0) {
      qDebug("QInfo:%p initial numOfRows:%d, generate filled result:%d rows", pQInfo, pFillInfo->numOfRows, numOfRows);
      return numOfRows;
    }

    if (pQuery->limit.offset < numOfRows) {
      qDebug("QInfo:%p initial numOfRows:%d, generate filled result:%d rows, offset:%" PRId64 ". Discard due to offset, remain:%" PRId64 ", new offset:%d",
             pQInfo, pFillInfo->numOfRows, numOfRows, pQuery->limit.offset, numOfRows - pQuery->limit.offset, 0);

      numOfRows -= pQuery->limit.offset;

      // todo !!!!there exactly number of interpo is not valid.
      // todo refactor move to the beginning of buffer
      for (int32_t col = 0; col < pQuery->numOfOutput; ++col) {
        memmove(pDst[col]->data, pDst[col]->data + pQuery->pSelectExpr[col].bytes * pQuery->limit.offset,
                numOfRows * pQuery->pSelectExpr[col].bytes);
      }

      pQuery->limit.offset = 0;
      return numOfRows;
    }

    // the whole block is consumed by the offset
    qDebug("QInfo:%p initial numOfRows:%d, generate filled result:%d rows, offset:%" PRId64 ". Discard due to offset, "
           "remain:%d, new offset:%" PRId64, pQInfo, pFillInfo->numOfRows, numOfRows, pQuery->limit.offset, 0,
        pQuery->limit.offset - numOfRows);

    pQuery->limit.offset -= numOfRows;
    pQuery->rec.rows = 0;
  } while (queryHasRemainResults(pRuntimeEnv));

  return 0;
}

3853
/*
 * Write the cost summary of the query to the debug log: elapsed time, number of
 * blocks, how many blocks were served from block statistics versus loaded block
 * data, and the total/checked row counts.
 */
static void queryCostStatis(SQInfo *pQInfo) {
  SQueryCostInfo *pCost = &pQInfo->runtimeEnv.summary;

  qDebug("QInfo:%p :cost summary: elpased time:%" PRId64 " us, total blocks:%d, use block statis:%d, use block data:%d, "
         "total rows:%" PRId64 ", check rows:%" PRId64,
         pQInfo, pCost->elapsedTime, pCost->totalBlocks, pCost->loadBlockStatis, pCost->loadBlocks, pCost->totalRows,
         pCost->totalCheckedRows);
}

3896 3897
/**
 * Consume the remaining LIMIT offset inside the given data block.
 *
 * If the offset equals the number of rows in the block, the whole block is
 * skipped: lastKey is moved one step past the block boundary and nothing is
 * computed. Otherwise pQuery->pos is set to the first row that must be kept,
 * lastKey is set to that row's timestamp, the offset is cleared and the query
 * functions are applied on the block.
 *
 * @param pRuntimeEnv runtime environment of the running query
 * @param pBlockInfo  description (row count, time window) of the current block
 */
static void updateOffsetVal(SQueryRuntimeEnv *pRuntimeEnv, SDataBlockInfo *pBlockInfo) {
  SQuery *         pQuery = pRuntimeEnv->pQuery;
  STableQueryInfo *pTableQueryInfo = pQuery->current;

  int32_t step = GET_FORWARD_DIRECTION_FACTOR(pQuery->order.order);

  if (pQuery->limit.offset == pBlockInfo->rows) {  // the current block is skipped completely
    pTableQueryInfo->lastKey = QUERY_IS_ASC_QUERY(pQuery) ? pBlockInfo->window.ekey + step : pBlockInfo->window.skey + step;
    pQuery->limit.offset = 0;
    return;
  }

  // position of the first row to keep, counted from the scan direction's start
  if (QUERY_IS_ASC_QUERY(pQuery)) {
    pQuery->pos = pQuery->limit.offset;
  } else {
    pQuery->pos = pBlockInfo->rows - pQuery->limit.offset - 1;
  }

  assert(pQuery->pos >= 0 && pQuery->pos <= pBlockInfo->rows - 1);

  SArray *         pDataBlock = tsdbRetrieveDataBlock(pRuntimeEnv->pQueryHandle, NULL);
  SColumnInfoData *pColInfoData = taosArrayGet(pDataBlock, 0);

  // column 0 is read as the timestamp column
  TSKEY *keys = (TSKEY *) pColInfoData->pData;

  // the offset is fully consumed: resume from the timestamp at pQuery->pos
  pTableQueryInfo->lastKey = keys[pQuery->pos];
  pQuery->limit.offset = 0;

  int32_t numOfRes = tableApplyFunctionsOnBlock(pRuntimeEnv, pBlockInfo, NULL, binarySearchForKey, pDataBlock);

  qDebug("QInfo:%p check data block, brange:%" PRId64 "-%" PRId64 ", numOfRows:%d, numOfRes:%d, lastKey:%"PRId64, GET_QINFO_ADDR(pRuntimeEnv),
         pBlockInfo->window.skey, pBlockInfo->window.ekey, pBlockInfo->rows, numOfRes, pQuery->current->lastKey);
}
3931

3932 3933 3934 3935 3936
/**
 * Skip whole data blocks while the LIMIT offset is larger than a block.
 *
 * Does nothing when there is no positive offset or when filter columns are
 * present. For every block that holds fewer rows than the remaining offset
 * the offset is reduced by the block's row count and lastKey is moved one
 * step past the block. The first block that cannot be skipped entirely is
 * handed to updateOffsetVal() and the loop ends. The loop also stops early
 * when the query has been killed.
 *
 * @param pRuntimeEnv runtime environment of the running query
 */
void skipBlocks(SQueryRuntimeEnv *pRuntimeEnv) {
  SQuery *pQuery = pRuntimeEnv->pQuery;

  if (pQuery->limit.offset <= 0 || pQuery->numOfFilterCols > 0) {
    return;
  }

  pQuery->pos = 0;
  int32_t step = GET_FORWARD_DIRECTION_FACTOR(pQuery->order.order);

  STableQueryInfo *pTableQueryInfo = pQuery->current;
  TsdbQueryHandleT pQueryHandle = pRuntimeEnv->pQueryHandle;

  SDataBlockInfo blockInfo = SDATA_BLOCK_INITIALIZER;
  while (tsdbNextDataBlock(pQueryHandle)) {
    if (isQueryKilled(GET_QINFO_ADDR(pRuntimeEnv))) {
      return;
    }

    tsdbRetrieveDataBlockInfo(pQueryHandle, &blockInfo);

    if (pQuery->limit.offset > blockInfo.rows) {
      // the whole block is covered by the offset: drop it and move lastKey past it
      pQuery->limit.offset -= blockInfo.rows;
      pTableQueryInfo->lastKey = (QUERY_IS_ASC_QUERY(pQuery)) ? blockInfo.window.ekey : blockInfo.window.skey;
      pTableQueryInfo->lastKey += step;

      qDebug("QInfo:%p skip rows:%d, offset:%" PRId64, GET_QINFO_ADDR(pRuntimeEnv), blockInfo.rows,
             pQuery->limit.offset);
    } else {  // find the appropriate start position in the current block
      updateOffsetVal(pRuntimeEnv, &blockInfo);
      break;
    }
  }
}
3966

H
Haojun Liao 已提交
3967
/**
 * Apply the LIMIT offset of an interval query by skipping time windows.
 *
 * *start is initialised with the current table's lastKey. When there is no
 * positive offset, or when filter columns, a TS buffer or a fill info are
 * present, nothing is skipped. Otherwise data blocks are walked and the offset
 * is decreased by one for every time window that ends inside the visited
 * block. Once the offset reaches zero, *start and pQuery->window.skey are
 * reset to the point where the query has to resume.
 *
 * @param pRuntimeEnv runtime environment of the running query
 * @param start       [out] timestamp from which the query should start
 * @return always true in the visible code
 */
static bool skipTimeInterval(SQueryRuntimeEnv *pRuntimeEnv, TSKEY* start) {
  SQuery *pQuery = pRuntimeEnv->pQuery;
  *start = pQuery->current->lastKey;

  // if queried with value filter, do NOT forward query start position
  if (pQuery->limit.offset <= 0 || pQuery->numOfFilterCols > 0 || pRuntimeEnv->pTSBuf != NULL || pRuntimeEnv->pFillInfo != NULL) {
    return true;
  }

  /*
   * 1. for an interval query without interpolation we forward pQuery->intervalTime at a time for
   *    pQuery->limit.offset times. Since holes may exist, pQuery->intervalTime*pQuery->limit.offset
   *    is not a valid shortcut. Otherwise, we only forward pQuery->limit.offset number of points.
   */
  assert(pRuntimeEnv->windowResInfo.prevSKey == TSKEY_INITIAL_VAL);

  STimeWindow w = TSWINDOW_INITIALIZER;

  SWindowResInfo * pWindowResInfo = &pRuntimeEnv->windowResInfo;
  STableQueryInfo *pTableQueryInfo = pQuery->current;

  SDataBlockInfo blockInfo = SDATA_BLOCK_INITIALIZER;
  while (tsdbNextDataBlock(pRuntimeEnv->pQueryHandle)) {
    tsdbRetrieveDataBlockInfo(pRuntimeEnv->pQueryHandle, &blockInfo);

    if (QUERY_IS_ASC_QUERY(pQuery)) {
      // ascending scan: align the first window only once, on the first block
      if (pWindowResInfo->prevSKey == TSKEY_INITIAL_VAL) {
        getAlignQueryTimeWindow(pQuery, blockInfo.window.skey, blockInfo.window.skey, pQuery->window.ekey, &w);
        pWindowResInfo->startTime = w.skey;
        pWindowResInfo->prevSKey = w.skey;
      }
    } else {
      // descending scan: the window is re-aligned on every block
      getAlignQueryTimeWindow(pQuery, blockInfo.window.ekey, pQuery->window.ekey, blockInfo.window.ekey, &w);

      pWindowResInfo->startTime = pQuery->window.skey;
      pWindowResInfo->prevSKey = w.skey;
    }

    // the first time window
    STimeWindow win = getActiveTimeWindow(pWindowResInfo, pWindowResInfo->prevSKey, pQuery);

    while (pQuery->limit.offset > 0) {
      // a window that is closed within the current block consumes one unit of offset
      if ((win.ekey <= blockInfo.window.ekey && QUERY_IS_ASC_QUERY(pQuery)) ||
          (win.ekey >= blockInfo.window.skey && !QUERY_IS_ASC_QUERY(pQuery))) {
        pQuery->limit.offset -= 1;
        pWindowResInfo->prevSKey = win.skey;
      }

      STimeWindow tw = win;
      getNextTimeWindow(pQuery, &tw);

      if (pQuery->limit.offset == 0) {
        if ((tw.skey <= blockInfo.window.ekey && QUERY_IS_ASC_QUERY(pQuery)) ||
            (tw.ekey >= blockInfo.window.skey && !QUERY_IS_ASC_QUERY(pQuery))) {
          // load the data block and check data remaining in current data block
          // TODO optimize performance
          SArray *         pDataBlock = tsdbRetrieveDataBlock(pRuntimeEnv->pQueryHandle, NULL);
          SColumnInfoData *pColInfoData = taosArrayGet(pDataBlock, 0);

          tw = win;
          int32_t startPos =
              getNextQualifiedWindow(pRuntimeEnv, &tw, &blockInfo, pColInfoData->pData, binarySearchForKey);
          assert(startPos >= 0);

          // set the abort info
          pQuery->pos = startPos;

          // reset the query start timestamp
          pTableQueryInfo->win.skey = ((TSKEY *)pColInfoData->pData)[startPos];
          pQuery->window.skey = pTableQueryInfo->win.skey;
          *start = pTableQueryInfo->win.skey;

          pWindowResInfo->prevSKey = tw.skey;
          int32_t index = pRuntimeEnv->windowResInfo.curIndex;

          int32_t numOfRes = tableApplyFunctionsOnBlock(pRuntimeEnv, &blockInfo, NULL, binarySearchForKey, pDataBlock);
          pRuntimeEnv->windowResInfo.curIndex = index;  // restore the window index

          qDebug("QInfo:%p check data block, brange:%" PRId64 "-%" PRId64 ", numOfRows:%d, numOfRes:%d, lastKey:%"PRId64,
                 GET_QINFO_ADDR(pRuntimeEnv), blockInfo.window.skey, blockInfo.window.ekey, blockInfo.rows, numOfRes, pQuery->current->lastKey);

          return true;
        } else {
          // the next window starts outside this block: only record where to resume
          *start = tw.skey;
          pQuery->window.skey = tw.skey;
          pWindowResInfo->prevSKey = tw.skey;
          return true;
        }
      }

      /*
       * If the next time window still starts from current data block,
       * load the primary timestamp column first, and then find the start position for the next queried time window.
       * Note that only the primary timestamp column is required.
       * TODO: Optimize for this case. Not all data blocks need to be loaded, only the one in which the first
       * actually required time window resides.
       */
      if ((tw.skey <= blockInfo.window.ekey && QUERY_IS_ASC_QUERY(pQuery)) ||
          (tw.ekey >= blockInfo.window.skey && !QUERY_IS_ASC_QUERY(pQuery))) {
        SArray *         pDataBlock = tsdbRetrieveDataBlock(pRuntimeEnv->pQueryHandle, NULL);
        SColumnInfoData *pColInfoData = taosArrayGet(pDataBlock, 0);

        tw = win;
        int32_t startPos =
            getNextQualifiedWindow(pRuntimeEnv, &tw, &blockInfo, pColInfoData->pData, binarySearchForKey);
        assert(startPos >= 0);

        // set the abort info
        pQuery->pos = startPos;
        pTableQueryInfo->lastKey = ((TSKEY *)pColInfoData->pData)[startPos];
        pWindowResInfo->prevSKey = tw.skey;
        win = tw;
      } else {
        break;  // offset is not 0, and next time window begins or ends in the next block.
      }
    }
  }

  return true;
}

B
Bomin Zhang 已提交
4088 4089
/**
 * Create the tsdb query handle stored in pQInfo->runtimeEnv.pQueryHandle.
 *
 * No handle is created for tag-only queries, nor for super table queries that
 * are neither interval queries nor fixed-output queries. The handle type is
 * selected from the query kind: last-row, point interpolation, or a plain
 * table scan.
 *
 * @param tsdb          tsdb repository passed through to the tsdbQuery* calls
 * @param pQInfo        query being initialised
 * @param isSTableQuery true when the query runs on a super table
 */
static void setupQueryHandle(void* tsdb, SQInfo* pQInfo, bool isSTableQuery) {
  SQueryRuntimeEnv *pRuntimeEnv = &pQInfo->runtimeEnv;
  SQuery *          pQuery = pQInfo->runtimeEnv.pQuery;

  if (onlyQueryTags(pQuery)) {
    return;
  }

  if (isSTableQuery && (!QUERY_IS_INTERVAL_QUERY(pQuery)) && (!isFixedOutputQuery(pQuery))) {
    return;
  }

  STsdbQueryCond cond = {
    .twindow   = pQuery->window,
    .order     = pQuery->order.order,
    .colList   = pQuery->colList,
    .numOfCols = pQuery->numOfCols,
  };

  // single-table ascending scan without interval/group-by/fixed output:
  // use the time window kept in the table's own query info instead of the query window
  if (!isSTableQuery
    && (pQInfo->tableqinfoGroupInfo.numOfTables == 1)
    && (cond.order == TSDB_ORDER_ASC)
    && (!QUERY_IS_INTERVAL_QUERY(pQuery))
    && (!isGroupbyNormalCol(pQuery->pGroupbyExpr))
    && (!isFixedOutputQuery(pQuery))
  ) {
    SArray*          pa = GET_TABLEGROUP(pQInfo, 0);
    STableQueryInfo* pCheckInfo = taosArrayGetP(pa, 0);
    cond.twindow = pCheckInfo->win;
  }

  if (isFirstLastRowQuery(pQuery)) {
    pRuntimeEnv->pQueryHandle = tsdbQueryLastRow(tsdb, &cond, &pQInfo->tableGroupInfo, pQInfo);
  } else if (isPointInterpoQuery(pQuery)) {
    pRuntimeEnv->pQueryHandle = tsdbQueryRowsInExternalWindow(tsdb, &cond, &pQInfo->tableGroupInfo, pQInfo);
  } else {
    pRuntimeEnv->pQueryHandle = tsdbQueryTables(tsdb, &cond, &pQInfo->tableGroupInfo, pQInfo);
  }
}

4128 4129 4130 4131 4132 4133 4134 4135 4136 4137 4138 4139 4140
/**
 * Build the per-column fill description for a query's output columns.
 *
 * One SFillColInfo entry is produced for each of the pQuery->numOfOutput
 * select expressions; the column offset of each entry is the sum of the byte
 * widths of the preceding expressions.
 *
 * @param pQuery query whose select expressions and fill values are read
 * @return array allocated with calloc (the caller receives the pointer), or
 *         NULL when the allocation fails
 */
static SFillColInfo* taosCreateFillColInfo(SQuery* pQuery) {
  int32_t numOfCols = pQuery->numOfOutput;
  int32_t offset = 0;

  SFillColInfo* pFillCol = calloc(numOfCols, sizeof(SFillColInfo));
  if (pFillCol == NULL) {  // do not write through a failed allocation
    return NULL;
  }

  for(int32_t i = 0; i < numOfCols; ++i) {
    SExprInfo* pExprInfo = &pQuery->pSelectExpr[i];

    pFillCol[i].col.bytes  = pExprInfo->bytes;
    pFillCol[i].col.type   = pExprInfo->type;
    pFillCol[i].col.offset = offset;
    pFillCol[i].flag       = TSDB_COL_NORMAL;    // always a normal column for table query
    pFillCol[i].functionId = pExprInfo->base.functionId;
    pFillCol[i].fillVal.i = pQuery->fillVal[i];

    offset += pExprInfo->bytes;
  }

  return pFillCol;
}

4149
/**
 * Initialise the runtime environment of a query before execution.
 *
 * Sets up the query handle, the function contexts, the disk-based result
 * buffer and window result info (for super table, group-by-column and interval
 * queries) and the fill info (when a fill type is requested and the query is
 * not a point interpolation query). The query status is set to
 * QUERY_NOT_COMPLETED on success.
 *
 * @param pQInfo        query to initialise
 * @param pTsBuf        optional TS buffer; may be NULL
 * @param tsdb          tsdb repository used for the query
 * @param vgId          vgroup id stored in pQInfo
 * @param isSTableQuery true when the query runs on a super table
 * @return TSDB_CODE_SUCCESS, or the error code returned by
 *         setupQueryRuntimeEnv() / createDiskbasedResultBuffer()
 */
int32_t doInitQInfo(SQInfo *pQInfo, STSBuf *pTsBuf, void *tsdb, int32_t vgId, bool isSTableQuery) {
  int32_t code = TSDB_CODE_SUCCESS;

  SQueryRuntimeEnv *pRuntimeEnv = &pQInfo->runtimeEnv;

  SQuery *pQuery = pQInfo->runtimeEnv.pQuery;
  pQuery->precision = tsdbGetCfg(tsdb)->precision;

  setScanLimitationByResultBuffer(pQuery);
  changeExecuteScanOrder(pQInfo, false);
  setupQueryHandle(tsdb, pQInfo, isSTableQuery);

  pQInfo->tsdb = tsdb;
  pQInfo->vgId = vgId;

  pRuntimeEnv->pQuery = pQuery;
  pRuntimeEnv->pTSBuf = pTsBuf;
  pRuntimeEnv->cur.vgroupIndex = -1;
  pRuntimeEnv->stableQuery = isSTableQuery;
  pRuntimeEnv->prevGroupId = INT32_MIN;
  pRuntimeEnv->groupbyNormalCol = isGroupbyNormalCol(pQuery->pGroupbyExpr);

  if (pTsBuf != NULL) {
    int16_t order = (pQuery->order.order == pRuntimeEnv->pTSBuf->tsOrder) ? TSDB_ORDER_ASC : TSDB_ORDER_DESC;
    tsBufSetTraverseOrder(pRuntimeEnv->pTSBuf, order);
  }

  // create runtime environment
  code = setupQueryRuntimeEnv(pRuntimeEnv, pQuery->order.order);
  if (code != TSDB_CODE_SUCCESS) {
    return code;
  }

  // topBotQuery must be known before it is passed to getNumOfRowsInResultPage() below;
  // it used to be assigned only at the end of this function, after it had been read.
  // NOTE(review): if setupQueryRuntimeEnv() already sets this flag, the assignment is merely redundant.
  pRuntimeEnv->topBotQuery = isTopBottomQuery(pQuery);
  pRuntimeEnv->numOfRowsPerPage = getNumOfRowsInResultPage(pQuery, pRuntimeEnv->topBotQuery, isSTableQuery);

  if (isSTableQuery) {
    int32_t rows = getInitialPageNum(pQInfo);
    code = createDiskbasedResultBuffer(&pRuntimeEnv->pResultBuf, rows, pQuery->rowSize, pQInfo);
    if (code != TSDB_CODE_SUCCESS) {
      return code;
    }

    if (pQuery->intervalTime == 0) {
      int16_t type = TSDB_DATA_TYPE_NULL;

      if (pRuntimeEnv->groupbyNormalCol) {  // group by columns not tags;
        type = getGroupbyColumnType(pQuery, pQuery->pGroupbyExpr);
      } else {
        type = TSDB_DATA_TYPE_INT;  // group id
      }

      initWindowResInfo(&pRuntimeEnv->windowResInfo, pRuntimeEnv, 512, 4096, type);
    }

  } else if (pRuntimeEnv->groupbyNormalCol || QUERY_IS_INTERVAL_QUERY(pQuery)) {
    int32_t rows = getInitialPageNum(pQInfo);
    code = createDiskbasedResultBuffer(&pRuntimeEnv->pResultBuf, rows, pQuery->rowSize, pQInfo);
    if (code != TSDB_CODE_SUCCESS) {
      return code;
    }

    int16_t type = TSDB_DATA_TYPE_NULL;
    if (pRuntimeEnv->groupbyNormalCol) {
      type = getGroupbyColumnType(pQuery, pQuery->pGroupbyExpr);
    } else {
      type = TSDB_DATA_TYPE_TIMESTAMP;
    }

    initWindowResInfo(&pRuntimeEnv->windowResInfo, pRuntimeEnv, rows, 4096, type);
  }

  if (pQuery->fillType != TSDB_FILL_NONE && !isPointInterpoQuery(pQuery)) {
    SFillColInfo* pColInfo = taosCreateFillColInfo(pQuery);
    STimeWindow w = TSWINDOW_INITIALIZER;

    // the query window may be stored in either order; align on the normalised range
    TSKEY sk = MIN(pQuery->window.skey, pQuery->window.ekey);
    TSKEY ek = MAX(pQuery->window.skey, pQuery->window.ekey);
    getAlignQueryTimeWindow(pQuery, pQuery->window.skey, sk, ek, &w);

    pRuntimeEnv->pFillInfo = taosInitFillInfo(pQuery->order.order, w.skey, 0, pQuery->rec.capacity, pQuery->numOfOutput,
                                              pQuery->slidingTime, pQuery->slidingTimeUnit, pQuery->precision,
                                              pQuery->fillType, pColInfo);
  }

  // todo refactor
  pRuntimeEnv->hasTagResults = hasTagValOutput(pQuery);

  setQueryStatus(pQuery, QUERY_NOT_COMPLETED);
  return TSDB_CODE_SUCCESS;
}

4241
/**
 * Clear the "complete" flag of every output function's result info so the
 * functions can run again on the next table.
 *
 * @param pRuntimeEnv runtime environment holding the function contexts
 */
static void enableExecutionForNextTable(SQueryRuntimeEnv *pRuntimeEnv) {
  SQuery *pQuery = pRuntimeEnv->pQuery;

  for (int32_t i = 0; i < pQuery->numOfOutput; ++i) {
    SResultInfo *pResInfo = GET_RES_INFO(&pRuntimeEnv->pCtx[i]);
    if (pResInfo != NULL) {
      pResInfo->complete = false;
    }
  }
}

H
Haojun Liao 已提交
4252
/**
 * Scan all data blocks delivered by the active query handle and apply the
 * super table functions on each of them.
 *
 * The primary handle is used during the master scan, the secondary handle
 * otherwise. The loop stops when the handle has no more blocks, when the
 * query is killed, or when the block's table id is not found in the table
 * query-info map.
 *
 * @param pQInfo query being executed
 * @return difference between the taosGetTimestampMs() readings taken after
 *         and before the scan
 */
static int64_t scanMultiTableDataBlocks(SQInfo *pQInfo) {
  SQueryRuntimeEnv *pRuntimeEnv = &pQInfo->runtimeEnv;
  SQuery*           pQuery = pRuntimeEnv->pQuery;
  SQueryCostInfo*   summary  = &pRuntimeEnv->summary;

  int64_t st = taosGetTimestampMs();

  TsdbQueryHandleT pQueryHandle = IS_MASTER_SCAN(pRuntimeEnv)? pRuntimeEnv->pQueryHandle : pRuntimeEnv->pSecQueryHandle;
  SDataBlockInfo   blockInfo = SDATA_BLOCK_INITIALIZER;

  while (tsdbNextDataBlock(pQueryHandle)) {
    summary->totalBlocks += 1;
    if (isQueryKilled(pQInfo)) {
      break;
    }

    tsdbRetrieveDataBlockInfo(pQueryHandle, &blockInfo);

    // look up the per-table query info by the table id of the block
    STableQueryInfo **pTableQueryInfo = (STableQueryInfo**) taosHashGet(pQInfo->tableqinfoGroupInfo.map, &blockInfo.tid, sizeof(blockInfo.tid));
    if(pTableQueryInfo == NULL) {
      break;
    }

    assert(*pTableQueryInfo != NULL);
    SET_CURRENT_QUERY_TABLE_INFO(pRuntimeEnv, *pTableQueryInfo);

    SDataStatis *pStatis = NULL;
    SArray *pDataBlock = loadDataBlockOnDemand(pRuntimeEnv, pQueryHandle, &blockInfo, &pStatis);

    if (!pRuntimeEnv->groupbyNormalCol) {
      if (!QUERY_IS_INTERVAL_QUERY(pQuery)) {
        int32_t step = QUERY_IS_ASC_QUERY(pQuery)? 1:-1;
        setExecutionContext(pQInfo, (*pTableQueryInfo)->groupIndex, blockInfo.window.ekey + step);
      } else {  // interval query
        TSKEY nextKey = blockInfo.window.skey;
        setIntervalQueryRange(pQInfo, nextKey);

        if (pRuntimeEnv->hasTagResults || pRuntimeEnv->pTSBuf != NULL) {
          setAdditionalInfo(pQInfo, (*pTableQueryInfo)->pTable, *pTableQueryInfo);
        }
      }
    }

    summary->totalRows += blockInfo.rows;
    stableApplyFunctionsOnBlock(pRuntimeEnv, &blockInfo, pStatis, pDataBlock, binarySearchForKey);

    qDebug("QInfo:%p check data block, uid:%"PRId64", tid:%d, brange:%" PRId64 "-%" PRId64 ", numOfRows:%d, lastKey:%" PRId64,
           pQInfo, blockInfo.uid, blockInfo.tid, blockInfo.window.skey, blockInfo.window.ekey, blockInfo.rows, pQuery->current->lastKey);
  }

  int64_t et = taosGetTimestampMs();
  return et - st;
}

4305 4306
/**
 * Prepare the runtime environment for scanning one table of group 0.
 *
 * Resets the query status, sets the tag values of the table, replaces the
 * current query handle with a new one restricted to this table and to the
 * range [lastKey, win.ekey] of its query info, positions the TS buffer (if
 * any) and initialises the output buffers of the function contexts.
 *
 * @param pQInfo query being executed
 * @param index  index of the table inside table group 0
 * @return false when a TS buffer is present, no cursor has been saved yet and
 *         the buffer holds no element for the table's tag value; true otherwise
 */
static bool multiTableMultioutputHelper(SQInfo *pQInfo, int32_t index) {
  SQueryRuntimeEnv *pRuntimeEnv = &pQInfo->runtimeEnv;
  SQuery *          pQuery = pRuntimeEnv->pQuery;

  setQueryStatus(pQuery, QUERY_NOT_COMPLETED);
  SArray *         group = GET_TABLEGROUP(pQInfo, 0);
  STableQueryInfo* pCheckInfo = taosArrayGetP(group, index);

  setTagVal(pRuntimeEnv, pCheckInfo->pTable, pQInfo->tsdb);

  STableId* id = TSDB_TABLEID(pCheckInfo->pTable);
  qDebug("QInfo:%p query on (%d): uid:%" PRIu64 ", tid:%d, qrange:%" PRId64 "-%" PRId64, pQInfo, index,
         id->uid, id->tid, pCheckInfo->lastKey, pCheckInfo->win.ekey);

  STsdbQueryCond cond = {
      .twindow   = {pCheckInfo->lastKey, pCheckInfo->win.ekey},
      .order     = pQuery->order.order,
      .colList   = pQuery->colList,
      .numOfCols = pQuery->numOfCols,
  };

  // todo refactor
  // build a temporary group list that contains only the current table
  SArray *g1 = taosArrayInit(1, POINTER_BYTES);
  SArray *tx = taosArrayInit(1, POINTER_BYTES);

  taosArrayPush(tx, &pCheckInfo->pTable);
  taosArrayPush(g1, &tx);
  STableGroupInfo gp = {.numOfTables = 1, .pGroupList = g1};

  // release the previous handle before creating the one for the current table
  if (pRuntimeEnv->pQueryHandle != NULL) {
    tsdbCleanupQueryHandle(pRuntimeEnv->pQueryHandle);
    pRuntimeEnv->pQueryHandle = NULL;
  }

  pRuntimeEnv->pQueryHandle = tsdbQueryTables(pQInfo->tsdb, &cond, &gp, pQInfo);
  taosArrayDestroy(tx);
  taosArrayDestroy(g1);

  if (pRuntimeEnv->pTSBuf != NULL) {
    if (pRuntimeEnv->cur.vgroupIndex == -1) {
      int64_t tag = pRuntimeEnv->pCtx[0].tag.i64Key;
      STSElem elem = tsBufGetElemStartPos(pRuntimeEnv->pTSBuf, 0, tag);

      // failed to find data with the specified tag value
      if (elem.vnode < 0) {
        return false;
      }
    } else {
      tsBufSetCursor(pRuntimeEnv->pTSBuf, &pRuntimeEnv->cur);
    }
  }

  initCtxOutputBuf(pRuntimeEnv);
  return true;
}

/**
 * super table query handler
 * 1. super table projection query, group-by on normal columns query, ts-comp query
 * 2. point interpolation query, last row query
 *
 * @param pQInfo
 */
4369
static void sequentialTableProcess(SQInfo *pQInfo) {
4370
  SQueryRuntimeEnv *pRuntimeEnv = &pQInfo->runtimeEnv;
4371
  SQuery *          pQuery = pRuntimeEnv->pQuery;
4372
  setQueryStatus(pQuery, QUERY_COMPLETED);
4373

H
Haojun Liao 已提交
4374
  size_t numOfGroups = GET_NUM_OF_TABLEGROUP(pQInfo);
4375

H
Haojun Liao 已提交
4376
  if (isPointInterpoQuery(pQuery) || isFirstLastRowQuery(pQuery)) {
4377 4378
    resetCtxOutputBuf(pRuntimeEnv);
    assert(pQuery->limit.offset == 0 && pQuery->limit.limit != 0);
4379

4380
    while (pQInfo->groupIndex < numOfGroups) {
H
Haojun Liao 已提交
4381
      SArray* group = taosArrayGetP(pQInfo->tableGroupInfo.pGroupList, pQInfo->groupIndex);
4382

4383
      qDebug("QInfo:%p last_row query on group:%d, total group:%zu, current group:%p", pQInfo, pQInfo->groupIndex,
dengyihao's avatar
dengyihao 已提交
4384
             numOfGroups, group);
H
Haojun Liao 已提交
4385 4386 4387 4388 4389 4390 4391 4392 4393 4394 4395 4396 4397 4398 4399 4400 4401 4402 4403 4404

      STsdbQueryCond cond = {
          .twindow = pQuery->window,
          .colList = pQuery->colList,
          .order   = pQuery->order.order,
          .numOfCols = pQuery->numOfCols,
      };

      SArray *g1 = taosArrayInit(1, POINTER_BYTES);
      SArray *tx = taosArrayClone(group);
      taosArrayPush(g1, &tx);
      
      STableGroupInfo gp = {.numOfTables = taosArrayGetSize(tx), .pGroupList = g1};

      // include only current table
      if (pRuntimeEnv->pQueryHandle != NULL) {
        tsdbCleanupQueryHandle(pRuntimeEnv->pQueryHandle);
        pRuntimeEnv->pQueryHandle = NULL;
      }
      
4405
      if (isFirstLastRowQuery(pQuery)) {
H
Haojun Liao 已提交
4406
        pRuntimeEnv->pQueryHandle = tsdbQueryLastRow(pQInfo->tsdb, &cond, &gp, pQInfo);
H
Haojun Liao 已提交
4407
      } else {
H
Haojun Liao 已提交
4408
        pRuntimeEnv->pQueryHandle = tsdbQueryRowsInExternalWindow(pQInfo->tsdb, &cond, &gp, pQInfo);
4409
      }
H
Haojun Liao 已提交
4410 4411
      
      initCtxOutputBuf(pRuntimeEnv);
4412
      
4413
      SArray* s = tsdbGetQueriedTableList(pRuntimeEnv->pQueryHandle);
4414 4415
      assert(taosArrayGetSize(s) >= 1);
      
4416
      setTagVal(pRuntimeEnv, taosArrayGetP(s, 0), pQInfo->tsdb);
4417 4418 4419
      if (isFirstLastRowQuery(pQuery)) {
        assert(taosArrayGetSize(s) == 1);
      }
H
Haojun Liao 已提交
4420

dengyihao's avatar
dengyihao 已提交
4421
      taosArrayDestroy(s);
H
Haojun Liao 已提交
4422

H
Haojun Liao 已提交
4423
      // here we simply set the first table as current table
4424 4425 4426
      SArray* first = GET_TABLEGROUP(pQInfo, pQInfo->groupIndex);
      pQuery->current = taosArrayGetP(first, 0);

4427
      scanOneTableDataBlocks(pRuntimeEnv, pQuery->current->lastKey);
H
Haojun Liao 已提交
4428 4429 4430 4431 4432 4433 4434 4435 4436 4437 4438 4439
      
      int64_t numOfRes = getNumOfResult(pRuntimeEnv);
      if (numOfRes > 0) {
        pQuery->rec.rows += numOfRes;
        forwardCtxOutputBuf(pRuntimeEnv, numOfRes);
      }
      
      skipResults(pRuntimeEnv);
      pQInfo->groupIndex += 1;

      // enable execution for next table, when handling the projection query
      enableExecutionForNextTable(pRuntimeEnv);
4440 4441 4442 4443 4444 4445

      if (pQuery->rec.rows >= pQuery->rec.capacity) {
        setQueryStatus(pQuery, QUERY_RESBUF_FULL);
        break;
      }
    }
H
Haojun Liao 已提交
4446
  } else if (pRuntimeEnv->groupbyNormalCol) { // group-by on normal columns query
4447
    while (pQInfo->groupIndex < numOfGroups) {
H
Haojun Liao 已提交
4448
      SArray* group = taosArrayGetP(pQInfo->tableGroupInfo.pGroupList, pQInfo->groupIndex);
4449

4450
      qDebug("QInfo:%p group by normal columns group:%d, total group:%zu", pQInfo, pQInfo->groupIndex, numOfGroups);
4451 4452 4453 4454 4455 4456 4457 4458 4459 4460 4461 4462 4463 4464 4465 4466 4467 4468 4469 4470 4471 4472

      STsdbQueryCond cond = {
          .twindow = pQuery->window,
          .colList = pQuery->colList,
          .order   = pQuery->order.order,
          .numOfCols = pQuery->numOfCols,
      };

      SArray *g1 = taosArrayInit(1, POINTER_BYTES);
      SArray *tx = taosArrayClone(group);
      taosArrayPush(g1, &tx);

      STableGroupInfo gp = {.numOfTables = taosArrayGetSize(tx), .pGroupList = g1};

      // include only current table
      if (pRuntimeEnv->pQueryHandle != NULL) {
        tsdbCleanupQueryHandle(pRuntimeEnv->pQueryHandle);
        pRuntimeEnv->pQueryHandle = NULL;
      }

      pRuntimeEnv->pQueryHandle = tsdbQueryTables(pQInfo->tsdb, &cond, &gp, pQInfo);

4473
      SArray* s = tsdbGetQueriedTableList(pRuntimeEnv->pQueryHandle);
4474 4475
      assert(taosArrayGetSize(s) >= 1);

4476
      setTagVal(pRuntimeEnv, taosArrayGetP(s, 0), pQInfo->tsdb);
4477 4478 4479 4480 4481 4482 4483 4484

      // here we simply set the first table as current table
      scanMultiTableDataBlocks(pQInfo);
      pQInfo->groupIndex += 1;

      SWindowResInfo *pWindowResInfo = &pRuntimeEnv->windowResInfo;

        // no results generated for current group, continue to try the next group
dengyihao's avatar
dengyihao 已提交
4485
      taosArrayDestroy(s); 
4486 4487 4488 4489 4490 4491 4492 4493 4494 4495 4496 4497 4498 4499
      if (pWindowResInfo->size <= 0) {
        continue;
      }

      for (int32_t i = 0; i < pWindowResInfo->size; ++i) {
        SWindowStatus *pStatus = &pWindowResInfo->pResult[i].status;
        pStatus->closed = true;  // enable return all results for group by normal columns

        SWindowResult *pResult = &pWindowResInfo->pResult[i];
        for (int32_t j = 0; j < pQuery->numOfOutput; ++j) {
          pResult->numOfRows = MAX(pResult->numOfRows, pResult->resultInfo[j].numOfRes);
        }
      }

4500
      qDebug("QInfo:%p generated groupby columns results %d rows for group %d completed", pQInfo, pWindowResInfo->size,
4501 4502 4503 4504 4505 4506 4507 4508 4509 4510 4511 4512 4513 4514
          pQInfo->groupIndex);
      int32_t currentGroupIndex = pQInfo->groupIndex;

      pQuery->rec.rows = 0;
      pQInfo->groupIndex = 0;

      ensureOutputBufferSimple(pRuntimeEnv, pWindowResInfo->size);
      copyFromWindowResToSData(pQInfo, pWindowResInfo->pResult);

      pQInfo->groupIndex = currentGroupIndex;  //restore the group index
      assert(pQuery->rec.rows == pWindowResInfo->size);

      clearClosedTimeWindow(pRuntimeEnv);
      break;
4515 4516 4517
    }
  } else {
    /*
4518
     * 1. super table projection query, 2. ts-comp query
4519 4520 4521
     * if the subgroup index is larger than 0, results generated by group by tbname,k is existed.
     * we need to return it to client in the first place.
     */
4522
    if (pQInfo->groupIndex > 0) {
4523
      copyFromWindowResToSData(pQInfo, pRuntimeEnv->windowResInfo.pResult);
4524
      pQuery->rec.total += pQuery->rec.rows;
4525

4526
      if (pQuery->rec.rows > 0) {
4527 4528 4529
        return;
      }
    }
4530

4531
    // all data have returned already
4532
    if (pQInfo->tableIndex >= pQInfo->tableqinfoGroupInfo.numOfTables) {
4533 4534
      return;
    }
4535

4536 4537
    resetCtxOutputBuf(pRuntimeEnv);
    resetTimeWindowInfo(pRuntimeEnv, &pRuntimeEnv->windowResInfo);
4538

H
Haojun Liao 已提交
4539
    SArray *group = GET_TABLEGROUP(pQInfo, 0);
4540 4541
    assert(taosArrayGetSize(group) == pQInfo->tableqinfoGroupInfo.numOfTables &&
           1 == taosArrayGetSize(pQInfo->tableqinfoGroupInfo.pGroupList));
4542

4543
    while (pQInfo->tableIndex < pQInfo->tableqinfoGroupInfo.numOfTables) {
4544
      if (isQueryKilled(pQInfo)) {
4545 4546
        return;
      }
4547

4548
      pQuery->current = taosArrayGetP(group, pQInfo->tableIndex);
4549
      if (!multiTableMultioutputHelper(pQInfo, pQInfo->tableIndex)) {
4550
        pQInfo->tableIndex++;
4551 4552
        continue;
      }
4553

H
hjxilinx 已提交
4554
      // TODO handle the limit offset problem
4555
      if (pQuery->numOfFilterCols == 0 && pQuery->limit.offset > 0) {
4556
        //        skipBlocks(pRuntimeEnv);
4557 4558
        if (Q_STATUS_EQUAL(pQuery->status, QUERY_COMPLETED)) {
          pQInfo->tableIndex++;
4559 4560 4561
          continue;
        }
      }
4562

4563
      scanOneTableDataBlocks(pRuntimeEnv, pQuery->current->lastKey);
4564
      skipResults(pRuntimeEnv);
4565

4566
      // the limitation of output result is reached, set the query completed
4567
      if (limitResults(pRuntimeEnv)) {
4568
        pQInfo->tableIndex = pQInfo->tableqinfoGroupInfo.numOfTables;
4569 4570
        break;
      }
4571

4572 4573
      // enable execution for next table, when handling the projection query
      enableExecutionForNextTable(pRuntimeEnv);
4574

4575
      if (Q_STATUS_EQUAL(pQuery->status, QUERY_COMPLETED)) {
4576 4577 4578 4579 4580 4581
        /*
         * query range is identical in terms of all meters involved in query,
         * so we need to restore them at the *beginning* of query on each meter,
         * not the consecutive query on meter on which is aborted due to buffer limitation
         * to ensure that, we can reset the query range once query on a meter is completed.
         */
4582
        pQInfo->tableIndex++;
weixin_48148422's avatar
weixin_48148422 已提交
4583

H
Haojun Liao 已提交
4584
        STableIdInfo tidInfo = {0};
4585

H
Haojun Liao 已提交
4586 4587 4588
        STableId* id = TSDB_TABLEID(pQuery->current->pTable);
        tidInfo.uid = id->uid;
        tidInfo.tid = id->tid;
weixin_48148422's avatar
weixin_48148422 已提交
4589
        tidInfo.key = pQuery->current->lastKey;
weixin_48148422's avatar
weixin_48148422 已提交
4590 4591
        taosArrayPush(pQInfo->arrTableIdInfo, &tidInfo);

4592
        // if the buffer is full or group by each table, we need to jump out of the loop
4593 4594
        if (Q_STATUS_EQUAL(pQuery->status, QUERY_RESBUF_FULL) /*||
            isGroupbyEachTable(pQuery->pGroupbyExpr, pSupporter->pSidSet)*/) {
4595 4596
          break;
        }
4597

4598
      } else {
4599
        // all data in the result buffer are skipped due to the offset, continue to retrieve data from current meter
4600 4601
        if (pQuery->rec.rows == 0) {
          assert(!Q_STATUS_EQUAL(pQuery->status, QUERY_RESBUF_FULL));
4602 4603
          continue;
        } else {
4604 4605 4606
          // buffer is full, wait for the next round to retrieve data from current meter
          assert(Q_STATUS_EQUAL(pQuery->status, QUERY_RESBUF_FULL));
          break;
4607 4608 4609
        }
      }
    }
H
Haojun Liao 已提交
4610

4611
    if (pQInfo->tableIndex >= pQInfo->tableqinfoGroupInfo.numOfTables) {
H
Haojun Liao 已提交
4612 4613
      setQueryStatus(pQuery, QUERY_COMPLETED);
    }
4614
  }
4615

4616 4617 4618 4619 4620 4621 4622 4623 4624 4625 4626 4627
  /*
   * 1. super table projection query, group-by on normal columns query, ts-comp query
   * 2. point interpolation query, last row query
   *
   * group-by on normal columns query and last_row query do NOT invoke the finalizer here,
   * since the finalize stage will be done at the client side.
   *
   * projection query, point interpolation query do not need the finalizer.
   *
   * Only the ts-comp query requires the finalizer function to be executed here.
   */
  if (isTSCompQuery(pQuery)) {
H
hjxilinx 已提交
4628
    finalizeQueryResult(pRuntimeEnv);
4629
  }
4630

4631 4632 4633
  if (pRuntimeEnv->pTSBuf != NULL) {
    pRuntimeEnv->cur = pRuntimeEnv->pTSBuf->cur;
  }
4634

4635
  qDebug(
B
Bomin Zhang 已提交
4636
      "QInfo %p numOfTables:%"PRIu64", index:%d, numOfGroups:%zu, %"PRId64" points returned, total:%"PRId64", offset:%" PRId64,
4637
      pQInfo, pQInfo->tableqinfoGroupInfo.numOfTables, pQInfo->tableIndex, numOfGroups, pQuery->rec.rows, pQuery->rec.total,
4638
      pQuery->limit.offset);
4639 4640
}

4641 4642 4643 4644
/*
 * Switch the runtime environment of the given query over to the reverse scan.
 *
 * The scan flag is set to REVERSE_SCAN, the start/end keys of the query window are
 * swapped and the scan order is flipped. A new secondary tsdb query handle is then
 * opened with the flipped window/order (a previously opened secondary handle is
 * released first), the query status is reset to QUERY_NOT_COMPLETED, and the function
 * contexts are switched for the reverse pass.
 */
static void doSaveContext(SQInfo *pQInfo) {
  SQueryRuntimeEnv *pEnv = &pQInfo->runtimeEnv;
  SQuery           *pQry = pEnv->pQuery;

  // flip scan flag, time window and traverse order
  SET_REVERSE_SCAN_FLAG(pEnv);
  SWAP(pQry->window.skey, pQry->window.ekey, TSKEY);
  SWITCH_ORDER(pQry->order.order);

  // keep the ts buffer cursor consistent with the new scan order
  if (pEnv->pTSBuf != NULL) {
    pEnv->pTSBuf->cur.order = pQry->order.order;
  }

  // the query condition is built from the already flipped window and order
  STsdbQueryCond cond = {
      .twindow   = pQry->window,
      .order     = pQry->order.order,
      .colList   = pQry->colList,
      .numOfCols = pQry->numOfCols,
  };

  // release the secondary handle left over from a previous pass, if any
  if (pEnv->pSecQueryHandle != NULL) {
    tsdbCleanupQueryHandle(pEnv->pSecQueryHandle);
  }

  pEnv->prevGroupId = INT32_MIN;
  pEnv->pSecQueryHandle = tsdbQueryTables(pQInfo->tsdb, &cond, &pQInfo->tableGroupInfo, pQInfo);

  setQueryStatus(pQry, QUERY_NOT_COMPLETED);
  switchCtxOrder(pEnv);
  disableFuncInReverseScan(pQInfo);
}

4673 4674 4675 4676
/*
 * Undo the window/order changes applied for the reverse scan: the start/end keys of
 * the query window are swapped back, the order of the ts buffer cursor (if a ts buffer
 * exists) is switched, the function contexts are switched, and the scan flag is set
 * back to MASTER_SCAN.
 */
static void doRestoreContext(SQInfo *pQInfo) {
  SQueryRuntimeEnv *pEnv = &pQInfo->runtimeEnv;
  SQuery           *pQry = pEnv->pQuery;

  SWAP(pQry->window.skey, pQry->window.ekey, TSKEY);

  if (pEnv->pTSBuf != NULL) {
    SWITCH_ORDER(pEnv->pTSBuf->cur.order);
  }

  switchCtxOrder(pEnv);
  SET_MASTER_SCAN_FLAG(pEnv);
}

4687 4688 4689
/*
 * Close every time window once a scan pass is finished.
 *
 * For an interval query the window result info of each table in each table group is
 * closed; otherwise only the window result info kept in the runtime environment
 * (the group result) is closed.
 */
static void doCloseAllTimeWindowAfterScan(SQInfo *pQInfo) {
  SQuery *pQry = pQInfo->runtimeEnv.pQuery;

  if (!QUERY_IS_INTERVAL_QUERY(pQry)) {
    // close results for group result
    closeAllTimeWindow(&pQInfo->runtimeEnv.windowResInfo);
    return;
  }

  size_t numOfGroups = GET_NUM_OF_TABLEGROUP(pQInfo);
  for (int32_t g = 0; g < numOfGroups; ++g) {
    SArray *pTableList = GET_TABLEGROUP(pQInfo, g);
    size_t  numOfTables = taosArrayGetSize(pTableList);

    for (int32_t t = 0; t < numOfTables; ++t) {
      STableQueryInfo *pTableQueryInfo = taosArrayGetP(pTableList, t);
      closeAllTimeWindow(&pTableQueryInfo->windowResInfo);
    }
  }
}

/*
 * Execute a query over multiple tables: master scan, optional reverse scan, then merge
 * and copy of the results into the output buffer.
 *
 * When pQInfo->groupIndex > 0 the scan has already been done by an earlier invocation
 * and only the next part of the result is copied to the output buffer.
 * The function returns early, without producing results, if an error code is set on
 * pQInfo or the query has been killed.
 */
static void multiTableQueryProcess(SQInfo *pQInfo) {
  SQueryRuntimeEnv *pEnv = &pQInfo->runtimeEnv;
  SQuery           *pQry = pEnv->pQuery;

  if (pQInfo->groupIndex > 0) {
    /*
     * groupIndex > 0 means that the query process has already been completed, we only
     * need to copy the remaining data into the output buffer
     */
    if (!QUERY_IS_INTERVAL_QUERY(pQry)) {
      copyFromWindowResToSData(pQInfo, pEnv->windowResInfo.pResult);
    } else {
      copyResToQueryResultBuf(pQInfo, pQry);
#ifdef _DEBUG_VIEW
      displayInterResult(pQry->sdata, pEnv, pQry->sdata[0]->num);
#endif
    }

    qDebug("QInfo:%p current:%"PRId64", total:%"PRId64"", pQInfo, pQry->rec.rows, pQry->rec.total);
    return;
  }

  qDebug("QInfo:%p query start, qrange:%" PRId64 "-%" PRId64 ", order:%d, forward scan start", pQInfo,
         pQry->window.skey, pQry->window.ekey, pQry->order.order);

  // master scan: check all qualified data blocks
  int64_t elapsed = scanMultiTableDataBlocks(pQInfo);
  qDebug("QInfo:%p master scan completed, elapsed time: %" PRId64 "ms, reverse scan start", pQInfo, elapsed);

  // query error occurred or query is killed, abort current execution
  if (pQInfo->code != TSDB_CODE_SUCCESS || isQueryKilled(pQInfo)) {
    qDebug("QInfo:%p query killed or error occurred, code:%s, abort", pQInfo, tstrerror(pQInfo->code));
    return;
  }

  // close all time window results
  doCloseAllTimeWindowAfterScan(pQInfo);

  if (!needReverseScan(pQry)) {
    qDebug("QInfo:%p no need to do reversed scan, query completed", pQInfo);
  } else {
    doSaveContext(pQInfo);

    elapsed = scanMultiTableDataBlocks(pQInfo);
    qDebug("QInfo:%p reversed scan completed, elapsed time: %" PRId64 "ms", pQInfo, elapsed);

    doRestoreContext(pQInfo);
  }

  setQueryStatus(pQry, QUERY_COMPLETED);

  // the reverse scan may have failed or the query may have been killed in the meantime
  if (pQInfo->code != TSDB_CODE_SUCCESS || isQueryKilled(pQInfo)) {
    qDebug("QInfo:%p query killed or error occurred, code:%s, abort", pQInfo, tstrerror(pQInfo->code));
    return;
  }

  if (!QUERY_IS_INTERVAL_QUERY(pQry) && !isSumAvgRateQuery(pQry)) {
    // not an interval query
    copyFromWindowResToSData(pQInfo, pEnv->windowResInfo.pResult);
  } else if (mergeIntoGroupResult(pQInfo) == TSDB_CODE_SUCCESS) {
    copyResToQueryResultBuf(pQInfo, pQry);

#ifdef _DEBUG_VIEW
    displayInterResult(pQry->sdata, pEnv, pQry->sdata[0]->num);
#endif
  }

  // handle the limitation of output buffer
  qDebug("QInfo:%p points returned:%" PRId64 ", total:%" PRId64, pQInfo, pQry->rec.rows, pQry->rec.total + pQry->rec.rows);
}

/*
 * in each query, this function will be called only once, no retry for further result.
 *
 * select count(*)/top(field,k)/avg(field name) from table_name [where ts>now-1a];
 * select count(*) from table_name group by status_column;
 */
H
hjxilinx 已提交
4788
static void tableFixedOutputProcess(SQInfo *pQInfo, STableQueryInfo* pTableInfo) {
4789
  SQueryRuntimeEnv *pRuntimeEnv = &pQInfo->runtimeEnv;
H
hjxilinx 已提交
4790 4791
  
  SQuery *pQuery = pRuntimeEnv->pQuery;
H
Haojun Liao 已提交
4792 4793 4794 4795
  if (!isTopBottomQuery(pQuery) && pQuery->limit.offset > 0) {  // no need to execute, since the output will be ignore.
    return;
  }
  
H
hjxilinx 已提交
4796 4797
  pQuery->current = pTableInfo;  // set current query table info
  
4798
  scanOneTableDataBlocks(pRuntimeEnv, pTableInfo->lastKey);
H
hjxilinx 已提交
4799
  finalizeQueryResult(pRuntimeEnv);
4800

4801
  if (isQueryKilled(pQInfo)) {
4802 4803
    return;
  }
4804

H
Haojun Liao 已提交
4805
  // since the numOfRows must be identical for all sql functions that are allowed to be executed simutaneously.
4806
  pQuery->rec.rows = getNumOfResult(pRuntimeEnv);
4807

4808
  skipResults(pRuntimeEnv);
4809
  limitResults(pRuntimeEnv);
4810 4811
}

H
hjxilinx 已提交
4812
/*
 * Process a query on a single table that may generate multiple output rows and may be
 * resumed when the output buffer is full.
 *
 * The table is scanned repeatedly until at least one row is produced or the query is
 * completed. When the query is completed, the (uid, tid, lastKey) of the table is
 * appended to pQInfo->arrTableIdInfo.
 */
static void tableMultiOutputProcess(SQInfo *pQInfo, STableQueryInfo* pTableInfo) {
  SQueryRuntimeEnv *pEnv = &pQInfo->runtimeEnv;
  SQuery           *pQry = pEnv->pQuery;

  pQry->current = pTableInfo;

  // for ts_comp query, re-initialization of the output buffer is not allowed
  if (!isTSCompQuery(pQry)) {
    resetCtxOutputBuf(pEnv);
  }

  // skip blocks without loading the actual data block from file if no filter condition is present
  skipBlocks(pEnv);
  if (pQry->limit.offset > 0 && pQry->numOfFilterCols == 0) {
    setQueryStatus(pQry, QUERY_COMPLETED);
    return;
  }

  for (;;) {
    scanOneTableDataBlocks(pEnv, pQry->current->lastKey);
    finalizeQueryResult(pEnv);

    if (isQueryKilled(pQInfo)) {
      return;
    }

    pQry->rec.rows = getNumOfResult(pEnv);

    bool applyOffset = pQry->limit.offset > 0 && pQry->numOfFilterCols > 0 && pQry->rec.rows > 0;
    if (applyOffset) {
      skipResults(pEnv);
    }

    /*
     * 1. if no rows are left, the offset may still be >= 0 and more data needs to be checked
     * 2. if rows are left, the offset must be 0
     */
    if (pQry->rec.rows > 0 || Q_STATUS_EQUAL(pQry->status, QUERY_COMPLETED)) {
      break;
    }

    qDebug("QInfo:%p skip current result, offset:%" PRId64 ", next qrange:%" PRId64 "-%" PRId64,
           pQInfo, pQry->limit.offset, pQry->current->lastKey, pQry->current->win.ekey);

    resetCtxOutputBuf(pEnv);
  }

  limitResults(pEnv);

  if (Q_STATUS_EQUAL(pQry->status, QUERY_RESBUF_FULL)) {
    qDebug("QInfo:%p query paused due to output limitation, next qrange:%" PRId64 "-%" PRId64, pQInfo,
        pQry->current->lastKey, pQry->window.ekey);
  } else if (Q_STATUS_EQUAL(pQry->status, QUERY_COMPLETED)) {
    // record the position reached on this table
    STableId    *pId = TSDB_TABLEID(pQry->current);
    STableIdInfo tidInfo;

    tidInfo.uid = pId->uid;
    tidInfo.tid = pId->tid;
    tidInfo.key = pQry->current->lastKey;
    taosArrayPush(pQInfo->arrTableIdInfo, &tidInfo);
  }

  if (!isTSCompQuery(pQry)) {
    assert(pQry->rec.rows <= pQry->rec.capacity);
  }
}

H
Haojun Liao 已提交
4876
/*
 * Scan the data blocks of the current table, starting from the given key, until the
 * query is completed or the result buffer is full. Returns immediately if the query is
 * killed.
 *
 * After every scan, closed time windows are dropped to consume the remaining offset
 * when the offset could not be handled beforehand (a filter or a ts buffer exists) and
 * no fill is required.
 */
static void tableIntervalProcessImpl(SQueryRuntimeEnv *pRuntimeEnv, TSKEY start) {
  SQuery *pQry = pRuntimeEnv->pQuery;

  do {
    scanOneTableDataBlocks(pRuntimeEnv, start);

    if (isQueryKilled(GET_QINFO_ADDR(pRuntimeEnv))) {
      return;
    }

    assert(!Q_STATUS_EQUAL(pQry->status, QUERY_NOT_COMPLETED));
    finalizeQueryResult(pRuntimeEnv);

    // here we can ignore the records in case of no interpolation
    // todo handle offset, in case of top/bottom interval query
    bool offsetNotYetApplied = (pQry->numOfFilterCols > 0 || pRuntimeEnv->pTSBuf != NULL);
    if (offsetNotYetApplied && pQry->limit.offset > 0 && pQry->fillType == TSDB_FILL_NONE) {
      int32_t numOfClosed = numOfClosedTimeWindow(&pRuntimeEnv->windowResInfo);

      // drop as many closed windows as the remaining offset allows
      int32_t numOfSkipped = MIN(numOfClosed, pQry->limit.offset);
      clearFirstNTimeWindow(pRuntimeEnv, numOfSkipped);
      pQry->limit.offset -= numOfSkipped;
    }
  } while (!Q_STATUS_EQUAL(pQry->status, QUERY_COMPLETED | QUERY_RESBUF_FULL));
}

4907
// handle time interval query on table
H
hjxilinx 已提交
4908
static void tableIntervalProcess(SQInfo *pQInfo, STableQueryInfo* pTableInfo) {
4909 4910
  SQueryRuntimeEnv *pRuntimeEnv = &(pQInfo->runtimeEnv);

H
hjxilinx 已提交
4911 4912
  SQuery *pQuery = pRuntimeEnv->pQuery;
  pQuery->current = pTableInfo;
4913

H
Haojun Liao 已提交
4914
  int32_t numOfFilled = 0;
H
Haojun Liao 已提交
4915 4916
  TSKEY newStartKey = TSKEY_INITIAL_VAL;
  
4917
  // skip blocks without load the actual data block from file if no filter condition present
H
Haojun Liao 已提交
4918
  skipTimeInterval(pRuntimeEnv, &newStartKey);
4919
  if (pQuery->limit.offset > 0 && pQuery->numOfFilterCols == 0 && pRuntimeEnv->pFillInfo == NULL) {
4920 4921 4922 4923
    setQueryStatus(pQuery, QUERY_COMPLETED);
    return;
  }

4924
  while (1) {
H
Haojun Liao 已提交
4925
    tableIntervalProcessImpl(pRuntimeEnv, newStartKey);
4926

H
Haojun Liao 已提交
4927
    if (QUERY_IS_INTERVAL_QUERY(pQuery)) {
4928
      pQInfo->groupIndex = 0;  // always start from 0
4929
      pQuery->rec.rows = 0;
4930
      copyFromWindowResToSData(pQInfo, pRuntimeEnv->windowResInfo.pResult);
4931

4932
      clearFirstNTimeWindow(pRuntimeEnv, pQInfo->groupIndex);
4933
    }
4934

4935
    // the offset is handled at prepare stage if no interpolation involved
4936
    if (pQuery->fillType == TSDB_FILL_NONE || pQuery->rec.rows == 0) {
4937
      limitResults(pRuntimeEnv);
4938 4939
      break;
    } else {
H
Haojun Liao 已提交
4940
      taosFillSetStartInfo(pRuntimeEnv->pFillInfo, pQuery->rec.rows, pQuery->window.ekey);
4941
      taosFillCopyInputDataFromFilePage(pRuntimeEnv->pFillInfo, (tFilePage**) pQuery->sdata);
H
Haojun Liao 已提交
4942
      numOfFilled = 0;
4943
      
H
Haojun Liao 已提交
4944
      pQuery->rec.rows = doFillGapsInResults(pRuntimeEnv, (tFilePage **)pQuery->sdata, &numOfFilled);
4945
      if (pQuery->rec.rows > 0 || Q_STATUS_EQUAL(pQuery->status, QUERY_COMPLETED)) {
4946
        limitResults(pRuntimeEnv);
4947 4948
        break;
      }
4949

4950
      // no result generated yet, continue retrieve data
4951
      pQuery->rec.rows = 0;
4952 4953
    }
  }
4954

4955
  // all data scanned, the group by normal column can return
H
Haojun Liao 已提交
4956
  if (pRuntimeEnv->groupbyNormalCol) {  // todo refactor with merge interval time result
4957
    pQInfo->groupIndex = 0;
4958
    pQuery->rec.rows = 0;
4959
    copyFromWindowResToSData(pQInfo, pRuntimeEnv->windowResInfo.pResult);
4960
    clearFirstNTimeWindow(pRuntimeEnv, pQInfo->groupIndex);
4961
  }
4962

H
Haojun Liao 已提交
4963
  pQInfo->pointsInterpo += numOfFilled;
4964 4965
}

4966 4967 4968 4969
/*
 * Entry point of a query on a single table.
 *
 * If results of a previous invocation remain (rows pending in the fill procedure or in
 * the window results), they are returned first without scanning more data. Otherwise
 * the query is dispatched to the interval, fixed-output or multi-output processor, and
 * the elapsed time is added to the query summary.
 */
static void tableQueryImpl(SQInfo *pQInfo) {
  SQueryRuntimeEnv *pEnv = &pQInfo->runtimeEnv;
  SQuery           *pQry = pEnv->pQuery;

  if (queryHasRemainResults(pEnv)) {
    if (pQry->fillType != TSDB_FILL_NONE) {
      /*
       * Some results have not been returned yet because of the result interpolation,
       * so we stay in the fill procedure instead of retrieving the next results.
       */
      int32_t numOfFilled = 0;
      pQry->rec.rows = doFillGapsInResults(pEnv, (tFilePage **)pQry->sdata, &numOfFilled);

      if (pQry->rec.rows > 0) {
        limitResults(pEnv);
      }

      qDebug("QInfo:%p current:%" PRId64 " returned, total:%" PRId64, pQInfo, pQry->rec.rows, pQry->rec.total);
      return;
    }

    pQry->rec.rows = 0;
    pQInfo->groupIndex = 0;  // always start from 0

    if (pEnv->windowResInfo.size > 0) {
      copyFromWindowResToSData(pQInfo, pEnv->windowResInfo.pResult);
      clearFirstNTimeWindow(pEnv, pQInfo->groupIndex);

      if (pQry->rec.rows > 0) {
        qDebug("QInfo:%p %"PRId64" rows returned from group results, total:%"PRId64"", pQInfo, pQry->rec.rows, pQry->rec.total);

        // no data remains in the window results
        if (pEnv->windowResInfo.size <= 0) {
          qDebug("QInfo:%p query over, %"PRId64" rows are returned", pQInfo, pQry->rec.total);
        }

        return;
      }
    }
  }

  // number of points returned during this query
  pQry->rec.rows = 0;
  int64_t startTs = taosGetTimestampUs();

  assert(pQInfo->tableqinfoGroupInfo.numOfTables == 1);
  SArray          *pGroup = GET_TABLEGROUP(pQInfo, 0);
  STableQueryInfo *pTableQueryInfo = taosArrayGetP(pGroup, 0);

  // group by normal column, sliding window query, interval query are handled by interval query processor
  if (QUERY_IS_INTERVAL_QUERY(pQry) || pEnv->groupbyNormalCol) {  // interval (down sampling operation)
    tableIntervalProcess(pQInfo, pTableQueryInfo);
  } else if (isFixedOutputQuery(pQry)) {
    tableFixedOutputProcess(pQInfo, pTableQueryInfo);
  } else {  // diff/add/multiply/subtract/division
    assert(pQry->checkBuffer == 1);
    tableMultiOutputProcess(pQInfo, pTableQueryInfo);
  }

  // record the total elapsed time
  pEnv->summary.elapsedTime += (taosGetTimestampUs() - startTs);
  assert(pQInfo->tableqinfoGroupInfo.numOfTables == 1);
}

5031
/*
 * Entry point of a query on a super table.
 *
 * Interval queries, and fixed-output queries that are neither point interpolation,
 * group-by normal column nor first/last row queries, are handled by
 * multiTableQueryProcess; everything else is handled table by table in
 * sequentialTableProcess. The elapsed time is added to the query summary.
 */
static void stableQueryImpl(SQInfo *pQInfo) {
  SQueryRuntimeEnv *pEnv = &pQInfo->runtimeEnv;
  SQuery           *pQry = pEnv->pQuery;

  pQry->rec.rows = 0;

  int64_t startTs = taosGetTimestampUs();

  bool mergeOverAllTables =
      QUERY_IS_INTERVAL_QUERY(pQry) ||
      (isFixedOutputQuery(pQry) && (!isPointInterpoQuery(pQry)) && !pEnv->groupbyNormalCol &&
       !isFirstLastRowQuery(pQry));

  if (mergeOverAllTables) {
    multiTableQueryProcess(pQInfo);
  } else {
    assert((pQry->checkBuffer == 1 && pQry->intervalTime == 0) || isPointInterpoQuery(pQry) ||
            isFirstLastRowQuery(pQry) || pEnv->groupbyNormalCol);

    sequentialTableProcess(pQInfo);
  }

  // record the total elapsed time
  pQInfo->runtimeEnv.summary.elapsedTime += (taosGetTimestampUs() - startTs);
}

5053
/*
 * Locate the column referenced by an expression in the source column lists of the
 * query message.
 *
 * @param pQueryMsg  query message; numOfTags/numOfCols and colList are read
 * @param pExprMsg   expression whose colInfo (flag, colId) identifies the column
 * @param pTagCols   tag column list, searched when the expression refers to a tag
 * @return the index in pTagCols (tag column) or in pQueryMsg->colList (normal column);
 *         -1 if the expression refers to the table name pseudo column
 *         (TSDB_TBNAME_COLUMN_INDEX) or, in builds without assertions, if the column
 *         is not found at all.
 *
 * A column that cannot be found is a programming/protocol error and trips an assert.
 * The explicit return after the assert guarantees a defined result when assertions
 * are compiled out (NDEBUG); previously control fell off the end of the function.
 */
static int32_t getColumnIndexInSource(SQueryTableMsg *pQueryMsg, SSqlFuncMsg *pExprMsg, SColumnInfo* pTagCols) {
  if (TSDB_COL_IS_TAG(pExprMsg->colInfo.flag)) {
    // the table name pseudo column has no entry in the tag column list
    if (pExprMsg->colInfo.colId == TSDB_TBNAME_COLUMN_INDEX) {
      return -1;
    }

    for (int32_t j = 0; j < pQueryMsg->numOfTags; ++j) {
      if (pExprMsg->colInfo.colId == pTagCols[j].colId) {
        return j;
      }
    }
  } else {
    for (int32_t j = 0; j < pQueryMsg->numOfCols; ++j) {
      if (pExprMsg->colInfo.colId == pQueryMsg->colList[j].colId) {
        return j;
      }
    }
  }

  assert(0);
  return -1;  // not found; only reachable when assertions are disabled
}

5082 5083 5084
/*
 * Check that the column referenced by the expression resolves to an index that is
 * below the number of source columns or below the number of tag columns.
 *
 * NOTE(review): a negative index returned by getColumnIndexInSource (e.g. for the
 * table name pseudo column) also satisfies this check.
 */
bool validateExprColumnInfo(SQueryTableMsg *pQueryMsg, SSqlFuncMsg *pExprMsg, SColumnInfo* pTagCols) {
  int32_t index = getColumnIndexInSource(pQueryMsg, pExprMsg, pTagCols);

  if (index < pQueryMsg->numOfCols) {
    return true;
  }

  return index < pQueryMsg->numOfTags;
}

5087
/*
 * Sanity check of the scalar fields of a query message (already in host byte order).
 *
 * @return false, after logging the offending value, if the interval time is negative,
 *         the number of tables is not positive, the number of group-by columns is
 *         negative, or the number of output columns is outside (0, TSDB_MAX_COLUMNS];
 *         true otherwise.
 */
static bool validateQueryMsg(SQueryTableMsg *pQueryMsg) {
  bool valid = true;

  if (pQueryMsg->intervalTime < 0) {
    qError("qmsg:%p illegal value of interval time %" PRId64, pQueryMsg, pQueryMsg->intervalTime);
    valid = false;
  } else if (pQueryMsg->numOfTables <= 0) {
    qError("qmsg:%p illegal value of numOfTables %d", pQueryMsg, pQueryMsg->numOfTables);
    valid = false;
  } else if (pQueryMsg->numOfGroupCols < 0) {
    qError("qmsg:%p illegal value of numOfGroupbyCols %d", pQueryMsg, pQueryMsg->numOfGroupCols);
    valid = false;
  } else if (pQueryMsg->numOfOutput > TSDB_MAX_COLUMNS || pQueryMsg->numOfOutput <= 0) {
    qError("qmsg:%p illegal value of output columns %d", pQueryMsg, pQueryMsg->numOfOutput);
    valid = false;
  }

  return valid;
}

/*
 * Validate the number of source columns/tags of a query message against the output
 * expressions.
 *
 * @param pQueryMsg  query message (numOfCols, numOfTags, numOfOutput are read)
 * @param pExprMsg   array of numOfOutput expression messages
 * @return false if a count is negative or their sum exceeds TSDB_MAX_COLUMNS (logged),
 *         or if no source column/tag is supplied while some output expression is
 *         neither a tag projection, nor a tid_tag/count on the table name pseudo
 *         column; true otherwise.
 */
static bool validateQuerySourceCols(SQueryTableMsg *pQueryMsg, SSqlFuncMsg** pExprMsg) {
  int32_t numOfTotal = pQueryMsg->numOfCols + pQueryMsg->numOfTags;

  if (pQueryMsg->numOfCols < 0 || pQueryMsg->numOfTags < 0 || numOfTotal > TSDB_MAX_COLUMNS) {
    qError("qmsg:%p illegal value of numOfCols %d numOfTags:%d", pQueryMsg, pQueryMsg->numOfCols, pQueryMsg->numOfTags);
    return false;
  }

  if (numOfTotal != 0) {
    return true;
  }

  // no source column at all: only expressions that need no source column are acceptable
  for (int32_t i = 0; i < pQueryMsg->numOfOutput; ++i) {
    SSqlFuncMsg *pFunc = pExprMsg[i];

    bool needsNoSourceCol =
        (pFunc->functionId == TSDB_FUNC_TAGPRJ) ||
        (pFunc->functionId == TSDB_FUNC_TID_TAG && pFunc->colInfo.colId == TSDB_TBNAME_COLUMN_INDEX) ||
        (pFunc->functionId == TSDB_FUNC_COUNT && pFunc->colInfo.colId == TSDB_TBNAME_COLUMN_INDEX);

    if (!needsNoSourceCol) {
      return false;
    }
  }

  return true;
}

5133
/*
 * Build the list of queried tables from the table id section of the query message.
 *
 * The pQueryMsg->numOfTables STableIdInfo records starting at pMsg are converted to
 * host byte order in place and appended to a newly created array.
 *
 * @param pQueryMsg     query message; numOfTables must be > 0
 * @param pMsg          start of the table id section
 * @param pTableIdList  receives the created array of STableIdInfo
 * @return pointer to the first byte after the table id section
 */
static char *createTableIdList(SQueryTableMsg *pQueryMsg, char *pMsg, SArray **pTableIdList) {
  assert(pQueryMsg->numOfTables > 0);

  *pTableIdList = taosArrayInit(pQueryMsg->numOfTables, sizeof(STableIdInfo));

  STableIdInfo *pEntry = (STableIdInfo *)pMsg;
  for (int32_t i = 0; i < pQueryMsg->numOfTables; ++i, ++pEntry) {
    // convert the record in the message buffer itself, then copy it into the list
    pEntry->tid = htonl(pEntry->tid);
    pEntry->uid = htobe64(pEntry->uid);
    pEntry->key = htobe64(pEntry->key);

    taosArrayPush(*pTableIdList, pEntry);
  }

  return (char *)pEntry;
}
5151

5152
/**
H
hjxilinx 已提交
5153
 * pQueryMsg->head has been converted before this function is called.
5154
 *
H
hjxilinx 已提交
5155
 * @param pQueryMsg
5156 5157 5158 5159
 * @param pTableIdList
 * @param pExpr
 * @return
 */
5160
static int32_t convertQueryMsg(SQueryTableMsg *pQueryMsg, SArray **pTableIdList, SSqlFuncMsg ***pExpr,
weixin_48148422's avatar
weixin_48148422 已提交
5161
                               char **tagCond, char** tbnameCond, SColIndex **groupbyCols, SColumnInfo** tagCols) {
5162 5163
  int32_t code = TSDB_CODE_SUCCESS;

5164 5165 5166 5167 5168 5169 5170 5171
  pQueryMsg->numOfTables = htonl(pQueryMsg->numOfTables);

  pQueryMsg->window.skey = htobe64(pQueryMsg->window.skey);
  pQueryMsg->window.ekey = htobe64(pQueryMsg->window.ekey);
  pQueryMsg->intervalTime = htobe64(pQueryMsg->intervalTime);
  pQueryMsg->slidingTime = htobe64(pQueryMsg->slidingTime);
  pQueryMsg->limit = htobe64(pQueryMsg->limit);
  pQueryMsg->offset = htobe64(pQueryMsg->offset);
H
hjxilinx 已提交
5172

5173 5174
  pQueryMsg->order = htons(pQueryMsg->order);
  pQueryMsg->orderColId = htons(pQueryMsg->orderColId);
H
Haojun Liao 已提交
5175
  pQueryMsg->queryType = htonl(pQueryMsg->queryType);
weixin_48148422's avatar
weixin_48148422 已提交
5176
  pQueryMsg->tagNameRelType = htons(pQueryMsg->tagNameRelType);
5177 5178

  pQueryMsg->numOfCols = htons(pQueryMsg->numOfCols);
5179
  pQueryMsg->numOfOutput = htons(pQueryMsg->numOfOutput);
H
hjxilinx 已提交
5180
  pQueryMsg->numOfGroupCols = htons(pQueryMsg->numOfGroupCols);
5181 5182 5183
  pQueryMsg->tagCondLen = htons(pQueryMsg->tagCondLen);
  pQueryMsg->tsOffset = htonl(pQueryMsg->tsOffset);
  pQueryMsg->tsLen = htonl(pQueryMsg->tsLen);
H
hjxilinx 已提交
5184
  pQueryMsg->tsNumOfBlocks = htonl(pQueryMsg->tsNumOfBlocks);
5185
  pQueryMsg->tsOrder = htonl(pQueryMsg->tsOrder);
5186
  pQueryMsg->numOfTags = htonl(pQueryMsg->numOfTags);
5187

5188
  // query msg safety check
5189
  if (!validateQueryMsg(pQueryMsg)) {
5190 5191
    code = TSDB_CODE_QRY_INVALID_MSG;
    goto _cleanup;
5192 5193
  }

H
hjxilinx 已提交
5194 5195
  char *pMsg = (char *)(pQueryMsg->colList) + sizeof(SColumnInfo) * pQueryMsg->numOfCols;
  for (int32_t col = 0; col < pQueryMsg->numOfCols; ++col) {
5196 5197
    SColumnInfo *pColInfo = &pQueryMsg->colList[col];

H
hjxilinx 已提交
5198
    pColInfo->colId = htons(pColInfo->colId);
5199
    pColInfo->type = htons(pColInfo->type);
H
hjxilinx 已提交
5200 5201
    pColInfo->bytes = htons(pColInfo->bytes);
    pColInfo->numOfFilters = htons(pColInfo->numOfFilters);
5202

H
hjxilinx 已提交
5203
    assert(pColInfo->type >= TSDB_DATA_TYPE_BOOL && pColInfo->type <= TSDB_DATA_TYPE_NCHAR);
5204

H
hjxilinx 已提交
5205
    int32_t numOfFilters = pColInfo->numOfFilters;
5206
    if (numOfFilters > 0) {
H
hjxilinx 已提交
5207
      pColInfo->filters = calloc(numOfFilters, sizeof(SColumnFilterInfo));
5208 5209 5210
    }

    for (int32_t f = 0; f < numOfFilters; ++f) {
5211 5212 5213 5214
      SColumnFilterInfo *pFilterMsg = (SColumnFilterInfo *)pMsg;
      
      SColumnFilterInfo *pColFilter = &pColInfo->filters[f];
      pColFilter->filterstr = htons(pFilterMsg->filterstr);
5215 5216 5217

      pMsg += sizeof(SColumnFilterInfo);

5218 5219
      if (pColFilter->filterstr) {
        pColFilter->len = htobe64(pFilterMsg->len);
5220

5221
        pColFilter->pz = (int64_t) calloc(1, pColFilter->len + 1 * TSDB_NCHAR_SIZE); // note: null-terminator
5222 5223
        memcpy((void *)pColFilter->pz, pMsg, pColFilter->len);
        pMsg += (pColFilter->len + 1);
5224
      } else {
5225 5226
        pColFilter->lowerBndi = htobe64(pFilterMsg->lowerBndi);
        pColFilter->upperBndi = htobe64(pFilterMsg->upperBndi);
5227 5228
      }

5229 5230
      pColFilter->lowerRelOptr = htons(pFilterMsg->lowerRelOptr);
      pColFilter->upperRelOptr = htons(pFilterMsg->upperRelOptr);
5231 5232 5233
    }
  }

5234 5235
  *pExpr = calloc(pQueryMsg->numOfOutput, POINTER_BYTES);
  SSqlFuncMsg *pExprMsg = (SSqlFuncMsg *)pMsg;
5236

5237
  for (int32_t i = 0; i < pQueryMsg->numOfOutput; ++i) {
5238
    (*pExpr)[i] = pExprMsg;
5239

5240
    pExprMsg->colInfo.colIndex = htons(pExprMsg->colInfo.colIndex);
5241 5242 5243 5244
    pExprMsg->colInfo.colId = htons(pExprMsg->colInfo.colId);
    pExprMsg->colInfo.flag = htons(pExprMsg->colInfo.flag);
    pExprMsg->functionId = htons(pExprMsg->functionId);
    pExprMsg->numOfParams = htons(pExprMsg->numOfParams);
5245

5246
    pMsg += sizeof(SSqlFuncMsg);
5247 5248

    for (int32_t j = 0; j < pExprMsg->numOfParams; ++j) {
5249
      pExprMsg->arg[j].argType = htons(pExprMsg->arg[j].argType);
5250 5251 5252 5253
      pExprMsg->arg[j].argBytes = htons(pExprMsg->arg[j].argBytes);

      if (pExprMsg->arg[j].argType == TSDB_DATA_TYPE_BINARY) {
        pExprMsg->arg[j].argValue.pz = pMsg;
5254
        pMsg += pExprMsg->arg[j].argBytes;  // one more for the string terminated char.
5255 5256 5257 5258 5259
      } else {
        pExprMsg->arg[j].argValue.i64 = htobe64(pExprMsg->arg[j].argValue.i64);
      }
    }

H
Haojun Liao 已提交
5260 5261
    int16_t functionId = pExprMsg->functionId;
    if (functionId == TSDB_FUNC_TAG || functionId == TSDB_FUNC_TAGPRJ || functionId == TSDB_FUNC_TAG_DUMMY) {
5262
      if (pExprMsg->colInfo.flag != TSDB_COL_TAG) {  // ignore the column  index check for arithmetic expression.
5263 5264
        code = TSDB_CODE_QRY_INVALID_MSG;
        goto _cleanup;
5265 5266
      }
    } else {
5267
//      if (!validateExprColumnInfo(pQueryMsg, pExprMsg)) {
5268
//        return TSDB_CODE_QRY_INVALID_MSG;
5269
//      }
5270 5271
    }

5272
    pExprMsg = (SSqlFuncMsg *)pMsg;
5273
  }
5274

5275
  if (!validateQuerySourceCols(pQueryMsg, *pExpr)) {
5276
    code = TSDB_CODE_QRY_INVALID_MSG;
dengyihao's avatar
dengyihao 已提交
5277
    goto _cleanup;
5278
  }
5279

H
hjxilinx 已提交
5280
  pMsg = createTableIdList(pQueryMsg, pMsg, pTableIdList);
5281

H
hjxilinx 已提交
5282
  if (pQueryMsg->numOfGroupCols > 0) {  // group by tag columns
5283
    *groupbyCols = malloc(pQueryMsg->numOfGroupCols * sizeof(SColIndex));
5284 5285 5286 5287
    if (*groupbyCols == NULL) {
      code = TSDB_CODE_QRY_OUT_OF_MEMORY;
      goto _cleanup;
    }
5288 5289 5290

    for (int32_t i = 0; i < pQueryMsg->numOfGroupCols; ++i) {
      (*groupbyCols)[i].colId = *(int16_t *)pMsg;
5291
      pMsg += sizeof((*groupbyCols)[i].colId);
5292 5293

      (*groupbyCols)[i].colIndex = *(int16_t *)pMsg;
5294 5295
      pMsg += sizeof((*groupbyCols)[i].colIndex);

5296
      (*groupbyCols)[i].flag = *(int16_t *)pMsg;
5297 5298 5299 5300 5301
      pMsg += sizeof((*groupbyCols)[i].flag);

      memcpy((*groupbyCols)[i].name, pMsg, tListLen(groupbyCols[i]->name));
      pMsg += tListLen((*groupbyCols)[i].name);
    }
5302

H
hjxilinx 已提交
5303 5304
    pQueryMsg->orderByIdx = htons(pQueryMsg->orderByIdx);
    pQueryMsg->orderType = htons(pQueryMsg->orderType);
5305 5306
  }

5307 5308
  pQueryMsg->fillType = htons(pQueryMsg->fillType);
  if (pQueryMsg->fillType != TSDB_FILL_NONE) {
5309
    pQueryMsg->fillVal = (uint64_t)(pMsg);
5310 5311

    int64_t *v = (int64_t *)pMsg;
5312
    for (int32_t i = 0; i < pQueryMsg->numOfOutput; ++i) {
5313 5314
      v[i] = htobe64(v[i]);
    }
5315

5316
    pMsg += sizeof(int64_t) * pQueryMsg->numOfOutput;
5317
  }
5318

5319 5320 5321 5322
  if (pQueryMsg->numOfTags > 0) {
    (*tagCols) = calloc(1, sizeof(SColumnInfo) * pQueryMsg->numOfTags);
    for (int32_t i = 0; i < pQueryMsg->numOfTags; ++i) {
      SColumnInfo* pTagCol = (SColumnInfo*) pMsg;
5323

5324 5325 5326 5327
      pTagCol->colId = htons(pTagCol->colId);
      pTagCol->bytes = htons(pTagCol->bytes);
      pTagCol->type  = htons(pTagCol->type);
      pTagCol->numOfFilters = 0;
5328

5329
      (*tagCols)[i] = *pTagCol;
5330
      pMsg += sizeof(SColumnInfo);
5331
    }
H
hjxilinx 已提交
5332
  }
5333

5334 5335 5336 5337 5338 5339
  // the tag query condition expression string is located at the end of query msg
  if (pQueryMsg->tagCondLen > 0) {
    *tagCond = calloc(1, pQueryMsg->tagCondLen);
    memcpy(*tagCond, pMsg, pQueryMsg->tagCondLen);
    pMsg += pQueryMsg->tagCondLen;
  }
5340

weixin_48148422's avatar
weixin_48148422 已提交
5341
  if (*pMsg != 0) {
5342
    size_t len = strlen(pMsg) + 1;
5343

5344
    *tbnameCond = malloc(len);
5345 5346 5347 5348 5349
    if (*tbnameCond == NULL) {
      code = TSDB_CODE_QRY_OUT_OF_MEMORY;
      goto _cleanup;
    }

weixin_48148422's avatar
weixin_48148422 已提交
5350
    strcpy(*tbnameCond, pMsg);
5351
    pMsg += len;
weixin_48148422's avatar
weixin_48148422 已提交
5352
  }
5353

5354
  qDebug("qmsg:%p query %d tables, type:%d, qrange:%" PRId64 "-%" PRId64 ", numOfGroupbyTagCols:%d, order:%d, "
H
Haojun Liao 已提交
5355 5356
         "outputCols:%d, numOfCols:%d, interval:%" PRId64 ", fillType:%d, comptsLen:%d, compNumOfBlocks:%d, limit:%" PRId64 ", offset:%" PRId64,
         pQueryMsg, pQueryMsg->numOfTables, pQueryMsg->queryType, pQueryMsg->window.skey, pQueryMsg->window.ekey, pQueryMsg->numOfGroupCols,
5357
         pQueryMsg->order, pQueryMsg->numOfOutput, pQueryMsg->numOfCols, pQueryMsg->intervalTime,
H
Haojun Liao 已提交
5358
         pQueryMsg->fillType, pQueryMsg->tsLen, pQueryMsg->tsNumOfBlocks, pQueryMsg->limit, pQueryMsg->offset);
5359 5360

  return TSDB_CODE_SUCCESS;
dengyihao's avatar
dengyihao 已提交
5361 5362 5363 5364 5365 5366 5367 5368 5369

_cleanup:
  tfree(*pExpr);
  taosArrayDestroy(*pTableIdList);
  *pTableIdList = NULL;
  tfree(*tbnameCond);
  tfree(*groupbyCols);
  tfree(*tagCols);
  tfree(*tagCond);
5370 5371

  return code;
5372 5373
}

H
hjxilinx 已提交
5374
/*
 * Build the expression tree of an arithmetic expression from its binary form, which
 * is carried in the first argument of the expression (base.arg[0]).
 *
 * @param pArithExprInfo  expression info; on success pExpr is set to the created tree
 * @param pQueryMsg       query message, used for logging only
 * @return TSDB_CODE_SUCCESS on success, the code caught from exprTreeFromBinary if it
 *         raises an exception, or TSDB_CODE_QRY_APP_ERROR if no tree is produced
 */
static int32_t buildAirthmeticExprFromMsg(SExprInfo *pArithExprInfo, SQueryTableMsg *pQueryMsg) {
  qDebug("qmsg:%p create arithmetic expr from binary string: %s", pQueryMsg, pArithExprInfo->base.arg[0].argValue.pz);

  tExprNode* pRoot = NULL;
  TRY(32) {
    pRoot = exprTreeFromBinary(pArithExprInfo->base.arg[0].argValue.pz, pArithExprInfo->base.arg[0].argBytes);
  } CATCH( code ) {
    CLEANUP_EXECUTE();
    return code;
  } END_TRY

  if (pRoot != NULL) {
    pArithExprInfo->pExpr = pRoot;
    return TSDB_CODE_SUCCESS;
  }

  qError("qmsg:%p failed to create arithmetic expression string from:%s", pQueryMsg, pArithExprInfo->base.arg[0].argValue.pz);
  return TSDB_CODE_QRY_APP_ERROR;
}

H
Haojun Liao 已提交
5394
static int32_t createQFunctionExprFromMsg(SQueryTableMsg *pQueryMsg, SExprInfo **pExprInfo, SSqlFuncMsg **pExprMsg,
5395 5396
    SColumnInfo* pTagCols) {
  *pExprInfo = NULL;
H
hjxilinx 已提交
5397
  int32_t code = TSDB_CODE_SUCCESS;
5398

H
Haojun Liao 已提交
5399
  SExprInfo *pExprs = (SExprInfo *)calloc(pQueryMsg->numOfOutput, sizeof(SExprInfo));
5400
  if (pExprs == NULL) {
5401
    return TSDB_CODE_QRY_OUT_OF_MEMORY;
5402 5403 5404 5405 5406
  }

  bool    isSuperTable = QUERY_IS_STABLE_QUERY(pQueryMsg->queryType);
  int16_t tagLen = 0;

5407
  for (int32_t i = 0; i < pQueryMsg->numOfOutput; ++i) {
5408
    pExprs[i].base = *pExprMsg[i];
5409
    pExprs[i].bytes = 0;
5410 5411 5412 5413

    int16_t type = 0;
    int16_t bytes = 0;

5414
    // parse the arithmetic expression
5415
    if (pExprs[i].base.functionId == TSDB_FUNC_ARITHM) {
5416
      code = buildAirthmeticExprFromMsg(&pExprs[i], pQueryMsg);
5417

5418 5419 5420
      if (code != TSDB_CODE_SUCCESS) {
        tfree(pExprs);
        return code;
5421 5422
      }

5423
      type  = TSDB_DATA_TYPE_DOUBLE;
5424
      bytes = tDataTypeDesc[type].nSize;
H
Haojun Liao 已提交
5425
    } else if (pExprs[i].base.colInfo.colId == TSDB_TBNAME_COLUMN_INDEX && pExprs[i].base.functionId == TSDB_FUNC_TAGPRJ) {  // parse the normal column
H
Haojun Liao 已提交
5426 5427 5428
      SSchema s = tGetTableNameColumnSchema();
      type  = s.type;
      bytes = s.bytes;
B
Bomin Zhang 已提交
5429
    } else{
5430
      int32_t j = getColumnIndexInSource(pQueryMsg, &pExprs[i].base, pTagCols);
dengyihao's avatar
dengyihao 已提交
5431
      assert(j < pQueryMsg->numOfCols || j < pQueryMsg->numOfTags);
H
Haojun Liao 已提交
5432

dengyihao's avatar
dengyihao 已提交
5433
      if (pExprs[i].base.colInfo.colId != TSDB_TBNAME_COLUMN_INDEX && j >= 0) {
H
Haojun Liao 已提交
5434 5435 5436 5437
        SColumnInfo* pCol = (TSDB_COL_IS_TAG(pExprs[i].base.colInfo.flag))? &pTagCols[j]:&pQueryMsg->colList[j];
        type = pCol->type;
        bytes = pCol->bytes;
      } else {
H
Haojun Liao 已提交
5438
        SSchema s = tGetTableNameColumnSchema();
H
hjxilinx 已提交
5439

H
Haojun Liao 已提交
5440 5441 5442
        type  = s.type;
        bytes = s.bytes;
      }
5443 5444
    }

5445 5446
    int32_t param = pExprs[i].base.arg[0].argValue.i64;
    if (getResultDataInfo(type, bytes, pExprs[i].base.functionId, param, &pExprs[i].type, &pExprs[i].bytes,
5447
                          &pExprs[i].interBytes, 0, isSuperTable) != TSDB_CODE_SUCCESS) {
5448
      tfree(pExprs);
5449
      return TSDB_CODE_QRY_INVALID_MSG;
5450 5451
    }

5452
    if (pExprs[i].base.functionId == TSDB_FUNC_TAG_DUMMY || pExprs[i].base.functionId == TSDB_FUNC_TS_DUMMY) {
5453
      tagLen += pExprs[i].bytes;
5454
    }
5455
    assert(isValidDataType(pExprs[i].type));
5456 5457 5458
  }

  // TODO refactor
5459
  for (int32_t i = 0; i < pQueryMsg->numOfOutput; ++i) {
5460 5461
    pExprs[i].base = *pExprMsg[i];
    int16_t functId = pExprs[i].base.functionId;
5462

5463
    if (functId == TSDB_FUNC_TOP || functId == TSDB_FUNC_BOTTOM) {
5464
      int32_t j = getColumnIndexInSource(pQueryMsg, &pExprs[i].base, pTagCols);
5465 5466 5467 5468 5469
      assert(j < pQueryMsg->numOfCols);

      SColumnInfo *pCol = &pQueryMsg->colList[j];

      int32_t ret =
5470
          getResultDataInfo(pCol->type, pCol->bytes, functId, pExprs[i].base.arg[0].argValue.i64,
5471
                            &pExprs[i].type, &pExprs[i].bytes, &pExprs[i].interBytes, tagLen, isSuperTable);
5472 5473 5474
      assert(ret == TSDB_CODE_SUCCESS);
    }
  }
5475
  *pExprInfo = pExprs;
5476 5477 5478 5479

  return TSDB_CODE_SUCCESS;
}

5480
/*
 * Create the group-by descriptor for a query.
 *
 * @param pQueryMsg  query message; numOfGroupCols, orderType and orderByIdx are read
 * @param pColIndex  array of numOfGroupCols group-by column indexes, copied into the result
 * @param code       [out] set to TSDB_CODE_QRY_OUT_OF_MEMORY when an allocation fails;
 *                   left untouched otherwise
 * @return the new descriptor, or NULL when the query has no group-by columns or
 *         an allocation failed
 */
static SSqlGroupbyExpr *createGroupbyExprFromMsg(SQueryTableMsg *pQueryMsg, SColIndex *pColIndex, int32_t *code) {
  if (pQueryMsg->numOfGroupCols == 0) {
    return NULL;
  }

  // using group by tag columns
  SSqlGroupbyExpr *pGroupbyExpr = (SSqlGroupbyExpr *)calloc(1, sizeof(SSqlGroupbyExpr));
  if (pGroupbyExpr == NULL) {
    *code = TSDB_CODE_QRY_OUT_OF_MEMORY;
    return NULL;
  }

  pGroupbyExpr->numOfGroupCols = pQueryMsg->numOfGroupCols;
  pGroupbyExpr->orderType = pQueryMsg->orderType;
  pGroupbyExpr->orderIndex = pQueryMsg->orderByIdx;

  pGroupbyExpr->columnInfo = taosArrayInit(pQueryMsg->numOfGroupCols, sizeof(SColIndex));
  if (pGroupbyExpr->columnInfo == NULL) {
    // do not push into a missing array; report the failure like the allocation above
    tfree(pGroupbyExpr);
    *code = TSDB_CODE_QRY_OUT_OF_MEMORY;
    return NULL;
  }

  for(int32_t i = 0; i < pQueryMsg->numOfGroupCols; ++i) {
    taosArrayPush(pGroupbyExpr->columnInfo, &pColIndex[i]);
  }

  return pGroupbyExpr;
}

5504
/*
 * Build pQuery->pFilterInfo: one SSingleColumnFilterInfo per queried column that
 * carries at least one filter, with a filter callback selected for every filter.
 *
 * A filter with both a lower (>, >=) and an upper (<, <=) relation is served by a
 * range callback; any other filter uses the value callback indexed by its single
 * relation operator.
 *
 * @param pQInfo  owning query handle, used for logging only
 * @param pQuery  query whose colList is scanned; numOfFilterCols and pFilterInfo are written
 * @return TSDB_CODE_SUCCESS, TSDB_CODE_QRY_OUT_OF_MEMORY, or TSDB_CODE_QRY_INVALID_MSG
 *         for a malformed filter. On failure the partially built pFilterInfo stays
 *         attached to pQuery.
 */
static int32_t createFilterInfo(void *pQInfo, SQuery *pQuery) {
  for (int32_t i = 0; i < pQuery->numOfCols; ++i) {
    if (pQuery->colList[i].numOfFilters > 0) {
      pQuery->numOfFilterCols++;
    }
  }

  if (pQuery->numOfFilterCols == 0) {
    return TSDB_CODE_SUCCESS;
  }

  pQuery->pFilterInfo = calloc(pQuery->numOfFilterCols, sizeof(SSingleColumnFilterInfo));
  if (pQuery->pFilterInfo == NULL) {
    // keep the count consistent with the (missing) array for whoever cleans up
    pQuery->numOfFilterCols = 0;
    return TSDB_CODE_QRY_OUT_OF_MEMORY;
  }

  for (int32_t i = 0, j = 0; i < pQuery->numOfCols; ++i) {
    if (pQuery->colList[i].numOfFilters <= 0) {
      continue;
    }

    SSingleColumnFilterInfo *pFilterInfo = &pQuery->pFilterInfo[j];

    pFilterInfo->info = pQuery->colList[i];

    pFilterInfo->numOfFilters = pQuery->colList[i].numOfFilters;
    pFilterInfo->pFilters = calloc(pFilterInfo->numOfFilters, sizeof(SColumnFilterElem));
    if (pFilterInfo->pFilters == NULL) {
      return TSDB_CODE_QRY_OUT_OF_MEMORY;
    }

    for (int32_t f = 0; f < pFilterInfo->numOfFilters; ++f) {
      SColumnFilterElem *pSingleColFilter = &pFilterInfo->pFilters[f];
      pSingleColFilter->filterInfo = pQuery->colList[i].filters[f];

      int32_t lower = pSingleColFilter->filterInfo.lowerRelOptr;
      int32_t upper = pSingleColFilter->filterInfo.upperRelOptr;

      if (lower == TSDB_RELATION_INVALID && upper == TSDB_RELATION_INVALID) {
        qError("QInfo:%p invalid filter info", pQInfo);
        return TSDB_CODE_QRY_INVALID_MSG;
      }

      int16_t type  = pQuery->colList[i].type;
      int16_t bytes = pQuery->colList[i].bytes;

      // todo refactor
      __filter_func_t *rangeFilterArray = getRangeFilterFuncArray(type);
      __filter_func_t *filterArray = getValueFilterFuncArray(type);

      if (rangeFilterArray == NULL && filterArray == NULL) {
        qError("QInfo:%p failed to get filter function, invalid data type:%d", pQInfo, type);
        return TSDB_CODE_QRY_INVALID_MSG;
      }

      if ((lower == TSDB_RELATION_GREATER_EQUAL || lower == TSDB_RELATION_GREATER) &&
          (upper == TSDB_RELATION_LESS_EQUAL || upper == TSDB_RELATION_LESS)) {
        // two-sided range: the slot depends on which bounds are inclusive
        assert(rangeFilterArray != NULL);
        if (lower == TSDB_RELATION_GREATER_EQUAL) {
          if (upper == TSDB_RELATION_LESS_EQUAL) {
            pSingleColFilter->fp = rangeFilterArray[4];
          } else {
            pSingleColFilter->fp = rangeFilterArray[2];
          }
        } else {
          if (upper == TSDB_RELATION_LESS_EQUAL) {
            pSingleColFilter->fp = rangeFilterArray[3];
          } else {
            pSingleColFilter->fp = rangeFilterArray[1];
          }
        }
      } else {  // set callback filter function
        assert(filterArray != NULL);
        if (lower != TSDB_RELATION_INVALID) {
          pSingleColFilter->fp = filterArray[lower];

          // a one-sided filter must not carry a second relation
          if (upper != TSDB_RELATION_INVALID) {
            qError("pQInfo:%p failed to get filter function, invalid filter condition: %d", pQInfo, type);
            return TSDB_CODE_QRY_INVALID_MSG;
          }
        } else {
          pSingleColFilter->fp = filterArray[upper];
        }
      }

      assert(pSingleColFilter->fp != NULL);
      pSingleColFilter->bytes = bytes;
    }

    j++;
  }

  return TSDB_CODE_SUCCESS;
}

5591
/*
 * Rewrite colInfo.colIndex of every non-arithmetic output expression so that it
 * is the position of the expression's column id inside pQuery->colList (normal
 * columns) or pQuery->tagColList (columns flagged as tags).
 *
 * A tag expression whose column id is TSDB_TBNAME_COLUMN_INDEX is allowed to have
 * no match in tagColList; its colIndex is then left unchanged.
 */
static void doUpdateExprColumnIndex(SQuery *pQuery) {
  // test the pointer itself before reaching through it
  assert(pQuery != NULL && pQuery->pSelectExpr != NULL);

  for (int32_t k = 0; k < pQuery->numOfOutput; ++k) {
    SSqlFuncMsg *pSqlExprMsg = &pQuery->pSelectExpr[k].base;
    if (pSqlExprMsg->functionId == TSDB_FUNC_ARITHM) {
      continue;
    }

    // todo opt performance
    SColIndex *pColIndex = &pSqlExprMsg->colInfo;
    int32_t    f = 0;

    if (!TSDB_COL_IS_TAG(pColIndex->flag)) {
      for (f = 0; f < pQuery->numOfCols; ++f) {
        if (pColIndex->colId == pQuery->colList[f].colId) {
          pColIndex->colIndex = f;
          break;
        }
      }

      assert (f < pQuery->numOfCols);
    } else {
      for (f = 0; f < pQuery->numOfTags; ++f) {
        if (pColIndex->colId == pQuery->tagColList[f].colId) {
          pColIndex->colIndex = f;
          break;
        }
      }

      assert(f < pQuery->numOfTags || pColIndex->colId == TSDB_TBNAME_COLUMN_INDEX);
    }
  }
}

weixin_48148422's avatar
weixin_48148422 已提交
5626

5627
/*
 * Three-way comparison of two STableIdInfo entries by uid.
 *
 * @return 1 when a's uid is larger, -1 when it is smaller, 0 when both are equal
 */
static int compareTableIdInfo(const void* a, const void* b) {
  const STableIdInfo* lhs = (const STableIdInfo*)a;
  const STableIdInfo* rhs = (const STableIdInfo*)b;

  // each comparison yields 0 or 1, so the difference is exactly -1, 0 or 1
  return (lhs->uid > rhs->uid) - (lhs->uid < rhs->uid);
}

dengyihao's avatar
dengyihao 已提交
5635 5636
static void freeQInfo(SQInfo *pQInfo);

weixin_48148422's avatar
weixin_48148422 已提交
5637
static SQInfo *createQInfoImpl(SQueryTableMsg *pQueryMsg, SArray* pTableIdList, SSqlGroupbyExpr *pGroupbyExpr, SExprInfo *pExprs,
5638
                               STableGroupInfo *pTableGroupInfo, SColumnInfo* pTagCols) {
5639 5640
  SQInfo *pQInfo = (SQInfo *)calloc(1, sizeof(SQInfo));
  if (pQInfo == NULL) {
5641
    return NULL;
5642 5643 5644 5645 5646 5647
  }

  SQuery *pQuery = calloc(1, sizeof(SQuery));
  pQInfo->runtimeEnv.pQuery = pQuery;

  int16_t numOfCols = pQueryMsg->numOfCols;
5648
  int16_t numOfOutput = pQueryMsg->numOfOutput;
5649

5650
  pQuery->numOfCols       = numOfCols;
H
hjxilinx 已提交
5651
  pQuery->numOfOutput     = numOfOutput;
5652 5653 5654
  pQuery->limit.limit     = pQueryMsg->limit;
  pQuery->limit.offset    = pQueryMsg->offset;
  pQuery->order.order     = pQueryMsg->order;
5655
  pQuery->order.orderColId = pQueryMsg->orderColId;
5656 5657 5658 5659
  pQuery->pSelectExpr     = pExprs;
  pQuery->pGroupbyExpr    = pGroupbyExpr;
  pQuery->intervalTime    = pQueryMsg->intervalTime;
  pQuery->slidingTime     = pQueryMsg->slidingTime;
5660
  pQuery->slidingTimeUnit = pQueryMsg->slidingTimeUnit;
5661
  pQuery->fillType        = pQueryMsg->fillType;
5662
  pQuery->numOfTags       = pQueryMsg->numOfTags;
5663
  
5664
  // todo do not allocate ??
5665
  pQuery->colList = calloc(numOfCols, sizeof(SSingleColumnFilterInfo));
5666
  if (pQuery->colList == NULL) {
5667
    goto _cleanup;
5668
  }
5669

H
hjxilinx 已提交
5670
  for (int16_t i = 0; i < numOfCols; ++i) {
5671
    pQuery->colList[i] = pQueryMsg->colList[i];
5672
    pQuery->colList[i].filters = tscFilterInfoClone(pQueryMsg->colList[i].filters, pQuery->colList[i].numOfFilters);
H
hjxilinx 已提交
5673
  }
5674

5675
  pQuery->tagColList = pTagCols;
5676

5677
  // calculate the result row size
5678 5679 5680
  for (int16_t col = 0; col < numOfOutput; ++col) {
    assert(pExprs[col].bytes > 0);
    pQuery->rowSize += pExprs[col].bytes;
5681
  }
5682

5683
  doUpdateExprColumnIndex(pQuery);
5684

5685
  int32_t ret = createFilterInfo(pQInfo, pQuery);
5686
  if (ret != TSDB_CODE_SUCCESS) {
5687
    goto _cleanup;
5688 5689 5690
  }

  // prepare the result buffer
5691
  pQuery->sdata = (tFilePage **)calloc(pQuery->numOfOutput, POINTER_BYTES);
5692
  if (pQuery->sdata == NULL) {
5693
    goto _cleanup;
5694 5695
  }

H
hjxilinx 已提交
5696
  // set the output buffer capacity
H
hjxilinx 已提交
5697
  pQuery->rec.capacity = 4096;
5698
  pQuery->rec.threshold = 4000;
5699

5700
  for (int32_t col = 0; col < pQuery->numOfOutput; ++col) {
5701
    assert(pExprs[col].interBytes >= pExprs[col].bytes);
5702 5703

    // allocate additional memory for interResults that are usually larger then final results
5704 5705
    size_t size = (pQuery->rec.capacity + 1) * pExprs[col].bytes + pExprs[col].interBytes + sizeof(tFilePage);
    pQuery->sdata[col] = (tFilePage *)calloc(1, size);
5706
    if (pQuery->sdata[col] == NULL) {
5707
      goto _cleanup;
5708 5709 5710
    }
  }

5711
  if (pQuery->fillType != TSDB_FILL_NONE) {
5712 5713
    pQuery->fillVal = malloc(sizeof(int64_t) * pQuery->numOfOutput);
    if (pQuery->fillVal == NULL) {
5714
      goto _cleanup;
5715 5716 5717
    }

    // the first column is the timestamp
5718
    memcpy(pQuery->fillVal, (char *)pQueryMsg->fillVal, pQuery->numOfOutput * sizeof(int64_t));
5719 5720 5721
  }

  // to make sure third party won't overwrite this structure
5722
  pQInfo->signature = pQInfo;
5723

5724
  pQInfo->tableGroupInfo = *pTableGroupInfo;
dengyihao's avatar
dengyihao 已提交
5725 5726 5727 5728 5729 5730
  size_t numOfGroups = 0;
  if (pTableGroupInfo->pGroupList != NULL) {
    numOfGroups = taosArrayGetSize(pTableGroupInfo->pGroupList);

    pQInfo->tableqinfoGroupInfo.pGroupList = taosArrayInit(numOfGroups, POINTER_BYTES);
    pQInfo->tableqinfoGroupInfo.numOfTables = pTableGroupInfo->numOfTables;
H
Haojun Liao 已提交
5731 5732 5733
    pQInfo->tableqinfoGroupInfo.map = taosHashInit(pTableGroupInfo->numOfTables,
                                                   taosGetDefaultHashFunction(TSDB_DATA_TYPE_INT), false);
  }
5734

weixin_48148422's avatar
weixin_48148422 已提交
5735 5736
  int tableIndex = 0;
  STimeWindow window = pQueryMsg->window;
5737
  taosArraySort(pTableIdList, compareTableIdInfo);
5738

H
Haojun Liao 已提交
5739 5740 5741 5742
  // TODO optimize the STableQueryInfo malloc strategy
  pQInfo->pBuf = calloc(pTableGroupInfo->numOfTables, sizeof(STableQueryInfo));
  int32_t index = 0;

H
hjxilinx 已提交
5743
  for(int32_t i = 0; i < numOfGroups; ++i) {
5744
    SArray* pa = taosArrayGetP(pTableGroupInfo->pGroupList, i);
5745

H
Haojun Liao 已提交
5746
    size_t s = taosArrayGetSize(pa);
5747
    SArray* p1 = taosArrayInit(s, POINTER_BYTES);
5748

H
hjxilinx 已提交
5749
    for(int32_t j = 0; j < s; ++j) {
5750
      void* pTable = taosArrayGetP(pa, j);
H
Haojun Liao 已提交
5751
      STableId* id = TSDB_TABLEID(pTable);
5752

H
Haojun Liao 已提交
5753
      STableIdInfo* pTableId = taosArraySearch(pTableIdList, id, compareTableIdInfo);
weixin_48148422's avatar
weixin_48148422 已提交
5754 5755 5756
      if (pTableId != NULL ) {
        window.skey = pTableId->key;
      } else {
B
Bomin Zhang 已提交
5757
        window.skey = pQueryMsg->window.skey;
weixin_48148422's avatar
weixin_48148422 已提交
5758
      }
5759

H
Haojun Liao 已提交
5760 5761
      void* buf = pQInfo->pBuf + index * sizeof(STableQueryInfo);
      STableQueryInfo* item = createTableQueryInfo(&pQInfo->runtimeEnv, pTable, window, buf);
5762
      item->groupIndex = i;
H
hjxilinx 已提交
5763
      taosArrayPush(p1, &item);
H
Haojun Liao 已提交
5764 5765
      taosHashPut(pQInfo->tableqinfoGroupInfo.map, &id->tid, sizeof(id->tid), &item, POINTER_BYTES);
      index += 1;
H
hjxilinx 已提交
5766
    }
5767

5768
    taosArrayPush(pQInfo->tableqinfoGroupInfo.pGroupList, &p1);
H
hjxilinx 已提交
5769
  }
5770

weixin_48148422's avatar
weixin_48148422 已提交
5771 5772
  pQInfo->arrTableIdInfo = taosArrayInit(tableIndex, sizeof(STableIdInfo));

5773
  pQuery->pos = -1;
5774
  pQuery->window = pQueryMsg->window;
5775

5776
  if (sem_init(&pQInfo->dataReady, 0, 0) != 0) {
5777 5778
    int32_t code = TAOS_SYSTEM_ERROR(errno);
    qError("QInfo:%p init dataReady sem failed, reason:%s", pQInfo, tstrerror(code));
5779
    goto _cleanup;
5780
  }
5781

5782
  colIdCheck(pQuery);
5783

5784
  qDebug("qmsg:%p QInfo:%p created", pQueryMsg, pQInfo);
5785 5786
  return pQInfo;

5787
_cleanup:
dengyihao's avatar
dengyihao 已提交
5788
  freeQInfo(pQInfo);
5789 5790 5791
  return NULL;
}

H
hjxilinx 已提交
5792
static bool isValidQInfo(void *param) {
H
hjxilinx 已提交
5793 5794 5795 5796
  SQInfo *pQInfo = (SQInfo *)param;
  if (pQInfo == NULL) {
    return false;
  }
5797

H
hjxilinx 已提交
5798 5799 5800 5801
  /*
   * pQInfo->signature may be changed by another thread, so we assign value of signature
   * into local variable, then compare by using local variable
   */
5802
  uint64_t sig = (uint64_t)pQInfo->signature;
H
hjxilinx 已提交
5803 5804 5805
  return (sig == (uint64_t)pQInfo);
}

H
Haojun Liao 已提交
5806
/*
 * Finish the initialization of a freshly created query handle.
 *
 * Registers the release callback, short-circuits queries that cannot produce any
 * result (inverted time range, no qualifying table) by marking them completed and
 * posting dataReady, and otherwise builds the optional timestamp buffer and calls
 * doInitQInfo().
 *
 * @param pQueryMsg  query message; the ts* fields describe the optional compressed timestamp blocks
 * @param tsdb       storage handle forwarded to doInitQInfo()
 * @param vgId       forwarded to doInitQInfo()
 * @param pQInfo     handle to initialize; freed through freeQInfo() when doInitQInfo() fails
 * @param isSTable   forwarded to doInitQInfo()
 * @param param      argument stored for the release callback
 * @param fn         release callback stored in the handle
 * @return TSDB_CODE_SUCCESS, or the code returned by doInitQInfo()
 */
static int32_t initQInfo(SQueryTableMsg *pQueryMsg, void *tsdb, int32_t vgId, SQInfo *pQInfo, bool isSTable, void* param, _qinfo_free_fn_t fn) {
  int32_t code = TSDB_CODE_SUCCESS;
  SQuery *pQuery = pQInfo->runtimeEnv.pQuery;

  // register the callback first so that every successful return, including the
  // early ones below, leaves the handle with its release function
  pQInfo->param = param;
  pQInfo->freeFn = fn;

  if ((QUERY_IS_ASC_QUERY(pQuery) && (pQuery->window.skey > pQuery->window.ekey)) ||
      (!QUERY_IS_ASC_QUERY(pQuery) && (pQuery->window.ekey > pQuery->window.skey))) {
    qDebug("QInfo:%p no result in time range %" PRId64 "-%" PRId64 ", order %d", pQInfo, pQuery->window.skey,
           pQuery->window.ekey, pQuery->order.order);
    setQueryStatus(pQuery, QUERY_COMPLETED);

    sem_post(&pQInfo->dataReady);
    return TSDB_CODE_SUCCESS;
  }

  if (pQInfo->tableqinfoGroupInfo.numOfTables == 0) {
    qDebug("QInfo:%p no table qualified for tag filter, abort query", pQInfo);
    setQueryStatus(pQuery, QUERY_COMPLETED);

    sem_post(&pQInfo->dataReady);
    return TSDB_CODE_SUCCESS;
  }

  // build the timestamp buffer only once the query is known to run; the early
  // returns above never hand it to anyone and would otherwise drop it
  STSBuf *pTSBuf = NULL;
  if (pQueryMsg->tsLen > 0) {  // open new file to save the result
    char *tsBlock = (char *) pQueryMsg + pQueryMsg->tsOffset;
    pTSBuf = tsBufCreateFromCompBlocks(tsBlock, pQueryMsg->tsNumOfBlocks, pQueryMsg->tsLen, pQueryMsg->tsOrder);

    if (pTSBuf != NULL) {
      tsBufResetPos(pTSBuf);
      bool ret = tsBufNextPos(pTSBuf);
      UNUSED(ret);
    }
  }

  // filter the qualified
  if ((code = doInitQInfo(pQInfo, pTSBuf, tsdb, vgId, isSTable)) != TSDB_CODE_SUCCESS) {
    goto _error;
  }

  return code;

_error:
  // table query ref will be decrease during error handling
  freeQInfo(pQInfo);
  return code;
}

/*
 * Release a query handle and everything attached to it: result buffers, the
 * runtime environment, filter info, output expressions (including arithmetic
 * expression trees), fill values, per-table query info, table group data, the
 * group-by descriptor and the column lists.
 *
 * Handles that fail isValidQInfo() (NULL or signature mismatch) are ignored. The
 * handle is zeroed before being freed so that a stale pointer no longer passes
 * the signature check.
 */
static void freeQInfo(SQInfo *pQInfo) {
  if (!isValidQInfo(pQInfo)) {
    return;
  }

  SQuery *pQuery = pQInfo->runtimeEnv.pQuery;
  setQueryKilled(pQInfo);

  qDebug("QInfo:%p start to free QInfo", pQInfo);

  // the buffer array is absent when the query was only partially built
  if (pQuery->sdata != NULL) {
    for (int32_t col = 0; col < pQuery->numOfOutput; ++col) {
      tfree(pQuery->sdata[col]);
    }
  }

  sem_destroy(&(pQInfo->dataReady));
  teardownQueryRuntimeEnv(&pQInfo->runtimeEnv);

  if (pQuery->pFilterInfo != NULL) {
    for (int32_t i = 0; i < pQuery->numOfFilterCols; ++i) {
      SSingleColumnFilterInfo *pColFilter = &pQuery->pFilterInfo[i];
      if (pColFilter->numOfFilters > 0) {
        tfree(pColFilter->pFilters);
      }
    }
  }

  if (pQuery->pSelectExpr != NULL) {
    for (int32_t i = 0; i < pQuery->numOfOutput; ++i) {
      SExprInfo* pExprInfo = &pQuery->pSelectExpr[i];

      if (pExprInfo->pExpr != NULL) {
        tExprTreeDestroy(&pExprInfo->pExpr, NULL);
      }
    }

    tfree(pQuery->pSelectExpr);
  }

  tfree(pQuery->fillVal);

  // todo refactor, extract method to destroytableDataInfo
  int32_t numOfGroups = GET_NUM_OF_TABLEGROUP(pQInfo);
  for (int32_t i = 0; i < numOfGroups; ++i) {
    SArray *p = GET_TABLEGROUP(pQInfo, i);

    size_t num = taosArrayGetSize(p);
    for(int32_t j = 0; j < num; ++j) {
      STableQueryInfo* item = taosArrayGetP(p, j);
      if (item != NULL) {
        destroyTableQueryInfo(item, pQuery->numOfOutput);
      }
    }

    taosArrayDestroy(p);
  }

  tfree(pQInfo->pBuf);
  taosArrayDestroy(pQInfo->tableqinfoGroupInfo.pGroupList);
  taosHashCleanup(pQInfo->tableqinfoGroupInfo.map);
  tsdbDestoryTableGroup(&pQInfo->tableGroupInfo);
  taosArrayDestroy(pQInfo->arrTableIdInfo);

  if (pQuery->pGroupbyExpr != NULL) {
    taosArrayDestroy(pQuery->pGroupbyExpr->columnInfo);
    tfree(pQuery->pGroupbyExpr);
  }

  tfree(pQuery->tagColList);
  tfree(pQuery->pFilterInfo);
  tfree(pQuery->colList);
  tfree(pQuery->sdata);

  tfree(pQuery);

  qDebug("QInfo:%p QInfo is freed", pQInfo);

  // destroy signature, in order to avoid the query process pass the object safety check
  memset(pQInfo, 0, sizeof(SQInfo));
  tfree(pQInfo);
}

H
hjxilinx 已提交
5934
/*
 * Compute the number of payload bytes for the current result.
 *
 * For an ordinary query this is rowSize * (*numOfRows). For a ts-comp query with
 * at least one row, the result is the file named in sdata[0]->data: its size is
 * returned and also written back to *numOfRows (0 is returned when the file
 * cannot be inspected).
 * TODO handle the case that the file is too large to send back one time
 */
static size_t getResultSize(SQInfo *pQInfo, int64_t *numOfRows) {
  SQuery *pQuery = pQInfo->runtimeEnv.pQuery;

  if (!isTSCompQuery(pQuery) || (*numOfRows) <= 0) {
    return pQuery->rowSize * (*numOfRows);
  }

  struct stat fileInfo;
  if (stat(pQuery->sdata[0]->data, &fileInfo) != 0) {
    qError("QInfo:%p failed to get file info, path:%s, reason:%s", pQInfo, pQuery->sdata[0]->data, strerror(errno));
    return 0;
  }

  *numOfRows = fileInfo.st_size;
  return fileInfo.st_size;
}
5955

H
hjxilinx 已提交
5956 5957 5958
/*
 * Copy the current result of a query into the response buffer and update the
 * running totals and the query status.
 *
 * For a ts-comp query the result is the content of the temporary file named in
 * sdata[0]->data: the whole file is read into data and the file is removed. For
 * any other query doCopyQueryResultToMsg() copies rec.rows rows.
 *
 * @param pQInfo  query handle
 * @param data    destination buffer, sized by the caller
 * @return TSDB_CODE_SUCCESS; file errors are logged but not reported to the caller yet
 */
static int32_t doDumpQueryResult(SQInfo *pQInfo, char *data) {
  // the remained number of retrieved rows, not the interpolated result
  SQuery *pQuery = pQInfo->runtimeEnv.pQuery;

  // load data from file to msg buffer
  if (isTSCompQuery(pQuery)) {
    int32_t fd = open(pQuery->sdata[0]->data, O_RDONLY, 0666);

    // make sure file exist
    if (FD_VALID(fd)) {
      int64_t size = lseek(fd, 0, SEEK_END);
      qDebug("QInfo:%p ts comp data return, file:%s, size:%d", pQInfo, pQuery->sdata[0]->data, (int32_t)size);

      // never hand a failed (negative) size to read(); it would become a huge length
      if (size >= 0 && lseek(fd, 0, SEEK_SET) >= 0) {
        int64_t done = 0;

        // read() may return fewer bytes than requested, keep going until the file is consumed
        while (done < size) {
          int64_t n = read(fd, data + done, (size_t)(size - done));
          if (n < 0) {
            if (errno == EINTR) {
              continue;
            }

            qError("QInfo:%p failed to read ts-comp data, path:%s, reason:%s", pQInfo, pQuery->sdata[0]->data,
                   strerror(errno));
            break;
          }

          if (n == 0) {  // file ended earlier than its reported size
            break;
          }

          done += n;
        }
      } else {
        // todo handle error
        qError("QInfo:%p failed to seek ts-comp data file, path:%s, reason:%s", pQInfo, pQuery->sdata[0]->data,
               strerror(errno));
      }

      close(fd);
      unlink(pQuery->sdata[0]->data);
    } else {
      // todo return the error code to client and handle invalid fd
      qError("QInfo:%p failed to open tmp file to send ts-comp data to client, path:%s, reason:%s", pQInfo,
             pQuery->sdata[0]->data, strerror(errno));

      // FD_VALID may reject a descriptor that was nevertheless opened
      if (fd != -1) {
        close(fd);
      }
    }

    // all data returned, set query over
    if (Q_STATUS_EQUAL(pQuery->status, QUERY_COMPLETED)) {
      setQueryStatus(pQuery, QUERY_OVER);
    }
  } else {
    doCopyQueryResultToMsg(pQInfo, pQuery->rec.rows, data);
  }

  pQuery->rec.total += pQuery->rec.rows;
  qDebug("QInfo:%p current numOfRes rows:%" PRId64 ", total:%" PRId64, pQInfo, pQuery->rec.rows, pQuery->rec.total);

  if (pQuery->limit.limit > 0 && pQuery->limit.limit == pQuery->rec.total) {
    qDebug("QInfo:%p results limitation reached, limitation:%"PRId64, pQInfo, pQuery->limit.limit);
    setQueryStatus(pQuery, QUERY_OVER);
  }

  return TSDB_CODE_SUCCESS;
}

6006 6007 6008 6009 6010 6011 6012
/*
 * Bookkeeping object that keeps query handles in a cache.
 * NOTE(review): presumably one instance exists per vnode (see vgId) - confirm
 * against the code that creates and destroys it.
 */
typedef struct SQueryMgmt {
  SCacheObj      *qinfoPool;      // query handle pool
  int32_t         vgId;           // presumably the id of the owning vnode group - confirm
  bool            closed;         // NOTE(review): looks like a shutdown flag; verify against its users
  pthread_mutex_t lock;           // mutex; what exactly it protects is not visible here
} SQueryMgmt;

H
Haojun Liao 已提交
6013 6014
/*
 * Create and initialize a query handle from a query message.
 *
 * Steps: decode the message (convertQueryMsg), build the output expressions and
 * the group-by descriptor, resolve the tables to query from tsdb according to the
 * query type, create the handle (createQInfoImpl) and initialize it (initQInfo).
 *
 * @param tsdb       storage handle, must not be NULL
 * @param vgId       forwarded to initQInfo()
 * @param pQueryMsg  query message, must not be NULL
 * @param param      argument for the release callback
 * @param fn         release callback invoked when the handle is finally destroyed
 * @param pQInfo     [out] receives the handle on success and NULL on failure
 * @return TSDB_CODE_SUCCESS or an error code. On success the handle carries two references.
 */
int32_t qCreateQueryInfo(void* tsdb, int32_t vgId, SQueryTableMsg* pQueryMsg, void* param, _qinfo_free_fn_t fn,
    qinfo_t* pQInfo) {
  assert(pQueryMsg != NULL && tsdb != NULL);

  int32_t code = TSDB_CODE_SUCCESS;

  char *        tagCond = NULL, *tbnameCond = NULL;
  SArray *      pTableIdList = NULL;
  SSqlFuncMsg **pExprMsg = NULL;
  SColIndex *   pGroupColIndex = NULL;
  SColumnInfo*  pTagColumnInfo = NULL;
  SExprInfo     *pExprs = NULL;
  SSqlGroupbyExpr *pGroupbyExpr = NULL;

  bool            isSTableQuery = false;
  STableGroupInfo tableGroupInfo = {0};

  if ((code = convertQueryMsg(pQueryMsg, &pTableIdList, &pExprMsg, &tagCond, &tbnameCond, &pGroupColIndex, &pTagColumnInfo)) !=
         TSDB_CODE_SUCCESS) {
    goto _over;
  }

  if (pQueryMsg->numOfTables <= 0) {
    qError("Invalid number of tables to query, numOfTables:%d", pQueryMsg->numOfTables);
    code = TSDB_CODE_QRY_INVALID_MSG;
    goto _over;
  }

  if (pTableIdList == NULL || taosArrayGetSize(pTableIdList) == 0) {
    qError("qmsg:%p, SQueryTableMsg wrong format", pQueryMsg);
    code = TSDB_CODE_QRY_INVALID_MSG;
    goto _over;
  }

  if ((code = createQFunctionExprFromMsg(pQueryMsg, &pExprs, pExprMsg, pTagColumnInfo)) != TSDB_CODE_SUCCESS) {
    goto _over;
  }

  pGroupbyExpr = createGroupbyExprFromMsg(pQueryMsg, pGroupColIndex, &code);
  if ((pGroupbyExpr == NULL && pQueryMsg->numOfGroupCols != 0) || code != TSDB_CODE_SUCCESS) {
    goto _over;
  }

  if (TSDB_QUERY_HAS_TYPE(pQueryMsg->queryType, TSDB_QUERY_TYPE_TABLE_QUERY)) {
    STableIdInfo *id = taosArrayGet(pTableIdList, 0);

    qDebug("qmsg:%p query normal table, uid:%"PRId64", tid:%d", pQueryMsg, id->uid, id->tid);
    if ((code = tsdbGetOneTableGroup(tsdb, id->uid, &tableGroupInfo)) != TSDB_CODE_SUCCESS) {
      goto _over;
    }
  } else if (TSDB_QUERY_HAS_TYPE(pQueryMsg->queryType, TSDB_QUERY_TYPE_MULTITABLE_QUERY|TSDB_QUERY_TYPE_STABLE_QUERY)) {
    isSTableQuery = true;
    // TODO: need a macro from TSDB to check if table is super table

    // also note there's possibility that only one table in the super table
    if (!TSDB_QUERY_HAS_TYPE(pQueryMsg->queryType, TSDB_QUERY_TYPE_MULTITABLE_QUERY)) {
      STableIdInfo *id = taosArrayGet(pTableIdList, 0);

      // group by normal column, do not pass the group by condition to tsdb to group table into different group
      int32_t numOfGroupByCols = pQueryMsg->numOfGroupCols;
      if (pQueryMsg->numOfGroupCols == 1 && !TSDB_COL_IS_TAG(pGroupColIndex->flag)) {
        numOfGroupByCols = 0;
      }

      code = tsdbQuerySTableByTagCond(tsdb, id->uid, tagCond, pQueryMsg->tagCondLen, pQueryMsg->tagNameRelType, tbnameCond, &tableGroupInfo, pGroupColIndex,
                                          numOfGroupByCols);
      if (code != TSDB_CODE_SUCCESS) {
        goto _over;
      }
    } else {
      code = tsdbGetTableGroupFromIdList(tsdb, pTableIdList, &tableGroupInfo);
      if (code != TSDB_CODE_SUCCESS) {
        goto _over;
      }

      qDebug("qmsg:%p query on %zu tables in one group from client", pQueryMsg, tableGroupInfo.numOfTables);
    }
  } else {
    // the type comes from the message; reject it instead of continuing with an
    // empty table group when assertions are compiled out
    qError("qmsg:%p invalid query type:%d", pQueryMsg, pQueryMsg->queryType);
    code = TSDB_CODE_QRY_INVALID_MSG;
    goto _over;
  }

  (*pQInfo) = createQInfoImpl(pQueryMsg, pTableIdList, pGroupbyExpr, pExprs, &tableGroupInfo, pTagColumnInfo);

  // these were handed over to createQInfoImpl and must not be released below
  pExprs = NULL;
  pGroupbyExpr = NULL;
  pTagColumnInfo = NULL;

  if ((*pQInfo) == NULL) {
    code = TSDB_CODE_QRY_OUT_OF_MEMORY;
    goto _over;
  }

  code = initQInfo(pQueryMsg, tsdb, vgId, *pQInfo, isSTableQuery, param, fn);

_over:
  free(tagCond);
  free(tbnameCond);
  free(pGroupColIndex);
  if (pGroupbyExpr != NULL) {
    taosArrayDestroy(pGroupbyExpr->columnInfo);
    free(pGroupbyExpr);
  }
  free(pTagColumnInfo);

  if (pExprs != NULL) {
    // still owned here: release arithmetic expression trees along with the array
    for (int32_t i = 0; i < pQueryMsg->numOfOutput; ++i) {
      if (pExprs[i].pExpr != NULL) {
        tExprTreeDestroy(&pExprs[i].pExpr, NULL);
      }
    }

    free(pExprs);
  }

  free(pExprMsg);
  taosArrayDestroy(pTableIdList);

  //pQInfo already freed in initQInfo, but *pQInfo may not pointer to null;
  if (code != TSDB_CODE_SUCCESS) {
    *pQInfo = NULL;
  } else {
    SQInfo* pq = (SQInfo*) (*pQInfo);

    // NOTE(review): two references are taken; presumably one for the executing
    // task and one for the side retrieving results - confirm against the callers
    T_REF_INC(pq);
    T_REF_INC(pq);
  }

  return code;
}

H
Haojun Liao 已提交
6133 6134
/*
 * Final teardown of a query handle: log completion, print the query cost
 * summary, then release the handle through freeQInfo().
 */
static void doDestoryQueryInfo(SQInfo* pQInfo) {
  assert(pQInfo != NULL);

  qDebug("QInfo:%p query completed", pQInfo);

  // the cost summary reads the handle, so it has to run before the release
  queryCostStatis(pQInfo);
  freeQInfo(pQInfo);
}

H
Haojun Liao 已提交
6140
/*
 * Drop one reference to a query handle. When the count reaches zero the handle is
 * destroyed and its registered release callback, if any, is invoked with the
 * stored argument. Handles that fail isValidQInfo() are ignored.
 */
void qDestroyQueryInfo(qinfo_t qHandle) {
  SQInfo* pQInfo = (SQInfo*) qHandle;
  if (!isValidQInfo(pQInfo)) {
    return;
  }

  int32_t ref = T_REF_DEC(pQInfo);
  qDebug("QInfo:%p dec refCount, value:%d", pQInfo, ref);

  if (ref != 0) {
    return;  // other references remain
  }

  // copy the callback out of the handle before the handle memory goes away
  _qinfo_free_fn_t releaseFn = pQInfo->freeFn;
  void*            releaseArg = pQInfo->param;

  doDestoryQueryInfo(pQInfo);

  if (releaseFn == NULL) {
    return;
  }

  assert(releaseArg != NULL);
  releaseFn(releaseArg);
}

6162
/**
 * Execute one round of the query bound to the given handle.
 *
 * The function returns early (without touching the handle) if the handle is
 * NULL or its signature does not match. In every other path it posts the
 * dataReady semaphore and calls qDestroyQueryInfo() on the handle before
 * returning, whether the query was killed, had no tables, aborted through
 * longjmp with an error code, or ran normally.
 *
 * @param qinfo  opaque query handle.
 */
void qTableQuery(qinfo_t qinfo) {
  SQInfo *pQInfo = (SQInfo *)qinfo;

  if (pQInfo == NULL || pQInfo->signature != pQInfo) {
    qDebug("QInfo:%p has been freed, no need to execute", pQInfo);
    return;
  }

  if (isQueryKilled(pQInfo)) {
    qDebug("QInfo:%p it is already killed, abort", pQInfo);

    sem_post(&pQInfo->dataReady);
    qDestroyQueryInfo(pQInfo);
    return;
  }

  if (pQInfo->tableqinfoGroupInfo.numOfTables == 0) {
    qDebug("QInfo:%p no table exists for query, abort", pQInfo);

    sem_post(&pQInfo->dataReady);
    qDestroyQueryInfo(pQInfo);
    return;
  }

  // a non-zero value here means an error code was delivered through longjmp
  int32_t jmpCode = setjmp(pQInfo->runtimeEnv.env);
  if (jmpCode != TSDB_CODE_SUCCESS) {
    pQInfo->code = jmpCode;
    qDebug("QInfo:%p query abort due to error occurs, code:%s", pQInfo, tstrerror(pQInfo->code));

    sem_post(&pQInfo->dataReady);
    qDestroyQueryInfo(pQInfo);
    return;
  }

  qDebug("QInfo:%p query task is launched", pQInfo);

  SQueryRuntimeEnv* pRuntimeEnv = &pQInfo->runtimeEnv;

  // dispatch to the matching executor: tag-only, super table, or normal table
  if (onlyQueryTags(pRuntimeEnv->pQuery)) {
    assert(pRuntimeEnv->pQueryHandle == NULL);
    buildTagQueryResult(pQInfo);   // todo support the limit/offset
  } else if (pRuntimeEnv->stableQuery) {
    stableQueryImpl(pQInfo);
  } else {
    tableQueryImpl(pQInfo);
  }

  SQuery* pQuery = pRuntimeEnv->pQuery;
  if (isQueryKilled(pQInfo)) {
    qDebug("QInfo:%p query is killed", pQInfo);
  } else if (pQuery->rec.rows == 0) {
    qDebug("QInfo:%p over, %zu tables queried, %"PRId64" rows are returned", pQInfo, pQInfo->tableqinfoGroupInfo.numOfTables, pQuery->rec.total);
  } else {
    qDebug("QInfo:%p query paused, %" PRId64 " rows returned, numOfTotal:%" PRId64 " rows",
           pQInfo, pQuery->rec.rows, pQuery->rec.total + pQuery->rec.rows);
  }

  sem_post(&pQInfo->dataReady);
  qDestroyQueryInfo(pQInfo);
}

H
hjxilinx 已提交
6224
/**
 * Block until the executor signals that a result is ready, then report the
 * status code stored in the query handle.
 *
 * @param qinfo  opaque query handle.
 * @return TSDB_CODE_QRY_INVALID_QHANDLE for a NULL/invalid handle; otherwise
 *         pQInfo->code (returned immediately, without waiting, if the query
 *         has been killed).
 */
int32_t qRetrieveQueryResultInfo(qinfo_t qinfo) {
  SQInfo *pQInfo = (SQInfo *)qinfo;

  if (pQInfo == NULL || !isValidQInfo(pQInfo)) {
    return TSDB_CODE_QRY_INVALID_QHANDLE;
  }

  if (isQueryKilled(pQInfo)) {
    qDebug("QInfo:%p query is killed, code:%d", pQInfo, pQInfo->code);
    return pQInfo->code;
  }

  SQuery *pQuery = pQInfo->runtimeEnv.pQuery;

  // wait for the dataReady semaphore posted by the executor
  sem_wait(&pQInfo->dataReady);

  qDebug("QInfo:%p retrieve result info, rowsize:%d, rows:%"PRId64", code:%d", pQInfo, pQuery->rowSize, pQuery->rec.rows,
         pQInfo->code);

  return pQInfo->code;
}
6243

H
hjxilinx 已提交
6244
/**
 * Tell whether the client still has results to fetch for this query.
 *
 * When the answer is true, an extra reference is taken on the handle
 * (T_REF_INC) on behalf of the pending retrieve.
 *
 * @param qinfo  opaque query handle.
 * @return false if the handle is NULL/invalid, if the query carries an error
 *         code, or if the query status is QUERY_OVER; true if the status is
 *         QUERY_RESBUF_FULL or QUERY_COMPLETED.
 */
bool qHasMoreResultsToRetrieve(qinfo_t qinfo) {
  SQInfo *pQInfo = (SQInfo *)qinfo;

  // The validity check is kept apart from the error-code check so that no
  // field of a NULL or invalid handle is ever read, not even for logging.
  if (pQInfo == NULL || !isValidQInfo(pQInfo)) {
    qDebug("QInfo:%p invalid qhandle, abort query", pQInfo);
    return false;
  }

  if (pQInfo->code != TSDB_CODE_SUCCESS) {
    qDebug("QInfo:%p invalid qhandle or error occurs, abort query, code:%x", pQInfo, pQInfo->code);
    return false;
  }

  SQuery *pQuery = pQInfo->runtimeEnv.pQuery;
  bool ret = false;

  if (Q_STATUS_EQUAL(pQuery->status, QUERY_OVER)) {
    ret = false;
  } else if (Q_STATUS_EQUAL(pQuery->status, QUERY_RESBUF_FULL)) {
    ret = true;
  } else if (Q_STATUS_EQUAL(pQuery->status, QUERY_COMPLETED)) {
    ret = true;
  } else {
    assert(0);  // no other status is expected at this point
  }

  if (ret) {
    T_REF_INC(pQInfo);
    qDebug("QInfo:%p has more results waits for client retrieve", pQInfo);
  }

  return ret;
}

6272 6273 6274
/**
 * Allocate a retrieve response and fill it with the current result batch.
 *
 * The response length is the result size plus an int32_t, one STableIdInfo
 * per entry of pQInfo->arrTableIdInfo, and the SRetrieveTableRsp header.
 *
 * @param qinfo    opaque query handle.
 * @param pRsp     out: response buffer obtained from rpcMallocCont().
 * @param contLen  out: total length of the response buffer.
 * @return TSDB_CODE_QRY_INVALID_QHANDLE for a NULL/invalid handle; otherwise
 *         the result of doDumpQueryResult() when rows were dumped, or
 *         pQInfo->code.
 */
int32_t qDumpRetrieveResult(qinfo_t qinfo, SRetrieveTableRsp **pRsp, int32_t *contLen) {
  SQInfo *pQInfo = (SQInfo *)qinfo;

  if (pQInfo == NULL || !isValidQInfo(pQInfo)) {
    return TSDB_CODE_QRY_INVALID_QHANDLE;
  }

  SQueryRuntimeEnv* pRuntimeEnv = &pQInfo->runtimeEnv;
  SQuery *pQuery = pRuntimeEnv->pQuery;

  size_t payload = getResultSize(pQInfo, &pQuery->rec.rows);
  payload += sizeof(int32_t);
  payload += sizeof(STableIdInfo) * taosArrayGetSize(pQInfo->arrTableIdInfo);
  *contLen = payload + sizeof(SRetrieveTableRsp);

  // todo handle failed to allocate memory
  *pRsp = (SRetrieveTableRsp *)rpcMallocCont(*contLen);
  (*pRsp)->numOfRows = htonl(pQuery->rec.rows);

  int32_t code = pQInfo->code;
  if (code != TSDB_CODE_SUCCESS) {
    // on error, no offset/elapsed time is reported
    (*pRsp)->offset = 0;
    (*pRsp)->useconds = 0;
  } else {
    (*pRsp)->offset = htobe64(pQuery->limit.offset);
    (*pRsp)->useconds = htobe64(pRuntimeEnv->summary.elapsedTime);
  }

  (*pRsp)->precision = htons(pQuery->precision);

  if (pQuery->rec.rows > 0 && code == TSDB_CODE_SUCCESS) {
    code = doDumpQueryResult(pQInfo, (*pRsp)->data);
  } else {
    // nothing to dump: mark the query as finished
    setQueryStatus(pQuery, QUERY_OVER);
    code = pQInfo->code;
  }

  if (isQueryKilled(pQInfo) || Q_STATUS_EQUAL(pQuery->status, QUERY_OVER)) {
    (*pRsp)->completed = 1;  // notify no more result to client
  }

  return code;
}
H
hjxilinx 已提交
6313

H
Haojun Liao 已提交
6314
/**
 * Mark a query as killed and drop one reference on its handle.
 *
 * @param qinfo  opaque query handle.
 * @return TSDB_CODE_QRY_INVALID_QHANDLE for a NULL/invalid handle,
 *         TSDB_CODE_SUCCESS otherwise.
 */
int32_t qKillQuery(qinfo_t qinfo) {
  SQInfo *pQInfo = (SQInfo *)qinfo;

  if (pQInfo != NULL && isValidQInfo(pQInfo)) {
    setQueryKilled(pQInfo);
    qDestroyQueryInfo(pQInfo);
    return TSDB_CODE_SUCCESS;
  }

  return TSDB_CODE_QRY_INVALID_QHANDLE;
}

H
hjxilinx 已提交
6326 6327 6328
static void buildTagQueryResult(SQInfo* pQInfo) {
  SQueryRuntimeEnv *pRuntimeEnv = &pQInfo->runtimeEnv;
  SQuery *          pQuery = pRuntimeEnv->pQuery;
6329

H
Haojun Liao 已提交
6330
  size_t numOfGroup = GET_NUM_OF_TABLEGROUP(pQInfo);
H
Haojun Liao 已提交
6331
  assert(numOfGroup == 0 || numOfGroup == 1);
6332

H
Haojun Liao 已提交
6333
  if (numOfGroup == 0) {
6334 6335
    return;
  }
H
hjxilinx 已提交
6336
  
H
Haojun Liao 已提交
6337
  SArray* pa = GET_TABLEGROUP(pQInfo, 0);
6338

H
Haojun Liao 已提交
6339
  size_t num = taosArrayGetSize(pa);
6340
  assert(num == pQInfo->tableqinfoGroupInfo.numOfTables);
6341

H
Haojun Liao 已提交
6342
  int32_t count = 0;
6343 6344 6345
  int32_t functionId = pQuery->pSelectExpr[0].base.functionId;
  if (functionId == TSDB_FUNC_TID_TAG) { // return the tags & table Id
    assert(pQuery->numOfOutput == 1);
6346

6347 6348
    SExprInfo* pExprInfo = &pQuery->pSelectExpr[0];
    int32_t rsize = pExprInfo->bytes;
H
Haojun Liao 已提交
6349
    count = 0;
6350

H
Haojun Liao 已提交
6351 6352 6353 6354 6355 6356 6357 6358 6359 6360 6361
    int16_t bytes = pExprInfo->bytes;
    int16_t type = pExprInfo->type;

    for(int32_t i = 0; i < pQuery->numOfTags; ++i) {
      if (pQuery->tagColList[i].colId == pExprInfo->base.colInfo.colId) {
        bytes = pQuery->tagColList[i].bytes;
        type = pQuery->tagColList[i].type;
        break;
      }
    }

H
Haojun Liao 已提交
6362 6363
    while(pQInfo->tableIndex < num && count < pQuery->rec.capacity) {
      int32_t i = pQInfo->tableIndex++;
6364
      STableQueryInfo *item = taosArrayGetP(pa, i);
6365

6366
      char *output = pQuery->sdata[0]->data + i * rsize;
6367
      varDataSetLen(output, rsize - VARSTR_HEADER_SIZE);
6368

6369
      output = varDataVal(output);
H
Haojun Liao 已提交
6370
      STableId* id = TSDB_TABLEID(item->pTable);
6371

H
Haojun Liao 已提交
6372 6373
      *(int64_t *)output = id->uid;  // memory align problem, todo serialize
      output += sizeof(id->uid);
6374

H
Haojun Liao 已提交
6375 6376
      *(int32_t *)output = id->tid;
      output += sizeof(id->tid);
6377

6378
      *(int32_t *)output = pQInfo->vgId;
6379
      output += sizeof(pQInfo->vgId);
6380

6381
      if (pExprInfo->base.colInfo.colId == TSDB_TBNAME_COLUMN_INDEX) {
6382
        char *data = tsdbGetTableName(item->pTable);
6383
        memcpy(output, data, varDataTLen(data));
H
[td-90]  
Haojun Liao 已提交
6384
      } else {
6385
        char *val = tsdbGetTableTagVal(item->pTable, pExprInfo->base.colInfo.colId, type, bytes);
6386 6387 6388 6389 6390 6391 6392 6393

        // todo refactor
        if (type == TSDB_DATA_TYPE_BINARY || type == TSDB_DATA_TYPE_NCHAR) {
          if (val == NULL) {
            setVardataNull(output, type);
          } else {
            memcpy(output, val, varDataTLen(val));
          }
H
[td-90]  
Haojun Liao 已提交
6394
        } else {
6395 6396
          if (val == NULL) {
            setNull(output, type, bytes);
H
Haojun Liao 已提交
6397
          } else {  // todo here stop will cause client crash
6398 6399
            memcpy(output, val, bytes);
          }
H
[td-90]  
Haojun Liao 已提交
6400 6401
        }
      }
6402

H
Haojun Liao 已提交
6403
      count += 1;
6404
    }
6405

6406
    qDebug("QInfo:%p create (tableId, tag) info completed, rows:%d", pQInfo, count);
6407

H
Haojun Liao 已提交
6408 6409 6410 6411 6412
  } else if (functionId == TSDB_FUNC_COUNT) {// handle the "count(tbname)" query
    *(int64_t*) pQuery->sdata[0]->data = num;

    count = 1;
    pQInfo->tableIndex = num;  //set query completed
6413
    qDebug("QInfo:%p create count(tbname) query, res:%d rows:1", pQInfo, count);
6414
  } else {  // return only the tags|table name etc.
H
Haojun Liao 已提交
6415
    count = 0;
H
Haojun Liao 已提交
6416
    SSchema tbnameSchema = tGetTableNameColumnSchema();
H
Haojun Liao 已提交
6417 6418
    while(pQInfo->tableIndex < num && count < pQuery->rec.capacity) {
      int32_t i = pQInfo->tableIndex++;
6419

6420
      SExprInfo* pExprInfo = pQuery->pSelectExpr;
6421
      STableQueryInfo* item = taosArrayGetP(pa, i);
6422

6423 6424
      for(int32_t j = 0; j < pQuery->numOfOutput; ++j) {
        if (pExprInfo[j].base.colInfo.colId == TSDB_TBNAME_COLUMN_INDEX) {
6425
          char* data = tsdbGetTableName(item->pTable);
H
Haojun Liao 已提交
6426
          char* dst = pQuery->sdata[j]->data + count * tbnameSchema.bytes;
H
hjxilinx 已提交
6427
          memcpy(dst, data, varDataTLen(data));
H
[td-90]  
Haojun Liao 已提交
6428 6429 6430 6431
        } else {// todo refactor
          int16_t type = pExprInfo[j].type;
          int16_t bytes = pExprInfo[j].bytes;
          
6432
          char* data = tsdbGetTableTagVal(item->pTable, pExprInfo[j].base.colInfo.colId, type, bytes);
H
Haojun Liao 已提交
6433
          char* dst = pQuery->sdata[j]->data + count * pExprInfo[j].bytes;
6434

H
hjxilinx 已提交
6435
          if (type == TSDB_DATA_TYPE_BINARY || type == TSDB_DATA_TYPE_NCHAR) {
H
[td-90]  
Haojun Liao 已提交
6436 6437 6438 6439 6440
            if (data == NULL) {
              setVardataNull(dst, type);
            } else {
              memcpy(dst, data, varDataTLen(data));
            }
H
hjxilinx 已提交
6441
          } else {
H
[td-90]  
Haojun Liao 已提交
6442 6443 6444 6445 6446
            if (data == NULL) {
              setNull(dst, type, bytes);
            } else {
              memcpy(dst, data, pExprInfo[j].bytes);
            }
H
hjxilinx 已提交
6447
          }
6448
        }
H
hjxilinx 已提交
6449
      }
H
Haojun Liao 已提交
6450
      count += 1;
H
hjxilinx 已提交
6451
    }
6452

6453
    qDebug("QInfo:%p create tag values results completed, rows:%d", pQInfo, count);
H
hjxilinx 已提交
6454
  }
6455

H
Haojun Liao 已提交
6456
  pQuery->rec.rows = count;
H
hjxilinx 已提交
6457
  setQueryStatus(pQuery, QUERY_COMPLETED);
H
hjxilinx 已提交
6458 6459
}

6460 6461 6462 6463 6464 6465 6466 6467 6468 6469 6470 6471 6472 6473 6474 6475 6476 6477 6478 6479 6480 6481 6482 6483 6484 6485 6486 6487 6488 6489 6490 6491 6492 6493 6494 6495 6496
/**
 * Free callback for entries of the query-handle cache: kills the query whose
 * handle is stored in the entry.
 *
 * @param qhandle  pointer to the cached slot holding the query handle;
 *                 a NULL slot or a NULL stored handle is ignored.
 */
void freeqinfoFn(void *qhandle) {
  void** slot = qhandle;

  if (slot != NULL && *slot != NULL) {
    qKillQuery(*slot);
  }
}

void* qOpenQueryMgmt(int32_t vgId) {
  const int32_t REFRESH_HANDLE_INTERVAL = 2; // every 2 seconds, refresh handle pool

  char cacheName[128] = {0};
  sprintf(cacheName, "qhandle_%d", vgId);

  SQueryMgmt* pQueryHandle = calloc(1, sizeof(SQueryMgmt));

  pQueryHandle->qinfoPool = taosCacheInit(TSDB_DATA_TYPE_BIGINT, REFRESH_HANDLE_INTERVAL, true, freeqinfoFn, cacheName);
  pQueryHandle->closed    = false;
  pthread_mutex_init(&pQueryHandle->lock, NULL);

  qDebug("vgId:%d, open querymgmt success", vgId);
  return pQueryHandle;
}

void qSetQueryMgmtClosed(void* pQMgmt) {
  if (pQMgmt == NULL) {
    return;
  }

  SQueryMgmt* pQueryMgmt = pQMgmt;
  qDebug("vgId:%d, set querymgmt closed, wait for all queries cancelled", pQueryMgmt->vgId);

  pthread_mutex_lock(&pQueryMgmt->lock);
  pQueryMgmt->closed = true;
  pthread_mutex_unlock(&pQueryMgmt->lock);

6497
  taosCacheRefresh(pQueryMgmt->qinfoPool, freeqinfoFn);
6498 6499 6500 6501 6502 6503 6504 6505 6506 6507 6508 6509 6510 6511 6512 6513 6514 6515 6516 6517 6518 6519
}

/**
 * Release a query manager: clean up its handle cache, destroy its mutex and
 * free the manager itself.
 *
 * The manager must already be marked closed (asserted).
 *
 * @param pQMgmt  query manager returned by qOpenQueryMgmt(); NULL is ignored.
 */
void qCleanupQueryMgmt(void* pQMgmt) {
  SQueryMgmt* pQueryMgmt = pQMgmt;
  if (pQueryMgmt == NULL) {
    return;
  }

  // remember the id now: the manager is freed before the final log line
  int32_t vgId = pQueryMgmt->vgId;

  assert(pQueryMgmt->closed);

  // detach the pool from the manager before cleaning it up
  SCacheObj* pool = pQueryMgmt->qinfoPool;
  pQueryMgmt->qinfoPool = NULL;
  taosCacheCleanup(pool);

  pthread_mutex_destroy(&pQueryMgmt->lock);
  tfree(pQueryMgmt);

  qDebug("vgId:%d querymgmt cleanup completed", vgId);
}

6520
void** qRegisterQInfo(void* pMgmt, uint64_t qInfo) {
6521 6522 6523 6524
  if (pMgmt == NULL) {
    return NULL;
  }

6525 6526
  const int32_t DEFAULT_QHANDLE_LIFE_SPAN = tsShellActivityTimer * 2;

6527 6528 6529 6530 6531 6532 6533 6534 6535 6536 6537
  SQueryMgmt *pQueryMgmt = pMgmt;
  if (pQueryMgmt->qinfoPool == NULL) {
    return NULL;
  }

  pthread_mutex_lock(&pQueryMgmt->lock);
  if (pQueryMgmt->closed) {
    pthread_mutex_unlock(&pQueryMgmt->lock);

    return NULL;
  } else {
6538 6539 6540
    uint64_t handleVal = (uint64_t) qInfo;

    void** handle = taosCachePut(pQueryMgmt->qinfoPool, &handleVal, sizeof(int64_t), &qInfo, POINTER_BYTES, DEFAULT_QHANDLE_LIFE_SPAN);
6541 6542 6543 6544 6545 6546
    pthread_mutex_unlock(&pQueryMgmt->lock);

    return handle;
  }
}

6547
void** qAcquireQInfo(void* pMgmt, uint64_t key) {
6548 6549 6550 6551 6552 6553
  SQueryMgmt *pQueryMgmt = pMgmt;

  if (pQueryMgmt->qinfoPool == NULL || pQueryMgmt->closed) {
    return NULL;
  }

6554
  void** handle = taosCacheAcquireByKey(pQueryMgmt->qinfoPool, &key, sizeof(uint64_t));
6555 6556 6557 6558 6559 6560 6561 6562 6563 6564 6565 6566 6567 6568 6569 6570 6571 6572
  if (handle == NULL || *handle == NULL) {
    return NULL;
  } else {
    return handle;
  }
}

/**
 * Hand a query-handle slot back to the manager's cache.
 *
 * @param pMgmt     query manager returned by qOpenQueryMgmt().
 * @param pQInfo    slot to release, passed through to taosCacheRelease().
 * @param needFree  passed through to taosCacheRelease().
 * @return always NULL.
 */
void** qReleaseQInfo(void* pMgmt, void* pQInfo, bool needFree) {
  SQueryMgmt *pQueryMgmt = pMgmt;

  // a NULL manager is tolerated, as in qRegisterQInfo/qSetQueryMgmtClosed/qCleanupQueryMgmt
  if (pQueryMgmt == NULL || pQueryMgmt->qinfoPool == NULL) {
    return NULL;
  }

  taosCacheRelease(pQueryMgmt->qinfoPool, pQInfo, needFree);
  return NULL;
}