qExecutor.c 218.6 KB
Newer Older
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15
/*
 * Copyright (c) 2019 TAOS Data, Inc. <jhtao@taosdata.com>
 *
 * This program is free software: you can use, redistribute, and/or modify
 * it under the terms of the GNU Affero General Public License, version 3
 * or later ("AGPL"), as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.
 *
 * You should have received a copy of the GNU Affero General Public License
 * along with this program. If not, see <http://www.gnu.org/licenses/>.
 */
#include "os.h"
16 17
#include "tcache.h"
#include "tglobal.h"
H
Haojun Liao 已提交
18
#include "qfill.h"
19
#include "taosmsg.h"
20 21

#include "hash.h"
22 23
#include "qExecutor.h"
#include "qUtil.h"
H
hjxilinx 已提交
24
#include "qast.h"
25
#include "qresultBuf.h"
H
hjxilinx 已提交
26
#include "query.h"
S
slguan 已提交
27
#include "queryLog.h"
28
#include "tlosertree.h"
H
Haojun Liao 已提交
29
#include "exception.h"
30 31
#include "tscompression.h"
#include "ttime.h"
S
Shuduo Sang 已提交
32
#include "tfile.h"
33 34 35 36 37 38 39 40 41

/**
 * Check whether the primary timestamp column is loaded by default; otherwise the
 * program is forced to load the primary column explicitly.
 */
#define Q_STATUS_EQUAL(p, s) (((p) & (s)) != 0)
#define TSDB_COL_IS_TAG(f) (((f)&TSDB_COL_TAG) != 0)
#define QUERY_IS_ASC_QUERY(q) (GET_FORWARD_DIRECTION_FACTOR((q)->order.order) == QUERY_ASC_FORWARD_STEP)

42
#define IS_MASTER_SCAN(runtime)        ((runtime)->scanFlag == MASTER_SCAN)
H
hjxilinx 已提交
43
#define IS_REVERSE_SCAN(runtime)       ((runtime)->scanFlag == REVERSE_SCAN)
44
#define SET_MASTER_SCAN_FLAG(runtime)  ((runtime)->scanFlag = MASTER_SCAN)
H
hjxilinx 已提交
45
#define SET_REVERSE_SCAN_FLAG(runtime) ((runtime)->scanFlag = REVERSE_SCAN)
46

47
#define GET_QINFO_ADDR(x) ((void *)((char *)(x)-offsetof(SQInfo, runtimeEnv)))
48

49
#define GET_COL_DATA_POS(query, index, step) ((query)->pos + (index) * (step))
50
#define SWITCH_ORDER(n) (((n) = ((n) == TSDB_ORDER_ASC) ? TSDB_ORDER_DESC : TSDB_ORDER_ASC))
51 52 53

/* get the qinfo struct address from the query struct address */
#define GET_COLUMN_BYTES(query, colidx) \
54 55
  ((query)->colList[(query)->pSelectExpr[colidx].base.colInfo.colIndex].bytes)
#define GET_COLUMN_TYPE(query, colidx) ((query)->colList[(query)->pSelectExpr[colidx].base.colInfo.colIndex].type)
56

57
enum {
H
hjxilinx 已提交
58
  // when query starts to execute, this status will set
59 60
  QUERY_NOT_COMPLETED = 0x1u,

H
hjxilinx 已提交
61 62
  /* The result output buffer is full and the current query is paused.
   * This status only exists for group-by clauses and diff/add/division/multiply queries.
63
   */
64 65
  QUERY_RESBUF_FULL = 0x2u,

H
hjxilinx 已提交
66 67 68
  /* query is over
   * 1. this status is used in one row result query process, e.g., count/sum/first/last/ avg...etc.
   * 2. when all data within queried time window, it is also denoted as query_completed
69
   */
70
  QUERY_COMPLETED = 0x4u,
71

H
hjxilinx 已提交
72 73
  /* Set when the results have not yet been completely returned to the client; this status
   * is usually used for interval queries with the interpolation option.
74
   */
75
  QUERY_OVER = 0x8u,
76
};
77 78

enum {
79 80
  TS_JOIN_TS_EQUAL       = 0,
  TS_JOIN_TS_NOT_EQUALS  = 1,
81 82 83
  TS_JOIN_TAG_NOT_EQUALS = 2,
};

84
typedef struct {
85 86 87 88 89 90
  int32_t     status;       // query status
  TSKEY       lastKey;      // the lastKey value before query executed
  STimeWindow w;            // whole query time window
  STimeWindow curWindow;    // current query window
  int32_t     windowIndex;  // index of active time window result for interval query
  STSCursor   cur;
91 92
} SQueryStatusInfo;

H
Haojun Liao 已提交
93
#if 0
H
Haojun Liao 已提交
94
static UNUSED_FUNC void *u_malloc (size_t __size) {
H
Haojun Liao 已提交
95 96 97 98
  uint32_t v = rand();
  if (v % 5 <= 1) {
    return NULL;
  } else {
H
Haojun Liao 已提交
99
    return malloc(__size);
H
Haojun Liao 已提交
100
  }
H
Haojun Liao 已提交
101 102
}

H
Haojun Liao 已提交
103 104 105 106 107 108 109 110 111 112
static UNUSED_FUNC void* u_calloc(size_t num, size_t __size) {
  uint32_t v = rand();
  if (v % 5 <= 1) {
    return NULL;
  } else {
    return calloc(num, __size);
  }
}

#define calloc  u_calloc
H
Haojun Liao 已提交
113
#define malloc  u_malloc
H
Haojun Liao 已提交
114
#endif
H
Haojun Liao 已提交
115

116
#define CLEAR_QUERY_STATUS(q, st)   ((q)->status &= (~(st)))
H
Haojun Liao 已提交
117 118 119
#define GET_NUM_OF_TABLEGROUP(q)    taosArrayGetSize((q)->tableqinfoGroupInfo.pGroupList)
#define GET_TABLEGROUP(q, _index)   ((SArray*) taosArrayGetP((q)->tableqinfoGroupInfo.pGroupList, (_index)))

120
static void setQueryStatus(SQuery *pQuery, int8_t status);
121

H
Haojun Liao 已提交
122
#define QUERY_IS_INTERVAL_QUERY(_q) ((_q)->intervalTime > 0)
123

H
hjxilinx 已提交
124
// todo move to utility
125
static int32_t mergeIntoGroupResultImpl(SQInfo *pQInfo, SArray *group);
126

H
hjxilinx 已提交
127
static void setWindowResOutputBuf(SQueryRuntimeEnv *pRuntimeEnv, SWindowResult *pResult);
H
Haojun Liao 已提交
128
static void setWindowResOutputBufInitCtx(SQueryRuntimeEnv *pRuntimeEnv, SWindowResult *pResult);
129 130 131
static void resetMergeResultBuf(SQuery *pQuery, SQLFunctionCtx *pCtx, SResultInfo *pResultInfo);
static bool functionNeedToExecute(SQueryRuntimeEnv *pRuntimeEnv, SQLFunctionCtx *pCtx, int32_t functionId);
static void getNextTimeWindow(SQuery *pQuery, STimeWindow *pTimeWindow);
132

133 134 135
static void setExecParams(SQuery *pQuery, SQLFunctionCtx *pCtx, void* inputData, TSKEY *tsCol, SDataBlockInfo* pBlockInfo,
                          SDataStatis *pStatis, void *param, int32_t colIndex);

136
static void initCtxOutputBuf(SQueryRuntimeEnv *pRuntimeEnv);
137
static void destroyTableQueryInfo(STableQueryInfo *pTableQueryInfo, int32_t numOfCols);
138 139
static void resetCtxOutputBuf(SQueryRuntimeEnv *pRuntimeEnv);
static bool hasMainOutput(SQuery *pQuery);
H
hjxilinx 已提交
140
static void buildTagQueryResult(SQInfo *pQInfo);
141

142
static int32_t setAdditionalInfo(SQInfo *pQInfo, void *pTable, STableQueryInfo *pTableQueryInfo);
143
static int32_t flushFromResultBuf(SQInfo *pQInfo);
144

145
/*
 * Test the row at position elemPos against the filters of every filtered column.
 *
 * A column passes when its value is not NULL and at least one of its filters accepts the value;
 * the row passes only when all filtered columns pass.
 *
 * @param pQuery   query holding numOfFilterCols entries in pFilterInfo
 * @param elemPos  row position inside each filter column's pData buffer
 * @return true when the row satisfies every filtered column, false otherwise
 */
bool doFilterData(SQuery *pQuery, int32_t elemPos) {
  for (int32_t col = 0; col < pQuery->numOfFilterCols; ++col) {
    SSingleColumnFilterInfo *pColFilter = &pQuery->pFilterInfo[col];

    // locate the value of this row inside the column buffer
    char *pValue = pColFilter->pData + pColFilter->info.bytes * elemPos;

    // a NULL value disqualifies the row right away
    if (isNull(pValue, pColFilter->info.type)) {
      return false;
    }

    // the filters of a single column are alternatives: stop at the first one that accepts the value
    bool    matched = false;
    int32_t idx = 0;
    while (!matched && idx < pColFilter->numOfFilters) {
      SColumnFilterElem *pOneFilter = &pColFilter->pFilters[idx];

      // the same value pointer is handed over as both data arguments of the callback
      if (pOneFilter->fp(pOneFilter, pValue, pValue)) {
        matched = true;
      } else {
        ++idx;
      }
    }

    if (!matched) {
      return false;
    }
  }

  return true;
}

/*
 * Return the largest numOfRes found among the output contexts of the query.
 *
 * When hasMainOutput() reports a main output, the ts, tag and tagprj functions are skipped,
 * because they cannot decide the number of output rows on their own.
 */
int64_t getNumOfResult(SQueryRuntimeEnv *pRuntimeEnv) {
  SQuery *pQuery = pRuntimeEnv->pQuery;
  bool    mainOutputExists = hasMainOutput(pQuery);
  int64_t largest = 0;

  for (int32_t idx = 0; idx < pQuery->numOfOutput; ++idx) {
    int32_t fid = pQuery->pSelectExpr[idx].base.functionId;
    bool    auxiliary = (fid == TSDB_FUNC_TS) || (fid == TSDB_FUNC_TAG) || (fid == TSDB_FUNC_TAGPRJ);

    // auxiliary columns follow the main output, they do not define the row count
    if (mainOutputExists && auxiliary) {
      continue;
    }

    SResultInfo *pInfo = GET_RES_INFO(&pRuntimeEnv->pCtx[idx]);
    if (pInfo == NULL) {
      continue;
    }

    if (largest < pInfo->numOfRes) {
      largest = pInfo->numOfRes;
    }
  }

  assert(largest >= 0);
  return largest;
}

199 200 201 202 203 204 205 206 207
/*
 * The number of results has to be updated after the offset value was updated.
 *
 * Every output context except ts, tag, tagprj and ts_dummy gets its numOfRes overwritten
 * with numOfRes; the previous value is asserted to be larger than the new one.
 */
void updateNumOfResult(SQueryRuntimeEnv *pRuntimeEnv, int32_t numOfRes) {
  SQuery *pQuery = pRuntimeEnv->pQuery;

  for (int32_t idx = 0; idx < pQuery->numOfOutput; ++idx) {
    SQLFunctionCtx *pOneCtx = &pRuntimeEnv->pCtx[idx];
    int16_t         fid = pOneCtx->functionId;

    switch (fid) {
      case TSDB_FUNC_TS:
      case TSDB_FUNC_TAG:
      case TSDB_FUNC_TAGPRJ:
      case TSDB_FUNC_TS_DUMMY:
        // these outputs do not carry their own result count
        continue;
      default:
        break;
    }

    SResultInfo *pInfo = GET_RES_INFO(pOneCtx);
    assert(pInfo->numOfRes > numOfRes);
    pInfo->numOfRes = numOfRes;
  }
}

219 220 221 222 223 224 225 226 227
/* Map a group index to its result id: ids start at 200000 and are spaced 10000 apart. */
static int32_t getGroupResultId(int32_t groupIndex) {
  const int32_t firstId = 200000;
  const int32_t idStride = 10000;

  return firstId + (groupIndex * idStride);
}

/*
 * Tell whether the group-by expression contains a normal (non-tag) column.
 *
 * @param pGroupbyExpr  group-by expression, may be NULL
 * @return true when one of the group columns is flagged TSDB_COL_NORMAL
 */
bool isGroupbyNormalCol(SSqlGroupbyExpr *pGroupbyExpr) {
  if (pGroupbyExpr == NULL) {
    return false;
  }

  if (pGroupbyExpr->numOfGroupCols == 0) {
    return false;
  }

  for (int32_t idx = 0; idx < pGroupbyExpr->numOfGroupCols; ++idx) {
    SColIndex *pCol = taosArrayGet(pGroupbyExpr->columnInfo, idx);
    if (pCol->flag != TSDB_COL_NORMAL) {
      continue;
    }

    // make sure the normal column locates at the second position if tbname exists in group by clause
    assert(pGroupbyExpr->numOfGroupCols <= 1 || pCol->colIndex > 0);
    return true;
  }

  return false;
}

/*
 * Look up the data type of the first normal column used in the group-by clause.
 *
 * @param pQuery        query whose colList is searched by column id
 * @param pGroupbyExpr  group-by expression, must not be NULL
 * @return the type of the matching column in pQuery->colList, or TSDB_DATA_TYPE_NULL when
 *         no normal group column exists or its id is not found in colList
 */
int16_t getGroupbyColumnType(SQuery *pQuery, SSqlGroupbyExpr *pGroupbyExpr) {
  assert(pGroupbyExpr != NULL);

  // -2 is the "no normal group column found" sentinel
  int32_t groupColId = -2;

  for (int32_t g = 0; g < pGroupbyExpr->numOfGroupCols; ++g) {
    SColIndex *pCol = taosArrayGet(pGroupbyExpr->columnInfo, g);
    if (pCol->flag == TSDB_COL_NORMAL) {
      groupColId = pCol->colId;
      break;
    }
  }

  // resolve the column id against the column list of the query
  for (int32_t c = 0; c < pQuery->numOfCols; ++c) {
    if (pQuery->colList[c].colId == groupColId) {
      return pQuery->colList[c].type;
    }
  }

  return TSDB_DATA_TYPE_NULL;
}

/*
 * Tell whether the query combines at least one selectivity function with a tag_dummy or
 * ts_dummy output.
 */
bool isSelectivityWithTagsQuery(SQuery *pQuery) {
  int32_t selectivityCount = 0;
  bool    dummyOutputSeen = false;

  for (int32_t idx = 0; idx < pQuery->numOfOutput; ++idx) {
    int32_t fid = pQuery->pSelectExpr[idx].base.functionId;

    if (fid == TSDB_FUNC_TAG_DUMMY || fid == TSDB_FUNC_TS_DUMMY) {
      dummyOutputSeen = true;
    } else if ((aAggs[fid].nStatus & TSDB_FUNCSTATE_SELECTIVITY) != 0) {
      selectivityCount++;
    }
  }

  return (selectivityCount > 0) && dummyOutputSeen;
}

293
/* A ts_comp query is recognised by its first output expression being TSDB_FUNC_TS_COMP. */
bool isTSCompQuery(SQuery *pQuery) {
  SExprInfo *pFirstExpr = &pQuery->pSelectExpr[0];
  return pFirstExpr->base.functionId == TSDB_FUNC_TS_COMP;
}
294

295 296 297 298
/*
 * Trim the rows of the current batch so that the total never exceeds the limit of the query.
 *
 * @return true when rows were discarded (the query status is then set to QUERY_COMPLETED),
 *         false when no limit is set or the limit is not exceeded
 */
static bool limitResults(SQueryRuntimeEnv* pRuntimeEnv) {
  SQInfo* pQInfo = GET_QINFO_ADDR(pRuntimeEnv);
  SQuery* pQuery = pRuntimeEnv->pQuery;

  // no limit configured
  if (pQuery->limit.limit <= 0) {
    return false;
  }

  // the rows returned so far plus the current batch still fit
  if (pQuery->rec.total + pQuery->rec.rows <= pQuery->limit.limit) {
    return false;
  }

  // keep only as many rows as are needed to reach the limit
  pQuery->rec.rows = pQuery->limit.limit - pQuery->rec.total;

  qDebug("QInfo:%p discard remain data due to result limitation, limit:%"PRId64", current return:%" PRId64 ", total:%"PRId64,
      pQInfo, pQuery->limit.limit, pQuery->rec.rows, pQuery->rec.total + pQuery->rec.rows);
  assert(pQuery->rec.rows >= 0);

  setQueryStatus(pQuery, QUERY_COMPLETED);
  return true;
}

/* Tell whether any output expression of the query is a top or bottom function. */
static bool isTopBottomQuery(SQuery *pQuery) {
  int32_t idx = 0;

  while (idx < pQuery->numOfOutput) {
    int32_t fid = pQuery->pSelectExpr[idx].base.functionId;

    // timestamp outputs are passed over before the top/bottom check
    if (fid != TSDB_FUNC_TS && (fid == TSDB_FUNC_TOP || fid == TSDB_FUNC_BOTTOM)) {
      return true;
    }

    ++idx;
  }

  return false;
}

H
Haojun Liao 已提交
327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344
/*
 * Tell whether tag values are needed for the output of the query: either the query consists of
 * a single ts_comp expression, or one of the output expressions refers to a tag column.
 */
static bool hasTagValOutput(SQuery* pQuery) {
  SExprInfo *pFirstExpr = &pQuery->pSelectExpr[0];

  // ts_comp column required the tag value for join filter
  if (pQuery->numOfOutput == 1 && pFirstExpr->base.functionId == TSDB_FUNC_TS_COMP) {
    return true;
  }

  // otherwise look for a tag value, by which the results are aggregated
  for (int32_t i = 0; i < pQuery->numOfOutput; ++i) {
    if (TSDB_COL_IS_TAG(pQuery->pSelectExpr[i].base.colInfo.flag)) {
      return true;
    }
  }

  return false;
}

H
Haojun Liao 已提交
345
/*
 * Find the statistics entry that belongs to the column of output expression `index`.
 *
 * The entry is chosen by column id, since the file block may be out of date, which means the
 * newest table schema does not have to equal the schema of this block.
 * TODO: speedup by using bsearch
 *
 * @param pQuery     query whose pSelectExpr[index] identifies the column
 * @param pStatis    array of numOfCols statistics entries
 * @param numOfCols  number of entries in pStatis
 * @param index      index into pQuery->pSelectExpr
 * @return the matching entry, or NULL for a tag column or when no entry carries the column id
 */
static SDataStatis *getStatisInfo(SQuery *pQuery, SDataStatis *pStatis, int32_t numOfCols, int32_t index) {
  SColIndex *pCol = &pQuery->pSelectExpr[index].base.colInfo;

  // for a tag column, no corresponding field info
  if (TSDB_COL_IS_TAG(pCol->flag)) {
    return NULL;
  }

  SDataStatis *pFound = NULL;
  for (int32_t pos = 0; pos < numOfCols && pFound == NULL; ++pos) {
    if (pStatis[pos].colId == pCol->colId) {
      pFound = &pStatis[pos];
    }
  }

  return pFound;
}

366 367 368 369 370 371 372 373
/**
 * @param pQuery
 * @param col
 * @param pDataBlockInfo
 * @param pStatis
 * @param pColStatis
 * @return
 */
H
Haojun Liao 已提交
374
static bool hasNullValue(SQuery *pQuery, int32_t col, int32_t numOfCols, SDataStatis *pStatis, SDataStatis **pColStatis) {
375
  SColIndex *pColIndex = &pQuery->pSelectExpr[col].base.colInfo;
376
  if (TSDB_COL_IS_TAG(pColIndex->flag)) {
377 378
    return false;
  }
379

380 381 382 383
  // query on primary timestamp column, not null value at all
  if (pColIndex->colId == PRIMARYKEY_TIMESTAMP_COL_INDEX) {
    return false;
  }
384

385
  if (pStatis != NULL) {
H
Haojun Liao 已提交
386
    *pColStatis = getStatisInfo(pQuery, pStatis, numOfCols, col);
H
hjxilinx 已提交
387 388
  } else {
    *pColStatis = NULL;
389
  }
390

391 392 393
  if ((*pColStatis) != NULL && (*pColStatis)->numOfNull == 0) {
    return false;
  }
394

395 396 397 398 399 400
  return true;
}

/*
 * Make the window result that belongs to the given key the current one, creating a new result
 * slot when the key has not been seen before.
 *
 * When a new slot is needed and the result array is full, the array is doubled and every added
 * slot is zeroed and initialised through createQueryResultInfo().
 *
 * @param pRuntimeEnv     runtime environment, supplies the query and the stableQuery flag
 * @param pWindowResInfo  window result set; curIndex, and possibly size/capacity/pResult, are updated
 * @param pData           key bytes used for the hash lookup
 * @param bytes           length of the key in bytes
 * @return the window result of the key, or NULL when the result array could not be enlarged;
 *         in that case pWindowResInfo is left unchanged
 */
static SWindowResult *doSetTimeWindowFromKey(SQueryRuntimeEnv *pRuntimeEnv, SWindowResInfo *pWindowResInfo, char *pData,
                                             int16_t bytes) {
  SQuery *pQuery = pRuntimeEnv->pQuery;

  int32_t *p1 = (int32_t *) taosHashGet(pWindowResInfo->hashList, pData, bytes);
  if (p1 != NULL) {
    pWindowResInfo->curIndex = *p1;
  } else {
    // more than the capacity, reallocate the resources
    if (pWindowResInfo->size >= pWindowResInfo->capacity) {
      int64_t newCap = pWindowResInfo->capacity * 2;

      char *t = realloc(pWindowResInfo->pResult, newCap * sizeof(SWindowResult));
      if (t == NULL) {
        /*
         * realloc leaves the old array intact on failure. Carrying on would initialise and hand
         * out slots beyond the end of that array, so report the failure to the caller instead.
         */
        return NULL;
      }

      pWindowResInfo->pResult = (SWindowResult *)t;

      // zero only the slots that were just added
      memset(&pWindowResInfo->pResult[pWindowResInfo->capacity], 0,
             sizeof(SWindowResult) * (newCap - pWindowResInfo->capacity));

      for (int32_t i = pWindowResInfo->capacity; i < newCap; ++i) {
        SPosInfo pos = {-1, -1};
        createQueryResultInfo(pQuery, &pWindowResInfo->pResult[i], pRuntimeEnv->stableQuery, &pos);
      }
      pWindowResInfo->capacity = newCap;
    }

    // add a new result set for a new group
    pWindowResInfo->curIndex = pWindowResInfo->size++;
    taosHashPut(pWindowResInfo->hashList, pData, bytes, (char *)&pWindowResInfo->curIndex, sizeof(int32_t));
  }

  return getWindowResult(pWindowResInfo, pWindowResInfo->curIndex);
}

/*
 * Get the time window that covers the timestamp ts.
 *
 * The search starts from the window stored in pWindowResInfo (the previous start key when no
 * window is active yet, the current window otherwise) and moves it by whole multiples of
 * slidingTime until ts lies inside it.
 */
static STimeWindow getActiveTimeWindow(SWindowResInfo *pWindowResInfo, int64_t ts, SQuery *pQuery) {
  STimeWindow win = {0};

  if (pWindowResInfo->curIndex != -1) {
    int32_t slot = curTimeWindow(pWindowResInfo);
    win = getWindowResult(pWindowResInfo, slot)->window;
  } else {  // the first window, from the previous stored value
    win.skey = pWindowResInfo->prevSKey;
    win.ekey = win.skey + pQuery->intervalTime - 1;
  }

  bool covered = (win.skey <= ts) && (ts <= win.ekey);
  if (!covered) {
    int64_t start = win.skey;

    // window begins after ts: slide backwards
    if (start > ts) {
      start -= ((start - ts + pQuery->slidingTime - 1) / pQuery->slidingTime) * pQuery->slidingTime;
    }

    // window ends before ts: slide forwards
    int64_t end = start + pQuery->intervalTime - 1;
    if (end < ts) {
      start += ((ts - end + pQuery->slidingTime - 1) / pQuery->slidingTime) * pQuery->slidingTime;
    }

    win.skey = start;
    win.ekey = win.skey + pQuery->intervalTime - 1;
  }

  /*
   * query border check, skey should not be bounded by the query time range, since the value skey will
   * be used as the time window index value. So we only change ekey of time window accordingly.
   */
  if (QUERY_IS_ASC_QUERY(pQuery) && win.ekey > pQuery->window.ekey) {
    win.ekey = pQuery->window.ekey;
  }

  assert(ts >= win.skey && ts <= win.ekey);
  return win;
}

/*
 * Assign a page and a row of the disk-based result buffer to a window result that has none yet.
 *
 * The last page of the id list of `sid` is reused while it holds fewer than numOfRowsPerPage
 * rows; otherwise, or when the list is empty, a new page is requested.
 *
 * @return 0 on success or when the window result already owns a position, -1 when no page is available
 */
static int32_t addNewWindowResultBuf(SWindowResult *pWindowRes, SDiskbasedResultBuf *pResultBuf, int32_t sid,
                                     int32_t numOfRowsPerPage) {
  if (pWindowRes->pos.pageId != -1) {
    return 0;
  }

  // in the first scan, new space needed for results
  tFilePage *pPage = NULL;
  int32_t    pageId = -1;
  SIDList    idList = getDataBufPagesIdList(pResultBuf, sid);

  if (idList.size != 0) {
    pageId = getLastPageId(&idList);
    pPage = GET_RES_BUF_PAGE_BY_ID(pResultBuf, pageId);

    if (pPage->num >= numOfRowsPerPage) {
      pPage = getNewDataBuf(pResultBuf, sid, &pageId);

      // number of elements must be 0 for new allocated buffer
      assert(pPage == NULL || pPage->num == 0);
    }
  } else {
    pPage = getNewDataBuf(pResultBuf, sid, &pageId);
  }

  if (pPage == NULL) {
    return -1;
  }

  // set the number of rows in current disk page
  if (pWindowRes->pos.pageId == -1) {  // not allocated yet, allocate new buffer
    pWindowRes->pos.pageId = pageId;
    pWindowRes->pos.rowId = pPage->num++;
  }

  return 0;
}

/*
 * Select the window result keyed by the start key of `win`, make sure it owns a position in the
 * result buffer, store the window in it and set it up as the output target.
 *
 * @return TSDB_CODE_SUCCESS on success, -1 when the window result or its buffer position
 *         could not be obtained
 */
static int32_t setWindowOutputBufByKey(SQueryRuntimeEnv *pRuntimeEnv, SWindowResInfo *pWindowResInfo, int32_t sid,
                                       STimeWindow *win) {
  assert(win->skey <= win->ekey);
  SDiskbasedResultBuf *pBuf = pRuntimeEnv->pResultBuf;

  SWindowResult *pRes = doSetTimeWindowFromKey(pRuntimeEnv, pWindowResInfo, (char *)&win->skey, TSDB_KEYSIZE);
  if (pRes == NULL) {
    return -1;
  }

  // not assign result buffer yet, add new result buffer
  bool needsPosition = (pRes->pos.pageId == -1);
  if (needsPosition && addNewWindowResultBuf(pRes, pBuf, sid, pRuntimeEnv->numOfRowsPerPage) != 0) {
    return -1;
  }

  // set time window for current result
  pRes->window = *win;
  setWindowResOutputBufInitCtx(pRuntimeEnv, pRes);

  return TSDB_CODE_SUCCESS;
}

/* Return the status of the window result stored in `slot`; the slot must lie in [0, size). */
static SWindowStatus *getTimeWindowResStatus(SWindowResInfo *pWindowResInfo, int32_t slot) {
  assert(slot >= 0 && slot < pWindowResInfo->size);

  SWindowResult *pRes = pWindowResInfo->pResult + slot;
  return &pRes->status;
}

H
Haojun Liao 已提交
542
/*
 * Count the rows between position `pos` and the position that searchFn reports for `ekey`.
 *
 * @param numOfRows  number of timestamps in pData
 * @param searchFn   search routine applied to pData
 * @param ekey       end key that is looked up
 * @param pos        start position inside the block
 * @param order      scan order, TSDB_ORDER_ASC or descending
 * @param pData      timestamps of the block
 * @return 0 when searchFn reports a negative position; otherwise the distance between the two
 *         positions, plus one when the timestamp at the found position equals ekey
 */
static int32_t getForwardStepsInBlock(int32_t numOfRows, __block_search_fn_t searchFn, TSKEY ekey, int16_t pos,
                                      int16_t order, int64_t *pData) {
  int32_t endPos = searchFn((char *)pData, numOfRows, ekey, order);
  if (endPos < 0) {
    return 0;
  }

  int32_t steps;
  if (order == TSDB_ORDER_ASC) {
    steps = endPos - pos;
  } else {
    steps = pos - endPos;
  }
  assert(steps >= 0);

  // endPos data is equalled to the key so, we do need to read the element in endPos
  if (pData[endPos] == ekey) {
    steps += 1;
  }

  return steps;
}

/**
 * NOTE: the query status only set for the first scan of master scan.
 */
563
static int32_t doCheckQueryCompleted(SQueryRuntimeEnv *pRuntimeEnv, TSKEY lastKey, SWindowResInfo *pWindowResInfo) {
564
  SQuery *pQuery = pRuntimeEnv->pQuery;
H
Haojun Liao 已提交
565
  if (pRuntimeEnv->scanFlag != MASTER_SCAN || (!QUERY_IS_INTERVAL_QUERY(pQuery))) {
566
    return pWindowResInfo->size;
567
  }
568

569
  // no qualified results exist, abort check
570 571
  int32_t numOfClosed = 0;
  
572
  if (pWindowResInfo->size == 0) {
573
    return pWindowResInfo->size;
574
  }
575

576
  // query completed
H
hjxilinx 已提交
577 578
  if ((lastKey >= pQuery->current->win.ekey && QUERY_IS_ASC_QUERY(pQuery)) ||
      (lastKey <= pQuery->current->win.ekey && !QUERY_IS_ASC_QUERY(pQuery))) {
579
    closeAllTimeWindow(pWindowResInfo);
580

581 582 583 584
    pWindowResInfo->curIndex = pWindowResInfo->size - 1;
    setQueryStatus(pQuery, QUERY_COMPLETED | QUERY_RESBUF_FULL);
  } else {  // set the current index to be the last unclosed window
    int32_t i = 0;
585
    int64_t skey = TSKEY_INITIAL_VAL;
586

587 588 589
    for (i = 0; i < pWindowResInfo->size; ++i) {
      SWindowResult *pResult = &pWindowResInfo->pResult[i];
      if (pResult->status.closed) {
590
        numOfClosed += 1;
591 592
        continue;
      }
593

594 595 596 597 598 599 600 601
      if ((pResult->window.ekey <= lastKey && QUERY_IS_ASC_QUERY(pQuery)) ||
          (pResult->window.skey >= lastKey && !QUERY_IS_ASC_QUERY(pQuery))) {
        closeTimeWindow(pWindowResInfo, i);
      } else {
        skey = pResult->window.skey;
        break;
      }
    }
602

603
    // all windows are closed, set the last one to be the skey
604
    if (skey == TSKEY_INITIAL_VAL) {
605 606 607 608 609
      assert(i == pWindowResInfo->size);
      pWindowResInfo->curIndex = pWindowResInfo->size - 1;
    } else {
      pWindowResInfo->curIndex = i;
    }
610

611
    pWindowResInfo->prevSKey = pWindowResInfo->pResult[pWindowResInfo->curIndex].window.skey;
612

613 614
    // the number of completed slots are larger than the threshold, return current generated results to client.
    if (numOfClosed > pWindowResInfo->threshold) {
615
      qDebug("QInfo:%p total result window:%d closed:%d, reached the output threshold %d, return",
616 617
          GET_QINFO_ADDR(pRuntimeEnv), pWindowResInfo->size, numOfClosed, pQuery->rec.threshold);
      
618
      setQueryStatus(pQuery, QUERY_RESBUF_FULL);
619
    } else {
620
      qDebug("QInfo:%p total result window:%d already closed:%d", GET_QINFO_ADDR(pRuntimeEnv), pWindowResInfo->size,
621
             numOfClosed);
622 623
    }
  }
624 625 626 627 628 629 630
  
  // output has reached the limitation, set query completed
  if (pQuery->limit.limit > 0 && (pQuery->limit.limit + pQuery->limit.offset) <= numOfClosed &&
      pRuntimeEnv->scanFlag == MASTER_SCAN) {
    setQueryStatus(pQuery, QUERY_COMPLETED);
  }
  
631
  assert(pWindowResInfo->prevSKey != TSKEY_INITIAL_VAL);
632
  return numOfClosed;
633 634 635
}

/*
 * Count the rows of the block, starting at startPos and following the scan order, that belong
 * to the time window ending at ekey.
 *
 * @param pQuery          query, supplies the scan order and the current table
 * @param pDataBlockInfo  row count and time range of the block
 * @param pPrimaryColumn  timestamps of the block
 * @param startPos        first row to consider, must lie in [0, rows)
 * @param ekey            end key of the time window
 * @param searchFn        search routine used to locate ekey inside the block
 * @param updateLastKey   when true, lastKey of the current table is moved one step past the
 *                        last counted row (it is left alone when no row qualifies)
 * @return number of qualifying rows, never negative
 */
static int32_t getNumOfRowsInTimeWindow(SQuery *pQuery, SDataBlockInfo *pDataBlockInfo, TSKEY *pPrimaryColumn,
                                        int32_t startPos, TSKEY ekey, __block_search_fn_t searchFn, bool updateLastKey) {
  assert(startPos >= 0 && startPos < pDataBlockInfo->rows);

  int32_t rowCount = -1;
  int32_t order = pQuery->order.order;
  int32_t step = GET_FORWARD_DIRECTION_FACTOR(order);

  STableQueryInfo *pTable = pQuery->current;
  bool             ascending = QUERY_IS_ASC_QUERY(pQuery);

  // the edge of the block that the scan reaches last
  TSKEY blockEdge = ascending ? pDataBlockInfo->window.ekey : pDataBlockInfo->window.skey;
  bool  windowEndsInBlock = ascending ? (ekey < blockEdge) : (ekey > blockEdge);

  if (windowEndsInBlock) {
    rowCount = getForwardStepsInBlock(pDataBlockInfo->rows, searchFn, ekey, startPos, order, pPrimaryColumn);

    if (rowCount == 0) {  // no qualified data in current block, do not update the lastKey value
      assert(ascending ? (ekey < pPrimaryColumn[startPos]) : (ekey > pPrimaryColumn[startPos]));
    } else if (updateLastKey) {
      int32_t lastPos = ascending ? (startPos + (rowCount - 1)) : (startPos - (rowCount - 1));
      pTable->lastKey = pPrimaryColumn[lastPos] + step;
    }
  } else {
    // the window reaches beyond the block: every remaining row in scan direction qualifies
    rowCount = ascending ? (pDataBlockInfo->rows - startPos) : (startPos + 1);
    if (updateLastKey) {
      pTable->lastKey = blockEdge + step;
    }
  }

  assert(rowCount >= 0);
  return rowCount;
}

/*
 * Run every output function that needs to execute over a range of rows of the current block.
 *
 * @param pRuntimeEnv  runtime environment, supplies the query and the function contexts
 * @param pStatus      not used by this function
 * @param pWin         time window, its start key becomes nStartQueryTimestamp of each context
 * @param offset       position of the first row in scan order
 * @param forwardStep  number of rows to process
 * @param tsBuf        timestamps of the block, handed to selectivity functions from `offset` on
 * @param numOfTotal   total number of rows in the block
 */
static void doBlockwiseApplyFunctions(SQueryRuntimeEnv *pRuntimeEnv, SWindowStatus *pStatus, STimeWindow *pWin,
                                      int32_t offset, int32_t forwardStep, TSKEY *tsBuf, int32_t numOfTotal) {
  SQuery *        pQuery = pRuntimeEnv->pQuery;
  SQLFunctionCtx *pCtxList = pRuntimeEnv->pCtx;

  // the start offset is the lowest row position of the range, whatever the scan order is
  int32_t lowestPos = QUERY_IS_ASC_QUERY(pQuery) ? offset : offset - (forwardStep - 1);
  bool    wholeBlock = (forwardStep == numOfTotal);

  for (int32_t idx = 0; idx < pQuery->numOfOutput; ++idx) {
    SQLFunctionCtx *pOneCtx = &pCtxList[idx];
    int32_t         fid = pQuery->pSelectExpr[idx].base.functionId;

    pOneCtx->nStartQueryTimestamp = pWin->skey;
    pOneCtx->size = forwardStep;
    pOneCtx->startOffset = lowestPos;

    if ((aAggs[fid].nStatus & TSDB_FUNCSTATE_SELECTIVITY) != 0) {
      pOneCtx->ptsList = &tsBuf[offset];
    }

    // not a whole block involved in query processing, statistics data can not be used
    if (!wholeBlock) {
      pOneCtx->preAggVals.isSet = false;
    }

    if (functionNeedToExecute(pRuntimeEnv, pOneCtx, fid)) {
      aAggs[fid].xFunction(pOneCtx);
    }
  }
}

/*
 * Run every output function that needs to execute on the single row at `offset`.
 *
 * @param pRuntimeEnv  runtime environment, supplies the query and the function contexts
 * @param pStatus      not used by this function
 * @param pWin         time window, its start key becomes nStartQueryTimestamp of each context
 * @param offset       row position handed to the row-wise function
 */
static void doRowwiseApplyFunctions(SQueryRuntimeEnv *pRuntimeEnv, SWindowStatus *pStatus, STimeWindow *pWin,
                                    int32_t offset) {
  SQuery *        pQuery = pRuntimeEnv->pQuery;
  SQLFunctionCtx *pCtxList = pRuntimeEnv->pCtx;

  int32_t idx = 0;
  while (idx < pQuery->numOfOutput) {
    SQLFunctionCtx *pOneCtx = &pCtxList[idx];
    pOneCtx->nStartQueryTimestamp = pWin->skey;

    int32_t fid = pQuery->pSelectExpr[idx].base.functionId;
    if (functionNeedToExecute(pRuntimeEnv, pOneCtx, fid)) {
      aAggs[fid].xFunctionF(pOneCtx, offset);
    }

    ++idx;
  }
}

static int32_t getNextQualifiedWindow(SQueryRuntimeEnv *pRuntimeEnv, STimeWindow *pNextWin,
726 727
                                      SDataBlockInfo *pDataBlockInfo, TSKEY *primaryKeys,
                                      __block_search_fn_t searchFn) {
728
  SQuery *pQuery = pRuntimeEnv->pQuery;
729

H
Haojun Liao 已提交
730 731 732 733
  // tumbling time window query, a special case of sliding time window query
  if (pQuery->slidingTime == pQuery->intervalTime) {
    // todo opt
  }
734

H
Haojun Liao 已提交
735
  getNextTimeWindow(pQuery, pNextWin);
736

H
Haojun Liao 已提交
737 738 739 740 741
  // next time window is not in current block
  if ((pNextWin->skey > pDataBlockInfo->window.ekey && QUERY_IS_ASC_QUERY(pQuery)) ||
      (pNextWin->ekey < pDataBlockInfo->window.skey && !QUERY_IS_ASC_QUERY(pQuery))) {
    return -1;
  }
742

H
Haojun Liao 已提交
743 744 745 746 747
  TSKEY startKey = -1;
  if (QUERY_IS_ASC_QUERY(pQuery)) {
    startKey = pNextWin->skey;
    if (startKey < pQuery->window.skey) {
      startKey = pQuery->window.skey;
748
    }
H
Haojun Liao 已提交
749 750 751 752
  } else {
    startKey = pNextWin->ekey;
    if (startKey > pQuery->window.skey) {
      startKey = pQuery->window.skey;
753
    }
H
Haojun Liao 已提交
754
  }
755

H
Haojun Liao 已提交
756
  int32_t startPos = searchFn((char *)primaryKeys, pDataBlockInfo->rows, startKey, pQuery->order.order);
757

H
Haojun Liao 已提交
758 759 760 761 762 763
  /*
   * This time window does not cover any data, try next time window,
   * this case may happen when the time window is too small
   */
  if (QUERY_IS_ASC_QUERY(pQuery) && primaryKeys[startPos] > pNextWin->ekey) {
    TSKEY next = primaryKeys[startPos];
764

H
Haojun Liao 已提交
765 766 767 768
    pNextWin->ekey += ((next - pNextWin->ekey + pQuery->slidingTime - 1)/pQuery->slidingTime) * pQuery->slidingTime;
    pNextWin->skey = pNextWin->ekey - pQuery->intervalTime + 1;
  } else if ((!QUERY_IS_ASC_QUERY(pQuery)) && primaryKeys[startPos] < pNextWin->skey) {
    TSKEY next = primaryKeys[startPos];
769

H
Haojun Liao 已提交
770 771
    pNextWin->skey -= ((pNextWin->skey - next + pQuery->slidingTime - 1) / pQuery->slidingTime) * pQuery->slidingTime;
    pNextWin->ekey = pNextWin->skey + pQuery->intervalTime - 1;
772
  }
773

H
Haojun Liao 已提交
774
  return startPos;
775 776 777 778 779 780 781 782 783 784 785 786 787 788 789
}

/*
 * Return the border of the window in scan direction, bounded by the end key of the query:
 * for ascending queries the smaller of the window end key and the query end key, for
 * descending queries the larger of the window start key and the query end key.
 */
static TSKEY reviseWindowEkey(SQuery *pQuery, STimeWindow *pWindow) {
  TSKEY queryEnd = pQuery->window.ekey;

  if (QUERY_IS_ASC_QUERY(pQuery)) {
    return (pWindow->ekey > queryEnd) ? queryEnd : pWindow->ekey;
  }

  return (pWindow->skey < queryEnd) ? queryEnd : pWindow->skey;
}

H
hjxilinx 已提交
794 795 796 797 798 799 800 801 802 803 804 805 806 807 808
/*
 * Return the data buffer of the column with the given id, or NULL when the block holds no such column.
 * todo: binary search
 */
static void* getDataBlockImpl(SArray* pDataBlock, int32_t colId) {
  int32_t total = taosArrayGetSize(pDataBlock);
  int32_t idx = 0;

  while (idx < total) {
    SColumnInfoData *pCol = taosArrayGet(pDataBlock, idx);
    if (pCol->info.colId == colId) {
      return pCol->pData;
    }

    ++idx;
  }

  return NULL;
}

static char *getDataBlock(SQueryRuntimeEnv *pRuntimeEnv, SArithmeticSupport *sas, int32_t col, int32_t size,
809
                    SArray *pDataBlock) {
dengyihao's avatar
dengyihao 已提交
810 811 812
  if (pDataBlock == NULL) {
    return NULL;
  }
813
  char *dataBlock = NULL;
814

H
Haojun Liao 已提交
815
  SQuery *pQuery = pRuntimeEnv->pQuery;
816
  SQLFunctionCtx *pCtx = pRuntimeEnv->pCtx;
817

818
  int32_t functionId = pQuery->pSelectExpr[col].base.functionId;
819
  if (functionId == TSDB_FUNC_ARITHM) {
820
    sas->pArithExpr = &pQuery->pSelectExpr[col];
821

822 823 824 825 826 827
    // set the start offset to be the lowest start position, no matter asc/desc query order
    if (QUERY_IS_ASC_QUERY(pQuery)) {
      pCtx->startOffset = pQuery->pos;
    } else {
      pCtx->startOffset = pQuery->pos - (size - 1);
    }
828

829 830 831 832
    sas->offset  = 0;
    sas->colList = pQuery->colList;
    sas->numOfCols = pQuery->numOfCols;
    sas->data    = calloc(pQuery->numOfCols, POINTER_BYTES);
833

H
Haojun Liao 已提交
834 835 836 837
    if (sas->data == NULL) {
      longjmp(pRuntimeEnv->env, TSDB_CODE_QRY_OUT_OF_MEMORY);
    }

838
    // here the pQuery->colList and sas->colList are identical
H
Haojun Liao 已提交
839
    int32_t numOfCols = taosArrayGetSize(pDataBlock);
840
    for (int32_t i = 0; i < pQuery->numOfCols; ++i) {
841
      SColumnInfo *pColMsg = &pQuery->colList[i];
842

843 844 845 846 847 848 849 850
      dataBlock = NULL;
      for (int32_t k = 0; k < numOfCols; ++k) {  //todo refactor
        SColumnInfoData *p = taosArrayGet(pDataBlock, k);
        if (pColMsg->colId == p->info.colId) {
          dataBlock = p->pData;
          break;
        }
      }
851

852
      assert(dataBlock != NULL);
H
Haojun Liao 已提交
853
      sas->data[i] = dataBlock/* + pQuery->colList[i].bytes*/;  // start from the offset
854
    }
855

856
  } else {  // other type of query function
857
    SColIndex *pCol = &pQuery->pSelectExpr[col].base.colInfo;
858
    if (TSDB_COL_IS_TAG(pCol->flag) || pDataBlock == NULL) {
859 860
      dataBlock = NULL;
    } else {
H
hjxilinx 已提交
861
      dataBlock = getDataBlockImpl(pDataBlock, pCol->colId);
862 863
    }
  }
864

865 866 867 868
  return dataBlock;
}

/**
H
Haojun Liao 已提交
869
 * todo set the last value for pQueryTableInfo as in rowwiseapplyfunctions
870 871
 * @param pRuntimeEnv
 * @param forwardStep
872
 * @param tsCols
873 874 875 876 877
 * @param pFields
 * @param isDiskFileBlock
 * @return                  the incremental number of output value, so it maybe 0 for fixed number of query,
 *                          such as count/min/max etc.
 */
878
static void blockwiseApplyFunctions(SQueryRuntimeEnv *pRuntimeEnv, SDataStatis *pStatis,
879 880
                                       SDataBlockInfo *pDataBlockInfo, SWindowResInfo *pWindowResInfo,
                                       __block_search_fn_t searchFn, SArray *pDataBlock) {
881
  SQLFunctionCtx *pCtx = pRuntimeEnv->pCtx;
882 883 884
  
  SQuery *pQuery = pRuntimeEnv->pQuery;
  TSKEY  *tsCols = NULL;
885
  if (pDataBlock != NULL) {
886
    SColumnInfoData* pColInfo = taosArrayGet(pDataBlock, 0);
887
    tsCols = (TSKEY *)(pColInfo->pData);
888
  }
889

890
  SArithmeticSupport *sasArray = calloc((size_t)pQuery->numOfOutput, sizeof(SArithmeticSupport));
H
Haojun Liao 已提交
891 892 893
  if (sasArray == NULL) {
    longjmp(pRuntimeEnv->env, TSDB_CODE_QRY_OUT_OF_MEMORY);
  }
894

895
  for (int32_t k = 0; k < pQuery->numOfOutput; ++k) {
H
hjxilinx 已提交
896
    char *dataBlock = getDataBlock(pRuntimeEnv, &sasArray[k], k, pDataBlockInfo->rows, pDataBlock);
897
    setExecParams(pQuery, &pCtx[k], dataBlock, tsCols, pDataBlockInfo, pStatis, &sasArray[k], k);
898
  }
899

900
  int32_t step = GET_FORWARD_DIRECTION_FACTOR(pQuery->order.order);
H
Haojun Liao 已提交
901
  if (QUERY_IS_INTERVAL_QUERY(pQuery) && tsCols != NULL) {
902
    int32_t offset = GET_COL_DATA_POS(pQuery, 0, step);
903
    TSKEY   ts = tsCols[offset];
904

905
    STimeWindow win = getActiveTimeWindow(pWindowResInfo, ts, pQuery);
H
hjxilinx 已提交
906
    if (setWindowOutputBufByKey(pRuntimeEnv, pWindowResInfo, pDataBlockInfo->tid, &win) != TSDB_CODE_SUCCESS) {
dengyihao's avatar
dengyihao 已提交
907
      tfree(sasArray);
H
hjxilinx 已提交
908
      return;
909
    }
910

911 912
    TSKEY   ekey = reviseWindowEkey(pQuery, &win);
    int32_t forwardStep =
913
        getNumOfRowsInTimeWindow(pQuery, pDataBlockInfo, tsCols, pQuery->pos, ekey, searchFn, true);
914

915
    SWindowStatus *pStatus = getTimeWindowResStatus(pWindowResInfo, curTimeWindow(pWindowResInfo));
916
    doBlockwiseApplyFunctions(pRuntimeEnv, pStatus, &win, pQuery->pos, forwardStep, tsCols, pDataBlockInfo->rows);
917

918 919
    int32_t     index = pWindowResInfo->curIndex;
    STimeWindow nextWin = win;
920

921
    while (1) {
922
      int32_t startPos = getNextQualifiedWindow(pRuntimeEnv, &nextWin, pDataBlockInfo, tsCols, searchFn);
923 924 925
      if (startPos < 0) {
        break;
      }
926

927
      // null data, failed to allocate more memory buffer
H
hjxilinx 已提交
928
      if (setWindowOutputBufByKey(pRuntimeEnv, pWindowResInfo, pDataBlockInfo->tid, &nextWin) != TSDB_CODE_SUCCESS) {
929 930
        break;
      }
931

932
      ekey = reviseWindowEkey(pQuery, &nextWin);
933
      forwardStep = getNumOfRowsInTimeWindow(pQuery, pDataBlockInfo, tsCols, startPos, ekey, searchFn, true);
934

935
      pStatus = getTimeWindowResStatus(pWindowResInfo, curTimeWindow(pWindowResInfo));
936
      doBlockwiseApplyFunctions(pRuntimeEnv, pStatus, &nextWin, startPos, forwardStep, tsCols, pDataBlockInfo->rows);
937
    }
938

939 940 941 942 943 944 945
    pWindowResInfo->curIndex = index;
  } else {
    /*
     * the sqlfunctionCtx parameters should be set done before all functions are invoked,
     * since the selectivity + tag_prj query needs all parameters been set done.
     * tag_prj function are changed to be TSDB_FUNC_TAG_DUMMY
     */
946
    for (int32_t k = 0; k < pQuery->numOfOutput; ++k) {
947
      int32_t functionId = pQuery->pSelectExpr[k].base.functionId;
948 949 950 951 952
      if (functionNeedToExecute(pRuntimeEnv, &pCtx[k], functionId)) {
        aAggs[functionId].xFunction(&pCtx[k]);
      }
    }
  }
953

954 955 956 957
  for(int32_t i = 0; i < pQuery->numOfOutput; ++i) {
    if (pQuery->pSelectExpr[i].base.functionId != TSDB_FUNC_ARITHM) {
      continue;
    }
958

959 960
    tfree(sasArray[i].data);
  }
961

962 963 964 965 966 967 968
  tfree(sasArray);
}

/*
 * Select (creating it on demand) the result buffer of the group identified by
 * the group-by column value stored at pData, and make it the active output
 * buffer of the runtime environment.
 *
 * Returns TSDB_CODE_SUCCESS, or -1 when the value is NULL, when no window
 * result could be obtained for the key, or when no result page could be added.
 */
static int32_t setGroupResultOutputBuf(SQueryRuntimeEnv *pRuntimeEnv, char *pData, int16_t type, int16_t bytes) {
  // rows whose group-by value is NULL do not belong to any group
  if (isNull(pData, type)) {
    return -1;
  }

  int32_t GROUPRESULTID = 1;

  SDiskbasedResultBuf *pResultBuf = pRuntimeEnv->pResultBuf;

  // integer view of the group key; stays -1 for every other data type
  int64_t groupKey = -1;
  if (type == TSDB_DATA_TYPE_BOOL || type == TSDB_DATA_TYPE_TINYINT) {
    groupKey = GET_INT8_VAL(pData);
  } else if (type == TSDB_DATA_TYPE_SMALLINT) {
    groupKey = GET_INT16_VAL(pData);
  } else if (type == TSDB_DATA_TYPE_INT) {
    groupKey = GET_INT32_VAL(pData);
  } else if (type == TSDB_DATA_TYPE_BIGINT) {
    groupKey = GET_INT64_VAL(pData);
  }

  SWindowResult *pGroupRes = doSetTimeWindowFromKey(pRuntimeEnv, &pRuntimeEnv->windowResInfo, pData, bytes);
  if (pGroupRes == NULL) {
    return -1;
  }

  pGroupRes->window.skey = groupKey;
  pGroupRes->window.ekey = groupKey;

  // no result page assigned to this group yet, add a new one
  if (pGroupRes->pos.pageId == -1 &&
      addNewWindowResultBuf(pGroupRes, pResultBuf, GROUPRESULTID, pRuntimeEnv->numOfRowsPerPage) != 0) {
    return -1;
  }

  setWindowResOutputBuf(pRuntimeEnv, pGroupRes);
  initCtxOutputBuf(pRuntimeEnv);
  return TSDB_CODE_SUCCESS;
}

1005
/*
 * Find the data of the first non-tag group-by column that is present in
 * pDataBlock.
 *
 * *type and *bytes receive the type and width of the group-by column as
 * declared in pQuery->colList; they are updated for every non-tag group-by
 * column that is examined. Returns the column buffer, or NULL when none of
 * the group-by columns is found in the block.
 */
static char *getGroupbyColumnData(SQuery *pQuery, int16_t *type, int16_t *bytes, SArray* pDataBlock) {
  SSqlGroupbyExpr *pGroupbyExpr = pQuery->pGroupbyExpr;

  int32_t k = 0;
  while (k < pGroupbyExpr->numOfGroupCols) {
    SColIndex* pColIndex = taosArrayGet(pGroupbyExpr->columnInfo, k);
    k += 1;

    if (pColIndex->flag == TSDB_COL_TAG) {
      continue;
    }

    // position of the group-by column in the query column list
    int32_t colId = pColIndex->colId;
    int16_t colIndex = -1;
    for (int32_t c = 0; c < pQuery->numOfCols; ++c) {
      if (colId == pQuery->colList[c].colId) {
        colIndex = c;
        break;
      }
    }

    assert(colIndex >= 0 && colIndex < pQuery->numOfCols);

    *type = pQuery->colList[colIndex].type;
    *bytes = pQuery->colList[colIndex].bytes;

    /*
     * The column index was derived during query preparation from the first
     * qualified table in this vnode; other tables may not carry the column in
     * cache, so the data block is searched by column id again.
     */
    int32_t numOfBlockCols = taosArrayGetSize(pDataBlock);
    for (int32_t b = 0; b < numOfBlockCols; ++b) {
      SColumnInfoData *pBlockCol = taosArrayGet(pDataBlock, b);
      if (pBlockCol->info.colId == pColIndex->colId) {
        return pBlockCol->pData;
      }
    }
  }

  return NULL;
}

/*
 * Compare the row at `offset` of the current input block with the current
 * element of the timestamp buffer used for join queries.
 *
 * Returns TS_JOIN_TAG_NOT_EQUALS when the tag of the first function context
 * differs from the buffer element, TS_JOIN_TS_NOT_EQUALS when the row has not
 * yet reached the timestamp of the buffer element in scan direction, and
 * TS_JOIN_TS_EQUAL otherwise. A row that is already past the buffer element
 * trips an assertion.
 */
static int32_t doTSJoinFilter(SQueryRuntimeEnv *pRuntimeEnv, int32_t offset) {
  SQuery         *pQuery = pRuntimeEnv->pQuery;
  STSElem         elem = tsBufGetElem(pRuntimeEnv->pTSBuf);
  SQLFunctionCtx *pCtx = pRuntimeEnv->pCtx;

  // the tag has to match before the timestamps are worth comparing
  if (elem.tag != pCtx[0].tag.i64Key) {
    return TS_JOIN_TAG_NOT_EQUALS;
  }

  TSKEY key = *(TSKEY *)(pCtx[0].aInputElemBuf + TSDB_KEYSIZE * offset);

#if defined(_DEBUG_VIEW)
  printf("elem in comp ts file:%" PRId64 ", key:%" PRId64 ", tag:%"PRIu64", query order:%d, ts order:%d, traverse:%d, index:%d\n",
         elem.ts, key, elem.tag, pQuery->order.order, pRuntimeEnv->pTSBuf->tsOrder,
         pRuntimeEnv->pTSBuf->cur.order, pRuntimeEnv->pTSBuf->cur.tsIndex);
#endif

  if (key != elem.ts) {
    bool notReached = QUERY_IS_ASC_QUERY(pQuery) ? (key < elem.ts) : (key > elem.ts);
    if (notReached) {
      return TS_JOIN_TS_NOT_EQUALS;
    }

    // the row lies beyond the buffer element, which must never happen
    assert(false);
  }

  return TS_JOIN_TS_EQUAL;
}

/*
 * Decide whether the function bound to pCtx has to be invoked for the data
 * currently being scanned.
 *
 * - TSDB_FUNC_TS always runs.
 * - Functions whose result is complete, and the tag/ts dummy functions, never run.
 * - first/first_dst run only in ascending queries.
 * - last/last_dst run only when their first parameter equals the query order.
 * - Everything else runs unless the scan is a reverse scan.
 */
static bool functionNeedToExecute(SQueryRuntimeEnv *pRuntimeEnv, SQLFunctionCtx *pCtx, int32_t functionId) {
  SResultInfo *pResInfo = GET_RES_INFO(pCtx);
  SQuery* pQuery = pRuntimeEnv->pQuery;

  // in case of timestamp column, always generated results.
  if (functionId == TSDB_FUNC_TS) {
    return true;
  }

  bool isDummy = (functionId == TSDB_FUNC_TAG_DUMMY || functionId == TSDB_FUNC_TS_DUMMY);
  if (pResInfo->complete || isDummy) {
    return false;
  }

  bool isFirst = (functionId == TSDB_FUNC_FIRST_DST || functionId == TSDB_FUNC_FIRST);
  if (isFirst) {
    return QUERY_IS_ASC_QUERY(pQuery);
  }

  // todo add comments
  bool isLast = (functionId == TSDB_FUNC_LAST_DST || functionId == TSDB_FUNC_LAST);
  if (isLast) {
    return pCtx->param[0].i64Key == pQuery->order.order;
  }

  // none of the remaining functions takes part in the supplementary (reverse) scan
  return !IS_REVERSE_SCAN(pRuntimeEnv);
}

1112 1113
/*
 * Apply the output functions of the query to the data block one row at a time.
 *
 * Each row is first checked against the join timestamp buffer (if any) and the
 * column filters (if any). A row that passes is then either
 *  - fed into every time window that contains its timestamp (interval query), or
 *  - fed into all functions, after the output buffer of its group has been
 *    selected when the query groups by a normal column.
 * When done, the last key of the current table is advanced past the last row
 * that was examined.
 *
 * Failure to allocate the arithmetic support array is reported through longjmp
 * on pRuntimeEnv->env.
 */
static void rowwiseApplyFunctions(SQueryRuntimeEnv *pRuntimeEnv, SDataStatis *pStatis, SDataBlockInfo *pDataBlockInfo,
    SWindowResInfo *pWindowResInfo, SArray *pDataBlock) {
  SQLFunctionCtx  *pCtx = pRuntimeEnv->pCtx;
  SQuery          *pQuery = pRuntimeEnv->pQuery;
  STableQueryInfo *item = pQuery->current;

  // the first column serves as timestamp list only if it really is a timestamp column
  SColumnInfoData *pFirstCol = (SColumnInfoData *)taosArrayGet(pDataBlock, 0);
  TSKEY *tsCols = NULL;
  if (pFirstCol->info.type == TSDB_DATA_TYPE_TIMESTAMP) {
    tsCols = (TSKEY *)pFirstCol->pData;
  }

  bool groupbyColumnValue = pRuntimeEnv->groupbyNormalCol;

  SArithmeticSupport *sasArray = calloc((size_t)pQuery->numOfOutput, sizeof(SArithmeticSupport));
  if (sasArray == NULL) {
    longjmp(pRuntimeEnv->env, TSDB_CODE_QRY_OUT_OF_MEMORY);
  }

  int16_t type = 0;
  int16_t bytes = 0;
  char   *groupbyColumnData = NULL;
  if (groupbyColumnValue) {
    groupbyColumnData = getGroupbyColumnData(pQuery, &type, &bytes, pDataBlock);
  }

  // bind the input data of every output column
  for (int32_t out = 0; out < pQuery->numOfOutput; ++out) {
    char *dataBlock = getDataBlock(pRuntimeEnv, &sasArray[out], out, pDataBlockInfo->rows, pDataBlock);
    setExecParams(pQuery, &pCtx[out], dataBlock, tsCols, pDataBlockInfo, pStatis, &sasArray[out], out);
  }

  // bind the input data of every filter column
  for (int32_t f = 0; f < pQuery->numOfFilterCols; ++f) {
    SSingleColumnFilterInfo *pFilter = &pQuery->pFilterInfo[f];
    pFilter->pData = getDataBlockImpl(pDataBlock, pFilter->info.colId);
    assert(pFilter->pData != NULL);
  }

  int32_t step = GET_FORWARD_DIRECTION_FACTOR(pQuery->order.order);

  // from top to bottom in desc
  // from bottom to top in asc order
  if (pRuntimeEnv->pTSBuf != NULL) {
    SQInfo *pQInfo = (SQInfo *)GET_QINFO_ADDR(pRuntimeEnv);
    qDebug("QInfo:%p process data rows, numOfRows:%d, query order:%d, ts comp order:%d", pQInfo, pDataBlockInfo->rows,
           pQuery->order.order, pRuntimeEnv->pTSBuf->cur.order);
  }

  int32_t offset = -1;

  for (int32_t row = 0; row < pDataBlockInfo->rows; ++row) {
    offset = GET_COL_DATA_POS(pQuery, row, step);

    if (pRuntimeEnv->pTSBuf != NULL) {
      int32_t joinRes = doTSJoinFilter(pRuntimeEnv, offset);
      if (joinRes == TS_JOIN_TAG_NOT_EQUALS) {
        break;
      }

      if (joinRes == TS_JOIN_TS_NOT_EQUALS) {
        continue;
      }

      assert(joinRes == TS_JOIN_TS_EQUAL);
    }

    if (pQuery->numOfFilterCols > 0 && (!doFilterData(pQuery, offset))) {
      continue;
    }

    if (QUERY_IS_INTERVAL_QUERY(pQuery)) {
      // the primary timestamp of the row selects the time window(s) it belongs to
      int64_t     ts = tsCols[offset];
      STimeWindow win = getActiveTimeWindow(pWindowResInfo, ts, pQuery);

      // null data, too many state code
      if (setWindowOutputBufByKey(pRuntimeEnv, pWindowResInfo, pDataBlockInfo->tid, &win) != TSDB_CODE_SUCCESS) {
        continue;
      }

      SWindowStatus *pStatus = getTimeWindowResStatus(pWindowResInfo, curTimeWindow(pWindowResInfo));
      doRowwiseApplyFunctions(pRuntimeEnv, pStatus, &win, offset);

      // visit the following windows that still contain this row, then restore the active window
      int32_t     index = pWindowResInfo->curIndex;
      STimeWindow nextWin = win;

      for (;;) {
        getNextTimeWindow(pQuery, &nextWin);

        bool beyondQueryRange = (nextWin.skey > pQuery->window.ekey && QUERY_IS_ASC_QUERY(pQuery)) ||
                                (nextWin.skey < pQuery->window.ekey && !QUERY_IS_ASC_QUERY(pQuery));
        if (beyondQueryRange) {
          break;
        }

        bool rowInWindow = (ts >= nextWin.skey && ts <= nextWin.ekey);
        if (!rowInWindow) {
          break;
        }

        // null data, failed to allocate more memory buffer
        if (setWindowOutputBufByKey(pRuntimeEnv, pWindowResInfo, pDataBlockInfo->tid, &nextWin) != TSDB_CODE_SUCCESS) {
          break;
        }

        pStatus = getTimeWindowResStatus(pWindowResInfo, curTimeWindow(pWindowResInfo));
        doRowwiseApplyFunctions(pRuntimeEnv, pStatus, &nextWin, offset);
      }

      pWindowResInfo->curIndex = index;
    } else {
      // the value of the group-by column decides which group this row belongs to
      if (groupbyColumnValue) {
        char *val = groupbyColumnData + bytes * offset;

        // null data, too many state code
        if (setGroupResultOutputBuf(pRuntimeEnv, val, type, bytes) != TSDB_CODE_SUCCESS) {
          continue;
        }
      }

      for (int32_t out = 0; out < pQuery->numOfOutput; ++out) {
        int32_t functionId = pQuery->pSelectExpr[out].base.functionId;
        if (functionNeedToExecute(pRuntimeEnv, &pCtx[out], functionId)) {
          aAggs[functionId].xFunctionF(&pCtx[out], offset);
        }
      }
    }

    // if timestamp filter list is empty, quit current query
    if (pRuntimeEnv->pTSBuf != NULL && !tsBufNextPos(pRuntimeEnv->pTSBuf)) {
      setQueryStatus(pQuery, QUERY_COMPLETED);
      break;
    }
  }

  assert(offset >= 0);
  if (tsCols == NULL) {
    TSKEY blockEdge = QUERY_IS_ASC_QUERY(pQuery)? pDataBlockInfo->window.ekey:pDataBlockInfo->window.skey;
    item->lastKey = blockEdge + step;
  } else {
    item->lastKey = tsCols[offset] + step;
  }

  // todo refactor: extract method
  // only arithmetic expressions own a heap allocated column pointer list
  for (int32_t out = 0; out < pQuery->numOfOutput; ++out) {
    if (pQuery->pSelectExpr[out].base.functionId == TSDB_FUNC_ARITHM) {
      tfree(sasArray[out].data);
    }
  }

  free(sasArray);
}

static int32_t tableApplyFunctionsOnBlock(SQueryRuntimeEnv *pRuntimeEnv, SDataBlockInfo *pDataBlockInfo,
H
hjxilinx 已提交
1267
                                          SDataStatis *pStatis, __block_search_fn_t searchFn, SArray *pDataBlock) {
H
hjxilinx 已提交
1268
  SQuery *pQuery = pRuntimeEnv->pQuery;
H
hjxilinx 已提交
1269 1270 1271
  
  STableQueryInfo* pTableQInfo = pQuery->current;
  SWindowResInfo*  pWindowResInfo = &pRuntimeEnv->windowResInfo;
H
hjxilinx 已提交
1272
  
H
Haojun Liao 已提交
1273
  if (pQuery->numOfFilterCols > 0 || pRuntimeEnv->pTSBuf != NULL || pRuntimeEnv->groupbyNormalCol) {
1274
    rowwiseApplyFunctions(pRuntimeEnv, pStatis, pDataBlockInfo, pWindowResInfo, pDataBlock);
1275
  } else {
1276
    blockwiseApplyFunctions(pRuntimeEnv, pStatis, pDataBlockInfo, pWindowResInfo, searchFn, pDataBlock);
1277
  }
1278

1279
  // update the lastkey of current table
1280
  TSKEY lastKey = QUERY_IS_ASC_QUERY(pQuery) ? pDataBlockInfo->window.ekey : pDataBlockInfo->window.skey;
H
hjxilinx 已提交
1281
  pTableQInfo->lastKey = lastKey + GET_FORWARD_DIRECTION_FACTOR(pQuery->order.order);
1282

1283
  // interval query with limit applied
1284
  int32_t numOfRes = 0;
H
Haojun Liao 已提交
1285
  if (QUERY_IS_INTERVAL_QUERY(pQuery)) {
1286 1287 1288
    numOfRes = doCheckQueryCompleted(pRuntimeEnv, lastKey, pWindowResInfo);
  } else {
    numOfRes = getNumOfResult(pRuntimeEnv);
1289

1290 1291 1292 1293
    // update the number of output result
    if (numOfRes > 0 && pQuery->checkBuffer == 1) {
      assert(numOfRes >= pQuery->rec.rows);
      pQuery->rec.rows = numOfRes;
1294

1295 1296 1297
      if (numOfRes >= pQuery->rec.threshold) {
        setQueryStatus(pQuery, QUERY_RESBUF_FULL);
      }
1298

1299 1300 1301
      if ((pQuery->limit.limit >= 0) && (pQuery->limit.limit + pQuery->limit.offset) <= numOfRes) {
        setQueryStatus(pQuery, QUERY_COMPLETED);
      }
H
Haojun Liao 已提交
1302
    }
1303
  }
1304

1305
  return numOfRes;
1306 1307
}

H
Haojun Liao 已提交
1308
void setExecParams(SQuery *pQuery, SQLFunctionCtx *pCtx, void* inputData, TSKEY *tsCol, SDataBlockInfo* pBlockInfo,
1309 1310 1311 1312 1313 1314 1315
                   SDataStatis *pStatis, void *param, int32_t colIndex) {
  
  int32_t functionId = pQuery->pSelectExpr[colIndex].base.functionId;
  int32_t colId = pQuery->pSelectExpr[colIndex].base.colInfo.colId;
  
  SDataStatis *tpField = NULL;
  pCtx->hasNull = hasNullValue(pQuery, colIndex, pBlockInfo->numOfCols, pStatis, &tpField);
1316
  pCtx->aInputElemBuf = inputData;
1317

1318
  if (tpField != NULL) {
H
Haojun Liao 已提交
1319
    pCtx->preAggVals.isSet  = true;
1320 1321
    pCtx->preAggVals.statis = *tpField;
    assert(pCtx->preAggVals.statis.numOfNull <= pBlockInfo->rows);
1322 1323 1324
  } else {
    pCtx->preAggVals.isSet = false;
  }
1325

H
Haojun Liao 已提交
1326 1327 1328
  // limit/offset query will affect this value
  pCtx->startOffset = QUERY_IS_ASC_QUERY(pQuery) ? pQuery->pos:0;
  pCtx->size = QUERY_IS_ASC_QUERY(pQuery) ? pBlockInfo->rows - pQuery->pos : pQuery->pos + 1;
1329

1330 1331
  uint32_t status = aAggs[functionId].nStatus;
  if (((status & (TSDB_FUNCSTATE_SELECTIVITY | TSDB_FUNCSTATE_NEED_TS)) != 0) && (tsCol != NULL)) {
H
Haojun Liao 已提交
1332
    pCtx->ptsList = tsCol;
1333
  }
1334

1335 1336 1337 1338 1339
  if (functionId >= TSDB_FUNC_FIRST_DST && functionId <= TSDB_FUNC_LAST_DST) {
    // last_dist or first_dist function
    // store the first&last timestamp into the intermediate buffer [1], the true
    // value may be null but timestamp will never be null
  } else if (functionId == TSDB_FUNC_TOP || functionId == TSDB_FUNC_BOTTOM || functionId == TSDB_FUNC_TWA ||
1340
             functionId == TSDB_FUNC_DIFF || (functionId >= TSDB_FUNC_RATE && functionId <= TSDB_FUNC_AVG_IRATE)) {
1341
    /*
H
Haojun Liao 已提交
1342
     * least squares function needs two columns of input, currently, the x value of linear equation is set to
1343 1344 1345 1346 1347 1348 1349 1350 1351 1352
     * timestamp column, and the y-value is the column specified in pQuery->pSelectExpr[i].colIdxInBuffer
     *
     * top/bottom function needs timestamp to indicate when the
     * top/bottom values emerge, so does diff function
     */
    if (functionId == TSDB_FUNC_TWA) {
      STwaInfo *pTWAInfo = GET_RES_INFO(pCtx)->interResultBuf;
      pTWAInfo->SKey = pQuery->window.skey;
      pTWAInfo->EKey = pQuery->window.ekey;
    }
1353

1354 1355
  } else if (functionId == TSDB_FUNC_ARITHM) {
    pCtx->param[1].pz = param;
H
Haojun Liao 已提交
1356 1357 1358 1359 1360 1361
  } else if (functionId == TSDB_FUNC_SPREAD) {  // set the statistics data for primary time stamp column
    if (colId == PRIMARYKEY_TIMESTAMP_COL_INDEX) {
      pCtx->preAggVals.isSet  = true;
      pCtx->preAggVals.statis.min = pBlockInfo->window.skey;
      pCtx->preAggVals.statis.max = pBlockInfo->window.ekey;
    }
1362 1363 1364 1365 1366 1367 1368 1369 1370 1371 1372 1373 1374
  } else if (functionId == TSDB_FUNC_INTERP) {
    SInterpInfoDetail *pInterpInfo = GET_RES_INFO(pCtx)->interResultBuf;
    pInterpInfo->type = pQuery->fillType;
    pInterpInfo->ts = pQuery->window.skey;
    pInterpInfo->primaryCol = (colId == PRIMARYKEY_TIMESTAMP_COL_INDEX);
  
    if (pQuery->fillVal != NULL) {
      if (isNull((const char*) &pQuery->fillVal[colIndex], pCtx->inputType)) {
        pCtx->param[1].nType = TSDB_DATA_TYPE_NULL;
      } else { // todo refactor, tVariantCreateFromBinary should handle the NULL value
        tVariantCreateFromBinary(&pCtx->param[1], (char*) &pQuery->fillVal[colIndex], pCtx->inputBytes, pCtx->inputType);
      }
    }
1375
  }
1376

1377 1378 1379 1380 1381 1382
#if defined(_DEBUG_VIEW)
  //  int64_t *tsList = (int64_t *)primaryColumnData;
//  int64_t  s = tsList[0];
//  int64_t  e = tsList[size - 1];

//    if (IS_DATA_BLOCK_LOADED(blockStatus)) {
1383
//        qDebug("QInfo:%p query ts:%lld-%lld, offset:%d, rows:%d, bstatus:%d,
1384 1385 1386
//        functId:%d", GET_QINFO_ADDR(pQuery),
//               s, e, startOffset, size, blockStatus, functionId);
//    } else {
1387
//        qDebug("QInfo:%p block not loaded, bstatus:%d",
1388 1389 1390 1391 1392 1393
//        GET_QINFO_ADDR(pQuery), blockStatus);
//    }
#endif
}

// set the output buffer for the selectivity + tag query
H
Haojun Liao 已提交
1394 1395 1396
static void setCtxTagColumnInfo(SQueryRuntimeEnv *pRuntimeEnv, SQLFunctionCtx *pCtx) {
  SQuery* pQuery = pRuntimeEnv->pQuery;

1397
  if (isSelectivityWithTagsQuery(pQuery)) {
1398
    int32_t num = 0;
1399
    int16_t tagLen = 0;
1400 1401
    
    SQLFunctionCtx *p = NULL;
1402
    SQLFunctionCtx **pTagCtx = calloc(pQuery->numOfOutput, POINTER_BYTES);
H
Haojun Liao 已提交
1403

1404
    for (int32_t i = 0; i < pQuery->numOfOutput; ++i) {
1405
      SSqlFuncMsg *pSqlFuncMsg = &pQuery->pSelectExpr[i].base;
1406
      
1407 1408 1409 1410 1411 1412 1413 1414 1415 1416 1417 1418 1419
      if (pSqlFuncMsg->functionId == TSDB_FUNC_TAG_DUMMY || pSqlFuncMsg->functionId == TSDB_FUNC_TS_DUMMY) {
        tagLen += pCtx[i].outputBytes;
        pTagCtx[num++] = &pCtx[i];
      } else if ((aAggs[pSqlFuncMsg->functionId].nStatus & TSDB_FUNCSTATE_SELECTIVITY) != 0) {
        p = &pCtx[i];
      } else if (pSqlFuncMsg->functionId == TSDB_FUNC_TS || pSqlFuncMsg->functionId == TSDB_FUNC_TAG) {
        // tag function may be the group by tag column
        // ts may be the required primary timestamp column
        continue;
      } else {
        // the column may be the normal column, group by normal_column, the functionId is TSDB_FUNC_PRJ
      }
    }
dengyihao's avatar
dengyihao 已提交
1420 1421 1422 1423 1424 1425 1426
    if (p != NULL) {
      p->tagInfo.pTagCtxList = pTagCtx;
      p->tagInfo.numOfTagCols = num;
      p->tagInfo.tagsLen = tagLen;
    } else {
      tfree(pTagCtx); 
    }
1427 1428 1429
  }
}

H
Haojun Liao 已提交
1430
/*
 * Set up the intermediate result buffer of every output column: entry i of
 * pResultInfo is initialised with the intermediate size of select expression i.
 */
static FORCE_INLINE void setWindowResultInfo(SResultInfo *pResultInfo, SQuery *pQuery, bool isStableQuery) {
  int32_t col = 0;

  while (col < pQuery->numOfOutput) {
    // an intermediate result has to fit into one internal buffer page
    assert(pQuery->pSelectExpr[col].interBytes <= DEFAULT_INTERN_BUF_PAGE_SIZE);

    setResultInfoBuf(&pResultInfo[col], pQuery->pSelectExpr[col].interBytes, isStableQuery);
    col += 1;
  }
}

1438
/*
 * Allocate and initialise the per-output-column state of the runtime
 * environment: one SResultInfo and one SQLFunctionCtx for each select
 * expression of the query.
 *
 * For every output column the input type/width (taken from the tag column
 * list, the table name schema or the normal column list), the output
 * type/width, the scan order, the function id and the function parameters are
 * filled in, and the byte offset of the column within an output row is
 * recorded in pRuntimeEnv->offset.
 *
 * @param pRuntimeEnv  runtime environment to set up; pRuntimeEnv->pQuery must be set
 * @param order        order stored as param[2] of top/bottom/diff functions
 * @return TSDB_CODE_SUCCESS, or TSDB_CODE_QRY_OUT_OF_MEMORY if the result info
 *         or context array could not be allocated (both are released again)
 */
static int32_t setupQueryRuntimeEnv(SQueryRuntimeEnv *pRuntimeEnv, int16_t order) {
  qDebug("QInfo:%p setup runtime env", GET_QINFO_ADDR(pRuntimeEnv));
  SQuery *pQuery = pRuntimeEnv->pQuery;

  pRuntimeEnv->resultInfo = calloc(pQuery->numOfOutput, sizeof(SResultInfo));
  pRuntimeEnv->pCtx = (SQLFunctionCtx *)calloc(pQuery->numOfOutput, sizeof(SQLFunctionCtx));

  if (pRuntimeEnv->resultInfo == NULL || pRuntimeEnv->pCtx == NULL) {
    goto _clean;
  }

  pRuntimeEnv->offset[0] = 0;
  for (int32_t i = 0; i < pQuery->numOfOutput; ++i) {
    SSqlFuncMsg *pSqlFuncMsg = &pQuery->pSelectExpr[i].base;

    SQLFunctionCtx *pCtx = &pRuntimeEnv->pCtx[i];
    SColIndex* pIndex = &pSqlFuncMsg->colInfo;

    // the input type and width come from the schema of the referenced column
    int32_t index = pSqlFuncMsg->colInfo.colIndex;
    if (TSDB_COL_IS_TAG(pIndex->flag)) {
      if (pIndex->colId == TSDB_TBNAME_COLUMN_INDEX) {  // todo refactor
        SSchema s = tGetTableNameColumnSchema();

        pCtx->inputBytes = s.bytes;
        pCtx->inputType = s.type;
      } else {
        pCtx->inputBytes = pQuery->tagColList[index].bytes;
        pCtx->inputType = pQuery->tagColList[index].type;
      }

    } else {
      pCtx->inputBytes = pQuery->colList[index].bytes;
      pCtx->inputType = pQuery->colList[index].type;
    }

    assert(isValidDataType(pCtx->inputType));
    pCtx->ptsOutputBuf = NULL;

    pCtx->outputBytes = pQuery->pSelectExpr[i].bytes;
    pCtx->outputType = pQuery->pSelectExpr[i].type;

    pCtx->order = pQuery->order.order;
    pCtx->functionId = pSqlFuncMsg->functionId;

    pCtx->numOfParams = pSqlFuncMsg->numOfParams;
    for (int32_t j = 0; j < pCtx->numOfParams; ++j) {
      int16_t type = pSqlFuncMsg->arg[j].argType;
      int16_t bytes = pSqlFuncMsg->arg[j].argBytes;
      if (type == TSDB_DATA_TYPE_BINARY || type == TSDB_DATA_TYPE_NCHAR) {
        // string arguments are referenced through a pointer; take the j-th
        // argument, not always the first one
        tVariantCreateFromBinary(&pCtx->param[j], pSqlFuncMsg->arg[j].argValue.pz, bytes, type);
      } else {
        tVariantCreateFromBinary(&pCtx->param[j], (char *)&pSqlFuncMsg->arg[j].argValue.i64, bytes, type);
      }
    }

    // set the order information for top/bottom query
    int32_t functionId = pCtx->functionId;

    if (functionId == TSDB_FUNC_TOP || functionId == TSDB_FUNC_BOTTOM || functionId == TSDB_FUNC_DIFF) {
      // these functions require the first output column to be the timestamp
      int32_t f = pQuery->pSelectExpr[0].base.functionId;
      assert(f == TSDB_FUNC_TS || f == TSDB_FUNC_TS_DUMMY);

      pCtx->param[2].i64Key = order;
      pCtx->param[2].nType = TSDB_DATA_TYPE_BIGINT;
      pCtx->param[3].i64Key = functionId;
      pCtx->param[3].nType = TSDB_DATA_TYPE_BIGINT;

      pCtx->param[1].i64Key = pQuery->order.orderColId;
    }

    // output columns are laid out back to back within a result row
    if (i > 0) {
      pRuntimeEnv->offset[i] = pRuntimeEnv->offset[i - 1] + pRuntimeEnv->pCtx[i - 1].outputBytes;
    }
  }

  // set the intermediate result output buffer
  setWindowResultInfo(pRuntimeEnv->resultInfo, pQuery, pRuntimeEnv->stableQuery);

  // if it is group by normal column, do not set output buffer, the output buffer is pResult
  if (!isGroupbyNormalCol(pQuery->pGroupbyExpr) && !pRuntimeEnv->stableQuery) {
    resetCtxOutputBuf(pRuntimeEnv);
  }

  setCtxTagColumnInfo(pRuntimeEnv, pRuntimeEnv->pCtx);
  return TSDB_CODE_SUCCESS;

_clean:
  tfree(pRuntimeEnv->resultInfo);
  tfree(pRuntimeEnv->pCtx);

  return TSDB_CODE_QRY_OUT_OF_MEMORY;
}

/*
 * Release everything owned by the runtime environment: time window results,
 * function contexts with their parameters, tag values and tag context lists,
 * intermediate result buffers, fill info, the result buffer, both query
 * handles and the join timestamp buffer. Does nothing when no query is
 * attached to the environment.
 */
static void teardownQueryRuntimeEnv(SQueryRuntimeEnv *pRuntimeEnv) {
  SQuery *pQuery = pRuntimeEnv->pQuery;
  if (pQuery == NULL) {
    return;
  }

  SQInfo* pQInfo = (SQInfo*) GET_QINFO_ADDR(pRuntimeEnv);
  qDebug("QInfo:%p teardown runtime env", pQInfo);

  cleanupTimeWindowInfo(&pRuntimeEnv->windowResInfo, pQuery->numOfOutput);

  if (pRuntimeEnv->pCtx != NULL) {
    for (int32_t col = 0; col < pQuery->numOfOutput; ++col) {
      SQLFunctionCtx *pColCtx = &pRuntimeEnv->pCtx[col];

      int32_t p = 0;
      while (p < pColCtx->numOfParams) {
        tVariantDestroy(&pColCtx->param[p]);
        p += 1;
      }

      tVariantDestroy(&pColCtx->tag);
      tfree(pColCtx->tagInfo.pTagCtxList);
      tfree(pRuntimeEnv->resultInfo[col].interResultBuf);
    }

    tfree(pRuntimeEnv->resultInfo);
    tfree(pRuntimeEnv->pCtx);
  }

  pRuntimeEnv->pFillInfo = taosDestoryFillInfo(pRuntimeEnv->pFillInfo);

  destroyResultBuf(pRuntimeEnv->pResultBuf, pQInfo);
  tsdbCleanupQueryHandle(pRuntimeEnv->pQueryHandle);
  tsdbCleanupQueryHandle(pRuntimeEnv->pSecQueryHandle);

  pRuntimeEnv->pTSBuf = tsBufDestory(pRuntimeEnv->pTSBuf);
}

1568
/* A query counts as killed once its code has been set to TSDB_CODE_TSC_QUERY_CANCELLED. */
static bool isQueryKilled(SQInfo *pQInfo) {
  bool cancelled = (TSDB_CODE_TSC_QUERY_CANCELLED == pQInfo->code);
  return cancelled;
}

1572
/* Mark the query as killed by storing TSDB_CODE_TSC_QUERY_CANCELLED as its code. */
static void setQueryKilled(SQInfo *pQInfo) {
  pQInfo->code = TSDB_CODE_TSC_QUERY_CANCELLED;
}
H
hjxilinx 已提交
1573

H
hjxilinx 已提交
1574
/**
 * Decide whether the query is a fixed output query.
 *
 * - an interval query (intervalTime != 0) never is;
 * - a top/bottom query or a group-by-normal-column query always is;
 * - otherwise the query is fixed output as soon as one examined function is not flagged
 *   as multi-output in aAggs. The leading timestamp projection and the TS/TS_DUMMY
 *   functions are not examined.
 */
static bool isFixedOutputQuery(SQuery *pQuery) {
  if (pQuery->intervalTime != 0) {
    return false;
  }

  // Note:top/bottom query is fixed output query
  if (isTopBottomQuery(pQuery) || isGroupbyNormalCol(pQuery->pGroupbyExpr)) {
    return true;
  }

  for (int32_t idx = 0; idx < pQuery->numOfOutput; ++idx) {
    SSqlFuncMsg *pFunc = &pQuery->pSelectExpr[idx].base;

    // ignore the ts_comp function: a one-parameter projection of the primary timestamp in the first column
    const bool leadingTsProjection = (idx == 0) && (pFunc->functionId == TSDB_FUNC_PRJ) &&
                                     (pFunc->numOfParams == 1) &&
                                     (pFunc->colInfo.colIndex == PRIMARYKEY_TIMESTAMP_COL_INDEX);

    const bool tsFunction = (pFunc->functionId == TSDB_FUNC_TS) || (pFunc->functionId == TSDB_FUNC_TS_DUMMY);

    if (leadingTsProjection || tsFunction) {
      continue;
    }

    if (!IS_MULTIOUTPUT(aAggs[pFunc->functionId].nStatus)) {
      return true;
    }
  }

  return false;
}

1605
// todo refactor with isLastRowQuery
H
hjxilinx 已提交
1606
/** Return true when at least one output expression uses TSDB_FUNC_INTERP. */
static bool isPointInterpoQuery(SQuery *pQuery) {
  bool hasInterp = false;

  for (int32_t idx = 0; idx < pQuery->numOfOutput && !hasInterp; ++idx) {
    hasInterp = (pQuery->pSelectExpr[idx].base.functionId == TSDB_FUNC_INTERP);
  }

  return hasInterp;
}

// TODO REFACTOR:MERGE WITH CLIENT-SIDE FUNCTION
H
hjxilinx 已提交
1618
/**
 * Return true when at least one output expression is one of the rate functions
 * SUM_RATE, SUM_IRATE, AVG_RATE or AVG_IRATE. TS outputs are skipped.
 */
static bool isSumAvgRateQuery(SQuery *pQuery) {
  for (int32_t idx = 0; idx < pQuery->numOfOutput; ++idx) {
    const int32_t fid = pQuery->pSelectExpr[idx].base.functionId;

    if (fid == TSDB_FUNC_TS) {
      continue;
    }

    const bool sumRate = (fid == TSDB_FUNC_SUM_RATE) || (fid == TSDB_FUNC_SUM_IRATE);
    const bool avgRate = (fid == TSDB_FUNC_AVG_RATE) || (fid == TSDB_FUNC_AVG_IRATE);
    if (sumRate || avgRate) {
      return true;
    }
  }

  return false;
}

H
hjxilinx 已提交
1634
/** Return true when at least one output expression uses TSDB_FUNC_LAST_ROW. */
static bool isFirstLastRowQuery(SQuery *pQuery) {
  bool hasLastRow = false;

  for (int32_t idx = 0; idx < pQuery->numOfOutput && !hasLastRow; ++idx) {
    hasLastRow = (pQuery->pSelectExpr[idx].base.functionId == TSDB_FUNC_LAST_ROW);
  }

  return hasLastRow;
}

H
hjxilinx 已提交
1645
/**
 * Check whether a supplementary scan in the opposite direction is required.
 *
 * TS, TS_DUMMY and TAG outputs are ignored. A reverse scan is needed when
 *  - a FIRST/FIRST_DST function is evaluated by a non-ascending query, or
 *  - a LAST/LAST_DST function carries, in its first argument, an order that differs from
 *    the order of the query.
 *
 * Every output expression is examined: a LAST function whose order agrees with the query
 * does not end the search, so a later expression can still request the reverse scan.
 *
 * @return true if any output expression requires the reverse scan, false otherwise
 */
static bool needReverseScan(SQuery *pQuery) {
  for (int32_t i = 0; i < pQuery->numOfOutput; ++i) {
    int32_t functionId = pQuery->pSelectExpr[i].base.functionId;
    if (functionId == TSDB_FUNC_TS || functionId == TSDB_FUNC_TS_DUMMY || functionId == TSDB_FUNC_TAG) {
      continue;
    }

    if ((functionId == TSDB_FUNC_FIRST || functionId == TSDB_FUNC_FIRST_DST) && !QUERY_IS_ASC_QUERY(pQuery)) {
      return true;
    }

    if (functionId == TSDB_FUNC_LAST || functionId == TSDB_FUNC_LAST_DST) {
      // the scan order required to acquire the last result of the specified column
      int32_t order = (int32_t)pQuery->pSelectExpr[i].base.arg->argValue.i64;
      if (order != pQuery->order.order) {
        return true;
      }
    }
  }

  return false;
}
H
hjxilinx 已提交
1664 1665 1666

/**
 * Return true when every output expression is a tag projection (TAGPRJ), a TID_TAG
 * function, or a COUNT whose column id is TSDB_TBNAME_COLUMN_INDEX.
 */
static bool onlyQueryTags(SQuery* pQuery) {
  for (int32_t idx = 0; idx < pQuery->numOfOutput; ++idx) {
    SExprInfo *pExpr = &pQuery->pSelectExpr[idx];
    const int32_t fid = pExpr->base.functionId;

    if (fid == TSDB_FUNC_TAGPRJ || fid == TSDB_FUNC_TID_TAG) {
      continue;
    }

    if (fid == TSDB_FUNC_COUNT && pExpr->base.colInfo.colId == TSDB_TBNAME_COLUMN_INDEX) {
      continue;
    }

    // anything else is not a tag-only expression
    return false;
  }

  return true;
}

1679 1680
/////////////////////////////////////////////////////////////////////////////////////////////

H
Haojun Liao 已提交
1681
/**
 * Compute the interval-aligned time window that contains key, and that window clipped to
 * the range [keyFirst, keyLast].
 *
 * @param pQuery   supplies slidingTime, intervalTime, slidingTimeUnit and precision
 * @param key      timestamp to locate; must lie within [keyFirst, keyLast]
 * @param keyFirst lower bound of the clipped window
 * @param keyLast  upper bound of the clipped window
 * @param realWin  [out] the clipped window
 * @param win      [out] the aligned window
 */
void getAlignQueryTimeWindow(SQuery *pQuery, int64_t key, int64_t keyFirst, int64_t keyLast, STimeWindow *realWin, STimeWindow *win) {
  assert(key >= keyFirst && key <= keyLast && pQuery->slidingTime <= pQuery->intervalTime);

  win->skey = taosGetIntervalStartTimestamp(key, pQuery->slidingTime, pQuery->intervalTime, pQuery->slidingTimeUnit,
                                            pQuery->precision);

  const bool endKeyWouldOverflow = keyFirst > (INT64_MAX - pQuery->intervalTime);
  if (!endKeyWouldOverflow) {
    win->ekey = win->skey + pQuery->intervalTime - 1;

    // clip the aligned window to the requested range
    realWin->skey = (win->skey < keyFirst) ? keyFirst : win->skey;
    realWin->ekey = (win->ekey < keyLast) ? win->ekey : keyLast;
    return;
  }

  /*
   * keyFirst > INT64_MAX - intervalTime: the range between keyFirst and keyLast must be
   * shorter than one interval, so the query range is used as it is.
   */
  assert(keyLast - keyFirst < pQuery->intervalTime);

  realWin->skey = keyFirst;
  realWin->ekey = keyLast;

  win->ekey = INT64_MAX;
}

/**
 * Set pQuery->checkBuffer.
 *
 * The flag is 0 for top/bottom and group-by-normal-column queries. Otherwise it is 1 only
 * if the functions examined (TS and TS_DUMMY are skipped) are flagged multi-output in
 * aAggs; the examination stops at the first function that is not.
 */
static void setScanLimitationByResultBuffer(SQuery *pQuery) {
  if (isTopBottomQuery(pQuery) || isGroupbyNormalCol(pQuery->pGroupbyExpr)) {
    pQuery->checkBuffer = 0;
    return;
  }

  bool multiOutput = false;

  for (int32_t idx = 0; idx < pQuery->numOfOutput; ++idx) {
    SSqlFuncMsg *pFunc = &pQuery->pSelectExpr[idx].base;

    const bool tsFunction = (pFunc->functionId == TSDB_FUNC_TS) || (pFunc->functionId == TSDB_FUNC_TS_DUMMY);
    if (tsFunction) {
      continue;
    }

    multiOutput = IS_MULTIOUTPUT(aAggs[pFunc->functionId].nStatus);
    if (!multiOutput) {
      break;
    }
  }

  pQuery->checkBuffer = multiOutput ? 1 : 0;
}

/*
 * todo add more parameters to check soon..
 */
1731
/**
 * Validate the list of columns to load: two adjacent entries of colList must not carry
 * the same column id.
 *
 * @return false (after logging an error) on the first adjacent duplicate, true otherwise
 */
bool colIdCheck(SQuery *pQuery) {
  // load data column information is incorrect
  for (int32_t next = 1; next < pQuery->numOfCols; ++next) {
    if (pQuery->colList[next - 1].colId != pQuery->colList[next].colId) {
      continue;
    }

    // NOTE(review): GET_QINFO_ADDR subtracts offsetof(SQInfo, runtimeEnv); it is applied to a
    // SQuery pointer here, so the logged address looks unreliable - confirm and fix at the caller
    qError("QInfo:%p invalid data load column for query", GET_QINFO_ADDR(pQuery));
    return false;
  }

  return true;
}

// todo ignore the avg/sum/min/max/count/stddev/top/bottom functions, of which
// the scan order is not matter
/**
 * Return true when every output expression, apart from the TS, TS_DUMMY, TAG and
 * TAG_DUMMY helpers, uses either functId or functIdDst.
 */
static bool onlyOneQueryType(SQuery *pQuery, int32_t functId, int32_t functIdDst) {
  for (int32_t idx = 0; idx < pQuery->numOfOutput; ++idx) {
    const int32_t fid = pQuery->pSelectExpr[idx].base.functionId;

    const bool helper = (fid == TSDB_FUNC_TS) || (fid == TSDB_FUNC_TS_DUMMY) || (fid == TSDB_FUNC_TAG) ||
                        (fid == TSDB_FUNC_TAG_DUMMY);
    if (helper) {
      continue;
    }

    const bool expected = (fid == functId) || (fid == functIdDst);
    if (!expected) {
      return false;
    }
  }

  return true;
}

/** True when FIRST/FIRST_DST are the only functions of the query (see onlyOneQueryType). */
static bool onlyFirstQuery(SQuery *pQuery) {
  return onlyOneQueryType(pQuery, TSDB_FUNC_FIRST, TSDB_FUNC_FIRST_DST);
}

/** True when LAST/LAST_DST are the only functions of the query (see onlyOneQueryType). */
static bool onlyLastQuery(SQuery *pQuery) {
  return onlyOneQueryType(pQuery, TSDB_FUNC_LAST, TSDB_FUNC_LAST_DST);
}

H
Haojun Liao 已提交
1766 1767 1768 1769 1770 1771 1772 1773 1774 1775 1776 1777 1778 1779
// todo refactor, add iterator
/** Swap win.skey and win.ekey of every table query info in every table group of the query. */
static void doExchangeTimeWindow(SQInfo* pQInfo) {
  size_t numOfGroups = GET_NUM_OF_TABLEGROUP(pQInfo);

  for (int32_t g = 0; g < numOfGroups; ++g) {
    SArray *pGroup = GET_TABLEGROUP(pQInfo, g);
    size_t  numOfTables = taosArrayGetSize(pGroup);

    for (int32_t k = 0; k < numOfTables; ++k) {
      STableQueryInfo *pInfo = (STableQueryInfo *)taosArrayGetP(pGroup, k);
      SWAP(pInfo->win.skey, pInfo->win.ekey, TSKEY);
    }
  }
}

H
Haojun Liao 已提交
1780 1781 1782
/**
 * Adjust the scan order (and, where needed, the query range) to the functions of the query.
 *
 *  - last_row query: the order becomes DESC and the query window runs from the larger
 *    key down to the smaller one;
 *  - interp query without interval: the order becomes ASC, the window is swapped if the
 *    query was not ascending;
 *  - first-only / last-only query: the order becomes ASC / DESC. If the order actually
 *    changes, the query window is swapped; for a query without interval the window of
 *    every table is swapped as well (doExchangeTimeWindow). For an interval query this
 *    is applied only when stableQuery is true.
 *
 * All log messages report the address of pQInfo itself; GET_QINFO_ADDR must not be applied
 * to pQuery, since that macro expects the address of the runtimeEnv member.
 *
 * @param pQInfo      query info whose runtimeEnv.pQuery is adjusted
 * @param stableQuery true to enable the first/last adjustment for interval queries
 */
static void changeExecuteScanOrder(SQInfo *pQInfo, bool stableQuery) {
  SQuery* pQuery = pQInfo->runtimeEnv.pQuery;

  char msg[] = "QInfo:%p scan order changed for %s query, old:%d, new:%d, qrange exchanged, old qrange:%" PRId64
               "-%" PRId64 ", new qrange:%" PRId64 "-%" PRId64;

  // todo handle the case the the order irrelevant query type mixed up with order critical query type
  // descending order query for last_row query
  if (isFirstLastRowQuery(pQuery)) {
    qDebug("QInfo:%p scan order changed for last_row query, old:%d, new:%d", pQInfo,
           pQuery->order.order, TSDB_ORDER_DESC);

    pQuery->order.order = TSDB_ORDER_DESC;

    int64_t skey = MIN(pQuery->window.skey, pQuery->window.ekey);
    int64_t ekey = MAX(pQuery->window.skey, pQuery->window.ekey);

    pQuery->window.skey = ekey;
    pQuery->window.ekey = skey;

    return;
  }

  // in case of point-interpolation query, use asc order scan
  if (isPointInterpoQuery(pQuery) && pQuery->intervalTime == 0) {
    if (!QUERY_IS_ASC_QUERY(pQuery)) {
      qDebug(msg, pQInfo, "interp", pQuery->order.order, TSDB_ORDER_ASC, pQuery->window.skey,
             pQuery->window.ekey, pQuery->window.ekey, pQuery->window.skey);
      SWAP(pQuery->window.skey, pQuery->window.ekey, TSKEY);
    }

    pQuery->order.order = TSDB_ORDER_ASC;
    return;
  }

  const bool intervalQuery = (pQuery->intervalTime != 0);
  if (intervalQuery && !stableQuery) {
    return;  // interval query with stableQuery == false: the scan order is left untouched
  }

  if (onlyFirstQuery(pQuery)) {
    if (!QUERY_IS_ASC_QUERY(pQuery)) {
      qDebug(msg, pQInfo, intervalQuery ? "only-first stable" : "only-first", pQuery->order.order, TSDB_ORDER_ASC,
             pQuery->window.skey, pQuery->window.ekey, pQuery->window.ekey, pQuery->window.skey);

      SWAP(pQuery->window.skey, pQuery->window.ekey, TSKEY);
      if (!intervalQuery) {
        doExchangeTimeWindow(pQInfo);
      }
    }

    pQuery->order.order = TSDB_ORDER_ASC;
  } else if (onlyLastQuery(pQuery)) {
    if (QUERY_IS_ASC_QUERY(pQuery)) {
      qDebug(msg, pQInfo, intervalQuery ? "only-last stable" : "only-last", pQuery->order.order, TSDB_ORDER_DESC,
             pQuery->window.skey, pQuery->window.ekey, pQuery->window.ekey, pQuery->window.skey);

      SWAP(pQuery->window.skey, pQuery->window.ekey, TSKEY);
      if (!intervalQuery) {
        doExchangeTimeWindow(pQInfo);
      }
    }

    pQuery->order.order = TSDB_ORDER_DESC;
  }
}

/**
 * Initial number of result pages for a query: 128 for a group-by-normal-column query, the
 * larger of the number of tables and 16 for an interval query, and 1 otherwise.
 */
static int32_t getInitialPageNum(SQInfo *pQInfo) {
  SQuery *pQuery = pQInfo->runtimeEnv.pQuery;
  int32_t minPagesForInterval = 16;

  int32_t pages = 0;

  if (isGroupbyNormalCol(pQuery->pGroupbyExpr)) {
    pages = 128;
  } else if (QUERY_IS_INTERVAL_QUERY(pQuery)) {
    // time window query, allocate one page for each table
    size_t numOfTables = pQInfo->tableqinfoGroupInfo.numOfTables;
    pages = MAX(numOfTables, minPagesForInterval);
  } else {
    // for super table query, one page for each subset
    pages = 1;  // pQInfo->pSidSet->numOfSubSet;
  }

  assert(pages > 0);
  return pages;
}

H
Haojun Liao 已提交
1882
/*
 * Row multiplier of a result: the first argument of the second select expression when tbq
 * is true and sq is false, 1 otherwise. The callers in this file pass the top/bottom-query
 * flag as tbq and the super-table-query flag as sq. Every macro argument is parenthesized
 * so that compound expressions are evaluated as a whole.
 */
#define GET_ROW_PARAM_FOR_MULTIOUTPUT(_q, tbq, sq) (((tbq) && !(sq)) ? (_q)->pSelectExpr[1].base.arg->argValue.i64 : 1)
1883

H
Haojun Liao 已提交
1884 1885
/**
 * Number of result rows that fit in one internal buffer page: the page size minus the
 * tFilePage header, divided by the row size scaled with GET_ROW_PARAM_FOR_MULTIOUTPUT.
 */
static FORCE_INLINE int32_t getNumOfRowsInResultPage(SQuery *pQuery, bool topBotQuery, bool isSTableQuery) {
  int32_t bytesPerResult =
      pQuery->rowSize * GET_ROW_PARAM_FOR_MULTIOUTPUT(pQuery, topBotQuery, isSTableQuery);

  return (DEFAULT_INTERN_BUF_PAGE_SIZE - sizeof(tFilePage)) / bytesPerResult;
}

/**
 * Locate the output position of one column of a window result inside its result page.
 *
 * The address is the page data, plus the offset of the column multiplied by the number of
 * rows per page, plus the width of the column multiplied by the (scaled) row id.
 *
 * @param pRuntimeEnv runtime environment; must not be NULL
 * @param columnIndex index of the output column
 * @param pResult     window result holding the page id and the row id; must not be NULL
 */
char *getPosInResultPage(SQueryRuntimeEnv *pRuntimeEnv, int32_t columnIndex, SWindowResult *pResult) {
  assert(pResult != NULL && pRuntimeEnv != NULL);

  SQuery *pQuery = pRuntimeEnv->pQuery;

  tFilePage *pPage = GET_RES_BUF_PAGE_BY_ID(pRuntimeEnv->pResultBuf, pResult->pos.pageId);
  int32_t    scaledRowId =
      pResult->pos.rowId * GET_ROW_PARAM_FOR_MULTIOUTPUT(pQuery, pRuntimeEnv->topBotQuery, pRuntimeEnv->stableQuery);

  // start of the column inside the page, then the row inside the column
  char *pColumnStart = ((char *)pPage->data) + pRuntimeEnv->offset[columnIndex] * pRuntimeEnv->numOfRowsPerPage;
  return pColumnStart + pQuery->pSelectExpr[columnIndex].bytes * scaledRowId;
}

/**
 * decrease the refcount for each table involved in this query
 * @param pQInfo
 */
1904
UNUSED_FUNC void vnodeDecMeterRefcnt(SQInfo *pQInfo) {
  // currently a no-op: the reference counting below is compiled out
  (void)pQInfo;
  //    assert(taosHashGetSize(pQInfo->tableqinfoGroupInfo) >= 1);

#if 0
  if (pQInfo == NULL || pQInfo->tableqinfoGroupInfo.numOfTables == 1) {
    atomic_fetch_sub_32(&pQInfo->pObj->numOfQueries, 1);
    qDebug("QInfo:%p vid:%d sid:%d meterId:%s, query is over, numOfQueries:%d", pQInfo, pQInfo->pObj->vnode,
           pQInfo->pObj->sid, pQInfo->pObj->meterId, pQInfo->pObj->numOfQueries);
  } else {
    int32_t num = 0;
    for (int32_t i = 0; i < pQInfo->tableqinfoGroupInfo.numOfTables; ++i) {
      SMeterObj *pMeter = getMeterObj(pQInfo->tableqinfoGroupInfo, pQInfo->pSidSet->pTableIdList[i]->sid);
      atomic_fetch_sub_32(&(pMeter->numOfQueries), 1);

      if (pMeter->numOfQueries > 0) {
        qDebug("QInfo:%p vid:%d sid:%d meterId:%s, query is over, numOfQueries:%d", pQInfo, pMeter->vnode, pMeter->sid,
               pMeter->meterId, pMeter->numOfQueries);
        num++;
      }
    }

    /*
     * in order to reduce log output, for all meters of which numOfQueries count are 0,
     * we do not output corresponding information
     */
    num = pQInfo->tableqinfoGroupInfo.numOfTables - num;
    qDebug("QInfo:%p metric query is over, dec query ref for %d meters, numOfQueries on %d meters are 0", pQInfo,
           pQInfo->tableqinfoGroupInfo.numOfTables, num);
  }
#endif
}

/**
 * Decide whether a data block has to be loaded, given its pre-computed statistics.
 *
 * The statistics based pre-filter is compiled out (#if 0), so the function currently
 * returns true in every case.
 */
static bool needToLoadDataBlock(SQuery *pQuery, SDataStatis *pDataStatis, SQLFunctionCtx *pCtx,
                                int32_t numOfTotalPoints) {
  // no statistics available: nothing can be decided without the data itself
  if (NULL == pDataStatis) {
    return true;
  }

#if 0
  for (int32_t k = 0; k < pQuery->numOfFilterCols; ++k) {
    SSingleColumnFilterInfo *pFilterInfo = &pQuery->pFilterInfo[k];
    int32_t                  colIndex = pFilterInfo->info.colIndex;

    // this column not valid in current data block
    if (colIndex < 0 || pDataStatis[colIndex].colId != pFilterInfo->info.data.colId) {
      continue;
    }

    // not support pre-filter operation on binary/nchar data type
    if (!vnodeSupportPrefilter(pFilterInfo->info.data.type)) {
      continue;
    }

    // all points in current column are NULL, no need to check its boundary value
    if (pDataStatis[colIndex].numOfNull == numOfTotalPoints) {
      continue;
    }

    if (pFilterInfo->info.info.type == TSDB_DATA_TYPE_FLOAT) {
      float minval = *(double *)(&pDataStatis[colIndex].min);
      float maxval = *(double *)(&pDataStatis[colIndex].max);

      for (int32_t i = 0; i < pFilterInfo->numOfFilters; ++i) {
        if (pFilterInfo->pFilters[i].fp(&pFilterInfo->pFilters[i], (char *)&minval, (char *)&maxval)) {
          return true;
        }
      }
    } else {
      for (int32_t i = 0; i < pFilterInfo->numOfFilters; ++i) {
        if (pFilterInfo->pFilters[i].fp(&pFilterInfo->pFilters[i], (char *)&pDataStatis[colIndex].min,
                                        (char *)&pDataStatis[colIndex].max)) {
          return true;
        }
      }
    }
  }

  // todo disable this opt code block temporarily
  //  for (int32_t i = 0; i < pQuery->numOfOutput; ++i) {
  //    int32_t functId = pQuery->pSelectExpr[i].base.functionId;
  //    if (functId == TSDB_FUNC_TOP || functId == TSDB_FUNC_BOTTOM) {
  //      return top_bot_datablock_filter(&pCtx[i], functId, (char *)&pField[i].min, (char *)&pField[i].max);
  //    }
  //  }

#endif
  return true;
}

// previous time window may not be of the same size of pQuery->intervalTime
/**
 * Advance a time window in place: the start key moves by one sliding step in the scan
 * direction of the query, the end key is reset to start + intervalTime - 1.
 */
static void getNextTimeWindow(SQuery *pQuery, STimeWindow *pTimeWindow) {
  const int32_t direction = GET_FORWARD_DIRECTION_FACTOR(pQuery->order.order);

  pTimeWindow->skey = pTimeWindow->skey + (pQuery->slidingTime * direction);
  pTimeWindow->ekey = (pQuery->intervalTime - 1) + pTimeWindow->skey;
}

H
hjxilinx 已提交
2003
/**
 * Load the current data block of a query handle only as far as the query requires it.
 *
 * The requirement is BLK_DATA_ALL_NEEDED when filter columns exist; otherwise it is the
 * OR of what every output function reports through dataReqFunc, forced to include
 * BLK_DATA_ALL_NEEDED when a ts buffer exists or the query is an interval query.
 *
 *  - BLK_DATA_NO_NEEDED:     nothing is loaded, the block is counted as discarded;
 *  - BLK_DATA_STATIS_NEEDED: only the block statistics are retrieved; the data block is
 *                            loaded when no statistics exist (*pStatis == NULL);
 *  - otherwise:              statistics and data block are both retrieved.
 *
 * @param pRuntimeEnv  runtime environment (query, function contexts, cost summary)
 * @param pQueryHandle tsdb query handle positioned on the block
 * @param pBlockInfo   description of the block (window, number of rows)
 * @param pStatis      [out] block statistics as delivered by tsdbRetrieveDataBlockStatisInfo
 * @return the loaded data block, or NULL when the data was not loaded
 */
SArray *loadDataBlockOnDemand(SQueryRuntimeEnv *pRuntimeEnv, void* pQueryHandle, SDataBlockInfo* pBlockInfo, SDataStatis **pStatis) {
  SQuery *pQuery = pRuntimeEnv->pQuery;

  uint32_t r = 0;
  SArray * pDataBlock = NULL;

  if (pQuery->numOfFilterCols > 0) {
    r = BLK_DATA_ALL_NEEDED;
  } else {
    // check if this data block is required to load
    for (int32_t i = 0; i < pQuery->numOfOutput; ++i) {
      SSqlFuncMsg* pSqlFunc = &pQuery->pSelectExpr[i].base;

      int32_t functionId = pSqlFunc->functionId;
      int32_t colId = pSqlFunc->colInfo.colId;
      r |= aAggs[functionId].dataReqFunc(&pRuntimeEnv->pCtx[i], pQuery->window.skey, pQuery->window.ekey, colId);
    }

    // pTSBuf is a pointer: test it against NULL instead of ordering it against an integer
    if (pRuntimeEnv->pTSBuf != NULL || QUERY_IS_INTERVAL_QUERY(pQuery)) {
      r |= BLK_DATA_ALL_NEEDED;
    }
  }

  if (r == BLK_DATA_NO_NEEDED) {
    qDebug("QInfo:%p data block discard, brange:%" PRId64 "-%" PRId64 ", rows:%d", GET_QINFO_ADDR(pRuntimeEnv),
           pBlockInfo->window.skey, pBlockInfo->window.ekey, pBlockInfo->rows);
    pRuntimeEnv->summary.discardBlocks += 1;
  } else if (r == BLK_DATA_STATIS_NEEDED) {
    if (tsdbRetrieveDataBlockStatisInfo(pQueryHandle, pStatis) != TSDB_CODE_SUCCESS) {
      // NOTE(review): a failure is ignored here
      //        return DISK_DATA_LOAD_FAILED;
    }

    pRuntimeEnv->summary.loadBlockStatis += 1;

    if (*pStatis == NULL) { // data block statistics does not exist, load data block
      pDataBlock = tsdbRetrieveDataBlock(pQueryHandle, NULL);
      pRuntimeEnv->summary.totalCheckedRows += pBlockInfo->rows;
    }
  } else {
    assert(r == BLK_DATA_ALL_NEEDED);

    // load the data block statistics to perform further filter
    pRuntimeEnv->summary.loadBlockStatis +=1;
    if (tsdbRetrieveDataBlockStatisInfo(pQueryHandle, pStatis) != TSDB_CODE_SUCCESS) {
      // NOTE(review): a failure is ignored here as well
    }

    if (!needToLoadDataBlock(pQuery,*pStatis, pRuntimeEnv->pCtx, pBlockInfo->rows)) {
#if defined(_DEBUG_VIEW)
      qDebug("QInfo:%p block discarded by per-filter", GET_QINFO_ADDR(pRuntimeEnv));
#endif
      // current block has been discard due to filter applied
      pRuntimeEnv->summary.discardBlocks += 1;
      // NOTE(review): the block is still loaded below; the early exit is commented out
      //        return DISK_DATA_DISCARDED;
    }

    pRuntimeEnv->summary.totalCheckedRows += pBlockInfo->rows;
    pRuntimeEnv->summary.loadBlocks += 1;
    pDataBlock = tsdbRetrieveDataBlock(pQueryHandle, NULL);
  }

  return pDataBlock;
}

H
hjxilinx 已提交
2066
/**
 * Binary search of a timestamp in an array of TSKEY values.
 *
 * As far as the comparisons below show, the array is expected in ascending order
 * (NOTE(review): confirm with the callers).
 *
 *  - TSDB_ORDER_DESC: returns the position of key if present, otherwise the position just
 *    below it; the result is -1 when key is smaller than the first element.
 *  - TSDB_ORDER_ASC:  returns the position of key if present, otherwise the position just
 *    above it; the result is -1 when key is larger than the last element.
 *
 * @param pValue start of the TSKEY array
 * @param num    number of elements; -1 is returned when num <= 0
 * @param key    timestamp to look for
 * @param order  TSDB_ORDER_ASC or TSDB_ORDER_DESC
 */
int32_t binarySearchForKey(char *pValue, int num, TSKEY key, int order) {
  if (num <= 0) {
    return -1;
  }

  assert(order == TSDB_ORDER_ASC || order == TSDB_ORDER_DESC);

  TSKEY  *keys = (TSKEY *)pValue;
  int32_t lo = 0;
  int32_t hi = num - 1;

  const bool descending = (order == TSDB_ORDER_DESC);

  for (;;) {
    if (descending) {
      // find the first position which is smaller than the key
      if (key >= keys[hi]) return hi;
      if (key == keys[lo]) return lo;
      if (key < keys[lo]) return lo - 1;
    } else {
      // find the first position which is bigger than the key
      if (key <= keys[lo]) return lo;
      if (key == keys[hi]) return hi;

      if (key > keys[hi]) {
        hi = hi + 1;
        return (hi >= num) ? -1 : hi;
      }
    }

    // the key lies strictly between keys[lo] and keys[hi]: halve the range
    int32_t span = hi - lo + 1;
    int32_t mid = (span >> 1) + lo;

    if (key < keys[mid]) {
      hi = mid - 1;
    } else if (key > keys[mid]) {
      lo = mid + 1;
    } else {
      return mid;
    }
  }
}

2129 2130 2131 2132 2133 2134 2135 2136 2137 2138 2139 2140 2141 2142 2143 2144 2145 2146 2147 2148 2149 2150
/**
 * Grow every output buffer of the query so that it can hold capacity rows.
 *
 * Nothing is done when capacity is smaller than the current pQuery->rec.capacity. Each
 * buffer is a tFilePage header followed by bytes * capacity bytes of data; after the
 * reallocation the output position of the matching function context is reset to the
 * start of the data.
 *
 * The allocation size is computed in size_t, so a wide column combined with a large
 * capacity cannot overflow 32-bit arithmetic.
 *
 * @param pRuntimeEnv runtime environment owning the query and the function contexts
 * @param capacity    requested number of rows; must be positive
 */
static void ensureOutputBufferSimple(SQueryRuntimeEnv* pRuntimeEnv, int32_t capacity) {
  SQuery* pQuery = pRuntimeEnv->pQuery;

  if (capacity < pQuery->rec.capacity) {
    return;
  }

  for (int32_t i = 0; i < pQuery->numOfOutput; ++i) {
    int32_t bytes = pQuery->pSelectExpr[i].bytes;
    assert(bytes > 0 && capacity > 0);

    size_t allocSize = (size_t)bytes * (size_t)capacity + sizeof(tFilePage);

    char *tmp = realloc(pQuery->sdata[i], allocSize);
    if (tmp == NULL) {  // todo handle the oom
      assert(0);
    } else {
      pQuery->sdata[i] = (tFilePage *)tmp;
    }

    // set the pCtx output buffer position
    pRuntimeEnv->pCtx[i].aOutputBuf = pQuery->sdata[i]->data;
  }

  qDebug("QInfo:%p realloc output buffer to inc output buffer from: %" PRId64 " rows to:%d rows", GET_QINFO_ADDR(pRuntimeEnv),
         pQuery->rec.capacity, capacity);

  pQuery->rec.capacity = capacity;
}

2157 2158 2159
/**
 * Make sure the output buffers can take all rows of the next data block.
 *
 * Applies only to queries that are neither interval, nor group-by-normal-column, nor
 * fixed output queries (prj/diff style queries). When the free room of the buffers is
 * smaller than the number of rows in the block, every buffer is grown by the missing
 * rows, the part beyond the rows already produced is zeroed, and the output position of
 * each function context is moved behind the existing rows. For TOP, BOTTOM and DIFF the
 * timestamp output buffer is pointed to the output buffer of the first context.
 *
 * The allocation size is computed in size_t, so a wide column combined with a large
 * capacity cannot overflow 32-bit arithmetic.
 *
 * @param pRuntimeEnv runtime environment owning the query and the function contexts
 * @param pBlockInfo  the data block about to be processed
 */
static void ensureOutputBuffer(SQueryRuntimeEnv* pRuntimeEnv, SDataBlockInfo* pBlockInfo) {
  // in case of prj/diff query, ensure the output buffer is sufficient to accommodate the results of current block
  SQuery* pQuery = pRuntimeEnv->pQuery;
  if (!QUERY_IS_INTERVAL_QUERY(pQuery) && !pRuntimeEnv->groupbyNormalCol && !isFixedOutputQuery(pQuery)) {
    SResultRec *pRec = &pQuery->rec;

    if (pQuery->rec.capacity - pQuery->rec.rows < pBlockInfo->rows) {
      int32_t remain = pRec->capacity - pRec->rows;
      int32_t newSize = pRec->capacity + (pBlockInfo->rows - remain);

      for (int32_t i = 0; i < pQuery->numOfOutput; ++i) {
        int32_t bytes = pQuery->pSelectExpr[i].bytes;
        assert(bytes > 0 && newSize > 0);

        size_t allocSize = (size_t)bytes * (size_t)newSize + sizeof(tFilePage);

        char *tmp = realloc(pQuery->sdata[i], allocSize);
        if (tmp == NULL) {  // todo handle the oom
          assert(0);
        } else {
          // clear everything behind the rows that have been produced so far
          memset(tmp + sizeof(tFilePage) + bytes * pRec->rows, 0, (size_t)(newSize - pRec->rows) * bytes);
          pQuery->sdata[i] = (tFilePage *)tmp;
        }

        // set the pCtx output buffer position
        pRuntimeEnv->pCtx[i].aOutputBuf = pQuery->sdata[i]->data + pRec->rows * bytes;

        int32_t functionId = pQuery->pSelectExpr[i].base.functionId;
        if (functionId == TSDB_FUNC_TOP || functionId == TSDB_FUNC_BOTTOM || functionId == TSDB_FUNC_DIFF) {
          pRuntimeEnv->pCtx[i].ptsOutputBuf = pRuntimeEnv->pCtx[0].aOutputBuf;
        }
      }

      qDebug("QInfo:%p realloc output buffer, new size: %d rows, old:%" PRId64 ", remain:%" PRId64, GET_QINFO_ADDR(pRuntimeEnv),
             newSize, pRec->capacity, newSize - pRec->rows);

      pRec->capacity = newSize;
    }
  }
}

2196 2197
/**
 * Scan the data blocks delivered by the query handle of the current scan (the main handle
 * during the master scan, the secondary handle otherwise) and apply the query functions
 * to each of them.
 *
 * The loop ends when no block is left, when the query is killed, or when the status of
 * the query carries QUERY_RESBUF_FULL or QUERY_COMPLETED. If the result buffer is not
 * full afterwards, the query is marked completed; for an interval query in the master
 * scan the time windows are then closed and the redundant ones removed.
 *
 * @return always 0
 */
static int64_t doScanAllDataBlocks(SQueryRuntimeEnv *pRuntimeEnv) {
  SQuery          *pQuery = pRuntimeEnv->pQuery;
  STableQueryInfo *pTableInfo = pQuery->current;
  SQueryCostInfo  *pCost = &pRuntimeEnv->summary;
  SWindowResInfo  *pWinInfo = &pRuntimeEnv->windowResInfo;

  qDebug("QInfo:%p query start, qrange:%" PRId64 "-%" PRId64 ", lastkey:%" PRId64 ", order:%d",
         GET_QINFO_ADDR(pRuntimeEnv), pTableInfo->win.skey, pTableInfo->win.ekey, pTableInfo->lastKey,
         pQuery->order.order);

  TsdbQueryHandleT pHandle = pRuntimeEnv->pSecQueryHandle;
  if (IS_MASTER_SCAN(pRuntimeEnv)) {
    pHandle = pRuntimeEnv->pQueryHandle;
  }

  SDataBlockInfo blockInfo = {0};

  while (tsdbNextDataBlock(pHandle)) {
    pCost->totalBlocks += 1;
    if (isQueryKilled(GET_QINFO_ADDR(pRuntimeEnv))) {
      return 0;
    }

    tsdbRetrieveDataBlockInfo(pHandle, &blockInfo);

    // todo extract methods
    // first block of an interval query: fix the start of the first time window
    if (QUERY_IS_INTERVAL_QUERY(pQuery) && pWinInfo->prevSKey == TSKEY_INITIAL_VAL) {
      STimeWindow clipped = TSWINDOW_INITIALIZER, aligned = TSWINDOW_INITIALIZER;

      const bool ascending = QUERY_IS_ASC_QUERY(pQuery);
      if (ascending) {
        getAlignQueryTimeWindow(pQuery, blockInfo.window.skey, blockInfo.window.skey, pQuery->window.ekey, &clipped,
                                &aligned);
      } else {
        // the start position of the first time window in the endpoint that spreads beyond the queried last timestamp
        getAlignQueryTimeWindow(pQuery, blockInfo.window.ekey, pQuery->window.ekey, blockInfo.window.ekey, &clipped,
                                &aligned);
      }

      pWinInfo->startTime = ascending ? aligned.skey : pQuery->window.skey;
      pWinInfo->prevSKey = aligned.skey;

      if (pRuntimeEnv->pFillInfo != NULL) {
        pRuntimeEnv->pFillInfo->start = aligned.skey;
      }
    }

    // in case of prj/diff query, ensure the output buffer is sufficient to accommodate the results of current block
    ensureOutputBuffer(pRuntimeEnv, &blockInfo);

    SDataStatis *pStatis = NULL;
    SArray      *pDataBlock = loadDataBlockOnDemand(pRuntimeEnv, pHandle, &blockInfo, &pStatis);

    // query start position can not move into tableApplyFunctionsOnBlock due to limit/offset condition
    if (QUERY_IS_ASC_QUERY(pQuery)) {
      pQuery->pos = 0;
    } else {
      pQuery->pos = blockInfo.rows - 1;
    }

    int32_t numOfRes = tableApplyFunctionsOnBlock(pRuntimeEnv, &blockInfo, pStatis, binarySearchForKey, pDataBlock);

    pCost->totalRows += blockInfo.rows;
    qDebug("QInfo:%p check data block, brange:%" PRId64 "-%" PRId64 ", numOfRows:%d, numOfRes:%d, lastKey:%"PRId64, GET_QINFO_ADDR(pRuntimeEnv),
           blockInfo.window.skey, blockInfo.window.ekey, blockInfo.rows, numOfRes, pQuery->current->lastKey);

    // while the output buffer is full or limit/offset is applied, query may be paused here
    if (Q_STATUS_EQUAL(pQuery->status, QUERY_RESBUF_FULL | QUERY_COMPLETED)) {
      break;
    }
  }

  // if the result buffer is not full, set the query complete
  if (!Q_STATUS_EQUAL(pQuery->status, QUERY_RESBUF_FULL)) {
    setQueryStatus(pQuery, QUERY_COMPLETED);
  }

  if (!(QUERY_IS_INTERVAL_QUERY(pQuery) && IS_MASTER_SCAN(pRuntimeEnv))) {
    return 0;
  }

  if (!Q_STATUS_EQUAL(pQuery->status, QUERY_COMPLETED)) {
    assert(Q_STATUS_EQUAL(pQuery->status, QUERY_RESBUF_FULL));
    return 0;
  }

  int32_t step = QUERY_IS_ASC_QUERY(pQuery) ? QUERY_ASC_FORWARD_STEP : QUERY_DESC_FORWARD_STEP;

  closeAllTimeWindow(pWinInfo);
  removeRedundantWindow(pWinInfo, pTableInfo->lastKey - step, step);
  pWinInfo->curIndex = pWinInfo->size - 1;  // point to the last time window

  return 0;
}

/*
 * set tag value in SQLFunctionCtx
 * e.g.,tag information into input buffer
 */
2282
/**
 * Store the value of one tag column of a table in a variant.
 *
 * The previous content of tag is destroyed first. For TSDB_TBNAME_COLUMN_INDEX the table
 * name is stored as binary. Otherwise the tag value is fetched from the table; a missing
 * or NULL value turns the variant into TSDB_DATA_TYPE_NULL. Binary/nchar values are taken
 * through varDataVal/varDataLen, all other types are copied with the given width.
 *
 * @param tsdb     not used by this function
 * @param pTable   table to read the name / tag value from
 * @param tagColId id of the tag column, or TSDB_TBNAME_COLUMN_INDEX
 * @param tag      [out] variant receiving the value
 * @param type     data type of the tag column
 * @param bytes    width of the tag column
 */
static void doSetTagValueInParam(void *tsdb, void* pTable, int32_t tagColId, tVariant *tag, int16_t type, int16_t bytes) {
  tVariantDestroy(tag);

  if (tagColId == TSDB_TBNAME_COLUMN_INDEX) {
    char *pName = tsdbGetTableName(pTable);
    assert(pName != NULL);

    tVariantCreateFromBinary(tag, varDataVal(pName), varDataLen(pName), TSDB_DATA_TYPE_BINARY);
    return;
  }

  char *pTagVal = tsdbGetTableTagVal(pTable, tagColId, type, bytes);

  // neither a missing value nor a NULL value has a payload to copy
  if (pTagVal == NULL || isNull(pTagVal, type)) {
    tag->nType = TSDB_DATA_TYPE_NULL;
    return;
  }

  const bool varLength = (type == TSDB_DATA_TYPE_BINARY) || (type == TSDB_DATA_TYPE_NCHAR);
  if (varLength) {
    tVariantCreateFromBinary(tag, varDataVal(pTagVal), varDataLen(pTagVal), type);
  } else {
    tVariantCreateFromBinary(tag, pTagVal, bytes, type);
  }
}

2315
void setTagVal(SQueryRuntimeEnv *pRuntimeEnv, void *pTable, void *tsdb) {
2316
  SQuery *pQuery = pRuntimeEnv->pQuery;
H
Haojun Liao 已提交
2317
  SQInfo* pQInfo = GET_QINFO_ADDR(pRuntimeEnv);
2318

H
[td-90]  
Haojun Liao 已提交
2319 2320 2321
  SExprInfo *pExprInfo = &pQuery->pSelectExpr[0];
  if (pQuery->numOfOutput == 1 && pExprInfo->base.functionId == TSDB_FUNC_TS_COMP) {
    assert(pExprInfo->base.numOfParams == 1);
H
Haojun Liao 已提交
2322 2323 2324 2325 2326 2327 2328 2329 2330 2331

    // todo refactor extract function.
    int16_t type = -1, bytes = -1;
    for(int32_t i = 0; i < pQuery->numOfTags; ++i) {
      if (pQuery->tagColList[i].colId == pExprInfo->base.arg->argValue.i64) {
        type = pQuery->tagColList[i].type;
        bytes = pQuery->tagColList[i].bytes;
      }
    }

2332
    doSetTagValueInParam(tsdb, pTable, pExprInfo->base.arg->argValue.i64, &pRuntimeEnv->pCtx[0].tag, type, bytes);
2333 2334
  } else {
    // set tag value, by which the results are aggregated.
2335
    for (int32_t idx = 0; idx < pQuery->numOfOutput; ++idx) {
H
Haojun Liao 已提交
2336
      SExprInfo* pLocalExprInfo = &pQuery->pSelectExpr[idx];
H
[td-90]  
Haojun Liao 已提交
2337
  
2338
      // ts_comp column required the tag value for join filter
H
Haojun Liao 已提交
2339
      if (!TSDB_COL_IS_TAG(pLocalExprInfo->base.colInfo.flag)) {
2340 2341
        continue;
      }
2342

2343
      // todo use tag column index to optimize performance
2344
      doSetTagValueInParam(tsdb, pTable, pLocalExprInfo->base.colInfo.colId, &pRuntimeEnv->pCtx[idx].tag,
H
Haojun Liao 已提交
2345
                           pLocalExprInfo->type, pLocalExprInfo->bytes);
2346
    }
2347

2348
    // set the join tag for first column
H
[td-90]  
Haojun Liao 已提交
2349
    SSqlFuncMsg *pFuncMsg = &pExprInfo->base;
H
Haojun Liao 已提交
2350
    if ((pFuncMsg->functionId == TSDB_FUNC_TS || pFuncMsg->functionId == TSDB_FUNC_PRJ) && pFuncMsg->colInfo.colIndex == PRIMARYKEY_TIMESTAMP_COL_INDEX &&
2351 2352
        pRuntimeEnv->pTSBuf != NULL) {
      assert(pFuncMsg->numOfParams == 1);
H
Haojun Liao 已提交
2353 2354 2355 2356 2357 2358 2359 2360 2361 2362

      // todo refactor
      int16_t type = -1, bytes = -1;
      for(int32_t i = 0; i < pQuery->numOfTags; ++i) {
        if (pQuery->tagColList[i].colId == pExprInfo->base.arg->argValue.i64) {
          type = pQuery->tagColList[i].type;
          bytes = pQuery->tagColList[i].bytes;
        }
      }

2363
      doSetTagValueInParam(tsdb, pTable, pExprInfo->base.arg->argValue.i64, &pRuntimeEnv->pCtx[0].tag, type, bytes);
2364
      qDebug("QInfo:%p set tag value for join comparison, colId:%" PRId64 ", val:%"PRId64, pQInfo, pExprInfo->base.arg->argValue.i64,
B
Bomin Zhang 已提交
2365
          pRuntimeEnv->pCtx[0].tag.i64Key)
2366 2367 2368 2369 2370 2371 2372
    }
  }
}

/*
 * Feed one window result into the merge functions of all output columns.
 *
 * mergeFlag == false starts a new output row: each context's output pointer is
 * advanced by one row, its result info is reset and the function re-initialised.
 * mergeFlag == true merges into the current output row.
 *
 * All contexts are prepared first; only afterwards distMergeFunc is invoked for
 * every column except TSDB_FUNC_TAG_DUMMY.
 */
static void doMerge(SQueryRuntimeEnv *pRuntimeEnv, int64_t timestamp, SWindowResult *pWindowRes, bool mergeFlag) {
  SQuery *pQuery = pRuntimeEnv->pQuery;

  // pass 1: set up the input/output state of every column
  for (int32_t col = 0; col < pQuery->numOfOutput; ++col) {
    SQLFunctionCtx *pColCtx = &pRuntimeEnv->pCtx[col];
    int32_t         funcId = pQuery->pSelectExpr[col].base.functionId;

    if (!mergeFlag) {
      pColCtx->aOutputBuf = pColCtx->aOutputBuf + pColCtx->outputBytes;
      pColCtx->currentStage = FIRST_STAGE_MERGE;

      resetResultInfo(pColCtx->resultInfo);
      aAggs[funcId].init(pColCtx);
    }

    pColCtx->hasNull = true;
    pColCtx->nStartQueryTimestamp = timestamp;
    pColCtx->aInputElemBuf = getPosInResultPage(pRuntimeEnv, col, pWindowRes);

    if (funcId != TSDB_FUNC_TAG_DUMMY && funcId != TSDB_FUNC_TAG) {
      continue;
    }

    // in case of tag column, the tag information should be extracted from input buffer
    tVariantDestroy(&pColCtx->tag);

    int32_t type = pColCtx->outputType;
    if (type == TSDB_DATA_TYPE_BINARY || type == TSDB_DATA_TYPE_NCHAR) {
      tVariantCreateFromBinary(&pColCtx->tag, varDataVal(pColCtx->aInputElemBuf), varDataLen(pColCtx->aInputElemBuf),
                               type);
    } else {
      tVariantCreateFromBinary(&pColCtx->tag, pColCtx->aInputElemBuf, pColCtx->inputBytes, pColCtx->inputType);
    }
  }

  // pass 2: run the merge function of every column that produces output
  for (int32_t col = 0; col < pQuery->numOfOutput; ++col) {
    int32_t funcId = pQuery->pSelectExpr[col].base.functionId;
    if (funcId != TSDB_FUNC_TAG_DUMMY) {
      aAggs[funcId].distMergeFunc(&pRuntimeEnv->pCtx[col]);
    }
  }
}

2412
static UNUSED_FUNC void printBinaryData(int32_t functionId, char *data, int32_t srcDataType) {
2413 2414 2415 2416 2417 2418 2419 2420 2421 2422 2423 2424 2425 2426 2427 2428 2429 2430 2431 2432 2433 2434 2435 2436 2437 2438 2439 2440 2441 2442 2443 2444 2445 2446 2447 2448 2449 2450 2451 2452 2453 2454 2455 2456 2457 2458 2459 2460 2461 2462 2463 2464 2465 2466 2467 2468 2469 2470 2471 2472 2473 2474 2475 2476 2477 2478 2479 2480
  if (functionId == TSDB_FUNC_FIRST_DST || functionId == TSDB_FUNC_LAST_DST) {
    switch (srcDataType) {
      case TSDB_DATA_TYPE_BINARY:
        printf("%" PRId64 ",%s\t", *(TSKEY *)data, (data + TSDB_KEYSIZE + 1));
        break;
      case TSDB_DATA_TYPE_TINYINT:
      case TSDB_DATA_TYPE_BOOL:
        printf("%" PRId64 ",%d\t", *(TSKEY *)data, *(int8_t *)(data + TSDB_KEYSIZE + 1));
        break;
      case TSDB_DATA_TYPE_SMALLINT:
        printf("%" PRId64 ",%d\t", *(TSKEY *)data, *(int16_t *)(data + TSDB_KEYSIZE + 1));
        break;
      case TSDB_DATA_TYPE_BIGINT:
      case TSDB_DATA_TYPE_TIMESTAMP:
        printf("%" PRId64 ",%" PRId64 "\t", *(TSKEY *)data, *(TSKEY *)(data + TSDB_KEYSIZE + 1));
        break;
      case TSDB_DATA_TYPE_INT:
        printf("%" PRId64 ",%d\t", *(TSKEY *)data, *(int32_t *)(data + TSDB_KEYSIZE + 1));
        break;
      case TSDB_DATA_TYPE_FLOAT:
        printf("%" PRId64 ",%f\t", *(TSKEY *)data, *(float *)(data + TSDB_KEYSIZE + 1));
        break;
      case TSDB_DATA_TYPE_DOUBLE:
        printf("%" PRId64 ",%lf\t", *(TSKEY *)data, *(double *)(data + TSDB_KEYSIZE + 1));
        break;
    }
  } else if (functionId == TSDB_FUNC_AVG) {
    printf("%lf,%d\t", *(double *)data, *(int32_t *)(data + sizeof(double)));
  } else if (functionId == TSDB_FUNC_SPREAD) {
    printf("%lf,%lf\t", *(double *)data, *(double *)(data + sizeof(double)));
  } else if (functionId == TSDB_FUNC_TWA) {
    data += 1;
    printf("%lf,%" PRId64 ",%" PRId64 ",%" PRId64 "\t", *(double *)data, *(int64_t *)(data + 8),
           *(int64_t *)(data + 16), *(int64_t *)(data + 24));
  } else if (functionId == TSDB_FUNC_MIN || functionId == TSDB_FUNC_MAX) {
    switch (srcDataType) {
      case TSDB_DATA_TYPE_TINYINT:
      case TSDB_DATA_TYPE_BOOL:
        printf("%d\t", *(int8_t *)data);
        break;
      case TSDB_DATA_TYPE_SMALLINT:
        printf("%d\t", *(int16_t *)data);
        break;
      case TSDB_DATA_TYPE_BIGINT:
      case TSDB_DATA_TYPE_TIMESTAMP:
        printf("%" PRId64 "\t", *(int64_t *)data);
        break;
      case TSDB_DATA_TYPE_INT:
        printf("%d\t", *(int *)data);
        break;
      case TSDB_DATA_TYPE_FLOAT:
        printf("%f\t", *(float *)data);
        break;
      case TSDB_DATA_TYPE_DOUBLE:
        printf("%f\t", *(float *)data);
        break;
    }
  } else if (functionId == TSDB_FUNC_SUM) {
    if (srcDataType == TSDB_DATA_TYPE_FLOAT || srcDataType == TSDB_DATA_TYPE_DOUBLE) {
      printf("%lf\t", *(float *)data);
    } else {
      printf("%" PRId64 "\t", *(int64_t *)data);
    }
  } else {
    printf("%s\t", data);
  }
}

2481
/*
 * Debug helper: dump numOfRows rows of the column-wise intermediate result
 * pages to stdout, one line per row and one tab-terminated field per output
 * column. Column types without a case below print nothing.
 */
void UNUSED_FUNC displayInterResult(tFilePage **pdata, SQueryRuntimeEnv* pRuntimeEnv, int32_t numOfRows) {
  SQuery* pQuery = pRuntimeEnv->pQuery;
  int32_t numOfCols = pQuery->numOfOutput;
  printf("super table query intermediate result, total:%d\n", numOfRows);

  for (int32_t row = 0; row < numOfRows; ++row) {
    for (int32_t col = 0; col < numOfCols; ++col) {
      SExprInfo *pExpr = &pQuery->pSelectExpr[col];
      char      *cell = pdata[col]->data + pExpr->bytes * row;  // column pages store rows back to back

      switch (pExpr->type) {
        case TSDB_DATA_TYPE_BINARY: {
          // binary output may hold a function specific intermediate layout
          int32_t type = pExpr->type;
          printBinaryData(pExpr->base.functionId, cell, type);
          break;
        }
        case TSDB_DATA_TYPE_TIMESTAMP:
        case TSDB_DATA_TYPE_BIGINT:
          printf("%" PRId64 "\t", *(int64_t *)cell);
          break;
        case TSDB_DATA_TYPE_INT:
          printf("%d\t", *(int32_t *)cell);
          break;
        case TSDB_DATA_TYPE_FLOAT:
          printf("%f\t", *(float *)cell);
          break;
        case TSDB_DATA_TYPE_DOUBLE:
          printf("%lf\t", *(double *)cell);
          break;
      }
    }
    printf("\n");
  }
}

typedef struct SCompSupporter {
H
hjxilinx 已提交
2520 2521 2522
  STableQueryInfo **pTableQueryInfo;
  int32_t *         position;
  SQInfo *          pQInfo;
2523 2524 2525 2526 2527
} SCompSupporter;

int32_t tableResultComparFn(const void *pLeft, const void *pRight, void *param) {
  int32_t left = *(int32_t *)pLeft;
  int32_t right = *(int32_t *)pRight;
2528

2529 2530
  SCompSupporter *  supporter = (SCompSupporter *)param;
  SQueryRuntimeEnv *pRuntimeEnv = &supporter->pQInfo->runtimeEnv;
2531

2532 2533
  int32_t leftPos = supporter->position[left];
  int32_t rightPos = supporter->position[right];
2534

2535 2536 2537 2538
  /* left source is exhausted */
  if (leftPos == -1) {
    return 1;
  }
2539

2540 2541 2542 2543
  /* right source is exhausted*/
  if (rightPos == -1) {
    return -1;
  }
2544

H
hjxilinx 已提交
2545
  SWindowResInfo *pWindowResInfo1 = &supporter->pTableQueryInfo[left]->windowResInfo;
2546
  SWindowResult * pWindowRes1 = getWindowResult(pWindowResInfo1, leftPos);
2547

2548 2549
  char *b1 = getPosInResultPage(pRuntimeEnv, PRIMARYKEY_TIMESTAMP_COL_INDEX, pWindowRes1);
  TSKEY leftTimestamp = GET_INT64_VAL(b1);
2550

H
hjxilinx 已提交
2551
  SWindowResInfo *pWindowResInfo2 = &supporter->pTableQueryInfo[right]->windowResInfo;
2552
  SWindowResult * pWindowRes2 = getWindowResult(pWindowResInfo2, rightPos);
2553

2554 2555
  char *b2 = getPosInResultPage(pRuntimeEnv, PRIMARYKEY_TIMESTAMP_COL_INDEX, pWindowRes2);
  TSKEY rightTimestamp = GET_INT64_VAL(b2);
2556

2557 2558 2559
  if (leftTimestamp == rightTimestamp) {
    return 0;
  }
2560

2561 2562 2563
  return leftTimestamp > rightTimestamp ? 1 : -1;
}

2564
/*
 * Merge table groups, starting at pQInfo->groupIndex, until one group yields
 * at least one result or all groups are consumed. groupIndex is left pointing
 * past the last group that was merged.
 *
 * Returns TSDB_CODE_SUCCESS, or -1 when merging a group failed.
 */
int32_t mergeIntoGroupResult(SQInfo *pQInfo) {
  int64_t startMs = taosGetTimestampMs();
  int32_t numOfGroups = GET_NUM_OF_TABLEGROUP(pQInfo);

  while (pQInfo->groupIndex < numOfGroups) {
    SArray *pGroup = GET_TABLEGROUP(pQInfo, pQInfo->groupIndex);

    int32_t ret = mergeIntoGroupResultImpl(pQInfo, pGroup);
    if (ret < 0) {  // not enough disk space to save the data into disk
      return -1;
    }

    pQInfo->groupIndex += 1;

    if (ret == 0) {
      // empty group, move on to the next one
      assert(pQInfo->numOfGroupResultPages == 0);
      qDebug("QInfo:%p no result in group %d, continue", pQInfo, pQInfo->groupIndex - 1);
      continue;
    }

    // this group generates at least one result, return results
    break;
  }

  qDebug("QInfo:%p merge res data into group, index:%d, total group:%d, elapsed time:%" PRId64 "ms", pQInfo,
         pQInfo->groupIndex - 1, numOfGroups, taosGetTimestampMs() - startMs);

  return TSDB_CODE_SUCCESS;
}

/*
 * Copy the next batch of merged group results from the disk-based result
 * buffer into the query output buffers (pQuery->sdata) and account for them in
 * pQuery->rec.rows.
 *
 * When every batch of the current group has been handed out, the next group is
 * merged first. If no group is left, pQInfo->tableIndex is set to the number
 * of tables to mark the query as completed and nothing is copied.
 */
void copyResToQueryResultBuf(SQInfo *pQInfo, SQuery *pQuery) {
  bool currentGroupDrained = (pQInfo->offset == pQInfo->numOfGroupResultPages);
  if (currentGroupDrained) {
    pQInfo->numOfGroupResultPages = 0;

    // current results of group has been sent to client, try next group
    if (mergeIntoGroupResult(pQInfo) != TSDB_CODE_SUCCESS) {
      return;  // failed to save data in the disk
    }

    // check if all results has been sent to client
    int32_t numOfGroup = GET_NUM_OF_TABLEGROUP(pQInfo);
    if (pQInfo->numOfGroupResultPages == 0 && pQInfo->groupIndex == numOfGroup) {
      pQInfo->tableIndex = pQInfo->tableqinfoGroupInfo.numOfTables;  // set query completed
      return;
    }
  }

  SQueryRuntimeEnv    *pRuntimeEnv = &pQInfo->runtimeEnv;
  SDiskbasedResultBuf *pResultBuf = pRuntimeEnv->pResultBuf;

  // groupIndex already points past the merged group, hence the -1
  int32_t baseId = getGroupResultId(pQInfo->groupIndex - 1);
  SIDList pages = getDataBufPagesIdList(pResultBuf, pQInfo->offset + baseId);

  // total number of rows held by all pages of this batch
  int32_t numOfRows = 0;
  for (int32_t p = 0; p < pages.size; ++p) {
    tFilePage *pPage = GET_RES_BUF_PAGE_BY_ID(pResultBuf, pages.pData[p]);
    numOfRows += pPage->num;
  }

  // append the pages column by column to the output buffers
  int32_t rowsCopied = 0;
  for (int32_t p = 0; p < pages.size; ++p) {
    tFilePage *pPage = GET_RES_BUF_PAGE_BY_ID(pResultBuf, pages.pData[p]);

    for (int32_t col = 0; col < pQuery->numOfOutput; ++col) {
      int32_t colBytes = pRuntimeEnv->pCtx[col].outputBytes;
      char   *dst = pQuery->sdata[col]->data + rowsCopied * colBytes;
      char   *src = pPage->data + pRuntimeEnv->offset[col] * pPage->num;

      memcpy(dst, src, colBytes * pPage->num);
    }

    rowsCopied += pPage->num;
  }

  assert(pQuery->rec.rows == 0);

  pQuery->rec.rows += numOfRows;
  pQInfo->offset += 1;
}

H
Haojun Liao 已提交
2646
/*
 * Return the number of results held by a window result: the numOfRes of the
 * first output column that is not a ts/tag/tagprj function and has a positive
 * count, or 0 when no such column exists.
 */
int64_t getNumOfResultWindowRes(SQuery *pQuery, SWindowResult *pWindowRes) {
  for (int32_t col = 0; col < pQuery->numOfOutput; ++col) {
    int32_t funcId = pQuery->pSelectExpr[col].base.functionId;

    /*
     * ts, tag, tagprj function can not decide the output number of current query
     * the number of output result is decided by main output
     */
    bool isAuxiliary = (funcId == TSDB_FUNC_TS || funcId == TSDB_FUNC_TAG || funcId == TSDB_FUNC_TAGPRJ);
    if (!isAuxiliary) {
      SResultInfo *pInfo = &pWindowRes->resultInfo[col];
      assert(pInfo != NULL);

      if (pInfo->numOfRes > 0) {
        return pInfo->numOfRes;
      }
    }
  }

  return 0;
}

2669
/*
 * Merge the window results of all tables of one group, ordered by timestamp,
 * into the disk-based group result buffer.
 *
 * Tables that have result pages and at least one window result are used as
 * sources of a loser tree. Window results with an equal timestamp are merged
 * into one output row; a new timestamp starts a new row. Whenever the output
 * buffer reaches pQuery->rec.capacity rows it is flushed through
 * flushFromResultBuf().
 *
 * Returns the number of result batches of the group
 * (pQInfo->numOfGroupResultPages, 0 if the group has no data) or -1 if a flush
 * failed. On allocation failure the function does not return but jumps to
 * pRuntimeEnv->env with TSDB_CODE_QRY_OUT_OF_MEMORY.
 *
 * Every temporary allocation is released on all exit paths, including the
 * failure paths.
 */
int32_t mergeIntoGroupResultImpl(SQInfo *pQInfo, SArray *pGroup) {
  SQueryRuntimeEnv *pRuntimeEnv = &pQInfo->runtimeEnv;
  SQuery *          pQuery = pRuntimeEnv->pQuery;

  size_t      size = taosArrayGetSize(pGroup);
  tFilePage **buffer = pQuery->sdata;

  int32_t *         posList = calloc(size, sizeof(int32_t));
  STableQueryInfo **pTableList = malloc(POINTER_BYTES * size);

  if (pTableList == NULL || posList == NULL) {
    tfree(posList);
    tfree(pTableList);

    qError("QInfo:%p failed alloc memory", pQInfo);
    longjmp(pRuntimeEnv->env, TSDB_CODE_QRY_OUT_OF_MEMORY);
  }

  // collect the tables that actually contribute results
  // todo opt for the case of one table per group
  int32_t numOfTables = 0;
  for (size_t i = 0; i < size; ++i) {
    STableQueryInfo *item = taosArrayGetP(pGroup, i);

    SIDList list = getDataBufPagesIdList(pRuntimeEnv->pResultBuf, TSDB_TABLEID(item->pTable)->tid);
    if (list.size > 0 && item->windowResInfo.size > 0) {
      pTableList[numOfTables] = item;
      numOfTables += 1;
    }
  }

  if (numOfTables == 0) {
    tfree(posList);
    tfree(pTableList);

    assert(pQInfo->numOfGroupResultPages == 0);
    return 0;
  }

  SCompSupporter cs = {pTableList, posList, pQInfo};

  // pTree starts as NULL, so it is still NULL afterwards if the tree could not be created
  SLoserTreeInfo *pTree = NULL;
  tLoserTreeCreate(&pTree, numOfTables, &cs, tableResultComparFn);

  SResultInfo *pResultInfo = calloc(pQuery->numOfOutput, sizeof(SResultInfo));
  if (pTree == NULL || pResultInfo == NULL) {
    tfree(pResultInfo);
    tfree(pTree);
    tfree(pTableList);
    tfree(posList);

    qError("QInfo:%p failed alloc memory", pQInfo);
    longjmp(pRuntimeEnv->env, TSDB_CODE_QRY_OUT_OF_MEMORY);
  }

  setWindowResultInfo(pResultInfo, pQuery, pRuntimeEnv->stableQuery);
  resetMergeResultBuf(pQuery, pRuntimeEnv->pCtx, pResultInfo);

  int32_t code = TSDB_CODE_SUCCESS;
  int64_t lastTimestamp = -1;
  int64_t startt = taosGetTimestampMs();

  while (1) {
    int32_t pos = pTree->pNode[0].index;  // source holding the smallest timestamp

    SWindowResInfo *pWindowResInfo = &pTableList[pos]->windowResInfo;
    SWindowResult * pWindowRes = getWindowResult(pWindowResInfo, cs.position[pos]);

    char *b = getPosInResultPage(pRuntimeEnv, PRIMARYKEY_TIMESTAMP_COL_INDEX, pWindowRes);
    TSKEY ts = GET_INT64_VAL(b);

    assert(ts == pWindowRes->window.skey);
    int64_t num = getNumOfResultWindowRes(pQuery, pWindowRes);
    if (num > 0) {
      if (ts == lastTimestamp) {  // merge with the last one
        doMerge(pRuntimeEnv, ts, pWindowRes, true);
      } else {  // copy data to disk buffer
        if (buffer[0]->num == pQuery->rec.capacity) {
          code = flushFromResultBuf(pQInfo);
          if (code != TSDB_CODE_SUCCESS) {
            break;
          }

          resetMergeResultBuf(pQuery, pRuntimeEnv->pCtx, pResultInfo);
        }

        doMerge(pRuntimeEnv, ts, pWindowRes, false);
        buffer[0]->num += 1;
      }

      lastTimestamp = ts;
    }

    // advance the cursor of this source, empty window results are simply skipped
    cs.position[pos] += 1;
    if (cs.position[pos] >= pWindowResInfo->size) {
      cs.position[pos] = -1;

      // all input sources are exhausted
      if (--numOfTables == 0) {
        break;
      }
    }

    tLoserTreeAdjust(pTree, pos + pTree->numOfEntries);
  }

  if (code == TSDB_CODE_SUCCESS && buffer[0]->num != 0) {  // there are data in buffer
    code = flushFromResultBuf(pQInfo);
  }

  if (code != TSDB_CODE_SUCCESS) {
    qError("QInfo:%p failed to flush data into temp file, abort query", pQInfo);
  } else {
    int64_t endt = taosGetTimestampMs();

#ifdef _DEBUG_VIEW
    displayInterResult(pQuery->sdata, pRuntimeEnv, pQuery->sdata[0]->num);
#endif

    qDebug("QInfo:%p result merge completed for group:%d, elapsed time:%" PRId64 " ms", pQInfo, pQInfo->groupIndex, endt - startt);

    pQInfo->offset = 0;
  }

  // common cleanup for the success and the failure path
  tfree(pTableList);
  tfree(posList);
  tfree(pTree);

  for (int32_t i = 0; i < pQuery->numOfOutput; ++i) {
    tfree(pResultInfo[i].interResultBuf);
  }

  tfree(pResultInfo);

  return (code == TSDB_CODE_SUCCESS) ? pQInfo->numOfGroupResultPages : -1;
}

/*
 * Flush the rows currently held in the query output buffers (pQuery->sdata)
 * into the disk-based result buffer as one batch of the current group.
 *
 * The rows are split into pages of at most `capacity` rows. All pages of one
 * call are registered under the same id, getGroupResultId(groupIndex) +
 * numOfGroupResultPages, and numOfGroupResultPages is increased by one per
 * call (i.e. per batch, not per page).
 *
 * Returns TSDB_CODE_SUCCESS, or -1 if a row does not fit into a page or no
 * page could be obtained; callers only compare against TSDB_CODE_SUCCESS.
 */
int32_t flushFromResultBuf(SQInfo *pQInfo) {
  SQueryRuntimeEnv *pRuntimeEnv = &pQInfo->runtimeEnv;
  SQuery *          pQuery = pRuntimeEnv->pQuery;

  SDiskbasedResultBuf *pResultBuf = pRuntimeEnv->pResultBuf;
  int32_t              capacity = (DEFAULT_INTERN_BUF_PAGE_SIZE - sizeof(tFilePage)) / pQuery->rowSize;

  int32_t pageId = -1;

  int32_t remain = pQuery->sdata[0]->num;
  int32_t offset = 0;

  // with no room for even a single row the loop below would never make progress
  if (remain > 0 && capacity <= 0) {
    qError("QInfo:%p row size:%d exceeds the size of result page", pQInfo, (int32_t)pQuery->rowSize);
    return -1;
  }

  while (remain > 0) {
    int32_t r = (remain > capacity) ? capacity : remain;

    int32_t    id = getGroupResultId(pQInfo->groupIndex) + pQInfo->numOfGroupResultPages;
    tFilePage *buf = getNewDataBuf(pResultBuf, id, &pageId);
    if (buf == NULL) {
      // NOTE(review): defensive check, the page was dereferenced unchecked before
      qError("QInfo:%p failed to get new page from result buffer", pQInfo);
      return -1;
    }

    buf->num = r;

    // pagewise copy to dest buffer
    for (int32_t i = 0; i < pQuery->numOfOutput; ++i) {
      int32_t bytes = pRuntimeEnv->pCtx[i].outputBytes;

      memcpy(buf->data + pRuntimeEnv->offset[i] * buf->num, ((char *)pQuery->sdata[i]->data) + offset * bytes,
             buf->num * bytes);
    }

    offset += r;
    remain -= r;
  }

  pQInfo->numOfGroupResultPages += 1;
  return TSDB_CODE_SUCCESS;
}

/*
 * Rewind the merge output state: every context is bound to its entry in
 * pResultInfo, and every output page is marked empty.
 *
 * The output pointer is deliberately placed one row *before* the start of the
 * page because doMerge() advances it by one row before writing a new row.
 */
void resetMergeResultBuf(SQuery *pQuery, SQLFunctionCtx *pCtx, SResultInfo *pResultInfo) {
  for (int32_t col = 0; col < pQuery->numOfOutput; ++col) {
    SQLFunctionCtx *pColCtx = &pCtx[col];
    tFilePage      *pPage = pQuery->sdata[col];

    pColCtx->aOutputBuf = pPage->data - pColCtx->outputBytes;
    pColCtx->size = 1;
    pColCtx->startOffset = 0;
    pColCtx->resultInfo = &pResultInfo[col];

    pPage->num = 0;
  }
}

2863 2864 2865 2866 2867 2868 2869
/*
 * Prepare the per-table scan state for the scan in the opposite direction.
 *
 * The new window starts one step beyond the last key that was reached and ends
 * at the previous window start; lastKey is moved to the new start, the cursor
 * order is flipped and its vgroup index is invalidated. A NULL table info is
 * ignored.
 */
static void updateTableQueryInfoForReverseScan(SQuery *pQuery, STableQueryInfo *pTableQueryInfo) {
  if (pTableQueryInfo == NULL) {
    return;
  }

  // order has change already!
  int32_t step = GET_FORWARD_DIRECTION_FACTOR(pQuery->order.order);

  // TODO validate the relation between win.ekey and lastKey + step for both scan orders

  TSKEY prevStart = pTableQueryInfo->win.skey;
  TSKEY nextStart = pTableQueryInfo->lastKey + step;

  pTableQueryInfo->win.skey = nextStart;
  pTableQueryInfo->win.ekey = prevStart;
  pTableQueryInfo->lastKey = nextStart;

  SWITCH_ORDER(pTableQueryInfo->cur.order);
  pTableQueryInfo->cur.vgroupIndex = -1;
}

/*
 * For every closed window result, decide per output column whether the
 * function still has to run during the reverse scan:
 *  - first/first_dst in ascending order and last/last_dst in descending order
 *    are marked incomplete,
 *  - every other function except ts and tag is marked complete,
 *  - ts and tag are left untouched.
 */
static void disableFuncInReverseScanImpl(SQInfo* pQInfo, SWindowResInfo *pWindowResInfo, int32_t order) {
  SQuery* pQuery = pQInfo->runtimeEnv.pQuery;

  for (int32_t w = 0; w < pWindowResInfo->size; ++w) {
    SWindowStatus *pStatus = getTimeWindowResStatus(pWindowResInfo, w);
    if (pStatus->closed) {
      SWindowResult *pRes = getWindowResult(pWindowResInfo, w);

      // open/close the specified query for each group result
      for (int32_t col = 0; col < pQuery->numOfOutput; ++col) {
        int32_t functId = pQuery->pSelectExpr[col].base.functionId;

        bool isFirst = (functId == TSDB_FUNC_FIRST || functId == TSDB_FUNC_FIRST_DST);
        bool isLast = (functId == TSDB_FUNC_LAST || functId == TSDB_FUNC_LAST_DST);

        if ((isFirst && order == TSDB_ORDER_ASC) || (isLast && order == TSDB_ORDER_DESC)) {
          pRes->resultInfo[col].complete = false;
        } else if (functId != TSDB_FUNC_TS && functId != TSDB_FUNC_TAG) {
          pRes->resultInfo[col].complete = true;
        }
      }
    }
  }
}

2912 2913
/*
 * Prepare the query for the reverse scan.
 *
 * The completion flag of each function is updated, on the window results for
 * group-by-column / interval queries and directly on the contexts otherwise:
 * first/first_dst (ascending) and last/last_dst (descending) are marked
 * incomplete, all other functions except ts and tag are marked complete.
 * Afterwards the scan state of every table in every group is switched to the
 * reverse direction.
 */
void disableFuncInReverseScan(SQInfo *pQInfo) {
  SQueryRuntimeEnv* pRuntimeEnv = &pQInfo->runtimeEnv;
  SQuery *pQuery = pRuntimeEnv->pQuery;
  int32_t order = pQuery->order.order;

  if (pRuntimeEnv->groupbyNormalCol || QUERY_IS_INTERVAL_QUERY(pQuery)) {
    // group by normal columns and interval query on normal table
    disableFuncInReverseScanImpl(pQInfo, &pRuntimeEnv->windowResInfo, order);
  } else {
    // for simple result of table query, todo refactor
    for (int32_t col = 0; col < pQuery->numOfOutput; ++col) {
      SResultInfo *pResInfo = pRuntimeEnv->pCtx[col].resultInfo;
      if (pResInfo == NULL) {
        continue;  // resultInfo is NULL, means no data checked in previous scan
      }

      int32_t functId = pQuery->pSelectExpr[col].base.functionId;

      bool isFirst = (functId == TSDB_FUNC_FIRST || functId == TSDB_FUNC_FIRST_DST);
      bool isLast = (functId == TSDB_FUNC_LAST || functId == TSDB_FUNC_LAST_DST);

      if ((isFirst && order == TSDB_ORDER_ASC) || (isLast && order == TSDB_ORDER_DESC)) {
        pResInfo->complete = false;
      } else if (functId != TSDB_FUNC_TS && functId != TSDB_FUNC_TAG) {
        pResInfo->complete = true;
      }
    }
  }

  // flip the scan window of every table
  int32_t numOfGroups = GET_NUM_OF_TABLEGROUP(pQInfo);
  for (int32_t g = 0; g < numOfGroups; ++g) {
    SArray *pTables = GET_TABLEGROUP(pQInfo, g);

    size_t numOfTables = taosArrayGetSize(pTables);
    for (int32_t t = 0; t < numOfTables; ++t) {
      updateTableQueryInfoForReverseScan(pQuery, taosArrayGetP(pTables, t));
    }
  }
}

2952
/* Flip the scan order stored in the context of every output column. */
void switchCtxOrder(SQueryRuntimeEnv *pRuntimeEnv) {
  SQLFunctionCtx *pCtxList = pRuntimeEnv->pCtx;
  int32_t         numOfOutput = pRuntimeEnv->pQuery->numOfOutput;

  for (int32_t col = 0; col < numOfOutput; ++col) {
    SWITCH_ORDER(pCtxList[col].order);
  }
}

/*
 * Initialise a window result row: remember its position and allocate one
 * zeroed SResultInfo per output column, which setWindowResultInfo() then
 * prepares for the query.
 *
 * NOTE(review): the allocation result is handed to setWindowResultInfo()
 * without a NULL check, and this void interface has no way to report failure.
 */
void createQueryResultInfo(SQuery *pQuery, SWindowResult *pResultRow, bool isSTableQuery, SPosInfo *posInfo) {
  size_t numOfOutput = (size_t)pQuery->numOfOutput;

  pResultRow->pos = *posInfo;
  pResultRow->resultInfo = calloc(numOfOutput, sizeof(SResultInfo));

  // set the intermediate result output buffer
  setWindowResultInfo(pResultRow->resultInfo, pQuery, isSTableQuery);
}

/*
 * Rewind all output columns to the start of their output page: the output
 * pointer and the result info of each context are reset, the page is zeroed
 * for rec.capacity rows, and finally the functions are re-initialised through
 * initCtxOutputBuf().
 */
void resetCtxOutputBuf(SQueryRuntimeEnv *pRuntimeEnv) {
  SQuery *pQuery = pRuntimeEnv->pQuery;

  for (int32_t col = 0; col < pQuery->numOfOutput; ++col) {
    SQLFunctionCtx *pColCtx = &pRuntimeEnv->pCtx[col];
    SResultInfo    *pColResInfo = &pRuntimeEnv->resultInfo[col];
    char           *pOutput = pQuery->sdata[col]->data;

    pColCtx->aOutputBuf = pOutput;

    /*
     * set the output buffer information and intermediate buffer
     * not all queries require the interResultBuf, such as COUNT/TAGPRJ/PRJ/TAG etc.
     */
    resetResultInfo(pColResInfo);
    pColCtx->resultInfo = pColResInfo;

    // top/bottom/diff write their timestamps into the output buffer of the first column
    int32_t functionId = pQuery->pSelectExpr[col].base.functionId;
    bool    needTsOutput =
        (functionId == TSDB_FUNC_TOP || functionId == TSDB_FUNC_BOTTOM || functionId == TSDB_FUNC_DIFF);
    if (needTsOutput) {
      pColCtx->ptsOutputBuf = pRuntimeEnv->pCtx[0].aOutputBuf;
    }

    memset(pOutput, 0, (size_t)pQuery->pSelectExpr[col].bytes * pQuery->rec.capacity);
  }

  initCtxOutputBuf(pRuntimeEnv);
}

/*
 * Move the output position of every column forward by `output` rows and reset
 * the result info of each context. The diff function must not appear here
 * (asserted).
 */
void forwardCtxOutputBuf(SQueryRuntimeEnv *pRuntimeEnv, int64_t output) {
  SQuery *pQuery = pRuntimeEnv->pQuery;

  // reset the execution contexts
  for (int32_t col = 0; col < pQuery->numOfOutput; ++col) {
    SQLFunctionCtx *pColCtx = &pRuntimeEnv->pCtx[col];

    int32_t functionId = pQuery->pSelectExpr[col].base.functionId;
    assert(functionId != TSDB_FUNC_DIFF);

    // set next output position
    if (IS_OUTER_FORWARD(aAggs[functionId].nStatus)) {
      pColCtx->aOutputBuf += pColCtx->outputBytes * output;
    }

    /*
     * NOTE: for top/bottom query, the value of first column of output (timestamp) are assigned
     * in the procedure of top/bottom routine
     * the output buffer in top/bottom routine is ptsOutputBuf, so we need to forward the output buffer
     *
     * diff function is handled in multi-output function
     */
    if (functionId == TSDB_FUNC_TOP || functionId == TSDB_FUNC_BOTTOM) {
      pColCtx->ptsOutputBuf += TSDB_KEYSIZE * output;
    }

    resetResultInfo(pColCtx->resultInfo);
  }
}

/*
 * Reset the stage of every context to 0 and run the init routine of each
 * function whose result info is not yet marked as initialized.
 */
void initCtxOutputBuf(SQueryRuntimeEnv *pRuntimeEnv) {
  SQuery *pQuery = pRuntimeEnv->pQuery;

  for (int32_t col = 0; col < pQuery->numOfOutput; ++col) {
    SQLFunctionCtx *pColCtx = &pRuntimeEnv->pCtx[col];
    int32_t         functionId = pQuery->pSelectExpr[col].base.functionId;

    pColCtx->currentStage = 0;

    SResultInfo *pResInfo = GET_RES_INFO(pColCtx);
    if (!pResInfo->initialized) {
      aAggs[functionId].init(pColCtx);
    }
  }
}

3039
/*
 * Apply the remaining LIMIT offset to the rows currently in the output buffers.
 *
 * If the offset covers all rows, they are dropped, the offset is reduced
 * accordingly, the output buffers are reset and QUERY_RESBUF_FULL is cleared.
 * Otherwise the first `offset` rows are removed by moving the remaining rows to
 * the front of each column buffer, and the offset becomes 0.
 */
void skipResults(SQueryRuntimeEnv *pRuntimeEnv) {
  SQuery *pQuery = pRuntimeEnv->pQuery;
  if (pQuery->rec.rows == 0 || pQuery->limit.offset == 0) {
    return;
  }

  if (pQuery->rec.rows <= pQuery->limit.offset) {
    // everything produced so far is skipped
    qDebug("QInfo:%p skip rows:%" PRId64 ", new offset:%" PRIu64, GET_QINFO_ADDR(pRuntimeEnv), pQuery->rec.rows,
        pQuery->limit.offset - pQuery->rec.rows);

    pQuery->limit.offset -= pQuery->rec.rows;
    pQuery->rec.rows = 0;

    resetCtxOutputBuf(pRuntimeEnv);

    // clear the buffer full flag if exists
    CLEAR_QUERY_STATUS(pQuery, QUERY_RESBUF_FULL);
    return;
  }

  // only the leading part of the rows is skipped
  int64_t numOfSkip = pQuery->limit.offset;
  pQuery->rec.rows -= numOfSkip;
  pQuery->limit.offset = 0;

  qDebug("QInfo:%p skip row:%"PRId64", new offset:%d, numOfRows remain:%" PRIu64, GET_QINFO_ADDR(pRuntimeEnv), numOfSkip,
         0, pQuery->rec.rows);

  for (int32_t col = 0; col < pQuery->numOfOutput; ++col) {
    SQLFunctionCtx *pColCtx = &pRuntimeEnv->pCtx[col];
    char           *pData = (char*) pQuery->sdata[col]->data;

    int32_t functionId = pQuery->pSelectExpr[col].base.functionId;
    int32_t bytes = pColCtx->outputBytes;

    // source and destination overlap, hence memmove
    memmove(pQuery->sdata[col]->data, pData + bytes * numOfSkip, pQuery->rec.rows * bytes);
    pColCtx->aOutputBuf = pData + pQuery->rec.rows * bytes;

    if (functionId == TSDB_FUNC_DIFF || functionId == TSDB_FUNC_TOP || functionId == TSDB_FUNC_BOTTOM) {
      pColCtx->ptsOutputBuf = pRuntimeEnv->pCtx[0].aOutputBuf;
    }
  }

  updateNumOfResult(pRuntimeEnv, pQuery->rec.rows);
}

/*
 * Update the query status. QUERY_NOT_COMPLETED replaces the whole status;
 * any other status is added as a flag after QUERY_NOT_COMPLETED is cleared.
 */
void setQueryStatus(SQuery *pQuery, int8_t status) {
  if (status != QUERY_NOT_COMPLETED) {
    // QUERY_NOT_COMPLETED is not compatible with any other status, so clear its position first
    CLEAR_QUERY_STATUS(pQuery, QUERY_NOT_COMPLETED);
    pQuery->status |= status;
    return;
  }

  pQuery->status = status;
}

/*
 * Call xNextStep for every output column of the query, skipping TSDB_FUNC_TS columns,
 * using the result info currently attached to each function context.
 *
 * Returns true when at least one function reports that its result is not complete.
 */
static bool stepAllFunctionsOnce(SQueryRuntimeEnv *pRuntimeEnv) {
  SQuery *pQuery = pRuntimeEnv->pQuery;
  bool    pending = false;

  for (int32_t col = 0; col < pQuery->numOfOutput; ++col) {
    int16_t fid = pQuery->pSelectExpr[col].base.functionId;
    if (fid == TSDB_FUNC_TS) {
      continue;
    }

    SQLFunctionCtx *pCtx = &pRuntimeEnv->pCtx[col];
    aAggs[fid].xNextStep(pCtx);

    SResultInfo *pResInfo = GET_RES_INFO(pCtx);
    if (!pResInfo->complete) {
      pending = true;
    }
  }

  return pending;
}

/*
 * Check whether the data blocks have to be scanned once more.
 *
 * For group-by-normal-column and interval queries every closed window result is bound to
 * the function contexts in turn and its functions are stepped; otherwise the functions are
 * stepped once against the current contexts.
 *
 * Returns true if any stepped function has not completed its result.
 */
bool needScanDataBlocksAgain(SQueryRuntimeEnv *pRuntimeEnv) {
  SQuery *pQuery = pRuntimeEnv->pQuery;

  if (!(pRuntimeEnv->groupbyNormalCol || QUERY_IS_INTERVAL_QUERY(pQuery))) {
    return stepAllFunctionsOnce(pRuntimeEnv);
  }

  SWindowResInfo *pWindowResInfo = &pRuntimeEnv->windowResInfo;
  bool            again = false;

  for (int32_t i = 0; i < pWindowResInfo->size; ++i) {
    SWindowResult *pResult = getWindowResult(pWindowResInfo, i);

    // only closed windows take part in the check
    if (pResult->status.closed) {
      setWindowResOutputBuf(pRuntimeEnv, pResult);

      if (stepAllFunctionsOnce(pRuntimeEnv)) {
        again = true;
      }
    }
  }

  return again;
}

H
Haojun Liao 已提交
3135
/*
 * Take a snapshot of the query state at the beginning of a table scan.
 *
 * The snapshot holds the query status, the current window index, the query time window,
 * `start` as the last key, and a current window running from `start` to the ekey of the
 * current table's window.
 */
static SQueryStatusInfo getQueryStatusInfo(SQueryRuntimeEnv *pRuntimeEnv, TSKEY start) {
  SQuery          *pQuery = pRuntimeEnv->pQuery;
  STableQueryInfo *pCurrent = pQuery->current;

  // start must not lie beyond the table's lastKey in the scan direction
  if (QUERY_IS_ASC_QUERY(pQuery)) {
    assert(start <= pCurrent->lastKey);
  } else {
    assert(start >= pCurrent->lastKey);
  }

  SQueryStatusInfo snapshot = {0};

  snapshot.status         = pQuery->status;
  snapshot.windowIndex    = pRuntimeEnv->windowResInfo.curIndex;
  snapshot.lastKey        = start;
  snapshot.w              = pQuery->window;
  snapshot.curWindow.skey = start;
  snapshot.curWindow.ekey = pCurrent->win.ekey;

  return snapshot;
}

3153 3154 3155 3156
/*
 * Switch the runtime environment into reverse-scan mode.
 *
 * The ts-buffer cursor is saved into pStatus, the query window becomes the swapped
 * pStatus->curWindow, the scan order is flipped, a new secondary query handle is opened for
 * the reversed range, and functions are switched/disabled for the reverse pass.
 */
static void setEnvBeforeReverseScan(SQueryRuntimeEnv *pRuntimeEnv, SQueryStatusInfo *pStatus) {
  SQInfo *pQInfo = GET_QINFO_ADDR(pRuntimeEnv);
  SQuery *pQuery = pRuntimeEnv->pQuery;

  // remember where the ts buffer stands, then turn it around
  pStatus->cur = tsBufGetCursor(pRuntimeEnv->pTSBuf);
  if (pRuntimeEnv->pTSBuf) {
    SWITCH_ORDER(pRuntimeEnv->pTSBuf->cur.order);
    tsBufNextPos(pRuntimeEnv->pTSBuf);
  }

  // the reverse scan covers the window of the master scan with skey/ekey exchanged
  pQuery->window = pStatus->curWindow;
  SWAP(pQuery->window.skey, pQuery->window.ekey, TSKEY);

  SWITCH_ORDER(pQuery->order.order);
  SET_REVERSE_SCAN_FLAG(pRuntimeEnv);

  STsdbQueryCond cond = {0};
  cond.twindow   = pQuery->window;
  cond.order     = pQuery->order.order;
  cond.colList   = pQuery->colList;
  cond.numOfCols = pQuery->numOfCols;

  // release the handle left over from a previous secondary scan
  if (pRuntimeEnv->pSecQueryHandle != NULL) {
    tsdbCleanupQueryHandle(pRuntimeEnv->pSecQueryHandle);
  }

  // add ref for table
  pRuntimeEnv->pSecQueryHandle = tsdbQueryTables(pQInfo->tsdb, &cond, &pQInfo->tableGroupInfo, pQInfo);

  setQueryStatus(pQuery, QUERY_NOT_COMPLETED);
  switchCtxOrder(pRuntimeEnv);
  disableFuncInReverseScan(pQInfo);
}

3190 3191
/*
 * Undo the changes made for the reverse scan.
 *
 * The scan order and function contexts are switched back, the ts-buffer cursor saved in
 * pStatus is restored, the master-scan flag is set, and lastKey, status and the time windows
 * are reset from the values kept in pStatus.
 */
static void clearEnvAfterReverseScan(SQueryRuntimeEnv *pRuntimeEnv, SQueryStatusInfo *pStatus) {
  SQuery          *pQuery = pRuntimeEnv->pQuery;
  STableQueryInfo *pCurrent = pQuery->current;

  SWITCH_ORDER(pQuery->order.order);
  switchCtxOrder(pRuntimeEnv);

  tsBufSetCursor(pRuntimeEnv->pTSBuf, &pStatus->cur);
  if (pRuntimeEnv->pTSBuf) {
    pRuntimeEnv->pTSBuf->cur.order = pQuery->order.order;
  }

  SET_MASTER_SCAN_FLAG(pRuntimeEnv);

  // bring back the progress and status recorded by the master scan
  pCurrent->lastKey = pStatus->lastKey;
  pQuery->status    = pStatus->status;

  // both the table window and the query window return to the saved query window
  pCurrent->win  = pStatus->w;
  pQuery->window = pCurrent->win;
}

3213
/*
 * Scan the data blocks of the current table starting from `start`.
 *
 * A master scan is performed first. While needScanDataBlocksAgain() asks for it, the range
 * covered by the master scan is scanned again (REPEAT_SCAN) through a fresh secondary query
 * handle. Finally, if needReverseScan() says so, one reverse scan is executed and the
 * environment is restored afterwards. The function returns early when the query is killed.
 */
void scanOneTableDataBlocks(SQueryRuntimeEnv *pRuntimeEnv, TSKEY start) {
  SQInfo          *pQInfo = (SQInfo *) GET_QINFO_ADDR(pRuntimeEnv);
  SQuery          *pQuery = pRuntimeEnv->pQuery;
  STableQueryInfo *pCurrent = pQuery->current;

  setQueryStatus(pQuery, QUERY_NOT_COMPLETED);

  // keep the starting position so that repeat/reverse scans can come back to it
  SQueryStatusInfo saved = getQueryStatusInfo(pRuntimeEnv, start);

  SET_MASTER_SCAN_FLAG(pRuntimeEnv);
  int32_t step = GET_FORWARD_DIRECTION_FACTOR(pQuery->order.order);

  for (;;) {
    doScanAllDataBlocks(pRuntimeEnv);

    // the master scan defines the status and the range used by every later pass
    if (IS_MASTER_SCAN(pRuntimeEnv)) {
      saved.status         = pQuery->status;
      saved.curWindow.ekey = pCurrent->lastKey - step;
      saved.lastKey        = pCurrent->lastKey;
    }

    if (!needScanDataBlocksAgain(pRuntimeEnv)) {
      // a repeat scan must not leak its own status, restore the one of the master scan
      if (pRuntimeEnv->scanFlag == REPEAT_SCAN) {
        pQuery->status = saved.status;
      }

      break;
    }

    STsdbQueryCond cond = {0};
    cond.twindow   = saved.curWindow;
    cond.order     = pQuery->order.order;
    cond.colList   = pQuery->colList;
    cond.numOfCols = pQuery->numOfCols;

    if (pRuntimeEnv->pSecQueryHandle != NULL) {
      tsdbCleanupQueryHandle(pRuntimeEnv->pSecQueryHandle);
    }

    pRuntimeEnv->pSecQueryHandle = tsdbQueryTables(pQInfo->tsdb, &cond, &pQInfo->tableGroupInfo, pQInfo);
    pRuntimeEnv->windowResInfo.curIndex = saved.windowIndex;

    setQueryStatus(pQuery, QUERY_NOT_COMPLETED);
    pRuntimeEnv->scanFlag = REPEAT_SCAN;

    qDebug("QInfo:%p start to repeat scan data blocks due to query func required, qrange:%" PRId64 "-%" PRId64, pQInfo,
           cond.twindow.skey, cond.twindow.ekey);

    // check if query is killed or not
    if (isQueryKilled(pQInfo)) {
      return;
    }
  }

  if (needReverseScan(pQuery)) {
    setEnvBeforeReverseScan(pRuntimeEnv, &saved);

    // reverse scan from current position
    qDebug("QInfo:%p start to reverse scan", pQInfo);
    doScanAllDataBlocks(pRuntimeEnv);

    clearEnvAfterReverseScan(pRuntimeEnv, &saved);
  }
}

H
hjxilinx 已提交
3283
/*
 * Run the xFinalize callback of every output function.
 *
 * For group-by-normal-column and interval queries this is done per closed window result
 * (all windows are closed first for group-by-normal-column queries) and the number of rows
 * of each window is recorded. For all other queries the functions are finalized once.
 */
void finalizeQueryResult(SQueryRuntimeEnv *pRuntimeEnv) {
  SQuery *pQuery = pRuntimeEnv->pQuery;

  if (!(pRuntimeEnv->groupbyNormalCol || QUERY_IS_INTERVAL_QUERY(pQuery))) {
    for (int32_t col = 0; col < pQuery->numOfOutput; ++col) {
      aAggs[pQuery->pSelectExpr[col].base.functionId].xFinalize(&pRuntimeEnv->pCtx[col]);
    }

    return;
  }

  SWindowResInfo *pWindowResInfo = &pRuntimeEnv->windowResInfo;
  if (pRuntimeEnv->groupbyNormalCol) {
    closeAllTimeWindow(pWindowResInfo);
  }

  for (int32_t i = 0; i < pWindowResInfo->size; ++i) {
    if (!isWindowResClosed(pWindowResInfo, i)) {
      continue;
    }

    SWindowResult *pWindowRes = &pWindowResInfo->pResult[i];
    setWindowResOutputBuf(pRuntimeEnv, pWindowRes);

    for (int32_t col = 0; col < pQuery->numOfOutput; ++col) {
      aAggs[pQuery->pSelectExpr[col].base.functionId].xFinalize(&pRuntimeEnv->pCtx[col]);
    }

    /*
     * set the number of output results for group by normal columns, the number of output rows usually is 1 except
     * the top and bottom query
     */
    pWindowRes->numOfRows = getNumOfResult(pRuntimeEnv);
  }
}

/*
 * Returns true if the select list holds at least one function other than
 * TSDB_FUNC_TS, TSDB_FUNC_TAG and TSDB_FUNC_TAGPRJ.
 */
static bool hasMainOutput(SQuery *pQuery) {
  for (int32_t col = 0; col < pQuery->numOfOutput; ++col) {
    int32_t fid = pQuery->pSelectExpr[col].base.functionId;

    bool auxiliary = (fid == TSDB_FUNC_TS) || (fid == TSDB_FUNC_TAG) || (fid == TSDB_FUNC_TAGPRJ);
    if (!auxiliary) {
      return true;
    }
  }

  return false;
}

H
Haojun Liao 已提交
3331
/*
 * Initialize the STableQueryInfo located in the caller-provided buffer `buf`.
 *
 * The table handle and time window are stored, lastKey starts at win.skey and the cursor
 * vgroup index is set to -1. A window result info is set up only for interval and
 * group-by-normal-column queries.
 *
 * Returns `buf` as a STableQueryInfo pointer.
 */
static STableQueryInfo *createTableQueryInfo(SQueryRuntimeEnv *pRuntimeEnv, void* pTable, STimeWindow win, void* buf) {
  STableQueryInfo *pInfo = buf;

  pInfo->pTable  = pTable;
  pInfo->win     = win;
  pInfo->lastKey = win.skey;
  pInfo->cur.vgroupIndex = -1;

  // other aggregate queries do not need a per-table windowResInfo
  SQuery *pQuery = pRuntimeEnv->pQuery;
  if (QUERY_IS_INTERVAL_QUERY(pQuery) || pRuntimeEnv->groupbyNormalCol) {
    // set more initial size of interval/groupby query
    const int32_t initialSize = 20;
    const int32_t initialThreshold = 100;
    initWindowResInfo(&pInfo->windowResInfo, pRuntimeEnv, initialSize, initialThreshold, TSDB_DATA_TYPE_INT);
  }

  return pInfo;
}

3353
/*
 * Clean up the window result info owned by a table query info.
 * A NULL pTableQueryInfo is accepted and ignored.
 */
void destroyTableQueryInfo(STableQueryInfo *pTableQueryInfo, int32_t numOfCols) {
  if (pTableQueryInfo != NULL) {
    cleanupTimeWindowInfo(&pTableQueryInfo->windowResInfo, numOfCols);
  }
}

H
Haojun Liao 已提交
3361 3362 3363 3364 3365 3366 3367
/*
 * Make _tableInfo the current table of the query attached to _runtime.
 *
 * _runtime   : pointer to an object with a pQuery member (SQuery *)
 * _tableInfo : STableQueryInfo pointer; it is evaluated exactly once, so expressions with
 *              side effects are safe to pass
 *
 * After the assignment the macro asserts that lastKey is not behind win.skey for an
 * ascending query, and not ahead of it for a descending query.
 */
#define SET_CURRENT_QUERY_TABLE_INFO(_runtime, _tableInfo)                          \
  do {                                                                              \
    SQuery *         _query = (_runtime)->pQuery;                                   \
    STableQueryInfo *_tinfo = (_tableInfo);                                         \
    _query->current = _tinfo;                                                       \
    assert(((_tinfo->lastKey >= _tinfo->win.skey) && QUERY_IS_ASC_QUERY(_query)) || \
           ((_tinfo->lastKey <= _tinfo->win.skey) && !QUERY_IS_ASC_QUERY(_query))); \
  } while (0)
3368 3369 3370 3371

/**
 * set output buffer for different group
 * @param pRuntimeEnv
3372
 * @param pDataBlockInfo
3373
 */
H
Haojun Liao 已提交
3374
void setExecutionContext(SQInfo *pQInfo, int32_t groupIndex, TSKEY nextKey) {
3375
  SQueryRuntimeEnv *pRuntimeEnv = &pQInfo->runtimeEnv;
H
Haojun Liao 已提交
3376 3377 3378
  STableQueryInfo  *pTableQueryInfo = pRuntimeEnv->pQuery->current;
  SWindowResInfo   *pWindowResInfo = &pRuntimeEnv->windowResInfo;

H
Haojun Liao 已提交
3379 3380
  // lastKey needs to be updated
  pTableQueryInfo->lastKey = nextKey;
H
Haojun Liao 已提交
3381 3382 3383 3384

  if (pRuntimeEnv->hasTagResults || pRuntimeEnv->pTSBuf != NULL) {
    setAdditionalInfo(pQInfo, pTableQueryInfo->pTable, pTableQueryInfo);
  }
H
Haojun Liao 已提交
3385

H
Haojun Liao 已提交
3386 3387 3388
  if (pRuntimeEnv->prevGroupId != INT32_MIN && pRuntimeEnv->prevGroupId == groupIndex) {
    return;
  }
3389

H
Haojun Liao 已提交
3390
  int32_t GROUPRESULTID = 1;
3391
  SWindowResult *pWindowRes = doSetTimeWindowFromKey(pRuntimeEnv, pWindowResInfo, (char *)&groupIndex, sizeof(groupIndex));
3392 3393 3394
  if (pWindowRes == NULL) {
    return;
  }
3395

3396 3397 3398 3399 3400 3401 3402 3403 3404 3405
  /*
   * not assign result buffer yet, add new result buffer
   * all group belong to one result set, and each group result has different group id so set the id to be one
   */
  if (pWindowRes->pos.pageId == -1) {
    if (addNewWindowResultBuf(pWindowRes, pRuntimeEnv->pResultBuf, GROUPRESULTID, pRuntimeEnv->numOfRowsPerPage) !=
        TSDB_CODE_SUCCESS) {
      return;
    }
  }
3406

H
Haojun Liao 已提交
3407 3408
  // record the current active group id
  pRuntimeEnv->prevGroupId = groupIndex;
3409 3410 3411 3412
  setWindowResOutputBuf(pRuntimeEnv, pWindowRes);
  initCtxOutputBuf(pRuntimeEnv);
}

H
Haojun Liao 已提交
3413
/*
 * Bind every function context to the output position and result info of the given
 * window result. TOP, BOTTOM and DIFF additionally get the output buffer of column 0 as
 * their timestamp output buffer.
 */
void setWindowResOutputBuf(SQueryRuntimeEnv *pRuntimeEnv, SWindowResult *pResult) {
  SQuery *pQuery = pRuntimeEnv->pQuery;

  // Note: pResult->pos[i]->num == 0, there is only fixed number of results for each group
  for (int32_t col = 0; col < pQuery->numOfOutput; ++col) {
    SQLFunctionCtx *pCtx = &pRuntimeEnv->pCtx[col];
    int32_t         fid = pQuery->pSelectExpr[col].base.functionId;

    pCtx->aOutputBuf = getPosInResultPage(pRuntimeEnv, col, pResult);

    bool tsOutputRequired = (fid == TSDB_FUNC_TOP) || (fid == TSDB_FUNC_BOTTOM) || (fid == TSDB_FUNC_DIFF);
    if (tsOutputRequired) {
      pCtx->ptsOutputBuf = pRuntimeEnv->pCtx[0].aOutputBuf;
    }

    /*
     * set the output buffer information and intermediate buffer
     * not all queries require the interResultBuf, such as COUNT
     */
    pCtx->resultInfo = &pResult->resultInfo[col];

    // set super table query flag
    SResultInfo *pResInfo = GET_RES_INFO(pCtx);
    pResInfo->superTableQ = pRuntimeEnv->stableQuery;
  }
}

H
Haojun Liao 已提交
3438 3439
/*
 * Bind the function contexts to the given window result and initialize the functions whose
 * result info has not been initialized yet.
 *
 * A column whose result info is already initialized and complete only gets its resultInfo
 * pointer updated; nothing else is touched for it.
 */
void setWindowResOutputBufInitCtx(SQueryRuntimeEnv *pRuntimeEnv, SWindowResult *pResult) {
  SQuery *pQuery = pRuntimeEnv->pQuery;

  // Note: pResult->pos[i]->num == 0, there is only fixed number of results for each group
  for (int32_t col = 0; col < pQuery->numOfOutput; ++col) {
    SQLFunctionCtx *pCtx = &pRuntimeEnv->pCtx[col];
    pCtx->resultInfo = &pResult->resultInfo[col];

    bool finished = pCtx->resultInfo->initialized && pCtx->resultInfo->complete;
    if (finished) {
      continue;
    }

    pCtx->aOutputBuf = getPosInResultPage(pRuntimeEnv, col, pResult);
    pCtx->currentStage = 0;

    int32_t fid = pCtx->functionId;
    if (fid == TSDB_FUNC_TOP || fid == TSDB_FUNC_BOTTOM || fid == TSDB_FUNC_DIFF) {
      pCtx->ptsOutputBuf = pRuntimeEnv->pCtx[0].aOutputBuf;
    }

    /*
     * set the output buffer information and intermediate buffer
     * not all queries require the interResultBuf, such as COUNT
     */
    pCtx->resultInfo->superTableQ = pRuntimeEnv->stableQuery;  // set super table query flag

    if (!pCtx->resultInfo->initialized) {
      aAggs[fid].init(pCtx);
    }
  }
}

3470
/*
 * Load the tag values of the table into the function contexts and, when a ts buffer exists,
 * position the ts buffer for this table.
 *
 * On the first visit of a table (cursor vgroupIndex == -1) the tag of context 0 is stored in
 * the table query info, the ts buffer is positioned by that tag and the resulting cursor is
 * remembered. On later visits the remembered cursor is restored.
 *
 * Always returns 0.
 */
int32_t setAdditionalInfo(SQInfo *pQInfo, void* pTable, STableQueryInfo *pTableQueryInfo) {
  SQueryRuntimeEnv *pRuntimeEnv = &pQInfo->runtimeEnv;

  setTagVal(pRuntimeEnv, pTable, pQInfo->tsdb);

  if (pRuntimeEnv->pTSBuf == NULL) {
    return 0;
  }

  // both the master and supplement scan needs to set the correct ts comp start position
  if (pTableQueryInfo->cur.vgroupIndex != -1) {
    tsBufSetCursor(pRuntimeEnv->pTSBuf, &pTableQueryInfo->cur);
    return 0;
  }

  pTableQueryInfo->tag = pRuntimeEnv->pCtx[0].tag.i64Key;
  tsBufGetElemStartPos(pRuntimeEnv->pTSBuf, 0, pTableQueryInfo->tag);

  // keep the cursor info of current table
  pTableQueryInfo->cur = pRuntimeEnv->pTSBuf->cur;
  return 0;
}

/*
 * There are two cases to handle:
 *
 * 1. Query range is not set yet (queryRangeSet = 0). we need to set the query range info, including pQuery->lastKey,
 *    pQuery->window.skey, and pQuery->eKey.
 * 2. Query range is set and query is in progress. There may be another result with the same query ranges to be
 *    merged during merge stage. In this case, we need the pTableQueryInfo->lastResRows to decide if there
 *    is a previous result generated or not.
 */
H
hjxilinx 已提交
3501
void setIntervalQueryRange(SQInfo *pQInfo, TSKEY key) {
3502 3503
  SQueryRuntimeEnv *pRuntimeEnv = &pQInfo->runtimeEnv;
  SQuery *          pQuery = pRuntimeEnv->pQuery;
H
hjxilinx 已提交
3504 3505
  STableQueryInfo *pTableQueryInfo = pQuery->current;
  
3506 3507 3508
  if (pTableQueryInfo->queryRangeSet) {
    pTableQueryInfo->lastKey = key;
  } else {
3509
    pTableQueryInfo->win.skey = key;
3510
    STimeWindow win = {.skey = key, .ekey = pQuery->window.ekey};
3511

3512 3513 3514 3515 3516
    // for too small query range, no data in this interval.
    if ((QUERY_IS_ASC_QUERY(pQuery) && (pQuery->window.ekey < pQuery->window.skey)) ||
        (!QUERY_IS_ASC_QUERY(pQuery) && (pQuery->window.skey < pQuery->window.ekey))) {
      return;
    }
3517

3518 3519 3520 3521 3522 3523
    /**
     * In handling the both ascending and descending order super table query, we need to find the first qualified
     * timestamp of this table, and then set the first qualified start timestamp.
     * In ascending query, key is the first qualified timestamp. However, in the descending order query, additional
     * operations involve.
     */
H
Haojun Liao 已提交
3524
    STimeWindow     w = TSWINDOW_INITIALIZER, realWin = TSWINDOW_INITIALIZER;
3525
    SWindowResInfo *pWindowResInfo = &pTableQueryInfo->windowResInfo;
3526

H
Haojun Liao 已提交
3527 3528
    TSKEY sk = MIN(win.skey, win.ekey);
    TSKEY ek = MAX(win.skey, win.ekey);
H
Haojun Liao 已提交
3529
    getAlignQueryTimeWindow(pQuery, win.skey, sk, ek, &realWin, &w);
3530
    pWindowResInfo->startTime = pTableQueryInfo->win.skey;  // windowSKey may be 0 in case of 1970 timestamp
3531

3532 3533
    if (pWindowResInfo->prevSKey == TSKEY_INITIAL_VAL) {
      if (!QUERY_IS_ASC_QUERY(pQuery)) {
H
Haojun Liao 已提交
3534
        assert(win.ekey == pQuery->window.ekey);
3535
      }
3536 3537
      
      pWindowResInfo->prevSKey = w.skey;
3538
    }
3539

3540
    pTableQueryInfo->queryRangeSet = 1;
3541
    pTableQueryInfo->lastKey = pTableQueryInfo->win.skey;
3542 3543 3544 3545
  }
}

/*
 * Returns true if any function in the select list has the TSDB_FUNCSTATE_NEED_TS flag set
 * in its nStatus.
 */
bool requireTimestamp(SQuery *pQuery) {
  int32_t col = 0;

  while (col < pQuery->numOfOutput) {
    int32_t fid = pQuery->pSelectExpr[col].base.functionId;
    if (aAggs[fid].nStatus & TSDB_FUNCSTATE_NEED_TS) {
      return true;
    }

    col++;
  }

  return false;
}

/*
 * Decide whether the primary timestamp column of a data block has to be loaded.
 *
 * 1. if the lastKey of the current table or the ekey of the query window locates in this block,
 *    the timestamp column is needed to decide the precise position
 * 2. if any function requires timestamps (see requireTimestamp), it is needed in any case
 */
bool needPrimaryTimestampCol(SQuery *pQuery, SDataBlockInfo *pDataBlockInfo) {
  STimeWindow     *pBlockWin = &pDataBlockInfo->window;
  STableQueryInfo *pCurrent = pQuery->current;

  if (pCurrent->lastKey >= pBlockWin->skey && pCurrent->lastKey <= pBlockWin->ekey) {
    return true;
  }

  if (pQuery->window.ekey >= pBlockWin->skey && pQuery->window.ekey <= pBlockWin->ekey) {
    return true;
  }

  return requireTimestamp(pQuery);
}

/*
 * Number of result subsets of the query: the number of closed time windows for
 * group-by-normal-column and interval queries, the number of table groups otherwise.
 */
static int32_t getNumOfSubset(SQInfo *pQInfo) {
  SQuery *pQuery = pQInfo->runtimeEnv.pQuery;

  if (pQInfo->runtimeEnv.groupbyNormalCol || (QUERY_IS_INTERVAL_QUERY(pQuery))) {
    int32_t numOfClosed = numOfClosedTimeWindow(&pQInfo->runtimeEnv.windowResInfo);
    return numOfClosed;
  }

  int32_t numOfGroups = GET_NUM_OF_TABLEGROUP(pQInfo);
  return numOfGroups;
}

/*
 * Copy rows from the window results into the output buffers of the query (pQuery->sdata).
 *
 * Copying resumes at pQInfo->groupIndex / pQInfo->offset and walks the results forwards for
 * TSDB_ORDER_ASC or backwards otherwise. It stops when all subsets are consumed or when
 * pQuery->rec.capacity rows have been copied; groupIndex and offset are updated so that the
 * next call continues where this one stopped.
 *
 * Returns the number of rows copied.
 */
static int32_t doCopyToSData(SQInfo *pQInfo, SWindowResult *result, int32_t orderType) {
  SQueryRuntimeEnv *pRuntimeEnv = &pQInfo->runtimeEnv;
  SQuery           *pQuery = pRuntimeEnv->pQuery;

  qDebug("QInfo:%p start to copy data from windowResInfo to query buf", pQInfo);
  int32_t totalSubset = getNumOfSubset(pQInfo);

  int32_t first = 0;
  int32_t step = -1;
  if (orderType == TSDB_ORDER_ASC) {
    first = pQInfo->groupIndex;
    step = 1;
  } else {  // desc order copy all data
    first = totalSubset - pQInfo->groupIndex - 1;
  }

  int32_t copied = 0;

  for (int32_t idx = first; (idx >= 0) && (idx < totalSubset); idx += step) {
    SWindowResult *pRes = &result[idx];

    // nothing to copy from an empty subset, go on with the next one
    if (pRes->numOfRows == 0) {
      pQInfo->offset = 0;
      pQInfo->groupIndex += 1;
      continue;
    }

    assert(pQInfo->offset <= 1);

    int32_t skipped = pQInfo->offset;
    int32_t toCopy = pRes->numOfRows - pQInfo->offset;

    /*
     * current output space is not enough to keep all the result data of this group, only copy partial results
     * to SQuery object's result buffer
     */
    if (toCopy > pQuery->rec.capacity - copied) {
      toCopy = pQuery->rec.capacity - copied;
      pQInfo->offset += toCopy;
    } else {
      pQInfo->offset = 0;
      pQInfo->groupIndex += 1;
    }

    for (int32_t col = 0; col < pQuery->numOfOutput; ++col) {
      int32_t size = pRuntimeEnv->pCtx[col].outputBytes;

      char *src = getPosInResultPage(pRuntimeEnv, col, pRes);
      char *dst = pQuery->sdata[col]->data + copied * size;
      memcpy(dst, src + skipped * size, size * toCopy);
    }

    copied += toCopy;
    if (copied == pQuery->rec.capacity) {
      break;
    }
  }

  qDebug("QInfo:%p copy data to query buf completed", pQInfo);

#ifdef _DEBUG_VIEW
  displayInterResult(pQuery->sdata, pRuntimeEnv, copied);
#endif
  return copied;
}

/**
 * Copy window results into the output buffer of the query and add the number of copied rows
 * to pQuery->rec.rows.
 *
 * The copy order is taken from the group-by expression when there is one and is ascending
 * otherwise (interval query results are already ordered in the SWindowResult array).
 *
 * @param pQInfo  query handle
 * @param result  array of window results to copy from
 */
void copyFromWindowResToSData(SQInfo *pQInfo, SWindowResult *result) {
  SQuery *pQuery = pQInfo->runtimeEnv.pQuery;

  int32_t orderType = TSDB_ORDER_ASC;
  if (pQuery->pGroupbyExpr != NULL) {
    orderType = pQuery->pGroupbyExpr->orderType;
  }

  pQuery->rec.rows += doCopyToSData(pQInfo, result, orderType);
  assert(pQuery->rec.rows <= pQuery->rec.capacity);
}

H
Haojun Liao 已提交
3667
/*
 * For non-interval queries, raise the row count of every window result of the runtime
 * environment to the largest numOfRes among its output functions. TS, TAG and TAGPRJ columns
 * are not taken into account. Interval queries are left untouched.
 */
static void updateWindowResNumOfRes(SQueryRuntimeEnv *pRuntimeEnv, STableQueryInfo *pTableQueryInfo) {
  SQuery *pQuery = pRuntimeEnv->pQuery;

  if (QUERY_IS_INTERVAL_QUERY(pQuery)) {
    return;
  }

  // update the number of result for each, only update the number of rows for the corresponding window result.
  for (int32_t i = 0; i < pRuntimeEnv->windowResInfo.size; ++i) {
    SWindowResult *pWindowRes = &pRuntimeEnv->windowResInfo.pResult[i];

    for (int32_t col = 0; col < pQuery->numOfOutput; ++col) {
      int32_t fid = pRuntimeEnv->pCtx[col].functionId;

      bool counted = !(fid == TSDB_FUNC_TS || fid == TSDB_FUNC_TAG || fid == TSDB_FUNC_TAGPRJ);
      if (counted) {
        pWindowRes->numOfRows = MAX(pWindowRes->numOfRows, pWindowRes->resultInfo[col].numOfRes);
      }
    }
  }
}

3688 3689
/*
 * Apply the query functions to one data block of the current table of a super table query.
 *
 * Rows are processed one by one when there are filter columns, a ts buffer or a group by on
 * a normal column; otherwise the block is processed as a whole. The row counts of the window
 * results are refreshed afterwards.
 */
void stableApplyFunctionsOnBlock(SQueryRuntimeEnv *pRuntimeEnv, SDataBlockInfo *pDataBlockInfo, SDataStatis *pStatis,
    SArray *pDataBlock, __block_search_fn_t searchFn) {
  SQuery          *pQuery = pRuntimeEnv->pQuery;
  STableQueryInfo *pCurrent = pQuery->current;
  SWindowResInfo  *pWindowResInfo = &pCurrent->windowResInfo;

  // start from the first row for ascending order, from the last row otherwise
  if (QUERY_IS_ASC_QUERY(pQuery)) {
    pQuery->pos = 0;
  } else {
    pQuery->pos = pDataBlockInfo->rows - 1;
  }

  bool rowwise = pQuery->numOfFilterCols > 0 || pRuntimeEnv->pTSBuf != NULL || pRuntimeEnv->groupbyNormalCol;
  if (rowwise) {
    rowwiseApplyFunctions(pRuntimeEnv, pStatis, pDataBlockInfo, pWindowResInfo, pDataBlock);
  } else {
    blockwiseApplyFunctions(pRuntimeEnv, pStatis, pDataBlockInfo, pWindowResInfo, searchFn, pDataBlock);
  }

  updateWindowResNumOfRes(pRuntimeEnv, pCurrent);
}

3705 3706 3707
/*
 * Check whether the query still has results that have not been returned to the client.
 *
 * Returns false once the limit has been reached. For fill queries (other than point
 * interpolation) it reports remaining rows in the fill info, or, when the query is completed,
 * whether filling up to the end of the query window would produce rows. For other queries it
 * returns true when the query is completed and a group-by-normal-column or interval query
 * still holds window results.
 */
bool queryHasRemainResults(SQueryRuntimeEnv* pRuntimeEnv) {
  SQuery    *pQuery = pRuntimeEnv->pQuery;
  SFillInfo *pFillInfo = pRuntimeEnv->pFillInfo;

  if (pQuery->limit.limit > 0 && pQuery->rec.total >= pQuery->limit.limit) {
    return false;
  }

  bool fillQuery = (pQuery->fillType != TSDB_FILL_NONE) && !isPointInterpoQuery(pQuery);
  if (!fillQuery) {
    // there are results waiting for returned to client.
    return Q_STATUS_EQUAL(pQuery->status, QUERY_COMPLETED) &&
           (pRuntimeEnv->groupbyNormalCol || QUERY_IS_INTERVAL_QUERY(pQuery)) &&
           (pRuntimeEnv->windowResInfo.size > 0);
  }

  // There are results not returned to client yet, so filling operation applied to the remain result is required
  // in the first place.
  int32_t remain = taosNumOfRemainRows(pFillInfo);
  if (remain > 0) {
    return true;
  }

  /*
   * While the code reaches here, there are no results remains now.
   * If query is not completed yet, the gaps between two results blocks need to be handled after next data block
   * is retrieved from TSDB.
   *
   * NOTE: If the result set is not the first block, the gap in front of the result set will be filled. If the result
   * set is the FIRST result block, the gap between the start time of query time window and the timestamp of the
   * first result row in the actual result set will fill nothing.
   */
  if (!Q_STATUS_EQUAL(pQuery->status, QUERY_COMPLETED)) {
    return false;
  }

  int32_t numOfTotal = getFilledNumOfRes(pFillInfo, pQuery->window.ekey, pQuery->rec.capacity);
  return numOfTotal > 0;
}

/*
 * Serialize the query result into the response buffer `data`.
 *
 * Layout written to `data`:
 *   1. for every output column, numOfRows values of pSelectExpr[col].bytes bytes each
 *   2. the number of entries of pQInfo->arrTableIdInfo as a 32-bit value in network byte order
 *   3. one STableIdInfo per entry with uid/tid/key converted to big-endian
 *
 * The column data can end at any byte offset, so the trailer is never stored through a cast
 * pointer (that would be a misaligned access); values are built locally and copied with memcpy.
 *
 * When the query is completed, the status is moved to QUERY_OVER if nothing is left to return.
 *
 * @param pQInfo     query handle
 * @param numOfRows  number of rows to copy from each output column
 * @param data       destination buffer, assumed by the caller to be large enough
 */
static void doCopyQueryResultToMsg(SQInfo *pQInfo, int32_t numOfRows, char *data) {
  SQuery *pQuery = pQInfo->runtimeEnv.pQuery;

  for (int32_t col = 0; col < pQuery->numOfOutput; ++col) {
    int32_t bytes = pQuery->pSelectExpr[col].bytes;

    memmove(data, pQuery->sdata[col]->data, bytes * numOfRows);
    data += bytes * numOfRows;
  }

  int32_t numOfTables = (int32_t)taosArrayGetSize(pQInfo->arrTableIdInfo);
  int32_t netNumOfTables = htonl(numOfTables);
  memcpy(data, &netNumOfTables, sizeof(netNumOfTables));
  data += sizeof(int32_t);

  for (int32_t i = 0; i < numOfTables; i++) {
    STableIdInfo *pSrc = taosArrayGet(pQInfo->arrTableIdInfo, i);

    // zero the whole record first so that no indeterminate bytes end up in the message
    STableIdInfo dst;
    memset(&dst, 0, sizeof(dst));
    dst.uid = htobe64(pSrc->uid);
    dst.tid = htonl(pSrc->tid);
    dst.key = htobe64(pSrc->key);

    memcpy(data, &dst, sizeof(dst));
    data += sizeof(STableIdInfo);
  }

  // all data returned, set query over
  if (Q_STATUS_EQUAL(pQuery->status, QUERY_COMPLETED)) {
    if (pQInfo->runtimeEnv.stableQuery) {
      if (pQInfo->tableIndex >= pQInfo->tableqinfoGroupInfo.numOfTables) {
        setQueryStatus(pQuery, QUERY_OVER);
      }
    } else {
      if (!queryHasRemainResults(&pQInfo->runtimeEnv)) {
        setQueryStatus(pQuery, QUERY_OVER);
      }
    }
  }
}

H
Haojun Liao 已提交
3783
/*
 * Generate filled result rows into pQuery->sdata and apply the remaining query offset.
 *
 * Each round generates up to pQuery->rec.capacity rows. If no offset is pending the rows are
 * returned as they are. If the offset is smaller than the number of generated rows, the leading
 * rows are discarded by moving the rest to the start of the pDst pages. Otherwise the whole
 * batch is discarded, the offset is reduced, and another round is started as long as the query
 * has remaining results.
 *
 * numOfFilled is not used by this function.
 *
 * Returns the number of rows available in the output buffer.
 */
int32_t doFillGapsInResults(SQueryRuntimeEnv* pRuntimeEnv, tFilePage **pDst, int32_t *numOfFilled) {
  SQInfo    *pQInfo = GET_QINFO_ADDR(pRuntimeEnv);
  SQuery    *pQuery = pRuntimeEnv->pQuery;
  SFillInfo *pFillInfo = pRuntimeEnv->pFillInfo;

  for (;;) {
    int32_t ret = taosGenerateDataBlock(pFillInfo, (tFilePage**) pQuery->sdata, pQuery->rec.capacity);

    // todo apply limit output function
    /* reached the start position of according to offset value, return immediately */
    if (pQuery->limit.offset == 0) {
      qDebug("QInfo:%p initial numOfRows:%d, generate filled result:%d rows", pQInfo, pFillInfo->numOfRows, ret);
      return ret;
    }

    if (pQuery->limit.offset >= ret) {
      // the whole batch is consumed by the offset
      qDebug("QInfo:%p initial numOfRows:%d, generate filled result:%d rows, offset:%" PRId64 ". Discard due to offset, "
             "remain:%d, new offset:%" PRId64, pQInfo, pFillInfo->numOfRows, ret, pQuery->limit.offset, 0,
             pQuery->limit.offset - ret);

      pQuery->limit.offset -= ret;
      pQuery->rec.rows = 0;

      if (!queryHasRemainResults(pRuntimeEnv)) {
        return 0;
      }

      continue;
    }

    qDebug("QInfo:%p initial numOfRows:%d, generate filled result:%d rows, offset:%" PRId64 ". Discard due to offset, remain:%" PRId64 ", new offset:%d",
           pQInfo, pFillInfo->numOfRows, ret, pQuery->limit.offset, ret - pQuery->limit.offset, 0);

    ret -= pQuery->limit.offset;

    // todo !!!!there exactly number of interpo is not valid.
    // todo refactor move to the beginning of buffer
    for (int32_t col = 0; col < pQuery->numOfOutput; ++col) {
      char *base = pDst[col]->data;
      memmove(base, base + pQuery->pSelectExpr[col].bytes * pQuery->limit.offset,
              ret * pQuery->pSelectExpr[col].bytes);
    }

    pQuery->limit.offset = 0;
    return ret;
  }
}

3828
/*
 * Write the cost summary of the query (elapsed time, block and row counters) to the debug log.
 *
 * NOTE: more detailed statistics (comp info, field info, file blocks, temp buffer, io/computing
 * split) used to be logged here but were disabled.
 */
static void queryCostStatis(SQInfo *pQInfo) {
  SQueryRuntimeEnv *pRuntimeEnv = &pQInfo->runtimeEnv;
  SQueryCostInfo   *pCost = &pRuntimeEnv->summary;

  // todo add the intermediate result save cost!!
  qDebug("QInfo:%p :cost summary: elpased time:%" PRId64 " us, total blocks:%d, use block statis:%d, use block data:%d, "
         "total rows:%" PRId64 ", check rows:%" PRId64,
         pQInfo, pCost->elapsedTime, pCost->totalBlocks, pCost->loadBlockStatis, pCost->loadBlocks, pCost->totalRows,
         pCost->totalCheckedRows);
}

3872 3873
/*
 * Consume the remaining row offset (pQuery->limit.offset) inside the given data block.
 *
 * If the offset equals the number of rows in the block, the whole block is discarded and the
 * table's lastKey is moved one step past the block boundary in scan direction. Otherwise the
 * start position inside the block is computed, lastKey is set to the timestamp at that position,
 * and the query functions are applied to the block. In both cases the offset is reset to 0.
 *
 * @param pRuntimeEnv  runtime environment of the running query
 * @param pBlockInfo   description (row count, time window) of the current data block
 */
static void updateOffsetVal(SQueryRuntimeEnv *pRuntimeEnv, SDataBlockInfo *pBlockInfo) {
  SQuery          *pQuery = pRuntimeEnv->pQuery;
  STableQueryInfo *pCurrent = pQuery->current;
  const bool       ascending = QUERY_IS_ASC_QUERY(pQuery);

  // the offset covers exactly this block: skip it completely
  if (pQuery->limit.offset == pBlockInfo->rows) {
    int32_t step = GET_FORWARD_DIRECTION_FACTOR(pQuery->order.order);
    TSKEY   boundary = ascending ? pBlockInfo->window.ekey : pBlockInfo->window.skey;

    pCurrent->lastKey = boundary + step;
    pQuery->limit.offset = 0;
    return;
  }

  // first row to be processed, counted from the scan-direction start of the block
  if (!ascending) {
    pQuery->pos = pBlockInfo->rows - pQuery->limit.offset - 1;
  } else {
    pQuery->pos = pQuery->limit.offset;
  }

  assert(pQuery->pos >= 0 && pQuery->pos <= pBlockInfo->rows - 1);

  // column 0 of the loaded block is read as the timestamp (TSKEY) column
  SArray          *pBlockCols = tsdbRetrieveDataBlock(pRuntimeEnv->pQueryHandle, NULL);
  SColumnInfoData *pTsCol = taosArrayGet(pBlockCols, 0);
  TSKEY           *tsList = (TSKEY *) pTsCol->pData;

  // the offset is fully consumed once the start key is recorded
  pCurrent->lastKey = tsList[pQuery->pos];
  pQuery->limit.offset = 0;

  int32_t numOfRes = tableApplyFunctionsOnBlock(pRuntimeEnv, pBlockInfo, NULL, binarySearchForKey, pBlockCols);

  qDebug("QInfo:%p check data block, brange:%" PRId64 "-%" PRId64 ", numOfRows:%d, numOfRes:%d, lastKey:%"PRId64, GET_QINFO_ADDR(pRuntimeEnv),
         pBlockInfo->window.skey, pBlockInfo->window.ekey, pBlockInfo->rows, numOfRes, pQuery->current->lastKey);
}
3907

3908 3909 3910 3911 3912
/*
 * Skip whole data blocks while the remaining row offset is larger than the block's row count.
 *
 * Nothing is done when there is no positive offset or when filter columns exist
 * (pQuery->numOfFilterCols > 0). For every skipped block the offset is reduced by the block's
 * row count and the current table's lastKey is moved one step past the block in scan direction.
 * The first block that the offset does not exceed is handed to updateOffsetVal() and the scan stops.
 * The function returns early if the query has been killed.
 *
 * @param pRuntimeEnv  runtime environment of the running query
 */
void skipBlocks(SQueryRuntimeEnv *pRuntimeEnv) {
  SQuery *pQuery = pRuntimeEnv->pQuery;

  if (pQuery->numOfFilterCols > 0 || pQuery->limit.offset <= 0) {
    return;
  }

  pQuery->pos = 0;

  const int32_t    step = GET_FORWARD_DIRECTION_FACTOR(pQuery->order.order);
  STableQueryInfo *pCurrent = pQuery->current;
  TsdbQueryHandleT handle = pRuntimeEnv->pQueryHandle;

  SDataBlockInfo blockInfo = {0};
  while (tsdbNextDataBlock(handle)) {
    if (isQueryKilled(GET_QINFO_ADDR(pRuntimeEnv))) {
      return;
    }

    tsdbRetrieveDataBlockInfo(handle, &blockInfo);

    // the start position lies inside this block (or the block is consumed exactly)
    if (pQuery->limit.offset <= blockInfo.rows) {
      updateOffsetVal(pRuntimeEnv, &blockInfo);
      break;
    }

    // the whole block is discarded
    pQuery->limit.offset -= blockInfo.rows;

    TSKEY boundary = (QUERY_IS_ASC_QUERY(pQuery)) ? blockInfo.window.ekey : blockInfo.window.skey;
    pCurrent->lastKey = boundary + step;

    qDebug("QInfo:%p skip rows:%d, offset:%" PRId64, GET_QINFO_ADDR(pRuntimeEnv), blockInfo.rows,
           pQuery->limit.offset);
  }
}
3942

H
Haojun Liao 已提交
3943
/*
 * Forward the query start position by pQuery->limit.offset time windows (interval query).
 *
 * *start is first set to the current table's lastKey. The skip is not attempted when there is
 * no positive offset, when filter columns exist, or when a TS buffer or fill info is attached
 * to the runtime environment. Otherwise data blocks are iterated and the offset is decreased
 * by one for every time window that ends inside the current block. When the offset reaches 0,
 * *start, pQuery->window.skey and the window info's prevSKey are updated to the first window
 * that has to be returned.
 *
 * Since holes may exist in the data, offset * intervalTime is not a valid shortcut; windows are
 * located by inspecting the blocks.
 *
 * @param pRuntimeEnv  runtime environment of the running query
 * @param start        [out] timestamp from which the query should continue
 * @return always true (every exit path of this function returns true)
 */
static bool skipTimeInterval(SQueryRuntimeEnv *pRuntimeEnv, TSKEY* start) {
  SQuery *pQuery = pRuntimeEnv->pQuery;
  *start = pQuery->current->lastKey;

  // if queried with value filter, do NOT forward query start position
  const bool skipDisabled = (pQuery->limit.offset <= 0) || (pQuery->numOfFilterCols > 0) ||
                            (pRuntimeEnv->pTSBuf != NULL) || (pRuntimeEnv->pFillInfo != NULL);
  if (skipDisabled) {
    return true;
  }

  assert(pRuntimeEnv->windowResInfo.prevSKey == TSKEY_INITIAL_VAL);

  const bool       asc = QUERY_IS_ASC_QUERY(pQuery);
  SWindowResInfo  *pWinInfo = &pRuntimeEnv->windowResInfo;
  STableQueryInfo *pCurrent = pQuery->current;

  STimeWindow aligned = TSWINDOW_INITIALIZER;
  STimeWindow realWin = TSWINDOW_INITIALIZER;

  SDataBlockInfo blockInfo = {0};
  while (tsdbNextDataBlock(pRuntimeEnv->pQueryHandle)) {
    tsdbRetrieveDataBlockInfo(pRuntimeEnv->pQueryHandle, &blockInfo);

    if (!asc) {
      getAlignQueryTimeWindow(pQuery, blockInfo.window.ekey, pQuery->window.ekey, blockInfo.window.ekey, &realWin, &aligned);

      pWinInfo->startTime = pQuery->window.skey;
      pWinInfo->prevSKey = aligned.skey;
    } else if (pWinInfo->prevSKey == TSKEY_INITIAL_VAL) {
      // ascending scan: align only once, on the first block
      getAlignQueryTimeWindow(pQuery, blockInfo.window.skey, blockInfo.window.skey, pQuery->window.ekey, &realWin, &aligned);

      pWinInfo->startTime = aligned.skey;
      pWinInfo->prevSKey = aligned.skey;
    }

    // the first time window
    STimeWindow win = getActiveTimeWindow(pWinInfo, pWinInfo->prevSKey, pQuery);

    while (pQuery->limit.offset > 0) {
      // the active window is counted as skipped when this test against the block range holds
      const bool winInBlock = asc ? (win.ekey <= blockInfo.window.ekey) : (win.ekey >= blockInfo.window.skey);
      if (winInBlock) {
        pQuery->limit.offset -= 1;
        pWinInfo->prevSKey = win.skey;
      }

      STimeWindow tw = win;
      getNextTimeWindow(pQuery, &tw);

      // does the following window still overlap the current block?
      const bool nextInBlock = asc ? (tw.skey <= blockInfo.window.ekey) : (tw.ekey >= blockInfo.window.skey);
      const bool offsetDone = (pQuery->limit.offset == 0);

      if (!nextInBlock) {
        if (!offsetDone) {
          break;  // offset is not 0, and next time window begins or ends in the next block.
        }

        // all windows skipped and the next one starts outside this block: nothing to load
        *start = tw.skey;
        pQuery->window.skey = tw.skey;
        pWinInfo->prevSKey = tw.skey;
        return true;
      }

      /*
       * The next time window still starts from current data block: load the block and find the
       * start position for the next queried time window.
       * TODO: Optimize for this case. Not all data blocks need to be loaded, only the one in which
       * the first actually required time window resides.
       */
      SArray          *pBlockCols = tsdbRetrieveDataBlock(pRuntimeEnv->pQueryHandle, NULL);
      SColumnInfoData *pTsCol = taosArrayGet(pBlockCols, 0);

      tw = win;
      int32_t startPos = getNextQualifiedWindow(pRuntimeEnv, &tw, &blockInfo, pTsCol->pData, binarySearchForKey);
      assert(startPos >= 0);

      // set the abort info
      pQuery->pos = startPos;

      if (offsetDone) {
        // reset the query start timestamp
        pCurrent->win.skey = ((TSKEY *)pTsCol->pData)[startPos];
        pQuery->window.skey = pCurrent->win.skey;
        *start = pCurrent->win.skey;

        pWinInfo->prevSKey = tw.skey;

        // process the remaining rows of this block, keeping the window index untouched
        int32_t savedIndex = pRuntimeEnv->windowResInfo.curIndex;
        int32_t numOfRes = tableApplyFunctionsOnBlock(pRuntimeEnv, &blockInfo, NULL, binarySearchForKey, pBlockCols);
        pRuntimeEnv->windowResInfo.curIndex = savedIndex;  // restore the window index

        qDebug("QInfo:%p check data block, brange:%" PRId64 "-%" PRId64 ", numOfRows:%d, numOfRes:%d, lastKey:%"PRId64,
               GET_QINFO_ADDR(pRuntimeEnv), blockInfo.window.skey, blockInfo.window.ekey, blockInfo.rows, numOfRes, pQuery->current->lastKey);

        return true;
      }

      // more windows have to be skipped: continue from the located window
      pCurrent->lastKey = ((TSKEY *)pTsCol->pData)[startPos];
      pWinInfo->prevSKey = tw.skey;
      win = tw;
    }
  }

  return true;
}

B
Bomin Zhang 已提交
4064 4065
/*
 * Create the tsdb query handle (pRuntimeEnv->pQueryHandle) for a query.
 *
 * No handle is created when only tags are queried, or for a super table query that is neither
 * an interval query nor a fixed-output query.
 *
 * The query condition is built from pQuery (window, order, column list). For an ascending,
 * non-interval, non-group-by-normal-column, non-fixed-output query on exactly one table, the
 * time window of that table's STableQueryInfo is used instead of pQuery->window.
 *
 * @param tsdb           tsdb instance passed through to the tsdbQuery* functions
 * @param pQInfo         query info object that owns the runtime environment
 * @param isSTableQuery  true if the query is a super table query
 */
static void setupQueryHandle(void* tsdb, SQInfo* pQInfo, bool isSTableQuery) {
  SQueryRuntimeEnv *pRuntimeEnv = &pQInfo->runtimeEnv;
  SQuery           *pQuery = pRuntimeEnv->pQuery;

  if (onlyQueryTags(pQuery)) {
    return;
  }

  if (isSTableQuery && !QUERY_IS_INTERVAL_QUERY(pQuery) && !isFixedOutputQuery(pQuery)) {
    return;
  }

  STsdbQueryCond cond = {
    .twindow   = pQuery->window,
    .order     = pQuery->order.order,
    .colList   = pQuery->colList,
    .numOfCols = pQuery->numOfCols,
  };

  const bool useTableWindow = (!isSTableQuery)
                           && (pQInfo->tableqinfoGroupInfo.numOfTables == 1)
                           && (cond.order == TSDB_ORDER_ASC)
                           && (!QUERY_IS_INTERVAL_QUERY(pQuery))
                           && (!isGroupbyNormalCol(pQuery->pGroupbyExpr))
                           && (!isFixedOutputQuery(pQuery));

  if (useTableWindow) {
    SArray          *pGroup = GET_TABLEGROUP(pQInfo, 0);
    STableQueryInfo *pTableInfo = taosArrayGetP(pGroup, 0);
    cond.twindow = pTableInfo->win;
  }

  // pick the tsdb entry point that matches the query type
  TsdbQueryHandleT handle = NULL;
  if (isFirstLastRowQuery(pQuery)) {
    handle = tsdbQueryLastRow(tsdb, &cond, &pQInfo->tableGroupInfo, pQInfo);
  } else if (isPointInterpoQuery(pQuery)) {
    handle = tsdbQueryRowsInExternalWindow(tsdb, &cond, &pQInfo->tableGroupInfo, pQInfo);
  } else {
    handle = tsdbQueryTables(tsdb, &cond, &pQInfo->tableGroupInfo, pQInfo);
  }

  pRuntimeEnv->pQueryHandle = handle;
}

4104 4105 4106 4107 4108 4109 4110 4111 4112 4113 4114 4115 4116
/*
 * Build the per-column description array used by the fill (interpolation) module.
 *
 * One SFillColInfo entry is created for every output expression of the query; its bytes, type
 * and function id are copied from the expression, its offset is the running sum of the bytes of
 * the preceding columns, and its fill value is taken from pQuery->fillVal[i].
 *
 * @param pQuery  query whose output expressions are described
 * @return newly allocated array with pQuery->numOfOutput entries, or NULL if the allocation
 *         fails. The array is allocated with calloc().
 *         NOTE(review): the visible caller passes the result straight to taosInitFillInfo()
 *         without a NULL check - confirm that callee tolerates NULL or add a check there.
 */
static SFillColInfo* taosCreateFillColInfo(SQuery* pQuery) {
  int32_t numOfCols = pQuery->numOfOutput;
  int32_t offset = 0;

  SFillColInfo* pFillCol = calloc(numOfCols, sizeof(SFillColInfo));
  if (pFillCol == NULL) {  // out of memory: do not touch the array
    return NULL;
  }

  for(int32_t i = 0; i < numOfCols; ++i) {
    SExprInfo* pExprInfo = &pQuery->pSelectExpr[i];

    pFillCol[i].col.bytes  = pExprInfo->bytes;
    pFillCol[i].col.type   = pExprInfo->type;
    pFillCol[i].col.offset = offset;
    pFillCol[i].flag       = TSDB_COL_NORMAL;    // always be a normal column for table query
    pFillCol[i].functionId = pExprInfo->base.functionId;
    pFillCol[i].fillVal.i  = pQuery->fillVal[i];

    offset += pExprInfo->bytes;
  }

  return pFillCol;
}

4125
/*
 * Initialize the runtime environment of a query before it is executed for the first time.
 *
 * Steps, in order: read the timestamp precision from the tsdb configuration, adjust the scan
 * limitation and scan order, create the tsdb query handle, fill in the basic runtime fields,
 * configure the TS buffer traverse order (if a TS buffer is supplied), create the runtime
 * environment, create the disk based result buffer and window result info where required,
 * and create the fill info for fill queries that are not point interpolation queries.
 *
 * @param pQInfo         query info object to initialize
 * @param pTsBuf         optional TS buffer, may be NULL
 * @param tsdb           tsdb instance the query runs against
 * @param vgId           vnode group id, stored in pQInfo->vgId
 * @param isSTableQuery  true if the query is a super table query
 * @return TSDB_CODE_SUCCESS, or the error code returned by setupQueryRuntimeEnv() or
 *         createDiskbasedResultBuffer()
 */
int32_t doInitQInfo(SQInfo *pQInfo, STSBuf *pTsBuf, void *tsdb, int32_t vgId, bool isSTableQuery) {
  int32_t code = TSDB_CODE_SUCCESS;

  SQueryRuntimeEnv *pRuntimeEnv = &pQInfo->runtimeEnv;

  SQuery *pQuery = pQInfo->runtimeEnv.pQuery;
  pQuery->precision = tsdbGetCfg(tsdb)->precision;

  setScanLimitationByResultBuffer(pQuery);
  changeExecuteScanOrder(pQInfo, false);
  setupQueryHandle(tsdb, pQInfo, isSTableQuery);

  pQInfo->tsdb = tsdb;
  pQInfo->vgId = vgId;

  pRuntimeEnv->pQuery = pQuery;
  pRuntimeEnv->pTSBuf = pTsBuf;
  pRuntimeEnv->cur.vgroupIndex = -1;
  pRuntimeEnv->stableQuery = isSTableQuery;
  pRuntimeEnv->prevGroupId = INT32_MIN;
  pRuntimeEnv->groupbyNormalCol = isGroupbyNormalCol(pQuery->pGroupbyExpr);

  if (pTsBuf != NULL) {
    int16_t order = (pQuery->order.order == pRuntimeEnv->pTSBuf->tsOrder) ? TSDB_ORDER_ASC : TSDB_ORDER_DESC;
    tsBufSetTraverseOrder(pRuntimeEnv->pTSBuf, order);
  }

  // create runtime environment
  code = setupQueryRuntimeEnv(pRuntimeEnv, pQuery->order.order);
  if (code != TSDB_CODE_SUCCESS) {
    return code;
  }

  /*
   * The top/bottom flag is an input of the page size calculation below, so it has to be
   * determined before that call; previously it was assigned only at the end of this function,
   * after it had already been read.
   * NOTE(review): setupQueryRuntimeEnv() is not visible here - if it already sets topBotQuery
   * this assignment is merely redundant.
   */
  pRuntimeEnv->topBotQuery = isTopBottomQuery(pQuery);
  pRuntimeEnv->numOfRowsPerPage = getNumOfRowsInResultPage(pQuery, pRuntimeEnv->topBotQuery, isSTableQuery);

  if (isSTableQuery) {
    int32_t rows = getInitialPageNum(pQInfo);
    code = createDiskbasedResultBuffer(&pRuntimeEnv->pResultBuf, rows, pQuery->rowSize, pQInfo);
    if (code != TSDB_CODE_SUCCESS) {
      return code;
    }

    if (pQuery->intervalTime == 0) {
      int16_t type = TSDB_DATA_TYPE_NULL;

      if (pRuntimeEnv->groupbyNormalCol) {  // group by columns not tags;
        type = getGroupbyColumnType(pQuery, pQuery->pGroupbyExpr);
      } else {
        type = TSDB_DATA_TYPE_INT;  // group id
      }

      initWindowResInfo(&pRuntimeEnv->windowResInfo, pRuntimeEnv, 512, 4096, type);
    }

  } else if (pRuntimeEnv->groupbyNormalCol || QUERY_IS_INTERVAL_QUERY(pQuery)) {
    int32_t rows = getInitialPageNum(pQInfo);
    code = createDiskbasedResultBuffer(&pRuntimeEnv->pResultBuf, rows, pQuery->rowSize, pQInfo);
    if (code != TSDB_CODE_SUCCESS) {
      return code;
    }

    int16_t type = TSDB_DATA_TYPE_NULL;
    if (pRuntimeEnv->groupbyNormalCol) {
      type = getGroupbyColumnType(pQuery, pQuery->pGroupbyExpr);
    } else {
      type = TSDB_DATA_TYPE_TIMESTAMP;
    }

    initWindowResInfo(&pRuntimeEnv->windowResInfo, pRuntimeEnv, rows, 4096, type);
  }

  if (pQuery->fillType != TSDB_FILL_NONE && !isPointInterpoQuery(pQuery)) {
    SFillColInfo* pColInfo = taosCreateFillColInfo(pQuery);
    pRuntimeEnv->pFillInfo = taosInitFillInfo(pQuery->order.order, 0, 0, pQuery->rec.capacity, pQuery->numOfOutput,
                                              pQuery->slidingTime, pQuery->slidingTimeUnit, pQuery->precision,
                                              pQuery->fillType, pColInfo);
  }

  // todo refactor
  pRuntimeEnv->hasTagResults = hasTagValOutput(pQuery);

  setQueryStatus(pQuery, QUERY_NOT_COMPLETED);
  return TSDB_CODE_SUCCESS;
}

4211
/*
 * Clear the "complete" flag of every output function's result info, so that the functions
 * can be executed again for the next table.
 *
 * @param pRuntimeEnv  runtime environment whose function contexts are reset
 */
static void enableExecutionForNextTable(SQueryRuntimeEnv *pRuntimeEnv) {
  const int32_t numOfOutput = pRuntimeEnv->pQuery->numOfOutput;

  for (int32_t k = 0; k < numOfOutput; ++k) {
    SResultInfo *pInfo = GET_RES_INFO(&pRuntimeEnv->pCtx[k]);
    if (pInfo == NULL) {
      continue;
    }

    pInfo->complete = false;
  }
}

H
Haojun Liao 已提交
4222
/*
 * Scan all data blocks delivered by the active query handle and apply the query functions to
 * each of them (multi-table scan).
 *
 * The primary handle is used during the master scan, the secondary handle otherwise. For every
 * block the owning table is looked up by tid in pQInfo->tableqinfoGroupInfo.map and made the
 * current table; the scan stops if the query is killed or if the table is not found in the map.
 *
 * @param pQInfo  query info object
 * @return elapsed time of the scan, computed as the difference of two taosGetTimestampMs() values
 */
static int64_t scanMultiTableDataBlocks(SQInfo *pQInfo) {
  SQueryRuntimeEnv *pRuntimeEnv = &pQInfo->runtimeEnv;
  SQuery           *pQuery = pRuntimeEnv->pQuery;
  SQueryCostInfo   *pCost = &pRuntimeEnv->summary;

  const int64_t startMs = taosGetTimestampMs();

  TsdbQueryHandleT handle = pRuntimeEnv->pSecQueryHandle;
  if (IS_MASTER_SCAN(pRuntimeEnv)) {
    handle = pRuntimeEnv->pQueryHandle;
  }

  SDataBlockInfo blockInfo = {0};

  while (tsdbNextDataBlock(handle)) {
    pCost->totalBlocks += 1;
    if (isQueryKilled(pQInfo)) {
      break;
    }

    tsdbRetrieveDataBlockInfo(handle, &blockInfo);

    // find the query info of the table that owns this block
    STableQueryInfo **ppTableInfo =
        (STableQueryInfo**) taosHashGet(pQInfo->tableqinfoGroupInfo.map, &blockInfo.tid, sizeof(blockInfo.tid));
    if (ppTableInfo == NULL) {
      break;
    }

    assert(*ppTableInfo != NULL);
    SET_CURRENT_QUERY_TABLE_INFO(pRuntimeEnv, *ppTableInfo);

    SDataStatis *pStatis = NULL;
    SArray      *pBlockCols = loadDataBlockOnDemand(pRuntimeEnv, handle, &blockInfo, &pStatis);

    if (!pRuntimeEnv->groupbyNormalCol) {
      if (QUERY_IS_INTERVAL_QUERY(pQuery)) {
        // interval query
        setIntervalQueryRange(pQInfo, blockInfo.window.skey);

        if (pRuntimeEnv->hasTagResults || pRuntimeEnv->pTSBuf != NULL) {
          setAdditionalInfo(pQInfo, (*ppTableInfo)->pTable, *ppTableInfo);
        }
      } else {
        int32_t step = QUERY_IS_ASC_QUERY(pQuery)? 1:-1;
        setExecutionContext(pQInfo, (*ppTableInfo)->groupIndex, blockInfo.window.ekey + step);
      }
    }

    pCost->totalRows += blockInfo.rows;
    stableApplyFunctionsOnBlock(pRuntimeEnv, &blockInfo, pStatis, pBlockCols, binarySearchForKey);

    qDebug("QInfo:%p check data block, uid:%"PRId64", tid:%d, brange:%" PRId64 "-%" PRId64 ", numOfRows:%d, lastKey:%" PRId64,
           pQInfo, blockInfo.uid, blockInfo.tid, blockInfo.window.skey, blockInfo.window.ekey, blockInfo.rows, pQuery->current->lastKey);
  }

  return taosGetTimestampMs() - startMs;
}

4275 4276
/*
 * Prepare the runtime environment for scanning a single table of table group 0.
 *
 * The query status is reset to QUERY_NOT_COMPLETED, the tag values of the table are set, and
 * the current query handle (if any) is replaced by a new handle that covers only this table
 * and the time range [lastKey, win.ekey] of its STableQueryInfo. If a TS buffer is attached,
 * its cursor is either positioned on the table's tag (first use, vgroupIndex == -1) or restored
 * from pRuntimeEnv->cur. Finally the output buffers of the function contexts are initialized.
 *
 * @param pQInfo  query info object
 * @param index   index of the table inside table group 0
 * @return false if a TS buffer is attached and contains no element for the table's tag
 *         (tsBufGetElemStartPos() yields vnode < 0); true otherwise
 */
static bool multiTableMultioutputHelper(SQInfo *pQInfo, int32_t index) {
  SQueryRuntimeEnv *pRuntimeEnv = &pQInfo->runtimeEnv;
  SQuery           *pQuery = pRuntimeEnv->pQuery;

  setQueryStatus(pQuery, QUERY_NOT_COMPLETED);

  SArray          *pTables = GET_TABLEGROUP(pQInfo, 0);
  STableQueryInfo *pTableInfo = taosArrayGetP(pTables, index);

  setTagVal(pRuntimeEnv, pTableInfo->pTable, pQInfo->tsdb);

  STableId* id = TSDB_TABLEID(pTableInfo->pTable);
  qDebug("QInfo:%p query on (%d): uid:%" PRIu64 ", tid:%d, qrange:%" PRId64 "-%" PRId64, pQInfo, index,
         id->uid, id->tid, pTableInfo->lastKey, pTableInfo->win.ekey);

  STsdbQueryCond cond = {
      .twindow   = {pTableInfo->lastKey, pTableInfo->win.ekey},
      .order     = pQuery->order.order,
      .colList   = pQuery->colList,
      .numOfCols = pQuery->numOfCols,
  };

  // todo refactor: build a temporary group list that includes only current table
  SArray *groupList = taosArrayInit(1, POINTER_BYTES);
  SArray *tableList = taosArrayInit(1, POINTER_BYTES);

  taosArrayPush(tableList, &pTableInfo->pTable);
  taosArrayPush(groupList, &tableList);
  STableGroupInfo singleTable = {.numOfTables = 1, .pGroupList = groupList};

  // release the handle of the previous table before creating the new one
  if (pRuntimeEnv->pQueryHandle != NULL) {
    tsdbCleanupQueryHandle(pRuntimeEnv->pQueryHandle);
    pRuntimeEnv->pQueryHandle = NULL;
  }

  pRuntimeEnv->pQueryHandle = tsdbQueryTables(pQInfo->tsdb, &cond, &singleTable, pQInfo);

  // the temporary lists are released as soon as the handle has been created
  taosArrayDestroy(tableList);
  taosArrayDestroy(groupList);

  if (pRuntimeEnv->pTSBuf != NULL) {
    if (pRuntimeEnv->cur.vgroupIndex != -1) {
      tsBufSetCursor(pRuntimeEnv->pTSBuf, &pRuntimeEnv->cur);
    } else {
      int64_t tag = pRuntimeEnv->pCtx[0].tag.i64Key;
      STSElem elem = tsBufGetElemStartPos(pRuntimeEnv->pTSBuf, 0, tag);

      // failed to find data with the specified tag value
      if (elem.vnode < 0) {
        return false;
      }
    }
  }

  initCtxOutputBuf(pRuntimeEnv);
  return true;
}

/**
 * super table query handler
 * 1. super table projection query, group-by on normal columns query, ts-comp query
 * 2. point interpolation query, last row query
 *
 * @param pQInfo
 */
4339
static void sequentialTableProcess(SQInfo *pQInfo) {
4340
  SQueryRuntimeEnv *pRuntimeEnv = &pQInfo->runtimeEnv;
4341
  SQuery *          pQuery = pRuntimeEnv->pQuery;
4342
  setQueryStatus(pQuery, QUERY_COMPLETED);
4343

H
Haojun Liao 已提交
4344
  size_t numOfGroups = GET_NUM_OF_TABLEGROUP(pQInfo);
4345

H
Haojun Liao 已提交
4346
  if (isPointInterpoQuery(pQuery) || isFirstLastRowQuery(pQuery)) {
4347 4348
    resetCtxOutputBuf(pRuntimeEnv);
    assert(pQuery->limit.offset == 0 && pQuery->limit.limit != 0);
4349

4350
    while (pQInfo->groupIndex < numOfGroups) {
H
Haojun Liao 已提交
4351
      SArray* group = taosArrayGetP(pQInfo->tableGroupInfo.pGroupList, pQInfo->groupIndex);
4352

4353
      qDebug("QInfo:%p last_row query on group:%d, total group:%zu, current group:%p", pQInfo, pQInfo->groupIndex,
dengyihao's avatar
dengyihao 已提交
4354
             numOfGroups, group);
H
Haojun Liao 已提交
4355 4356 4357 4358 4359 4360 4361 4362 4363 4364 4365 4366 4367 4368 4369 4370 4371 4372 4373 4374

      STsdbQueryCond cond = {
          .twindow = pQuery->window,
          .colList = pQuery->colList,
          .order   = pQuery->order.order,
          .numOfCols = pQuery->numOfCols,
      };

      SArray *g1 = taosArrayInit(1, POINTER_BYTES);
      SArray *tx = taosArrayClone(group);
      taosArrayPush(g1, &tx);
      
      STableGroupInfo gp = {.numOfTables = taosArrayGetSize(tx), .pGroupList = g1};

      // include only current table
      if (pRuntimeEnv->pQueryHandle != NULL) {
        tsdbCleanupQueryHandle(pRuntimeEnv->pQueryHandle);
        pRuntimeEnv->pQueryHandle = NULL;
      }
      
4375
      if (isFirstLastRowQuery(pQuery)) {
H
Haojun Liao 已提交
4376
        pRuntimeEnv->pQueryHandle = tsdbQueryLastRow(pQInfo->tsdb, &cond, &gp, pQInfo);
H
Haojun Liao 已提交
4377
      } else {
H
Haojun Liao 已提交
4378
        pRuntimeEnv->pQueryHandle = tsdbQueryRowsInExternalWindow(pQInfo->tsdb, &cond, &gp, pQInfo);
4379
      }
H
Haojun Liao 已提交
4380 4381
      
      initCtxOutputBuf(pRuntimeEnv);
4382
      
4383
      SArray* s = tsdbGetQueriedTableList(pRuntimeEnv->pQueryHandle);
4384 4385
      assert(taosArrayGetSize(s) >= 1);
      
4386
      setTagVal(pRuntimeEnv, taosArrayGetP(s, 0), pQInfo->tsdb);
4387 4388 4389
      if (isFirstLastRowQuery(pQuery)) {
        assert(taosArrayGetSize(s) == 1);
      }
H
Haojun Liao 已提交
4390

dengyihao's avatar
dengyihao 已提交
4391
      taosArrayDestroy(s);
H
Haojun Liao 已提交
4392

H
Haojun Liao 已提交
4393
      // here we simply set the first table as current table
4394 4395 4396
      SArray* first = GET_TABLEGROUP(pQInfo, pQInfo->groupIndex);
      pQuery->current = taosArrayGetP(first, 0);

4397
      scanOneTableDataBlocks(pRuntimeEnv, pQuery->current->lastKey);
H
Haojun Liao 已提交
4398 4399 4400 4401 4402 4403 4404 4405 4406 4407 4408 4409
      
      int64_t numOfRes = getNumOfResult(pRuntimeEnv);
      if (numOfRes > 0) {
        pQuery->rec.rows += numOfRes;
        forwardCtxOutputBuf(pRuntimeEnv, numOfRes);
      }
      
      skipResults(pRuntimeEnv);
      pQInfo->groupIndex += 1;

      // enable execution for next table, when handling the projection query
      enableExecutionForNextTable(pRuntimeEnv);
4410 4411 4412 4413 4414 4415

      if (pQuery->rec.rows >= pQuery->rec.capacity) {
        setQueryStatus(pQuery, QUERY_RESBUF_FULL);
        break;
      }
    }
H
Haojun Liao 已提交
4416
  } else if (pRuntimeEnv->groupbyNormalCol) { // group-by on normal columns query
4417
    while (pQInfo->groupIndex < numOfGroups) {
H
Haojun Liao 已提交
4418
      SArray* group = taosArrayGetP(pQInfo->tableGroupInfo.pGroupList, pQInfo->groupIndex);
4419

4420
      qDebug("QInfo:%p group by normal columns group:%d, total group:%zu", pQInfo, pQInfo->groupIndex, numOfGroups);
4421 4422 4423 4424 4425 4426 4427 4428 4429 4430 4431 4432 4433 4434 4435 4436 4437 4438 4439 4440 4441 4442

      STsdbQueryCond cond = {
          .twindow = pQuery->window,
          .colList = pQuery->colList,
          .order   = pQuery->order.order,
          .numOfCols = pQuery->numOfCols,
      };

      SArray *g1 = taosArrayInit(1, POINTER_BYTES);
      SArray *tx = taosArrayClone(group);
      taosArrayPush(g1, &tx);

      STableGroupInfo gp = {.numOfTables = taosArrayGetSize(tx), .pGroupList = g1};

      // include only current table
      if (pRuntimeEnv->pQueryHandle != NULL) {
        tsdbCleanupQueryHandle(pRuntimeEnv->pQueryHandle);
        pRuntimeEnv->pQueryHandle = NULL;
      }

      pRuntimeEnv->pQueryHandle = tsdbQueryTables(pQInfo->tsdb, &cond, &gp, pQInfo);

4443
      SArray* s = tsdbGetQueriedTableList(pRuntimeEnv->pQueryHandle);
4444 4445
      assert(taosArrayGetSize(s) >= 1);

4446
      setTagVal(pRuntimeEnv, taosArrayGetP(s, 0), pQInfo->tsdb);
4447 4448 4449 4450 4451 4452 4453 4454

      // here we simply set the first table as current table
      scanMultiTableDataBlocks(pQInfo);
      pQInfo->groupIndex += 1;

      SWindowResInfo *pWindowResInfo = &pRuntimeEnv->windowResInfo;

        // no results generated for current group, continue to try the next group
dengyihao's avatar
dengyihao 已提交
4455
      taosArrayDestroy(s); 
4456 4457 4458 4459 4460 4461 4462 4463 4464 4465 4466 4467 4468 4469
      if (pWindowResInfo->size <= 0) {
        continue;
      }

      for (int32_t i = 0; i < pWindowResInfo->size; ++i) {
        SWindowStatus *pStatus = &pWindowResInfo->pResult[i].status;
        pStatus->closed = true;  // enable return all results for group by normal columns

        SWindowResult *pResult = &pWindowResInfo->pResult[i];
        for (int32_t j = 0; j < pQuery->numOfOutput; ++j) {
          pResult->numOfRows = MAX(pResult->numOfRows, pResult->resultInfo[j].numOfRes);
        }
      }

4470
      qDebug("QInfo:%p generated groupby columns results %d rows for group %d completed", pQInfo, pWindowResInfo->size,
4471 4472 4473 4474 4475 4476 4477 4478 4479 4480 4481 4482 4483 4484
          pQInfo->groupIndex);
      int32_t currentGroupIndex = pQInfo->groupIndex;

      pQuery->rec.rows = 0;
      pQInfo->groupIndex = 0;

      ensureOutputBufferSimple(pRuntimeEnv, pWindowResInfo->size);
      copyFromWindowResToSData(pQInfo, pWindowResInfo->pResult);

      pQInfo->groupIndex = currentGroupIndex;  //restore the group index
      assert(pQuery->rec.rows == pWindowResInfo->size);

      clearClosedTimeWindow(pRuntimeEnv);
      break;
4485 4486 4487
    }
  } else {
    /*
4488
     * 1. super table projection query, 2. ts-comp query
4489 4490 4491
     * if the subgroup index is larger than 0, results generated by group by tbname,k is existed.
     * we need to return it to client in the first place.
     */
4492
    if (pQInfo->groupIndex > 0) {
4493
      copyFromWindowResToSData(pQInfo, pRuntimeEnv->windowResInfo.pResult);
4494
      pQuery->rec.total += pQuery->rec.rows;
4495

4496
      if (pQuery->rec.rows > 0) {
4497 4498 4499
        return;
      }
    }
4500

4501
    // all data have returned already
4502
    if (pQInfo->tableIndex >= pQInfo->tableqinfoGroupInfo.numOfTables) {
4503 4504
      return;
    }
4505

4506 4507
    resetCtxOutputBuf(pRuntimeEnv);
    resetTimeWindowInfo(pRuntimeEnv, &pRuntimeEnv->windowResInfo);
4508

H
Haojun Liao 已提交
4509
    SArray *group = GET_TABLEGROUP(pQInfo, 0);
4510 4511
    assert(taosArrayGetSize(group) == pQInfo->tableqinfoGroupInfo.numOfTables &&
           1 == taosArrayGetSize(pQInfo->tableqinfoGroupInfo.pGroupList));
4512

4513
    while (pQInfo->tableIndex < pQInfo->tableqinfoGroupInfo.numOfTables) {
4514
      if (isQueryKilled(pQInfo)) {
4515 4516
        return;
      }
4517

4518
      pQuery->current = taosArrayGetP(group, pQInfo->tableIndex);
4519
      if (!multiTableMultioutputHelper(pQInfo, pQInfo->tableIndex)) {
4520
        pQInfo->tableIndex++;
4521 4522
        continue;
      }
4523

H
hjxilinx 已提交
4524
      // TODO handle the limit offset problem
4525
      if (pQuery->numOfFilterCols == 0 && pQuery->limit.offset > 0) {
4526
        //        skipBlocks(pRuntimeEnv);
4527 4528
        if (Q_STATUS_EQUAL(pQuery->status, QUERY_COMPLETED)) {
          pQInfo->tableIndex++;
4529 4530 4531
          continue;
        }
      }
4532

4533
      scanOneTableDataBlocks(pRuntimeEnv, pQuery->current->lastKey);
4534
      skipResults(pRuntimeEnv);
4535

4536
      // the limitation of output result is reached, set the query completed
4537
      if (limitResults(pRuntimeEnv)) {
4538
        pQInfo->tableIndex = pQInfo->tableqinfoGroupInfo.numOfTables;
4539 4540
        break;
      }
4541

4542 4543
      // enable execution for next table, when handling the projection query
      enableExecutionForNextTable(pRuntimeEnv);
4544

4545
      if (Q_STATUS_EQUAL(pQuery->status, QUERY_COMPLETED)) {
4546 4547 4548 4549 4550 4551
        /*
         * query range is identical in terms of all meters involved in query,
         * so we need to restore them at the *beginning* of query on each meter,
         * not the consecutive query on meter on which is aborted due to buffer limitation
         * to ensure that, we can reset the query range once query on a meter is completed.
         */
4552
        pQInfo->tableIndex++;
weixin_48148422's avatar
weixin_48148422 已提交
4553

H
Haojun Liao 已提交
4554
        STableIdInfo tidInfo = {0};
4555

H
Haojun Liao 已提交
4556 4557 4558
        STableId* id = TSDB_TABLEID(pQuery->current->pTable);
        tidInfo.uid = id->uid;
        tidInfo.tid = id->tid;
weixin_48148422's avatar
weixin_48148422 已提交
4559
        tidInfo.key = pQuery->current->lastKey;
weixin_48148422's avatar
weixin_48148422 已提交
4560 4561
        taosArrayPush(pQInfo->arrTableIdInfo, &tidInfo);

4562
        // if the buffer is full or group by each table, we need to jump out of the loop
4563 4564
        if (Q_STATUS_EQUAL(pQuery->status, QUERY_RESBUF_FULL) /*||
            isGroupbyEachTable(pQuery->pGroupbyExpr, pSupporter->pSidSet)*/) {
4565 4566
          break;
        }
4567

4568
      } else {
4569
        // all data in the result buffer are skipped due to the offset, continue to retrieve data from current meter
4570 4571
        if (pQuery->rec.rows == 0) {
          assert(!Q_STATUS_EQUAL(pQuery->status, QUERY_RESBUF_FULL));
4572 4573
          continue;
        } else {
4574 4575 4576
          // buffer is full, wait for the next round to retrieve data from current meter
          assert(Q_STATUS_EQUAL(pQuery->status, QUERY_RESBUF_FULL));
          break;
4577 4578 4579
        }
      }
    }
H
Haojun Liao 已提交
4580

4581
    if (pQInfo->tableIndex >= pQInfo->tableqinfoGroupInfo.numOfTables) {
H
Haojun Liao 已提交
4582 4583
      setQueryStatus(pQuery, QUERY_COMPLETED);
    }
4584
  }
4585

4586 4587 4588 4589 4590 4591 4592 4593 4594 4595 4596 4597
  /*
   * 1. super table projection query, group-by on normal columns query, ts-comp query
   * 2. point interpolation query, last row query
   *
   * group-by on normal columns query and last_row query do NOT invoke the finalizer here,
   * since the finalize stage will be done at the client side.
   *
   * projection query, point interpolation query do not need the finalizer.
   *
   * Only the ts-comp query requires the finalizer function to be executed here.
   */
  if (isTSCompQuery(pQuery)) {
H
hjxilinx 已提交
4598
    finalizeQueryResult(pRuntimeEnv);
4599
  }
4600

4601 4602 4603
  if (pRuntimeEnv->pTSBuf != NULL) {
    pRuntimeEnv->cur = pRuntimeEnv->pTSBuf->cur;
  }
4604

4605
  qDebug(
B
Bomin Zhang 已提交
4606
      "QInfo %p numOfTables:%"PRIu64", index:%d, numOfGroups:%zu, %"PRId64" points returned, total:%"PRId64", offset:%" PRId64,
4607
      pQInfo, pQInfo->tableqinfoGroupInfo.numOfTables, pQInfo->tableIndex, numOfGroups, pQuery->rec.rows, pQuery->rec.total,
4608
      pQuery->limit.offset);
4609 4610
}

4611 4612 4613 4614
/*
 * Put the runtime environment into reverse-scan mode: set the reverse scan flag, swap the
 * boundaries of the query window, invert the scan order and create the secondary tsdb query
 * handle (pSecQueryHandle) for the swapped window.
 */
static void doSaveContext(SQInfo *pQInfo) {
  SQueryRuntimeEnv *pEnv = &pQInfo->runtimeEnv;
  SQuery *          pQry = pEnv->pQuery;

  SET_REVERSE_SCAN_FLAG(pEnv);
  SWAP(pQry->window.skey, pQry->window.ekey, TSKEY);
  SWITCH_ORDER(pQry->order.order);

  // the cursor of the ts buffer, when present, follows the new order
  if (pEnv->pTSBuf != NULL) {
    pEnv->pTSBuf->cur.order = pQry->order.order;
  }

  // the condition is built from the already swapped window and inverted order
  STsdbQueryCond reverseCond = {
      .twindow   = pQry->window,
      .order     = pQry->order.order,
      .colList   = pQry->colList,
      .numOfCols = pQry->numOfCols,
  };

  // drop the secondary handle left from an earlier call before creating a new one
  if (pEnv->pSecQueryHandle != NULL) {
    tsdbCleanupQueryHandle(pEnv->pSecQueryHandle);
  }

  pEnv->prevGroupId     = INT32_MIN;
  pEnv->pSecQueryHandle = tsdbQueryTables(pQInfo->tsdb, &reverseCond, &pQInfo->tableGroupInfo, pQInfo);

  setQueryStatus(pQry, QUERY_NOT_COMPLETED);
  switchCtxOrder(pEnv);
  disableFuncInReverseScan(pQInfo);
}

4643 4644 4645 4646
/*
 * Counterpart of doSaveContext(): swap the query window boundaries back, flip the order of the
 * ts buffer cursor (if any), switch the order of the function contexts and set the master scan
 * flag again.
 *
 * NOTE(review): doSaveContext() also flips pQuery->order.order, which is not flipped back
 * here -- confirm this is intentional.
 */
static void doRestoreContext(SQInfo *pQInfo) {
  SQueryRuntimeEnv *pEnv = &pQInfo->runtimeEnv;
  SQuery *          pQry = pEnv->pQuery;

  SWAP(pQry->window.skey, pQry->window.ekey, TSKEY);

  if (pEnv->pTSBuf != NULL) {
    SWITCH_ORDER(pEnv->pTSBuf->cur.order);
  }

  switchCtxOrder(pEnv);
  SET_MASTER_SCAN_FLAG(pEnv);
}

4657 4658 4659
/*
 * Close every time window once a scan has finished.
 *
 * For an interval query the windows of each table in each table group are closed and
 * removeRedundantWindow() is applied relative to the table's lastKey; otherwise only the
 * window result info kept in the runtime environment is closed.
 */
static void doCloseAllTimeWindowAfterScan(SQInfo *pQInfo) {
  SQuery *pQry = pQInfo->runtimeEnv.pQuery;

  int32_t step = GET_FORWARD_DIRECTION_FACTOR(pQry->order.order);

  if (!QUERY_IS_INTERVAL_QUERY(pQry)) {
    // close results for group result
    closeAllTimeWindow(&pQInfo->runtimeEnv.windowResInfo);
    return;
  }

  size_t groupCount = GET_NUM_OF_TABLEGROUP(pQInfo);
  for (int32_t g = 0; g < groupCount; ++g) {
    SArray *pGroup = GET_TABLEGROUP(pQInfo, g);
    size_t  tableCount = taosArrayGetSize(pGroup);

    for (int32_t t = 0; t < tableCount; ++t) {
      STableQueryInfo *pTable = taosArrayGetP(pGroup, t);

      closeAllTimeWindow(&pTable->windowResInfo);
      removeRedundantWindow(&pTable->windowResInfo, pTable->lastKey - step, step);
    }
  }
}

/*
 * Run a query over multiple tables: one master scan, an optional reverse scan, then the
 * results are merged/copied into the output buffer.
 *
 * When pQInfo->groupIndex > 0 no scan is performed; only the next portion of the existing
 * results is copied to the output buffer.
 */
static void multiTableQueryProcess(SQInfo *pQInfo) {
  SQueryRuntimeEnv *pEnv = &pQInfo->runtimeEnv;
  SQuery *          pQry = pEnv->pQuery;

  if (pQInfo->groupIndex > 0) {
    /*
     * groupIndex > 0: the query processing must already be completed, so we only need to
     * copy the data into the output buffer
     */
    if (!QUERY_IS_INTERVAL_QUERY(pQry)) {
      copyFromWindowResToSData(pQInfo, pEnv->windowResInfo.pResult);
    } else {
      copyResToQueryResultBuf(pQInfo, pQry);
#ifdef _DEBUG_VIEW
      displayInterResult(pQry->sdata, pEnv, pQry->sdata[0]->num);
#endif
    }

    qDebug("QInfo:%p current:%"PRId64", total:%"PRId64"", pQInfo, pQry->rec.rows, pQry->rec.total);
    return;
  }

  qDebug("QInfo:%p query start, qrange:%" PRId64 "-%" PRId64 ", order:%d, forward scan start", pQInfo,
         pQry->window.skey, pQry->window.ekey, pQry->order.order);

  // master scan: check all qualified data blocks
  int64_t elapsed = scanMultiTableDataBlocks(pQInfo);
  qDebug("QInfo:%p master scan completed, elapsed time: %" PRId64 "ms, reverse scan start", pQInfo, elapsed);

  // an error occurred or the query was killed, abort the current execution
  if (pQInfo->code != TSDB_CODE_SUCCESS || isQueryKilled(pQInfo)) {
    qDebug("QInfo:%p query killed or error occurred, code:%s, abort", pQInfo, tstrerror(pQInfo->code));
    return;
  }

  // close all time window results of the master scan
  doCloseAllTimeWindowAfterScan(pQInfo);

  if (!needReverseScan(pQry)) {
    qDebug("QInfo:%p no need to do reversed scan, query completed", pQInfo);
  } else {
    doSaveContext(pQInfo);

    elapsed = scanMultiTableDataBlocks(pQInfo);
    qDebug("QInfo:%p reversed scan completed, elapsed time: %" PRId64 "ms", pQInfo, elapsed);

    doCloseAllTimeWindowAfterScan(pQInfo);
    doRestoreContext(pQInfo);
  }

  setQueryStatus(pQry, QUERY_COMPLETED);

  if (pQInfo->code != TSDB_CODE_SUCCESS || isQueryKilled(pQInfo)) {
    qDebug("QInfo:%p query killed or error occurred, code:%s, abort", pQInfo, tstrerror(pQInfo->code));
    return;
  }

  if (QUERY_IS_INTERVAL_QUERY(pQry) || isSumAvgRateQuery(pQry)) {
    // results are copied only when the merge into group results succeeded
    if (mergeIntoGroupResult(pQInfo) == TSDB_CODE_SUCCESS) {
      copyResToQueryResultBuf(pQInfo, pQry);

#ifdef _DEBUG_VIEW
      displayInterResult(pQry->sdata, pEnv, pQry->sdata[0]->num);
#endif
    }
  } else {
    // not an interval query
    copyFromWindowResToSData(pQInfo, pEnv->windowResInfo.pResult);
  }

  // handle the limitation of output buffer
  qDebug("QInfo:%p points returned:%" PRId64 ", total:%" PRId64, pQInfo, pQry->rec.rows, pQry->rec.total + pQry->rec.rows);
}

/*
 * in each query, this function will be called only once, no retry for further result.
 *
 * select count(*)/top(field,k)/avg(field name) from table_name [where ts>now-1a];
 * select count(*) from table_name group by status_column;
 */
H
hjxilinx 已提交
4758
static void tableFixedOutputProcess(SQInfo *pQInfo, STableQueryInfo* pTableInfo) {
4759
  SQueryRuntimeEnv *pRuntimeEnv = &pQInfo->runtimeEnv;
H
hjxilinx 已提交
4760 4761
  
  SQuery *pQuery = pRuntimeEnv->pQuery;
H
Haojun Liao 已提交
4762 4763 4764 4765
  if (!isTopBottomQuery(pQuery) && pQuery->limit.offset > 0) {  // no need to execute, since the output will be ignore.
    return;
  }
  
H
hjxilinx 已提交
4766 4767
  pQuery->current = pTableInfo;  // set current query table info
  
4768
  scanOneTableDataBlocks(pRuntimeEnv, pTableInfo->lastKey);
H
hjxilinx 已提交
4769
  finalizeQueryResult(pRuntimeEnv);
4770

4771
  if (isQueryKilled(pQInfo)) {
4772 4773
    return;
  }
4774

H
Haojun Liao 已提交
4775
  // since the numOfRows must be identical for all sql functions that are allowed to be executed simutaneously.
4776
  pQuery->rec.rows = getNumOfResult(pRuntimeEnv);
4777

4778
  skipResults(pRuntimeEnv);
4779
  limitResults(pRuntimeEnv);
4780 4781
}

H
hjxilinx 已提交
4782
/*
 * Process a query on one table whose output consists of multiple rows: scan repeatedly until
 * some rows are produced or the query is completed, then apply the limit and, when the query
 * is completed, record the table id together with its lastKey in pQInfo->arrTableIdInfo.
 */
static void tableMultiOutputProcess(SQInfo *pQInfo, STableQueryInfo* pTableInfo) {
  SQueryRuntimeEnv *pEnv = &pQInfo->runtimeEnv;
  SQuery *          pQry = pEnv->pQuery;

  pQry->current = pTableInfo;

  // for ts_comp query, re-initialization is not allowed
  if (!isTSCompQuery(pQry)) {
    resetCtxOutputBuf(pEnv);
  }

  // skip blocks without loading the actual data block from file if no filter condition is present
  skipBlocks(pEnv);
  if (pQry->limit.offset > 0 && pQry->numOfFilterCols == 0) {
    setQueryStatus(pQry, QUERY_COMPLETED);
    return;
  }

  for (;;) {
    scanOneTableDataBlocks(pEnv, pQry->current->lastKey);
    finalizeQueryResult(pEnv);

    if (isQueryKilled(pQInfo)) {
      return;
    }

    pQry->rec.rows = getNumOfResult(pEnv);
    if (pQry->limit.offset > 0 && pQry->numOfFilterCols > 0 && pQry->rec.rows > 0) {
      skipResults(pEnv);
    }

    /*
     * 1. if pQuery->size == 0, pQuery->limit.offset >= 0, still need to check data
     * 2. if pQuery->size > 0, pQuery->limit.offset must be 0
     */
    if (pQry->rec.rows > 0 || Q_STATUS_EQUAL(pQry->status, QUERY_COMPLETED)) {
      break;
    }

    qDebug("QInfo:%p skip current result, offset:%" PRId64 ", next qrange:%" PRId64 "-%" PRId64,
           pQInfo, pQry->limit.offset, pQry->current->lastKey, pQry->current->win.ekey);

    resetCtxOutputBuf(pEnv);
  }

  limitResults(pEnv);

  if (Q_STATUS_EQUAL(pQry->status, QUERY_RESBUF_FULL)) {
    qDebug("QInfo:%p query paused due to output limitation, next qrange:%" PRId64 "-%" PRId64, pQInfo,
        pQry->current->lastKey, pQry->window.ekey);
  } else if (Q_STATUS_EQUAL(pQry->status, QUERY_COMPLETED)) {
    // remember the table and the key it stopped at
    STableId *   pId = TSDB_TABLEID(pQry->current);
    STableIdInfo idInfo;

    idInfo.uid = pId->uid;
    idInfo.tid = pId->tid;
    idInfo.key = pQry->current->lastKey;
    taosArrayPush(pQInfo->arrTableIdInfo, &idInfo);
  }

  if (!isTSCompQuery(pQry)) {
    assert(pQry->rec.rows <= pQry->rec.capacity);
  }
}

H
Haojun Liao 已提交
4846
/*
 * Scan the data blocks of the current table starting from `start` and finalize the results,
 * repeating until the query status contains QUERY_COMPLETED or QUERY_RESBUF_FULL.
 * Returns early, without finalizing, when the query has been killed.
 */
static void tableIntervalProcessImpl(SQueryRuntimeEnv *pRuntimeEnv, TSKEY start) {
  SQuery *pQry = pRuntimeEnv->pQuery;

  do {
    scanOneTableDataBlocks(pRuntimeEnv, start);

    if (isQueryKilled(GET_QINFO_ADDR(pRuntimeEnv))) {
      return;
    }

    assert(!Q_STATUS_EQUAL(pQry->status, QUERY_NOT_COMPLETED));
    finalizeQueryResult(pRuntimeEnv);

    // here we can ignore the records in case of no interpolation
    // todo handle offset, in case of top/bottom interval query
    bool hasFilterOrTsBuf = (pQry->numOfFilterCols > 0 || pRuntimeEnv->pTSBuf != NULL);
    if (hasFilterOrTsBuf && pQry->limit.offset > 0 && pQry->fillType == TSDB_FILL_NONE) {
      // consume the offset by discarding closed time windows from the front
      int32_t closedWindows = numOfClosedTimeWindow(&pRuntimeEnv->windowResInfo);
      int32_t discarded = MIN(closedWindows, pQry->limit.offset);

      clearFirstNTimeWindow(pRuntimeEnv, discarded);
      pQry->limit.offset -= discarded;
    }
  } while (!Q_STATUS_EQUAL(pQry->status, QUERY_COMPLETED | QUERY_RESBUF_FULL));
}

4877
// handle time interval query on table
H
hjxilinx 已提交
4878
static void tableIntervalProcess(SQInfo *pQInfo, STableQueryInfo* pTableInfo) {
4879 4880
  SQueryRuntimeEnv *pRuntimeEnv = &(pQInfo->runtimeEnv);

H
hjxilinx 已提交
4881 4882
  SQuery *pQuery = pRuntimeEnv->pQuery;
  pQuery->current = pTableInfo;
4883

H
Haojun Liao 已提交
4884
  int32_t numOfFilled = 0;
H
Haojun Liao 已提交
4885 4886
  TSKEY newStartKey = TSKEY_INITIAL_VAL;
  
4887
  // skip blocks without load the actual data block from file if no filter condition present
H
Haojun Liao 已提交
4888
  skipTimeInterval(pRuntimeEnv, &newStartKey);
4889
  if (pQuery->limit.offset > 0 && pQuery->numOfFilterCols == 0 && pRuntimeEnv->pFillInfo == NULL) {
4890 4891 4892 4893
    setQueryStatus(pQuery, QUERY_COMPLETED);
    return;
  }

4894
  while (1) {
H
Haojun Liao 已提交
4895
    tableIntervalProcessImpl(pRuntimeEnv, newStartKey);
4896

H
Haojun Liao 已提交
4897
    if (QUERY_IS_INTERVAL_QUERY(pQuery)) {
4898
      pQInfo->groupIndex = 0;  // always start from 0
4899
      pQuery->rec.rows = 0;
4900
      copyFromWindowResToSData(pQInfo, pRuntimeEnv->windowResInfo.pResult);
4901

4902
      clearFirstNTimeWindow(pRuntimeEnv, pQInfo->groupIndex);
4903
    }
4904

4905
    // the offset is handled at prepare stage if no interpolation involved
4906
    if (pQuery->fillType == TSDB_FILL_NONE || pQuery->rec.rows == 0) {
4907
      limitResults(pRuntimeEnv);
4908 4909
      break;
    } else {
H
Haojun Liao 已提交
4910
      taosFillSetStartInfo(pRuntimeEnv->pFillInfo, pQuery->rec.rows, pQuery->window.ekey);
4911
      taosFillCopyInputDataFromFilePage(pRuntimeEnv->pFillInfo, (tFilePage**) pQuery->sdata);
H
Haojun Liao 已提交
4912
      numOfFilled = 0;
4913
      
H
Haojun Liao 已提交
4914
      pQuery->rec.rows = doFillGapsInResults(pRuntimeEnv, (tFilePage **)pQuery->sdata, &numOfFilled);
4915
      if (pQuery->rec.rows > 0 || Q_STATUS_EQUAL(pQuery->status, QUERY_COMPLETED)) {
4916
        limitResults(pRuntimeEnv);
4917 4918
        break;
      }
4919

4920
      // no result generated yet, continue retrieve data
4921
      pQuery->rec.rows = 0;
4922 4923
    }
  }
4924

4925
  // all data scanned, the group by normal column can return
H
Haojun Liao 已提交
4926
  if (pRuntimeEnv->groupbyNormalCol) {  // todo refactor with merge interval time result
4927
    pQInfo->groupIndex = 0;
4928
    pQuery->rec.rows = 0;
4929
    copyFromWindowResToSData(pQInfo, pRuntimeEnv->windowResInfo.pResult);
4930
    clearFirstNTimeWindow(pRuntimeEnv, pQInfo->groupIndex);
4931
  }
4932

H
Haojun Liao 已提交
4933
  pQInfo->pointsInterpo += numOfFilled;
4934 4935
}

4936 4937 4938 4939
/*
 * Entry point for a query on exactly one table.
 *
 * Results remaining from a previous round (pending fill output or not yet returned window
 * results) are returned first; otherwise the query is dispatched to the interval, fixed-output
 * or multi-output processor and the elapsed time is added to the runtime summary.
 */
static void tableQueryImpl(SQInfo *pQInfo) {
  SQueryRuntimeEnv *pEnv = &pQInfo->runtimeEnv;
  SQuery *          pQry = pEnv->pQuery;

  if (queryHasRemainResults(pEnv)) {
    if (pQry->fillType != TSDB_FILL_NONE) {
      /*
       * Some results were not returned yet because of result interpolation, so stay in this
       * procedure instead of launching the retrieve procedure for the next results.
       */
      int32_t filledRows = 0;
      pQry->rec.rows = doFillGapsInResults(pEnv, (tFilePage **)pQry->sdata, &filledRows);

      if (pQry->rec.rows > 0) {
        limitResults(pEnv);
      }

      qDebug("QInfo:%p current:%" PRId64 " returned, total:%" PRId64, pQInfo, pQry->rec.rows, pQry->rec.total);
      return;
    }

    pQry->rec.rows = 0;
    pQInfo->groupIndex = 0;  // always start from 0

    if (pEnv->windowResInfo.size > 0) {
      copyFromWindowResToSData(pQInfo, pEnv->windowResInfo.pResult);
      clearFirstNTimeWindow(pEnv, pQInfo->groupIndex);

      if (pQry->rec.rows > 0) {
        qDebug("QInfo:%p %"PRId64" rows returned from group results, total:%"PRId64"", pQInfo, pQry->rec.rows, pQry->rec.total);

        // no data remains
        if (pEnv->windowResInfo.size <= 0) {
          qDebug("QInfo:%p query over, %"PRId64" rows are returned", pQInfo, pQry->rec.total);
        }

        return;
      }
    }
  }

  // number of points returned during this query
  pQry->rec.rows = 0;
  int64_t startUs = taosGetTimestampUs();

  assert(pQInfo->tableqinfoGroupInfo.numOfTables == 1);
  SArray *         pGroup = GET_TABLEGROUP(pQInfo, 0);
  STableQueryInfo *pTable = taosArrayGetP(pGroup, 0);

  // group by normal column, sliding window query, interval query are handled by interval query processor
  if (QUERY_IS_INTERVAL_QUERY(pQry) || pEnv->groupbyNormalCol) {
    // interval (down sampling operation)
    tableIntervalProcess(pQInfo, pTable);
  } else if (isFixedOutputQuery(pQry)) {
    tableFixedOutputProcess(pQInfo, pTable);
  } else {
    // diff/add/multiply/subtract/division
    assert(pQry->checkBuffer == 1);
    tableMultiOutputProcess(pQInfo, pTable);
  }

  // record the total elapsed time
  pEnv->summary.elapsedTime += (taosGetTimestampUs() - startUs);
  assert(pQInfo->tableqinfoGroupInfo.numOfTables == 1);
}

5001
/*
 * Entry point for a query over multiple tables: dispatch to multiTableQueryProcess() or
 * sequentialTableProcess() depending on the query kind, and add the elapsed time to the
 * runtime summary.
 */
static void stableQueryImpl(SQInfo *pQInfo) {
  SQueryRuntimeEnv *pEnv = &pQInfo->runtimeEnv;
  SQuery *          pQry = pEnv->pQuery;

  pQry->rec.rows = 0;

  int64_t startUs = taosGetTimestampUs();

  bool useMultiTableProcess =
      QUERY_IS_INTERVAL_QUERY(pQry) ||
      (isFixedOutputQuery(pQry) && (!isPointInterpoQuery(pQry)) && !pEnv->groupbyNormalCol &&
       !isFirstLastRowQuery(pQry));

  if (!useMultiTableProcess) {
    assert((pQry->checkBuffer == 1 && pQry->intervalTime == 0) || isPointInterpoQuery(pQry) ||
            isFirstLastRowQuery(pQry) || pEnv->groupbyNormalCol);

    sequentialTableProcess(pQInfo);
  } else {
    multiTableQueryProcess(pQInfo);
  }

  // record the total elapsed time
  pQInfo->runtimeEnv.summary.elapsedTime += (taosGetTimestampUs() - startUs);
}

5023
/*
 * Locate the column referenced by an expression in the source column lists of the query message.
 *
 * @param pQueryMsg  query message providing colList/numOfCols and numOfTags
 * @param pExprMsg   expression whose colInfo.colId is looked up
 * @param pTagCols   tag column list, searched when the expression column is flagged as a tag
 * @return index into pTagCols (tag column) or into pQueryMsg->colList (normal column);
 *         -1 when the tag column is TSDB_TBNAME_COLUMN_INDEX or when no column matches.
 *
 * A missing column still trips the assertion in debug builds. The explicit return after it
 * keeps builds with assertions disabled from running off the end of a value-returning function.
 */
static int32_t getColumnIndexInSource(SQueryTableMsg *pQueryMsg, SSqlFuncMsg *pExprMsg, SColumnInfo* pTagCols) {
  if (TSDB_COL_IS_TAG(pExprMsg->colInfo.flag)) {
    if (pExprMsg->colInfo.colId == TSDB_TBNAME_COLUMN_INDEX) {
      return -1;
    }

    for (int32_t j = 0; j < pQueryMsg->numOfTags; ++j) {
      if (pExprMsg->colInfo.colId == pTagCols[j].colId) {
        return j;
      }
    }
  } else {
    for (int32_t j = 0; j < pQueryMsg->numOfCols; ++j) {
      if (pExprMsg->colInfo.colId == pQueryMsg->colList[j].colId) {
        return j;
      }
    }
  }

  // the referenced column does not exist in the source lists
  assert(0);
  return -1;
}

5052 5053 5054
/*
 * Check that the index returned by getColumnIndexInSource() for the expression column is below
 * the number of source columns or below the number of tag columns of the query message.
 */
bool validateExprColumnInfo(SQueryTableMsg *pQueryMsg, SSqlFuncMsg *pExprMsg, SColumnInfo* pTagCols) {
  int32_t index = getColumnIndexInSource(pQueryMsg, pExprMsg, pTagCols);

  if (index < pQueryMsg->numOfCols) {
    return true;
  }

  return index < pQueryMsg->numOfTags;
}

5057
/*
 * Sanity check of the scalar fields of a query message.
 *
 * Rejects a negative interval time, a non-positive number of tables, a negative number of
 * group-by columns and a number of output columns outside (0, TSDB_MAX_COLUMNS]. Only the
 * first violation found is logged.
 *
 * @return true if all checked fields are acceptable, false otherwise
 */
static bool validateQueryMsg(SQueryTableMsg *pQueryMsg) {
  bool valid = false;

  if (pQueryMsg->intervalTime < 0) {
    qError("qmsg:%p illegal value of interval time %" PRId64, pQueryMsg, pQueryMsg->intervalTime);
  } else if (pQueryMsg->numOfTables <= 0) {
    qError("qmsg:%p illegal value of numOfTables %d", pQueryMsg, pQueryMsg->numOfTables);
  } else if (pQueryMsg->numOfGroupCols < 0) {
    qError("qmsg:%p illegal value of numOfGroupbyCols %d", pQueryMsg, pQueryMsg->numOfGroupCols);
  } else if (pQueryMsg->numOfOutput <= 0 || pQueryMsg->numOfOutput > TSDB_MAX_COLUMNS) {
    qError("qmsg:%p illegal value of output columns %d", pQueryMsg, pQueryMsg->numOfOutput);
  } else {
    valid = true;
  }

  return valid;
}

/*
 * Validate the number of source columns and tags of a query message.
 *
 * Negative counts or a total above TSDB_MAX_COLUMNS are rejected. When the message carries
 * neither columns nor tags, every output expression has to be TSDB_FUNC_TAGPRJ, or
 * TSDB_FUNC_TID_TAG / TSDB_FUNC_COUNT applied to TSDB_TBNAME_COLUMN_INDEX.
 *
 * @return true if the source column information is acceptable, false otherwise
 */
static bool validateQuerySourceCols(SQueryTableMsg *pQueryMsg, SSqlFuncMsg** pExprMsg) {
  int32_t total = pQueryMsg->numOfCols + pQueryMsg->numOfTags;

  if (pQueryMsg->numOfCols < 0 || pQueryMsg->numOfTags < 0 || total > TSDB_MAX_COLUMNS) {
    qError("qmsg:%p illegal value of numOfCols %d numOfTags:%d", pQueryMsg, pQueryMsg->numOfCols, pQueryMsg->numOfTags);
    return false;
  }

  if (total != 0) {
    return true;
  }

  // no source column at all: only a few expressions can be evaluated
  for (int32_t k = 0; k < pQueryMsg->numOfOutput; ++k) {
    SSqlFuncMsg *pFunc = pExprMsg[k];

    bool onTbname = (pFunc->colInfo.colId == TSDB_TBNAME_COLUMN_INDEX);
    bool allowed = (pFunc->functionId == TSDB_FUNC_TAGPRJ) ||
                   (pFunc->functionId == TSDB_FUNC_TID_TAG && onTbname) ||
                   (pFunc->functionId == TSDB_FUNC_COUNT && onTbname);

    if (!allowed) {
      return false;
    }
  }

  return true;
}

5103
/*
 * Read pQueryMsg->numOfTables STableIdInfo records starting at pMsg, convert their tid/uid/key
 * fields in place with htonl/htobe64 and push a copy of each record into a newly created array.
 *
 * @param pQueryMsg     query message, numOfTables must be > 0
 * @param pMsg          position of the first STableIdInfo record in the message
 * @param pTableIdList  receives the newly created array
 * @return position in the message right after the last record
 */
static char *createTableIdList(SQueryTableMsg *pQueryMsg, char *pMsg, SArray **pTableIdList) {
  assert(pQueryMsg->numOfTables > 0);

  *pTableIdList = taosArrayInit(pQueryMsg->numOfTables, sizeof(STableIdInfo));

  STableIdInfo *pInfo = (STableIdInfo *)pMsg;
  for (int32_t remaining = pQueryMsg->numOfTables; remaining > 0; --remaining, ++pInfo) {
    pInfo->tid = htonl(pInfo->tid);
    pInfo->uid = htobe64(pInfo->uid);
    pInfo->key = htobe64(pInfo->key);

    taosArrayPush(*pTableIdList, pInfo);
  }

  return (char *)pInfo;
}
5121

5122
/**
H
hjxilinx 已提交
5123
 * pQueryMsg->head has been converted before this function is called.
5124
 *
H
hjxilinx 已提交
5125
 * @param pQueryMsg
5126 5127 5128 5129
 * @param pTableIdList
 * @param pExpr
 * @return
 */
5130
static int32_t convertQueryMsg(SQueryTableMsg *pQueryMsg, SArray **pTableIdList, SSqlFuncMsg ***pExpr,
weixin_48148422's avatar
weixin_48148422 已提交
5131
                               char **tagCond, char** tbnameCond, SColIndex **groupbyCols, SColumnInfo** tagCols) {
5132 5133
  int32_t code = TSDB_CODE_SUCCESS;

5134 5135 5136 5137 5138 5139 5140 5141
  pQueryMsg->numOfTables = htonl(pQueryMsg->numOfTables);

  pQueryMsg->window.skey = htobe64(pQueryMsg->window.skey);
  pQueryMsg->window.ekey = htobe64(pQueryMsg->window.ekey);
  pQueryMsg->intervalTime = htobe64(pQueryMsg->intervalTime);
  pQueryMsg->slidingTime = htobe64(pQueryMsg->slidingTime);
  pQueryMsg->limit = htobe64(pQueryMsg->limit);
  pQueryMsg->offset = htobe64(pQueryMsg->offset);
H
hjxilinx 已提交
5142

5143 5144
  pQueryMsg->order = htons(pQueryMsg->order);
  pQueryMsg->orderColId = htons(pQueryMsg->orderColId);
H
Haojun Liao 已提交
5145
  pQueryMsg->queryType = htonl(pQueryMsg->queryType);
weixin_48148422's avatar
weixin_48148422 已提交
5146
  pQueryMsg->tagNameRelType = htons(pQueryMsg->tagNameRelType);
5147 5148

  pQueryMsg->numOfCols = htons(pQueryMsg->numOfCols);
5149
  pQueryMsg->numOfOutput = htons(pQueryMsg->numOfOutput);
H
hjxilinx 已提交
5150
  pQueryMsg->numOfGroupCols = htons(pQueryMsg->numOfGroupCols);
5151 5152 5153
  pQueryMsg->tagCondLen = htons(pQueryMsg->tagCondLen);
  pQueryMsg->tsOffset = htonl(pQueryMsg->tsOffset);
  pQueryMsg->tsLen = htonl(pQueryMsg->tsLen);
H
hjxilinx 已提交
5154
  pQueryMsg->tsNumOfBlocks = htonl(pQueryMsg->tsNumOfBlocks);
5155
  pQueryMsg->tsOrder = htonl(pQueryMsg->tsOrder);
5156
  pQueryMsg->numOfTags = htonl(pQueryMsg->numOfTags);
5157

5158
  // query msg safety check
5159
  if (!validateQueryMsg(pQueryMsg)) {
5160 5161
    code = TSDB_CODE_QRY_INVALID_MSG;
    goto _cleanup;
5162 5163
  }

H
hjxilinx 已提交
5164 5165
  char *pMsg = (char *)(pQueryMsg->colList) + sizeof(SColumnInfo) * pQueryMsg->numOfCols;
  for (int32_t col = 0; col < pQueryMsg->numOfCols; ++col) {
5166 5167
    SColumnInfo *pColInfo = &pQueryMsg->colList[col];

H
hjxilinx 已提交
5168
    pColInfo->colId = htons(pColInfo->colId);
5169
    pColInfo->type = htons(pColInfo->type);
H
hjxilinx 已提交
5170 5171
    pColInfo->bytes = htons(pColInfo->bytes);
    pColInfo->numOfFilters = htons(pColInfo->numOfFilters);
5172

H
hjxilinx 已提交
5173
    assert(pColInfo->type >= TSDB_DATA_TYPE_BOOL && pColInfo->type <= TSDB_DATA_TYPE_NCHAR);
5174

H
hjxilinx 已提交
5175
    int32_t numOfFilters = pColInfo->numOfFilters;
5176
    if (numOfFilters > 0) {
H
hjxilinx 已提交
5177
      pColInfo->filters = calloc(numOfFilters, sizeof(SColumnFilterInfo));
5178 5179 5180
    }

    for (int32_t f = 0; f < numOfFilters; ++f) {
5181 5182 5183 5184
      SColumnFilterInfo *pFilterMsg = (SColumnFilterInfo *)pMsg;
      
      SColumnFilterInfo *pColFilter = &pColInfo->filters[f];
      pColFilter->filterstr = htons(pFilterMsg->filterstr);
5185 5186 5187

      pMsg += sizeof(SColumnFilterInfo);

5188 5189
      if (pColFilter->filterstr) {
        pColFilter->len = htobe64(pFilterMsg->len);
5190

5191
        pColFilter->pz = (int64_t) calloc(1, pColFilter->len + 1 * TSDB_NCHAR_SIZE); // note: null-terminator
5192 5193
        memcpy((void *)pColFilter->pz, pMsg, pColFilter->len);
        pMsg += (pColFilter->len + 1);
5194
      } else {
5195 5196
        pColFilter->lowerBndi = htobe64(pFilterMsg->lowerBndi);
        pColFilter->upperBndi = htobe64(pFilterMsg->upperBndi);
5197 5198
      }

5199 5200
      pColFilter->lowerRelOptr = htons(pFilterMsg->lowerRelOptr);
      pColFilter->upperRelOptr = htons(pFilterMsg->upperRelOptr);
5201 5202 5203
    }
  }

5204 5205
  *pExpr = calloc(pQueryMsg->numOfOutput, POINTER_BYTES);
  SSqlFuncMsg *pExprMsg = (SSqlFuncMsg *)pMsg;
5206

5207
  for (int32_t i = 0; i < pQueryMsg->numOfOutput; ++i) {
5208
    (*pExpr)[i] = pExprMsg;
5209

5210
    pExprMsg->colInfo.colIndex = htons(pExprMsg->colInfo.colIndex);
5211 5212 5213 5214
    pExprMsg->colInfo.colId = htons(pExprMsg->colInfo.colId);
    pExprMsg->colInfo.flag = htons(pExprMsg->colInfo.flag);
    pExprMsg->functionId = htons(pExprMsg->functionId);
    pExprMsg->numOfParams = htons(pExprMsg->numOfParams);
5215

5216
    pMsg += sizeof(SSqlFuncMsg);
5217 5218

    for (int32_t j = 0; j < pExprMsg->numOfParams; ++j) {
5219
      pExprMsg->arg[j].argType = htons(pExprMsg->arg[j].argType);
5220 5221 5222 5223
      pExprMsg->arg[j].argBytes = htons(pExprMsg->arg[j].argBytes);

      if (pExprMsg->arg[j].argType == TSDB_DATA_TYPE_BINARY) {
        pExprMsg->arg[j].argValue.pz = pMsg;
5224
        pMsg += pExprMsg->arg[j].argBytes;  // one more for the string terminated char.
5225 5226 5227 5228 5229
      } else {
        pExprMsg->arg[j].argValue.i64 = htobe64(pExprMsg->arg[j].argValue.i64);
      }
    }

H
Haojun Liao 已提交
5230 5231
    int16_t functionId = pExprMsg->functionId;
    if (functionId == TSDB_FUNC_TAG || functionId == TSDB_FUNC_TAGPRJ || functionId == TSDB_FUNC_TAG_DUMMY) {
5232
      if (pExprMsg->colInfo.flag != TSDB_COL_TAG) {  // ignore the column  index check for arithmetic expression.
5233 5234
        code = TSDB_CODE_QRY_INVALID_MSG;
        goto _cleanup;
5235 5236
      }
    } else {
5237
//      if (!validateExprColumnInfo(pQueryMsg, pExprMsg)) {
5238
//        return TSDB_CODE_QRY_INVALID_MSG;
5239
//      }
5240 5241
    }

5242
    pExprMsg = (SSqlFuncMsg *)pMsg;
5243
  }
5244

5245
  if (!validateQuerySourceCols(pQueryMsg, *pExpr)) {
5246
    code = TSDB_CODE_QRY_INVALID_MSG;
dengyihao's avatar
dengyihao 已提交
5247
    goto _cleanup;
5248
  }
5249

H
hjxilinx 已提交
5250
  pMsg = createTableIdList(pQueryMsg, pMsg, pTableIdList);
5251

H
hjxilinx 已提交
5252
  if (pQueryMsg->numOfGroupCols > 0) {  // group by tag columns
5253
    *groupbyCols = malloc(pQueryMsg->numOfGroupCols * sizeof(SColIndex));
5254 5255 5256 5257
    if (*groupbyCols == NULL) {
      code = TSDB_CODE_QRY_OUT_OF_MEMORY;
      goto _cleanup;
    }
5258 5259 5260

    for (int32_t i = 0; i < pQueryMsg->numOfGroupCols; ++i) {
      (*groupbyCols)[i].colId = *(int16_t *)pMsg;
5261
      pMsg += sizeof((*groupbyCols)[i].colId);
5262 5263

      (*groupbyCols)[i].colIndex = *(int16_t *)pMsg;
5264 5265
      pMsg += sizeof((*groupbyCols)[i].colIndex);

5266
      (*groupbyCols)[i].flag = *(int16_t *)pMsg;
5267 5268 5269 5270 5271
      pMsg += sizeof((*groupbyCols)[i].flag);

      memcpy((*groupbyCols)[i].name, pMsg, tListLen(groupbyCols[i]->name));
      pMsg += tListLen((*groupbyCols)[i].name);
    }
5272

H
hjxilinx 已提交
5273 5274
    pQueryMsg->orderByIdx = htons(pQueryMsg->orderByIdx);
    pQueryMsg->orderType = htons(pQueryMsg->orderType);
5275 5276
  }

5277 5278
  pQueryMsg->fillType = htons(pQueryMsg->fillType);
  if (pQueryMsg->fillType != TSDB_FILL_NONE) {
5279
    pQueryMsg->fillVal = (uint64_t)(pMsg);
5280 5281

    int64_t *v = (int64_t *)pMsg;
5282
    for (int32_t i = 0; i < pQueryMsg->numOfOutput; ++i) {
5283 5284
      v[i] = htobe64(v[i]);
    }
5285

5286
    pMsg += sizeof(int64_t) * pQueryMsg->numOfOutput;
5287
  }
5288

5289 5290 5291 5292
  if (pQueryMsg->numOfTags > 0) {
    (*tagCols) = calloc(1, sizeof(SColumnInfo) * pQueryMsg->numOfTags);
    for (int32_t i = 0; i < pQueryMsg->numOfTags; ++i) {
      SColumnInfo* pTagCol = (SColumnInfo*) pMsg;
5293

5294 5295 5296 5297
      pTagCol->colId = htons(pTagCol->colId);
      pTagCol->bytes = htons(pTagCol->bytes);
      pTagCol->type  = htons(pTagCol->type);
      pTagCol->numOfFilters = 0;
5298

5299
      (*tagCols)[i] = *pTagCol;
5300
      pMsg += sizeof(SColumnInfo);
5301
    }
H
hjxilinx 已提交
5302
  }
5303

5304 5305 5306 5307 5308 5309
  // the tag query condition expression string is located at the end of query msg
  if (pQueryMsg->tagCondLen > 0) {
    *tagCond = calloc(1, pQueryMsg->tagCondLen);
    memcpy(*tagCond, pMsg, pQueryMsg->tagCondLen);
    pMsg += pQueryMsg->tagCondLen;
  }
5310

weixin_48148422's avatar
weixin_48148422 已提交
5311
  if (*pMsg != 0) {
5312
    size_t len = strlen(pMsg) + 1;
5313

5314
    *tbnameCond = malloc(len);
5315 5316 5317 5318 5319
    if (*tbnameCond == NULL) {
      code = TSDB_CODE_QRY_OUT_OF_MEMORY;
      goto _cleanup;
    }

weixin_48148422's avatar
weixin_48148422 已提交
5320
    strcpy(*tbnameCond, pMsg);
5321
    pMsg += len;
weixin_48148422's avatar
weixin_48148422 已提交
5322
  }
5323

5324
  qDebug("qmsg:%p query %d tables, type:%d, qrange:%" PRId64 "-%" PRId64 ", numOfGroupbyTagCols:%d, order:%d, "
H
Haojun Liao 已提交
5325 5326
         "outputCols:%d, numOfCols:%d, interval:%" PRId64 ", fillType:%d, comptsLen:%d, compNumOfBlocks:%d, limit:%" PRId64 ", offset:%" PRId64,
         pQueryMsg, pQueryMsg->numOfTables, pQueryMsg->queryType, pQueryMsg->window.skey, pQueryMsg->window.ekey, pQueryMsg->numOfGroupCols,
5327
         pQueryMsg->order, pQueryMsg->numOfOutput, pQueryMsg->numOfCols, pQueryMsg->intervalTime,
H
Haojun Liao 已提交
5328
         pQueryMsg->fillType, pQueryMsg->tsLen, pQueryMsg->tsNumOfBlocks, pQueryMsg->limit, pQueryMsg->offset);
5329 5330

  return TSDB_CODE_SUCCESS;
dengyihao's avatar
dengyihao 已提交
5331 5332 5333 5334 5335 5336 5337 5338 5339

_cleanup:
  tfree(*pExpr);
  taosArrayDestroy(*pTableIdList);
  *pTableIdList = NULL;
  tfree(*tbnameCond);
  tfree(*groupbyCols);
  tfree(*tagCols);
  tfree(*tagCond);
5340 5341

  return code;
5342 5343
}

H
hjxilinx 已提交
5344
/*
 * Build the expression tree of an arithmetic expression from the binary string stored in the
 * first argument of pArithExprInfo->base and attach it to pArithExprInfo->pExpr.
 *
 * @return TSDB_CODE_SUCCESS on success, the caught exception code if exprTreeFromBinary()
 *         raised one, or TSDB_CODE_QRY_APP_ERROR if no tree was produced
 */
static int32_t buildAirthmeticExprFromMsg(SExprInfo *pArithExprInfo, SQueryTableMsg *pQueryMsg) {
  qDebug("qmsg:%p create arithmetic expr from binary string: %s", pQueryMsg, pArithExprInfo->base.arg[0].argValue.pz);

  tExprNode* pRoot = NULL;
  TRY(32) {
    pRoot = exprTreeFromBinary(pArithExprInfo->base.arg[0].argValue.pz, pArithExprInfo->base.arg[0].argBytes);
  } CATCH( code ) {
    CLEANUP_EXECUTE();
    return code;
  } END_TRY

  if (pRoot != NULL) {
    pArithExprInfo->pExpr = pRoot;
    return TSDB_CODE_SUCCESS;
  }

  qError("qmsg:%p failed to create arithmetic expression string from:%s", pQueryMsg, pArithExprInfo->base.arg[0].argValue.pz);
  return TSDB_CODE_QRY_APP_ERROR;
}

H
Haojun Liao 已提交
5364
static int32_t createQFunctionExprFromMsg(SQueryTableMsg *pQueryMsg, SExprInfo **pExprInfo, SSqlFuncMsg **pExprMsg,
5365 5366
    SColumnInfo* pTagCols) {
  *pExprInfo = NULL;
H
hjxilinx 已提交
5367
  int32_t code = TSDB_CODE_SUCCESS;
5368

H
Haojun Liao 已提交
5369
  SExprInfo *pExprs = (SExprInfo *)calloc(pQueryMsg->numOfOutput, sizeof(SExprInfo));
5370
  if (pExprs == NULL) {
5371
    return TSDB_CODE_QRY_OUT_OF_MEMORY;
5372 5373 5374 5375 5376
  }

  bool    isSuperTable = QUERY_IS_STABLE_QUERY(pQueryMsg->queryType);
  int16_t tagLen = 0;

5377
  for (int32_t i = 0; i < pQueryMsg->numOfOutput; ++i) {
5378
    pExprs[i].base = *pExprMsg[i];
5379
    pExprs[i].bytes = 0;
5380 5381 5382 5383

    int16_t type = 0;
    int16_t bytes = 0;

5384
    // parse the arithmetic expression
5385
    if (pExprs[i].base.functionId == TSDB_FUNC_ARITHM) {
5386
      code = buildAirthmeticExprFromMsg(&pExprs[i], pQueryMsg);
5387

5388 5389 5390
      if (code != TSDB_CODE_SUCCESS) {
        tfree(pExprs);
        return code;
5391 5392
      }

5393
      type  = TSDB_DATA_TYPE_DOUBLE;
5394
      bytes = tDataTypeDesc[type].nSize;
H
Haojun Liao 已提交
5395
    } else if (pExprs[i].base.colInfo.colId == TSDB_TBNAME_COLUMN_INDEX && pExprs[i].base.functionId == TSDB_FUNC_TAGPRJ) {  // parse the normal column
H
Haojun Liao 已提交
5396 5397 5398
      SSchema s = tGetTableNameColumnSchema();
      type  = s.type;
      bytes = s.bytes;
B
Bomin Zhang 已提交
5399
    } else{
5400
      int32_t j = getColumnIndexInSource(pQueryMsg, &pExprs[i].base, pTagCols);
dengyihao's avatar
dengyihao 已提交
5401
      assert(j < pQueryMsg->numOfCols || j < pQueryMsg->numOfTags);
H
Haojun Liao 已提交
5402

dengyihao's avatar
dengyihao 已提交
5403
      if (pExprs[i].base.colInfo.colId != TSDB_TBNAME_COLUMN_INDEX && j >= 0) {
H
Haojun Liao 已提交
5404 5405 5406 5407
        SColumnInfo* pCol = (TSDB_COL_IS_TAG(pExprs[i].base.colInfo.flag))? &pTagCols[j]:&pQueryMsg->colList[j];
        type = pCol->type;
        bytes = pCol->bytes;
      } else {
H
Haojun Liao 已提交
5408
        SSchema s = tGetTableNameColumnSchema();
H
hjxilinx 已提交
5409

H
Haojun Liao 已提交
5410 5411 5412
        type  = s.type;
        bytes = s.bytes;
      }
5413 5414
    }

5415 5416
    int32_t param = pExprs[i].base.arg[0].argValue.i64;
    if (getResultDataInfo(type, bytes, pExprs[i].base.functionId, param, &pExprs[i].type, &pExprs[i].bytes,
5417
                          &pExprs[i].interBytes, 0, isSuperTable) != TSDB_CODE_SUCCESS) {
5418
      tfree(pExprs);
5419
      return TSDB_CODE_QRY_INVALID_MSG;
5420 5421
    }

5422
    if (pExprs[i].base.functionId == TSDB_FUNC_TAG_DUMMY || pExprs[i].base.functionId == TSDB_FUNC_TS_DUMMY) {
5423
      tagLen += pExprs[i].bytes;
5424
    }
5425
    assert(isValidDataType(pExprs[i].type));
5426 5427 5428
  }

  // TODO refactor
5429
  for (int32_t i = 0; i < pQueryMsg->numOfOutput; ++i) {
5430 5431
    pExprs[i].base = *pExprMsg[i];
    int16_t functId = pExprs[i].base.functionId;
5432

5433
    if (functId == TSDB_FUNC_TOP || functId == TSDB_FUNC_BOTTOM) {
5434
      int32_t j = getColumnIndexInSource(pQueryMsg, &pExprs[i].base, pTagCols);
5435 5436 5437 5438 5439
      assert(j < pQueryMsg->numOfCols);

      SColumnInfo *pCol = &pQueryMsg->colList[j];

      int32_t ret =
5440
          getResultDataInfo(pCol->type, pCol->bytes, functId, pExprs[i].base.arg[0].argValue.i64,
5441
                            &pExprs[i].type, &pExprs[i].bytes, &pExprs[i].interBytes, tagLen, isSuperTable);
5442 5443 5444
      assert(ret == TSDB_CODE_SUCCESS);
    }
  }
5445
  *pExprInfo = pExprs;
5446 5447 5448 5449

  return TSDB_CODE_SUCCESS;
}

5450
/**
 * Create the group-by descriptor from the group-by section of the query message.
 *
 * @param pQueryMsg  query message (numOfGroupCols, orderType, orderByIdx)
 * @param pColIndex  array of numOfGroupCols column indexes to copy
 * @param code       out: set to TSDB_CODE_QRY_OUT_OF_MEMORY when an allocation
 *                   fails; left untouched otherwise
 * @return the new descriptor (owned by the caller), or NULL when the query has
 *         no group-by columns or when an allocation failed (see *code).
 */
static SSqlGroupbyExpr *createGroupbyExprFromMsg(SQueryTableMsg *pQueryMsg, SColIndex *pColIndex, int32_t *code) {
  if (pQueryMsg->numOfGroupCols == 0) {
    return NULL;
  }

  // using group by tag columns
  SSqlGroupbyExpr *pGroupbyExpr = (SSqlGroupbyExpr *)calloc(1, sizeof(SSqlGroupbyExpr));
  if (pGroupbyExpr == NULL) {
    *code = TSDB_CODE_QRY_OUT_OF_MEMORY;
    return NULL;
  }

  pGroupbyExpr->numOfGroupCols = pQueryMsg->numOfGroupCols;
  pGroupbyExpr->orderType = pQueryMsg->orderType;
  pGroupbyExpr->orderIndex = pQueryMsg->orderByIdx;

  pGroupbyExpr->columnInfo = taosArrayInit(pQueryMsg->numOfGroupCols, sizeof(SColIndex));
  if (pGroupbyExpr->columnInfo == NULL) {
    // do not push into a missing array; report the failure instead
    free(pGroupbyExpr);
    *code = TSDB_CODE_QRY_OUT_OF_MEMORY;
    return NULL;
  }

  for(int32_t i = 0; i < pQueryMsg->numOfGroupCols; ++i) {
    taosArrayPush(pGroupbyExpr->columnInfo, &pColIndex[i]);
  }

  return pGroupbyExpr;
}

5474
/**
 * Build pQuery->pFilterInfo: one entry per column of pQuery->colList that has
 * at least one filter, each with its filter elements bound to a comparison
 * callback.
 *
 * A filter with a ">"/">=" lower bound combined with a "<"/"<=" upper bound is
 * served from the range-filter table; any other filter is served from the
 * value-filter table, indexed by whichever single relation is set.
 *
 * @param pQInfo  owning query handle, used for logging only
 * @param pQuery  query whose colList is read and whose numOfFilterCols and
 *                pFilterInfo are written
 * @return TSDB_CODE_SUCCESS, TSDB_CODE_QRY_INVALID_MSG for an unusable filter
 *         description, or TSDB_CODE_QRY_OUT_OF_MEMORY.
 *
 * On failure the partially built pFilterInfo stays attached to pQuery so that
 * the owner can release it.
 */
static int32_t createFilterInfo(void *pQInfo, SQuery *pQuery) {
  for (int32_t i = 0; i < pQuery->numOfCols; ++i) {
    if (pQuery->colList[i].numOfFilters > 0) {
      pQuery->numOfFilterCols++;
    }
  }

  if (pQuery->numOfFilterCols == 0) {
    return TSDB_CODE_SUCCESS;
  }

  pQuery->pFilterInfo = calloc(pQuery->numOfFilterCols, sizeof(SSingleColumnFilterInfo));
  if (pQuery->pFilterInfo == NULL) {
    // keep count and array consistent so a later release loop never walks a NULL array
    pQuery->numOfFilterCols = 0;
    return TSDB_CODE_QRY_OUT_OF_MEMORY;
  }

  for (int32_t i = 0, j = 0; i < pQuery->numOfCols; ++i) {
    if (pQuery->colList[i].numOfFilters <= 0) {
      continue;
    }

    SSingleColumnFilterInfo *pFilterInfo = &pQuery->pFilterInfo[j];
    j++;

    pFilterInfo->info = pQuery->colList[i];

    pFilterInfo->numOfFilters = pQuery->colList[i].numOfFilters;
    pFilterInfo->pFilters = calloc(pFilterInfo->numOfFilters, sizeof(SColumnFilterElem));
    if (pFilterInfo->pFilters == NULL) {
      return TSDB_CODE_QRY_OUT_OF_MEMORY;
    }

    for (int32_t f = 0; f < pFilterInfo->numOfFilters; ++f) {
      SColumnFilterElem *pSingleColFilter = &pFilterInfo->pFilters[f];
      pSingleColFilter->filterInfo = pQuery->colList[i].filters[f];

      int32_t lower = pSingleColFilter->filterInfo.lowerRelOptr;
      int32_t upper = pSingleColFilter->filterInfo.upperRelOptr;

      if (lower == TSDB_RELATION_INVALID && upper == TSDB_RELATION_INVALID) {
        qError("QInfo:%p invalid filter info", pQInfo);
        return TSDB_CODE_QRY_INVALID_MSG;
      }

      int16_t type  = pQuery->colList[i].type;
      int16_t bytes = pQuery->colList[i].bytes;

      // todo refactor
      __filter_func_t *rangeFilterArray = getRangeFilterFuncArray(type);
      __filter_func_t *filterArray = getValueFilterFuncArray(type);

      if (rangeFilterArray == NULL && filterArray == NULL) {
        qError("QInfo:%p failed to get filter function, invalid data type:%d", pQInfo, type);
        return TSDB_CODE_QRY_INVALID_MSG;
      }

      if ((lower == TSDB_RELATION_GREATER_EQUAL || lower == TSDB_RELATION_GREATER) &&
          (upper == TSDB_RELATION_LESS_EQUAL || upper == TSDB_RELATION_LESS)) {
        // two-sided range: the slot is chosen by the inclusiveness of each bound
        assert(rangeFilterArray != NULL);
        if (lower == TSDB_RELATION_GREATER_EQUAL) {
          if (upper == TSDB_RELATION_LESS_EQUAL) {
            pSingleColFilter->fp = rangeFilterArray[4];
          } else {
            pSingleColFilter->fp = rangeFilterArray[2];
          }
        } else {
          if (upper == TSDB_RELATION_LESS_EQUAL) {
            pSingleColFilter->fp = rangeFilterArray[3];
          } else {
            pSingleColFilter->fp = rangeFilterArray[1];
          }
        }
      } else {  // set callback filter function
        assert(filterArray != NULL);
        if (lower != TSDB_RELATION_INVALID) {
          pSingleColFilter->fp = filterArray[lower];

          // a second relation is only valid in the range form handled above
          if (upper != TSDB_RELATION_INVALID) {
            qError("pQInfo:%p failed to get filter function, invalid filter condition: %d", pQInfo, type);
            return TSDB_CODE_QRY_INVALID_MSG;
          }
        } else {
          pSingleColFilter->fp = filterArray[upper];
        }
      }

      assert(pSingleColFilter->fp != NULL);
      pSingleColFilter->bytes = bytes;
    }
  }

  return TSDB_CODE_SUCCESS;
}

5561
/**
 * Rewrite colInfo.colIndex of every select expression so that it is the
 * position of the referenced column inside pQuery->colList (normal columns) or
 * pQuery->tagColList (tag columns).  Arithmetic expressions are skipped.
 *
 * A tag expression on the table name pseudo column may legitimately match no
 * entry of tagColList; its colIndex is then left unchanged.
 */
static void doUpdateExprColumnIndex(SQuery *pQuery) {
  // test the pointer itself before reading through it
  assert(pQuery != NULL && pQuery->pSelectExpr != NULL);

  for (int32_t k = 0; k < pQuery->numOfOutput; ++k) {
    SSqlFuncMsg *pSqlExprMsg = &pQuery->pSelectExpr[k].base;
    if (pSqlExprMsg->functionId == TSDB_FUNC_ARITHM) {
      continue;
    }

    // todo opt performance
    SColIndex *pColIndex = &pSqlExprMsg->colInfo;
    if (!TSDB_COL_IS_TAG(pColIndex->flag)) {
      int32_t f = 0;
      for (f = 0; f < pQuery->numOfCols; ++f) {
        if (pColIndex->colId == pQuery->colList[f].colId) {
          pColIndex->colIndex = f;
          break;
        }
      }

      assert (f < pQuery->numOfCols);
    } else {
      int32_t f = 0;
      for (f = 0; f < pQuery->numOfTags; ++f) {
        if (pColIndex->colId == pQuery->tagColList[f].colId) {
          pColIndex->colIndex = f;
          break;
        }
      }

      assert(f < pQuery->numOfTags || pColIndex->colId == TSDB_TBNAME_COLUMN_INDEX);
    }
  }
}

weixin_48148422's avatar
weixin_48148422 已提交
5596

5597
/**
 * Three-way comparison of two STableIdInfo records by uid; the comparator
 * handed to taosArraySort()/taosArraySearch() for the table id list.
 *
 * @return 1 when a's uid is larger, -1 when it is smaller, 0 when equal.
 */
static int compareTableIdInfo(const void* a, const void* b) {
  const STableIdInfo* lhs = (const STableIdInfo*)a;
  const STableIdInfo* rhs = (const STableIdInfo*)b;

  // (greater) - (less) yields exactly 1, -1 or 0 without risking overflow
  return (lhs->uid > rhs->uid) - (lhs->uid < rhs->uid);
}

dengyihao's avatar
dengyihao 已提交
5605 5606
static void freeQInfo(SQInfo *pQInfo);

weixin_48148422's avatar
weixin_48148422 已提交
5607
static SQInfo *createQInfoImpl(SQueryTableMsg *pQueryMsg, SArray* pTableIdList, SSqlGroupbyExpr *pGroupbyExpr, SExprInfo *pExprs,
5608
                               STableGroupInfo *pTableGroupInfo, SColumnInfo* pTagCols) {
5609 5610
  SQInfo *pQInfo = (SQInfo *)calloc(1, sizeof(SQInfo));
  if (pQInfo == NULL) {
5611
    return NULL;
5612 5613 5614 5615 5616 5617
  }

  SQuery *pQuery = calloc(1, sizeof(SQuery));
  pQInfo->runtimeEnv.pQuery = pQuery;

  int16_t numOfCols = pQueryMsg->numOfCols;
5618
  int16_t numOfOutput = pQueryMsg->numOfOutput;
5619

5620
  pQuery->numOfCols       = numOfCols;
H
hjxilinx 已提交
5621
  pQuery->numOfOutput     = numOfOutput;
5622 5623 5624
  pQuery->limit.limit     = pQueryMsg->limit;
  pQuery->limit.offset    = pQueryMsg->offset;
  pQuery->order.order     = pQueryMsg->order;
5625
  pQuery->order.orderColId = pQueryMsg->orderColId;
5626 5627 5628 5629
  pQuery->pSelectExpr     = pExprs;
  pQuery->pGroupbyExpr    = pGroupbyExpr;
  pQuery->intervalTime    = pQueryMsg->intervalTime;
  pQuery->slidingTime     = pQueryMsg->slidingTime;
5630
  pQuery->slidingTimeUnit = pQueryMsg->slidingTimeUnit;
5631
  pQuery->fillType        = pQueryMsg->fillType;
5632
  pQuery->numOfTags       = pQueryMsg->numOfTags;
5633
  
5634
  // todo do not allocate ??
5635
  pQuery->colList = calloc(numOfCols, sizeof(SSingleColumnFilterInfo));
5636
  if (pQuery->colList == NULL) {
5637
    goto _cleanup;
5638
  }
5639

H
hjxilinx 已提交
5640
  for (int16_t i = 0; i < numOfCols; ++i) {
5641
    pQuery->colList[i] = pQueryMsg->colList[i];
5642
    pQuery->colList[i].filters = tscFilterInfoClone(pQueryMsg->colList[i].filters, pQuery->colList[i].numOfFilters);
H
hjxilinx 已提交
5643
  }
5644

5645
  pQuery->tagColList = pTagCols;
5646

5647
  // calculate the result row size
5648 5649 5650
  for (int16_t col = 0; col < numOfOutput; ++col) {
    assert(pExprs[col].bytes > 0);
    pQuery->rowSize += pExprs[col].bytes;
5651
  }
5652

5653
  doUpdateExprColumnIndex(pQuery);
5654

5655
  int32_t ret = createFilterInfo(pQInfo, pQuery);
5656
  if (ret != TSDB_CODE_SUCCESS) {
5657
    goto _cleanup;
5658 5659 5660
  }

  // prepare the result buffer
5661
  pQuery->sdata = (tFilePage **)calloc(pQuery->numOfOutput, POINTER_BYTES);
5662
  if (pQuery->sdata == NULL) {
5663
    goto _cleanup;
5664 5665
  }

H
hjxilinx 已提交
5666
  // set the output buffer capacity
H
hjxilinx 已提交
5667
  pQuery->rec.capacity = 4096;
5668
  pQuery->rec.threshold = 4000;
5669

5670
  for (int32_t col = 0; col < pQuery->numOfOutput; ++col) {
5671
    assert(pExprs[col].interBytes >= pExprs[col].bytes);
5672 5673

    // allocate additional memory for interResults that are usually larger then final results
5674 5675
    size_t size = (pQuery->rec.capacity + 1) * pExprs[col].bytes + pExprs[col].interBytes + sizeof(tFilePage);
    pQuery->sdata[col] = (tFilePage *)calloc(1, size);
5676
    if (pQuery->sdata[col] == NULL) {
5677
      goto _cleanup;
5678 5679 5680
    }
  }

5681
  if (pQuery->fillType != TSDB_FILL_NONE) {
5682 5683
    pQuery->fillVal = malloc(sizeof(int64_t) * pQuery->numOfOutput);
    if (pQuery->fillVal == NULL) {
5684
      goto _cleanup;
5685 5686 5687
    }

    // the first column is the timestamp
5688
    memcpy(pQuery->fillVal, (char *)pQueryMsg->fillVal, pQuery->numOfOutput * sizeof(int64_t));
5689 5690 5691
  }

  // to make sure third party won't overwrite this structure
5692
  pQInfo->signature = pQInfo;
5693

5694
  pQInfo->tableGroupInfo = *pTableGroupInfo;
dengyihao's avatar
dengyihao 已提交
5695 5696 5697 5698 5699 5700
  size_t numOfGroups = 0;
  if (pTableGroupInfo->pGroupList != NULL) {
    numOfGroups = taosArrayGetSize(pTableGroupInfo->pGroupList);

    pQInfo->tableqinfoGroupInfo.pGroupList = taosArrayInit(numOfGroups, POINTER_BYTES);
    pQInfo->tableqinfoGroupInfo.numOfTables = pTableGroupInfo->numOfTables;
H
Haojun Liao 已提交
5701 5702 5703
    pQInfo->tableqinfoGroupInfo.map = taosHashInit(pTableGroupInfo->numOfTables,
                                                   taosGetDefaultHashFunction(TSDB_DATA_TYPE_INT), false);
  }
5704

weixin_48148422's avatar
weixin_48148422 已提交
5705 5706
  int tableIndex = 0;
  STimeWindow window = pQueryMsg->window;
5707
  taosArraySort(pTableIdList, compareTableIdInfo);
5708

H
Haojun Liao 已提交
5709 5710 5711 5712
  // TODO optimize the STableQueryInfo malloc strategy
  pQInfo->pBuf = calloc(pTableGroupInfo->numOfTables, sizeof(STableQueryInfo));
  int32_t index = 0;

H
hjxilinx 已提交
5713
  for(int32_t i = 0; i < numOfGroups; ++i) {
5714
    SArray* pa = taosArrayGetP(pTableGroupInfo->pGroupList, i);
5715

H
Haojun Liao 已提交
5716
    size_t s = taosArrayGetSize(pa);
5717
    SArray* p1 = taosArrayInit(s, POINTER_BYTES);
5718

H
hjxilinx 已提交
5719
    for(int32_t j = 0; j < s; ++j) {
5720
      void* pTable = taosArrayGetP(pa, j);
H
Haojun Liao 已提交
5721
      STableId* id = TSDB_TABLEID(pTable);
5722

H
Haojun Liao 已提交
5723
      STableIdInfo* pTableId = taosArraySearch(pTableIdList, id, compareTableIdInfo);
weixin_48148422's avatar
weixin_48148422 已提交
5724 5725 5726
      if (pTableId != NULL ) {
        window.skey = pTableId->key;
      } else {
B
Bomin Zhang 已提交
5727
        window.skey = pQueryMsg->window.skey;
weixin_48148422's avatar
weixin_48148422 已提交
5728
      }
5729

H
Haojun Liao 已提交
5730 5731
      void* buf = pQInfo->pBuf + index * sizeof(STableQueryInfo);
      STableQueryInfo* item = createTableQueryInfo(&pQInfo->runtimeEnv, pTable, window, buf);
5732
      item->groupIndex = i;
H
hjxilinx 已提交
5733
      taosArrayPush(p1, &item);
H
Haojun Liao 已提交
5734 5735
      taosHashPut(pQInfo->tableqinfoGroupInfo.map, &id->tid, sizeof(id->tid), &item, POINTER_BYTES);
      index += 1;
H
hjxilinx 已提交
5736
    }
5737

5738
    taosArrayPush(pQInfo->tableqinfoGroupInfo.pGroupList, &p1);
H
hjxilinx 已提交
5739
  }
5740

weixin_48148422's avatar
weixin_48148422 已提交
5741 5742
  pQInfo->arrTableIdInfo = taosArrayInit(tableIndex, sizeof(STableIdInfo));

5743
  pQuery->pos = -1;
5744
  pQuery->window = pQueryMsg->window;
5745

5746
  if (sem_init(&pQInfo->dataReady, 0, 0) != 0) {
5747 5748
    int32_t code = TAOS_SYSTEM_ERROR(errno);
    qError("QInfo:%p init dataReady sem failed, reason:%s", pQInfo, tstrerror(code));
5749
    goto _cleanup;
5750
  }
5751

5752
  colIdCheck(pQuery);
5753

5754
  qDebug("qmsg:%p QInfo:%p created", pQueryMsg, pQInfo);
5755 5756
  return pQInfo;

5757
_cleanup:
dengyihao's avatar
dengyihao 已提交
5758
  freeQInfo(pQInfo);
5759 5760 5761
  return NULL;
}

H
hjxilinx 已提交
5762
static bool isValidQInfo(void *param) {
H
hjxilinx 已提交
5763 5764 5765 5766
  SQInfo *pQInfo = (SQInfo *)param;
  if (pQInfo == NULL) {
    return false;
  }
5767

H
hjxilinx 已提交
5768 5769 5770 5771
  /*
   * pQInfo->signature may be changed by another thread, so we assign value of signature
   * into local variable, then compare by using local variable
   */
5772
  uint64_t sig = (uint64_t)pQInfo->signature;
H
hjxilinx 已提交
5773 5774 5775
  return (sig == (uint64_t)pQInfo);
}

H
Haojun Liao 已提交
5776
/**
 * Finish the setup of a freshly created query handle.
 *
 * Two situations complete the query immediately with an empty result (status
 * QUERY_COMPLETED, dataReady posted, TSDB_CODE_SUCCESS returned): a time window
 * that is empty for the scan order, and a handle without any table.  Otherwise
 * the timestamp-comp buffer, if the message carries one, is created and the
 * runtime is initialised through doInitQInfo().
 *
 * @param pQueryMsg query message (tsLen, tsOffset, tsNumOfBlocks, tsOrder)
 * @param tsdb      storage handle passed on to doInitQInfo()
 * @param vgId      passed on to doInitQInfo()
 * @param pQInfo    handle to initialise; it is freed here when doInitQInfo() fails
 * @param isSTable  passed on to doInitQInfo()
 * @param param     stored in pQInfo->param
 * @param fn        stored in pQInfo->freeFn
 * @return TSDB_CODE_SUCCESS or the code reported by doInitQInfo().
 */
static int32_t initQInfo(SQueryTableMsg *pQueryMsg, void *tsdb, int32_t vgId, SQInfo *pQInfo, bool isSTable, void* param, _qinfo_free_fn_t fn) {
  int32_t code = TSDB_CODE_SUCCESS;
  SQuery *pQuery = pQInfo->runtimeEnv.pQuery;

  if ((QUERY_IS_ASC_QUERY(pQuery) && (pQuery->window.skey > pQuery->window.ekey)) ||
      (!QUERY_IS_ASC_QUERY(pQuery) && (pQuery->window.ekey > pQuery->window.skey))) {
    qDebug("QInfo:%p no result in time range %" PRId64 "-%" PRId64 ", order %d", pQInfo, pQuery->window.skey,
           pQuery->window.ekey, pQuery->order.order);
    setQueryStatus(pQuery, QUERY_COMPLETED);

    // NOTE(review): param/freeFn are not recorded on this path, so the free callback
    // is never invoked for such a handle -- confirm that this is intended
    sem_post(&pQInfo->dataReady);
    return TSDB_CODE_SUCCESS;
  }

  pQInfo->param = param;
  pQInfo->freeFn = fn;

  if (pQInfo->tableqinfoGroupInfo.numOfTables == 0) {
    qDebug("QInfo:%p no table qualified for tag filter, abort query", pQInfo);
    setQueryStatus(pQuery, QUERY_COMPLETED);

    sem_post(&pQInfo->dataReady);
    return TSDB_CODE_SUCCESS;
  }

  // created only after the early exits above, so that they cannot leak the buffer
  STSBuf *pTSBuf = NULL;
  if (pQueryMsg->tsLen > 0) {  // open new file to save the result
    char *tsBlock = (char *) pQueryMsg + pQueryMsg->tsOffset;
    pTSBuf = tsBufCreateFromCompBlocks(tsBlock, pQueryMsg->tsNumOfBlocks, pQueryMsg->tsLen, pQueryMsg->tsOrder);

    tsBufResetPos(pTSBuf);
    bool ret = tsBufNextPos(pTSBuf);
    UNUSED(ret);
  }

  // filter the qualified
  if ((code = doInitQInfo(pQInfo, pTSBuf, tsdb, vgId, isSTable)) != TSDB_CODE_SUCCESS) {
    goto _error;
  }

  return code;

_error:
  // table query ref will be decrease during error handling
  freeQInfo(pQInfo);
  return code;
}

/**
 * Destroy a query handle and everything it owns: output buffers, the runtime
 * environment, filter descriptors, select expressions and their arithmetic
 * trees, fill values, per-table query state, both table group descriptors,
 * the group-by descriptor, the column lists, the SQuery and the SQInfo itself.
 *
 * Handles that fail isValidQInfo() (NULL or signature mismatch) are ignored.
 * The structure is zeroed before it is freed, which clears the signature.
 */
static void freeQInfo(SQInfo *pQInfo) {
  if (!isValidQInfo(pQInfo)) {
    return;
  }

  SQuery *pQuery = pQInfo->runtimeEnv.pQuery;
  setQueryKilled(pQInfo);

  qDebug("QInfo:%p start to free QInfo", pQInfo);

  // the array itself may be missing on a partially built handle
  if (pQuery->sdata != NULL) {
    for (int32_t col = 0; col < pQuery->numOfOutput; ++col) {
      tfree(pQuery->sdata[col]);
    }
  }

  sem_destroy(&(pQInfo->dataReady));
  teardownQueryRuntimeEnv(&pQInfo->runtimeEnv);

  if (pQuery->pFilterInfo != NULL) {
    for (int32_t i = 0; i < pQuery->numOfFilterCols; ++i) {
      SSingleColumnFilterInfo *pColFilter = &pQuery->pFilterInfo[i];
      if (pColFilter->numOfFilters > 0) {
        tfree(pColFilter->pFilters);
      }
    }
  }

  if (pQuery->pSelectExpr != NULL) {
    for (int32_t i = 0; i < pQuery->numOfOutput; ++i) {
      SExprInfo* pExprInfo = &pQuery->pSelectExpr[i];

      if (pExprInfo->pExpr != NULL) {
        tExprTreeDestroy(&pExprInfo->pExpr, NULL);
      }
    }

    tfree(pQuery->pSelectExpr);
  }

  tfree(pQuery->fillVal);

  // todo refactor, extract method to destroytableDataInfo
  int32_t numOfGroups = GET_NUM_OF_TABLEGROUP(pQInfo);
  for (int32_t i = 0; i < numOfGroups; ++i) {
    SArray *p = GET_TABLEGROUP(pQInfo, i);

    size_t num = taosArrayGetSize(p);
    for(int32_t j = 0; j < num; ++j) {
      STableQueryInfo* item = taosArrayGetP(p, j);
      if (item != NULL) {
        destroyTableQueryInfo(item, pQuery->numOfOutput);
      }
    }

    taosArrayDestroy(p);
  }

  tfree(pQInfo->pBuf);
  taosArrayDestroy(pQInfo->tableqinfoGroupInfo.pGroupList);
  taosHashCleanup(pQInfo->tableqinfoGroupInfo.map);
  tsdbDestoryTableGroup(&pQInfo->tableGroupInfo);
  taosArrayDestroy(pQInfo->arrTableIdInfo);

  if (pQuery->pGroupbyExpr != NULL) {
    taosArrayDestroy(pQuery->pGroupbyExpr->columnInfo);
    tfree(pQuery->pGroupbyExpr);
  }

  tfree(pQuery->tagColList);
  tfree(pQuery->pFilterInfo);
  tfree(pQuery->colList);
  tfree(pQuery->sdata);

  tfree(pQuery);

  qDebug("QInfo:%p QInfo is freed", pQInfo);

  // destroy signature, in order to avoid the query process pass the object safety check
  memset(pQInfo, 0, sizeof(SQInfo));
  tfree(pQInfo);
}

H
hjxilinx 已提交
5904
/**
 * Size in bytes of the result that is about to be returned.
 *
 * For an ordinary query this is rowSize * (*numOfRows).  For a ts-comp query
 * with at least one row, the result is the file named by the first output
 * buffer: its size is returned and also written back to *numOfRows.
 * TODO handle the case that the file is too large to send back one time
 *
 * @param pQInfo     query handle
 * @param numOfRows  in: number of rows; out: file size for a ts-comp query
 * @return the size in bytes, or 0 when the ts-comp file cannot be inspected.
 */
static size_t getResultSize(SQInfo *pQInfo, int64_t *numOfRows) {
  SQuery *pQuery = pQInfo->runtimeEnv.pQuery;

  if (!isTSCompQuery(pQuery) || (*numOfRows) <= 0) {
    return pQuery->rowSize * (*numOfRows);
  }

  struct stat fileStat;
  if (stat(pQuery->sdata[0]->data, &fileStat) != 0) {
    qError("QInfo:%p failed to get file info, path:%s, reason:%s", pQInfo, pQuery->sdata[0]->data, strerror(errno));
    return 0;
  }

  *numOfRows = fileStat.st_size;
  return fileStat.st_size;
}
5925

H
hjxilinx 已提交
5926 5927 5928
/**
 * Copy the current batch of results into the response buffer and update the
 * running totals.
 *
 * For a ts-comp query the result is the file named by the first output
 * buffer: its content is read into data and the file is removed afterwards.
 * For any other query the rows are copied by doCopyQueryResultToMsg().
 * The query is marked QUERY_OVER once the ts-comp query had completed, or once
 * the total number of returned rows equals a positive limit.
 *
 * @param pQInfo query handle
 * @param data   destination buffer; the caller must have sized it for the result
 * @return always TSDB_CODE_SUCCESS; file errors are logged only.
 */
static int32_t doDumpQueryResult(SQInfo *pQInfo, char *data) {
  // the remained number of retrieved rows, not the interpolated result
  SQuery *pQuery = pQInfo->runtimeEnv.pQuery;

  // load data from file to msg buffer
  if (isTSCompQuery(pQuery)) {
    int32_t fd = open(pQuery->sdata[0]->data, O_RDONLY, 0666);

    // make sure file exist
    if (FD_VALID(fd)) {
      int32_t s = lseek(fd, 0, SEEK_END);

      if (s < 0) {
        // a negative length must never reach read(): it would turn into a huge size_t
        qError("QInfo:%p failed to get size of ts comp file:%s, reason:%s", pQInfo, pQuery->sdata[0]->data,
               strerror(errno));
      } else {
        qDebug("QInfo:%p ts comp data return, file:%s, size:%d", pQInfo, pQuery->sdata[0]->data, s);

        if (lseek(fd, 0, SEEK_SET) >= 0) {
          if (s > 0) {
            ssize_t sz = read(fd, data, (size_t)s);
            if (sz != (ssize_t)s) {
              qError("QInfo:%p incomplete read of ts comp file:%s, expect:%d, read:%" PRId64, pQInfo,
                     pQuery->sdata[0]->data, s, (int64_t)sz);
            }
          }
        } else {
          // todo handle error
        }
      }

      close(fd);
      unlink(pQuery->sdata[0]->data);
    } else {
      // todo return the error code to client and handle invalid fd
      qError("QInfo:%p failed to open tmp file to send ts-comp data to client, path:%s, reason:%s", pQInfo,
             pQuery->sdata[0]->data, strerror(errno));
      if (fd != -1) {
        close(fd);
      }
    }

    // all data returned, set query over
    if (Q_STATUS_EQUAL(pQuery->status, QUERY_COMPLETED)) {
      setQueryStatus(pQuery, QUERY_OVER);
    }
  } else {
    doCopyQueryResultToMsg(pQInfo, pQuery->rec.rows, data);
  }

  pQuery->rec.total += pQuery->rec.rows;
  qDebug("QInfo:%p current numOfRes rows:%" PRId64 ", total:%" PRId64, pQInfo, pQuery->rec.rows, pQuery->rec.total);

  if (pQuery->limit.limit > 0 && pQuery->limit.limit == pQuery->rec.total) {
    qDebug("QInfo:%p results limitation reached, limitation:%"PRId64, pQInfo, pQuery->limit.limit);
    setQueryStatus(pQuery, QUERY_OVER);
  }

  return TSDB_CODE_SUCCESS;
}

5976 5977 5978 5979 5980 5981 5982
// Management state for query handles.
// NOTE(review): none of the users of this struct are visible in this part of the
// file; the field notes below other than qinfoPool are assumptions to confirm.
typedef struct SQueryMgmt {
  SCacheObj      *qinfoPool;      // query handle pool
  int32_t         vgId;           // presumably the id of the owning vnode group -- TODO confirm
  bool            closed;         // presumably set once the manager stops accepting handles -- TODO confirm
  pthread_mutex_t lock;           // NOTE(review): which fields this lock guards is not visible here
} SQueryMgmt;

H
Haojun Liao 已提交
5983 5984
/**
 * Create a query handle from a raw query message.
 *
 * Steps: decode the message (convertQueryMsg), build the select expressions
 * and the group-by descriptor, resolve the tables to query from tsdb according
 * to the query type, create the handle (createQInfoImpl) and initialise it
 * (initQInfo).
 *
 * @param tsdb       storage handle, must not be NULL
 * @param vgId       passed on to initQInfo()
 * @param pQueryMsg  raw query message, must not be NULL
 * @param param      passed on to initQInfo() together with fn
 * @param fn         passed on to initQInfo()
 * @param pQInfo     out: the new handle on success, NULL on failure
 * @return TSDB_CODE_SUCCESS or an error code.
 *
 * The temporary objects produced while decoding are released before
 * returning, on success as well as on failure.
 */
int32_t qCreateQueryInfo(void* tsdb, int32_t vgId, SQueryTableMsg* pQueryMsg, void* param, _qinfo_free_fn_t fn,
    qinfo_t* pQInfo) {
  assert(pQueryMsg != NULL && tsdb != NULL);

  int32_t code = TSDB_CODE_SUCCESS;

  char *        tagCond = NULL, *tbnameCond = NULL;
  SArray *      pTableIdList = NULL;
  SSqlFuncMsg **pExprMsg = NULL;
  SColIndex *   pGroupColIndex = NULL;
  SColumnInfo*  pTagColumnInfo = NULL;
  SExprInfo     *pExprs = NULL;
  SSqlGroupbyExpr *pGroupbyExpr = NULL;

  // declared ahead of the first goto so that no jump skips their initialisation
  bool isSTableQuery = false;
  STableGroupInfo tableGroupInfo = {0};

  if ((code = convertQueryMsg(pQueryMsg, &pTableIdList, &pExprMsg, &tagCond, &tbnameCond, &pGroupColIndex, &pTagColumnInfo)) !=
         TSDB_CODE_SUCCESS) {
    goto _over;
  }

  if (pQueryMsg->numOfTables <= 0) {
    qError("Invalid number of tables to query, numOfTables:%d", pQueryMsg->numOfTables);
    code = TSDB_CODE_QRY_INVALID_MSG;
    goto _over;
  }

  if (pTableIdList == NULL || taosArrayGetSize(pTableIdList) == 0) {
    qError("qmsg:%p, SQueryTableMsg wrong format", pQueryMsg);
    code = TSDB_CODE_QRY_INVALID_MSG;
    goto _over;
  }

  if ((code = createQFunctionExprFromMsg(pQueryMsg, &pExprs, pExprMsg, pTagColumnInfo)) != TSDB_CODE_SUCCESS) {
    goto _over;
  }

  pGroupbyExpr = createGroupbyExprFromMsg(pQueryMsg, pGroupColIndex, &code);
  if ((pGroupbyExpr == NULL && pQueryMsg->numOfGroupCols != 0) || code != TSDB_CODE_SUCCESS) {
    goto _over;
  }

  if (TSDB_QUERY_HAS_TYPE(pQueryMsg->queryType, TSDB_QUERY_TYPE_TABLE_QUERY)) {
    STableIdInfo *id = taosArrayGet(pTableIdList, 0);

    qDebug("qmsg:%p query normal table, uid:%"PRId64", tid:%d", pQueryMsg, id->uid, id->tid);
    if ((code = tsdbGetOneTableGroup(tsdb, id->uid, &tableGroupInfo)) != TSDB_CODE_SUCCESS) {
      goto _over;
    }
  } else if (TSDB_QUERY_HAS_TYPE(pQueryMsg->queryType, TSDB_QUERY_TYPE_MULTITABLE_QUERY|TSDB_QUERY_TYPE_STABLE_QUERY)) {
    isSTableQuery = true;
    // TODO: need a macro from TSDB to check if table is super table

    // also note there's possibility that only one table in the super table
    if (!TSDB_QUERY_HAS_TYPE(pQueryMsg->queryType, TSDB_QUERY_TYPE_MULTITABLE_QUERY)) {
      STableIdInfo *id = taosArrayGet(pTableIdList, 0);

      // group by normal column, do not pass the group by condition to tsdb to group table into different group
      int32_t numOfGroupByCols = pQueryMsg->numOfGroupCols;
      if (pQueryMsg->numOfGroupCols == 1 && !TSDB_COL_IS_TAG(pGroupColIndex->flag)) {
        numOfGroupByCols = 0;
      }

      code = tsdbQuerySTableByTagCond(tsdb, id->uid, tagCond, pQueryMsg->tagCondLen, pQueryMsg->tagNameRelType, tbnameCond, &tableGroupInfo, pGroupColIndex,
                                          numOfGroupByCols);
      if (code != TSDB_CODE_SUCCESS) {
        goto _over;
      }
    } else {
      code = tsdbGetTableGroupFromIdList(tsdb, pTableIdList, &tableGroupInfo);
      if (code != TSDB_CODE_SUCCESS) {
        goto _over;
      }

      qDebug("qmsg:%p query on %zu tables in one group from client", pQueryMsg, tableGroupInfo.numOfTables);
    }
  } else {
    // unknown query type: still a programming error in debug builds, but a release
    // build must reject the message rather than continue with an empty table group
    assert(0);
    code = TSDB_CODE_QRY_INVALID_MSG;
    goto _over;
  }

  (*pQInfo) = createQInfoImpl(pQueryMsg, pTableIdList, pGroupbyExpr, pExprs, &tableGroupInfo, pTagColumnInfo);

  // whatever the outcome, these must not be released again at _over
  pExprs = NULL;
  pGroupbyExpr = NULL;
  pTagColumnInfo = NULL;

  if ((*pQInfo) == NULL) {
    code = TSDB_CODE_QRY_OUT_OF_MEMORY;
    goto _over;
  }

  code = initQInfo(pQueryMsg, tsdb, vgId, *pQInfo, isSTableQuery, param, fn);

_over:
  free(tagCond);
  free(tbnameCond);
  free(pGroupColIndex);
  if (pGroupbyExpr != NULL) {
    taosArrayDestroy(pGroupbyExpr->columnInfo);
    free(pGroupbyExpr);
  }
  free(pTagColumnInfo);
  free(pExprs);
  free(pExprMsg);
  taosArrayDestroy(pTableIdList);

  //pQInfo already freed in initQInfo, but *pQInfo may not pointer to null;
  if (code != TSDB_CODE_SUCCESS) {
    *pQInfo = NULL;
  } else {
    SQInfo* pq = (SQInfo*) (*pQInfo);

    // NOTE(review): two references are taken here; presumably one per party that
    // later calls qDestroyQueryInfo() -- confirm against the callers
    T_REF_INC(pq);
    T_REF_INC(pq);
  }

  // if failed to add ref for all meters in this query, abort current query
  return code;
}

H
Haojun Liao 已提交
6103 6104
/**
 * Final teardown of a query handle: log completion, print the query cost
 * summary, then release the handle through freeQInfo().
 *
 * @param pQInfo handle to destroy, must not be NULL
 */
static void doDestoryQueryInfo(SQInfo* pQInfo) {
  assert(pQInfo != NULL);

  qDebug("QInfo:%p query completed", pQInfo);

  // print the query cost summary while the handle is still intact
  queryCostStatis(pQInfo);

  freeQInfo(pQInfo);
}

H
Haojun Liao 已提交
6110
/**
 * Drop one reference to a query handle and destroy the handle when the count
 * reaches zero.  After destruction the free callback stored in the handle, if
 * any, is invoked with the stored parameter.
 *
 * Handles that fail isValidQInfo() are ignored.
 *
 * @param qHandle query handle
 */
void qDestroyQueryInfo(qinfo_t qHandle) {
  SQInfo* pQInfo = (SQInfo*) qHandle;
  if (!isValidQInfo(pQInfo)) {
    return;
  }

  int32_t ref = T_REF_DEC(pQInfo);
  qDebug("QInfo:%p dec refCount, value:%d", pQInfo, ref);

  if (ref != 0) {
    return;
  }

  // copy what is needed out of the handle before it is destroyed
  _qinfo_free_fn_t freeFp = pQInfo->freeFn;
  void* param = pQInfo->param;

  doDestoryQueryInfo(pQInfo);

  if (freeFp == NULL) {
    return;
  }

  assert(param != NULL);
  freeFp(param);
}

6132
void qTableQuery(qinfo_t qinfo) {
6133 6134
  SQInfo *pQInfo = (SQInfo *)qinfo;

H
hjxilinx 已提交
6135
  if (pQInfo == NULL || pQInfo->signature != pQInfo) {
6136
    qDebug("QInfo:%p has been freed, no need to execute", pQInfo);
H
hjxilinx 已提交
6137 6138
    return;
  }
6139

H
hjxilinx 已提交
6140
  if (isQueryKilled(pQInfo)) {
6141
    qDebug("QInfo:%p it is already killed, abort", pQInfo);
6142 6143

    sem_post(&pQInfo->dataReady);
H
Haojun Liao 已提交
6144
    qDestroyQueryInfo(pQInfo);
H
hjxilinx 已提交
6145 6146
    return;
  }
6147

6148 6149
  if (pQInfo->tableqinfoGroupInfo.numOfTables == 0) {
    qDebug("QInfo:%p no table exists for query, abort", pQInfo);
6150 6151 6152 6153 6154 6155

    sem_post(&pQInfo->dataReady);
    qDestroyQueryInfo(pQInfo);
    return;
  }

H
Haojun Liao 已提交
6156 6157
  int32_t ret = setjmp(pQInfo->runtimeEnv.env);

6158 6159 6160 6161 6162 6163 6164
  // error occurs, record the error code and return to client
  if (ret != TSDB_CODE_SUCCESS) {
    pQInfo->code = ret;
    qDebug("QInfo:%p query abort due to error occurs, code:%s", pQInfo, tstrerror(pQInfo->code));
    sem_post(&pQInfo->dataReady);
    qDestroyQueryInfo(pQInfo);

6165 6166 6167
    return;
  }

6168
  qDebug("QInfo:%p query task is launched", pQInfo);
6169

6170
  SQueryRuntimeEnv* pRuntimeEnv = &pQInfo->runtimeEnv;
H
hjxilinx 已提交
6171
  if (onlyQueryTags(pQInfo->runtimeEnv.pQuery)) {
H
Haojun Liao 已提交
6172
    assert(pQInfo->runtimeEnv.pQueryHandle == NULL);
H
hjxilinx 已提交
6173
    buildTagQueryResult(pQInfo);   // todo support the limit/offset
H
hjxilinx 已提交
6174
  } else if (pQInfo->runtimeEnv.stableQuery) {
6175
    stableQueryImpl(pQInfo);
H
hjxilinx 已提交
6176
  } else {
6177
    tableQueryImpl(pQInfo);
H
hjxilinx 已提交
6178
  }
6179

6180 6181 6182 6183 6184 6185 6186 6187 6188 6189
  SQuery* pQuery = pRuntimeEnv->pQuery;
  if (isQueryKilled(pQInfo)) {
    qDebug("QInfo:%p query is killed", pQInfo);
  } else if (pQuery->rec.rows == 0) {
    qDebug("QInfo:%p over, %zu tables queried, %"PRId64" rows are returned", pQInfo, pQInfo->tableqinfoGroupInfo.numOfTables, pQuery->rec.total);
  } else {
    qDebug("QInfo:%p query paused, %" PRId64 " rows returned, numOfTotal:%" PRId64 " rows",
           pQInfo, pQuery->rec.rows, pQuery->rec.total + pQuery->rec.rows);
  }

H
hjxilinx 已提交
6190
  sem_post(&pQInfo->dataReady);
H
Haojun Liao 已提交
6191
  qDestroyQueryInfo(pQInfo);
H
hjxilinx 已提交
6192 6193
}

H
hjxilinx 已提交
6194
/*
 * Wait until the executor has posted dataReady for this handle and return the
 * query's result code.
 *
 * Returns TSDB_CODE_QRY_INVALID_QHANDLE for a NULL/invalid handle. If the
 * query has been killed, pQInfo->code is returned immediately without waiting.
 */
int32_t qRetrieveQueryResultInfo(qinfo_t qinfo) {
  SQInfo *pQInfo = (SQInfo *)qinfo;

  bool usable = (pQInfo != NULL) && isValidQInfo(pQInfo);
  if (!usable) {
    return TSDB_CODE_QRY_INVALID_QHANDLE;
  }

  SQuery *pQuery = pQInfo->runtimeEnv.pQuery;

  if (isQueryKilled(pQInfo)) {
    qDebug("QInfo:%p query is killed, code:%d", pQInfo, pQInfo->code);
    return pQInfo->code;
  }

  // blocks until the matching sem_post on dataReady
  sem_wait(&pQInfo->dataReady);

  qDebug("QInfo:%p retrieve result info, rowsize:%d, rows:%"PRId64", code:%d", pQInfo, pQuery->rowSize, pQuery->rec.rows,
         pQInfo->code);

  return pQInfo->code;
}
6213

H
hjxilinx 已提交
6214
/*
 * Tell whether the client still has results to fetch from this handle.
 *
 * Returns false for a NULL/invalid handle, when the query carries an error
 * code, or when the query status is QUERY_OVER. Returns true when the status
 * is QUERY_RESBUF_FULL or QUERY_COMPLETED; in that case one extra reference is
 * taken on the handle before returning.
 */
bool qHasMoreResultsToRetrieve(qinfo_t qinfo) {
  SQInfo *pQInfo = (SQInfo *)qinfo;

  // validate first: an invalid handle must not be dereferenced, not even for logging
  if (pQInfo == NULL || !isValidQInfo(pQInfo)) {
    qDebug("QInfo:%p invalid qhandle, abort query", pQInfo);
    return false;
  }

  if (pQInfo->code != TSDB_CODE_SUCCESS) {
    qDebug("QInfo:%p invalid qhandle or error occurs, abort query, code:%x", pQInfo, pQInfo->code);
    return false;
  }

  SQuery *pQuery = pQInfo->runtimeEnv.pQuery;
  bool ret = false;
  if (Q_STATUS_EQUAL(pQuery->status, QUERY_OVER)) {
    ret = false;
  } else if (Q_STATUS_EQUAL(pQuery->status, QUERY_RESBUF_FULL) || Q_STATUS_EQUAL(pQuery->status, QUERY_COMPLETED)) {
    ret = true;
  } else {
    assert(0);  // no other status is expected at this point
  }

  if (ret) {
    T_REF_INC(pQInfo);
    qDebug("QInfo:%p has more results waits for client retrieve", pQInfo);
  }

  return ret;
}

6242 6243 6244
/*
 * Build the retrieve response message for the current result set.
 *
 * @param qinfo    query handle
 * @param pRsp     out: response buffer obtained from rpcMallocCont(); set to
 *                 NULL when the allocation fails
 * @param contLen  out: total length of the response in bytes (0 on allocation
 *                 failure)
 * @return TSDB_CODE_QRY_INVALID_QHANDLE for a NULL/invalid handle,
 *         TSDB_CODE_QRY_OUT_OF_MEMORY when the response cannot be allocated,
 *         otherwise the query code / the result of doDumpQueryResult().
 */
int32_t qDumpRetrieveResult(qinfo_t qinfo, SRetrieveTableRsp **pRsp, int32_t *contLen) {
  SQInfo *pQInfo = (SQInfo *)qinfo;

  if (pQInfo == NULL || !isValidQInfo(pQInfo)) {
    return TSDB_CODE_QRY_INVALID_QHANDLE;
  }

  SQueryRuntimeEnv* pRuntimeEnv = &pQInfo->runtimeEnv;
  SQuery *pQuery = pQInfo->runtimeEnv.pQuery;

  // payload = result rows + one int32_t + one STableIdInfo per entry of arrTableIdInfo
  // NOTE(review): the int32_t is presumably the entry count -- confirm against doDumpQueryResult
  size_t  size = getResultSize(pQInfo, &pQuery->rec.rows);
  size += sizeof(int32_t);
  size += sizeof(STableIdInfo) * taosArrayGetSize(pQInfo->arrTableIdInfo);
  *contLen = size + sizeof(SRetrieveTableRsp);

  *pRsp = (SRetrieveTableRsp *)rpcMallocCont(*contLen);
  if (*pRsp == NULL) {
    // never touch the response header when the allocation failed
    qDebug("QInfo:%p failed to allocate %d bytes for retrieve response", pQInfo, *contLen);
    *contLen = 0;
    return TSDB_CODE_QRY_OUT_OF_MEMORY;
  }

  (*pRsp)->numOfRows = htonl(pQuery->rec.rows);

  int32_t code = pQInfo->code;
  if (code == TSDB_CODE_SUCCESS) {
    (*pRsp)->offset = htobe64(pQuery->limit.offset);
    (*pRsp)->useconds = htobe64(pRuntimeEnv->summary.elapsedTime);
  } else {
    (*pRsp)->offset = 0;
    (*pRsp)->useconds = 0;
  }

  (*pRsp)->precision = htons(pQuery->precision);
  if (pQuery->rec.rows > 0 && code == TSDB_CODE_SUCCESS) {
    code = doDumpQueryResult(pQInfo, (*pRsp)->data);
  } else {
    // nothing to send (or the query failed): mark the query as over
    setQueryStatus(pQuery, QUERY_OVER);
    code = pQInfo->code;
  }

  if (isQueryKilled(pQInfo) || Q_STATUS_EQUAL(pQuery->status, QUERY_OVER)) {
    (*pRsp)->completed = 1;  // notify no more result to client
  }

  return code;
}
H
hjxilinx 已提交
6283

H
Haojun Liao 已提交
6284
/*
 * Mark the query as killed and drop one reference on its handle.
 *
 * Returns TSDB_CODE_QRY_INVALID_QHANDLE for a NULL/invalid handle,
 * TSDB_CODE_SUCCESS otherwise.
 */
int32_t qKillQuery(qinfo_t qinfo) {
  SQInfo *pQInfo = (SQInfo *)qinfo;

  bool usable = (pQInfo != NULL) && isValidQInfo(pQInfo);
  if (!usable) {
    return TSDB_CODE_QRY_INVALID_QHANDLE;
  }

  setQueryKilled(pQInfo);
  qDestroyQueryInfo(pQInfo);

  return TSDB_CODE_SUCCESS;
}

H
hjxilinx 已提交
6296 6297 6298
/*
 * Produce the result rows of a tag-only query (no time-series data is read).
 *
 * Three shapes are handled, selected by the first output expression:
 *   - TSDB_FUNC_TID_TAG: one var-length cell per table holding uid, tid, vgId
 *     followed by the table name or the requested tag value;
 *   - TSDB_FUNC_COUNT:   a single row holding the number of tables;
 *   - otherwise:         one row per table with the requested tags / table name.
 *
 * At most pQuery->rec.capacity rows are produced per call. pQInfo->tableIndex
 * remembers the next table to emit, so a later call continues from there.
 * On return pQuery->rec.rows holds the number of rows written and the query
 * status is set to QUERY_COMPLETED.
 */
static void buildTagQueryResult(SQInfo* pQInfo) {
  SQueryRuntimeEnv *pRuntimeEnv = &pQInfo->runtimeEnv;
  SQuery *          pQuery = pRuntimeEnv->pQuery;

  size_t numOfGroup = GET_NUM_OF_TABLEGROUP(pQInfo);
  assert(numOfGroup == 0 || numOfGroup == 1);

  if (numOfGroup == 0) {
    return;
  }

  SArray* pa = GET_TABLEGROUP(pQInfo, 0);

  size_t num = taosArrayGetSize(pa);
  assert(num == pQInfo->tableqinfoGroupInfo.numOfTables);

  int32_t count = 0;  // rows written into the output buffer during this call
  int32_t functionId = pQuery->pSelectExpr[0].base.functionId;
  if (functionId == TSDB_FUNC_TID_TAG) { // return the tags & table Id
    assert(pQuery->numOfOutput == 1);

    SExprInfo* pExprInfo = &pQuery->pSelectExpr[0];
    int32_t rsize = pExprInfo->bytes;

    // prefer the type/size recorded in the tag column list for the requested tag
    int16_t bytes = pExprInfo->bytes;
    int16_t type = pExprInfo->type;

    for(int32_t i = 0; i < pQuery->numOfTags; ++i) {
      if (pQuery->tagColList[i].colId == pExprInfo->base.colInfo.colId) {
        bytes = pQuery->tagColList[i].bytes;
        type = pQuery->tagColList[i].type;
        break;
      }
    }

    while(pQInfo->tableIndex < num && count < pQuery->rec.capacity) {
      int32_t i = pQInfo->tableIndex++;
      STableQueryInfo *item = taosArrayGetP(pa, i);

      // the slot is addressed by the row number of this call, not by the absolute
      // table index: on a resumed call (tableIndex > 0) the table index would point
      // beyond the rec.capacity rows of the output buffer
      char *output = pQuery->sdata[0]->data + count * rsize;
      varDataSetLen(output, rsize - VARSTR_HEADER_SIZE);

      output = varDataVal(output);
      STableId* id = TSDB_TABLEID(item->pTable);

      // the cell payload starts right after the var-length header and is not
      // suitably aligned for direct integer stores, hence memcpy
      int64_t uid = id->uid;
      memcpy(output, &uid, sizeof(uid));
      output += sizeof(id->uid);

      int32_t tid = id->tid;
      memcpy(output, &tid, sizeof(tid));
      output += sizeof(id->tid);

      int32_t vgId = pQInfo->vgId;
      memcpy(output, &vgId, sizeof(vgId));
      output += sizeof(pQInfo->vgId);

      if (pExprInfo->base.colInfo.colId == TSDB_TBNAME_COLUMN_INDEX) {
        char *data = tsdbGetTableName(item->pTable);
        memcpy(output, data, varDataTLen(data));
      } else {
        char *val = tsdbGetTableTagVal(item->pTable, pExprInfo->base.colInfo.colId, type, bytes);

        // todo refactor
        if (type == TSDB_DATA_TYPE_BINARY || type == TSDB_DATA_TYPE_NCHAR) {
          if (val == NULL) {
            setVardataNull(output, type);
          } else {
            memcpy(output, val, varDataTLen(val));
          }
        } else {
          if (val == NULL) {
            setNull(output, type, bytes);
          } else {  // todo here stop will cause client crash
            memcpy(output, val, bytes);
          }
        }
      }

      count += 1;
    }

    qDebug("QInfo:%p create (tableId, tag) info completed, rows:%d", pQInfo, count);

  } else if (functionId == TSDB_FUNC_COUNT) {// handle the "count(tbname)" query
    *(int64_t*) pQuery->sdata[0]->data = num;

    count = 1;
    pQInfo->tableIndex = num;  //set query completed
    qDebug("QInfo:%p create count(tbname) query, res:%d rows:1", pQInfo, count);
  } else {  // return only the tags|table name etc.
    SSchema tbnameSchema = tGetTableNameColumnSchema();
    while(pQInfo->tableIndex < num && count < pQuery->rec.capacity) {
      int32_t i = pQInfo->tableIndex++;

      SExprInfo* pExprInfo = pQuery->pSelectExpr;
      STableQueryInfo* item = taosArrayGetP(pa, i);

      // fill every output column of row `count` for this table
      for(int32_t j = 0; j < pQuery->numOfOutput; ++j) {
        if (pExprInfo[j].base.colInfo.colId == TSDB_TBNAME_COLUMN_INDEX) {
          char* data = tsdbGetTableName(item->pTable);
          char* dst = pQuery->sdata[j]->data + count * tbnameSchema.bytes;
          memcpy(dst, data, varDataTLen(data));
        } else {// todo refactor
          int16_t type = pExprInfo[j].type;
          int16_t bytes = pExprInfo[j].bytes;

          char* data = tsdbGetTableTagVal(item->pTable, pExprInfo[j].base.colInfo.colId, type, bytes);
          char* dst = pQuery->sdata[j]->data + count * pExprInfo[j].bytes;

          if (type == TSDB_DATA_TYPE_BINARY || type == TSDB_DATA_TYPE_NCHAR) {
            if (data == NULL) {
              setVardataNull(dst, type);
            } else {
              memcpy(dst, data, varDataTLen(data));
            }
          } else {
            if (data == NULL) {
              setNull(dst, type, bytes);
            } else {
              memcpy(dst, data, pExprInfo[j].bytes);
            }
          }
        }
      }
      count += 1;
    }

    qDebug("QInfo:%p create tag values results completed, rows:%d", pQInfo, count);
  }

  pQuery->rec.rows = count;
  setQueryStatus(pQuery, QUERY_COMPLETED);
}


/*
 * Free callback handed to the handle cache: qhandle points to a stored query
 * handle pointer; the query behind it is killed. NULL (or a slot holding NULL)
 * is ignored.
 */
void freeqinfoFn(void *qhandle) {
  void** handle = qhandle;

  if (handle != NULL && *handle != NULL) {
    qKillQuery(*handle);
  }
}

void* qOpenQueryMgmt(int32_t vgId) {
  const int32_t REFRESH_HANDLE_INTERVAL = 2; // every 2 seconds, refresh handle pool

  char cacheName[128] = {0};
  sprintf(cacheName, "qhandle_%d", vgId);

  SQueryMgmt* pQueryHandle = calloc(1, sizeof(SQueryMgmt));

  pQueryHandle->qinfoPool = taosCacheInit(TSDB_DATA_TYPE_BIGINT, REFRESH_HANDLE_INTERVAL, true, freeqinfoFn, cacheName);
  pQueryHandle->closed    = false;
  pthread_mutex_init(&pQueryHandle->lock, NULL);

  qDebug("vgId:%d, open querymgmt success", vgId);
  return pQueryHandle;
}

/*
 * Flag the manager as closed (under its lock) and then empty its handle cache.
 * A NULL manager is ignored.
 */
void qSetQueryMgmtClosed(void* pQMgmt) {
  SQueryMgmt* pQueryMgmt = pQMgmt;
  if (pQueryMgmt == NULL) {
    return;
  }

  qDebug("vgId:%d, set querymgmt closed, wait for all queries cancelled", pQueryMgmt->vgId);

  // the flag is flipped under the same lock qRegisterQInfo() takes before reading it
  pthread_mutex_lock(&pQueryMgmt->lock);
  pQueryMgmt->closed = true;
  pthread_mutex_unlock(&pQueryMgmt->lock);

  taosCacheEmpty(pQueryMgmt->qinfoPool, true);
}

/*
 * Destroy a manager that has already been marked closed: clean up its handle
 * cache, destroy its mutex and free the manager itself.
 * A NULL manager is ignored; the closed flag is asserted.
 */
void qCleanupQueryMgmt(void* pQMgmt) {
  SQueryMgmt* pQueryMgmt = pQMgmt;
  if (pQueryMgmt == NULL) {
    return;
  }

  // keep the id for the final log line, the manager is gone by then
  int32_t vgId = pQueryMgmt->vgId;

  assert(pQueryMgmt->closed);

  // detach the pool from the manager before cleaning it up
  SCacheObj* pqinfoPool = pQueryMgmt->qinfoPool;
  pQueryMgmt->qinfoPool = NULL;
  taosCacheCleanup(pqinfoPool);

  pthread_mutex_destroy(&pQueryMgmt->lock);
  tfree(pQueryMgmt);

  qDebug("vgId:%d querymgmt cleanup completed", vgId);
}

/*
 * Put a query handle into the manager's handle cache.
 *
 * @return the slot returned by taosCachePut(), or NULL when the manager is
 *         NULL, has no cache, or has been closed
 */
void** qRegisterQInfo(void* pMgmt, void* qInfo) {
  SQueryMgmt *pQueryMgmt = pMgmt;
  if (pQueryMgmt == NULL || pQueryMgmt->qinfoPool == NULL) {
    return NULL;
  }

  void** handle = NULL;

  // the closed flag is read and the insert is done under the manager lock
  pthread_mutex_lock(&pQueryMgmt->lock);
  if (!pQueryMgmt->closed) {
    handle = taosCachePut(pQueryMgmt->qinfoPool, qInfo, POINTER_BYTES, &qInfo, POINTER_BYTES, tsShellActivityTimer*2);
  }
  pthread_mutex_unlock(&pQueryMgmt->lock);

  return handle;
}

/*
 * Look up a registered query handle by key in the manager's handle cache.
 *
 * @return the cache slot, or NULL when the manager has no cache, is closed,
 *         the key is not found, or the slot holds a NULL handle
 */
void** qAcquireQInfo(void* pMgmt, void** key) {
  SQueryMgmt *pQueryMgmt = pMgmt;

  if (pQueryMgmt->qinfoPool == NULL || pQueryMgmt->closed) {
    return NULL;
  }

  void** handle = taosCacheAcquireByKey(pQueryMgmt->qinfoPool, key, POINTER_BYTES);

  bool usable = (handle != NULL) && (*handle != NULL);
  return usable ? handle : NULL;
}

/*
 * Release a slot previously obtained from the manager's handle cache;
 * needFree is passed through to taosCacheRelease().
 *
 * Always returns NULL.
 */
void** qReleaseQInfo(void* pMgmt, void* pQInfo, bool needFree) {
  SQueryMgmt *pQueryMgmt = pMgmt;

  if (pQueryMgmt->qinfoPool != NULL) {
    taosCacheRelease(pQueryMgmt->qinfoPool, pQInfo, needFree);
  }

  return NULL;
}