qExecutor.c 218.4 KB
Newer Older
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15
/*
 * Copyright (c) 2019 TAOS Data, Inc. <jhtao@taosdata.com>
 *
 * This program is free software: you can use, redistribute, and/or modify
 * it under the terms of the GNU Affero General Public License, version 3
 * or later ("AGPL"), as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.
 *
 * You should have received a copy of the GNU Affero General Public License
 * along with this program. If not, see <http://www.gnu.org/licenses/>.
 */
#include "os.h"
16 17
#include "tcache.h"
#include "tglobal.h"
H
Haojun Liao 已提交
18
#include "qfill.h"
19
#include "taosmsg.h"
20 21

#include "hash.h"
22 23
#include "qExecutor.h"
#include "qUtil.h"
H
hjxilinx 已提交
24
#include "qast.h"
25
#include "qresultBuf.h"
H
hjxilinx 已提交
26
#include "query.h"
S
slguan 已提交
27
#include "queryLog.h"
28
#include "tlosertree.h"
H
Haojun Liao 已提交
29
#include "exception.h"
30 31
#include "tscompression.h"
#include "ttime.h"
32 33 34 35 36 37 38 39 40

/* true when at least one of the status bits in s is also set in p */
#define Q_STATUS_EQUAL(p, s) (((p) & (s)) != 0)
/* true when the column flag f carries the TSDB_COL_TAG bit */
#define TSDB_COL_IS_TAG(f) (((f)&TSDB_COL_TAG) != 0)
/* true when the order of the query maps to the ascending forward step */
#define QUERY_IS_ASC_QUERY(q) (GET_FORWARD_DIRECTION_FACTOR((q)->order.order) == QUERY_ASC_FORWARD_STEP)

/* test / set the scan flag of a runtime environment */
#define IS_MASTER_SCAN(runtime)        ((runtime)->scanFlag == MASTER_SCAN)
#define IS_REVERSE_SCAN(runtime)       ((runtime)->scanFlag == REVERSE_SCAN)
#define SET_MASTER_SCAN_FLAG(runtime)  ((runtime)->scanFlag = MASTER_SCAN)
#define SET_REVERSE_SCAN_FLAG(runtime) ((runtime)->scanFlag = REVERSE_SCAN)

/* get the qinfo struct address from the address of its embedded runtimeEnv member */
#define GET_QINFO_ADDR(x) ((void *)((char *)(x)-offsetof(SQInfo, runtimeEnv)))

/* position of the index-th element counted from (query)->pos, moving by step per element */
#define GET_COL_DATA_POS(query, index, step) ((query)->pos + (index) * (step))
/* flip n in place between TSDB_ORDER_ASC and TSDB_ORDER_DESC; n must be an lvalue */
#define SWITCH_ORDER(n) (((n) = ((n) == TSDB_ORDER_ASC) ? TSDB_ORDER_DESC : TSDB_ORDER_ASC))

/* byte width / data type of the column referenced by the colidx-th select expression */
#define GET_COLUMN_BYTES(query, colidx) \
  ((query)->colList[(query)->pSelectExpr[colidx].base.colInfo.colIndex].bytes)
#define GET_COLUMN_TYPE(query, colidx) ((query)->colList[(query)->pSelectExpr[colidx].base.colInfo.colIndex].type)
55

56
/* Query status flags; each value is a distinct bit so they can be OR-ed together. */
enum {
  // set when the query starts to execute
  QUERY_NOT_COMPLETED = 0x1u,

  /* The result output buffer is full and the current query is paused.
   * This status only exists for group-by clauses and diff/add/division/multiply queries.
   */
  QUERY_RESBUF_FULL = 0x2u,

  /* The query is over:
   * 1. used by queries that produce a single result row, e.g., count/sum/first/last/avg, etc.
   * 2. also set when all data within the queried time window has been processed
   */
  QUERY_COMPLETED = 0x4u,

  /* The results have not been completely returned to the client yet; usually
   * used for interval queries with the interpolation option.
   */
  QUERY_OVER = 0x8u,
};
76 77

/* Result codes for comparing timestamp/tag pairs in a join (names only; usage is outside this section). */
enum {
  TS_JOIN_TS_EQUAL       = 0,
  TS_JOIN_TS_NOT_EQUALS  = 1,
  TS_JOIN_TAG_NOT_EQUALS = 2,
};

83
typedef struct {
84 85 86 87 88 89
  int32_t     status;       // query status
  TSKEY       lastKey;      // the lastKey value before query executed
  STimeWindow w;            // whole query time window
  STimeWindow curWindow;    // current query window
  int32_t     windowIndex;  // index of active time window result for interval query
  STSCursor   cur;
90 91
} SQueryStatusInfo;

H
Haojun Liao 已提交
92
#if 0
/*
 * Fault-injection allocators (disabled): when enabled, roughly two out of
 * five allocations fail so that out-of-memory handling can be exercised.
 */
static UNUSED_FUNC void *u_malloc(size_t size) {
  uint32_t v = rand();
  if (v % 5 <= 1) {
    return NULL;
  } else {
    return malloc(size);
  }
}

static UNUSED_FUNC void *u_calloc(size_t num, size_t size) {
  uint32_t v = rand();
  if (v % 5 <= 1) {
    return NULL;
  } else {
    return calloc(num, size);
  }
}

#define calloc  u_calloc
#define malloc  u_malloc
#endif
H
Haojun Liao 已提交
114

115
#define CLEAR_QUERY_STATUS(q, st)   ((q)->status &= (~(st)))
H
Haojun Liao 已提交
116 117 118
#define GET_NUM_OF_TABLEGROUP(q)    taosArrayGetSize((q)->tableqinfoGroupInfo.pGroupList)
#define GET_TABLEGROUP(q, _index)   ((SArray*) taosArrayGetP((q)->tableqinfoGroupInfo.pGroupList, (_index)))

119
static void setQueryStatus(SQuery *pQuery, int8_t status);
120

H
Haojun Liao 已提交
121
#define QUERY_IS_INTERVAL_QUERY(_q) ((_q)->intervalTime > 0)
H
hjxilinx 已提交
122
static bool isIntervalQuery(SQuery *pQuery) { return pQuery->intervalTime > 0; }
123

H
hjxilinx 已提交
124
// todo move to utility
125
static int32_t mergeIntoGroupResultImpl(SQInfo *pQInfo, SArray *group);
126

H
hjxilinx 已提交
127
static void setWindowResOutputBuf(SQueryRuntimeEnv *pRuntimeEnv, SWindowResult *pResult);
H
Haojun Liao 已提交
128
static void setWindowResOutputBufInitCtx(SQueryRuntimeEnv *pRuntimeEnv, SWindowResult *pResult);
129 130 131
static void resetMergeResultBuf(SQuery *pQuery, SQLFunctionCtx *pCtx, SResultInfo *pResultInfo);
static bool functionNeedToExecute(SQueryRuntimeEnv *pRuntimeEnv, SQLFunctionCtx *pCtx, int32_t functionId);
static void getNextTimeWindow(SQuery *pQuery, STimeWindow *pTimeWindow);
132

133 134 135
static void setExecParams(SQuery *pQuery, SQLFunctionCtx *pCtx, void* inputData, TSKEY *tsCol, SDataBlockInfo* pBlockInfo,
                          SDataStatis *pStatis, void *param, int32_t colIndex);

136
static void initCtxOutputBuf(SQueryRuntimeEnv *pRuntimeEnv);
137
static void destroyTableQueryInfo(STableQueryInfo *pTableQueryInfo, int32_t numOfCols);
138 139
static void resetCtxOutputBuf(SQueryRuntimeEnv *pRuntimeEnv);
static bool hasMainOutput(SQuery *pQuery);
H
hjxilinx 已提交
140
static void buildTagQueryResult(SQInfo *pQInfo);
141

142
static int32_t setAdditionalInfo(SQInfo *pQInfo, void *pTable, STableQueryInfo *pTableQueryInfo);
143
static int32_t flushFromResultBuf(SQInfo *pQInfo);
144

145
/*
 * Check the element at row elemPos against every column filter of the query.
 *
 * A row qualifies only if, for each filtered column, the value is not NULL and
 * at least one of that column's filter elements accepts it.
 */
bool doFilterData(SQuery *pQuery, int32_t elemPos) {
  for (int32_t col = 0; col < pQuery->numOfFilterCols; ++col) {
    SSingleColumnFilterInfo *pColFilter = &pQuery->pFilterInfo[col];
    char *pValue = pColFilter->pData + pColFilter->info.bytes * elemPos;

    // a NULL value never passes a filter
    if (isNull(pValue, pColFilter->info.type)) {
      return false;
    }

    // look for the first filter element that accepts the value
    int32_t idx = 0;
    while (idx < pColFilter->numOfFilters) {
      SColumnFilterElem *pCond = &pColFilter->pFilters[idx];
      if (pCond->fp(pCond, pValue, pValue)) {
        break;
      }
      ++idx;
    }

    // ran off the end: no filter element matched for this column
    if (idx >= pColFilter->numOfFilters) {
      return false;
    }
  }

  return true;
}

/*
 * Return the largest numOfRes among the output columns of the query.
 *
 * When the query has a main output, ts/tag/tagprj columns are ignored because
 * they cannot decide the number of results; the main output decides it.
 */
int64_t getNumOfResult(SQueryRuntimeEnv *pRuntimeEnv) {
  SQuery *pQuery = pRuntimeEnv->pQuery;
  bool    mainOutputExists = hasMainOutput(pQuery);
  int64_t numOfRes = 0;

  for (int32_t i = 0; i < pQuery->numOfOutput; ++i) {
    int32_t funcId = pQuery->pSelectExpr[i].base.functionId;
    bool    auxiliary = (funcId == TSDB_FUNC_TS || funcId == TSDB_FUNC_TAG || funcId == TSDB_FUNC_TAGPRJ);

    if (mainOutputExists && auxiliary) {
      continue;
    }

    SResultInfo *pInfo = GET_RES_INFO(&pRuntimeEnv->pCtx[i]);
    if (pInfo == NULL) {
      continue;
    }

    if (pInfo->numOfRes > numOfRes) {
      numOfRes = pInfo->numOfRes;
    }
  }

  assert(numOfRes >= 0);
  return numOfRes;
}

199 200 201 202 203 204 205 206 207
/*
 * The number of results needs to be updated because the offset value was updated.
 * Every output column except ts/tag/tagprj/ts_dummy gets numOfRes as its new
 * result count; the previous count is asserted to be larger than numOfRes.
 */
void updateNumOfResult(SQueryRuntimeEnv *pRuntimeEnv, int32_t numOfRes) {
  SQuery *pQuery = pRuntimeEnv->pQuery;

  for (int32_t i = 0; i < pQuery->numOfOutput; ++i) {
    SResultInfo *pInfo = GET_RES_INFO(&pRuntimeEnv->pCtx[i]);

    switch (pRuntimeEnv->pCtx[i].functionId) {
      case TSDB_FUNC_TS:
      case TSDB_FUNC_TAG:
      case TSDB_FUNC_TAGPRJ:
      case TSDB_FUNC_TS_DUMMY:
        break;  // these columns do not carry their own result count
      default:
        assert(pInfo->numOfRes > numOfRes);
        pInfo->numOfRes = numOfRes;
        break;
    }
  }
}

219 220 221 222 223 224 225 226 227
/* Map a group index to its result id: 200000 plus 10000 per group. */
static int32_t getGroupResultId(int32_t groupIndex) {
  const int32_t idBase = 200000;
  const int32_t idStride = 10000;

  return idBase + (groupIndex * idStride);
}

/*
 * Return true when the group-by expression contains a normal (non-tag) column.
 * A NULL expression or one without group columns yields false.
 */
bool isGroupbyNormalCol(SSqlGroupbyExpr *pGroupbyExpr) {
  if (pGroupbyExpr == NULL || pGroupbyExpr->numOfGroupCols == 0) {
    return false;
  }

  for (int32_t i = 0; i < pGroupbyExpr->numOfGroupCols; ++i) {
    SColIndex *pIdx = taosArrayGet(pGroupbyExpr->columnInfo, i);
    if (pIdx->flag != TSDB_COL_NORMAL) {
      continue;
    }

    /*
     * make sure the normal column locates at the second position if tbname exists in group by clause
     */
    assert(pGroupbyExpr->numOfGroupCols <= 1 || pIdx->colIndex > 0);
    return true;
  }

  return false;
}

/*
 * Return the data type of the first normal column in the group-by expression,
 * looked up by column id in pQuery->colList. TSDB_DATA_TYPE_NULL is returned
 * when there is no normal group-by column or its id is not found in colList.
 */
int16_t getGroupbyColumnType(SQuery *pQuery, SSqlGroupbyExpr *pGroupbyExpr) {
  assert(pGroupbyExpr != NULL);

  // -2 is the "no normal column found" marker
  int32_t groupColId = -2;

  int32_t g = 0;
  while (g < pGroupbyExpr->numOfGroupCols) {
    SColIndex *pIdx = taosArrayGet(pGroupbyExpr->columnInfo, g);
    if (pIdx->flag == TSDB_COL_NORMAL) {
      groupColId = pIdx->colId;
      break;
    }
    ++g;
  }

  for (int32_t c = 0; c < pQuery->numOfCols; ++c) {
    if (pQuery->colList[c].colId == groupColId) {
      return pQuery->colList[c].type;
    }
  }

  return TSDB_DATA_TYPE_NULL;
}

/*
 * Return true when the query outputs at least one selectivity function together
 * with at least one tag_dummy/ts_dummy column.
 */
bool isSelectivityWithTagsQuery(SQuery *pQuery) {
  int32_t selectivityCount = 0;
  bool    tagOutputSeen = false;

  for (int32_t i = 0; i < pQuery->numOfOutput; ++i) {
    int32_t funcId = pQuery->pSelectExpr[i].base.functionId;

    if (funcId == TSDB_FUNC_TAG_DUMMY || funcId == TSDB_FUNC_TS_DUMMY) {
      tagOutputSeen = true;
    } else if ((aAggs[funcId].nStatus & TSDB_FUNCSTATE_SELECTIVITY) != 0) {
      selectivityCount++;
    }
  }

  return (selectivityCount > 0 && tagOutputSeen);
}

293
/* Return true when the first select expression of the query is the ts_comp function. */
bool isTSCompQuery(SQuery *pQuery) {
  int32_t firstFuncId = pQuery->pSelectExpr[0].base.functionId;
  return firstFuncId == TSDB_FUNC_TS_COMP;
}
294

295 296 297 298
/*
 * Apply the LIMIT clause to the rows produced so far.
 *
 * When a positive limit is set and total + rows exceeds it, rec.rows is trimmed
 * to the remaining quota, the query is marked QUERY_COMPLETED and true is
 * returned. Otherwise nothing changes and false is returned.
 */
static bool limitResults(SQueryRuntimeEnv* pRuntimeEnv) {
  SQInfo* pQInfo = GET_QINFO_ADDR(pRuntimeEnv);
  SQuery* pQuery = pRuntimeEnv->pQuery;

  // no limit, or the limit has not been exceeded yet
  if (pQuery->limit.limit <= 0 || pQuery->rec.total + pQuery->rec.rows <= pQuery->limit.limit) {
    return false;
  }

  pQuery->rec.rows = pQuery->limit.limit - pQuery->rec.total;

  qDebug("QInfo:%p discard remain data due to result limitation, limit:%"PRId64", current return:%" PRId64 ", total:%"PRId64,
      pQInfo, pQuery->limit.limit, pQuery->rec.rows, pQuery->rec.total + pQuery->rec.rows);
  assert(pQuery->rec.rows >= 0);

  setQueryStatus(pQuery, QUERY_COMPLETED);
  return true;
}

/* Return true when any output expression of the query is a top or bottom function. */
static bool isTopBottomQuery(SQuery *pQuery) {
  int32_t i = 0;

  while (i < pQuery->numOfOutput) {
    switch (pQuery->pSelectExpr[i].base.functionId) {
      case TSDB_FUNC_TOP:
      case TSDB_FUNC_BOTTOM:
        return true;
      default:
        break;  // ts and every other function: keep looking
    }
    ++i;
  }

  return false;
}

H
Haojun Liao 已提交
327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344
/*
 * Return true when the query needs tag values in its output: either it is a
 * single-column ts_comp query (ts_comp requires the tag value for the join
 * filter), or at least one output expression refers to a tag column.
 */
static bool hasTagValOutput(SQuery* pQuery) {
  SExprInfo *pFirst = &pQuery->pSelectExpr[0];
  if (pQuery->numOfOutput == 1 && pFirst->base.functionId == TSDB_FUNC_TS_COMP) {
    return true;
  }

  // set tag value, by which the results are aggregated.
  for (int32_t i = 0; i < pQuery->numOfOutput; ++i) {
    if (TSDB_COL_IS_TAG(pQuery->pSelectExpr[i].base.colInfo.flag)) {
      return true;
    }
  }

  return false;
}

H
Haojun Liao 已提交
345
/*
 * Find the statistics entry that belongs to the column of the index-th select
 * expression. Returns NULL for tag columns (no corresponding field info) and
 * when no entry in pStatis[0..numOfCols) has the same column id.
 */
static SDataStatis *getStatisInfo(SQuery *pQuery, SDataStatis *pStatis, int32_t numOfCols, int32_t index) {
  SColIndex *pCol = &pQuery->pSelectExpr[index].base.colInfo;

  // for a tag column, no corresponding field info
  if (TSDB_COL_IS_TAG(pCol->flag)) {
    return NULL;
  }

  /*
   * Match by column id rather than by position: the file block may be out of
   * date, i.e. the newest table schema may differ from the schema of this block.
   * TODO: speedup by using bsearch
   */
  SDataStatis *pFound = NULL;
  for (int32_t s = 0; s < numOfCols && pFound == NULL; ++s) {
    if (pStatis[s].colId == pCol->colId) {
      pFound = &pStatis[s];
    }
  }

  return pFound;
}

366 367 368 369 370 371 372 373
/**
 * @param pQuery
 * @param col
 * @param pDataBlockInfo
 * @param pStatis
 * @param pColStatis
 * @return
 */
H
Haojun Liao 已提交
374
static bool hasNullValue(SQuery *pQuery, int32_t col, int32_t numOfCols, SDataStatis *pStatis, SDataStatis **pColStatis) {
375
  SColIndex *pColIndex = &pQuery->pSelectExpr[col].base.colInfo;
376
  if (TSDB_COL_IS_TAG(pColIndex->flag)) {
377 378
    return false;
  }
379

380 381 382 383
  // query on primary timestamp column, not null value at all
  if (pColIndex->colId == PRIMARYKEY_TIMESTAMP_COL_INDEX) {
    return false;
  }
384

385
  if (pStatis != NULL) {
H
Haojun Liao 已提交
386
    *pColStatis = getStatisInfo(pQuery, pStatis, numOfCols, col);
H
hjxilinx 已提交
387 388
  } else {
    *pColStatis = NULL;
389
  }
390

391 392 393
  if ((*pColStatis) != NULL && (*pColStatis)->numOfNull == 0) {
    return false;
  }
394

395 396 397 398 399 400
  return true;
}

/*
 * Locate, or create, the window result slot that belongs to the given key.
 *
 * The key (pData, bytes) is looked up in pWindowResInfo->hashList. On a hit the
 * stored slot index becomes the current index. On a miss a new slot is appended
 * (doubling the result array first when it is full) and registered in the hash.
 *
 * Returns the window result of the current index, or NULL when the result array
 * could not be enlarged. In the failure case pWindowResInfo is left unchanged:
 * the old array, its capacity and its size all remain valid.
 */
static SWindowResult *doSetTimeWindowFromKey(SQueryRuntimeEnv *pRuntimeEnv, SWindowResInfo *pWindowResInfo, char *pData,
                                             int16_t bytes) {
  SQuery *pQuery = pRuntimeEnv->pQuery;

  int32_t *p1 = (int32_t *) taosHashGet(pWindowResInfo->hashList, pData, bytes);
  if (p1 != NULL) {
    pWindowResInfo->curIndex = *p1;
    return getWindowResult(pWindowResInfo, pWindowResInfo->curIndex);
  }

  // more than the capacity, reallocate the resources
  if (pWindowResInfo->size >= pWindowResInfo->capacity) {
    // widen before doubling so that the multiplication itself cannot overflow
    int64_t newCap = (int64_t)pWindowResInfo->capacity * 2;

    SWindowResult *t = realloc(pWindowResInfo->pResult, newCap * sizeof(SWindowResult));
    if (t == NULL) {
      // realloc leaves the old block intact on failure; do not touch slots that were never allocated
      return NULL;
    }

    pWindowResInfo->pResult = t;

    // zero only the newly added half, then initialize each new slot
    memset(&pWindowResInfo->pResult[pWindowResInfo->capacity], 0,
           sizeof(SWindowResult) * (newCap - pWindowResInfo->capacity));

    for (int32_t i = pWindowResInfo->capacity; i < newCap; ++i) {
      SPosInfo pos = {-1, -1};
      createQueryResultInfo(pQuery, &pWindowResInfo->pResult[i], pRuntimeEnv->stableQuery, &pos);
    }
    pWindowResInfo->capacity = newCap;
  }

  // add a new result set for a new group
  pWindowResInfo->curIndex = pWindowResInfo->size++;
  taosHashPut(pWindowResInfo->hashList, pData, bytes, (char *)&pWindowResInfo->curIndex, sizeof(int32_t));

  return getWindowResult(pWindowResInfo, pWindowResInfo->curIndex);
}

/*
 * Get the time window that covers the timestamp ts.
 *
 * The search starts from the current window of pWindowResInfo (or, when no
 * window is active yet, from the stored prevSKey) and shifts the window start
 * by multiples of slidingTime until ts falls inside [skey, skey + intervalTime - 1].
 */
static STimeWindow getActiveTimeWindow(SWindowResInfo *pWindowResInfo, int64_t ts, SQuery *pQuery) {
  STimeWindow win = {0};

  if (pWindowResInfo->curIndex != -1) {
    win = getWindowResult(pWindowResInfo, curTimeWindow(pWindowResInfo))->window;
  } else {  // the first window, from the previous stored value
    win.skey = pWindowResInfo->prevSKey;
    win.ekey = win.skey + pQuery->intervalTime - 1;
  }

  bool covered = (win.skey <= ts && ts <= win.ekey);
  if (!covered) {
    int64_t start = win.skey;

    // window lies after ts: move the start backwards in whole sliding steps
    if (start > ts) {
      start -= ((start - ts + pQuery->slidingTime - 1) / pQuery->slidingTime) * pQuery->slidingTime;
    }

    // window lies before ts: move the start forwards in whole sliding steps
    int64_t end = start + pQuery->intervalTime - 1;
    if (end < ts) {
      start += ((ts - end + pQuery->slidingTime - 1) / pQuery->slidingTime) * pQuery->slidingTime;
    }

    win.skey = start;
    win.ekey = win.skey + pQuery->intervalTime - 1;
  }

  /*
   * query border check, skey should not be bounded by the query time range, since the value skey will
   * be used as the time window index value. So we only change ekey of time window accordingly.
   */
  if (QUERY_IS_ASC_QUERY(pQuery) && win.ekey > pQuery->window.ekey) {
    win.ekey = pQuery->window.ekey;
  }

  assert(ts >= win.skey && ts <= win.ekey);

  return win;
}

/*
 * Assign a (page, row) position in the disk-based result buffer to a window
 * result that does not have one yet.
 *
 * The last page of group sid is reused while it holds fewer than
 * numOfRowsPerPage rows; otherwise a new page is requested.
 * Returns 0 on success (including when a position was already assigned) and
 * -1 when no page could be obtained.
 */
static int32_t addNewWindowResultBuf(SWindowResult *pWindowRes, SDiskbasedResultBuf *pResultBuf, int32_t sid,
                                     int32_t numOfRowsPerPage) {
  if (pWindowRes->pos.pageId != -1) {
    return 0;
  }

  // in the first scan, new space needed for results
  int32_t    pageId = -1;
  tFilePage *pPage = NULL;
  SIDList    pages = getDataBufPagesIdList(pResultBuf, sid);

  if (pages.size != 0) {
    pageId = getLastPageId(&pages);
    pPage = GET_RES_BUF_PAGE_BY_ID(pResultBuf, pageId);

    // the last page is full, move on to a fresh one
    if (pPage->num >= numOfRowsPerPage) {
      pPage = getNewDataBuf(pResultBuf, sid, &pageId);
      assert(pPage == NULL || pPage->num == 0);  // number of elements must be 0 for new allocated buffer
    }
  } else {
    pPage = getNewDataBuf(pResultBuf, sid, &pageId);
  }

  if (pPage == NULL) {
    return -1;
  }

  // set the number of rows in current disk page
  if (pWindowRes->pos.pageId == -1) {  // not allocated yet, allocate new buffer
    pWindowRes->pos.pageId = pageId;
    pWindowRes->pos.rowId = pPage->num++;
  }

  return 0;
}

/*
 * Select the window result keyed by win->skey as the current output target.
 *
 * The window result slot is looked up (or created), a result buffer position is
 * assigned if it has none, the slot's window is set to *win and the function
 * contexts are pointed at it. Returns TSDB_CODE_SUCCESS, or -1 when the slot or
 * its buffer could not be obtained.
 */
static int32_t setWindowOutputBufByKey(SQueryRuntimeEnv *pRuntimeEnv, SWindowResInfo *pWindowResInfo, int32_t sid,
                                       STimeWindow *win) {
  assert(win->skey <= win->ekey);

  SWindowResult *pSlot = doSetTimeWindowFromKey(pRuntimeEnv, pWindowResInfo, (char *)&win->skey, TSDB_KEYSIZE);
  if (pSlot == NULL) {
    return -1;
  }

  // not assign result buffer yet, add new result buffer
  if (pSlot->pos.pageId == -1 &&
      addNewWindowResultBuf(pSlot, pRuntimeEnv->pResultBuf, sid, pRuntimeEnv->numOfRowsPerPage) != 0) {
    return -1;
  }

  // set time window for current result
  pSlot->window = *win;

  setWindowResOutputBufInitCtx(pRuntimeEnv, pSlot);
  return TSDB_CODE_SUCCESS;
}

/* Return the status of the window result stored in the given slot; slot must be in [0, size). */
static SWindowStatus *getTimeWindowResStatus(SWindowResInfo *pWindowResInfo, int32_t slot) {
  assert(slot >= 0 && slot < pWindowResInfo->size);

  SWindowResult *pRes = &pWindowResInfo->pResult[slot];
  return &pRes->status;
}

H
Haojun Liao 已提交
542
/*
 * Count how many rows can be consumed, starting at pos and moving in the given
 * order, before passing ekey.
 *
 * searchFn locates the end position for ekey in pData; a negative result means
 * no step can be taken. The row at the end position is included only when its
 * timestamp equals ekey.
 */
static int32_t getForwardStepsInBlock(int32_t numOfRows, __block_search_fn_t searchFn, TSKEY ekey, int16_t pos,
                                      int16_t order, int64_t *pData) {
  int32_t endPos = searchFn((char *)pData, numOfRows, ekey, order);
  if (endPos < 0) {
    return 0;
  }

  int32_t steps = (order == TSDB_ORDER_ASC) ? (endPos - pos) : (pos - endPos);
  assert(steps >= 0);

  // endPos data is equalled to the key so, we do need to read the element in endPos
  if (pData[endPos] == ekey) {
    steps += 1;
  }

  return steps;
}

/**
 * NOTE: the query status only set for the first scan of master scan.
 */
563
static int32_t doCheckQueryCompleted(SQueryRuntimeEnv *pRuntimeEnv, TSKEY lastKey, SWindowResInfo *pWindowResInfo) {
564 565
  SQuery *pQuery = pRuntimeEnv->pQuery;
  if (pRuntimeEnv->scanFlag != MASTER_SCAN || (!isIntervalQuery(pQuery))) {
566
    return pWindowResInfo->size;
567
  }
568

569
  // no qualified results exist, abort check
570 571
  int32_t numOfClosed = 0;
  
572
  if (pWindowResInfo->size == 0) {
573
    return pWindowResInfo->size;
574
  }
575

576
  // query completed
H
hjxilinx 已提交
577 578
  if ((lastKey >= pQuery->current->win.ekey && QUERY_IS_ASC_QUERY(pQuery)) ||
      (lastKey <= pQuery->current->win.ekey && !QUERY_IS_ASC_QUERY(pQuery))) {
579
    closeAllTimeWindow(pWindowResInfo);
580

581 582 583 584
    pWindowResInfo->curIndex = pWindowResInfo->size - 1;
    setQueryStatus(pQuery, QUERY_COMPLETED | QUERY_RESBUF_FULL);
  } else {  // set the current index to be the last unclosed window
    int32_t i = 0;
585
    int64_t skey = TSKEY_INITIAL_VAL;
586

587 588 589
    for (i = 0; i < pWindowResInfo->size; ++i) {
      SWindowResult *pResult = &pWindowResInfo->pResult[i];
      if (pResult->status.closed) {
590
        numOfClosed += 1;
591 592
        continue;
      }
593

594 595 596 597 598 599 600 601
      if ((pResult->window.ekey <= lastKey && QUERY_IS_ASC_QUERY(pQuery)) ||
          (pResult->window.skey >= lastKey && !QUERY_IS_ASC_QUERY(pQuery))) {
        closeTimeWindow(pWindowResInfo, i);
      } else {
        skey = pResult->window.skey;
        break;
      }
    }
602

603
    // all windows are closed, set the last one to be the skey
604
    if (skey == TSKEY_INITIAL_VAL) {
605 606 607 608 609
      assert(i == pWindowResInfo->size);
      pWindowResInfo->curIndex = pWindowResInfo->size - 1;
    } else {
      pWindowResInfo->curIndex = i;
    }
610

611
    pWindowResInfo->prevSKey = pWindowResInfo->pResult[pWindowResInfo->curIndex].window.skey;
612

613 614
    // the number of completed slots are larger than the threshold, return current generated results to client.
    if (numOfClosed > pWindowResInfo->threshold) {
615
      qDebug("QInfo:%p total result window:%d closed:%d, reached the output threshold %d, return",
616 617
          GET_QINFO_ADDR(pRuntimeEnv), pWindowResInfo->size, numOfClosed, pQuery->rec.threshold);
      
618
      setQueryStatus(pQuery, QUERY_RESBUF_FULL);
619
    } else {
620
      qDebug("QInfo:%p total result window:%d already closed:%d", GET_QINFO_ADDR(pRuntimeEnv), pWindowResInfo->size,
621
             numOfClosed);
622 623
    }
  }
624 625 626 627 628 629 630
  
  // output has reached the limitation, set query completed
  if (pQuery->limit.limit > 0 && (pQuery->limit.limit + pQuery->limit.offset) <= numOfClosed &&
      pRuntimeEnv->scanFlag == MASTER_SCAN) {
    setQueryStatus(pQuery, QUERY_COMPLETED);
  }
  
631
  assert(pWindowResInfo->prevSKey != TSKEY_INITIAL_VAL);
632
  return numOfClosed;
633 634 635
}

/*
 * Count the rows of the current data block, starting at startPos and moving in
 * query order, that fall within the time window ending at ekey.
 *
 * When updateLastKey is set and at least one row qualifies, the lastKey of the
 * current table (pQuery->current) is advanced to one step past the last
 * qualified timestamp.
 */
static int32_t getNumOfRowsInTimeWindow(SQuery *pQuery, SDataBlockInfo *pDataBlockInfo, TSKEY *pPrimaryColumn,
                                        int32_t startPos, TSKEY ekey, __block_search_fn_t searchFn, bool updateLastKey) {
  assert(startPos >= 0 && startPos < pDataBlockInfo->rows);

  int32_t order = pQuery->order.order;
  int32_t step = GET_FORWARD_DIRECTION_FACTOR(order);
  int32_t num = -1;

  STableQueryInfo *pTable = pQuery->current;

  if (QUERY_IS_ASC_QUERY(pQuery)) {
    if (ekey >= pDataBlockInfo->window.ekey) {
      // the window covers the rest of the block
      num = pDataBlockInfo->rows - startPos;
      if (updateLastKey) {
        pTable->lastKey = pDataBlockInfo->window.ekey + step;
      }
    } else {
      num = getForwardStepsInBlock(pDataBlockInfo->rows, searchFn, ekey, startPos, order, pPrimaryColumn);
      if (num == 0) {  // no qualified data in current block, do not update the lastKey value
        assert(ekey < pPrimaryColumn[startPos]);
      } else if (updateLastKey) {
        pTable->lastKey = pPrimaryColumn[startPos + (num - 1)] + step;
      }
    }
  } else {  // desc
    if (ekey <= pDataBlockInfo->window.skey) {
      // the window covers everything from startPos down to the first row
      num = startPos + 1;
      if (updateLastKey) {
        pTable->lastKey = pDataBlockInfo->window.skey + step;
      }
    } else {
      num = getForwardStepsInBlock(pDataBlockInfo->rows, searchFn, ekey, startPos, order, pPrimaryColumn);
      if (num == 0) {  // no qualified data in current block, do not update the lastKey value
        assert(ekey > pPrimaryColumn[startPos]);
      } else if (updateLastKey) {
        pTable->lastKey = pPrimaryColumn[startPos - (num - 1)] + step;
      }
    }
  }

  assert(num >= 0);
  return num;
}

/*
 * Run every output function of the query over a range of rows of the current block.
 *
 * For each function context the window start, the number of rows (forwardStep)
 * and the start offset are set up before the block-wise function is invoked.
 * pStatus is not referenced by this function.
 */
static void doBlockwiseApplyFunctions(SQueryRuntimeEnv *pRuntimeEnv, SWindowStatus *pStatus, STimeWindow *pWin,
                                      int32_t offset, int32_t forwardStep, TSKEY *tsBuf, int32_t numOfTotal) {
  SQuery *pQuery = pRuntimeEnv->pQuery;

  for (int32_t i = 0; i < pQuery->numOfOutput; ++i) {
    SQLFunctionCtx *pFuncCtx = &pRuntimeEnv->pCtx[i];
    int32_t         funcId = pQuery->pSelectExpr[i].base.functionId;

    pFuncCtx->nStartQueryTimestamp = pWin->skey;
    pFuncCtx->size = forwardStep;

    // the start offset is always the lowest position of the range, whatever the scan order
    if (QUERY_IS_ASC_QUERY(pQuery)) {
      pFuncCtx->startOffset = offset;
    } else {
      pFuncCtx->startOffset = offset - (forwardStep - 1);
    }

    // selectivity functions also receive the timestamp list
    if ((aAggs[funcId].nStatus & TSDB_FUNCSTATE_SELECTIVITY) != 0) {
      pFuncCtx->ptsList = &tsBuf[offset];
    }

    // not a whole block involved in query processing, statistics data can not be used
    if (forwardStep != numOfTotal) {
      pFuncCtx->preAggVals.isSet = false;
    }

    if (functionNeedToExecute(pRuntimeEnv, pFuncCtx, funcId)) {
      aAggs[funcId].xFunction(pFuncCtx);
    }
  }
}

/*
 * Run every output function of the query on the single row at the given offset.
 * pStatus is not referenced by this function.
 */
static void doRowwiseApplyFunctions(SQueryRuntimeEnv *pRuntimeEnv, SWindowStatus *pStatus, STimeWindow *pWin,
                                    int32_t offset) {
  SQuery *pQuery = pRuntimeEnv->pQuery;

  for (int32_t i = 0; i < pQuery->numOfOutput; ++i) {
    SQLFunctionCtx *pFuncCtx = &pRuntimeEnv->pCtx[i];
    pFuncCtx->nStartQueryTimestamp = pWin->skey;

    int32_t funcId = pQuery->pSelectExpr[i].base.functionId;
    if (!functionNeedToExecute(pRuntimeEnv, pFuncCtx, funcId)) {
      continue;
    }

    aAggs[funcId].xFunctionF(pFuncCtx, offset);
  }
}

static int32_t getNextQualifiedWindow(SQueryRuntimeEnv *pRuntimeEnv, STimeWindow *pNextWin,
726 727
                                      SDataBlockInfo *pDataBlockInfo, TSKEY *primaryKeys,
                                      __block_search_fn_t searchFn) {
728
  SQuery *pQuery = pRuntimeEnv->pQuery;
729

H
Haojun Liao 已提交
730 731 732 733
  // tumbling time window query, a special case of sliding time window query
  if (pQuery->slidingTime == pQuery->intervalTime) {
    // todo opt
  }
734

H
Haojun Liao 已提交
735
  getNextTimeWindow(pQuery, pNextWin);
736

H
Haojun Liao 已提交
737 738 739 740 741
  // next time window is not in current block
  if ((pNextWin->skey > pDataBlockInfo->window.ekey && QUERY_IS_ASC_QUERY(pQuery)) ||
      (pNextWin->ekey < pDataBlockInfo->window.skey && !QUERY_IS_ASC_QUERY(pQuery))) {
    return -1;
  }
742

H
Haojun Liao 已提交
743 744 745 746 747
  TSKEY startKey = -1;
  if (QUERY_IS_ASC_QUERY(pQuery)) {
    startKey = pNextWin->skey;
    if (startKey < pQuery->window.skey) {
      startKey = pQuery->window.skey;
748
    }
H
Haojun Liao 已提交
749 750 751 752
  } else {
    startKey = pNextWin->ekey;
    if (startKey > pQuery->window.skey) {
      startKey = pQuery->window.skey;
753
    }
H
Haojun Liao 已提交
754
  }
755

H
Haojun Liao 已提交
756
  int32_t startPos = searchFn((char *)primaryKeys, pDataBlockInfo->rows, startKey, pQuery->order.order);
757

H
Haojun Liao 已提交
758 759 760 761 762 763
  /*
   * This time window does not cover any data, try next time window,
   * this case may happen when the time window is too small
   */
  if (QUERY_IS_ASC_QUERY(pQuery) && primaryKeys[startPos] > pNextWin->ekey) {
    TSKEY next = primaryKeys[startPos];
764

H
Haojun Liao 已提交
765 766 767 768
    pNextWin->ekey += ((next - pNextWin->ekey + pQuery->slidingTime - 1)/pQuery->slidingTime) * pQuery->slidingTime;
    pNextWin->skey = pNextWin->ekey - pQuery->intervalTime + 1;
  } else if ((!QUERY_IS_ASC_QUERY(pQuery)) && primaryKeys[startPos] < pNextWin->skey) {
    TSKEY next = primaryKeys[startPos];
769

H
Haojun Liao 已提交
770 771
    pNextWin->skey -= ((pNextWin->skey - next + pQuery->slidingTime - 1) / pQuery->slidingTime) * pQuery->slidingTime;
    pNextWin->ekey = pNextWin->skey + pQuery->intervalTime - 1;
772
  }
773

H
Haojun Liao 已提交
774
  return startPos;
775 776 777 778 779 780 781 782 783 784 785 786 787 788 789
}

/*
 * Return the end key of pWindow in scan direction, bounded by the end key of
 * the query: min(pWindow->ekey, query ekey) for ascending queries and
 * max(pWindow->skey, query ekey) for descending ones.
 */
static TSKEY reviseWindowEkey(SQuery *pQuery, STimeWindow *pWindow) {
  TSKEY queryEnd = pQuery->window.ekey;

  if (QUERY_IS_ASC_QUERY(pQuery)) {
    return (pWindow->ekey > queryEnd) ? queryEnd : pWindow->ekey;
  }

  return (pWindow->skey < queryEnd) ? queryEnd : pWindow->skey;
}

H
hjxilinx 已提交
794 795 796 797 798 799 800 801 802 803 804 805 806 807 808
/*
 * Return the data pointer of the column with the given id in pDataBlock, or
 * NULL when the block holds no such column.
 * todo: binary search
 */
static void* getDataBlockImpl(SArray* pDataBlock, int32_t colId) {
  int32_t total = taosArrayGetSize(pDataBlock);
  int32_t idx = 0;

  while (idx < total) {
    SColumnInfoData *pColData = taosArrayGet(pDataBlock, idx);
    if (pColData->info.colId == colId) {
      return pColData->pData;
    }
    ++idx;
  }

  return NULL;
}

static char *getDataBlock(SQueryRuntimeEnv *pRuntimeEnv, SArithmeticSupport *sas, int32_t col, int32_t size,
809
                    SArray *pDataBlock) {
dengyihao's avatar
dengyihao 已提交
810 811 812
  if (pDataBlock == NULL) {
    return NULL;
  }
813
  char *dataBlock = NULL;
814

H
Haojun Liao 已提交
815
  SQuery *pQuery = pRuntimeEnv->pQuery;
816
  SQLFunctionCtx *pCtx = pRuntimeEnv->pCtx;
817

818
  int32_t functionId = pQuery->pSelectExpr[col].base.functionId;
819
  if (functionId == TSDB_FUNC_ARITHM) {
820
    sas->pArithExpr = &pQuery->pSelectExpr[col];
821

822 823 824 825 826 827
    // set the start offset to be the lowest start position, no matter asc/desc query order
    if (QUERY_IS_ASC_QUERY(pQuery)) {
      pCtx->startOffset = pQuery->pos;
    } else {
      pCtx->startOffset = pQuery->pos - (size - 1);
    }
828

829 830 831 832
    sas->offset  = 0;
    sas->colList = pQuery->colList;
    sas->numOfCols = pQuery->numOfCols;
    sas->data    = calloc(pQuery->numOfCols, POINTER_BYTES);
833

H
Haojun Liao 已提交
834 835 836 837
    if (sas->data == NULL) {
      longjmp(pRuntimeEnv->env, TSDB_CODE_QRY_OUT_OF_MEMORY);
    }

838
    // here the pQuery->colList and sas->colList are identical
H
Haojun Liao 已提交
839
    int32_t numOfCols = taosArrayGetSize(pDataBlock);
840
    for (int32_t i = 0; i < pQuery->numOfCols; ++i) {
841
      SColumnInfo *pColMsg = &pQuery->colList[i];
842

843 844 845 846 847 848 849 850
      dataBlock = NULL;
      for (int32_t k = 0; k < numOfCols; ++k) {  //todo refactor
        SColumnInfoData *p = taosArrayGet(pDataBlock, k);
        if (pColMsg->colId == p->info.colId) {
          dataBlock = p->pData;
          break;
        }
      }
851

852
      assert(dataBlock != NULL);
H
Haojun Liao 已提交
853
      sas->data[i] = dataBlock/* + pQuery->colList[i].bytes*/;  // start from the offset
854
    }
855

856
  } else {  // other type of query function
857
    SColIndex *pCol = &pQuery->pSelectExpr[col].base.colInfo;
858
    if (TSDB_COL_IS_TAG(pCol->flag) || pDataBlock == NULL) {
859 860
      dataBlock = NULL;
    } else {
H
hjxilinx 已提交
861
      dataBlock = getDataBlockImpl(pDataBlock, pCol->colId);
862 863
    }
  }
864

865 866 867 868
  return dataBlock;
}

/**
H
Haojun Liao 已提交
869
 * todo set the last value for pQueryTableInfo as in rowwiseapplyfunctions
870 871
 * @param pRuntimeEnv
 * @param forwardStep
872
 * @param tsCols
873 874 875 876 877
 * @param pFields
 * @param isDiskFileBlock
 * @return                  the incremental number of output value, so it maybe 0 for fixed number of query,
 *                          such as count/min/max etc.
 */
878
static void blockwiseApplyFunctions(SQueryRuntimeEnv *pRuntimeEnv, SDataStatis *pStatis,
879 880
                                       SDataBlockInfo *pDataBlockInfo, SWindowResInfo *pWindowResInfo,
                                       __block_search_fn_t searchFn, SArray *pDataBlock) {
881
  SQLFunctionCtx *pCtx = pRuntimeEnv->pCtx;
882 883 884
  
  SQuery *pQuery = pRuntimeEnv->pQuery;
  TSKEY  *tsCols = NULL;
885
  if (pDataBlock != NULL) {
886
    SColumnInfoData* pColInfo = taosArrayGet(pDataBlock, 0);
887
    tsCols = (TSKEY *)(pColInfo->pData);
888
  }
889

890
  SArithmeticSupport *sasArray = calloc((size_t)pQuery->numOfOutput, sizeof(SArithmeticSupport));
H
Haojun Liao 已提交
891 892 893
  if (sasArray == NULL) {
    longjmp(pRuntimeEnv->env, TSDB_CODE_QRY_OUT_OF_MEMORY);
  }
894

895
  for (int32_t k = 0; k < pQuery->numOfOutput; ++k) {
H
hjxilinx 已提交
896
    char *dataBlock = getDataBlock(pRuntimeEnv, &sasArray[k], k, pDataBlockInfo->rows, pDataBlock);
897
    setExecParams(pQuery, &pCtx[k], dataBlock, tsCols, pDataBlockInfo, pStatis, &sasArray[k], k);
898
  }
899

900
  int32_t step = GET_FORWARD_DIRECTION_FACTOR(pQuery->order.order);
H
Haojun Liao 已提交
901
  if (QUERY_IS_INTERVAL_QUERY(pQuery) && tsCols != NULL) {
902
    int32_t offset = GET_COL_DATA_POS(pQuery, 0, step);
903
    TSKEY   ts = tsCols[offset];
904

905
    STimeWindow win = getActiveTimeWindow(pWindowResInfo, ts, pQuery);
H
hjxilinx 已提交
906
    if (setWindowOutputBufByKey(pRuntimeEnv, pWindowResInfo, pDataBlockInfo->tid, &win) != TSDB_CODE_SUCCESS) {
dengyihao's avatar
dengyihao 已提交
907
      tfree(sasArray);
H
hjxilinx 已提交
908
      return;
909
    }
910

911 912
    TSKEY   ekey = reviseWindowEkey(pQuery, &win);
    int32_t forwardStep =
913
        getNumOfRowsInTimeWindow(pQuery, pDataBlockInfo, tsCols, pQuery->pos, ekey, searchFn, true);
914

915
    SWindowStatus *pStatus = getTimeWindowResStatus(pWindowResInfo, curTimeWindow(pWindowResInfo));
916
    doBlockwiseApplyFunctions(pRuntimeEnv, pStatus, &win, pQuery->pos, forwardStep, tsCols, pDataBlockInfo->rows);
917

918 919
    int32_t     index = pWindowResInfo->curIndex;
    STimeWindow nextWin = win;
920

921
    while (1) {
922
      int32_t startPos = getNextQualifiedWindow(pRuntimeEnv, &nextWin, pDataBlockInfo, tsCols, searchFn);
923 924 925
      if (startPos < 0) {
        break;
      }
926

927
      // null data, failed to allocate more memory buffer
H
hjxilinx 已提交
928
      if (setWindowOutputBufByKey(pRuntimeEnv, pWindowResInfo, pDataBlockInfo->tid, &nextWin) != TSDB_CODE_SUCCESS) {
929 930
        break;
      }
931

932
      ekey = reviseWindowEkey(pQuery, &nextWin);
933
      forwardStep = getNumOfRowsInTimeWindow(pQuery, pDataBlockInfo, tsCols, startPos, ekey, searchFn, true);
934

935
      pStatus = getTimeWindowResStatus(pWindowResInfo, curTimeWindow(pWindowResInfo));
936
      doBlockwiseApplyFunctions(pRuntimeEnv, pStatus, &nextWin, startPos, forwardStep, tsCols, pDataBlockInfo->rows);
937
    }
938

939 940 941 942 943 944 945
    pWindowResInfo->curIndex = index;
  } else {
    /*
     * the sqlfunctionCtx parameters should be set done before all functions are invoked,
     * since the selectivity + tag_prj query needs all parameters been set done.
     * tag_prj function are changed to be TSDB_FUNC_TAG_DUMMY
     */
946
    for (int32_t k = 0; k < pQuery->numOfOutput; ++k) {
947
      int32_t functionId = pQuery->pSelectExpr[k].base.functionId;
948 949 950 951 952
      if (functionNeedToExecute(pRuntimeEnv, &pCtx[k], functionId)) {
        aAggs[functionId].xFunction(&pCtx[k]);
      }
    }
  }
953

954 955 956 957
  for(int32_t i = 0; i < pQuery->numOfOutput; ++i) {
    if (pQuery->pSelectExpr[i].base.functionId != TSDB_FUNC_ARITHM) {
      continue;
    }
958

959 960
    tfree(sasArray[i].data);
  }
961

962 963 964 965 966 967 968
  tfree(sasArray);
}

/*
 * Select (creating it when necessary) the result buffer of the group identified by the
 * group-by column value at pData, and point the function contexts at it.
 *
 * Returns TSDB_CODE_SUCCESS on success, and -1 when the value is null, no window result could be
 * obtained for the key, or a new result page could not be added.
 */
static int32_t setGroupResultOutputBuf(SQueryRuntimeEnv *pRuntimeEnv, char *pData, int16_t type, int16_t bytes) {
  if (isNull(pData, type)) {  // null values do not form a group
    return -1;
  }

  int32_t groupResultId = 1;

  // integer view of the group key; stays -1 for every type not listed below
  int64_t groupKey = -1;
  if (type == TSDB_DATA_TYPE_BOOL || type == TSDB_DATA_TYPE_TINYINT) {
    groupKey = GET_INT8_VAL(pData);
  } else if (type == TSDB_DATA_TYPE_SMALLINT) {
    groupKey = GET_INT16_VAL(pData);
  } else if (type == TSDB_DATA_TYPE_INT) {
    groupKey = GET_INT32_VAL(pData);
  } else if (type == TSDB_DATA_TYPE_BIGINT) {
    groupKey = GET_INT64_VAL(pData);
  }

  SWindowResult *pGroupRes = doSetTimeWindowFromKey(pRuntimeEnv, &pRuntimeEnv->windowResInfo, pData, bytes);
  if (pGroupRes == NULL) {
    return -1;
  }

  pGroupRes->window.skey = groupKey;
  pGroupRes->window.ekey = groupKey;

  // no result page assigned yet: add a new one
  if (pGroupRes->pos.pageId == -1 &&
      addNewWindowResultBuf(pGroupRes, pRuntimeEnv->pResultBuf, groupResultId, pRuntimeEnv->numOfRowsPerPage) != 0) {
    return -1;
  }

  setWindowResOutputBuf(pRuntimeEnv, pGroupRes);
  initCtxOutputBuf(pRuntimeEnv);
  return TSDB_CODE_SUCCESS;
}

1005
/*
 * Locate the data of the group-by column inside the loaded data block.
 *
 * Group-by entries whose flag equals TSDB_COL_TAG are skipped. For each remaining entry, *type
 * and *bytes are set from the matching query column; the data buffer of the first block column
 * with the same column id is returned. Returns NULL when no entry yields a block column.
 */
static char *getGroupbyColumnData(SQuery *pQuery, int16_t *type, int16_t *bytes, SArray* pDataBlock) {
  SSqlGroupbyExpr *pGroupbyExpr = pQuery->pGroupbyExpr;

  for (int32_t g = 0; g < pGroupbyExpr->numOfGroupCols; ++g) {
    SColIndex* pColIndex = taosArrayGet(pGroupbyExpr->columnInfo, g);
    if (pColIndex->flag == TSDB_COL_TAG) {
      continue;
    }

    // position of the group-by column in the query column list
    int32_t colId = pColIndex->colId;
    int16_t colIndex = -1;

    int32_t c = 0;
    while (c < pQuery->numOfCols) {
      if (pQuery->colList[c].colId == colId) {
        colIndex = c;
        break;
      }

      ++c;
    }

    assert(colIndex >= 0 && colIndex < pQuery->numOfCols);

    *type  = pQuery->colList[colIndex].type;
    *bytes = pQuery->colList[colIndex].bytes;

    /*
     * the colIndex is acquired from the first meter of all qualified meters in this vnode during
     * the query prepare stage; the remaining meters may not actually have the required column in
     * cache, so the column is looked up again in the current data block.
     */
    int32_t numOfBlockCols = taosArrayGetSize(pDataBlock);
    for (int32_t b = 0; b < numOfBlockCols; ++b) {
      SColumnInfoData *pColData = taosArrayGet(pDataBlock, b);
      if (pColData->info.colId == pColIndex->colId) {
        return pColData->pData;
      }
    }
  }

  return NULL;
}

/*
 * Compare the row at `offset` of the current input with the current element of the timestamp
 * buffer (pRuntimeEnv->pTSBuf).
 *
 * Returns TS_JOIN_TAG_NOT_EQUALS when the tag of the first context differs from the element tag,
 * TS_JOIN_TS_NOT_EQUALS when the row timestamp has not yet reached the element timestamp in scan
 * direction, and TS_JOIN_TS_EQUAL otherwise. A row timestamp that is already past the element
 * timestamp trips an assertion.
 */
static int32_t doTSJoinFilter(SQueryRuntimeEnv *pRuntimeEnv, int32_t offset) {
  SQuery         *pQuery = pRuntimeEnv->pQuery;
  SQLFunctionCtx *pCtx = pRuntimeEnv->pCtx;
  STSElem         elem = tsBufGetElem(pRuntimeEnv->pTSBuf);

  // the tag is compared first
  if (pCtx[0].tag.i64Key != elem.tag) {
    return TS_JOIN_TAG_NOT_EQUALS;
  }

  TSKEY key = *(TSKEY *)(pCtx[0].aInputElemBuf + TSDB_KEYSIZE * offset);

#if defined(_DEBUG_VIEW)
  printf("elem in comp ts file:%" PRId64 ", key:%" PRId64 ", tag:%"PRIu64", query order:%d, ts order:%d, traverse:%d, index:%d\n",
         elem.ts, key, elem.tag, pQuery->order.order, pRuntimeEnv->pTSBuf->tsOrder,
         pRuntimeEnv->pTSBuf->cur.order, pRuntimeEnv->pTSBuf->cur.tsIndex);
#endif

  if (key == elem.ts) {
    return TS_JOIN_TS_EQUAL;
  }

  // asc: a smaller key lags behind the element; desc: a larger key lags behind it
  bool lagging = QUERY_IS_ASC_QUERY(pQuery) ? (key < elem.ts) : (key > elem.ts);
  if (lagging) {
    return TS_JOIN_TS_NOT_EQUALS;
  }

  assert(false);
  return TS_JOIN_TS_EQUAL;
}

/*
 * Decide whether the function `functionId` bound to pCtx has to be evaluated for the current
 * scan. The checks are ordered; the first one that matches wins.
 */
static bool functionNeedToExecute(SQueryRuntimeEnv *pRuntimeEnv, SQLFunctionCtx *pCtx, int32_t functionId) {
  SResultInfo *pResInfo = GET_RES_INFO(pCtx);
  SQuery      *pQuery = pRuntimeEnv->pQuery;

  // the timestamp column always generates results
  if (functionId == TSDB_FUNC_TS) {
    return true;
  }

  // finished results and the dummy tag/ts functions are never evaluated
  if (pResInfo->complete || functionId == TSDB_FUNC_TAG_DUMMY || functionId == TSDB_FUNC_TS_DUMMY) {
    return false;
  }

  // first/first_dst run only when the query order is ascending
  if (functionId == TSDB_FUNC_FIRST_DST || functionId == TSDB_FUNC_FIRST) {
    return QUERY_IS_ASC_QUERY(pQuery);
  }

  // last/last_dst run only when their first parameter equals the query order
  // todo add comments
  if (functionId == TSDB_FUNC_LAST_DST || functionId == TSDB_FUNC_LAST) {
    return pCtx->param[0].i64Key == pQuery->order.order;
  }

  // every remaining function is skipped during the reverse (supplementary) scan
  return !IS_REVERSE_SCAN(pRuntimeEnv);
}

1113 1114
/*
 * Apply the output functions of the query to the current data block one row at a time.
 *
 * Each row is first checked against the timestamp buffer (when pRuntimeEnv->pTSBuf is set) and
 * the column filters. A surviving row is then applied either to every time window that contains
 * its timestamp (interval query) or to the group chosen by its group-by column value (group by
 * normal column) / the plain output contexts. After the loop the last key of the current table
 * is advanced past the last visited row.
 *
 * On allocation failure of the temporary SArithmeticSupport array the function does not return;
 * it longjmp()s to pRuntimeEnv->env with TSDB_CODE_QRY_OUT_OF_MEMORY.
 */
static void rowwiseApplyFunctions(SQueryRuntimeEnv *pRuntimeEnv, SDataStatis *pStatis, SDataBlockInfo *pDataBlockInfo,
    SWindowResInfo *pWindowResInfo, SArray *pDataBlock) {
  SQuery          *pQuery = pRuntimeEnv->pQuery;
  SQLFunctionCtx  *pCtx = pRuntimeEnv->pCtx;
  STableQueryInfo *pCurrent = pQuery->current;

  // the first block column serves as the timestamp column only if it has the timestamp type
  SColumnInfoData *pFirstCol = (SColumnInfoData *)taosArrayGet(pDataBlock, 0);
  TSKEY           *tsCols = NULL;
  if (pFirstCol->info.type == TSDB_DATA_TYPE_TIMESTAMP) {
    tsCols = (TSKEY *)pFirstCol->pData;
  }

  bool groupByColumn = pRuntimeEnv->groupbyNormalCol;

  SArithmeticSupport *sasArray = calloc((size_t)pQuery->numOfOutput, sizeof(SArithmeticSupport));
  if (sasArray == NULL) {
    longjmp(pRuntimeEnv->env, TSDB_CODE_QRY_OUT_OF_MEMORY);
  }

  int16_t type = 0;
  int16_t bytes = 0;
  char   *groupbyColumnData = NULL;
  if (groupByColumn) {
    groupbyColumnData = getGroupbyColumnData(pQuery, &type, &bytes, pDataBlock);
  }

  // bind the input of every output expression
  for (int32_t out = 0; out < pQuery->numOfOutput; ++out) {
    char *input = getDataBlock(pRuntimeEnv, &sasArray[out], out, pDataBlockInfo->rows, pDataBlock);
    setExecParams(pQuery, &pCtx[out], input, tsCols, pDataBlockInfo, pStatis, &sasArray[out], out);
  }

  // bind the input column of every filter
  for (int32_t f = 0; f < pQuery->numOfFilterCols; ++f) {
    SSingleColumnFilterInfo *pFilter = &pQuery->pFilterInfo[f];
    pFilter->pData = getDataBlockImpl(pDataBlock, pFilter->info.colId);
    assert(pFilter->pData != NULL);
  }

  int32_t step = GET_FORWARD_DIRECTION_FACTOR(pQuery->order.order);

  // from top to bottom in desc
  // from bottom to top in asc order
  if (pRuntimeEnv->pTSBuf != NULL) {
    SQInfo *pQInfo = (SQInfo *)GET_QINFO_ADDR(pRuntimeEnv);
    qDebug("QInfo:%p process data rows, numOfRows:%d, query order:%d, ts comp order:%d", pQInfo, pDataBlockInfo->rows,
           pQuery->order.order, pRuntimeEnv->pTSBuf->cur.order);
  }

  int32_t offset = -1;

  for (int32_t row = 0; row < pDataBlockInfo->rows; ++row) {
    offset = GET_COL_DATA_POS(pQuery, row, step);

    if (pRuntimeEnv->pTSBuf != NULL) {
      int32_t r = doTSJoinFilter(pRuntimeEnv, offset);
      if (r == TS_JOIN_TAG_NOT_EQUALS) {
        break;
      }

      if (r == TS_JOIN_TS_NOT_EQUALS) {
        continue;
      }

      assert(r == TS_JOIN_TS_EQUAL);
    }

    if (pQuery->numOfFilterCols > 0 && (!doFilterData(pQuery, offset))) {
      continue;
    }

    if (isIntervalQuery(pQuery)) {
      // the time window is decided by the primary timestamp of the row
      int64_t     ts = tsCols[offset];
      STimeWindow win = getActiveTimeWindow(pWindowResInfo, ts, pQuery);

      int32_t ret = setWindowOutputBufByKey(pRuntimeEnv, pWindowResInfo, pDataBlockInfo->tid, &win);
      if (ret != TSDB_CODE_SUCCESS) {  // null data, too many state code
        continue;
      }

      SWindowStatus *pStatus = getTimeWindowResStatus(pWindowResInfo, curTimeWindow(pWindowResInfo));
      doRowwiseApplyFunctions(pRuntimeEnv, pStatus, &win, offset);

      // the same row may also fall into the following windows
      int32_t     savedIndex = pWindowResInfo->curIndex;
      STimeWindow nextWin = win;

      for (;;) {
        getNextTimeWindow(pQuery, &nextWin);

        // the next window starts beyond the end of the query range
        if ((nextWin.skey > pQuery->window.ekey && QUERY_IS_ASC_QUERY(pQuery)) ||
            (nextWin.skey < pQuery->window.ekey && !QUERY_IS_ASC_QUERY(pQuery))) {
          break;
        }

        // the row is not covered by the next window
        if (ts < nextWin.skey || ts > nextWin.ekey) {
          break;
        }

        // null data, failed to allocate more memory buffer
        if (setWindowOutputBufByKey(pRuntimeEnv, pWindowResInfo, pDataBlockInfo->tid, &nextWin) != TSDB_CODE_SUCCESS) {
          break;
        }

        pStatus = getTimeWindowResStatus(pWindowResInfo, curTimeWindow(pWindowResInfo));
        doRowwiseApplyFunctions(pRuntimeEnv, pStatus, &nextWin, offset);
      }

      pWindowResInfo->curIndex = savedIndex;
    } else {
      // the group of the row is decided by its group-by column value
      if (groupByColumn) {
        char *val = groupbyColumnData + bytes * offset;

        int32_t ret = setGroupResultOutputBuf(pRuntimeEnv, val, type, bytes);
        if (ret != TSDB_CODE_SUCCESS) {  // null data, too many state code
          continue;
        }
      }

      for (int32_t out = 0; out < pQuery->numOfOutput; ++out) {
        int32_t functionId = pQuery->pSelectExpr[out].base.functionId;
        if (functionNeedToExecute(pRuntimeEnv, &pCtx[out], functionId)) {
          aAggs[functionId].xFunctionF(&pCtx[out], offset);
        }
      }
    }

    // the query is finished once the timestamp buffer is exhausted
    if (pRuntimeEnv->pTSBuf != NULL && !tsBufNextPos(pRuntimeEnv->pTSBuf)) {
      setQueryStatus(pQuery, QUERY_COMPLETED);
      break;
    }
  }

  assert(offset >= 0);
  if (tsCols != NULL) {
    pCurrent->lastKey = tsCols[offset] + step;
  } else {
    TSKEY blockEdge = QUERY_IS_ASC_QUERY(pQuery) ? pDataBlockInfo->window.ekey : pDataBlockInfo->window.skey;
    pCurrent->lastKey = blockEdge + step;
  }

  // todo refactor: extract method
  // only arithmetic expressions own a column pointer array
  for (int32_t out = 0; out < pQuery->numOfOutput; ++out) {
    if (pQuery->pSelectExpr[out].base.functionId == TSDB_FUNC_ARITHM) {
      tfree(sasArray[out].data);
    }
  }

  free(sasArray);
}

static int32_t tableApplyFunctionsOnBlock(SQueryRuntimeEnv *pRuntimeEnv, SDataBlockInfo *pDataBlockInfo,
H
hjxilinx 已提交
1268
                                          SDataStatis *pStatis, __block_search_fn_t searchFn, SArray *pDataBlock) {
H
hjxilinx 已提交
1269
  SQuery *pQuery = pRuntimeEnv->pQuery;
H
hjxilinx 已提交
1270 1271 1272
  
  STableQueryInfo* pTableQInfo = pQuery->current;
  SWindowResInfo*  pWindowResInfo = &pRuntimeEnv->windowResInfo;
H
hjxilinx 已提交
1273
  
H
Haojun Liao 已提交
1274
  if (pQuery->numOfFilterCols > 0 || pRuntimeEnv->pTSBuf != NULL || pRuntimeEnv->groupbyNormalCol) {
1275
    rowwiseApplyFunctions(pRuntimeEnv, pStatis, pDataBlockInfo, pWindowResInfo, pDataBlock);
1276
  } else {
1277
    blockwiseApplyFunctions(pRuntimeEnv, pStatis, pDataBlockInfo, pWindowResInfo, searchFn, pDataBlock);
1278
  }
1279

1280
  // update the lastkey of current table
1281
  TSKEY lastKey = QUERY_IS_ASC_QUERY(pQuery) ? pDataBlockInfo->window.ekey : pDataBlockInfo->window.skey;
H
hjxilinx 已提交
1282
  pTableQInfo->lastKey = lastKey + GET_FORWARD_DIRECTION_FACTOR(pQuery->order.order);
1283

1284
  // interval query with limit applied
1285 1286 1287 1288 1289
  int32_t numOfRes = 0;
  if (isIntervalQuery(pQuery)) {
    numOfRes = doCheckQueryCompleted(pRuntimeEnv, lastKey, pWindowResInfo);
  } else {
    numOfRes = getNumOfResult(pRuntimeEnv);
1290

1291 1292 1293 1294
    // update the number of output result
    if (numOfRes > 0 && pQuery->checkBuffer == 1) {
      assert(numOfRes >= pQuery->rec.rows);
      pQuery->rec.rows = numOfRes;
1295

1296 1297 1298
      if (numOfRes >= pQuery->rec.threshold) {
        setQueryStatus(pQuery, QUERY_RESBUF_FULL);
      }
1299

1300 1301 1302
      if ((pQuery->limit.limit >= 0) && (pQuery->limit.limit + pQuery->limit.offset) <= numOfRes) {
        setQueryStatus(pQuery, QUERY_COMPLETED);
      }
H
Haojun Liao 已提交
1303
    }
1304
  }
1305

1306
  return numOfRes;
1307 1308
}

H
Haojun Liao 已提交
1309
void setExecParams(SQuery *pQuery, SQLFunctionCtx *pCtx, void* inputData, TSKEY *tsCol, SDataBlockInfo* pBlockInfo,
1310 1311 1312 1313 1314 1315 1316
                   SDataStatis *pStatis, void *param, int32_t colIndex) {
  
  int32_t functionId = pQuery->pSelectExpr[colIndex].base.functionId;
  int32_t colId = pQuery->pSelectExpr[colIndex].base.colInfo.colId;
  
  SDataStatis *tpField = NULL;
  pCtx->hasNull = hasNullValue(pQuery, colIndex, pBlockInfo->numOfCols, pStatis, &tpField);
1317
  pCtx->aInputElemBuf = inputData;
1318

1319
  if (tpField != NULL) {
H
Haojun Liao 已提交
1320
    pCtx->preAggVals.isSet  = true;
1321 1322
    pCtx->preAggVals.statis = *tpField;
    assert(pCtx->preAggVals.statis.numOfNull <= pBlockInfo->rows);
1323 1324 1325
  } else {
    pCtx->preAggVals.isSet = false;
  }
1326

H
Haojun Liao 已提交
1327 1328 1329
  // limit/offset query will affect this value
  pCtx->startOffset = QUERY_IS_ASC_QUERY(pQuery) ? pQuery->pos:0;
  pCtx->size = QUERY_IS_ASC_QUERY(pQuery) ? pBlockInfo->rows - pQuery->pos : pQuery->pos + 1;
1330

1331 1332
  uint32_t status = aAggs[functionId].nStatus;
  if (((status & (TSDB_FUNCSTATE_SELECTIVITY | TSDB_FUNCSTATE_NEED_TS)) != 0) && (tsCol != NULL)) {
H
Haojun Liao 已提交
1333
    pCtx->ptsList = tsCol;
1334
  }
1335

1336 1337 1338 1339 1340
  if (functionId >= TSDB_FUNC_FIRST_DST && functionId <= TSDB_FUNC_LAST_DST) {
    // last_dist or first_dist function
    // store the first&last timestamp into the intermediate buffer [1], the true
    // value may be null but timestamp will never be null
  } else if (functionId == TSDB_FUNC_TOP || functionId == TSDB_FUNC_BOTTOM || functionId == TSDB_FUNC_TWA ||
1341
             functionId == TSDB_FUNC_DIFF || (functionId >= TSDB_FUNC_RATE && functionId <= TSDB_FUNC_AVG_IRATE)) {
1342
    /*
H
Haojun Liao 已提交
1343
     * least squares function needs two columns of input, currently, the x value of linear equation is set to
1344 1345 1346 1347 1348 1349 1350 1351 1352 1353
     * timestamp column, and the y-value is the column specified in pQuery->pSelectExpr[i].colIdxInBuffer
     *
     * top/bottom function needs timestamp to indicate when the
     * top/bottom values emerge, so does diff function
     */
    if (functionId == TSDB_FUNC_TWA) {
      STwaInfo *pTWAInfo = GET_RES_INFO(pCtx)->interResultBuf;
      pTWAInfo->SKey = pQuery->window.skey;
      pTWAInfo->EKey = pQuery->window.ekey;
    }
1354

1355 1356
  } else if (functionId == TSDB_FUNC_ARITHM) {
    pCtx->param[1].pz = param;
H
Haojun Liao 已提交
1357 1358 1359 1360 1361 1362
  } else if (functionId == TSDB_FUNC_SPREAD) {  // set the statistics data for primary time stamp column
    if (colId == PRIMARYKEY_TIMESTAMP_COL_INDEX) {
      pCtx->preAggVals.isSet  = true;
      pCtx->preAggVals.statis.min = pBlockInfo->window.skey;
      pCtx->preAggVals.statis.max = pBlockInfo->window.ekey;
    }
1363 1364 1365 1366 1367 1368 1369 1370 1371 1372 1373 1374 1375
  } else if (functionId == TSDB_FUNC_INTERP) {
    SInterpInfoDetail *pInterpInfo = GET_RES_INFO(pCtx)->interResultBuf;
    pInterpInfo->type = pQuery->fillType;
    pInterpInfo->ts = pQuery->window.skey;
    pInterpInfo->primaryCol = (colId == PRIMARYKEY_TIMESTAMP_COL_INDEX);
  
    if (pQuery->fillVal != NULL) {
      if (isNull((const char*) &pQuery->fillVal[colIndex], pCtx->inputType)) {
        pCtx->param[1].nType = TSDB_DATA_TYPE_NULL;
      } else { // todo refactor, tVariantCreateFromBinary should handle the NULL value
        tVariantCreateFromBinary(&pCtx->param[1], (char*) &pQuery->fillVal[colIndex], pCtx->inputBytes, pCtx->inputType);
      }
    }
1376
  }
1377

1378 1379 1380 1381 1382 1383
#if defined(_DEBUG_VIEW)
  //  int64_t *tsList = (int64_t *)primaryColumnData;
//  int64_t  s = tsList[0];
//  int64_t  e = tsList[size - 1];

//    if (IS_DATA_BLOCK_LOADED(blockStatus)) {
1384
//        qDebug("QInfo:%p query ts:%lld-%lld, offset:%d, rows:%d, bstatus:%d,
1385 1386 1387
//        functId:%d", GET_QINFO_ADDR(pQuery),
//               s, e, startOffset, size, blockStatus, functionId);
//    } else {
1388
//        qDebug("QInfo:%p block not loaded, bstatus:%d",
1389 1390 1391 1392 1393 1394
//        GET_QINFO_ADDR(pQuery), blockStatus);
//    }
#endif
}

// set the output buffer for the selectivity + tag query
H
Haojun Liao 已提交
1395 1396 1397
static void setCtxTagColumnInfo(SQueryRuntimeEnv *pRuntimeEnv, SQLFunctionCtx *pCtx) {
  SQuery* pQuery = pRuntimeEnv->pQuery;

1398
  if (isSelectivityWithTagsQuery(pQuery)) {
1399
    int32_t num = 0;
1400
    int16_t tagLen = 0;
1401 1402
    
    SQLFunctionCtx *p = NULL;
1403
    SQLFunctionCtx **pTagCtx = calloc(pQuery->numOfOutput, POINTER_BYTES);
H
Haojun Liao 已提交
1404

1405
    for (int32_t i = 0; i < pQuery->numOfOutput; ++i) {
1406
      SSqlFuncMsg *pSqlFuncMsg = &pQuery->pSelectExpr[i].base;
1407
      
1408 1409 1410 1411 1412 1413 1414 1415 1416 1417 1418 1419 1420
      if (pSqlFuncMsg->functionId == TSDB_FUNC_TAG_DUMMY || pSqlFuncMsg->functionId == TSDB_FUNC_TS_DUMMY) {
        tagLen += pCtx[i].outputBytes;
        pTagCtx[num++] = &pCtx[i];
      } else if ((aAggs[pSqlFuncMsg->functionId].nStatus & TSDB_FUNCSTATE_SELECTIVITY) != 0) {
        p = &pCtx[i];
      } else if (pSqlFuncMsg->functionId == TSDB_FUNC_TS || pSqlFuncMsg->functionId == TSDB_FUNC_TAG) {
        // tag function may be the group by tag column
        // ts may be the required primary timestamp column
        continue;
      } else {
        // the column may be the normal column, group by normal_column, the functionId is TSDB_FUNC_PRJ
      }
    }
dengyihao's avatar
dengyihao 已提交
1421 1422 1423 1424 1425 1426 1427
    if (p != NULL) {
      p->tagInfo.pTagCtxList = pTagCtx;
      p->tagInfo.numOfTagCols = num;
      p->tagInfo.tagsLen = tagLen;
    } else {
      tfree(pTagCtx); 
    }
1428 1429 1430
  }
}

H
Haojun Liao 已提交
1431
/*
 * Set up the intermediate result buffer of every output expression: pResultInfo[i] is
 * initialised through setResultInfoBuf() with the interBytes of expression i.
 */
static FORCE_INLINE void setWindowResultInfo(SResultInfo *pResultInfo, SQuery *pQuery, bool isStableQuery) {
  int32_t idx = 0;

  while (idx < pQuery->numOfOutput) {
    int32_t interBytes = pQuery->pSelectExpr[idx].interBytes;
    assert(pQuery->pSelectExpr[idx].interBytes <= DEFAULT_INTERN_BUF_PAGE_SIZE);

    setResultInfoBuf(&pResultInfo[idx], interBytes, isStableQuery);
    ++idx;
  }
}

1439
/*
 * Allocate and initialise the per-output function contexts and result info of a query.
 *
 * For every output expression the input/output type and size, the query order, the function id
 * and the function parameters are copied into the context; pRuntimeEnv->offset[] receives the
 * running sum of the output sizes. Top/bottom/diff additionally get the scan order, the function
 * id and the order column id in param[2], param[3] and param[1].
 *
 * @param pRuntimeEnv  runtime environment to set up; pRuntimeEnv->pQuery must be valid
 * @param order        order value stored in param[2] of top/bottom/diff functions
 * @return TSDB_CODE_SUCCESS, or TSDB_CODE_QRY_OUT_OF_MEMORY when the context or result info
 *         arrays cannot be allocated (both are released in that case)
 */
static int32_t setupQueryRuntimeEnv(SQueryRuntimeEnv *pRuntimeEnv, int16_t order) {
  qDebug("QInfo:%p setup runtime env", GET_QINFO_ADDR(pRuntimeEnv));
  SQuery *pQuery = pRuntimeEnv->pQuery;

  pRuntimeEnv->resultInfo = calloc(pQuery->numOfOutput, sizeof(SResultInfo));
  pRuntimeEnv->pCtx = (SQLFunctionCtx *)calloc(pQuery->numOfOutput, sizeof(SQLFunctionCtx));

  if (pRuntimeEnv->resultInfo == NULL || pRuntimeEnv->pCtx == NULL) {
    goto _clean;
  }

  pRuntimeEnv->offset[0] = 0;
  for (int32_t i = 0; i < pQuery->numOfOutput; ++i) {
    SSqlFuncMsg *pSqlFuncMsg = &pQuery->pSelectExpr[i].base;

    SQLFunctionCtx *pCtx = &pRuntimeEnv->pCtx[i];
    SColIndex* pIndex = &pSqlFuncMsg->colInfo;

    // the input type/size comes from the table name schema, the tag list or the column list
    int32_t index = pSqlFuncMsg->colInfo.colIndex;
    if (TSDB_COL_IS_TAG(pIndex->flag)) {
      if (pIndex->colId == TSDB_TBNAME_COLUMN_INDEX) {  // todo refactor
        SSchema s = tGetTableNameColumnSchema();

        pCtx->inputBytes = s.bytes;
        pCtx->inputType = s.type;
      } else {
        pCtx->inputBytes = pQuery->tagColList[index].bytes;
        pCtx->inputType = pQuery->tagColList[index].type;
      }
    } else {
      pCtx->inputBytes = pQuery->colList[index].bytes;
      pCtx->inputType = pQuery->colList[index].type;
    }

    assert(isValidDataType(pCtx->inputType));
    pCtx->ptsOutputBuf = NULL;

    pCtx->outputBytes = pQuery->pSelectExpr[i].bytes;
    pCtx->outputType = pQuery->pSelectExpr[i].type;

    pCtx->order = pQuery->order.order;
    pCtx->functionId = pSqlFuncMsg->functionId;

    pCtx->numOfParams = pSqlFuncMsg->numOfParams;
    for (int32_t j = 0; j < pCtx->numOfParams; ++j) {
      int16_t type = pSqlFuncMsg->arg[j].argType;
      int16_t bytes = pSqlFuncMsg->arg[j].argBytes;
      if (type == TSDB_DATA_TYPE_BINARY || type == TSDB_DATA_TYPE_NCHAR) {
        // read the value of parameter j itself; `arg->` would always pick the first parameter
        tVariantCreateFromBinary(&pCtx->param[j], pSqlFuncMsg->arg[j].argValue.pz, bytes, type);
      } else {
        tVariantCreateFromBinary(&pCtx->param[j], (char *)&pSqlFuncMsg->arg[j].argValue.i64, bytes, type);
      }
    }

    // set the order information for top/bottom query
    int32_t functionId = pCtx->functionId;

    if (functionId == TSDB_FUNC_TOP || functionId == TSDB_FUNC_BOTTOM || functionId == TSDB_FUNC_DIFF) {
      // these functions require the first output to be the timestamp
      int32_t f = pQuery->pSelectExpr[0].base.functionId;
      assert(f == TSDB_FUNC_TS || f == TSDB_FUNC_TS_DUMMY);

      pCtx->param[2].i64Key = order;
      pCtx->param[2].nType = TSDB_DATA_TYPE_BIGINT;
      pCtx->param[3].i64Key = functionId;
      pCtx->param[3].nType = TSDB_DATA_TYPE_BIGINT;

      pCtx->param[1].i64Key = pQuery->order.orderColId;
    }

    // offset of output i = sum of the output sizes of all previous outputs
    if (i > 0) {
      pRuntimeEnv->offset[i] = pRuntimeEnv->offset[i - 1] + pRuntimeEnv->pCtx[i - 1].outputBytes;
    }
  }

  // set the intermediate result output buffer
  setWindowResultInfo(pRuntimeEnv->resultInfo, pQuery, pRuntimeEnv->stableQuery);

  // if it is group by normal column, do not set output buffer, the output buffer is pResult
  if (!isGroupbyNormalCol(pQuery->pGroupbyExpr) && !pRuntimeEnv->stableQuery) {
    resetCtxOutputBuf(pRuntimeEnv);
  }

  setCtxTagColumnInfo(pRuntimeEnv, pRuntimeEnv->pCtx);
  return TSDB_CODE_SUCCESS;

_clean:
  tfree(pRuntimeEnv->resultInfo);
  tfree(pRuntimeEnv->pCtx);

  return TSDB_CODE_QRY_OUT_OF_MEMORY;
}

/*
 * Release everything owned by the runtime environment: the time window info, the per-output
 * function contexts (parameters, tag, tag context list, intermediate result buffer), the fill
 * info, the result buffer, both query handles and the timestamp buffer.
 * Nothing is done when pRuntimeEnv->pQuery is NULL.
 */
static void teardownQueryRuntimeEnv(SQueryRuntimeEnv *pRuntimeEnv) {
  SQuery *pQuery = pRuntimeEnv->pQuery;
  if (pQuery == NULL) {
    return;
  }

  SQInfo* pQInfo = (SQInfo*) GET_QINFO_ADDR(pRuntimeEnv);
  qDebug("QInfo:%p teardown runtime env", pQInfo);

  cleanupTimeWindowInfo(&pRuntimeEnv->windowResInfo, pQuery->numOfOutput);

  if (pRuntimeEnv->pCtx != NULL) {
    for (int32_t out = 0; out < pQuery->numOfOutput; ++out) {
      SQLFunctionCtx *pOutCtx = &pRuntimeEnv->pCtx[out];

      int32_t p = 0;
      while (p < pOutCtx->numOfParams) {
        tVariantDestroy(&pOutCtx->param[p]);
        ++p;
      }

      tVariantDestroy(&pOutCtx->tag);
      tfree(pOutCtx->tagInfo.pTagCtxList);
      tfree(pRuntimeEnv->resultInfo[out].interResultBuf);
    }

    tfree(pRuntimeEnv->resultInfo);
    tfree(pRuntimeEnv->pCtx);
  }

  pRuntimeEnv->pFillInfo = taosDestoryFillInfo(pRuntimeEnv->pFillInfo);

  destroyResultBuf(pRuntimeEnv->pResultBuf, pQInfo);
  tsdbCleanupQueryHandle(pRuntimeEnv->pQueryHandle);
  tsdbCleanupQueryHandle(pRuntimeEnv->pSecQueryHandle);

  pRuntimeEnv->pTSBuf = tsBufDestory(pRuntimeEnv->pTSBuf);
}

1569
/* A query counts as killed once its code is TSDB_CODE_TSC_QUERY_CANCELLED. */
static bool isQueryKilled(SQInfo *pQInfo) {
  if (pQInfo->code == TSDB_CODE_TSC_QUERY_CANCELLED) {
    return true;
  }

  return false;
}

1573
/* Mark the query as killed by storing TSDB_CODE_TSC_QUERY_CANCELLED in its code. */
static void setQueryKilled(SQInfo *pQInfo) {
  pQInfo->code = TSDB_CODE_TSC_QUERY_CANCELLED;
}
H
hjxilinx 已提交
1574

H
hjxilinx 已提交
1575
/*
 * Check whether the query is a "fixed output" query.
 *
 * - a query with a non-zero intervalTime is never a fixed output query;
 * - a top/bottom query or a group-by normal column query always is;
 * - otherwise the query is a fixed output query as soon as one of its output expressions,
 *   other than the ignored timestamp ones, is not flagged as multi-output in aAggs.
 */
static bool isFixedOutputQuery(SQuery *pQuery) {
  if (pQuery->intervalTime != 0) {
    return false;
  }

  // Note: top/bottom query is a fixed output query
  if (isTopBottomQuery(pQuery) || isGroupbyNormalCol(pQuery->pGroupbyExpr)) {
    return true;
  }

  for (int32_t idx = 0; idx < pQuery->numOfOutput; ++idx) {
    SSqlFuncMsg *pFunc = &pQuery->pSelectExpr[idx].base;

    // ignore the ts_comp function: a projection of the primary timestamp column in the first output slot
    bool tsCompProjection = (idx == 0 && pFunc->functionId == TSDB_FUNC_PRJ && pFunc->numOfParams == 1 &&
                             pFunc->colInfo.colIndex == PRIMARYKEY_TIMESTAMP_COL_INDEX);
    if (tsCompProjection) {
      continue;
    }

    // the timestamp helper functions do not take part in the decision
    bool tsFunction = (pFunc->functionId == TSDB_FUNC_TS || pFunc->functionId == TSDB_FUNC_TS_DUMMY);
    if (tsFunction) {
      continue;
    }

    if (!IS_MULTIOUTPUT(aAggs[pFunc->functionId].nStatus)) {
      return true;
    }
  }

  return false;
}

1606
// todo refactor with isLastRowQuery
H
hjxilinx 已提交
1607
static bool isPointInterpoQuery(SQuery *pQuery) {
1608
  for (int32_t i = 0; i < pQuery->numOfOutput; ++i) {
1609
    int32_t functionID = pQuery->pSelectExpr[i].base.functionId;
1610
    if (functionID == TSDB_FUNC_INTERP) {
1611 1612 1613
      return true;
    }
  }
1614

1615 1616 1617 1618
  return false;
}

// TODO REFACTOR:MERGE WITH CLIENT-SIDE FUNCTION
H
hjxilinx 已提交
1619
static bool isSumAvgRateQuery(SQuery *pQuery) {
1620
  for (int32_t i = 0; i < pQuery->numOfOutput; ++i) {
1621
    int32_t functionId = pQuery->pSelectExpr[i].base.functionId;
1622 1623 1624
    if (functionId == TSDB_FUNC_TS) {
      continue;
    }
1625

1626 1627 1628 1629 1630
    if (functionId == TSDB_FUNC_SUM_RATE || functionId == TSDB_FUNC_SUM_IRATE || functionId == TSDB_FUNC_AVG_RATE ||
        functionId == TSDB_FUNC_AVG_IRATE) {
      return true;
    }
  }
1631

1632 1633 1634
  return false;
}

H
hjxilinx 已提交
1635
/*
 * Return true when at least one output expression of the query is TSDB_FUNC_LAST_ROW.
 * Despite the name, only the last_row function is checked here.
 */
static bool isFirstLastRowQuery(SQuery *pQuery) {
  bool found = false;

  for (int32_t idx = 0; idx < pQuery->numOfOutput && !found; ++idx) {
    found = (pQuery->pSelectExpr[idx].base.functionId == TSDB_FUNC_LAST_ROW);
  }

  return found;
}

H
hjxilinx 已提交
1646
/*
 * Decide whether a second scan in the reverse order is required.
 *
 * The output expressions are visited in order; ts, ts_dummy and tag expressions are skipped.
 * - a first/first_dst expression in a query that is not ascending requires the reverse scan;
 * - the first last/last_dst expression met decides the result on its own: the reverse scan
 *   is needed when the order stored in its first argument differs from the query order.
 * If no expression decides, no reverse scan is needed.
 */
static bool needReverseScan(SQuery *pQuery) {
  for (int32_t idx = 0; idx < pQuery->numOfOutput; ++idx) {
    SSqlFuncMsg *pFunc = &pQuery->pSelectExpr[idx].base;
    int32_t      funcId = pFunc->functionId;

    bool skipped = (funcId == TSDB_FUNC_TS || funcId == TSDB_FUNC_TS_DUMMY || funcId == TSDB_FUNC_TAG);
    if (skipped) {
      continue;
    }

    bool isFirst = (funcId == TSDB_FUNC_FIRST || funcId == TSDB_FUNC_FIRST_DST);
    if (isFirst && !QUERY_IS_ASC_QUERY(pQuery)) {
      return true;
    }

    bool isLast = (funcId == TSDB_FUNC_LAST || funcId == TSDB_FUNC_LAST_DST);
    if (isLast) {
      // the order is carried by the first argument of the expression
      int32_t order = pFunc->arg->argValue.i64;
      return order != pQuery->order.order;
    }
  }

  return false;
}
H
hjxilinx 已提交
1665 1666 1667

/*
 * Return true when every output expression is either a tag projection (TSDB_FUNC_TAGPRJ),
 * a tid_tag expression (TSDB_FUNC_TID_TAG), or a count whose column id is
 * TSDB_TBNAME_COLUMN_INDEX. A query without output expressions also yields true.
 */
static bool onlyQueryTags(SQuery* pQuery) {
  for (int32_t idx = 0; idx < pQuery->numOfOutput; ++idx) {
    SExprInfo* pExpr = &pQuery->pSelectExpr[idx];
    int32_t    funcId = pExpr->base.functionId;

    if (funcId == TSDB_FUNC_TAGPRJ || funcId == TSDB_FUNC_TID_TAG) {
      continue;
    }

    if (funcId == TSDB_FUNC_COUNT && pExpr->base.colInfo.colId == TSDB_TBNAME_COLUMN_INDEX) {
      continue;
    }

    return false;
  }

  return true;
}

1680 1681
/////////////////////////////////////////////////////////////////////////////////////////////

H
Haojun Liao 已提交
1682
/*
 * Compute the aligned time window that contains key, and the part of it that lies inside
 * the [keyFirst, keyLast] range.
 *
 * @param pQuery    query, provides slidingTime, intervalTime, slidingTimeUnit and precision
 * @param key       timestamp to locate, must be within [keyFirst, keyLast]
 * @param keyFirst  lower bound of the queried range
 * @param keyLast   upper bound of the queried range
 * @param realWin   output: the window clipped to [keyFirst, keyLast]
 * @param win       output: the aligned window; skey comes from taosGetIntervalStartTimestamp()
 */
void getAlignQueryTimeWindow(SQuery *pQuery, int64_t key, int64_t keyFirst, int64_t keyLast, STimeWindow *realWin, STimeWindow *win) {
  assert(key >= keyFirst && key <= keyLast && pQuery->slidingTime <= pQuery->intervalTime);

  win->skey = taosGetIntervalStartTimestamp(key, pQuery->slidingTime, pQuery->intervalTime, pQuery->slidingTimeUnit,
                                            pQuery->precision);

  bool nearInt64Max = (keyFirst > (INT64_MAX - pQuery->intervalTime));
  if (nearInt64Max) {
    /*
     * keyFirst is less than one interval away from INT64_MAX, so the queried range must be
     * shorter than one interval: the range is used as is, and the window end is pinned to INT64_MAX.
     */
    assert(keyLast - keyFirst < pQuery->intervalTime);

    realWin->skey = keyFirst;
    realWin->ekey = keyLast;

    win->ekey = INT64_MAX;
    return;
  }

  win->ekey = win->skey + pQuery->intervalTime - 1;

  // clip the aligned window to the queried range
  if (win->skey < keyFirst) {
    realWin->skey = keyFirst;
  } else {
    realWin->skey = win->skey;
  }

  if (win->ekey < keyLast) {
    realWin->ekey = win->ekey;
  } else {
    realWin->ekey = keyLast;
  }
}

/*
 * Set pQuery->checkBuffer.
 *
 * The flag is 0 for top/bottom queries and group-by normal column queries. Otherwise the
 * output expressions are visited in order (ts and ts_dummy are skipped) until one that is
 * not flagged as multi-output in aAggs is met; the flag is 1 only if the last expression
 * examined is a multi-output one.
 */
static void setScanLimitationByResultBuffer(SQuery *pQuery) {
  if (isTopBottomQuery(pQuery) || isGroupbyNormalCol(pQuery->pGroupbyExpr)) {
    pQuery->checkBuffer = 0;
    return;
  }

  bool multiOutput = false;

  for (int32_t idx = 0; idx < pQuery->numOfOutput; ++idx) {
    int32_t funcId = pQuery->pSelectExpr[idx].base.functionId;
    if (funcId == TSDB_FUNC_TS || funcId == TSDB_FUNC_TS_DUMMY) {
      continue;
    }

    multiOutput = IS_MULTIOUTPUT(aAggs[funcId].nStatus);
    if (!multiOutput) {
      break;
    }
  }

  pQuery->checkBuffer = multiOutput ? 1 : 0;
}

/*
 * todo add more parameters to check soon..
 */
1732
bool colIdCheck(SQuery *pQuery) {
1733 1734
  // load data column information is incorrect
  for (int32_t i = 0; i < pQuery->numOfCols - 1; ++i) {
1735
    if (pQuery->colList[i].colId == pQuery->colList[i + 1].colId) {
S
slguan 已提交
1736
      qError("QInfo:%p invalid data load column for query", GET_QINFO_ADDR(pQuery));
1737 1738 1739
      return false;
    }
  }
1740
  
1741 1742 1743 1744 1745 1746
  return true;
}

// todo ignore the avg/sum/min/max/count/stddev/top/bottom functions, for which
// the scan order does not matter
/*
 * Return true when every output expression, apart from the ts, ts_dummy, tag and tag_dummy
 * ones, is either functId or functIdDst.
 */
static bool onlyOneQueryType(SQuery *pQuery, int32_t functId, int32_t functIdDst) {
  for (int32_t idx = 0; idx < pQuery->numOfOutput; ++idx) {
    int32_t current = pQuery->pSelectExpr[idx].base.functionId;

    switch (current) {
      case TSDB_FUNC_TS:
      case TSDB_FUNC_TS_DUMMY:
      case TSDB_FUNC_TAG:
      case TSDB_FUNC_TAG_DUMMY:
        // helper expressions are not taken into account
        continue;

      default:
        break;
    }

    if (current != functId && current != functIdDst) {
      return false;
    }
  }

  return true;
}

/* True when the query consists of first/first_dst functions only (see onlyOneQueryType). */
static bool onlyFirstQuery(SQuery *pQuery) {
  return onlyOneQueryType(pQuery, TSDB_FUNC_FIRST, TSDB_FUNC_FIRST_DST);
}

/* True when the query consists of last/last_dst functions only (see onlyOneQueryType). */
static bool onlyLastQuery(SQuery *pQuery) {
  return onlyOneQueryType(pQuery, TSDB_FUNC_LAST, TSDB_FUNC_LAST_DST);
}

H
Haojun Liao 已提交
1767 1768 1769 1770 1771 1772 1773 1774 1775 1776 1777 1778 1779 1780
// todo refactor, add iterator
/* Swap the start and end keys of the time window of every table of every table group of the query. */
static void doExchangeTimeWindow(SQInfo* pQInfo) {
  size_t numOfGroups = GET_NUM_OF_TABLEGROUP(pQInfo);

  for (int32_t g = 0; g < numOfGroups; ++g) {
    SArray* pGroup = GET_TABLEGROUP(pQInfo, g);
    size_t  numOfTables = taosArrayGetSize(pGroup);

    for (int32_t k = 0; k < numOfTables; ++k) {
      STableQueryInfo* pInfo = (STableQueryInfo*) taosArrayGetP(pGroup, k);
      SWAP(pInfo->win.skey, pInfo->win.ekey, TSKEY);
    }
  }
}

H
Haojun Liao 已提交
1781 1782 1783
/*
 * Adjust the scan order of the query, and the query time window accordingly, for the query
 * types whose result depends on the scan order:
 *
 * - last_row query: always scanned in descending order, the window runs from the larger to
 *   the smaller of its two keys;
 * - point interpolation query without interval: always scanned in ascending order;
 * - query without interval made of first (resp. last) functions only: ascending (resp.
 *   descending) order; the window of every table is exchanged together with the query window;
 * - interval query on a super table made of first (resp. last) functions only: ascending
 *   (resp. descending) order; only the query window is exchanged.
 *
 * The QInfo address written to the log is the pQInfo handed over by the caller.
 * GET_QINFO_ADDR() must not be applied to pQuery: it expects the address of the runtime env
 * member of the SQInfo, whereas pQuery is a pointer stored in that runtime env.
 *
 * @param pQInfo       query info
 * @param stableQuery  true for a super table query
 */
static void changeExecuteScanOrder(SQInfo *pQInfo, bool stableQuery) {
  SQuery* pQuery = pQInfo->runtimeEnv.pQuery;

  char msg[] = "QInfo:%p scan order changed for %s query, old:%d, new:%d, qrange exchanged, old qrange:%" PRId64
               "-%" PRId64 ", new qrange:%" PRId64 "-%" PRId64;

  // todo handle the case that order irrelevant query types are mixed up with order critical query types
  // descending order query for last_row query
  if (isFirstLastRowQuery(pQuery)) {
    qDebug("QInfo:%p scan order changed for last_row query, old:%d, new:%d", pQInfo, pQuery->order.order,
           TSDB_ORDER_DESC);

    pQuery->order.order = TSDB_ORDER_DESC;

    int64_t skey = MIN(pQuery->window.skey, pQuery->window.ekey);
    int64_t ekey = MAX(pQuery->window.skey, pQuery->window.ekey);

    pQuery->window.skey = ekey;
    pQuery->window.ekey = skey;

    return;
  }

  // in case of point-interpolation query, use asc order scan
  if (isPointInterpoQuery(pQuery) && pQuery->intervalTime == 0) {
    if (!QUERY_IS_ASC_QUERY(pQuery)) {
      qDebug(msg, pQInfo, "interp", pQuery->order.order, TSDB_ORDER_ASC, pQuery->window.skey, pQuery->window.ekey,
             pQuery->window.ekey, pQuery->window.skey);
      SWAP(pQuery->window.skey, pQuery->window.ekey, TSKEY);
    }

    pQuery->order.order = TSDB_ORDER_ASC;
    return;
  }

  if (pQuery->intervalTime == 0) {
    if (onlyFirstQuery(pQuery)) {
      if (!QUERY_IS_ASC_QUERY(pQuery)) {
        qDebug(msg, pQInfo, "only-first", pQuery->order.order, TSDB_ORDER_ASC, pQuery->window.skey,
               pQuery->window.ekey, pQuery->window.ekey, pQuery->window.skey);

        SWAP(pQuery->window.skey, pQuery->window.ekey, TSKEY);
        doExchangeTimeWindow(pQInfo);
      }

      pQuery->order.order = TSDB_ORDER_ASC;
    } else if (onlyLastQuery(pQuery)) {
      if (QUERY_IS_ASC_QUERY(pQuery)) {
        qDebug(msg, pQInfo, "only-last", pQuery->order.order, TSDB_ORDER_DESC, pQuery->window.skey,
               pQuery->window.ekey, pQuery->window.ekey, pQuery->window.skey);

        SWAP(pQuery->window.skey, pQuery->window.ekey, TSKEY);
        doExchangeTimeWindow(pQInfo);
      }

      pQuery->order.order = TSDB_ORDER_DESC;
    }

    return;
  }

  // interval query: the order is only adjusted for super table queries
  if (!stableQuery) {
    return;
  }

  if (onlyFirstQuery(pQuery)) {
    if (!QUERY_IS_ASC_QUERY(pQuery)) {
      qDebug(msg, pQInfo, "only-first stable", pQuery->order.order, TSDB_ORDER_ASC, pQuery->window.skey,
             pQuery->window.ekey, pQuery->window.ekey, pQuery->window.skey);

      SWAP(pQuery->window.skey, pQuery->window.ekey, TSKEY);
    }

    pQuery->order.order = TSDB_ORDER_ASC;
  } else if (onlyLastQuery(pQuery)) {
    if (QUERY_IS_ASC_QUERY(pQuery)) {
      qDebug(msg, pQInfo, "only-last stable", pQuery->order.order, TSDB_ORDER_DESC, pQuery->window.skey,
             pQuery->window.ekey, pQuery->window.ekey, pQuery->window.skey);

      SWAP(pQuery->window.skey, pQuery->window.ekey, TSKEY);
    }

    pQuery->order.order = TSDB_ORDER_DESC;
  }
}

/*
 * Initial number of result pages for the query:
 * - 128 for a group-by normal column query;
 * - for an interval (time window) query, the number of tables of the query, but at least 16;
 * - 1 otherwise.
 */
static int32_t getInitialPageNum(SQInfo *pQInfo) {
  SQuery *pQuery = pQInfo->runtimeEnv.pQuery;
  int32_t INITIAL_RESULT_ROWS_VALUE = 16;

  int32_t numOfPages = 0;

  if (isGroupbyNormalCol(pQuery->pGroupbyExpr)) {
    numOfPages = 128;
  } else if (isIntervalQuery(pQuery)) {
    // time window query, allocate one page for each table
    size_t numOfTables = pQInfo->tableqinfoGroupInfo.numOfTables;
    numOfPages = MAX(numOfTables, INITIAL_RESULT_ROWS_VALUE);
  } else {
    // for super table query, one page for each subset
    numOfPages = 1;  // pQInfo->pSidSet->numOfSubSet;
  }

  assert(numOfPages > 0);
  return numOfPages;
}

H
Haojun Liao 已提交
1883
/*
 * Row multiplier of a result slot: the first argument of the second select expression
 * (pSelectExpr[1]) when tbq is set and sq is not, 1 otherwise.
 * Every macro argument is parenthesized so that compound expressions can be passed safely.
 */
#define GET_ROW_PARAM_FOR_MULTIOUTPUT(_q, tbq, sq) \
  (((tbq) && (!(sq))) ? (_q)->pSelectExpr[1].base.arg->argValue.i64 : 1)
1884

H
Haojun Liao 已提交
1885 1886
/*
 * Number of result rows that fit in one internal buffer page: the page size minus the
 * tFilePage header, divided by the size of one row. The row size is pQuery->rowSize
 * multiplied by GET_ROW_PARAM_FOR_MULTIOUTPUT(pQuery, topBotQuery, isSTableQuery).
 */
static FORCE_INLINE int32_t getNumOfRowsInResultPage(SQuery *pQuery, bool topBotQuery, bool isSTableQuery) {
  int32_t bytesPerRow = pQuery->rowSize * GET_ROW_PARAM_FOR_MULTIOUTPUT(pQuery, topBotQuery, isSTableQuery);

  return (DEFAULT_INTERN_BUF_PAGE_SIZE - sizeof(tFilePage)) / bytesPerRow;
}

/*
 * Locate, inside its result page, the output position of one column of a window result.
 *
 * The page is fetched from the result buffer through pResult->pos.pageId. Within the page,
 * the data of a column starts at offset[columnIndex] * numOfRowsPerPage, and the wanted
 * row is found pSelectExpr[columnIndex].bytes * <row id> bytes further, the row id being
 * pResult->pos.rowId scaled by GET_ROW_PARAM_FOR_MULTIOUTPUT.
 */
char *getPosInResultPage(SQueryRuntimeEnv *pRuntimeEnv, int32_t columnIndex, SWindowResult *pResult) {
  assert(pResult != NULL && pRuntimeEnv != NULL);

  SQuery    *pQuery = pRuntimeEnv->pQuery;
  tFilePage *pPage = GET_RES_BUF_PAGE_BY_ID(pRuntimeEnv->pResultBuf, pResult->pos.pageId);

  int32_t rowIndex = pResult->pos.rowId *
                     GET_ROW_PARAM_FOR_MULTIOUTPUT(pQuery, pRuntimeEnv->topBotQuery, pRuntimeEnv->stableQuery);

  char *pColumnStart = ((char *)pPage->data) + pRuntimeEnv->offset[columnIndex] * pRuntimeEnv->numOfRowsPerPage;
  return pColumnStart + pQuery->pSelectExpr[columnIndex].bytes * rowIndex;
}

/**
 * decrease the refcount for each table involved in this query
 * @param pQInfo
 */
1905
UNUSED_FUNC void vnodeDecMeterRefcnt(SQInfo *pQInfo) {
1906
  if (pQInfo != NULL) {
1907
    //    assert(taosHashGetSize(pQInfo->tableqinfoGroupInfo) >= 1);
1908 1909 1910
  }

#if 0
1911
  if (pQInfo == NULL || pQInfo->tableqinfoGroupInfo.numOfTables == 1) {
1912
    atomic_fetch_sub_32(&pQInfo->pObj->numOfQueries, 1);
1913
    qDebug("QInfo:%p vid:%d sid:%d meterId:%s, query is over, numOfQueries:%d", pQInfo, pQInfo->pObj->vnode,
1914 1915 1916
           pQInfo->pObj->sid, pQInfo->pObj->meterId, pQInfo->pObj->numOfQueries);
  } else {
    int32_t num = 0;
1917 1918
    for (int32_t i = 0; i < pQInfo->tableqinfoGroupInfo.numOfTables; ++i) {
      SMeterObj *pMeter = getMeterObj(pQInfo->tableqinfoGroupInfo, pQInfo->pSidSet->pTableIdList[i]->sid);
1919
      atomic_fetch_sub_32(&(pMeter->numOfQueries), 1);
1920

1921
      if (pMeter->numOfQueries > 0) {
1922
        qDebug("QInfo:%p vid:%d sid:%d meterId:%s, query is over, numOfQueries:%d", pQInfo, pMeter->vnode, pMeter->sid,
1923 1924 1925 1926
               pMeter->meterId, pMeter->numOfQueries);
        num++;
      }
    }
1927

1928 1929 1930 1931
    /*
     * in order to reduce log output, for all meters of which numOfQueries count are 0,
     * we do not output corresponding information
     */
1932
    num = pQInfo->tableqinfoGroupInfo.numOfTables - num;
1933
    qDebug("QInfo:%p metric query is over, dec query ref for %d meters, numOfQueries on %d meters are 0", pQInfo,
1934
           pQInfo->tableqinfoGroupInfo.numOfTables, num);
1935 1936 1937 1938 1939 1940 1941 1942 1943 1944 1945 1946 1947
  }
#endif
}

/*
 * Decide whether a data block has to be loaded, given its pre-computed statistics.
 *
 * The statistics based pre-filter is compiled out for the time being (#if 0 below), hence
 * the function returns true in every case: with or without statistics, the block is loaded.
 */
static bool needToLoadDataBlock(SQuery *pQuery, SDataStatis *pDataStatis, SQLFunctionCtx *pCtx,
                                int32_t numOfTotalPoints) {
  // no statistics available: nothing can be decided without the data itself
  if (pDataStatis == NULL) {
    return true;
  }

#if 0
  for (int32_t k = 0; k < pQuery->numOfFilterCols; ++k) {
    SSingleColumnFilterInfo *pFilterInfo = &pQuery->pFilterInfo[k];
    int32_t                  colIndex = pFilterInfo->info.colIndex;

    // this column not valid in current data block
    if (colIndex < 0 || pDataStatis[colIndex].colId != pFilterInfo->info.data.colId) {
      continue;
    }

    // not support pre-filter operation on binary/nchar data type
    if (!vnodeSupportPrefilter(pFilterInfo->info.data.type)) {
      continue;
    }

    // all points in current column are NULL, no need to check its boundary value
    if (pDataStatis[colIndex].numOfNull == numOfTotalPoints) {
      continue;
    }

    if (pFilterInfo->info.info.type == TSDB_DATA_TYPE_FLOAT) {
      float minval = *(double *)(&pDataStatis[colIndex].min);
      float maxval = *(double *)(&pDataStatis[colIndex].max);

      for (int32_t i = 0; i < pFilterInfo->numOfFilters; ++i) {
        if (pFilterInfo->pFilters[i].fp(&pFilterInfo->pFilters[i], (char *)&minval, (char *)&maxval)) {
          return true;
        }
      }
    } else {
      for (int32_t i = 0; i < pFilterInfo->numOfFilters; ++i) {
        if (pFilterInfo->pFilters[i].fp(&pFilterInfo->pFilters[i], (char *)&pDataStatis[colIndex].min,
                                        (char *)&pDataStatis[colIndex].max)) {
          return true;
        }
      }
    }
  }

  // todo disable this opt code block temporarily
  //  for (int32_t i = 0; i < pQuery->numOfOutput; ++i) {
  //    int32_t functId = pQuery->pSelectExpr[i].base.functionId;
  //    if (functId == TSDB_FUNC_TOP || functId == TSDB_FUNC_BOTTOM) {
  //      return top_bot_datablock_filter(&pCtx[i], functId, (char *)&pField[i].min, (char *)&pField[i].max);
  //    }
  //  }

#endif

  return true;
}

/*
 * Move the time window one sliding step forward in the scan direction: the start key is
 * shifted by slidingTime (multiplied by the direction factor of the query order) and the
 * end key is recomputed from intervalTime.
 * The previous time window may not be of the same size as pQuery->intervalTime.
 */
static void getNextTimeWindow(SQuery *pQuery, STimeWindow *pTimeWindow) {
  int32_t direction = GET_FORWARD_DIRECTION_FACTOR(pQuery->order.order);

  pTimeWindow->skey += (pQuery->slidingTime * direction);
  pTimeWindow->ekey = pTimeWindow->skey + (pQuery->intervalTime - 1);
}

H
hjxilinx 已提交
2004
/*
 * Load the current data block of a query handle, only as far as the query requires it.
 *
 * The requirement is BLK_DATA_ALL_NEEDED when the query has filter columns. Otherwise it is
 * the union of the answers of the dataReqFunc of every output function, forced to
 * BLK_DATA_ALL_NEEDED when a TS buffer is attached or for an interval query.
 *
 * - BLK_DATA_NO_NEEDED:     the block is discarded, NULL is returned;
 * - BLK_DATA_STATIS_NEEDED: only the block statistics are retrieved; the data is loaded if,
 *                           and only if, no statistics are available (*pStatis == NULL);
 * - BLK_DATA_ALL_NEEDED:    the statistics are retrieved and the data is loaded.
 *
 * @param pRuntimeEnv   runtime env of the query, its cost summary is updated
 * @param pQueryHandle  tsdb query handle positioned on the block
 * @param pBlockInfo    information about the block
 * @param pStatis       output: statistics of the block, as set by tsdbRetrieveDataBlockStatisInfo()
 * @return the loaded data block, or NULL when the data has not been loaded
 */
SArray *loadDataBlockOnDemand(SQueryRuntimeEnv *pRuntimeEnv, void* pQueryHandle, SDataBlockInfo* pBlockInfo, SDataStatis **pStatis) {
  SQuery *pQuery = pRuntimeEnv->pQuery;

  uint32_t r = 0;
  SArray * pDataBlock = NULL;

  if (pQuery->numOfFilterCols > 0) {
    r = BLK_DATA_ALL_NEEDED;
  } else {
    // check if this data block is required to load
    for (int32_t i = 0; i < pQuery->numOfOutput; ++i) {
      SSqlFuncMsg* pSqlFunc = &pQuery->pSelectExpr[i].base;

      int32_t functionId = pSqlFunc->functionId;
      int32_t colId = pSqlFunc->colInfo.colId;
      r |= aAggs[functionId].dataReqFunc(&pRuntimeEnv->pCtx[i], pQuery->window.skey, pQuery->window.ekey, colId);
    }

    // pTSBuf is a pointer: test it against NULL instead of ordering it against the integer 0
    if (pRuntimeEnv->pTSBuf != NULL || isIntervalQuery(pQuery)) {
      r |= BLK_DATA_ALL_NEEDED;
    }
  }

  if (r == BLK_DATA_NO_NEEDED) {
    qDebug("QInfo:%p data block discard, brange:%" PRId64 "-%" PRId64 ", rows:%d", GET_QINFO_ADDR(pRuntimeEnv),
           pBlockInfo->window.skey, pBlockInfo->window.ekey, pBlockInfo->rows);
    pRuntimeEnv->summary.discardBlocks += 1;
  } else if (r == BLK_DATA_STATIS_NEEDED) {
    if (tsdbRetrieveDataBlockStatisInfo(pQueryHandle, pStatis) != TSDB_CODE_SUCCESS) {
      // todo: the failure is not reported (DISK_DATA_LOAD_FAILED); missing statistics are handled below
    }

    pRuntimeEnv->summary.loadBlockStatis += 1;

    if (*pStatis == NULL) {  // data block statistics does not exist, load data block
      pDataBlock = tsdbRetrieveDataBlock(pQueryHandle, NULL);
      pRuntimeEnv->summary.totalCheckedRows += pBlockInfo->rows;
    }
  } else {
    assert(r == BLK_DATA_ALL_NEEDED);

    // load the data block statistics to perform further filter
    pRuntimeEnv->summary.loadBlockStatis += 1;
    if (tsdbRetrieveDataBlockStatisInfo(pQueryHandle, pStatis) != TSDB_CODE_SUCCESS) {
      // todo: the failure is not reported; the data block is loaded below in any case
    }

    if (!needToLoadDataBlock(pQuery, *pStatis, pRuntimeEnv->pCtx, pBlockInfo->rows)) {
#if defined(_DEBUG_VIEW)
      qDebug("QInfo:%p block discarded by per-filter", GET_QINFO_ADDR(pRuntimeEnv));
#endif
      // current block has been discarded due to the filter applied
      pRuntimeEnv->summary.discardBlocks += 1;
      // todo: the block is still loaded below (DISK_DATA_DISCARDED is not returned)
    }

    pRuntimeEnv->summary.totalCheckedRows += pBlockInfo->rows;
    pRuntimeEnv->summary.loadBlocks += 1;
    pDataBlock = tsdbRetrieveDataBlock(pQueryHandle, NULL);
  }

  return pDataBlock;
}

H
hjxilinx 已提交
2067
/*
 * Binary search of a timestamp in an array of timestamps sorted in ascending order.
 *
 * @param pValue  array of num TSKEY values
 * @param num     number of values in the array
 * @param key     timestamp to look for
 * @param order   TSDB_ORDER_ASC or TSDB_ORDER_DESC
 * @return the position of key when it is found in the array. Otherwise:
 *         - TSDB_ORDER_DESC: the position of the last value smaller than key, -1 if there is none;
 *         - TSDB_ORDER_ASC:  the position of the first value bigger than key, -1 if there is none.
 *         -1 is also returned when num <= 0.
 */
int32_t binarySearchForKey(char *pValue, int num, TSKEY key, int order) {
  if (num <= 0) {
    return -1;
  }

  assert(order == TSDB_ORDER_ASC || order == TSDB_ORDER_DESC);

  TSKEY  *keys = (TSKEY *)pValue;
  int32_t low = 0;
  int32_t high = num - 1;
  bool    descending = (order == TSDB_ORDER_DESC);

  for (;;) {
    // boundary checks: they settle every case where key is not strictly inside (keys[low], keys[high])
    if (descending) {
      if (key >= keys[high]) return high;
      if (key == keys[low]) return low;
      if (key < keys[low]) return low - 1;
    } else {
      if (key <= keys[low]) return low;
      if (key == keys[high]) return high;

      if (key > keys[high]) {
        int32_t next = high + 1;
        return (next >= num) ? -1 : next;
      }
    }

    int32_t span = high - low + 1;
    int32_t mid = (span >> 1) + low;

    if (key < keys[mid]) {
      high = mid - 1;
    } else if (key > keys[mid]) {
      low = mid + 1;
    } else {
      return mid;
    }
  }
}

2130 2131 2132 2133 2134 2135 2136 2137 2138 2139 2140 2141 2142 2143 2144 2145 2146 2147 2148 2149 2150 2151
/*
 * Grow the output buffer of every output column so that it can hold capacity rows, and point
 * the output of each function context to the start of its buffer.
 *
 * Nothing is done when capacity is smaller than the current capacity of the query.
 * The size of the allocation is computed with size_t operands, so that bytes * capacity
 * cannot overflow the int32_t range.
 */
static void ensureOutputBufferSimple(SQueryRuntimeEnv* pRuntimeEnv, int32_t capacity) {
  SQuery* pQuery = pRuntimeEnv->pQuery;

  if (capacity < pQuery->rec.capacity) {
    return;
  }

  for (int32_t i = 0; i < pQuery->numOfOutput; ++i) {
    int32_t bytes = pQuery->pSelectExpr[i].bytes;
    assert(bytes > 0 && capacity > 0);

    size_t allocSize = (size_t)bytes * (size_t)capacity + sizeof(tFilePage);

    char *tmp = realloc(pQuery->sdata[i], allocSize);
    if (tmp == NULL) {  // todo handle the oom
      assert(0);
    } else {
      pQuery->sdata[i] = (tFilePage *)tmp;
    }

    // set the pCtx output buffer position
    pRuntimeEnv->pCtx[i].aOutputBuf = pQuery->sdata[i]->data;
  }

  qDebug("QInfo:%p realloc output buffer to inc output buffer from: %" PRId64 " rows to:%d rows", GET_QINFO_ADDR(pRuntimeEnv),
         pQuery->rec.capacity, capacity);

  pQuery->rec.capacity = capacity;
}

2158 2159 2160
/*
 * In case of prj/diff query, ensure the output buffer is sufficient to accommodate the
 * results of the current block.
 *
 * Only applies to queries that are neither interval queries, nor group-by normal column
 * queries, nor fixed output queries. When fewer than pBlockInfo->rows free rows are left,
 * the buffer of every output column is enlarged by the missing number of rows, and the
 * output position of each function context is set right after the rows already produced.
 * For top/bottom/diff functions, the timestamp output is bound to the output of the first
 * context. The size of the allocation is computed with size_t operands, so that
 * bytes * newSize cannot overflow the int32_t range.
 */
static void ensureOutputBuffer(SQueryRuntimeEnv* pRuntimeEnv, SDataBlockInfo* pBlockInfo) {
  SQuery* pQuery = pRuntimeEnv->pQuery;
  if (QUERY_IS_INTERVAL_QUERY(pQuery) || pRuntimeEnv->groupbyNormalCol || isFixedOutputQuery(pQuery)) {
    return;
  }

  SResultRec *pRec = &pQuery->rec;
  if (pQuery->rec.capacity - pQuery->rec.rows >= pBlockInfo->rows) {
    return;  // enough room left for the whole block
  }

  int32_t remain = pRec->capacity - pRec->rows;
  int32_t newSize = pRec->capacity + (pBlockInfo->rows - remain);

  for (int32_t i = 0; i < pQuery->numOfOutput; ++i) {
    int32_t bytes = pQuery->pSelectExpr[i].bytes;
    assert(bytes > 0 && newSize > 0);

    size_t allocSize = (size_t)bytes * (size_t)newSize + sizeof(tFilePage);

    char *tmp = realloc(pQuery->sdata[i], allocSize);
    if (tmp == NULL) {  // todo handle the oom
      assert(0);
    } else {
      pQuery->sdata[i] = (tFilePage *)tmp;
    }

    // set the pCtx output buffer position
    pRuntimeEnv->pCtx[i].aOutputBuf = pQuery->sdata[i]->data + pRec->rows * bytes;

    int32_t functionId = pQuery->pSelectExpr[i].base.functionId;
    if (functionId == TSDB_FUNC_TOP || functionId == TSDB_FUNC_BOTTOM || functionId == TSDB_FUNC_DIFF) {
      pRuntimeEnv->pCtx[i].ptsOutputBuf = pRuntimeEnv->pCtx[0].aOutputBuf;
    }
  }

  qDebug("QInfo:%p realloc output buffer, new size: %d rows, old:%" PRId64 ", remain:%" PRId64, GET_QINFO_ADDR(pRuntimeEnv),
         newSize, pRec->capacity, newSize - pRec->rows);

  pRec->capacity = newSize;
}

2196 2197
/*
 * Scan the data blocks of the current table and apply the query functions on each of them.
 *
 * The primary query handle is used during the master scan, the secondary one otherwise.
 * For every block: the first time window is initialized if needed (interval query), the
 * output buffer is enlarged if needed, the block is loaded on demand and handed over to
 * tableApplyFunctionsOnBlock(). The scan stops when there is no more block, when the query
 * is killed, or when the status of the query holds QUERY_RESBUF_FULL or QUERY_COMPLETED.
 *
 * Unless the result buffer is full, the query is flagged QUERY_COMPLETED at the end. For a
 * completed interval query in master scan, all time windows are closed and the redundant
 * ones removed.
 *
 * @return always 0
 */
static int64_t doScanAllDataBlocks(SQueryRuntimeEnv *pRuntimeEnv) {
  SQuery          *pQuery = pRuntimeEnv->pQuery;
  STableQueryInfo *pTableQueryInfo = pQuery->current;
  SQueryCostInfo  *pSummary = &pRuntimeEnv->summary;
  SWindowResInfo  *pWindowResInfo = &pRuntimeEnv->windowResInfo;
  SQInfo          *pQInfo = (SQInfo *)GET_QINFO_ADDR(pRuntimeEnv);

  qDebug("QInfo:%p query start, qrange:%" PRId64 "-%" PRId64 ", lastkey:%" PRId64 ", order:%d",
         pQInfo, pTableQueryInfo->win.skey, pTableQueryInfo->win.ekey, pTableQueryInfo->lastKey,
         pQuery->order.order);

  TsdbQueryHandleT pQueryHandle = pRuntimeEnv->pSecQueryHandle;
  if (IS_MASTER_SCAN(pRuntimeEnv)) {
    pQueryHandle = pRuntimeEnv->pQueryHandle;
  }

  while (tsdbNextDataBlock(pQueryHandle)) {
    pSummary->totalBlocks += 1;
    if (isQueryKilled(pQInfo)) {
      return 0;
    }

    SDataBlockInfo blockInfo = tsdbRetrieveDataBlockInfo(pQueryHandle);

    // todo extract methods
    // the first block of an interval query: set up the first time window
    if (QUERY_IS_INTERVAL_QUERY(pQuery) && pWindowResInfo->prevSKey == TSKEY_INITIAL_VAL) {
      STimeWindow realWin = TSWINDOW_INITIALIZER;
      STimeWindow w = TSWINDOW_INITIALIZER;

      if (QUERY_IS_ASC_QUERY(pQuery)) {
        getAlignQueryTimeWindow(pQuery, blockInfo.window.skey, blockInfo.window.skey, pQuery->window.ekey, &realWin, &w);
        pWindowResInfo->startTime = w.skey;
      } else {
        // the start position of the first time window in the endpoint that spreads beyond the queried last timestamp
        getAlignQueryTimeWindow(pQuery, blockInfo.window.ekey, pQuery->window.ekey, blockInfo.window.ekey, &realWin, &w);
        pWindowResInfo->startTime = pQuery->window.skey;
      }

      pWindowResInfo->prevSKey = w.skey;

      if (pRuntimeEnv->pFillInfo != NULL) {
        pRuntimeEnv->pFillInfo->start = w.skey;
      }
    }

    // in case of prj/diff query, ensure the output buffer is sufficient to accommodate the results of current block
    ensureOutputBuffer(pRuntimeEnv, &blockInfo);

    SDataStatis *pStatis = NULL;
    SArray      *pDataBlock = loadDataBlockOnDemand(pRuntimeEnv, pQueryHandle, &blockInfo, &pStatis);

    // query start position can not move into tableApplyFunctionsOnBlock due to limit/offset condition
    if (QUERY_IS_ASC_QUERY(pQuery)) {
      pQuery->pos = 0;
    } else {
      pQuery->pos = blockInfo.rows - 1;
    }

    int32_t numOfRes = tableApplyFunctionsOnBlock(pRuntimeEnv, &blockInfo, pStatis, binarySearchForKey, pDataBlock);

    pSummary->totalRows += blockInfo.rows;
    qDebug("QInfo:%p check data block, brange:%" PRId64 "-%" PRId64 ", numOfRows:%d, numOfRes:%d, lastKey:%"PRId64, pQInfo,
           blockInfo.window.skey, blockInfo.window.ekey, blockInfo.rows, numOfRes, pQuery->current->lastKey);

    // while the output buffer is full or limit/offset is applied, query may be paused here
    if (Q_STATUS_EQUAL(pQuery->status, QUERY_RESBUF_FULL | QUERY_COMPLETED)) {
      break;
    }
  }

  // if the result buffer is not full, set the query complete
  if (!Q_STATUS_EQUAL(pQuery->status, QUERY_RESBUF_FULL)) {
    setQueryStatus(pQuery, QUERY_COMPLETED);
  }

  bool intervalMasterScan = QUERY_IS_INTERVAL_QUERY(pQuery) && IS_MASTER_SCAN(pRuntimeEnv);
  if (!intervalMasterScan) {
    return 0;
  }

  if (Q_STATUS_EQUAL(pQuery->status, QUERY_COMPLETED)) {
    int32_t step = QUERY_IS_ASC_QUERY(pQuery) ? QUERY_ASC_FORWARD_STEP : QUERY_DESC_FORWARD_STEP;

    closeAllTimeWindow(pWindowResInfo);
    removeRedundantWindow(pWindowResInfo, pTableQueryInfo->lastKey - step, step);
    pWindowResInfo->curIndex = pWindowResInfo->size - 1;  // point to the last time window
  } else {
    assert(Q_STATUS_EQUAL(pQuery->status, QUERY_RESBUF_FULL));
  }

  return 0;
}

/*
 * set tag value in SQLFunctionCtx
 * e.g.,tag information into input buffer
 */
2280
static void doSetTagValueInParam(void *tsdb, void* pTable, int32_t tagColId, tVariant *tag, int16_t type, int16_t bytes) {
H
[td-90]  
Haojun Liao 已提交
2281
  tVariantDestroy(tag);
2282

2283
  if (tagColId == TSDB_TBNAME_COLUMN_INDEX) {
2284
    char* val = tsdbGetTableName(pTable);
H
[td-90]  
Haojun Liao 已提交
2285 2286 2287
    assert(val != NULL);
    
    tVariantCreateFromBinary(tag, varDataVal(val), varDataLen(val), TSDB_DATA_TYPE_BINARY);
2288
  } else {
2289
    char* val = tsdbGetTableTagVal(pTable, tagColId, type, bytes);
H
[td-90]  
Haojun Liao 已提交
2290 2291 2292 2293
    if (val == NULL) {
      tag->nType = TSDB_DATA_TYPE_NULL;
      return;
    }
H
hjxilinx 已提交
2294 2295
    
    if (type == TSDB_DATA_TYPE_BINARY || type == TSDB_DATA_TYPE_NCHAR) {
H
Hongze Cheng 已提交
2296
      if (isNull(val, type)) {
H
Haojun Liao 已提交
2297 2298 2299 2300
        tag->nType = TSDB_DATA_TYPE_NULL;
        return;
      }

H
[td-90]  
Haojun Liao 已提交
2301
      tVariantCreateFromBinary(tag, varDataVal(val), varDataLen(val), type);
H
hjxilinx 已提交
2302
    } else {
H
Haojun Liao 已提交
2303 2304 2305 2306 2307
      if (isNull(val, type)) {
        tag->nType = TSDB_DATA_TYPE_NULL;
        return;
      }

H
[td-90]  
Haojun Liao 已提交
2308
      tVariantCreateFromBinary(tag, val, bytes, type);
H
hjxilinx 已提交
2309
    }
2310
  }
2311 2312
}

2313
void setTagVal(SQueryRuntimeEnv *pRuntimeEnv, void *pTable, void *tsdb) {
2314
  SQuery *pQuery = pRuntimeEnv->pQuery;
H
Haojun Liao 已提交
2315
  SQInfo* pQInfo = GET_QINFO_ADDR(pRuntimeEnv);
2316

H
[td-90]  
Haojun Liao 已提交
2317 2318 2319
  SExprInfo *pExprInfo = &pQuery->pSelectExpr[0];
  if (pQuery->numOfOutput == 1 && pExprInfo->base.functionId == TSDB_FUNC_TS_COMP) {
    assert(pExprInfo->base.numOfParams == 1);
H
Haojun Liao 已提交
2320 2321 2322 2323 2324 2325 2326 2327 2328 2329

    // todo refactor extract function.
    int16_t type = -1, bytes = -1;
    for(int32_t i = 0; i < pQuery->numOfTags; ++i) {
      if (pQuery->tagColList[i].colId == pExprInfo->base.arg->argValue.i64) {
        type = pQuery->tagColList[i].type;
        bytes = pQuery->tagColList[i].bytes;
      }
    }

2330
    doSetTagValueInParam(tsdb, pTable, pExprInfo->base.arg->argValue.i64, &pRuntimeEnv->pCtx[0].tag, type, bytes);
2331 2332
  } else {
    // set tag value, by which the results are aggregated.
2333
    for (int32_t idx = 0; idx < pQuery->numOfOutput; ++idx) {
H
Haojun Liao 已提交
2334
      SExprInfo* pLocalExprInfo = &pQuery->pSelectExpr[idx];
H
[td-90]  
Haojun Liao 已提交
2335
  
2336
      // ts_comp column required the tag value for join filter
H
Haojun Liao 已提交
2337
      if (!TSDB_COL_IS_TAG(pLocalExprInfo->base.colInfo.flag)) {
2338 2339
        continue;
      }
2340

2341
      // todo use tag column index to optimize performance
2342
      doSetTagValueInParam(tsdb, pTable, pLocalExprInfo->base.colInfo.colId, &pRuntimeEnv->pCtx[idx].tag,
H
Haojun Liao 已提交
2343
                           pLocalExprInfo->type, pLocalExprInfo->bytes);
2344
    }
2345

2346
    // set the join tag for first column
H
[td-90]  
Haojun Liao 已提交
2347
    SSqlFuncMsg *pFuncMsg = &pExprInfo->base;
H
Haojun Liao 已提交
2348
    if ((pFuncMsg->functionId == TSDB_FUNC_TS || pFuncMsg->functionId == TSDB_FUNC_PRJ) && pFuncMsg->colInfo.colIndex == PRIMARYKEY_TIMESTAMP_COL_INDEX &&
2349 2350
        pRuntimeEnv->pTSBuf != NULL) {
      assert(pFuncMsg->numOfParams == 1);
H
Haojun Liao 已提交
2351 2352 2353 2354 2355 2356 2357 2358 2359 2360

      // todo refactor
      int16_t type = -1, bytes = -1;
      for(int32_t i = 0; i < pQuery->numOfTags; ++i) {
        if (pQuery->tagColList[i].colId == pExprInfo->base.arg->argValue.i64) {
          type = pQuery->tagColList[i].type;
          bytes = pQuery->tagColList[i].bytes;
        }
      }

2361
      doSetTagValueInParam(tsdb, pTable, pExprInfo->base.arg->argValue.i64, &pRuntimeEnv->pCtx[0].tag, type, bytes);
2362
      qDebug("QInfo:%p set tag value for join comparison, colId:%" PRId64 ", val:%"PRId64, pQInfo, pExprInfo->base.arg->argValue.i64,
B
Bomin Zhang 已提交
2363
          pRuntimeEnv->pCtx[0].tag.i64Key)
2364 2365 2366 2367 2368 2369 2370
    }
  }
}

/*
 * Feed one window result into the merge (distMergeFunc) of every output column.
 *
 * mergeFlag == false starts a new output row: each context's output buffer is
 * advanced by one element, its result info is reset and the function is
 * re-initialised. mergeFlag == true merges into the current output row.
 * For TAG / TAG_DUMMY columns the tag variant of the context is rebuilt from the
 * input element; TAG_DUMMY columns are skipped when the merge functions are invoked.
 */
static void doMerge(SQueryRuntimeEnv *pRuntimeEnv, int64_t timestamp, SWindowResult *pWindowRes, bool mergeFlag) {
  SQuery         *pQuery = pRuntimeEnv->pQuery;
  SQLFunctionCtx *pCtx = pRuntimeEnv->pCtx;

  // pass 1: prepare every context for this window result
  for (int32_t i = 0; i < pQuery->numOfOutput; ++i) {
    SQLFunctionCtx *pc = &pCtx[i];
    int32_t         funcId = pQuery->pSelectExpr[i].base.functionId;

    if (!mergeFlag) {
      pc->aOutputBuf += pc->outputBytes;
      pc->currentStage = FIRST_STAGE_MERGE;

      resetResultInfo(pc->resultInfo);
      aAggs[funcId].init(pc);
    }

    pc->hasNull = true;
    pc->nStartQueryTimestamp = timestamp;
    pc->aInputElemBuf = getPosInResultPage(pRuntimeEnv, i, pWindowRes);

    if (funcId != TSDB_FUNC_TAG_DUMMY && funcId != TSDB_FUNC_TAG) {
      continue;
    }

    // in case of tag column, the tag information should be extracted from input buffer
    tVariantDestroy(&pc->tag);

    int32_t type = pc->outputType;
    if (type == TSDB_DATA_TYPE_BINARY || type == TSDB_DATA_TYPE_NCHAR) {
      tVariantCreateFromBinary(&pc->tag, varDataVal(pc->aInputElemBuf), varDataLen(pc->aInputElemBuf), type);
    } else {
      tVariantCreateFromBinary(&pc->tag, pc->aInputElemBuf, pc->inputBytes, pc->inputType);
    }
  }

  // pass 2: run the merge function of every column except TAG_DUMMY
  for (int32_t i = 0; i < pQuery->numOfOutput; ++i) {
    int32_t funcId = pQuery->pSelectExpr[i].base.functionId;
    if (funcId != TSDB_FUNC_TAG_DUMMY) {
      aAggs[funcId].distMergeFunc(&pCtx[i]);
    }
  }
}

2410
static UNUSED_FUNC void printBinaryData(int32_t functionId, char *data, int32_t srcDataType) {
2411 2412 2413 2414 2415 2416 2417 2418 2419 2420 2421 2422 2423 2424 2425 2426 2427 2428 2429 2430 2431 2432 2433 2434 2435 2436 2437 2438 2439 2440 2441 2442 2443 2444 2445 2446 2447 2448 2449 2450 2451 2452 2453 2454 2455 2456 2457 2458 2459 2460 2461 2462 2463 2464 2465 2466 2467 2468 2469 2470 2471 2472 2473 2474 2475 2476 2477 2478
  if (functionId == TSDB_FUNC_FIRST_DST || functionId == TSDB_FUNC_LAST_DST) {
    switch (srcDataType) {
      case TSDB_DATA_TYPE_BINARY:
        printf("%" PRId64 ",%s\t", *(TSKEY *)data, (data + TSDB_KEYSIZE + 1));
        break;
      case TSDB_DATA_TYPE_TINYINT:
      case TSDB_DATA_TYPE_BOOL:
        printf("%" PRId64 ",%d\t", *(TSKEY *)data, *(int8_t *)(data + TSDB_KEYSIZE + 1));
        break;
      case TSDB_DATA_TYPE_SMALLINT:
        printf("%" PRId64 ",%d\t", *(TSKEY *)data, *(int16_t *)(data + TSDB_KEYSIZE + 1));
        break;
      case TSDB_DATA_TYPE_BIGINT:
      case TSDB_DATA_TYPE_TIMESTAMP:
        printf("%" PRId64 ",%" PRId64 "\t", *(TSKEY *)data, *(TSKEY *)(data + TSDB_KEYSIZE + 1));
        break;
      case TSDB_DATA_TYPE_INT:
        printf("%" PRId64 ",%d\t", *(TSKEY *)data, *(int32_t *)(data + TSDB_KEYSIZE + 1));
        break;
      case TSDB_DATA_TYPE_FLOAT:
        printf("%" PRId64 ",%f\t", *(TSKEY *)data, *(float *)(data + TSDB_KEYSIZE + 1));
        break;
      case TSDB_DATA_TYPE_DOUBLE:
        printf("%" PRId64 ",%lf\t", *(TSKEY *)data, *(double *)(data + TSDB_KEYSIZE + 1));
        break;
    }
  } else if (functionId == TSDB_FUNC_AVG) {
    printf("%lf,%d\t", *(double *)data, *(int32_t *)(data + sizeof(double)));
  } else if (functionId == TSDB_FUNC_SPREAD) {
    printf("%lf,%lf\t", *(double *)data, *(double *)(data + sizeof(double)));
  } else if (functionId == TSDB_FUNC_TWA) {
    data += 1;
    printf("%lf,%" PRId64 ",%" PRId64 ",%" PRId64 "\t", *(double *)data, *(int64_t *)(data + 8),
           *(int64_t *)(data + 16), *(int64_t *)(data + 24));
  } else if (functionId == TSDB_FUNC_MIN || functionId == TSDB_FUNC_MAX) {
    switch (srcDataType) {
      case TSDB_DATA_TYPE_TINYINT:
      case TSDB_DATA_TYPE_BOOL:
        printf("%d\t", *(int8_t *)data);
        break;
      case TSDB_DATA_TYPE_SMALLINT:
        printf("%d\t", *(int16_t *)data);
        break;
      case TSDB_DATA_TYPE_BIGINT:
      case TSDB_DATA_TYPE_TIMESTAMP:
        printf("%" PRId64 "\t", *(int64_t *)data);
        break;
      case TSDB_DATA_TYPE_INT:
        printf("%d\t", *(int *)data);
        break;
      case TSDB_DATA_TYPE_FLOAT:
        printf("%f\t", *(float *)data);
        break;
      case TSDB_DATA_TYPE_DOUBLE:
        printf("%f\t", *(float *)data);
        break;
    }
  } else if (functionId == TSDB_FUNC_SUM) {
    if (srcDataType == TSDB_DATA_TYPE_FLOAT || srcDataType == TSDB_DATA_TYPE_DOUBLE) {
      printf("%lf\t", *(float *)data);
    } else {
      printf("%" PRId64 "\t", *(int64_t *)data);
    }
  } else {
    printf("%s\t", data);
  }
}

2479
/*
 * Debug helper: dump `numOfRows` rows of the column-wise pages in `pdata` to stdout.
 *
 * Column i is read from pdata[i]->data with a stride of pSelectExpr[i].bytes.
 * BINARY columns are delegated to printBinaryData(); columns whose type is not
 * one of BINARY/TIMESTAMP/BIGINT/INT/FLOAT/DOUBLE are not printed.
 */
void UNUSED_FUNC displayInterResult(tFilePage **pdata, SQueryRuntimeEnv* pRuntimeEnv, int32_t numOfRows) {
  SQuery *pQuery = pRuntimeEnv->pQuery;
  int32_t numOfCols = pQuery->numOfOutput;
  printf("super table query intermediate result, total:%d\n", numOfRows);

  for (int32_t row = 0; row < numOfRows; ++row) {
    for (int32_t col = 0; col < numOfCols; ++col) {
      SExprInfo *pExpr = &pQuery->pSelectExpr[col];
      int32_t    type = pExpr->type;

      bool printable = (type == TSDB_DATA_TYPE_BINARY) || (type == TSDB_DATA_TYPE_TIMESTAMP) ||
                       (type == TSDB_DATA_TYPE_BIGINT) || (type == TSDB_DATA_TYPE_INT) ||
                       (type == TSDB_DATA_TYPE_FLOAT) || (type == TSDB_DATA_TYPE_DOUBLE);
      if (!printable) {
        continue;
      }

      // address of this row's value inside the column page
      char *cell = pdata[col]->data + pExpr->bytes * row;

      switch (type) {
        case TSDB_DATA_TYPE_BINARY:
          printBinaryData(pExpr->base.functionId, cell, type);
          break;
        case TSDB_DATA_TYPE_TIMESTAMP:
        case TSDB_DATA_TYPE_BIGINT:
          printf("%" PRId64 "\t", *(int64_t *)cell);
          break;
        case TSDB_DATA_TYPE_INT:
          printf("%d\t", *(int32_t *)cell);
          break;
        case TSDB_DATA_TYPE_FLOAT:
          printf("%f\t", *(float *)cell);
          break;
        case TSDB_DATA_TYPE_DOUBLE:
          printf("%lf\t", *(double *)cell);
          break;
      }
    }
    printf("\n");
  }
}

typedef struct SCompSupporter {
H
hjxilinx 已提交
2518 2519 2520
  STableQueryInfo **pTableQueryInfo;
  int32_t *         position;
  SQInfo *          pQInfo;
2521 2522 2523 2524 2525
} SCompSupporter;

int32_t tableResultComparFn(const void *pLeft, const void *pRight, void *param) {
  int32_t left = *(int32_t *)pLeft;
  int32_t right = *(int32_t *)pRight;
2526

2527 2528
  SCompSupporter *  supporter = (SCompSupporter *)param;
  SQueryRuntimeEnv *pRuntimeEnv = &supporter->pQInfo->runtimeEnv;
2529

2530 2531
  int32_t leftPos = supporter->position[left];
  int32_t rightPos = supporter->position[right];
2532

2533 2534 2535 2536
  /* left source is exhausted */
  if (leftPos == -1) {
    return 1;
  }
2537

2538 2539 2540 2541
  /* right source is exhausted*/
  if (rightPos == -1) {
    return -1;
  }
2542

H
hjxilinx 已提交
2543
  SWindowResInfo *pWindowResInfo1 = &supporter->pTableQueryInfo[left]->windowResInfo;
2544
  SWindowResult * pWindowRes1 = getWindowResult(pWindowResInfo1, leftPos);
2545

2546 2547
  char *b1 = getPosInResultPage(pRuntimeEnv, PRIMARYKEY_TIMESTAMP_COL_INDEX, pWindowRes1);
  TSKEY leftTimestamp = GET_INT64_VAL(b1);
2548

H
hjxilinx 已提交
2549
  SWindowResInfo *pWindowResInfo2 = &supporter->pTableQueryInfo[right]->windowResInfo;
2550
  SWindowResult * pWindowRes2 = getWindowResult(pWindowResInfo2, rightPos);
2551

2552 2553
  char *b2 = getPosInResultPage(pRuntimeEnv, PRIMARYKEY_TIMESTAMP_COL_INDEX, pWindowRes2);
  TSKEY rightTimestamp = GET_INT64_VAL(b2);
2554

2555 2556 2557
  if (leftTimestamp == rightTimestamp) {
    return 0;
  }
2558

2559 2560 2561
  return leftTimestamp > rightTimestamp ? 1 : -1;
}

2562
/*
 * Merge table groups, starting at pQInfo->groupIndex, until one group yields
 * at least one result or all groups have been processed.
 *
 * pQInfo->groupIndex is advanced past every group that was merged.
 * Returns -1 when mergeIntoGroupResultImpl() reports a failure (negative value),
 * TSDB_CODE_SUCCESS otherwise.
 */
int32_t mergeIntoGroupResult(SQInfo *pQInfo) {
  int64_t startMs = taosGetTimestampMs();
  int32_t numOfGroups = GET_NUM_OF_TABLEGROUP(pQInfo);

  for (;;) {
    if (pQInfo->groupIndex >= numOfGroups) {
      break;
    }

    SArray *group = GET_TABLEGROUP(pQInfo, pQInfo->groupIndex);
    int32_t code = mergeIntoGroupResultImpl(pQInfo, group);
    if (code < 0) {  // not enough disk space to save the data into disk
      return -1;
    }

    pQInfo->groupIndex += 1;

    // this group generates at least one result, return results
    if (code > 0) {
      break;
    }

    assert(pQInfo->numOfGroupResultPages == 0);
    qDebug("QInfo:%p no result in group %d, continue", pQInfo, pQInfo->groupIndex - 1);
  }

  qDebug("QInfo:%p merge res data into group, index:%d, total group:%d, elapsed time:%" PRId64 "ms", pQInfo,
         pQInfo->groupIndex - 1, numOfGroups, taosGetTimestampMs() - startMs);

  return TSDB_CODE_SUCCESS;
}

/*
 * Copy the next batch of merged group results from the disk-based result buffer
 * into pQuery->sdata and record the row count in pQuery->rec.rows.
 *
 * When every batch of the current group has been consumed
 * (offset == numOfGroupResultPages) the next group is merged first. If no group
 * is left, pQInfo->tableIndex is set to the number of tables to mark the query
 * as completed and nothing is copied.
 */
void copyResToQueryResultBuf(SQInfo *pQInfo, SQuery *pQuery) {
  if (pQInfo->offset == pQInfo->numOfGroupResultPages) {
    pQInfo->numOfGroupResultPages = 0;

    // current results of group has been sent to client, try next group
    if (mergeIntoGroupResult(pQInfo) != TSDB_CODE_SUCCESS) {
      return;  // failed to save data in the disk
    }

    // check if all results has been sent to client
    int32_t numOfGroup = GET_NUM_OF_TABLEGROUP(pQInfo);
    if (pQInfo->numOfGroupResultPages == 0 && pQInfo->groupIndex == numOfGroup) {
      pQInfo->tableIndex = pQInfo->tableqinfoGroupInfo.numOfTables;  // set query completed
      return;
    }
  }

  SQueryRuntimeEnv    *pRuntimeEnv = &pQInfo->runtimeEnv;
  SDiskbasedResultBuf *pResultBuf = pRuntimeEnv->pResultBuf;

  // pages that belong to the current batch of the group merged last
  int32_t baseId = getGroupResultId(pQInfo->groupIndex - 1);
  SIDList pages = getDataBufPagesIdList(pResultBuf, pQInfo->offset + baseId);

  // total number of rows held by those pages
  int32_t totalRows = 0;
  for (int32_t p = 0; p < pages.size; ++p) {
    tFilePage *pPage = GET_RES_BUF_PAGE_BY_ID(pResultBuf, pages.pData[p]);
    totalRows += pPage->num;
  }

  // append every page, column by column, to the output buffers
  int32_t copiedRows = 0;
  for (int32_t p = 0; p < pages.size; ++p) {
    tFilePage *pPage = GET_RES_BUF_PAGE_BY_ID(pResultBuf, pages.pData[p]);

    for (int32_t col = 0; col < pQuery->numOfOutput; ++col) {
      int32_t width = pRuntimeEnv->pCtx[col].outputBytes;
      char   *dst = pQuery->sdata[col]->data;
      char   *src = pPage->data + pRuntimeEnv->offset[col] * pPage->num;

      memcpy(dst + copiedRows * width, src, width * pPage->num);
    }

    copiedRows += pPage->num;
  }

  assert(pQuery->rec.rows == 0);

  pQuery->rec.rows += totalRows;
  pQInfo->offset += 1;
}

H
Haojun Liao 已提交
2644
/*
 * Return the number of results held by a window result.
 *
 * The count is taken from the first output column that is not a TS, TAG or
 * TAGPRJ function and whose numOfRes is positive; those three functions cannot
 * decide the output number of the query. Returns 0 when no column qualifies.
 */
int64_t getNumOfResultWindowRes(SQuery *pQuery, SWindowResult *pWindowRes) {
  for (int32_t col = 0; col < pQuery->numOfOutput; ++col) {
    int32_t funcId = pQuery->pSelectExpr[col].base.functionId;

    bool followsMainOutput =
        (funcId == TSDB_FUNC_TS) || (funcId == TSDB_FUNC_TAG) || (funcId == TSDB_FUNC_TAGPRJ);
    if (followsMainOutput) {
      continue;
    }

    SResultInfo *pInfo = &pWindowRes->resultInfo[col];
    assert(pInfo != NULL);

    if (pInfo->numOfRes > 0) {
      return pInfo->numOfRes;
    }
  }

  return 0;
}

2667
int32_t mergeIntoGroupResultImpl(SQInfo *pQInfo, SArray *pGroup) {
2668
  SQueryRuntimeEnv *pRuntimeEnv = &pQInfo->runtimeEnv;
2669
  SQuery *          pQuery = pRuntimeEnv->pQuery;
2670

2671
  size_t size = taosArrayGetSize(pGroup);
2672
  tFilePage **buffer = pQuery->sdata;
2673

2674
  int32_t*   posList = calloc(size, sizeof(int32_t));
H
hjxilinx 已提交
2675
  STableQueryInfo **pTableList = malloc(POINTER_BYTES * size);
2676

2677 2678 2679 2680 2681
  if (pTableList == NULL || posList == NULL) {
    tfree(posList);
    tfree(pTableList);

    qError("QInfo:%p failed alloc memory", pQInfo);
H
Haojun Liao 已提交
2682
    longjmp(pRuntimeEnv->env, TSDB_CODE_QRY_OUT_OF_MEMORY);
2683 2684
  }

2685
  // todo opt for the case of one table per group
2686
  int32_t numOfTables = 0;
2687
  for (int32_t i = 0; i < size; ++i) {
2688
    STableQueryInfo *item = taosArrayGetP(pGroup, i);
2689

H
Haojun Liao 已提交
2690
    SIDList list = getDataBufPagesIdList(pRuntimeEnv->pResultBuf, TSDB_TABLEID(item->pTable)->tid);
2691 2692
    if (list.size > 0 && item->windowResInfo.size > 0) {
      pTableList[numOfTables] = item;
2693
      numOfTables += 1;
2694 2695
    }
  }
2696

2697
  if (numOfTables == 0) {
2698 2699
    tfree(posList);
    tfree(pTableList);
2700

2701 2702
    assert(pQInfo->numOfGroupResultPages == 0);
    return 0;
H
Haojun Liao 已提交
2703
  } else if (numOfTables == 1) { // no need to merge results since only one table in each group
2704

2705
  }
2706

2707
  SCompSupporter cs = {pTableList, posList, pQInfo};
2708

2709
  SLoserTreeInfo *pTree = NULL;
2710
  tLoserTreeCreate(&pTree, numOfTables, &cs, tableResultComparFn);
2711

2712
  SResultInfo *pResultInfo = calloc(pQuery->numOfOutput, sizeof(SResultInfo));
H
Haojun Liao 已提交
2713 2714 2715 2716
  if (pResultInfo == NULL) {
    longjmp(pRuntimeEnv->env, TSDB_CODE_QRY_OUT_OF_MEMORY);
  }

2717 2718
  setWindowResultInfo(pResultInfo, pQuery, pRuntimeEnv->stableQuery);
  resetMergeResultBuf(pQuery, pRuntimeEnv->pCtx, pResultInfo);
2719

2720 2721
  int64_t lastTimestamp = -1;
  int64_t startt = taosGetTimestampMs();
2722

2723 2724
  while (1) {
    int32_t pos = pTree->pNode[0].index;
2725

H
hjxilinx 已提交
2726
    SWindowResInfo *pWindowResInfo = &pTableList[pos]->windowResInfo;
2727
    SWindowResult * pWindowRes = getWindowResult(pWindowResInfo, cs.position[pos]);
2728

2729 2730
    char *b = getPosInResultPage(pRuntimeEnv, PRIMARYKEY_TIMESTAMP_COL_INDEX, pWindowRes);
    TSKEY ts = GET_INT64_VAL(b);
2731

2732
    assert(ts == pWindowRes->window.skey);
H
Haojun Liao 已提交
2733
    int64_t num = getNumOfResultWindowRes(pQuery, pWindowRes);
2734 2735
    if (num <= 0) {
      cs.position[pos] += 1;
2736

2737 2738
      if (cs.position[pos] >= pWindowResInfo->size) {
        cs.position[pos] = -1;
2739

2740
        // all input sources are exhausted
2741
        if (--numOfTables == 0) {
2742 2743 2744 2745 2746 2747 2748
          break;
        }
      }
    } else {
      if (ts == lastTimestamp) {  // merge with the last one
        doMerge(pRuntimeEnv, ts, pWindowRes, true);
      } else {  // copy data to disk buffer
2749
        if (buffer[0]->num == pQuery->rec.capacity) {
2750 2751 2752
          if (flushFromResultBuf(pQInfo) != TSDB_CODE_SUCCESS) {
            return -1;
          }
2753

2754 2755
          resetMergeResultBuf(pQuery, pRuntimeEnv->pCtx, pResultInfo);
        }
2756

2757
        doMerge(pRuntimeEnv, ts, pWindowRes, false);
2758
        buffer[0]->num += 1;
2759
      }
2760

2761
      lastTimestamp = ts;
2762

2763 2764 2765
      cs.position[pos] += 1;
      if (cs.position[pos] >= pWindowResInfo->size) {
        cs.position[pos] = -1;
2766

2767
        // all input sources are exhausted
2768
        if (--numOfTables == 0) {
2769 2770 2771 2772
          break;
        }
      }
    }
2773

2774 2775
    tLoserTreeAdjust(pTree, pos + pTree->numOfEntries);
  }
2776

2777
  if (buffer[0]->num != 0) {  // there are data in buffer
2778
    if (flushFromResultBuf(pQInfo) != TSDB_CODE_SUCCESS) {
S
slguan 已提交
2779
      qError("QInfo:%p failed to flush data into temp file, abort query", pQInfo);
2780

2781 2782 2783 2784
      tfree(pTree);
      tfree(pTableList);
      tfree(posList);
      tfree(pResultInfo);
2785

2786 2787 2788
      return -1;
    }
  }
2789

2790 2791 2792
  int64_t endt = taosGetTimestampMs();

#ifdef _DEBUG_VIEW
2793
  displayInterResult(pQuery->sdata, pRuntimeEnv, pQuery->sdata[0]->num);
2794
#endif
2795

2796
  qDebug("QInfo:%p result merge completed for group:%d, elapsed time:%" PRId64 " ms", pQInfo, pQInfo->groupIndex, endt - startt);
2797

2798 2799
  tfree(pTableList);
  tfree(posList);
H
Haojun Liao 已提交
2800
  tfree(pTree);
2801

2802
  pQInfo->offset = 0;
2803
  for (int32_t i = 0; i < pQuery->numOfOutput; ++i) {
2804 2805
    tfree(pResultInfo[i].interResultBuf);
  }
2806

2807 2808 2809 2810 2811
  tfree(pResultInfo);
  return pQInfo->numOfGroupResultPages;
}

/*
 * Write the rows currently held in pQuery->sdata into new pages of the
 * disk-based result buffer.
 *
 * The rows are split into chunks of at most `capacity` rows (the number of rows
 * of pQuery->rowSize bytes that fit into one page after the tFilePage header).
 * Every page of one call is registered under the same id,
 * getGroupResultId(groupIndex) + numOfGroupResultPages, and
 * numOfGroupResultPages is increased by one per call.
 *
 * Returns TSDB_CODE_SUCCESS, or -1 when no new page could be obtained; both
 * callers in this file treat any non-success value as a failed flush.
 */
int32_t flushFromResultBuf(SQInfo *pQInfo) {
  SQueryRuntimeEnv *pRuntimeEnv = &pQInfo->runtimeEnv;
  SQuery *          pQuery = pRuntimeEnv->pQuery;

  SDiskbasedResultBuf *pResultBuf = pRuntimeEnv->pResultBuf;
  int32_t              capacity = (DEFAULT_INTERN_BUF_PAGE_SIZE - sizeof(tFilePage)) / pQuery->rowSize;

  // output argument of getNewDataBuf(); its value is not used here
  int32_t pageId = -1;

  int32_t remain = pQuery->sdata[0]->num;
  int32_t offset = 0;

  while (remain > 0) {
    int32_t r = (remain > capacity) ? capacity : remain;

    int32_t    id = getGroupResultId(pQInfo->groupIndex) + pQInfo->numOfGroupResultPages;
    tFilePage *buf = getNewDataBuf(pResultBuf, id, &pageId);
    if (buf == NULL) {  // report the failure instead of dereferencing a null page
      qError("QInfo:%p failed to get new page from result buffer", pQInfo);
      return -1;
    }

    buf->num = r;

    // pagewise copy to dest buffer: column i starts at offset[i] * num inside the page
    for (int32_t i = 0; i < pQuery->numOfOutput; ++i) {
      int32_t bytes = pRuntimeEnv->pCtx[i].outputBytes;

      memcpy(buf->data + pRuntimeEnv->offset[i] * buf->num, ((char *)pQuery->sdata[i]->data) + offset * bytes,
             buf->num * bytes);
    }

    offset += r;
    remain -= r;
  }

  pQInfo->numOfGroupResultPages += 1;
  return TSDB_CODE_SUCCESS;
}

/*
 * Rewind the merge output: empty every output page of pQuery->sdata and bind
 * each function context to its entry of pResultInfo.
 *
 * aOutputBuf is positioned one element (outputBytes) before the start of the
 * page data.
 */
void resetMergeResultBuf(SQuery *pQuery, SQLFunctionCtx *pCtx, SResultInfo *pResultInfo) {
  for (int32_t col = 0; col < pQuery->numOfOutput; ++col) {
    SQLFunctionCtx *pc = &pCtx[col];
    tFilePage      *pPage = pQuery->sdata[col];

    pPage->num = 0;

    pc->resultInfo = &pResultInfo[col];
    pc->startOffset = 0;
    pc->size = 1;
    pc->aOutputBuf = pPage->data - pc->outputBytes;
  }
}

2861 2862 2863 2864 2865 2866 2867
/*
 * Prepare the scan window of one table for the reverse scan.
 *
 * The new window starts at lastKey + step (step is the direction factor of the
 * current query order, which has already been switched by the caller) and ends
 * at the previous window start. lastKey is set to the new start, the cursor
 * order is switched and its vgroupIndex reset to -1. A NULL table info is ignored.
 */
static void updateTableQueryInfoForReverseScan(SQuery *pQuery, STableQueryInfo *pTableQueryInfo) {
  if (pTableQueryInfo == NULL) {
    return;
  }

  // order has change already!
  int32_t step = GET_FORWARD_DIRECTION_FACTOR(pQuery->order.order);

  // TODO validate the relation between win.ekey and lastKey + step before overwriting it
  TSKEY reverseStart = pTableQueryInfo->lastKey + step;
  TSKEY reverseEnd = pTableQueryInfo->win.skey;

  pTableQueryInfo->win.skey = reverseStart;
  pTableQueryInfo->win.ekey = reverseEnd;
  pTableQueryInfo->lastKey = reverseStart;

  SWITCH_ORDER(pTableQueryInfo->cur.order);
  pTableQueryInfo->cur.vgroupIndex = -1;
}

/*
 * For every closed window result, mark which output functions still have work
 * to do in the reverse scan.
 *
 * FIRST/FIRST_DST (when order is ASC) and LAST/LAST_DST (when order is DESC)
 * are marked as not complete; every other function except TS and TAG is marked
 * complete. TS and TAG entries are left untouched.
 */
static void disableFuncInReverseScanImpl(SQInfo* pQInfo, SWindowResInfo *pWindowResInfo, int32_t order) {
  SQuery *pQuery = pQInfo->runtimeEnv.pQuery;

  for (int32_t w = 0; w < pWindowResInfo->size; ++w) {
    SWindowStatus *pStatus = getTimeWindowResStatus(pWindowResInfo, w);
    if (!pStatus->closed) {
      continue;
    }

    SWindowResult *pRes = getWindowResult(pWindowResInfo, w);

    // open/close the specified query for each group result
    for (int32_t col = 0; col < pQuery->numOfOutput; ++col) {
      int32_t funcId = pQuery->pSelectExpr[col].base.functionId;

      bool isFirst = (funcId == TSDB_FUNC_FIRST || funcId == TSDB_FUNC_FIRST_DST);
      bool isLast = (funcId == TSDB_FUNC_LAST || funcId == TSDB_FUNC_LAST_DST);

      if ((isFirst && order == TSDB_ORDER_ASC) || (isLast && order == TSDB_ORDER_DESC)) {
        pRes->resultInfo[col].complete = false;
      } else if (funcId != TSDB_FUNC_TS && funcId != TSDB_FUNC_TAG) {
        pRes->resultInfo[col].complete = true;
      }
    }
  }
}

2910 2911
/*
 * Prepare the query for the reverse scan.
 *
 * 1. Mark the output functions that must (FIRST* with ASC order, LAST* with
 *    DESC order) or must not (everything else except TS/TAG) take part in the
 *    reverse scan - per closed window for group-by-normal-column and interval
 *    queries, per function context otherwise.
 * 2. Update the scan window of every table in every table group.
 */
void disableFuncInReverseScan(SQInfo *pQInfo) {
  SQueryRuntimeEnv *pRuntimeEnv = &pQInfo->runtimeEnv;
  SQuery           *pQuery = pRuntimeEnv->pQuery;
  int32_t           order = pQuery->order.order;

  // group by normal columns and interval query on normal table
  SWindowResInfo *pWindowResInfo = &pRuntimeEnv->windowResInfo;
  if (pRuntimeEnv->groupbyNormalCol || isIntervalQuery(pQuery)) {
    disableFuncInReverseScanImpl(pQInfo, pWindowResInfo, order);
  } else {  // for simple result of table query,
    for (int32_t col = 0; col < pQuery->numOfOutput; ++col) {  // todo refactor
      int32_t funcId = pQuery->pSelectExpr[col].base.functionId;

      SQLFunctionCtx *pCtx = &pRuntimeEnv->pCtx[col];
      if (pCtx->resultInfo == NULL) {
        continue;  // resultInfo is NULL, means no data checked in previous scan
      }

      bool isFirst = (funcId == TSDB_FUNC_FIRST || funcId == TSDB_FUNC_FIRST_DST);
      bool isLast = (funcId == TSDB_FUNC_LAST || funcId == TSDB_FUNC_LAST_DST);

      if ((isFirst && order == TSDB_ORDER_ASC) || (isLast && order == TSDB_ORDER_DESC)) {
        pCtx->resultInfo->complete = false;
      } else if (funcId != TSDB_FUNC_TS && funcId != TSDB_FUNC_TAG) {
        pCtx->resultInfo->complete = true;
      }
    }
  }

  // flip the scan window of every table in every group
  int32_t numOfGroups = GET_NUM_OF_TABLEGROUP(pQInfo);

  for (int32_t g = 0; g < numOfGroups; ++g) {
    SArray *group = GET_TABLEGROUP(pQInfo, g);

    size_t numOfTables = taosArrayGetSize(group);
    for (int32_t k = 0; k < numOfTables; ++k) {
      STableQueryInfo *pCheckInfo = taosArrayGetP(group, k);
      updateTableQueryInfoForReverseScan(pQuery, pCheckInfo);
    }
  }
}

2950
/* Apply SWITCH_ORDER to the `order` field of every output function context. */
void switchCtxOrder(SQueryRuntimeEnv *pRuntimeEnv) {
  SQLFunctionCtx *pCtx = pRuntimeEnv->pCtx;
  int32_t         numOfOutput = pRuntimeEnv->pQuery->numOfOutput;

  for (int32_t idx = 0; idx < numOfOutput; ++idx) {
    SWITCH_ORDER(pCtx[idx].order);
  }
}

/*
 * Initialise a window result row: record its position info, allocate one
 * zeroed SResultInfo per output column and let setWindowResultInfo() set up
 * the intermediate result output buffers.
 *
 * NOTE(review): the calloc() result is handed to setWindowResultInfo() without
 * a NULL check - confirm how allocation failure is expected to be handled here.
 */
void createQueryResultInfo(SQuery *pQuery, SWindowResult *pResultRow, bool isSTableQuery, SPosInfo *posInfo) {
  pResultRow->pos = *posInfo;
  pResultRow->resultInfo = calloc((size_t)pQuery->numOfOutput, sizeof(SResultInfo));

  // set the intermediate result output buffer
  setWindowResultInfo(pResultRow->resultInfo, pQuery, isSTableQuery);
}

/*
 * Point every function context back at the start of its output buffer
 * (pQuery->sdata[i]->data), reset and re-attach its result info from
 * pRuntimeEnv->resultInfo, zero the output buffer, and finally re-initialise
 * the contexts through initCtxOutputBuf().
 *
 * TOP/BOTTOM/DIFF additionally write timestamps, so their ptsOutputBuf is set
 * to the output buffer of column 0.
 */
void resetCtxOutputBuf(SQueryRuntimeEnv *pRuntimeEnv) {
  SQuery *pQuery = pRuntimeEnv->pQuery;

  for (int32_t col = 0; col < pQuery->numOfOutput; ++col) {
    SQLFunctionCtx *pc = &pRuntimeEnv->pCtx[col];
    SResultInfo    *pInfo = &pRuntimeEnv->resultInfo[col];

    pc->aOutputBuf = pQuery->sdata[col]->data;

    /*
     * set the output buffer information and intermediate buffer
     * not all queries require the interResultBuf, such as COUNT/TAGPRJ/PRJ/TAG etc.
     */
    resetResultInfo(pInfo);
    pc->resultInfo = pInfo;

    // set the timestamp output buffer for top/bottom/diff query
    int32_t funcId = pQuery->pSelectExpr[col].base.functionId;
    bool    needTsOutput = (funcId == TSDB_FUNC_TOP) || (funcId == TSDB_FUNC_BOTTOM) || (funcId == TSDB_FUNC_DIFF);
    if (needTsOutput) {
      pc->ptsOutputBuf = pRuntimeEnv->pCtx[0].aOutputBuf;
    }

    memset(pQuery->sdata[col]->data, 0, (size_t)pQuery->pSelectExpr[col].bytes * pQuery->rec.capacity);
  }

  initCtxOutputBuf(pRuntimeEnv);
}

/*
 * Advance the output position of the function contexts by `output` rows and
 * reset their result info.
 *
 * Only functions whose status has IS_OUTER_FORWARD set get their aOutputBuf
 * moved. DIFF must not be among the output functions (asserted).
 */
void forwardCtxOutputBuf(SQueryRuntimeEnv *pRuntimeEnv, int64_t output) {
  SQuery *pQuery = pRuntimeEnv->pQuery;

  // reset the execution contexts
  for (int32_t col = 0; col < pQuery->numOfOutput; ++col) {
    SQLFunctionCtx *pc = &pRuntimeEnv->pCtx[col];
    int32_t         funcId = pQuery->pSelectExpr[col].base.functionId;
    assert(funcId != TSDB_FUNC_DIFF);

    // set next output position
    if (IS_OUTER_FORWARD(aAggs[funcId].nStatus)) {
      pc->aOutputBuf += pc->outputBytes * output;
    }

    /*
     * NOTE: for top/bottom query, the value of first column of output (timestamp) are assigned
     * in the procedure of top/bottom routine
     * the output buffer in top/bottom routine is ptsOutputBuf, so we need to forward the output buffer
     *
     * diff function is handled in multi-output function
     */
    if (funcId == TSDB_FUNC_TOP || funcId == TSDB_FUNC_BOTTOM) {
      pc->ptsOutputBuf += TSDB_KEYSIZE * output;
    }

    resetResultInfo(pc->resultInfo);
  }
}

/*
 * Set currentStage of every function context to 0 and run the function's init
 * callback for each context whose result info is not initialised yet.
 */
void initCtxOutputBuf(SQueryRuntimeEnv *pRuntimeEnv) {
  SQuery *pQuery = pRuntimeEnv->pQuery;

  for (int32_t col = 0; col < pQuery->numOfOutput; ++col) {
    SQLFunctionCtx *pc = &pRuntimeEnv->pCtx[col];
    int32_t         funcId = pQuery->pSelectExpr[col].base.functionId;

    pc->currentStage = 0;

    SResultInfo *pResInfo = GET_RES_INFO(pc);
    if (!pResInfo->initialized) {
      aAggs[funcId].init(pc);
    }
  }
}

3037
/*
 * Apply the pending result offset (pQuery->limit.offset) to the rows currently
 * held in the output buffers.
 *
 * - Nothing happens when there are no rows or no offset.
 * - If all rows fall inside the offset they are dropped: the offset is reduced
 *   by the row count, the output buffers are reset and QUERY_RESBUF_FULL is cleared.
 * - Otherwise the first `offset` rows are removed by moving the remaining rows
 *   to the front of each output buffer, the offset becomes 0 and the write
 *   positions are placed right behind the remaining rows.
 */
void skipResults(SQueryRuntimeEnv *pRuntimeEnv) {
  SQuery *pQuery = pRuntimeEnv->pQuery;
  if (pQuery->rec.rows == 0 || pQuery->limit.offset == 0) {
    return;
  }

  // every row produced so far is skipped
  if (pQuery->rec.rows <= pQuery->limit.offset) {
    qDebug("QInfo:%p skip rows:%" PRId64 ", new offset:%" PRIu64, GET_QINFO_ADDR(pRuntimeEnv), pQuery->rec.rows,
        pQuery->limit.offset - pQuery->rec.rows);

    pQuery->limit.offset -= pQuery->rec.rows;
    pQuery->rec.rows = 0;

    resetCtxOutputBuf(pRuntimeEnv);

    // clear the buffer full flag if exists
    CLEAR_QUERY_STATUS(pQuery, QUERY_RESBUF_FULL);
    return;
  }

  // only the leading rows are skipped
  int64_t numOfSkip = pQuery->limit.offset;
  pQuery->rec.rows -= numOfSkip;
  pQuery->limit.offset = 0;

  qDebug("QInfo:%p skip row:%"PRId64", new offset:%d, numOfRows remain:%" PRIu64, GET_QINFO_ADDR(pRuntimeEnv), numOfSkip,
         0, pQuery->rec.rows);

  for (int32_t col = 0; col < pQuery->numOfOutput; ++col) {
    SQLFunctionCtx *pc = &pRuntimeEnv->pCtx[col];
    int32_t         funcId = pQuery->pSelectExpr[col].base.functionId;
    int32_t         bytes = pc->outputBytes;

    // source and destination overlap, hence memmove
    memmove(pQuery->sdata[col]->data, (char*) pQuery->sdata[col]->data + bytes * numOfSkip, pQuery->rec.rows * bytes);
    pc->aOutputBuf = ((char*) pQuery->sdata[col]->data) + pQuery->rec.rows * bytes;

    if (funcId == TSDB_FUNC_DIFF || funcId == TSDB_FUNC_TOP || funcId == TSDB_FUNC_BOTTOM) {
      pc->ptsOutputBuf = pRuntimeEnv->pCtx[0].aOutputBuf;
    }
  }

  updateNumOfResult(pRuntimeEnv, pQuery->rec.rows);
}

/*
 * Update pQuery->status. QUERY_NOT_COMPLETED replaces the whole status; any
 * other value is OR-ed into the status after QUERY_NOT_COMPLETED has been
 * cleared, because QUERY_NOT_COMPLETED is not compatible with any other status.
 */
void setQueryStatus(SQuery *pQuery, int8_t status) {
  if (status != QUERY_NOT_COMPLETED) {
    CLEAR_QUERY_STATUS(pQuery, QUERY_NOT_COMPLETED);
    pQuery->status |= status;
    return;
  }

  pQuery->status = status;
}

/**
 * Call xNextStep for every output function except TSDB_FUNC_TS, using the contexts as they are
 * currently bound in pRuntimeEnv->pCtx.
 *
 * @return true if at least one visited function reports its result info as not complete
 */
static bool stepAllFuncsAndCheckPending(SQueryRuntimeEnv *pRuntimeEnv) {
  SQuery *pQuery = pRuntimeEnv->pQuery;
  bool    pending = false;

  for (int32_t col = 0; col < pQuery->numOfOutput; ++col) {
    int16_t functId = pQuery->pSelectExpr[col].base.functionId;
    if (functId != TSDB_FUNC_TS) {
      aAggs[functId].xNextStep(&pRuntimeEnv->pCtx[col]);

      // the completion flag is read only after the function has been advanced
      SResultInfo *pResInfo = GET_RES_INFO(&pRuntimeEnv->pCtx[col]);
      if (!pResInfo->complete) {
        pending = true;
      }
    }
  }

  return pending;
}

/**
 * Advance every output function to its next step and report whether another pass over the
 * data blocks is required.
 *
 * For group-by-normal-column or interval queries the functions are advanced once per closed
 * window result (after binding that window's output buffers); otherwise they are advanced once.
 *
 * @param pRuntimeEnv  query runtime environment
 * @return true if any function is still not complete after being advanced
 */
bool needScanDataBlocksAgain(SQueryRuntimeEnv *pRuntimeEnv) {
  SQuery *pQuery = pRuntimeEnv->pQuery;

  if (!(pRuntimeEnv->groupbyNormalCol || isIntervalQuery(pQuery))) {
    return stepAllFuncsAndCheckPending(pRuntimeEnv);
  }

  SWindowResInfo *pWindowResInfo = &pRuntimeEnv->windowResInfo;
  bool            again = false;

  for (int32_t w = 0; w < pWindowResInfo->size; ++w) {
    SWindowResult *pResult = getWindowResult(pWindowResInfo, w);
    if (pResult->status.closed) {
      setWindowResOutputBuf(pRuntimeEnv, pResult);
      if (stepAllFuncsAndCheckPending(pRuntimeEnv)) {
        again = true;
      }
    }
  }

  return again;
}

H
Haojun Liao 已提交
3133
/**
 * Take a snapshot of the query state that a later repeat/reverse scan needs in order to
 * restore or re-run the scan.
 *
 * @param pRuntimeEnv  query runtime environment
 * @param start        start key of the scan; asserted to be <= the current table's lastKey
 *                     for ascending queries and >= it for descending queries
 * @return snapshot holding the query status, the current window index, `start` as lastKey,
 *         the query window, and a scan window [start, current table's win.ekey]
 */
static SQueryStatusInfo getQueryStatusInfo(SQueryRuntimeEnv *pRuntimeEnv, TSKEY start) {
  SQuery          *pQuery = pRuntimeEnv->pQuery;
  STableQueryInfo *pCurrent = pQuery->current;

  assert(QUERY_IS_ASC_QUERY(pQuery) ? (start <= pCurrent->lastKey) : (start >= pCurrent->lastKey));

  STimeWindow scanWindow = {.skey = start, .ekey = pCurrent->win.ekey};

  SQueryStatusInfo snapshot = {
      .curWindow   = scanWindow,
      .w           = pQuery->window,
      .lastKey     = start,
      .windowIndex = pRuntimeEnv->windowResInfo.curIndex,
      .status      = pQuery->status,
  };

  return snapshot;
}

3151 3152 3153 3154
/**
 * Switch the runtime environment over to a reverse-direction scan.
 *
 * Saves the ts-buffer cursor into pStatus->cur, flips the order of the ts buffer (when present)
 * and of the query, installs pStatus->curWindow with its two bounds exchanged as the query
 * window, marks the scan as REVERSE_SCAN and opens a new secondary TSDB query handle for that
 * window (any previous secondary handle is cleaned up first).
 *
 * @param pRuntimeEnv  runtime environment embedded in the owning SQInfo
 * @param pStatus      status snapshot; curWindow is read, cur is written
 */
static void setEnvBeforeReverseScan(SQueryRuntimeEnv *pRuntimeEnv, SQueryStatusInfo *pStatus) {
  SQInfo *pQInfo = GET_QINFO_ADDR(pRuntimeEnv);
  SQuery *pQuery = pRuntimeEnv->pQuery;

  // remember the ts-buffer position, then turn the buffer around
  pStatus->cur = tsBufGetCursor(pRuntimeEnv->pTSBuf);
  if (pRuntimeEnv->pTSBuf != NULL) {
    SWITCH_ORDER(pRuntimeEnv->pTSBuf->cur.order);
    tsBufNextPos(pRuntimeEnv->pTSBuf);
  }

  // the reverse scan covers the same window with start and end exchanged
  TSKEY forwardStart = pStatus->curWindow.skey;
  TSKEY forwardEnd = pStatus->curWindow.ekey;

  pQuery->window = pStatus->curWindow;
  pQuery->window.skey = forwardEnd;
  pQuery->window.ekey = forwardStart;

  SWITCH_ORDER(pQuery->order.order);
  SET_REVERSE_SCAN_FLAG(pRuntimeEnv);

  STsdbQueryCond cond = {
      .twindow   = pQuery->window,
      .order     = pQuery->order.order,
      .colList   = pQuery->colList,
      .numOfCols = pQuery->numOfCols,
  };

  // drop the handle left over from a previous secondary scan before opening a new one
  if (pRuntimeEnv->pSecQueryHandle != NULL) {
    tsdbCleanupQueryHandle(pRuntimeEnv->pSecQueryHandle);
  }
  pRuntimeEnv->pSecQueryHandle = tsdbQueryTables(pQInfo->tsdb, &cond, &pQInfo->tableGroupInfo, pQInfo);

  setQueryStatus(pQuery, QUERY_NOT_COMPLETED);
  switchCtxOrder(pRuntimeEnv);
  disableFuncInReverseScan(pQInfo);
}

3188 3189
/**
 * Undo the changes made for a reverse scan and return to the master scan state.
 *
 * Flips the query order back, restores the ts-buffer cursor saved in pStatus->cur, marks the
 * scan as MASTER_SCAN and restores lastKey, status and the time window from the snapshot.
 *
 * @param pRuntimeEnv  query runtime environment
 * @param pStatus      snapshot to restore from (cur, lastKey, status and w are read)
 */
static void clearEnvAfterReverseScan(SQueryRuntimeEnv *pRuntimeEnv, SQueryStatusInfo *pStatus) {
  SQuery          *pQuery = pRuntimeEnv->pQuery;
  STableQueryInfo *pCurrent = pQuery->current;

  SWITCH_ORDER(pQuery->order.order);
  switchCtxOrder(pRuntimeEnv);

  tsBufSetCursor(pRuntimeEnv->pTSBuf, &pStatus->cur);
  if (pRuntimeEnv->pTSBuf != NULL) {
    pRuntimeEnv->pTSBuf->cur.order = pQuery->order.order;
  }

  SET_MASTER_SCAN_FLAG(pRuntimeEnv);

  // restore the key, status and window recorded before the reverse scan
  pQuery->status = pStatus->status;
  pCurrent->lastKey = pStatus->lastKey;
  pCurrent->win = pStatus->w;
  pQuery->window = pCurrent->win;
}

3211
/**
 * Scan the data blocks of the current table, repeating the scan while any output function
 * still requires another pass, and finish with a reverse scan when the query needs one.
 *
 * @param pRuntimeEnv  runtime environment embedded in the owning SQInfo
 * @param start        start key of the scan, recorded in the status snapshot
 *
 * Returns early, without the reverse scan, when the query is found to be killed after a
 * repeat scan has been set up.
 */
void scanOneTableDataBlocks(SQueryRuntimeEnv *pRuntimeEnv, TSKEY start) {
  SQInfo          *pQInfo = (SQInfo *) GET_QINFO_ADDR(pRuntimeEnv);
  SQuery          *pQuery = pRuntimeEnv->pQuery;
  STableQueryInfo *pCurrent = pQuery->current;

  setQueryStatus(pQuery, QUERY_NOT_COMPLETED);

  // snapshot of the starting position, updated after each master scan
  SQueryStatusInfo qstatus = getQueryStatusInfo(pRuntimeEnv, start);

  SET_MASTER_SCAN_FLAG(pRuntimeEnv);
  int32_t step = GET_FORWARD_DIRECTION_FACTOR(pQuery->order.order);

  for (;;) {
    doScanAllDataBlocks(pRuntimeEnv);

    if (IS_MASTER_SCAN(pRuntimeEnv)) {
      // record where the master scan ended; a repeat scan covers exactly this range
      qstatus.status = pQuery->status;
      qstatus.curWindow.ekey = pCurrent->lastKey - step;
      qstatus.lastKey = pCurrent->lastKey;
    }

    if (!needScanDataBlocksAgain(pRuntimeEnv)) {
      // a repeat scan must not leak its own status: put back the master scan status
      if (pRuntimeEnv->scanFlag == REPEAT_SCAN) {
        pQuery->status = qstatus.status;
      }
      break;
    }

    STsdbQueryCond cond = {
        .twindow   = qstatus.curWindow,
        .order     = pQuery->order.order,
        .colList   = pQuery->colList,
        .numOfCols = pQuery->numOfCols,
    };

    if (pRuntimeEnv->pSecQueryHandle != NULL) {
      tsdbCleanupQueryHandle(pRuntimeEnv->pSecQueryHandle);
    }

    pRuntimeEnv->pSecQueryHandle = tsdbQueryTables(pQInfo->tsdb, &cond, &pQInfo->tableGroupInfo, pQInfo);
    pRuntimeEnv->windowResInfo.curIndex = qstatus.windowIndex;

    setQueryStatus(pQuery, QUERY_NOT_COMPLETED);
    pRuntimeEnv->scanFlag = REPEAT_SCAN;

    qDebug("QInfo:%p start to repeat scan data blocks due to query func required, qrange:%"PRId64"-%"PRId64, pQInfo,
        cond.twindow.skey, cond.twindow.ekey);

    // stop right away if the query has been killed
    if (isQueryKilled(pQInfo)) {
      return;
    }
  }

  if (needReverseScan(pQuery)) {
    setEnvBeforeReverseScan(pRuntimeEnv, &qstatus);

    // reverse scan from the current position
    qDebug("QInfo:%p start to reverse scan", pQInfo);
    doScanAllDataBlocks(pRuntimeEnv);

    clearEnvAfterReverseScan(pRuntimeEnv, &qstatus);
  }
}

H
hjxilinx 已提交
3281
/**
 * Call xFinalize of every output function to produce the final query result.
 *
 * For group-by-normal-column or interval queries this is done once per closed window result,
 * and each such window's numOfRows is set from getNumOfResult(); for group-by-normal-column
 * queries all time windows are closed first. Other queries are finalized once.
 *
 * @param pRuntimeEnv  query runtime environment
 */
void finalizeQueryResult(SQueryRuntimeEnv *pRuntimeEnv) {
  SQuery *pQuery = pRuntimeEnv->pQuery;

  if (!(pRuntimeEnv->groupbyNormalCol || isIntervalQuery(pQuery))) {
    for (int32_t col = 0; col < pQuery->numOfOutput; ++col) {
      aAggs[pQuery->pSelectExpr[col].base.functionId].xFinalize(&pRuntimeEnv->pCtx[col]);
    }
    return;
  }

  SWindowResInfo *pWindowResInfo = &pRuntimeEnv->windowResInfo;
  if (pRuntimeEnv->groupbyNormalCol) {
    closeAllTimeWindow(pWindowResInfo);
  }

  for (int32_t w = 0; w < pWindowResInfo->size; ++w) {
    if (isWindowResClosed(pWindowResInfo, w)) {
      SWindowResult *pWindowRes = &pWindowResInfo->pResult[w];
      setWindowResOutputBuf(pRuntimeEnv, pWindowRes);

      for (int32_t col = 0; col < pQuery->numOfOutput; ++col) {
        aAggs[pQuery->pSelectExpr[col].base.functionId].xFinalize(&pRuntimeEnv->pCtx[col]);
      }

      // the number of output rows of a group is usually 1, except for top/bottom queries
      pWindowRes->numOfRows = getNumOfResult(pRuntimeEnv);
    }
  }
}

/**
 * Check whether the query has at least one output function other than
 * TSDB_FUNC_TS, TSDB_FUNC_TAG and TSDB_FUNC_TAGPRJ.
 *
 * @param pQuery  query whose select expressions are inspected
 * @return true as soon as such a function is found, false otherwise
 */
static bool hasMainOutput(SQuery *pQuery) {
  for (int32_t col = 0; col < pQuery->numOfOutput; ++col) {
    int32_t functionId = pQuery->pSelectExpr[col].base.functionId;

    bool auxiliary =
        (functionId == TSDB_FUNC_TS) || (functionId == TSDB_FUNC_TAG) || (functionId == TSDB_FUNC_TAGPRJ);
    if (!auxiliary) {
      return true;
    }
  }

  return false;
}

H
Haojun Liao 已提交
3329
/**
 * Initialize a per-table query info object in caller-provided storage.
 *
 * @param pRuntimeEnv  query runtime environment
 * @param pTable       table handle stored in the info object
 * @param win          time window of the table query; win.skey also becomes the initial lastKey
 * @param buf          storage used as the STableQueryInfo; it is not allocated here and is
 *                     dereferenced without a NULL check
 * @return buf, viewed as STableQueryInfo
 */
static STableQueryInfo *createTableQueryInfo(SQueryRuntimeEnv *pRuntimeEnv, void* pTable, STimeWindow win, void* buf) {
  SQuery          *pQuery = pRuntimeEnv->pQuery;
  STableQueryInfo *pInfo = buf;

  pInfo->pTable = pTable;
  pInfo->win = win;
  pInfo->lastKey = win.skey;
  pInfo->cur.vgroupIndex = -1;

  // interval/group-by queries start with a larger window result set
  bool    multiWindow = QUERY_IS_INTERVAL_QUERY(pQuery) || pRuntimeEnv->groupbyNormalCol;
  int32_t initialSize = multiWindow ? 20 : 1;
  int32_t initialThreshold = multiWindow ? 100 : 1;

  initWindowResInfo(&pInfo->windowResInfo, pRuntimeEnv, initialSize, initialThreshold, TSDB_DATA_TYPE_INT);
  return pInfo;
}

3353
/**
 * Release the window result info held by a per-table query info object.
 * The object itself is not freed here. A NULL pTableQueryInfo is ignored.
 *
 * @param pTableQueryInfo  table query info to clean up, may be NULL
 * @param numOfCols        column count forwarded to cleanupTimeWindowInfo
 */
void destroyTableQueryInfo(STableQueryInfo *pTableQueryInfo, int32_t numOfCols) {
  if (pTableQueryInfo != NULL) {
    cleanupTimeWindowInfo(&pTableQueryInfo->windowResInfo, numOfCols);
  }
}

H
Haojun Liao 已提交
3361 3362 3363 3364 3365 3366 3367
/*
 * Make _tableInfo the current table of the query owned by _runtime, and assert that its
 * lastKey lies on the correct side of win.skey for the scan direction (>= for ascending
 * queries, <= for descending ones).
 *
 * Every use of a macro argument is parenthesized so that any expression can be passed.
 * NOTE: _tableInfo is evaluated more than once; pass an expression without side effects.
 */
#define SET_CURRENT_QUERY_TABLE_INFO(_runtime, _tableInfo)                                      \
  do {                                                                                          \
    SQuery *_query = (_runtime)->pQuery;                                                        \
    _query->current = (_tableInfo);                                                             \
    assert((((_tableInfo)->lastKey >= (_tableInfo)->win.skey) && QUERY_IS_ASC_QUERY(_query)) || \
           (((_tableInfo)->lastKey <= (_tableInfo)->win.skey) && !QUERY_IS_ASC_QUERY(_query))); \
  } while (0)
3368 3369 3370 3371

/**
 * set output buffer for different group
 * @param pRuntimeEnv
3372
 * @param pDataBlockInfo
3373
 */
H
Haojun Liao 已提交
3374
void setExecutionContext(SQInfo *pQInfo, int32_t groupIndex, TSKEY nextKey) {
3375
  SQueryRuntimeEnv *pRuntimeEnv = &pQInfo->runtimeEnv;
H
Haojun Liao 已提交
3376 3377 3378
  STableQueryInfo  *pTableQueryInfo = pRuntimeEnv->pQuery->current;
  SWindowResInfo   *pWindowResInfo = &pRuntimeEnv->windowResInfo;

H
Haojun Liao 已提交
3379 3380
  // lastKey needs to be updated
  pTableQueryInfo->lastKey = nextKey;
H
Haojun Liao 已提交
3381 3382 3383 3384

  if (pRuntimeEnv->hasTagResults || pRuntimeEnv->pTSBuf != NULL) {
    setAdditionalInfo(pQInfo, pTableQueryInfo->pTable, pTableQueryInfo);
  }
H
Haojun Liao 已提交
3385

H
Haojun Liao 已提交
3386 3387 3388
  if (pRuntimeEnv->prevGroupId != INT32_MIN && pRuntimeEnv->prevGroupId == groupIndex) {
    return;
  }
3389

H
Haojun Liao 已提交
3390
  int32_t GROUPRESULTID = 1;
3391
  SWindowResult *pWindowRes = doSetTimeWindowFromKey(pRuntimeEnv, pWindowResInfo, (char *)&groupIndex, sizeof(groupIndex));
3392 3393 3394
  if (pWindowRes == NULL) {
    return;
  }
3395

3396 3397 3398 3399 3400 3401 3402 3403 3404 3405
  /*
   * not assign result buffer yet, add new result buffer
   * all group belong to one result set, and each group result has different group id so set the id to be one
   */
  if (pWindowRes->pos.pageId == -1) {
    if (addNewWindowResultBuf(pWindowRes, pRuntimeEnv->pResultBuf, GROUPRESULTID, pRuntimeEnv->numOfRowsPerPage) !=
        TSDB_CODE_SUCCESS) {
      return;
    }
  }
3406

H
Haojun Liao 已提交
3407 3408
  // record the current active group id
  pRuntimeEnv->prevGroupId = groupIndex;
3409 3410 3411 3412
  setWindowResOutputBuf(pRuntimeEnv, pWindowRes);
  initCtxOutputBuf(pRuntimeEnv);
}

H
Haojun Liao 已提交
3413
/**
 * Bind the output buffers and result info of every output function context to the given
 * window result.
 *
 * For TOP, BOTTOM and DIFF the timestamp output buffer is pointed at the output buffer of
 * context 0. The super table flag of each result info is copied from the runtime environment.
 *
 * @param pRuntimeEnv  query runtime environment
 * @param pResult      window result whose result page and result info are bound
 */
void setWindowResOutputBuf(SQueryRuntimeEnv *pRuntimeEnv, SWindowResult *pResult) {
  SQuery *pQuery = pRuntimeEnv->pQuery;

  for (int32_t col = 0; col < pQuery->numOfOutput; ++col) {
    SQLFunctionCtx *pCtx = &pRuntimeEnv->pCtx[col];
    int32_t         functionId = pQuery->pSelectExpr[col].base.functionId;

    pCtx->aOutputBuf = getPosInResultPage(pRuntimeEnv, col, pResult);

    bool needTsBuf =
        (functionId == TSDB_FUNC_TOP) || (functionId == TSDB_FUNC_BOTTOM) || (functionId == TSDB_FUNC_DIFF);
    if (needTsBuf) {
      pCtx->ptsOutputBuf = pRuntimeEnv->pCtx[0].aOutputBuf;
    }

    // attach the result info of this window, then flag it for super table queries
    pCtx->resultInfo = &pResult->resultInfo[col];

    SResultInfo *pResInfo = GET_RES_INFO(pCtx);
    pResInfo->superTableQ = pRuntimeEnv->stableQuery;
  }
}

H
Haojun Liao 已提交
3438 3439
/**
 * Bind every output function context to the given window result and initialize the contexts
 * that have not been initialized yet.
 *
 * The result info is always attached. A context whose result info is both initialized and
 * complete is otherwise left untouched. For the remaining ones the output buffer is bound,
 * currentStage is reset to 0, the timestamp buffer is set for TOP/BOTTOM/DIFF, the super table
 * flag is copied and the function's init callback runs if the result info is uninitialized.
 *
 * @param pRuntimeEnv  query runtime environment
 * @param pResult      window result whose result page and result info are bound
 */
void setWindowResOutputBufInitCtx(SQueryRuntimeEnv *pRuntimeEnv, SWindowResult *pResult) {
  SQuery *pQuery = pRuntimeEnv->pQuery;

  for (int32_t col = 0; col < pQuery->numOfOutput; ++col) {
    SQLFunctionCtx *pCtx = &pRuntimeEnv->pCtx[col];
    pCtx->resultInfo = &pResult->resultInfo[col];

    bool finished = pCtx->resultInfo->initialized && pCtx->resultInfo->complete;
    if (!finished) {
      pCtx->aOutputBuf = getPosInResultPage(pRuntimeEnv, col, pResult);
      pCtx->currentStage = 0;

      int32_t functionId = pCtx->functionId;
      if (functionId == TSDB_FUNC_TOP || functionId == TSDB_FUNC_BOTTOM || functionId == TSDB_FUNC_DIFF) {
        pCtx->ptsOutputBuf = pRuntimeEnv->pCtx[0].aOutputBuf;
      }

      // set the super table query flag
      pCtx->resultInfo->superTableQ = pRuntimeEnv->stableQuery;

      if (!pCtx->resultInfo->initialized) {
        aAggs[functionId].init(pCtx);
      }
    }
  }
}

3470
/**
 * Set the tag values for the given table and, when a ts buffer is in use, position the ts
 * buffer for that table.
 *
 * On the first call for a table (cur.vgroupIndex == -1) the tag is taken from context 0, the
 * ts buffer is positioned at the start of the elements for that tag and the resulting cursor
 * is saved in the table query info. On later calls the saved cursor is restored.
 *
 * @param pQInfo           query info object
 * @param pTable           table handle passed to setTagVal
 * @param pTableQueryInfo  per-table query info holding the tag and the ts-buffer cursor
 * @return always 0
 */
int32_t setAdditionalInfo(SQInfo *pQInfo, void* pTable, STableQueryInfo *pTableQueryInfo) {
  SQueryRuntimeEnv *pRuntimeEnv = &pQInfo->runtimeEnv;

  setTagVal(pRuntimeEnv, pTable, pQInfo->tsdb);

  // without a ts buffer there is no cursor to maintain
  if (pRuntimeEnv->pTSBuf == NULL) {
    return 0;
  }

  // both the master and supplement scan need the correct ts comp start position
  if (pTableQueryInfo->cur.vgroupIndex != -1) {
    tsBufSetCursor(pRuntimeEnv->pTSBuf, &pTableQueryInfo->cur);
    return 0;
  }

  pTableQueryInfo->tag = pRuntimeEnv->pCtx[0].tag.i64Key;
  tsBufGetElemStartPos(pRuntimeEnv->pTSBuf, 0, pTableQueryInfo->tag);

  // keep the cursor of the current table for the next call
  pTableQueryInfo->cur = pRuntimeEnv->pTSBuf->cur;
  return 0;
}

/*
 * There are two cases to handle:
 *
 * 1. Query range is not set yet (queryRangeSet = 0). we need to set the query range info, including pQuery->lastKey,
 *    pQuery->window.skey, and pQuery->eKey.
 * 2. Query range is set and query is in progress. There may be another result with the same query ranges to be
 *    merged during merge stage. In this case, we need the pTableQueryInfo->lastResRows to decide if there
 *    is a previous result generated or not.
 */
H
hjxilinx 已提交
3501
void setIntervalQueryRange(SQInfo *pQInfo, TSKEY key) {
3502 3503
  SQueryRuntimeEnv *pRuntimeEnv = &pQInfo->runtimeEnv;
  SQuery *          pQuery = pRuntimeEnv->pQuery;
H
hjxilinx 已提交
3504 3505
  STableQueryInfo *pTableQueryInfo = pQuery->current;
  
3506 3507 3508
  if (pTableQueryInfo->queryRangeSet) {
    pTableQueryInfo->lastKey = key;
  } else {
3509
    pTableQueryInfo->win.skey = key;
3510
    STimeWindow win = {.skey = key, .ekey = pQuery->window.ekey};
3511

3512 3513 3514 3515 3516
    // for too small query range, no data in this interval.
    if ((QUERY_IS_ASC_QUERY(pQuery) && (pQuery->window.ekey < pQuery->window.skey)) ||
        (!QUERY_IS_ASC_QUERY(pQuery) && (pQuery->window.skey < pQuery->window.ekey))) {
      return;
    }
3517

3518 3519 3520 3521 3522 3523
    /**
     * In handling the both ascending and descending order super table query, we need to find the first qualified
     * timestamp of this table, and then set the first qualified start timestamp.
     * In ascending query, key is the first qualified timestamp. However, in the descending order query, additional
     * operations involve.
     */
H
Haojun Liao 已提交
3524
    STimeWindow     w = TSWINDOW_INITIALIZER, realWin = TSWINDOW_INITIALIZER;
3525
    SWindowResInfo *pWindowResInfo = &pTableQueryInfo->windowResInfo;
3526

H
Haojun Liao 已提交
3527 3528
    TSKEY sk = MIN(win.skey, win.ekey);
    TSKEY ek = MAX(win.skey, win.ekey);
H
Haojun Liao 已提交
3529
    getAlignQueryTimeWindow(pQuery, win.skey, sk, ek, &realWin, &w);
3530
    pWindowResInfo->startTime = pTableQueryInfo->win.skey;  // windowSKey may be 0 in case of 1970 timestamp
3531

3532 3533
    if (pWindowResInfo->prevSKey == TSKEY_INITIAL_VAL) {
      if (!QUERY_IS_ASC_QUERY(pQuery)) {
H
Haojun Liao 已提交
3534
        assert(win.ekey == pQuery->window.ekey);
3535
      }
3536 3537
      
      pWindowResInfo->prevSKey = w.skey;
3538
    }
3539

3540
    pTableQueryInfo->queryRangeSet = 1;
3541
    pTableQueryInfo->lastKey = pTableQueryInfo->win.skey;
3542 3543 3544 3545
  }
}

/**
 * Check whether any output function of the query has the TSDB_FUNCSTATE_NEED_TS bit set in
 * its nStatus.
 *
 * @param pQuery  query whose select expressions are inspected
 * @return true if at least one function needs the timestamp, false otherwise
 */
bool requireTimestamp(SQuery *pQuery) {
  bool needed = false;

  for (int32_t col = 0; col < pQuery->numOfOutput && !needed; col++) {
    int32_t functionId = pQuery->pSelectExpr[col].base.functionId;
    needed = (aAggs[functionId].nStatus & TSDB_FUNCSTATE_NEED_TS) != 0;
  }

  return needed;
}

/**
 * Decide whether the primary timestamp column of a data block has to be loaded.
 *
 * It is required when
 *  1. the current table's lastKey or the query window's ekey lies inside the block's time
 *     window (both ends inclusive), so the precise position has to be located, or
 *  2. any output function requires the timestamp (see requireTimestamp).
 *
 * @param pQuery          query being executed
 * @param pDataBlockInfo  info of the data block, its window is inspected
 * @return true if the timestamp column must be loaded
 */
bool needPrimaryTimestampCol(SQuery *pQuery, SDataBlockInfo *pDataBlockInfo) {
  STimeWindow     *pBlockWin = &pDataBlockInfo->window;
  STableQueryInfo *pCurrent = pQuery->current;

  if (pCurrent->lastKey >= pBlockWin->skey && pCurrent->lastKey <= pBlockWin->ekey) {
    return true;
  }

  if (pQuery->window.ekey >= pBlockWin->skey && pQuery->window.ekey <= pBlockWin->ekey) {
    return true;
  }

  return requireTimestamp(pQuery);
}

/**
 * Number of result subsets of the query: the number of closed time windows for
 * group-by-normal-column or interval queries, the number of table groups otherwise.
 *
 * @param pQInfo  query info object
 * @return number of subsets
 */
static int32_t getNumOfSubset(SQInfo *pQInfo) {
  SQuery *pQuery = pQInfo->runtimeEnv.pQuery;
  int32_t numOfSubset;

  if (pQInfo->runtimeEnv.groupbyNormalCol || (isIntervalQuery(pQuery))) {
    numOfSubset = numOfClosedTimeWindow(&pQInfo->runtimeEnv.windowResInfo);
  } else {
    numOfSubset = GET_NUM_OF_TABLEGROUP(pQInfo);
  }

  return numOfSubset;
}

/**
 * Copy rows from the window results into the query's output buffers (pQuery->sdata) until
 * either all subsets have been visited or the output capacity is reached.
 *
 * Progress is kept in pQInfo->groupIndex (groups consumed so far) and pQInfo->offset (rows
 * of the current group already copied), so a later call resumes where this one stopped.
 *
 * @param pQInfo     query info object
 * @param result     array of window results to copy from
 * @param orderType  TSDB_ORDER_ASC walks the results forward from groupIndex; any other value
 *                   walks them backward, starting groupIndex entries from the end
 * @return number of rows copied by this call
 */
static int32_t doCopyToSData(SQInfo *pQInfo, SWindowResult *result, int32_t orderType) {
  SQueryRuntimeEnv *pRuntimeEnv = &pQInfo->runtimeEnv;
  SQuery           *pQuery = pRuntimeEnv->pQuery;

  qDebug("QInfo:%p start to copy data from windowResInfo to query buf", pQInfo);

  int32_t totalSubset = getNumOfSubset(pQInfo);
  int32_t first = 0;
  int32_t step = 0;

  if (orderType != TSDB_ORDER_ASC) {  // desc order copy all data
    first = totalSubset - pQInfo->groupIndex - 1;
    step = -1;
  } else {
    first = pQInfo->groupIndex;
    step = 1;
  }

  int32_t numOfResult = 0;
  for (int32_t g = first; (g >= 0) && (g < totalSubset); g += step) {
    SWindowResult *pGroup = &result[g];

    // an empty group is consumed without producing any output
    if (pGroup->numOfRows == 0) {
      pQInfo->groupIndex += 1;
      pQInfo->offset = 0;
      continue;
    }

    assert(pQInfo->offset <= 1);

    int32_t oldOffset = pQInfo->offset;
    int32_t numOfRowsToCopy = pGroup->numOfRows - pQInfo->offset;

    if (numOfRowsToCopy <= pQuery->rec.capacity - numOfResult) {
      // the whole remainder of this group fits: move on to the next group
      pQInfo->offset = 0;
      pQInfo->groupIndex += 1;
    } else {
      // not enough output space for the whole group: copy a part and remember the position
      numOfRowsToCopy = pQuery->rec.capacity - numOfResult;
      pQInfo->offset += numOfRowsToCopy;
    }

    for (int32_t col = 0; col < pQuery->numOfOutput; ++col) {
      int32_t size = pRuntimeEnv->pCtx[col].outputBytes;

      char *src = getPosInResultPage(pRuntimeEnv, col, pGroup);
      char *dst = pQuery->sdata[col]->data + numOfResult * size;
      memcpy(dst, src + oldOffset * size, size * numOfRowsToCopy);
    }

    numOfResult += numOfRowsToCopy;
    if (numOfResult == pQuery->rec.capacity) {
      break;
    }
  }

  qDebug("QInfo:%p copy data to query buf completed", pQInfo);

#ifdef _DEBUG_VIEW
  displayInterResult(pQuery->sdata, pRuntimeEnv, numOfResult);
#endif
  return numOfResult;
}

/**
 * Copy window results into the query's output buffers and account for the copied rows.
 *
 * The copy order is the order type of the group-by expression when the query has one, and
 * ascending otherwise. The number of copied rows is added to pQuery->rec.rows, which is
 * asserted not to exceed pQuery->rec.capacity.
 *
 * @param pQInfo  query info object
 * @param result  array of window results to copy from
 */
void copyFromWindowResToSData(SQInfo *pQInfo, SWindowResult *result) {
  SQuery *pQuery = pQInfo->runtimeEnv.pQuery;

  int32_t orderType = TSDB_ORDER_ASC;
  if (pQuery->pGroupbyExpr != NULL) {
    orderType = pQuery->pGroupbyExpr->orderType;
  }

  pQuery->rec.rows += doCopyToSData(pQInfo, result, orderType);
  assert(pQuery->rec.rows <= pQuery->rec.capacity);
}

H
Haojun Liao 已提交
3667
/**
 * For non-interval queries, raise the row count of every window result in the runtime
 * environment to the largest numOfRes among its output functions, ignoring TSDB_FUNC_TS,
 * TSDB_FUNC_TAG and TSDB_FUNC_TAGPRJ. Interval queries are left untouched.
 *
 * @param pRuntimeEnv      query runtime environment
 * @param pTableQueryInfo  not used by this function
 */
static void updateWindowResNumOfRes(SQueryRuntimeEnv *pRuntimeEnv, STableQueryInfo *pTableQueryInfo) {
  SQuery *pQuery = pRuntimeEnv->pQuery;

  if (QUERY_IS_INTERVAL_QUERY(pQuery)) {
    return;
  }

  for (int32_t w = 0; w < pRuntimeEnv->windowResInfo.size; ++w) {
    SWindowResult *pResult = &pRuntimeEnv->windowResInfo.pResult[w];

    for (int32_t col = 0; col < pQuery->numOfOutput; ++col) {
      int32_t functionId = pRuntimeEnv->pCtx[col].functionId;

      bool auxiliary =
          (functionId == TSDB_FUNC_TS) || (functionId == TSDB_FUNC_TAG) || (functionId == TSDB_FUNC_TAGPRJ);
      if (!auxiliary) {
        pResult->numOfRows = MAX(pResult->numOfRows, pResult->resultInfo[col].numOfRes);
      }
    }
  }
}

3688 3689
/**
 * Apply the query functions to one data block of the current table.
 *
 * The start position is the first row for ascending queries and the last row otherwise.
 * Rows are processed one by one when the query has filter columns, a ts buffer or a group-by
 * on a normal column; otherwise the block is processed as a whole. Afterwards the row counts
 * of the window results are refreshed.
 *
 * @param pRuntimeEnv     query runtime environment
 * @param pDataBlockInfo  info of the data block
 * @param pStatis         block statistics forwarded to the apply functions
 * @param pDataBlock      block data forwarded to the apply functions
 * @param searchFn        search function used by the block-wise path only
 */
void stableApplyFunctionsOnBlock(SQueryRuntimeEnv *pRuntimeEnv, SDataBlockInfo *pDataBlockInfo, SDataStatis *pStatis,
    SArray *pDataBlock, __block_search_fn_t searchFn) {
  SQuery          *pQuery = pRuntimeEnv->pQuery;
  STableQueryInfo *pCurrent = pQuery->current;
  SWindowResInfo  *pWindowResInfo = &pCurrent->windowResInfo;

  if (QUERY_IS_ASC_QUERY(pQuery)) {
    pQuery->pos = 0;
  } else {
    pQuery->pos = pDataBlockInfo->rows - 1;
  }

  bool rowByRow = pQuery->numOfFilterCols > 0 || pRuntimeEnv->pTSBuf != NULL || pRuntimeEnv->groupbyNormalCol;
  if (!rowByRow) {
    blockwiseApplyFunctions(pRuntimeEnv, pStatis, pDataBlockInfo, pWindowResInfo, searchFn, pDataBlock);
  } else {
    rowwiseApplyFunctions(pRuntimeEnv, pStatis, pDataBlockInfo, pWindowResInfo, pDataBlock);
  }

  updateWindowResNumOfRes(pRuntimeEnv, pCurrent);
}

3705 3706 3707
/**
 * Check whether the query still has results that have not been returned to the client.
 *
 * - Once a positive limit has been reached by rec.total, nothing remains.
 * - For fill queries (fill type set and not a point interpolation query): rows still buffered
 *   in the fill info count as remaining; otherwise, if the query is completed, results remain
 *   when filling up to the end of the query window would still generate rows. If the query is
 *   not completed, gaps are handled after the next data block is retrieved, so nothing
 *   remains for now.
 * - For other queries: results remain when the query is completed, it is a
 *   group-by-normal-column or interval query, and the window result set is not empty.
 *
 * @param pRuntimeEnv  query runtime environment
 * @return true if there are remaining results
 */
bool queryHasRemainResults(SQueryRuntimeEnv* pRuntimeEnv) {
  SQuery *pQuery = pRuntimeEnv->pQuery;

  if (pQuery->limit.limit > 0 && pQuery->rec.total >= pQuery->limit.limit) {
    return false;
  }

  bool fillQuery = (pQuery->fillType != TSDB_FILL_NONE) && !isPointInterpoQuery(pQuery);
  if (!fillQuery) {
    // there are results waiting to be returned to the client
    return Q_STATUS_EQUAL(pQuery->status, QUERY_COMPLETED) &&
           (pRuntimeEnv->groupbyNormalCol || isIntervalQuery(pQuery)) &&
           (pRuntimeEnv->windowResInfo.size > 0);
  }

  SFillInfo *pFillInfo = pRuntimeEnv->pFillInfo;

  // rows not returned yet must go through the fill operation first
  int32_t remain = taosNumOfRemainRows(pFillInfo);
  if (remain > 0) {
    return true;
  }

  if (!Q_STATUS_EQUAL(pQuery->status, QUERY_COMPLETED)) {
    return false;
  }

  int32_t numOfTotal = getFilledNumOfRes(pFillInfo, pQuery->window.ekey, pQuery->rec.capacity);
  return numOfTotal > 0;
}

/**
 * Serialize the query result into a response buffer.
 *
 * Layout written to data, in order:
 *   1. for each output column, numOfRows values of pSelectExpr[col].bytes bytes each;
 *   2. the number of table id entries as a 32-bit value in network byte order;
 *   3. one STableIdInfo per entry with uid, tid and key converted to big-endian.
 *
 * The count and the table id entries are written with memcpy from local values, because data
 * is a byte pointer at an arbitrary offset and is not guaranteed to be suitably aligned for a
 * direct int32_t or STableIdInfo store. Bytes of an entry other than uid, tid and key are
 * written as zero.
 *
 * After the copy, a completed query is marked QUERY_OVER when nothing is left to return: for
 * super table queries when tableIndex has reached the number of tables, otherwise when
 * queryHasRemainResults() reports false.
 *
 * @param pQInfo     query info object
 * @param numOfRows  number of rows to copy per column
 * @param data       destination buffer; the caller must provide enough space
 */
static void doCopyQueryResultToMsg(SQInfo *pQInfo, int32_t numOfRows, char *data) {
  SQuery *pQuery = pQInfo->runtimeEnv.pQuery;

  for (int32_t col = 0; col < pQuery->numOfOutput; ++col) {
    int32_t bytes = pQuery->pSelectExpr[col].bytes;

    memmove(data, pQuery->sdata[col]->data, bytes * numOfRows);
    data += bytes * numOfRows;
  }

  int32_t numOfTables = (int32_t)taosArrayGetSize(pQInfo->arrTableIdInfo);
  int32_t netNumOfTables = htonl(numOfTables);
  memcpy(data, &netNumOfTables, sizeof(netNumOfTables));
  data += sizeof(int32_t);

  for (int32_t i = 0; i < numOfTables; i++) {
    STableIdInfo *pSrc = taosArrayGet(pQInfo->arrTableIdInfo, i);

    STableIdInfo dst;
    memset(&dst, 0, sizeof(dst));
    dst.uid = htobe64(pSrc->uid);
    dst.tid = htonl(pSrc->tid);
    dst.key = htobe64(pSrc->key);

    memcpy(data, &dst, sizeof(dst));
    data += sizeof(STableIdInfo);
  }

  // all data returned, set query over
  if (Q_STATUS_EQUAL(pQuery->status, QUERY_COMPLETED)) {
    if (pQInfo->runtimeEnv.stableQuery) {
      if (pQInfo->tableIndex >= pQInfo->tableqinfoGroupInfo.numOfTables) {
        setQueryStatus(pQuery, QUERY_OVER);
      }
    } else {
      if (!queryHasRemainResults(&pQInfo->runtimeEnv)) {
        setQueryStatus(pQuery, QUERY_OVER);
      }
    }
  }
}

H
Haojun Liao 已提交
3783
/**
 * Generate filled result rows and apply the remaining query offset to them.
 *
 * Filled rows are generated into pQuery->sdata, at most rec.capacity rows per round.
 *  - If no offset is left, the generated rows are returned as they are.
 *  - If the offset is smaller than the number of generated rows, the first `offset` rows are
 *    dropped by moving the rest to the beginning of each pDst column, the offset is cleared
 *    and the number of surviving rows is returned.
 *  - Otherwise the whole round is discarded, the offset is reduced accordingly and another
 *    round is generated as long as queryHasRemainResults() reports more results.
 *
 * @param pRuntimeEnv  runtime environment embedded in the owning SQInfo
 * @param pDst         per-column pages in which surviving rows are moved to the front
 *                     (NOTE(review): presumably the same pages as pQuery->sdata, into which
 *                     the rows are generated - confirm against callers)
 * @param numOfFilled  not used by this function
 * @return number of rows available after applying the offset, 0 if everything was discarded
 */
int32_t doFillGapsInResults(SQueryRuntimeEnv* pRuntimeEnv, tFilePage **pDst, int32_t *numOfFilled) {
  SQInfo    *pQInfo = GET_QINFO_ADDR(pRuntimeEnv);
  SQuery    *pQuery = pRuntimeEnv->pQuery;
  SFillInfo *pFillInfo = pRuntimeEnv->pFillInfo;

  for (;;) {
    int32_t ret = taosGenerateDataBlock(pFillInfo, (tFilePage**) pQuery->sdata, pQuery->rec.capacity);

    // todo apply limit output function
    /* reached the start position according to the offset value, return immediately */
    if (pQuery->limit.offset == 0) {
      qDebug("QInfo:%p initial numOfRows:%d, generate filled result:%d rows", pQInfo, pFillInfo->numOfRows, ret);
      return ret;
    }

    if (pQuery->limit.offset < ret) {
      qDebug("QInfo:%p initial numOfRows:%d, generate filled result:%d rows, offset:%" PRId64 ". Discard due to offset, remain:%" PRId64 ", new offset:%d",
             pQInfo, pFillInfo->numOfRows, ret, pQuery->limit.offset, ret - pQuery->limit.offset, 0);

      ret -= pQuery->limit.offset;

      // todo !!!!there exactly number of interpo is not valid.
      // todo refactor move to the beginning of buffer
      for (int32_t col = 0; col < pQuery->numOfOutput; ++col) {
        memmove(pDst[col]->data, pDst[col]->data + pQuery->pSelectExpr[col].bytes * pQuery->limit.offset,
                ret * pQuery->pSelectExpr[col].bytes);
      }

      pQuery->limit.offset = 0;
      return ret;
    }

    // the offset swallows this whole round of generated rows
    qDebug("QInfo:%p initial numOfRows:%d, generate filled result:%d rows, offset:%" PRId64 ". Discard due to offset, "
           "remain:%d, new offset:%" PRId64, pQInfo, pFillInfo->numOfRows, ret, pQuery->limit.offset, 0,
        pQuery->limit.offset - ret);

    pQuery->limit.offset -= ret;
    pQuery->rec.rows = 0;

    if (!queryHasRemainResults(pRuntimeEnv)) {
      return 0;
    }
  }
}

3828
/**
 * Write the cost summary of a query to the debug log: elapsed time, total blocks, how many
 * blocks were served from block statistics or block data, total rows and checked rows.
 *
 * @param pQInfo  query info object whose runtime summary is logged
 */
static void queryCostStatis(SQInfo *pQInfo) {
  SQueryRuntimeEnv *pRuntimeEnv = &pQInfo->runtimeEnv;
  SQueryCostInfo   *pSummary = &pRuntimeEnv->summary;

  // todo add the intermediate result save cost!!
  qDebug("QInfo:%p :cost summary: elapsed time:%"PRId64" us, total blocks:%d, use block statis:%d, use block data:%d, "
         "total rows:%"PRId64 ", check rows:%"PRId64, pQInfo, pSummary->elapsedTime, pSummary->totalBlocks,
         pSummary->loadBlockStatis, pSummary->loadBlocks, pSummary->totalRows, pSummary->totalCheckedRows);
}

3872 3873
/*
 * Consume the remaining row offset (pQuery->limit.offset) inside the block
 * described by pBlockInfo.
 *
 * If the offset equals the number of rows in the block, the whole block is
 * skipped: lastKey of the current table is moved one step past the block
 * boundary in scan direction and nothing is loaded.  Otherwise the start
 * position inside the block is computed, the block is retrieved, lastKey is
 * set to the timestamp at that position and the query functions are applied
 * to the block.  In both cases the offset is reset to 0.
 */
static void updateOffsetVal(SQueryRuntimeEnv *pRuntimeEnv, SDataBlockInfo *pBlockInfo) {
  SQuery          *pQuery = pRuntimeEnv->pQuery;
  STableQueryInfo *pCurrent = pQuery->current;

  const bool    ascending = QUERY_IS_ASC_QUERY(pQuery);
  const int32_t direction = GET_FORWARD_DIRECTION_FACTOR(pQuery->order.order);

  // the offset swallows the block entirely: only advance the resume key
  if (pQuery->limit.offset == pBlockInfo->rows) {
    if (ascending) {
      pCurrent->lastKey = pBlockInfo->window.ekey + direction;
    } else {
      pCurrent->lastKey = pBlockInfo->window.skey + direction;
    }

    pQuery->limit.offset = 0;
    return;
  }

  // first row to process, counted from the scan-direction end of the block
  if (ascending) {
    pQuery->pos = pQuery->limit.offset;
  } else {
    pQuery->pos = pBlockInfo->rows - pQuery->limit.offset - 1;
  }

  assert(pQuery->pos >= 0 && pQuery->pos <= pBlockInfo->rows - 1);

  // column 0 of the retrieved block is read as the timestamp column
  SArray          *pDataBlock = tsdbRetrieveDataBlock(pRuntimeEnv->pQueryHandle, NULL);
  SColumnInfoData *pTsColumn = taosArrayGet(pDataBlock, 0);
  TSKEY           *tsList = (TSKEY *)pTsColumn->pData;

  pCurrent->lastKey = tsList[pQuery->pos];
  pQuery->limit.offset = 0;

  int32_t numOfRes = tableApplyFunctionsOnBlock(pRuntimeEnv, pBlockInfo, NULL, binarySearchForKey, pDataBlock);

  qDebug("QInfo:%p check data block, brange:%" PRId64 "-%" PRId64 ", numOfRows:%d, numOfRes:%d, lastKey:%" PRId64,
         GET_QINFO_ADDR(pRuntimeEnv), pBlockInfo->window.skey, pBlockInfo->window.ekey, pBlockInfo->rows, numOfRes,
         pQuery->current->lastKey);
}
3907

3908 3909 3910 3911 3912
/*
 * Skip whole data blocks while the remaining row offset is larger than the
 * number of rows in the block, then hand the block in which the offset ends
 * to updateOffsetVal().
 *
 * Does nothing when there is no positive offset or when filter columns are
 * present.  Returns early if the query has been killed.
 */
void skipBlocks(SQueryRuntimeEnv *pRuntimeEnv) {
  SQuery *pQuery = pRuntimeEnv->pQuery;

  if (pQuery->limit.offset <= 0 || pQuery->numOfFilterCols > 0) {
    return;
  }

  pQuery->pos = 0;

  const int32_t    direction = GET_FORWARD_DIRECTION_FACTOR(pQuery->order.order);
  STableQueryInfo *pCurrent = pQuery->current;
  TsdbQueryHandleT handle = pRuntimeEnv->pQueryHandle;

  while (tsdbNextDataBlock(handle)) {
    if (isQueryKilled(GET_QINFO_ADDR(pRuntimeEnv))) {
      return;
    }

    SDataBlockInfo block = tsdbRetrieveDataBlockInfo(handle);

    // the offset ends inside (or exactly at the end of) this block
    if (!(pQuery->limit.offset > block.rows)) {
      updateOffsetVal(pRuntimeEnv, &block);
      return;
    }

    // the whole block is skipped: account for its rows and move the resume key past it
    pQuery->limit.offset -= block.rows;

    TSKEY boundary = QUERY_IS_ASC_QUERY(pQuery) ? block.window.ekey : block.window.skey;
    pCurrent->lastKey = boundary;
    pCurrent->lastKey += direction;

    qDebug("QInfo:%p skip rows:%d, offset:%" PRId64, GET_QINFO_ADDR(pRuntimeEnv), block.rows, pQuery->limit.offset);
  }
}
3941

H
Haojun Liao 已提交
3942
/*
 * Apply the result offset of an interval query by skipping time windows
 * instead of rows.
 *
 * *start is initialised to the current table's lastKey and, once the offset
 * has been consumed, overwritten with the timestamp from which the caller
 * should continue.  The function always returns true.
 *
 * Nothing is skipped when there is no positive offset, when filter columns
 * are present, or when a ts buffer or fill info is attached to the runtime.
 *
 * Because holes may exist in the data, the offset cannot be translated to
 * intervalTime * offset; each window is counted individually while walking
 * the data blocks.
 */
static bool skipTimeInterval(SQueryRuntimeEnv *pRuntimeEnv, TSKEY* start) {
  SQuery *pQuery = pRuntimeEnv->pQuery;
  *start = pQuery->current->lastKey;

  // if queried with value filter, do NOT forward query start position
  if (pQuery->limit.offset <= 0 || pQuery->numOfFilterCols > 0 || pRuntimeEnv->pTSBuf != NULL ||
      pRuntimeEnv->pFillInfo != NULL) {
    return true;
  }

  SWindowResInfo  *pWinInfo = &pRuntimeEnv->windowResInfo;
  STableQueryInfo *pCurrent = pQuery->current;

  assert(pWinInfo->prevSKey == TSKEY_INITIAL_VAL);

  STimeWindow aligned = TSWINDOW_INITIALIZER;
  STimeWindow realWin = TSWINDOW_INITIALIZER;

  const bool asc = QUERY_IS_ASC_QUERY(pQuery);

  while (tsdbNextDataBlock(pRuntimeEnv->pQueryHandle)) {
    SDataBlockInfo blk = tsdbRetrieveDataBlockInfo(pRuntimeEnv->pQueryHandle);

    if (!asc) {
      getAlignQueryTimeWindow(pQuery, blk.window.ekey, pQuery->window.ekey, blk.window.ekey, &realWin, &aligned);

      pWinInfo->startTime = pQuery->window.skey;
      pWinInfo->prevSKey = aligned.skey;
    } else if (pWinInfo->prevSKey == TSKEY_INITIAL_VAL) {
      // ascending scan: align only once, on the first block
      getAlignQueryTimeWindow(pQuery, blk.window.skey, blk.window.skey, pQuery->window.ekey, &realWin, &aligned);

      pWinInfo->startTime = aligned.skey;
      pWinInfo->prevSKey = aligned.skey;
    }

    // the first time window
    STimeWindow win = getActiveTimeWindow(pWinInfo, pWinInfo->prevSKey, pQuery);

    while (pQuery->limit.offset > 0) {
      // a window counts against the offset when its end key is covered by the current block
      const bool winCovered = asc ? (win.ekey <= blk.window.ekey) : (win.ekey >= blk.window.skey);
      if (winCovered) {
        pQuery->limit.offset -= 1;
        pWinInfo->prevSKey = win.skey;
      }

      STimeWindow next = win;
      getNextTimeWindow(pQuery, &next);

      // does the following window still touch the current block?
      const bool nextInBlock = asc ? (next.skey <= blk.window.ekey) : (next.ekey >= blk.window.skey);

      if (!nextInBlock) {
        if (pQuery->limit.offset != 0) {
          break;  // offset is not 0, and next time window begins or ends in the next block.
        }

        // offset consumed exactly at the block boundary: nothing to load
        *start = next.skey;
        pQuery->window.skey = next.skey;
        pWinInfo->prevSKey = next.skey;
        return true;
      }

      /*
       * The next time window still starts in the current data block: load the
       * block and find the start position of the next qualified window.
       * TODO optimize performance: only the primary timestamp column is needed.
       */
      SArray          *pDataBlock = tsdbRetrieveDataBlock(pRuntimeEnv->pQueryHandle, NULL);
      SColumnInfoData *pTsColumn = taosArrayGet(pDataBlock, 0);

      next = win;
      int32_t startPos = getNextQualifiedWindow(pRuntimeEnv, &next, &blk, pTsColumn->pData, binarySearchForKey);
      assert(startPos >= 0);

      // set the abort info
      pQuery->pos = startPos;

      if (pQuery->limit.offset == 0) {
        // reset the query start timestamp
        pCurrent->win.skey = ((TSKEY *)pTsColumn->pData)[startPos];
        pQuery->window.skey = pCurrent->win.skey;
        *start = pCurrent->win.skey;

        pWinInfo->prevSKey = next.skey;

        // process the rest of this block, then restore the window index
        int32_t savedIndex = pRuntimeEnv->windowResInfo.curIndex;
        int32_t numOfRes = tableApplyFunctionsOnBlock(pRuntimeEnv, &blk, NULL, binarySearchForKey, pDataBlock);
        pRuntimeEnv->windowResInfo.curIndex = savedIndex;

        qDebug("QInfo:%p check data block, brange:%" PRId64 "-%" PRId64 ", numOfRows:%d, numOfRes:%d, lastKey:%" PRId64,
               GET_QINFO_ADDR(pRuntimeEnv), blk.window.skey, blk.window.ekey, blk.rows, numOfRes,
               pQuery->current->lastKey);

        return true;
      }

      // offset not consumed yet: continue counting from the located window
      pCurrent->lastKey = ((TSKEY *)pTsColumn->pData)[startPos];
      pWinInfo->prevSKey = next.skey;
      win = next;
    }
  }

  return true;
}

B
Bomin Zhang 已提交
4062 4063
/*
 * Create the tsdb query handle for pQInfo and store it in
 * pQInfo->runtimeEnv.pQueryHandle.
 *
 * No handle is created for tag-only queries, nor for super table queries that
 * are neither interval queries nor fixed-output queries.
 *
 * For an ascending query on exactly one normal table that is not an interval,
 * group-by-normal-column or fixed-output query, the time window of that
 * table's query info replaces the query window in the condition.
 */
static void setupQueryHandle(void* tsdb, SQInfo* pQInfo, bool isSTableQuery) {
  SQueryRuntimeEnv *pRuntimeEnv = &pQInfo->runtimeEnv;
  SQuery           *pQuery = pQInfo->runtimeEnv.pQuery;

  if (onlyQueryTags(pQuery) ||
      (isSTableQuery && !isIntervalQuery(pQuery) && !isFixedOutputQuery(pQuery))) {
    return;
  }

  STsdbQueryCond cond = {0};
  cond.twindow   = pQuery->window;
  cond.order     = pQuery->order.order;
  cond.colList   = pQuery->colList;
  cond.numOfCols = pQuery->numOfCols;

  const bool useTableWindow = !isSTableQuery
                              && (pQInfo->tableqinfoGroupInfo.numOfTables == 1)
                              && (cond.order == TSDB_ORDER_ASC)
                              && !isIntervalQuery(pQuery)
                              && !isGroupbyNormalCol(pQuery->pGroupbyExpr)
                              && !isFixedOutputQuery(pQuery);
  if (useTableWindow) {
    SArray          *firstGroup = GET_TABLEGROUP(pQInfo, 0);
    STableQueryInfo *pTableInfo = taosArrayGetP(firstGroup, 0);
    cond.twindow = pTableInfo->win;
  }

  // pick the tsdb entry point that matches the query type
  if (isFirstLastRowQuery(pQuery)) {
    pRuntimeEnv->pQueryHandle = tsdbQueryLastRow(tsdb, &cond, &pQInfo->tableGroupInfo, pQInfo);
    return;
  }

  if (isPointInterpoQuery(pQuery)) {
    pRuntimeEnv->pQueryHandle = tsdbQueryRowsInExternalWindow(tsdb, &cond, &pQInfo->tableGroupInfo, pQInfo);
    return;
  }

  pRuntimeEnv->pQueryHandle = tsdbQueryTables(tsdb, &cond, &pQInfo->tableGroupInfo, pQInfo);
}

4102 4103 4104 4105 4106 4107 4108 4109 4110 4111 4112 4113 4114
/*
 * Build the per-output-column description used by the fill logic.
 *
 * One SFillColInfo is produced for every select expression of pQuery; the
 * column offset of each entry is the sum of the byte widths of the preceding
 * expressions.
 *
 * @param pQuery  query whose pSelectExpr / fillVal arrays are read
 * @return        heap array of pQuery->numOfOutput entries, or NULL when the
 *                allocation fails (or returns NULL for a zero-sized request)
 */
static SFillColInfo* taosCreateFillColInfo(SQuery* pQuery) {
  int32_t numOfCols = pQuery->numOfOutput;
  int32_t offset = 0;

  SFillColInfo* pFillCol = calloc(numOfCols, sizeof(SFillColInfo));
  if (pFillCol == NULL) {
    // do not dereference a failed allocation
    return NULL;
  }

  for (int32_t i = 0; i < numOfCols; ++i) {
    SExprInfo* pExprInfo = &pQuery->pSelectExpr[i];

    pFillCol[i].col.bytes  = pExprInfo->bytes;
    pFillCol[i].col.type   = pExprInfo->type;
    pFillCol[i].col.offset = offset;
    pFillCol[i].flag       = TSDB_COL_NORMAL;    // always a normal column for table query
    pFillCol[i].functionId = pExprInfo->base.functionId;
    pFillCol[i].fillVal.i  = pQuery->fillVal[i];

    offset += pExprInfo->bytes;
  }

  return pFillCol;
}

4123
/*
 * Initialise the runtime environment of a query before its first execution.
 *
 * Steps, in order: read the time precision from the tsdb configuration,
 * adjust scan limitation and scan order, create the tsdb query handle, fill
 * in the runtime flags, set up the function contexts
 * (setupQueryRuntimeEnv), create the disk based result buffer and window
 * result info where the query type needs them, and create the fill info for
 * fill queries that are not point-interpolation queries.
 *
 * @param pQInfo         query info to initialise
 * @param pTsBuf         optional timestamp buffer; may be NULL
 * @param tsdb           tsdb instance the query runs against
 * @param vgId           stored in pQInfo->vgId
 * @param isSTableQuery  true when the query targets a super table
 * @return TSDB_CODE_SUCCESS, or the error code returned by
 *         setupQueryRuntimeEnv / createDiskbasedResultBuffer
 */
int32_t doInitQInfo(SQInfo *pQInfo, STSBuf *pTsBuf, void *tsdb, int32_t vgId, bool isSTableQuery) {
  int32_t code = TSDB_CODE_SUCCESS;

  SQueryRuntimeEnv *pRuntimeEnv = &pQInfo->runtimeEnv;

  SQuery *pQuery = pQInfo->runtimeEnv.pQuery;
  pQuery->precision = tsdbGetCfg(tsdb)->precision;

  setScanLimitationByResultBuffer(pQuery);
  changeExecuteScanOrder(pQInfo, false);
  setupQueryHandle(tsdb, pQInfo, isSTableQuery);

  pQInfo->tsdb = tsdb;
  pQInfo->vgId = vgId;

  pRuntimeEnv->pQuery = pQuery;
  pRuntimeEnv->pTSBuf = pTsBuf;
  pRuntimeEnv->cur.vgroupIndex = -1;
  pRuntimeEnv->stableQuery = isSTableQuery;
  pRuntimeEnv->prevGroupId = INT32_MIN;
  pRuntimeEnv->groupbyNormalCol = isGroupbyNormalCol(pQuery->pGroupbyExpr);

  if (pTsBuf != NULL) {
    // traverse the ts buffer forwards when its order matches the query order, backwards otherwise
    int16_t order = (pQuery->order.order == pRuntimeEnv->pTSBuf->tsOrder) ? TSDB_ORDER_ASC : TSDB_ORDER_DESC;
    tsBufSetTraverseOrder(pRuntimeEnv->pTSBuf, order);
  }

  // create runtime environment
  code = setupQueryRuntimeEnv(pRuntimeEnv, pQuery->order.order);
  if (code != TSDB_CODE_SUCCESS) {
    return code;
  }

  /*
   * topBotQuery must be known before it is passed to getNumOfRowsInResultPage();
   * it used to be assigned only at the end of this function, after it had
   * already been read here.
   */
  pRuntimeEnv->topBotQuery = isTopBottomQuery(pQuery);
  pRuntimeEnv->numOfRowsPerPage = getNumOfRowsInResultPage(pQuery, pRuntimeEnv->topBotQuery, isSTableQuery);

  if (isSTableQuery) {
    int32_t rows = getInitialPageNum(pQInfo);
    code = createDiskbasedResultBuffer(&pRuntimeEnv->pResultBuf, rows, pQuery->rowSize, pQInfo);
    if (code != TSDB_CODE_SUCCESS) {
      return code;
    }

    if (pQuery->intervalTime == 0) {
      int16_t type = TSDB_DATA_TYPE_NULL;

      if (pRuntimeEnv->groupbyNormalCol) {  // group by columns not tags;
        type = getGroupbyColumnType(pQuery, pQuery->pGroupbyExpr);
      } else {
        type = TSDB_DATA_TYPE_INT;  // group id
      }

      initWindowResInfo(&pRuntimeEnv->windowResInfo, pRuntimeEnv, 512, 4096, type);
    }

  } else if (pRuntimeEnv->groupbyNormalCol || isIntervalQuery(pQuery)) {
    int32_t rows = getInitialPageNum(pQInfo);
    code = createDiskbasedResultBuffer(&pRuntimeEnv->pResultBuf, rows, pQuery->rowSize, pQInfo);
    if (code != TSDB_CODE_SUCCESS) {
      return code;
    }

    int16_t type = TSDB_DATA_TYPE_NULL;
    if (pRuntimeEnv->groupbyNormalCol) {
      type = getGroupbyColumnType(pQuery, pQuery->pGroupbyExpr);
    } else {
      type = TSDB_DATA_TYPE_TIMESTAMP;
    }

    initWindowResInfo(&pRuntimeEnv->windowResInfo, pRuntimeEnv, rows, 4096, type);
  }

  if (pQuery->fillType != TSDB_FILL_NONE && !isPointInterpoQuery(pQuery)) {
    SFillColInfo* pColInfo = taosCreateFillColInfo(pQuery);
    pRuntimeEnv->pFillInfo = taosInitFillInfo(pQuery->order.order, 0, 0, pQuery->rec.capacity, pQuery->numOfOutput,
                                              pQuery->slidingTime, pQuery->slidingTimeUnit, pQuery->precision,
                                              pQuery->fillType, pColInfo);
  }

  // todo refactor
  pRuntimeEnv->hasTagResults = hasTagValOutput(pQuery);

  setQueryStatus(pQuery, QUERY_NOT_COMPLETED);
  return TSDB_CODE_SUCCESS;
}

4209
/*
 * Clear the "complete" flag of every output function's result info so that
 * the functions run again for the next table.
 */
static void enableExecutionForNextTable(SQueryRuntimeEnv *pRuntimeEnv) {
  SQuery *pQuery = pRuntimeEnv->pQuery;

  int32_t col = 0;
  while (col < pQuery->numOfOutput) {
    SResultInfo *pInfo = GET_RES_INFO(&pRuntimeEnv->pCtx[col]);
    ++col;

    if (pInfo == NULL) {
      continue;  // this context has no result info attached
    }

    pInfo->complete = false;
  }
}

H
Haojun Liao 已提交
4220
/*
 * Iterate over all data blocks delivered by the active query handle (the
 * primary handle during the master scan, the secondary handle otherwise) and
 * apply the query functions to each block.
 *
 * For every block the owning table's query info is looked up by table id and
 * made the current table.  The loop stops when the query is killed or when no
 * query info is registered for the block's table id.
 *
 * @return wall-clock time spent in this function, in milliseconds
 */
static int64_t scanMultiTableDataBlocks(SQInfo *pQInfo) {
  SQueryRuntimeEnv *pRuntimeEnv = &pQInfo->runtimeEnv;
  SQuery           *pQuery = pRuntimeEnv->pQuery;
  SQueryCostInfo   *pCost = &pRuntimeEnv->summary;

  const int64_t startMs = taosGetTimestampMs();

  TsdbQueryHandleT handle = pRuntimeEnv->pSecQueryHandle;
  if (IS_MASTER_SCAN(pRuntimeEnv)) {
    handle = pRuntimeEnv->pQueryHandle;
  }

  while (tsdbNextDataBlock(handle)) {
    pCost->totalBlocks += 1;

    if (isQueryKilled(pQInfo)) {
      break;
    }

    SDataBlockInfo    blk = tsdbRetrieveDataBlockInfo(handle);
    STableQueryInfo **ppTableInfo =
        (STableQueryInfo **)taosHashGet(pQInfo->tableqinfoGroupInfo.map, &blk.tid, sizeof(blk.tid));
    if (ppTableInfo == NULL) {
      break;
    }

    assert(*ppTableInfo != NULL);
    SET_CURRENT_QUERY_TABLE_INFO(pRuntimeEnv, *ppTableInfo);

    SDataStatis *pStatis = NULL;
    SArray      *pDataBlock = loadDataBlockOnDemand(pRuntimeEnv, handle, &blk, &pStatis);

    // group-by-normal-column queries need no per-block context switch
    if (!pRuntimeEnv->groupbyNormalCol) {
      if (isIntervalQuery(pQuery)) {
        setIntervalQueryRange(pQInfo, blk.window.skey);

        if (pRuntimeEnv->hasTagResults || pRuntimeEnv->pTSBuf != NULL) {
          setAdditionalInfo(pQInfo, (*ppTableInfo)->pTable, *ppTableInfo);
        }
      } else {
        int32_t step = QUERY_IS_ASC_QUERY(pQuery) ? 1 : -1;
        setExecutionContext(pQInfo, (*ppTableInfo)->groupIndex, blk.window.ekey + step);
      }
    }

    pCost->totalRows += blk.rows;
    stableApplyFunctionsOnBlock(pRuntimeEnv, &blk, pStatis, pDataBlock, binarySearchForKey);

    qDebug("QInfo:%p check data block, uid:%" PRId64 ", tid:%d, brange:%" PRId64 "-%" PRId64 ", numOfRows:%d, lastKey:%" PRId64,
           pQInfo, blk.uid, blk.tid, blk.window.skey, blk.window.ekey, blk.rows, pQuery->current->lastKey);
  }

  return taosGetTimestampMs() - startMs;
}

4271 4272
/*
 * Prepare the runtime for querying the index-th table of the first table
 * group: set its tag values, replace the current query handle by one that
 * covers only this table (from the table's lastKey to its window end key),
 * position the ts buffer if one is attached, and initialise the output
 * buffers.
 *
 * @param pQInfo  query info
 * @param index   position of the table inside table group 0
 * @return false when a ts buffer is attached, no cursor has been saved yet
 *         and the buffer holds no data for the table's tag value; true
 *         otherwise
 */
static bool multiTableMultioutputHelper(SQInfo *pQInfo, int32_t index) {
  SQueryRuntimeEnv *pEnv = &pQInfo->runtimeEnv;
  SQuery           *pQuery = pEnv->pQuery;

  setQueryStatus(pQuery, QUERY_NOT_COMPLETED);

  SArray          *firstGroup = GET_TABLEGROUP(pQInfo, 0);
  STableQueryInfo *pTableInfo = taosArrayGetP(firstGroup, index);

  setTagVal(pEnv, pTableInfo->pTable, pQInfo->tsdb);

  STableId *tableId = TSDB_TABLEID(pTableInfo->pTable);
  qDebug("QInfo:%p query on (%d): uid:%" PRIu64 ", tid:%d, qrange:%" PRId64 "-%" PRId64, pQInfo, index,
         tableId->uid, tableId->tid, pTableInfo->lastKey, pTableInfo->win.ekey);

  STsdbQueryCond cond = {
      .twindow   = {pTableInfo->lastKey, pTableInfo->win.ekey},
      .order     = pQuery->order.order,
      .colList   = pQuery->colList,
      .numOfCols = pQuery->numOfCols,
  };

  // todo refactor: temporary one-table group list, released right after the handle is created
  SArray *groupList = taosArrayInit(1, POINTER_BYTES);
  SArray *tableList = taosArrayInit(1, POINTER_BYTES);

  taosArrayPush(tableList, &pTableInfo->pTable);
  taosArrayPush(groupList, &tableList);

  STableGroupInfo single = {.numOfTables = 1, .pGroupList = groupList};

  // release the handle of the previous table before opening a new one
  if (pEnv->pQueryHandle != NULL) {
    tsdbCleanupQueryHandle(pEnv->pQueryHandle);
    pEnv->pQueryHandle = NULL;
  }

  pEnv->pQueryHandle = tsdbQueryTables(pQInfo->tsdb, &cond, &single, pQInfo);
  taosArrayDestroy(tableList);
  taosArrayDestroy(groupList);

  STSBuf *pTsBuf = pEnv->pTSBuf;
  if (pTsBuf != NULL) {
    if (pEnv->cur.vgroupIndex != -1) {
      // resume from the cursor saved by a previous round
      tsBufSetCursor(pTsBuf, &pEnv->cur);
    } else {
      int64_t tag = pEnv->pCtx[0].tag.i64Key;
      STSElem elem = tsBufGetElemStartPos(pTsBuf, 0, tag);

      // failed to find data with the specified tag value
      if (elem.vnode < 0) {
        return false;
      }
    }
  }

  initCtxOutputBuf(pEnv);
  return true;
}

/**
 * super table query handler
 * 1. super table projection query, group-by on normal columns query, ts-comp query
 * 2. point interpolation query, last row query
 *
 * @param pQInfo
 */
4335
static void sequentialTableProcess(SQInfo *pQInfo) {
4336
  SQueryRuntimeEnv *pRuntimeEnv = &pQInfo->runtimeEnv;
4337
  SQuery *          pQuery = pRuntimeEnv->pQuery;
4338
  setQueryStatus(pQuery, QUERY_COMPLETED);
4339

H
Haojun Liao 已提交
4340
  size_t numOfGroups = GET_NUM_OF_TABLEGROUP(pQInfo);
4341

H
Haojun Liao 已提交
4342
  if (isPointInterpoQuery(pQuery) || isFirstLastRowQuery(pQuery)) {
4343 4344
    resetCtxOutputBuf(pRuntimeEnv);
    assert(pQuery->limit.offset == 0 && pQuery->limit.limit != 0);
4345

4346
    while (pQInfo->groupIndex < numOfGroups) {
H
Haojun Liao 已提交
4347
      SArray* group = taosArrayGetP(pQInfo->tableGroupInfo.pGroupList, pQInfo->groupIndex);
4348

4349
      qDebug("QInfo:%p last_row query on group:%d, total group:%zu, current group:%p", pQInfo, pQInfo->groupIndex,
dengyihao's avatar
dengyihao 已提交
4350
             numOfGroups, group);
H
Haojun Liao 已提交
4351 4352 4353 4354 4355 4356 4357 4358 4359 4360 4361 4362 4363 4364 4365 4366 4367 4368 4369 4370

      STsdbQueryCond cond = {
          .twindow = pQuery->window,
          .colList = pQuery->colList,
          .order   = pQuery->order.order,
          .numOfCols = pQuery->numOfCols,
      };

      SArray *g1 = taosArrayInit(1, POINTER_BYTES);
      SArray *tx = taosArrayClone(group);
      taosArrayPush(g1, &tx);
      
      STableGroupInfo gp = {.numOfTables = taosArrayGetSize(tx), .pGroupList = g1};

      // include only current table
      if (pRuntimeEnv->pQueryHandle != NULL) {
        tsdbCleanupQueryHandle(pRuntimeEnv->pQueryHandle);
        pRuntimeEnv->pQueryHandle = NULL;
      }
      
4371
      if (isFirstLastRowQuery(pQuery)) {
H
Haojun Liao 已提交
4372
        pRuntimeEnv->pQueryHandle = tsdbQueryLastRow(pQInfo->tsdb, &cond, &gp, pQInfo);
H
Haojun Liao 已提交
4373
      } else {
H
Haojun Liao 已提交
4374
        pRuntimeEnv->pQueryHandle = tsdbQueryRowsInExternalWindow(pQInfo->tsdb, &cond, &gp, pQInfo);
4375
      }
H
Haojun Liao 已提交
4376 4377
      
      initCtxOutputBuf(pRuntimeEnv);
4378
      
4379
      SArray* s = tsdbGetQueriedTableList(pRuntimeEnv->pQueryHandle);
4380 4381
      assert(taosArrayGetSize(s) >= 1);
      
4382
      setTagVal(pRuntimeEnv, taosArrayGetP(s, 0), pQInfo->tsdb);
4383 4384 4385
      if (isFirstLastRowQuery(pQuery)) {
        assert(taosArrayGetSize(s) == 1);
      }
H
Haojun Liao 已提交
4386

dengyihao's avatar
dengyihao 已提交
4387
      taosArrayDestroy(s);
H
Haojun Liao 已提交
4388

H
Haojun Liao 已提交
4389
      // here we simply set the first table as current table
4390 4391 4392
      SArray* first = GET_TABLEGROUP(pQInfo, pQInfo->groupIndex);
      pQuery->current = taosArrayGetP(first, 0);

4393
      scanOneTableDataBlocks(pRuntimeEnv, pQuery->current->lastKey);
H
Haojun Liao 已提交
4394 4395 4396 4397 4398 4399 4400 4401 4402 4403 4404 4405
      
      int64_t numOfRes = getNumOfResult(pRuntimeEnv);
      if (numOfRes > 0) {
        pQuery->rec.rows += numOfRes;
        forwardCtxOutputBuf(pRuntimeEnv, numOfRes);
      }
      
      skipResults(pRuntimeEnv);
      pQInfo->groupIndex += 1;

      // enable execution for next table, when handling the projection query
      enableExecutionForNextTable(pRuntimeEnv);
4406 4407 4408 4409 4410 4411

      if (pQuery->rec.rows >= pQuery->rec.capacity) {
        setQueryStatus(pQuery, QUERY_RESBUF_FULL);
        break;
      }
    }
H
Haojun Liao 已提交
4412
  } else if (pRuntimeEnv->groupbyNormalCol) { // group-by on normal columns query
4413
    while (pQInfo->groupIndex < numOfGroups) {
H
Haojun Liao 已提交
4414
      SArray* group = taosArrayGetP(pQInfo->tableGroupInfo.pGroupList, pQInfo->groupIndex);
4415

4416
      qDebug("QInfo:%p group by normal columns group:%d, total group:%zu", pQInfo, pQInfo->groupIndex, numOfGroups);
4417 4418 4419 4420 4421 4422 4423 4424 4425 4426 4427 4428 4429 4430 4431 4432 4433 4434 4435 4436 4437 4438

      STsdbQueryCond cond = {
          .twindow = pQuery->window,
          .colList = pQuery->colList,
          .order   = pQuery->order.order,
          .numOfCols = pQuery->numOfCols,
      };

      SArray *g1 = taosArrayInit(1, POINTER_BYTES);
      SArray *tx = taosArrayClone(group);
      taosArrayPush(g1, &tx);

      STableGroupInfo gp = {.numOfTables = taosArrayGetSize(tx), .pGroupList = g1};

      // include only current table
      if (pRuntimeEnv->pQueryHandle != NULL) {
        tsdbCleanupQueryHandle(pRuntimeEnv->pQueryHandle);
        pRuntimeEnv->pQueryHandle = NULL;
      }

      pRuntimeEnv->pQueryHandle = tsdbQueryTables(pQInfo->tsdb, &cond, &gp, pQInfo);

4439
      SArray* s = tsdbGetQueriedTableList(pRuntimeEnv->pQueryHandle);
4440 4441
      assert(taosArrayGetSize(s) >= 1);

4442
      setTagVal(pRuntimeEnv, taosArrayGetP(s, 0), pQInfo->tsdb);
4443 4444 4445 4446 4447 4448 4449 4450

      // here we simply set the first table as current table
      scanMultiTableDataBlocks(pQInfo);
      pQInfo->groupIndex += 1;

      SWindowResInfo *pWindowResInfo = &pRuntimeEnv->windowResInfo;

        // no results generated for current group, continue to try the next group
dengyihao's avatar
dengyihao 已提交
4451
      taosArrayDestroy(s); 
4452 4453 4454 4455 4456 4457 4458 4459 4460 4461 4462 4463 4464 4465
      if (pWindowResInfo->size <= 0) {
        continue;
      }

      for (int32_t i = 0; i < pWindowResInfo->size; ++i) {
        SWindowStatus *pStatus = &pWindowResInfo->pResult[i].status;
        pStatus->closed = true;  // enable return all results for group by normal columns

        SWindowResult *pResult = &pWindowResInfo->pResult[i];
        for (int32_t j = 0; j < pQuery->numOfOutput; ++j) {
          pResult->numOfRows = MAX(pResult->numOfRows, pResult->resultInfo[j].numOfRes);
        }
      }

4466
      qDebug("QInfo:%p generated groupby columns results %d rows for group %d completed", pQInfo, pWindowResInfo->size,
4467 4468 4469 4470 4471 4472 4473 4474 4475 4476 4477 4478 4479 4480
          pQInfo->groupIndex);
      int32_t currentGroupIndex = pQInfo->groupIndex;

      pQuery->rec.rows = 0;
      pQInfo->groupIndex = 0;

      ensureOutputBufferSimple(pRuntimeEnv, pWindowResInfo->size);
      copyFromWindowResToSData(pQInfo, pWindowResInfo->pResult);

      pQInfo->groupIndex = currentGroupIndex;  //restore the group index
      assert(pQuery->rec.rows == pWindowResInfo->size);

      clearClosedTimeWindow(pRuntimeEnv);
      break;
4481 4482 4483
    }
  } else {
    /*
4484
     * 1. super table projection query, 2. ts-comp query
4485 4486 4487
     * if the subgroup index is larger than 0, results generated by group by tbname,k is existed.
     * we need to return it to client in the first place.
     */
4488
    if (pQInfo->groupIndex > 0) {
4489
      copyFromWindowResToSData(pQInfo, pRuntimeEnv->windowResInfo.pResult);
4490
      pQuery->rec.total += pQuery->rec.rows;
4491

4492
      if (pQuery->rec.rows > 0) {
4493 4494 4495
        return;
      }
    }
4496

4497
    // all data have returned already
4498
    if (pQInfo->tableIndex >= pQInfo->tableqinfoGroupInfo.numOfTables) {
4499 4500
      return;
    }
4501

4502 4503
    resetCtxOutputBuf(pRuntimeEnv);
    resetTimeWindowInfo(pRuntimeEnv, &pRuntimeEnv->windowResInfo);
4504

H
Haojun Liao 已提交
4505
    SArray *group = GET_TABLEGROUP(pQInfo, 0);
4506 4507
    assert(taosArrayGetSize(group) == pQInfo->tableqinfoGroupInfo.numOfTables &&
           1 == taosArrayGetSize(pQInfo->tableqinfoGroupInfo.pGroupList));
4508

4509
    while (pQInfo->tableIndex < pQInfo->tableqinfoGroupInfo.numOfTables) {
4510
      if (isQueryKilled(pQInfo)) {
4511 4512
        return;
      }
4513

4514
      pQuery->current = taosArrayGetP(group, pQInfo->tableIndex);
4515
      if (!multiTableMultioutputHelper(pQInfo, pQInfo->tableIndex)) {
4516
        pQInfo->tableIndex++;
4517 4518
        continue;
      }
4519

H
hjxilinx 已提交
4520
      // TODO handle the limit offset problem
4521
      if (pQuery->numOfFilterCols == 0 && pQuery->limit.offset > 0) {
4522
        //        skipBlocks(pRuntimeEnv);
4523 4524
        if (Q_STATUS_EQUAL(pQuery->status, QUERY_COMPLETED)) {
          pQInfo->tableIndex++;
4525 4526 4527
          continue;
        }
      }
4528

4529
      scanOneTableDataBlocks(pRuntimeEnv, pQuery->current->lastKey);
4530
      skipResults(pRuntimeEnv);
4531

4532
      // the limitation of output result is reached, set the query completed
4533
      if (limitResults(pRuntimeEnv)) {
4534
        pQInfo->tableIndex = pQInfo->tableqinfoGroupInfo.numOfTables;
4535 4536
        break;
      }
4537

4538 4539
      // enable execution for next table, when handling the projection query
      enableExecutionForNextTable(pRuntimeEnv);
4540

4541
      if (Q_STATUS_EQUAL(pQuery->status, QUERY_COMPLETED)) {
4542 4543 4544 4545 4546 4547
        /*
         * query range is identical in terms of all meters involved in query,
         * so we need to restore them at the *beginning* of query on each meter,
         * not the consecutive query on meter on which is aborted due to buffer limitation
         * to ensure that, we can reset the query range once query on a meter is completed.
         */
4548
        pQInfo->tableIndex++;
weixin_48148422's avatar
weixin_48148422 已提交
4549

H
Haojun Liao 已提交
4550
        STableIdInfo tidInfo = {0};
4551

H
Haojun Liao 已提交
4552 4553 4554
        STableId* id = TSDB_TABLEID(pQuery->current->pTable);
        tidInfo.uid = id->uid;
        tidInfo.tid = id->tid;
weixin_48148422's avatar
weixin_48148422 已提交
4555
        tidInfo.key = pQuery->current->lastKey;
weixin_48148422's avatar
weixin_48148422 已提交
4556 4557
        taosArrayPush(pQInfo->arrTableIdInfo, &tidInfo);

4558
        // if the buffer is full or group by each table, we need to jump out of the loop
4559 4560
        if (Q_STATUS_EQUAL(pQuery->status, QUERY_RESBUF_FULL) /*||
            isGroupbyEachTable(pQuery->pGroupbyExpr, pSupporter->pSidSet)*/) {
4561 4562
          break;
        }
4563

4564
      } else {
4565
        // all data in the result buffer are skipped due to the offset, continue to retrieve data from current meter
4566 4567
        if (pQuery->rec.rows == 0) {
          assert(!Q_STATUS_EQUAL(pQuery->status, QUERY_RESBUF_FULL));
4568 4569
          continue;
        } else {
4570 4571 4572
          // buffer is full, wait for the next round to retrieve data from current meter
          assert(Q_STATUS_EQUAL(pQuery->status, QUERY_RESBUF_FULL));
          break;
4573 4574 4575
        }
      }
    }
H
Haojun Liao 已提交
4576

4577
    if (pQInfo->tableIndex >= pQInfo->tableqinfoGroupInfo.numOfTables) {
H
Haojun Liao 已提交
4578 4579
      setQueryStatus(pQuery, QUERY_COMPLETED);
    }
4580
  }
4581

4582 4583 4584 4585 4586 4587 4588 4589 4590 4591 4592 4593
  /*
   * 1. super table projection query, group-by on normal columns query, ts-comp query
   * 2. point interpolation query, last row query
   *
   * group-by on normal columns query and last_row query do NOT invoke the finalizer here,
   * since the finalize stage will be done at the client side.
   *
   * projection query, point interpolation query do not need the finalizer.
   *
   * Only the ts-comp query requires the finalizer function to be executed here.
   */
  if (isTSCompQuery(pQuery)) {
H
hjxilinx 已提交
4594
    finalizeQueryResult(pRuntimeEnv);
4595
  }
4596

4597 4598 4599
  if (pRuntimeEnv->pTSBuf != NULL) {
    pRuntimeEnv->cur = pRuntimeEnv->pTSBuf->cur;
  }
4600

4601
  qDebug(
B
Bomin Zhang 已提交
4602
      "QInfo %p numOfTables:%"PRIu64", index:%d, numOfGroups:%zu, %"PRId64" points returned, total:%"PRId64", offset:%" PRId64,
4603
      pQInfo, pQInfo->tableqinfoGroupInfo.numOfTables, pQInfo->tableIndex, numOfGroups, pQuery->rec.rows, pQuery->rec.total,
4604
      pQuery->limit.offset);
4605 4606
}

4607 4608 4609 4610
/*
 * Switch the runtime environment into reverse-scan mode: flip the scan flag,
 * swap the query window bounds, invert the scan order and open a fresh
 * secondary tsdb query handle for the reversed window.
 */
static void doSaveContext(SQInfo *pQInfo) {
  SQueryRuntimeEnv *env = &pQInfo->runtimeEnv;
  SQuery *          query = env->pQuery;

  SET_REVERSE_SCAN_FLAG(env);
  SWAP(query->window.skey, query->window.ekey, TSKEY);
  SWITCH_ORDER(query->order.order);

  // keep the ts buffer cursor in the same direction as the query
  if (env->pTSBuf != NULL) {
    env->pTSBuf->cur.order = query->order.order;
  }

  // the condition is built after the swap/switch above, so it describes the reversed scan
  STsdbQueryCond cond = {
      .twindow   = query->window,
      .order     = query->order.order,
      .colList   = query->colList,
      .numOfCols = query->numOfCols,
  };

  // release the handle left over from a previous reverse scan, if any
  if (env->pSecQueryHandle != NULL) {
    tsdbCleanupQueryHandle(env->pSecQueryHandle);
  }

  env->prevGroupId = INT32_MIN;
  env->pSecQueryHandle = tsdbQueryTables(pQInfo->tsdb, &cond, &pQInfo->tableGroupInfo, pQInfo);

  setQueryStatus(query, QUERY_NOT_COMPLETED);
  switchCtxOrder(env);
  disableFuncInReverseScan(pQInfo);
}

4639 4640 4641 4642
/*
 * Leave reverse-scan mode: swap the query window bounds back, flip the ts
 * buffer cursor order and the per-function context order, and mark the
 * runtime as being in the master scan again.
 *
 * NOTE(review): doSaveContext() also flips pQuery->order.order, and that flip
 * is not undone here -- confirm this is intended.
 */
static void doRestoreContext(SQInfo *pQInfo) {
  SQueryRuntimeEnv *env = &pQInfo->runtimeEnv;
  SQuery *          query = env->pQuery;

  SWAP(query->window.skey, query->window.ekey, TSKEY);

  if (env->pTSBuf != NULL) {
    SWITCH_ORDER(env->pTSBuf->cur.order);
  }

  switchCtxOrder(env);
  SET_MASTER_SCAN_FLAG(env);
}

4653 4654 4655
/*
 * Close every time window once a scan pass is finished.
 *
 * For an interval query each table owns its window result set, so all tables
 * in all groups are visited; otherwise only the shared window result set of
 * the runtime environment is closed.
 */
static void doCloseAllTimeWindowAfterScan(SQInfo *pQInfo) {
  SQuery *query = pQInfo->runtimeEnv.pQuery;
  int32_t step = GET_FORWARD_DIRECTION_FACTOR(query->order.order);

  if (!isIntervalQuery(query)) {
    // close results for group result
    closeAllTimeWindow(&pQInfo->runtimeEnv.windowResInfo);
    return;
  }

  size_t groupCount = GET_NUM_OF_TABLEGROUP(pQInfo);
  for (int32_t g = 0; g < groupCount; ++g) {
    SArray *tables = GET_TABLEGROUP(pQInfo, g);
    size_t  tableCount = taosArrayGetSize(tables);

    for (int32_t t = 0; t < tableCount; ++t) {
      STableQueryInfo *tableInfo = taosArrayGetP(tables, t);

      closeAllTimeWindow(&tableInfo->windowResInfo);
      removeRedundantWindow(&tableInfo->windowResInfo, tableInfo->lastKey - step, step);
    }
  }
}

/*
 * Run a query over multiple tables: master scan, optional reverse scan, then
 * merge/copy of the results into the output buffer.
 *
 * When pQInfo->groupIndex > 0 no scan is performed; the call only copies the
 * next batch of results into the output buffer.
 */
static void multiTableQueryProcess(SQInfo *pQInfo) {
  SQueryRuntimeEnv *env = &pQInfo->runtimeEnv;
  SQuery *          query = env->pQuery;

  if (pQInfo->groupIndex > 0) {
    /*
     * groupIndex > 0 means the query process has already been completed; only the
     * remaining data has to be copied into the output buffer.
     */
    if (!isIntervalQuery(query)) {
      copyFromWindowResToSData(pQInfo, env->windowResInfo.pResult);
    } else {
      copyResToQueryResultBuf(pQInfo, query);
#ifdef _DEBUG_VIEW
      displayInterResult(query->sdata, env, query->sdata[0]->num);
#endif
    }

    qDebug("QInfo:%p current:%"PRId64", total:%"PRId64"", pQInfo, query->rec.rows, query->rec.total);
    return;
  }

  qDebug("QInfo:%p query start, qrange:%" PRId64 "-%" PRId64 ", order:%d, forward scan start", pQInfo,
         query->window.skey, query->window.ekey, query->order.order);

  // master scan: check all qualified data blocks
  int64_t masterElapsed = scanMultiTableDataBlocks(pQInfo);
  qDebug("QInfo:%p master scan completed, elapsed time: %" PRId64 "ms, reverse scan start", pQInfo, masterElapsed);

  // query error occurred or query is killed, abort current execution
  if (pQInfo->code != TSDB_CODE_SUCCESS || isQueryKilled(pQInfo)) {
    qDebug("QInfo:%p query killed or error occurred, code:%s, abort", pQInfo, tstrerror(pQInfo->code));
    return;
  }

  // close all time window results produced by the master scan
  doCloseAllTimeWindowAfterScan(pQInfo);

  if (!needReverseScan(query)) {
    qDebug("QInfo:%p no need to do reversed scan, query completed", pQInfo);
  } else {
    doSaveContext(pQInfo);

    int64_t reverseElapsed = scanMultiTableDataBlocks(pQInfo);
    qDebug("QInfo:%p reversed scan completed, elapsed time: %" PRId64 "ms", pQInfo, reverseElapsed);

    doRestoreContext(pQInfo);
    doCloseAllTimeWindowAfterScan(pQInfo);
  }

  setQueryStatus(query, QUERY_COMPLETED);

  // the reverse scan may have failed or been interrupted as well
  if (pQInfo->code != TSDB_CODE_SUCCESS || isQueryKilled(pQInfo)) {
    qDebug("QInfo:%p query killed or error occurred, code:%s, abort", pQInfo, tstrerror(pQInfo->code));
    return;
  }

  if (QUERY_IS_INTERVAL_QUERY(query) || isSumAvgRateQuery(query)) {
    if (mergeIntoGroupResult(pQInfo) == TSDB_CODE_SUCCESS) {
      copyResToQueryResultBuf(pQInfo, query);

#ifdef _DEBUG_VIEW
      displayInterResult(query->sdata, env, query->sdata[0]->num);
#endif
    }
  } else {
    // not an interval query
    copyFromWindowResToSData(pQInfo, env->windowResInfo.pResult);
  }

  // handle the limitation of output buffer
  qDebug("QInfo:%p points returned:%" PRId64 ", total:%" PRId64, pQInfo, query->rec.rows, query->rec.total + query->rec.rows);
}

/*
 * in each query, this function will be called only once, no retry for further result.
 *
 * select count(*)/top(field,k)/avg(field name) from table_name [where ts>now-1a];
 * select count(*) from table_name group by status_column;
 */
H
hjxilinx 已提交
4754
static void tableFixedOutputProcess(SQInfo *pQInfo, STableQueryInfo* pTableInfo) {
4755
  SQueryRuntimeEnv *pRuntimeEnv = &pQInfo->runtimeEnv;
H
hjxilinx 已提交
4756 4757
  
  SQuery *pQuery = pRuntimeEnv->pQuery;
H
Haojun Liao 已提交
4758 4759 4760 4761
  if (!isTopBottomQuery(pQuery) && pQuery->limit.offset > 0) {  // no need to execute, since the output will be ignore.
    return;
  }
  
H
hjxilinx 已提交
4762 4763
  pQuery->current = pTableInfo;  // set current query table info
  
4764
  scanOneTableDataBlocks(pRuntimeEnv, pTableInfo->lastKey);
H
hjxilinx 已提交
4765
  finalizeQueryResult(pRuntimeEnv);
4766

4767
  if (isQueryKilled(pQInfo)) {
4768 4769
    return;
  }
4770

H
Haojun Liao 已提交
4771
  // since the numOfRows must be identical for all sql functions that are allowed to be executed simutaneously.
4772
  pQuery->rec.rows = getNumOfResult(pRuntimeEnv);
4773

4774
  skipResults(pRuntimeEnv);
4775
  limitResults(pRuntimeEnv);
4776 4777
}

H
hjxilinx 已提交
4778
/*
 * Single-table processing for queries that may produce an arbitrary number of
 * output rows. Scans repeatedly until at least one row survives the offset or
 * the query is completed, then applies the limit.
 */
static void tableMultiOutputProcess(SQInfo *pQInfo, STableQueryInfo* pTableInfo) {
  SQueryRuntimeEnv *env = &pQInfo->runtimeEnv;
  SQuery *          query = env->pQuery;

  query->current = pTableInfo;

  // for ts_comp query, re-initialization of the output buffer is not allowed
  if (!isTSCompQuery(query)) {
    resetCtxOutputBuf(env);
  }

  // skip blocks without loading the actual data block from file if no filter condition is present
  skipBlocks(&pQInfo->runtimeEnv);
  if (query->limit.offset > 0 && query->numOfFilterCols == 0) {
    setQueryStatus(query, QUERY_COMPLETED);
    return;
  }

  for (;;) {
    scanOneTableDataBlocks(env, query->current->lastKey);
    finalizeQueryResult(env);

    if (isQueryKilled(pQInfo)) {
      return;
    }

    query->rec.rows = getNumOfResult(env);
    if (query->limit.offset > 0 && query->numOfFilterCols > 0 && query->rec.rows > 0) {
      skipResults(env);
    }

    /*
     * 1. if pQuery->size == 0, pQuery->limit.offset >= 0, still need to check data
     * 2. if pQuery->size > 0, pQuery->limit.offset must be 0
     */
    if (query->rec.rows > 0 || Q_STATUS_EQUAL(query->status, QUERY_COMPLETED)) {
      break;
    }

    qDebug("QInfo:%p skip current result, offset:%" PRId64 ", next qrange:%" PRId64 "-%" PRId64,
           pQInfo, query->limit.offset, query->current->lastKey, query->current->win.ekey);

    resetCtxOutputBuf(env);
  }

  limitResults(env);

  if (Q_STATUS_EQUAL(query->status, QUERY_RESBUF_FULL)) {
    qDebug("QInfo:%p query paused due to output limitation, next qrange:%" PRId64 "-%" PRId64, pQInfo,
        query->current->lastKey, query->window.ekey);
  } else if (Q_STATUS_EQUAL(query->status, QUERY_COMPLETED)) {
    // record the last key reached on this table
    STableId *   tableId = TSDB_TABLEID(query->current);
    STableIdInfo lastPos;

    lastPos.uid = tableId->uid;
    lastPos.tid = tableId->tid;
    lastPos.key = query->current->lastKey;
    taosArrayPush(pQInfo->arrTableIdInfo, &lastPos);
  }

  if (!isTSCompQuery(query)) {
    assert(query->rec.rows <= query->rec.capacity);
  }
}

H
Haojun Liao 已提交
4842
/*
 * Scan the current table starting at `start` until the query is completed or
 * the result buffer is full. Returns early if the query is killed.
 */
static void tableIntervalProcessImpl(SQueryRuntimeEnv *pRuntimeEnv, TSKEY start) {
  SQuery *query = pRuntimeEnv->pQuery;

  do {
    scanOneTableDataBlocks(pRuntimeEnv, start);

    if (isQueryKilled(GET_QINFO_ADDR(pRuntimeEnv))) {
      return;
    }

    assert(!Q_STATUS_EQUAL(query->status, QUERY_NOT_COMPLETED));
    finalizeQueryResult(pRuntimeEnv);

    // here we can ignore the records in case of no interpolation
    // todo handle offset, in case of top/bottom interval query
    if ((query->numOfFilterCols > 0 || pRuntimeEnv->pTSBuf != NULL) && query->limit.offset > 0 &&
        query->fillType == TSDB_FILL_NONE) {
      // drop closed windows that fall inside the remaining offset
      int32_t closed = numOfClosedTimeWindow(&pRuntimeEnv->windowResInfo);
      int32_t skipped = MIN(closed, query->limit.offset);

      clearFirstNTimeWindow(pRuntimeEnv, skipped);
      query->limit.offset -= skipped;
    }
  } while (!Q_STATUS_EQUAL(query->status, QUERY_COMPLETED | QUERY_RESBUF_FULL));
}

4873
// handle time interval query on table
H
hjxilinx 已提交
4874
static void tableIntervalProcess(SQInfo *pQInfo, STableQueryInfo* pTableInfo) {
4875 4876
  SQueryRuntimeEnv *pRuntimeEnv = &(pQInfo->runtimeEnv);

H
hjxilinx 已提交
4877 4878
  SQuery *pQuery = pRuntimeEnv->pQuery;
  pQuery->current = pTableInfo;
4879

H
Haojun Liao 已提交
4880
  int32_t numOfFilled = 0;
H
Haojun Liao 已提交
4881 4882
  TSKEY newStartKey = TSKEY_INITIAL_VAL;
  
4883
  // skip blocks without load the actual data block from file if no filter condition present
H
Haojun Liao 已提交
4884
  skipTimeInterval(pRuntimeEnv, &newStartKey);
4885
  if (pQuery->limit.offset > 0 && pQuery->numOfFilterCols == 0 && pRuntimeEnv->pFillInfo == NULL) {
4886 4887 4888 4889
    setQueryStatus(pQuery, QUERY_COMPLETED);
    return;
  }

4890
  while (1) {
H
Haojun Liao 已提交
4891
    tableIntervalProcessImpl(pRuntimeEnv, newStartKey);
4892

H
hjxilinx 已提交
4893
    if (isIntervalQuery(pQuery)) {
4894
      pQInfo->groupIndex = 0;  // always start from 0
4895
      pQuery->rec.rows = 0;
4896
      copyFromWindowResToSData(pQInfo, pRuntimeEnv->windowResInfo.pResult);
4897

4898
      clearFirstNTimeWindow(pRuntimeEnv, pQInfo->groupIndex);
4899
    }
4900

4901
    // the offset is handled at prepare stage if no interpolation involved
4902
    if (pQuery->fillType == TSDB_FILL_NONE || pQuery->rec.rows == 0) {
4903
      limitResults(pRuntimeEnv);
4904 4905
      break;
    } else {
H
Haojun Liao 已提交
4906
      taosFillSetStartInfo(pRuntimeEnv->pFillInfo, pQuery->rec.rows, pQuery->window.ekey);
4907
      taosFillCopyInputDataFromFilePage(pRuntimeEnv->pFillInfo, (tFilePage**) pQuery->sdata);
H
Haojun Liao 已提交
4908
      numOfFilled = 0;
4909
      
H
Haojun Liao 已提交
4910
      pQuery->rec.rows = doFillGapsInResults(pRuntimeEnv, (tFilePage **)pQuery->sdata, &numOfFilled);
4911
      if (pQuery->rec.rows > 0 || Q_STATUS_EQUAL(pQuery->status, QUERY_COMPLETED)) {
4912
        limitResults(pRuntimeEnv);
4913 4914
        break;
      }
4915

4916
      // no result generated yet, continue retrieve data
4917
      pQuery->rec.rows = 0;
4918 4919
    }
  }
4920

4921
  // all data scanned, the group by normal column can return
H
Haojun Liao 已提交
4922
  if (pRuntimeEnv->groupbyNormalCol) {  // todo refactor with merge interval time result
4923
    pQInfo->groupIndex = 0;
4924
    pQuery->rec.rows = 0;
4925
    copyFromWindowResToSData(pQInfo, pRuntimeEnv->windowResInfo.pResult);
4926
    clearFirstNTimeWindow(pRuntimeEnv, pQInfo->groupIndex);
4927
  }
4928

H
Haojun Liao 已提交
4929
  pQInfo->pointsInterpo += numOfFilled;
4930 4931
}

4932 4933 4934 4935
/*
 * Entry point for a query on exactly one table. Results left over from a
 * previous round are returned first; otherwise the query is dispatched to the
 * interval, fixed-output or multi-output processor and the elapsed time is
 * added to the runtime summary.
 */
static void tableQueryImpl(SQInfo *pQInfo) {
  SQueryRuntimeEnv *env = &pQInfo->runtimeEnv;
  SQuery *          query = env->pQuery;

  if (queryHasRemainResults(env)) {
    if (query->fillType != TSDB_FILL_NONE) {
      /*
       * There are remaining results that were not returned due to result interpolation,
       * so stay in this procedure instead of launching the retrieve procedure for next results.
       */
      int32_t filledRows = 0;
      query->rec.rows = doFillGapsInResults(env, (tFilePage **)query->sdata, &filledRows);

      if (query->rec.rows > 0) {
        limitResults(env);
      }

      qDebug("QInfo:%p current:%" PRId64 " returned, total:%" PRId64, pQInfo, query->rec.rows, query->rec.total);
      return;
    }

    query->rec.rows = 0;
    pQInfo->groupIndex = 0;  // always start from 0

    if (env->windowResInfo.size > 0) {
      copyFromWindowResToSData(pQInfo, env->windowResInfo.pResult);
      clearFirstNTimeWindow(env, pQInfo->groupIndex);

      if (query->rec.rows > 0) {
        qDebug("QInfo:%p %"PRId64" rows returned from group results, total:%"PRId64"", pQInfo, query->rec.rows, query->rec.total);

        // no data remains
        if (env->windowResInfo.size <= 0) {
          qDebug("QInfo:%p query over, %"PRId64" rows are returned", pQInfo, query->rec.total);
        }

        return;
      }
    }
  }

  // number of points returned during this query
  query->rec.rows = 0;
  int64_t startUs = taosGetTimestampUs();

  assert(pQInfo->tableqinfoGroupInfo.numOfTables == 1);
  SArray *         firstGroup = GET_TABLEGROUP(pQInfo, 0);
  STableQueryInfo *tableInfo = taosArrayGetP(firstGroup, 0);

  // group by normal column, sliding window query, interval query are handled by interval query processor
  if (QUERY_IS_INTERVAL_QUERY(query) || env->groupbyNormalCol) {
    // interval (down sampling operation)
    tableIntervalProcess(pQInfo, tableInfo);
  } else if (isFixedOutputQuery(query)) {
    tableFixedOutputProcess(pQInfo, tableInfo);
  } else {
    // diff/add/multiply/subtract/division
    assert(query->checkBuffer == 1);
    tableMultiOutputProcess(pQInfo, tableInfo);
  }

  // record the total elapsed time
  env->summary.elapsedTime += (taosGetTimestampUs() - startUs);
  assert(pQInfo->tableqinfoGroupInfo.numOfTables == 1);
}

4997
/*
 * Entry point for a super table query. Chooses between the multi-table
 * processor and the sequential per-table processor, and adds the elapsed
 * time to the runtime summary.
 */
static void stableQueryImpl(SQInfo *pQInfo) {
  SQueryRuntimeEnv *env = &pQInfo->runtimeEnv;
  SQuery *          query = env->pQuery;

  query->rec.rows = 0;

  int64_t startUs = taosGetTimestampUs();

  // interval queries, and plain fixed-output queries, are handled by the multi-table processor
  bool useMultiTableProcess =
      QUERY_IS_INTERVAL_QUERY(query) ||
      (isFixedOutputQuery(query) && (!isPointInterpoQuery(query)) && !env->groupbyNormalCol &&
       !isFirstLastRowQuery(query));

  if (useMultiTableProcess) {
    multiTableQueryProcess(pQInfo);
  } else {
    assert((query->checkBuffer == 1 && query->intervalTime == 0) || isPointInterpoQuery(query) ||
            isFirstLastRowQuery(query) || env->groupbyNormalCol);

    sequentialTableProcess(pQInfo);
  }

  // record the total elapsed time
  pQInfo->runtimeEnv.summary.elapsedTime += (taosGetTimestampUs() - startUs);
}

5019
/*
 * Locate the column referenced by an expression inside the query message.
 *
 * Tag expressions are looked up by column id in pTagCols (numOfTags entries),
 * all other expressions in pQueryMsg->colList (numOfCols entries).
 *
 * @param pQueryMsg  query message holding colList/numOfCols/numOfTags
 * @param pExprMsg   expression whose colInfo is searched for
 * @param pTagCols   tag column schema array
 * @return the zero-based index of the matching column; -1 when a tag
 *         expression refers to TSDB_TBNAME_COLUMN_INDEX; INT32_MIN when no
 *         column matches (after asserting in debug builds).
 */
static int32_t getColumnIndexInSource(SQueryTableMsg *pQueryMsg, SSqlFuncMsg *pExprMsg, SColumnInfo* pTagCols) {
  if (TSDB_COL_IS_TAG(pExprMsg->colInfo.flag)) {
    if (pExprMsg->colInfo.colId == TSDB_TBNAME_COLUMN_INDEX) {
      return -1;
    }

    for (int32_t j = 0; j < pQueryMsg->numOfTags; ++j) {
      if (pExprMsg->colInfo.colId == pTagCols[j].colId) {
        return j;
      }
    }
  } else {
    for (int32_t j = 0; j < pQueryMsg->numOfCols; ++j) {
      if (pExprMsg->colInfo.colId == pQueryMsg->colList[j].colId) {
        return j;
      }
    }
  }

  // no matching column: still a hard error in debug builds
  assert(0);

  /*
   * With NDEBUG the assert above disappears; without an explicit return the
   * function would run off its end and callers would read an indeterminate
   * value. INT32_MIN is distinct from every valid index and from the -1
   * table-name marker.
   */
  return INT32_MIN;
}

5048 5049 5050
/*
 * Check that the column index resolved for an expression is below the number
 * of columns or below the number of tags carried by the query message.
 */
bool validateExprColumnInfo(SQueryTableMsg *pQueryMsg, SSqlFuncMsg *pExprMsg, SColumnInfo* pTagCols) {
  int32_t index = getColumnIndexInSource(pQueryMsg, pExprMsg, pTagCols);

  if (index < pQueryMsg->numOfCols) {
    return true;
  }

  return index < pQueryMsg->numOfTags;
}

5053
/*
 * Sanity-check the scalar header fields of a query message. The first field
 * found out of range is logged and the message is rejected.
 *
 * @return true when intervalTime, numOfTables, numOfGroupCols and numOfOutput
 *         are all within their accepted ranges.
 */
static bool validateQueryMsg(SQueryTableMsg *pQueryMsg) {
  bool valid = false;

  if (pQueryMsg->intervalTime < 0) {
    qError("qmsg:%p illegal value of interval time %" PRId64, pQueryMsg, pQueryMsg->intervalTime);
  } else if (pQueryMsg->numOfTables <= 0) {
    qError("qmsg:%p illegal value of numOfTables %d", pQueryMsg, pQueryMsg->numOfTables);
  } else if (pQueryMsg->numOfGroupCols < 0) {
    qError("qmsg:%p illegal value of numOfGroupbyCols %d", pQueryMsg, pQueryMsg->numOfGroupCols);
  } else if (pQueryMsg->numOfOutput <= 0 || pQueryMsg->numOfOutput > TSDB_MAX_COLUMNS) {
    qError("qmsg:%p illegal value of output columns %d", pQueryMsg, pQueryMsg->numOfOutput);
  } else {
    valid = true;
  }

  return valid;
}

/*
 * Validate the number of source columns and tags of a query message.
 *
 * A message that carries neither columns nor tags is accepted only if every
 * output expression is a tag projection, or a TID_TAG/COUNT function applied
 * to the table-name pseudo column.
 */
static bool validateQuerySourceCols(SQueryTableMsg *pQueryMsg, SSqlFuncMsg** pExprMsg) {
  int32_t total = pQueryMsg->numOfCols + pQueryMsg->numOfTags;

  if (pQueryMsg->numOfCols < 0 || pQueryMsg->numOfTags < 0 || total > TSDB_MAX_COLUMNS) {
    qError("qmsg:%p illegal value of numOfCols %d numOfTags:%d", pQueryMsg, pQueryMsg->numOfCols, pQueryMsg->numOfTags);
    return false;
  }

  if (total != 0) {
    return true;
  }

  // no source column at all: each expression must be able to work without one
  for (int32_t i = 0; i < pQueryMsg->numOfOutput; ++i) {
    SSqlFuncMsg *func = pExprMsg[i];

    bool needsNoSource =
        (func->functionId == TSDB_FUNC_TAGPRJ) ||
        (func->functionId == TSDB_FUNC_TID_TAG && func->colInfo.colId == TSDB_TBNAME_COLUMN_INDEX) ||
        (func->functionId == TSDB_FUNC_COUNT && func->colInfo.colId == TSDB_TBNAME_COLUMN_INDEX);

    if (!needsNoSource) {
      return false;
    }
  }

  return true;
}

5099
/*
 * Read numOfTables STableIdInfo records starting at pMsg, convert their fields
 * to host byte order in place, and push each one into a newly created array.
 *
 * @param pQueryMsg     query message; numOfTables must be > 0
 * @param pMsg          start of the table id records
 * @param pTableIdList  out: the newly created array
 * @return pointer to the first byte after the last record
 */
static char *createTableIdList(SQueryTableMsg *pQueryMsg, char *pMsg, SArray **pTableIdList) {
  assert(pQueryMsg->numOfTables > 0);

  *pTableIdList = taosArrayInit(pQueryMsg->numOfTables, sizeof(STableIdInfo));

  STableIdInfo *entry = (STableIdInfo *)pMsg;
  for (int32_t i = 0; i < pQueryMsg->numOfTables; ++i, ++entry) {
    entry->tid = htonl(entry->tid);
    entry->uid = htobe64(entry->uid);
    entry->key = htobe64(entry->key);

    taosArrayPush(*pTableIdList, entry);
  }

  return (char *)entry;
}
5117

5118
/**
H
hjxilinx 已提交
5119
 * pQueryMsg->head has been converted before this function is called.
5120
 *
H
hjxilinx 已提交
5121
 * @param pQueryMsg
5122 5123 5124 5125
 * @param pTableIdList
 * @param pExpr
 * @return
 */
5126
static int32_t convertQueryMsg(SQueryTableMsg *pQueryMsg, SArray **pTableIdList, SSqlFuncMsg ***pExpr,
weixin_48148422's avatar
weixin_48148422 已提交
5127
                               char **tagCond, char** tbnameCond, SColIndex **groupbyCols, SColumnInfo** tagCols) {
5128 5129
  int32_t code = TSDB_CODE_SUCCESS;

5130 5131 5132 5133 5134 5135 5136 5137
  pQueryMsg->numOfTables = htonl(pQueryMsg->numOfTables);

  pQueryMsg->window.skey = htobe64(pQueryMsg->window.skey);
  pQueryMsg->window.ekey = htobe64(pQueryMsg->window.ekey);
  pQueryMsg->intervalTime = htobe64(pQueryMsg->intervalTime);
  pQueryMsg->slidingTime = htobe64(pQueryMsg->slidingTime);
  pQueryMsg->limit = htobe64(pQueryMsg->limit);
  pQueryMsg->offset = htobe64(pQueryMsg->offset);
H
hjxilinx 已提交
5138

5139 5140
  pQueryMsg->order = htons(pQueryMsg->order);
  pQueryMsg->orderColId = htons(pQueryMsg->orderColId);
H
Haojun Liao 已提交
5141
  pQueryMsg->queryType = htonl(pQueryMsg->queryType);
weixin_48148422's avatar
weixin_48148422 已提交
5142
  pQueryMsg->tagNameRelType = htons(pQueryMsg->tagNameRelType);
5143 5144

  pQueryMsg->numOfCols = htons(pQueryMsg->numOfCols);
5145
  pQueryMsg->numOfOutput = htons(pQueryMsg->numOfOutput);
H
hjxilinx 已提交
5146
  pQueryMsg->numOfGroupCols = htons(pQueryMsg->numOfGroupCols);
5147 5148 5149
  pQueryMsg->tagCondLen = htons(pQueryMsg->tagCondLen);
  pQueryMsg->tsOffset = htonl(pQueryMsg->tsOffset);
  pQueryMsg->tsLen = htonl(pQueryMsg->tsLen);
H
hjxilinx 已提交
5150
  pQueryMsg->tsNumOfBlocks = htonl(pQueryMsg->tsNumOfBlocks);
5151
  pQueryMsg->tsOrder = htonl(pQueryMsg->tsOrder);
5152
  pQueryMsg->numOfTags = htonl(pQueryMsg->numOfTags);
5153

5154
  // query msg safety check
5155
  if (!validateQueryMsg(pQueryMsg)) {
5156 5157
    code = TSDB_CODE_QRY_INVALID_MSG;
    goto _cleanup;
5158 5159
  }

H
hjxilinx 已提交
5160 5161
  char *pMsg = (char *)(pQueryMsg->colList) + sizeof(SColumnInfo) * pQueryMsg->numOfCols;
  for (int32_t col = 0; col < pQueryMsg->numOfCols; ++col) {
5162 5163
    SColumnInfo *pColInfo = &pQueryMsg->colList[col];

H
hjxilinx 已提交
5164
    pColInfo->colId = htons(pColInfo->colId);
5165
    pColInfo->type = htons(pColInfo->type);
H
hjxilinx 已提交
5166 5167
    pColInfo->bytes = htons(pColInfo->bytes);
    pColInfo->numOfFilters = htons(pColInfo->numOfFilters);
5168

H
hjxilinx 已提交
5169
    assert(pColInfo->type >= TSDB_DATA_TYPE_BOOL && pColInfo->type <= TSDB_DATA_TYPE_NCHAR);
5170

H
hjxilinx 已提交
5171
    int32_t numOfFilters = pColInfo->numOfFilters;
5172
    if (numOfFilters > 0) {
H
hjxilinx 已提交
5173
      pColInfo->filters = calloc(numOfFilters, sizeof(SColumnFilterInfo));
5174 5175 5176
    }

    for (int32_t f = 0; f < numOfFilters; ++f) {
5177 5178 5179 5180
      SColumnFilterInfo *pFilterMsg = (SColumnFilterInfo *)pMsg;
      
      SColumnFilterInfo *pColFilter = &pColInfo->filters[f];
      pColFilter->filterstr = htons(pFilterMsg->filterstr);
5181 5182 5183

      pMsg += sizeof(SColumnFilterInfo);

5184 5185
      if (pColFilter->filterstr) {
        pColFilter->len = htobe64(pFilterMsg->len);
5186

5187
        pColFilter->pz = (int64_t) calloc(1, pColFilter->len + 1 * TSDB_NCHAR_SIZE); // note: null-terminator
5188 5189
        memcpy((void *)pColFilter->pz, pMsg, pColFilter->len);
        pMsg += (pColFilter->len + 1);
5190
      } else {
5191 5192
        pColFilter->lowerBndi = htobe64(pFilterMsg->lowerBndi);
        pColFilter->upperBndi = htobe64(pFilterMsg->upperBndi);
5193 5194
      }

5195 5196
      pColFilter->lowerRelOptr = htons(pFilterMsg->lowerRelOptr);
      pColFilter->upperRelOptr = htons(pFilterMsg->upperRelOptr);
5197 5198 5199
    }
  }

5200 5201
  *pExpr = calloc(pQueryMsg->numOfOutput, POINTER_BYTES);
  SSqlFuncMsg *pExprMsg = (SSqlFuncMsg *)pMsg;
5202

5203
  for (int32_t i = 0; i < pQueryMsg->numOfOutput; ++i) {
5204
    (*pExpr)[i] = pExprMsg;
5205

5206
    pExprMsg->colInfo.colIndex = htons(pExprMsg->colInfo.colIndex);
5207 5208 5209 5210
    pExprMsg->colInfo.colId = htons(pExprMsg->colInfo.colId);
    pExprMsg->colInfo.flag = htons(pExprMsg->colInfo.flag);
    pExprMsg->functionId = htons(pExprMsg->functionId);
    pExprMsg->numOfParams = htons(pExprMsg->numOfParams);
5211

5212
    pMsg += sizeof(SSqlFuncMsg);
5213 5214

    for (int32_t j = 0; j < pExprMsg->numOfParams; ++j) {
5215
      pExprMsg->arg[j].argType = htons(pExprMsg->arg[j].argType);
5216 5217 5218 5219
      pExprMsg->arg[j].argBytes = htons(pExprMsg->arg[j].argBytes);

      if (pExprMsg->arg[j].argType == TSDB_DATA_TYPE_BINARY) {
        pExprMsg->arg[j].argValue.pz = pMsg;
5220
        pMsg += pExprMsg->arg[j].argBytes;  // one more for the string terminated char.
5221 5222 5223 5224 5225
      } else {
        pExprMsg->arg[j].argValue.i64 = htobe64(pExprMsg->arg[j].argValue.i64);
      }
    }

H
Haojun Liao 已提交
5226 5227
    int16_t functionId = pExprMsg->functionId;
    if (functionId == TSDB_FUNC_TAG || functionId == TSDB_FUNC_TAGPRJ || functionId == TSDB_FUNC_TAG_DUMMY) {
5228
      if (pExprMsg->colInfo.flag != TSDB_COL_TAG) {  // ignore the column  index check for arithmetic expression.
5229 5230
        code = TSDB_CODE_QRY_INVALID_MSG;
        goto _cleanup;
5231 5232
      }
    } else {
5233
//      if (!validateExprColumnInfo(pQueryMsg, pExprMsg)) {
5234
//        return TSDB_CODE_QRY_INVALID_MSG;
5235
//      }
5236 5237
    }

5238
    pExprMsg = (SSqlFuncMsg *)pMsg;
5239
  }
5240

5241
  if (!validateQuerySourceCols(pQueryMsg, *pExpr)) {
5242
    code = TSDB_CODE_QRY_INVALID_MSG;
dengyihao's avatar
dengyihao 已提交
5243
    goto _cleanup;
5244
  }
5245

H
hjxilinx 已提交
5246
  pMsg = createTableIdList(pQueryMsg, pMsg, pTableIdList);
5247

H
hjxilinx 已提交
5248
  if (pQueryMsg->numOfGroupCols > 0) {  // group by tag columns
5249
    *groupbyCols = malloc(pQueryMsg->numOfGroupCols * sizeof(SColIndex));
5250 5251 5252 5253
    if (*groupbyCols == NULL) {
      code = TSDB_CODE_QRY_OUT_OF_MEMORY;
      goto _cleanup;
    }
5254 5255 5256

    for (int32_t i = 0; i < pQueryMsg->numOfGroupCols; ++i) {
      (*groupbyCols)[i].colId = *(int16_t *)pMsg;
5257
      pMsg += sizeof((*groupbyCols)[i].colId);
5258 5259

      (*groupbyCols)[i].colIndex = *(int16_t *)pMsg;
5260 5261
      pMsg += sizeof((*groupbyCols)[i].colIndex);

5262
      (*groupbyCols)[i].flag = *(int16_t *)pMsg;
5263 5264 5265 5266 5267
      pMsg += sizeof((*groupbyCols)[i].flag);

      memcpy((*groupbyCols)[i].name, pMsg, tListLen(groupbyCols[i]->name));
      pMsg += tListLen((*groupbyCols)[i].name);
    }
5268

H
hjxilinx 已提交
5269 5270
    pQueryMsg->orderByIdx = htons(pQueryMsg->orderByIdx);
    pQueryMsg->orderType = htons(pQueryMsg->orderType);
5271 5272
  }

5273 5274
  pQueryMsg->fillType = htons(pQueryMsg->fillType);
  if (pQueryMsg->fillType != TSDB_FILL_NONE) {
5275
    pQueryMsg->fillVal = (uint64_t)(pMsg);
5276 5277

    int64_t *v = (int64_t *)pMsg;
5278
    for (int32_t i = 0; i < pQueryMsg->numOfOutput; ++i) {
5279 5280
      v[i] = htobe64(v[i]);
    }
5281

5282
    pMsg += sizeof(int64_t) * pQueryMsg->numOfOutput;
5283
  }
5284

5285 5286 5287 5288
  if (pQueryMsg->numOfTags > 0) {
    (*tagCols) = calloc(1, sizeof(SColumnInfo) * pQueryMsg->numOfTags);
    for (int32_t i = 0; i < pQueryMsg->numOfTags; ++i) {
      SColumnInfo* pTagCol = (SColumnInfo*) pMsg;
5289

5290 5291 5292 5293
      pTagCol->colId = htons(pTagCol->colId);
      pTagCol->bytes = htons(pTagCol->bytes);
      pTagCol->type  = htons(pTagCol->type);
      pTagCol->numOfFilters = 0;
5294

5295
      (*tagCols)[i] = *pTagCol;
5296
      pMsg += sizeof(SColumnInfo);
5297
    }
H
hjxilinx 已提交
5298
  }
5299

5300 5301 5302 5303 5304 5305
  // the tag query condition expression string is located at the end of query msg
  if (pQueryMsg->tagCondLen > 0) {
    *tagCond = calloc(1, pQueryMsg->tagCondLen);
    memcpy(*tagCond, pMsg, pQueryMsg->tagCondLen);
    pMsg += pQueryMsg->tagCondLen;
  }
5306

weixin_48148422's avatar
weixin_48148422 已提交
5307
  if (*pMsg != 0) {
5308
    size_t len = strlen(pMsg) + 1;
5309

5310
    *tbnameCond = malloc(len);
5311 5312 5313 5314 5315
    if (*tbnameCond == NULL) {
      code = TSDB_CODE_QRY_OUT_OF_MEMORY;
      goto _cleanup;
    }

weixin_48148422's avatar
weixin_48148422 已提交
5316
    strcpy(*tbnameCond, pMsg);
5317
    pMsg += len;
weixin_48148422's avatar
weixin_48148422 已提交
5318
  }
5319

5320
  qDebug("qmsg:%p query %d tables, type:%d, qrange:%" PRId64 "-%" PRId64 ", numOfGroupbyTagCols:%d, order:%d, "
H
Haojun Liao 已提交
5321 5322
         "outputCols:%d, numOfCols:%d, interval:%" PRId64 ", fillType:%d, comptsLen:%d, compNumOfBlocks:%d, limit:%" PRId64 ", offset:%" PRId64,
         pQueryMsg, pQueryMsg->numOfTables, pQueryMsg->queryType, pQueryMsg->window.skey, pQueryMsg->window.ekey, pQueryMsg->numOfGroupCols,
5323
         pQueryMsg->order, pQueryMsg->numOfOutput, pQueryMsg->numOfCols, pQueryMsg->intervalTime,
H
Haojun Liao 已提交
5324
         pQueryMsg->fillType, pQueryMsg->tsLen, pQueryMsg->tsNumOfBlocks, pQueryMsg->limit, pQueryMsg->offset);
5325 5326

  return TSDB_CODE_SUCCESS;
dengyihao's avatar
dengyihao 已提交
5327 5328 5329 5330 5331 5332 5333 5334 5335

_cleanup:
  tfree(*pExpr);
  taosArrayDestroy(*pTableIdList);
  *pTableIdList = NULL;
  tfree(*tbnameCond);
  tfree(*groupbyCols);
  tfree(*tagCols);
  tfree(*tagCond);
5336 5337

  return code;
5338 5339
}

H
hjxilinx 已提交
5340
/*
 * Build the expression tree of an arithmetic expression from the binary form
 * stored in its first argument and attach it to pArithExprInfo->pExpr.
 *
 * @return TSDB_CODE_SUCCESS on success, the code caught from
 *         exprTreeFromBinary() if it raises one, or TSDB_CODE_QRY_APP_ERROR
 *         when no tree is produced.
 */
static int32_t buildAirthmeticExprFromMsg(SExprInfo *pArithExprInfo, SQueryTableMsg *pQueryMsg) {
  qDebug("qmsg:%p create arithmetic expr from binary string: %s", pQueryMsg, pArithExprInfo->base.arg[0].argValue.pz);

  tExprNode* root = NULL;
  TRY(32) {
    root = exprTreeFromBinary(pArithExprInfo->base.arg[0].argValue.pz, pArithExprInfo->base.arg[0].argBytes);
  } CATCH( code ) {
    CLEANUP_EXECUTE();
    return code;
  } END_TRY

  if (root != NULL) {
    pArithExprInfo->pExpr = root;
    return TSDB_CODE_SUCCESS;
  }

  qError("qmsg:%p failed to create arithmetic expression string from:%s", pQueryMsg, pArithExprInfo->base.arg[0].argValue.pz);
  return TSDB_CODE_QRY_APP_ERROR;
}

H
Haojun Liao 已提交
5360
static int32_t createQFunctionExprFromMsg(SQueryTableMsg *pQueryMsg, SExprInfo **pExprInfo, SSqlFuncMsg **pExprMsg,
5361 5362
    SColumnInfo* pTagCols) {
  *pExprInfo = NULL;
H
hjxilinx 已提交
5363
  int32_t code = TSDB_CODE_SUCCESS;
5364

H
Haojun Liao 已提交
5365
  SExprInfo *pExprs = (SExprInfo *)calloc(pQueryMsg->numOfOutput, sizeof(SExprInfo));
5366
  if (pExprs == NULL) {
5367
    return TSDB_CODE_QRY_OUT_OF_MEMORY;
5368 5369 5370 5371 5372
  }

  bool    isSuperTable = QUERY_IS_STABLE_QUERY(pQueryMsg->queryType);
  int16_t tagLen = 0;

5373
  for (int32_t i = 0; i < pQueryMsg->numOfOutput; ++i) {
5374
    pExprs[i].base = *pExprMsg[i];
5375
    pExprs[i].bytes = 0;
5376 5377 5378 5379

    int16_t type = 0;
    int16_t bytes = 0;

5380
    // parse the arithmetic expression
5381
    if (pExprs[i].base.functionId == TSDB_FUNC_ARITHM) {
5382
      code = buildAirthmeticExprFromMsg(&pExprs[i], pQueryMsg);
5383

5384 5385 5386
      if (code != TSDB_CODE_SUCCESS) {
        tfree(pExprs);
        return code;
5387 5388
      }

5389
      type  = TSDB_DATA_TYPE_DOUBLE;
5390
      bytes = tDataTypeDesc[type].nSize;
H
Haojun Liao 已提交
5391
    } else if (pExprs[i].base.colInfo.colId == TSDB_TBNAME_COLUMN_INDEX && pExprs[i].base.functionId == TSDB_FUNC_TAGPRJ) {  // parse the normal column
H
Haojun Liao 已提交
5392 5393 5394
      SSchema s = tGetTableNameColumnSchema();
      type  = s.type;
      bytes = s.bytes;
B
Bomin Zhang 已提交
5395
    } else{
5396
      int32_t j = getColumnIndexInSource(pQueryMsg, &pExprs[i].base, pTagCols);
dengyihao's avatar
dengyihao 已提交
5397
      assert(j < pQueryMsg->numOfCols || j < pQueryMsg->numOfTags);
H
Haojun Liao 已提交
5398

dengyihao's avatar
dengyihao 已提交
5399
      if (pExprs[i].base.colInfo.colId != TSDB_TBNAME_COLUMN_INDEX && j >= 0) {
H
Haojun Liao 已提交
5400 5401 5402 5403
        SColumnInfo* pCol = (TSDB_COL_IS_TAG(pExprs[i].base.colInfo.flag))? &pTagCols[j]:&pQueryMsg->colList[j];
        type = pCol->type;
        bytes = pCol->bytes;
      } else {
H
Haojun Liao 已提交
5404
        SSchema s = tGetTableNameColumnSchema();
H
hjxilinx 已提交
5405

H
Haojun Liao 已提交
5406 5407 5408
        type  = s.type;
        bytes = s.bytes;
      }
5409 5410
    }

5411 5412
    int32_t param = pExprs[i].base.arg[0].argValue.i64;
    if (getResultDataInfo(type, bytes, pExprs[i].base.functionId, param, &pExprs[i].type, &pExprs[i].bytes,
5413
                          &pExprs[i].interBytes, 0, isSuperTable) != TSDB_CODE_SUCCESS) {
5414
      tfree(pExprs);
5415
      return TSDB_CODE_QRY_INVALID_MSG;
5416 5417
    }

5418
    if (pExprs[i].base.functionId == TSDB_FUNC_TAG_DUMMY || pExprs[i].base.functionId == TSDB_FUNC_TS_DUMMY) {
5419
      tagLen += pExprs[i].bytes;
5420
    }
5421
    assert(isValidDataType(pExprs[i].type));
5422 5423 5424
  }

  // TODO refactor
5425
  for (int32_t i = 0; i < pQueryMsg->numOfOutput; ++i) {
5426 5427
    pExprs[i].base = *pExprMsg[i];
    int16_t functId = pExprs[i].base.functionId;
5428

5429
    if (functId == TSDB_FUNC_TOP || functId == TSDB_FUNC_BOTTOM) {
5430
      int32_t j = getColumnIndexInSource(pQueryMsg, &pExprs[i].base, pTagCols);
5431 5432 5433 5434 5435
      assert(j < pQueryMsg->numOfCols);

      SColumnInfo *pCol = &pQueryMsg->colList[j];

      int32_t ret =
5436
          getResultDataInfo(pCol->type, pCol->bytes, functId, pExprs[i].base.arg[0].argValue.i64,
5437
                            &pExprs[i].type, &pExprs[i].bytes, &pExprs[i].interBytes, tagLen, isSuperTable);
5438 5439 5440
      assert(ret == TSDB_CODE_SUCCESS);
    }
  }
5441
  *pExprInfo = pExprs;
5442 5443 5444 5445

  return TSDB_CODE_SUCCESS;
}

5446
/**
 * Create the group-by descriptor from the query message.
 *
 * @param pQueryMsg  decoded query message (numOfGroupCols, orderType, orderByIdx)
 * @param pColIndex  array of numOfGroupCols group-by column indexes, copied into the result
 * @param code       out: set to TSDB_CODE_QRY_OUT_OF_MEMORY on allocation failure; left
 *                   untouched otherwise
 * @return the new descriptor (owned by the caller), or NULL when there is no group-by
 *         column or when allocation failed
 */
static SSqlGroupbyExpr *createGroupbyExprFromMsg(SQueryTableMsg *pQueryMsg, SColIndex *pColIndex, int32_t *code) {
  if (pQueryMsg->numOfGroupCols == 0) {
    return NULL;
  }

  // using group by tag columns
  SSqlGroupbyExpr *pGroupbyExpr = (SSqlGroupbyExpr *)calloc(1, sizeof(SSqlGroupbyExpr));
  if (pGroupbyExpr == NULL) {
    *code = TSDB_CODE_QRY_OUT_OF_MEMORY;
    return NULL;
  }

  pGroupbyExpr->numOfGroupCols = pQueryMsg->numOfGroupCols;
  pGroupbyExpr->orderType = pQueryMsg->orderType;
  pGroupbyExpr->orderIndex = pQueryMsg->orderByIdx;

  pGroupbyExpr->columnInfo = taosArrayInit(pQueryMsg->numOfGroupCols, sizeof(SColIndex));
  if (pGroupbyExpr->columnInfo == NULL) {
    // do not push into a missing array; report the failure instead
    free(pGroupbyExpr);
    *code = TSDB_CODE_QRY_OUT_OF_MEMORY;
    return NULL;
  }

  for (int32_t i = 0; i < pQueryMsg->numOfGroupCols; ++i) {
    taosArrayPush(pGroupbyExpr->columnInfo, &pColIndex[i]);
  }

  return pGroupbyExpr;
}

5470
/**
 * Build pQuery->pFilterInfo: one entry per column that carries at least one filter,
 * each with a resolved filter callback per filter element.
 *
 * @param pQInfo  owning query handle, used only as a log tag
 * @param pQuery  query whose colList is scanned; numOfFilterCols and pFilterInfo are set
 * @return TSDB_CODE_SUCCESS, TSDB_CODE_QRY_OUT_OF_MEMORY on allocation failure, or
 *         TSDB_CODE_QRY_INVALID_MSG for an invalid filter / unsupported data type
 */
static int32_t createFilterInfo(void *pQInfo, SQuery *pQuery) {
  for (int32_t i = 0; i < pQuery->numOfCols; ++i) {
    if (pQuery->colList[i].numOfFilters > 0) {
      pQuery->numOfFilterCols++;
    }
  }

  if (pQuery->numOfFilterCols == 0) {
    return TSDB_CODE_SUCCESS;
  }

  pQuery->pFilterInfo = calloc(pQuery->numOfFilterCols, sizeof(SSingleColumnFilterInfo));
  if (pQuery->pFilterInfo == NULL) {
    // keep count and array consistent: freeQInfo() walks numOfFilterCols entries of pFilterInfo
    pQuery->numOfFilterCols = 0;
    return TSDB_CODE_QRY_OUT_OF_MEMORY;
  }

  for (int32_t i = 0, j = 0; i < pQuery->numOfCols; ++i) {
    if (pQuery->colList[i].numOfFilters <= 0) {
      continue;
    }

    SSingleColumnFilterInfo *pFilterInfo = &pQuery->pFilterInfo[j];
    pFilterInfo->info = pQuery->colList[i];

    pFilterInfo->pFilters = calloc(pQuery->colList[i].numOfFilters, sizeof(SColumnFilterElem));
    if (pFilterInfo->pFilters == NULL) {
      pFilterInfo->numOfFilters = 0;
      return TSDB_CODE_QRY_OUT_OF_MEMORY;
    }
    pFilterInfo->numOfFilters = pQuery->colList[i].numOfFilters;

    for (int32_t f = 0; f < pFilterInfo->numOfFilters; ++f) {
      SColumnFilterElem *pSingleColFilter = &pFilterInfo->pFilters[f];
      pSingleColFilter->filterInfo = pQuery->colList[i].filters[f];

      int32_t lower = pSingleColFilter->filterInfo.lowerRelOptr;
      int32_t upper = pSingleColFilter->filterInfo.upperRelOptr;

      if (lower == TSDB_RELATION_INVALID && upper == TSDB_RELATION_INVALID) {
        qError("QInfo:%p invalid filter info", pQInfo);
        return TSDB_CODE_QRY_INVALID_MSG;
      }

      int16_t type  = pQuery->colList[i].type;
      int16_t bytes = pQuery->colList[i].bytes;

      // todo refactor
      __filter_func_t *rangeFilterArray = getRangeFilterFuncArray(type);
      __filter_func_t *filterArray = getValueFilterFuncArray(type);

      if (rangeFilterArray == NULL && filterArray == NULL) {
        qError("QInfo:%p failed to get filter function, invalid data type:%d", pQInfo, type);
        return TSDB_CODE_QRY_INVALID_MSG;
      }

      if ((lower == TSDB_RELATION_GREATER_EQUAL || lower == TSDB_RELATION_GREATER) &&
          (upper == TSDB_RELATION_LESS_EQUAL || upper == TSDB_RELATION_LESS)) {
        // two-sided range: the slot in the range table depends on which bounds are inclusive
        assert(rangeFilterArray != NULL);
        if (lower == TSDB_RELATION_GREATER_EQUAL) {
          if (upper == TSDB_RELATION_LESS_EQUAL) {
            pSingleColFilter->fp = rangeFilterArray[4];
          } else {
            pSingleColFilter->fp = rangeFilterArray[2];
          }
        } else {
          if (upper == TSDB_RELATION_LESS_EQUAL) {
            pSingleColFilter->fp = rangeFilterArray[3];
          } else {
            pSingleColFilter->fp = rangeFilterArray[1];
          }
        }
      } else {  // set callback filter function
        assert(filterArray != NULL);
        if (lower != TSDB_RELATION_INVALID) {
          pSingleColFilter->fp = filterArray[lower];

          if (upper != TSDB_RELATION_INVALID) {
            qError("pQInfo:%p failed to get filter function, invalid filter condition: %d", pQInfo, type);
            return TSDB_CODE_QRY_INVALID_MSG;
          }
        } else {
          pSingleColFilter->fp = filterArray[upper];
        }
      }

      assert(pSingleColFilter->fp != NULL);
      pSingleColFilter->bytes = bytes;
    }

    j++;
  }

  return TSDB_CODE_SUCCESS;
}

5557
/**
 * Resolve colInfo.colIndex of every non-arithmetic output expression to the position of
 * its column id inside pQuery->colList (normal columns) or pQuery->tagColList (tags).
 *
 * @param pQuery  query whose pSelectExpr entries are updated in place
 */
static void doUpdateExprColumnIndex(SQuery *pQuery) {
  // check the pointer itself before looking through it
  assert(pQuery != NULL && pQuery->pSelectExpr != NULL);

  for (int32_t k = 0; k < pQuery->numOfOutput; ++k) {
    SSqlFuncMsg *pSqlExprMsg = &pQuery->pSelectExpr[k].base;
    if (pSqlExprMsg->functionId == TSDB_FUNC_ARITHM) {
      continue;
    }

    // todo opt performance
    SColIndex *pColIndex = &pSqlExprMsg->colInfo;
    if (!TSDB_COL_IS_TAG(pColIndex->flag)) {
      int32_t f = 0;
      for (f = 0; f < pQuery->numOfCols; ++f) {
        if (pColIndex->colId == pQuery->colList[f].colId) {
          pColIndex->colIndex = f;
          break;
        }
      }

      assert(f < pQuery->numOfCols);
    } else {
      int32_t f = 0;
      for (f = 0; f < pQuery->numOfTags; ++f) {
        if (pColIndex->colId == pQuery->tagColList[f].colId) {
          pColIndex->colIndex = f;
          break;
        }
      }

      // the table-name pseudo column is not part of tagColList, so it may legitimately be absent
      assert(f < pQuery->numOfTags || pColIndex->colId == TSDB_TBNAME_COLUMN_INDEX);
    }
  }
}

weixin_48148422's avatar
weixin_48148422 已提交
5592

5593
/**
 * Comparator ordering STableIdInfo records by ascending uid.
 *
 * @return 1 if a's uid is larger, -1 if it is smaller, 0 if both are equal
 */
static int compareTableIdInfo(const void* a, const void* b) {
  const STableIdInfo* lhs = (const STableIdInfo*)a;
  const STableIdInfo* rhs = (const STableIdInfo*)b;

  // (greater) - (less) yields exactly 1, -1 or 0 without any subtraction overflow
  return (lhs->uid > rhs->uid) - (lhs->uid < rhs->uid);
}

dengyihao's avatar
dengyihao 已提交
5601 5602
// Forward declaration: createQInfoImpl() and initQInfo() call freeQInfo() on their failure paths,
// ahead of its definition further down in this file.
static void freeQInfo(SQInfo *pQInfo);

weixin_48148422's avatar
weixin_48148422 已提交
5603
static SQInfo *createQInfoImpl(SQueryTableMsg *pQueryMsg, SArray* pTableIdList, SSqlGroupbyExpr *pGroupbyExpr, SExprInfo *pExprs,
5604
                               STableGroupInfo *pTableGroupInfo, SColumnInfo* pTagCols) {
5605 5606
  SQInfo *pQInfo = (SQInfo *)calloc(1, sizeof(SQInfo));
  if (pQInfo == NULL) {
5607
    return NULL;
5608 5609 5610 5611 5612 5613
  }

  SQuery *pQuery = calloc(1, sizeof(SQuery));
  pQInfo->runtimeEnv.pQuery = pQuery;

  int16_t numOfCols = pQueryMsg->numOfCols;
5614
  int16_t numOfOutput = pQueryMsg->numOfOutput;
5615

5616
  pQuery->numOfCols       = numOfCols;
H
hjxilinx 已提交
5617
  pQuery->numOfOutput     = numOfOutput;
5618 5619 5620
  pQuery->limit.limit     = pQueryMsg->limit;
  pQuery->limit.offset    = pQueryMsg->offset;
  pQuery->order.order     = pQueryMsg->order;
5621
  pQuery->order.orderColId = pQueryMsg->orderColId;
5622 5623 5624 5625
  pQuery->pSelectExpr     = pExprs;
  pQuery->pGroupbyExpr    = pGroupbyExpr;
  pQuery->intervalTime    = pQueryMsg->intervalTime;
  pQuery->slidingTime     = pQueryMsg->slidingTime;
5626
  pQuery->slidingTimeUnit = pQueryMsg->slidingTimeUnit;
5627
  pQuery->fillType        = pQueryMsg->fillType;
5628
  pQuery->numOfTags       = pQueryMsg->numOfTags;
5629
  
5630
  // todo do not allocate ??
5631
  pQuery->colList = calloc(numOfCols, sizeof(SSingleColumnFilterInfo));
5632
  if (pQuery->colList == NULL) {
5633
    goto _cleanup;
5634
  }
5635

H
hjxilinx 已提交
5636
  for (int16_t i = 0; i < numOfCols; ++i) {
5637
    pQuery->colList[i] = pQueryMsg->colList[i];
5638
    pQuery->colList[i].filters = tscFilterInfoClone(pQueryMsg->colList[i].filters, pQuery->colList[i].numOfFilters);
H
hjxilinx 已提交
5639
  }
5640

5641
  pQuery->tagColList = pTagCols;
5642

5643
  // calculate the result row size
5644 5645 5646
  for (int16_t col = 0; col < numOfOutput; ++col) {
    assert(pExprs[col].bytes > 0);
    pQuery->rowSize += pExprs[col].bytes;
5647
  }
5648

5649
  doUpdateExprColumnIndex(pQuery);
5650

5651
  int32_t ret = createFilterInfo(pQInfo, pQuery);
5652
  if (ret != TSDB_CODE_SUCCESS) {
5653
    goto _cleanup;
5654 5655 5656
  }

  // prepare the result buffer
5657
  pQuery->sdata = (tFilePage **)calloc(pQuery->numOfOutput, POINTER_BYTES);
5658
  if (pQuery->sdata == NULL) {
5659
    goto _cleanup;
5660 5661
  }

H
hjxilinx 已提交
5662
  // set the output buffer capacity
H
hjxilinx 已提交
5663
  pQuery->rec.capacity = 4096;
5664
  pQuery->rec.threshold = 4000;
5665

5666
  for (int32_t col = 0; col < pQuery->numOfOutput; ++col) {
5667
    assert(pExprs[col].interBytes >= pExprs[col].bytes);
5668 5669

    // allocate additional memory for interResults that are usually larger then final results
5670 5671
    size_t size = (pQuery->rec.capacity + 1) * pExprs[col].bytes + pExprs[col].interBytes + sizeof(tFilePage);
    pQuery->sdata[col] = (tFilePage *)calloc(1, size);
5672
    if (pQuery->sdata[col] == NULL) {
5673
      goto _cleanup;
5674 5675 5676
    }
  }

5677
  if (pQuery->fillType != TSDB_FILL_NONE) {
5678 5679
    pQuery->fillVal = malloc(sizeof(int64_t) * pQuery->numOfOutput);
    if (pQuery->fillVal == NULL) {
5680
      goto _cleanup;
5681 5682 5683
    }

    // the first column is the timestamp
5684
    memcpy(pQuery->fillVal, (char *)pQueryMsg->fillVal, pQuery->numOfOutput * sizeof(int64_t));
5685 5686 5687
  }

  // to make sure third party won't overwrite this structure
5688
  pQInfo->signature = pQInfo;
5689

5690
  pQInfo->tableGroupInfo = *pTableGroupInfo;
dengyihao's avatar
dengyihao 已提交
5691 5692 5693 5694 5695 5696
  size_t numOfGroups = 0;
  if (pTableGroupInfo->pGroupList != NULL) {
    numOfGroups = taosArrayGetSize(pTableGroupInfo->pGroupList);

    pQInfo->tableqinfoGroupInfo.pGroupList = taosArrayInit(numOfGroups, POINTER_BYTES);
    pQInfo->tableqinfoGroupInfo.numOfTables = pTableGroupInfo->numOfTables;
H
Haojun Liao 已提交
5697 5698 5699
    pQInfo->tableqinfoGroupInfo.map = taosHashInit(pTableGroupInfo->numOfTables,
                                                   taosGetDefaultHashFunction(TSDB_DATA_TYPE_INT), false);
  }
5700

weixin_48148422's avatar
weixin_48148422 已提交
5701 5702
  int tableIndex = 0;
  STimeWindow window = pQueryMsg->window;
5703
  taosArraySort(pTableIdList, compareTableIdInfo);
5704

H
Haojun Liao 已提交
5705 5706 5707 5708
  // TODO optimize the STableQueryInfo malloc strategy
  pQInfo->pBuf = calloc(pTableGroupInfo->numOfTables, sizeof(STableQueryInfo));
  int32_t index = 0;

H
hjxilinx 已提交
5709
  for(int32_t i = 0; i < numOfGroups; ++i) {
5710
    SArray* pa = taosArrayGetP(pTableGroupInfo->pGroupList, i);
5711

H
Haojun Liao 已提交
5712
    size_t s = taosArrayGetSize(pa);
5713
    SArray* p1 = taosArrayInit(s, POINTER_BYTES);
5714

H
hjxilinx 已提交
5715
    for(int32_t j = 0; j < s; ++j) {
5716
      void* pTable = taosArrayGetP(pa, j);
H
Haojun Liao 已提交
5717
      STableId* id = TSDB_TABLEID(pTable);
5718

H
Haojun Liao 已提交
5719
      STableIdInfo* pTableId = taosArraySearch(pTableIdList, id, compareTableIdInfo);
weixin_48148422's avatar
weixin_48148422 已提交
5720 5721 5722
      if (pTableId != NULL ) {
        window.skey = pTableId->key;
      } else {
B
Bomin Zhang 已提交
5723
        window.skey = pQueryMsg->window.skey;
weixin_48148422's avatar
weixin_48148422 已提交
5724
      }
5725

H
Haojun Liao 已提交
5726 5727
      void* buf = pQInfo->pBuf + index * sizeof(STableQueryInfo);
      STableQueryInfo* item = createTableQueryInfo(&pQInfo->runtimeEnv, pTable, window, buf);
5728
      item->groupIndex = i;
H
hjxilinx 已提交
5729
      taosArrayPush(p1, &item);
H
Haojun Liao 已提交
5730 5731
      taosHashPut(pQInfo->tableqinfoGroupInfo.map, &id->tid, sizeof(id->tid), &item, POINTER_BYTES);
      index += 1;
H
hjxilinx 已提交
5732
    }
5733

5734
    taosArrayPush(pQInfo->tableqinfoGroupInfo.pGroupList, &p1);
H
hjxilinx 已提交
5735
  }
5736

weixin_48148422's avatar
weixin_48148422 已提交
5737 5738
  pQInfo->arrTableIdInfo = taosArrayInit(tableIndex, sizeof(STableIdInfo));

5739
  pQuery->pos = -1;
5740
  pQuery->window = pQueryMsg->window;
5741

5742
  if (sem_init(&pQInfo->dataReady, 0, 0) != 0) {
5743 5744
    int32_t code = TAOS_SYSTEM_ERROR(errno);
    qError("QInfo:%p init dataReady sem failed, reason:%s", pQInfo, tstrerror(code));
5745
    goto _cleanup;
5746
  }
5747

5748
  colIdCheck(pQuery);
5749

5750
  qDebug("qmsg:%p QInfo:%p created", pQueryMsg, pQInfo);
5751 5752
  return pQInfo;

5753
_cleanup:
dengyihao's avatar
dengyihao 已提交
5754
  freeQInfo(pQInfo);
5755 5756 5757
  return NULL;
}

H
hjxilinx 已提交
5758
static bool isValidQInfo(void *param) {
H
hjxilinx 已提交
5759 5760 5761 5762
  SQInfo *pQInfo = (SQInfo *)param;
  if (pQInfo == NULL) {
    return false;
  }
5763

H
hjxilinx 已提交
5764 5765 5766 5767
  /*
   * pQInfo->signature may be changed by another thread, so we assign value of signature
   * into local variable, then compare by using local variable
   */
5768
  uint64_t sig = (uint64_t)pQInfo->signature;
H
hjxilinx 已提交
5769 5770 5771
  return (sig == (uint64_t)pQInfo);
}

H
Haojun Liao 已提交
5772
/**
 * Finish the initialization of a query handle created by createQInfoImpl().
 *
 * Records the caller's release callback, short-circuits queries that cannot produce a
 * result (empty time range, no qualified table), and otherwise builds the optional
 * timestamp buffer and runs doInitQInfo().
 *
 * @param pQueryMsg  decoded query message
 * @param tsdb       storage engine handle passed through to doInitQInfo()
 * @param vgId       vnode group id passed through to doInitQInfo()
 * @param pQInfo     query handle to initialize; freed here when doInitQInfo() fails
 * @param isSTable   true for super-table queries
 * @param param      argument for fn
 * @param fn         callback stored in the handle (invoked by qDestroyQueryInfo)
 * @return TSDB_CODE_SUCCESS, or the error code of doInitQInfo()
 */
static int32_t initQInfo(SQueryTableMsg *pQueryMsg, void *tsdb, int32_t vgId, SQInfo *pQInfo, bool isSTable, void* param, _qinfo_free_fn_t fn) {
  int32_t code = TSDB_CODE_SUCCESS;
  SQuery *pQuery = pQInfo->runtimeEnv.pQuery;

  // record the release callback first so that every successful return path carries it
  pQInfo->param = param;
  pQInfo->freeFn = fn;

  if ((QUERY_IS_ASC_QUERY(pQuery) && (pQuery->window.skey > pQuery->window.ekey)) ||
      (!QUERY_IS_ASC_QUERY(pQuery) && (pQuery->window.ekey > pQuery->window.skey))) {
    qDebug("QInfo:%p no result in time range %" PRId64 "-%" PRId64 ", order %d", pQInfo, pQuery->window.skey,
           pQuery->window.ekey, pQuery->order.order);
    setQueryStatus(pQuery, QUERY_COMPLETED);

    sem_post(&pQInfo->dataReady);
    return TSDB_CODE_SUCCESS;
  }

  if (pQInfo->tableqinfoGroupInfo.numOfTables == 0) {
    qDebug("QInfo:%p no table qualified for tag filter, abort query", pQInfo);
    setQueryStatus(pQuery, QUERY_COMPLETED);

    sem_post(&pQInfo->dataReady);
    return TSDB_CODE_SUCCESS;
  }

  // build the timestamp buffer only when the query is really going to run; creating it
  // before the early returns above left it unreferenced on those paths
  STSBuf *pTSBuf = NULL;
  if (pQueryMsg->tsLen > 0) {  // open new file to save the result
    char *tsBlock = (char *) pQueryMsg + pQueryMsg->tsOffset;
    pTSBuf = tsBufCreateFromCompBlocks(tsBlock, pQueryMsg->tsNumOfBlocks, pQueryMsg->tsLen, pQueryMsg->tsOrder);

    tsBufResetPos(pTSBuf);
    bool ret = tsBufNextPos(pTSBuf);
    UNUSED(ret);
  }

  // filter the qualified
  if ((code = doInitQInfo(pQInfo, pTSBuf, tsdb, vgId, isSTable)) != TSDB_CODE_SUCCESS) {
    goto _error;
  }

  return code;

_error:
  // table query ref will be decrease during error handling
  freeQInfo(pQInfo);
  return code;
}

/**
 * Release a query handle and everything it owns: output buffers, runtime environment,
 * filter info, expression trees, per-table query info, table groups, group-by descriptor,
 * column lists and finally the handle itself.
 *
 * Does nothing when pQInfo is NULL or does not carry a valid signature.
 *
 * NOTE(review): the filter arrays cloned into colList[i].filters by createQInfoImpl() are
 * not released here; confirm whether they are freed elsewhere.
 */
static void freeQInfo(SQInfo *pQInfo) {
  if (!isValidQInfo(pQInfo)) {
    return;
  }

  SQuery *pQuery = pQInfo->runtimeEnv.pQuery;
  setQueryKilled(pQInfo);

  qDebug("QInfo:%p start to free QInfo", pQInfo);

  // the buffer array itself may be missing on a partially built handle
  if (pQuery->sdata != NULL) {
    for (int32_t col = 0; col < pQuery->numOfOutput; ++col) {
      tfree(pQuery->sdata[col]);
    }
  }

  sem_destroy(&(pQInfo->dataReady));
  teardownQueryRuntimeEnv(&pQInfo->runtimeEnv);

  // numOfFilterCols is counted before pFilterInfo is allocated, so guard the array pointer
  if (pQuery->pFilterInfo != NULL) {
    for (int32_t i = 0; i < pQuery->numOfFilterCols; ++i) {
      SSingleColumnFilterInfo *pColFilter = &pQuery->pFilterInfo[i];
      if (pColFilter->numOfFilters > 0) {
        tfree(pColFilter->pFilters);
      }
    }
  }

  if (pQuery->pSelectExpr != NULL) {
    for (int32_t i = 0; i < pQuery->numOfOutput; ++i) {
      SExprInfo* pExprInfo = &pQuery->pSelectExpr[i];

      if (pExprInfo->pExpr != NULL) {
        tExprTreeDestroy(&pExprInfo->pExpr, NULL);
      }
    }

    tfree(pQuery->pSelectExpr);
  }

  tfree(pQuery->fillVal);

  // todo refactor, extract method to destroytableDataInfo
  int32_t numOfGroups = GET_NUM_OF_TABLEGROUP(pQInfo);
  for (int32_t i = 0; i < numOfGroups; ++i) {
    SArray *p = GET_TABLEGROUP(pQInfo, i);

    size_t num = taosArrayGetSize(p);
    for (int32_t j = 0; j < (int32_t)num; ++j) {
      STableQueryInfo* item = taosArrayGetP(p, j);
      if (item != NULL) {
        destroyTableQueryInfo(item, pQuery->numOfOutput);
      }
    }

    taosArrayDestroy(p);
  }

  tfree(pQInfo->pBuf);
  taosArrayDestroy(pQInfo->tableqinfoGroupInfo.pGroupList);
  taosHashCleanup(pQInfo->tableqinfoGroupInfo.map);
  tsdbDestoryTableGroup(&pQInfo->tableGroupInfo);
  taosArrayDestroy(pQInfo->arrTableIdInfo);

  if (pQuery->pGroupbyExpr != NULL) {
    taosArrayDestroy(pQuery->pGroupbyExpr->columnInfo);
    tfree(pQuery->pGroupbyExpr);
  }

  tfree(pQuery->tagColList);
  tfree(pQuery->pFilterInfo);
  tfree(pQuery->colList);
  tfree(pQuery->sdata);

  tfree(pQuery);

  qDebug("QInfo:%p QInfo is freed", pQInfo);

  // destroy signature, in order to avoid the query process pass the object safety check
  memset(pQInfo, 0, sizeof(SQInfo));
  tfree(pQInfo);
}

H
hjxilinx 已提交
5900
/**
 * Compute the number of payload bytes to send back for the current result.
 *
 * For a ts-comp query with a positive row count, the payload is the temporary file named
 * in the first output buffer: *numOfRows is overwritten with the file size and that size
 * is returned (0 if the file cannot be stat'ed). Otherwise the size is rowSize * rows.
 * TODO handle the case that the file is too large to send back one time
 *
 * @param pQInfo     query handle
 * @param numOfRows  in/out: number of rows; replaced by the file size for ts-comp queries
 * @return size of the result payload in bytes
 */
static size_t getResultSize(SQInfo *pQInfo, int64_t *numOfRows) {
  SQuery *pQuery = pQInfo->runtimeEnv.pQuery;

  // regular result: fixed-size rows
  if (!isTSCompQuery(pQuery) || (*numOfRows) <= 0) {
    return pQuery->rowSize * (*numOfRows);
  }

  struct stat fileStat;
  if (stat(pQuery->sdata[0]->data, &fileStat) != 0) {
    qError("QInfo:%p failed to get file info, path:%s, reason:%s", pQInfo, pQuery->sdata[0]->data, strerror(errno));
    return 0;
  }

  *numOfRows = fileStat.st_size;
  return fileStat.st_size;
}
5921

H
hjxilinx 已提交
5922 5923 5924
/**
 * Copy the current query result into the response buffer and update the row accounting.
 *
 * For a ts-comp query the result is the content of the temporary file named in the first
 * output buffer; the file is read into data and then removed. For all other queries the
 * rows are copied with doCopyQueryResultToMsg().
 *
 * @param pQInfo  query handle
 * @param data    destination buffer; the caller must have sized it (see getResultSize)
 * @return TSDB_CODE_SUCCESS (I/O problems are logged, not reported)
 */
static int32_t doDumpQueryResult(SQInfo *pQInfo, char *data) {
  // the remained number of retrieved rows, not the interpolated result
  SQuery *pQuery = pQInfo->runtimeEnv.pQuery;

  // load data from file to msg buffer
  if (isTSCompQuery(pQuery)) {
    int32_t fd = open(pQuery->sdata[0]->data, O_RDONLY, 0666);

    // make sure file exist
    if (FD_VALID(fd)) {
      int32_t s = (int32_t)lseek(fd, 0, SEEK_END);
      qDebug("QInfo:%p ts comp data return, file:%s, size:%d", pQInfo, pQuery->sdata[0]->data, s);

      // a failed lseek yields a negative size, which must never reach read() as a length
      if (s >= 0 && lseek(fd, 0, SEEK_SET) >= 0) {
        char   *dst = data;
        int32_t remain = s;

        // read() may legitimately return fewer bytes than requested; keep going until done
        while (remain > 0) {
          ssize_t n = read(fd, dst, (size_t)remain);
          if (n < 0) {
            if (errno == EINTR) {
              continue;
            }

            qError("QInfo:%p failed to read ts-comp data from file:%s, reason:%s", pQInfo, pQuery->sdata[0]->data,
                   strerror(errno));
            break;
          }

          if (n == 0) {  // unexpected end of file
            break;
          }

          dst += n;
          remain -= (int32_t)n;
        }
      } else {
        // todo handle error
        qError("QInfo:%p failed to seek in ts-comp data file:%s, reason:%s", pQInfo, pQuery->sdata[0]->data,
               strerror(errno));
      }

      close(fd);
      unlink(pQuery->sdata[0]->data);
    } else {
      // todo return the error code to client and handle invalid fd
      qError("QInfo:%p failed to open tmp file to send ts-comp data to client, path:%s, reason:%s", pQInfo,
             pQuery->sdata[0]->data, strerror(errno));
      if (fd != -1) {
        close(fd);
      }
    }

    // all data returned, set query over
    if (Q_STATUS_EQUAL(pQuery->status, QUERY_COMPLETED)) {
      setQueryStatus(pQuery, QUERY_OVER);
    }
  } else {
    doCopyQueryResultToMsg(pQInfo, pQuery->rec.rows, data);
  }

  pQuery->rec.total += pQuery->rec.rows;
  qDebug("QInfo:%p current numOfRes rows:%" PRId64 ", total:%" PRId64, pQInfo, pQuery->rec.rows, pQuery->rec.total);

  if (pQuery->limit.limit > 0 && pQuery->limit.limit == pQuery->rec.total) {
    qDebug("QInfo:%p results limitation reached, limitation:%"PRId64, pQInfo, pQuery->limit.limit);
    setQueryStatus(pQuery, QUERY_OVER);
  }

  return TSDB_CODE_SUCCESS;
}

5972 5973 5974 5975 5976 5977 5978
// Bookkeeping for the query handles of one vgroup.
// NOTE(review): the code using this struct lies outside this chunk; field notes marked
// "presumably" are inferred from names only and need confirming.
typedef struct SQueryMgmt {
  SCacheObj      *qinfoPool;      // query handle pool
  int32_t         vgId;           // presumably the id of the owning vgroup/vnode
  bool            closed;         // presumably set once the manager no longer accepts handles
  pthread_mutex_t lock;           // presumably serializes access to this struct
} SQueryMgmt;

H
Haojun Liao 已提交
5979 5980
/**
 * Create a query handle from a raw query message.
 *
 * Decodes the message, builds the output expressions and the group-by descriptor, resolves
 * the tables to query (single table, super table by tag condition, or explicit id list),
 * creates the handle and initializes it.
 *
 * @param tsdb       storage engine handle (must not be NULL)
 * @param vgId       vnode group id
 * @param pQueryMsg  query message (must not be NULL); decoded in place
 * @param param      argument for fn
 * @param fn         callback stored in the handle and invoked when it is destroyed
 * @param pQInfo     out: the new handle on success, NULL on failure
 * @return TSDB_CODE_SUCCESS or an error code
 */
int32_t qCreateQueryInfo(void* tsdb, int32_t vgId, SQueryTableMsg* pQueryMsg, void* param, _qinfo_free_fn_t fn,
    qinfo_t* pQInfo) {
  assert(pQueryMsg != NULL && tsdb != NULL);

  int32_t code = TSDB_CODE_SUCCESS;

  char *        tagCond = NULL, *tbnameCond = NULL;
  SArray *      pTableIdList = NULL;
  SSqlFuncMsg **pExprMsg = NULL;
  SColIndex *   pGroupColIndex = NULL;
  SColumnInfo*  pTagColumnInfo = NULL;
  SExprInfo     *pExprs = NULL;
  SSqlGroupbyExpr *pGroupbyExpr = NULL;

  if ((code = convertQueryMsg(pQueryMsg, &pTableIdList, &pExprMsg, &tagCond, &tbnameCond, &pGroupColIndex, &pTagColumnInfo)) !=
         TSDB_CODE_SUCCESS) {
    goto _over;
  }

  if (pQueryMsg->numOfTables <= 0) {
    qError("Invalid number of tables to query, numOfTables:%d", pQueryMsg->numOfTables);
    code = TSDB_CODE_QRY_INVALID_MSG;
    goto _over;
  }

  if (pTableIdList == NULL || taosArrayGetSize(pTableIdList) == 0) {
    qError("qmsg:%p, SQueryTableMsg wrong format", pQueryMsg);
    code = TSDB_CODE_QRY_INVALID_MSG;
    goto _over;
  }

  if ((code = createQFunctionExprFromMsg(pQueryMsg, &pExprs, pExprMsg, pTagColumnInfo)) != TSDB_CODE_SUCCESS) {
    goto _over;
  }

  pGroupbyExpr = createGroupbyExprFromMsg(pQueryMsg, pGroupColIndex, &code);
  if ((pGroupbyExpr == NULL && pQueryMsg->numOfGroupCols != 0) || code != TSDB_CODE_SUCCESS) {
    goto _over;
  }

  bool isSTableQuery = false;
  STableGroupInfo tableGroupInfo = {0};

  if (TSDB_QUERY_HAS_TYPE(pQueryMsg->queryType, TSDB_QUERY_TYPE_TABLE_QUERY)) {
    STableIdInfo *id = taosArrayGet(pTableIdList, 0);

    qDebug("qmsg:%p query normal table, uid:%"PRId64", tid:%d", pQueryMsg, id->uid, id->tid);
    if ((code = tsdbGetOneTableGroup(tsdb, id->uid, &tableGroupInfo)) != TSDB_CODE_SUCCESS) {
      goto _over;
    }
  } else if (TSDB_QUERY_HAS_TYPE(pQueryMsg->queryType, TSDB_QUERY_TYPE_MULTITABLE_QUERY|TSDB_QUERY_TYPE_STABLE_QUERY)) {
    isSTableQuery = true;
    // TODO: need a macro from TSDB to check if table is super table

    // also note there's possibility that only one table in the super table
    if (!TSDB_QUERY_HAS_TYPE(pQueryMsg->queryType, TSDB_QUERY_TYPE_MULTITABLE_QUERY)) {
      STableIdInfo *id = taosArrayGet(pTableIdList, 0);

      // group by normal column, do not pass the group by condition to tsdb to group table into different group
      int32_t numOfGroupByCols = pQueryMsg->numOfGroupCols;
      if (pQueryMsg->numOfGroupCols == 1 && !TSDB_COL_IS_TAG(pGroupColIndex->flag)) {
        numOfGroupByCols = 0;
      }

      code = tsdbQuerySTableByTagCond(tsdb, id->uid, tagCond, pQueryMsg->tagCondLen, pQueryMsg->tagNameRelType, tbnameCond, &tableGroupInfo, pGroupColIndex,
                                          numOfGroupByCols);
      if (code != TSDB_CODE_SUCCESS) {
        goto _over;
      }
    } else {
      code = tsdbGetTableGroupFromIdList(tsdb, pTableIdList, &tableGroupInfo);
      if (code != TSDB_CODE_SUCCESS) {
        goto _over;
      }

      qDebug("qmsg:%p query on %zu tables in one group from client", pQueryMsg, tableGroupInfo.numOfTables);
    }
  } else {
    assert(0);

    // with assertions compiled out, reject the message instead of continuing with no tables
    code = TSDB_CODE_QRY_INVALID_MSG;
    goto _over;
  }

  (*pQInfo) = createQInfoImpl(pQueryMsg, pTableIdList, pGroupbyExpr, pExprs, &tableGroupInfo, pTagColumnInfo);

  // these objects were handed over to createQInfoImpl(); do not release them again below
  pExprs = NULL;
  pGroupbyExpr = NULL;
  pTagColumnInfo = NULL;

  if ((*pQInfo) == NULL) {
    code = TSDB_CODE_QRY_OUT_OF_MEMORY;
    goto _over;
  }

  code = initQInfo(pQueryMsg, tsdb, vgId, *pQInfo, isSTableQuery, param, fn);

_over:
  free(tagCond);
  free(tbnameCond);
  free(pGroupColIndex);
  if (pGroupbyExpr != NULL) {
    taosArrayDestroy(pGroupbyExpr->columnInfo);
    free(pGroupbyExpr);
  }

  free(pTagColumnInfo);

  if (pExprs != NULL) {
    // still owned here: release the arithmetic expression trees before the array itself
    for (int32_t i = 0; i < pQueryMsg->numOfOutput; ++i) {
      if (pExprs[i].pExpr != NULL) {
        tExprTreeDestroy(&pExprs[i].pExpr, NULL);
      }
    }

    free(pExprs);
  }

  free(pExprMsg);
  taosArrayDestroy(pTableIdList);

  //pQInfo already freed in initQInfo, but *pQInfo may not pointer to null;
  if (code != TSDB_CODE_SUCCESS) {
    *pQInfo = NULL;
  } else {
    SQInfo* pq = (SQInfo*) (*pQInfo);

    // two references are taken on success
    // NOTE(review): presumably one for the caller and one for the execution task — confirm
    T_REF_INC(pq);
    T_REF_INC(pq);
  }

  // if failed to add ref for all meters in this query, abort current query
  return code;
}

H
Haojun Liao 已提交
6099 6100
/**
 * Final teardown of a query handle: log completion, print the query cost summary,
 * then release the handle via freeQInfo().
 *
 * @param pQInfo  query handle, must not be NULL
 */
static void doDestoryQueryInfo(SQInfo* pQInfo) {
  assert(pQInfo != NULL);

  qDebug("QInfo:%p query completed", pQInfo);

  // print the query cost summary
  queryCostStatis(pQInfo);

  freeQInfo(pQInfo);
}

H
Haojun Liao 已提交
6106
/**
 * Drop one reference to the query handle. When the count reaches zero the handle is
 * destroyed and, if a release callback was registered, it is invoked with its argument.
 *
 * @param qHandle  query handle; invalid or NULL handles are ignored
 */
void qDestroyQueryInfo(qinfo_t qHandle) {
  SQInfo* pQInfo = (SQInfo*) qHandle;
  if (!isValidQInfo(pQInfo)) {
    return;
  }

  int32_t ref = T_REF_DEC(pQInfo);
  qDebug("QInfo:%p dec refCount, value:%d", pQInfo, ref);

  if (ref != 0) {
    return;  // still referenced elsewhere
  }

  // capture the callback before the handle memory is released
  _qinfo_free_fn_t freeFp = pQInfo->freeFn;
  void* param = pQInfo->param;

  doDestoryQueryInfo(pQInfo);

  if (freeFp == NULL) {
    return;
  }

  assert(param != NULL);
  freeFp(param);
}

6128
/*
 * Run one round of query execution on the given handle.
 *
 * Every path that gets past the signature check posts the dataReady
 * semaphore and calls qDestroyQueryInfo() on the handle exactly once before
 * returning, regardless of whether the query ran, was killed, had no tables,
 * or aborted with an error.
 */
void qTableQuery(qinfo_t qinfo) {
  SQInfo *pQInfo = (SQInfo *)qinfo;

  // a handle whose signature no longer points to itself is not usable
  if (pQInfo == NULL || pQInfo->signature != pQInfo) {
    qDebug("QInfo:%p has been freed, no need to execute", pQInfo);
    return;
  }

  if (isQueryKilled(pQInfo)) {
    qDebug("QInfo:%p it is already killed, abort", pQInfo);

    sem_post(&pQInfo->dataReady);
    qDestroyQueryInfo(pQInfo);
    return;
  }

  if (pQInfo->tableqinfoGroupInfo.numOfTables == 0) {
    qDebug("QInfo:%p no table exists for query, abort", pQInfo);

    sem_post(&pQInfo->dataReady);
    qDestroyQueryInfo(pQInfo);
    return;
  }

  // a non-zero value means execution jumped back here with an error code:
  // record it on the handle so that it is returned to the client
  int32_t ret = setjmp(pQInfo->runtimeEnv.env);
  if (ret != TSDB_CODE_SUCCESS) {
    pQInfo->code = ret;
    qDebug("QInfo:%p query abort due to error occurs, code:%s", pQInfo, tstrerror(pQInfo->code));

    sem_post(&pQInfo->dataReady);
    qDestroyQueryInfo(pQInfo);
    return;
  }

  qDebug("QInfo:%p query task is launched", pQInfo);

  SQueryRuntimeEnv* pRuntimeEnv = &pQInfo->runtimeEnv;

  // choose the executor: tag-only query, super table query or plain table query
  if (onlyQueryTags(pRuntimeEnv->pQuery)) {
    assert(pRuntimeEnv->pQueryHandle == NULL);
    buildTagQueryResult(pQInfo);   // todo support the limit/offset
  } else if (pRuntimeEnv->stableQuery) {
    stableQueryImpl(pQInfo);
  } else {
    tableQueryImpl(pQInfo);
  }

  SQuery* pQuery = pRuntimeEnv->pQuery;
  if (isQueryKilled(pQInfo)) {
    qDebug("QInfo:%p query is killed", pQInfo);
  } else if (pQuery->rec.rows != 0) {
    qDebug("QInfo:%p query paused, %" PRId64 " rows returned, numOfTotal:%" PRId64 " rows",
           pQInfo, pQuery->rec.rows, pQuery->rec.total + pQuery->rec.rows);
  } else {
    qDebug("QInfo:%p over, %zu tables queried, %"PRId64" rows are returned", pQInfo, pQInfo->tableqinfoGroupInfo.numOfTables, pQuery->rec.total);
  }

  sem_post(&pQInfo->dataReady);
  qDestroyQueryInfo(pQInfo);
}

H
hjxilinx 已提交
6190
/*
 * Wait until the dataReady semaphore of the handle is posted and return the
 * code stored on the handle.
 *
 * Returns TSDB_CODE_QRY_INVALID_QHANDLE for a NULL or invalid handle. When the
 * query has been killed, the stored code is returned without waiting.
 */
int32_t qRetrieveQueryResultInfo(qinfo_t qinfo) {
  SQInfo *pQInfo = (SQInfo *)qinfo;
  if (pQInfo == NULL || !isValidQInfo(pQInfo)) {
    return TSDB_CODE_QRY_INVALID_QHANDLE;
  }

  SQuery *pQuery = pQInfo->runtimeEnv.pQuery;

  // a killed query is reported right away, there is nothing to wait for
  if (isQueryKilled(pQInfo)) {
    qDebug("QInfo:%p query is killed, code:%d", pQInfo, pQInfo->code);
    return pQInfo->code;
  }

  sem_wait(&pQInfo->dataReady);

  qDebug("QInfo:%p retrieve result info, rowsize:%d, rows:%"PRId64", code:%d", pQInfo, pQuery->rowSize, pQuery->rec.rows,
         pQInfo->code);

  return pQInfo->code;
}
6209

H
hjxilinx 已提交
6210
/*
 * Tell whether the client should come back for more results of this query.
 *
 * Returns false for a NULL/invalid handle, for a handle whose code is not
 * TSDB_CODE_SUCCESS, and when the query status is QUERY_OVER. Returns true
 * when the status is QUERY_RESBUF_FULL or QUERY_COMPLETED; in that case one
 * extra reference is taken on the handle (T_REF_INC) before returning.
 */
bool qHasMoreResultsToRetrieve(qinfo_t qinfo) {
  SQInfo *pQInfo = (SQInfo *)qinfo;

  // the handle must be validated on its own: nothing may be read from it
  // (not even for logging) unless it is known to be valid
  if (pQInfo == NULL || !isValidQInfo(pQInfo)) {
    qDebug("QInfo:%p invalid qhandle, abort query", pQInfo);
    return false;
  }

  if (pQInfo->code != TSDB_CODE_SUCCESS) {
    qDebug("QInfo:%p error occurs, abort query, code:%x", pQInfo, pQInfo->code);
    return false;
  }

  SQuery *pQuery = pQInfo->runtimeEnv.pQuery;

  bool ret = false;
  if (Q_STATUS_EQUAL(pQuery->status, QUERY_OVER)) {
    ret = false;
  } else if (Q_STATUS_EQUAL(pQuery->status, QUERY_RESBUF_FULL) || Q_STATUS_EQUAL(pQuery->status, QUERY_COMPLETED)) {
    ret = true;
  } else {
    assert(0);  // no other status is expected at this point
  }

  if (ret) {
    T_REF_INC(pQInfo);
    qDebug("QInfo:%p has more results waits for client retrieve", pQInfo);
  }

  return ret;
}

6238 6239 6240
/*
 * Build the retrieve response message for the current result batch.
 *
 * qinfo    query handle
 * pRsp     out: response allocated with rpcMallocCont()
 * contLen  out: total length of the response in bytes
 *
 * Returns TSDB_CODE_QRY_INVALID_QHANDLE for a NULL/invalid handle (the output
 * parameters are not touched in that case), TSDB_CODE_QRY_OUT_OF_MEMORY when
 * the response cannot be allocated (*pRsp is NULL then), otherwise the code
 * of the handle or of doDumpQueryResult().
 */
int32_t qDumpRetrieveResult(qinfo_t qinfo, SRetrieveTableRsp **pRsp, int32_t *contLen) {
  SQInfo *pQInfo = (SQInfo *)qinfo;

  if (pQInfo == NULL || !isValidQInfo(pQInfo)) {
    return TSDB_CODE_QRY_INVALID_QHANDLE;
  }

  SQueryRuntimeEnv* pRuntimeEnv = &pQInfo->runtimeEnv;
  SQuery *pQuery = pQInfo->runtimeEnv.pQuery;

  // payload: result data + one int32_t + one STableIdInfo per entry of arrTableIdInfo
  size_t size = getResultSize(pQInfo, &pQuery->rec.rows);
  size += sizeof(int32_t);
  size += sizeof(STableIdInfo) * taosArrayGetSize(pQInfo->arrTableIdInfo);
  *contLen = (int32_t)(size + sizeof(SRetrieveTableRsp));

  *pRsp = (SRetrieveTableRsp *)rpcMallocCont(*contLen);
  if (*pRsp == NULL) {
    // never write through a failed allocation
    return TSDB_CODE_QRY_OUT_OF_MEMORY;
  }

  (*pRsp)->numOfRows = htonl(pQuery->rec.rows);

  int32_t code = pQInfo->code;
  if (code == TSDB_CODE_SUCCESS) {
    (*pRsp)->offset = htobe64(pQuery->limit.offset);
    (*pRsp)->useconds = htobe64(pRuntimeEnv->summary.elapsedTime);
  } else {
    (*pRsp)->offset = 0;
    (*pRsp)->useconds = 0;
  }

  (*pRsp)->precision = htons(pQuery->precision);
  if (pQuery->rec.rows > 0 && code == TSDB_CODE_SUCCESS) {
    code = doDumpQueryResult(pQInfo, (*pRsp)->data);
  } else {
    // nothing to dump: either no rows or the query has failed
    setQueryStatus(pQuery, QUERY_OVER);
    code = pQInfo->code;
  }

  if (isQueryKilled(pQInfo) || Q_STATUS_EQUAL(pQuery->status, QUERY_OVER)) {
    (*pRsp)->completed = 1;  // notify no more result to client
  }

  return code;
}
H
hjxilinx 已提交
6279

H
Haojun Liao 已提交
6280
/*
 * Mark the query as killed and pass the handle to qDestroyQueryInfo().
 *
 * Returns TSDB_CODE_QRY_INVALID_QHANDLE for a NULL or invalid handle,
 * TSDB_CODE_SUCCESS otherwise.
 */
int32_t qKillQuery(qinfo_t qinfo) {
  SQInfo *pQInfo = (SQInfo *)qinfo;

  if (pQInfo != NULL && isValidQInfo(pQInfo)) {
    setQueryKilled(pQInfo);
    qDestroyQueryInfo(pQInfo);
    return TSDB_CODE_SUCCESS;
  }

  return TSDB_CODE_QRY_INVALID_QHANDLE;
}

H
hjxilinx 已提交
6292 6293 6294
static void buildTagQueryResult(SQInfo* pQInfo) {
  SQueryRuntimeEnv *pRuntimeEnv = &pQInfo->runtimeEnv;
  SQuery *          pQuery = pRuntimeEnv->pQuery;
6295

H
Haojun Liao 已提交
6296
  size_t numOfGroup = GET_NUM_OF_TABLEGROUP(pQInfo);
H
Haojun Liao 已提交
6297
  assert(numOfGroup == 0 || numOfGroup == 1);
6298

H
Haojun Liao 已提交
6299
  if (numOfGroup == 0) {
6300 6301
    return;
  }
H
hjxilinx 已提交
6302
  
H
Haojun Liao 已提交
6303
  SArray* pa = GET_TABLEGROUP(pQInfo, 0);
6304

H
Haojun Liao 已提交
6305
  size_t num = taosArrayGetSize(pa);
6306
  assert(num == pQInfo->tableqinfoGroupInfo.numOfTables);
6307

H
Haojun Liao 已提交
6308
  int32_t count = 0;
6309 6310 6311
  int32_t functionId = pQuery->pSelectExpr[0].base.functionId;
  if (functionId == TSDB_FUNC_TID_TAG) { // return the tags & table Id
    assert(pQuery->numOfOutput == 1);
6312

6313 6314
    SExprInfo* pExprInfo = &pQuery->pSelectExpr[0];
    int32_t rsize = pExprInfo->bytes;
H
Haojun Liao 已提交
6315
    count = 0;
6316

H
Haojun Liao 已提交
6317 6318 6319 6320 6321 6322 6323 6324 6325 6326 6327
    int16_t bytes = pExprInfo->bytes;
    int16_t type = pExprInfo->type;

    for(int32_t i = 0; i < pQuery->numOfTags; ++i) {
      if (pQuery->tagColList[i].colId == pExprInfo->base.colInfo.colId) {
        bytes = pQuery->tagColList[i].bytes;
        type = pQuery->tagColList[i].type;
        break;
      }
    }

H
Haojun Liao 已提交
6328 6329
    while(pQInfo->tableIndex < num && count < pQuery->rec.capacity) {
      int32_t i = pQInfo->tableIndex++;
6330
      STableQueryInfo *item = taosArrayGetP(pa, i);
6331

6332
      char *output = pQuery->sdata[0]->data + i * rsize;
6333
      varDataSetLen(output, rsize - VARSTR_HEADER_SIZE);
6334

6335
      output = varDataVal(output);
H
Haojun Liao 已提交
6336
      STableId* id = TSDB_TABLEID(item->pTable);
6337

H
Haojun Liao 已提交
6338 6339
      *(int64_t *)output = id->uid;  // memory align problem, todo serialize
      output += sizeof(id->uid);
6340

H
Haojun Liao 已提交
6341 6342
      *(int32_t *)output = id->tid;
      output += sizeof(id->tid);
6343

6344
      *(int32_t *)output = pQInfo->vgId;
6345
      output += sizeof(pQInfo->vgId);
6346

6347
      if (pExprInfo->base.colInfo.colId == TSDB_TBNAME_COLUMN_INDEX) {
6348
        char *data = tsdbGetTableName(item->pTable);
6349
        memcpy(output, data, varDataTLen(data));
H
[td-90]  
Haojun Liao 已提交
6350
      } else {
6351
        char *val = tsdbGetTableTagVal(item->pTable, pExprInfo->base.colInfo.colId, type, bytes);
6352 6353 6354 6355 6356 6357 6358 6359

        // todo refactor
        if (type == TSDB_DATA_TYPE_BINARY || type == TSDB_DATA_TYPE_NCHAR) {
          if (val == NULL) {
            setVardataNull(output, type);
          } else {
            memcpy(output, val, varDataTLen(val));
          }
H
[td-90]  
Haojun Liao 已提交
6360
        } else {
6361 6362
          if (val == NULL) {
            setNull(output, type, bytes);
H
Haojun Liao 已提交
6363
          } else {  // todo here stop will cause client crash
6364 6365
            memcpy(output, val, bytes);
          }
H
[td-90]  
Haojun Liao 已提交
6366 6367
        }
      }
6368

H
Haojun Liao 已提交
6369
      count += 1;
6370
    }
6371

6372
    qDebug("QInfo:%p create (tableId, tag) info completed, rows:%d", pQInfo, count);
6373

H
Haojun Liao 已提交
6374 6375 6376 6377 6378
  } else if (functionId == TSDB_FUNC_COUNT) {// handle the "count(tbname)" query
    *(int64_t*) pQuery->sdata[0]->data = num;

    count = 1;
    pQInfo->tableIndex = num;  //set query completed
6379
    qDebug("QInfo:%p create count(tbname) query, res:%d rows:1", pQInfo, count);
6380
  } else {  // return only the tags|table name etc.
H
Haojun Liao 已提交
6381
    count = 0;
H
Haojun Liao 已提交
6382
    SSchema tbnameSchema = tGetTableNameColumnSchema();
H
Haojun Liao 已提交
6383 6384
    while(pQInfo->tableIndex < num && count < pQuery->rec.capacity) {
      int32_t i = pQInfo->tableIndex++;
6385

6386
      SExprInfo* pExprInfo = pQuery->pSelectExpr;
6387
      STableQueryInfo* item = taosArrayGetP(pa, i);
6388

6389 6390
      for(int32_t j = 0; j < pQuery->numOfOutput; ++j) {
        if (pExprInfo[j].base.colInfo.colId == TSDB_TBNAME_COLUMN_INDEX) {
6391
          char* data = tsdbGetTableName(item->pTable);
H
Haojun Liao 已提交
6392
          char* dst = pQuery->sdata[j]->data + count * tbnameSchema.bytes;
H
hjxilinx 已提交
6393
          memcpy(dst, data, varDataTLen(data));
H
[td-90]  
Haojun Liao 已提交
6394 6395 6396 6397
        } else {// todo refactor
          int16_t type = pExprInfo[j].type;
          int16_t bytes = pExprInfo[j].bytes;
          
6398
          char* data = tsdbGetTableTagVal(item->pTable, pExprInfo[j].base.colInfo.colId, type, bytes);
H
Haojun Liao 已提交
6399
          char* dst = pQuery->sdata[j]->data + count * pExprInfo[j].bytes;
6400

H
hjxilinx 已提交
6401
          if (type == TSDB_DATA_TYPE_BINARY || type == TSDB_DATA_TYPE_NCHAR) {
H
[td-90]  
Haojun Liao 已提交
6402 6403 6404 6405 6406
            if (data == NULL) {
              setVardataNull(dst, type);
            } else {
              memcpy(dst, data, varDataTLen(data));
            }
H
hjxilinx 已提交
6407
          } else {
H
[td-90]  
Haojun Liao 已提交
6408 6409 6410 6411 6412
            if (data == NULL) {
              setNull(dst, type, bytes);
            } else {
              memcpy(dst, data, pExprInfo[j].bytes);
            }
H
hjxilinx 已提交
6413
          }
6414
        }
H
hjxilinx 已提交
6415
      }
H
Haojun Liao 已提交
6416
      count += 1;
H
hjxilinx 已提交
6417
    }
6418

6419
    qDebug("QInfo:%p create tag values results completed, rows:%d", pQInfo, count);
H
hjxilinx 已提交
6420
  }
6421

H
Haojun Liao 已提交
6422
  pQuery->rec.rows = count;
H
hjxilinx 已提交
6423
  setQueryStatus(pQuery, QUERY_COMPLETED);
H
hjxilinx 已提交
6424 6425
}

6426 6427 6428 6429 6430 6431 6432 6433 6434 6435 6436 6437 6438 6439 6440 6441 6442 6443 6444 6445 6446 6447 6448 6449 6450 6451 6452 6453 6454 6455 6456 6457 6458 6459 6460 6461 6462 6463 6464 6465 6466 6467 6468 6469 6470 6471 6472 6473 6474 6475 6476 6477 6478 6479 6480 6481 6482 6483 6484 6485 6486 6487 6488 6489 6490 6491 6492 6493 6494 6495 6496 6497 6498 6499 6500 6501 6502 6503 6504 6505 6506 6507 6508 6509 6510 6511 6512 6513 6514 6515 6516 6517 6518 6519 6520 6521 6522 6523 6524 6525 6526 6527 6528 6529 6530 6531 6532 6533 6534
/*
 * Free callback handed to taosCacheInit(): qhandle points to a stored query
 * handle, which is killed through qKillQuery(). NULL, or a slot holding NULL,
 * is ignored.
 */
void freeqinfoFn(void *qhandle) {
  void** handle = qhandle;

  if (handle != NULL && *handle != NULL) {
    qKillQuery(*handle);
  }
}

void* qOpenQueryMgmt(int32_t vgId) {
  const int32_t REFRESH_HANDLE_INTERVAL = 2; // every 2 seconds, refresh handle pool

  char cacheName[128] = {0};
  sprintf(cacheName, "qhandle_%d", vgId);

  SQueryMgmt* pQueryHandle = calloc(1, sizeof(SQueryMgmt));

  pQueryHandle->qinfoPool = taosCacheInit(TSDB_DATA_TYPE_BIGINT, REFRESH_HANDLE_INTERVAL, true, freeqinfoFn, cacheName);
  pQueryHandle->closed    = false;
  pthread_mutex_init(&pQueryHandle->lock, NULL);

  qDebug("vgId:%d, open querymgmt success", vgId);
  return pQueryHandle;
}

/*
 * Flag the query manager as closed and empty its handle cache.
 * The closed flag is set while holding the manager lock. A NULL manager is
 * ignored.
 */
void qSetQueryMgmtClosed(void* pQMgmt) {
  SQueryMgmt* pQueryMgmt = pQMgmt;
  if (pQueryMgmt == NULL) {
    return;
  }

  qDebug("vgId:%d, set querymgmt closed, wait for all queries cancelled", pQueryMgmt->vgId);

  pthread_mutex_lock(&pQueryMgmt->lock);
  pQueryMgmt->closed = true;
  pthread_mutex_unlock(&pQueryMgmt->lock);

  taosCacheEmpty(pQueryMgmt->qinfoPool, true);
}

/*
 * Release the query manager: clean up the handle cache, destroy the lock and
 * free the manager itself. The manager must already be flagged as closed
 * (asserted). A NULL manager is ignored.
 */
void qCleanupQueryMgmt(void* pQMgmt) {
  SQueryMgmt* pQueryMgmt = pQMgmt;
  if (pQueryMgmt == NULL) {
    return;
  }

  assert(pQueryMgmt->closed);

  // remember the id for the final log line, the manager is freed before it
  int32_t vgId = pQueryMgmt->vgId;

  // detach the cache from the manager before cleaning it up
  SCacheObj* pqinfoPool = pQueryMgmt->qinfoPool;
  pQueryMgmt->qinfoPool = NULL;
  taosCacheCleanup(pqinfoPool);

  pthread_mutex_destroy(&pQueryMgmt->lock);
  tfree(pQueryMgmt);

  qDebug("vgId:%d querymgmt cleanup completed", vgId);
}

/*
 * Put a query handle into the cache of the query manager.
 *
 * Returns the cache entry returned by taosCachePut(), or NULL when the
 * manager is NULL, has no cache, or is already closed. The closed flag is
 * checked and the entry is inserted while holding the manager lock.
 */
void** qRegisterQInfo(void* pMgmt, void* qInfo) {
  SQueryMgmt *pQueryMgmt = pMgmt;
  if (pQueryMgmt == NULL || pQueryMgmt->qinfoPool == NULL) {
    return NULL;
  }

  void** handle = NULL;

  pthread_mutex_lock(&pQueryMgmt->lock);
  if (!pQueryMgmt->closed) {
    handle = taosCachePut(pQueryMgmt->qinfoPool, qInfo, POINTER_BYTES, &qInfo, POINTER_BYTES, tsShellActivityTimer*2);
  }
  pthread_mutex_unlock(&pQueryMgmt->lock);

  return handle;
}

/*
 * Look up a registered query handle in the cache of the query manager.
 *
 * pMgmt  query manager
 * key    lookup key, POINTER_BYTES bytes are read from it
 *
 * Returns the cache entry, or NULL when the manager is NULL, has no cache, is
 * closed, or when no entry holding a non-NULL handle is found.
 */
void** qAcquireQInfo(void* pMgmt, void** key) {
  SQueryMgmt *pQueryMgmt = pMgmt;

  // same guard as in qRegisterQInfo(): a missing manager is not an error
  if (pQueryMgmt == NULL) {
    return NULL;
  }

  if (pQueryMgmt->qinfoPool == NULL || pQueryMgmt->closed) {
    return NULL;
  }

  void** handle = taosCacheAcquireByKey(pQueryMgmt->qinfoPool, key, POINTER_BYTES);
  if (handle == NULL || *handle == NULL) {
    return NULL;
  }

  return handle;
}

/*
 * Give a cache entry obtained from the query manager back to its cache.
 *
 * pMgmt     query manager
 * pQInfo    cache entry to release, passed through to taosCacheRelease()
 * needFree  passed through to taosCacheRelease()
 *
 * Always returns NULL. Nothing is done when the manager is NULL or has no
 * cache.
 */
void** qReleaseQInfo(void* pMgmt, void* pQInfo, bool needFree) {
  SQueryMgmt *pQueryMgmt = pMgmt;

  // same guard as in qRegisterQInfo(): a missing manager is not an error
  if (pQueryMgmt == NULL || pQueryMgmt->qinfoPool == NULL) {
    return NULL;
  }

  taosCacheRelease(pQueryMgmt->qinfoPool, pQInfo, needFree);
  return NULL;
}