qExecutor.c 228.8 KB
Newer Older
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15
/*
 * Copyright (c) 2019 TAOS Data, Inc. <jhtao@taosdata.com>
 *
 * This program is free software: you can use, redistribute, and/or modify
 * it under the terms of the GNU Affero General Public License, version 3
 * or later ("AGPL"), as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.
 *
 * You should have received a copy of the GNU Affero General Public License
 * along with this program. If not, see <http://www.gnu.org/licenses/>.
 */
#include "os.h"
H
Haojun Liao 已提交
16 17
#include "qFill.h"
#include "taosmsg.h"
18 19
#include "tcache.h"
#include "tglobal.h"
20

H
Haojun Liao 已提交
21
#include "exception.h"
22
#include "hash.h"
H
Haojun Liao 已提交
23 24 25 26
#include "qAst.h"
#include "qExecutor.h"
#include "qResultbuf.h"
#include "qUtil.h"
H
hjxilinx 已提交
27
#include "query.h"
S
slguan 已提交
28
#include "queryLog.h"
29 30
#include "tlosertree.h"
#include "tscompression.h"
31 32 33 34 35

/**
 * check if the primary column is load by default, otherwise, the program will
 * forced to load primary column explicitly.
 */
36 37
/* Non-zero when the status word (p) shares at least one bit with the mask (s). */
#define Q_STATUS_EQUAL(p, s)  (((p) & (s)) != 0)

/* Column-flag classification helpers. */
#define TSDB_COL_IS_TAG(f)    (((f)&TSDB_COL_TAG) != 0)
#define TSDB_COL_IS_NORMAL_COL(f)    ((f) == TSDB_COL_NORMAL)
#define TSDB_COL_IS_UD_COL(f)   ((f) == TSDB_COL_UDC)

/* True when the direction factor of the query order equals the ascending forward step. */
#define QUERY_IS_ASC_QUERY(q) (GET_FORWARD_DIRECTION_FACTOR((q)->order.order) == QUERY_ASC_FORWARD_STEP)

/* Test / set the scanFlag field of a runtime environment. */
#define IS_MASTER_SCAN(runtime)        ((runtime)->scanFlag == MASTER_SCAN)
#define IS_REVERSE_SCAN(runtime)       ((runtime)->scanFlag == REVERSE_SCAN)
#define SET_MASTER_SCAN_FLAG(runtime)  ((runtime)->scanFlag = MASTER_SCAN)
#define SET_REVERSE_SCAN_FLAG(runtime) ((runtime)->scanFlag = REVERSE_SCAN)

/* Recover the enclosing SQInfo from a pointer to its runtimeEnv member. */
#define GET_QINFO_ADDR(x) ((SQInfo *)((char *)(x)-offsetof(SQInfo, runtimeEnv)))

#define GET_COL_DATA_POS(query, index, step) ((query)->pos + (index) * (step))

/* Assign (n) TSDB_ORDER_DESC when it is TSDB_ORDER_ASC, otherwise TSDB_ORDER_ASC; (n) is evaluated twice. */
#define SWITCH_ORDER(n) (((n) = ((n) == TSDB_ORDER_ASC) ? TSDB_ORDER_DESC : TSDB_ORDER_ASC))

#define SDATA_BLOCK_INITIALIZER (SDataBlockInfo) {{0}, 0}

/*
 * Copy the skey/ekey members of _src into _dst. Both arguments are
 * parenthesized so that expressions such as *ptr can be passed safely.
 * NOTE(review): the trailing ';' after while (0) is kept for compatibility
 * with existing call sites that may omit their own semicolon; it means the
 * macro cannot be used as the body of an if that has an else branch.
 */
#define TIME_WINDOW_COPY(_dst, _src)  do {\
   (_dst).skey = (_src).skey;\
   (_dst).ekey = (_src).ekey;\
} while (0);

60
/* Query status bits; each constant is a distinct bit so they can be OR-ed together. */
enum {
  // set when the query starts to execute
  QUERY_NOT_COMPLETED = 0x1u,

  /* result output buffer is full, current query is paused.
   * this status only exists in group-by clause and diff/add/division/multiply/ query.
   */
  QUERY_RESBUF_FULL = 0x2u,

  /* query is over
   * 1. this status is used in one row result query process, e.g., count/sum/first/last/ avg...etc.
   * 2. when all data within queried time window, it is also denoted as query_completed
   */
  QUERY_COMPLETED = 0x4u,

  /* when the result is not completed return to client, this status will be
   * usually used in case of interval query with interpolation option
   */
  QUERY_OVER = 0x8u,
};
80 81

/* Outcome codes for timestamp-join comparison. */
enum {
  TS_JOIN_TS_EQUAL       = 0,
  TS_JOIN_TS_NOT_EQUALS  = 1,
  TS_JOIN_TAG_NOT_EQUALS = 2,
};

87
typedef struct {
88 89 90 91 92 93
  int32_t     status;       // query status
  TSKEY       lastKey;      // the lastKey value before query executed
  STimeWindow w;            // whole query time window
  STimeWindow curWindow;    // current query window
  int32_t     windowIndex;  // index of active time window result for interval query
  STSCursor   cur;
94 95
} SQueryStatusInfo;

H
Haojun Liao 已提交
96
/*
 * Disabled allocation fault injection: when enabled, malloc/calloc are
 * redirected to wrappers that return NULL whenever rand() % 5 <= 1.
 */
#if 0
static UNUSED_FUNC void *u_malloc (size_t size) {
  uint32_t v = rand();
  if (v % 5 <= 1) {
    return NULL;
  } else {
    return malloc(size);
  }
}

static UNUSED_FUNC void* u_calloc(size_t num, size_t size) {
  uint32_t v = rand();
  if (v % 5 <= 1) {
    return NULL;
  } else {
    return calloc(num, size);
  }
}

#define calloc  u_calloc
#define malloc  u_malloc
#endif
H
Haojun Liao 已提交
118

119
#define CLEAR_QUERY_STATUS(q, st)   ((q)->status &= (~(st)))
H
Haojun Liao 已提交
120 121 122
#define GET_NUM_OF_TABLEGROUP(q)    taosArrayGetSize((q)->tableqinfoGroupInfo.pGroupList)
#define GET_TABLEGROUP(q, _index)   ((SArray*) taosArrayGetP((q)->tableqinfoGroupInfo.pGroupList, (_index)))

123
static void setQueryStatus(SQuery *pQuery, int8_t status);
H
Haojun Liao 已提交
124
static void finalizeQueryResult(SQueryRuntimeEnv *pRuntimeEnv);
125

H
Haojun Liao 已提交
126
#define QUERY_IS_INTERVAL_QUERY(_q) ((_q)->intervalTime > 0)
127

H
Haojun Liao 已提交
128 129 130 131 132 133 134 135
// previous time window may not be of the same size of pQuery->intervalTime
#define GET_NEXT_TIMEWINDOW(_q, tw)                                   \
  do {                                                                \
    int32_t factor = GET_FORWARD_DIRECTION_FACTOR((_q)->order.order); \
    (tw)->skey += ((_q)->slidingTime * factor);                       \
    (tw)->ekey = (tw)->skey + ((_q)->intervalTime - 1);               \
  } while (0)

136 137
#define SET_STABLE_QUERY_OVER(_q) ((_q)->tableIndex = (int32_t)((_q)->tableqinfoGroupInfo.numOfTables))
#define IS_STASBLE_QUERY_OVER(_q) ((_q)->tableIndex >= (int32_t)((_q)->tableqinfoGroupInfo.numOfTables))
H
Haojun Liao 已提交
138

H
hjxilinx 已提交
139
// todo move to utility
140
static int32_t mergeIntoGroupResultImpl(SQInfo *pQInfo, SArray *group);
141

H
hjxilinx 已提交
142
static void setWindowResOutputBuf(SQueryRuntimeEnv *pRuntimeEnv, SWindowResult *pResult);
H
Haojun Liao 已提交
143
static void setWindowResOutputBufInitCtx(SQueryRuntimeEnv *pRuntimeEnv, SWindowResult *pResult);
144 145
static void resetMergeResultBuf(SQuery *pQuery, SQLFunctionCtx *pCtx, SResultInfo *pResultInfo);
static bool functionNeedToExecute(SQueryRuntimeEnv *pRuntimeEnv, SQLFunctionCtx *pCtx, int32_t functionId);
146

147 148 149
static void setExecParams(SQuery *pQuery, SQLFunctionCtx *pCtx, void* inputData, TSKEY *tsCol, SDataBlockInfo* pBlockInfo,
                          SDataStatis *pStatis, void *param, int32_t colIndex);

150
static void initCtxOutputBuf(SQueryRuntimeEnv *pRuntimeEnv);
H
Haojun Liao 已提交
151
static void destroyTableQueryInfo(STableQueryInfo *pTableQueryInfo);
152 153
static void resetCtxOutputBuf(SQueryRuntimeEnv *pRuntimeEnv);
static bool hasMainOutput(SQuery *pQuery);
H
hjxilinx 已提交
154
static void buildTagQueryResult(SQInfo *pQInfo);
155

156
static int32_t setAdditionalInfo(SQInfo *pQInfo, void *pTable, STableQueryInfo *pTableQueryInfo);
157
static int32_t flushFromResultBuf(SQInfo *pQInfo);
158

159
/**
 * Check whether the row at elemPos passes every column filter of the query.
 *
 * For each filtered column the value must be non-NULL and accepted by at
 * least one of that column's filter elements; a row is rejected as soon as
 * one column fails.
 *
 * @param pQuery   query holding numOfFilterCols entries in pFilterInfo
 * @param elemPos  row index into each filter column's pData buffer
 * @return true when the row satisfies all column filters, false otherwise
 */
bool doFilterData(SQuery *pQuery, int32_t elemPos) {
  for (int32_t k = 0; k < pQuery->numOfFilterCols; ++k) {
    SSingleColumnFilterInfo *pFilterInfo = &pQuery->pFilterInfo[k];

    char *pElem = (char*)pFilterInfo->pData + pFilterInfo->info.bytes * elemPos;
    if (isNull(pElem, pFilterInfo->info.type)) {
      return false;
    }

    // the filters of a single column are OR-ed: the first match qualifies the value
    bool qualified = false;
    for (int32_t j = 0; j < pFilterInfo->numOfFilters; ++j) {
      SColumnFilterElem *pFilterElem = &pFilterInfo->pFilters[j];

      if (pFilterElem->fp(pFilterElem, pElem, pElem)) {
        qualified = true;
        break;
      }
    }

    if (!qualified) {
      return false;
    }
  }

  return true;
}

/**
 * Return the largest numOfRes among the output columns of the query.
 *
 * @param pRuntimeEnv runtime environment providing pQuery and the per-output pCtx array
 * @return the maximum numOfRes found (0 when no output contributes)
 */
int64_t getNumOfResult(SQueryRuntimeEnv *pRuntimeEnv) {
  SQuery *pQuery = pRuntimeEnv->pQuery;
  bool    hasMainFunction = hasMainOutput(pQuery);

  int64_t maxOutput = 0;
  for (int32_t j = 0; j < pQuery->numOfOutput; ++j) {
    int32_t functionId = pQuery->pSelectExpr[j].base.functionId;

    /*
     * ts, tag, tagprj function can not decide the output number of current query
     * the number of output result is decided by main output
     */
    if (hasMainFunction &&
        (functionId == TSDB_FUNC_TS || functionId == TSDB_FUNC_TAG || functionId == TSDB_FUNC_TAGPRJ)) {
      continue;
    }

    SResultInfo *pResInfo = GET_RES_INFO(&pRuntimeEnv->pCtx[j]);
    if (pResInfo != NULL && maxOutput < pResInfo->numOfRes) {
      maxOutput = pResInfo->numOfRes;
    }
  }

  assert(maxOutput >= 0);
  return maxOutput;
}

213 214 215 216 217 218 219 220 221
/*
 * The number of results needs to be updated because the offset value was updated.
 *
 * Sets numOfRes of every output column to the given value, skipping the
 * ts, tag, tagprj and ts_dummy functions. Each updated column is asserted
 * to currently hold more than numOfRes results.
 */
void updateNumOfResult(SQueryRuntimeEnv *pRuntimeEnv, int32_t numOfRes) {
  SQuery *pQuery = pRuntimeEnv->pQuery;

  for (int32_t j = 0; j < pQuery->numOfOutput; ++j) {
    SResultInfo *pResInfo = GET_RES_INFO(&pRuntimeEnv->pCtx[j]);

    int16_t functionId = pRuntimeEnv->pCtx[j].functionId;
    if (functionId == TSDB_FUNC_TS || functionId == TSDB_FUNC_TAG || functionId == TSDB_FUNC_TAGPRJ ||
        functionId == TSDB_FUNC_TS_DUMMY) {
      continue;
    }

    assert(pResInfo->numOfRes > numOfRes);
    pResInfo->numOfRes = numOfRes;
  }
}

233
/* Map a group index to its result id: a fixed base of 20000000 plus 10000 per group. */
static int32_t getGroupResultId(int32_t groupIndex) {
  int32_t base = 20000000;
  return base + (groupIndex * 10000);
}

/**
 * Tell whether the group-by expression contains a normal (non-tag) column.
 *
 * @param pGroupbyExpr group-by expression, may be NULL
 * @return true if any group column has flag TSDB_COL_NORMAL; false when the
 *         expression is NULL, empty, or has no such column
 */
bool isGroupbyNormalCol(SSqlGroupbyExpr *pGroupbyExpr) {
  if (pGroupbyExpr == NULL || pGroupbyExpr->numOfGroupCols == 0) {
    return false;
  }

  for (int32_t i = 0; i < pGroupbyExpr->numOfGroupCols; ++i) {
    SColIndex *pColIndex = taosArrayGet(pGroupbyExpr->columnInfo, i);
    if (pColIndex->flag == TSDB_COL_NORMAL) {
      //make sure the normal column locates at the second position if tbname exists in group by clause
      if (pGroupbyExpr->numOfGroupCols > 1) {
        assert(pColIndex->colIndex > 0);
      }

      return true;
    }
  }

  return false;
}

/**
 * Look up the data type of the first normal column in the group-by expression.
 *
 * @param pQuery       query whose colList is searched by column id
 * @param pGroupbyExpr group-by expression, must not be NULL
 * @return the type of the matching entry in pQuery->colList, or
 *         TSDB_DATA_TYPE_NULL when no normal group column or no matching
 *         column id is found
 */
int16_t getGroupbyColumnType(SQuery *pQuery, SSqlGroupbyExpr *pGroupbyExpr) {
  assert(pGroupbyExpr != NULL);

  int32_t colId = -2;  // stays -2 when no normal group-by column is found
  int16_t type = TSDB_DATA_TYPE_NULL;

  for (int32_t i = 0; i < pGroupbyExpr->numOfGroupCols; ++i) {
    SColIndex *pColIndex = taosArrayGet(pGroupbyExpr->columnInfo, i);
    if (pColIndex->flag == TSDB_COL_NORMAL) {
      colId = pColIndex->colId;
      break;
    }
  }

  for (int32_t i = 0; i < pQuery->numOfCols; ++i) {
    if (colId == pQuery->colList[i].colId) {
      type = pQuery->colList[i].type;
      break;
    }
  }

  return type;
}

/**
 * Tell whether the query combines a selectivity function with tag/ts dummy outputs.
 *
 * @param pQuery query whose select expressions are inspected
 * @return true when at least one output is TSDB_FUNC_TAG_DUMMY or
 *         TSDB_FUNC_TS_DUMMY and at least one other output's function has the
 *         TSDB_FUNCSTATE_SELECTIVITY status bit
 */
bool isSelectivityWithTagsQuery(SQuery *pQuery) {
  bool    hasTags = false;
  int32_t numOfSelectivity = 0;

  for (int32_t i = 0; i < pQuery->numOfOutput; ++i) {
    int32_t functId = pQuery->pSelectExpr[i].base.functionId;
    if (functId == TSDB_FUNC_TAG_DUMMY || functId == TSDB_FUNC_TS_DUMMY) {
      hasTags = true;
      continue;
    }

    if ((aAggs[functId].nStatus & TSDB_FUNCSTATE_SELECTIVITY) != 0) {
      numOfSelectivity++;
    }
  }

  return (numOfSelectivity > 0 && hasTags);
}

305 306 307 308 309 310 311 312 313 314 315
/* A projection query is one in which every output is TSDB_FUNC_PRJ or TSDB_FUNC_TAGPRJ. */
bool isProjQuery(SQuery *pQuery) {
  int32_t idx = 0;

  while (idx < pQuery->numOfOutput) {
    int32_t fid = pQuery->pSelectExpr[idx].base.functionId;
    bool    isProjection = (fid == TSDB_FUNC_PRJ) || (fid == TSDB_FUNC_TAGPRJ);

    if (!isProjection) {
      return false;
    }

    ++idx;
  }

  return true;
}

316
/* True when the first select expression of the query is TSDB_FUNC_TS_COMP. */
bool isTSCompQuery(SQuery *pQuery) {
  SExprInfo *pFirstExpr = &pQuery->pSelectExpr[0];
  return pFirstExpr->base.functionId == TSDB_FUNC_TS_COMP;
}
317

318 319 320 321
/**
 * Enforce the LIMIT clause on the rows produced in the current round.
 *
 * When a positive limit is set and rec.total + rec.rows exceeds it, rec.rows
 * is trimmed so that the cumulative count equals the limit and the query is
 * marked QUERY_COMPLETED.
 *
 * @return true if the rows were trimmed and the query completed, false otherwise
 */
static bool limitResults(SQueryRuntimeEnv* pRuntimeEnv) {
  SQInfo* pQInfo = GET_QINFO_ADDR(pRuntimeEnv);
  SQuery* pQuery = pRuntimeEnv->pQuery;

  if ((pQuery->limit.limit > 0) && (pQuery->rec.total + pQuery->rec.rows > pQuery->limit.limit)) {
    pQuery->rec.rows = pQuery->limit.limit - pQuery->rec.total;

    qDebug("QInfo:%p discard remain data due to result limitation, limit:%"PRId64", current return:%" PRId64 ", total:%"PRId64,
        pQInfo, pQuery->limit.limit, pQuery->rec.rows, pQuery->rec.total + pQuery->rec.rows);
    assert(pQuery->rec.rows >= 0);
    setQueryStatus(pQuery, QUERY_COMPLETED);
    return true;
  }

  return false;
}

/* True when any output of the query is TSDB_FUNC_TOP or TSDB_FUNC_BOTTOM. */
static bool isTopBottomQuery(SQuery *pQuery) {
  for (int32_t i = 0; i < pQuery->numOfOutput; ++i) {
    int32_t functionId = pQuery->pSelectExpr[i].base.functionId;
    if (functionId == TSDB_FUNC_TS) {
      continue;
    }

    if (functionId == TSDB_FUNC_TOP || functionId == TSDB_FUNC_BOTTOM) {
      return true;
    }
  }

  return false;
}

H
Haojun Liao 已提交
350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367
/*
 * True when the query is a lone TSDB_FUNC_TS_COMP output, or when any output
 * expression refers to a tag column.
 */
static bool hasTagValOutput(SQuery* pQuery) {
  SExprInfo *pFirst = &pQuery->pSelectExpr[0];

  // a single ts_comp column requires the tag value for join filter
  if (pQuery->numOfOutput == 1 && pFirst->base.functionId == TSDB_FUNC_TS_COMP) {
    return true;
  }

  // otherwise look for a tag column, by which the results are aggregated
  int32_t i = 0;
  while (i < pQuery->numOfOutput) {
    SExprInfo *pExpr = &pQuery->pSelectExpr[i];
    if (TSDB_COL_IS_TAG(pExpr->base.colInfo.flag)) {
      return true;
    }

    i += 1;
  }

  return false;
}

368 369 370 371 372 373 374 375
/**
 * @param pQuery
 * @param col
 * @param pDataBlockInfo
 * @param pStatis
 * @param pColStatis
 * @return
 */
H
Haojun Liao 已提交
376
static bool hasNullValue(SColIndex* pColIndex, SDataStatis *pStatis, SDataStatis **pColStatis) {
H
Haojun Liao 已提交
377
  if (pStatis != NULL && TSDB_COL_IS_NORMAL_COL(pColIndex->flag)) {
H
Haojun Liao 已提交
378 379
    *pColStatis = &pStatis[pColIndex->colIndex];
    assert((*pColStatis)->colId == pColIndex->colId);
H
hjxilinx 已提交
380 381
  } else {
    *pColStatis = NULL;
382
  }
383

H
Haojun Liao 已提交
384
  if (TSDB_COL_IS_TAG(pColIndex->flag) || TSDB_COL_IS_UD_COL(pColIndex->flag) || pColIndex->colId == PRIMARYKEY_TIMESTAMP_COL_INDEX) {
H
Haojun Liao 已提交
385 386 387
    return false;
  }

388 389 390
  if ((*pColStatis) != NULL && (*pColStatis)->numOfNull == 0) {
    return false;
  }
391

392 393 394 395
  return true;
}

/**
 * Locate the window result slot keyed by pData, creating it on the master scan.
 *
 * The key is looked up in pWindowResInfo->hashList. On a hit, curIndex is set
 * to the stored slot. On a miss during a non-master scan NULL is returned; on
 * the master scan a new slot is appended (growing the result array when it is
 * full) and registered in the hash list.
 *
 * @param pRuntimeEnv    runtime environment (query, jump buffer, summary counters)
 * @param pWindowResInfo window result container to search/extend
 * @param pData          key bytes
 * @param bytes          key length
 * @param masterscan     whether new windows may be created
 * @return the window result at curIndex, or NULL when the key is unknown and
 *         masterscan is false; on allocation failure control leaves via
 *         longjmp(pRuntimeEnv->env, TSDB_CODE_QRY_OUT_OF_MEMORY)
 */
static SWindowResult *doSetTimeWindowFromKey(SQueryRuntimeEnv *pRuntimeEnv, SWindowResInfo *pWindowResInfo, char *pData,
                                             int16_t bytes, bool masterscan) {
  SQuery *pQuery = pRuntimeEnv->pQuery;

  int32_t *p1 = (int32_t *) taosHashGet(pWindowResInfo->hashList, pData, bytes);
  if (p1 != NULL) {
    pWindowResInfo->curIndex = *p1;
  } else {
    if (!masterscan) {  // not master scan, do not add new timewindow
      return NULL;
    }

    // more than the capacity, reallocate the resources
    if (pWindowResInfo->size >= pWindowResInfo->capacity) {
      int64_t newCap = 0;
      if (pWindowResInfo->capacity > 10000) {
        newCap = (int64_t)(pWindowResInfo->capacity * 1.25);
      } else {
        newCap = (int64_t)(pWindowResInfo->capacity * 1.5);
      }

      // scaling a capacity of 0 or 1 yields no growth; always leave room for the slot appended below
      if (newCap <= pWindowResInfo->size) {
        newCap = (int64_t)pWindowResInfo->size + 1;
      }

      char *t = realloc(pWindowResInfo->pResult, newCap * sizeof(SWindowResult));
      if (t == NULL) {
        longjmp(pRuntimeEnv->env, TSDB_CODE_QRY_OUT_OF_MEMORY);
      }

      // account for the extra memory only after the reallocation succeeded
      pRuntimeEnv->summary.internalSupSize += (newCap - pWindowResInfo->capacity) * sizeof(SWindowResult);

      pWindowResInfo->pResult = (SWindowResult *)t;

      // zero the newly added tail before initializing each new slot
      int32_t inc = (int32_t)newCap - pWindowResInfo->capacity;
      memset(&pWindowResInfo->pResult[pWindowResInfo->capacity], 0, sizeof(SWindowResult) * inc);

      pRuntimeEnv->summary.internalSupSize += (pQuery->numOfOutput * sizeof(SResultInfo) + pRuntimeEnv->interBufSize) * inc;

      for (int32_t i = pWindowResInfo->capacity; i < newCap; ++i) {
        createQueryResultInfo(pQuery, &pWindowResInfo->pResult[i], pRuntimeEnv->stableQuery, pRuntimeEnv->interBufSize);
      }

      pWindowResInfo->capacity = (int32_t)newCap;
    }

    // add a new result set for a new group
    pWindowResInfo->curIndex = pWindowResInfo->size++;
    taosHashPut(pWindowResInfo->hashList, pData, bytes, (char *)&pWindowResInfo->curIndex, sizeof(int32_t));
  }

  return getWindowResult(pWindowResInfo, pWindowResInfo->curIndex);
}

/**
 * Get the correct time window according to the handled timestamp.
 *
 * The candidate window starts from prevSKey when no window is active
 * (curIndex == -1), otherwise from the current window result. If ts lies
 * outside the candidate, skey is shifted by whole multiples of slidingTime
 * until the window covers ts. For ascending queries ekey is clamped to the
 * query's ekey.
 *
 * @param pWindowResInfo window result container (curIndex, prevSKey, results)
 * @param ts             timestamp that the returned window must cover
 * @param pQuery         query providing intervalTime, slidingTime, window and order
 * @return a window w with w.skey <= ts <= w.ekey (asserted)
 */
static STimeWindow getActiveTimeWindow(SWindowResInfo *pWindowResInfo, int64_t ts, SQuery *pQuery) {
  STimeWindow w = {0};

  if (pWindowResInfo->curIndex == -1) {  // the first window, from the previous stored value
    w.skey = pWindowResInfo->prevSKey;
    w.ekey = w.skey + pQuery->intervalTime - 1;
  } else {
    int32_t slot = curTimeWindow(pWindowResInfo);
    w = getWindowResult(pWindowResInfo, slot)->window;
  }

  if (w.skey > ts || w.ekey < ts) {
    int64_t st = w.skey;

    // move backwards by the smallest number of sliding steps that puts st at or before ts
    if (st > ts) {
      st -= ((st - ts + pQuery->slidingTime - 1) / pQuery->slidingTime) * pQuery->slidingTime;
    }

    // move forwards by the smallest number of sliding steps that puts the window end at or after ts
    int64_t et = st + pQuery->intervalTime - 1;
    if (et < ts) {
      st += ((ts - et + pQuery->slidingTime - 1) / pQuery->slidingTime) * pQuery->slidingTime;
    }

    w.skey = st;
    w.ekey = w.skey + pQuery->intervalTime - 1;
  }

  /*
   * query border check, skey should not be bounded by the query time range, since the value skey will
   * be used as the time window index value. So we only change ekey of time window accordingly.
   */
  if (w.ekey > pQuery->window.ekey && QUERY_IS_ASC_QUERY(pQuery)) {
    w.ekey = pQuery->window.ekey;
  }

  assert(ts >= w.skey && ts <= w.ekey);

  return w;
}

/**
 * Assign a result-buffer position (page id + row id) to a window result that
 * does not have one yet.
 *
 * The last page of the group's page list is reused while it holds fewer than
 * numOfRowsPerPage rows; otherwise (or when the list is empty) a new page is
 * requested from the result buffer.
 *
 * @param pWindowRes        window result to place
 * @param pResultBuf        disk-based result buffer supplying pages
 * @param sid               group id used to look up / allocate pages
 * @param numOfRowsPerPage  row capacity of one page
 * @return 0 on success or when a position is already assigned, -1 when no page could be obtained
 */
static int32_t addNewWindowResultBuf(SWindowResult *pWindowRes, SDiskbasedResultBuf *pResultBuf, int32_t sid,
                                     int32_t numOfRowsPerPage) {
  if (pWindowRes->pos.pageId != -1) {
    return 0;
  }

  tFilePage *pData = NULL;

  // in the first scan, new space needed for results
  int32_t pageId = -1;
  SIDList list = getDataBufPagesIdList(pResultBuf, sid);

  if (taosArrayGetSize(list) == 0) {
    pData = getNewDataBuf(pResultBuf, sid, &pageId);
  } else {
    SPageInfo* pi = getLastPageInfo(list);
    pData = getResBufPage(pResultBuf, pi->pageId);
    pageId = pi->pageId;

    if (pData->num >= numOfRowsPerPage) {
      // release current page first, and prepare the next one
      releaseResBufPageInfo(pResultBuf, pi);

      pData = getNewDataBuf(pResultBuf, sid, &pageId);
      if (pData != NULL) {
        assert(pData->num == 0);  // number of elements must be 0 for new allocated buffer
      }
    }
  }

  if (pData == NULL) {
    return -1;
  }

  // set the number of rows in current disk page
  if (pWindowRes->pos.pageId == -1) {  // not allocated yet, allocate new buffer
    pWindowRes->pos.pageId = pageId;
    pWindowRes->pos.rowId = (int32_t)(pData->num++);

    assert(pWindowRes->pos.pageId >= 0);
  }

  return 0;
}

static int32_t setWindowOutputBufByKey(SQueryRuntimeEnv *pRuntimeEnv, SWindowResInfo *pWindowResInfo, int32_t sid,
532
                                       STimeWindow *win, bool masterscan, bool* newWind) {
533 534
  assert(win->skey <= win->ekey);
  SDiskbasedResultBuf *pResultBuf = pRuntimeEnv->pResultBuf;
535

536 537
  SWindowResult *pWindowRes = doSetTimeWindowFromKey(pRuntimeEnv, pWindowResInfo, (char *)&win->skey,
      TSDB_KEYSIZE, masterscan);
538
  if (pWindowRes == NULL) {
539 540 541
    *newWind = false;

    return masterscan? -1:0;
542
  }
543

544
  *newWind = true;
H
Haojun Liao 已提交
545

546 547 548
  // not assign result buffer yet, add new result buffer
  if (pWindowRes->pos.pageId == -1) {
    int32_t ret = addNewWindowResultBuf(pWindowRes, pResultBuf, sid, pRuntimeEnv->numOfRowsPerPage);
H
Haojun Liao 已提交
549
    if (ret != TSDB_CODE_SUCCESS) {
550 551 552
      return -1;
    }
  }
553

554 555
  // set time window for current result
  pWindowRes->window = *win;
556

H
Haojun Liao 已提交
557
  setWindowResOutputBufInitCtx(pRuntimeEnv, pWindowRes);
558 559 560 561 562 563 564 565
  return TSDB_CODE_SUCCESS;
}

/* Return the status of the window result in the given slot; the slot must be within [0, size). */
static SWindowStatus *getTimeWindowResStatus(SWindowResInfo *pWindowResInfo, int32_t slot) {
  assert(slot >= 0 && slot < pWindowResInfo->size);

  SWindowResult *pSlotResult = pWindowResInfo->pResult + slot;
  return &pSlotResult->status;
}

H
Haojun Liao 已提交
566
/**
 * Count how many rows, starting at pos and moving in the scan direction,
 * fall inside the window bounded by ekey.
 *
 * @param numOfRows number of rows in pData
 * @param searchFn  block search function used to locate ekey
 * @param ekey      boundary key of the window in scan direction
 * @param pos       starting row position in pData
 * @param order     TSDB_ORDER_ASC scans pData[pos..numOfRows), any other value scans pData[0..pos]
 * @param pData     timestamp column of the block
 * @return number of rows to step over; asserted to be greater than 0
 */
static FORCE_INLINE int32_t getForwardStepsInBlock(int32_t numOfRows, __block_search_fn_t searchFn, TSKEY ekey, int16_t pos,
                                      int16_t order, int64_t *pData) {
  int32_t forwardStep = 0;

  if (order == TSDB_ORDER_ASC) {
    // the search runs on the sub-array starting at pos, so 'end' is relative to pos
    int32_t end = searchFn((char*) &pData[pos], numOfRows - pos, ekey, order);
    if (end >= 0) {
      forwardStep = end;

      // the row holding exactly ekey is counted as well
      if (pData[end + pos] == ekey) {
        forwardStep += 1;
      }
    }
  } else {
    int32_t end = searchFn((char *)pData, pos + 1, ekey, order);
    if (end >= 0) {
      forwardStep = pos - end;

      // the row holding exactly ekey is counted as well
      if (pData[end] == ekey) {
        forwardStep += 1;
      }
    }
  }

  assert(forwardStep > 0);
  return forwardStep;
}

/**
 * NOTE: the query status only set for the first scan of master scan.
 */
597
static int32_t doCheckQueryCompleted(SQueryRuntimeEnv *pRuntimeEnv, TSKEY lastKey, SWindowResInfo *pWindowResInfo) {
598
  SQuery *pQuery = pRuntimeEnv->pQuery;
H
Haojun Liao 已提交
599
  if (pRuntimeEnv->scanFlag != MASTER_SCAN || (!QUERY_IS_INTERVAL_QUERY(pQuery))) {
600
    return pWindowResInfo->size;
601
  }
602

603
  // no qualified results exist, abort check
604 605
  int32_t numOfClosed = 0;
  
606
  if (pWindowResInfo->size == 0) {
607
    return pWindowResInfo->size;
608
  }
609

610
  // query completed
H
hjxilinx 已提交
611 612
  if ((lastKey >= pQuery->current->win.ekey && QUERY_IS_ASC_QUERY(pQuery)) ||
      (lastKey <= pQuery->current->win.ekey && !QUERY_IS_ASC_QUERY(pQuery))) {
613
    closeAllTimeWindow(pWindowResInfo);
614

615 616 617 618
    pWindowResInfo->curIndex = pWindowResInfo->size - 1;
    setQueryStatus(pQuery, QUERY_COMPLETED | QUERY_RESBUF_FULL);
  } else {  // set the current index to be the last unclosed window
    int32_t i = 0;
619
    int64_t skey = TSKEY_INITIAL_VAL;
620

621 622 623
    for (i = 0; i < pWindowResInfo->size; ++i) {
      SWindowResult *pResult = &pWindowResInfo->pResult[i];
      if (pResult->status.closed) {
624
        numOfClosed += 1;
625 626
        continue;
      }
627

628 629 630 631 632 633 634 635
      if ((pResult->window.ekey <= lastKey && QUERY_IS_ASC_QUERY(pQuery)) ||
          (pResult->window.skey >= lastKey && !QUERY_IS_ASC_QUERY(pQuery))) {
        closeTimeWindow(pWindowResInfo, i);
      } else {
        skey = pResult->window.skey;
        break;
      }
    }
636

637
    // all windows are closed, set the last one to be the skey
638
    if (skey == TSKEY_INITIAL_VAL) {
639 640 641 642 643
      assert(i == pWindowResInfo->size);
      pWindowResInfo->curIndex = pWindowResInfo->size - 1;
    } else {
      pWindowResInfo->curIndex = i;
    }
644

645
    pWindowResInfo->prevSKey = pWindowResInfo->pResult[pWindowResInfo->curIndex].window.skey;
646

647 648
    // the number of completed slots are larger than the threshold, return current generated results to client.
    if (numOfClosed > pWindowResInfo->threshold) {
649
      qDebug("QInfo:%p total result window:%d closed:%d, reached the output threshold %d, return",
650 651
          GET_QINFO_ADDR(pRuntimeEnv), pWindowResInfo->size, numOfClosed, pQuery->rec.threshold);
      
652
      setQueryStatus(pQuery, QUERY_RESBUF_FULL);
653
    } else {
654
      qDebug("QInfo:%p total result window:%d already closed:%d", GET_QINFO_ADDR(pRuntimeEnv), pWindowResInfo->size,
655
             numOfClosed);
656 657
    }
  }
658 659 660 661 662 663 664
  
  // output has reached the limitation, set query completed
  if (pQuery->limit.limit > 0 && (pQuery->limit.limit + pQuery->limit.offset) <= numOfClosed &&
      pRuntimeEnv->scanFlag == MASTER_SCAN) {
    setQueryStatus(pQuery, QUERY_COMPLETED);
  }
  
665
  assert(pWindowResInfo->prevSKey != TSKEY_INITIAL_VAL);
666
  return numOfClosed;
667 668 669
}

/**
 * Count the rows of the current block, starting at startPos and moving in the
 * query direction, that belong to the time window ending at ekey.
 *
 * @param pQuery          query (order, current table info)
 * @param pDataBlockInfo  block info providing rows and the block's key range
 * @param pPrimaryColumn  timestamp column of the block
 * @param startPos        first row to consider, must be within [0, rows)
 * @param ekey            window boundary in query direction
 * @param searchFn        block search function forwarded to getForwardStepsInBlock
 * @param updateLastKey   when true, pQuery->current->lastKey is advanced to one
 *                        step past the last counted row
 * @return number of rows in the window; asserted to be greater than 0
 */
static int32_t getNumOfRowsInTimeWindow(SQuery *pQuery, SDataBlockInfo *pDataBlockInfo, TSKEY *pPrimaryColumn,
                                        int32_t startPos, TSKEY ekey, __block_search_fn_t searchFn, bool updateLastKey) {
  assert(startPos >= 0 && startPos < pDataBlockInfo->rows);

  int32_t num = -1;
  int32_t order = pQuery->order.order;
  int32_t step = GET_FORWARD_DIRECTION_FACTOR(order);

  STableQueryInfo* item = pQuery->current;

  if (QUERY_IS_ASC_QUERY(pQuery)) {
    if (ekey < pDataBlockInfo->window.ekey) {
      // the window ends inside the block: search for its last row
      num = getForwardStepsInBlock(pDataBlockInfo->rows, searchFn, ekey, startPos, order, pPrimaryColumn);
      if (updateLastKey) { // update the last key
        item->lastKey = pPrimaryColumn[startPos + (num - 1)] + step;
      }
    } else {
      // the window covers the rest of the block
      num = pDataBlockInfo->rows - startPos;
      if (updateLastKey) {
        item->lastKey = pDataBlockInfo->window.ekey + step;
      }
    }
  } else {  // desc
    if (ekey > pDataBlockInfo->window.skey) {
      // the window ends inside the block: search for its last row
      num = getForwardStepsInBlock(pDataBlockInfo->rows, searchFn, ekey, startPos, order, pPrimaryColumn);
      if (updateLastKey) {  // update the last key
        item->lastKey = pPrimaryColumn[startPos - (num - 1)] + step;
      }
    } else {
      // the window covers everything from startPos down to the first row
      num = startPos + 1;
      if (updateLastKey) {
        item->lastKey = pDataBlockInfo->window.skey + step;
      }
    }
  }

  assert(num > 0);
  return num;
}

/**
 * Apply every output function of the query to a run of rows of the current block.
 *
 * Nothing is done unless this is the master scan or the window is already closed.
 *
 * @param pRuntimeEnv runtime environment (query and function contexts)
 * @param pStatus     status of the target window
 * @param pWin        target window; its skey becomes nStartQueryTimestamp
 * @param offset      row position of the first row in scan direction
 * @param forwardStep number of rows to process
 * @param tsBuf       timestamp column of the block
 * @param numOfTotal  total number of rows in the block
 */
static void doBlockwiseApplyFunctions(SQueryRuntimeEnv *pRuntimeEnv, SWindowStatus *pStatus, STimeWindow *pWin,
                                      int32_t offset, int32_t forwardStep, TSKEY *tsBuf, int32_t numOfTotal) {
  SQuery *        pQuery = pRuntimeEnv->pQuery;
  SQLFunctionCtx *pCtx = pRuntimeEnv->pCtx;

  if (IS_MASTER_SCAN(pRuntimeEnv) || pStatus->closed) {
    for (int32_t k = 0; k < pQuery->numOfOutput; ++k) {
      int32_t functionId = pQuery->pSelectExpr[k].base.functionId;

      pCtx[k].nStartQueryTimestamp = pWin->skey;
      pCtx[k].size = forwardStep;
      // startOffset is the lowest row index of the run, regardless of scan direction
      pCtx[k].startOffset = (QUERY_IS_ASC_QUERY(pQuery)) ? offset : offset - (forwardStep - 1);

      if ((aAggs[functionId].nStatus & TSDB_FUNCSTATE_SELECTIVITY) != 0) {
        pCtx[k].ptsList = &tsBuf[offset];
      }

      // not a whole block involved in query processing, statistics data can not be used
      if (forwardStep != numOfTotal) {
        pCtx[k].preAggVals.isSet = false;
      }

      if (functionNeedToExecute(pRuntimeEnv, &pCtx[k], functionId)) {
        aAggs[functionId].xFunction(&pCtx[k]);
      }
    }
  }
}

/**
 * Apply every output function of the query to a single row.
 *
 * Nothing is done unless this is the master scan or the window is already closed.
 *
 * @param pRuntimeEnv runtime environment (query and function contexts)
 * @param pStatus     status of the target window
 * @param pWin        target window; its skey becomes nStartQueryTimestamp
 * @param offset      row position passed to each function's xFunctionF
 */
static void doRowwiseApplyFunctions(SQueryRuntimeEnv *pRuntimeEnv, SWindowStatus *pStatus, STimeWindow *pWin,
                                    int32_t offset) {
  SQuery *        pQuery = pRuntimeEnv->pQuery;
  SQLFunctionCtx *pCtx = pRuntimeEnv->pCtx;

  if (IS_MASTER_SCAN(pRuntimeEnv) || pStatus->closed) {
    for (int32_t k = 0; k < pQuery->numOfOutput; ++k) {
      pCtx[k].nStartQueryTimestamp = pWin->skey;

      int32_t functionId = pQuery->pSelectExpr[k].base.functionId;
      if (functionNeedToExecute(pRuntimeEnv, &pCtx[k], functionId)) {
        aAggs[functionId].xFunctionF(&pCtx[k], offset);
      }
    }
  }
}

H
Haojun Liao 已提交
755 756
/**
 * Advance pNext to the next time window and find where it starts in the
 * current block.
 *
 * @param pRuntimeEnv    runtime environment providing the query
 * @param pNext          in/out: window to advance; may be moved further when
 *                       the advanced window covers no row of the block
 * @param pDataBlockInfo block info (rows and key range)
 * @param primaryKeys    timestamp column of the block
 * @param searchFn       block search function used to locate the start key
 * @param prevPosition   last row position of the previous window, or -1 if unknown
 * @return the start row position of the next window in this block, or -1 when
 *         the next window lies entirely beyond the block in scan direction
 */
static int32_t getNextQualifiedWindow(SQueryRuntimeEnv *pRuntimeEnv, STimeWindow *pNext, SDataBlockInfo *pDataBlockInfo,
    TSKEY *primaryKeys, __block_search_fn_t searchFn, int32_t prevPosition) {
  SQuery *pQuery = pRuntimeEnv->pQuery;

  GET_NEXT_TIMEWINDOW(pQuery, pNext);

  // next time window is not in current block
  if ((pNext->skey > pDataBlockInfo->window.ekey && QUERY_IS_ASC_QUERY(pQuery)) ||
      (pNext->ekey < pDataBlockInfo->window.skey && !QUERY_IS_ASC_QUERY(pQuery))) {
    return -1;
  }

  // the search key is the window edge in scan direction, bounded by the query's skey
  TSKEY startKey = -1;
  if (QUERY_IS_ASC_QUERY(pQuery)) {
    startKey = pNext->skey;
    if (startKey < pQuery->window.skey) {
      startKey = pQuery->window.skey;
    }
  } else {
    startKey = pNext->ekey;
    if (startKey > pQuery->window.skey) {
      startKey = pQuery->window.skey;
    }
  }

  int32_t startPos = 0;
  // tumbling time window query, a special case of sliding time window query
  if (pQuery->slidingTime == pQuery->intervalTime && prevPosition != -1) {
    int32_t factor = GET_FORWARD_DIRECTION_FACTOR(pQuery->order.order);
    startPos = prevPosition + factor;
  } else {
    startPos = searchFn((char *)primaryKeys, pDataBlockInfo->rows, startKey, pQuery->order.order);
  }

  /*
   * This time window does not cover any data, try next time window,
   * this case may happen when the time window is too small
   */
  if (QUERY_IS_ASC_QUERY(pQuery) && primaryKeys[startPos] > pNext->ekey) {
    TSKEY next = primaryKeys[startPos];

    pNext->ekey += ((next - pNext->ekey + pQuery->slidingTime - 1)/pQuery->slidingTime) * pQuery->slidingTime;
    pNext->skey = pNext->ekey - pQuery->intervalTime + 1;
  } else if ((!QUERY_IS_ASC_QUERY(pQuery)) && primaryKeys[startPos] < pNext->skey) {
    TSKEY next = primaryKeys[startPos];

    pNext->skey -= ((pNext->skey - next + pQuery->slidingTime - 1) / pQuery->slidingTime) * pQuery->slidingTime;
    pNext->ekey = pNext->skey + pQuery->intervalTime - 1;
  }

  return startPos;
}

H
Haojun Liao 已提交
808
/*
 * Clamp the scan end key of a time window to the end of the query range.
 * Ascending scans end at the window's ekey but never beyond pQuery->window.ekey;
 * descending scans end at the window's skey but never before pQuery->window.ekey.
 */
static FORCE_INLINE TSKEY reviseWindowEkey(SQuery *pQuery, STimeWindow *pWindow) {
  const TSKEY queryEnd = pQuery->window.ekey;

  if (QUERY_IS_ASC_QUERY(pQuery)) {
    return (pWindow->ekey > queryEnd) ? queryEnd : pWindow->ekey;
  }

  return (pWindow->skey < queryEnd) ? queryEnd : pWindow->skey;
}

H
hjxilinx 已提交
825 826
/*
 * Return the data buffer of the column whose id equals colId, or NULL when the
 * block holds no such column.
 * todo: replace the linear scan with a binary search
 */
static void* getDataBlockImpl(SArray* pDataBlock, int32_t colId) {
  int32_t total = (int32_t)taosArrayGetSize(pDataBlock);
  int32_t idx = 0;

  while (idx < total) {
    SColumnInfoData *pColData = taosArrayGet(pDataBlock, idx);
    if (pColData->info.colId == colId) {
      return pColData->pData;
    }

    ++idx;
  }

  return NULL;
}

static char *getDataBlock(SQueryRuntimeEnv *pRuntimeEnv, SArithmeticSupport *sas, int32_t col, int32_t size,
840
                    SArray *pDataBlock) {
dengyihao's avatar
dengyihao 已提交
841 842 843
  if (pDataBlock == NULL) {
    return NULL;
  }
844

H
Haojun Liao 已提交
845
  char *dataBlock = NULL;
H
Haojun Liao 已提交
846
  SQuery *pQuery = pRuntimeEnv->pQuery;
847
  SQLFunctionCtx *pCtx = pRuntimeEnv->pCtx;
848

849
  int32_t functionId = pQuery->pSelectExpr[col].base.functionId;
850
  if (functionId == TSDB_FUNC_ARITHM) {
851
    sas->pArithExpr = &pQuery->pSelectExpr[col];
852

853 854 855 856 857 858
    // set the start offset to be the lowest start position, no matter asc/desc query order
    if (QUERY_IS_ASC_QUERY(pQuery)) {
      pCtx->startOffset = pQuery->pos;
    } else {
      pCtx->startOffset = pQuery->pos - (size - 1);
    }
859

860 861 862 863
    sas->offset  = 0;
    sas->colList = pQuery->colList;
    sas->numOfCols = pQuery->numOfCols;
    sas->data    = calloc(pQuery->numOfCols, POINTER_BYTES);
864

H
Haojun Liao 已提交
865
    if (sas->data == NULL) {
H
Haojun Liao 已提交
866
      finalizeQueryResult(pRuntimeEnv); // clean up allocated resource during query
H
Haojun Liao 已提交
867 868 869
      longjmp(pRuntimeEnv->env, TSDB_CODE_QRY_OUT_OF_MEMORY);
    }

870
    // here the pQuery->colList and sas->colList are identical
S
TD-1057  
Shengliang Guan 已提交
871
    int32_t numOfCols = (int32_t)taosArrayGetSize(pDataBlock);
872
    for (int32_t i = 0; i < pQuery->numOfCols; ++i) {
873
      SColumnInfo *pColMsg = &pQuery->colList[i];
874

875 876 877 878 879 880 881 882
      dataBlock = NULL;
      for (int32_t k = 0; k < numOfCols; ++k) {  //todo refactor
        SColumnInfoData *p = taosArrayGet(pDataBlock, k);
        if (pColMsg->colId == p->info.colId) {
          dataBlock = p->pData;
          break;
        }
      }
883

884
      assert(dataBlock != NULL);
H
Haojun Liao 已提交
885
      sas->data[i] = dataBlock/* + pQuery->colList[i].bytes*/;  // start from the offset
886
    }
887

888
  } else {  // other type of query function
889
    SColIndex *pCol = &pQuery->pSelectExpr[col].base.colInfo;
H
Haojun Liao 已提交
890
    if (TSDB_COL_IS_NORMAL_COL(pCol->flag)) {
H
Haojun Liao 已提交
891 892 893 894 895
      SColIndex* pColIndex = &pQuery->pSelectExpr[col].base.colInfo;
      SColumnInfoData *p = taosArrayGet(pDataBlock, pColIndex->colIndex);
      assert(p->info.colId == pColIndex->colId);

      dataBlock = p->pData;
H
Haojun Liao 已提交
896 897
    } else {
      dataBlock = NULL;
898 899
    }
  }
900

901 902 903 904
  return dataBlock;
}

/**
H
Haojun Liao 已提交
905
 * todo set the last value for pQueryTableInfo as in rowwiseapplyfunctions
906 907
 * @param pRuntimeEnv
 * @param forwardStep
908
 * @param tsCols
909 910 911 912 913
 * @param pFields
 * @param isDiskFileBlock
 * @return                  the incremental number of output value, so it maybe 0 for fixed number of query,
 *                          such as count/min/max etc.
 */
914
static void blockwiseApplyFunctions(SQueryRuntimeEnv *pRuntimeEnv, SDataStatis *pStatis,
915 916
                                       SDataBlockInfo *pDataBlockInfo, SWindowResInfo *pWindowResInfo,
                                       __block_search_fn_t searchFn, SArray *pDataBlock) {
917
  SQLFunctionCtx *pCtx = pRuntimeEnv->pCtx;
918 919
  bool masterScan = IS_MASTER_SCAN(pRuntimeEnv);

920 921
  SQuery *pQuery = pRuntimeEnv->pQuery;
  TSKEY  *tsCols = NULL;
922
  if (pDataBlock != NULL) {
923
    SColumnInfoData* pColInfo = taosArrayGet(pDataBlock, 0);
924
    tsCols = (TSKEY *)(pColInfo->pData);
925
  }
926

927
  SArithmeticSupport *sasArray = calloc((size_t)pQuery->numOfOutput, sizeof(SArithmeticSupport));
H
Haojun Liao 已提交
928
  if (sasArray == NULL) {
H
Haojun Liao 已提交
929
    finalizeQueryResult(pRuntimeEnv); // clean up allocated resource during query
H
Haojun Liao 已提交
930 931
    longjmp(pRuntimeEnv->env, TSDB_CODE_QRY_OUT_OF_MEMORY);
  }
932

933
  for (int32_t k = 0; k < pQuery->numOfOutput; ++k) {
H
hjxilinx 已提交
934
    char *dataBlock = getDataBlock(pRuntimeEnv, &sasArray[k], k, pDataBlockInfo->rows, pDataBlock);
935
    setExecParams(pQuery, &pCtx[k], dataBlock, tsCols, pDataBlockInfo, pStatis, &sasArray[k], k);
936
  }
937

938
  int32_t step = GET_FORWARD_DIRECTION_FACTOR(pQuery->order.order);
H
Haojun Liao 已提交
939 940
  if (QUERY_IS_INTERVAL_QUERY(pQuery)/* && tsCols != NULL*/) {
    TSKEY ts = TSKEY_INITIAL_VAL;
941

H
Haojun Liao 已提交
942 943 944 945 946 947 948 949
    if (tsCols == NULL) {
      ts = QUERY_IS_ASC_QUERY(pQuery)? pDataBlockInfo->window.skey:pDataBlockInfo->window.ekey;
    } else {
      int32_t offset = GET_COL_DATA_POS(pQuery, 0, step);
      ts = tsCols[offset];
    }

    bool        hasTimeWindow = false;
950
    STimeWindow win = getActiveTimeWindow(pWindowResInfo, ts, pQuery);
H
Haojun Liao 已提交
951 952
    if (setWindowOutputBufByKey(pRuntimeEnv, pWindowResInfo, pDataBlockInfo->tid, &win, masterScan, &hasTimeWindow) !=
        TSDB_CODE_SUCCESS) {
S
Shengliang Guan 已提交
953
      taosTFree(sasArray);
H
hjxilinx 已提交
954
      return;
955
    }
956

H
Haojun Liao 已提交
957 958 959
    int32_t forwardStep = 0;
    int32_t startPos = pQuery->pos;

960
    if (hasTimeWindow) {
H
Haojun Liao 已提交
961
      TSKEY ekey = reviseWindowEkey(pQuery, &win);
H
Haojun Liao 已提交
962
      forwardStep = getNumOfRowsInTimeWindow(pQuery, pDataBlockInfo, tsCols, pQuery->pos, ekey, searchFn, true);
963

964
      SWindowStatus *pStatus = getTimeWindowResStatus(pWindowResInfo, curTimeWindow(pWindowResInfo));
H
Haojun Liao 已提交
965
      doBlockwiseApplyFunctions(pRuntimeEnv, pStatus, &win, startPos, forwardStep, tsCols, pDataBlockInfo->rows);
966
    }
967

968 969
    int32_t     index = pWindowResInfo->curIndex;
    STimeWindow nextWin = win;
970

971
    while (1) {
H
Haojun Liao 已提交
972 973
      int32_t prevEndPos = (forwardStep - 1) * step + startPos;
      startPos = getNextQualifiedWindow(pRuntimeEnv, &nextWin, pDataBlockInfo, tsCols, searchFn, prevEndPos);
974 975 976
      if (startPos < 0) {
        break;
      }
977

978
      // null data, failed to allocate more memory buffer
H
Haojun Liao 已提交
979
      hasTimeWindow = false;
H
Haojun Liao 已提交
980 981
      if (setWindowOutputBufByKey(pRuntimeEnv, pWindowResInfo, pDataBlockInfo->tid, &nextWin, masterScan,
                                  &hasTimeWindow) != TSDB_CODE_SUCCESS) {
982 983
        break;
      }
984

985 986 987 988 989
      if (!hasTimeWindow) {
        continue;
      }

      TSKEY ekey = reviseWindowEkey(pQuery, &nextWin);
H
Haojun Liao 已提交
990
      forwardStep = getNumOfRowsInTimeWindow(pQuery, pDataBlockInfo, tsCols, startPos, ekey, searchFn, true);
991

H
Haojun Liao 已提交
992
      SWindowStatus *pStatus = getTimeWindowResStatus(pWindowResInfo, curTimeWindow(pWindowResInfo));
993
      doBlockwiseApplyFunctions(pRuntimeEnv, pStatus, &nextWin, startPos, forwardStep, tsCols, pDataBlockInfo->rows);
994
    }
995

996 997 998 999 1000 1001 1002
    pWindowResInfo->curIndex = index;
  } else {
    /*
     * the sqlfunctionCtx parameters should be set done before all functions are invoked,
     * since the selectivity + tag_prj query needs all parameters been set done.
     * tag_prj function are changed to be TSDB_FUNC_TAG_DUMMY
     */
1003
    for (int32_t k = 0; k < pQuery->numOfOutput; ++k) {
1004
      int32_t functionId = pQuery->pSelectExpr[k].base.functionId;
1005 1006 1007 1008 1009
      if (functionNeedToExecute(pRuntimeEnv, &pCtx[k], functionId)) {
        aAggs[functionId].xFunction(&pCtx[k]);
      }
    }
  }
1010

1011 1012 1013 1014
  for(int32_t i = 0; i < pQuery->numOfOutput; ++i) {
    if (pQuery->pSelectExpr[i].base.functionId != TSDB_FUNC_ARITHM) {
      continue;
    }
1015

S
Shengliang Guan 已提交
1016
    taosTFree(sasArray[i].data);
1017
  }
1018

S
Shengliang Guan 已提交
1019
  taosTFree(sasArray);
1020 1021 1022 1023 1024 1025
}

/*
 * Select the output buffer of the group identified by the group-by column value in pData.
 *
 * Returns -1 when the value is null, when no window result could be obtained for the key, or when
 * attaching a result buffer to a new group failed; TSDB_CODE_SUCCESS otherwise.
 */
static int32_t setGroupResultOutputBuf(SQueryRuntimeEnv *pRuntimeEnv, char *pData, int16_t type, int16_t bytes) {
  if (isNull(pData, type)) {  // rows with a null group key are ignored
    return -1;
  }

  const int32_t groupResultId = 1;

  // integer form of the group key; stays -1 for every type that is not listed below
  int64_t groupKey = -1;
  switch (type) {
    case TSDB_DATA_TYPE_BOOL:
    case TSDB_DATA_TYPE_TINYINT:
      groupKey = GET_INT8_VAL(pData);
      break;
    case TSDB_DATA_TYPE_SMALLINT:
      groupKey = GET_INT16_VAL(pData);
      break;
    case TSDB_DATA_TYPE_INT:
      groupKey = GET_INT32_VAL(pData);
      break;
    case TSDB_DATA_TYPE_BIGINT:
      groupKey = GET_INT64_VAL(pData);
      break;
    default:
      break;
  }

  SWindowResult *pGroupRes = doSetTimeWindowFromKey(pRuntimeEnv, &pRuntimeEnv->windowResInfo, pData, bytes, true);
  if (pGroupRes == NULL) {
    return -1;
  }

  pGroupRes->window.skey = groupKey;
  pGroupRes->window.ekey = groupKey;

  // a page id of -1 means that no result buffer has been assigned to this group yet
  if (pGroupRes->pos.pageId == -1 &&
      addNewWindowResultBuf(pGroupRes, pRuntimeEnv->pResultBuf, groupResultId, pRuntimeEnv->numOfRowsPerPage) != 0) {
    return -1;
  }

  setWindowResOutputBuf(pRuntimeEnv, pGroupRes);
  initCtxOutputBuf(pRuntimeEnv);
  return TSDB_CODE_SUCCESS;
}

1061
/*
 * Return the data of the first non-tag group-by column that is present in pDataBlock, or NULL when
 * none of them is.
 *
 * *type and *bytes receive the type and width of each non-tag group-by column that is examined,
 * taken from pQuery->colList, so they describe the last examined column even when NULL is returned.
 */
static char *getGroupbyColumnData(SQuery *pQuery, int16_t *type, int16_t *bytes, SArray* pDataBlock) {
  SSqlGroupbyExpr *pGroupbyExpr = pQuery->pGroupbyExpr;

  for (int32_t g = 0; g < pGroupbyExpr->numOfGroupCols; ++g) {
    SColIndex* pColIndex = taosArrayGet(pGroupbyExpr->columnInfo, g);
    if (pColIndex->flag == TSDB_COL_TAG) {
      continue;
    }

    // position of the group-by column in the query column list
    int32_t colId = pColIndex->colId;
    int16_t colIndex = -1;
    for (int32_t c = 0; c < pQuery->numOfCols && colIndex < 0; ++c) {
      if (pQuery->colList[c].colId == colId) {
        colIndex = c;
      }
    }

    assert(colIndex >= 0 && colIndex < pQuery->numOfCols);

    *type = pQuery->colList[colIndex].type;
    *bytes = pQuery->colList[colIndex].bytes;

    /*
     * The colIndex is acquired from the first table of all qualified tables in this vnode during
     * the query prepare stage; the remaining tables may not actually have the required column in
     * cache. The column is therefore looked up by id in the block instead of by position.
     */
    int32_t numOfBlockCols = (int32_t)taosArrayGetSize(pDataBlock);
    for (int32_t b = 0; b < numOfBlockCols; ++b) {
      SColumnInfoData *pColData = taosArrayGet(pDataBlock, b);
      if (pColData->info.colId == pColIndex->colId) {
        return pColData->pData;
      }
    }
  }

  return NULL;
}

/*
 * Compare the row at the given offset with the current element of the timestamp buffer.
 *
 * Returns TS_JOIN_TAG_NOT_EQUALS when the tag of the first function context differs from the tag of
 * the element, TS_JOIN_TS_NOT_EQUALS when the row has not reached the timestamp of the element yet
 * (in scan order), and TS_JOIN_TS_EQUAL when the timestamps match. A row that has already passed the
 * element is not expected and trips an assert.
 */
static int32_t doTSJoinFilter(SQueryRuntimeEnv *pRuntimeEnv, int32_t offset) {
  SQuery *pQuery = pRuntimeEnv->pQuery;

  STSElem         elem = tsBufGetElem(pRuntimeEnv->pTSBuf);
  SQLFunctionCtx *pCtx = pRuntimeEnv->pCtx;

  // compare tag first
  if (tVariantCompare(&pCtx[0].tag, &elem.tag) != 0) {
    return TS_JOIN_TAG_NOT_EQUALS;
  }

  TSKEY key = *(TSKEY *)((char*)pCtx[0].aInputElemBuf + TSDB_KEYSIZE * offset);

#if defined(_DEBUG_VIEW)
  // elem.tag is a variant: print its integer member, passing the struct itself to printf is undefined
  printf("elem in comp ts file:%" PRId64 ", key:%" PRId64 ", tag:%"PRIu64", query order:%d, ts order:%d, traverse:%d, index:%d\n",
         elem.ts, key, (uint64_t)elem.tag.i64Key, pQuery->order.order, pRuntimeEnv->pTSBuf->tsOrder,
         pRuntimeEnv->pTSBuf->cur.order, pRuntimeEnv->pTSBuf->cur.tsIndex);
#endif

  if (QUERY_IS_ASC_QUERY(pQuery)) {
    if (key < elem.ts) {
      return TS_JOIN_TS_NOT_EQUALS;
    } else if (key > elem.ts) {
      assert(false);
    }
  } else {
    if (key > elem.ts) {
      return TS_JOIN_TS_NOT_EQUALS;
    } else if (key < elem.ts) {
      assert(false);
    }
  }

  return TS_JOIN_TS_EQUAL;
}

/*
 * Decide whether a function has to be invoked for the current scan.
 * The checks are ordered: the first one that applies determines the result.
 */
static bool functionNeedToExecute(SQueryRuntimeEnv *pRuntimeEnv, SQLFunctionCtx *pCtx, int32_t functionId) {
  SResultInfo *pResInfo = GET_RES_INFO(pCtx);
  SQuery      *pQuery = pRuntimeEnv->pQuery;

  // the timestamp column always generates results
  if (functionId == TSDB_FUNC_TS) {
    return true;
  }

  // completed results and the dummy tag/ts functions never run
  if (pResInfo->complete || functionId == TSDB_FUNC_TAG_DUMMY || functionId == TSDB_FUNC_TS_DUMMY) {
    return false;
  }

  switch (functionId) {
    case TSDB_FUNC_FIRST_DST:
    case TSDB_FUNC_FIRST:
      // first/first_dst run on ascending scans only
      return QUERY_IS_ASC_QUERY(pQuery);

    case TSDB_FUNC_LAST_DST:
    case TSDB_FUNC_LAST:
      // todo add comments: runs only when the first parameter equals the scan order
      return pCtx->param[0].i64Key == pQuery->order.order;

    default:
      break;
  }

  // every remaining function is skipped during the reverse (supplementary) scan
  return !IS_REVERSE_SCAN(pRuntimeEnv);
}

1168 1169
/*
 * Apply all output functions of the query to the current data block, one row at a time.
 *
 * Each row is first checked against the timestamp buffer (when one is present) and the column
 * filters. A row of an interval query is then applied to every time window that contains its
 * timestamp; any other row is applied to the current output buffer, which is selected from the
 * group-by column value when the query groups by a normal column.
 * The last key of the current table is updated from the last visited row.
 */
static void rowwiseApplyFunctions(SQueryRuntimeEnv *pRuntimeEnv, SDataStatis *pStatis, SDataBlockInfo *pDataBlockInfo,
    SWindowResInfo *pWindowResInfo, SArray *pDataBlock) {
  SQuery          *pQuery = pRuntimeEnv->pQuery;
  SQLFunctionCtx  *pCtx = pRuntimeEnv->pCtx;
  STableQueryInfo *item = pQuery->current;

  bool masterScan = IS_MASTER_SCAN(pRuntimeEnv);
  bool groupbyColumnValue = pRuntimeEnv->groupbyNormalCol;

  // column 0 serves as the timestamp column only if it really has the timestamp type
  SColumnInfoData *pFirstCol = (SColumnInfoData *)taosArrayGet(pDataBlock, 0);
  TSKEY           *tsCols = NULL;
  if (pFirstCol->info.type == TSDB_DATA_TYPE_TIMESTAMP) {
    tsCols = (TSKEY *)pFirstCol->pData;
  }

  SArithmeticSupport *sasArray = calloc((size_t)pQuery->numOfOutput, sizeof(SArithmeticSupport));
  if (sasArray == NULL) {
    finalizeQueryResult(pRuntimeEnv);  // release the resources allocated during the query
    longjmp(pRuntimeEnv->env, TSDB_CODE_QRY_OUT_OF_MEMORY);
  }

  int16_t type = 0;
  int16_t bytes = 0;
  char   *groupbyColumnData = NULL;
  if (groupbyColumnValue) {
    groupbyColumnData = getGroupbyColumnData(pQuery, &type, &bytes, pDataBlock);
  }

  // bind the input of every output function
  for (int32_t k = 0; k < pQuery->numOfOutput; ++k) {
    char *dataBlock = getDataBlock(pRuntimeEnv, &sasArray[k], k, pDataBlockInfo->rows, pDataBlock);
    setExecParams(pQuery, &pCtx[k], dataBlock, tsCols, pDataBlockInfo, pStatis, &sasArray[k], k);
  }

  // bind the input column of every filter
  for (int32_t k = 0; k < pQuery->numOfFilterCols; ++k) {
    SSingleColumnFilterInfo *pFilterInfo = &pQuery->pFilterInfo[k];
    pFilterInfo->pData = getDataBlockImpl(pDataBlock, pFilterInfo->info.colId);
    assert(pFilterInfo->pData != NULL);
  }

  int32_t step = GET_FORWARD_DIRECTION_FACTOR(pQuery->order.order);

  // from top to bottom in desc
  // from bottom to top in asc order
  if (pRuntimeEnv->pTSBuf != NULL) {
    SQInfo *pQInfo = (SQInfo *)GET_QINFO_ADDR(pRuntimeEnv);
    qDebug("QInfo:%p process data rows, numOfRows:%d, query order:%d, ts comp order:%d", pQInfo, pDataBlockInfo->rows,
           pQuery->order.order, pRuntimeEnv->pTSBuf->cur.order);
  }

  int32_t offset = -1;

  for (int32_t row = 0; row < pDataBlockInfo->rows; ++row) {
    offset = GET_COL_DATA_POS(pQuery, row, step);

    if (pRuntimeEnv->pTSBuf != NULL) {
      int32_t r = doTSJoinFilter(pRuntimeEnv, offset);
      if (r == TS_JOIN_TAG_NOT_EQUALS) {
        break;
      }

      if (r == TS_JOIN_TS_NOT_EQUALS) {
        continue;
      }

      assert(r == TS_JOIN_TS_EQUAL);
    }

    if (pQuery->numOfFilterCols > 0 && (!doFilterData(pQuery, offset))) {
      continue;
    }

    if (QUERY_IS_INTERVAL_QUERY(pQuery)) {
      // interval window query: the primary timestamp decides the time window
      int64_t     ts = tsCols[offset];
      STimeWindow win = getActiveTimeWindow(pWindowResInfo, ts, pQuery);

      bool    hasTimeWindow = false;
      int32_t ret = setWindowOutputBufByKey(pRuntimeEnv, pWindowResInfo, pDataBlockInfo->tid, &win, masterScan, &hasTimeWindow);
      if (ret != TSDB_CODE_SUCCESS || !hasTimeWindow) {  // null data, too many state code
        continue;
      }

      SWindowStatus *pStatus = getTimeWindowResStatus(pWindowResInfo, curTimeWindow(pWindowResInfo));
      doRowwiseApplyFunctions(pRuntimeEnv, pStatus, &win, offset);

      // remember the current window: walking the following windows moves curIndex
      int32_t     index = pWindowResInfo->curIndex;
      STimeWindow nextWin = win;

      for (;;) {
        GET_NEXT_TIMEWINDOW(pQuery, &nextWin);

        bool ascending = QUERY_IS_ASC_QUERY(pQuery);
        if ((ascending && nextWin.skey > pQuery->window.ekey) ||
            (!ascending && nextWin.skey < pQuery->window.ekey)) {
          break;
        }

        // stop at the first window that does not contain the row
        if (ts < nextWin.skey || ts > nextWin.ekey) {
          break;
        }

        // null data, failed to allocate more memory buffer
        hasTimeWindow = false;
        if (setWindowOutputBufByKey(pRuntimeEnv, pWindowResInfo, pDataBlockInfo->tid, &nextWin, masterScan, &hasTimeWindow) != TSDB_CODE_SUCCESS) {
          break;
        }

        if (hasTimeWindow) {
          pStatus = getTimeWindowResStatus(pWindowResInfo, curTimeWindow(pWindowResInfo));
          doRowwiseApplyFunctions(pRuntimeEnv, pStatus, &nextWin, offset);
        }
      }

      pWindowResInfo->curIndex = index;
    } else {  // other queries
      // the group-by column value of the row selects the group it belongs to
      if (groupbyColumnValue) {
        char *val = groupbyColumnData + bytes * offset;
        if (setGroupResultOutputBuf(pRuntimeEnv, val, type, bytes) != TSDB_CODE_SUCCESS) {  // null data, too many state code
          continue;
        }
      }

      for (int32_t k = 0; k < pQuery->numOfOutput; ++k) {
        int32_t functionId = pQuery->pSelectExpr[k].base.functionId;
        if (functionNeedToExecute(pRuntimeEnv, &pCtx[k], functionId)) {
          aAggs[functionId].xFunctionF(&pCtx[k], offset);
        }
      }
    }

    // if timestamp filter list is empty, quit current query
    if (pRuntimeEnv->pTSBuf != NULL && !tsBufNextPos(pRuntimeEnv->pTSBuf)) {
      setQueryStatus(pQuery, QUERY_COMPLETED);
      break;
    }
  }

  assert(offset >= 0);
  if (tsCols == NULL) {
    TSKEY blockEnd = QUERY_IS_ASC_QUERY(pQuery)? pDataBlockInfo->window.ekey:pDataBlockInfo->window.skey;
    item->lastKey = blockEnd + step;
  } else {
    item->lastKey = tsCols[offset] + step;
  }

  // todo refactor: extract method
  for (int32_t i = 0; i < pQuery->numOfOutput; ++i) {
    if (pQuery->pSelectExpr[i].base.functionId == TSDB_FUNC_ARITHM) {
      taosTFree(sasArray[i].data);
    }
  }

  free(sasArray);
}

static int32_t tableApplyFunctionsOnBlock(SQueryRuntimeEnv *pRuntimeEnv, SDataBlockInfo *pDataBlockInfo,
H
hjxilinx 已提交
1331
                                          SDataStatis *pStatis, __block_search_fn_t searchFn, SArray *pDataBlock) {
H
hjxilinx 已提交
1332
  SQuery *pQuery = pRuntimeEnv->pQuery;
H
hjxilinx 已提交
1333 1334 1335
  
  STableQueryInfo* pTableQInfo = pQuery->current;
  SWindowResInfo*  pWindowResInfo = &pRuntimeEnv->windowResInfo;
H
hjxilinx 已提交
1336
  
H
Haojun Liao 已提交
1337
  if (pQuery->numOfFilterCols > 0 || pRuntimeEnv->pTSBuf != NULL || pRuntimeEnv->groupbyNormalCol) {
1338
    rowwiseApplyFunctions(pRuntimeEnv, pStatis, pDataBlockInfo, pWindowResInfo, pDataBlock);
1339
  } else {
1340
    blockwiseApplyFunctions(pRuntimeEnv, pStatis, pDataBlockInfo, pWindowResInfo, searchFn, pDataBlock);
1341
  }
1342

1343
  // update the lastkey of current table
1344
  TSKEY lastKey = QUERY_IS_ASC_QUERY(pQuery) ? pDataBlockInfo->window.ekey : pDataBlockInfo->window.skey;
H
hjxilinx 已提交
1345
  pTableQInfo->lastKey = lastKey + GET_FORWARD_DIRECTION_FACTOR(pQuery->order.order);
1346

1347
  // interval query with limit applied
1348
  int32_t numOfRes = 0;
H
Haojun Liao 已提交
1349
  if (QUERY_IS_INTERVAL_QUERY(pQuery)) {
1350 1351
    numOfRes = doCheckQueryCompleted(pRuntimeEnv, lastKey, pWindowResInfo);
  } else {
S
TD-1057  
Shengliang Guan 已提交
1352
    numOfRes = (int32_t)getNumOfResult(pRuntimeEnv);
1353

1354 1355 1356 1357
    // update the number of output result
    if (numOfRes > 0 && pQuery->checkBuffer == 1) {
      assert(numOfRes >= pQuery->rec.rows);
      pQuery->rec.rows = numOfRes;
1358

1359 1360 1361
      if (numOfRes >= pQuery->rec.threshold) {
        setQueryStatus(pQuery, QUERY_RESBUF_FULL);
      }
1362

1363 1364 1365
      if ((pQuery->limit.limit >= 0) && (pQuery->limit.limit + pQuery->limit.offset) <= numOfRes) {
        setQueryStatus(pQuery, QUERY_COMPLETED);
      }
1366 1367 1368 1369 1370

      if (((pTableQInfo->lastKey > pTableQInfo->win.ekey) && QUERY_IS_ASC_QUERY(pQuery)) ||
          ((pTableQInfo->lastKey < pTableQInfo->win.ekey) && (!QUERY_IS_ASC_QUERY(pQuery)))) {
        setQueryStatus(pQuery, QUERY_COMPLETED);
      }
H
Haojun Liao 已提交
1371
    }
1372
  }
1373

1374
  return numOfRes;
1375 1376
}

H
Haojun Liao 已提交
1377
void setExecParams(SQuery *pQuery, SQLFunctionCtx *pCtx, void* inputData, TSKEY *tsCol, SDataBlockInfo* pBlockInfo,
1378 1379 1380 1381 1382 1383
                   SDataStatis *pStatis, void *param, int32_t colIndex) {
  
  int32_t functionId = pQuery->pSelectExpr[colIndex].base.functionId;
  int32_t colId = pQuery->pSelectExpr[colIndex].base.colInfo.colId;
  
  SDataStatis *tpField = NULL;
H
Haojun Liao 已提交
1384
  pCtx->hasNull = hasNullValue(&pQuery->pSelectExpr[colIndex].base.colInfo, pStatis, &tpField);
1385
  pCtx->aInputElemBuf = inputData;
1386

1387
  if (tpField != NULL) {
H
Haojun Liao 已提交
1388
    pCtx->preAggVals.isSet  = true;
1389 1390
    pCtx->preAggVals.statis = *tpField;
    assert(pCtx->preAggVals.statis.numOfNull <= pBlockInfo->rows);
1391 1392 1393
  } else {
    pCtx->preAggVals.isSet = false;
  }
1394

H
Haojun Liao 已提交
1395 1396
  pCtx->preAggVals.dataBlockLoaded = (inputData != NULL);

H
Haojun Liao 已提交
1397 1398 1399
  // limit/offset query will affect this value
  pCtx->startOffset = QUERY_IS_ASC_QUERY(pQuery) ? pQuery->pos:0;
  pCtx->size = QUERY_IS_ASC_QUERY(pQuery) ? pBlockInfo->rows - pQuery->pos : pQuery->pos + 1;
1400

1401 1402
  uint32_t status = aAggs[functionId].nStatus;
  if (((status & (TSDB_FUNCSTATE_SELECTIVITY | TSDB_FUNCSTATE_NEED_TS)) != 0) && (tsCol != NULL)) {
H
Haojun Liao 已提交
1403
    pCtx->ptsList = tsCol;
1404
  }
1405

1406 1407 1408 1409 1410
  if (functionId >= TSDB_FUNC_FIRST_DST && functionId <= TSDB_FUNC_LAST_DST) {
    // last_dist or first_dist function
    // store the first&last timestamp into the intermediate buffer [1], the true
    // value may be null but timestamp will never be null
  } else if (functionId == TSDB_FUNC_TOP || functionId == TSDB_FUNC_BOTTOM || functionId == TSDB_FUNC_TWA ||
1411
             functionId == TSDB_FUNC_DIFF || (functionId >= TSDB_FUNC_RATE && functionId <= TSDB_FUNC_AVG_IRATE)) {
1412
    /*
H
Haojun Liao 已提交
1413
     * least squares function needs two columns of input, currently, the x value of linear equation is set to
1414 1415 1416 1417 1418 1419 1420 1421 1422 1423
     * timestamp column, and the y-value is the column specified in pQuery->pSelectExpr[i].colIdxInBuffer
     *
     * top/bottom function needs timestamp to indicate when the
     * top/bottom values emerge, so does diff function
     */
    if (functionId == TSDB_FUNC_TWA) {
      STwaInfo *pTWAInfo = GET_RES_INFO(pCtx)->interResultBuf;
      pTWAInfo->SKey = pQuery->window.skey;
      pTWAInfo->EKey = pQuery->window.ekey;
    }
1424

1425 1426
  } else if (functionId == TSDB_FUNC_ARITHM) {
    pCtx->param[1].pz = param;
H
Haojun Liao 已提交
1427 1428 1429 1430 1431 1432
  } else if (functionId == TSDB_FUNC_SPREAD) {  // set the statistics data for primary time stamp column
    if (colId == PRIMARYKEY_TIMESTAMP_COL_INDEX) {
      pCtx->preAggVals.isSet  = true;
      pCtx->preAggVals.statis.min = pBlockInfo->window.skey;
      pCtx->preAggVals.statis.max = pBlockInfo->window.ekey;
    }
1433 1434
  } else if (functionId == TSDB_FUNC_INTERP) {
    SInterpInfoDetail *pInterpInfo = GET_RES_INFO(pCtx)->interResultBuf;
S
TD-1057  
Shengliang Guan 已提交
1435
    pInterpInfo->type = (int8_t)pQuery->fillType;
1436 1437 1438 1439 1440 1441 1442 1443 1444 1445
    pInterpInfo->ts = pQuery->window.skey;
    pInterpInfo->primaryCol = (colId == PRIMARYKEY_TIMESTAMP_COL_INDEX);
  
    if (pQuery->fillVal != NULL) {
      if (isNull((const char*) &pQuery->fillVal[colIndex], pCtx->inputType)) {
        pCtx->param[1].nType = TSDB_DATA_TYPE_NULL;
      } else { // todo refactor, tVariantCreateFromBinary should handle the NULL value
        tVariantCreateFromBinary(&pCtx->param[1], (char*) &pQuery->fillVal[colIndex], pCtx->inputBytes, pCtx->inputType);
      }
    }
1446
  }
1447

1448 1449 1450 1451 1452 1453
#if defined(_DEBUG_VIEW)
  //  int64_t *tsList = (int64_t *)primaryColumnData;
//  int64_t  s = tsList[0];
//  int64_t  e = tsList[size - 1];

//    if (IS_DATA_BLOCK_LOADED(blockStatus)) {
1454
//        qDebug("QInfo:%p query ts:%lld-%lld, offset:%d, rows:%d, bstatus:%d,
1455 1456 1457
//        functId:%d", GET_QINFO_ADDR(pQuery),
//               s, e, startOffset, size, blockStatus, functionId);
//    } else {
1458
//        qDebug("QInfo:%p block not loaded, bstatus:%d",
1459 1460 1461 1462 1463 1464
//        GET_QINFO_ADDR(pQuery), blockStatus);
//    }
#endif
}

// set the output buffer for the selectivity + tag query
H
Haojun Liao 已提交
1465 1466 1467
static void setCtxTagColumnInfo(SQueryRuntimeEnv *pRuntimeEnv, SQLFunctionCtx *pCtx) {
  SQuery* pQuery = pRuntimeEnv->pQuery;

1468
  if (isSelectivityWithTagsQuery(pQuery)) {
1469
    int32_t num = 0;
1470
    int16_t tagLen = 0;
1471 1472
    
    SQLFunctionCtx *p = NULL;
1473
    SQLFunctionCtx **pTagCtx = calloc(pQuery->numOfOutput, POINTER_BYTES);
H
Haojun Liao 已提交
1474

1475
    for (int32_t i = 0; i < pQuery->numOfOutput; ++i) {
1476
      SSqlFuncMsg *pSqlFuncMsg = &pQuery->pSelectExpr[i].base;
1477
      
1478 1479 1480 1481 1482 1483 1484 1485 1486 1487 1488 1489 1490
      if (pSqlFuncMsg->functionId == TSDB_FUNC_TAG_DUMMY || pSqlFuncMsg->functionId == TSDB_FUNC_TS_DUMMY) {
        tagLen += pCtx[i].outputBytes;
        pTagCtx[num++] = &pCtx[i];
      } else if ((aAggs[pSqlFuncMsg->functionId].nStatus & TSDB_FUNCSTATE_SELECTIVITY) != 0) {
        p = &pCtx[i];
      } else if (pSqlFuncMsg->functionId == TSDB_FUNC_TS || pSqlFuncMsg->functionId == TSDB_FUNC_TAG) {
        // tag function may be the group by tag column
        // ts may be the required primary timestamp column
        continue;
      } else {
        // the column may be the normal column, group by normal_column, the functionId is TSDB_FUNC_PRJ
      }
    }
dengyihao's avatar
dengyihao 已提交
1491 1492 1493 1494 1495
    if (p != NULL) {
      p->tagInfo.pTagCtxList = pTagCtx;
      p->tagInfo.numOfTagCols = num;
      p->tagInfo.tagsLen = tagLen;
    } else {
S
Shengliang Guan 已提交
1496
      taosTFree(pTagCtx); 
dengyihao's avatar
dengyihao 已提交
1497
    }
1498 1499 1500
  }
}

H
Haojun Liao 已提交
1501 1502
/*
 * Hand every output expression its slice of the intermediate result buffer.
 * The slices are laid out back to back in buf, each interBytes of its expression wide.
 */
static FORCE_INLINE void setWindowResultInfo(SResultInfo *pResultInfo, SQuery *pQuery, bool isStableQuery, char* buf) {
  char   *cursor = buf;
  int32_t i = 0;

  while (i < pQuery->numOfOutput) {
    int32_t interBytes = pQuery->pSelectExpr[i].interBytes;

    setResultInfoBuf(&pResultInfo[i], interBytes, isStableQuery, cursor);
    cursor += interBytes;
    ++i;
  }
}

1511
/*
 * Allocate and initialize the per-query execution state: the result-info buffer and one
 * SQLFunctionCtx per output column.
 *
 * pRuntimeEnv: runtime environment to populate; pQuery, interBufSize, stableQuery and
 *              groupbyNormalCol are read from it
 * order:       stored in param[2] of every top/bottom/diff function context
 *
 * Returns TSDB_CODE_SUCCESS, or TSDB_CODE_QRY_OUT_OF_MEMORY if either allocation fails (both buffers
 * are released in that case).
 */
static int32_t setupQueryRuntimeEnv(SQueryRuntimeEnv *pRuntimeEnv, int16_t order) {
  qDebug("QInfo:%p setup runtime env", GET_QINFO_ADDR(pRuntimeEnv));
  SQuery *pQuery = pRuntimeEnv->pQuery;

  // one allocation: numOfOutput SResultInfo entries followed by the intermediate result buffers
  size_t size = pRuntimeEnv->interBufSize + pQuery->numOfOutput * sizeof(SResultInfo);

  pRuntimeEnv->resultInfo = calloc(1, size);
  pRuntimeEnv->pCtx = (SQLFunctionCtx *)calloc(pQuery->numOfOutput, sizeof(SQLFunctionCtx));

  if (pRuntimeEnv->resultInfo == NULL || pRuntimeEnv->pCtx == NULL) {
    goto _clean;
  }

  pRuntimeEnv->offset[0] = 0;
  for (int32_t i = 0; i < pQuery->numOfOutput; ++i) {
    SSqlFuncMsg *pSqlFuncMsg = &pQuery->pSelectExpr[i].base;

    SQLFunctionCtx *pCtx = &pRuntimeEnv->pCtx[i];
    SColIndex* pIndex = &pSqlFuncMsg->colInfo;

    // input type/width depend on where the column comes from: tag, user-defined constant or data column
    int32_t index = pSqlFuncMsg->colInfo.colIndex;
    if (TSDB_COL_IS_TAG(pIndex->flag)) {
      if (pIndex->colId == TSDB_TBNAME_COLUMN_INDEX) {  // todo refactor
        SSchema s = tGetTableNameColumnSchema();

        pCtx->inputBytes = s.bytes;
        pCtx->inputType = s.type;
      } else {
        pCtx->inputBytes = pQuery->tagColList[index].bytes;
        pCtx->inputType = pQuery->tagColList[index].type;
      }
    } else if (TSDB_COL_IS_UD_COL(pIndex->flag)) {
      pCtx->inputBytes = pSqlFuncMsg->arg[0].argBytes;
      pCtx->inputType = pSqlFuncMsg->arg[0].argType;
    } else {
      pCtx->inputBytes = pQuery->colList[index].bytes;
      pCtx->inputType = pQuery->colList[index].type;
    }

    assert(isValidDataType(pCtx->inputType));
    pCtx->ptsOutputBuf = NULL;

    pCtx->outputBytes = pQuery->pSelectExpr[i].bytes;
    pCtx->outputType = pQuery->pSelectExpr[i].type;

    pCtx->order = pQuery->order.order;
    pCtx->functionId = pSqlFuncMsg->functionId;

    pCtx->numOfParams = pSqlFuncMsg->numOfParams;
    for (int32_t j = 0; j < pCtx->numOfParams; ++j) {
      int16_t type = pSqlFuncMsg->arg[j].argType;
      int16_t bytes = pSqlFuncMsg->arg[j].argBytes;
      if (type == TSDB_DATA_TYPE_BINARY || type == TSDB_DATA_TYPE_NCHAR) {
        // read the j-th argument's own payload (previously always arg[0] was used)
        tVariantCreateFromBinary(&pCtx->param[j], pSqlFuncMsg->arg[j].argValue.pz, bytes, type);
      } else {
        tVariantCreateFromBinary(&pCtx->param[j], (char *)&pSqlFuncMsg->arg[j].argValue.i64, bytes, type);
      }
    }

    // set the order information for top/bottom query
    int32_t functionId = pCtx->functionId;

    if (functionId == TSDB_FUNC_TOP || functionId == TSDB_FUNC_BOTTOM || functionId == TSDB_FUNC_DIFF) {
      // these functions require the first output column to be the timestamp
      int32_t f = pQuery->pSelectExpr[0].base.functionId;
      assert(f == TSDB_FUNC_TS || f == TSDB_FUNC_TS_DUMMY);

      pCtx->param[2].i64Key = order;
      pCtx->param[2].nType = TSDB_DATA_TYPE_BIGINT;
      pCtx->param[3].i64Key = functionId;
      pCtx->param[3].nType = TSDB_DATA_TYPE_BIGINT;

      pCtx->param[1].i64Key = pQuery->order.orderColId;
    }

    // offset[i] is the running sum of the output widths of columns 0..i-1
    if (i > 0) {
      pRuntimeEnv->offset[i] = pRuntimeEnv->offset[i - 1] + pRuntimeEnv->pCtx[i - 1].outputBytes;
    }
  }

  // the intermediate buffers start right behind the SResultInfo array
  char* buf = (char*) pRuntimeEnv->resultInfo + sizeof(SResultInfo) * pQuery->numOfOutput;

  // set the intermediate result output buffer
  setWindowResultInfo(pRuntimeEnv->resultInfo, pQuery, pRuntimeEnv->stableQuery, buf);

  // if it is group by normal column, do not set output buffer, the output buffer is pResult
  if (!pRuntimeEnv->groupbyNormalCol && !pRuntimeEnv->stableQuery) {
    resetCtxOutputBuf(pRuntimeEnv);
  }

  setCtxTagColumnInfo(pRuntimeEnv, pRuntimeEnv->pCtx);

  qDebug("QInfo:%p init runtime completed", GET_QINFO_ADDR(pRuntimeEnv));
  return TSDB_CODE_SUCCESS;

_clean:
  taosTFree(pRuntimeEnv->resultInfo);
  taosTFree(pRuntimeEnv->pCtx);

  return TSDB_CODE_QRY_OUT_OF_MEMORY;
}

/*
 * Release everything owned by the runtime environment: window results, function contexts (their
 * parameters, tag value and tag context list), the result-info buffer, fill info, the result
 * buffer, both tsdb query handles and the timestamp buffer.
 *
 * Does nothing when the environment has no query attached.
 */
static void teardownQueryRuntimeEnv(SQueryRuntimeEnv *pRuntimeEnv) {
  SQuery *pQuery = pRuntimeEnv->pQuery;
  if (pQuery == NULL) {
    return;
  }

  SQInfo* pQInfo = (SQInfo*) GET_QINFO_ADDR(pRuntimeEnv);
  qDebug("QInfo:%p teardown runtime env", pQInfo);

  cleanupTimeWindowInfo(&pRuntimeEnv->windowResInfo);

  if (pRuntimeEnv->pCtx != NULL) {
    int32_t col = 0;
    while (col < pQuery->numOfOutput) {
      SQLFunctionCtx *pFuncCtx = &pRuntimeEnv->pCtx[col];

      for (int32_t k = 0; k < pFuncCtx->numOfParams; ++k) {
        tVariantDestroy(&pFuncCtx->param[k]);
      }

      tVariantDestroy(&pFuncCtx->tag);
      taosTFree(pFuncCtx->tagInfo.pTagCtxList);
      ++col;
    }

    taosTFree(pRuntimeEnv->resultInfo);
    taosTFree(pRuntimeEnv->pCtx);
  }

  pRuntimeEnv->pFillInfo = taosDestoryFillInfo(pRuntimeEnv->pFillInfo);

  destroyResultBuf(pRuntimeEnv->pResultBuf);
  tsdbCleanupQueryHandle(pRuntimeEnv->pQueryHandle);
  tsdbCleanupQueryHandle(pRuntimeEnv->pSecQueryHandle);

  pRuntimeEnv->pTSBuf = tsBufDestroy(pRuntimeEnv->pTSBuf);
}

H
Haojun Liao 已提交
1648
// true when the query's code field carries the "cancelled" code written by setQueryKilled()
#define IS_QUERY_KILLED(_q) (TSDB_CODE_TSC_QUERY_CANCELLED == (_q)->code)
1649

H
Haojun Liao 已提交
1650
/* Mark the query as cancelled; IS_QUERY_KILLED() tests for exactly this code. */
static void setQueryKilled(SQInfo *pQInfo) {
  pQInfo->code = TSDB_CODE_TSC_QUERY_CANCELLED;
}
H
hjxilinx 已提交
1651

H
Haojun Liao 已提交
1652 1653 1654
/*
 * Tell whether the query produces a fixed number of output rows.
 *
 * Interval queries never do. Top/bottom and group-by-normal-column queries always do. Otherwise
 * the query is fixed-output as soon as one output function, other than the leading ts_comp
 * projection and the ts/ts_dummy functions, is not a multi-output function.
 */
static bool isFixedOutputQuery(SQueryRuntimeEnv* pRuntimeEnv) {
  SQuery* pQuery = pRuntimeEnv->pQuery;

  if (QUERY_IS_INTERVAL_QUERY(pQuery)) {
    return false;
  }

  // Note: top/bottom query is fixed output query
  if (pRuntimeEnv->topBotQuery || pRuntimeEnv->groupbyNormalCol) {
    return true;
  }

  for (int32_t col = 0; col < pQuery->numOfOutput; ++col) {
    SSqlFuncMsg *pFunc = &pQuery->pSelectExpr[col].base;

    // the ts_comp function: a projection of the primary timestamp in the first output column
    bool tsComp = (col == 0 && pFunc->functionId == TSDB_FUNC_PRJ && pFunc->numOfParams == 1 &&
                   pFunc->colInfo.colIndex == PRIMARYKEY_TIMESTAMP_COL_INDEX);

    if (tsComp || pFunc->functionId == TSDB_FUNC_TS || pFunc->functionId == TSDB_FUNC_TS_DUMMY) {
      continue;
    }

    if (!IS_MULTIOUTPUT(aAggs[pFunc->functionId].nStatus)) {
      return true;
    }
  }

  return false;
}

1684
// todo refactor with isLastRowQuery
H
hjxilinx 已提交
1685
/* Return true if any output column uses TSDB_FUNC_INTERP. */
static bool isPointInterpoQuery(SQuery *pQuery) {
  int32_t col = 0;

  while (col < pQuery->numOfOutput) {
    if (pQuery->pSelectExpr[col].base.functionId == TSDB_FUNC_INTERP) {
      return true;
    }
    ++col;
  }

  return false;
}

// TODO REFACTOR:MERGE WITH CLIENT-SIDE FUNCTION
H
hjxilinx 已提交
1697
/* Return true if any output column is one of the sum/avg rate or irate functions. */
static bool isSumAvgRateQuery(SQuery *pQuery) {
  for (int32_t col = 0; col < pQuery->numOfOutput; ++col) {
    int32_t fid = pQuery->pSelectExpr[col].base.functionId;

    if (fid == TSDB_FUNC_TS) {
      continue;  // the timestamp column never decides
    }

    bool sumRate = (fid == TSDB_FUNC_SUM_RATE || fid == TSDB_FUNC_SUM_IRATE);
    bool avgRate = (fid == TSDB_FUNC_AVG_RATE || fid == TSDB_FUNC_AVG_IRATE);
    if (sumRate || avgRate) {
      return true;
    }
  }

  return false;
}

H
hjxilinx 已提交
1713
/* Return true if any output column uses TSDB_FUNC_LAST_ROW. */
static bool isFirstLastRowQuery(SQuery *pQuery) {
  int32_t col = 0;

  while (col < pQuery->numOfOutput) {
    if (pQuery->pSelectExpr[col].base.functionId == TSDB_FUNC_LAST_ROW) {
      return true;
    }
    ++col;
  }

  return false;
}

H
hjxilinx 已提交
1724
/*
 * Decide whether a second scan in the opposite direction is required.
 *
 * ts/ts_dummy/tag columns are skipped. A first/first_dst function requires it when the query is
 * not ascending. The first last/last_dst function encountered decides immediately: a reverse scan
 * is needed iff the order stored in its first argument differs from the query order.
 */
static bool needReverseScan(SQuery *pQuery) {
  for (int32_t col = 0; col < pQuery->numOfOutput; ++col) {
    SSqlFuncMsg *pFunc = &pQuery->pSelectExpr[col].base;
    int32_t      fid = pFunc->functionId;

    if (fid == TSDB_FUNC_TS || fid == TSDB_FUNC_TS_DUMMY || fid == TSDB_FUNC_TAG) {
      continue;
    }

    bool isFirst = (fid == TSDB_FUNC_FIRST || fid == TSDB_FUNC_FIRST_DST);
    if (isFirst && !QUERY_IS_ASC_QUERY(pQuery)) {
      return true;
    }

    bool isLast = (fid == TSDB_FUNC_LAST || fid == TSDB_FUNC_LAST_DST);
    if (isLast) {
      int32_t order = (int32_t)pFunc->arg[0].argValue.i64;
      return order != pQuery->order.order;
    }
  }

  return false;
}
H
hjxilinx 已提交
1743 1744 1745

/*
 * Return true when every output column is a tag projection (TSDB_FUNC_TAGPRJ), a tid/tag function
 * (TSDB_FUNC_TID_TAG) or a count() on the table-name column.
 */
static bool onlyQueryTags(SQuery* pQuery) {
  for (int32_t col = 0; col < pQuery->numOfOutput; ++col) {
    SExprInfo* pExpr = &pQuery->pSelectExpr[col];
    int32_t    fid = pExpr->base.functionId;

    bool tagOnly = (fid == TSDB_FUNC_TAGPRJ) || (fid == TSDB_FUNC_TID_TAG) ||
                   (fid == TSDB_FUNC_COUNT && pExpr->base.colInfo.colId == TSDB_TBNAME_COLUMN_INDEX);
    if (!tagOnly) {
      return false;
    }
  }

  return true;
}

1758 1759
/////////////////////////////////////////////////////////////////////////////////////////////

H
Haojun Liao 已提交
1760
/*
 * Compute the interval-aligned time window that contains key.
 *
 * win->skey is the interval start returned by taosGetIntervalStartTimestamp(); win->ekey is
 * skey + intervalTime - 1, or INT64_MAX when keyFirst is within one interval of INT64_MAX.
 *
 * Requires keyFirst <= key <= keyLast and slidingTime <= intervalTime (asserted).
 */
void getAlignQueryTimeWindow(SQuery *pQuery, int64_t key, int64_t keyFirst, int64_t keyLast, STimeWindow *win) {
  assert(key >= keyFirst && key <= keyLast && pQuery->slidingTime <= pQuery->intervalTime);

  win->skey = taosGetIntervalStartTimestamp(key, pQuery->slidingTime, pQuery->intervalTime, pQuery->slidingTimeUnit,
                                            pQuery->precision);

  // When keyFirst > INT64_MAX - intervalTime, the range [keyFirst, keyLast] is shorter than one
  // interval, so the end key is simply clamped to INT64_MAX.
  if (keyFirst > (INT64_MAX - pQuery->intervalTime)) {
    assert(keyLast - keyFirst < pQuery->intervalTime);
    win->ekey = INT64_MAX;
    return;
  }

  win->ekey = win->skey + pQuery->intervalTime - 1;
}

/*
 * Set pQuery->checkBuffer.
 *
 * It is 0 for top/bottom and group-by-normal-column queries. Otherwise it is 1 only when the query
 * has at least one function besides ts/ts_dummy and all of those are multi-output functions.
 */
static void setScanLimitationByResultBuffer(SQuery *pQuery) {
  if (isTopBottomQuery(pQuery) || isGroupbyNormalCol(pQuery->pGroupbyExpr)) {
    pQuery->checkBuffer = 0;
    return;
  }

  bool multiOutput = false;
  for (int32_t col = 0; col < pQuery->numOfOutput; ++col) {
    SSqlFuncMsg *pFunc = &pQuery->pSelectExpr[col].base;

    bool isTs = (pFunc->functionId == TSDB_FUNC_TS || pFunc->functionId == TSDB_FUNC_TS_DUMMY);
    if (isTs) {
      continue;
    }

    multiOutput = IS_MULTIOUTPUT(aAggs[pFunc->functionId].nStatus);
    if (!multiOutput) {
      break;  // a single non multi-output function settles it
    }
  }

  pQuery->checkBuffer = multiOutput ? 1 : 0;
}

/*
 * todo add more parameters to check soon..
 */
1803
/*
 * Sanity-check the list of columns to load: two adjacent entries of pQuery->colList must not share
 * the same column id. Logs an error and returns false on the first duplicate, true otherwise.
 */
bool colIdCheck(SQuery *pQuery) {
  int32_t i = 0;

  while (i < pQuery->numOfCols - 1) {
    bool duplicated = (pQuery->colList[i].colId == pQuery->colList[i + 1].colId);
    if (duplicated) {
      // load data column information is incorrect
      qError("QInfo:%p invalid data load column for query", GET_QINFO_ADDR(pQuery));
      return false;
    }
    ++i;
  }

  return true;
}

// todo ignore the avg/sum/min/max/count/stddev/top/bottom functions, for which
// the scan order does not matter
/*
 * Return true when every output function, ignoring ts/ts_dummy/tag/tag_dummy columns, is either
 * functId or functIdDst.
 */
static bool onlyOneQueryType(SQuery *pQuery, int32_t functId, int32_t functIdDst) {
  for (int32_t col = 0; col < pQuery->numOfOutput; ++col) {
    int32_t fid = pQuery->pSelectExpr[col].base.functionId;

    bool auxiliary = (fid == TSDB_FUNC_TS) || (fid == TSDB_FUNC_TS_DUMMY) || (fid == TSDB_FUNC_TAG) ||
                     (fid == TSDB_FUNC_TAG_DUMMY);
    if (auxiliary) {
      continue;
    }

    bool wanted = (fid == functId) || (fid == functIdDst);
    if (!wanted) {
      return false;
    }
  }

  return true;
}

/* True when the query consists only of first/first_dst functions (plus ts/tag helper columns). */
static bool onlyFirstQuery(SQuery *pQuery) {
  return onlyOneQueryType(pQuery, TSDB_FUNC_FIRST, TSDB_FUNC_FIRST_DST);
}

/* True when the query consists only of last/last_dst functions (plus ts/tag helper columns). */
static bool onlyLastQuery(SQuery *pQuery) {
  return onlyOneQueryType(pQuery, TSDB_FUNC_LAST, TSDB_FUNC_LAST_DST);
}

H
Haojun Liao 已提交
1838 1839 1840 1841 1842 1843
// todo refactor, add iterator
/*
 * Swap skey/ekey of every table's query window and store the new skey as lastKey of the matching
 * entry in pQInfo->tableGroupInfo.pGroupList.
 */
static void doExchangeTimeWindow(SQInfo* pQInfo) {
  size_t numOfGroups = GET_NUM_OF_TABLEGROUP(pQInfo);

  for(int32_t g = 0; g < numOfGroups; ++g) {
    SArray* pQueryInfoList = GET_TABLEGROUP(pQInfo, g);
    SArray* pKeyInfoList = taosArrayGetP(pQInfo->tableGroupInfo.pGroupList, g);

    size_t numOfTables = taosArrayGetSize(pQueryInfoList);
    for(int32_t t = 0; t < numOfTables; ++t) {
      STableQueryInfo* pTableInfo = (STableQueryInfo*) taosArrayGetP(pQueryInfoList, t);
      SWAP(pTableInfo->win.skey, pTableInfo->win.ekey, TSKEY);

      // entry t of both lists is updated together
      STableKeyInfo* pKeyInfo = taosArrayGet(pKeyInfoList, t);
      pKeyInfo->lastKey = pTableInfo->win.skey;
    }
  }
}

H
Haojun Liao 已提交
1856 1857 1858
/*
 * Adjust the scan order and the query window to the functions in the query.
 *
 *  - last_row query: descending order, window set to [max, min]
 *  - point interpolation query without interval: ascending order
 *  - only first/first_dst functions: ascending order
 *  - only last/last_dst functions: descending order
 *
 * The two "only" rules apply to non-interval queries, and to interval queries only when
 * stableQuery is set. Whenever the order really flips, skey/ekey of the query window are swapped;
 * for non-interval first/last queries the per-table windows are swapped as well.
 */
static void changeExecuteScanOrder(SQInfo *pQInfo, bool stableQuery) {
  SQuery* pQuery = pQInfo->runtimeEnv.pQuery;

  char msg[] = "QInfo:%p scan order changed for %s query, old:%d, new:%d, qrange exchanged, old qrange:%" PRId64
               "-%" PRId64 ", new qrange:%" PRId64 "-%" PRId64;

  // todo handle the case the the order irrelevant query type mixed up with order critical query type
  // descending order query for last_row query
  if (isFirstLastRowQuery(pQuery)) {
    qDebug("QInfo:%p scan order changed for last_row query, old:%d, new:%d", GET_QINFO_ADDR(pQuery),
           pQuery->order.order, TSDB_ORDER_DESC);

    pQuery->order.order = TSDB_ORDER_DESC;

    int64_t lower = MIN(pQuery->window.skey, pQuery->window.ekey);
    int64_t upper = MAX(pQuery->window.skey, pQuery->window.ekey);

    pQuery->window.skey = upper;
    pQuery->window.ekey = lower;
    return;
  }

  bool intervalQuery = (pQuery->intervalTime != 0);

  // in case of point-interpolation query, use asc order scan
  if (!intervalQuery && isPointInterpoQuery(pQuery)) {
    if (!QUERY_IS_ASC_QUERY(pQuery)) {
      qDebug(msg, GET_QINFO_ADDR(pQuery), "interp", pQuery->order.order, TSDB_ORDER_ASC, pQuery->window.skey,
             pQuery->window.ekey, pQuery->window.ekey, pQuery->window.skey);
      SWAP(pQuery->window.skey, pQuery->window.ekey, TSKEY);
    }

    pQuery->order.order = TSDB_ORDER_ASC;
    return;
  }

  // interval queries are only adjusted for super table queries
  if (intervalQuery && !stableQuery) {
    return;
  }

  bool        wantAsc;
  const char *label;
  if (onlyFirstQuery(pQuery)) {
    wantAsc = true;
    label = intervalQuery ? "only-first stable" : "only-first";
  } else if (onlyLastQuery(pQuery)) {
    wantAsc = false;
    label = intervalQuery ? "only-last stable" : "only-last";
  } else {
    return;
  }

  int32_t targetOrder = wantAsc ? TSDB_ORDER_ASC : TSDB_ORDER_DESC;
  bool    isAsc = QUERY_IS_ASC_QUERY(pQuery);
  bool    flip = wantAsc ? !isAsc : isAsc;

  if (flip) {
    qDebug(msg, GET_QINFO_ADDR(pQuery), label, pQuery->order.order, targetOrder, pQuery->window.skey,
           pQuery->window.ekey, pQuery->window.ekey, pQuery->window.skey);

    SWAP(pQuery->window.skey, pQuery->window.ekey, TSKEY);
    if (!intervalQuery) {
      doExchangeTimeWindow(pQInfo);
    }
  }

  pQuery->order.order = targetOrder;
}

/*
 * Return the initial number of result pages: 128 for group by on a normal column, one page per
 * table (but at least 16) for interval queries, and 1 otherwise.
 */
static int32_t getInitialPageNum(SQInfo *pQInfo) {
  SQuery *pQuery = pQInfo->runtimeEnv.pQuery;
  int32_t minWindowPages = 16;

  int32_t pages;
  if (isGroupbyNormalCol(pQuery->pGroupbyExpr)) {
    pages = 128;
  } else if (QUERY_IS_INTERVAL_QUERY(pQuery)) {
    // time window query, allocate one page for each table
    size_t numOfTables = pQInfo->tableqinfoGroupInfo.numOfTables;
    pages = (int32_t)(MAX(numOfTables, minWindowPages));
  } else {
    // for super table query, one page for each subset
    pages = 1;  // pQInfo->pSidSet->numOfSubSet;
  }

  assert(pages > 0);
  return pages;
}

1958 1959 1960
/*
 * Compute the row size and page size of the intermediate result buffer.
 *
 * *rowsize: pQuery->rowSize scaled by GET_ROW_PARAM_FOR_MULTIOUTPUT
 * *ps:      DEFAULT_INTERN_BUF_PAGE_SIZE, doubled until a page holds at least two rows behind the
 *           tFilePage header
 * Also stores the resulting rows-per-page in pRuntimeEnv->numOfRowsPerPage.
 */
static void getIntermediateBufInfo(SQueryRuntimeEnv* pRuntimeEnv, int32_t* ps, int32_t* rowsize) {
  SQuery* pQuery = pRuntimeEnv->pQuery;

  int32_t rowBytes = (int32_t)(pQuery->rowSize * GET_ROW_PARAM_FOR_MULTIOUTPUT(pQuery, pRuntimeEnv->topBotQuery, pRuntimeEnv->stableQuery));
  *rowsize = rowBytes;

  int32_t header = sizeof(tFilePage);
  int32_t pageSize = DEFAULT_INTERN_BUF_PAGE_SIZE;

  // one page contains at least two rows
  while ((rowBytes * 2) > pageSize - header) {
    pageSize = (pageSize << 1u);
  }
  *ps = pageSize;

  pRuntimeEnv->numOfRowsPerPage = (pageSize - sizeof(tFilePage)) / rowBytes;
}

H
Haojun Liao 已提交
1974
// pre-filtering on block statistics is supported for every type except binary and nchar
#define IS_PREFILTER_TYPE(_t) (!((_t) == TSDB_DATA_TYPE_BINARY || (_t) == TSDB_DATA_TYPE_NCHAR))
1975

H
Haojun Liao 已提交
1976 1977 1978 1979
/*
 * Use the block statistics to decide whether the data block has to be loaded.
 *
 * Returns true when there are no statistics, when there is nothing to pre-filter on (no filter
 * columns and not a top/bottom query), when a filter column has no statistics or is binary/nchar,
 * or when any filter accepts the [min, max] range of its column. If no filter accepts the range,
 * the first top/bottom function, if the query has one, decides through topbot_datablock_filter().
 * Otherwise the block can be skipped and false is returned.
 */
static bool needToLoadDataBlock(SQueryRuntimeEnv* pRuntimeEnv, SDataStatis *pDataStatis, SQLFunctionCtx *pCtx,
    int32_t numOfRows) {
  SQuery* pQuery = pRuntimeEnv->pQuery;

  if (pDataStatis == NULL) {
    return true;
  }

  if (pQuery->numOfFilterCols == 0 && (!pRuntimeEnv->topBotQuery)) {
    return true;
  }

  for (int32_t k = 0; k < pQuery->numOfFilterCols; ++k) {
    SSingleColumnFilterInfo *pColFilter = &pQuery->pFilterInfo[k];

    // find the statistics entry that belongs to the filtered column
    int32_t statIdx = -1;
    for (int32_t c = 0; c < pQuery->numOfCols; ++c) {
      if (pDataStatis[c].colId == pColFilter->info.colId) {
        statIdx = c;
        break;
      }
    }

    // no statistics data
    if (statIdx == -1) {
      return true;
    }

    // not support pre-filter operation on binary/nchar data type
    if (!IS_PREFILTER_TYPE(pColFilter->info.type)) {
      return true;
    }

    // all points in current column are NULL, no need to check its boundary value
    if (pDataStatis[statIdx].numOfNull == numOfRows) {
      continue;
    }

    SDataStatis* pColStat = &pDataStatis[statIdx];

    // float columns: min/max are read as double and narrowed to float before they are handed to the filters
    float fMin = 0;
    float fMax = 0;
    char *pMin = (char *)&pColStat->min;
    char *pMax = (char *)&pColStat->max;

    if (pColFilter->info.type == TSDB_DATA_TYPE_FLOAT) {
      fMin = (float)(*(double *)(&pColStat->min));
      fMax = (float)(*(double *)(&pColStat->max));
      pMin = (char *)&fMin;
      pMax = (char *)&fMax;
    }

    for (int32_t f = 0; f < pColFilter->numOfFilters; ++f) {
      if (pColFilter->pFilters[f].fp(&pColFilter->pFilters[f], pMin, pMax)) {
        return true;
      }
    }
  }

  if (pRuntimeEnv->topBotQuery) {
    for (int32_t col = 0; col < pQuery->numOfOutput; ++col) {
      int32_t fid = pQuery->pSelectExpr[col].base.functionId;
      if (fid == TSDB_FUNC_TOP || fid == TSDB_FUNC_BOTTOM) {
        return topbot_datablock_filter(&pCtx[col], fid, (char *)&pDataStatis[col].min, (char *)&pDataStatis[col].max);
      }
    }
  }

  return false;
}

H
Haojun Liao 已提交
2041 2042 2043 2044 2045 2046 2047 2048 2049 2050 2051 2052 2053 2054 2055 2056 2057 2058 2059 2060 2061 2062 2063 2064 2065 2066 2067 2068 2069 2070 2071 2072 2073 2074 2075 2076 2077 2078 2079 2080 2081 2082 2083 2084 2085 2086 2087
// true when point _p lies strictly inside window _w, i.e. _w.skey < _p < _w.ekey
#define PT_IN_WINDOW(_p, _w)  (((_w).skey < (_p)) && ((_p) < (_w).ekey))

/*
 * Return true when the start or end key of one of the inspected time windows lies strictly inside
 * the key range of the data block.
 *
 * The walk starts at the window aligned to the block's first key (ascending query) or last key
 * (descending query) and then advances with GET_NEXT_TIMEWINDOW. It stops once the window start
 * exceeds the block's first key (ascending) or the window end falls below it (descending).
 */
static bool overlapWithTimeWindow(SQuery* pQuery, SDataBlockInfo* pBlockInfo) {
  STimeWindow w = {0};

  TSKEY sk = MIN(pQuery->window.skey, pQuery->window.ekey);
  TSKEY ek = MAX(pQuery->window.skey, pQuery->window.ekey);

  bool asc = QUERY_IS_ASC_QUERY(pQuery);

  // first window: aligned to the block key the scan reaches first
  TSKEY startKey = asc ? pBlockInfo->window.skey : pBlockInfo->window.ekey;
  getAlignQueryTimeWindow(pQuery, startKey, sk, ek, &w);

  TSKEY edge = asc ? w.ekey : w.skey;
  if (PT_IN_WINDOW(edge, pBlockInfo->window)) {
    return true;
  }

  for (;;) {
    GET_NEXT_TIMEWINDOW(pQuery, &w);

    bool finished = asc ? (w.skey > pBlockInfo->window.skey) : (w.ekey < pBlockInfo->window.skey);
    if (finished) {
      break;
    }

    if (PT_IN_WINDOW(w.skey, pBlockInfo->window) || PT_IN_WINDOW(w.ekey, pBlockInfo->window)) {
      return true;
    }
  }

  return false;
}

H
Haojun Liao 已提交
2088
/*
 * Load only as much of the current data block as the query needs.
 *
 * The required level is BLK_DATA_ALL_NEEDED when the query has filter columns or a timestamp
 * buffer, or when it is an interval query and overlapWithTimeWindow() reports that a window edge
 * falls inside the block. Otherwise it is the OR of what each output function's dataReqFunc asks for.
 *
 *  - BLK_DATA_NO_NEEDED:     nothing is loaded, the block is counted as discarded
 *  - BLK_DATA_STATIS_NEEDED: statistics are loaded into *pStatis; the block itself is loaded only
 *                            when no statistics exist
 *  - BLK_DATA_ALL_NEEDED:    statistics are loaded and checked with needToLoadDataBlock(); the
 *                            block is then either discarded or loaded into *pDataBlock
 *
 * Returns BLK_DATA_DISCARD when the block was filtered out by its statistics, TSDB_CODE_SUCCESS
 * otherwise. Failures of tsdbRetrieveDataBlockStatisInfo() are currently ignored.
 */
int32_t loadDataBlockOnDemand(SQueryRuntimeEnv *pRuntimeEnv, void* pQueryHandle, SDataBlockInfo* pBlockInfo, SDataStatis **pStatis, SArray** pDataBlock) {
  SQuery *pQuery = pRuntimeEnv->pQuery;

  uint32_t status = 0;
  if (pQuery->numOfFilterCols > 0 || pRuntimeEnv->pTSBuf != NULL) {
    status = BLK_DATA_ALL_NEEDED;
  } else { // check if this data block is required to load

    // Calculate all time windows that are overlapping or contain current data block.
    // If current data block is contained by all possible time window, do not load current data block.
    if (QUERY_IS_INTERVAL_QUERY(pQuery) && overlapWithTimeWindow(pQuery, pBlockInfo)) {
      status = BLK_DATA_ALL_NEEDED;
    }

    if (status != BLK_DATA_ALL_NEEDED) {
      for (int32_t i = 0; i < pQuery->numOfOutput; ++i) {
        SSqlFuncMsg* pSqlFunc = &pQuery->pSelectExpr[i].base;

        int32_t functionId = pSqlFunc->functionId;
        int32_t colId = pSqlFunc->colInfo.colId;

        status |= aAggs[functionId].dataReqFunc(&pRuntimeEnv->pCtx[i], pBlockInfo->window.skey, pBlockInfo->window.ekey, colId);
        if ((status & BLK_DATA_ALL_NEEDED) == BLK_DATA_ALL_NEEDED) {
          break;  // cannot ask for more than everything
        }
      }
    }
  }

  if (status == BLK_DATA_NO_NEEDED) {
    qDebug("QInfo:%p data block discard, brange:%"PRId64 "-%"PRId64", rows:%d", GET_QINFO_ADDR(pRuntimeEnv),
           pBlockInfo->window.skey, pBlockInfo->window.ekey, pBlockInfo->rows);
    pRuntimeEnv->summary.discardBlocks += 1;
  } else if (status == BLK_DATA_STATIS_NEEDED) {
    if (tsdbRetrieveDataBlockStatisInfo(pQueryHandle, pStatis) != TSDB_CODE_SUCCESS) {
      //        return DISK_DATA_LOAD_FAILED;
    }

    pRuntimeEnv->summary.loadBlockStatis += 1;

    if (*pStatis == NULL) { // data block statistics does not exist, load data block
      *pDataBlock = tsdbRetrieveDataBlock(pQueryHandle, NULL);
      pRuntimeEnv->summary.totalCheckedRows += pBlockInfo->rows;
    }
  } else {
    assert(status == BLK_DATA_ALL_NEEDED);

    // load the data block statistics to perform further filter
    pRuntimeEnv->summary.loadBlockStatis += 1;
    if (tsdbRetrieveDataBlockStatisInfo(pQueryHandle, pStatis) != TSDB_CODE_SUCCESS) {
      // NOTE(review): the error is ignored; the code below works with whatever *pStatis holds
    }

    if (!needToLoadDataBlock(pRuntimeEnv, *pStatis, pRuntimeEnv->pCtx, pBlockInfo->rows)) {
      // current block has been discard due to filter applied
      pRuntimeEnv->summary.discardBlocks += 1;
      qDebug("QInfo:%p data block discard, brange:%"PRId64 "-%"PRId64", rows:%d", GET_QINFO_ADDR(pRuntimeEnv),
          pBlockInfo->window.skey, pBlockInfo->window.ekey, pBlockInfo->rows);
      return BLK_DATA_DISCARD;
    }

    pRuntimeEnv->summary.totalCheckedRows += pBlockInfo->rows;
    pRuntimeEnv->summary.loadBlocks += 1;
    *pDataBlock = tsdbRetrieveDataBlock(pQueryHandle, NULL);
  }

  return TSDB_CODE_SUCCESS;
}

H
hjxilinx 已提交
2156
/*
 * Binary search in an array of num TSKEY values (pValue is reinterpreted as TSKEY *).
 * NOTE(review): the search only makes sense if the keys are sorted in ascending order - confirm
 * against the callers.
 *
 * order == TSDB_ORDER_DESC: index of the last element that is <= key, -1 if every element is larger
 * otherwise (ascending):    index of the first element that is >= key, -1 if every element is smaller
 *
 * Returns -1 when num <= 0.
 */
int32_t binarySearchForKey(char *pValue, int num, TSKEY key, int order) {
  if (num <= 0) {
    return -1;
  }

  assert(order == TSDB_ORDER_ASC || order == TSDB_ORDER_DESC);

  TSKEY  *keys = (TSKEY *)pValue;
  int32_t lo = 0;
  int32_t hi = num - 1;

  if (order == TSDB_ORDER_DESC) {
    // find the first position which is smaller than the key
    for (;;) {
      if (key >= keys[hi]) {
        return hi;
      }
      if (key == keys[lo]) {
        return lo;
      }
      if (key < keys[lo]) {
        return lo - 1;
      }

      int32_t span = hi - lo + 1;
      int32_t mid = lo + (span >> 1);

      if (key < keys[mid]) {
        hi = mid - 1;
      } else if (key > keys[mid]) {
        lo = mid + 1;
      } else {
        return mid;
      }
    }
  }

  // find the first position which is bigger than the key
  for (;;) {
    if (key <= keys[lo]) {
      return lo;
    }
    if (key == keys[hi]) {
      return hi;
    }

    if (key > keys[hi]) {
      int32_t next = hi + 1;
      return (next >= num) ? -1 : next;
    }

    int32_t span = hi - lo + 1;
    int32_t mid = lo + (span >> 1);

    if (key < keys[mid]) {
      hi = mid - 1;
    } else if (key > keys[mid]) {
      lo = mid + 1;
    } else {
      return mid;
    }
  }
}

2219 2220 2221 2222 2223 2224 2225 2226 2227 2228 2229 2230 2231 2232 2233 2234 2235 2236 2237 2238 2239 2240
/*
 * Grow every output column buffer so that it can hold capacity rows, and point each function
 * context at the start of its (possibly moved) buffer.
 *
 * Nothing happens when capacity is smaller than the current pQuery->rec.capacity. Running out of
 * memory is not handled yet and trips an assert.
 */
static void ensureOutputBufferSimple(SQueryRuntimeEnv* pRuntimeEnv, int32_t capacity) {
  SQuery* pQuery = pRuntimeEnv->pQuery;

  if (capacity < pQuery->rec.capacity) {
    return;
  }

  for (int32_t i = 0; i < pQuery->numOfOutput; ++i) {
    int32_t bytes = pQuery->pSelectExpr[i].bytes;
    assert(bytes > 0 && capacity > 0);

    // multiply in size_t: bytes * capacity may exceed the int32_t range
    size_t newBytes = (size_t)bytes * (size_t)capacity + sizeof(tFilePage);

    char *tmp = realloc(pQuery->sdata[i], newBytes);
    if (tmp == NULL) {  // todo handle the oom
      assert(0);
    } else {
      pQuery->sdata[i] = (tFilePage *)tmp;
    }

    // set the pCtx output buffer position
    pRuntimeEnv->pCtx[i].aOutputBuf = pQuery->sdata[i]->data;
  }

  qDebug("QInfo:%p realloc output buffer to inc output buffer from: %" PRId64 " rows to:%d rows", GET_QINFO_ADDR(pRuntimeEnv),
         pQuery->rec.capacity, capacity);

  pQuery->rec.capacity = capacity;
}

2247 2248 2249
/*
 * In case of prj/diff query, ensure the output buffer is sufficient to accommodate the results of
 * the current block.
 *
 * Only applies to queries that are neither interval, group-by-normal-column, fixed-output nor
 * ts-comp queries. When fewer than pBlockInfo->rows rows are free, every column buffer is grown by
 * exactly the missing amount, the area behind the rows already produced is zeroed, and the
 * function contexts are pointed at the first free row. Running out of memory is not handled yet
 * and trips an assert.
 */
static void ensureOutputBuffer(SQueryRuntimeEnv* pRuntimeEnv, SDataBlockInfo* pBlockInfo) {
  SQuery* pQuery = pRuntimeEnv->pQuery;

  if (QUERY_IS_INTERVAL_QUERY(pQuery) || pRuntimeEnv->groupbyNormalCol || isFixedOutputQuery(pRuntimeEnv) ||
      isTSCompQuery(pQuery)) {
    return;
  }

  SResultRec *pRec = &pQuery->rec;
  if (pRec->capacity - pRec->rows >= pBlockInfo->rows) {
    return;  // enough room left
  }

  int32_t remain = (int32_t)(pRec->capacity - pRec->rows);
  int32_t newSize = (int32_t)(pRec->capacity + (pBlockInfo->rows - remain));

  for (int32_t i = 0; i < pQuery->numOfOutput; ++i) {
    int32_t bytes = pQuery->pSelectExpr[i].bytes;
    assert(bytes > 0 && newSize > 0);

    // multiply in size_t: bytes * newSize may exceed the int32_t range
    size_t newBytes = (size_t)bytes * (size_t)newSize + sizeof(tFilePage);

    char *tmp = realloc(pQuery->sdata[i], newBytes);
    if (tmp == NULL) {  // todo handle the oom
      assert(0);
    } else {
      // clear everything behind the rows that already hold results
      memset(tmp + sizeof(tFilePage) + bytes * pRec->rows, 0, (newSize - pRec->rows) * bytes);
      pQuery->sdata[i] = (tFilePage *)tmp;
    }

    // set the pCtx output buffer position
    pRuntimeEnv->pCtx[i].aOutputBuf = pQuery->sdata[i]->data + pRec->rows * bytes;

    // top/bottom/diff write their timestamps into the output buffer of the first column
    int32_t functionId = pQuery->pSelectExpr[i].base.functionId;
    if (functionId == TSDB_FUNC_TOP || functionId == TSDB_FUNC_BOTTOM || functionId == TSDB_FUNC_DIFF) {
      pRuntimeEnv->pCtx[i].ptsOutputBuf = pRuntimeEnv->pCtx[0].aOutputBuf;
    }
  }

  qDebug("QInfo:%p realloc output buffer, new size: %d rows, old:%" PRId64 ", remain:%" PRId64, GET_QINFO_ADDR(pRuntimeEnv),
         newSize, pRec->capacity, newSize - pRec->rows);

  pRec->capacity = newSize;
}

2286 2287 2288 2289 2290 2291 2292 2293 2294 2295 2296 2297 2298 2299 2300 2301 2302 2303 2304 2305 2306
/*
 * Set up the first time window of an interval query from a data block.
 *
 * Does nothing unless the query is an interval query and windowResInfo.prevSKey
 * still holds TSKEY_INITIAL_VAL. The window is aligned on the block's first
 * timestamp for ascending queries and on its last timestamp otherwise.
 */
static void doSetInitialTimewindow(SQueryRuntimeEnv* pRuntimeEnv, SDataBlockInfo* pBlockInfo) {
  SQuery*         pQuery = pRuntimeEnv->pQuery;
  SWindowResInfo* pResInfo = &pRuntimeEnv->windowResInfo;

  if (!QUERY_IS_INTERVAL_QUERY(pQuery) || pResInfo->prevSKey != TSKEY_INITIAL_VAL) {
    return;
  }

  STimeWindow win = TSWINDOW_INITIALIZER;
  const bool  ascending = QUERY_IS_ASC_QUERY(pQuery);

  if (ascending) {
    getAlignQueryTimeWindow(pQuery, pBlockInfo->window.skey, pBlockInfo->window.skey, pQuery->window.ekey, &win);
  } else {
    // the first window of a descending scan may start beyond the last queried timestamp
    getAlignQueryTimeWindow(pQuery, pBlockInfo->window.ekey, pQuery->window.ekey, pBlockInfo->window.ekey, &win);
  }

  // ascending scans start at the aligned window, descending ones at the query's own start key
  pResInfo->startTime = ascending ? win.skey : pQuery->window.skey;
  pResInfo->prevSKey = win.skey;
}

2307 2308
/*
 * Walk every data block the storage handle yields for the current table
 * (pQuery->current) and apply the query functions to each of them.
 *
 * The master scan reads through pQueryHandle, any other scan through pSecQueryHandle.
 * The loop stops early when the query status reports a full result buffer or completion.
 *
 * Always returns 0. A killed query or a non-zero terrno aborts through
 * longjmp(pRuntimeEnv->env, ...).
 */
static int64_t doScanAllDataBlocks(SQueryRuntimeEnv *pRuntimeEnv) {
  SQuery          *pQuery = pRuntimeEnv->pQuery;
  STableQueryInfo *pTableQueryInfo = pQuery->current;
  SQueryCostInfo  *summary = &pRuntimeEnv->summary;

  qDebug("QInfo:%p query start, qrange:%" PRId64 "-%" PRId64 ", lastkey:%" PRId64 ", order:%d",
         GET_QINFO_ADDR(pRuntimeEnv), pTableQueryInfo->win.skey, pTableQueryInfo->win.ekey, pTableQueryInfo->lastKey,
         pQuery->order.order);

  TsdbQueryHandleT pQueryHandle = pRuntimeEnv->pSecQueryHandle;
  if (IS_MASTER_SCAN(pRuntimeEnv)) {
    pQueryHandle = pRuntimeEnv->pQueryHandle;
  }

  const int32_t step = GET_FORWARD_DIRECTION_FACTOR(pQuery->order.order);
  const bool    ascending = QUERY_IS_ASC_QUERY(pQuery);

  SDataBlockInfo blockInfo = SDATA_BLOCK_INITIALIZER;
  while (tsdbNextDataBlock(pQueryHandle)) {
    summary->totalBlocks += 1;

    if (IS_QUERY_KILLED(GET_QINFO_ADDR(pRuntimeEnv))) {
      longjmp(pRuntimeEnv->env, TSDB_CODE_TSC_QUERY_CANCELLED);
    }

    tsdbRetrieveDataBlockInfo(pQueryHandle, &blockInfo);
    doSetInitialTimewindow(pRuntimeEnv, &blockInfo);

    // prj/diff queries: grow the output buffer so it can hold the results of this block
    ensureOutputBuffer(pRuntimeEnv, &blockInfo);

    SDataStatis *pStatis = NULL;
    SArray      *pDataBlock = NULL;
    if (loadDataBlockOnDemand(pRuntimeEnv, pQueryHandle, &blockInfo, &pStatis, &pDataBlock) == BLK_DATA_DISCARD) {
      // block skipped entirely: move lastKey one step past the edge of the block in scan direction
      if (ascending) {
        pQuery->current->lastKey = blockInfo.window.ekey + step;
      } else {
        pQuery->current->lastKey = blockInfo.window.skey + step;
      }
      continue;
    }

    if (terrno != TSDB_CODE_SUCCESS) {  // loading the block failed, abort the query
      longjmp(pRuntimeEnv->env, terrno);
    }

    // the start position cannot be set inside tableApplyFunctionsOnBlock because of limit/offset handling
    pQuery->pos = ascending ? 0 : blockInfo.rows - 1;
    int32_t numOfRes = tableApplyFunctionsOnBlock(pRuntimeEnv, &blockInfo, pStatis, binarySearchForKey, pDataBlock);

    summary->totalRows += blockInfo.rows;
    qDebug("QInfo:%p check data block, brange:%" PRId64 "-%" PRId64 ", numOfRows:%d, numOfRes:%d, lastKey:%"PRId64, GET_QINFO_ADDR(pRuntimeEnv),
           blockInfo.window.skey, blockInfo.window.ekey, blockInfo.rows, numOfRes, pQuery->current->lastKey);

    // a full output buffer or an applied limit/offset pauses the scan here
    if (Q_STATUS_EQUAL(pQuery->status, QUERY_RESBUF_FULL | QUERY_COMPLETED)) {
      break;
    }
  }

  if (terrno != TSDB_CODE_SUCCESS) {
    longjmp(pRuntimeEnv->env, terrno);
  }

  // no more blocks and the result buffer is not full: the query is complete
  if (!Q_STATUS_EQUAL(pQuery->status, QUERY_RESBUF_FULL)) {
    setQueryStatus(pQuery, QUERY_COMPLETED);
  }

  if (QUERY_IS_INTERVAL_QUERY(pQuery) && IS_MASTER_SCAN(pRuntimeEnv)) {
    if (Q_STATUS_EQUAL(pQuery->status, QUERY_COMPLETED)) {
      closeAllTimeWindow(&pRuntimeEnv->windowResInfo);

      // point to the last time window
      pRuntimeEnv->windowResInfo.curIndex = pRuntimeEnv->windowResInfo.size - 1;
    } else {
      assert(Q_STATUS_EQUAL(pQuery->status, QUERY_RESBUF_FULL));
    }
  }

  return 0;
}

/*
 * set tag value in SQLFunctionCtx
 * e.g.,tag information into input buffer
 */
2384
static void doSetTagValueInParam(void *tsdb, void* pTable, int32_t tagColId, tVariant *tag, int16_t type, int16_t bytes) {
H
[td-90]  
Haojun Liao 已提交
2385
  tVariantDestroy(tag);
2386

2387
  if (tagColId == TSDB_TBNAME_COLUMN_INDEX) {
2388
    char* val = tsdbGetTableName(pTable);
H
[td-90]  
Haojun Liao 已提交
2389 2390 2391
    assert(val != NULL);
    
    tVariantCreateFromBinary(tag, varDataVal(val), varDataLen(val), TSDB_DATA_TYPE_BINARY);
2392
  } else {
2393
    char* val = tsdbGetTableTagVal(pTable, tagColId, type, bytes);
H
[td-90]  
Haojun Liao 已提交
2394 2395 2396 2397
    if (val == NULL) {
      tag->nType = TSDB_DATA_TYPE_NULL;
      return;
    }
H
hjxilinx 已提交
2398 2399
    
    if (type == TSDB_DATA_TYPE_BINARY || type == TSDB_DATA_TYPE_NCHAR) {
H
Hongze Cheng 已提交
2400
      if (isNull(val, type)) {
H
Haojun Liao 已提交
2401 2402 2403 2404
        tag->nType = TSDB_DATA_TYPE_NULL;
        return;
      }

H
[td-90]  
Haojun Liao 已提交
2405
      tVariantCreateFromBinary(tag, varDataVal(val), varDataLen(val), type);
H
hjxilinx 已提交
2406
    } else {
H
Haojun Liao 已提交
2407 2408 2409 2410 2411
      if (isNull(val, type)) {
        tag->nType = TSDB_DATA_TYPE_NULL;
        return;
      }

H
[td-90]  
Haojun Liao 已提交
2412
      tVariantCreateFromBinary(tag, val, bytes, type);
H
hjxilinx 已提交
2413
    }
2414
  }
2415 2416
}

2417 2418 2419 2420 2421 2422 2423 2424 2425 2426 2427 2428
/*
 * Linear search of the tag column list by column id.
 * Returns the first entry whose colId equals `colId`, or NULL when none matches.
 */
static SColumnInfo* doGetTagColumnInfoById(SColumnInfo* pTagColList, int32_t numOfTags, int16_t colId) {
  assert(pTagColList != NULL && numOfTags > 0);

  int32_t idx = 0;
  while (idx < numOfTags) {
    SColumnInfo* pCol = &pTagColList[idx];
    if (pCol->colId == colId) {
      return pCol;
    }

    idx += 1;
  }

  return NULL;
}

2429
/*
 * Load the tag values of pTable into the function contexts of the query.
 *
 * - A query whose only output is TSDB_FUNC_TS_COMP gets the tag named by the
 *   function's single parameter stored in pCtx[0].tag.
 * - Otherwise every output expression flagged as a tag column gets its tag value
 *   stored in its own context. In addition, when the first expression is a
 *   ts/prj function on the primary timestamp column and a TS buffer exists, the
 *   tag named by its parameter is stored in pCtx[0].tag for join comparison.
 */
void setTagVal(SQueryRuntimeEnv *pRuntimeEnv, void *pTable, void *tsdb) {
  SQuery    *pQuery = pRuntimeEnv->pQuery;
  SQInfo    *pQInfo = GET_QINFO_ADDR(pRuntimeEnv);
  SExprInfo *pFirstExpr = &pQuery->pSelectExpr[0];

  if (pQuery->numOfOutput == 1 && pFirstExpr->base.functionId == TSDB_FUNC_TS_COMP) {
    assert(pFirstExpr->base.numOfParams == 1);

    int16_t      tagColId = (int16_t)pFirstExpr->base.arg->argValue.i64;
    SColumnInfo* pColInfo = doGetTagColumnInfoById(pQuery->tagColList, pQuery->numOfTags, tagColId);

    doSetTagValueInParam(tsdb, pTable, tagColId, &pRuntimeEnv->pCtx[0].tag, pColInfo->type, pColInfo->bytes);
    return;
  }

  // set the tag values by which the results are aggregated
  for (int32_t idx = 0; idx < pQuery->numOfOutput; ++idx) {
    SExprInfo* pExpr = &pQuery->pSelectExpr[idx];

    // only tag columns carry a tag value
    if (TSDB_COL_IS_TAG(pExpr->base.colInfo.flag)) {
      // todo use tag column index to optimize performance
      doSetTagValueInParam(tsdb, pTable, pExpr->base.colInfo.colId, &pRuntimeEnv->pCtx[idx].tag,
                           pExpr->type, pExpr->bytes);
    }
  }

  // set the join tag for the first column
  SSqlFuncMsg *pFuncMsg = &pFirstExpr->base;
  bool tsOrPrj = (pFuncMsg->functionId == TSDB_FUNC_TS || pFuncMsg->functionId == TSDB_FUNC_PRJ);

  if (!tsOrPrj || pRuntimeEnv->pTSBuf == NULL || pFuncMsg->colInfo.colIndex != PRIMARYKEY_TIMESTAMP_COL_INDEX) {
    return;
  }

  assert(pFuncMsg->numOfParams == 1);

  int16_t      tagColId = (int16_t)pFirstExpr->base.arg->argValue.i64;
  SColumnInfo* pColInfo = doGetTagColumnInfoById(pQuery->tagColList, pQuery->numOfTags, tagColId);

  doSetTagValueInParam(tsdb, pTable, tagColId, &pRuntimeEnv->pCtx[0].tag, pColInfo->type, pColInfo->bytes);
  qDebug("QInfo:%p set tag value for join comparison, colId:%" PRId64 ", val:%"PRId64, pQInfo, pFirstExpr->base.arg->argValue.i64,
      pRuntimeEnv->pCtx[0].tag.i64Key);
}

/*
 * Feed one window result into the merge functions of all output columns.
 *
 * When mergeFlag is false a new output row is started: each context's output
 * pointer advances by one row and the function is re-initialised before merging.
 * When mergeFlag is true the window is merged into the current output row.
 * TSDB_FUNC_TAG_DUMMY columns refresh their tag value but are not merged.
 */
static void doMerge(SQueryRuntimeEnv *pRuntimeEnv, int64_t timestamp, SWindowResult *pWindowRes, bool mergeFlag) {
  SQuery         *pQuery = pRuntimeEnv->pQuery;
  SQLFunctionCtx *pCtx = pRuntimeEnv->pCtx;

  tFilePage *page = getResBufPage(pRuntimeEnv->pResultBuf, pWindowRes->pos.pageId);

  // pass 1: prepare every context (output slot, input pointer, tag value)
  for (int32_t i = 0; i < pQuery->numOfOutput; ++i) {
    SQLFunctionCtx *pCur = &pCtx[i];
    int32_t         functionId = pQuery->pSelectExpr[i].base.functionId;

    if (!mergeFlag) {
      pCur->aOutputBuf += pCur->outputBytes;
      pCur->currentStage = FIRST_STAGE_MERGE;

      RESET_RESULT_INFO(pCur->resultInfo);
      aAggs[functionId].init(pCur);
    }

    pCur->hasNull = true;
    pCur->nStartQueryTimestamp = timestamp;
    pCur->aInputElemBuf = getPosInResultPage(pRuntimeEnv, i, pWindowRes, page);

    if (functionId != TSDB_FUNC_TAG_DUMMY && functionId != TSDB_FUNC_TAG) {
      continue;
    }

    // tag columns: the tag value is extracted from the input buffer
    tVariantDestroy(&pCur->tag);

    int32_t type = pCur->outputType;
    if (type == TSDB_DATA_TYPE_BINARY || type == TSDB_DATA_TYPE_NCHAR) {
      tVariantCreateFromBinary(&pCur->tag, varDataVal(pCur->aInputElemBuf), varDataLen(pCur->aInputElemBuf), type);
    } else {
      tVariantCreateFromBinary(&pCur->tag, pCur->aInputElemBuf, pCur->inputBytes, pCur->inputType);
    }
  }

  // pass 2: run the merge function of every column except the dummy tag columns
  for (int32_t i = 0; i < pQuery->numOfOutput; ++i) {
    int32_t functionId = pQuery->pSelectExpr[i].base.functionId;
    if (functionId != TSDB_FUNC_TAG_DUMMY) {
      aAggs[functionId].distMergeFunc(&pCtx[i]);
    }
  }
}

2516
static UNUSED_FUNC void printBinaryData(int32_t functionId, char *data, int32_t srcDataType) {
2517 2518 2519 2520 2521 2522 2523 2524 2525 2526 2527 2528 2529 2530 2531 2532 2533 2534 2535 2536 2537 2538 2539 2540 2541 2542 2543 2544 2545 2546 2547 2548 2549 2550 2551 2552 2553 2554 2555 2556 2557 2558 2559 2560 2561 2562 2563 2564 2565 2566 2567 2568 2569 2570 2571 2572 2573 2574 2575 2576 2577 2578 2579 2580 2581 2582 2583 2584
  if (functionId == TSDB_FUNC_FIRST_DST || functionId == TSDB_FUNC_LAST_DST) {
    switch (srcDataType) {
      case TSDB_DATA_TYPE_BINARY:
        printf("%" PRId64 ",%s\t", *(TSKEY *)data, (data + TSDB_KEYSIZE + 1));
        break;
      case TSDB_DATA_TYPE_TINYINT:
      case TSDB_DATA_TYPE_BOOL:
        printf("%" PRId64 ",%d\t", *(TSKEY *)data, *(int8_t *)(data + TSDB_KEYSIZE + 1));
        break;
      case TSDB_DATA_TYPE_SMALLINT:
        printf("%" PRId64 ",%d\t", *(TSKEY *)data, *(int16_t *)(data + TSDB_KEYSIZE + 1));
        break;
      case TSDB_DATA_TYPE_BIGINT:
      case TSDB_DATA_TYPE_TIMESTAMP:
        printf("%" PRId64 ",%" PRId64 "\t", *(TSKEY *)data, *(TSKEY *)(data + TSDB_KEYSIZE + 1));
        break;
      case TSDB_DATA_TYPE_INT:
        printf("%" PRId64 ",%d\t", *(TSKEY *)data, *(int32_t *)(data + TSDB_KEYSIZE + 1));
        break;
      case TSDB_DATA_TYPE_FLOAT:
        printf("%" PRId64 ",%f\t", *(TSKEY *)data, *(float *)(data + TSDB_KEYSIZE + 1));
        break;
      case TSDB_DATA_TYPE_DOUBLE:
        printf("%" PRId64 ",%lf\t", *(TSKEY *)data, *(double *)(data + TSDB_KEYSIZE + 1));
        break;
    }
  } else if (functionId == TSDB_FUNC_AVG) {
    printf("%lf,%d\t", *(double *)data, *(int32_t *)(data + sizeof(double)));
  } else if (functionId == TSDB_FUNC_SPREAD) {
    printf("%lf,%lf\t", *(double *)data, *(double *)(data + sizeof(double)));
  } else if (functionId == TSDB_FUNC_TWA) {
    data += 1;
    printf("%lf,%" PRId64 ",%" PRId64 ",%" PRId64 "\t", *(double *)data, *(int64_t *)(data + 8),
           *(int64_t *)(data + 16), *(int64_t *)(data + 24));
  } else if (functionId == TSDB_FUNC_MIN || functionId == TSDB_FUNC_MAX) {
    switch (srcDataType) {
      case TSDB_DATA_TYPE_TINYINT:
      case TSDB_DATA_TYPE_BOOL:
        printf("%d\t", *(int8_t *)data);
        break;
      case TSDB_DATA_TYPE_SMALLINT:
        printf("%d\t", *(int16_t *)data);
        break;
      case TSDB_DATA_TYPE_BIGINT:
      case TSDB_DATA_TYPE_TIMESTAMP:
        printf("%" PRId64 "\t", *(int64_t *)data);
        break;
      case TSDB_DATA_TYPE_INT:
        printf("%d\t", *(int *)data);
        break;
      case TSDB_DATA_TYPE_FLOAT:
        printf("%f\t", *(float *)data);
        break;
      case TSDB_DATA_TYPE_DOUBLE:
        printf("%f\t", *(float *)data);
        break;
    }
  } else if (functionId == TSDB_FUNC_SUM) {
    if (srcDataType == TSDB_DATA_TYPE_FLOAT || srcDataType == TSDB_DATA_TYPE_DOUBLE) {
      printf("%lf\t", *(float *)data);
    } else {
      printf("%" PRId64 "\t", *(int64_t *)data);
    }
  } else {
    printf("%s\t", data);
  }
}

2585
/*
 * Debug helper: dump the intermediate result of a super table query to stdout,
 * one line per row and one tab-terminated field per output column.
 *
 * pdata holds one page per output column; the cell of row r in column c is read
 * at pdata[c]->data + bytes(c) * r. Column types not listed below print nothing.
 */
void UNUSED_FUNC displayInterResult(tFilePage **pdata, SQueryRuntimeEnv* pRuntimeEnv, int32_t numOfRows) {
  SQuery* pQuery = pRuntimeEnv->pQuery;
  int32_t numOfCols = pQuery->numOfOutput;
  printf("super table query intermediate result, total:%d\n", numOfRows);

  for (int32_t row = 0; row < numOfRows; ++row) {
    for (int32_t col = 0; col < numOfCols; ++col) {
      SExprInfo *pExpr = &pQuery->pSelectExpr[col];

      switch (pExpr->type) {
        case TSDB_DATA_TYPE_BINARY:
          printBinaryData(pExpr->base.functionId, pdata[col]->data + pExpr->bytes * row, pExpr->type);
          break;
        case TSDB_DATA_TYPE_TIMESTAMP:
        case TSDB_DATA_TYPE_BIGINT:
          printf("%" PRId64 "\t", *(int64_t *)(pdata[col]->data + pExpr->bytes * row));
          break;
        case TSDB_DATA_TYPE_INT:
          printf("%d\t", *(int32_t *)(pdata[col]->data + pExpr->bytes * row));
          break;
        case TSDB_DATA_TYPE_FLOAT:
          printf("%f\t", *(float *)(pdata[col]->data + pExpr->bytes * row));
          break;
        case TSDB_DATA_TYPE_DOUBLE:
          printf("%lf\t", *(double *)(pdata[col]->data + pExpr->bytes * row));
          break;
        default:
          break;
      }
    }

    printf("\n");
  }
}

typedef struct SCompSupporter {
H
hjxilinx 已提交
2620 2621 2622
  STableQueryInfo **pTableQueryInfo;
  int32_t *         position;
  SQInfo *          pQInfo;
2623 2624 2625 2626 2627
} SCompSupporter;

int32_t tableResultComparFn(const void *pLeft, const void *pRight, void *param) {
  int32_t left = *(int32_t *)pLeft;
  int32_t right = *(int32_t *)pRight;
2628

2629 2630
  SCompSupporter *  supporter = (SCompSupporter *)param;
  SQueryRuntimeEnv *pRuntimeEnv = &supporter->pQInfo->runtimeEnv;
2631

2632 2633
  int32_t leftPos = supporter->position[left];
  int32_t rightPos = supporter->position[right];
2634

2635 2636 2637 2638
  /* left source is exhausted */
  if (leftPos == -1) {
    return 1;
  }
2639

2640 2641 2642 2643
  /* right source is exhausted*/
  if (rightPos == -1) {
    return -1;
  }
2644

H
hjxilinx 已提交
2645
  SWindowResInfo *pWindowResInfo1 = &supporter->pTableQueryInfo[left]->windowResInfo;
2646
  SWindowResult * pWindowRes1 = getWindowResult(pWindowResInfo1, leftPos);
H
Haojun Liao 已提交
2647
  tFilePage *page1 = getResBufPage(pRuntimeEnv->pResultBuf, pWindowRes1->pos.pageId);
2648

H
Haojun Liao 已提交
2649
  char *b1 = getPosInResultPage(pRuntimeEnv, PRIMARYKEY_TIMESTAMP_COL_INDEX, pWindowRes1, page1);
2650
  TSKEY leftTimestamp = GET_INT64_VAL(b1);
2651

H
hjxilinx 已提交
2652
  SWindowResInfo *pWindowResInfo2 = &supporter->pTableQueryInfo[right]->windowResInfo;
2653
  SWindowResult * pWindowRes2 = getWindowResult(pWindowResInfo2, rightPos);
H
Haojun Liao 已提交
2654
  tFilePage *page2 = getResBufPage(pRuntimeEnv->pResultBuf, pWindowRes2->pos.pageId);
2655

H
Haojun Liao 已提交
2656
  char *b2 = getPosInResultPage(pRuntimeEnv, PRIMARYKEY_TIMESTAMP_COL_INDEX, pWindowRes2, page2);
2657
  TSKEY rightTimestamp = GET_INT64_VAL(b2);
2658

2659 2660 2661
  if (leftTimestamp == rightTimestamp) {
    return 0;
  }
2662

2663 2664 2665
  return leftTimestamp > rightTimestamp ? 1 : -1;
}

2666
/*
 * Merge table groups, starting at pQInfo->groupIndex, until one group produces
 * at least one result or all groups have been visited.
 *
 * Returns -1 when merging a group fails, TSDB_CODE_SUCCESS otherwise. The super
 * table query is flagged as over once every group has been visited and
 * pQInfo->offset equals pQInfo->numOfGroupResultPages.
 */
int32_t mergeIntoGroupResult(SQInfo *pQInfo) {
  int64_t st = taosGetTimestampMs();
  int32_t numOfGroups = (int32_t)(GET_NUM_OF_TABLEGROUP(pQInfo));

  for (; pQInfo->groupIndex < numOfGroups;) {
    SArray *group = GET_TABLEGROUP(pQInfo, pQInfo->groupIndex);

    int32_t numOfPages = mergeIntoGroupResultImpl(pQInfo, group);
    if (numOfPages < 0) {  // not enough disk space to save the data into disk
      return -1;
    }

    pQInfo->groupIndex += 1;

    // this group generated at least one result, hand it back to the caller
    if (numOfPages > 0) {
      break;
    }

    assert(pQInfo->numOfGroupResultPages == 0);
    qDebug("QInfo:%p no result in group %d, continue", pQInfo, pQInfo->groupIndex - 1);
  }

  if (pQInfo->groupIndex == numOfGroups && pQInfo->offset == pQInfo->numOfGroupResultPages) {
    SET_STABLE_QUERY_OVER(pQInfo);
  }

  qDebug("QInfo:%p merge res data into group, index:%d, total group:%d, elapsed time:%" PRId64 "ms", pQInfo,
         pQInfo->groupIndex - 1, numOfGroups, taosGetTimestampMs() - st);

  return TSDB_CODE_SUCCESS;
}

/*
 * Copy the next batch of merged group results from the disk-based result
 * buffer into the output columns of the query (pQuery->sdata).
 *
 * When every page batch of the current group has been consumed, the next groups
 * are merged first. The function returns without copying if that merge fails or
 * if no group is left. On success pQuery->rec.rows is increased by the number of
 * rows copied and pQInfo->offset moves on by one.
 */
void copyResToQueryResultBuf(SQInfo *pQInfo, SQuery *pQuery) {
  if (pQInfo->offset == pQInfo->numOfGroupResultPages) {
    pQInfo->numOfGroupResultPages = 0;

    // results of the current group have all been sent to the client, try the next group
    if (mergeIntoGroupResult(pQInfo) != TSDB_CODE_SUCCESS) {
      return;  // failed to save data in the disk
    }

    // nothing produced and no group left: all results have been sent to the client
    int32_t numOfGroup = (int32_t)(GET_NUM_OF_TABLEGROUP(pQInfo));
    if (pQInfo->numOfGroupResultPages == 0 && pQInfo->groupIndex == numOfGroup) {
      SET_STABLE_QUERY_OVER(pQInfo);
      return;
    }
  }

  SQueryRuntimeEnv    *pRuntimeEnv = &pQInfo->runtimeEnv;
  SDiskbasedResultBuf *pResultBuf = pRuntimeEnv->pResultBuf;

  int32_t id = getGroupResultId(pQInfo->groupIndex - 1);
  SIDList list = getDataBufPagesIdList(pResultBuf, pQInfo->offset + id);

  int32_t numOfPages = (int32_t)(taosArrayGetSize(list));
  int32_t numOfRows = 0;  // rows already copied into the output columns

  for (int32_t p = 0; p < numOfPages; ++p) {
    SPageInfo *pInfo = *(SPageInfo **)taosArrayGet(list, p);
    tFilePage *pPage = getResBufPage(pResultBuf, pInfo->pageId);

    // each page stores its columns one after another, column c starting at offset[c] * num
    for (int32_t c = 0; c < pQuery->numOfOutput; ++c) {
      int32_t bytes = pRuntimeEnv->pCtx[c].outputBytes;
      char   *pDest = pQuery->sdata[c]->data;

      memcpy(pDest + numOfRows * bytes, pPage->data + pRuntimeEnv->offset[c] * pPage->num, bytes * pPage->num);
    }

    numOfRows += (int32_t)pPage->num;
  }

  assert(pQuery->rec.rows == 0);

  pQuery->rec.rows += numOfRows;
  pQInfo->offset += 1;
}

H
Haojun Liao 已提交
2745
/*
 * Number of result rows held by a window result.
 *
 * Returns numOfRes of the first output column that is not a ts/tag/tagprj
 * function and has numOfRes > 0, or 0 when there is no such column.
 */
int64_t getNumOfResultWindowRes(SQuery *pQuery, SWindowResult *pWindowRes) {
  for (int32_t col = 0; col < pQuery->numOfOutput; ++col) {
    int32_t functionId = pQuery->pSelectExpr[col].base.functionId;

    // ts, tag and tagprj follow the main output and cannot decide the row count themselves
    bool followsMainOutput =
        (functionId == TSDB_FUNC_TS || functionId == TSDB_FUNC_TAG || functionId == TSDB_FUNC_TAGPRJ);

    if (!followsMainOutput) {
      SResultInfo *pResultInfo = &pWindowRes->resultInfo[col];
      assert(pResultInfo != NULL);

      if (pResultInfo->numOfRes > 0) {
        return pResultInfo->numOfRes;
      }
    }
  }

  return 0;
}

2768
int32_t mergeIntoGroupResultImpl(SQInfo *pQInfo, SArray *pGroup) {
2769
  SQueryRuntimeEnv *pRuntimeEnv = &pQInfo->runtimeEnv;
2770
  SQuery *          pQuery = pRuntimeEnv->pQuery;
2771

2772
  size_t size = taosArrayGetSize(pGroup);
2773
  tFilePage **buffer = pQuery->sdata;
2774

2775
  int32_t*   posList = calloc(size, sizeof(int32_t));
H
hjxilinx 已提交
2776
  STableQueryInfo **pTableList = malloc(POINTER_BYTES * size);
2777

2778
  if (pTableList == NULL || posList == NULL) {
S
Shengliang Guan 已提交
2779 2780
    taosTFree(posList);
    taosTFree(pTableList);
2781 2782

    qError("QInfo:%p failed alloc memory", pQInfo);
H
Haojun Liao 已提交
2783
    longjmp(pRuntimeEnv->env, TSDB_CODE_QRY_OUT_OF_MEMORY);
2784 2785
  }

2786
  // todo opt for the case of one table per group
2787
  int32_t numOfTables = 0;
2788
  for (int32_t i = 0; i < size; ++i) {
2789
    STableQueryInfo *item = taosArrayGetP(pGroup, i);
2790

H
Haojun Liao 已提交
2791
    SIDList list = getDataBufPagesIdList(pRuntimeEnv->pResultBuf, TSDB_TABLEID(item->pTable)->tid);
H
Haojun Liao 已提交
2792
    if (taosArrayGetSize(list) > 0 && item->windowResInfo.size > 0) {
2793
      pTableList[numOfTables] = item;
2794
      numOfTables += 1;
2795 2796
    }
  }
2797

2798
  if (numOfTables == 0) {
S
Shengliang Guan 已提交
2799 2800
    taosTFree(posList);
    taosTFree(pTableList);
2801

2802 2803
    assert(pQInfo->numOfGroupResultPages == 0);
    return 0;
H
Haojun Liao 已提交
2804
  } else if (numOfTables == 1) { // no need to merge results since only one table in each group
2805
  }
2806

2807
  SCompSupporter cs = {pTableList, posList, pQInfo};
2808

2809
  SLoserTreeInfo *pTree = NULL;
2810
  tLoserTreeCreate(&pTree, numOfTables, &cs, tableResultComparFn);
2811

2812
  SResultInfo *pResultInfo = calloc(pQuery->numOfOutput, sizeof(SResultInfo));
H
Haojun Liao 已提交
2813 2814 2815 2816
  if (pResultInfo == NULL) {
    longjmp(pRuntimeEnv->env, TSDB_CODE_QRY_OUT_OF_MEMORY);
  }

H
Haojun Liao 已提交
2817 2818
  char* buf = calloc(1, pRuntimeEnv->interBufSize);
  setWindowResultInfo(pResultInfo, pQuery, pRuntimeEnv->stableQuery, buf);
2819
  resetMergeResultBuf(pQuery, pRuntimeEnv->pCtx, pResultInfo);
2820

H
Haojun Liao 已提交
2821
  // todo add windowRes iterator
2822 2823
  int64_t lastTimestamp = -1;
  int64_t startt = taosGetTimestampMs();
2824

2825
  while (1) {
2826 2827 2828 2829 2830
    if (IS_QUERY_KILLED(pQInfo)) {
      qDebug("QInfo:%p it is already killed, abort", pQInfo);
      longjmp(pRuntimeEnv->env, TSDB_CODE_TSC_QUERY_CANCELLED);
    }

2831
    int32_t pos = pTree->pNode[0].index;
2832

H
hjxilinx 已提交
2833
    SWindowResInfo *pWindowResInfo = &pTableList[pos]->windowResInfo;
H
Haojun Liao 已提交
2834
    SWindowResult  *pWindowRes = getWindowResult(pWindowResInfo, cs.position[pos]);
H
Haojun Liao 已提交
2835
    tFilePage *page = getResBufPage(pRuntimeEnv->pResultBuf, pWindowRes->pos.pageId);
2836

H
Haojun Liao 已提交
2837
    char *b = getPosInResultPage(pRuntimeEnv, PRIMARYKEY_TIMESTAMP_COL_INDEX, pWindowRes, page);
2838
    TSKEY ts = GET_INT64_VAL(b);
2839

2840
    assert(ts == pWindowRes->window.skey);
H
Haojun Liao 已提交
2841
    int64_t num = getNumOfResultWindowRes(pQuery, pWindowRes);
2842 2843
    if (num <= 0) {
      cs.position[pos] += 1;
2844

2845 2846
      if (cs.position[pos] >= pWindowResInfo->size) {
        cs.position[pos] = -1;
2847

2848
        // all input sources are exhausted
2849
        if (--numOfTables == 0) {
2850 2851 2852 2853 2854 2855 2856
          break;
        }
      }
    } else {
      if (ts == lastTimestamp) {  // merge with the last one
        doMerge(pRuntimeEnv, ts, pWindowRes, true);
      } else {  // copy data to disk buffer
2857
        if (buffer[0]->num == pQuery->rec.capacity) {
2858 2859 2860
          if (flushFromResultBuf(pQInfo) != TSDB_CODE_SUCCESS) {
            return -1;
          }
2861

2862 2863
          resetMergeResultBuf(pQuery, pRuntimeEnv->pCtx, pResultInfo);
        }
2864

2865
        doMerge(pRuntimeEnv, ts, pWindowRes, false);
2866
        buffer[0]->num += 1;
2867
      }
2868

2869
      lastTimestamp = ts;
2870

H
Haojun Liao 已提交
2871 2872 2873
      // move to the next element of current entry
      int32_t currentPageId = pWindowRes->pos.pageId;

2874 2875 2876
      cs.position[pos] += 1;
      if (cs.position[pos] >= pWindowResInfo->size) {
        cs.position[pos] = -1;
2877

2878
        // all input sources are exhausted
2879
        if (--numOfTables == 0) {
2880 2881
          break;
        }
H
Haojun Liao 已提交
2882 2883 2884 2885 2886 2887
      } else {
        // current page is not needed anymore
        SWindowResult  *pNextWindowRes = getWindowResult(pWindowResInfo, cs.position[pos]);
        if (pNextWindowRes->pos.pageId != currentPageId) {
          releaseResBufPage(pRuntimeEnv->pResultBuf, page);
        }
2888 2889
      }
    }
2890

2891 2892
    tLoserTreeAdjust(pTree, pos + pTree->numOfEntries);
  }
2893

2894
  if (buffer[0]->num != 0) {  // there are data in buffer
2895
    if (flushFromResultBuf(pQInfo) != TSDB_CODE_SUCCESS) {
S
slguan 已提交
2896
      qError("QInfo:%p failed to flush data into temp file, abort query", pQInfo);
2897

S
Shengliang Guan 已提交
2898 2899 2900 2901
      taosTFree(pTree);
      taosTFree(pTableList);
      taosTFree(posList);
      taosTFree(pResultInfo);
2902

2903 2904 2905
      return -1;
    }
  }
2906

2907 2908 2909
  int64_t endt = taosGetTimestampMs();

#ifdef _DEBUG_VIEW
2910
  displayInterResult(pQuery->sdata, pRuntimeEnv, pQuery->sdata[0]->num);
2911
#endif
2912

2913
  qDebug("QInfo:%p result merge completed for group:%d, elapsed time:%" PRId64 " ms", pQInfo, pQInfo->groupIndex, endt - startt);
2914

S
Shengliang Guan 已提交
2915 2916 2917
  taosTFree(pTableList);
  taosTFree(posList);
  taosTFree(pTree);
2918

2919
  pQInfo->offset = 0;
2920

S
Shengliang Guan 已提交
2921 2922
  taosTFree(pResultInfo);
  taosTFree(buf);
2923 2924 2925 2926
  return pQInfo->numOfGroupResultPages;
}

/*
 * Move the rows collected in the output columns (pQuery->sdata) into pages of
 * the disk-based result buffer.
 *
 * The rows are split into chunks of at most numOfRowsPerPage rows. Every chunk
 * goes to a new page registered under the same id, derived from the group index
 * and the current numOfGroupResultPages; inside a page, column c starts at
 * offset[c] * rows-in-page. numOfGroupResultPages is increased by one per call.
 *
 * Returns TSDB_CODE_SUCCESS, or -1 when no page could be obtained.
 */
int32_t flushFromResultBuf(SQInfo *pQInfo) {
  SQueryRuntimeEnv *pRuntimeEnv = &pQInfo->runtimeEnv;
  SQuery           *pQuery = pRuntimeEnv->pQuery;

  SDiskbasedResultBuf *pResultBuf = pRuntimeEnv->pResultBuf;

  int32_t pageId = -1;
  int32_t capacity = pResultBuf->numOfRowsPerPage;

  int32_t remain = (int32_t)pQuery->sdata[0]->num;
  int32_t offset = 0;

  while (remain > 0) {
    int32_t r = (remain > capacity) ? capacity : remain;

    int32_t    id = getGroupResultId(pQInfo->groupIndex) + pQInfo->numOfGroupResultPages;
    tFilePage *buf = getNewDataBuf(pResultBuf, id, &pageId);

    // NOTE(review): assumes getNewDataBuf reports failure with NULL - confirm. Without this
    // check the failure branch every caller has after flushFromResultBuf can never be taken.
    if (buf == NULL) {
      return -1;
    }

    // the row count of the page is the same for every column
    buf->num = r;

    // pagewise copy to dest buffer
    for (int32_t i = 0; i < pQuery->numOfOutput; ++i) {
      int32_t bytes = pRuntimeEnv->pCtx[i].outputBytes;

      memcpy(buf->data + pRuntimeEnv->offset[i] * buf->num, ((char *)pQuery->sdata[i]->data) + offset * bytes,
             buf->num * bytes);
    }

    offset += r;
    remain -= r;
  }

  pQInfo->numOfGroupResultPages += 1;
  return TSDB_CODE_SUCCESS;
}

/*
 * Rewind the merge output: empty every output column page and point each
 * function context one row BEFORE the start of its column, so that the first
 * advance by outputBytes lands on row 0. Context k is bound to pResultInfo[k].
 */
void resetMergeResultBuf(SQuery *pQuery, SQLFunctionCtx *pCtx, SResultInfo *pResultInfo) {
  for (int32_t col = 0; col < pQuery->numOfOutput; ++col) {
    SQLFunctionCtx *pCur = &pCtx[col];
    tFilePage      *pPage = pQuery->sdata[col];

    pCur->aOutputBuf = pPage->data - pCur->outputBytes;
    pCur->size = 1;
    pCur->startOffset = 0;
    pCur->resultInfo = &pResultInfo[col];

    pPage->num = 0;
  }
}

2976 2977 2978 2979 2980
/*
 * Turn the scan state of one table around for the reverse scan.
 *
 * The end key of the table's window becomes lastKey + step unless lastKey still
 * equals the start key, then start and end key are swapped and lastKey restarts
 * at the new start key. The cursor order is switched, its vgroup index reset to
 * -1 and the window index placed on the last window. A NULL table is ignored.
 */
static void updateTableQueryInfoForReverseScan(SQuery *pQuery, STableQueryInfo *pTableQueryInfo) {
  if (pTableQueryInfo == NULL) {
    return;
  }

  // the query order has been switched already, so step follows the reverse direction
  int32_t step = GET_FORWARD_DIRECTION_FACTOR(pQuery->order.order);

  // TODO validate: win.ekey >= lastKey + step for descending and <= for ascending order

  // lastKey still at the start key means the scan produced nothing: keep the end key
  if (pTableQueryInfo->lastKey != pTableQueryInfo->win.skey) {
    pTableQueryInfo->win.ekey = pTableQueryInfo->lastKey + step;
  }

  SWAP(pTableQueryInfo->win.skey, pTableQueryInfo->win.ekey, TSKEY);
  pTableQueryInfo->lastKey = pTableQueryInfo->win.skey;

  SWITCH_ORDER(pTableQueryInfo->cur.order);
  pTableQueryInfo->cur.vgroupIndex = -1;

  // set the index at the end of time window
  pTableQueryInfo->windowResInfo.curIndex = pTableQueryInfo->windowResInfo.size - 1;
}

/*
 * Set the `complete` flag of every output column in each closed window of
 * pWindowResInfo for the reverse scan.
 *
 * first/first_dst with ascending order and last/last_dst with descending order
 * get complete = false; ts and tag columns are left untouched; every other
 * function gets complete = true. Windows that are not closed are skipped.
 */
static void disableFuncInReverseScanImpl(SQInfo* pQInfo, SWindowResInfo *pWindowResInfo, int32_t order) {
  SQuery* pQuery = pQInfo->runtimeEnv.pQuery;

  for (int32_t w = 0; w < pWindowResInfo->size; ++w) {
    SWindowStatus *pStatus = getTimeWindowResStatus(pWindowResInfo, w);
    if (pStatus->closed) {
      SWindowResult *pResult = getWindowResult(pWindowResInfo, w);

      // open/close the specified query for each group result
      for (int32_t col = 0; col < pQuery->numOfOutput; ++col) {
        int32_t functId = pQuery->pSelectExpr[col].base.functionId;

        bool firstFunc = (functId == TSDB_FUNC_FIRST || functId == TSDB_FUNC_FIRST_DST);
        bool lastFunc = (functId == TSDB_FUNC_LAST || functId == TSDB_FUNC_LAST_DST);

        if ((firstFunc && order == TSDB_ORDER_ASC) || (lastFunc && order == TSDB_ORDER_DESC)) {
          pResult->resultInfo[col].complete = false;
        } else if (functId != TSDB_FUNC_TS && functId != TSDB_FUNC_TAG) {
          pResult->resultInfo[col].complete = true;
        }
      }
    }
  }
}

3032 3033
/*
 * Decide, per output function, whether it stays open for the next scan.
 *
 * For group-by-normal-column and interval queries the work is delegated to
 * disableFuncInReverseScanImpl() on the runtime window result info. Otherwise the
 * "complete" flag is updated directly on each function context; contexts whose
 * resultInfo is NULL are skipped.
 */
void disableFuncInReverseScan(SQInfo *pQInfo) {
  SQueryRuntimeEnv* pRuntimeEnv = &pQInfo->runtimeEnv;
  SQuery *pQuery = pRuntimeEnv->pQuery;
  int32_t order = pQuery->order.order;

  if (pRuntimeEnv->groupbyNormalCol || QUERY_IS_INTERVAL_QUERY(pQuery)) {
    // group by normal columns and interval query on normal table
    disableFuncInReverseScanImpl(pQInfo, &pRuntimeEnv->windowResInfo, order);
    return;
  }

  // simple (single result row) table query
  for (int32_t col = 0; col < pQuery->numOfOutput; ++col) {
    SQLFunctionCtx *pCtx = &pRuntimeEnv->pCtx[col];
    if (pCtx->resultInfo == NULL) {
      continue;  // resultInfo is NULL, means no data checked in previous scan
    }

    int32_t fid = pQuery->pSelectExpr[col].base.functionId;

    bool firstInAsc = (fid == TSDB_FUNC_FIRST || fid == TSDB_FUNC_FIRST_DST) && order == TSDB_ORDER_ASC;
    bool lastInDesc = (fid == TSDB_FUNC_LAST || fid == TSDB_FUNC_LAST_DST) && order == TSDB_ORDER_DESC;

    if (firstInAsc || lastInDesc) {
      pCtx->resultInfo->complete = false;
    } else if (fid != TSDB_FUNC_TS && fid != TSDB_FUNC_TAG) {
      pCtx->resultInfo->complete = true;
    }
  }
}

/*
 * Prepare every table of every table group for the reverse scan.
 *
 * For each table, updateTableQueryInfoForReverseScan() is applied to its
 * STableQueryInfo, and the resulting lastKey is copied into the STableKeyInfo at the
 * same position of pQInfo->tableGroupInfo.pGroupList.
 *
 * The two lists are expected to be parallel; this is asserted before anything is
 * modified for the table.
 */
static void setupQueryRangeForReverseScan(SQInfo* pQInfo) {
  SQuery* pQuery = pQInfo->runtimeEnv.pQuery;
  int32_t numOfGroups = (int32_t)(GET_NUM_OF_TABLEGROUP(pQInfo));

  for (int32_t i = 0; i < numOfGroups; ++i) {
    SArray *group = GET_TABLEGROUP(pQInfo, i);
    SArray *tableKeyGroup = taosArrayGetP(pQInfo->tableGroupInfo.pGroupList, i);

    // size_t index: avoids the signed/unsigned comparison against the array size
    size_t numOfTables = taosArrayGetSize(group);
    for (size_t j = 0; j < numOfTables; ++j) {
      STableQueryInfo *pCheckInfo = taosArrayGetP(group, j);
      STableKeyInfo   *pTableKeyInfo = taosArrayGet(tableKeyGroup, j);

      // both lists must refer to the same table at the same position
      assert(pCheckInfo->pTable == pTableKeyInfo->pTable);

      updateTableQueryInfoForReverseScan(pQuery, pCheckInfo);

      // update the last key in tableKeyInfo list
      pTableKeyInfo->lastKey = pCheckInfo->lastKey;
    }
  }
}

3082
/* Apply SWITCH_ORDER to the order field of every output function context. */
void switchCtxOrder(SQueryRuntimeEnv *pRuntimeEnv) {
  int32_t numOfOutput = pRuntimeEnv->pQuery->numOfOutput;

  for (int32_t col = 0; col < numOfOutput; ++col) {
    SWITCH_ORDER(pRuntimeEnv->pCtx[col].order);
  }
}

H
Haojun Liao 已提交
3089
/*
 * Allocate the per-column SResultInfo array of a window result in one zeroed chunk,
 * followed by interBufSize bytes that are handed to setWindowResultInfo() as the
 * intermediate result buffer.
 *
 * @param pQuery         query descriptor; numOfOutput gives the number of columns
 * @param pResultRow     window result whose resultInfo and pos are initialized
 * @param isSTableQuery  forwarded to setWindowResultInfo()
 * @param interBufSize   number of extra bytes appended after the SResultInfo array
 * @return TSDB_CODE_SUCCESS, or TSDB_CODE_QRY_OUT_OF_MEMORY if the allocation fails
 *         (pResultRow->pos is not touched in that case)
 */
int32_t createQueryResultInfo(SQuery *pQuery, SWindowResult *pResultRow, bool isSTableQuery, size_t interBufSize) {
  int32_t numOfCols = pQuery->numOfOutput;
  size_t  infoBytes = numOfCols * sizeof(SResultInfo);

  pResultRow->resultInfo = calloc(1, infoBytes + interBufSize);
  if (pResultRow->resultInfo == NULL) {
    return TSDB_CODE_QRY_OUT_OF_MEMORY;
  }

  pResultRow->pos = (SPosInfo) {-1, -1};

  // the intermediate result buffer starts right after the SResultInfo array
  char* interBuf = (char*) pResultRow->resultInfo + infoBytes;
  setWindowResultInfo(pResultRow->resultInfo, pQuery, isSTableQuery, interBuf);

  return TSDB_CODE_SUCCESS;
}

/*
 * Point every function context back at the start of its query output column
 * (pQuery->sdata[i]->data), reset and re-attach the runtime result info, zero the
 * output column for the whole capacity, and finally run initCtxOutputBuf().
 */
void resetCtxOutputBuf(SQueryRuntimeEnv *pRuntimeEnv) {
  SQuery *pQuery = pRuntimeEnv->pQuery;

  for (int32_t col = 0; col < pQuery->numOfOutput; ++col) {
    SQLFunctionCtx *pCtx = &pRuntimeEnv->pCtx[col];
    pCtx->aOutputBuf = pQuery->sdata[col]->data;

    /*
     * set the output buffer information and intermediate buffer
     * not all queries require the interResultBuf, such as COUNT/TAGPRJ/PRJ/TAG etc.
     */
    RESET_RESULT_INFO(&pRuntimeEnv->resultInfo[col]);
    pCtx->resultInfo = &pRuntimeEnv->resultInfo[col];

    // top/bottom/diff write their timestamps into the first output column
    int32_t fid = pQuery->pSelectExpr[col].base.functionId;
    switch (fid) {
      case TSDB_FUNC_TOP:
      case TSDB_FUNC_BOTTOM:
      case TSDB_FUNC_DIFF:
        pCtx->ptsOutputBuf = pRuntimeEnv->pCtx[0].aOutputBuf;
        break;
      default:
        break;
    }

    memset(pQuery->sdata[col]->data, 0, (size_t)pQuery->pSelectExpr[col].bytes * pQuery->rec.capacity);
  }

  initCtxOutputBuf(pRuntimeEnv);
}

/*
 * Advance the output position of every function context by `output` rows and reset
 * its result info.
 *
 * @param pRuntimeEnv  runtime environment holding the function contexts
 * @param output       number of rows to move forward
 *
 * TSDB_FUNC_DIFF is asserted not to appear among the output functions.
 */
void forwardCtxOutputBuf(SQueryRuntimeEnv *pRuntimeEnv, int64_t output) {
  SQuery *pQuery = pRuntimeEnv->pQuery;

  for (int32_t col = 0; col < pQuery->numOfOutput; ++col) {
    SQLFunctionCtx *pCtx = &pRuntimeEnv->pCtx[col];

    int32_t fid = pQuery->pSelectExpr[col].base.functionId;
    assert(fid != TSDB_FUNC_DIFF);

    // only functions flagged IS_OUTER_FORWARD move their output position here
    if (IS_OUTER_FORWARD(aAggs[fid].nStatus)) {
      pCtx->aOutputBuf += pCtx->outputBytes * output;
    }

    /*
     * NOTE: for top/bottom query, the value of first column of output (timestamp) are assigned
     * in the procedure of top/bottom routine
     * the output buffer in top/bottom routine is ptsOutputBuf, so we need to forward the output buffer
     *
     * diff function is handled in multi-output function
     */
    if (fid == TSDB_FUNC_TOP || fid == TSDB_FUNC_BOTTOM) {
      pCtx->ptsOutputBuf = (char*)pCtx->ptsOutputBuf + TSDB_KEYSIZE * output;
    }

    RESET_RESULT_INFO(pCtx->resultInfo);
  }
}

/*
 * Reset currentStage of every function context to 0 and call the function's init
 * routine for each context whose result info is not yet marked initialized.
 */
void initCtxOutputBuf(SQueryRuntimeEnv *pRuntimeEnv) {
  SQuery *pQuery = pRuntimeEnv->pQuery;

  for (int32_t col = 0; col < pQuery->numOfOutput; ++col) {
    SQLFunctionCtx *pCtx = &pRuntimeEnv->pCtx[col];
    pCtx->currentStage = 0;

    SResultInfo* pResInfo = GET_RES_INFO(pCtx);
    if (!pResInfo->initialized) {
      int32_t fid = pQuery->pSelectExpr[col].base.functionId;
      aAggs[fid].init(pCtx);
    }
  }
}

3177
/*
 * Apply the remaining offset (pQuery->limit.offset) to the rows currently held in the
 * output buffers.
 *
 * - No rows, or no offset left: nothing to do.
 * - rows <= offset: all rows are dropped, the offset is reduced by that amount, the
 *   output buffers are reset and the QUERY_RESBUF_FULL status is cleared.
 * - rows > offset: the first `offset` rows are dropped by moving the remaining rows to
 *   the front of every output column; the offset becomes 0.
 */
void skipResults(SQueryRuntimeEnv *pRuntimeEnv) {
  SQuery *pQuery = pRuntimeEnv->pQuery;
  if (pQuery->rec.rows == 0 || pQuery->limit.offset == 0) {
    return;
  }

  if (pQuery->rec.rows <= pQuery->limit.offset) {
    qDebug("QInfo:%p skip rows:%" PRId64 ", new offset:%" PRIu64, GET_QINFO_ADDR(pRuntimeEnv), pQuery->rec.rows,
        pQuery->limit.offset - pQuery->rec.rows);

    pQuery->limit.offset -= pQuery->rec.rows;
    pQuery->rec.rows = 0;

    resetCtxOutputBuf(pRuntimeEnv);

    // clear the buffer full flag if exists
    CLEAR_QUERY_STATUS(pQuery, QUERY_RESBUF_FULL);
    return;
  }

  // more rows than the remaining offset: keep the tail
  int64_t numOfSkip = pQuery->limit.offset;
  pQuery->rec.rows -= numOfSkip;
  pQuery->limit.offset = 0;

  qDebug("QInfo:%p skip row:%"PRId64", new offset:%d, numOfRows remain:%" PRIu64, GET_QINFO_ADDR(pRuntimeEnv), numOfSkip,
         0, pQuery->rec.rows);

  for (int32_t col = 0; col < pQuery->numOfOutput; ++col) {
    SQLFunctionCtx *pCtx = &pRuntimeEnv->pCtx[col];

    int32_t fid = pQuery->pSelectExpr[col].base.functionId;
    int32_t bytes = pCtx->outputBytes;

    // source and destination may overlap, hence memmove
    memmove(pQuery->sdata[col]->data, (char*) pQuery->sdata[col]->data + bytes * numOfSkip, pQuery->rec.rows * bytes);
    pCtx->aOutputBuf = ((char*) pQuery->sdata[col]->data) + pQuery->rec.rows * bytes;

    if (fid == TSDB_FUNC_DIFF || fid == TSDB_FUNC_TOP || fid == TSDB_FUNC_BOTTOM) {
      pCtx->ptsOutputBuf = pRuntimeEnv->pCtx[0].aOutputBuf;
    }
  }

  updateNumOfResult(pRuntimeEnv, (int32_t)pQuery->rec.rows);
}

/*
 * Update the status of the query.
 *
 * QUERY_NOT_COMPLETED replaces the whole status value. Any other status is OR-ed into
 * the current value after QUERY_NOT_COMPLETED has been cleared, since it is not
 * compatible with any other status.
 */
void setQueryStatus(SQuery *pQuery, int8_t status) {
  if (status != QUERY_NOT_COMPLETED) {
    CLEAR_QUERY_STATUS(pQuery, QUERY_NOT_COMPLETED);
    pQuery->status |= status;
    return;
  }

  pQuery->status = status;
}

/*
 * Call xNextStep on every output function (TSDB_FUNC_TS excluded) and report whether
 * at least one of them is still not complete afterwards.
 *
 * For group-by-normal-column and interval queries this is done for every closed
 * window result, after binding the contexts to that window with
 * setWindowResOutputBuf(). Otherwise it is done once on the current contexts.
 *
 * @return true if any function's result info has complete == false
 */
bool needScanDataBlocksAgain(SQueryRuntimeEnv *pRuntimeEnv) {
  SQuery *pQuery = pRuntimeEnv->pQuery;
  bool    again = false;

  if (pRuntimeEnv->groupbyNormalCol || QUERY_IS_INTERVAL_QUERY(pQuery)) {
    SWindowResInfo *pWindowResInfo = &pRuntimeEnv->windowResInfo;

    for (int32_t winIdx = 0; winIdx < pWindowResInfo->size; ++winIdx) {
      SWindowResult *pWinRes = getWindowResult(pWindowResInfo, winIdx);
      if (!pWinRes->status.closed) {
        continue;
      }

      setWindowResOutputBuf(pRuntimeEnv, pWinRes);

      for (int32_t col = 0; col < pQuery->numOfOutput; ++col) {
        int16_t fid = pQuery->pSelectExpr[col].base.functionId;
        if (fid != TSDB_FUNC_TS) {
          aAggs[fid].xNextStep(&pRuntimeEnv->pCtx[col]);

          SResultInfo *pResInfo = GET_RES_INFO(&pRuntimeEnv->pCtx[col]);
          if (!pResInfo->complete) {
            again = true;
          }
        }
      }
    }

    return again;
  }

  for (int32_t col = 0; col < pQuery->numOfOutput; ++col) {
    int16_t fid = pQuery->pSelectExpr[col].base.functionId;
    if (fid != TSDB_FUNC_TS) {
      aAggs[fid].xNextStep(&pRuntimeEnv->pCtx[col]);

      SResultInfo *pResInfo = GET_RES_INFO(&pRuntimeEnv->pCtx[col]);
      if (!pResInfo->complete) {
        again = true;
      }
    }
  }

  return again;
}

H
Haojun Liao 已提交
3273
/*
 * Capture a snapshot of the query state before scanning the current table:
 * query status, current window index, the query window, and the window of the current
 * table with its skey replaced by `start`.
 *
 * @param start  start key of the scan; asserted to be on the already-visited side of
 *               the current table's lastKey with respect to the scan order
 */
static SQueryStatusInfo getQueryStatusInfo(SQueryRuntimeEnv *pRuntimeEnv, TSKEY start) {
  SQuery *pQuery = pRuntimeEnv->pQuery;
  STableQueryInfo* pCurrent = pQuery->current;

  assert(QUERY_IS_ASC_QUERY(pQuery) ? (start <= pCurrent->lastKey) : (start >= pCurrent->lastKey));

  SQueryStatusInfo snapshot = {
      .status      = pQuery->status,
      .windowIndex = pRuntimeEnv->windowResInfo.curIndex,
      .lastKey     = start,
  };

  TIME_WINDOW_COPY(snapshot.w, pQuery->window);
  TIME_WINDOW_COPY(snapshot.curWindow, pCurrent->win);

  // the scan of this table begins at `start`, not at the table window's own skey
  snapshot.curWindow.skey = start;

  return snapshot;
}

3294 3295 3296 3297
/*
 * Switch the runtime environment into reverse-scan mode.
 *
 * Saves the ts-buffer cursor into pStatus, flips the order of the ts buffer, the query
 * and all function contexts, sets the query window to the swapped pStatus->curWindow,
 * marks the scan as REVERSE_SCAN, and opens a fresh secondary tsdb query handle for
 * the new range (an existing secondary handle is cleaned up first).
 *
 * If the handle cannot be created, control leaves via longjmp(pRuntimeEnv->env, terrno).
 */
static void setEnvBeforeReverseScan(SQueryRuntimeEnv *pRuntimeEnv, SQueryStatusInfo *pStatus) {
  SQInfo *pQInfo = GET_QINFO_ADDR(pRuntimeEnv);
  SQuery *pQuery = pRuntimeEnv->pQuery;

  // save the cursor
  pStatus->cur = tsBufGetCursor(pRuntimeEnv->pTSBuf);
  if (pRuntimeEnv->pTSBuf != NULL) {
    SWITCH_ORDER(pRuntimeEnv->pTSBuf->cur.order);
    tsBufNextPos(pRuntimeEnv->pTSBuf);
  }

  // reverse order time range
  pQuery->window = pStatus->curWindow;
  SWAP(pQuery->window.skey, pQuery->window.ekey, TSKEY);

  SWITCH_ORDER(pQuery->order.order);

  // after the flip the window must be consistent with the new order
  assert(QUERY_IS_ASC_QUERY(pQuery) ? (pQuery->window.skey <= pQuery->window.ekey)
                                    : (pQuery->window.skey >= pQuery->window.ekey));

  SET_REVERSE_SCAN_FLAG(pRuntimeEnv);

  STsdbQueryCond reverseCond = {
      .order     = pQuery->order.order,
      .colList   = pQuery->colList,
      .numOfCols = pQuery->numOfCols,
  };

  TIME_WINDOW_COPY(reverseCond.twindow, pQuery->window);

  setQueryStatus(pQuery, QUERY_NOT_COMPLETED);
  switchCtxOrder(pRuntimeEnv);
  disableFuncInReverseScan(pQInfo);
  setupQueryRangeForReverseScan(pQInfo);

  // clean unused handle
  if (pRuntimeEnv->pSecQueryHandle != NULL) {
    tsdbCleanupQueryHandle(pRuntimeEnv->pSecQueryHandle);
  }

  pRuntimeEnv->pSecQueryHandle = tsdbQueryTables(pQInfo->tsdb, &reverseCond, &pQInfo->tableGroupInfo, pQInfo);
  if (pRuntimeEnv->pSecQueryHandle == NULL) {
    longjmp(pRuntimeEnv->env, terrno);
  }
}

3342 3343
/*
 * Undo setEnvBeforeReverseScan(): flip the query and context order back, restore the
 * ts-buffer cursor saved in pStatus, mark the scan as MASTER_SCAN, and restore
 * lastKey, status and the time windows of the current table and the query from
 * pStatus.
 */
static void clearEnvAfterReverseScan(SQueryRuntimeEnv *pRuntimeEnv, SQueryStatusInfo *pStatus) {
  SQuery *pQuery = pRuntimeEnv->pQuery;
  STableQueryInfo* pCurrent = pQuery->current;

  SWITCH_ORDER(pQuery->order.order);
  switchCtxOrder(pRuntimeEnv);

  tsBufSetCursor(pRuntimeEnv->pTSBuf, &pStatus->cur);
  if (pRuntimeEnv->pTSBuf != NULL) {
    pRuntimeEnv->pTSBuf->cur.order = pQuery->order.order;
  }

  SET_MASTER_SCAN_FLAG(pRuntimeEnv);

  // bring back the state captured before the reverse scan
  pCurrent->lastKey = pStatus->lastKey;
  pQuery->status = pStatus->status;

  pCurrent->win = pStatus->w;
  pQuery->window = pCurrent->win;
}

H
Haojun Liao 已提交
3364 3365 3366 3367 3368 3369 3370
/*
 * Reset lastKey of the single table in pTableGroupInfo to the start key of the given
 * query condition. Exactly one table is expected (asserted).
 */
static void restoreTimeWindow(STableGroupInfo* pTableGroupInfo, STsdbQueryCond* pCond) {
  assert(pTableGroupInfo->numOfTables == 1);

  SArray* pFirstGroup = taosArrayGetP(pTableGroupInfo->pGroupList, 0);
  STableKeyInfo* pFirstTable = taosArrayGet(pFirstGroup, 0);

  pFirstTable->lastKey = pCond->twindow.skey;
}

3371
/*
 * Scan the data blocks of the current table starting at `start`.
 *
 * 1. Master scan, repeated (REPEAT_SCAN) over the same range for as long as
 *    needScanDataBlocksAgain() reports that some function needs another pass. Each
 *    repetition opens a new secondary tsdb query handle.
 * 2. If needReverseScan() holds, one additional scan in the opposite order, bracketed
 *    by setEnvBeforeReverseScan()/clearEnvAfterReverseScan().
 *
 * Leaves via longjmp(pRuntimeEnv->env, ...) when a query handle cannot be created or
 * the query has been killed.
 */
void scanOneTableDataBlocks(SQueryRuntimeEnv *pRuntimeEnv, TSKEY start) {
  SQInfo *pQInfo = (SQInfo *) GET_QINFO_ADDR(pRuntimeEnv);
  SQuery *pQuery = pRuntimeEnv->pQuery;
  STableQueryInfo *pCurrent = pQuery->current;

  setQueryStatus(pQuery, QUERY_NOT_COMPLETED);

  // store the start query position
  SQueryStatusInfo saved = getQueryStatusInfo(pRuntimeEnv, start);

  SET_MASTER_SCAN_FLAG(pRuntimeEnv);
  int32_t step = GET_FORWARD_DIRECTION_FACTOR(pQuery->order.order);

  for (;;) {
    doScanAllDataBlocks(pRuntimeEnv);

    if (IS_MASTER_SCAN(pRuntimeEnv)) {
      saved.status = pQuery->status;

      // do nothing if no data blocks are found qualified during scan
      if (saved.lastKey != pCurrent->lastKey) {
        saved.curWindow.ekey = pCurrent->lastKey - step;
      }

      saved.lastKey = pCurrent->lastKey;
    }

    if (!needScanDataBlocksAgain(pRuntimeEnv)) {
      // restore the status code and jump out of loop
      if (pRuntimeEnv->scanFlag == REPEAT_SCAN) {
        pQuery->status = saved.status;
      }

      break;
    }

    // another pass over the range covered by the master scan
    STsdbQueryCond repeatCond = {
        .order     = pQuery->order.order,
        .colList   = pQuery->colList,
        .numOfCols = pQuery->numOfCols,
    };

    TIME_WINDOW_COPY(repeatCond.twindow, saved.curWindow);

    if (pRuntimeEnv->pSecQueryHandle != NULL) {
      tsdbCleanupQueryHandle(pRuntimeEnv->pSecQueryHandle);
    }

    restoreTimeWindow(&pQInfo->tableGroupInfo, &repeatCond);
    pRuntimeEnv->pSecQueryHandle = tsdbQueryTables(pQInfo->tsdb, &repeatCond, &pQInfo->tableGroupInfo, pQInfo);
    if (pRuntimeEnv->pSecQueryHandle == NULL) {
      longjmp(pRuntimeEnv->env, terrno);
    }

    pRuntimeEnv->windowResInfo.curIndex = saved.windowIndex;
    setQueryStatus(pQuery, QUERY_NOT_COMPLETED);
    pRuntimeEnv->scanFlag = REPEAT_SCAN;

    qDebug("QInfo:%p start to repeat scan data blocks due to query func required, qrange:%"PRId64"-%"PRId64, pQInfo,
        repeatCond.twindow.skey, repeatCond.twindow.ekey);

    // check if query is killed or not
    if (IS_QUERY_KILLED(pQInfo)) {
      finalizeQueryResult(pRuntimeEnv); // clean up allocated resource during query
      longjmp(pRuntimeEnv->env, TSDB_CODE_TSC_QUERY_CANCELLED);
    }
  }

  if (needReverseScan(pQuery)) {
    setEnvBeforeReverseScan(pRuntimeEnv, &saved);

    // reverse scan from current position
    qDebug("QInfo:%p start to reverse scan", pQInfo);
    doScanAllDataBlocks(pRuntimeEnv);

    clearEnvAfterReverseScan(pRuntimeEnv, &saved);
  }
}

H
hjxilinx 已提交
3452
/*
 * Call xFinalize for every output function.
 *
 * For group-by-normal-column and interval queries this is done per closed window
 * result (for group-by-normal-column all windows are closed first), and the number of
 * rows of each window is taken from getNumOfResult(). Otherwise the functions are
 * finalized once on the current contexts.
 */
void finalizeQueryResult(SQueryRuntimeEnv *pRuntimeEnv) {
  SQuery *pQuery = pRuntimeEnv->pQuery;

  if (!(pRuntimeEnv->groupbyNormalCol || QUERY_IS_INTERVAL_QUERY(pQuery))) {
    for (int32_t col = 0; col < pQuery->numOfOutput; ++col) {
      aAggs[pQuery->pSelectExpr[col].base.functionId].xFinalize(&pRuntimeEnv->pCtx[col]);
    }
    return;
  }

  SWindowResInfo *pWindowResInfo = &pRuntimeEnv->windowResInfo;
  if (pRuntimeEnv->groupbyNormalCol) {
    closeAllTimeWindow(pWindowResInfo);
  }

  for (int32_t winIdx = 0; winIdx < pWindowResInfo->size; ++winIdx) {
    if (!isWindowResClosed(pWindowResInfo, winIdx)) {
      continue;
    }

    SWindowResult *pWinRes = &pWindowResInfo->pResult[winIdx];
    setWindowResOutputBuf(pRuntimeEnv, pWinRes);

    for (int32_t col = 0; col < pQuery->numOfOutput; ++col) {
      aAggs[pQuery->pSelectExpr[col].base.functionId].xFinalize(&pRuntimeEnv->pCtx[col]);
    }

    /*
     * set the number of output results for group by normal columns, the number of output rows usually is 1 except
     * the top and bottom query
     */
    pWinRes->numOfRows = (uint16_t)getNumOfResult(pRuntimeEnv);
  }
}

/* Return true if at least one output column is something other than ts, tag or tagprj. */
static bool hasMainOutput(SQuery *pQuery) {
  for (int32_t col = 0; col < pQuery->numOfOutput; ++col) {
    switch (pQuery->pSelectExpr[col].base.functionId) {
      case TSDB_FUNC_TS:
      case TSDB_FUNC_TAG:
      case TSDB_FUNC_TAGPRJ:
        break;  // auxiliary column, keep looking
      default:
        return true;
    }
  }

  return false;
}

H
Haojun Liao 已提交
3500
/*
 * Initialize a STableQueryInfo in the caller-provided memory `buf`.
 *
 * @param pRuntimeEnv  runtime environment of the query
 * @param pTable       table object stored in the info
 * @param win          time window of the table; lastKey starts at win.skey
 * @param buf          memory that is used as the STableQueryInfo
 * @return buf as STableQueryInfo*, or NULL if initWindowResInfo() fails
 *
 * The window result info is only initialized for interval and group-by-normal-column
 * queries; other queries leave it untouched.
 */
static STableQueryInfo *createTableQueryInfo(SQueryRuntimeEnv *pRuntimeEnv, void* pTable, STimeWindow win, void* buf) {
  SQuery *pQuery = pRuntimeEnv->pQuery;
  STableQueryInfo *pInfo = buf;

  pInfo->win = win;
  pInfo->lastKey = win.skey;

  pInfo->pTable = pTable;
  pInfo->cur.vgroupIndex = -1;

  if (QUERY_IS_INTERVAL_QUERY(pQuery) || pRuntimeEnv->groupbyNormalCol) {
    int32_t initialSize = 16;
    int32_t initialThreshold = 100;

    int32_t code = initWindowResInfo(&pInfo->windowResInfo, pRuntimeEnv, initialSize, initialThreshold, TSDB_DATA_TYPE_INT);
    if (code != TSDB_CODE_SUCCESS) {
      return NULL;
    }
  }

  return pInfo;
}

H
Haojun Liao 已提交
3525
/*
 * Release the window result info owned by pTableQueryInfo. NULL is accepted and
 * ignored. The STableQueryInfo memory itself is not freed here.
 */
void destroyTableQueryInfo(STableQueryInfo *pTableQueryInfo) {
  if (pTableQueryInfo != NULL) {
    cleanupTimeWindowInfo(&pTableQueryInfo->windowResInfo);
  }
}

H
Haojun Liao 已提交
3533 3534 3535 3536
/*
 * Assert that lastKey of _tableInfo lies on the correct side of its win.skey for the
 * scan order of _q: lastKey >= skey for ascending queries, lastKey <= skey otherwise.
 */
#define CHECK_QUERY_TIME_RANGE(_q, _tableInfo)                         \
  do {                                                                 \
    assert(QUERY_IS_ASC_QUERY(_q)                                      \
               ? ((_tableInfo)->lastKey >= (_tableInfo)->win.skey)     \
               : ((_tableInfo)->lastKey <= (_tableInfo)->win.skey));   \
  } while (0)
3538 3539 3540 3541

/**
 * set output buffer for different group
 * @param pRuntimeEnv
3542
 * @param pDataBlockInfo
3543
 */
H
Haojun Liao 已提交
3544
void setExecutionContext(SQInfo *pQInfo, int32_t groupIndex, TSKEY nextKey) {
3545
  SQueryRuntimeEnv *pRuntimeEnv = &pQInfo->runtimeEnv;
H
Haojun Liao 已提交
3546 3547 3548
  STableQueryInfo  *pTableQueryInfo = pRuntimeEnv->pQuery->current;
  SWindowResInfo   *pWindowResInfo = &pRuntimeEnv->windowResInfo;

H
Haojun Liao 已提交
3549 3550
  // lastKey needs to be updated
  pTableQueryInfo->lastKey = nextKey;
H
Haojun Liao 已提交
3551 3552 3553 3554

  if (pRuntimeEnv->hasTagResults || pRuntimeEnv->pTSBuf != NULL) {
    setAdditionalInfo(pQInfo, pTableQueryInfo->pTable, pTableQueryInfo);
  }
H
Haojun Liao 已提交
3555

H
Haojun Liao 已提交
3556 3557 3558
  if (pRuntimeEnv->prevGroupId != INT32_MIN && pRuntimeEnv->prevGroupId == groupIndex) {
    return;
  }
3559

3560 3561
  SWindowResult *pWindowRes = doSetTimeWindowFromKey(pRuntimeEnv, pWindowResInfo, (char *)&groupIndex,
      sizeof(groupIndex), true);
3562 3563 3564
  if (pWindowRes == NULL) {
    return;
  }
3565

3566 3567 3568 3569 3570
  /*
   * not assign result buffer yet, add new result buffer
   * all group belong to one result set, and each group result has different group id so set the id to be one
   */
  if (pWindowRes->pos.pageId == -1) {
3571
    if (addNewWindowResultBuf(pWindowRes, pRuntimeEnv->pResultBuf, groupIndex, pRuntimeEnv->numOfRowsPerPage) !=
3572 3573 3574 3575
        TSDB_CODE_SUCCESS) {
      return;
    }
  }
3576

H
Haojun Liao 已提交
3577 3578
  // record the current active group id
  pRuntimeEnv->prevGroupId = groupIndex;
3579 3580 3581 3582
  setWindowResOutputBuf(pRuntimeEnv, pWindowRes);
  initCtxOutputBuf(pRuntimeEnv);
}

H
Haojun Liao 已提交
3583
/*
 * Bind every function context to the given window result: the output buffer is the
 * column's position inside the result page of pResult, resultInfo is the matching
 * entry of pResult->resultInfo, and the super-table flag is copied from the runtime
 * environment. top/bottom/diff additionally get the first column's output buffer as
 * their timestamp buffer.
 */
void setWindowResOutputBuf(SQueryRuntimeEnv *pRuntimeEnv, SWindowResult *pResult) {
  SQuery *pQuery = pRuntimeEnv->pQuery;

  // Note: pResult->pos[i]->num == 0, there is only fixed number of results for each group
  tFilePage *pPage = getResBufPage(pRuntimeEnv->pResultBuf, pResult->pos.pageId);

  for (int32_t col = 0; col < pQuery->numOfOutput; ++col) {
    SQLFunctionCtx *pCtx = &pRuntimeEnv->pCtx[col];
    pCtx->aOutputBuf = getPosInResultPage(pRuntimeEnv, col, pResult, pPage);

    switch (pQuery->pSelectExpr[col].base.functionId) {
      case TSDB_FUNC_TOP:
      case TSDB_FUNC_BOTTOM:
      case TSDB_FUNC_DIFF:
        pCtx->ptsOutputBuf = pRuntimeEnv->pCtx[0].aOutputBuf;
        break;
      default:
        break;
    }

    /*
     * set the output buffer information and intermediate buffer
     * not all queries require the interResultBuf, such as COUNT
     */
    pCtx->resultInfo = &pResult->resultInfo[col];

    // set super table query flag
    SResultInfo *pResInfo = GET_RES_INFO(pCtx);
    pResInfo->superTableQ = pRuntimeEnv->stableQuery;
  }
}

H
Haojun Liao 已提交
3610 3611
/*
 * Bind the function contexts to the given window result and initialize those that
 * have not been initialized yet.
 *
 * resultInfo is always re-pointed to the window's entry. Contexts whose result info is
 * both initialized and complete are otherwise left as they are; for the rest the
 * output buffer, stage, timestamp buffer (top/bottom/diff) and super-table flag are
 * set, and the function's init routine is run when the result info is uninitialized.
 */
void setWindowResOutputBufInitCtx(SQueryRuntimeEnv *pRuntimeEnv, SWindowResult *pResult) {
  SQuery *pQuery = pRuntimeEnv->pQuery;

  // Note: pResult->pos[i]->num == 0, there is only fixed number of results for each group
  tFilePage* pPage = getResBufPage(pRuntimeEnv->pResultBuf, pResult->pos.pageId);

  for (int32_t col = 0; col < pQuery->numOfOutput; ++col) {
    SQLFunctionCtx *pCtx = &pRuntimeEnv->pCtx[col];
    pCtx->resultInfo = &pResult->resultInfo[col];

    bool finished = pCtx->resultInfo->initialized && pCtx->resultInfo->complete;
    if (finished) {
      continue;
    }

    pCtx->aOutputBuf = getPosInResultPage(pRuntimeEnv, col, pResult, pPage);
    pCtx->currentStage = 0;

    int32_t fid = pCtx->functionId;
    if (fid == TSDB_FUNC_TOP || fid == TSDB_FUNC_BOTTOM || fid == TSDB_FUNC_DIFF) {
      pCtx->ptsOutputBuf = pRuntimeEnv->pCtx[0].aOutputBuf;
    }

    /*
     * set the output buffer information and intermediate buffer
     * not all queries require the interResultBuf, such as COUNT
     */
    pCtx->resultInfo->superTableQ = pRuntimeEnv->stableQuery;  // set super table query flag

    if (!pCtx->resultInfo->initialized) {
      aAggs[fid].init(pCtx);
    }
  }
}

3644
/*
 * Set the tag values for the given table and, when a ts buffer is present, position
 * the ts buffer for this table.
 *
 * On the first visit of a table (cur.vgroupIndex == -1) the start position is looked
 * up by tag and the resulting cursor is remembered in pTableQueryInfo; on later visits
 * the remembered cursor is restored.
 *
 * @return always 0
 */
int32_t setAdditionalInfo(SQInfo *pQInfo, void* pTable, STableQueryInfo *pTableQueryInfo) {
  SQueryRuntimeEnv *pRuntimeEnv = &pQInfo->runtimeEnv;

  setTagVal(pRuntimeEnv, pTable, pQInfo->tsdb);

  // both the master and supplement scan needs to set the correct ts comp start position
  if (pRuntimeEnv->pTSBuf == NULL) {
    return 0;
  }

  if (pTableQueryInfo->cur.vgroupIndex != -1) {
    tsBufSetCursor(pRuntimeEnv->pTSBuf, &pTableQueryInfo->cur);
    return 0;
  }

  tVariantAssign(&pTableQueryInfo->tag, &pRuntimeEnv->pCtx[0].tag);
  tsBufGetElemStartPos(pRuntimeEnv->pTSBuf, 0, &pTableQueryInfo->tag);

  // keep the cursor info of current meter
  pTableQueryInfo->cur = pRuntimeEnv->pTSBuf->cur;

  return 0;
}

/*
 * There are two cases to handle:
 *
 * 1. Query range is not set yet (queryRangeSet = 0). we need to set the query range info, including pQuery->lastKey,
 *    pQuery->window.skey, and pQuery->eKey.
 * 2. Query range is set and query is in progress. There may be another result with the same query ranges to be
 *    merged during merge stage. In this case, we need the pTableQueryInfo->lastResRows to decide if there
 *    is a previous result generated or not.
 */
H
hjxilinx 已提交
3674
void setIntervalQueryRange(SQInfo *pQInfo, TSKEY key) {
3675 3676
  SQueryRuntimeEnv *pRuntimeEnv = &pQInfo->runtimeEnv;
  SQuery *          pQuery = pRuntimeEnv->pQuery;
H
hjxilinx 已提交
3677 3678
  STableQueryInfo *pTableQueryInfo = pQuery->current;
  
3679 3680 3681
  if (pTableQueryInfo->queryRangeSet) {
    pTableQueryInfo->lastKey = key;
  } else {
3682
    pTableQueryInfo->win.skey = key;
3683
    STimeWindow win = {.skey = key, .ekey = pQuery->window.ekey};
3684

3685 3686 3687 3688 3689
    // for too small query range, no data in this interval.
    if ((QUERY_IS_ASC_QUERY(pQuery) && (pQuery->window.ekey < pQuery->window.skey)) ||
        (!QUERY_IS_ASC_QUERY(pQuery) && (pQuery->window.skey < pQuery->window.ekey))) {
      return;
    }
3690

3691 3692 3693 3694 3695 3696
    /**
     * In handling the both ascending and descending order super table query, we need to find the first qualified
     * timestamp of this table, and then set the first qualified start timestamp.
     * In ascending query, key is the first qualified timestamp. However, in the descending order query, additional
     * operations involve.
     */
H
Haojun Liao 已提交
3697
    STimeWindow     w = TSWINDOW_INITIALIZER;
3698
    SWindowResInfo *pWindowResInfo = &pTableQueryInfo->windowResInfo;
3699

H
Haojun Liao 已提交
3700 3701
    TSKEY sk = MIN(win.skey, win.ekey);
    TSKEY ek = MAX(win.skey, win.ekey);
H
Haojun Liao 已提交
3702
    getAlignQueryTimeWindow(pQuery, win.skey, sk, ek, &w);
3703
    pWindowResInfo->startTime = pTableQueryInfo->win.skey;  // windowSKey may be 0 in case of 1970 timestamp
3704

3705 3706
    if (pWindowResInfo->prevSKey == TSKEY_INITIAL_VAL) {
      if (!QUERY_IS_ASC_QUERY(pQuery)) {
H
Haojun Liao 已提交
3707
        assert(win.ekey == pQuery->window.ekey);
3708
      }
3709 3710
      
      pWindowResInfo->prevSKey = w.skey;
3711
    }
3712

3713
    pTableQueryInfo->queryRangeSet = 1;
3714
    pTableQueryInfo->lastKey = pTableQueryInfo->win.skey;
3715 3716 3717 3718
  }
}

/* Return true if any output function has the TSDB_FUNCSTATE_NEED_TS bit set in its nStatus. */
bool requireTimestamp(SQuery *pQuery) {
  bool needTs = false;

  for (int32_t col = 0; col < pQuery->numOfOutput && !needTs; col++) {
    int32_t fid = pQuery->pSelectExpr[col].base.functionId;
    needTs = (aAggs[fid].nStatus & TSDB_FUNCSTATE_NEED_TS) != 0;
  }

  return needTs;
}

/*
 * Decide whether the primary timestamp column of a data block has to be loaded.
 *
 * It is needed when
 * 1. lastKey of the current table or the end key of the query window lies inside the
 *    block's time window, so the timestamps are required to find the exact position;
 * 2. one of the output functions requires timestamps (see requireTimestamp()).
 */
bool needPrimaryTimestampCol(SQuery *pQuery, SDataBlockInfo *pDataBlockInfo) {
  STimeWindow *pBlockWin = &pDataBlockInfo->window;
  STableQueryInfo* pCurrent = pQuery->current;

  if (pCurrent->lastKey >= pBlockWin->skey && pCurrent->lastKey <= pBlockWin->ekey) {
    return true;
  }

  if (pQuery->window.ekey >= pBlockWin->skey && pQuery->window.ekey <= pBlockWin->ekey) {
    return true;
  }

  return requireTimestamp(pQuery);
}

3742
/*
 * Copy rows of the closed window results into the query output buffers
 * (pQuery->sdata), starting at output row 0, until either all closed windows are
 * consumed or the output capacity is reached.
 *
 * pQInfo->groupIndex / pQInfo->offset record how far the copy has progressed, so that
 * a later call continues where this one stopped.
 *
 * @param orderType  TSDB_ORDER_ASC walks the windows forward starting at groupIndex;
 *                   any other value walks them backward from the end
 * @return number of rows copied by this call
 */
static int32_t doCopyToSData(SQInfo *pQInfo, SWindowResInfo *pResultInfo, int32_t orderType) {
  SQueryRuntimeEnv *pRuntimeEnv = &pQInfo->runtimeEnv;
  SQuery *          pQuery = pRuntimeEnv->pQuery;

  int32_t copied = 0;

  qDebug("QInfo:%p start to copy data from windowResInfo to query buf", pQInfo);
  int32_t numOfClosed = numOfClosedTimeWindow(pResultInfo);
  SWindowResult* pWindows = pResultInfo->pResult;

  int32_t first = 0;
  int32_t step = -1;
  if (orderType == TSDB_ORDER_ASC) {
    first = pQInfo->groupIndex;
    step = 1;
  } else {  // desc order copy all data
    first = numOfClosed - pQInfo->groupIndex - 1;
  }

  for (int32_t i = first; (i < numOfClosed) && (i >= 0); i += step) {
    SWindowResult *pWin = &pWindows[i];

    // empty window: move on to the next group
    if (pWin->numOfRows == 0) {
      pQInfo->offset = 0;
      pQInfo->groupIndex += 1;
      continue;
    }

    assert(pQInfo->offset <= 1);

    int32_t srcOffset = pQInfo->offset;
    int32_t rowsToCopy = pWin->numOfRows - pQInfo->offset;

    /*
     * current output space is not enough to keep all the result data of this group, only copy partial results
     * to SQuery object's result buffer
     */
    if (rowsToCopy > pQuery->rec.capacity - copied) {
      rowsToCopy = (int32_t)pQuery->rec.capacity - copied;
      pQInfo->offset += rowsToCopy;
    } else {
      pQInfo->offset = 0;
      pQInfo->groupIndex += 1;
    }

    tFilePage *pPage = getResBufPage(pRuntimeEnv->pResultBuf, pWin->pos.pageId);

    for (int32_t col = 0; col < pQuery->numOfOutput; ++col) {
      int32_t bytes = pRuntimeEnv->pCtx[col].outputBytes;

      char *dst = pQuery->sdata[col]->data + copied * bytes;
      char *src = getPosInResultPage(pRuntimeEnv, col, pWin, pPage);
      memcpy(dst, src + srcOffset * bytes, bytes * rowsToCopy);
    }

    copied += rowsToCopy;
    if (copied == pQuery->rec.capacity) {
      break;
    }
  }

  qDebug("QInfo:%p copy data to query buf completed", pQInfo);

#ifdef _DEBUG_VIEW
  displayInterResult(pQuery->sdata, pRuntimeEnv, copied);
#endif
  return copied;
}

/**
 * copyFromWindowResToSData support copy data in ascending/descending order
 * For interval query of both super table and table, copy the data in ascending order, since the output results are
 * ordered in SWindowResutl already. While handling the group by query for both table and super table,
 * all group result are completed already.
 *
 * @param pQInfo
 * @param result
 */
3819
void copyFromWindowResToSData(SQInfo *pQInfo, SWindowResInfo *pResultInfo) {
3820
  SQuery *pQuery = pQInfo->runtimeEnv.pQuery;
3821

3822
  int32_t orderType = (pQuery->pGroupbyExpr != NULL) ? pQuery->pGroupbyExpr->orderType : TSDB_ORDER_ASC;
3823
  int32_t numOfResult = doCopyToSData(pQInfo, pResultInfo, orderType);
3824

3825
  pQuery->rec.rows += numOfResult;
3826

3827
  assert(pQuery->rec.rows <= pQuery->rec.capacity);
3828 3829
}

H
Haojun Liao 已提交
3830
/**
 * Raise numOfRows of every window result to the largest numOfRes reported by its output columns.
 *
 * Columns produced by TSDB_FUNC_TS, TSDB_FUNC_TAG and TSDB_FUNC_TAGPRJ are not taken into account.
 * Nothing is done for interval queries.
 *
 * @param pRuntimeEnv  runtime environment owning the window results and the function contexts
 */
static void updateWindowResNumOfRes(SQueryRuntimeEnv *pRuntimeEnv) {
  SQuery *pQuery = pRuntimeEnv->pQuery;

  // interval queries are not handled here
  if (QUERY_IS_INTERVAL_QUERY(pQuery)) {
    return;
  }

  SWindowResInfo *pWindowResInfo = &pRuntimeEnv->windowResInfo;

  for (int32_t w = 0; w < pWindowResInfo->size; ++w) {
    SWindowResult *pWindowRes = &pWindowResInfo->pResult[w];

    for (int32_t col = 0; col < pQuery->numOfOutput; ++col) {
      int32_t functionId = pRuntimeEnv->pCtx[col].functionId;

      // ts/tag columns do not contribute to the row count of a window
      bool ignored = (functionId == TSDB_FUNC_TS) || (functionId == TSDB_FUNC_TAG) || (functionId == TSDB_FUNC_TAGPRJ);
      if (!ignored) {
        pWindowRes->numOfRows = MAX(pWindowRes->numOfRows, pWindowRes->resultInfo[col].numOfRes);
      }
    }
  }
}

H
Haojun Liao 已提交
3852
static void stableApplyFunctionsOnBlock(SQueryRuntimeEnv *pRuntimeEnv, SDataBlockInfo *pDataBlockInfo, SDataStatis *pStatis,
3853
    SArray *pDataBlock, __block_search_fn_t searchFn) {
3854
  SQuery *         pQuery = pRuntimeEnv->pQuery;
3855 3856
  STableQueryInfo* pTableQueryInfo = pQuery->current;
  
3857
  SWindowResInfo * pWindowResInfo = &pTableQueryInfo->windowResInfo;
H
hjxilinx 已提交
3858
  pQuery->pos = QUERY_IS_ASC_QUERY(pQuery)? 0 : pDataBlockInfo->rows - 1;
3859

H
Haojun Liao 已提交
3860
  if (pQuery->numOfFilterCols > 0 || pRuntimeEnv->pTSBuf != NULL || pRuntimeEnv->groupbyNormalCol) {
3861
    rowwiseApplyFunctions(pRuntimeEnv, pStatis, pDataBlockInfo, pWindowResInfo, pDataBlock);
3862
  } else {
3863
    blockwiseApplyFunctions(pRuntimeEnv, pStatis, pDataBlockInfo, pWindowResInfo, searchFn, pDataBlock);
3864 3865 3866
  }
}

H
Haojun Liao 已提交
3867
/**
 * Tell whether a table query still has results that were not returned to the client.
 *
 * @param pRuntimeEnv  runtime environment of the query
 * @return false once the limit has been reached; otherwise true when filled rows or window results
 *         are still pending, as described by the comments in the body
 */
bool queryHasRemainResForTableQuery(SQueryRuntimeEnv* pRuntimeEnv) {
  SQuery    *pQuery = pRuntimeEnv->pQuery;
  SFillInfo *pFillInfo = pRuntimeEnv->pFillInfo;

  // the limit is already satisfied, nothing more to return
  if (pQuery->limit.limit > 0 && pQuery->rec.total >= pQuery->limit.limit) {
    return false;
  }

  bool fillRequired = (pQuery->fillType != TSDB_FILL_NONE) && !isPointInterpoQuery(pQuery);
  if (!fillRequired) {
    // window results of a completed group-by/interval query are waiting to be returned to client
    return Q_STATUS_EQUAL(pQuery->status, QUERY_COMPLETED) &&
           (pRuntimeEnv->groupbyNormalCol || QUERY_IS_INTERVAL_QUERY(pQuery)) &&
           (pRuntimeEnv->windowResInfo.size > 0);
  }

  // rows not returned to client yet must go through the fill procedure first
  int32_t numOfRemain = taosNumOfRemainRows(pFillInfo);
  if (numOfRemain > 0) {
    return true;
  }

  /*
   * No buffered rows remain at this point. While the query is still running, the gap between two
   * result blocks is handled after the next data block has been retrieved from TSDB, so there is
   * nothing to report now.
   *
   * NOTE: a gap in front of a result set is filled only if that result set is not the first block;
   * the gap between the start of the query time window and the first row of the FIRST result block
   * is not filled.
   */
  if (!Q_STATUS_EQUAL(pQuery->status, QUERY_COMPLETED)) {
    return false;
  }

  int32_t numOfFilled = (int32_t)getFilledNumOfRes(pFillInfo, pQuery->window.ekey, (int32_t)pQuery->rec.capacity);
  return numOfFilled > 0;
}

/**
 * Serialize the current result rows and the table id list into a message buffer, then update the
 * query status.
 *
 * Layout written to data: the content of every output column (bytes * numOfRows each), the number of
 * entries in pQInfo->arrTableIdInfo (htonl), followed by one STableIdInfo per entry with uid/key
 * converted by htobe64 and tid by htonl.
 *
 * When the query is in QUERY_COMPLETED state it is switched to QUERY_OVER if nothing remains to be
 * returned (checked separately for super table and table queries).
 *
 * @param pQInfo     query object providing the result buffers and the table id list
 * @param numOfRows  number of rows copied for each output column
 * @param data       destination buffer; NOTE(review): no size is passed in, the caller is presumably
 *                   responsible for providing enough room - confirm against callers
 */
static void doCopyQueryResultToMsg(SQInfo *pQInfo, int32_t numOfRows, char *data) {
  SQuery *pQuery = pQInfo->runtimeEnv.pQuery;

  // 1. column data, one output column after another
  for (int32_t i = 0; i < pQuery->numOfOutput; ++i) {
    int32_t len = pQuery->pSelectExpr[i].bytes * numOfRows;

    memmove(data, pQuery->sdata[i]->data, len);
    data += len;
  }

  // 2. table id list, converted to network byte order
  int32_t numOfTables = (int32_t)taosArrayGetSize(pQInfo->arrTableIdInfo);
  *(int32_t*)data = htonl(numOfTables);
  data += sizeof(int32_t);

  for (int32_t t = 0; t < numOfTables; ++t, data += sizeof(STableIdInfo)) {
    STableIdInfo* pLocal = taosArrayGet(pQInfo->arrTableIdInfo, t);
    STableIdInfo* pWire = (STableIdInfo*)data;

    pWire->uid = htobe64(pLocal->uid);
    pWire->tid = htonl(pLocal->tid);
    pWire->key = htobe64(pLocal->key);
  }

  // 3. decide whether the query is over, for stable query and normal table query respectively
  if (!Q_STATUS_EQUAL(pQuery->status, QUERY_COMPLETED)) {
    return;
  }

  bool isOver = false;
  if (pQInfo->runtimeEnv.stableQuery) {
    isOver = IS_STASBLE_QUERY_OVER(pQInfo);
  } else {
    isOver = !queryHasRemainResForTableQuery(&pQInfo->runtimeEnv);
  }

  if (isOver) {
    setQueryStatus(pQuery, QUERY_OVER);
  }
}

H
Haojun Liao 已提交
3944
/**
 * Generate filled result rows and apply the pending query offset to them.
 *
 * Filled rows are produced batch by batch into pQuery->sdata. Batches that are completely covered by
 * pQuery->limit.offset are discarded (the offset is reduced accordingly); the first batch that is
 * larger than the remaining offset is shifted to the beginning of pDst and returned.
 *
 * @param pRuntimeEnv  runtime environment of the query
 * @param pDst         output pages whose leading rows are dropped when an offset applies
 * @param numOfFilled  not used by this function
 * @return number of rows available in the output, 0 when nothing is left after applying the offset
 */
int32_t doFillGapsInResults(SQueryRuntimeEnv* pRuntimeEnv, tFilePage **pDst, int32_t *numOfFilled) {
  SQInfo    *pQInfo = GET_QINFO_ADDR(pRuntimeEnv);
  SQuery    *pQuery = pRuntimeEnv->pQuery;
  SFillInfo *pFillInfo = pRuntimeEnv->pFillInfo;

  for (;;) {
    int32_t numOfRows = (int32_t)taosGenerateDataBlock(pFillInfo, (tFilePage**)pQuery->sdata, (int32_t)pQuery->rec.capacity);

    // todo apply limit output function
    // no offset (left): the generated rows are returned as they are
    if (pQuery->limit.offset == 0) {
      qDebug("QInfo:%p initial numOfRows:%d, generate filled result:%d rows", pQInfo, pFillInfo->numOfRows, numOfRows);
      return numOfRows;
    }

    if (pQuery->limit.offset >= numOfRows) {
      // the whole batch is swallowed by the offset
      qDebug("QInfo:%p initial numOfRows:%d, generate filled result:%d rows, offset:%" PRId64 ". Discard due to offset, "
             "remain:%d, new offset:%" PRId64, pQInfo, pFillInfo->numOfRows, numOfRows, pQuery->limit.offset, 0,
          pQuery->limit.offset - numOfRows);

      pQuery->limit.offset -= numOfRows;
      pQuery->rec.rows = 0;

      if (!queryHasRemainResForTableQuery(pRuntimeEnv)) {
        return 0;
      }

      continue;
    }

    // only the leading part of the batch is discarded
    qDebug("QInfo:%p initial numOfRows:%d, generate filled result:%d rows, offset:%" PRId64 ". Discard due to offset, remain:%" PRId64 ", new offset:%d",
           pQInfo, pFillInfo->numOfRows, numOfRows, pQuery->limit.offset, numOfRows - pQuery->limit.offset, 0);

    numOfRows -= (int32_t)pQuery->limit.offset;

    // todo !!!!there exactly number of interpo is not valid.
    // todo refactor move to the beginning of buffer
    for (int32_t col = 0; col < pQuery->numOfOutput; ++col) {
      char *base = pDst[col]->data;
      memmove(base, base + pQuery->pSelectExpr[col].bytes * pQuery->limit.offset,
              numOfRows * pQuery->pSelectExpr[col].bytes);
    }

    pQuery->limit.offset = 0;
    return numOfRows;
  }
}

3989
/**
 * Write the cost summary collected in pQInfo->runtimeEnv.summary to the debug log.
 *
 * @param pQInfo  query object whose cost counters are printed
 */
static void queryCostStatis(SQInfo *pQInfo) {
  SQueryCostInfo *pCost = &pQInfo->runtimeEnv.summary;

  // timing and block/row counters
  qDebug("QInfo:%p :cost summary: elapsed time:%" PRId64 " us, total blocks:%d, load block statis:%d,"
         " load data block:%d, total rows:%" PRId64 ", check rows:%" PRId64,
         pQInfo, pCost->elapsedTime, pCost->totalBlocks, pCost->loadBlockStatis,
         pCost->loadBlocks, pCost->totalRows, pCost->totalCheckedRows);

  // internalSupSize counter
  qDebug("QInfo:%p :cost summary: internal size:%" PRId64, pQInfo, pCost->internalSupSize);
}

4001 4002
/**
 * Consume the remaining query offset inside the given data block.
 *
 * If the offset equals the number of rows in the block, the block is skipped as a whole: lastKey of
 * the current table is moved one step past the block and the offset is cleared. Otherwise pQuery->pos
 * is set to the first row to process, lastKey is set to the timestamp of that row, the offset is
 * cleared and the query functions are applied to the block.
 *
 * @param pRuntimeEnv  runtime environment of the query
 * @param pBlockInfo   description of the block the offset ends in
 */
static void updateOffsetVal(SQueryRuntimeEnv *pRuntimeEnv, SDataBlockInfo *pBlockInfo) {
  SQuery          *pQuery = pRuntimeEnv->pQuery;
  STableQueryInfo *pTableQueryInfo = pQuery->current;

  if (pQuery->limit.offset == pBlockInfo->rows) {  // the current block is ignored completely
    int32_t step = GET_FORWARD_DIRECTION_FACTOR(pQuery->order.order);
    TSKEY   edge = QUERY_IS_ASC_QUERY(pQuery) ? pBlockInfo->window.ekey : pBlockInfo->window.skey;

    pTableQueryInfo->lastKey = edge + step;
    pQuery->limit.offset = 0;
    return;
  }

  // position of the first row that is not skipped, counted from the scan direction
  pQuery->pos = QUERY_IS_ASC_QUERY(pQuery) ? (int32_t)pQuery->limit.offset
                                           : pBlockInfo->rows - (int32_t)pQuery->limit.offset - 1;
  assert(pQuery->pos >= 0 && pQuery->pos <= pBlockInfo->rows - 1);

  // the first column of the block is read as the timestamp column
  SArray          *pDataBlock = tsdbRetrieveDataBlock(pRuntimeEnv->pQueryHandle, NULL);
  SColumnInfoData *pTsCol = taosArrayGet(pDataBlock, 0);
  TSKEY           *tsList = (TSKEY *) pTsCol->pData;

  // the offset has been consumed: continue from the row at pQuery->pos
  pTableQueryInfo->lastKey = tsList[pQuery->pos];
  pQuery->limit.offset = 0;

  int32_t numOfRes = tableApplyFunctionsOnBlock(pRuntimeEnv, pBlockInfo, NULL, binarySearchForKey, pDataBlock);

  qDebug("QInfo:%p check data block, brange:%" PRId64 "-%" PRId64 ", numOfRows:%d, numOfRes:%d, lastKey:%"PRId64, GET_QINFO_ADDR(pRuntimeEnv),
         pBlockInfo->window.skey, pBlockInfo->window.ekey, pBlockInfo->rows, numOfRes, pQuery->current->lastKey);
}
4036

4037 4038 4039 4040 4041
/**
 * Skip data blocks until the query offset has been consumed.
 *
 * Nothing is done when there is no offset or when filter columns exist. Blocks holding fewer rows
 * than the remaining offset are skipped as a whole; the block in which the offset ends is handed to
 * updateOffsetVal(). If the query has been killed, or terrno is set after the scan, control leaves
 * through longjmp(pRuntimeEnv->env, ...).
 *
 * @param pRuntimeEnv  runtime environment of the query
 */
void skipBlocks(SQueryRuntimeEnv *pRuntimeEnv) {
  SQuery *pQuery = pRuntimeEnv->pQuery;

  if (pQuery->limit.offset <= 0 || pQuery->numOfFilterCols > 0) {
    return;
  }

  pQuery->pos = 0;

  int32_t          step = GET_FORWARD_DIRECTION_FACTOR(pQuery->order.order);
  STableQueryInfo *pTableQueryInfo = pQuery->current;
  TsdbQueryHandleT pQueryHandle = pRuntimeEnv->pQueryHandle;
  SDataBlockInfo   blockInfo = SDATA_BLOCK_INITIALIZER;

  while (tsdbNextDataBlock(pQueryHandle)) {
    if (IS_QUERY_KILLED(GET_QINFO_ADDR(pRuntimeEnv))) {
      finalizeQueryResult(pRuntimeEnv); // clean up allocated resource during query
      longjmp(pRuntimeEnv->env, TSDB_CODE_TSC_QUERY_CANCELLED);
    }

    tsdbRetrieveDataBlockInfo(pQueryHandle, &blockInfo);

    if (pQuery->limit.offset <= blockInfo.rows) {
      // the offset ends in this block: find the appropriate start position in it
      updateOffsetVal(pRuntimeEnv, &blockInfo);
      break;
    }

    // the whole block is covered by the offset
    pQuery->limit.offset -= blockInfo.rows;

    TSKEY edge = QUERY_IS_ASC_QUERY(pQuery) ? blockInfo.window.ekey : blockInfo.window.skey;
    pTableQueryInfo->lastKey = edge + step;

    qDebug("QInfo:%p skip rows:%d, offset:%" PRId64, GET_QINFO_ADDR(pRuntimeEnv), blockInfo.rows,
           pQuery->limit.offset);
  }

  if (terrno != TSDB_CODE_SUCCESS) {
    longjmp(pRuntimeEnv->env, terrno);
  }
}
4076

H
Haojun Liao 已提交
4077
/**
 * Apply the query offset to an interval query by skipping whole time windows.
 *
 * *start is initialised with lastKey of the current table. The start position is not forwarded when
 * there is no offset, or when filter columns, a ts buffer or fill information exist. Otherwise data
 * blocks are scanned and limit.offset is decreased for every time window that is covered by the
 * scanned blocks. Because of holes in the data, the skipped range cannot be computed by multiplying
 * the interval by the offset.
 *
 * When the offset reaches zero, *start and pQuery->window.skey are moved to the new start position;
 * if the next window begins in the current block, the query functions are applied to the remainder
 * of that block as well.
 *
 * @param pRuntimeEnv  runtime environment of the query
 * @param start        output: start timestamp the query should continue from
 * @return always true; a non-zero terrno after the scan is reported through longjmp(pRuntimeEnv->env, terrno)
 */
static bool skipTimeInterval(SQueryRuntimeEnv *pRuntimeEnv, TSKEY* start) {
  SQuery *pQuery = pRuntimeEnv->pQuery;
  *start = pQuery->current->lastKey;

  // if queried with value filter, do NOT forward query start position
  if (pQuery->limit.offset <= 0 || pQuery->numOfFilterCols > 0 || pRuntimeEnv->pTSBuf != NULL || pRuntimeEnv->pFillInfo != NULL) {
    return true;
  }

  assert(pRuntimeEnv->windowResInfo.prevSKey == TSKEY_INITIAL_VAL);

  STimeWindow      w = TSWINDOW_INITIALIZER;
  SWindowResInfo  *pWindowResInfo = &pRuntimeEnv->windowResInfo;
  STableQueryInfo *pTableQueryInfo = pQuery->current;
  SDataBlockInfo   blockInfo = SDATA_BLOCK_INITIALIZER;

  while (tsdbNextDataBlock(pRuntimeEnv->pQueryHandle)) {
    tsdbRetrieveDataBlockInfo(pRuntimeEnv->pQueryHandle, &blockInfo);

    if (!QUERY_IS_ASC_QUERY(pQuery)) {
      getAlignQueryTimeWindow(pQuery, blockInfo.window.ekey, pQuery->window.ekey, blockInfo.window.ekey, &w);

      pWindowResInfo->startTime = pQuery->window.skey;
      pWindowResInfo->prevSKey = w.skey;
    } else if (pWindowResInfo->prevSKey == TSKEY_INITIAL_VAL) {
      // ascending scan: align the window only once, on the first block
      getAlignQueryTimeWindow(pQuery, blockInfo.window.skey, blockInfo.window.skey, pQuery->window.ekey, &w);

      pWindowResInfo->startTime = w.skey;
      pWindowResInfo->prevSKey = w.skey;
    }

    // the first time window
    STimeWindow win = getActiveTimeWindow(pWindowResInfo, pWindowResInfo->prevSKey, pQuery);

    while (pQuery->limit.offset > 0) {
      bool asc = QUERY_IS_ASC_QUERY(pQuery);

      // this window counts against the offset
      if ((asc && win.ekey <= blockInfo.window.ekey) || (!asc && win.ekey >= blockInfo.window.skey)) {
        pQuery->limit.offset -= 1;
        pWindowResInfo->prevSKey = win.skey;
      }

      STimeWindow tw = win;
      GET_NEXT_TIMEWINDOW(pQuery, &tw);

      // does the next time window still overlap the current data block?
      bool nextInBlock = (asc && tw.skey <= blockInfo.window.ekey) || (!asc && tw.ekey >= blockInfo.window.skey);

      if (!nextInBlock) {
        if (pQuery->limit.offset == 0) {
          // offset consumed and the next window lies outside this block: just record the new start
          *start = tw.skey;
          pQuery->window.skey = tw.skey;
          pWindowResInfo->prevSKey = tw.skey;
          return true;
        }

        break;  // offset is not 0, and next time window begins or ends in the next block.
      }

      /*
       * The next time window still starts from the current data block: load the block and find the
       * start position of the next queried time window in the timestamp column.
       * TODO: optimize performance. Only the primary timestamp column is required here, and blocks
       * only need to be loaded if the first actually required time window resides in them.
       */
      SArray          *pDataBlock = tsdbRetrieveDataBlock(pRuntimeEnv->pQueryHandle, NULL);
      SColumnInfoData *pColInfoData = taosArrayGet(pDataBlock, 0);

      tw = win;
      int32_t startPos =
          getNextQualifiedWindow(pRuntimeEnv, &tw, &blockInfo, pColInfoData->pData, binarySearchForKey, -1);
      assert(startPos >= 0);

      // set the abort info
      pQuery->pos = startPos;

      if (pQuery->limit.offset == 0) {
        // reset the query start timestamp
        pTableQueryInfo->win.skey = ((TSKEY *)pColInfoData->pData)[startPos];
        pQuery->window.skey = pTableQueryInfo->win.skey;
        *start = pTableQueryInfo->win.skey;

        pWindowResInfo->prevSKey = tw.skey;

        // process the rest of the block, then restore the window index
        int32_t index = pRuntimeEnv->windowResInfo.curIndex;
        int32_t numOfRes = tableApplyFunctionsOnBlock(pRuntimeEnv, &blockInfo, NULL, binarySearchForKey, pDataBlock);
        pRuntimeEnv->windowResInfo.curIndex = index;

        qDebug("QInfo:%p check data block, brange:%" PRId64 "-%" PRId64 ", numOfRows:%d, numOfRes:%d, lastKey:%"PRId64,
               GET_QINFO_ADDR(pRuntimeEnv), blockInfo.window.skey, blockInfo.window.ekey, blockInfo.rows, numOfRes, pQuery->current->lastKey);

        return true;
      }

      // offset not consumed yet: continue with the next window inside this block
      pTableQueryInfo->lastKey = ((TSKEY *)pColInfoData->pData)[startPos];
      pWindowResInfo->prevSKey = tw.skey;
      win = tw;
    }
  }

  // check for error
  if (terrno != TSDB_CODE_SUCCESS) {
    longjmp(pRuntimeEnv->env, terrno);
  }

  return true;
}

B
Bomin Zhang 已提交
4203
/**
 * Create the TSDB query handle of the query and store it in pQInfo->runtimeEnv.pQueryHandle.
 *
 * No handle is created for tag-only queries, nor for super table queries that are neither interval
 * queries nor fixed-output queries.
 *
 * @param tsdb           TSDB instance handed to the tsdbQuery* functions
 * @param pQInfo         query object
 * @param isSTableQuery  true when the query targets a super table
 * @return TSDB_CODE_SUCCESS when no handle is needed, otherwise the value of terrno after the handle
 *         has been requested
 */
static int32_t setupQueryHandle(void* tsdb, SQInfo* pQInfo, bool isSTableQuery) {
  SQueryRuntimeEnv *pRuntimeEnv = &pQInfo->runtimeEnv;
  SQuery           *pQuery = pRuntimeEnv->pQuery;

  if (onlyQueryTags(pQuery)) {
    return TSDB_CODE_SUCCESS;
  }

  if (isSTableQuery && (!QUERY_IS_INTERVAL_QUERY(pQuery)) && (!isFixedOutputQuery(pRuntimeEnv))) {
    return TSDB_CODE_SUCCESS;
  }

  STsdbQueryCond cond = {
    .order     = pQuery->order.order,
    .colList   = pQuery->colList,
    .numOfCols = pQuery->numOfCols,
  };

  TIME_WINDOW_COPY(cond.twindow, pQuery->window);

  // an ascending scan over one single table without interval/group-by/fixed output uses the time
  // window recorded for that table instead of the query window
  bool useTableWindow = !isSTableQuery
      && (pQInfo->tableqinfoGroupInfo.numOfTables == 1)
      && (cond.order == TSDB_ORDER_ASC)
      && (!QUERY_IS_INTERVAL_QUERY(pQuery))
      && (!isGroupbyNormalCol(pQuery->pGroupbyExpr))
      && (!isFixedOutputQuery(pRuntimeEnv));

  if (useTableWindow) {
    SArray          *pGroup = GET_TABLEGROUP(pQInfo, 0);
    STableQueryInfo *pTableInfo = taosArrayGetP(pGroup, 0);
    cond.twindow = pTableInfo->win;
  }

  // terrno carries the outcome of the handle creation back to the caller
  terrno = TSDB_CODE_SUCCESS;

  if (isFirstLastRowQuery(pQuery)) {
    pRuntimeEnv->pQueryHandle = tsdbQueryLastRow(tsdb, &cond, &pQInfo->tableGroupInfo, pQInfo);
  } else if (isPointInterpoQuery(pQuery)) {
    pRuntimeEnv->pQueryHandle = tsdbQueryRowsInExternalWindow(tsdb, &cond, &pQInfo->tableGroupInfo, pQInfo);
  } else {
    pRuntimeEnv->pQueryHandle = tsdbQueryTables(tsdb, &cond, &pQInfo->tableGroupInfo, pQInfo);
  }

  return terrno;
}

4247 4248 4249 4250 4251 4252 4253 4254 4255
/**
 * Build the fill column descriptors for all output columns of a query.
 *
 * One SFillColInfo is created per output column; col.offset is the accumulated byte size of the
 * preceding columns, and the fill value is taken from pQuery->fillVal.
 *
 * @param pQuery  query whose numOfOutput, pSelectExpr and fillVal are read
 * @return array of pQuery->numOfOutput descriptors allocated with calloc(), or NULL when the
 *         allocation fails. NOTE(review): the array is handed to taosInitFillInfo() by the caller in
 *         this file; which side releases it is not visible here - confirm before freeing.
 */
static SFillColInfo* taosCreateFillColInfo(SQuery* pQuery) {
  int32_t numOfCols = pQuery->numOfOutput;
  int32_t offset = 0;

  SFillColInfo* pFillCol = calloc(numOfCols, sizeof(*pFillCol));
  if (pFillCol == NULL) {
    // allocation failed: report it to the caller instead of dereferencing a null pointer below
    return NULL;
  }

  for (int32_t i = 0; i < numOfCols; ++i) {
    SExprInfo* pExprInfo = &pQuery->pSelectExpr[i];

    pFillCol[i].col.bytes  = pExprInfo->bytes;
    pFillCol[i].col.type   = (int8_t)pExprInfo->type;
    pFillCol[i].col.offset = offset;
    pFillCol[i].flag       = TSDB_COL_NORMAL;    // always a normal column for table query
    pFillCol[i].functionId = pExprInfo->base.functionId;
    pFillCol[i].fillVal.i  = pQuery->fillVal[i];

    offset += pExprInfo->bytes;
  }

  return pFillCol;
}

4268
/**
 * Initialise the runtime environment of a query before execution starts.
 *
 * In order: derives query properties, creates the TSDB query handle, stores tsdb/vgId and the ts
 * buffer, sets up the runtime environment, creates the disk based result buffer and the window
 * result set where the query type needs them, creates the fill information when fill is requested,
 * and finally marks the query as QUERY_NOT_COMPLETED.
 *
 * @param pQInfo         query object to initialise
 * @param pTsBuf         optional ts buffer, may be NULL
 * @param tsdb           TSDB instance the query runs on
 * @param vgId           vnode group id stored in pQInfo
 * @param isSTableQuery  true when the query targets a super table
 * @return TSDB_CODE_SUCCESS, or the error code of the first failing step
 */
int32_t doInitQInfo(SQInfo *pQInfo, STSBuf *pTsBuf, void *tsdb, int32_t vgId, bool isSTableQuery) {
  SQueryRuntimeEnv *pRuntimeEnv = &pQInfo->runtimeEnv;
  SQuery           *pQuery = pRuntimeEnv->pQuery;

  pQuery->precision = tsdbGetCfg(tsdb)->precision;
  pRuntimeEnv->topBotQuery = isTopBottomQuery(pQuery);
  pRuntimeEnv->hasTagResults = hasTagValOutput(pQuery);

  setScanLimitationByResultBuffer(pQuery);
  changeExecuteScanOrder(pQInfo, false);

  int32_t code = setupQueryHandle(tsdb, pQInfo, isSTableQuery);
  if (code != TSDB_CODE_SUCCESS) {
    return code;
  }

  pQInfo->tsdb = tsdb;
  pQInfo->vgId = vgId;

  pRuntimeEnv->pQuery = pQuery;
  pRuntimeEnv->pTSBuf = pTsBuf;
  pRuntimeEnv->cur.vgroupIndex = -1;
  pRuntimeEnv->stableQuery = isSTableQuery;
  pRuntimeEnv->prevGroupId = INT32_MIN;
  pRuntimeEnv->groupbyNormalCol = isGroupbyNormalCol(pQuery->pGroupbyExpr);

  if (pTsBuf != NULL) {
    // traverse the ts buffer forwards when its order matches the query order, backwards otherwise
    int16_t order = TSDB_ORDER_DESC;
    if (pQuery->order.order == pRuntimeEnv->pTSBuf->tsOrder) {
      order = TSDB_ORDER_ASC;
    }

    tsBufSetTraverseOrder(pRuntimeEnv->pTSBuf, order);
  }

  // create runtime environment
  code = setupQueryRuntimeEnv(pRuntimeEnv, pQuery->order.order);
  if (code != TSDB_CODE_SUCCESS) {
    return code;
  }

  int32_t ps = DEFAULT_PAGE_SIZE;
  int32_t rowsize = 0;
  getIntermediateBufInfo(pRuntimeEnv, &ps, &rowsize);

  int32_t TWOMB = 1024*1024*2;  // fourth argument of createDiskbasedResultBuffer

  if (isSTableQuery && !onlyQueryTags(pRuntimeEnv->pQuery)) {
    code = createDiskbasedResultBuffer(&pRuntimeEnv->pResultBuf, rowsize, ps, TWOMB, pQInfo);
    if (code != TSDB_CODE_SUCCESS) {
      return code;
    }

    if (!QUERY_IS_INTERVAL_QUERY(pQuery)) {
      int16_t type = TSDB_DATA_TYPE_NULL;
      int32_t threshold = 0;

      if (!pRuntimeEnv->groupbyNormalCol) {
        type = TSDB_DATA_TYPE_INT;  // group id
        threshold = (int32_t)(GET_NUM_OF_TABLEGROUP(pQInfo));
        if (threshold < 8) {
          threshold = 8;
        }
      } else {  // group by columns not tags;
        type = getGroupbyColumnType(pQuery, pQuery->pGroupbyExpr);
        threshold = 4000;
      }

      code = initWindowResInfo(&pRuntimeEnv->windowResInfo, pRuntimeEnv, 8, threshold, type);
      if (code != TSDB_CODE_SUCCESS) {
        return code;
      }
    }
  } else if (pRuntimeEnv->groupbyNormalCol || QUERY_IS_INTERVAL_QUERY(pQuery)) {
    int32_t numOfResultRows = getInitialPageNum(pQInfo);
    getIntermediateBufInfo(pRuntimeEnv, &ps, &rowsize);

    code = createDiskbasedResultBuffer(&pRuntimeEnv->pResultBuf, rowsize, ps, TWOMB, pQInfo);
    if (code != TSDB_CODE_SUCCESS) {
      return code;
    }

    // window keys are group-by column values or timestamps
    int16_t type = TSDB_DATA_TYPE_TIMESTAMP;
    if (pRuntimeEnv->groupbyNormalCol) {
      type = getGroupbyColumnType(pQuery, pQuery->pGroupbyExpr);
    }

    code = initWindowResInfo(&pRuntimeEnv->windowResInfo, pRuntimeEnv, numOfResultRows, 4096, type);
    if (code != TSDB_CODE_SUCCESS) {
      return code;
    }
  }

  if (pQuery->fillType != TSDB_FILL_NONE && !isPointInterpoQuery(pQuery)) {
    SFillColInfo* pColInfo = taosCreateFillColInfo(pQuery);
    STimeWindow   w = TSWINDOW_INITIALIZER;

    // the aligned window is computed on the ordered [sk, ek] range regardless of scan direction
    TSKEY sk = MIN(pQuery->window.skey, pQuery->window.ekey);
    TSKEY ek = MAX(pQuery->window.skey, pQuery->window.ekey);
    getAlignQueryTimeWindow(pQuery, pQuery->window.skey, sk, ek, &w);

    pRuntimeEnv->pFillInfo = taosInitFillInfo(pQuery->order.order, w.skey, 0, (int32_t)pQuery->rec.capacity, pQuery->numOfOutput,
                                              pQuery->slidingTime, pQuery->slidingTimeUnit, (int8_t)pQuery->precision,
                                              pQuery->fillType, pColInfo);
  }

  setQueryStatus(pQuery, QUERY_NOT_COMPLETED);
  return TSDB_CODE_SUCCESS;
}

4376
/**
 * Clear the "complete" flag of the result info of every output column that has one.
 *
 * @param pRuntimeEnv  runtime environment owning the function contexts
 */
static void enableExecutionForNextTable(SQueryRuntimeEnv *pRuntimeEnv) {
  int32_t numOfOutput = pRuntimeEnv->pQuery->numOfOutput;

  for (int32_t col = 0; col < numOfOutput; ++col) {
    SResultInfo *pResInfo = GET_RES_INFO(&pRuntimeEnv->pCtx[col]);
    if (pResInfo == NULL) {
      continue;
    }

    pResInfo->complete = false;
  }
}

H
Haojun Liao 已提交
4387 4388 4389 4390 4391 4392 4393 4394 4395 4396 4397 4398 4399 4400 4401 4402 4403
/**
 * Prepare the execution environment for one data block of the given table.
 *
 * Interval queries set the interval query range from the start key of the block and, when tag
 * results or a ts buffer are involved, the additional table info. All other queries set the
 * execution context of the table's group with the key one step past the end key of the block.
 *
 * @param pQInfo           query object
 * @param pTableQueryInfo  query info of the table the block belongs to
 * @param pBlockInfo       description of the data block
 */
static FORCE_INLINE void setEnvForEachBlock(SQInfo* pQInfo, STableQueryInfo* pTableQueryInfo, SDataBlockInfo* pBlockInfo) {
  SQueryRuntimeEnv* pRuntimeEnv = &pQInfo->runtimeEnv;
  SQuery*           pQuery = pRuntimeEnv->pQuery;

  if (QUERY_IS_INTERVAL_QUERY(pQuery)) {
    setIntervalQueryRange(pQInfo, pBlockInfo->window.skey);

    if (pRuntimeEnv->hasTagResults || pRuntimeEnv->pTSBuf != NULL) {
      setAdditionalInfo(pQInfo, pTableQueryInfo->pTable, pTableQueryInfo);
    }

    return;
  }

  int32_t step = GET_FORWARD_DIRECTION_FACTOR(pQuery->order.order);
  setExecutionContext(pQInfo, pTableQueryInfo->groupIndex, pBlockInfo->window.ekey + step);
}

H
Haojun Liao 已提交
4404
/**
 * Scan all data blocks delivered by the query handle and apply the query functions to them.
 *
 * The primary handle is used during the master scan, the secondary handle otherwise. For each block
 * the owning table is looked up by tid; the scan stops when the table is unknown. Blocks discarded
 * by loadDataBlockOnDemand() only advance lastKey of the table. A killed query, or a non-zero terrno
 * after the scan, leaves through longjmp(pRuntimeEnv->env, ...).
 *
 * @param pQInfo  query object
 * @return difference of the two taosGetTimestampMs() readings taken before and after the scan
 */
static int64_t scanMultiTableDataBlocks(SQInfo *pQInfo) {
  SQueryRuntimeEnv *pRuntimeEnv = &pQInfo->runtimeEnv;
  SQuery           *pQuery = pRuntimeEnv->pQuery;
  SQueryCostInfo   *pCost = &pRuntimeEnv->summary;

  int64_t startTs = taosGetTimestampMs();

  TsdbQueryHandleT pQueryHandle = IS_MASTER_SCAN(pRuntimeEnv)? pRuntimeEnv->pQueryHandle : pRuntimeEnv->pSecQueryHandle;
  SDataBlockInfo   blockInfo = SDATA_BLOCK_INITIALIZER;

  int32_t step = GET_FORWARD_DIRECTION_FACTOR(pQuery->order.order);

  while (tsdbNextDataBlock(pQueryHandle)) {
    pCost->totalBlocks += 1;

    if (IS_QUERY_KILLED(pQInfo)) {
      longjmp(pRuntimeEnv->env, TSDB_CODE_TSC_QUERY_CANCELLED);
    }

    tsdbRetrieveDataBlockInfo(pQueryHandle, &blockInfo);

    // find the table this block belongs to
    STableQueryInfo **ppTableInfo = (STableQueryInfo**) taosHashGet(pQInfo->tableqinfoGroupInfo.map, &blockInfo.tid, sizeof(blockInfo.tid));
    if (ppTableInfo == NULL) {
      break;
    }

    pQuery->current = *ppTableInfo;
    CHECK_QUERY_TIME_RANGE(pQuery, *ppTableInfo);

    if (!pRuntimeEnv->groupbyNormalCol) {
      setEnvForEachBlock(pQInfo, *ppTableInfo, &blockInfo);
    }

    SDataStatis *pStatis = NULL;
    SArray      *pDataBlock = NULL;

    if (loadDataBlockOnDemand(pRuntimeEnv, pQueryHandle, &blockInfo, &pStatis, &pDataBlock) == BLK_DATA_DISCARD) {
      // the block is not needed: just move lastKey one step past it
      pQuery->current->lastKey = QUERY_IS_ASC_QUERY(pQuery)? blockInfo.window.ekey + step:blockInfo.window.skey + step;
      continue;
    }

    pCost->totalRows += blockInfo.rows;
    stableApplyFunctionsOnBlock(pRuntimeEnv, &blockInfo, pStatis, pDataBlock, binarySearchForKey);

    qDebug("QInfo:%p check data block, uid:%"PRId64", tid:%d, brange:%" PRId64 "-%" PRId64 ", numOfRows:%d, lastKey:%" PRId64,
           pQInfo, blockInfo.uid, blockInfo.tid, blockInfo.window.skey, blockInfo.window.ekey, blockInfo.rows, pQuery->current->lastKey);
  }

  if (terrno != TSDB_CODE_SUCCESS) {
    longjmp(pRuntimeEnv->env, terrno);
  }

  updateWindowResNumOfRes(pRuntimeEnv);

  return taosGetTimestampMs() - startTs;
}

4460 4461
/**
 * Prepare the runtime environment for scanning one table of a super table query.
 *
 * The table is taken from the first table group by position. The previous tsdb query
 * handle (if any) is released and a new one that covers only this table is created.
 * When a ts buffer is attached, its cursor is either positioned at the start of the
 * data of the tag kept in pCtx[0] or restored from the saved cursor. Finally the output
 * buffers of the function contexts are initialized.
 *
 * @param pQInfo  query info object
 * @param index   position of the table inside the first table group
 * @return        false when the ts buffer holds no data for the current tag, true otherwise.
 *                The function does not return (longjmp with terrno) if the tsdb handle
 *                cannot be created.
 */
static bool multiTableMultioutputHelper(SQInfo *pQInfo, int32_t index) {
  SQueryRuntimeEnv *pRuntimeEnv = &pQInfo->runtimeEnv;
  SQuery           *pQuery = pRuntimeEnv->pQuery;

  setQueryStatus(pQuery, QUERY_NOT_COMPLETED);

  SArray          *tableList = GET_TABLEGROUP(pQInfo, 0);
  STableQueryInfo *pTableInfo = taosArrayGetP(tableList, index);

  // tag values are only needed when tags are part of the output or a ts buffer is attached
  if (pRuntimeEnv->hasTagResults || pRuntimeEnv->pTSBuf != NULL) {
    setTagVal(pRuntimeEnv, pTableInfo->pTable, pQInfo->tsdb);
  }

  STableId *pTableId = TSDB_TABLEID(pTableInfo->pTable);
  qDebug("QInfo:%p query on (%d): uid:%" PRIu64 ", tid:%d, qrange:%" PRId64 "-%" PRId64, pQInfo, index,
         pTableId->uid, pTableId->tid, pTableInfo->lastKey, pTableInfo->win.ekey);

  // resume from the last visited key of this table
  STsdbQueryCond cond = {
      .twindow   = {pTableInfo->lastKey, pTableInfo->win.ekey},
      .order     = pQuery->order.order,
      .colList   = pQuery->colList,
      .numOfCols = pQuery->numOfCols,
  };

  // release the handle of the previous table before a new one is created
  if (pRuntimeEnv->pQueryHandle != NULL) {
    tsdbCleanupQueryHandle(pRuntimeEnv->pQueryHandle);
    pRuntimeEnv->pQueryHandle = NULL;
  }

  // todo refactor: build a temporary group list that contains only the current table
  SArray *groupList = taosArrayInit(1, POINTER_BYTES);
  SArray *singleTable = taosArrayInit(1, sizeof(STableKeyInfo));

  STableKeyInfo keyInfo = {.pTable = pTableInfo->pTable, .lastKey = pTableInfo->lastKey};
  taosArrayPush(singleTable, &keyInfo);
  taosArrayPush(groupList, &singleTable);

  STableGroupInfo groupInfo = {.numOfTables = 1, .pGroupList = groupList};

  pRuntimeEnv->pQueryHandle = tsdbQueryTables(pQInfo->tsdb, &cond, &groupInfo, pQInfo);

  // the temporary lists are released before the result is checked, so nothing leaks on longjmp
  taosArrayDestroy(singleTable);
  taosArrayDestroy(groupList);

  if (pRuntimeEnv->pQueryHandle == NULL) {
    longjmp(pRuntimeEnv->env, terrno);
  }

  if (pRuntimeEnv->pTSBuf != NULL) {
    if (pRuntimeEnv->cur.vgroupIndex != -1) {
      // continue from the cursor saved by the previous round
      tsBufSetCursor(pRuntimeEnv->pTSBuf, &pRuntimeEnv->cur);
    } else {
      STSElem elem = tsBufGetElemStartPos(pRuntimeEnv->pTSBuf, 0, &pRuntimeEnv->pCtx[0].tag);

      // failed to find data with the specified tag value
      if (elem.vnode < 0) {
        return false;
      }
    }
  }

  initCtxOutputBuf(pRuntimeEnv);
  return true;
}

/**
 * super table query handler
 * 1. super table projection query, group-by on normal columns query, ts-comp query
 * 2. point interpolation query, last row query
 *
 * @param pQInfo
 */
4530
static void sequentialTableProcess(SQInfo *pQInfo) {
4531
  SQueryRuntimeEnv *pRuntimeEnv = &pQInfo->runtimeEnv;
4532
  SQuery *          pQuery = pRuntimeEnv->pQuery;
4533
  setQueryStatus(pQuery, QUERY_COMPLETED);
4534

H
Haojun Liao 已提交
4535
  size_t numOfGroups = GET_NUM_OF_TABLEGROUP(pQInfo);
4536

H
Haojun Liao 已提交
4537
  if (isPointInterpoQuery(pQuery) || isFirstLastRowQuery(pQuery)) {
4538 4539
    resetCtxOutputBuf(pRuntimeEnv);
    assert(pQuery->limit.offset == 0 && pQuery->limit.limit != 0);
4540

4541
    while (pQInfo->groupIndex < numOfGroups) {
H
Haojun Liao 已提交
4542
      SArray* group = taosArrayGetP(pQInfo->tableGroupInfo.pGroupList, pQInfo->groupIndex);
4543

S
TD-1057  
Shengliang Guan 已提交
4544
      qDebug("QInfo:%p last_row query on group:%d, total group:%" PRIzu ", current group:%p", pQInfo, pQInfo->groupIndex,
dengyihao's avatar
dengyihao 已提交
4545
             numOfGroups, group);
H
Haojun Liao 已提交
4546 4547 4548 4549 4550 4551 4552

      STsdbQueryCond cond = {
          .colList = pQuery->colList,
          .order   = pQuery->order.order,
          .numOfCols = pQuery->numOfCols,
      };

S
TD-1057  
Shengliang Guan 已提交
4553 4554
      TIME_WINDOW_COPY(cond.twindow, pQuery->window);

H
Haojun Liao 已提交
4555 4556 4557 4558 4559 4560 4561 4562 4563 4564 4565 4566
      SArray *g1 = taosArrayInit(1, POINTER_BYTES);
      SArray *tx = taosArrayClone(group);
      taosArrayPush(g1, &tx);
      
      STableGroupInfo gp = {.numOfTables = taosArrayGetSize(tx), .pGroupList = g1};

      // include only current table
      if (pRuntimeEnv->pQueryHandle != NULL) {
        tsdbCleanupQueryHandle(pRuntimeEnv->pQueryHandle);
        pRuntimeEnv->pQueryHandle = NULL;
      }
      
4567
      if (isFirstLastRowQuery(pQuery)) {
H
Haojun Liao 已提交
4568
        pRuntimeEnv->pQueryHandle = tsdbQueryLastRow(pQInfo->tsdb, &cond, &gp, pQInfo);
H
Haojun Liao 已提交
4569
      } else {
H
Haojun Liao 已提交
4570
        pRuntimeEnv->pQueryHandle = tsdbQueryRowsInExternalWindow(pQInfo->tsdb, &cond, &gp, pQInfo);
4571
      }
B
Bomin Zhang 已提交
4572 4573 4574 4575 4576 4577

      taosArrayDestroy(tx);
      taosArrayDestroy(g1);
      if (pRuntimeEnv->pQueryHandle == NULL) {
        longjmp(pRuntimeEnv->env, terrno);
      }
H
Haojun Liao 已提交
4578

H
Haojun Liao 已提交
4579
      initCtxOutputBuf(pRuntimeEnv);
4580
      
4581
      SArray* s = tsdbGetQueriedTableList(pRuntimeEnv->pQueryHandle);
4582 4583
      assert(taosArrayGetSize(s) >= 1);
      
4584
      setTagVal(pRuntimeEnv, taosArrayGetP(s, 0), pQInfo->tsdb);
4585 4586 4587
      if (isFirstLastRowQuery(pQuery)) {
        assert(taosArrayGetSize(s) == 1);
      }
H
Haojun Liao 已提交
4588

dengyihao's avatar
dengyihao 已提交
4589
      taosArrayDestroy(s);
H
Haojun Liao 已提交
4590

H
Haojun Liao 已提交
4591
      // here we simply set the first table as current table
4592 4593 4594
      SArray* first = GET_TABLEGROUP(pQInfo, pQInfo->groupIndex);
      pQuery->current = taosArrayGetP(first, 0);

4595
      scanOneTableDataBlocks(pRuntimeEnv, pQuery->current->lastKey);
H
Haojun Liao 已提交
4596 4597 4598 4599 4600 4601 4602 4603 4604 4605 4606 4607
      
      int64_t numOfRes = getNumOfResult(pRuntimeEnv);
      if (numOfRes > 0) {
        pQuery->rec.rows += numOfRes;
        forwardCtxOutputBuf(pRuntimeEnv, numOfRes);
      }
      
      skipResults(pRuntimeEnv);
      pQInfo->groupIndex += 1;

      // enable execution for next table, when handling the projection query
      enableExecutionForNextTable(pRuntimeEnv);
4608 4609 4610 4611 4612 4613

      if (pQuery->rec.rows >= pQuery->rec.capacity) {
        setQueryStatus(pQuery, QUERY_RESBUF_FULL);
        break;
      }
    }
H
Haojun Liao 已提交
4614
  } else if (pRuntimeEnv->groupbyNormalCol) { // group-by on normal columns query
4615
    while (pQInfo->groupIndex < numOfGroups) {
H
Haojun Liao 已提交
4616
      SArray* group = taosArrayGetP(pQInfo->tableGroupInfo.pGroupList, pQInfo->groupIndex);
4617

S
TD-1057  
Shengliang Guan 已提交
4618
      qDebug("QInfo:%p group by normal columns group:%d, total group:%" PRIzu "", pQInfo, pQInfo->groupIndex, numOfGroups);
4619 4620 4621 4622 4623 4624 4625

      STsdbQueryCond cond = {
          .colList = pQuery->colList,
          .order   = pQuery->order.order,
          .numOfCols = pQuery->numOfCols,
      };

S
TD-1057  
Shengliang Guan 已提交
4626 4627
      TIME_WINDOW_COPY(cond.twindow, pQuery->window);

4628 4629 4630 4631 4632 4633 4634 4635 4636 4637 4638 4639
      SArray *g1 = taosArrayInit(1, POINTER_BYTES);
      SArray *tx = taosArrayClone(group);
      taosArrayPush(g1, &tx);

      STableGroupInfo gp = {.numOfTables = taosArrayGetSize(tx), .pGroupList = g1};

      // include only current table
      if (pRuntimeEnv->pQueryHandle != NULL) {
        tsdbCleanupQueryHandle(pRuntimeEnv->pQueryHandle);
        pRuntimeEnv->pQueryHandle = NULL;
      }

H
Haojun Liao 已提交
4640
      // no need to update the lastkey for each table
4641
      pRuntimeEnv->pQueryHandle = tsdbQueryTables(pQInfo->tsdb, &cond, &gp, pQInfo);
H
Haojun Liao 已提交
4642

B
Bomin Zhang 已提交
4643 4644
      taosArrayDestroy(g1);
      taosArrayDestroy(tx);
B
Bomin Zhang 已提交
4645 4646 4647
      if (pRuntimeEnv->pQueryHandle == NULL) {
        longjmp(pRuntimeEnv->env, terrno);
      }
4648

4649
      SArray* s = tsdbGetQueriedTableList(pRuntimeEnv->pQueryHandle);
4650 4651
      assert(taosArrayGetSize(s) >= 1);

4652
      setTagVal(pRuntimeEnv, taosArrayGetP(s, 0), pQInfo->tsdb);
4653 4654 4655 4656 4657 4658 4659 4660

      // here we simply set the first table as current table
      scanMultiTableDataBlocks(pQInfo);
      pQInfo->groupIndex += 1;

      SWindowResInfo *pWindowResInfo = &pRuntimeEnv->windowResInfo;

        // no results generated for current group, continue to try the next group
dengyihao's avatar
dengyihao 已提交
4661
      taosArrayDestroy(s); 
4662 4663 4664 4665 4666 4667 4668 4669 4670 4671 4672 4673 4674 4675
      if (pWindowResInfo->size <= 0) {
        continue;
      }

      for (int32_t i = 0; i < pWindowResInfo->size; ++i) {
        SWindowStatus *pStatus = &pWindowResInfo->pResult[i].status;
        pStatus->closed = true;  // enable return all results for group by normal columns

        SWindowResult *pResult = &pWindowResInfo->pResult[i];
        for (int32_t j = 0; j < pQuery->numOfOutput; ++j) {
          pResult->numOfRows = MAX(pResult->numOfRows, pResult->resultInfo[j].numOfRes);
        }
      }

4676
      qDebug("QInfo:%p generated groupby columns results %d rows for group %d completed", pQInfo, pWindowResInfo->size,
4677 4678 4679 4680 4681 4682 4683
          pQInfo->groupIndex);
      int32_t currentGroupIndex = pQInfo->groupIndex;

      pQuery->rec.rows = 0;
      pQInfo->groupIndex = 0;

      ensureOutputBufferSimple(pRuntimeEnv, pWindowResInfo->size);
4684
      copyFromWindowResToSData(pQInfo, pWindowResInfo);
4685 4686 4687 4688 4689 4690

      pQInfo->groupIndex = currentGroupIndex;  //restore the group index
      assert(pQuery->rec.rows == pWindowResInfo->size);

      clearClosedTimeWindow(pRuntimeEnv);
      break;
4691 4692 4693
    }
  } else {
    /*
4694
     * 1. super table projection query, 2. ts-comp query
4695 4696 4697
     * if the subgroup index is larger than 0, results generated by group by tbname,k is existed.
     * we need to return it to client in the first place.
     */
4698
    if (pQInfo->groupIndex > 0) {
4699
      copyFromWindowResToSData(pQInfo, &pRuntimeEnv->windowResInfo);
4700
      pQuery->rec.total += pQuery->rec.rows;
4701

4702
      if (pQuery->rec.rows > 0) {
4703 4704 4705
        return;
      }
    }
4706

4707
    // all data have returned already
4708
    if (pQInfo->tableIndex >= pQInfo->tableqinfoGroupInfo.numOfTables) {
4709 4710
      return;
    }
4711

4712 4713
    resetCtxOutputBuf(pRuntimeEnv);
    resetTimeWindowInfo(pRuntimeEnv, &pRuntimeEnv->windowResInfo);
4714

H
Haojun Liao 已提交
4715
    SArray *group = GET_TABLEGROUP(pQInfo, 0);
4716 4717
    assert(taosArrayGetSize(group) == pQInfo->tableqinfoGroupInfo.numOfTables &&
           1 == taosArrayGetSize(pQInfo->tableqinfoGroupInfo.pGroupList));
4718

4719
    while (pQInfo->tableIndex < pQInfo->tableqinfoGroupInfo.numOfTables) {
H
Haojun Liao 已提交
4720
      if (IS_QUERY_KILLED(pQInfo)) {
H
Haojun Liao 已提交
4721
        longjmp(pRuntimeEnv->env, TSDB_CODE_TSC_QUERY_CANCELLED);
4722
      }
4723

4724
      pQuery->current = taosArrayGetP(group, pQInfo->tableIndex);
4725
      if (!multiTableMultioutputHelper(pQInfo, pQInfo->tableIndex)) {
4726
        pQInfo->tableIndex++;
4727 4728
        continue;
      }
4729

H
hjxilinx 已提交
4730
      // TODO handle the limit offset problem
4731
      if (pQuery->numOfFilterCols == 0 && pQuery->limit.offset > 0) {
4732 4733
        if (Q_STATUS_EQUAL(pQuery->status, QUERY_COMPLETED)) {
          pQInfo->tableIndex++;
4734 4735 4736
          continue;
        }
      }
4737

4738
      scanOneTableDataBlocks(pRuntimeEnv, pQuery->current->lastKey);
4739
      skipResults(pRuntimeEnv);
4740

4741
      // the limitation of output result is reached, set the query completed
4742
      if (limitResults(pRuntimeEnv)) {
H
Haojun Liao 已提交
4743
        SET_STABLE_QUERY_OVER(pQInfo);
4744 4745
        break;
      }
4746

4747 4748
      // enable execution for next table, when handling the projection query
      enableExecutionForNextTable(pRuntimeEnv);
4749

4750
      if (Q_STATUS_EQUAL(pQuery->status, QUERY_COMPLETED)) {
4751 4752 4753 4754 4755 4756
        /*
         * query range is identical in terms of all meters involved in query,
         * so we need to restore them at the *beginning* of query on each meter,
         * not the consecutive query on meter on which is aborted due to buffer limitation
         * to ensure that, we can reset the query range once query on a meter is completed.
         */
4757
        pQInfo->tableIndex++;
weixin_48148422's avatar
weixin_48148422 已提交
4758

H
Haojun Liao 已提交
4759
        STableIdInfo tidInfo = {0};
4760

H
Haojun Liao 已提交
4761 4762 4763
        STableId* id = TSDB_TABLEID(pQuery->current->pTable);
        tidInfo.uid = id->uid;
        tidInfo.tid = id->tid;
weixin_48148422's avatar
weixin_48148422 已提交
4764
        tidInfo.key = pQuery->current->lastKey;
weixin_48148422's avatar
weixin_48148422 已提交
4765 4766
        taosArrayPush(pQInfo->arrTableIdInfo, &tidInfo);

4767
        // if the buffer is full or group by each table, we need to jump out of the loop
H
Haojun Liao 已提交
4768
        if (Q_STATUS_EQUAL(pQuery->status, QUERY_RESBUF_FULL)) {
4769 4770
          break;
        }
4771

4772
      } else {
4773
        // all data in the result buffer are skipped due to the offset, continue to retrieve data from current meter
4774 4775
        if (pQuery->rec.rows == 0) {
          assert(!Q_STATUS_EQUAL(pQuery->status, QUERY_RESBUF_FULL));
4776 4777
          continue;
        } else {
4778 4779 4780
          // buffer is full, wait for the next round to retrieve data from current meter
          assert(Q_STATUS_EQUAL(pQuery->status, QUERY_RESBUF_FULL));
          break;
4781 4782 4783
        }
      }
    }
H
Haojun Liao 已提交
4784

4785
    if (pQInfo->tableIndex >= pQInfo->tableqinfoGroupInfo.numOfTables) {
H
Haojun Liao 已提交
4786 4787
      setQueryStatus(pQuery, QUERY_COMPLETED);
    }
4788
  }
4789

4790 4791 4792 4793 4794 4795 4796 4797 4798 4799 4800 4801
  /*
   * 1. super table projection query, group-by on normal columns query, ts-comp query
   * 2. point interpolation query, last row query
   *
   * group-by on normal columns query and last_row query do NOT invoke the finalizer here,
   * since the finalize stage will be done at the client side.
   *
   * projection query, point interpolation query do not need the finalizer.
   *
   * Only the ts-comp query requires the finalizer function to be executed here.
   */
  if (isTSCompQuery(pQuery)) {
H
hjxilinx 已提交
4802
    finalizeQueryResult(pRuntimeEnv);
4803
  }
4804

4805 4806 4807
  if (pRuntimeEnv->pTSBuf != NULL) {
    pRuntimeEnv->cur = pRuntimeEnv->pTSBuf->cur;
  }
4808

4809
  qDebug(
S
TD-1057  
Shengliang Guan 已提交
4810
      "QInfo %p numOfTables:%"PRIu64", index:%d, numOfGroups:%" PRIzu ", %"PRId64" points returned, total:%"PRId64", offset:%" PRId64,
4811
      pQInfo, pQInfo->tableqinfoGroupInfo.numOfTables, pQInfo->tableIndex, numOfGroups, pQuery->rec.rows, pQuery->rec.total,
4812
      pQuery->limit.offset);
4813 4814
}

4815 4816 4817 4818
/**
 * Switch the runtime environment from the master scan to the reverse scan.
 *
 * The scan flag is set to reverse, the query time window is swapped, the scan order
 * (of the query, the ts buffer cursor and the function contexts) is switched, and a
 * second tsdb query handle over all table groups is created for the reverse scan.
 * doRestoreContext() is the counterpart that is invoked after the reverse scan.
 *
 * Does not return (longjmp with terrno) if the second tsdb handle cannot be created.
 *
 * @param pQInfo  query info object
 */
static void doSaveContext(SQInfo *pQInfo) {
  SQueryRuntimeEnv *pRuntimeEnv = &pQInfo->runtimeEnv;
  SQuery *          pQuery = pRuntimeEnv->pQuery;

  SET_REVERSE_SCAN_FLAG(pRuntimeEnv);
  SWAP(pQuery->window.skey, pQuery->window.ekey, TSKEY);
  SWITCH_ORDER(pQuery->order.order);

  // keep the traverse order of the ts buffer in line with the new scan order
  if (pRuntimeEnv->pTSBuf != NULL) {
    pRuntimeEnv->pTSBuf->cur.order = pQuery->order.order;
  }

  // the condition is built after the swap/switch above, so it describes the reverse scan
  STsdbQueryCond cond = {
      .order   = pQuery->order.order,
      .colList = pQuery->colList,
      .numOfCols = pQuery->numOfCols,
  };

  TIME_WINDOW_COPY(cond.twindow, pQuery->window);

  /*
   * clean unused handle. The pointer is reset right away (as it is done for pQueryHandle
   * elsewhere in this file), so that no dangling handle is visible to the functions that
   * run before the new handle is assigned below.
   */
  if (pRuntimeEnv->pSecQueryHandle != NULL) {
    tsdbCleanupQueryHandle(pRuntimeEnv->pSecQueryHandle);
    pRuntimeEnv->pSecQueryHandle = NULL;
  }

  setQueryStatus(pQuery, QUERY_NOT_COMPLETED);
  switchCtxOrder(pRuntimeEnv);
  disableFuncInReverseScan(pQInfo);
  setupQueryRangeForReverseScan(pQInfo);

  pRuntimeEnv->prevGroupId = INT32_MIN;
  pRuntimeEnv->pSecQueryHandle = tsdbQueryTables(pQInfo->tsdb, &cond, &pQInfo->tableGroupInfo, pQInfo);
  if (pRuntimeEnv->pSecQueryHandle == NULL) {
    longjmp(pRuntimeEnv->env, terrno);
  }
}

4852 4853 4854 4855
/**
 * Switch the runtime environment back to the master scan after a reverse scan.
 *
 * The query time window is swapped back, the order of the ts buffer cursor and of the
 * function contexts is switched again, and the scan flag is reset to master scan.
 *
 * NOTE(review): pQuery->order.order is switched by doSaveContext() but is not switched
 * in this function - confirm that this is intended.
 *
 * @param pQInfo  query info object
 */
static void doRestoreContext(SQInfo *pQInfo) {
  SQueryRuntimeEnv *pEnv = &pQInfo->runtimeEnv;
  SQuery           *pQry = pEnv->pQuery;

  // undo the swap of the query time window
  SWAP(pQry->window.skey, pQry->window.ekey, TSKEY);

  // the ts buffer (if attached) is traversed in the opposite direction again
  if (pEnv->pTSBuf != NULL) {
    SWITCH_ORDER(pEnv->pTSBuf->cur.order);
  }

  switchCtxOrder(pEnv);
  SET_MASTER_SCAN_FLAG(pEnv);
}

4866 4867 4868
/**
 * Close all time windows once a scan has been completed.
 *
 * For an interval query every table of every table group keeps its own window results,
 * all of them are closed. Otherwise only the window results kept in the runtime
 * environment (group results) are closed.
 *
 * @param pQInfo  query info object
 */
static void doCloseAllTimeWindowAfterScan(SQInfo *pQInfo) {
  SQuery *pQuery = pQInfo->runtimeEnv.pQuery;

  // close results for group result
  if (!QUERY_IS_INTERVAL_QUERY(pQuery)) {
    closeAllTimeWindow(&pQInfo->runtimeEnv.windowResInfo);
    return;
  }

  size_t groupCount = GET_NUM_OF_TABLEGROUP(pQInfo);
  for (int32_t g = 0; g < groupCount; ++g) {
    SArray *tables = GET_TABLEGROUP(pQInfo, g);
    size_t  tableCount = taosArrayGetSize(tables);

    for (int32_t t = 0; t < tableCount; ++t) {
      STableQueryInfo *pTableInfo = taosArrayGetP(tables, t);
      closeAllTimeWindow(&pTableInfo->windowResInfo);
    }
  }
}

/**
 * Abort the execution of multiTableQueryProcess if an error has been recorded in
 * pQInfo->code or the query has been killed. The function returns normally otherwise.
 *
 * NOTE(review): the longjmp always carries TSDB_CODE_TSC_QUERY_CANCELLED, even when
 * pQInfo->code holds a different error - confirm that the handler keeps pQInfo->code.
 */
static void abortMultiTableQueryOnFailure(SQInfo *pQInfo) {
  if (pQInfo->code == TSDB_CODE_SUCCESS && !IS_QUERY_KILLED(pQInfo)) {
    return;
  }

  qDebug("QInfo:%p query killed or error occurred, code:%s, abort", pQInfo, tstrerror(pQInfo->code));
  finalizeQueryResult(&pQInfo->runtimeEnv); // clean up allocated resource during query
  longjmp(pQInfo->runtimeEnv.env, TSDB_CODE_TSC_QUERY_CANCELLED);
}

/**
 * Super table query handler for interval queries and fixed output (aggregation) queries.
 *
 * If pQInfo->groupIndex > 0 the results are only copied into the output buffer and the
 * function returns. Otherwise all qualified data blocks are scanned (master scan, plus a
 * reverse scan when required), the time windows are closed and the results are merged
 * and/or copied into the output buffer.
 *
 * The function leaves through longjmp when an error occurred or the query has been killed.
 *
 * @param pQInfo  query info object
 */
static void multiTableQueryProcess(SQInfo *pQInfo) {
  SQueryRuntimeEnv *pRuntimeEnv = &pQInfo->runtimeEnv;
  SQuery           *pQuery = pRuntimeEnv->pQuery;

  /*
   * if the groupIndex > 0, the query process must have been completed already,
   * we only need to copy the data into output buffer
   */
  if (pQInfo->groupIndex > 0) {
    if (!QUERY_IS_INTERVAL_QUERY(pQuery)) {
      copyFromWindowResToSData(pQInfo, &pRuntimeEnv->windowResInfo);
    } else {
      copyResToQueryResultBuf(pQInfo, pQuery);
#ifdef _DEBUG_VIEW
      displayInterResult(pQuery->sdata, pRuntimeEnv, pQuery->sdata[0]->num);
#endif
    }

    qDebug("QInfo:%p current:%"PRId64", total:%"PRId64"", pQInfo, pQuery->rec.rows, pQuery->rec.total);
    return;
  }

  qDebug("QInfo:%p query start, qrange:%" PRId64 "-%" PRId64 ", order:%d, forward scan start", pQInfo,
         pQuery->window.skey, pQuery->window.ekey, pQuery->order.order);

  // master scan: check all qualified data blocks
  int64_t elapsed = scanMultiTableDataBlocks(pQInfo);
  qDebug("QInfo:%p master scan completed, elapsed time: %" PRId64 "ms, reverse scan start", pQInfo, elapsed);

  // query error occurred or query is killed, abort current execution
  abortMultiTableQueryOnFailure(pQInfo);

  // close all time window results
  doCloseAllTimeWindowAfterScan(pQInfo);

  if (!needReverseScan(pQuery)) {
    qDebug("QInfo:%p no need to do reversed scan, query completed", pQInfo);
  } else {
    doSaveContext(pQInfo);

    elapsed = scanMultiTableDataBlocks(pQInfo);
    qDebug("QInfo:%p reversed scan completed, elapsed time: %" PRId64 "ms", pQInfo, elapsed);

    doRestoreContext(pQInfo);
  }

  setQueryStatus(pQuery, QUERY_COMPLETED);

  // the reverse scan may have failed or the query may have been killed in the meantime
  abortMultiTableQueryOnFailure(pQInfo);

  if (QUERY_IS_INTERVAL_QUERY(pQuery) || isSumAvgRateQuery(pQuery)) {
    // results are copied only if the merge of the group results succeeds
    if (mergeIntoGroupResult(pQInfo) == TSDB_CODE_SUCCESS) {
      copyResToQueryResultBuf(pQInfo, pQuery);

#ifdef _DEBUG_VIEW
      displayInterResult(pQuery->sdata, pRuntimeEnv, pQuery->sdata[0]->num);
#endif
    }
  } else {  // not a interval query
    copyFromWindowResToSData(pQInfo, &pRuntimeEnv->windowResInfo);
  }

  // handle the limitation of output buffer
  qDebug("QInfo:%p points returned:%" PRId64 ", total:%" PRId64, pQInfo, pQuery->rec.rows, pQuery->rec.total + pQuery->rec.rows);
}

/*
 * in each query, this function will be called only once, no retry for further result.
 *
 * select count(*)/top(field,k)/avg(field name) from table_name [where ts>now-1a];
 * select count(*) from table_name group by status_column;
 */
H
hjxilinx 已提交
4966
static void tableFixedOutputProcess(SQInfo *pQInfo, STableQueryInfo* pTableInfo) {
4967
  SQueryRuntimeEnv *pRuntimeEnv = &pQInfo->runtimeEnv;
H
hjxilinx 已提交
4968 4969
  
  SQuery *pQuery = pRuntimeEnv->pQuery;
H
Haojun Liao 已提交
4970
  if (!pRuntimeEnv->topBotQuery && pQuery->limit.offset > 0) {  // no need to execute, since the output will be ignore.
H
Haojun Liao 已提交
4971 4972 4973
    return;
  }
  
H
hjxilinx 已提交
4974 4975
  pQuery->current = pTableInfo;  // set current query table info
  
4976
  scanOneTableDataBlocks(pRuntimeEnv, pTableInfo->lastKey);
H
hjxilinx 已提交
4977
  finalizeQueryResult(pRuntimeEnv);
4978

H
Haojun Liao 已提交
4979
  if (IS_QUERY_KILLED(pQInfo)) {
H
Haojun Liao 已提交
4980 4981
    finalizeQueryResult(pRuntimeEnv); // clean up allocated resource during query
    longjmp(pRuntimeEnv->env, TSDB_CODE_TSC_QUERY_CANCELLED);
4982
  }
4983

H
Haojun Liao 已提交
4984
  // since the numOfRows must be identical for all sql functions that are allowed to be executed simutaneously.
4985
  pQuery->rec.rows = getNumOfResult(pRuntimeEnv);
4986

4987
  skipResults(pRuntimeEnv);
4988
  limitResults(pRuntimeEnv);
4989 4990
}

H
hjxilinx 已提交
4991
/**
 * Handler for multi-output queries on a single table (the caller routes
 * diff/add/multiply/subtract/division style queries here).
 *
 * The table is scanned repeatedly until at least one row is generated or the query is
 * completed. Afterwards the limit is applied and, if the query is completed, the last
 * visited key of the table is appended to pQInfo->arrTableIdInfo.
 *
 * @param pQInfo      query info object
 * @param pTableInfo  the table to be queried, it becomes pQuery->current
 */
static void tableMultiOutputProcess(SQInfo *pQInfo, STableQueryInfo* pTableInfo) {
  SQueryRuntimeEnv *pRuntimeEnv = &pQInfo->runtimeEnv;

  SQuery *pQuery = pRuntimeEnv->pQuery;
  pQuery->current = pTableInfo;

  // for ts_comp query, re-initialized is not allowed
  if (!isTSCompQuery(pQuery)) {
    resetCtxOutputBuf(pRuntimeEnv);
  }

  // skip blocks without load the actual data block from file if no filter condition present
  skipBlocks(&pQInfo->runtimeEnv);
  if (pQuery->limit.offset > 0 && pQuery->numOfFilterCols == 0) {
    // the offset has not been consumed although no filter exists: no rows can be returned
    setQueryStatus(pQuery, QUERY_COMPLETED);
    return;
  }

  while (1) {
    scanOneTableDataBlocks(pRuntimeEnv, pQuery->current->lastKey);
    finalizeQueryResult(pRuntimeEnv);

    pQuery->rec.rows = getNumOfResult(pRuntimeEnv);
    if (pQuery->limit.offset > 0 && pQuery->numOfFilterCols > 0 && pQuery->rec.rows > 0) {
      skipResults(pRuntimeEnv);
    }

    /*
     * 1. if pQuery->rec.rows == 0, pQuery->limit.offset >= 0, still need to check data
     * 2. if pQuery->rec.rows > 0, pQuery->limit.offset must be 0
     */
    if (pQuery->rec.rows > 0 || Q_STATUS_EQUAL(pQuery->status, QUERY_COMPLETED)) {
      break;
    }

    qDebug("QInfo:%p skip current result, offset:%" PRId64 ", next qrange:%" PRId64 "-%" PRId64,
           pQInfo, pQuery->limit.offset, pQuery->current->lastKey, pQuery->current->win.ekey);

    resetCtxOutputBuf(pRuntimeEnv);
  }

  limitResults(pRuntimeEnv);
  if (Q_STATUS_EQUAL(pQuery->status, QUERY_RESBUF_FULL)) {
    qDebug("QInfo:%p query paused due to output limitation, next qrange:%" PRId64 "-%" PRId64, pQInfo,
        pQuery->current->lastKey, pQuery->window.ekey);
  } else if (Q_STATUS_EQUAL(pQuery->status, QUERY_COMPLETED)) {
    /*
     * Zero-initialize the record (as sequentialTableProcess does): taosArrayPush is handed
     * the address of the whole struct, so any member or padding that is not assigned below
     * must not carry indeterminate stack content.
     */
    STableIdInfo tidInfo = {0};
    STableId* id = TSDB_TABLEID(pQuery->current->pTable);

    tidInfo.uid = id->uid;
    tidInfo.tid = id->tid;
    tidInfo.key = pQuery->current->lastKey;
    taosArrayPush(pQInfo->arrTableIdInfo, &tidInfo);
  }

  if (!isTSCompQuery(pQuery)) {
    assert(pQuery->rec.rows <= pQuery->rec.capacity);
  }
}

H
Haojun Liao 已提交
5051
/**
 * Scan the current table repeatedly until the query is completed or the result buffer is full.
 *
 * After each scan the results are finalized. If an offset is specified for a query that
 * has filter columns or a ts buffer and does not use fill, the leading closed time windows
 * are dropped to consume the offset.
 *
 * @param pRuntimeEnv  runtime environment of the query
 * @param start        start key that is handed to scanOneTableDataBlocks
 */
static void tableIntervalProcessImpl(SQueryRuntimeEnv *pRuntimeEnv, TSKEY start) {
  SQuery *pQuery = pRuntimeEnv->pQuery;

  do {
    scanOneTableDataBlocks(pRuntimeEnv, start);

    assert(!Q_STATUS_EQUAL(pQuery->status, QUERY_NOT_COMPLETED));
    finalizeQueryResult(pRuntimeEnv);

    // here we can ignore the records in case of no interpolation
    // todo handle offset, in case of top/bottom interval query
    bool offsetNotHandledYet = (pQuery->numOfFilterCols > 0 || pRuntimeEnv->pTSBuf != NULL);
    if (offsetNotHandledYet && pQuery->limit.offset > 0 && pQuery->fillType == TSDB_FILL_NONE) {
      // numOfClosed <= 0 means the current scan did not generate any closed window
      int32_t numOfClosed = numOfClosedTimeWindow(&pRuntimeEnv->windowResInfo);
      int32_t numOfDropped = (int32_t)(MIN(numOfClosed, pQuery->limit.offset));

      clearFirstNTimeWindow(pRuntimeEnv, numOfDropped);
      pQuery->limit.offset -= numOfDropped;
    }
  } while (!Q_STATUS_EQUAL(pQuery->status, QUERY_COMPLETED | QUERY_RESBUF_FULL));
}

5078
// handle time interval query on table
H
hjxilinx 已提交
5079
static void tableIntervalProcess(SQInfo *pQInfo, STableQueryInfo* pTableInfo) {
5080 5081
  SQueryRuntimeEnv *pRuntimeEnv = &(pQInfo->runtimeEnv);

H
hjxilinx 已提交
5082 5083
  SQuery *pQuery = pRuntimeEnv->pQuery;
  pQuery->current = pTableInfo;
5084

H
Haojun Liao 已提交
5085
  int32_t numOfFilled = 0;
H
Haojun Liao 已提交
5086 5087
  TSKEY newStartKey = TSKEY_INITIAL_VAL;
  
5088
  // skip blocks without load the actual data block from file if no filter condition present
H
Haojun Liao 已提交
5089
  skipTimeInterval(pRuntimeEnv, &newStartKey);
5090
  if (pQuery->limit.offset > 0 && pQuery->numOfFilterCols == 0 && pRuntimeEnv->pFillInfo == NULL) {
5091 5092 5093 5094
    setQueryStatus(pQuery, QUERY_COMPLETED);
    return;
  }

5095
  while (1) {
H
Haojun Liao 已提交
5096
    tableIntervalProcessImpl(pRuntimeEnv, newStartKey);
5097

H
Haojun Liao 已提交
5098
    if (QUERY_IS_INTERVAL_QUERY(pQuery)) {
5099
      pQInfo->groupIndex = 0;  // always start from 0
5100
      pQuery->rec.rows = 0;
5101
      copyFromWindowResToSData(pQInfo, &pRuntimeEnv->windowResInfo);
5102

5103
      clearFirstNTimeWindow(pRuntimeEnv, pQInfo->groupIndex);
5104
    }
5105

5106
    // the offset is handled at prepare stage if no interpolation involved
5107
    if (pQuery->fillType == TSDB_FILL_NONE || pQuery->rec.rows == 0) {
5108
      limitResults(pRuntimeEnv);
5109 5110
      break;
    } else {
S
TD-1057  
Shengliang Guan 已提交
5111
      taosFillSetStartInfo(pRuntimeEnv->pFillInfo, (int32_t)pQuery->rec.rows, pQuery->window.ekey);
5112
      taosFillCopyInputDataFromFilePage(pRuntimeEnv->pFillInfo, (tFilePage**) pQuery->sdata);
H
Haojun Liao 已提交
5113
      numOfFilled = 0;
5114
      
H
Haojun Liao 已提交
5115
      pQuery->rec.rows = doFillGapsInResults(pRuntimeEnv, (tFilePage **)pQuery->sdata, &numOfFilled);
5116
      if (pQuery->rec.rows > 0 || Q_STATUS_EQUAL(pQuery->status, QUERY_COMPLETED)) {
5117
        limitResults(pRuntimeEnv);
5118 5119
        break;
      }
5120

5121
      // no result generated yet, continue retrieve data
5122
      pQuery->rec.rows = 0;
5123 5124
    }
  }
5125

5126
  // all data scanned, the group by normal column can return
H
Haojun Liao 已提交
5127
  if (pRuntimeEnv->groupbyNormalCol) {  // todo refactor with merge interval time result
5128
    pQInfo->groupIndex = 0;
5129
    pQuery->rec.rows = 0;
5130
    copyFromWindowResToSData(pQInfo, &pRuntimeEnv->windowResInfo);
5131
    clearFirstNTimeWindow(pRuntimeEnv, pQInfo->groupIndex);
5132 5133 5134
  }
}

5135 5136 5137 5138
/**
 * Entry of the query on a single table.
 *
 * If results of the previous round remain (fill results or window results), they are
 * returned first. Otherwise the query is dispatched to the interval, fixed output or
 * multi-output handler, and the elapsed time is added to the query summary.
 *
 * @param pQInfo  query info object, it must contain exactly one table
 */
static void tableQueryImpl(SQInfo *pQInfo) {
  SQueryRuntimeEnv *pRuntimeEnv = &pQInfo->runtimeEnv;
  SQuery           *pQuery = pRuntimeEnv->pQuery;

  if (queryHasRemainResForTableQuery(pRuntimeEnv)) {
    if (pQuery->fillType != TSDB_FILL_NONE) {
      /*
       * There are remain results that are not returned due to result interpolation
       * So, we do keep in this procedure instead of launching retrieve procedure for next results.
       */
      int32_t numOfFilled = 0;
      pQuery->rec.rows = doFillGapsInResults(pRuntimeEnv, (tFilePage **)pQuery->sdata, &numOfFilled);

      if (pQuery->rec.rows > 0) {
        limitResults(pRuntimeEnv);
      }

      qDebug("QInfo:%p current:%" PRId64 " returned, total:%" PRId64, pQInfo, pQuery->rec.rows, pQuery->rec.total);
      return;
    }

    // no fill involved: return the remaining window results, if there are any
    pQuery->rec.rows = 0;
    pQInfo->groupIndex = 0;  // always start from 0

    if (pRuntimeEnv->windowResInfo.size > 0) {
      copyFromWindowResToSData(pQInfo, &pRuntimeEnv->windowResInfo);
      clearFirstNTimeWindow(pRuntimeEnv, pQInfo->groupIndex);

      if (pQuery->rec.rows > 0) {
        qDebug("QInfo:%p %"PRId64" rows returned from group results, total:%"PRId64"", pQInfo, pQuery->rec.rows, pQuery->rec.total);

        // no data remains
        if (pRuntimeEnv->windowResInfo.size <= 0) {
          qDebug("QInfo:%p query over, %"PRId64" rows are returned", pQInfo, pQuery->rec.total);
        }

        return;
      }
    }
  }

  // number of points returned during this query
  pQuery->rec.rows = 0;
  int64_t startUs = taosGetTimestampUs();

  assert(pQInfo->tableqinfoGroupInfo.numOfTables == 1);
  SArray          *tableList = GET_TABLEGROUP(pQInfo, 0);
  STableQueryInfo *pTableInfo = taosArrayGetP(tableList, 0);

  // group by normal column, sliding window query, interval query are handled by interval query processor
  if (QUERY_IS_INTERVAL_QUERY(pQuery) || pRuntimeEnv->groupbyNormalCol) {  // interval (down sampling operation)
    tableIntervalProcess(pQInfo, pTableInfo);
  } else if (isFixedOutputQuery(pRuntimeEnv)) {
    tableFixedOutputProcess(pQInfo, pTableInfo);
  } else {  // diff/add/multiply/subtract/division
    assert(pQuery->checkBuffer == 1);
    tableMultiOutputProcess(pQInfo, pTableInfo);
  }

  // record the total elapsed time
  pRuntimeEnv->summary.elapsedTime += (taosGetTimestampUs() - startUs);
  assert(pQInfo->tableqinfoGroupInfo.numOfTables == 1);
}

5200
/*
 * Run one round of a super-table query.
 *
 * Resets the number of rows produced in this round, dispatches to either the
 * multi-table processor or the sequential per-table processor, and adds the
 * time spent (as measured by taosGetTimestampUs) to the runtime summary.
 */
static void stableQueryImpl(SQInfo *pQInfo) {
  SQueryRuntimeEnv *pEnv = &pQInfo->runtimeEnv;
  SQuery           *pQuery = pEnv->pQuery;

  pQuery->rec.rows = 0;
  const int64_t startUs = taosGetTimestampUs();

  // interval queries always use the multi-table processor; fixed-output queries use it as well,
  // unless they are point interpolation, group-by-normal-column or first/last row queries
  bool useMultiTableProc = QUERY_IS_INTERVAL_QUERY(pQuery);
  if (!useMultiTableProc) {
    useMultiTableProc = isFixedOutputQuery(pEnv) && (!isPointInterpoQuery(pQuery)) && !pEnv->groupbyNormalCol &&
                        !isFirstLastRowQuery(pQuery);
  }

  if (useMultiTableProc) {
    multiTableQueryProcess(pQInfo);
  } else {
    assert((pQuery->checkBuffer == 1 && pQuery->intervalTime == 0) || isPointInterpoQuery(pQuery) ||
            isFirstLastRowQuery(pQuery) || pEnv->groupbyNormalCol);

    sequentialTableProcess(pQInfo);
  }

  // accumulate the total elapsed time
  pEnv->summary.elapsedTime += (taosGetTimestampUs() - startUs);
}

5223
/*
 * Locate the source column referenced by an expression.
 *
 * @param pQueryMsg  query message providing numOfTags, numOfCols and colList
 * @param pExprMsg   expression whose colInfo (flag, colId) is looked up
 * @param pTagCols   tag column list, searched when the expression refers to a tag
 * @return  for a tag column: its position in pTagCols, or -1 when colId is TSDB_TBNAME_COLUMN_INDEX;
 *          for a user-defined column: TSDB_UD_COLUMN_INDEX;
 *          for any other column: its position in pQueryMsg->colList.
 *          If nothing matches, the function asserts; when asserts are compiled out it returns
 *          INT32_MIN instead of falling off the end of a non-void function.
 */
static int32_t getColumnIndexInSource(SQueryTableMsg *pQueryMsg, SSqlFuncMsg *pExprMsg, SColumnInfo* pTagCols) {
  int32_t j = 0;

  if (TSDB_COL_IS_TAG(pExprMsg->colInfo.flag)) {
    if (pExprMsg->colInfo.colId == TSDB_TBNAME_COLUMN_INDEX) {
      return -1;
    }

    while (j < pQueryMsg->numOfTags) {
      if (pExprMsg->colInfo.colId == pTagCols[j].colId) {
        return j;
      }

      j += 1;
    }

  } else if (TSDB_COL_IS_UD_COL(pExprMsg->colInfo.flag)) {  // user specified column data
    return TSDB_UD_COLUMN_INDEX;
  } else {
    while (j < pQueryMsg->numOfCols) {
      if (pExprMsg->colInfo.colId == pQueryMsg->colList[j].colId) {
        return j;
      }

      j += 1;
    }
  }

  assert(0);

  // only reached when assert() is disabled: report "not found" with a value that cannot be a valid index
  return INT32_MIN;
}

5254 5255 5256
/*
 * Check the column referenced by an expression against the message's column/tag counts.
 * Returns true when the index reported by getColumnIndexInSource() is smaller than
 * numOfCols or smaller than numOfTags.
 */
bool validateExprColumnInfo(SQueryTableMsg *pQueryMsg, SSqlFuncMsg *pExprMsg, SColumnInfo* pTagCols) {
  const int32_t idx = getColumnIndexInSource(pQueryMsg, pExprMsg, pTagCols);

  if (idx < pQueryMsg->numOfCols) {
    return true;
  }

  return idx < pQueryMsg->numOfTags;
}

5259
/*
 * Sanity-check the basic counters of a query message.
 *
 * The first violated rule is logged and makes the function return false:
 *   - intervalTime must not be negative
 *   - numOfTables must be positive
 *   - numOfGroupCols must not be negative
 *   - numOfOutput must be in (0, TSDB_MAX_COLUMNS]
 */
static bool validateQueryMsg(SQueryTableMsg *pQueryMsg) {
  bool valid = false;

  if (pQueryMsg->intervalTime < 0) {
    qError("qmsg:%p illegal value of interval time %" PRId64, pQueryMsg, pQueryMsg->intervalTime);
  } else if (pQueryMsg->numOfTables <= 0) {
    qError("qmsg:%p illegal value of numOfTables %d", pQueryMsg, pQueryMsg->numOfTables);
  } else if (pQueryMsg->numOfGroupCols < 0) {
    qError("qmsg:%p illegal value of numOfGroupbyCols %d", pQueryMsg, pQueryMsg->numOfGroupCols);
  } else if (pQueryMsg->numOfOutput <= 0 || pQueryMsg->numOfOutput > TSDB_MAX_COLUMNS) {
    qError("qmsg:%p illegal value of output columns %d", pQueryMsg, pQueryMsg->numOfOutput);
  } else {
    valid = true;
  }

  return valid;
}

/*
 * Validate the number of source columns and tags of a query message.
 *
 * Negative counts, or more than TSDB_MAX_COLUMNS columns and tags in total, are rejected.
 * A message without any source column or tag is accepted only if every output expression is
 * a tag projection, or a TID_TAG / COUNT function applied to the table name column.
 */
static bool validateQuerySourceCols(SQueryTableMsg *pQueryMsg, SSqlFuncMsg** pExprMsg) {
  int32_t numOfTotal = pQueryMsg->numOfCols + pQueryMsg->numOfTags;

  if (pQueryMsg->numOfCols < 0 || pQueryMsg->numOfTags < 0 || numOfTotal > TSDB_MAX_COLUMNS) {
    qError("qmsg:%p illegal value of numOfCols %d numOfTags:%d", pQueryMsg, pQueryMsg->numOfCols, pQueryMsg->numOfTags);
    return false;
  }

  if (numOfTotal != 0) {
    return true;
  }

  // no source columns at all: only expressions that do not need one are acceptable
  for (int32_t k = 0; k < pQueryMsg->numOfOutput; ++k) {
    SSqlFuncMsg *pFunc = pExprMsg[k];

    bool onTbname = (pFunc->colInfo.colId == TSDB_TBNAME_COLUMN_INDEX);
    bool allowed  = (pFunc->functionId == TSDB_FUNC_TAGPRJ) ||
                    (onTbname && (pFunc->functionId == TSDB_FUNC_TID_TAG || pFunc->functionId == TSDB_FUNC_COUNT));

    if (!allowed) {
      return false;
    }
  }

  return true;
}

5305
/*
 * Build the table id list from the STableIdInfo entries stored at pMsg.
 *
 * Each of the pQueryMsg->numOfTables entries is byte-swapped in place inside the message
 * and then pushed into a newly created array stored in *pTableIdList.
 *
 * @return pointer to the first byte following the consumed entries
 */
static char *createTableIdList(SQueryTableMsg *pQueryMsg, char *pMsg, SArray **pTableIdList) {
  assert(pQueryMsg->numOfTables > 0);

  *pTableIdList = taosArrayInit(pQueryMsg->numOfTables, sizeof(STableIdInfo));

  STableIdInfo *pEntry = (STableIdInfo *)pMsg;
  for (int32_t k = 0; k < pQueryMsg->numOfTables; ++k, ++pEntry) {
    pEntry->tid = htonl(pEntry->tid);
    pEntry->uid = htobe64(pEntry->uid);
    pEntry->key = htobe64(pEntry->key);

    taosArrayPush(*pTableIdList, pEntry);
  }

  return (char *)pEntry;
}
5323

5324
/**
H
hjxilinx 已提交
5325
 * pQueryMsg->head has been converted before this function is called.
5326
 *
H
hjxilinx 已提交
5327
 * @param pQueryMsg
5328 5329 5330 5331
 * @param pTableIdList
 * @param pExpr
 * @return
 */
5332
static int32_t convertQueryMsg(SQueryTableMsg *pQueryMsg, SArray **pTableIdList, SSqlFuncMsg ***pExpr,
weixin_48148422's avatar
weixin_48148422 已提交
5333
                               char **tagCond, char** tbnameCond, SColIndex **groupbyCols, SColumnInfo** tagCols) {
5334 5335
  int32_t code = TSDB_CODE_SUCCESS;

5336 5337 5338 5339 5340 5341 5342 5343
  pQueryMsg->numOfTables = htonl(pQueryMsg->numOfTables);

  pQueryMsg->window.skey = htobe64(pQueryMsg->window.skey);
  pQueryMsg->window.ekey = htobe64(pQueryMsg->window.ekey);
  pQueryMsg->intervalTime = htobe64(pQueryMsg->intervalTime);
  pQueryMsg->slidingTime = htobe64(pQueryMsg->slidingTime);
  pQueryMsg->limit = htobe64(pQueryMsg->limit);
  pQueryMsg->offset = htobe64(pQueryMsg->offset);
H
hjxilinx 已提交
5344

5345 5346
  pQueryMsg->order = htons(pQueryMsg->order);
  pQueryMsg->orderColId = htons(pQueryMsg->orderColId);
H
Haojun Liao 已提交
5347
  pQueryMsg->queryType = htonl(pQueryMsg->queryType);
weixin_48148422's avatar
weixin_48148422 已提交
5348
  pQueryMsg->tagNameRelType = htons(pQueryMsg->tagNameRelType);
5349 5350

  pQueryMsg->numOfCols = htons(pQueryMsg->numOfCols);
5351
  pQueryMsg->numOfOutput = htons(pQueryMsg->numOfOutput);
H
hjxilinx 已提交
5352
  pQueryMsg->numOfGroupCols = htons(pQueryMsg->numOfGroupCols);
5353 5354 5355
  pQueryMsg->tagCondLen = htons(pQueryMsg->tagCondLen);
  pQueryMsg->tsOffset = htonl(pQueryMsg->tsOffset);
  pQueryMsg->tsLen = htonl(pQueryMsg->tsLen);
H
hjxilinx 已提交
5356
  pQueryMsg->tsNumOfBlocks = htonl(pQueryMsg->tsNumOfBlocks);
5357
  pQueryMsg->tsOrder = htonl(pQueryMsg->tsOrder);
5358
  pQueryMsg->numOfTags = htonl(pQueryMsg->numOfTags);
5359

5360
  // query msg safety check
5361
  if (!validateQueryMsg(pQueryMsg)) {
5362 5363
    code = TSDB_CODE_QRY_INVALID_MSG;
    goto _cleanup;
5364 5365
  }

H
hjxilinx 已提交
5366 5367
  char *pMsg = (char *)(pQueryMsg->colList) + sizeof(SColumnInfo) * pQueryMsg->numOfCols;
  for (int32_t col = 0; col < pQueryMsg->numOfCols; ++col) {
5368 5369
    SColumnInfo *pColInfo = &pQueryMsg->colList[col];

H
hjxilinx 已提交
5370
    pColInfo->colId = htons(pColInfo->colId);
5371
    pColInfo->type = htons(pColInfo->type);
H
hjxilinx 已提交
5372 5373
    pColInfo->bytes = htons(pColInfo->bytes);
    pColInfo->numOfFilters = htons(pColInfo->numOfFilters);
5374

H
hjxilinx 已提交
5375
    assert(pColInfo->type >= TSDB_DATA_TYPE_BOOL && pColInfo->type <= TSDB_DATA_TYPE_NCHAR);
5376

H
hjxilinx 已提交
5377
    int32_t numOfFilters = pColInfo->numOfFilters;
5378
    if (numOfFilters > 0) {
H
hjxilinx 已提交
5379
      pColInfo->filters = calloc(numOfFilters, sizeof(SColumnFilterInfo));
5380 5381 5382
    }

    for (int32_t f = 0; f < numOfFilters; ++f) {
5383 5384 5385 5386
      SColumnFilterInfo *pFilterMsg = (SColumnFilterInfo *)pMsg;
      
      SColumnFilterInfo *pColFilter = &pColInfo->filters[f];
      pColFilter->filterstr = htons(pFilterMsg->filterstr);
5387 5388 5389

      pMsg += sizeof(SColumnFilterInfo);

5390 5391
      if (pColFilter->filterstr) {
        pColFilter->len = htobe64(pFilterMsg->len);
5392

5393
        pColFilter->pz = (int64_t) calloc(1, pColFilter->len + 1 * TSDB_NCHAR_SIZE); // note: null-terminator
5394 5395
        memcpy((void *)pColFilter->pz, pMsg, pColFilter->len);
        pMsg += (pColFilter->len + 1);
5396
      } else {
5397 5398
        pColFilter->lowerBndi = htobe64(pFilterMsg->lowerBndi);
        pColFilter->upperBndi = htobe64(pFilterMsg->upperBndi);
5399 5400
      }

5401 5402
      pColFilter->lowerRelOptr = htons(pFilterMsg->lowerRelOptr);
      pColFilter->upperRelOptr = htons(pFilterMsg->upperRelOptr);
5403 5404 5405
    }
  }

5406 5407
  *pExpr = calloc(pQueryMsg->numOfOutput, POINTER_BYTES);
  SSqlFuncMsg *pExprMsg = (SSqlFuncMsg *)pMsg;
5408

5409
  for (int32_t i = 0; i < pQueryMsg->numOfOutput; ++i) {
5410
    (*pExpr)[i] = pExprMsg;
5411

5412
    pExprMsg->colInfo.colIndex = htons(pExprMsg->colInfo.colIndex);
5413 5414 5415 5416
    pExprMsg->colInfo.colId = htons(pExprMsg->colInfo.colId);
    pExprMsg->colInfo.flag = htons(pExprMsg->colInfo.flag);
    pExprMsg->functionId = htons(pExprMsg->functionId);
    pExprMsg->numOfParams = htons(pExprMsg->numOfParams);
5417

5418
    pMsg += sizeof(SSqlFuncMsg);
5419 5420

    for (int32_t j = 0; j < pExprMsg->numOfParams; ++j) {
5421
      pExprMsg->arg[j].argType = htons(pExprMsg->arg[j].argType);
5422 5423 5424 5425
      pExprMsg->arg[j].argBytes = htons(pExprMsg->arg[j].argBytes);

      if (pExprMsg->arg[j].argType == TSDB_DATA_TYPE_BINARY) {
        pExprMsg->arg[j].argValue.pz = pMsg;
5426
        pMsg += pExprMsg->arg[j].argBytes;  // one more for the string terminated char.
5427 5428 5429 5430 5431
      } else {
        pExprMsg->arg[j].argValue.i64 = htobe64(pExprMsg->arg[j].argValue.i64);
      }
    }

H
Haojun Liao 已提交
5432 5433
    int16_t functionId = pExprMsg->functionId;
    if (functionId == TSDB_FUNC_TAG || functionId == TSDB_FUNC_TAGPRJ || functionId == TSDB_FUNC_TAG_DUMMY) {
5434
      if (pExprMsg->colInfo.flag != TSDB_COL_TAG) {  // ignore the column  index check for arithmetic expression.
5435 5436
        code = TSDB_CODE_QRY_INVALID_MSG;
        goto _cleanup;
5437 5438
      }
    } else {
5439
//      if (!validateExprColumnInfo(pQueryMsg, pExprMsg)) {
5440
//        return TSDB_CODE_QRY_INVALID_MSG;
5441
//      }
5442 5443
    }

5444
    pExprMsg = (SSqlFuncMsg *)pMsg;
5445
  }
5446

5447
  if (!validateQuerySourceCols(pQueryMsg, *pExpr)) {
5448
    code = TSDB_CODE_QRY_INVALID_MSG;
dengyihao's avatar
dengyihao 已提交
5449
    goto _cleanup;
5450
  }
5451

H
hjxilinx 已提交
5452
  pMsg = createTableIdList(pQueryMsg, pMsg, pTableIdList);
5453

H
hjxilinx 已提交
5454
  if (pQueryMsg->numOfGroupCols > 0) {  // group by tag columns
5455
    *groupbyCols = malloc(pQueryMsg->numOfGroupCols * sizeof(SColIndex));
5456 5457 5458 5459
    if (*groupbyCols == NULL) {
      code = TSDB_CODE_QRY_OUT_OF_MEMORY;
      goto _cleanup;
    }
5460 5461 5462

    for (int32_t i = 0; i < pQueryMsg->numOfGroupCols; ++i) {
      (*groupbyCols)[i].colId = *(int16_t *)pMsg;
5463
      pMsg += sizeof((*groupbyCols)[i].colId);
5464 5465

      (*groupbyCols)[i].colIndex = *(int16_t *)pMsg;
5466 5467
      pMsg += sizeof((*groupbyCols)[i].colIndex);

5468
      (*groupbyCols)[i].flag = *(int16_t *)pMsg;
5469 5470 5471 5472 5473
      pMsg += sizeof((*groupbyCols)[i].flag);

      memcpy((*groupbyCols)[i].name, pMsg, tListLen(groupbyCols[i]->name));
      pMsg += tListLen((*groupbyCols)[i].name);
    }
5474

H
hjxilinx 已提交
5475 5476
    pQueryMsg->orderByIdx = htons(pQueryMsg->orderByIdx);
    pQueryMsg->orderType = htons(pQueryMsg->orderType);
5477 5478
  }

5479 5480
  pQueryMsg->fillType = htons(pQueryMsg->fillType);
  if (pQueryMsg->fillType != TSDB_FILL_NONE) {
5481
    pQueryMsg->fillVal = (uint64_t)(pMsg);
5482 5483

    int64_t *v = (int64_t *)pMsg;
5484
    for (int32_t i = 0; i < pQueryMsg->numOfOutput; ++i) {
5485 5486
      v[i] = htobe64(v[i]);
    }
5487

5488
    pMsg += sizeof(int64_t) * pQueryMsg->numOfOutput;
5489
  }
5490

5491 5492 5493 5494
  if (pQueryMsg->numOfTags > 0) {
    (*tagCols) = calloc(1, sizeof(SColumnInfo) * pQueryMsg->numOfTags);
    for (int32_t i = 0; i < pQueryMsg->numOfTags; ++i) {
      SColumnInfo* pTagCol = (SColumnInfo*) pMsg;
5495

5496 5497 5498 5499
      pTagCol->colId = htons(pTagCol->colId);
      pTagCol->bytes = htons(pTagCol->bytes);
      pTagCol->type  = htons(pTagCol->type);
      pTagCol->numOfFilters = 0;
5500

5501
      (*tagCols)[i] = *pTagCol;
5502
      pMsg += sizeof(SColumnInfo);
5503
    }
H
hjxilinx 已提交
5504
  }
5505

5506 5507 5508 5509 5510 5511
  // the tag query condition expression string is located at the end of query msg
  if (pQueryMsg->tagCondLen > 0) {
    *tagCond = calloc(1, pQueryMsg->tagCondLen);
    memcpy(*tagCond, pMsg, pQueryMsg->tagCondLen);
    pMsg += pQueryMsg->tagCondLen;
  }
5512

weixin_48148422's avatar
weixin_48148422 已提交
5513
  if (*pMsg != 0) {
5514
    size_t len = strlen(pMsg) + 1;
5515

5516
    *tbnameCond = malloc(len);
5517 5518 5519 5520 5521
    if (*tbnameCond == NULL) {
      code = TSDB_CODE_QRY_OUT_OF_MEMORY;
      goto _cleanup;
    }

weixin_48148422's avatar
weixin_48148422 已提交
5522
    strcpy(*tbnameCond, pMsg);
5523
    pMsg += len;
weixin_48148422's avatar
weixin_48148422 已提交
5524
  }
5525

5526
  qDebug("qmsg:%p query %d tables, type:%d, qrange:%" PRId64 "-%" PRId64 ", numOfGroupbyTagCols:%d, order:%d, "
H
Haojun Liao 已提交
5527 5528
         "outputCols:%d, numOfCols:%d, interval:%" PRId64 ", fillType:%d, comptsLen:%d, compNumOfBlocks:%d, limit:%" PRId64 ", offset:%" PRId64,
         pQueryMsg, pQueryMsg->numOfTables, pQueryMsg->queryType, pQueryMsg->window.skey, pQueryMsg->window.ekey, pQueryMsg->numOfGroupCols,
5529
         pQueryMsg->order, pQueryMsg->numOfOutput, pQueryMsg->numOfCols, pQueryMsg->intervalTime,
H
Haojun Liao 已提交
5530
         pQueryMsg->fillType, pQueryMsg->tsLen, pQueryMsg->tsNumOfBlocks, pQueryMsg->limit, pQueryMsg->offset);
5531 5532

  return TSDB_CODE_SUCCESS;
dengyihao's avatar
dengyihao 已提交
5533 5534

_cleanup:
S
Shengliang Guan 已提交
5535
  taosTFree(*pExpr);
dengyihao's avatar
dengyihao 已提交
5536 5537
  taosArrayDestroy(*pTableIdList);
  *pTableIdList = NULL;
S
Shengliang Guan 已提交
5538 5539 5540 5541
  taosTFree(*tbnameCond);
  taosTFree(*groupbyCols);
  taosTFree(*tagCols);
  taosTFree(*tagCond);
5542 5543

  return code;
5544 5545
}

H
hjxilinx 已提交
5546
/*
 * Build the expression tree of an arithmetic expression from the binary form carried in the
 * first argument of the expression and attach it to pArithExprInfo->pExpr.
 *
 * @return TSDB_CODE_SUCCESS on success, the code caught from exprTreeFromBinary() when it raises
 *         an exception, or TSDB_CODE_QRY_APP_ERROR when no tree is produced
 */
static int32_t buildAirthmeticExprFromMsg(SExprInfo *pArithExprInfo, SQueryTableMsg *pQueryMsg) {
  qDebug("qmsg:%p create arithmetic expr from binary string: %s", pQueryMsg, pArithExprInfo->base.arg[0].argValue.pz);

  tExprNode* pRoot = NULL;

  TRY(TSDB_MAX_TAG_CONDITIONS) {
    pRoot = exprTreeFromBinary(pArithExprInfo->base.arg[0].argValue.pz, pArithExprInfo->base.arg[0].argBytes);
  } CATCH( code ) {
    CLEANUP_EXECUTE();
    qError("qmsg:%p failed to create arithmetic expression string from:%s, reason: %s", pQueryMsg, pArithExprInfo->base.arg[0].argValue.pz, tstrerror(code));
    return code;
  } END_TRY

  if (pRoot != NULL) {
    pArithExprInfo->pExpr = pRoot;
    return TSDB_CODE_SUCCESS;
  }

  // no exception was raised, but no tree was built either
  qError("qmsg:%p failed to create arithmetic expression string from:%s", pQueryMsg, pArithExprInfo->base.arg[0].argValue.pz);
  return TSDB_CODE_QRY_APP_ERROR;
}

H
Haojun Liao 已提交
5567
static int32_t createQFunctionExprFromMsg(SQueryTableMsg *pQueryMsg, SExprInfo **pExprInfo, SSqlFuncMsg **pExprMsg,
5568 5569
    SColumnInfo* pTagCols) {
  *pExprInfo = NULL;
H
hjxilinx 已提交
5570
  int32_t code = TSDB_CODE_SUCCESS;
5571

H
Haojun Liao 已提交
5572
  SExprInfo *pExprs = (SExprInfo *)calloc(pQueryMsg->numOfOutput, sizeof(SExprInfo));
5573
  if (pExprs == NULL) {
5574
    return TSDB_CODE_QRY_OUT_OF_MEMORY;
5575 5576 5577 5578 5579
  }

  bool    isSuperTable = QUERY_IS_STABLE_QUERY(pQueryMsg->queryType);
  int16_t tagLen = 0;

5580
  for (int32_t i = 0; i < pQueryMsg->numOfOutput; ++i) {
5581
    pExprs[i].base = *pExprMsg[i];
5582
    pExprs[i].bytes = 0;
5583 5584 5585 5586

    int16_t type = 0;
    int16_t bytes = 0;

5587
    // parse the arithmetic expression
5588
    if (pExprs[i].base.functionId == TSDB_FUNC_ARITHM) {
5589
      code = buildAirthmeticExprFromMsg(&pExprs[i], pQueryMsg);
5590

5591
      if (code != TSDB_CODE_SUCCESS) {
S
Shengliang Guan 已提交
5592
        taosTFree(pExprs);
5593
        return code;
5594 5595
      }

5596
      type  = TSDB_DATA_TYPE_DOUBLE;
5597
      bytes = tDataTypeDesc[type].nSize;
H
Haojun Liao 已提交
5598
    } else if (pExprs[i].base.colInfo.colId == TSDB_TBNAME_COLUMN_INDEX && pExprs[i].base.functionId == TSDB_FUNC_TAGPRJ) {  // parse the normal column
H
Haojun Liao 已提交
5599
      SSchema s = tGetTableNameColumnSchema();
H
Haojun Liao 已提交
5600
      type = s.type;
H
Haojun Liao 已提交
5601
      bytes = s.bytes;
H
Haojun Liao 已提交
5602 5603 5604 5605 5606 5607 5608 5609 5610 5611
    } else if (pExprs[i].base.colInfo.colId == TSDB_UD_COLUMN_INDEX) {
      assert(pExprs[i].base.functionId == TSDB_FUNC_PRJ);

      type = pExprs[i].base.arg[0].argType;
      bytes = pExprs[i].base.arg[0].argBytes;

      if (type == TSDB_DATA_TYPE_BINARY || type == TSDB_DATA_TYPE_NCHAR) {
        bytes += VARSTR_HEADER_SIZE;
      }
    } else {
5612
      int32_t j = getColumnIndexInSource(pQueryMsg, &pExprs[i].base, pTagCols);
dengyihao's avatar
dengyihao 已提交
5613
      assert(j < pQueryMsg->numOfCols || j < pQueryMsg->numOfTags);
H
Haojun Liao 已提交
5614

dengyihao's avatar
dengyihao 已提交
5615
      if (pExprs[i].base.colInfo.colId != TSDB_TBNAME_COLUMN_INDEX && j >= 0) {
H
Haojun Liao 已提交
5616 5617 5618 5619
        SColumnInfo* pCol = (TSDB_COL_IS_TAG(pExprs[i].base.colInfo.flag))? &pTagCols[j]:&pQueryMsg->colList[j];
        type = pCol->type;
        bytes = pCol->bytes;
      } else {
H
Haojun Liao 已提交
5620
        SSchema s = tGetTableNameColumnSchema();
H
hjxilinx 已提交
5621

H
Haojun Liao 已提交
5622 5623 5624
        type  = s.type;
        bytes = s.bytes;
      }
5625 5626
    }

S
TD-1057  
Shengliang Guan 已提交
5627
    int32_t param = (int32_t)pExprs[i].base.arg[0].argValue.i64;
5628
    if (getResultDataInfo(type, bytes, pExprs[i].base.functionId, param, &pExprs[i].type, &pExprs[i].bytes,
5629
                          &pExprs[i].interBytes, 0, isSuperTable) != TSDB_CODE_SUCCESS) {
S
Shengliang Guan 已提交
5630
      taosTFree(pExprs);
5631
      return TSDB_CODE_QRY_INVALID_MSG;
5632 5633
    }

5634
    if (pExprs[i].base.functionId == TSDB_FUNC_TAG_DUMMY || pExprs[i].base.functionId == TSDB_FUNC_TS_DUMMY) {
5635
      tagLen += pExprs[i].bytes;
5636
    }
5637
    assert(isValidDataType(pExprs[i].type));
5638 5639 5640
  }

  // TODO refactor
5641
  for (int32_t i = 0; i < pQueryMsg->numOfOutput; ++i) {
5642 5643
    pExprs[i].base = *pExprMsg[i];
    int16_t functId = pExprs[i].base.functionId;
5644

5645
    if (functId == TSDB_FUNC_TOP || functId == TSDB_FUNC_BOTTOM) {
5646
      int32_t j = getColumnIndexInSource(pQueryMsg, &pExprs[i].base, pTagCols);
5647 5648 5649 5650 5651
      assert(j < pQueryMsg->numOfCols);

      SColumnInfo *pCol = &pQueryMsg->colList[j];

      int32_t ret =
S
TD-1057  
Shengliang Guan 已提交
5652
        getResultDataInfo(pCol->type, pCol->bytes, functId, (int32_t)pExprs[i].base.arg[0].argValue.i64,
5653
                            &pExprs[i].type, &pExprs[i].bytes, &pExprs[i].interBytes, tagLen, isSuperTable);
5654 5655 5656
      assert(ret == TSDB_CODE_SUCCESS);
    }
  }
5657
  *pExprInfo = pExprs;
5658 5659 5660 5661

  return TSDB_CODE_SUCCESS;
}

5662
/*
 * Create the group-by descriptor of a query.
 *
 * @param pQueryMsg  converted query message (numOfGroupCols, orderType, orderByIdx are read)
 * @param pColIndex  array of pQueryMsg->numOfGroupCols group-by columns; the entries are copied
 * @param code       [out] set to TSDB_CODE_QRY_OUT_OF_MEMORY when an allocation fails; left
 *                   untouched otherwise
 * @return the new descriptor, or NULL when the query has no group-by columns or on failure
 */
static SSqlGroupbyExpr *createGroupbyExprFromMsg(SQueryTableMsg *pQueryMsg, SColIndex *pColIndex, int32_t *code) {
  if (pQueryMsg->numOfGroupCols == 0) {
    return NULL;
  }

  // using group by tag columns
  SSqlGroupbyExpr *pGroupbyExpr = (SSqlGroupbyExpr *)calloc(1, sizeof(SSqlGroupbyExpr));
  if (pGroupbyExpr == NULL) {
    *code = TSDB_CODE_QRY_OUT_OF_MEMORY;
    return NULL;
  }

  pGroupbyExpr->numOfGroupCols = pQueryMsg->numOfGroupCols;
  pGroupbyExpr->orderType = pQueryMsg->orderType;
  pGroupbyExpr->orderIndex = pQueryMsg->orderByIdx;

  pGroupbyExpr->columnInfo = taosArrayInit(pQueryMsg->numOfGroupCols, sizeof(SColIndex));
  if (pGroupbyExpr->columnInfo == NULL) {
    // do not push into a missing array; report the failure like the allocation above
    free(pGroupbyExpr);
    *code = TSDB_CODE_QRY_OUT_OF_MEMORY;
    return NULL;
  }

  for(int32_t i = 0; i < pQueryMsg->numOfGroupCols; ++i) {
    taosArrayPush(pGroupbyExpr->columnInfo, &pColIndex[i]);
  }

  return pGroupbyExpr;
}

5686
/*
 * Build pQuery->pFilterInfo from the filters attached to the columns in pQuery->colList.
 *
 * One SSingleColumnFilterInfo is created for every column that has at least one filter. For each
 * filter the comparison callback is selected from the range filter table (lower and upper bound
 * both given as >/>= and </<=) or from the value filter table of the column type.
 *
 * @param pQInfo  only used as an identifier in log messages
 * @param pQuery  query whose numOfFilterCols and pFilterInfo are filled in
 * @return TSDB_CODE_SUCCESS, TSDB_CODE_QRY_INVALID_MSG for an inconsistent filter definition, or
 *         TSDB_CODE_QRY_OUT_OF_MEMORY. On failure the partially built pFilterInfo stays attached
 *         to pQuery (NOTE(review): presumably released by the caller's cleanup - confirm).
 */
static int32_t createFilterInfo(void *pQInfo, SQuery *pQuery) {
  for (int32_t i = 0; i < pQuery->numOfCols; ++i) {
    if (pQuery->colList[i].numOfFilters > 0) {
      pQuery->numOfFilterCols++;
    }
  }

  if (pQuery->numOfFilterCols == 0) {
    return TSDB_CODE_SUCCESS;
  }

  pQuery->pFilterInfo = calloc(pQuery->numOfFilterCols, sizeof(SSingleColumnFilterInfo));
  if (pQuery->pFilterInfo == NULL) {
    pQuery->numOfFilterCols = 0;  // keep the counter consistent with the missing array
    return TSDB_CODE_QRY_OUT_OF_MEMORY;
  }

  for (int32_t i = 0, j = 0; i < pQuery->numOfCols; ++i) {
    if (pQuery->colList[i].numOfFilters <= 0) {
      continue;
    }

    SSingleColumnFilterInfo *pFilterInfo = &pQuery->pFilterInfo[j];

    pFilterInfo->info = pQuery->colList[i];

    pFilterInfo->pFilters = calloc(pQuery->colList[i].numOfFilters, sizeof(SColumnFilterElem));
    if (pFilterInfo->pFilters == NULL) {
      // numOfFilters of this entry is still 0 (zeroed by calloc above)
      return TSDB_CODE_QRY_OUT_OF_MEMORY;
    }

    pFilterInfo->numOfFilters = pQuery->colList[i].numOfFilters;

    for (int32_t f = 0; f < pFilterInfo->numOfFilters; ++f) {
      SColumnFilterElem *pSingleColFilter = &pFilterInfo->pFilters[f];
      pSingleColFilter->filterInfo = pQuery->colList[i].filters[f];

      int32_t lower = pSingleColFilter->filterInfo.lowerRelOptr;
      int32_t upper = pSingleColFilter->filterInfo.upperRelOptr;

      if (lower == TSDB_RELATION_INVALID && upper == TSDB_RELATION_INVALID) {
        qError("QInfo:%p invalid filter info", pQInfo);
        return TSDB_CODE_QRY_INVALID_MSG;
      }

      int16_t type  = pQuery->colList[i].type;
      int16_t bytes = pQuery->colList[i].bytes;

      // todo refactor
      __filter_func_t *rangeFilterArray = getRangeFilterFuncArray(type);
      __filter_func_t *filterArray = getValueFilterFuncArray(type);

      if (rangeFilterArray == NULL && filterArray == NULL) {
        qError("QInfo:%p failed to get filter function, invalid data type:%d", pQInfo, type);
        return TSDB_CODE_QRY_INVALID_MSG;
      }

      if ((lower == TSDB_RELATION_GREATER_EQUAL || lower == TSDB_RELATION_GREATER) &&
          (upper == TSDB_RELATION_LESS_EQUAL || upper == TSDB_RELATION_LESS)) {
        // both bounds are given: pick the range callback matching the inclusiveness of each bound
        assert(rangeFilterArray != NULL);
        if (lower == TSDB_RELATION_GREATER_EQUAL) {
          if (upper == TSDB_RELATION_LESS_EQUAL) {
            pSingleColFilter->fp = rangeFilterArray[4];
          } else {
            pSingleColFilter->fp = rangeFilterArray[2];
          }
        } else {
          if (upper == TSDB_RELATION_LESS_EQUAL) {
            pSingleColFilter->fp = rangeFilterArray[3];
          } else {
            pSingleColFilter->fp = rangeFilterArray[1];
          }
        }
      } else {  // set callback filter function
        assert(filterArray != NULL);
        if (lower != TSDB_RELATION_INVALID) {
          pSingleColFilter->fp = filterArray[lower];

          if (upper != TSDB_RELATION_INVALID) {
            qError("pQInfo:%p failed to get filter function, invalid filter condition: %d", pQInfo, type);
            return TSDB_CODE_QRY_INVALID_MSG;
          }
        } else {
          pSingleColFilter->fp = filterArray[upper];
        }
      }
      assert(pSingleColFilter->fp != NULL);
      pSingleColFilter->bytes = bytes;
    }

    j++;
  }

  return TSDB_CODE_SUCCESS;
}

5773
/*
 * Recompute colInfo.colIndex of every output expression so that it refers to the position of the
 * column inside pQuery->colList (normal columns) or pQuery->tagColList (tags).
 *
 * Arithmetic expressions and user-defined columns are left untouched. A normal column that cannot
 * be found in colList, or a tag (other than the table name) that cannot be found in tagColList,
 * triggers an assertion.
 */
static void doUpdateExprColumnIndex(SQuery *pQuery) {
  // check the query object itself before looking at its members
  assert(pQuery != NULL && pQuery->pSelectExpr != NULL);

  for (int32_t k = 0; k < pQuery->numOfOutput; ++k) {
    SSqlFuncMsg *pSqlExprMsg = &pQuery->pSelectExpr[k].base;
    if (pSqlExprMsg->functionId == TSDB_FUNC_ARITHM) {
      continue;
    }

    // todo opt performance
    SColIndex *pColIndex = &pSqlExprMsg->colInfo;
    if (TSDB_COL_IS_NORMAL_COL(pColIndex->flag)) {
      int32_t f = 0;
      for (f = 0; f < pQuery->numOfCols; ++f) {
        if (pColIndex->colId == pQuery->colList[f].colId) {
          pColIndex->colIndex = f;
          break;
        }
      }

      assert(f < pQuery->numOfCols);
    } else if (pColIndex->colId == TSDB_UD_COLUMN_INDEX) {
      // do nothing
    } else {
      int32_t f = 0;
      for (f = 0; f < pQuery->numOfTags; ++f) {
        if (pColIndex->colId == pQuery->tagColList[f].colId) {
          pColIndex->colIndex = f;
          break;
        }
      }

      // the table name pseudo column is not part of tagColList
      assert(f < pQuery->numOfTags || pColIndex->colId == TSDB_TBNAME_COLUMN_INDEX);
    }
  }
}

5810
/*
 * Comparison callback ordering STableIdInfo entries by uid.
 * Returns 1, -1 or 0 when the uid of a is greater than, less than or equal to the uid of b.
 */
static int compareTableIdInfo(const void* a, const void* b) {
  const STableIdInfo* lhs = (const STableIdInfo*)a;
  const STableIdInfo* rhs = (const STableIdInfo*)b;

  return (lhs->uid > rhs->uid) - (lhs->uid < rhs->uid);
}

dengyihao's avatar
dengyihao 已提交
5818 5819
// forward declaration: createQInfoImpl() calls freeQInfo() on its failure path
static void freeQInfo(SQInfo *pQInfo);

H
Haojun Liao 已提交
5820 5821 5822
/*
 * Set the capacity (in rows) of the result buffer and the row threshold of a query.
 *
 * Projection queries get room for RESULT_MSG_MIN_SIZE bytes worth of rows, but at least
 * RESULT_MSG_MIN_ROWS rows; all other queries use a fixed capacity of 4096 rows. The threshold
 * is 85% of the capacity.
 */
static void calResultBufSize(SQuery* pQuery) {
  const int32_t RESULT_MSG_MIN_SIZE  = 1024 * (1024 + 512);  // bytes
  const int32_t RESULT_MSG_MIN_ROWS  = 8192;
  const float RESULT_THRESHOLD_RATIO = 0.85f;

  if (!isProjQuery(pQuery)) {
    // in case of non-prj query, a smaller output buffer will be used.
    pQuery->rec.capacity = 4096;
    pQuery->rec.threshold = (int32_t)(pQuery->rec.capacity * RESULT_THRESHOLD_RATIO);
    return;
  }

  int32_t rows = RESULT_MSG_MIN_SIZE / pQuery->rowSize;
  rows = (rows < RESULT_MSG_MIN_ROWS) ? RESULT_MSG_MIN_ROWS : rows;

  pQuery->rec.capacity  = rows;
  pQuery->rec.threshold = (int32_t)(rows * RESULT_THRESHOLD_RATIO);
}

weixin_48148422's avatar
weixin_48148422 已提交
5839
static SQInfo *createQInfoImpl(SQueryTableMsg *pQueryMsg, SArray* pTableIdList, SSqlGroupbyExpr *pGroupbyExpr, SExprInfo *pExprs,
5840
                               STableGroupInfo *pTableGroupInfo, SColumnInfo* pTagCols) {
B
Bomin Zhang 已提交
5841 5842 5843
  int16_t numOfCols = pQueryMsg->numOfCols;
  int16_t numOfOutput = pQueryMsg->numOfOutput;

5844 5845
  SQInfo *pQInfo = (SQInfo *)calloc(1, sizeof(SQInfo));
  if (pQInfo == NULL) {
B
Bomin Zhang 已提交
5846
    goto _cleanup_qinfo;
5847
  }
5848

B
Bomin Zhang 已提交
5849 5850 5851
  // to make sure third party won't overwrite this structure
  pQInfo->signature = pQInfo;
  pQInfo->tableGroupInfo = *pTableGroupInfo;
5852 5853

  SQuery *pQuery = calloc(1, sizeof(SQuery));
B
Bomin Zhang 已提交
5854 5855 5856
  if (pQuery == NULL) {
    goto _cleanup_query;
  }
5857 5858
  pQInfo->runtimeEnv.pQuery = pQuery;

5859
  pQuery->numOfCols       = numOfCols;
H
hjxilinx 已提交
5860
  pQuery->numOfOutput     = numOfOutput;
5861 5862 5863
  pQuery->limit.limit     = pQueryMsg->limit;
  pQuery->limit.offset    = pQueryMsg->offset;
  pQuery->order.order     = pQueryMsg->order;
5864
  pQuery->order.orderColId = pQueryMsg->orderColId;
5865 5866 5867 5868
  pQuery->pSelectExpr     = pExprs;
  pQuery->pGroupbyExpr    = pGroupbyExpr;
  pQuery->intervalTime    = pQueryMsg->intervalTime;
  pQuery->slidingTime     = pQueryMsg->slidingTime;
5869
  pQuery->slidingTimeUnit = pQueryMsg->slidingTimeUnit;
5870
  pQuery->fillType        = pQueryMsg->fillType;
5871
  pQuery->numOfTags       = pQueryMsg->numOfTags;
B
Bomin Zhang 已提交
5872
  pQuery->tagColList      = pTagCols;
H
Haojun Liao 已提交
5873

5874
  pQuery->colList = calloc(numOfCols, sizeof(SSingleColumnFilterInfo));
5875
  if (pQuery->colList == NULL) {
5876
    goto _cleanup;
5877
  }
5878

H
hjxilinx 已提交
5879
  for (int16_t i = 0; i < numOfCols; ++i) {
5880
    pQuery->colList[i] = pQueryMsg->colList[i];
5881
    pQuery->colList[i].filters = tscFilterInfoClone(pQueryMsg->colList[i].filters, pQuery->colList[i].numOfFilters);
H
hjxilinx 已提交
5882
  }
5883

5884
  // calculate the result row size
5885 5886 5887
  for (int16_t col = 0; col < numOfOutput; ++col) {
    assert(pExprs[col].bytes > 0);
    pQuery->rowSize += pExprs[col].bytes;
5888
  }
5889

5890
  doUpdateExprColumnIndex(pQuery);
5891

5892
  int32_t ret = createFilterInfo(pQInfo, pQuery);
5893
  if (ret != TSDB_CODE_SUCCESS) {
5894
    goto _cleanup;
5895 5896 5897
  }

  // prepare the result buffer
5898
  pQuery->sdata = (tFilePage **)calloc(pQuery->numOfOutput, POINTER_BYTES);
5899
  if (pQuery->sdata == NULL) {
5900
    goto _cleanup;
5901 5902
  }

H
Haojun Liao 已提交
5903
  calResultBufSize(pQuery);
5904

5905
  for (int32_t col = 0; col < pQuery->numOfOutput; ++col) {
5906
    assert(pExprs[col].interBytes >= pExprs[col].bytes);
5907 5908

    // allocate additional memory for interResults that are usually larger then final results
5909 5910
    size_t size = (pQuery->rec.capacity + 1) * pExprs[col].bytes + pExprs[col].interBytes + sizeof(tFilePage);
    pQuery->sdata[col] = (tFilePage *)calloc(1, size);
5911
    if (pQuery->sdata[col] == NULL) {
5912
      goto _cleanup;
5913 5914 5915
    }
  }

5916
  if (pQuery->fillType != TSDB_FILL_NONE) {
5917 5918
    pQuery->fillVal = malloc(sizeof(int64_t) * pQuery->numOfOutput);
    if (pQuery->fillVal == NULL) {
5919
      goto _cleanup;
5920 5921 5922
    }

    // the first column is the timestamp
5923
    memcpy(pQuery->fillVal, (char *)pQueryMsg->fillVal, pQuery->numOfOutput * sizeof(int64_t));
5924 5925
  }

dengyihao's avatar
dengyihao 已提交
5926 5927 5928 5929 5930 5931
  size_t numOfGroups = 0;
  if (pTableGroupInfo->pGroupList != NULL) {
    numOfGroups = taosArrayGetSize(pTableGroupInfo->pGroupList);

    pQInfo->tableqinfoGroupInfo.pGroupList = taosArrayInit(numOfGroups, POINTER_BYTES);
    pQInfo->tableqinfoGroupInfo.numOfTables = pTableGroupInfo->numOfTables;
H
Haojun Liao 已提交
5932
    pQInfo->tableqinfoGroupInfo.map = taosHashInit(pTableGroupInfo->numOfTables,
H
Haojun Liao 已提交
5933
                                                   taosGetDefaultHashFunction(TSDB_DATA_TYPE_INT), true, false);
H
Haojun Liao 已提交
5934
  }
5935

weixin_48148422's avatar
weixin_48148422 已提交
5936 5937
  int tableIndex = 0;
  STimeWindow window = pQueryMsg->window;
5938
  taosArraySort(pTableIdList, compareTableIdInfo);
5939

H
Haojun Liao 已提交
5940
  pQInfo->runtimeEnv.interBufSize = getOutputInterResultBufSize(pQuery);
H
Haojun Liao 已提交
5941 5942 5943
  pQInfo->pBuf = calloc(pTableGroupInfo->numOfTables, sizeof(STableQueryInfo));
  int32_t index = 0;

H
hjxilinx 已提交
5944
  for(int32_t i = 0; i < numOfGroups; ++i) {
5945
    SArray* pa = taosArrayGetP(pTableGroupInfo->pGroupList, i);
5946

H
Haojun Liao 已提交
5947
    size_t s = taosArrayGetSize(pa);
5948
    SArray* p1 = taosArrayInit(s, POINTER_BYTES);
B
Bomin Zhang 已提交
5949 5950 5951
    if (p1 == NULL) {
      goto _cleanup;
    }
5952

H
hjxilinx 已提交
5953
    for(int32_t j = 0; j < s; ++j) {
H
Haojun Liao 已提交
5954 5955
      STableKeyInfo* info = taosArrayGet(pa, j);
      STableId* id = TSDB_TABLEID(info->pTable);
5956

H
Haojun Liao 已提交
5957
      STableIdInfo* pTableId = taosArraySearch(pTableIdList, id, compareTableIdInfo);
weixin_48148422's avatar
weixin_48148422 已提交
5958 5959 5960
      if (pTableId != NULL ) {
        window.skey = pTableId->key;
      } else {
B
Bomin Zhang 已提交
5961
        window.skey = pQueryMsg->window.skey;
weixin_48148422's avatar
weixin_48148422 已提交
5962
      }
5963

S
TD-1057  
Shengliang Guan 已提交
5964
      void* buf = (char*)pQInfo->pBuf + index * sizeof(STableQueryInfo);
H
Haojun Liao 已提交
5965
      STableQueryInfo* item = createTableQueryInfo(&pQInfo->runtimeEnv, info->pTable, window, buf);
B
Bomin Zhang 已提交
5966 5967 5968
      if (item == NULL) {
        goto _cleanup;
      }
H
Haojun Liao 已提交
5969

5970
      item->groupIndex = i;
H
hjxilinx 已提交
5971
      taosArrayPush(p1, &item);
H
Haojun Liao 已提交
5972 5973
      taosHashPut(pQInfo->tableqinfoGroupInfo.map, &id->tid, sizeof(id->tid), &item, POINTER_BYTES);
      index += 1;
H
hjxilinx 已提交
5974
    }
5975

5976
    taosArrayPush(pQInfo->tableqinfoGroupInfo.pGroupList, &p1);
H
hjxilinx 已提交
5977
  }
5978

weixin_48148422's avatar
weixin_48148422 已提交
5979
  pQInfo->arrTableIdInfo = taosArrayInit(tableIndex, sizeof(STableIdInfo));
5980 5981
  pQInfo->dataReady = QUERY_RESULT_NOT_READY;
  pthread_mutex_init(&pQInfo->lock, NULL);
weixin_48148422's avatar
weixin_48148422 已提交
5982

5983
  pQuery->pos = -1;
5984
  pQuery->window = pQueryMsg->window;
5985
  colIdCheck(pQuery);
5986

5987
  qDebug("qmsg:%p QInfo:%p created", pQueryMsg, pQInfo);
5988 5989
  return pQInfo;

B
Bomin Zhang 已提交
5990
_cleanup_qinfo:
H
Haojun Liao 已提交
5991
  tsdbDestroyTableGroup(pTableGroupInfo);
B
Bomin Zhang 已提交
5992 5993

_cleanup_query:
5994 5995 5996 5997
  if (pGroupbyExpr != NULL) {
    taosArrayDestroy(pGroupbyExpr->columnInfo);
    free(pGroupbyExpr);
  }
H
Haojun Liao 已提交
5998

S
Shengliang Guan 已提交
5999
  taosTFree(pTagCols);
B
Bomin Zhang 已提交
6000 6001 6002 6003 6004 6005
  for (int32_t i = 0; i < numOfOutput; ++i) {
    SExprInfo* pExprInfo = &pExprs[i];
    if (pExprInfo->pExpr != NULL) {
      tExprTreeDestroy(&pExprInfo->pExpr, NULL);
    }
  }
H
Haojun Liao 已提交
6006

S
Shengliang Guan 已提交
6007
  taosTFree(pExprs);
B
Bomin Zhang 已提交
6008

6009
_cleanup:
dengyihao's avatar
dengyihao 已提交
6010
  freeQInfo(pQInfo);
6011 6012 6013
  return NULL;
}

H
hjxilinx 已提交
6014
static bool isValidQInfo(void *param) {
H
hjxilinx 已提交
6015 6016 6017 6018
  SQInfo *pQInfo = (SQInfo *)param;
  if (pQInfo == NULL) {
    return false;
  }
6019

H
hjxilinx 已提交
6020 6021 6022 6023
  /*
   * pQInfo->signature may be changed by another thread, so we assign value of signature
   * into local variable, then compare by using local variable
   */
6024
  uint64_t sig = (uint64_t)pQInfo->signature;
H
hjxilinx 已提交
6025 6026 6027
  return (sig == (uint64_t)pQInfo);
}

6028
/**
 * Finish the initialization of a freshly created SQInfo.
 *
 * Two situations complete the query immediately without touching the storage
 * engine: an empty time window for the scan order, and an empty table set.
 * Both are checked BEFORE the timestamp buffer is built, so that the early
 * returns do not leave an STSBuf behind that nobody owns (the original order
 * created the buffer first and then returned without handing it off).
 *
 * @param pQueryMsg  the decoded query message; may carry timestamp-comp blocks
 * @param tsdb       storage engine handle, forwarded to doInitQInfo
 * @param vgId       vnode group id, forwarded to doInitQInfo
 * @param pQInfo     query handle to initialize; freed here on failure
 * @param isSTable   whether this is a super table query
 * @return TSDB_CODE_SUCCESS, or the error code reported by doInitQInfo
 */
static int32_t initQInfo(SQueryTableMsg *pQueryMsg, void *tsdb, int32_t vgId, SQInfo *pQInfo, bool isSTable) {
  int32_t code = TSDB_CODE_SUCCESS;
  SQuery *pQuery = pQInfo->runtimeEnv.pQuery;

  // the time window is empty with respect to the scan order: nothing to return
  if ((QUERY_IS_ASC_QUERY(pQuery) && (pQuery->window.skey > pQuery->window.ekey)) ||
      (!QUERY_IS_ASC_QUERY(pQuery) && (pQuery->window.ekey > pQuery->window.skey))) {
    qDebug("QInfo:%p no result in time range %" PRId64 "-%" PRId64 ", order %d", pQInfo, pQuery->window.skey,
           pQuery->window.ekey, pQuery->order.order);
    setQueryStatus(pQuery, QUERY_COMPLETED);
    pQInfo->tableqinfoGroupInfo.numOfTables = 0;
    return TSDB_CODE_SUCCESS;
  }

  if (pQInfo->tableqinfoGroupInfo.numOfTables == 0) {
    qDebug("QInfo:%p no table qualified for tag filter, abort query", pQInfo);
    setQueryStatus(pQuery, QUERY_COMPLETED);
    return TSDB_CODE_SUCCESS;
  }

  STSBuf *pTSBuf = NULL;
  if (pQueryMsg->tsLen > 0) {  // open new file to save the result
    char *tsBlock = (char *) pQueryMsg + pQueryMsg->tsOffset;
    pTSBuf = tsBufCreateFromCompBlocks(tsBlock, pQueryMsg->tsNumOfBlocks, pQueryMsg->tsLen, pQueryMsg->tsOrder);

    tsBufResetPos(pTSBuf);
    bool ret = tsBufNextPos(pTSBuf);
    UNUSED(ret);
  }

  // filter the qualified
  if ((code = doInitQInfo(pQInfo, pTSBuf, tsdb, vgId, isSTable)) != TSDB_CODE_SUCCESS) {
    goto _error;
  }

  return code;

_error:
  // table query ref will be decrease during error handling
  freeQInfo(pQInfo);
  return code;
}

B
Bomin Zhang 已提交
6070 6071 6072 6073 6074 6075 6076 6077 6078 6079 6080 6081
/**
 * Release an array of column filters.
 *
 * For every entry whose filterstr flag is set, the buffer stored in pz is
 * freed first; afterwards the array itself is freed.
 *
 * @param pFilter       filter array (NULL is accepted and ignored)
 * @param numOfFilters  number of entries in pFilter
 */
static void freeColumnFilterInfo(SColumnFilterInfo* pFilter, int32_t numOfFilters) {
  if (pFilter != NULL) {
    for (int32_t idx = 0; idx < numOfFilters; ++idx) {
      SColumnFilterInfo* pEntry = &pFilter[idx];
      if (pEntry->filterstr) {
        free((void*)(pEntry->pz));
      }
    }

    free(pFilter);
  }
}

H
hjxilinx 已提交
6082 6083 6084 6085
/**
 * Release a query handle and everything it owns.
 *
 * This function is also the error-path destructor of createQInfoImpl and
 * initQInfo, so it can be handed a partially constructed object. The SQuery
 * pointer, the sdata pointer array and the filter-info array are therefore
 * checked for NULL before being walked.
 *
 * NOTE(review): teardownQueryRuntimeEnv is still called when pQuery is NULL;
 * confirm that it tolerates a runtime environment without a query.
 *
 * @param pQInfo  handle to destroy; ignored when it does not pass isValidQInfo
 */
static void freeQInfo(SQInfo *pQInfo) {
  if (!isValidQInfo(pQInfo)) {
    return;
  }

  SQuery *pQuery = pQInfo->runtimeEnv.pQuery;
  qDebug("QInfo:%p start to free QInfo", pQInfo);

  // per-column output buffers; the pointer array itself is released at the end
  if (pQuery != NULL && pQuery->sdata != NULL) {
    for (int32_t col = 0; col < pQuery->numOfOutput; ++col) {
      taosTFree(pQuery->sdata[col]);
    }
  }

  teardownQueryRuntimeEnv(&pQInfo->runtimeEnv);

  if (pQuery != NULL) {
    if (pQuery->pFilterInfo != NULL) {
      for (int32_t i = 0; i < pQuery->numOfFilterCols; ++i) {
        SSingleColumnFilterInfo *pColFilter = &pQuery->pFilterInfo[i];
        if (pColFilter->numOfFilters > 0) {
          taosTFree(pColFilter->pFilters);
        }
      }
    }

    if (pQuery->pSelectExpr != NULL) {
      for (int32_t i = 0; i < pQuery->numOfOutput; ++i) {
        SExprInfo* pExprInfo = &pQuery->pSelectExpr[i];

        if (pExprInfo->pExpr != NULL) {
          tExprTreeDestroy(&pExprInfo->pExpr, NULL);
        }
      }

      taosTFree(pQuery->pSelectExpr);
    }

    if (pQuery->fillVal != NULL) {
      taosTFree(pQuery->fillVal);
    }
  }

  // todo refactor, extract method to destroytableDataInfo
  if (pQInfo->tableqinfoGroupInfo.pGroupList != NULL) {
    int32_t numOfGroups = (int32_t)(GET_NUM_OF_TABLEGROUP(pQInfo));
    for (int32_t i = 0; i < numOfGroups; ++i) {
      SArray *p = GET_TABLEGROUP(pQInfo, i);

      size_t num = taosArrayGetSize(p);
      for (size_t j = 0; j < num; ++j) {
        STableQueryInfo* item = taosArrayGetP(p, j);
        destroyTableQueryInfo(item);
      }

      taosArrayDestroy(p);
    }
  }

  taosTFree(pQInfo->pBuf);
  taosArrayDestroy(pQInfo->tableqinfoGroupInfo.pGroupList);
  taosHashCleanup(pQInfo->tableqinfoGroupInfo.map);
  tsdbDestroyTableGroup(&pQInfo->tableGroupInfo);
  taosArrayDestroy(pQInfo->arrTableIdInfo);

  if (pQuery != NULL) {
    if (pQuery->pGroupbyExpr != NULL) {
      taosArrayDestroy(pQuery->pGroupbyExpr->columnInfo);
      taosTFree(pQuery->pGroupbyExpr);
    }

    taosTFree(pQuery->tagColList);
    taosTFree(pQuery->pFilterInfo);

    if (pQuery->colList != NULL) {
      for (int32_t i = 0; i < pQuery->numOfCols; i++) {
        SColumnInfo* column = pQuery->colList + i;
        freeColumnFilterInfo(column->filters, column->numOfFilters);
      }
      taosTFree(pQuery->colList);
    }

    taosTFree(pQuery->sdata);
    taosTFree(pQuery);
  }

  // invalidate the handle so that later isValidQInfo checks reject it
  pQInfo->signature = 0;

  qDebug("QInfo:%p QInfo is freed", pQInfo);

  taosTFree(pQInfo);
}

H
hjxilinx 已提交
6165
/**
 * Compute the payload size in bytes of the current result set.
 *
 * For an ordinary query the size is rowSize * (*numOfRows).
 * For a ts-comp query with at least one row, the result lives in the file
 * whose path is stored in sdata[0]->data: the file size is returned and also
 * written back into *numOfRows. If stat() on that file fails, an error is
 * logged and 0 is returned.
 * TODO handle the case that the file is too large to send back one time
 *
 * @param pQInfo     query handle
 * @param numOfRows  in: number of rows; out: file size for a ts-comp query
 * @return payload size in bytes
 */
static size_t getResultSize(SQInfo *pQInfo, int64_t *numOfRows) {
  SQuery *pQuery = pQInfo->runtimeEnv.pQuery;

  // ordinary result set (or empty ts-comp result): plain row arithmetic
  if (!isTSCompQuery(pQuery) || (*numOfRows) <= 0) {
    return pQuery->rowSize * (*numOfRows);
  }

  struct stat fileStat;
  if (stat(pQuery->sdata[0]->data, &fileStat) != 0) {
    qError("QInfo:%p failed to get file info, path:%s, reason:%s", pQInfo, pQuery->sdata[0]->data, strerror(errno));
    return 0;
  }

  *numOfRows = fileStat.st_size;
  return fileStat.st_size;
}
6186

H
hjxilinx 已提交
6187 6188 6189
/**
 * Copy the current result set into the response buffer.
 *
 * For a ts-comp query the result is the content of the temporary file named
 * by sdata[0]->data; the file is read into data and then unlinked. For every
 * other query the rows are copied by doCopyQueryResultToMsg.
 *
 * The file size obtained from lseek() is validated before being used as the
 * read length: a failed lseek() returns -1, which must not be converted to a
 * huge size_t and passed to read(). A short or failed read is logged.
 *
 * @param pQInfo  query handle
 * @param data    destination buffer inside the response message
 * @return always TSDB_CODE_SUCCESS
 */
static int32_t doDumpQueryResult(SQInfo *pQInfo, char *data) {
  // the remained number of retrieved rows, not the interpolated result
  SQuery *pQuery = pQInfo->runtimeEnv.pQuery;

  // load data from file to msg buffer
  if (isTSCompQuery(pQuery)) {
    int32_t fd = open(pQuery->sdata[0]->data, O_RDONLY, 0666);

    // make sure file exist
    if (FD_VALID(fd)) {
      int32_t s = (int32_t)lseek(fd, 0, SEEK_END);
      qDebug("QInfo:%p ts comp data return, file:%s, size:%d", pQInfo, pQuery->sdata[0]->data, s);

      if (s < 0) {
        // could not determine the file size, nothing can be read safely
        qError("QInfo:%p failed to get size of ts-comp file, path:%s, reason:%s", pQInfo, pQuery->sdata[0]->data,
               strerror(errno));
      } else if (lseek(fd, 0, SEEK_SET) >= 0) {
        int64_t sz = (int64_t)read(fd, data, (size_t)s);
        if (sz != (int64_t)s) {
          qError("QInfo:%p failed to read ts-comp file, path:%s, expect:%d, read:%" PRId64, pQInfo,
                 pQuery->sdata[0]->data, s, sz);
        }
      } else {
        // todo handle error
      }

      close(fd);
      unlink(pQuery->sdata[0]->data);
    } else {
      // todo return the error code to client and handle invalid fd
      qError("QInfo:%p failed to open tmp file to send ts-comp data to client, path:%s, reason:%s", pQInfo,
             pQuery->sdata[0]->data, strerror(errno));
      if (fd != -1) {
        close(fd);
      }
    }

    // all data returned, set query over
    if (Q_STATUS_EQUAL(pQuery->status, QUERY_COMPLETED)) {
      setQueryStatus(pQuery, QUERY_OVER);
    }
  } else {
    doCopyQueryResultToMsg(pQInfo, (int32_t)pQuery->rec.rows, data);
  }

  pQuery->rec.total += pQuery->rec.rows;
  qDebug("QInfo:%p current numOfRes rows:%" PRId64 ", total:%" PRId64, pQInfo, pQuery->rec.rows, pQuery->rec.total);

  if (pQuery->limit.limit > 0 && pQuery->limit.limit == pQuery->rec.total) {
    qDebug("QInfo:%p results limitation reached, limitation:%"PRId64, pQInfo, pQuery->limit.limit);
    setQueryStatus(pQuery, QUERY_OVER);
  }

  return TSDB_CODE_SUCCESS;
}

6237 6238 6239 6240 6241 6242 6243
/*
 * Per-vnode registry of live query handles. Created by qOpenQueryMgmt and
 * destroyed by qCleanupQueryMgmt.
 */
typedef struct SQueryMgmt {
  SCacheObj      *qinfoPool;      // query handle pool
  int32_t         vgId;           // vnode group id this manager was opened for
  bool            closed;         // set to true by qQueryMgmtNotifyClosed; no new handle is registered afterwards
  pthread_mutex_t lock;           // initialized/destroyed with the manager; the lock/unlock calls around 'closed' are currently commented out
} SQueryMgmt;

6244
/**
 * Build a query handle from a query message.
 *
 * Steps: decode the message, build the output expressions and the group-by
 * expression, resolve the set of tables to scan (single table, super table by
 * tag condition, or an explicit id list), create the SQInfo and initialize it.
 *
 * All intermediate objects decoded from the message are released at _over.
 * pExprs, pGroupbyExpr and pTagColumnInfo are reset to NULL right after the
 * call to createQInfoImpl so that _over does not free them a second time.
 *
 * A query type that is neither a table query nor a (multi-)super-table query
 * is rejected with TSDB_CODE_QRY_INVALID_MSG; the assert is kept for debug
 * builds, but a build without asserts no longer continues with an empty
 * table group.
 *
 * @param tsdb       storage engine handle (must not be NULL)
 * @param vgId       vnode group id
 * @param pQueryMsg  query message (must not be NULL)
 * @param pQInfo     out: the created handle, or NULL on failure
 * @return TSDB_CODE_SUCCESS or an error code
 */
int32_t qCreateQueryInfo(void* tsdb, int32_t vgId, SQueryTableMsg* pQueryMsg, qinfo_t* pQInfo) {
  assert(pQueryMsg != NULL && tsdb != NULL);

  int32_t code = TSDB_CODE_SUCCESS;

  char            *tagCond  = NULL;
  char            *tbnameCond = NULL;
  SArray          *pTableIdList = NULL;
  SSqlFuncMsg    **pExprMsg = NULL;
  SExprInfo       *pExprs   = NULL;
  SColIndex       *pGroupColIndex = NULL;
  SColumnInfo     *pTagColumnInfo = NULL;
  SSqlGroupbyExpr *pGroupbyExpr   = NULL;

  code = convertQueryMsg(pQueryMsg, &pTableIdList, &pExprMsg, &tagCond, &tbnameCond, &pGroupColIndex, &pTagColumnInfo);
  if (code != TSDB_CODE_SUCCESS) {
    goto _over;
  }

  if (pQueryMsg->numOfTables <= 0) {
    qError("Invalid number of tables to query, numOfTables:%d", pQueryMsg->numOfTables);
    code = TSDB_CODE_QRY_INVALID_MSG;
    goto _over;
  }

  if (pTableIdList == NULL || taosArrayGetSize(pTableIdList) == 0) {
    qError("qmsg:%p, SQueryTableMsg wrong format", pQueryMsg);
    code = TSDB_CODE_QRY_INVALID_MSG;
    goto _over;
  }

  if ((code = createQFunctionExprFromMsg(pQueryMsg, &pExprs, pExprMsg, pTagColumnInfo)) != TSDB_CODE_SUCCESS) {
    goto _over;
  }

  pGroupbyExpr = createGroupbyExprFromMsg(pQueryMsg, pGroupColIndex, &code);
  if ((pGroupbyExpr == NULL && pQueryMsg->numOfGroupCols != 0) || code != TSDB_CODE_SUCCESS) {
    goto _over;
  }

  bool isSTableQuery = false;
  STableGroupInfo tableGroupInfo = {0};
  int64_t st = taosGetTimestampUs();

  if (TSDB_QUERY_HAS_TYPE(pQueryMsg->queryType, TSDB_QUERY_TYPE_TABLE_QUERY)) {
    STableIdInfo *id = taosArrayGet(pTableIdList, 0);

    qDebug("qmsg:%p query normal table, uid:%"PRId64", tid:%d", pQueryMsg, id->uid, id->tid);
    if ((code = tsdbGetOneTableGroup(tsdb, id->uid, pQueryMsg->window.skey, &tableGroupInfo)) != TSDB_CODE_SUCCESS) {
      goto _over;
    }
  } else if (TSDB_QUERY_HAS_TYPE(pQueryMsg->queryType, TSDB_QUERY_TYPE_MULTITABLE_QUERY|TSDB_QUERY_TYPE_STABLE_QUERY)) {
    isSTableQuery = true;

    // also note there's possibility that only one table in the super table
    if (!TSDB_QUERY_HAS_TYPE(pQueryMsg->queryType, TSDB_QUERY_TYPE_MULTITABLE_QUERY)) {
      STableIdInfo *id = taosArrayGet(pTableIdList, 0);

      // group by normal column, do not pass the group by condition to tsdb to group table into different group
      int32_t numOfGroupByCols = pQueryMsg->numOfGroupCols;
      if (pQueryMsg->numOfGroupCols == 1 && !TSDB_COL_IS_TAG(pGroupColIndex->flag)) {
        numOfGroupByCols = 0;
      }

      qDebug("qmsg:%p query stable, uid:%"PRId64", tid:%d", pQueryMsg, id->uid, id->tid);
      code = tsdbQuerySTableByTagCond(tsdb, id->uid, pQueryMsg->window.skey, tagCond, pQueryMsg->tagCondLen,
          pQueryMsg->tagNameRelType, tbnameCond, &tableGroupInfo, pGroupColIndex, numOfGroupByCols);

      if (code != TSDB_CODE_SUCCESS) {
        qError("qmsg:%p failed to query stable, reason: %s", pQueryMsg, tstrerror(code));
        goto _over;
      }
    } else {
      code = tsdbGetTableGroupFromIdList(tsdb, pTableIdList, &tableGroupInfo);
      if (code != TSDB_CODE_SUCCESS) {
        goto _over;
      }

      qDebug("qmsg:%p query on %" PRIzu " tables in one group from client", pQueryMsg, tableGroupInfo.numOfTables);
    }

    int64_t el = taosGetTimestampUs() - st;
    qDebug("qmsg:%p tag filter completed, numOfTables:%" PRIzu ", elapsed time:%"PRId64"us", pQueryMsg, tableGroupInfo.numOfTables, el);
  } else {
    // unknown query type: abort in debug builds, reject the message otherwise
    assert(0);
    code = TSDB_CODE_QRY_INVALID_MSG;
    goto _over;
  }

  (*pQInfo) = createQInfoImpl(pQueryMsg, pTableIdList, pGroupbyExpr, pExprs, &tableGroupInfo, pTagColumnInfo);

  // these objects must not be released again at _over after createQInfoImpl has been called
  pExprs = NULL;
  pGroupbyExpr = NULL;
  pTagColumnInfo = NULL;

  if ((*pQInfo) == NULL) {
    code = TSDB_CODE_QRY_OUT_OF_MEMORY;
    goto _over;
  }

  code = initQInfo(pQueryMsg, tsdb, vgId, *pQInfo, isSTableQuery);

_over:
  free(tagCond);
  free(tbnameCond);
  free(pGroupColIndex);
  if (pGroupbyExpr != NULL) {
    taosArrayDestroy(pGroupbyExpr->columnInfo);
    free(pGroupbyExpr);
  }
  free(pTagColumnInfo);
  free(pExprs);
  free(pExprMsg);
  taosArrayDestroy(pTableIdList);

  for (int32_t i = 0; i < pQueryMsg->numOfCols; i++) {
    SColumnInfo* column = pQueryMsg->colList + i;
    freeColumnFilterInfo(column->filters, column->numOfFilters);
  }

  // pQInfo has already been freed in initQInfo on failure, but *pQInfo may still hold the stale pointer
  if (code != TSDB_CODE_SUCCESS) {
    *pQInfo = NULL;
  }

  // if failed to add ref for all tables in this query, abort current query
  return code;
}

H
Haojun Liao 已提交
6370
/**
 * Destroy a query handle: print its cost summary, then free it.
 * Handles that do not pass isValidQInfo are silently ignored.
 *
 * @param qHandle  query handle
 */
void qDestroyQueryInfo(qinfo_t qHandle) {
  SQInfo* pInfo = (SQInfo*) qHandle;

  if (isValidQInfo(pInfo)) {
    qDebug("QInfo:%p query completed", pInfo);
    queryCostStatis(pInfo);   // print the query cost summary
    freeQInfo(pInfo);
  }
}

6381 6382 6383 6384 6385 6386 6387 6388 6389 6390
/**
 * Mark the result as ready and give up ownership of the handle.
 *
 * Under the handle lock, dataReady is set to QUERY_RESULT_READY and the
 * presence of a pending response context is sampled. After the lock is
 * released the owner field is reset to 0; the caller must be the current
 * owner thread.
 *
 * @param pQInfo  query handle owned by the calling thread
 * @return true if a response context is registered on the handle
 */
static bool doBuildResCheck(SQInfo* pQInfo) {
  pthread_mutex_lock(&pQInfo->lock);
  pQInfo->dataReady = QUERY_RESULT_READY;
  bool hasRspContext = (pQInfo->rspContext != NULL);
  pthread_mutex_unlock(&pQInfo->lock);

  // clear qhandle owner
  assert(pQInfo->owner == taosGetPthreadId());
  pQInfo->owner = 0;

  return hasRspContext;
}

6398
/**
 * Execute one round of a query on the calling thread.
 *
 * The calling thread first claims the handle by atomically swapping its id
 * into the owner field. If another thread already owns the handle, the code
 * is set to TSDB_CODE_QRY_IN_EXEC and false is returned. Every other exit
 * goes through doBuildResCheck, which releases the ownership again.
 *
 * A non-zero return of setjmp (reached via longjmp on runtimeEnv.env) is
 * recorded as the error code of the query.
 *
 * @param qinfo  query handle (must be a valid SQInfo)
 * @return false if the handle is owned by another thread, otherwise the
 *         value returned by doBuildResCheck
 */
bool qTableQuery(qinfo_t qinfo) {
  SQInfo *pQInfo = (SQInfo *)qinfo;
  assert(pQInfo && pQInfo->signature == pQInfo);

  int64_t self = taosGetPthreadId();
  int64_t prevOwner = atomic_val_compare_exchange_64(&pQInfo->owner, 0, self);
  if (prevOwner != 0) {
    qError("QInfo:%p qhandle is now executed by thread:%p", pQInfo, (void*) prevOwner);
    pQInfo->code = TSDB_CODE_QRY_IN_EXEC;
    return false;
  }

  if (IS_QUERY_KILLED(pQInfo)) {
    qDebug("QInfo:%p it is already killed, abort", pQInfo);
    return doBuildResCheck(pQInfo);
  }

  if (pQInfo->tableqinfoGroupInfo.numOfTables == 0) {
    qDebug("QInfo:%p no table exists for query, abort", pQInfo);
    setQueryStatus(pQInfo->runtimeEnv.pQuery, QUERY_COMPLETED);
    return doBuildResCheck(pQInfo);
  }

  // error occurs, record the error code and return to client
  int32_t jmpCode = setjmp(pQInfo->runtimeEnv.env);
  if (jmpCode != TSDB_CODE_SUCCESS) {
    pQInfo->code = jmpCode;
    qDebug("QInfo:%p query abort due to error/cancel occurs, code:%s", pQInfo, tstrerror(pQInfo->code));
    return doBuildResCheck(pQInfo);
  }

  qDebug("QInfo:%p query task is launched", pQInfo);

  // dispatch to the executor that matches the query kind
  SQueryRuntimeEnv* pRuntimeEnv = &pQInfo->runtimeEnv;
  if (onlyQueryTags(pRuntimeEnv->pQuery)) {
    assert(pRuntimeEnv->pQueryHandle == NULL);
    buildTagQueryResult(pQInfo);
  } else if (pRuntimeEnv->stableQuery) {
    stableQueryImpl(pQInfo);
  } else {
    tableQueryImpl(pQInfo);
  }

  SQuery* pQuery = pRuntimeEnv->pQuery;
  if (IS_QUERY_KILLED(pQInfo)) {
    qDebug("QInfo:%p query is killed", pQInfo);
  } else if (pQuery->rec.rows == 0) {
    qDebug("QInfo:%p over, %" PRIzu " tables queried, %"PRId64" rows are returned", pQInfo, pQInfo->tableqinfoGroupInfo.numOfTables, pQuery->rec.total);
  } else {
    qDebug("QInfo:%p query paused, %" PRId64 " rows returned, numOfTotal:%" PRId64 " rows",
           pQInfo, pQuery->rec.rows, pQuery->rec.total + pQuery->rec.rows);
  }

  return doBuildResCheck(pQInfo);
}

6454
/**
 * Check whether a result is ready to be sent for a retrieve request.
 *
 * If the result is ready, *buildRes is set to true. Otherwise *buildRes is
 * false and pRspContext is stored on the handle. The ready flag, the stored
 * context and the returned code are all accessed under the handle lock.
 *
 * @param qinfo        query handle
 * @param buildRes     out: true if the response can be built right now
 * @param pRspContext  response context to remember when the result is not ready
 * @return TSDB_CODE_QRY_INVALID_QHANDLE for an invalid handle, otherwise the
 *         current code of the query
 */
int32_t qRetrieveQueryResultInfo(qinfo_t qinfo, bool* buildRes, void* pRspContext) {
  SQInfo *pQInfo = (SQInfo *)qinfo;
  if (pQInfo == NULL || !isValidQInfo(pQInfo)) {
    return TSDB_CODE_QRY_INVALID_QHANDLE;
  }

  *buildRes = false;

  if (IS_QUERY_KILLED(pQInfo)) {
    qDebug("QInfo:%p query is killed, code:%d", pQInfo, pQInfo->code);
    return pQInfo->code;
  }

  SQuery *pQuery = pQInfo->runtimeEnv.pQuery;

  pthread_mutex_lock(&pQInfo->lock);

  bool ready = (pQInfo->dataReady == QUERY_RESULT_READY);
  *buildRes = ready;

  if (ready) {
    qDebug("QInfo:%p retrieve result info, rowsize:%d, rows:%"PRId64", code:%d", pQInfo, pQuery->rowSize, pQuery->rec.rows,
           pQInfo->code);
  } else {
    qDebug("QInfo:%p retrieve req set query return result after paused", pQInfo);
    pQInfo->rspContext = pRspContext;
  }

  int32_t code = pQInfo->code;
  pthread_mutex_unlock(&pQInfo->lock);

  return code;
}
6484

6485
/**
 * Allocate and fill the retrieve response for the current result set.
 *
 * The message is sized as: response header + result payload + one int32 +
 * one STableIdInfo per entry of arrTableIdInfo. Header fields are written in
 * network byte order. After the dump, the pending response context is
 * cleared and the handle is marked as "result not ready" again.
 *
 * @param qinfo         query handle
 * @param pRsp          out: response allocated with rpcMallocCont
 * @param contLen       out: total length of the response
 * @param continueExec  out: true if more results remain to be produced;
 *                      left untouched on the two early error returns
 * @return TSDB_CODE_QRY_INVALID_QHANDLE, TSDB_CODE_QRY_OUT_OF_MEMORY, or the
 *         current code of the query
 */
int32_t qDumpRetrieveResult(qinfo_t qinfo, SRetrieveTableRsp **pRsp, int32_t *contLen, bool* continueExec) {
  SQInfo *pQInfo = (SQInfo *)qinfo;
  if (pQInfo == NULL || !isValidQInfo(pQInfo)) {
    return TSDB_CODE_QRY_INVALID_QHANDLE;
  }

  SQueryRuntimeEnv* pRuntimeEnv = &pQInfo->runtimeEnv;
  SQuery *pQuery = pRuntimeEnv->pQuery;

  // payload + table-id section
  size_t payload = getResultSize(pQInfo, &pQuery->rec.rows);
  payload += sizeof(int32_t);
  payload += sizeof(STableIdInfo) * taosArrayGetSize(pQInfo->arrTableIdInfo);

  *contLen = (int32_t)(payload + sizeof(SRetrieveTableRsp));

  // todo proper handle failed to allocate memory,
  // current solution only avoid crash, but cannot return error code to client
  SRetrieveTableRsp *pMsg = (SRetrieveTableRsp *)rpcMallocCont(*contLen);
  *pRsp = pMsg;
  if (pMsg == NULL) {
    return TSDB_CODE_QRY_OUT_OF_MEMORY;
  }

  pMsg->numOfRows = htonl((int32_t)pQuery->rec.rows);

  // the offset is only reported for a successful query
  if (pQInfo->code == TSDB_CODE_SUCCESS) {
    pMsg->offset = htobe64(pQuery->limit.offset);
  } else {
    pMsg->offset = 0;
  }
  pMsg->useconds  = htobe64(pRuntimeEnv->summary.elapsedTime);
  pMsg->precision = htons(pQuery->precision);

  if (pQuery->rec.rows > 0 && pQInfo->code == TSDB_CODE_SUCCESS) {
    doDumpQueryResult(pQInfo, pMsg->data);
  } else {
    setQueryStatus(pQuery, QUERY_OVER);
  }

  pQInfo->rspContext = NULL;
  pQInfo->dataReady  = QUERY_RESULT_NOT_READY;

  bool finished = IS_QUERY_KILLED(pQInfo) || Q_STATUS_EQUAL(pQuery->status, QUERY_OVER);
  if (finished) {
    *continueExec = false;
    pMsg->completed = 1;  // notify no more result to client
  } else {
    *continueExec = true;
    qDebug("QInfo:%p has more results waits for client retrieve", pQInfo);
  }

  return pQInfo->code;
}
H
hjxilinx 已提交
6538

6539 6540 6541 6542 6543 6544 6545 6546 6547 6548 6549
/**
 * Tell whether a query has nothing more to produce.
 *
 * @param qinfo  query handle
 * @return TSDB_CODE_QRY_INVALID_QHANDLE for an invalid handle; otherwise 1 if
 *         the query is killed or its status contains QUERY_OVER, else 0
 */
int32_t qQueryCompleted(qinfo_t qinfo) {
  SQInfo *pInfo = (SQInfo *)qinfo;
  if (pInfo == NULL || !isValidQInfo(pInfo)) {
    return TSDB_CODE_QRY_INVALID_QHANDLE;
  }

  if (IS_QUERY_KILLED(pInfo)) {
    return 1;
  }

  SQuery* pQuery = pInfo->runtimeEnv.pQuery;
  return Q_STATUS_EQUAL(pQuery->status, QUERY_OVER);
}

H
Haojun Liao 已提交
6550
/**
 * Kill a query and wait until no thread owns its handle any more.
 *
 * The kill flag is set first; the function then polls the owner field every
 * 100 ms until it reads 0. The owner is reset by the executing thread in
 * doBuildResCheck.
 *
 * @param qinfo  query handle
 * @return TSDB_CODE_QRY_INVALID_QHANDLE for an invalid handle, otherwise
 *         TSDB_CODE_SUCCESS
 */
int32_t qKillQuery(qinfo_t qinfo) {
  SQInfo *pInfo = (SQInfo *)qinfo;
  if (pInfo == NULL || !isValidQInfo(pInfo)) {
    return TSDB_CODE_QRY_INVALID_QHANDLE;
  }

  setQueryKilled(pInfo);

  // Wait for the query executing thread to stop.
  // Once the query is stopped, the owner of the handle is cleared immediately.
  for (;;) {
    if (pInfo->owner == 0) {
      break;
    }
    taosMsleep(100);
  }

  return TSDB_CODE_SUCCESS;
}

6568 6569 6570 6571 6572 6573 6574 6575 6576 6577 6578 6579 6580 6581 6582 6583
/**
 * Write one tag value into a result buffer cell.
 *
 * BINARY/NCHAR values are copied with their full var-data length (as given
 * by varDataTLen); every other type copies exactly 'bytes' bytes. A NULL
 * value is written as the type-specific NULL representation instead.
 *
 * @param output  destination cell
 * @param val     source value, or NULL
 * @param type    TSDB data type of the value
 * @param bytes   width of the value for fixed-length types
 */
static void doSetTagValueToResultBuf(char* output, const char* val, int16_t type, int16_t bytes) {
  bool isVarType = (type == TSDB_DATA_TYPE_BINARY || type == TSDB_DATA_TYPE_NCHAR);

  if (val == NULL) {
    if (isVarType) {
      setVardataNull(output, type);
    } else {
      setNull(output, type, bytes);
    }
    return;
  }

  if (isVarType) {
    memcpy(output, val, varDataTLen(val));
  } else {
    // todo here stop will cause client crash
    memcpy(output, val, bytes);
  }
}

H
hjxilinx 已提交
6584 6585 6586
/**
 * Produce the result of a query that only touches tags / table names.
 *
 * Three kinds of queries are served, selected by the function id of the
 * first output expression:
 *  - TSDB_FUNC_TID_TAG: one var-data cell per table holding
 *    [int16 0][uid][tid][vgId][tag value or table name];
 *  - TSDB_FUNC_COUNT:   a single row with the number of tables;
 *  - anything else:     one row per table with the requested tag values
 *    and/or table name, honoring limit and offset.
 *
 * The header fields of the TID_TAG cell are written with memcpy: the cell
 * starts after a var-data length header and the fields are packed back to
 * back, so the destination addresses are not suitably aligned for direct
 * int64_t/int32_t stores. The byte layout is unchanged.
 *
 * pQInfo->tableIndex is advanced by the number of tables visited, so a later
 * call continues where this one stopped.
 *
 * @param pQInfo  query handle; at most one table group is expected
 */
static void buildTagQueryResult(SQInfo* pQInfo) {
  SQueryRuntimeEnv *pRuntimeEnv = &pQInfo->runtimeEnv;
  SQuery *          pQuery = pRuntimeEnv->pQuery;

  size_t numOfGroup = GET_NUM_OF_TABLEGROUP(pQInfo);
  assert(numOfGroup == 0 || numOfGroup == 1);

  if (numOfGroup == 0) {
    return;
  }

  SArray* pa = GET_TABLEGROUP(pQInfo, 0);

  size_t num = taosArrayGetSize(pa);
  assert(num == pQInfo->tableqinfoGroupInfo.numOfTables);

  int32_t count = 0;
  int32_t functionId = pQuery->pSelectExpr[0].base.functionId;
  if (functionId == TSDB_FUNC_TID_TAG) { // return the tags & table Id
    assert(pQuery->numOfOutput == 1);

    SExprInfo* pExprInfo = &pQuery->pSelectExpr[0];
    int32_t rsize = pExprInfo->bytes;
    count = 0;

    int16_t bytes = pExprInfo->bytes;
    int16_t type = pExprInfo->type;

    // use the width/type of the matching tag column when there is one
    for(int32_t i = 0; i < pQuery->numOfTags; ++i) {
      if (pQuery->tagColList[i].colId == pExprInfo->base.colInfo.colId) {
        bytes = pQuery->tagColList[i].bytes;
        type = pQuery->tagColList[i].type;
        break;
      }
    }

    while(pQInfo->tableIndex < num && count < pQuery->rec.capacity) {
      int32_t i = pQInfo->tableIndex++;
      STableQueryInfo *item = taosArrayGetP(pa, i);

      char *output = pQuery->sdata[0]->data + count * rsize;
      varDataSetLen(output, rsize - VARSTR_HEADER_SIZE);

      output = varDataVal(output);
      STableId* id = TSDB_TABLEID(item->pTable);

      // packed, possibly unaligned header: copy byte-wise instead of storing through casted pointers
      int16_t reserved = 0;
      memcpy(output, &reserved, sizeof(reserved));
      output += sizeof(int16_t);

      int64_t uid = id->uid;
      memcpy(output, &uid, sizeof(uid));
      output += sizeof(id->uid);

      int32_t tid = id->tid;
      memcpy(output, &tid, sizeof(tid));
      output += sizeof(id->tid);

      int32_t vgId = pQInfo->vgId;
      memcpy(output, &vgId, sizeof(vgId));
      output += sizeof(pQInfo->vgId);

      if (pExprInfo->base.colInfo.colId == TSDB_TBNAME_COLUMN_INDEX) {
        char* data = tsdbGetTableName(item->pTable);
        memcpy(output, data, varDataTLen(data));
      } else {
        char* data = tsdbGetTableTagVal(item->pTable, pExprInfo->base.colInfo.colId, type, bytes);
        doSetTagValueToResultBuf(output, data, type, bytes);
      }

      count += 1;
    }

    qDebug("QInfo:%p create (tableId, tag) info completed, rows:%d", pQInfo, count);

  } else if (functionId == TSDB_FUNC_COUNT) {// handle the "count(tbname)" query
    *(int64_t*) pQuery->sdata[0]->data = num;

    count = 1;
    SET_STABLE_QUERY_OVER(pQInfo);
    qDebug("QInfo:%p create count(tbname) query, res:%d rows:1", pQInfo, count);
  } else {  // return only the tags|table name etc.
    count = 0;
    SSchema tbnameSchema = tGetTableNameColumnSchema();

    int32_t maxNumOfTables = (int32_t)pQuery->rec.capacity;
    if (pQuery->limit.limit >= 0 && pQuery->limit.limit < pQuery->rec.capacity) {
      maxNumOfTables = (int32_t)pQuery->limit.limit;
    }

    while(pQInfo->tableIndex < num && count < maxNumOfTables) {
      int32_t i = pQInfo->tableIndex++;

      // discard current result due to offset
      if (pQuery->limit.offset > 0) {
        pQuery->limit.offset -= 1;
        continue;
      }

      SExprInfo* pExprInfo = pQuery->pSelectExpr;
      STableQueryInfo* item = taosArrayGetP(pa, i);

      char *data = NULL, *dst = NULL;
      int16_t type = 0, bytes = 0;
      for(int32_t j = 0; j < pQuery->numOfOutput; ++j) {

        if (pExprInfo[j].base.colInfo.colId == TSDB_TBNAME_COLUMN_INDEX) {
          bytes = tbnameSchema.bytes;
          type = tbnameSchema.type;

          data = tsdbGetTableName(item->pTable);
          dst = pQuery->sdata[j]->data + count * tbnameSchema.bytes;
        } else {
          type = pExprInfo[j].type;
          bytes = pExprInfo[j].bytes;

          data = tsdbGetTableTagVal(item->pTable, pExprInfo[j].base.colInfo.colId, type, bytes);
          dst = pQuery->sdata[j]->data + count * pExprInfo[j].bytes;
        }

        doSetTagValueToResultBuf(dst, data, type, bytes);
      }
      count += 1;
    }

    qDebug("QInfo:%p create tag values results completed, rows:%d", pQInfo, count);
  }

  pQuery->rec.rows = count;
  setQueryStatus(pQuery, QUERY_COMPLETED);
}

6713 6714 6715 6716 6717 6718 6719
/**
 * Return the response context currently stored on a query handle.
 *
 * @param qinfo  query handle (must not be NULL)
 * @return the stored rspContext pointer; may be NULL
 */
void* qGetResultRetrieveMsg(qinfo_t qinfo) {
  SQInfo* pInfo = (SQInfo*) qinfo;
  assert(pInfo != NULL);

  void* pContext = pInfo->rspContext;
  return pContext;
}

6720 6721 6722 6723 6724 6725 6726
/**
 * Release callback for entries of the query handle cache (passed to
 * taosCacheInit in qOpenQueryMgmt).
 *
 * @param qhandle  pointer to the cached slot holding the query handle
 */
void freeqinfoFn(void *qhandle) {
  void** pSlot = qhandle;

  if (pSlot != NULL && *pSlot != NULL) {
    // kill the query first, then destroy the handle
    qKillQuery(*pSlot);
    qDestroyQueryInfo(*pSlot);
  }
}

void* qOpenQueryMgmt(int32_t vgId) {
H
Haojun Liao 已提交
6731
  const int32_t REFRESH_HANDLE_INTERVAL = 60; // every 30 seconds, refresh handle pool
6732 6733 6734 6735

  char cacheName[128] = {0};
  sprintf(cacheName, "qhandle_%d", vgId);

6736
  SQueryMgmt* pQueryMgmt = calloc(1, sizeof(SQueryMgmt));
6737

6738 6739 6740 6741 6742
  pQueryMgmt->qinfoPool = taosCacheInit(TSDB_DATA_TYPE_BIGINT, REFRESH_HANDLE_INTERVAL, true, freeqinfoFn, cacheName);
  pQueryMgmt->closed    = false;
  pQueryMgmt->vgId      = vgId;

  pthread_mutex_init(&pQueryMgmt->lock, NULL);
6743 6744

  qDebug("vgId:%d, open querymgmt success", vgId);
6745
  return pQueryMgmt;
6746 6747
}

H
Haojun Liao 已提交
6748
/**
 * Cache traversal callback (passed to taosCacheRefresh in
 * qQueryMgmtNotifyClosed): kill the query stored in one slot.
 *
 * @param handle  pointer to the cached slot holding the query handle
 */
static void queryMgmtKillQueryFn(void* handle) {
  void** pSlot = (void**)handle;
  void*  qinfo = *pSlot;

  qKillQuery(qinfo);
}

void qQueryMgmtNotifyClosed(void* pQMgmt) {
6754 6755 6756 6757 6758 6759 6760
  if (pQMgmt == NULL) {
    return;
  }

  SQueryMgmt* pQueryMgmt = pQMgmt;
  qDebug("vgId:%d, set querymgmt closed, wait for all queries cancelled", pQueryMgmt->vgId);

H
Haojun Liao 已提交
6761
//  pthread_mutex_lock(&pQueryMgmt->lock);
6762
  pQueryMgmt->closed = true;
H
Haojun Liao 已提交
6763
//  pthread_mutex_unlock(&pQueryMgmt->lock);
6764

H
Haojun Liao 已提交
6765
  taosCacheRefresh(pQueryMgmt->qinfoPool, queryMgmtKillQueryFn);
6766 6767 6768 6769 6770 6771 6772 6773 6774 6775 6776 6777 6778 6779 6780 6781 6782
}

/**
 * Destroy a query manager. The manager must have been closed before with
 * qQueryMgmtNotifyClosed.
 *
 * @param pQMgmt  query manager (NULL is accepted and ignored)
 */
void qCleanupQueryMgmt(void* pQMgmt) {
  SQueryMgmt* pMgmt = pQMgmt;
  if (pMgmt == NULL) {
    return;
  }

  assert(pMgmt->closed);

  // remember the id for the final log line, the manager is gone by then
  int32_t vgId = pMgmt->vgId;

  // detach the pool from the manager before cleaning it up
  SCacheObj* pPool = pMgmt->qinfoPool;
  pMgmt->qinfoPool = NULL;
  taosCacheCleanup(pPool);

  pthread_mutex_destroy(&pMgmt->lock);
  taosTFree(pMgmt);

  qDebug("vgId:%d queryMgmt cleanup completed", vgId);
}

6788
void** qRegisterQInfo(void* pMgmt, uint64_t qInfo) {
6789 6790 6791 6792
  if (pMgmt == NULL) {
    return NULL;
  }

6793
  const int32_t DEFAULT_QHANDLE_LIFE_SPAN = tsShellActivityTimer * 2 * 1000;
6794

6795 6796
  SQueryMgmt *pQueryMgmt = pMgmt;
  if (pQueryMgmt->qinfoPool == NULL) {
6797
    qError("QInfo:%p failed to add qhandle into qMgmt, since qMgmt is closed", (void *)qInfo);
6798 6799 6800
    return NULL;
  }

H
Haojun Liao 已提交
6801
//  pthread_mutex_lock(&pQueryMgmt->lock);
6802
  if (pQueryMgmt->closed) {
H
Haojun Liao 已提交
6803
//    pthread_mutex_unlock(&pQueryMgmt->lock);
6804
    qError("QInfo:%p failed to add qhandle into cache, since qMgmt is colsing", (void *)qInfo);
6805 6806
    return NULL;
  } else {
6807 6808 6809
    uint64_t handleVal = (uint64_t) qInfo;

    void** handle = taosCachePut(pQueryMgmt->qinfoPool, &handleVal, sizeof(int64_t), &qInfo, POINTER_BYTES, DEFAULT_QHANDLE_LIFE_SPAN);
H
Haojun Liao 已提交
6810
//    pthread_mutex_unlock(&pQueryMgmt->lock);
6811 6812 6813 6814 6815

    return handle;
  }
}

6816
void** qAcquireQInfo(void* pMgmt, uint64_t key) {
6817 6818 6819 6820 6821 6822
  SQueryMgmt *pQueryMgmt = pMgmt;

  if (pQueryMgmt->qinfoPool == NULL || pQueryMgmt->closed) {
    return NULL;
  }

6823
  void** handle = taosCacheAcquireByKey(pQueryMgmt->qinfoPool, &key, sizeof(uint64_t));
6824 6825 6826 6827 6828 6829 6830
  if (handle == NULL || *handle == NULL) {
    return NULL;
  } else {
    return handle;
  }
}

H
Haojun Liao 已提交
6831
void** qReleaseQInfo(void* pMgmt, void* pQInfo, bool freeHandle) {
6832 6833 6834 6835 6836
  SQueryMgmt *pQueryMgmt = pMgmt;
  if (pQueryMgmt->qinfoPool == NULL) {
    return NULL;
  }

H
Haojun Liao 已提交
6837
  taosCacheRelease(pQueryMgmt->qinfoPool, pQInfo, freeHandle);
6838 6839 6840
  return 0;
}

6841