qExecutor.c 226.2 KB
Newer Older
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15
/*
 * Copyright (c) 2019 TAOS Data, Inc. <jhtao@taosdata.com>
 *
 * This program is free software: you can use, redistribute, and/or modify
 * it under the terms of the GNU Affero General Public License, version 3
 * or later ("AGPL"), as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.
 *
 * You should have received a copy of the GNU Affero General Public License
 * along with this program. If not, see <http://www.gnu.org/licenses/>.
 */
#include "os.h"
H
Haojun Liao 已提交
16 17
#include "qFill.h"
#include "taosmsg.h"
18 19
#include "tcache.h"
#include "tglobal.h"
20

H
Haojun Liao 已提交
21
#include "exception.h"
22
#include "hash.h"
H
Haojun Liao 已提交
23 24 25 26
#include "qAst.h"
#include "qExecutor.h"
#include "qResultbuf.h"
#include "qUtil.h"
H
hjxilinx 已提交
27
#include "query.h"
S
slguan 已提交
28
#include "queryLog.h"
29 30
#include "tlosertree.h"
#include "tscompression.h"
31 32 33 34 35

/**
 * Check whether the primary column is loaded by default; otherwise, the program
 * is forced to load the primary column explicitly.
 */
36 37
/* True when (p) and (s) share at least one set bit (a mask overlap test, not strict equality). */
#define Q_STATUS_EQUAL(p, s)  (((p) & (s)) != 0)
/* True when the TSDB_COL_TAG bit is set in the column flag (f). */
#define TSDB_COL_IS_TAG(f)    (((f)&TSDB_COL_TAG) != 0)

/* True when the forward-direction factor of the query's order equals QUERY_ASC_FORWARD_STEP. */
#define QUERY_IS_ASC_QUERY(q) (GET_FORWARD_DIRECTION_FACTOR((q)->order.order) == QUERY_ASC_FORWARD_STEP)

/* Test or set the scanFlag field of a runtime environment. */
#define IS_MASTER_SCAN(runtime)        ((runtime)->scanFlag == MASTER_SCAN)
#define IS_REVERSE_SCAN(runtime)       ((runtime)->scanFlag == REVERSE_SCAN)
#define SET_MASTER_SCAN_FLAG(runtime)  ((runtime)->scanFlag = MASTER_SCAN)
#define SET_REVERSE_SCAN_FLAG(runtime) ((runtime)->scanFlag = REVERSE_SCAN)

/* Recover the enclosing SQInfo from a pointer to its embedded runtimeEnv member. */
#define GET_QINFO_ADDR(x) ((SQInfo *)((char *)(x)-offsetof(SQInfo, runtimeEnv)))

/* Position reached after moving (index) steps of size (step) from (query)->pos. */
#define GET_COL_DATA_POS(query, index, step) ((query)->pos + (index) * (step))
/* Flip (n) in place between TSDB_ORDER_ASC and TSDB_ORDER_DESC; note that (n) is evaluated more than once. */
#define SWITCH_ORDER(n) (((n) = ((n) == TSDB_ORDER_ASC) ? TSDB_ORDER_DESC : TSDB_ORDER_ASC))

#define SDATA_BLOCK_INITIALIZER (SDataBlockInfo) {{0}, 0}

52
/* Query status bits; each value is a distinct bit so that several can be combined in one mask. */
enum {
  // when query starts to execute, this status will be set
  QUERY_NOT_COMPLETED = 0x1u,

  /* result output buffer is full, current query is paused.
   * this status only exists in group-by clause and diff/add/division/multiply/ query.
   */
  QUERY_RESBUF_FULL = 0x2u,

  /* query is over
   * 1. this status is used in one row result query process, e.g., count/sum/first/last/ avg...etc.
   * 2. when all data within queried time window has been handled, it is also denoted as query_completed
   */
  QUERY_COMPLETED = 0x4u,

  /* when the result is not completely returned to client, this status will
   * usually be used in case of interval query with interpolation option
   */
  QUERY_OVER = 0x8u,
};
72 73

/* Outcome codes for a timestamp join comparison. */
enum {
  TS_JOIN_TS_EQUAL       = 0,
  TS_JOIN_TS_NOT_EQUALS  = 1,
  TS_JOIN_TAG_NOT_EQUALS = 2,
};

79
typedef struct {
80 81 82 83 84 85
  int32_t     status;       // query status
  TSKEY       lastKey;      // the lastKey value before query executed
  STimeWindow w;            // whole query time window
  STimeWindow curWindow;    // current query window
  int32_t     windowIndex;  // index of active time window result for interval query
  STSCursor   cur;
86 87
} SQueryStatusInfo;

H
Haojun Liao 已提交
88
#if 0
/*
 * Disabled allocation fault injectors: when enabled, malloc/calloc are redirected to
 * wrappers that return NULL whenever rand() % 5 is 0 or 1, and forward to the real
 * allocator otherwise.
 */
static UNUSED_FUNC void *u_malloc (size_t size) {
  uint32_t v = rand();
  if (v % 5 <= 1) {
    return NULL;
  } else {
    return malloc(size);
  }
}

static UNUSED_FUNC void* u_calloc(size_t num, size_t size) {
  uint32_t v = rand();
  if (v % 5 <= 1) {
    return NULL;
  } else {
    return calloc(num, size);
  }
}

// the redirections must come after the wrappers so the wrappers call the real allocators
#define calloc  u_calloc
#define malloc  u_malloc
#endif
H
Haojun Liao 已提交
110

111
/* Clear the bits given by (st) in (q)->status. */
#define CLEAR_QUERY_STATUS(q, st)   ((q)->status &= (~(st)))
/* Number of entries in, and the entry at (_index) of, (q)->tableqinfoGroupInfo.pGroupList. */
#define GET_NUM_OF_TABLEGROUP(q)    taosArrayGetSize((q)->tableqinfoGroupInfo.pGroupList)
#define GET_TABLEGROUP(q, _index)   ((SArray*) taosArrayGetP((q)->tableqinfoGroupInfo.pGroupList, (_index)))

115
static void setQueryStatus(SQuery *pQuery, int8_t status);
H
Haojun Liao 已提交
116
static void finalizeQueryResult(SQueryRuntimeEnv *pRuntimeEnv);
117

H
Haojun Liao 已提交
118
/* True when the query has a positive intervalTime. */
#define QUERY_IS_INTERVAL_QUERY(_q) ((_q)->intervalTime > 0)

// previous time window may not be of the same size of pQuery->intervalTime
/*
 * Advance (tw) in place by one sliding step in the query's scan direction and reset
 * its ekey so that the window spans intervalTime.
 * The macro-local variable has a suffixed name so that it cannot shadow a variable
 * named `factor` used in the caller's argument expressions.
 */
#define GET_NEXT_TIMEWINDOW(_q, tw)                                      \
  do {                                                                   \
    int32_t twFactor_ = GET_FORWARD_DIRECTION_FACTOR((_q)->order.order); \
    (tw)->skey += ((_q)->slidingTime * twFactor_);                       \
    (tw)->ekey = (tw)->skey + ((_q)->intervalTime - 1);                  \
  } while (0)

H
hjxilinx 已提交
128
// todo move to utility
129
static int32_t mergeIntoGroupResultImpl(SQInfo *pQInfo, SArray *group);
130

H
hjxilinx 已提交
131
static void setWindowResOutputBuf(SQueryRuntimeEnv *pRuntimeEnv, SWindowResult *pResult);
H
Haojun Liao 已提交
132
static void setWindowResOutputBufInitCtx(SQueryRuntimeEnv *pRuntimeEnv, SWindowResult *pResult);
133 134
static void resetMergeResultBuf(SQuery *pQuery, SQLFunctionCtx *pCtx, SResultInfo *pResultInfo);
static bool functionNeedToExecute(SQueryRuntimeEnv *pRuntimeEnv, SQLFunctionCtx *pCtx, int32_t functionId);
135

136 137 138
static void setExecParams(SQuery *pQuery, SQLFunctionCtx *pCtx, void* inputData, TSKEY *tsCol, SDataBlockInfo* pBlockInfo,
                          SDataStatis *pStatis, void *param, int32_t colIndex);

139
static void initCtxOutputBuf(SQueryRuntimeEnv *pRuntimeEnv);
H
Haojun Liao 已提交
140
static void destroyTableQueryInfo(STableQueryInfo *pTableQueryInfo);
141 142
static void resetCtxOutputBuf(SQueryRuntimeEnv *pRuntimeEnv);
static bool hasMainOutput(SQuery *pQuery);
H
hjxilinx 已提交
143
static void buildTagQueryResult(SQInfo *pQInfo);
144

145
static int32_t setAdditionalInfo(SQInfo *pQInfo, void *pTable, STableQueryInfo *pTableQueryInfo);
146
static int32_t flushFromResultBuf(SQInfo *pQInfo);
147

148
/*
 * Check the element at row elemPos of every filtered column against that column's filters.
 *
 * A column qualifies when its element is not NULL and at least one of its filter
 * elements accepts it; the row qualifies only when every filtered column qualifies.
 *
 * @param pQuery   query whose pFilterInfo[0 .. numOfFilterCols) is evaluated
 * @param elemPos  row position; the element address is pData + info.bytes * elemPos
 * @return true when all filtered columns accept the row (also when there are no filter columns)
 */
bool doFilterData(SQuery *pQuery, int32_t elemPos) {
  for (int32_t k = 0; k < pQuery->numOfFilterCols; ++k) {
    SSingleColumnFilterInfo *pFilterInfo = &pQuery->pFilterInfo[k];

    char *pElem = pFilterInfo->pData + pFilterInfo->info.bytes * elemPos;
    if (isNull(pElem, pFilterInfo->info.type)) {
      return false;
    }

    // the filters of a single column are alternatives: the first match is enough
    bool qualified = false;
    for (int32_t j = 0; j < pFilterInfo->numOfFilters; ++j) {
      SColumnFilterElem *pFilterElem = &pFilterInfo->pFilters[j];

      if (pFilterElem->fp(pFilterElem, pElem, pElem)) {
        qualified = true;
        break;
      }
    }

    if (!qualified) {
      return false;
    }
  }

  return true;
}

/*
 * Return the largest numOfRes among the output function contexts of the query.
 *
 * @param pRuntimeEnv  runtime environment; pQuery and pCtx[0 .. numOfOutput) are read
 * @return the maximum numOfRes found, or 0 when no context contributes
 */
int64_t getNumOfResult(SQueryRuntimeEnv *pRuntimeEnv) {
  SQuery *pQuery = pRuntimeEnv->pQuery;
  bool    hasMainFunction = hasMainOutput(pQuery);

  int64_t maxOutput = 0;
  for (int32_t j = 0; j < pQuery->numOfOutput; ++j) {
    int32_t functionId = pQuery->pSelectExpr[j].base.functionId;

    /*
     * ts, tag, tagprj function can not decide the output number of current query
     * the number of output result is decided by main output
     */
    if (hasMainFunction &&
        (functionId == TSDB_FUNC_TS || functionId == TSDB_FUNC_TAG || functionId == TSDB_FUNC_TAGPRJ)) {
      continue;
    }

    SResultInfo *pResInfo = GET_RES_INFO(&pRuntimeEnv->pCtx[j]);
    if (pResInfo != NULL && maxOutput < pResInfo->numOfRes) {
      maxOutput = pResInfo->numOfRes;
    }
  }

  assert(maxOutput >= 0);
  return maxOutput;
}

202 203 204 205 206 207 208 209 210
/*
 * The number of results needs to be updated because the offset value has been updated.
 *
 * Overwrites numOfRes in the result info of every output context, except those whose
 * function is TS, TAG, TAGPRJ or TS_DUMMY. Each updated context is asserted to
 * currently hold more than numOfRes results.
 *
 * @param pRuntimeEnv  runtime environment; pQuery->numOfOutput and pCtx[] are read
 * @param numOfRes     the new result count
 */
void updateNumOfResult(SQueryRuntimeEnv *pRuntimeEnv, int32_t numOfRes) {
  SQuery *pQuery = pRuntimeEnv->pQuery;

  for (int32_t j = 0; j < pQuery->numOfOutput; ++j) {
    SResultInfo *pResInfo = GET_RES_INFO(&pRuntimeEnv->pCtx[j]);

    int16_t functionId = pRuntimeEnv->pCtx[j].functionId;
    if (functionId == TSDB_FUNC_TS || functionId == TSDB_FUNC_TAG || functionId == TSDB_FUNC_TAGPRJ ||
        functionId == TSDB_FUNC_TS_DUMMY) {
      continue;
    }

    assert(pResInfo->numOfRes > numOfRes);
    pResInfo->numOfRes = numOfRes;
  }
}

222
/*
 * Map a group index to the id used for that group's results.
 *
 * @param groupIndex  index of the group
 * @return GROUP_RESULT_ID_BASE + groupIndex * GROUP_RESULT_ID_STRIDE
 */
static int32_t getGroupResultId(int32_t groupIndex) {
  enum {
    GROUP_RESULT_ID_BASE   = 20000000,  // id returned for group index 0
    GROUP_RESULT_ID_STRIDE = 10000      // distance between the ids of two adjacent groups
  };

  return GROUP_RESULT_ID_BASE + (groupIndex * GROUP_RESULT_ID_STRIDE);
}

/*
 * Tell whether the group-by expression contains a column flagged TSDB_COL_NORMAL.
 *
 * @param pGroupbyExpr  group-by expression; may be NULL
 * @return true when such a column exists; false for NULL or an empty column list
 */
bool isGroupbyNormalCol(SSqlGroupbyExpr *pGroupbyExpr) {
  if (pGroupbyExpr == NULL || pGroupbyExpr->numOfGroupCols == 0) {
    return false;
  }

  for (int32_t i = 0; i < pGroupbyExpr->numOfGroupCols; ++i) {
    SColIndex *pColIndex = taosArrayGet(pGroupbyExpr->columnInfo, i);
    if (pColIndex->flag == TSDB_COL_NORMAL) {
      //make sure the normal column locates at the second position if tbname exists in group by clause
      if (pGroupbyExpr->numOfGroupCols > 1) {
        assert(pColIndex->colIndex > 0);
      }

      return true;
    }
  }

  return false;
}

/*
 * Return the data type of the first group-by column flagged TSDB_COL_NORMAL.
 *
 * The column id of that group-by column is looked up in pQuery->colList.
 *
 * @param pQuery        query whose colList[0 .. numOfCols) is searched
 * @param pGroupbyExpr  group-by expression; must not be NULL
 * @return the matching column type, or TSDB_DATA_TYPE_NULL when there is no normal
 *         group-by column or its id is not found in colList
 */
int16_t getGroupbyColumnType(SQuery *pQuery, SSqlGroupbyExpr *pGroupbyExpr) {
  assert(pGroupbyExpr != NULL);

  int32_t colId = -2;  // initial value used while no normal group-by column has been found
  int16_t type = TSDB_DATA_TYPE_NULL;

  for (int32_t i = 0; i < pGroupbyExpr->numOfGroupCols; ++i) {
    SColIndex *pColIndex = taosArrayGet(pGroupbyExpr->columnInfo, i);
    if (pColIndex->flag == TSDB_COL_NORMAL) {
      colId = pColIndex->colId;
      break;
    }
  }

  for (int32_t i = 0; i < pQuery->numOfCols; ++i) {
    if (colId == pQuery->colList[i].colId) {
      type = pQuery->colList[i].type;
      break;
    }
  }

  return type;
}

/*
 * Tell whether the query combines at least one selectivity function with at least one
 * TAG_DUMMY / TS_DUMMY output.
 *
 * @param pQuery  query whose pSelectExpr[0 .. numOfOutput) is inspected
 * @return true when both kinds of output are present
 */
bool isSelectivityWithTagsQuery(SQuery *pQuery) {
  bool    hasTags = false;
  int32_t numOfSelectivity = 0;

  for (int32_t i = 0; i < pQuery->numOfOutput; ++i) {
    int32_t functId = pQuery->pSelectExpr[i].base.functionId;
    if (functId == TSDB_FUNC_TAG_DUMMY || functId == TSDB_FUNC_TS_DUMMY) {
      hasTags = true;
      continue;
    }

    if ((aAggs[functId].nStatus & TSDB_FUNCSTATE_SELECTIVITY) != 0) {
      numOfSelectivity++;
    }
  }

  if (numOfSelectivity > 0 && hasTags) {
    return true;
  }

  return false;
}

294 295 296 297 298 299 300 301 302 303 304
/*
 * A query is a projection query when every output expression uses either
 * TSDB_FUNC_PRJ or TSDB_FUNC_TAGPRJ. A query without outputs yields true.
 */
bool isProjQuery(SQuery *pQuery) {
  int32_t idx = 0;

  while (idx < pQuery->numOfOutput) {
    int32_t fid = pQuery->pSelectExpr[idx].base.functionId;
    bool    isProjection = (fid == TSDB_FUNC_PRJ) || (fid == TSDB_FUNC_TAGPRJ);

    if (!isProjection) {
      return false;
    }

    ++idx;
  }

  return true;
}

305
/* True when the first output expression of the query uses TSDB_FUNC_TS_COMP. */
bool isTSCompQuery(SQuery *pQuery) {
  int32_t firstFuncId = pQuery->pSelectExpr[0].base.functionId;
  return firstFuncId == TSDB_FUNC_TS_COMP;
}
306

307 308 309 310
/*
 * Apply the LIMIT clause to the rows produced in the current round.
 *
 * When a positive limit is set and rec.total + rec.rows exceeds it, rec.rows is cut
 * down to the rows still allowed and the query status is set to QUERY_COMPLETED.
 *
 * @param pRuntimeEnv  runtime environment; pQuery->limit and pQuery->rec are used
 * @return true when the rows were truncated and the query was marked completed
 */
static bool limitResults(SQueryRuntimeEnv* pRuntimeEnv) {
  SQInfo* pQInfo = GET_QINFO_ADDR(pRuntimeEnv);
  SQuery* pQuery = pRuntimeEnv->pQuery;

  if ((pQuery->limit.limit > 0) && (pQuery->rec.total + pQuery->rec.rows > pQuery->limit.limit)) {
    pQuery->rec.rows = pQuery->limit.limit - pQuery->rec.total;

    qDebug("QInfo:%p discard remain data due to result limitation, limit:%"PRId64", current return:%" PRId64 ", total:%"PRId64,
        pQInfo, pQuery->limit.limit, pQuery->rec.rows, pQuery->rec.total + pQuery->rec.rows);
    assert(pQuery->rec.rows >= 0);
    setQueryStatus(pQuery, QUERY_COMPLETED);
    return true;
  }

  return false;
}

/*
 * Tell whether any output expression of the query uses TSDB_FUNC_TOP or TSDB_FUNC_BOTTOM.
 *
 * @param pQuery  query whose pSelectExpr[0 .. numOfOutput) is inspected
 * @return true when a top/bottom function is found
 */
static bool isTopBottomQuery(SQuery *pQuery) {
  for (int32_t i = 0; i < pQuery->numOfOutput; ++i) {
    int32_t functionId = pQuery->pSelectExpr[i].base.functionId;
    if (functionId == TSDB_FUNC_TS) {
      continue;
    }

    if (functionId == TSDB_FUNC_TOP || functionId == TSDB_FUNC_BOTTOM) {
      return true;
    }
  }

  return false;
}

H
Haojun Liao 已提交
339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356
/*
 * Tell whether the query output involves tag values: either the single output is a
 * TSDB_FUNC_TS_COMP expression, or some output expression refers to a tag column.
 */
static bool hasTagValOutput(SQuery* pQuery) {
  SExprInfo *pFirst = &pQuery->pSelectExpr[0];

  bool onlyTsComp = (pQuery->numOfOutput == 1) && (pFirst->base.functionId == TSDB_FUNC_TS_COMP);
  if (onlyTsComp) {
    return true;
  }

  // set tag value, by which the results are aggregated.
  int32_t idx = 0;
  while (idx < pQuery->numOfOutput) {
    SExprInfo *pExpr = &pQuery->pSelectExpr[idx];

    // ts_comp column required the tag value for join filter
    if (TSDB_COL_IS_TAG(pExpr->base.colInfo.flag)) {
      return true;
    }

    ++idx;
  }

  return false;
}

357 358 359 360 361 362 363 364
/**
 * @param pQuery
 * @param col
 * @param pDataBlockInfo
 * @param pStatis
 * @param pColStatis
 * @return
 */
H
Haojun Liao 已提交
365
static bool hasNullValue(SColIndex* pColIndex, SDataStatis *pStatis, SDataStatis **pColStatis) {
H
Haojun Liao 已提交
366
  if (pStatis != NULL && !TSDB_COL_IS_TAG(pColIndex->flag)) {
H
Haojun Liao 已提交
367 368
    *pColStatis = &pStatis[pColIndex->colIndex];
    assert((*pColStatis)->colId == pColIndex->colId);
H
hjxilinx 已提交
369 370
  } else {
    *pColStatis = NULL;
371
  }
372

H
Haojun Liao 已提交
373 374 375 376
  if (TSDB_COL_IS_TAG(pColIndex->flag) || pColIndex->colId == PRIMARYKEY_TIMESTAMP_COL_INDEX) {
    return false;
  }

377 378 379
  if ((*pColStatis) != NULL && (*pColStatis)->numOfNull == 0) {
    return false;
  }
380

381 382 383 384
  return true;
}

/*
 * Find, or create during the master scan, the window result slot registered for a key.
 *
 * The key is looked up in pWindowResInfo->hashList. On a hit, curIndex is set to the
 * stored slot index. On a miss, nothing is created unless masterscan is true; otherwise
 * a new slot is appended (growing the result array when it is full) and registered in
 * the hash list under the key.
 *
 * @param pRuntimeEnv     runtime environment; its jump buffer is used on allocation failure
 * @param pWindowResInfo  window result container that is searched and possibly extended
 * @param pData           key bytes
 * @param bytes           key length
 * @param masterscan      whether new slots may be added
 * @return the window result at curIndex, or NULL when the key is unknown and
 *         masterscan is false
 */
static SWindowResult *doSetTimeWindowFromKey(SQueryRuntimeEnv *pRuntimeEnv, SWindowResInfo *pWindowResInfo, char *pData,
                                             int16_t bytes, bool masterscan) {
  SQuery *pQuery = pRuntimeEnv->pQuery;

  int32_t *p1 = (int32_t *) taosHashGet(pWindowResInfo->hashList, pData, bytes);
  if (p1 != NULL) {
    pWindowResInfo->curIndex = *p1;
  } else {
    if (!masterscan) {  // not master scan, do not add new timewindow
      return NULL;
    }

    // more than the capacity, reallocate the resources
    if (pWindowResInfo->size >= pWindowResInfo->capacity) {
      int64_t newCap = pWindowResInfo->capacity * 1.5;

      /*
       * capacity * 1.5 truncates back to the same value for a capacity of 0 or 1, which
       * would leave the array unchanged and make the slot appended below fall outside
       * of it; always reserve room for at least one more slot.
       */
      if (newCap <= pWindowResInfo->size) {
        newCap = (int64_t)pWindowResInfo->size + 1;
      }

      SWindowResult *t = realloc(pWindowResInfo->pResult, newCap * sizeof(SWindowResult));
      if (t == NULL) {
        // the previous array is still referenced by pWindowResInfo->pResult
        longjmp(pRuntimeEnv->env, TSDB_CODE_QRY_OUT_OF_MEMORY);
      }

      pWindowResInfo->pResult = t;

      // zero the new slots before each of them is initialized
      int32_t inc = newCap - pWindowResInfo->capacity;
      memset(&pWindowResInfo->pResult[pWindowResInfo->capacity], 0, sizeof(SWindowResult) * inc);

      for (int32_t i = pWindowResInfo->capacity; i < newCap; ++i) {
        createQueryResultInfo(pQuery, &pWindowResInfo->pResult[i], pRuntimeEnv->stableQuery, pRuntimeEnv->interBufSize);
      }

      pWindowResInfo->capacity = newCap;
    }

    // add a new result set for a new group
    pWindowResInfo->curIndex = pWindowResInfo->size++;
    taosHashPut(pWindowResInfo->hashList, pData, bytes, (char *)&pWindowResInfo->curIndex, sizeof(int32_t));
  }

  return getWindowResult(pWindowResInfo, pWindowResInfo->curIndex);
}

/*
 * Get the correct time window according to the handled timestamp.
 *
 * Starts from the window of the current slot (or, when curIndex is -1, from prevSKey)
 * and, if ts lies outside of it, shifts the window start by whole multiples of
 * slidingTime until the window of length intervalTime covers ts.
 *
 * @param pWindowResInfo  window result container providing curIndex / prevSKey
 * @param ts              timestamp that the returned window must cover
 * @param pQuery          query providing intervalTime, slidingTime, window and order
 * @return a window with skey <= ts <= ekey
 */
static STimeWindow getActiveTimeWindow(SWindowResInfo *pWindowResInfo, int64_t ts, SQuery *pQuery) {
  STimeWindow w = {0};

  if (pWindowResInfo->curIndex == -1) {  // the first window, from the previous stored value
    w.skey = pWindowResInfo->prevSKey;
    w.ekey = w.skey + pQuery->intervalTime - 1;
  } else {
    int32_t slot = curTimeWindow(pWindowResInfo);
    w = getWindowResult(pWindowResInfo, slot)->window;
  }

  if (w.skey > ts || w.ekey < ts) {
    int64_t st = w.skey;

    // window starts after ts: move backwards by the needed number of sliding steps (rounded up)
    if (st > ts) {
      st -= ((st - ts + pQuery->slidingTime - 1) / pQuery->slidingTime) * pQuery->slidingTime;
    }

    // window ends before ts: move forwards by the needed number of sliding steps (rounded up)
    int64_t et = st + pQuery->intervalTime - 1;
    if (et < ts) {
      st += ((ts - et + pQuery->slidingTime - 1) / pQuery->slidingTime) * pQuery->slidingTime;
    }

    w.skey = st;
    w.ekey = w.skey + pQuery->intervalTime - 1;
  }

  /*
   * query border check, skey should not be bounded by the query time range, since the value skey will
   * be used as the time window index value. So we only change ekey of time window accordingly.
   */
  if (w.ekey > pQuery->window.ekey && QUERY_IS_ASC_QUERY(pQuery)) {
    w.ekey = pQuery->window.ekey;
  }

  assert(ts >= w.skey && ts <= w.ekey);

  return w;
}

/*
 * Reserve one row in the result buffer for a window result that has no position yet.
 *
 * The last page listed for sid is reused while it holds fewer than numOfRowsPerPage
 * rows; otherwise (or when sid has no page) a new page is requested.
 *
 * @param pWindowRes        window result; pos.pageId / pos.rowId are filled in
 * @param pResultBuf        result buffer that provides the pages
 * @param sid               id whose page list is used
 * @param numOfRowsPerPage  number of rows after which a page counts as full
 * @return 0 on success or when a position was already assigned, -1 when no page is available
 */
static int32_t addNewWindowResultBuf(SWindowResult *pWindowRes, SDiskbasedResultBuf *pResultBuf, int32_t sid,
                                     int32_t numOfRowsPerPage) {
  if (pWindowRes->pos.pageId != -1) {
    return 0;
  }

  tFilePage *pData = NULL;

  // in the first scan, new space needed for results
  int32_t pageId = -1;
  SIDList list = getDataBufPagesIdList(pResultBuf, sid);

  if (taosArrayGetSize(list) == 0) {
    pData = getNewDataBuf(pResultBuf, sid, &pageId);
  } else {
    SPageInfo* pi = getLastPageInfo(list);
    pData = getResBufPage(pResultBuf, pi->pageId);
    pageId = pi->pageId;

    if (pData->num >= numOfRowsPerPage) {
      // release current page first, and prepare the next one
      releaseResBufPageInfo(pResultBuf, pi);

      pData = getNewDataBuf(pResultBuf, sid, &pageId);
      if (pData != NULL) {
        assert(pData->num == 0);  // number of elements must be 0 for new allocated buffer
      }
    }
  }

  if (pData == NULL) {
    return -1;
  }

  // set the number of rows in current disk page
  if (pWindowRes->pos.pageId == -1) {  // not allocated yet, allocate new buffer
    pWindowRes->pos.pageId = pageId;
    pWindowRes->pos.rowId = pData->num++;

    assert(pWindowRes->pos.pageId >= 0);
  }

  return 0;
}

static int32_t setWindowOutputBufByKey(SQueryRuntimeEnv *pRuntimeEnv, SWindowResInfo *pWindowResInfo, int32_t sid,
511
                                       STimeWindow *win, bool masterscan, bool* newWind) {
512 513
  assert(win->skey <= win->ekey);
  SDiskbasedResultBuf *pResultBuf = pRuntimeEnv->pResultBuf;
514

515 516
  SWindowResult *pWindowRes = doSetTimeWindowFromKey(pRuntimeEnv, pWindowResInfo, (char *)&win->skey,
      TSDB_KEYSIZE, masterscan);
517
  if (pWindowRes == NULL) {
518 519 520
    *newWind = false;

    return masterscan? -1:0;
521
  }
522

523
  *newWind = true;
H
Haojun Liao 已提交
524

525 526 527
  // not assign result buffer yet, add new result buffer
  if (pWindowRes->pos.pageId == -1) {
    int32_t ret = addNewWindowResultBuf(pWindowRes, pResultBuf, sid, pRuntimeEnv->numOfRowsPerPage);
H
Haojun Liao 已提交
528
    if (ret != TSDB_CODE_SUCCESS) {
529 530 531
      return -1;
    }
  }
532

533 534
  // set time window for current result
  pWindowRes->window = *win;
535

H
Haojun Liao 已提交
536
  setWindowResOutputBufInitCtx(pRuntimeEnv, pWindowRes);
537 538 539 540 541 542 543 544
  return TSDB_CODE_SUCCESS;
}

/* Return the status record of the window result stored in the given slot; slot must be within [0, size). */
static SWindowStatus *getTimeWindowResStatus(SWindowResInfo *pWindowResInfo, int32_t slot) {
  assert(slot >= 0 && slot < pWindowResInfo->size);

  SWindowResult *pRes = pWindowResInfo->pResult + slot;
  return &pRes->status;
}

H
Haojun Liao 已提交
545
/*
 * Count the rows that can be consumed from position pos, in scan direction, up to ekey.
 *
 * @param numOfRows  number of rows in pData
 * @param searchFn   search routine that locates ekey within a run of timestamps
 * @param ekey       end key of the range to consume
 * @param pos        start position inside pData
 * @param order      TSDB_ORDER_ASC to scan forwards, anything else to scan backwards
 * @param pData      timestamp column of the block
 * @return the number of rows to step over; asserted to be positive
 */
static FORCE_INLINE int32_t getForwardStepsInBlock(int32_t numOfRows, __block_search_fn_t searchFn, TSKEY ekey, int16_t pos,
                                      int16_t order, int64_t *pData) {
  int32_t forwardStep = 0;

  if (order == TSDB_ORDER_ASC) {
    // search only the tail [pos, numOfRows); the returned index is relative to pos
    int32_t end = searchFn((char*) &pData[pos], numOfRows - pos, ekey, order);
    if (end >= 0) {
      forwardStep = end;

      // the row that equals ekey belongs to the range as well
      if (pData[end + pos] == ekey) {
        forwardStep += 1;
      }
    }
  } else {
    // search only the head [0, pos]
    int32_t end = searchFn((char *)pData, pos + 1, ekey, order);
    if (end >= 0) {
      forwardStep = pos - end;

      if (pData[end] == ekey) {
        forwardStep += 1;
      }
    }
  }

  assert(forwardStep > 0);
  return forwardStep;
}

/**
 * NOTE: the query status only set for the first scan of master scan.
 */
576
static int32_t doCheckQueryCompleted(SQueryRuntimeEnv *pRuntimeEnv, TSKEY lastKey, SWindowResInfo *pWindowResInfo) {
577
  SQuery *pQuery = pRuntimeEnv->pQuery;
H
Haojun Liao 已提交
578
  if (pRuntimeEnv->scanFlag != MASTER_SCAN || (!QUERY_IS_INTERVAL_QUERY(pQuery))) {
579
    return pWindowResInfo->size;
580
  }
581

582
  // no qualified results exist, abort check
583 584
  int32_t numOfClosed = 0;
  
585
  if (pWindowResInfo->size == 0) {
586
    return pWindowResInfo->size;
587
  }
588

589
  // query completed
H
hjxilinx 已提交
590 591
  if ((lastKey >= pQuery->current->win.ekey && QUERY_IS_ASC_QUERY(pQuery)) ||
      (lastKey <= pQuery->current->win.ekey && !QUERY_IS_ASC_QUERY(pQuery))) {
592
    closeAllTimeWindow(pWindowResInfo);
593

594 595 596 597
    pWindowResInfo->curIndex = pWindowResInfo->size - 1;
    setQueryStatus(pQuery, QUERY_COMPLETED | QUERY_RESBUF_FULL);
  } else {  // set the current index to be the last unclosed window
    int32_t i = 0;
598
    int64_t skey = TSKEY_INITIAL_VAL;
599

600 601 602
    for (i = 0; i < pWindowResInfo->size; ++i) {
      SWindowResult *pResult = &pWindowResInfo->pResult[i];
      if (pResult->status.closed) {
603
        numOfClosed += 1;
604 605
        continue;
      }
606

607 608 609 610 611 612 613 614
      if ((pResult->window.ekey <= lastKey && QUERY_IS_ASC_QUERY(pQuery)) ||
          (pResult->window.skey >= lastKey && !QUERY_IS_ASC_QUERY(pQuery))) {
        closeTimeWindow(pWindowResInfo, i);
      } else {
        skey = pResult->window.skey;
        break;
      }
    }
615

616
    // all windows are closed, set the last one to be the skey
617
    if (skey == TSKEY_INITIAL_VAL) {
618 619 620 621 622
      assert(i == pWindowResInfo->size);
      pWindowResInfo->curIndex = pWindowResInfo->size - 1;
    } else {
      pWindowResInfo->curIndex = i;
    }
623

624
    pWindowResInfo->prevSKey = pWindowResInfo->pResult[pWindowResInfo->curIndex].window.skey;
625

626 627
    // the number of completed slots are larger than the threshold, return current generated results to client.
    if (numOfClosed > pWindowResInfo->threshold) {
628
      qDebug("QInfo:%p total result window:%d closed:%d, reached the output threshold %d, return",
629 630
          GET_QINFO_ADDR(pRuntimeEnv), pWindowResInfo->size, numOfClosed, pQuery->rec.threshold);
      
631
      setQueryStatus(pQuery, QUERY_RESBUF_FULL);
632
    } else {
633
      qDebug("QInfo:%p total result window:%d already closed:%d", GET_QINFO_ADDR(pRuntimeEnv), pWindowResInfo->size,
634
             numOfClosed);
635 636
    }
  }
637 638 639 640 641 642 643
  
  // output has reached the limitation, set query completed
  if (pQuery->limit.limit > 0 && (pQuery->limit.limit + pQuery->limit.offset) <= numOfClosed &&
      pRuntimeEnv->scanFlag == MASTER_SCAN) {
    setQueryStatus(pQuery, QUERY_COMPLETED);
  }
  
644
  assert(pWindowResInfo->prevSKey != TSKEY_INITIAL_VAL);
645
  return numOfClosed;
646 647 648
}

/*
 * Count the rows of a data block, starting at startPos in scan direction, that belong
 * to the time window ending at ekey.
 *
 * @param pQuery          query providing the scan order and the current table info
 * @param pDataBlockInfo  block information (rows, window)
 * @param pPrimaryColumn  timestamp column of the block
 * @param startPos        first row to consider; must be inside the block
 * @param ekey            end key of the time window
 * @param searchFn        search routine handed to getForwardStepsInBlock
 * @param updateLastKey   when true, pQuery->current->lastKey is set to the key one step
 *                        past the last counted row (or past the block border when the
 *                        rest of the block is consumed)
 * @return the number of rows in the window; asserted to be positive
 */
static int32_t getNumOfRowsInTimeWindow(SQuery *pQuery, SDataBlockInfo *pDataBlockInfo, TSKEY *pPrimaryColumn,
                                        int32_t startPos, TSKEY ekey, __block_search_fn_t searchFn, bool updateLastKey) {
  assert(startPos >= 0 && startPos < pDataBlockInfo->rows);

  int32_t num = -1;
  int32_t order = pQuery->order.order;
  int32_t step = GET_FORWARD_DIRECTION_FACTOR(order);

  STableQueryInfo* item = pQuery->current;

  if (QUERY_IS_ASC_QUERY(pQuery)) {
    if (ekey < pDataBlockInfo->window.ekey) {
      // the window ends inside this block
      num = getForwardStepsInBlock(pDataBlockInfo->rows, searchFn, ekey, startPos, order, pPrimaryColumn);
      if (updateLastKey) { // update the last key
        item->lastKey = pPrimaryColumn[startPos + (num - 1)] + step;
      }
    } else {
      // the window covers the rest of the block
      num = pDataBlockInfo->rows - startPos;
      if (updateLastKey) {
        item->lastKey = pDataBlockInfo->window.ekey + step;
      }
    }
  } else {  // desc
    if (ekey > pDataBlockInfo->window.skey) {
      num = getForwardStepsInBlock(pDataBlockInfo->rows, searchFn, ekey, startPos, order, pPrimaryColumn);
      if (updateLastKey) {  // update the last key
        item->lastKey = pPrimaryColumn[startPos - (num - 1)] + step;
      }
    } else {
      num = startPos + 1;
      if (updateLastKey) {
        item->lastKey = pDataBlockInfo->window.skey + step;
      }
    }
  }

  assert(num > 0);
  return num;
}

/*
 * Run every output function of the query over a run of rows of the current block.
 *
 * Nothing is done unless this is the master scan or the window is already closed.
 *
 * @param pRuntimeEnv  runtime environment providing pQuery and the function contexts
 * @param pStatus      status of the window the rows belong to
 * @param pWin         the window; its skey is stored as the start timestamp of each context
 * @param offset       position of the first row in scan direction
 * @param forwardStep  number of rows to process
 * @param tsBuf        timestamp column; handed to selectivity functions from offset on
 * @param numOfTotal   total number of rows in the block
 */
static void doBlockwiseApplyFunctions(SQueryRuntimeEnv *pRuntimeEnv, SWindowStatus *pStatus, STimeWindow *pWin,
                                      int32_t offset, int32_t forwardStep, TSKEY *tsBuf, int32_t numOfTotal) {
  SQuery *        pQuery = pRuntimeEnv->pQuery;
  SQLFunctionCtx *pCtx = pRuntimeEnv->pCtx;

  if (IS_MASTER_SCAN(pRuntimeEnv) || pStatus->closed) {
    for (int32_t k = 0; k < pQuery->numOfOutput; ++k) {
      int32_t functionId = pQuery->pSelectExpr[k].base.functionId;

      pCtx[k].nStartQueryTimestamp = pWin->skey;
      pCtx[k].size = forwardStep;
      // startOffset is always the lowest row index of the run, whatever the scan direction
      pCtx[k].startOffset = (QUERY_IS_ASC_QUERY(pQuery)) ? offset : offset - (forwardStep - 1);

      if ((aAggs[functionId].nStatus & TSDB_FUNCSTATE_SELECTIVITY) != 0) {
        pCtx[k].ptsList = &tsBuf[offset];
      }

      // not a whole block involved in query processing, statistics data can not be used
      if (forwardStep != numOfTotal) {
        pCtx[k].preAggVals.isSet = false;
      }

      if (functionNeedToExecute(pRuntimeEnv, &pCtx[k], functionId)) {
        aAggs[functionId].xFunction(&pCtx[k]);
      }
    }
  }
}

/*
 * Run every output function of the query on the single row at the given offset.
 *
 * Nothing is done unless this is the master scan or the window is already closed.
 *
 * @param pRuntimeEnv  runtime environment providing pQuery and the function contexts
 * @param pStatus      status of the window the row belongs to
 * @param pWin         the window; its skey is stored as the start timestamp of each context
 * @param offset       row position passed to each function
 */
static void doRowwiseApplyFunctions(SQueryRuntimeEnv *pRuntimeEnv, SWindowStatus *pStatus, STimeWindow *pWin,
                                    int32_t offset) {
  SQuery *        pQuery = pRuntimeEnv->pQuery;
  SQLFunctionCtx *pCtx = pRuntimeEnv->pCtx;

  if (IS_MASTER_SCAN(pRuntimeEnv) || pStatus->closed) {
    for (int32_t k = 0; k < pQuery->numOfOutput; ++k) {
      pCtx[k].nStartQueryTimestamp = pWin->skey;

      int32_t functionId = pQuery->pSelectExpr[k].base.functionId;
      if (functionNeedToExecute(pRuntimeEnv, &pCtx[k], functionId)) {
        aAggs[functionId].xFunctionF(&pCtx[k], offset);
      }
    }
  }
}

H
Haojun Liao 已提交
734 735
/*
 * Advance pNext to the following time window and locate its first row in the block.
 *
 * @param pRuntimeEnv     runtime environment providing pQuery
 * @param pNext           [in,out] current window; moved forward by one sliding step and,
 *                        when that window holds no data, moved further so that it reaches
 *                        the row found at the returned position
 * @param pDataBlockInfo  block information (rows, window)
 * @param primaryKeys     timestamp column of the block
 * @param searchFn        search routine used to locate the start key
 * @param prevPosition    last position handled for the previous window, or -1 if unknown
 * @return the position of the first row of the new window, or -1 when the next window
 *         lies outside of the current block
 */
static int32_t getNextQualifiedWindow(SQueryRuntimeEnv *pRuntimeEnv, STimeWindow *pNext, SDataBlockInfo *pDataBlockInfo,
    TSKEY *primaryKeys, __block_search_fn_t searchFn, int32_t prevPosition) {
  SQuery *pQuery = pRuntimeEnv->pQuery;

  GET_NEXT_TIMEWINDOW(pQuery, pNext);

  // next time window is not in current block
  if ((pNext->skey > pDataBlockInfo->window.ekey && QUERY_IS_ASC_QUERY(pQuery)) ||
      (pNext->ekey < pDataBlockInfo->window.skey && !QUERY_IS_ASC_QUERY(pQuery))) {
    return -1;
  }

  // the search key is the leading edge of the window, bounded by pQuery->window.skey
  TSKEY startKey = -1;
  if (QUERY_IS_ASC_QUERY(pQuery)) {
    startKey = pNext->skey;
    if (startKey < pQuery->window.skey) {
      startKey = pQuery->window.skey;
    }
  } else {
    startKey = pNext->ekey;
    if (startKey > pQuery->window.skey) {
      startKey = pQuery->window.skey;
    }
  }

  int32_t startPos = 0;
  // tumbling time window query, a special case of sliding time window query
  if (pQuery->slidingTime == pQuery->intervalTime && prevPosition != -1) {
    // windows do not overlap, so the next window starts right after the previous position
    int32_t factor = GET_FORWARD_DIRECTION_FACTOR(pQuery->order.order);
    startPos = prevPosition + factor;
  } else {
    startPos = searchFn((char *)primaryKeys, pDataBlockInfo->rows, startKey, pQuery->order.order);
  }

  /*
   * This time window does not cover any data, try next time window,
   * this case may happen when the time window is too small
   */
  if (QUERY_IS_ASC_QUERY(pQuery) && primaryKeys[startPos] > pNext->ekey) {
    TSKEY next = primaryKeys[startPos];

    pNext->ekey += ((next - pNext->ekey + pQuery->slidingTime - 1)/pQuery->slidingTime) * pQuery->slidingTime;
    pNext->skey = pNext->ekey - pQuery->intervalTime + 1;
  } else if ((!QUERY_IS_ASC_QUERY(pQuery)) && primaryKeys[startPos] < pNext->skey) {
    TSKEY next = primaryKeys[startPos];

    pNext->skey -= ((pNext->skey - next + pQuery->slidingTime - 1) / pQuery->slidingTime) * pQuery->slidingTime;
    pNext->ekey = pNext->skey + pQuery->intervalTime - 1;
  }

  return startPos;
}

H
Haojun Liao 已提交
787
/*
 * Return the end key of a window in scan direction, limited by the end of the query range.
 *
 * @param pQuery   query providing the scan order and window.ekey
 * @param pWindow  time window
 * @return for ascending queries the smaller of pWindow->ekey and pQuery->window.ekey;
 *         otherwise the larger of pWindow->skey and pQuery->window.ekey
 */
static FORCE_INLINE TSKEY reviseWindowEkey(SQuery *pQuery, STimeWindow *pWindow) {
  TSKEY ekey = -1;
  if (QUERY_IS_ASC_QUERY(pQuery)) {
    ekey = pWindow->ekey;
    if (ekey > pQuery->window.ekey) {
      ekey = pQuery->window.ekey;
    }
  } else {
    ekey = pWindow->skey;
    if (ekey < pQuery->window.ekey) {
      ekey = pQuery->window.ekey;
    }
  }

  return ekey;
}

H
hjxilinx 已提交
804 805 806 807 808 809 810 811 812 813 814 815 816 817 818
//todo binary search
/*
 * Linear scan over the column list of a data block: returns the pData pointer of the
 * first column whose id equals colId, or NULL when no column matches.
 */
static void* getDataBlockImpl(SArray* pDataBlock, int32_t colId) {
  int32_t total = taosArrayGetSize(pDataBlock);
  int32_t idx = 0;

  while (idx < total) {
    SColumnInfoData *pCol = taosArrayGet(pDataBlock, idx);
    if (pCol->info.colId == colId) {
      return pCol->pData;
    }

    ++idx;
  }

  return NULL;
}

static char *getDataBlock(SQueryRuntimeEnv *pRuntimeEnv, SArithmeticSupport *sas, int32_t col, int32_t size,
819
                    SArray *pDataBlock) {
dengyihao's avatar
dengyihao 已提交
820 821 822
  if (pDataBlock == NULL) {
    return NULL;
  }
823

H
Haojun Liao 已提交
824
  char *dataBlock = NULL;
H
Haojun Liao 已提交
825
  SQuery *pQuery = pRuntimeEnv->pQuery;
826
  SQLFunctionCtx *pCtx = pRuntimeEnv->pCtx;
827

828
  int32_t functionId = pQuery->pSelectExpr[col].base.functionId;
829
  if (functionId == TSDB_FUNC_ARITHM) {
830
    sas->pArithExpr = &pQuery->pSelectExpr[col];
831

832 833 834 835 836 837
    // set the start offset to be the lowest start position, no matter asc/desc query order
    if (QUERY_IS_ASC_QUERY(pQuery)) {
      pCtx->startOffset = pQuery->pos;
    } else {
      pCtx->startOffset = pQuery->pos - (size - 1);
    }
838

839 840 841 842
    sas->offset  = 0;
    sas->colList = pQuery->colList;
    sas->numOfCols = pQuery->numOfCols;
    sas->data    = calloc(pQuery->numOfCols, POINTER_BYTES);
843

H
Haojun Liao 已提交
844
    if (sas->data == NULL) {
H
Haojun Liao 已提交
845
      finalizeQueryResult(pRuntimeEnv); // clean up allocated resource during query
H
Haojun Liao 已提交
846 847 848
      longjmp(pRuntimeEnv->env, TSDB_CODE_QRY_OUT_OF_MEMORY);
    }

849
    // here the pQuery->colList and sas->colList are identical
H
Haojun Liao 已提交
850
    int32_t numOfCols = taosArrayGetSize(pDataBlock);
851
    for (int32_t i = 0; i < pQuery->numOfCols; ++i) {
852
      SColumnInfo *pColMsg = &pQuery->colList[i];
853

854 855 856 857 858 859 860 861
      dataBlock = NULL;
      for (int32_t k = 0; k < numOfCols; ++k) {  //todo refactor
        SColumnInfoData *p = taosArrayGet(pDataBlock, k);
        if (pColMsg->colId == p->info.colId) {
          dataBlock = p->pData;
          break;
        }
      }
862

863
      assert(dataBlock != NULL);
H
Haojun Liao 已提交
864
      sas->data[i] = dataBlock/* + pQuery->colList[i].bytes*/;  // start from the offset
865
    }
866

867
  } else {  // other type of query function
868
    SColIndex *pCol = &pQuery->pSelectExpr[col].base.colInfo;
H
Haojun Liao 已提交
869
    if (TSDB_COL_IS_TAG(pCol->flag)) {
870 871
      dataBlock = NULL;
    } else {
H
Haojun Liao 已提交
872 873 874 875 876
      SColIndex* pColIndex = &pQuery->pSelectExpr[col].base.colInfo;
      SColumnInfoData *p = taosArrayGet(pDataBlock, pColIndex->colIndex);
      assert(p->info.colId == pColIndex->colId);

      dataBlock = p->pData;
877 878
    }
  }
879

880 881 882 883
  return dataBlock;
}

/**
H
Haojun Liao 已提交
884
 * todo set the last value for pQueryTableInfo as in rowwiseapplyfunctions
885 886
 * @param pRuntimeEnv
 * @param forwardStep
887
 * @param tsCols
888 889 890 891 892
 * @param pFields
 * @param isDiskFileBlock
 * @return                  the incremental number of output value, so it maybe 0 for fixed number of query,
 *                          such as count/min/max etc.
 */
893
static void blockwiseApplyFunctions(SQueryRuntimeEnv *pRuntimeEnv, SDataStatis *pStatis,
894 895
                                       SDataBlockInfo *pDataBlockInfo, SWindowResInfo *pWindowResInfo,
                                       __block_search_fn_t searchFn, SArray *pDataBlock) {
896
  SQLFunctionCtx *pCtx = pRuntimeEnv->pCtx;
897 898
  bool masterScan = IS_MASTER_SCAN(pRuntimeEnv);

899 900
  SQuery *pQuery = pRuntimeEnv->pQuery;
  TSKEY  *tsCols = NULL;
901
  if (pDataBlock != NULL) {
902
    SColumnInfoData* pColInfo = taosArrayGet(pDataBlock, 0);
903
    tsCols = (TSKEY *)(pColInfo->pData);
904
  }
905

906
  SArithmeticSupport *sasArray = calloc((size_t)pQuery->numOfOutput, sizeof(SArithmeticSupport));
H
Haojun Liao 已提交
907
  if (sasArray == NULL) {
H
Haojun Liao 已提交
908
    finalizeQueryResult(pRuntimeEnv); // clean up allocated resource during query
H
Haojun Liao 已提交
909 910
    longjmp(pRuntimeEnv->env, TSDB_CODE_QRY_OUT_OF_MEMORY);
  }
911

912
  for (int32_t k = 0; k < pQuery->numOfOutput; ++k) {
H
hjxilinx 已提交
913
    char *dataBlock = getDataBlock(pRuntimeEnv, &sasArray[k], k, pDataBlockInfo->rows, pDataBlock);
914
    setExecParams(pQuery, &pCtx[k], dataBlock, tsCols, pDataBlockInfo, pStatis, &sasArray[k], k);
915
  }
916

917
  int32_t step = GET_FORWARD_DIRECTION_FACTOR(pQuery->order.order);
H
Haojun Liao 已提交
918 919
  if (QUERY_IS_INTERVAL_QUERY(pQuery)/* && tsCols != NULL*/) {
    TSKEY ts = TSKEY_INITIAL_VAL;
920

H
Haojun Liao 已提交
921 922 923 924 925 926 927 928
    if (tsCols == NULL) {
      ts = QUERY_IS_ASC_QUERY(pQuery)? pDataBlockInfo->window.skey:pDataBlockInfo->window.ekey;
    } else {
      int32_t offset = GET_COL_DATA_POS(pQuery, 0, step);
      ts = tsCols[offset];
    }

    bool        hasTimeWindow = false;
929
    STimeWindow win = getActiveTimeWindow(pWindowResInfo, ts, pQuery);
H
Haojun Liao 已提交
930 931
    if (setWindowOutputBufByKey(pRuntimeEnv, pWindowResInfo, pDataBlockInfo->tid, &win, masterScan, &hasTimeWindow) !=
        TSDB_CODE_SUCCESS) {
S
Shengliang Guan 已提交
932
      taosTFree(sasArray);
H
hjxilinx 已提交
933
      return;
934
    }
935

H
Haojun Liao 已提交
936 937 938
    int32_t forwardStep = 0;
    int32_t startPos = pQuery->pos;

939
    if (hasTimeWindow) {
H
Haojun Liao 已提交
940
      TSKEY ekey = reviseWindowEkey(pQuery, &win);
H
Haojun Liao 已提交
941
      forwardStep = getNumOfRowsInTimeWindow(pQuery, pDataBlockInfo, tsCols, pQuery->pos, ekey, searchFn, true);
942

943
      SWindowStatus *pStatus = getTimeWindowResStatus(pWindowResInfo, curTimeWindow(pWindowResInfo));
H
Haojun Liao 已提交
944
      doBlockwiseApplyFunctions(pRuntimeEnv, pStatus, &win, startPos, forwardStep, tsCols, pDataBlockInfo->rows);
945
    }
946

947 948
    int32_t     index = pWindowResInfo->curIndex;
    STimeWindow nextWin = win;
949

950
    while (1) {
H
Haojun Liao 已提交
951 952
      int32_t prevEndPos = (forwardStep - 1) * step + startPos;
      startPos = getNextQualifiedWindow(pRuntimeEnv, &nextWin, pDataBlockInfo, tsCols, searchFn, prevEndPos);
953 954 955
      if (startPos < 0) {
        break;
      }
956

957
      // null data, failed to allocate more memory buffer
H
Haojun Liao 已提交
958
      hasTimeWindow = false;
H
Haojun Liao 已提交
959 960
      if (setWindowOutputBufByKey(pRuntimeEnv, pWindowResInfo, pDataBlockInfo->tid, &nextWin, masterScan,
                                  &hasTimeWindow) != TSDB_CODE_SUCCESS) {
961 962
        break;
      }
963

964 965 966 967 968
      if (!hasTimeWindow) {
        continue;
      }

      TSKEY ekey = reviseWindowEkey(pQuery, &nextWin);
H
Haojun Liao 已提交
969
      forwardStep = getNumOfRowsInTimeWindow(pQuery, pDataBlockInfo, tsCols, startPos, ekey, searchFn, true);
970

H
Haojun Liao 已提交
971
      SWindowStatus *pStatus = getTimeWindowResStatus(pWindowResInfo, curTimeWindow(pWindowResInfo));
972
      doBlockwiseApplyFunctions(pRuntimeEnv, pStatus, &nextWin, startPos, forwardStep, tsCols, pDataBlockInfo->rows);
973
    }
974

975 976 977 978 979 980 981
    pWindowResInfo->curIndex = index;
  } else {
    /*
     * the sqlfunctionCtx parameters should be set done before all functions are invoked,
     * since the selectivity + tag_prj query needs all parameters been set done.
     * tag_prj function are changed to be TSDB_FUNC_TAG_DUMMY
     */
982
    for (int32_t k = 0; k < pQuery->numOfOutput; ++k) {
983
      int32_t functionId = pQuery->pSelectExpr[k].base.functionId;
984 985 986 987 988
      if (functionNeedToExecute(pRuntimeEnv, &pCtx[k], functionId)) {
        aAggs[functionId].xFunction(&pCtx[k]);
      }
    }
  }
989

990 991 992 993
  for(int32_t i = 0; i < pQuery->numOfOutput; ++i) {
    if (pQuery->pSelectExpr[i].base.functionId != TSDB_FUNC_ARITHM) {
      continue;
    }
994

S
Shengliang Guan 已提交
995
    taosTFree(sasArray[i].data);
996
  }
997

S
Shengliang Guan 已提交
998
  taosTFree(sasArray);
999 1000 1001 1002 1003 1004
}

/*
 * Select (and create on first use) the result slot of the group that the value in pData
 * belongs to, then point the function contexts at it.
 *
 * @param pRuntimeEnv  runtime environment of the running query
 * @param pData        pointer to the group-by column value of the current row
 * @param type         data type of the group-by column
 * @param bytes        width in bytes of the group-by column
 * @return             TSDB_CODE_SUCCESS on success; -1 if the value is NULL, no window result
 *                     could be obtained, or no result buffer could be added
 */
static int32_t setGroupResultOutputBuf(SQueryRuntimeEnv *pRuntimeEnv, char *pData, int16_t type, int16_t bytes) {
  // rows whose group-by value is NULL are ignored
  if (isNull(pData, type)) {
    return -1;
  }

  const int32_t groupResultId = 1;
  SDiskbasedResultBuf *pBuf = pRuntimeEnv->pResultBuf;

  // integer view of the group key; stays -1 for every type that is not listed below
  int64_t groupKey = -1;
  if (type == TSDB_DATA_TYPE_BOOL || type == TSDB_DATA_TYPE_TINYINT) {
    groupKey = GET_INT8_VAL(pData);
  } else if (type == TSDB_DATA_TYPE_SMALLINT) {
    groupKey = GET_INT16_VAL(pData);
  } else if (type == TSDB_DATA_TYPE_INT) {
    groupKey = GET_INT32_VAL(pData);
  } else if (type == TSDB_DATA_TYPE_BIGINT) {
    groupKey = GET_INT64_VAL(pData);
  }

  SWindowResult *pGroupRes = doSetTimeWindowFromKey(pRuntimeEnv, &pRuntimeEnv->windowResInfo, pData, bytes, true);
  if (pGroupRes == NULL) {
    return -1;
  }

  pGroupRes->window.skey = groupKey;
  pGroupRes->window.ekey = groupKey;

  // not assign result buffer yet, add new result buffer
  if (pGroupRes->pos.pageId == -1 &&
      addNewWindowResultBuf(pGroupRes, pBuf, groupResultId, pRuntimeEnv->numOfRowsPerPage) != 0) {
    return -1;
  }

  setWindowResOutputBuf(pRuntimeEnv, pGroupRes);
  initCtxOutputBuf(pRuntimeEnv);
  return TSDB_CODE_SUCCESS;
}

1040
/*
 * Find the data buffer of the first non-tag group-by column that is present in pDataBlock.
 *
 * For every non-tag group-by column examined, *type and *bytes are set from the matching entry
 * of pQuery->colList, also when the column turns out to be missing from the block.
 *
 * @param pQuery      query whose group-by expression is inspected
 * @param type        [out] data type of the group-by column
 * @param bytes       [out] width in bytes of the group-by column
 * @param pDataBlock  array of SColumnInfoData entries of the current block
 * @return            the column data buffer, or NULL if no non-tag group-by column is in the block
 */
static char *getGroupbyColumnData(SQuery *pQuery, int16_t *type, int16_t *bytes, SArray* pDataBlock) {
  SSqlGroupbyExpr *pGroupbyExpr = pQuery->pGroupbyExpr;

  for (int32_t k = 0; k < pGroupbyExpr->numOfGroupCols; ++k) {
    SColIndex* pColIndex = taosArrayGet(pGroupbyExpr->columnInfo, k);

    // use the bit-mask test like the rest of this file, so a tag flag combined with other bits is still skipped
    if (TSDB_COL_IS_TAG(pColIndex->flag)) {
      continue;
    }

    int16_t colIndex = -1;
    int32_t colId = pColIndex->colId;

    for (int32_t i = 0; i < pQuery->numOfCols; ++i) {
      if (pQuery->colList[i].colId == colId) {
        colIndex = i;
        break;
      }
    }

    assert(colIndex >= 0 && colIndex < pQuery->numOfCols);

    *type = pQuery->colList[colIndex].type;
    *bytes = pQuery->colList[colIndex].bytes;

    /*
     *  the colIndex is acquired from the first tables of all qualified tables in this vnode during query prepare
     * stage, the remain tables may not have the required column in cache actually. So, the validation of required
     * column in cache with the corresponding schema is reinforced.
     */
    int32_t numOfCols = taosArrayGetSize(pDataBlock);

    for (int32_t i = 0; i < numOfCols; ++i) {
      SColumnInfoData *p = taosArrayGet(pDataBlock, i);
      if (pColIndex->colId == p->info.colId) {
        return p->pData;
      }
    }
  }

  return NULL;
}

/*
 * Compare the row at `offset` of the first function context with the current element of the
 * timestamp buffer (pRuntimeEnv->pTSBuf).
 *
 * @return TS_JOIN_TAG_NOT_EQUALS  the tag of the context differs from the tag of the element
 *         TS_JOIN_TS_NOT_EQUALS   the row timestamp has not reached the element yet (smaller in
 *                                 asc order, larger in desc order)
 *         TS_JOIN_TS_EQUAL        otherwise; a row that is already past the element is asserted
 *                                 to never occur
 */
static int32_t doTSJoinFilter(SQueryRuntimeEnv *pRuntimeEnv, int32_t offset) {
  SQuery         *pQuery = pRuntimeEnv->pQuery;
  SQLFunctionCtx *pCtx = pRuntimeEnv->pCtx;
  STSElem         elem = tsBufGetElem(pRuntimeEnv->pTSBuf);

  // compare tag first
  if (pCtx[0].tag.i64Key != elem.tag) {
    return TS_JOIN_TAG_NOT_EQUALS;
  }

  TSKEY key = *(TSKEY *)(pCtx[0].aInputElemBuf + TSDB_KEYSIZE * offset);

#if defined(_DEBUG_VIEW)
  printf("elem in comp ts file:%" PRId64 ", key:%" PRId64 ", tag:%"PRIu64", query order:%d, ts order:%d, traverse:%d, index:%d\n",
         elem.ts, key, elem.tag, pQuery->order.order, pRuntimeEnv->pTSBuf->tsOrder,
         pRuntimeEnv->pTSBuf->cur.order, pRuntimeEnv->pTSBuf->cur.tsIndex);
#endif

  // the row lags behind the buffer element with respect to the scan direction
  bool notReached = QUERY_IS_ASC_QUERY(pQuery) ? (key < elem.ts) : (key > elem.ts);
  if (notReached) {
    return TS_JOIN_TS_NOT_EQUALS;
  }

  // not behind and not equal means the row is already past the element, which must not happen
  if (key != elem.ts) {
    assert(false);
  }

  return TS_JOIN_TS_EQUAL;
}

/*
 * Decide whether the function bound to pCtx has to be invoked in the current scan.
 *
 * The checks are evaluated in this order:
 *   1. TSDB_FUNC_TS always runs.
 *   2. A completed result, TSDB_FUNC_TAG_DUMMY and TSDB_FUNC_TS_DUMMY never run.
 *   3. first/first_dst run only when the query order is ascending.
 *   4. last/last_dst run only when param[0].i64Key equals the query order.
 *   5. Everything else runs unless the scan is a reverse scan.
 */
static bool functionNeedToExecute(SQueryRuntimeEnv *pRuntimeEnv, SQLFunctionCtx *pCtx, int32_t functionId) {
  SResultInfo *pResInfo = GET_RES_INFO(pCtx);
  SQuery      *pQuery = pRuntimeEnv->pQuery;

  // in case of timestamp column, always generated results.
  if (functionId == TSDB_FUNC_TS) {
    return true;
  }

  if (pResInfo->complete) {
    return false;
  }

  switch (functionId) {
    case TSDB_FUNC_TAG_DUMMY:
    case TSDB_FUNC_TS_DUMMY:
      return false;

    case TSDB_FUNC_FIRST_DST:
    case TSDB_FUNC_FIRST:
      return QUERY_IS_ASC_QUERY(pQuery);

    // todo add comments
    case TSDB_FUNC_LAST_DST:
    case TSDB_FUNC_LAST:
      return pCtx->param[0].i64Key == pQuery->order.order;

    default:
      break;
  }

  // none of the remaining functions is executed during the supplementary (reverse) scan
  return !IS_REVERSE_SCAN(pRuntimeEnv);
}

1147 1148
/*
 * Apply all output functions of the query on one data block, row by row.
 *
 * Every row is first checked against the timestamp buffer (when pRuntimeEnv->pTSBuf is set) and
 * the column filters. A qualified row is then fed either to every time window that covers its
 * timestamp (interval query) or to the functions of its group / of the single result.
 * After the loop the last key of the current table is advanced past the last visited row.
 *
 * If the arithmetic support array cannot be allocated, the query result is finalized and
 * control leaves through longjmp(pRuntimeEnv->env, TSDB_CODE_QRY_OUT_OF_MEMORY).
 *
 * NOTE(review): tsCols is NULL when the first column is not a timestamp, yet the interval branch
 * indexes it unconditionally - confirm that this combination cannot occur.
 * NOTE(review): the data pointer returned by getGroupbyColumnData is used without a NULL check.
 * NOTE(review): the trailing assert fires for a block with zero rows.
 */
static void rowwiseApplyFunctions(SQueryRuntimeEnv *pRuntimeEnv, SDataStatis *pStatis, SDataBlockInfo *pDataBlockInfo,
    SWindowResInfo *pWindowResInfo, SArray *pDataBlock) {
  SQuery          *pQuery = pRuntimeEnv->pQuery;
  SQLFunctionCtx  *pCtx = pRuntimeEnv->pCtx;
  STableQueryInfo *pTableInfo = pQuery->current;

  const bool isMasterScan = IS_MASTER_SCAN(pRuntimeEnv);
  const bool groupByNormalCol = pRuntimeEnv->groupbyNormalCol;

  // the first column only serves as the timestamp column if it really is of timestamp type
  SColumnInfoData *pFirstCol = (SColumnInfoData *)taosArrayGet(pDataBlock, 0);
  TSKEY           *tsCols = NULL;
  if (pFirstCol->info.type == TSDB_DATA_TYPE_TIMESTAMP) {
    tsCols = (TSKEY *)pFirstCol->pData;
  }

  SArithmeticSupport *sasArray = calloc((size_t)pQuery->numOfOutput, sizeof(SArithmeticSupport));
  if (sasArray == NULL) {
    finalizeQueryResult(pRuntimeEnv); // clean up allocated resource during query
    longjmp(pRuntimeEnv->env, TSDB_CODE_QRY_OUT_OF_MEMORY);
  }

  int16_t groupType = 0;
  int16_t groupBytes = 0;
  char   *groupColData = NULL;
  if (groupByNormalCol) {
    groupColData = getGroupbyColumnData(pQuery, &groupType, &groupBytes, pDataBlock);
  }

  // bind the input data of every output expression
  for (int32_t k = 0; k < pQuery->numOfOutput; ++k) {
    char *input = getDataBlock(pRuntimeEnv, &sasArray[k], k, pDataBlockInfo->rows, pDataBlock);
    setExecParams(pQuery, &pCtx[k], input, tsCols, pDataBlockInfo, pStatis, &sasArray[k], k);
  }

  // bind the input data of every filter column
  for (int32_t k = 0; k < pQuery->numOfFilterCols; ++k) {
    SSingleColumnFilterInfo *pFilter = &pQuery->pFilterInfo[k];
    pFilter->pData = getDataBlockImpl(pDataBlock, pFilter->info.colId);
    assert(pFilter->pData != NULL);
  }

  int32_t step = GET_FORWARD_DIRECTION_FACTOR(pQuery->order.order);

  // from top to bottom in desc
  // from bottom to top in asc order
  if (pRuntimeEnv->pTSBuf != NULL) {
    SQInfo *pQInfo = (SQInfo *)GET_QINFO_ADDR(pRuntimeEnv);
    qDebug("QInfo:%p process data rows, numOfRows:%d, query order:%d, ts comp order:%d", pQInfo, pDataBlockInfo->rows,
           pQuery->order.order, pRuntimeEnv->pTSBuf->cur.order);
  }

  int32_t offset = -1;

  for (int32_t j = 0; j < pDataBlockInfo->rows; ++j) {
    offset = GET_COL_DATA_POS(pQuery, j, step);

    if (pRuntimeEnv->pTSBuf != NULL) {
      int32_t r = doTSJoinFilter(pRuntimeEnv, offset);
      if (r == TS_JOIN_TAG_NOT_EQUALS) {
        break;
      }

      if (r == TS_JOIN_TS_NOT_EQUALS) {
        continue;
      }

      assert(r == TS_JOIN_TS_EQUAL);
    }

    if (pQuery->numOfFilterCols > 0 && (!doFilterData(pQuery, offset))) {
      continue;
    }

    if (QUERY_IS_INTERVAL_QUERY(pQuery)) {
      // interval window query, decide the time window according to the primary timestamp
      int64_t     ts = tsCols[offset];
      STimeWindow win = getActiveTimeWindow(pWindowResInfo, ts, pQuery);

      bool    hasTimeWindow = false;
      int32_t ret = setWindowOutputBufByKey(pRuntimeEnv, pWindowResInfo, pDataBlockInfo->tid, &win, isMasterScan, &hasTimeWindow);

      // null data, too many state code; or no result window for this row
      if (ret != TSDB_CODE_SUCCESS || !hasTimeWindow) {
        continue;
      }

      SWindowStatus *pStatus = getTimeWindowResStatus(pWindowResInfo, curTimeWindow(pWindowResInfo));
      doRowwiseApplyFunctions(pRuntimeEnv, pStatus, &win, offset);

      // the row may belong to the following windows as well; the current window is restored afterwards
      STimeWindow nextWin = win;
      int32_t     savedIndex = pWindowResInfo->curIndex;

      for (;;) {
        GET_NEXT_TIMEWINDOW(pQuery, &nextWin);

        bool beyondQueryRange = (nextWin.skey > pQuery->window.ekey && QUERY_IS_ASC_QUERY(pQuery)) ||
                                (nextWin.skey < pQuery->window.ekey && !QUERY_IS_ASC_QUERY(pQuery));
        if (beyondQueryRange) {
          break;
        }

        if (ts < nextWin.skey || ts > nextWin.ekey) {
          break;
        }

        // null data, failed to allocate more memory buffer
        hasTimeWindow = false;
        if (setWindowOutputBufByKey(pRuntimeEnv, pWindowResInfo, pDataBlockInfo->tid, &nextWin, isMasterScan, &hasTimeWindow) != TSDB_CODE_SUCCESS) {
          break;
        }

        if (hasTimeWindow) {
          pStatus = getTimeWindowResStatus(pWindowResInfo, curTimeWindow(pWindowResInfo));
          doRowwiseApplyFunctions(pRuntimeEnv, pStatus, &nextWin, offset);
        }
      }

      pWindowResInfo->curIndex = savedIndex;
    } else {  // other queries
      // decide which group this rows belongs to according to current state value
      if (groupByNormalCol) {
        char *val = groupColData + groupBytes * offset;

        if (setGroupResultOutputBuf(pRuntimeEnv, val, groupType, groupBytes) != TSDB_CODE_SUCCESS) {
          continue;  // null data, too many state code
        }
      }

      for (int32_t k = 0; k < pQuery->numOfOutput; ++k) {
        int32_t functionId = pQuery->pSelectExpr[k].base.functionId;
        if (functionNeedToExecute(pRuntimeEnv, &pCtx[k], functionId)) {
          aAggs[functionId].xFunctionF(&pCtx[k], offset);
        }
      }
    }

    // if timestamp filter list is empty, quit current query
    if (pRuntimeEnv->pTSBuf != NULL && !tsBufNextPos(pRuntimeEnv->pTSBuf)) {
      setQueryStatus(pQuery, QUERY_COMPLETED);
      break;
    }
  }

  assert(offset >= 0);
  if (tsCols != NULL) {
    pTableInfo->lastKey = tsCols[offset] + step;
  } else {
    TSKEY blockEnd = QUERY_IS_ASC_QUERY(pQuery)? pDataBlockInfo->window.ekey:pDataBlockInfo->window.skey;
    pTableInfo->lastKey = blockEnd + step;
  }

  // todo refactor: extract method
  // only arithmetic expressions own a column pointer array (allocated in getDataBlock)
  for (int32_t i = 0; i < pQuery->numOfOutput; ++i) {
    if (pQuery->pSelectExpr[i].base.functionId == TSDB_FUNC_ARITHM) {
      taosTFree(sasArray[i].data);
    }
  }

  free(sasArray);
}

static int32_t tableApplyFunctionsOnBlock(SQueryRuntimeEnv *pRuntimeEnv, SDataBlockInfo *pDataBlockInfo,
H
hjxilinx 已提交
1310
                                          SDataStatis *pStatis, __block_search_fn_t searchFn, SArray *pDataBlock) {
H
hjxilinx 已提交
1311
  SQuery *pQuery = pRuntimeEnv->pQuery;
H
hjxilinx 已提交
1312 1313 1314
  
  STableQueryInfo* pTableQInfo = pQuery->current;
  SWindowResInfo*  pWindowResInfo = &pRuntimeEnv->windowResInfo;
H
hjxilinx 已提交
1315
  
H
Haojun Liao 已提交
1316
  if (pQuery->numOfFilterCols > 0 || pRuntimeEnv->pTSBuf != NULL || pRuntimeEnv->groupbyNormalCol) {
1317
    rowwiseApplyFunctions(pRuntimeEnv, pStatis, pDataBlockInfo, pWindowResInfo, pDataBlock);
1318
  } else {
1319
    blockwiseApplyFunctions(pRuntimeEnv, pStatis, pDataBlockInfo, pWindowResInfo, searchFn, pDataBlock);
1320
  }
1321

1322
  // update the lastkey of current table
1323
  TSKEY lastKey = QUERY_IS_ASC_QUERY(pQuery) ? pDataBlockInfo->window.ekey : pDataBlockInfo->window.skey;
H
hjxilinx 已提交
1324
  pTableQInfo->lastKey = lastKey + GET_FORWARD_DIRECTION_FACTOR(pQuery->order.order);
1325

1326
  // interval query with limit applied
1327
  int32_t numOfRes = 0;
H
Haojun Liao 已提交
1328
  if (QUERY_IS_INTERVAL_QUERY(pQuery)) {
1329 1330 1331
    numOfRes = doCheckQueryCompleted(pRuntimeEnv, lastKey, pWindowResInfo);
  } else {
    numOfRes = getNumOfResult(pRuntimeEnv);
1332

1333 1334 1335 1336
    // update the number of output result
    if (numOfRes > 0 && pQuery->checkBuffer == 1) {
      assert(numOfRes >= pQuery->rec.rows);
      pQuery->rec.rows = numOfRes;
1337

1338 1339 1340
      if (numOfRes >= pQuery->rec.threshold) {
        setQueryStatus(pQuery, QUERY_RESBUF_FULL);
      }
1341

1342 1343 1344
      if ((pQuery->limit.limit >= 0) && (pQuery->limit.limit + pQuery->limit.offset) <= numOfRes) {
        setQueryStatus(pQuery, QUERY_COMPLETED);
      }
H
Haojun Liao 已提交
1345
    }
1346
  }
1347

1348
  return numOfRes;
1349 1350
}

H
Haojun Liao 已提交
1351
void setExecParams(SQuery *pQuery, SQLFunctionCtx *pCtx, void* inputData, TSKEY *tsCol, SDataBlockInfo* pBlockInfo,
1352 1353 1354 1355 1356 1357
                   SDataStatis *pStatis, void *param, int32_t colIndex) {
  
  int32_t functionId = pQuery->pSelectExpr[colIndex].base.functionId;
  int32_t colId = pQuery->pSelectExpr[colIndex].base.colInfo.colId;
  
  SDataStatis *tpField = NULL;
H
Haojun Liao 已提交
1358
  pCtx->hasNull = hasNullValue(&pQuery->pSelectExpr[colIndex].base.colInfo, pStatis, &tpField);
1359
  pCtx->aInputElemBuf = inputData;
1360

1361
  if (tpField != NULL) {
H
Haojun Liao 已提交
1362
    pCtx->preAggVals.isSet  = true;
1363 1364
    pCtx->preAggVals.statis = *tpField;
    assert(pCtx->preAggVals.statis.numOfNull <= pBlockInfo->rows);
1365 1366 1367
  } else {
    pCtx->preAggVals.isSet = false;
  }
1368

H
Haojun Liao 已提交
1369 1370
  pCtx->preAggVals.dataBlockLoaded = (inputData != NULL);

H
Haojun Liao 已提交
1371 1372 1373
  // limit/offset query will affect this value
  pCtx->startOffset = QUERY_IS_ASC_QUERY(pQuery) ? pQuery->pos:0;
  pCtx->size = QUERY_IS_ASC_QUERY(pQuery) ? pBlockInfo->rows - pQuery->pos : pQuery->pos + 1;
1374

1375 1376
  uint32_t status = aAggs[functionId].nStatus;
  if (((status & (TSDB_FUNCSTATE_SELECTIVITY | TSDB_FUNCSTATE_NEED_TS)) != 0) && (tsCol != NULL)) {
H
Haojun Liao 已提交
1377
    pCtx->ptsList = tsCol;
1378
  }
1379

1380 1381 1382 1383 1384
  if (functionId >= TSDB_FUNC_FIRST_DST && functionId <= TSDB_FUNC_LAST_DST) {
    // last_dist or first_dist function
    // store the first&last timestamp into the intermediate buffer [1], the true
    // value may be null but timestamp will never be null
  } else if (functionId == TSDB_FUNC_TOP || functionId == TSDB_FUNC_BOTTOM || functionId == TSDB_FUNC_TWA ||
1385
             functionId == TSDB_FUNC_DIFF || (functionId >= TSDB_FUNC_RATE && functionId <= TSDB_FUNC_AVG_IRATE)) {
1386
    /*
H
Haojun Liao 已提交
1387
     * least squares function needs two columns of input, currently, the x value of linear equation is set to
1388 1389 1390 1391 1392 1393 1394 1395 1396 1397
     * timestamp column, and the y-value is the column specified in pQuery->pSelectExpr[i].colIdxInBuffer
     *
     * top/bottom function needs timestamp to indicate when the
     * top/bottom values emerge, so does diff function
     */
    if (functionId == TSDB_FUNC_TWA) {
      STwaInfo *pTWAInfo = GET_RES_INFO(pCtx)->interResultBuf;
      pTWAInfo->SKey = pQuery->window.skey;
      pTWAInfo->EKey = pQuery->window.ekey;
    }
1398

1399 1400
  } else if (functionId == TSDB_FUNC_ARITHM) {
    pCtx->param[1].pz = param;
H
Haojun Liao 已提交
1401 1402 1403 1404 1405 1406
  } else if (functionId == TSDB_FUNC_SPREAD) {  // set the statistics data for primary time stamp column
    if (colId == PRIMARYKEY_TIMESTAMP_COL_INDEX) {
      pCtx->preAggVals.isSet  = true;
      pCtx->preAggVals.statis.min = pBlockInfo->window.skey;
      pCtx->preAggVals.statis.max = pBlockInfo->window.ekey;
    }
1407 1408 1409 1410 1411 1412 1413 1414 1415 1416 1417 1418 1419
  } else if (functionId == TSDB_FUNC_INTERP) {
    SInterpInfoDetail *pInterpInfo = GET_RES_INFO(pCtx)->interResultBuf;
    pInterpInfo->type = pQuery->fillType;
    pInterpInfo->ts = pQuery->window.skey;
    pInterpInfo->primaryCol = (colId == PRIMARYKEY_TIMESTAMP_COL_INDEX);
  
    if (pQuery->fillVal != NULL) {
      if (isNull((const char*) &pQuery->fillVal[colIndex], pCtx->inputType)) {
        pCtx->param[1].nType = TSDB_DATA_TYPE_NULL;
      } else { // todo refactor, tVariantCreateFromBinary should handle the NULL value
        tVariantCreateFromBinary(&pCtx->param[1], (char*) &pQuery->fillVal[colIndex], pCtx->inputBytes, pCtx->inputType);
      }
    }
1420
  }
1421

1422 1423 1424 1425 1426 1427
#if defined(_DEBUG_VIEW)
  //  int64_t *tsList = (int64_t *)primaryColumnData;
//  int64_t  s = tsList[0];
//  int64_t  e = tsList[size - 1];

//    if (IS_DATA_BLOCK_LOADED(blockStatus)) {
1428
//        qDebug("QInfo:%p query ts:%lld-%lld, offset:%d, rows:%d, bstatus:%d,
1429 1430 1431
//        functId:%d", GET_QINFO_ADDR(pQuery),
//               s, e, startOffset, size, blockStatus, functionId);
//    } else {
1432
//        qDebug("QInfo:%p block not loaded, bstatus:%d",
1433 1434 1435 1436 1437 1438
//        GET_QINFO_ADDR(pQuery), blockStatus);
//    }
#endif
}

// set the output buffer for the selectivity + tag query
H
Haojun Liao 已提交
1439 1440 1441
static void setCtxTagColumnInfo(SQueryRuntimeEnv *pRuntimeEnv, SQLFunctionCtx *pCtx) {
  SQuery* pQuery = pRuntimeEnv->pQuery;

1442
  if (isSelectivityWithTagsQuery(pQuery)) {
1443
    int32_t num = 0;
1444
    int16_t tagLen = 0;
1445 1446
    
    SQLFunctionCtx *p = NULL;
1447
    SQLFunctionCtx **pTagCtx = calloc(pQuery->numOfOutput, POINTER_BYTES);
H
Haojun Liao 已提交
1448

1449
    for (int32_t i = 0; i < pQuery->numOfOutput; ++i) {
1450
      SSqlFuncMsg *pSqlFuncMsg = &pQuery->pSelectExpr[i].base;
1451
      
1452 1453 1454 1455 1456 1457 1458 1459 1460 1461 1462 1463 1464
      if (pSqlFuncMsg->functionId == TSDB_FUNC_TAG_DUMMY || pSqlFuncMsg->functionId == TSDB_FUNC_TS_DUMMY) {
        tagLen += pCtx[i].outputBytes;
        pTagCtx[num++] = &pCtx[i];
      } else if ((aAggs[pSqlFuncMsg->functionId].nStatus & TSDB_FUNCSTATE_SELECTIVITY) != 0) {
        p = &pCtx[i];
      } else if (pSqlFuncMsg->functionId == TSDB_FUNC_TS || pSqlFuncMsg->functionId == TSDB_FUNC_TAG) {
        // tag function may be the group by tag column
        // ts may be the required primary timestamp column
        continue;
      } else {
        // the column may be the normal column, group by normal_column, the functionId is TSDB_FUNC_PRJ
      }
    }
dengyihao's avatar
dengyihao 已提交
1465 1466 1467 1468 1469
    if (p != NULL) {
      p->tagInfo.pTagCtxList = pTagCtx;
      p->tagInfo.numOfTagCols = num;
      p->tagInfo.tagsLen = tagLen;
    } else {
S
Shengliang Guan 已提交
1470
      taosTFree(pTagCtx); 
dengyihao's avatar
dengyihao 已提交
1471
    }
1472 1473 1474
  }
}

H
Haojun Liao 已提交
1475 1476
/*
 * Hand out consecutive slices of `buf` to the result info of every output column.
 * The slice of column i is pQuery->pSelectExpr[i].interBytes bytes wide.
 */
static FORCE_INLINE void setWindowResultInfo(SResultInfo *pResultInfo, SQuery *pQuery, bool isStableQuery, char* buf) {
  char   *cursor = buf;
  int32_t i = 0;

  while (i < pQuery->numOfOutput) {
    int32_t interBytes = pQuery->pSelectExpr[i].interBytes;
    setResultInfoBuf(&pResultInfo[i], interBytes, isStableQuery, cursor);

    cursor += interBytes;
    ++i;
  }
}

1485
/*
 * Allocate and initialize the per-output-column state of the runtime environment:
 * the function contexts (pCtx), the result infos with their intermediate buffers (resultInfo)
 * and the output offsets of the columns.
 *
 * @param pRuntimeEnv  runtime environment; pQuery, interBufSize and offset[] must be set up
 * @param order        order value stored in param[2] of top/bottom/diff functions
 * @return             TSDB_CODE_SUCCESS, or TSDB_CODE_QRY_OUT_OF_MEMORY if an allocation failed
 *                     (both buffers are released in that case)
 */
static int32_t setupQueryRuntimeEnv(SQueryRuntimeEnv *pRuntimeEnv, int16_t order) {
  qDebug("QInfo:%p setup runtime env", GET_QINFO_ADDR(pRuntimeEnv));
  SQuery *pQuery = pRuntimeEnv->pQuery;

  // one block: the SResultInfo array followed by the intermediate result buffers
  size_t size = pRuntimeEnv->interBufSize + pQuery->numOfOutput * sizeof(SResultInfo);

  pRuntimeEnv->resultInfo = calloc(1, size);
  pRuntimeEnv->pCtx = (SQLFunctionCtx *)calloc(pQuery->numOfOutput, sizeof(SQLFunctionCtx));

  if (pRuntimeEnv->resultInfo == NULL || pRuntimeEnv->pCtx == NULL) {
    goto _clean;
  }

  pRuntimeEnv->offset[0] = 0;
  for (int32_t i = 0; i < pQuery->numOfOutput; ++i) {
    SSqlFuncMsg *pSqlFuncMsg = &pQuery->pSelectExpr[i].base;

    SQLFunctionCtx *pCtx = &pRuntimeEnv->pCtx[i];
    SColIndex* pIndex = &pSqlFuncMsg->colInfo;

    // input type/width come from the tag schema, the table name schema or the column schema
    int32_t index = pSqlFuncMsg->colInfo.colIndex;
    if (TSDB_COL_IS_TAG(pIndex->flag)) {
      if (pIndex->colId == TSDB_TBNAME_COLUMN_INDEX) {  // todo refactor
        SSchema s = tGetTableNameColumnSchema();

        pCtx->inputBytes = s.bytes;
        pCtx->inputType = s.type;
      } else {
        pCtx->inputBytes = pQuery->tagColList[index].bytes;
        pCtx->inputType = pQuery->tagColList[index].type;
      }
    } else {
      pCtx->inputBytes = pQuery->colList[index].bytes;
      pCtx->inputType = pQuery->colList[index].type;
    }

    assert(isValidDataType(pCtx->inputType));
    pCtx->ptsOutputBuf = NULL;

    pCtx->outputBytes = pQuery->pSelectExpr[i].bytes;
    pCtx->outputType = pQuery->pSelectExpr[i].type;

    pCtx->order = pQuery->order.order;
    pCtx->functionId = pSqlFuncMsg->functionId;

    pCtx->numOfParams = pSqlFuncMsg->numOfParams;
    for (int32_t j = 0; j < pCtx->numOfParams; ++j) {
      int16_t type = pSqlFuncMsg->arg[j].argType;
      int16_t bytes = pSqlFuncMsg->arg[j].argBytes;
      if (type == TSDB_DATA_TYPE_BINARY || type == TSDB_DATA_TYPE_NCHAR) {
        // read the j-th argument; a plain arg->... would always pick the first one
        tVariantCreateFromBinary(&pCtx->param[j], pSqlFuncMsg->arg[j].argValue.pz, bytes, type);
      } else {
        tVariantCreateFromBinary(&pCtx->param[j], (char *)&pSqlFuncMsg->arg[j].argValue.i64, bytes, type);
      }
    }

    // set the order information for top/bottom query
    int32_t functionId = pCtx->functionId;

    if (functionId == TSDB_FUNC_TOP || functionId == TSDB_FUNC_BOTTOM || functionId == TSDB_FUNC_DIFF) {
      // these functions require the first output column to be the timestamp
      int32_t f = pQuery->pSelectExpr[0].base.functionId;
      assert(f == TSDB_FUNC_TS || f == TSDB_FUNC_TS_DUMMY);

      pCtx->param[2].i64Key = order;
      pCtx->param[2].nType = TSDB_DATA_TYPE_BIGINT;
      pCtx->param[3].i64Key = functionId;
      pCtx->param[3].nType = TSDB_DATA_TYPE_BIGINT;

      pCtx->param[1].i64Key = pQuery->order.orderColId;
    }

    // the output offset of a column is the accumulated output width of its predecessors
    if (i > 0) {
      pRuntimeEnv->offset[i] = pRuntimeEnv->offset[i - 1] + pRuntimeEnv->pCtx[i - 1].outputBytes;
    }
  }

  // the intermediate buffers start right behind the SResultInfo array
  char* buf = (char*) pRuntimeEnv->resultInfo + sizeof(SResultInfo) * pQuery->numOfOutput;

  // set the intermediate result output buffer
  setWindowResultInfo(pRuntimeEnv->resultInfo, pQuery, pRuntimeEnv->stableQuery, buf);

  // if it is group by normal column, do not set output buffer, the output buffer is pResult
  if (!pRuntimeEnv->groupbyNormalCol && !pRuntimeEnv->stableQuery) {
    resetCtxOutputBuf(pRuntimeEnv);
  }

  setCtxTagColumnInfo(pRuntimeEnv, pRuntimeEnv->pCtx);

  qDebug("QInfo:%p init runtime completed", GET_QINFO_ADDR(pRuntimeEnv));
  return TSDB_CODE_SUCCESS;

_clean:
  taosTFree(pRuntimeEnv->resultInfo);
  taosTFree(pRuntimeEnv->pCtx);

  return TSDB_CODE_QRY_OUT_OF_MEMORY;
}

/*
 * Release the resources owned by a query runtime environment.
 *
 * Nothing happens when no query is attached to the environment. Otherwise the
 * time window results, every output function context (its parameters, tag
 * value and tag context list), the result info array, the context array, the
 * fill info, the result buffer, both tsdb query handles and the timestamp
 * buffer are cleaned up, in that order.
 */
static void teardownQueryRuntimeEnv(SQueryRuntimeEnv *pRuntimeEnv) {
  SQuery *pQuery = pRuntimeEnv->pQuery;
  if (pQuery == NULL) {
    return;
  }

  qDebug("QInfo:%p teardown runtime env", (SQInfo*) GET_QINFO_ADDR(pRuntimeEnv));
  cleanupTimeWindowInfo(&pRuntimeEnv->windowResInfo);

  if (pRuntimeEnv->pCtx != NULL) {
    for (int32_t k = 0; k < pQuery->numOfOutput; ++k) {
      SQLFunctionCtx *pFuncCtx = &pRuntimeEnv->pCtx[k];

      // destroy every parameter variant held by this function context
      int32_t p = 0;
      while (p < pFuncCtx->numOfParams) {
        tVariantDestroy(&pFuncCtx->param[p]);
        p += 1;
      }

      tVariantDestroy(&pFuncCtx->tag);
      taosTFree(pFuncCtx->tagInfo.pTagCtxList);
    }

    taosTFree(pRuntimeEnv->resultInfo);
    taosTFree(pRuntimeEnv->pCtx);
  }

  pRuntimeEnv->pFillInfo = taosDestoryFillInfo(pRuntimeEnv->pFillInfo);

  destroyResultBuf(pRuntimeEnv->pResultBuf);
  tsdbCleanupQueryHandle(pRuntimeEnv->pQueryHandle);
  tsdbCleanupQueryHandle(pRuntimeEnv->pSecQueryHandle);

  pRuntimeEnv->pTSBuf = tsBufDestroy(pRuntimeEnv->pTSBuf);
}

H
Haojun Liao 已提交
1620
// true when the code field of the given object holds the query-cancelled error code
#define IS_QUERY_KILLED(_q) ((_q)->code == TSDB_CODE_TSC_QUERY_CANCELLED)
1621

H
Haojun Liao 已提交
1622
// mark the query as cancelled by storing the cancellation code in the QInfo
static void setQueryKilled(SQInfo *pQInfo) {
  pQInfo->code = TSDB_CODE_TSC_QUERY_CANCELLED;
}
H
hjxilinx 已提交
1623

H
Haojun Liao 已提交
1624 1625 1626
/*
 * Decide whether the query is treated as a fixed output query.
 *
 * Interval queries never are. Top/bottom queries and group-by-normal-column
 * queries always are. Otherwise the query is fixed output as soon as one
 * select expression, other than the skipped timestamp expressions, is not a
 * multi-output function.
 */
static bool isFixedOutputQuery(SQueryRuntimeEnv* pRuntimeEnv) {
  SQuery* pQuery = pRuntimeEnv->pQuery;

  if (QUERY_IS_INTERVAL_QUERY(pQuery)) {
    return false;
  }

  // Note: top/bottom query is fixed output query
  if (pRuntimeEnv->topBotQuery || pRuntimeEnv->groupbyNormalCol) {
    return true;
  }

  for (int32_t idx = 0; idx < pQuery->numOfOutput; ++idx) {
    SSqlFuncMsg *pFunc = &pQuery->pSelectExpr[idx].base;

    // ignore the ts_comp function: a leading projection of the primary timestamp column
    bool tsCompExpr = (idx == 0) && (pFunc->functionId == TSDB_FUNC_PRJ) && (pFunc->numOfParams == 1) &&
                      (pFunc->colInfo.colIndex == PRIMARYKEY_TIMESTAMP_COL_INDEX);

    // timestamp helper functions do not influence the decision either
    bool tsExpr = (pFunc->functionId == TSDB_FUNC_TS) || (pFunc->functionId == TSDB_FUNC_TS_DUMMY);

    if (tsCompExpr || tsExpr) {
      continue;
    }

    if (!IS_MULTIOUTPUT(aAggs[pFunc->functionId].nStatus)) {
      return true;
    }
  }

  return false;
}

1656
// todo refactor with isLastRowQuery
H
hjxilinx 已提交
1657
// true if any select expression of the query is the interp function
static bool isPointInterpoQuery(SQuery *pQuery) {
  int32_t idx = 0;

  while (idx < pQuery->numOfOutput) {
    if (pQuery->pSelectExpr[idx].base.functionId == TSDB_FUNC_INTERP) {
      return true;
    }

    idx += 1;
  }

  return false;
}

// TODO REFACTOR:MERGE WITH CLIENT-SIDE FUNCTION
H
hjxilinx 已提交
1669
// true if any select expression is one of sum_rate/sum_irate/avg_rate/avg_irate
static bool isSumAvgRateQuery(SQuery *pQuery) {
  for (int32_t idx = 0; idx < pQuery->numOfOutput; ++idx) {
    const int32_t fid = pQuery->pSelectExpr[idx].base.functionId;

    // the timestamp function is skipped before the rate functions are checked
    bool rateFunc = (fid == TSDB_FUNC_SUM_RATE) || (fid == TSDB_FUNC_SUM_IRATE) ||
                    (fid == TSDB_FUNC_AVG_RATE) || (fid == TSDB_FUNC_AVG_IRATE);

    if (fid != TSDB_FUNC_TS && rateFunc) {
      return true;
    }
  }

  return false;
}

H
hjxilinx 已提交
1685
// true if any select expression of the query is the last_row function
static bool isFirstLastRowQuery(SQuery *pQuery) {
  int32_t idx = 0;

  while (idx < pQuery->numOfOutput) {
    if (pQuery->pSelectExpr[idx].base.functionId == TSDB_FUNC_LAST_ROW) {
      return true;
    }

    idx += 1;
  }

  return false;
}

H
hjxilinx 已提交
1696
/*
 * Check whether the select expressions require a scan in the reverse order.
 *
 * ts/ts_dummy/tag expressions are skipped. A first/first_dst function in a
 * non-ascending query requires it. The first last/last_dst function that is
 * met decides the result on its own: the order stored in its first argument is
 * compared with the order of the query.
 */
static bool needReverseScan(SQuery *pQuery) {
  for (int32_t idx = 0; idx < pQuery->numOfOutput; ++idx) {
    const int32_t fid = pQuery->pSelectExpr[idx].base.functionId;

    bool skipped = (fid == TSDB_FUNC_TS) || (fid == TSDB_FUNC_TS_DUMMY) || (fid == TSDB_FUNC_TAG);
    if (skipped) {
      continue;
    }

    bool firstFunc = (fid == TSDB_FUNC_FIRST) || (fid == TSDB_FUNC_FIRST_DST);
    if (firstFunc && !QUERY_IS_ASC_QUERY(pQuery)) {
      return true;
    }

    bool lastFunc = (fid == TSDB_FUNC_LAST) || (fid == TSDB_FUNC_LAST_DST);
    if (!lastFunc) {
      continue;
    }

    // the required order is carried by the first argument of the last function
    int32_t order = pQuery->pSelectExpr[idx].base.arg->argValue.i64;
    return order != pQuery->order.order;
  }

  return false;
}
H
hjxilinx 已提交
1715 1716 1717

/*
 * true if every select expression is a tag projection, a tid_tag function or
 * a count applied on the table name column.
 */
static bool onlyQueryTags(SQuery* pQuery) {
  for (int32_t idx = 0; idx < pQuery->numOfOutput; ++idx) {
    SExprInfo* pExpr = &pQuery->pSelectExpr[idx];
    const int32_t fid = pExpr->base.functionId;

    bool tagExpr = (fid == TSDB_FUNC_TAGPRJ) || (fid == TSDB_FUNC_TID_TAG) ||
                   (fid == TSDB_FUNC_COUNT && pExpr->base.colInfo.colId == TSDB_TBNAME_COLUMN_INDEX);

    if (!tagExpr) {
      return false;
    }
  }

  return true;
}

1730 1731
/////////////////////////////////////////////////////////////////////////////////////////////

H
Haojun Liao 已提交
1732
/*
 * Compute the time window that is used for the given key.
 *
 * win->skey is the interval start timestamp returned by
 * taosGetIntervalStartTimestamp() for the key; win->ekey is the last
 * timestamp of that interval, or INT64_MAX when the interval would run past
 * the largest representable timestamp.
 */
void getAlignQueryTimeWindow(SQuery *pQuery, int64_t key, int64_t keyFirst, int64_t keyLast, STimeWindow *win) {
  assert(key >= keyFirst && key <= keyLast && pQuery->slidingTime <= pQuery->intervalTime);

  win->skey = taosGetIntervalStartTimestamp(key, pQuery->slidingTime, pQuery->intervalTime, pQuery->slidingTimeUnit,
                                            pQuery->precision);

  if (keyFirst <= (INT64_MAX - pQuery->intervalTime)) {
    win->ekey = win->skey + pQuery->intervalTime - 1;
    return;
  }

  /*
   * keyFirst > INT64_MAX - pQuery->intervalTime: the range between keyFirst and keyLast
   * must be shorter than one interval, therefore no need to adjust the query range.
   */
  assert(keyLast - keyFirst < pQuery->intervalTime);
  win->ekey = INT64_MAX;
}

/*
 * Set pQuery->checkBuffer.
 *
 * The flag is 0 for top/bottom queries and for group-by-normal-column queries.
 * Otherwise it is 1 only if every select expression, other than ts/ts_dummy,
 * is a multi-output function (and at least one such expression exists).
 */
static void setScanLimitationByResultBuffer(SQuery *pQuery) {
  if (isTopBottomQuery(pQuery) || isGroupbyNormalCol(pQuery->pGroupbyExpr)) {
    pQuery->checkBuffer = 0;
    return;
  }

  bool allMultiOutput = false;

  for (int32_t idx = 0; idx < pQuery->numOfOutput; ++idx) {
    const int32_t fid = pQuery->pSelectExpr[idx].base.functionId;
    if (fid == TSDB_FUNC_TS || fid == TSDB_FUNC_TS_DUMMY) {
      continue;
    }

    allMultiOutput = IS_MULTIOUTPUT(aAggs[fid].nStatus);
    if (!allMultiOutput) {
      break;  // a single non multi-output function settles the result
    }
  }

  if (allMultiOutput) {
    pQuery->checkBuffer = 1;
  } else {
    pQuery->checkBuffer = 0;
  }
}

/*
 * todo add more parameters to check soon..
 */
1775
bool colIdCheck(SQuery *pQuery) {
1776 1777
  // load data column information is incorrect
  for (int32_t i = 0; i < pQuery->numOfCols - 1; ++i) {
1778
    if (pQuery->colList[i].colId == pQuery->colList[i + 1].colId) {
S
slguan 已提交
1779
      qError("QInfo:%p invalid data load column for query", GET_QINFO_ADDR(pQuery));
1780 1781 1782
      return false;
    }
  }
1783
  
1784 1785 1786 1787 1788 1789
  return true;
}

// todo: ignore the avg/sum/min/max/count/stddev/top/bottom functions, for which the scan order does not matter.
// true if every select expression, other than ts/ts_dummy/tag/tag_dummy, is either functId or functIdDst
static bool onlyOneQueryType(SQuery *pQuery, int32_t functId, int32_t functIdDst) {
  for (int32_t idx = 0; idx < pQuery->numOfOutput; ++idx) {
    const int32_t fid = pQuery->pSelectExpr[idx].base.functionId;

    bool auxiliary = (fid == TSDB_FUNC_TS) || (fid == TSDB_FUNC_TS_DUMMY) || (fid == TSDB_FUNC_TAG) ||
                     (fid == TSDB_FUNC_TAG_DUMMY);
    if (auxiliary) {
      continue;
    }

    bool wanted = (fid == functId) || (fid == functIdDst);
    if (!wanted) {
      return false;
    }
  }

  return true;
}

// true if the query only consists of first/first_dst functions (auxiliary ts/tag expressions aside)
static bool onlyFirstQuery(SQuery *pQuery) {
  return onlyOneQueryType(pQuery, TSDB_FUNC_FIRST, TSDB_FUNC_FIRST_DST);
}

// true if the query only consists of last/last_dst functions (auxiliary ts/tag expressions aside)
static bool onlyLastQuery(SQuery *pQuery) {
  return onlyOneQueryType(pQuery, TSDB_FUNC_LAST, TSDB_FUNC_LAST_DST);
}

H
Haojun Liao 已提交
1810 1811 1812 1813 1814 1815
// todo refactor, add iterator
/*
 * Swap the start and end key of the query window of every table in every
 * table group, and store the new start key as the last key of the matching
 * entry in the table key list of the same group.
 */
static void doExchangeTimeWindow(SQInfo* pQInfo) {
  size_t numOfGroups = GET_NUM_OF_TABLEGROUP(pQInfo);

  for (int32_t g = 0; g < numOfGroups; ++g) {
    SArray* pQueryInfoList = GET_TABLEGROUP(pQInfo, g);
    SArray* pKeyInfoList = taosArrayGetP(pQInfo->tableGroupInfo.pGroupList, g);

    size_t numOfTables = taosArrayGetSize(pQueryInfoList);
    for (int32_t t = 0; t < numOfTables; ++t) {
      STableQueryInfo* pTableInfo = (STableQueryInfo*) taosArrayGetP(pQueryInfoList, t);
      SWAP(pTableInfo->win.skey, pTableInfo->win.ekey, TSKEY);

      // keep the key info of the same table in line with the exchanged window
      STableKeyInfo* pKeyInfo = taosArrayGet(pKeyInfoList, t);
      pKeyInfo->lastKey = pTableInfo->win.skey;
    }
  }
}

H
Haojun Liao 已提交
1828 1829 1830
/*
 * Adjust the scan order (and the query window accordingly) for queries whose
 * result depends on a particular scan direction:
 *   - last_row query: descending order, window set to [max key, min key];
 *   - interp query without interval: ascending order;
 *   - non-interval query made of first (or last) functions only: ascending
 *     (or descending) order, the window of each table is exchanged as well;
 *   - interval query on a super table made of first (or last) functions only:
 *     ascending (or descending) order.
 * Any other query is left untouched.
 *
 * pQInfo       query info that owns the runtime environment and the query
 * stableQuery  true if the query is applied on a super table
 */
static void changeExecuteScanOrder(SQInfo *pQInfo, bool stableQuery) {
  SQuery* pQuery = pQInfo->runtimeEnv.pQuery;

  // The QInfo address is logged from pQInfo directly: GET_QINFO_ADDR() expects the address of the
  // runtimeEnv member and yields a meaningless value when it is applied to a SQuery pointer.
  char msg[] = "QInfo:%p scan order changed for %s query, old:%d, new:%d, qrange exchanged, old qrange:%" PRId64
               "-%" PRId64 ", new qrange:%" PRId64 "-%" PRId64;

  // todo handle the case the the order irrelevant query type mixed up with order critical query type
  // descending order query for last_row query
  if (isFirstLastRowQuery(pQuery)) {
    qDebug("QInfo:%p scan order changed for last_row query, old:%d, new:%d", pQInfo,
           pQuery->order.order, TSDB_ORDER_DESC);

    pQuery->order.order = TSDB_ORDER_DESC;

    int64_t skey = MIN(pQuery->window.skey, pQuery->window.ekey);
    int64_t ekey = MAX(pQuery->window.skey, pQuery->window.ekey);

    pQuery->window.skey = ekey;
    pQuery->window.ekey = skey;

    return;
  }

  // in case of point-interpolation query, use asc order scan
  if (isPointInterpoQuery(pQuery) && pQuery->intervalTime == 0) {
    if (!QUERY_IS_ASC_QUERY(pQuery)) {
      qDebug(msg, pQInfo, "interp", pQuery->order.order, TSDB_ORDER_ASC, pQuery->window.skey,
             pQuery->window.ekey, pQuery->window.ekey, pQuery->window.skey);
      SWAP(pQuery->window.skey, pQuery->window.ekey, TSKEY);
    }

    pQuery->order.order = TSDB_ORDER_ASC;
    return;
  }

  if (pQuery->intervalTime == 0) {
    if (onlyFirstQuery(pQuery)) {
      if (!QUERY_IS_ASC_QUERY(pQuery)) {
        qDebug(msg, pQInfo, "only-first", pQuery->order.order, TSDB_ORDER_ASC, pQuery->window.skey,
               pQuery->window.ekey, pQuery->window.ekey, pQuery->window.skey);

        SWAP(pQuery->window.skey, pQuery->window.ekey, TSKEY);
        doExchangeTimeWindow(pQInfo);
      }

      pQuery->order.order = TSDB_ORDER_ASC;
    } else if (onlyLastQuery(pQuery)) {
      if (QUERY_IS_ASC_QUERY(pQuery)) {
        qDebug(msg, pQInfo, "only-last", pQuery->order.order, TSDB_ORDER_DESC, pQuery->window.skey,
               pQuery->window.ekey, pQuery->window.ekey, pQuery->window.skey);

        SWAP(pQuery->window.skey, pQuery->window.ekey, TSKEY);
        doExchangeTimeWindow(pQInfo);
      }

      pQuery->order.order = TSDB_ORDER_DESC;
    }

  } else {  // interval query
    if (stableQuery) {
      if (onlyFirstQuery(pQuery)) {
        if (!QUERY_IS_ASC_QUERY(pQuery)) {
          qDebug(msg, pQInfo, "only-first stable", pQuery->order.order, TSDB_ORDER_ASC,
                 pQuery->window.skey, pQuery->window.ekey, pQuery->window.ekey, pQuery->window.skey);

          SWAP(pQuery->window.skey, pQuery->window.ekey, TSKEY);
        }

        pQuery->order.order = TSDB_ORDER_ASC;
      } else if (onlyLastQuery(pQuery)) {
        if (QUERY_IS_ASC_QUERY(pQuery)) {
          qDebug(msg, pQInfo, "only-last stable", pQuery->order.order, TSDB_ORDER_DESC,
                 pQuery->window.skey, pQuery->window.ekey, pQuery->window.ekey, pQuery->window.skey);

          SWAP(pQuery->window.skey, pQuery->window.ekey, TSKEY);
        }

        pQuery->order.order = TSDB_ORDER_DESC;
      }
    }
  }
}

/*
 * Initial number of result pages: 128 for a group-by-normal-column query, the
 * larger of the number of tables and 16 for an interval query, 1 otherwise.
 */
static int32_t getInitialPageNum(SQInfo *pQInfo) {
  int32_t INITIAL_RESULT_ROWS_VALUE = 16;
  SQuery *pQuery = pQInfo->runtimeEnv.pQuery;

  int32_t pages = 1;  // for super table query, one page for each subset: pQInfo->pSidSet->numOfSubSet;

  if (isGroupbyNormalCol(pQuery->pGroupbyExpr)) {
    pages = 128;
  } else if (QUERY_IS_INTERVAL_QUERY(pQuery)) {
    // time window query, allocate one page for each table
    size_t numOfTables = pQInfo->tableqinfoGroupInfo.numOfTables;
    pages = MAX(numOfTables, INITIAL_RESULT_ROWS_VALUE);
  }

  assert(pages > 0);
  return pages;
}

1930 1931 1932 1933 1934 1935 1936 1937 1938 1939 1940 1941 1942 1943
/*
 * Compute the layout of the intermediate result buffer.
 *
 * ps       [out] page size: the default page size, doubled until a page can
 *                hold at least two rows besides the tFilePage header
 * rowsize  [out] size of one intermediate result row
 *
 * The number of rows per page is stored in pRuntimeEnv->numOfRowsPerPage.
 */
static void getIntermediateBufInfo(SQueryRuntimeEnv* pRuntimeEnv, int32_t* ps, int32_t* rowsize) {
  SQuery* pQuery = pRuntimeEnv->pQuery;
  int32_t headerSize = sizeof(tFilePage);

  *rowsize = pQuery->rowSize * GET_ROW_PARAM_FOR_MULTIOUTPUT(pQuery, pRuntimeEnv->topBotQuery, pRuntimeEnv->stableQuery);

  // one page contains at least two rows
  for (*ps = DEFAULT_INTERN_BUF_PAGE_SIZE; ((*rowsize) * 2) > (*ps) - headerSize; *ps = (*ps << 1u)) {
    // the page size is doubled by the loop expression
  }

  pRuntimeEnv->numOfRowsPerPage = ((*ps) - sizeof(tFilePage)) / (*rowsize);
}

H
Haojun Liao 已提交
1946
// true for every data type except binary and nchar
#define IS_PREFILTER_TYPE(_t) ((_t) != TSDB_DATA_TYPE_BINARY && (_t) != TSDB_DATA_TYPE_NCHAR)
1947

H
Haojun Liao 已提交
1948 1949 1950 1951
/*
 * Decide from the block statistics whether the data block has to be loaded.
 *
 * Returns true when there are no statistics, when there is neither a filter
 * nor a top/bottom function, when a filtered column has no statistics or is
 * of binary/nchar type, or when one filter accepts the [min, max] range of
 * its column. A filtered column that only holds NULL values is skipped. For a
 * top/bottom query the decision of topbot_datablock_filter() on the first
 * top/bottom expression is returned. Otherwise the block is not required.
 */
static bool needToLoadDataBlock(SQueryRuntimeEnv* pRuntimeEnv, SDataStatis *pDataStatis, SQLFunctionCtx *pCtx,
    int32_t numOfRows) {
  SQuery* pQuery = pRuntimeEnv->pQuery;

  if (pDataStatis == NULL) {
    return true;
  }

  if (pQuery->numOfFilterCols == 0 && (!pRuntimeEnv->topBotQuery)) {
    return true;
  }

  for (int32_t f = 0; f < pQuery->numOfFilterCols; ++f) {
    SSingleColumnFilterInfo *pColFilter = &pQuery->pFilterInfo[f];

    // locate the statistics entry of the filtered column
    int32_t slot = 0;
    while (slot < pQuery->numOfCols && pDataStatis[slot].colId != pColFilter->info.colId) {
      slot += 1;
    }

    // no statistics data
    if (slot >= pQuery->numOfCols) {
      return true;
    }

    // not support pre-filter operation on binary/nchar data type
    if (!IS_PREFILTER_TYPE(pColFilter->info.type)) {
      return true;
    }

    // all points in current column are NULL, no need to check its boundary value
    SDataStatis* pColStatis = &pDataStatis[slot];
    if (pColStatis->numOfNull == numOfRows) {
      continue;
    }

    char *pLower = (char *)&pColStatis->min;
    char *pUpper = (char *)&pColStatis->max;

    // for a float column the min/max fields are read as double and handed to the filter as float
    float fLower = 0;
    float fUpper = 0;
    if (pColFilter->info.type == TSDB_DATA_TYPE_FLOAT) {
      fLower = *(double *)(&pColStatis->min);
      fUpper = *(double *)(&pColStatis->max);

      pLower = (char *)&fLower;
      pUpper = (char *)&fUpper;
    }

    for (int32_t k = 0; k < pColFilter->numOfFilters; ++k) {
      if (pColFilter->pFilters[k].fp(&pColFilter->pFilters[k], pLower, pUpper)) {
        return true;
      }
    }
  }

  if (pRuntimeEnv->topBotQuery) {
    for (int32_t idx = 0; idx < pQuery->numOfOutput; ++idx) {
      int32_t fid = pQuery->pSelectExpr[idx].base.functionId;
      if (fid != TSDB_FUNC_TOP && fid != TSDB_FUNC_BOTTOM) {
        continue;
      }

      // NOTE(review): the statistics are indexed by the output index here, not by a column lookup as
      // done for the filters above -- confirm that both orders are guaranteed to match.
      return topbot_datablock_filter(&pCtx[idx], fid, (char *)&pDataStatis[idx].min, (char *)&pDataStatis[idx].max);
    }
  }

  return false;
}

H
Haojun Liao 已提交
2013 2014 2015 2016 2017 2018 2019 2020 2021 2022 2023 2024 2025 2026 2027 2028 2029 2030 2031 2032 2033 2034 2035 2036 2037 2038 2039 2040 2041 2042 2043 2044 2045 2046 2047 2048 2049 2050 2051 2052 2053 2054 2055 2056 2057 2058 2059
// true if _p lies strictly between the skey and ekey of window _w (both bounds are excluded)
#define PT_IN_WINDOW(_p, _w)  ((_p) > (_w).skey && (_p) < (_w).ekey)

/*
 * Check whether the data block is split by a time window boundary, i.e. at
 * least one time window covers only a part of the key range of the block.
 *
 * The first window is the one aligned to the first key of the block for an
 * ascending query, or to the last key of the block for a descending query;
 * the following windows are obtained with GET_NEXT_TIMEWINDOW(). The bounds
 * are handled inclusively: a window that ends exactly on the first key of a
 * block, or starts exactly on its last key, covers one row of the block only
 * and therefore counts as a partial overlap as long as the block spans more
 * than one timestamp. A false positive is harmless, since the caller merely
 * falls back to loading the complete data block.
 *
 * NOTE(review): assumes that the aligned window contains the key it is
 * computed for, and that GET_NEXT_TIMEWINDOW() moves along the scan direction
 * of the query -- confirm.
 */
static bool overlapWithTimeWindow(SQuery* pQuery, SDataBlockInfo* pBlockInfo) {
  STimeWindow w = {0};

  TSKEY sk = MIN(pQuery->window.skey, pQuery->window.ekey);
  TSKEY ek = MAX(pQuery->window.skey, pQuery->window.ekey);

  TSKEY blockSkey = pBlockInfo->window.skey;
  TSKEY blockEkey = pBlockInfo->window.ekey;

  if (QUERY_IS_ASC_QUERY(pQuery)) {
    getAlignQueryTimeWindow(pQuery, blockSkey, sk, ek, &w);

    // the first window ends before the last row of the block
    if (w.ekey < blockEkey) {
      return true;
    }

    while (1) {
      GET_NEXT_TIMEWINDOW(pQuery, &w);
      if (w.skey > blockEkey) {
        break;
      }

      // the window starts behind the first row of the block, so that row is not covered by it
      if (w.skey > blockSkey) {
        return true;
      }
    }
  } else {
    getAlignQueryTimeWindow(pQuery, blockEkey, sk, ek, &w);

    // the first window starts behind the first row of the block
    if (w.skey > blockSkey) {
      return true;
    }

    while (1) {
      GET_NEXT_TIMEWINDOW(pQuery, &w);
      if (w.ekey < blockSkey) {
        break;
      }

      // the window ends before the last row of the block, so that row is not covered by it
      if (w.ekey < blockEkey) {
        return true;
      }
    }
  }

  return false;
}

H
Haojun Liao 已提交
2060
/*
 * Load the statistics and/or the data of the current data block, depending on
 * what the query needs.
 *
 * pRuntimeEnv   runtime environment of the query
 * pQueryHandle  tsdb query handle positioned on the block
 * pBlockInfo    key range and number of rows of the block
 * pStatis       [out] block statistics, when they are retrieved
 * pDataBlock    [out] block data, when it is retrieved
 *
 * Returns BLK_DATA_DISCARD if the block is filtered out by its statistics,
 * TSDB_CODE_SUCCESS otherwise. Note that a block which is not needed at all
 * (BLK_DATA_NO_NEEDED) is reported with TSDB_CODE_SUCCESS and nothing loaded.
 */
int32_t loadDataBlockOnDemand(SQueryRuntimeEnv *pRuntimeEnv, void* pQueryHandle, SDataBlockInfo* pBlockInfo, SDataStatis **pStatis, SArray** pDataBlock) {
  SQuery *pQuery = pRuntimeEnv->pQuery;

  uint32_t status = 0;
  if (pQuery->numOfFilterCols > 0 || pRuntimeEnv->pTSBuf != NULL) {
    // filters and the timestamp buffer both require the complete block
    status = BLK_DATA_ALL_NEEDED;
  } else { // check if this data block is required to load

    // Calculate all time windows that are overlapping or contain current data block.
    // A block that is only partially covered by a time window has to be loaded completely.
    if (QUERY_IS_INTERVAL_QUERY(pQuery) && overlapWithTimeWindow(pQuery, pBlockInfo)) {
      status = BLK_DATA_ALL_NEEDED;
    }

    if (status != BLK_DATA_ALL_NEEDED) {
      // merge the requirement of every output function, stop once all data is required
      for (int32_t i = 0; i < pQuery->numOfOutput; ++i) {
        SSqlFuncMsg* pSqlFunc = &pQuery->pSelectExpr[i].base;

        int32_t functionId = pSqlFunc->functionId;
        int32_t colId = pSqlFunc->colInfo.colId;

        status |= aAggs[functionId].dataReqFunc(&pRuntimeEnv->pCtx[i], pBlockInfo->window.skey, pBlockInfo->window.ekey, colId);
        if ((status & BLK_DATA_ALL_NEEDED) == BLK_DATA_ALL_NEEDED) {
          break;
        }
      }
    }
  }

  if (status == BLK_DATA_NO_NEEDED) {
    qDebug("QInfo:%p data block discard, brange:%" PRId64 "-%" PRId64 ", rows:%d", GET_QINFO_ADDR(pRuntimeEnv),
           pBlockInfo->window.skey, pBlockInfo->window.ekey, pBlockInfo->rows);
    pRuntimeEnv->summary.discardBlocks += 1;
  } else if (status == BLK_DATA_STATIS_NEEDED) {
    if (tsdbRetrieveDataBlockStatisInfo(pQueryHandle, pStatis) != TSDB_CODE_SUCCESS) {
      // the failure is deliberately tolerated: the missing statistics are handled right below
      //        return DISK_DATA_LOAD_FAILED;
    }

    pRuntimeEnv->summary.loadBlockStatis += 1;

    if (*pStatis == NULL) { // data block statistics does not exist, load data block
      *pDataBlock = tsdbRetrieveDataBlock(pQueryHandle, NULL);
      pRuntimeEnv->summary.totalCheckedRows += pBlockInfo->rows;
    }
  } else {
    assert(status == BLK_DATA_ALL_NEEDED);

    // load the data block statistics to perform further filter
    pRuntimeEnv->summary.loadBlockStatis += 1;
    if (tsdbRetrieveDataBlockStatisInfo(pQueryHandle, pStatis) != TSDB_CODE_SUCCESS) {
      // the failure is deliberately tolerated: needToLoadDataBlock() requests the block when there are no statistics
    }

    if (!needToLoadDataBlock(pRuntimeEnv, *pStatis, pRuntimeEnv->pCtx, pBlockInfo->rows)) {
      // current block has been discard due to filter applied
      pRuntimeEnv->summary.discardBlocks += 1;
      qDebug("QInfo:%p data block discard, brange:%" PRId64 "-%" PRId64 ", rows:%d", GET_QINFO_ADDR(pRuntimeEnv),
          pBlockInfo->window.skey, pBlockInfo->window.ekey, pBlockInfo->rows);
      return BLK_DATA_DISCARD;
    }

    pRuntimeEnv->summary.totalCheckedRows += pBlockInfo->rows;
    pRuntimeEnv->summary.loadBlocks += 1;
    *pDataBlock = tsdbRetrieveDataBlock(pQueryHandle, NULL);
  }

  return TSDB_CODE_SUCCESS;
}

H
hjxilinx 已提交
2128
/*
 * Binary search of a timestamp in a list of keys.
 *
 * pValue  key list, accessed as an array of TSKEY (presumably sorted in
 *         ascending order -- the search relies on it)
 * num     number of keys in the list
 * key     timestamp to look for
 * order   TSDB_ORDER_ASC or TSDB_ORDER_DESC
 *
 * Returns the position of a key equal to the given one if the search hits it.
 * Otherwise, for TSDB_ORDER_DESC the position of the closest smaller key (-1
 * when every key is larger), for TSDB_ORDER_ASC the position of the closest
 * larger key (-1 when every key is smaller). Returns -1 if num <= 0.
 */
int32_t binarySearchForKey(char *pValue, int num, TSKEY key, int order) {
  if (num <= 0) {
    return -1;
  }

  assert(order == TSDB_ORDER_ASC || order == TSDB_ORDER_DESC);

  TSKEY * keys = (TSKEY *)pValue;
  int32_t lo = 0;
  int32_t hi = num - 1;

  if (order == TSDB_ORDER_DESC) {
    // find the first position which is smaller than the key
    for (;;) {
      if (key >= keys[hi]) {
        return hi;
      }

      if (key == keys[lo]) {
        return lo;
      }

      if (key < keys[lo]) {
        return lo - 1;
      }

      int32_t mid = lo + ((hi - lo + 1) >> 1);
      if (key < keys[mid]) {
        hi = mid - 1;
      } else if (key > keys[mid]) {
        lo = mid + 1;
      } else {
        return mid;
      }
    }
  }

  // find the first position which is bigger than the key
  for (;;) {
    if (key <= keys[lo]) {
      return lo;
    }

    if (key == keys[hi]) {
      return hi;
    }

    if (key > keys[hi]) {
      int32_t next = hi + 1;
      return (next >= num) ? -1 : next;
    }

    int32_t mid = lo + ((hi - lo + 1) >> 1);
    if (key < keys[mid]) {
      hi = mid - 1;
    } else if (key > keys[mid]) {
      lo = mid + 1;
    } else {
      return mid;
    }
  }
}

2191 2192 2193 2194 2195 2196 2197 2198 2199 2200 2201 2202 2203 2204 2205 2206 2207 2208 2209 2210 2211 2212
/*
 * Grow every output buffer of the query to hold `capacity` rows.
 *
 * Nothing is done when the requested capacity is smaller than the current
 * one. After a buffer is reallocated, the output position of the matching
 * function context is reset to the start of its data area.
 *
 * If a reallocation fails, the function does not return: it jumps to
 * pRuntimeEnv->env with TSDB_CODE_QRY_OUT_OF_MEMORY. The buffers that are
 * already enlarged stay valid and the recorded capacity is left unchanged, so
 * it never exceeds the real size of a buffer.
 *
 * NOTE(review): assumes every caller runs inside the setjmp() context of
 * pRuntimeEnv->env, like the block scan loop does -- confirm.
 */
static void ensureOutputBufferSimple(SQueryRuntimeEnv* pRuntimeEnv, int32_t capacity) {
  SQuery* pQuery = pRuntimeEnv->pQuery;

  if (capacity < pQuery->rec.capacity) {
    return;
  }

  for (int32_t i = 0; i < pQuery->numOfOutput; ++i) {
    int32_t bytes = pQuery->pSelectExpr[i].bytes;
    assert(bytes > 0 && capacity > 0);

    char *tmp = realloc(pQuery->sdata[i], bytes * capacity + sizeof(tFilePage));
    if (tmp == NULL) {
      // assert(0) vanishes in release builds and would let the query write past the old buffer
      longjmp(pRuntimeEnv->env, TSDB_CODE_QRY_OUT_OF_MEMORY);
    }

    pQuery->sdata[i] = (tFilePage *)tmp;

    // set the pCtx output buffer position
    pRuntimeEnv->pCtx[i].aOutputBuf = pQuery->sdata[i]->data;
  }

  qDebug("QInfo:%p realloc output buffer to inc output buffer from: %" PRId64 " rows to:%d rows", GET_QINFO_ADDR(pRuntimeEnv),
         pQuery->rec.capacity, capacity);

  pQuery->rec.capacity = capacity;
}

2219 2220 2221
/*
 * In case of prj/diff query, ensure the output buffer is sufficient to
 * accommodate the results of the current block.
 *
 * Only applies to queries that are neither interval, group-by-normal-column,
 * fixed output nor ts_comp queries. If the free room of the output buffers is
 * smaller than the number of rows in the block, every buffer is enlarged by
 * the missing number of rows, the new room is zeroed, and the output position
 * of every function context is moved behind the rows produced so far.
 *
 * If a reallocation fails, the function does not return: it jumps to
 * pRuntimeEnv->env with TSDB_CODE_QRY_OUT_OF_MEMORY. The buffers that are
 * already enlarged stay valid and the recorded capacity is left unchanged, so
 * it never exceeds the real size of a buffer.
 */
static void ensureOutputBuffer(SQueryRuntimeEnv* pRuntimeEnv, SDataBlockInfo* pBlockInfo) {
  SQuery* pQuery = pRuntimeEnv->pQuery;
  if (QUERY_IS_INTERVAL_QUERY(pQuery) || pRuntimeEnv->groupbyNormalCol || isFixedOutputQuery(pRuntimeEnv) || isTSCompQuery(pQuery)) {
    return;
  }

  SResultRec *pRec = &pQuery->rec;
  if (pRec->capacity - pRec->rows >= pBlockInfo->rows) {
    return;  // enough room left for the whole block
  }

  int32_t remain = pRec->capacity - pRec->rows;
  int32_t newSize = pRec->capacity + (pBlockInfo->rows - remain);

  for (int32_t i = 0; i < pQuery->numOfOutput; ++i) {
    int32_t bytes = pQuery->pSelectExpr[i].bytes;
    assert(bytes > 0 && newSize > 0);

    char *tmp = realloc(pQuery->sdata[i], bytes * newSize + sizeof(tFilePage));
    if (tmp == NULL) {
      // assert(0) vanishes in release builds and would let the query write past the old buffer
      longjmp(pRuntimeEnv->env, TSDB_CODE_QRY_OUT_OF_MEMORY);
    }

    // clear the room behind the rows that are already produced
    memset(tmp + sizeof(tFilePage) + bytes * pRec->rows, 0, (newSize - pRec->rows) * bytes);
    pQuery->sdata[i] = (tFilePage *)tmp;

    // set the pCtx output buffer position
    pRuntimeEnv->pCtx[i].aOutputBuf = pQuery->sdata[i]->data + pRec->rows * bytes;

    // top/bottom/diff write their timestamps into the output buffer of the first expression
    int32_t functionId = pQuery->pSelectExpr[i].base.functionId;
    if (functionId == TSDB_FUNC_TOP || functionId == TSDB_FUNC_BOTTOM || functionId == TSDB_FUNC_DIFF) {
      pRuntimeEnv->pCtx[i].ptsOutputBuf = pRuntimeEnv->pCtx[0].aOutputBuf;
    }
  }

  qDebug("QInfo:%p realloc output buffer, new size: %d rows, old:%" PRId64 ", remain:%" PRId64, GET_QINFO_ADDR(pRuntimeEnv),
         newSize, pRec->capacity, newSize - pRec->rows);

  pRec->capacity = newSize;
}

2258 2259 2260 2261 2262 2263 2264 2265 2266 2267 2268 2269 2270 2271 2272 2273 2274 2275 2276 2277 2278
/*
 * Initialize the start time and the previous start key of the time window
 * result info from the first data block of an interval query. Nothing is
 * done for other queries, or once prevSKey is no longer TSKEY_INITIAL_VAL.
 */
static void doSetInitialTimewindow(SQueryRuntimeEnv* pRuntimeEnv, SDataBlockInfo* pBlockInfo) {
  SQuery* pQuery = pRuntimeEnv->pQuery;
  SWindowResInfo *pResInfo = &pRuntimeEnv->windowResInfo;

  if (!QUERY_IS_INTERVAL_QUERY(pQuery) || pResInfo->prevSKey != TSKEY_INITIAL_VAL) {
    return;
  }

  STimeWindow win = TSWINDOW_INITIALIZER;

  if (QUERY_IS_ASC_QUERY(pQuery)) {
    getAlignQueryTimeWindow(pQuery, pBlockInfo->window.skey, pBlockInfo->window.skey, pQuery->window.ekey, &win);
    pResInfo->startTime = win.skey;
  } else {
    // the start position of the first time window in the endpoint that spreads beyond the queried last timestamp
    getAlignQueryTimeWindow(pQuery, pBlockInfo->window.ekey, pQuery->window.ekey, pBlockInfo->window.ekey, &win);
    pResInfo->startTime = pQuery->window.skey;
  }

  pResInfo->prevSKey = win.skey;
}

2279 2280
/*
 * Scan the data blocks of the current table and apply the query functions on
 * every block that is not discarded.
 *
 * The scan stops when there is no block left, or when the result buffer is
 * full or the query is completed. A tsdb error or a cancelled query leaves the
 * function through longjmp() on pRuntimeEnv->env. Always returns 0.
 */
static int64_t doScanAllDataBlocks(SQueryRuntimeEnv *pRuntimeEnv) {
  SQuery          *pQuery = pRuntimeEnv->pQuery;
  SQueryCostInfo  *pCost = &pRuntimeEnv->summary;
  STableQueryInfo *pStartInfo = pQuery->current;

  qDebug("QInfo:%p query start, qrange:%" PRId64 "-%" PRId64 ", lastkey:%" PRId64 ", order:%d",
         GET_QINFO_ADDR(pRuntimeEnv), pStartInfo->win.skey, pStartInfo->win.ekey, pStartInfo->lastKey,
         pQuery->order.order);

  // the master scan and the other scans work on different query handles
  TsdbQueryHandleT pHandle = pRuntimeEnv->pSecQueryHandle;
  if (IS_MASTER_SCAN(pRuntimeEnv)) {
    pHandle = pRuntimeEnv->pQueryHandle;
  }

  int32_t step = GET_FORWARD_DIRECTION_FACTOR(pQuery->order.order);

  SDataBlockInfo blockInfo = SDATA_BLOCK_INITIALIZER;
  for (;;) {
    if (!tsdbNextDataBlock(pHandle)) {
      if (terrno != TSDB_CODE_SUCCESS) {
        longjmp(pRuntimeEnv->env, terrno);
      }

      break;  // no more data blocks
    }

    pCost->totalBlocks += 1;

    if (IS_QUERY_KILLED(GET_QINFO_ADDR(pRuntimeEnv))) {
      longjmp(pRuntimeEnv->env, TSDB_CODE_TSC_QUERY_CANCELLED);
    }

    tsdbRetrieveDataBlockInfo(pHandle, &blockInfo);
    doSetInitialTimewindow(pRuntimeEnv, &blockInfo);

    // in case of prj/diff query, ensure the output buffer is sufficient to accommodate the results of current block
    ensureOutputBuffer(pRuntimeEnv, &blockInfo);

    SDataStatis *pStatis = NULL;
    SArray      *pDataBlock = NULL;
    int32_t      ret = loadDataBlockOnDemand(pRuntimeEnv, pHandle, &blockInfo, &pStatis, &pDataBlock);
    if (ret == BLK_DATA_DISCARD) {
      // move the last key over the discarded block
      TSKEY edge = QUERY_IS_ASC_QUERY(pQuery)? blockInfo.window.ekey : blockInfo.window.skey;
      pQuery->current->lastKey = edge + step;
      continue;
    }

    // query start position can not move into tableApplyFunctionsOnBlock due to limit/offset condition
    if (QUERY_IS_ASC_QUERY(pQuery)) {
      pQuery->pos = 0;
    } else {
      pQuery->pos = blockInfo.rows - 1;
    }

    int32_t numOfRes = tableApplyFunctionsOnBlock(pRuntimeEnv, &blockInfo, pStatis, binarySearchForKey, pDataBlock);

    pCost->totalRows += blockInfo.rows;
    qDebug("QInfo:%p check data block, brange:%" PRId64 "-%" PRId64 ", numOfRows:%d, numOfRes:%d, lastKey:%"PRId64, GET_QINFO_ADDR(pRuntimeEnv),
           blockInfo.window.skey, blockInfo.window.ekey, blockInfo.rows, numOfRes, pQuery->current->lastKey);

    // while the output buffer is full or limit/offset is applied, query may be paused here
    if (Q_STATUS_EQUAL(pQuery->status, QUERY_RESBUF_FULL | QUERY_COMPLETED)) {
      break;
    }
  }

  // if the result buffer is not full, set the query complete
  if (!Q_STATUS_EQUAL(pQuery->status, QUERY_RESBUF_FULL)) {
    setQueryStatus(pQuery, QUERY_COMPLETED);
  }

  if (QUERY_IS_INTERVAL_QUERY(pQuery) && IS_MASTER_SCAN(pRuntimeEnv)) {
    if (!Q_STATUS_EQUAL(pQuery->status, QUERY_COMPLETED)) {
      assert(Q_STATUS_EQUAL(pQuery->status, QUERY_RESBUF_FULL));
    } else {
      closeAllTimeWindow(&pRuntimeEnv->windowResInfo);
      pRuntimeEnv->windowResInfo.curIndex = pRuntimeEnv->windowResInfo.size - 1;  // point to the last time window
    }
  }

  return 0;
}

/*
 * set tag value in SQLFunctionCtx
 * e.g.,tag information into input buffer
 */
2353
static void doSetTagValueInParam(void *tsdb, void* pTable, int32_t tagColId, tVariant *tag, int16_t type, int16_t bytes) {
H
[td-90]  
Haojun Liao 已提交
2354
  tVariantDestroy(tag);
2355

2356
  if (tagColId == TSDB_TBNAME_COLUMN_INDEX) {
2357
    char* val = tsdbGetTableName(pTable);
H
[td-90]  
Haojun Liao 已提交
2358 2359 2360
    assert(val != NULL);
    
    tVariantCreateFromBinary(tag, varDataVal(val), varDataLen(val), TSDB_DATA_TYPE_BINARY);
2361
  } else {
2362
    char* val = tsdbGetTableTagVal(pTable, tagColId, type, bytes);
H
[td-90]  
Haojun Liao 已提交
2363 2364 2365 2366
    if (val == NULL) {
      tag->nType = TSDB_DATA_TYPE_NULL;
      return;
    }
H
hjxilinx 已提交
2367 2368
    
    if (type == TSDB_DATA_TYPE_BINARY || type == TSDB_DATA_TYPE_NCHAR) {
H
Hongze Cheng 已提交
2369
      if (isNull(val, type)) {
H
Haojun Liao 已提交
2370 2371 2372 2373
        tag->nType = TSDB_DATA_TYPE_NULL;
        return;
      }

H
[td-90]  
Haojun Liao 已提交
2374
      tVariantCreateFromBinary(tag, varDataVal(val), varDataLen(val), type);
H
hjxilinx 已提交
2375
    } else {
H
Haojun Liao 已提交
2376 2377 2378 2379 2380
      if (isNull(val, type)) {
        tag->nType = TSDB_DATA_TYPE_NULL;
        return;
      }

H
[td-90]  
Haojun Liao 已提交
2381
      tVariantCreateFromBinary(tag, val, bytes, type);
H
hjxilinx 已提交
2382
    }
2383
  }
2384 2385
}

2386 2387 2388 2389 2390 2391 2392 2393 2394 2395 2396 2397
/*
 * Linear search of the tag column list for the entry whose colId matches.
 * Returns a pointer into pTagColList, or NULL when no entry matches.
 */
static SColumnInfo* doGetTagColumnInfoById(SColumnInfo* pTagColList, int32_t numOfTags, int16_t colId) {
  assert(pTagColList != NULL && numOfTags > 0);

  SColumnInfo *pEnd = pTagColList + numOfTags;
  for (SColumnInfo *pCol = pTagColList; pCol < pEnd; ++pCol) {
    if (pCol->colId == colId) {
      return pCol;
    }
  }

  return NULL;
}

2398
/*
 * Fill the tag slot of the function contexts with the tag values of pTable.
 *
 * - When the query has a single TSDB_FUNC_TS_COMP output, only pCtx[0].tag is
 *   set, using the tag column id carried in the expression argument.
 * - Otherwise every output expression whose column is flagged as a tag gets its
 *   tag value, and, if a TS buffer exists and the first expression is a ts/prj
 *   function on the primary timestamp column, pCtx[0].tag is additionally set
 *   to the join tag carried in the first expression's argument.
 *
 * The tag column referenced by the expression argument must be present in
 * pQuery->tagColList; this is asserted because the lookup result is
 * dereferenced right away.
 */
void setTagVal(SQueryRuntimeEnv *pRuntimeEnv, void *pTable, void *tsdb) {
  SQuery *pQuery = pRuntimeEnv->pQuery;
  SQInfo *pQInfo = GET_QINFO_ADDR(pRuntimeEnv);

  SExprInfo *pExprInfo = &pQuery->pSelectExpr[0];
  if (pQuery->numOfOutput == 1 && pExprInfo->base.functionId == TSDB_FUNC_TS_COMP) {
    assert(pExprInfo->base.numOfParams == 1);

    // the argument stores the tag column id as a 64-bit value; narrow it explicitly
    int16_t      tagColId = (int16_t)pExprInfo->base.arg->argValue.i64;
    SColumnInfo *pColInfo = doGetTagColumnInfoById(pQuery->tagColList, pQuery->numOfTags, tagColId);
    assert(pColInfo != NULL);

    doSetTagValueInParam(tsdb, pTable, tagColId, &pRuntimeEnv->pCtx[0].tag, pColInfo->type, pColInfo->bytes);
  } else {
    // set tag value, by which the results are aggregated.
    for (int32_t idx = 0; idx < pQuery->numOfOutput; ++idx) {
      SExprInfo *pLocalExprInfo = &pQuery->pSelectExpr[idx];

      // only expressions on tag columns carry a tag value
      if (!TSDB_COL_IS_TAG(pLocalExprInfo->base.colInfo.flag)) {
        continue;
      }

      // todo use tag column index to optimize performance
      doSetTagValueInParam(tsdb, pTable, pLocalExprInfo->base.colInfo.colId, &pRuntimeEnv->pCtx[idx].tag,
                           pLocalExprInfo->type, pLocalExprInfo->bytes);
    }

    // set the join tag for first column
    SSqlFuncMsg *pFuncMsg = &pExprInfo->base;
    if ((pFuncMsg->functionId == TSDB_FUNC_TS || pFuncMsg->functionId == TSDB_FUNC_PRJ) && pRuntimeEnv->pTSBuf != NULL &&
        pFuncMsg->colInfo.colIndex == PRIMARYKEY_TIMESTAMP_COL_INDEX) {
      assert(pFuncMsg->numOfParams == 1);

      int16_t      tagColId = (int16_t)pExprInfo->base.arg->argValue.i64;
      SColumnInfo *pColInfo = doGetTagColumnInfoById(pQuery->tagColList, pQuery->numOfTags, tagColId);
      assert(pColInfo != NULL);

      doSetTagValueInParam(tsdb, pTable, tagColId, &pRuntimeEnv->pCtx[0].tag, pColInfo->type, pColInfo->bytes);
      qDebug("QInfo:%p set tag value for join comparison, colId:%" PRId64 ", val:%"PRId64, pQInfo, pExprInfo->base.arg->argValue.i64,
          pRuntimeEnv->pCtx[0].tag.i64Key);
    }
  }
}

/*
 * Feed one window result into the merge functions of all output columns.
 *
 * mergeFlag == false starts a new output row: the output buffer of every
 * context is advanced by one element and the function is re-initialized.
 * mergeFlag == true merges the input into the current output row.
 */
static void doMerge(SQueryRuntimeEnv *pRuntimeEnv, int64_t timestamp, SWindowResult *pWindowRes, bool mergeFlag) {
  SQuery         *pQuery = pRuntimeEnv->pQuery;
  SQLFunctionCtx *pCtxList = pRuntimeEnv->pCtx;

  tFilePage *pPage = getResBufPage(pRuntimeEnv->pResultBuf, pWindowRes->pos.pageId);

  // first pass: prepare the input/output of every function context
  for (int32_t k = 0; k < pQuery->numOfOutput; ++k) {
    SQLFunctionCtx *pCur = &pCtxList[k];
    int32_t         funcId = pQuery->pSelectExpr[k].base.functionId;

    if (!mergeFlag) {
      pCur->aOutputBuf = pCur->aOutputBuf + pCur->outputBytes;
      pCur->currentStage = FIRST_STAGE_MERGE;

      RESET_RESULT_INFO(pCur->resultInfo);
      aAggs[funcId].init(pCur);
    }

    pCur->hasNull = true;
    pCur->nStartQueryTimestamp = timestamp;
    pCur->aInputElemBuf = getPosInResultPage(pRuntimeEnv, k, pWindowRes, pPage);

    if (funcId != TSDB_FUNC_TAG_DUMMY && funcId != TSDB_FUNC_TAG) {
      continue;
    }

    // in case of tag column, the tag information should be extracted from input buffer
    tVariantDestroy(&pCur->tag);

    int32_t type = pCur->outputType;
    if (type == TSDB_DATA_TYPE_BINARY || type == TSDB_DATA_TYPE_NCHAR) {
      tVariantCreateFromBinary(&pCur->tag, varDataVal(pCur->aInputElemBuf), varDataLen(pCur->aInputElemBuf), type);
    } else {
      tVariantCreateFromBinary(&pCur->tag, pCur->aInputElemBuf, pCur->inputBytes, pCur->inputType);
    }
  }

  // second pass: run the merge function of every column except dummy tags
  for (int32_t k = 0; k < pQuery->numOfOutput; ++k) {
    int32_t funcId = pQuery->pSelectExpr[k].base.functionId;
    if (funcId != TSDB_FUNC_TAG_DUMMY) {
      aAggs[funcId].distMergeFunc(&pCtxList[k]);
    }
  }
}

2485
static UNUSED_FUNC void printBinaryData(int32_t functionId, char *data, int32_t srcDataType) {
2486 2487 2488 2489 2490 2491 2492 2493 2494 2495 2496 2497 2498 2499 2500 2501 2502 2503 2504 2505 2506 2507 2508 2509 2510 2511 2512 2513 2514 2515 2516 2517 2518 2519 2520 2521 2522 2523 2524 2525 2526 2527 2528 2529 2530 2531 2532 2533 2534 2535 2536 2537 2538 2539 2540 2541 2542 2543 2544 2545 2546 2547 2548 2549 2550 2551 2552 2553
  if (functionId == TSDB_FUNC_FIRST_DST || functionId == TSDB_FUNC_LAST_DST) {
    switch (srcDataType) {
      case TSDB_DATA_TYPE_BINARY:
        printf("%" PRId64 ",%s\t", *(TSKEY *)data, (data + TSDB_KEYSIZE + 1));
        break;
      case TSDB_DATA_TYPE_TINYINT:
      case TSDB_DATA_TYPE_BOOL:
        printf("%" PRId64 ",%d\t", *(TSKEY *)data, *(int8_t *)(data + TSDB_KEYSIZE + 1));
        break;
      case TSDB_DATA_TYPE_SMALLINT:
        printf("%" PRId64 ",%d\t", *(TSKEY *)data, *(int16_t *)(data + TSDB_KEYSIZE + 1));
        break;
      case TSDB_DATA_TYPE_BIGINT:
      case TSDB_DATA_TYPE_TIMESTAMP:
        printf("%" PRId64 ",%" PRId64 "\t", *(TSKEY *)data, *(TSKEY *)(data + TSDB_KEYSIZE + 1));
        break;
      case TSDB_DATA_TYPE_INT:
        printf("%" PRId64 ",%d\t", *(TSKEY *)data, *(int32_t *)(data + TSDB_KEYSIZE + 1));
        break;
      case TSDB_DATA_TYPE_FLOAT:
        printf("%" PRId64 ",%f\t", *(TSKEY *)data, *(float *)(data + TSDB_KEYSIZE + 1));
        break;
      case TSDB_DATA_TYPE_DOUBLE:
        printf("%" PRId64 ",%lf\t", *(TSKEY *)data, *(double *)(data + TSDB_KEYSIZE + 1));
        break;
    }
  } else if (functionId == TSDB_FUNC_AVG) {
    printf("%lf,%d\t", *(double *)data, *(int32_t *)(data + sizeof(double)));
  } else if (functionId == TSDB_FUNC_SPREAD) {
    printf("%lf,%lf\t", *(double *)data, *(double *)(data + sizeof(double)));
  } else if (functionId == TSDB_FUNC_TWA) {
    data += 1;
    printf("%lf,%" PRId64 ",%" PRId64 ",%" PRId64 "\t", *(double *)data, *(int64_t *)(data + 8),
           *(int64_t *)(data + 16), *(int64_t *)(data + 24));
  } else if (functionId == TSDB_FUNC_MIN || functionId == TSDB_FUNC_MAX) {
    switch (srcDataType) {
      case TSDB_DATA_TYPE_TINYINT:
      case TSDB_DATA_TYPE_BOOL:
        printf("%d\t", *(int8_t *)data);
        break;
      case TSDB_DATA_TYPE_SMALLINT:
        printf("%d\t", *(int16_t *)data);
        break;
      case TSDB_DATA_TYPE_BIGINT:
      case TSDB_DATA_TYPE_TIMESTAMP:
        printf("%" PRId64 "\t", *(int64_t *)data);
        break;
      case TSDB_DATA_TYPE_INT:
        printf("%d\t", *(int *)data);
        break;
      case TSDB_DATA_TYPE_FLOAT:
        printf("%f\t", *(float *)data);
        break;
      case TSDB_DATA_TYPE_DOUBLE:
        printf("%f\t", *(float *)data);
        break;
    }
  } else if (functionId == TSDB_FUNC_SUM) {
    if (srcDataType == TSDB_DATA_TYPE_FLOAT || srcDataType == TSDB_DATA_TYPE_DOUBLE) {
      printf("%lf\t", *(float *)data);
    } else {
      printf("%" PRId64 "\t", *(int64_t *)data);
    }
  } else {
    printf("%s\t", data);
  }
}

2554
/*
 * Debug helper: dump the intermediate result of a super table query to stdout,
 * one line per row. pdata[i] holds the values of output column i, laid out
 * contiguously with pSelectExpr[i].bytes per row.
 */
void UNUSED_FUNC displayInterResult(tFilePage **pdata, SQueryRuntimeEnv* pRuntimeEnv, int32_t numOfRows) {
  SQuery *pQuery = pRuntimeEnv->pQuery;
  int32_t numOfCols = pQuery->numOfOutput;
  printf("super table query intermediate result, total:%d\n", numOfRows);

  for (int32_t row = 0; row < numOfRows; ++row) {
    for (int32_t col = 0; col < numOfCols; ++col) {
      SExprInfo *pExpr = &pQuery->pSelectExpr[col];
      int32_t    type = pExpr->type;
      char      *pVal = pdata[col]->data + pExpr->bytes * row;

      if (type == TSDB_DATA_TYPE_BINARY) {
        printBinaryData(pExpr->base.functionId, pVal, type);
      } else if (type == TSDB_DATA_TYPE_TIMESTAMP || type == TSDB_DATA_TYPE_BIGINT) {
        printf("%" PRId64 "\t", *(int64_t *)pVal);
      } else if (type == TSDB_DATA_TYPE_INT) {
        printf("%d\t", *(int32_t *)pVal);
      } else if (type == TSDB_DATA_TYPE_FLOAT) {
        printf("%f\t", *(float *)pVal);
      } else if (type == TSDB_DATA_TYPE_DOUBLE) {
        printf("%lf\t", *(double *)pVal);
      }
      // any other type is not printed
    }
    printf("\n");
  }
}

typedef struct SCompSupporter {
H
hjxilinx 已提交
2589 2590 2591
  STableQueryInfo **pTableQueryInfo;
  int32_t *         position;
  SQInfo *          pQInfo;
2592 2593 2594 2595 2596
} SCompSupporter;

int32_t tableResultComparFn(const void *pLeft, const void *pRight, void *param) {
  int32_t left = *(int32_t *)pLeft;
  int32_t right = *(int32_t *)pRight;
2597

2598 2599
  SCompSupporter *  supporter = (SCompSupporter *)param;
  SQueryRuntimeEnv *pRuntimeEnv = &supporter->pQInfo->runtimeEnv;
2600

2601 2602
  int32_t leftPos = supporter->position[left];
  int32_t rightPos = supporter->position[right];
2603

2604 2605 2606 2607
  /* left source is exhausted */
  if (leftPos == -1) {
    return 1;
  }
2608

2609 2610 2611 2612
  /* right source is exhausted*/
  if (rightPos == -1) {
    return -1;
  }
2613

H
hjxilinx 已提交
2614
  SWindowResInfo *pWindowResInfo1 = &supporter->pTableQueryInfo[left]->windowResInfo;
2615
  SWindowResult * pWindowRes1 = getWindowResult(pWindowResInfo1, leftPos);
H
Haojun Liao 已提交
2616
  tFilePage *page1 = getResBufPage(pRuntimeEnv->pResultBuf, pWindowRes1->pos.pageId);
2617

H
Haojun Liao 已提交
2618
  char *b1 = getPosInResultPage(pRuntimeEnv, PRIMARYKEY_TIMESTAMP_COL_INDEX, pWindowRes1, page1);
2619
  TSKEY leftTimestamp = GET_INT64_VAL(b1);
2620

H
hjxilinx 已提交
2621
  SWindowResInfo *pWindowResInfo2 = &supporter->pTableQueryInfo[right]->windowResInfo;
2622
  SWindowResult * pWindowRes2 = getWindowResult(pWindowResInfo2, rightPos);
H
Haojun Liao 已提交
2623
  tFilePage *page2 = getResBufPage(pRuntimeEnv->pResultBuf, pWindowRes2->pos.pageId);
2624

H
Haojun Liao 已提交
2625
  char *b2 = getPosInResultPage(pRuntimeEnv, PRIMARYKEY_TIMESTAMP_COL_INDEX, pWindowRes2, page2);
2626
  TSKEY rightTimestamp = GET_INT64_VAL(b2);
2627

2628 2629 2630
  if (leftTimestamp == rightTimestamp) {
    return 0;
  }
2631

2632 2633 2634
  return leftTimestamp > rightTimestamp ? 1 : -1;
}

2635
/*
 * Merge table groups, starting at pQInfo->groupIndex, until one group produces
 * at least one result page or all groups are consumed. groupIndex is advanced
 * past every processed group.
 *
 * Returns -1 when merging a group fails, TSDB_CODE_SUCCESS otherwise.
 */
int32_t mergeIntoGroupResult(SQInfo *pQInfo) {
  int64_t startMs = taosGetTimestampMs();
  int32_t totalGroups = GET_NUM_OF_TABLEGROUP(pQInfo);

  while (pQInfo->groupIndex < totalGroups) {
    SArray *pGroup = GET_TABLEGROUP(pQInfo, pQInfo->groupIndex);

    int32_t res = mergeIntoGroupResultImpl(pQInfo, pGroup);
    if (res < 0) {  // not enough disk space to save the data into disk
      return -1;
    }

    pQInfo->groupIndex += 1;

    // this group generates at least one result, return results
    if (res > 0) {
      break;
    }

    assert(pQInfo->numOfGroupResultPages == 0);
    qDebug("QInfo:%p no result in group %d, continue", pQInfo, pQInfo->groupIndex - 1);
  }

  qDebug("QInfo:%p merge res data into group, index:%d, total group:%d, elapsed time:%" PRId64 "ms", pQInfo,
         pQInfo->groupIndex - 1, totalGroups, taosGetTimestampMs() - startMs);

  return TSDB_CODE_SUCCESS;
}

/*
 * Copy the next batch of merged group results into the query output buffers
 * (pQuery->sdata) and set pQuery->rec.rows to the number of rows copied.
 *
 * When every result page of the current group has been returned, the next
 * group(s) are merged first. If nothing is left, pQInfo->tableIndex is set to
 * the number of tables to mark the query as completed.
 */
void copyResToQueryResultBuf(SQInfo *pQInfo, SQuery *pQuery) {
  if (pQInfo->offset == pQInfo->numOfGroupResultPages) {
    pQInfo->numOfGroupResultPages = 0;

    // current results of group has been sent to client, try next group
    if (mergeIntoGroupResult(pQInfo) != TSDB_CODE_SUCCESS) {
      return;  // failed to save data in the disk
    }

    // check if all results has been sent to client
    int32_t totalGroups = GET_NUM_OF_TABLEGROUP(pQInfo);
    if (pQInfo->numOfGroupResultPages == 0 && pQInfo->groupIndex == totalGroups) {
      pQInfo->tableIndex = pQInfo->tableqinfoGroupInfo.numOfTables;  // set query completed
      return;
    }
  }

  SQueryRuntimeEnv    *pRuntimeEnv = &pQInfo->runtimeEnv;
  SDiskbasedResultBuf *pResultBuf = pRuntimeEnv->pResultBuf;

  int32_t groupResId = getGroupResultId(pQInfo->groupIndex - 1);
  SIDList pageList = getDataBufPagesIdList(pResultBuf, pQInfo->offset + groupResId);
  int32_t numOfPages = taosArrayGetSize(pageList);

  // number of rows copied so far, also the row offset in the destination buffers
  int32_t rows = 0;
  for (int32_t p = 0; p < numOfPages; ++p) {
    SPageInfo *pPageInfo = *(SPageInfo **)taosArrayGet(pageList, p);
    tFilePage *pPage = getResBufPage(pResultBuf, pPageInfo->pageId);

    // in the page, the values of column c start at offset[c] * num
    for (int32_t c = 0; c < pQuery->numOfOutput; ++c) {
      int32_t bytes = pRuntimeEnv->pCtx[c].outputBytes;
      char   *pDest = pQuery->sdata[c]->data;
      memcpy(pDest + rows * bytes, pPage->data + pRuntimeEnv->offset[c] * pPage->num, bytes * pPage->num);
    }

    rows += pPage->num;
  }

  assert(pQuery->rec.rows == 0);

  pQuery->rec.rows += rows;
  pQInfo->offset += 1;
}

H
Haojun Liao 已提交
2711
/*
 * Number of result rows held by a window result: the numOfRes of the first
 * output column that is not a ts/tag/tagprj function and has a positive count.
 * Returns 0 when no such column exists.
 */
int64_t getNumOfResultWindowRes(SQuery *pQuery, SWindowResult *pWindowRes) {
  for (int32_t col = 0; col < pQuery->numOfOutput; ++col) {
    int32_t funcId = pQuery->pSelectExpr[col].base.functionId;

    /*
     * ts, tag, tagprj function can not decide the output number of current query
     * the number of output result is decided by main output
     */
    bool isAuxiliary = (funcId == TSDB_FUNC_TS || funcId == TSDB_FUNC_TAG || funcId == TSDB_FUNC_TAGPRJ);
    if (isAuxiliary) {
      continue;
    }

    SResultInfo *pInfo = &pWindowRes->resultInfo[col];
    assert(pInfo != NULL);

    if (pInfo->numOfRes > 0) {
      return pInfo->numOfRes;
    }
  }

  return 0;
}

2734
/*
 * Merge the window results of all tables in one group, ordered by window start
 * timestamp, and flush the merged rows into the group result pages.
 *
 * Window results of different tables that share the same timestamp are merged
 * into one output row; results without output rows are skipped.
 *
 * Returns:
 *   -1  when flushing merged rows into the result buffer fails,
 *    0  when no table of the group has any result,
 *   otherwise pQInfo->numOfGroupResultPages.
 * On memory allocation failure the function does not return: it releases its
 * temporary buffers and longjmp()s to pRuntimeEnv->env with
 * TSDB_CODE_QRY_OUT_OF_MEMORY.
 *
 * All temporary allocations are released on every exit path.
 */
int32_t mergeIntoGroupResultImpl(SQInfo *pQInfo, SArray *pGroup) {
  SQueryRuntimeEnv *pRuntimeEnv = &pQInfo->runtimeEnv;
  SQuery *          pQuery = pRuntimeEnv->pQuery;

  size_t      size = taosArrayGetSize(pGroup);
  tFilePage **buffer = pQuery->sdata;

  int32_t ret = -1;  // result reported when leaving through _cleanup before success
  int64_t endt = 0;

  SLoserTreeInfo *pTree = NULL;
  SResultInfo *   pResultInfo = NULL;
  char *          buf = NULL;

  int32_t *         posList = calloc(size, sizeof(int32_t));
  STableQueryInfo **pTableList = malloc(POINTER_BYTES * size);

  if (pTableList == NULL || posList == NULL) {
    taosTFree(posList);
    taosTFree(pTableList);

    qError("QInfo:%p failed alloc memory", pQInfo);
    longjmp(pRuntimeEnv->env, TSDB_CODE_QRY_OUT_OF_MEMORY);
  }

  // collect the tables that own result pages and at least one window result
  // todo opt for the case of one table per group (no merge is needed then)
  int32_t numOfTables = 0;
  for (size_t i = 0; i < size; ++i) {
    STableQueryInfo *item = taosArrayGetP(pGroup, i);

    SIDList list = getDataBufPagesIdList(pRuntimeEnv->pResultBuf, TSDB_TABLEID(item->pTable)->tid);
    if (taosArrayGetSize(list) > 0 && item->windowResInfo.size > 0) {
      pTableList[numOfTables] = item;
      numOfTables += 1;
    }
  }

  if (numOfTables == 0) {
    taosTFree(posList);
    taosTFree(pTableList);

    assert(pQInfo->numOfGroupResultPages == 0);
    return 0;
  }

  SCompSupporter cs = {pTableList, posList, pQInfo};
  tLoserTreeCreate(&pTree, numOfTables, &cs, tableResultComparFn);

  pResultInfo = calloc(pQuery->numOfOutput, sizeof(SResultInfo));
  buf = calloc(1, pRuntimeEnv->interBufSize);

  // calloc may legitimately return NULL for a zero-sized request, so buf is only
  // treated as a failure when a non-empty intermediate buffer was requested
  if (pTree == NULL || pResultInfo == NULL || (buf == NULL && pRuntimeEnv->interBufSize > 0)) {
    taosTFree(pTree);
    taosTFree(pTableList);
    taosTFree(posList);
    taosTFree(pResultInfo);
    taosTFree(buf);

    qError("QInfo:%p failed alloc memory", pQInfo);
    longjmp(pRuntimeEnv->env, TSDB_CODE_QRY_OUT_OF_MEMORY);
  }

  setWindowResultInfo(pResultInfo, pQuery, pRuntimeEnv->stableQuery, buf);
  resetMergeResultBuf(pQuery, pRuntimeEnv->pCtx, pResultInfo);

  // todo add windowRes iterator
  int64_t lastTimestamp = -1;
  int64_t startt = taosGetTimestampMs();

  while (1) {
    // the root of the loser tree is the source with the smallest current timestamp
    int32_t pos = pTree->pNode[0].index;

    SWindowResInfo *pWindowResInfo = &pTableList[pos]->windowResInfo;
    SWindowResult * pWindowRes = getWindowResult(pWindowResInfo, cs.position[pos]);
    tFilePage *     page = getResBufPage(pRuntimeEnv->pResultBuf, pWindowRes->pos.pageId);

    char *b = getPosInResultPage(pRuntimeEnv, PRIMARYKEY_TIMESTAMP_COL_INDEX, pWindowRes, page);
    TSKEY ts = GET_INT64_VAL(b);

    assert(ts == pWindowRes->window.skey);
    int64_t num = getNumOfResultWindowRes(pQuery, pWindowRes);
    if (num <= 0) {
      // empty window result, skip it
      cs.position[pos] += 1;

      if (cs.position[pos] >= pWindowResInfo->size) {
        cs.position[pos] = -1;

        // all input sources are exhausted
        if (--numOfTables == 0) {
          break;
        }
      }
    } else {
      if (ts == lastTimestamp) {  // merge with the last one
        doMerge(pRuntimeEnv, ts, pWindowRes, true);
      } else {  // copy data to disk buffer
        if (buffer[0]->num == pQuery->rec.capacity) {
          if (flushFromResultBuf(pQInfo) != TSDB_CODE_SUCCESS) {
            qError("QInfo:%p failed to flush data into temp file, abort query", pQInfo);
            goto _cleanup;
          }

          resetMergeResultBuf(pQuery, pRuntimeEnv->pCtx, pResultInfo);
        }

        doMerge(pRuntimeEnv, ts, pWindowRes, false);
        buffer[0]->num += 1;
      }

      lastTimestamp = ts;

      // move to the next element of current entry
      int32_t currentPageId = pWindowRes->pos.pageId;

      cs.position[pos] += 1;
      if (cs.position[pos] >= pWindowResInfo->size) {
        cs.position[pos] = -1;

        // all input sources are exhausted
        if (--numOfTables == 0) {
          break;
        }
      } else {
        // current page is not needed anymore
        SWindowResult *pNextWindowRes = getWindowResult(pWindowResInfo, cs.position[pos]);
        if (pNextWindowRes->pos.pageId != currentPageId) {
          releaseResBufPage(pRuntimeEnv->pResultBuf, page);
        }
      }
    }

    tLoserTreeAdjust(pTree, pos + pTree->numOfEntries);
  }

  // there are data in buffer
  if (buffer[0]->num != 0 && flushFromResultBuf(pQInfo) != TSDB_CODE_SUCCESS) {
    qError("QInfo:%p failed to flush data into temp file, abort query", pQInfo);
    goto _cleanup;
  }

  endt = taosGetTimestampMs();

#ifdef _DEBUG_VIEW
  displayInterResult(pQuery->sdata, pRuntimeEnv, pQuery->sdata[0]->num);
#endif

  qDebug("QInfo:%p result merge completed for group:%d, elapsed time:%" PRId64 " ms", pQInfo, pQInfo->groupIndex, endt - startt);

  pQInfo->offset = 0;
  ret = pQInfo->numOfGroupResultPages;

_cleanup:
  taosTFree(pTableList);
  taosTFree(posList);
  taosTFree(pTree);
  taosTFree(pResultInfo);
  taosTFree(buf);

  return ret;
}

/*
 * Move the rows accumulated in the query output buffers (pQuery->sdata) into
 * newly allocated pages of the disk-based result buffer.
 *
 * The rows are split into chunks of at most numOfRowsPerPage rows; every chunk
 * goes into its own page, and all pages of one call are registered under the
 * same id. pQInfo->numOfGroupResultPages is increased by one per call.
 *
 * Returns TSDB_CODE_SUCCESS, or TSDB_CODE_QRY_OUT_OF_MEMORY when no page could
 * be obtained from the result buffer.
 */
int32_t flushFromResultBuf(SQInfo *pQInfo) {
  SQueryRuntimeEnv *pRuntimeEnv = &pQInfo->runtimeEnv;
  SQuery *          pQuery = pRuntimeEnv->pQuery;

  SDiskbasedResultBuf *pResultBuf = pRuntimeEnv->pResultBuf;

  int32_t pageId = -1;
  int32_t capacity = pResultBuf->numOfRowsPerPage;

  int32_t remain = pQuery->sdata[0]->num;
  int32_t offset = 0;  // number of rows already flushed

  while (remain > 0) {
    int32_t r = (remain > capacity) ? capacity : remain;

    int32_t    id = getGroupResultId(pQInfo->groupIndex) + pQInfo->numOfGroupResultPages;
    tFilePage *buf = getNewDataBuf(pResultBuf, id, &pageId);
    if (buf == NULL) {
      // NOTE(review): assumes getNewDataBuf reports failure by returning NULL - confirm
      qError("QInfo:%p failed to get new page from result buffer", pQInfo);
      return TSDB_CODE_QRY_OUT_OF_MEMORY;
    }

    buf->num = r;

    // pagewise copy to dest buffer: column i starts at offset[i] * num in the page
    for (int32_t i = 0; i < pQuery->numOfOutput; ++i) {
      int32_t bytes = pRuntimeEnv->pCtx[i].outputBytes;

      memcpy(buf->data + pRuntimeEnv->offset[i] * buf->num, ((char *)pQuery->sdata[i]->data) + offset * bytes,
             buf->num * bytes);
    }

    offset += r;
    remain -= r;
  }

  pQInfo->numOfGroupResultPages += 1;
  return TSDB_CODE_SUCCESS;
}

/*
 * Rewind the merge output: every context's output pointer is placed one element
 * before the start of its column buffer (doMerge advances it before writing a
 * new row), its result info is bound to pResultInfo[k], and the row counter of
 * the column buffer is cleared.
 */
void resetMergeResultBuf(SQuery *pQuery, SQLFunctionCtx *pCtx, SResultInfo *pResultInfo) {
  for (int32_t col = 0; col < pQuery->numOfOutput; ++col) {
    SQLFunctionCtx *pCur = &pCtx[col];

    pCur->aOutputBuf = pQuery->sdata[col]->data - pCur->outputBytes;
    pCur->size = 1;
    pCur->startOffset = 0;
    pCur->resultInfo = &pResultInfo[col];

    pQuery->sdata[col]->num = 0;
  }
}

2937 2938 2939 2940 2941
/*
 * Prepare the per-table query info for the scan in the opposite direction:
 * the time window is clipped to the range actually covered (when any progress
 * was made), its bounds are swapped, lastKey restarts at the new start key and
 * the cursor order is flipped. A NULL pTableQueryInfo is ignored.
 */
static void updateTableQueryInfoForReverseScan(SQuery *pQuery, STableQueryInfo *pTableQueryInfo) {
  if (pTableQueryInfo == NULL) {
    return;
  }

  // order has changed already
  int32_t step = GET_FORWARD_DIRECTION_FACTOR(pQuery->order.order);

  // TODO validate the relation between win.ekey and lastKey + step with an assertion

  // lastKey == win.skey means no results were produced, the window is kept as it is
  if (pTableQueryInfo->lastKey != pTableQueryInfo->win.skey) {
    pTableQueryInfo->win.ekey = pTableQueryInfo->lastKey + step;
  }

  SWAP(pTableQueryInfo->win.skey, pTableQueryInfo->win.ekey, TSKEY);
  pTableQueryInfo->lastKey = pTableQueryInfo->win.skey;

  SWITCH_ORDER(pTableQueryInfo->cur.order);
  pTableQueryInfo->cur.vgroupIndex = -1;

  // set the index at the end of time window
  pTableQueryInfo->windowResInfo.curIndex = pTableQueryInfo->windowResInfo.size - 1;
}

/*
 * For every closed window result, decide which functions take part in the
 * reverse scan: first/first_dst (ascending order) and last/last_dst
 * (descending order) are marked as not complete, every other function except
 * ts/tag is marked as complete. The flags of ts/tag are left untouched.
 */
static void disableFuncInReverseScanImpl(SQInfo* pQInfo, SWindowResInfo *pWindowResInfo, int32_t order) {
  SQuery *pQuery = pQInfo->runtimeEnv.pQuery;

  for (int32_t w = 0; w < pWindowResInfo->size; ++w) {
    SWindowStatus *pStatus = getTimeWindowResStatus(pWindowResInfo, w);
    if (!pStatus->closed) {
      continue;
    }

    SWindowResult *pRes = getWindowResult(pWindowResInfo, w);

    // open/close the specified query for each group result
    for (int32_t col = 0; col < pQuery->numOfOutput; ++col) {
      int32_t fid = pQuery->pSelectExpr[col].base.functionId;

      bool isFirst = (fid == TSDB_FUNC_FIRST || fid == TSDB_FUNC_FIRST_DST);
      bool isLast = (fid == TSDB_FUNC_LAST || fid == TSDB_FUNC_LAST_DST);

      if ((isFirst && order == TSDB_ORDER_ASC) || (isLast && order == TSDB_ORDER_DESC)) {
        pRes->resultInfo[col].complete = false;
      } else if (fid != TSDB_FUNC_TS && fid != TSDB_FUNC_TAG) {
        pRes->resultInfo[col].complete = true;
      }
    }
  }
}

2993 2994
/*
 * Select the functions that run during the reverse scan. For group-by-column
 * and interval queries the decision is made per window result; otherwise it is
 * made on the result info of each function context.
 */
void disableFuncInReverseScan(SQInfo *pQInfo) {
  SQueryRuntimeEnv *pRuntimeEnv = &pQInfo->runtimeEnv;
  SQuery           *pQuery = pRuntimeEnv->pQuery;
  int32_t           order = pQuery->order.order;

  // group by normal columns and interval query on normal table
  if (pRuntimeEnv->groupbyNormalCol || QUERY_IS_INTERVAL_QUERY(pQuery)) {
    disableFuncInReverseScanImpl(pQInfo, &pRuntimeEnv->windowResInfo, order);
    return;
  }

  // for simple result of table query
  for (int32_t col = 0; col < pQuery->numOfOutput; ++col) {  // todo refactor
    int32_t fid = pQuery->pSelectExpr[col].base.functionId;

    SResultInfo *pInfo = pRuntimeEnv->pCtx[col].resultInfo;
    if (pInfo == NULL) {
      continue;  // resultInfo is NULL, means no data checked in previous scan
    }

    bool isFirst = (fid == TSDB_FUNC_FIRST || fid == TSDB_FUNC_FIRST_DST);
    bool isLast = (fid == TSDB_FUNC_LAST || fid == TSDB_FUNC_LAST_DST);

    if ((isFirst && order == TSDB_ORDER_ASC) || (isLast && order == TSDB_ORDER_DESC)) {
      pInfo->complete = false;
    } else if (fid != TSDB_FUNC_TS && fid != TSDB_FUNC_TAG) {
      pInfo->complete = true;
    }
  }
}

/*
 * Update the query range of every table in every group for the reverse scan and
 * copy the resulting lastKey into the matching entry of the table key list.
 */
static void setupQueryRangeForReverseScan(SQInfo* pQInfo) {
  SQuery *pQuery = pQInfo->runtimeEnv.pQuery;
  int32_t totalGroups = GET_NUM_OF_TABLEGROUP(pQInfo);

  for (int32_t g = 0; g < totalGroups; ++g) {
    SArray *pTables = GET_TABLEGROUP(pQInfo, g);
    SArray *pKeys = taosArrayGetP(pQInfo->tableGroupInfo.pGroupList, g);

    size_t numOfTables = taosArrayGetSize(pTables);
    for (int32_t k = 0; k < numOfTables; ++k) {
      STableQueryInfo *pInfo = taosArrayGetP(pTables, k);
      updateTableQueryInfoForReverseScan(pQuery, pInfo);

      // update the last key in tableKeyInfo list
      STableKeyInfo *pKeyInfo = taosArrayGet(pKeys, k);
      pKeyInfo->lastKey = pInfo->lastKey;

      // both lists are expected to reference the same table at the same index
      assert(pInfo->pTable == pKeyInfo->pTable);
    }
  }
}

3043
/* Flip the scan order stored in the function context of every output column. */
void switchCtxOrder(SQueryRuntimeEnv *pRuntimeEnv) {
  int32_t numOfOutput = pRuntimeEnv->pQuery->numOfOutput;

  for (int32_t col = 0; col < numOfOutput; ++col) {
    SQLFunctionCtx *pCtx = &pRuntimeEnv->pCtx[col];
    SWITCH_ORDER(pCtx->order);
  }
}

H
Haojun Liao 已提交
3050
/*
 * Allocate the result info array of a window result. A single zeroed allocation
 * holds one SResultInfo per output column followed by interBufSize bytes of
 * intermediate buffer, which is handed to setWindowResultInfo().
 *
 * Returns TSDB_CODE_SUCCESS, or TSDB_CODE_QRY_OUT_OF_MEMORY when the allocation
 * fails (pResultRow->resultInfo is NULL in that case).
 */
int32_t createQueryResultInfo(SQuery *pQuery, SWindowResult *pResultRow, bool isSTableQuery, size_t interBufSize) {
  int32_t numOfCols = pQuery->numOfOutput;
  size_t  infoBytes = numOfCols * sizeof(SResultInfo);

  SResultInfo *pInfo = calloc(1, infoBytes + interBufSize);
  pResultRow->resultInfo = pInfo;
  if (pInfo == NULL) {
    return TSDB_CODE_QRY_OUT_OF_MEMORY;
  }

  pResultRow->pos = (SPosInfo) {-1, -1};

  // the intermediate result buffer starts right behind the SResultInfo array
  char *pInterBuf = (char *)pInfo + infoBytes;
  setWindowResultInfo(pInfo, pQuery, isSTableQuery, pInterBuf);

  return TSDB_CODE_SUCCESS;
}

/*
 * Point every function context back at the start of its output column buffer,
 * reset and rebind its result info, clear the column buffer, and finally
 * re-initialize the functions through initCtxOutputBuf().
 */
void resetCtxOutputBuf(SQueryRuntimeEnv *pRuntimeEnv) {
  SQuery *pQuery = pRuntimeEnv->pQuery;

  for (int32_t col = 0; col < pQuery->numOfOutput; ++col) {
    SQLFunctionCtx *pCtx = &pRuntimeEnv->pCtx[col];
    SResultInfo    *pInfo = &pRuntimeEnv->resultInfo[col];

    pCtx->aOutputBuf = pQuery->sdata[col]->data;

    /*
     * set the output buffer information and intermediate buffer
     * not all queries require the interResultBuf, such as COUNT/TAGPRJ/PRJ/TAG etc.
     */
    RESET_RESULT_INFO(pInfo);
    pCtx->resultInfo = pInfo;

    // set the timestamp output buffer for top/bottom/diff query
    int32_t fid = pQuery->pSelectExpr[col].base.functionId;
    bool    needTsBuf = (fid == TSDB_FUNC_TOP || fid == TSDB_FUNC_BOTTOM || fid == TSDB_FUNC_DIFF);
    if (needTsBuf) {
      pCtx->ptsOutputBuf = pRuntimeEnv->pCtx[0].aOutputBuf;
    }

    memset(pQuery->sdata[col]->data, 0, (size_t)pQuery->pSelectExpr[col].bytes * pQuery->rec.capacity);
  }

  initCtxOutputBuf(pRuntimeEnv);
}

/*
 * Advance the output position of the function contexts by `output` rows and
 * reset their result info. Must not be used for queries that contain a diff
 * function (asserted).
 */
void forwardCtxOutputBuf(SQueryRuntimeEnv *pRuntimeEnv, int64_t output) {
  SQuery *pQuery = pRuntimeEnv->pQuery;

  // reset the execution contexts
  for (int32_t col = 0; col < pQuery->numOfOutput; ++col) {
    SQLFunctionCtx *pCtx = &pRuntimeEnv->pCtx[col];
    int32_t         fid = pQuery->pSelectExpr[col].base.functionId;
    assert(fid != TSDB_FUNC_DIFF);

    // set next output position
    if (IS_OUTER_FORWARD(aAggs[fid].nStatus)) {
      pCtx->aOutputBuf += pCtx->outputBytes * output;
    }

    /*
     * NOTE: for top/bottom query, the value of first column of output (timestamp) are assigned
     * in the procedure of top/bottom routine
     * the output buffer in top/bottom routine is ptsOutputBuf, so we need to forward the output buffer
     *
     * diff function is handled in multi-output function
     */
    if (fid == TSDB_FUNC_TOP || fid == TSDB_FUNC_BOTTOM) {
      pCtx->ptsOutputBuf += TSDB_KEYSIZE * output;
    }

    RESET_RESULT_INFO(pCtx->resultInfo);
  }
}

/*
 * Reset the stage counter of every function context and invoke the function's
 * init callback for those contexts whose result info is not initialized yet.
 */
void initCtxOutputBuf(SQueryRuntimeEnv *pRuntimeEnv) {
  SQuery *pQuery = pRuntimeEnv->pQuery;

  for (int32_t j = 0; j < pQuery->numOfOutput; ++j) {
    SQLFunctionCtx *pCtx = &pRuntimeEnv->pCtx[j];
    int32_t         functionId = pQuery->pSelectExpr[j].base.functionId;

    pCtx->currentStage = 0;

    // already initialized results are left untouched
    SResultInfo *pResInfo = GET_RES_INFO(pCtx);
    if (!pResInfo->initialized) {
      aAggs[functionId].init(pCtx);
    }
  }
}

3138
/*
 * Apply the remaining OFFSET of the query to the rows currently held in the
 * output buffer.
 *
 * - If the buffer holds no more rows than the offset, all rows are dropped and
 *   the offset is reduced accordingly.
 * - Otherwise the first `offset` rows are removed by shifting the remaining
 *   rows to the front of each output column, and the offset becomes 0.
 */
void skipResults(SQueryRuntimeEnv *pRuntimeEnv) {
  SQuery *pQuery = pRuntimeEnv->pQuery;

  // nothing produced, or nothing to skip
  if (pQuery->rec.rows == 0 || pQuery->limit.offset == 0) {
    return;
  }

  // case 1: the whole result block is consumed by the offset
  if (pQuery->rec.rows <= pQuery->limit.offset) {
    qDebug("QInfo:%p skip rows:%" PRId64 ", new offset:%" PRIu64, GET_QINFO_ADDR(pRuntimeEnv), pQuery->rec.rows,
        pQuery->limit.offset - pQuery->rec.rows);

    pQuery->limit.offset -= pQuery->rec.rows;
    pQuery->rec.rows = 0;

    resetCtxOutputBuf(pRuntimeEnv);

    // clear the buffer full flag if exists
    CLEAR_QUERY_STATUS(pQuery, QUERY_RESBUF_FULL);
    return;
  }

  // case 2: only the leading part of the result block is skipped
  int64_t numOfSkip = pQuery->limit.offset;
  pQuery->rec.rows -= numOfSkip;
  pQuery->limit.offset = 0;

  qDebug("QInfo:%p skip row:%"PRId64", new offset:%d, numOfRows remain:%" PRIu64, GET_QINFO_ADDR(pRuntimeEnv), numOfSkip,
         0, pQuery->rec.rows);

  for (int32_t i = 0; i < pQuery->numOfOutput; ++i) {
    SQLFunctionCtx *pCtx = &pRuntimeEnv->pCtx[i];
    int32_t         functionId = pQuery->pSelectExpr[i].base.functionId;
    int32_t         bytes = pCtx->outputBytes;
    char           *colData = (char*) pQuery->sdata[i]->data;

    // source and destination overlap, hence memmove
    memmove(pQuery->sdata[i]->data, colData + bytes * numOfSkip, pQuery->rec.rows * bytes);
    pCtx->aOutputBuf = colData + pQuery->rec.rows * bytes;

    // these functions emit their timestamps into the first output column
    if (functionId == TSDB_FUNC_DIFF || functionId == TSDB_FUNC_TOP || functionId == TSDB_FUNC_BOTTOM) {
      pCtx->ptsOutputBuf = pRuntimeEnv->pCtx[0].aOutputBuf;
    }
  }

  updateNumOfResult(pRuntimeEnv, pQuery->rec.rows);
}

/*
 * Update the status flags of the query.
 *
 * QUERY_NOT_COMPLETED replaces the whole status word; every other status is
 * OR-ed into the current value after the QUERY_NOT_COMPLETED flag is cleared.
 */
void setQueryStatus(SQuery *pQuery, int8_t status) {
  if (status == QUERY_NOT_COMPLETED) {
    pQuery->status = status;
    return;
  }

  // QUERY_NOT_COMPLETED is not compatible with any other status, so clear it first
  CLEAR_QUERY_STATUS(pQuery, QUERY_NOT_COMPLETED);
  pQuery->status |= status;
}

/*
 * Invoke xNextStep for every output function except TSDB_FUNC_TS on the
 * currently bound function contexts.
 *
 * @return true if at least one of those functions reports an incomplete result
 */
static bool stepAllFunctionsForward(SQueryRuntimeEnv *pRuntimeEnv) {
  SQuery *pQuery = pRuntimeEnv->pQuery;
  bool    incomplete = false;

  for (int32_t j = 0; j < pQuery->numOfOutput; ++j) {
    int16_t functId = pQuery->pSelectExpr[j].base.functionId;
    if (functId == TSDB_FUNC_TS) {
      continue;
    }

    // every function must be stepped, so there is no early exit here
    aAggs[functId].xNextStep(&pRuntimeEnv->pCtx[j]);
    SResultInfo *pResInfo = GET_RES_INFO(&pRuntimeEnv->pCtx[j]);

    if (!pResInfo->complete) {
      incomplete = true;
    }
  }

  return incomplete;
}

/*
 * Decide whether the data blocks have to be scanned once more.
 *
 * For group-by-normal-column and interval queries each closed window result is
 * bound to the function contexts before the functions are stepped; for other
 * queries the contexts are stepped as they are.
 *
 * @return true if any function still has an incomplete result
 */
bool needScanDataBlocksAgain(SQueryRuntimeEnv *pRuntimeEnv) {
  SQuery *pQuery = pRuntimeEnv->pQuery;

  if (!(pRuntimeEnv->groupbyNormalCol || QUERY_IS_INTERVAL_QUERY(pQuery))) {
    return stepAllFunctionsForward(pRuntimeEnv);
  }

  bool            toContinue = false;
  SWindowResInfo *pWindowResInfo = &pRuntimeEnv->windowResInfo;

  for (int32_t i = 0; i < pWindowResInfo->size; ++i) {
    SWindowResult *pResult = getWindowResult(pWindowResInfo, i);

    // windows that are still open are ignored
    if (!pResult->status.closed) {
      continue;
    }

    setWindowResOutputBuf(pRuntimeEnv, pResult);
    if (stepAllFunctionsForward(pRuntimeEnv)) {
      toContinue = true;
    }
  }

  return toContinue;
}

H
Haojun Liao 已提交
3234
/*
 * Take a snapshot of the current query state, starting at timestamp `start`.
 *
 * The snapshot records the query status, the current window index, the query
 * window, and a "current window" running from `start` to the end key of the
 * current table's window. Members not listed in the initializer are zeroed.
 */
static SQueryStatusInfo getQueryStatusInfo(SQueryRuntimeEnv *pRuntimeEnv, TSKEY start) {
  SQuery          *pQuery = pRuntimeEnv->pQuery;
  STableQueryInfo *pTableQueryInfo = pQuery->current;

  // `start` must not be beyond lastKey with respect to the scan direction
  assert((start <= pTableQueryInfo->lastKey && QUERY_IS_ASC_QUERY(pQuery)) ||
      (start >= pTableQueryInfo->lastKey && !QUERY_IS_ASC_QUERY(pQuery)));

  STimeWindow curWin = {.skey = start, .ekey = pTableQueryInfo->win.ekey};

  SQueryStatusInfo info = {
      .status      = pQuery->status,
      .windowIndex = pRuntimeEnv->windowResInfo.curIndex,
      .lastKey     = start,
      .w           = pQuery->window,
      .curWindow   = curWin,
  };

  return info;
}

3252 3253 3254 3255
/*
 * Prepare the runtime environment for the reverse scan: flip the scan order,
 * swap the query window taken from pStatus->curWindow, and open a new
 * secondary query handle for the reversed range.
 *
 * Jumps to pRuntimeEnv->env with terrno if the query handle cannot be created.
 */
static void setEnvBeforeReverseScan(SQueryRuntimeEnv *pRuntimeEnv, SQueryStatusInfo *pStatus) {
  SQInfo *pQInfo = GET_QINFO_ADDR(pRuntimeEnv);
  SQuery *pQuery = pRuntimeEnv->pQuery;

  // remember the ts-buffer cursor; it is written back by clearEnvAfterReverseScan
  pStatus->cur = tsBufGetCursor(pRuntimeEnv->pTSBuf);
  if (pRuntimeEnv->pTSBuf) {
    SWITCH_ORDER(pRuntimeEnv->pTSBuf->cur.order);
    tsBufNextPos(pRuntimeEnv->pTSBuf);
  }

  // the reverse scan covers the same range with start/end exchanged
  pQuery->window = pStatus->curWindow;
  SWAP(pQuery->window.skey, pQuery->window.ekey, TSKEY);

  SWITCH_ORDER(pQuery->order.order);

  // window bounds must agree with the (new) scan direction
  if (QUERY_IS_ASC_QUERY(pQuery)) {
    assert(pQuery->window.skey <= pQuery->window.ekey);
  } else {
    assert(pQuery->window.skey >= pQuery->window.ekey);
  }

  SET_REVERSE_SCAN_FLAG(pRuntimeEnv);

  // the condition is captured here, before the helpers below run
  STsdbQueryCond reverseCond = {
      .twindow   = pQuery->window,
      .order     = pQuery->order.order,
      .colList   = pQuery->colList,
      .numOfCols = pQuery->numOfCols,
  };

  setQueryStatus(pQuery, QUERY_NOT_COMPLETED);
  switchCtxOrder(pRuntimeEnv);
  disableFuncInReverseScan(pQInfo);
  setupQueryRangeForReverseScan(pQInfo);

  // release the handle left over from a previous scan, if any
  if (pRuntimeEnv->pSecQueryHandle != NULL) {
    tsdbCleanupQueryHandle(pRuntimeEnv->pSecQueryHandle);
  }

  pRuntimeEnv->pSecQueryHandle = tsdbQueryTables(pQInfo->tsdb, &reverseCond, &pQInfo->tableGroupInfo, pQInfo);
  if (pRuntimeEnv->pSecQueryHandle == NULL) {
    longjmp(pRuntimeEnv->env, terrno);
  }
}

3299 3300
/*
 * Undo the changes made for the reverse scan: flip the scan order back,
 * restore the ts-buffer cursor, and restore lastKey, status and the query
 * window from the snapshot in pStatus.
 */
static void clearEnvAfterReverseScan(SQueryRuntimeEnv *pRuntimeEnv, SQueryStatusInfo *pStatus) {
  SQuery          *pQuery = pRuntimeEnv->pQuery;
  STableQueryInfo *pTableQueryInfo = pQuery->current;

  // back to the original scan direction
  SWITCH_ORDER(pQuery->order.order);
  switchCtxOrder(pRuntimeEnv);

  // put the saved cursor back and align its order with the query order
  tsBufSetCursor(pRuntimeEnv->pTSBuf, &pStatus->cur);
  if (pRuntimeEnv->pTSBuf) {
    pRuntimeEnv->pTSBuf->cur.order = pQuery->order.order;
  }

  SET_MASTER_SCAN_FLAG(pRuntimeEnv);

  // restore the saved progress and status
  pTableQueryInfo->lastKey = pStatus->lastKey;
  pQuery->status = pStatus->status;

  // restore the window of the table, and use it as the query window again
  pTableQueryInfo->win = pStatus->w;
  pQuery->window = pTableQueryInfo->win;
}

H
Haojun Liao 已提交
3321 3322 3323 3324 3325 3326 3327
/*
 * Reset the lastKey of the single table in the group info to the start key of
 * the given query condition. Only valid when exactly one table is involved.
 */
static void restoreTimeWindow(STableGroupInfo* pTableGroupInfo, STsdbQueryCond* pCond) {
  assert(pTableGroupInfo->numOfTables == 1);

  // first (and only) table of the first group
  SArray        *pFirstGroup = taosArrayGetP(pTableGroupInfo->pGroupList, 0);
  STableKeyInfo *pTableKey = taosArrayGet(pFirstGroup, 0);

  pTableKey->lastKey = pCond->twindow.skey;
}

3328
/*
 * Scan the data blocks of the current table starting at `start`.
 *
 * The master scan is repeated (REPEAT_SCAN) for as long as
 * needScanDataBlocksAgain() reports unfinished functions; each repetition
 * opens a fresh secondary query handle over the range covered so far.
 * Afterwards, if the query requires it, one reverse scan is executed.
 *
 * Jumps to pRuntimeEnv->env if a query handle cannot be created or the query
 * has been killed.
 */
void scanOneTableDataBlocks(SQueryRuntimeEnv *pRuntimeEnv, TSKEY start) {
  SQInfo          *pQInfo = GET_QINFO_ADDR(pRuntimeEnv);
  SQuery          *pQuery = pRuntimeEnv->pQuery;
  STableQueryInfo *pTableQueryInfo = pQuery->current;

  setQueryStatus(pQuery, QUERY_NOT_COMPLETED);

  // snapshot of the start position, used by the repeat and reverse scans
  SQueryStatusInfo qstatus = getQueryStatusInfo(pRuntimeEnv, start);

  SET_MASTER_SCAN_FLAG(pRuntimeEnv);
  int32_t step = GET_FORWARD_DIRECTION_FACTOR(pQuery->order.order);

  for (;;) {
    doScanAllDataBlocks(pRuntimeEnv);

    if (pRuntimeEnv->scanFlag == MASTER_SCAN) {
      qstatus.status = pQuery->status;

      // lastKey unchanged means no qualified data blocks were found: keep the window end as it is
      if (qstatus.lastKey != pTableQueryInfo->lastKey) {
        qstatus.curWindow.ekey = pTableQueryInfo->lastKey - step;
      }

      qstatus.lastKey = pTableQueryInfo->lastKey;
    }

    if (!needScanDataBlocksAgain(pRuntimeEnv)) {
      // restore the status code saved by the master scan and leave the loop
      if (pRuntimeEnv->scanFlag == REPEAT_SCAN) {
        pQuery->status = qstatus.status;
      }

      break;
    }

    STsdbQueryCond cond = {
        .twindow   = qstatus.curWindow,
        .order     = pQuery->order.order,
        .colList   = pQuery->colList,
        .numOfCols = pQuery->numOfCols,
    };

    // drop the handle of the previous round before creating a new one
    if (pRuntimeEnv->pSecQueryHandle != NULL) {
      tsdbCleanupQueryHandle(pRuntimeEnv->pSecQueryHandle);
    }

    restoreTimeWindow(&pQInfo->tableGroupInfo, &cond);
    pRuntimeEnv->pSecQueryHandle = tsdbQueryTables(pQInfo->tsdb, &cond, &pQInfo->tableGroupInfo, pQInfo);
    if (pRuntimeEnv->pSecQueryHandle == NULL) {
      longjmp(pRuntimeEnv->env, terrno);
    }

    pRuntimeEnv->windowResInfo.curIndex = qstatus.windowIndex;
    setQueryStatus(pQuery, QUERY_NOT_COMPLETED);
    pRuntimeEnv->scanFlag = REPEAT_SCAN;

    qDebug("QInfo:%p start to repeat scan data blocks due to query func required, qrange:%"PRId64"-%"PRId64, pQInfo,
        cond.twindow.skey, cond.twindow.ekey);

    // abort if the query has been killed in the meantime
    if (IS_QUERY_KILLED(pQInfo)) {
      finalizeQueryResult(pRuntimeEnv); // clean up allocated resource during query
      longjmp(pRuntimeEnv->env, TSDB_CODE_TSC_QUERY_CANCELLED);
    }
  }

  if (!needReverseScan(pQuery)) {
    return;
  }

  setEnvBeforeReverseScan(pRuntimeEnv, &qstatus);

  // reverse scan from current position
  qDebug("QInfo:%p start to reverse scan", pQInfo);
  doScanAllDataBlocks(pRuntimeEnv);

  clearEnvAfterReverseScan(pRuntimeEnv, &qstatus);
}

H
hjxilinx 已提交
3408
/*
 * Run the xFinalize callback of every output function.
 *
 * For group-by-normal-column and interval queries this is done once per closed
 * window result (for group-by queries all windows are closed first), and the
 * row count of each such window is recorded. For all other queries the
 * callbacks are invoked once on the current contexts.
 */
void finalizeQueryResult(SQueryRuntimeEnv *pRuntimeEnv) {
  SQuery *pQuery = pRuntimeEnv->pQuery;

  // plain query: a single finalize pass is sufficient
  if (!(pRuntimeEnv->groupbyNormalCol || QUERY_IS_INTERVAL_QUERY(pQuery))) {
    for (int32_t j = 0; j < pQuery->numOfOutput; ++j) {
      aAggs[pQuery->pSelectExpr[j].base.functionId].xFinalize(&pRuntimeEnv->pCtx[j]);
    }
    return;
  }

  SWindowResInfo *pWindowResInfo = &pRuntimeEnv->windowResInfo;
  if (pRuntimeEnv->groupbyNormalCol) {
    closeAllTimeWindow(pWindowResInfo);
  }

  for (int32_t i = 0; i < pWindowResInfo->size; ++i) {
    // open windows are not finalized
    if (!isWindowResClosed(pWindowResInfo, i)) {
      continue;
    }

    SWindowResult *pWinRes = &pWindowResInfo->pResult[i];
    setWindowResOutputBuf(pRuntimeEnv, pWinRes);

    for (int32_t j = 0; j < pQuery->numOfOutput; ++j) {
      aAggs[pQuery->pSelectExpr[j].base.functionId].xFinalize(&pRuntimeEnv->pCtx[j]);
    }

    /*
     * set the number of output results for group by normal columns, the number of output rows usually is 1 except
     * the top and bottom query
     */
    pWinRes->numOfRows = getNumOfResult(pRuntimeEnv);
  }
}

/*
 * Check whether the select list contains at least one function other than
 * TSDB_FUNC_TS, TSDB_FUNC_TAG and TSDB_FUNC_TAGPRJ.
 */
static bool hasMainOutput(SQuery *pQuery) {
  for (int32_t i = 0; i < pQuery->numOfOutput; ++i) {
    int32_t functionId = pQuery->pSelectExpr[i].base.functionId;

    bool isAuxiliary =
        (functionId == TSDB_FUNC_TS) || (functionId == TSDB_FUNC_TAG) || (functionId == TSDB_FUNC_TAGPRJ);
    if (!isAuxiliary) {
      return true;
    }
  }

  return false;
}

H
Haojun Liao 已提交
3456
/*
 * Initialize a table query info object inside the caller supplied buffer.
 *
 * @param pRuntimeEnv  runtime environment of the query
 * @param pTable       table handle stored in the object
 * @param win          query time window of the table; lastKey starts at win.skey
 * @param buf          storage for the STableQueryInfo (not allocated here)
 * @return the object located in `buf`, or NULL if the window result info
 *         could not be initialized
 */
static STableQueryInfo *createTableQueryInfo(SQueryRuntimeEnv *pRuntimeEnv, void* pTable, STimeWindow win, void* buf) {
  SQuery          *pQuery = pRuntimeEnv->pQuery;
  STableQueryInfo *pTableQueryInfo = buf;

  pTableQueryInfo->win = win;
  pTableQueryInfo->lastKey = win.skey;

  pTableQueryInfo->pTable = pTable;
  pTableQueryInfo->cur.vgroupIndex = -1;

  // only interval/group-by queries need the window result info; other queries leave it untouched
  if (QUERY_IS_INTERVAL_QUERY(pQuery) || pRuntimeEnv->groupbyNormalCol) {
    int32_t initialSize = 16;
    int32_t initialThreshold = 100;

    int32_t code = initWindowResInfo(&pTableQueryInfo->windowResInfo, pRuntimeEnv, initialSize, initialThreshold,
                                     TSDB_DATA_TYPE_INT);
    if (code != TSDB_CODE_SUCCESS) {
      return NULL;
    }
  }

  return pTableQueryInfo;
}

H
Haojun Liao 已提交
3481
/*
 * Release the window result info held by a table query info object.
 * A NULL argument is accepted and ignored. The object itself is not freed here.
 */
void destroyTableQueryInfo(STableQueryInfo *pTableQueryInfo) {
  if (pTableQueryInfo != NULL) {
    cleanupTimeWindowInfo(&pTableQueryInfo->windowResInfo);
  }
}

H
Haojun Liao 已提交
3489 3490 3491 3492
/*
 * Debug check: for an ascending query lastKey must be >= win.skey of the table,
 * for a descending query it must be <= win.skey.
 */
#define CHECK_QUERY_TIME_RANGE(_q, _tableInfo)                                              \
  do {                                                                                      \
    assert((((_tableInfo)->lastKey >= (_tableInfo)->win.skey) && QUERY_IS_ASC_QUERY(_q)) || \
           (((_tableInfo)->lastKey <= (_tableInfo)->win.skey) && !QUERY_IS_ASC_QUERY(_q))); \
  } while (0)
3494 3495 3496 3497

/**
 * set output buffer for different group
 * @param pRuntimeEnv
3498
 * @param pDataBlockInfo
3499
 */
H
Haojun Liao 已提交
3500
void setExecutionContext(SQInfo *pQInfo, int32_t groupIndex, TSKEY nextKey) {
3501
  SQueryRuntimeEnv *pRuntimeEnv = &pQInfo->runtimeEnv;
H
Haojun Liao 已提交
3502 3503 3504
  STableQueryInfo  *pTableQueryInfo = pRuntimeEnv->pQuery->current;
  SWindowResInfo   *pWindowResInfo = &pRuntimeEnv->windowResInfo;

H
Haojun Liao 已提交
3505 3506
  // lastKey needs to be updated
  pTableQueryInfo->lastKey = nextKey;
H
Haojun Liao 已提交
3507 3508 3509 3510

  if (pRuntimeEnv->hasTagResults || pRuntimeEnv->pTSBuf != NULL) {
    setAdditionalInfo(pQInfo, pTableQueryInfo->pTable, pTableQueryInfo);
  }
H
Haojun Liao 已提交
3511

H
Haojun Liao 已提交
3512 3513 3514
  if (pRuntimeEnv->prevGroupId != INT32_MIN && pRuntimeEnv->prevGroupId == groupIndex) {
    return;
  }
3515

3516 3517
  SWindowResult *pWindowRes = doSetTimeWindowFromKey(pRuntimeEnv, pWindowResInfo, (char *)&groupIndex,
      sizeof(groupIndex), true);
3518 3519 3520
  if (pWindowRes == NULL) {
    return;
  }
3521

3522 3523 3524 3525 3526
  /*
   * not assign result buffer yet, add new result buffer
   * all group belong to one result set, and each group result has different group id so set the id to be one
   */
  if (pWindowRes->pos.pageId == -1) {
3527
    if (addNewWindowResultBuf(pWindowRes, pRuntimeEnv->pResultBuf, groupIndex, pRuntimeEnv->numOfRowsPerPage) !=
3528 3529 3530 3531
        TSDB_CODE_SUCCESS) {
      return;
    }
  }
3532

H
Haojun Liao 已提交
3533 3534
  // record the current active group id
  pRuntimeEnv->prevGroupId = groupIndex;
3535 3536 3537 3538
  setWindowResOutputBuf(pRuntimeEnv, pWindowRes);
  initCtxOutputBuf(pRuntimeEnv);
}

H
Haojun Liao 已提交
3539
/*
 * Point every function context at the output position, result info and
 * super-table flag belonging to the given window result.
 */
void setWindowResOutputBuf(SQueryRuntimeEnv *pRuntimeEnv, SWindowResult *pResult) {
  SQuery *pQuery = pRuntimeEnv->pQuery;

  // Note: pResult->pos[i]->num == 0, there is only fixed number of results for each group
  tFilePage *pPage = getResBufPage(pRuntimeEnv->pResultBuf, pResult->pos.pageId);

  for (int32_t i = 0; i < pQuery->numOfOutput; ++i) {
    SQLFunctionCtx *pCtx = &pRuntimeEnv->pCtx[i];
    int32_t         functionId = pQuery->pSelectExpr[i].base.functionId;

    pCtx->aOutputBuf = getPosInResultPage(pRuntimeEnv, i, pResult, pPage);

    // top/bottom/diff write their timestamps into the first output column
    if (functionId == TSDB_FUNC_TOP || functionId == TSDB_FUNC_BOTTOM || functionId == TSDB_FUNC_DIFF) {
      pCtx->ptsOutputBuf = pRuntimeEnv->pCtx[0].aOutputBuf;
    }

    /*
     * set the output buffer information and intermediate buffer
     * not all queries require the interResultBuf, such as COUNT
     */
    pCtx->resultInfo = &pResult->resultInfo[i];

    // set super table query flag
    GET_RES_INFO(pCtx)->superTableQ = pRuntimeEnv->stableQuery;
  }
}

H
Haojun Liao 已提交
3566 3567
/*
 * Bind the function contexts to the given window result and initialize the
 * functions whose result info has not been initialized yet.
 *
 * Contexts whose result is both initialized and complete only get their
 * resultInfo pointer updated; everything else is skipped for them.
 */
void setWindowResOutputBufInitCtx(SQueryRuntimeEnv *pRuntimeEnv, SWindowResult *pResult) {
  SQuery *pQuery = pRuntimeEnv->pQuery;

  // Note: pResult->pos[i]->num == 0, there is only fixed number of results for each group
  tFilePage *pPage = getResBufPage(pRuntimeEnv->pResultBuf, pResult->pos.pageId);

  for (int32_t i = 0; i < pQuery->numOfOutput; ++i) {
    SQLFunctionCtx *pCtx = &pRuntimeEnv->pCtx[i];

    pCtx->resultInfo = &pResult->resultInfo[i];

    bool finished = pCtx->resultInfo->initialized && pCtx->resultInfo->complete;
    if (finished) {
      continue;
    }

    pCtx->aOutputBuf = getPosInResultPage(pRuntimeEnv, i, pResult, pPage);
    pCtx->currentStage = 0;

    // top/bottom/diff write their timestamps into the first output column
    int32_t functionId = pCtx->functionId;
    if (functionId == TSDB_FUNC_TOP || functionId == TSDB_FUNC_BOTTOM || functionId == TSDB_FUNC_DIFF) {
      pCtx->ptsOutputBuf = pRuntimeEnv->pCtx[0].aOutputBuf;
    }

    /*
     * set the output buffer information and intermediate buffer
     * not all queries require the interResultBuf, such as COUNT
     */
    pCtx->resultInfo->superTableQ = pRuntimeEnv->stableQuery;     // set super table query flag

    if (!pCtx->resultInfo->initialized) {
      aAggs[functionId].init(pCtx);
    }
  }
}

3600
/*
 * Set the tag values of the given table in the runtime environment and, when a
 * ts buffer is in use, position its cursor for the table.
 *
 * On the first call for a table (cur.vgroupIndex == -1) the start position is
 * located by the tag value and cached in pTableQueryInfo->cur; later calls
 * restore the cached cursor.
 *
 * @return always 0
 */
int32_t setAdditionalInfo(SQInfo *pQInfo, void* pTable, STableQueryInfo *pTableQueryInfo) {
  SQueryRuntimeEnv *pRuntimeEnv = &pQInfo->runtimeEnv;

  setTagVal(pRuntimeEnv, pTable, pQInfo->tsdb);

  // both the master and supplement scan needs to set the correct ts comp start position
  if (pRuntimeEnv->pTSBuf == NULL) {
    return 0;
  }

  if (pTableQueryInfo->cur.vgroupIndex != -1) {
    // cursor already known for this table
    tsBufSetCursor(pRuntimeEnv->pTSBuf, &pTableQueryInfo->cur);
    return 0;
  }

  pTableQueryInfo->tag = pRuntimeEnv->pCtx[0].tag.i64Key;
  tsBufGetElemStartPos(pRuntimeEnv->pTSBuf, 0, pTableQueryInfo->tag);

  // keep the cursor info of the current table
  pTableQueryInfo->cur = pRuntimeEnv->pTSBuf->cur;

  return 0;
}

/*
 * There are two cases to handle:
 *
 * 1. Query range is not set yet (queryRangeSet = 0). we need to set the query range info, including pQuery->lastKey,
 *    pQuery->window.skey, and pQuery->eKey.
 * 2. Query range is set and query is in progress. There may be another result with the same query ranges to be
 *    merged during merge stage. In this case, we need the pTableQueryInfo->lastResRows to decide if there
 *    is a previous result generated or not.
 */
H
hjxilinx 已提交
3631
void setIntervalQueryRange(SQInfo *pQInfo, TSKEY key) {
3632 3633
  SQueryRuntimeEnv *pRuntimeEnv = &pQInfo->runtimeEnv;
  SQuery *          pQuery = pRuntimeEnv->pQuery;
H
hjxilinx 已提交
3634 3635
  STableQueryInfo *pTableQueryInfo = pQuery->current;
  
3636 3637 3638
  if (pTableQueryInfo->queryRangeSet) {
    pTableQueryInfo->lastKey = key;
  } else {
3639
    pTableQueryInfo->win.skey = key;
3640
    STimeWindow win = {.skey = key, .ekey = pQuery->window.ekey};
3641

3642 3643 3644 3645 3646
    // for too small query range, no data in this interval.
    if ((QUERY_IS_ASC_QUERY(pQuery) && (pQuery->window.ekey < pQuery->window.skey)) ||
        (!QUERY_IS_ASC_QUERY(pQuery) && (pQuery->window.skey < pQuery->window.ekey))) {
      return;
    }
3647

3648 3649 3650 3651 3652 3653
    /**
     * In handling the both ascending and descending order super table query, we need to find the first qualified
     * timestamp of this table, and then set the first qualified start timestamp.
     * In ascending query, key is the first qualified timestamp. However, in the descending order query, additional
     * operations involve.
     */
H
Haojun Liao 已提交
3654
    STimeWindow     w = TSWINDOW_INITIALIZER;
3655
    SWindowResInfo *pWindowResInfo = &pTableQueryInfo->windowResInfo;
3656

H
Haojun Liao 已提交
3657 3658
    TSKEY sk = MIN(win.skey, win.ekey);
    TSKEY ek = MAX(win.skey, win.ekey);
H
Haojun Liao 已提交
3659
    getAlignQueryTimeWindow(pQuery, win.skey, sk, ek, &w);
3660
    pWindowResInfo->startTime = pTableQueryInfo->win.skey;  // windowSKey may be 0 in case of 1970 timestamp
3661

3662 3663
    if (pWindowResInfo->prevSKey == TSKEY_INITIAL_VAL) {
      if (!QUERY_IS_ASC_QUERY(pQuery)) {
H
Haojun Liao 已提交
3664
        assert(win.ekey == pQuery->window.ekey);
3665
      }
3666 3667
      
      pWindowResInfo->prevSKey = w.skey;
3668
    }
3669

3670
    pTableQueryInfo->queryRangeSet = 1;
3671
    pTableQueryInfo->lastKey = pTableQueryInfo->win.skey;
3672 3673 3674 3675
  }
}

/*
 * Check whether any function in the select list carries the
 * TSDB_FUNCSTATE_NEED_TS flag in its status.
 */
bool requireTimestamp(SQuery *pQuery) {
  for (int32_t i = 0; i < pQuery->numOfOutput; i++) {
    int32_t functionId = pQuery->pSelectExpr[i].base.functionId;

    bool needTs = (aAggs[functionId].nStatus & TSDB_FUNCSTATE_NEED_TS) != 0;
    if (needTs) {
      return true;
    }
  }

  return false;
}

/*
 * Decide whether the primary timestamp column of a data block has to be loaded.
 *
 * 1. if lastKey of the current table or the end key of the query window lies inside the block's time window,
 *    the timestamp column is needed to decide the precise position
 * 2. if any function requires timestamps (see requireTimestamp), the column is needed in any case
 */
bool needPrimaryTimestampCol(SQuery *pQuery, SDataBlockInfo *pDataBlockInfo) {
  STimeWindow     *w = &pDataBlockInfo->window;
  STableQueryInfo *pTableQueryInfo = pQuery->current;

  // current position falls into this block
  if (pTableQueryInfo->lastKey >= w->skey && pTableQueryInfo->lastKey <= w->ekey) {
    return true;
  }

  // end of the query range falls into this block
  if (pQuery->window.ekey >= w->skey && pQuery->window.ekey <= w->ekey) {
    return true;
  }

  return requireTimestamp(pQuery);
}

3699
/*
 * Copy rows of closed window results into the output buffer of the query
 * (pQuery->sdata), resuming at pQInfo->groupIndex / pQInfo->offset.
 *
 * Copying stops when all closed windows are consumed or the output buffer
 * reaches its capacity; in the latter case the position inside the partially
 * copied window is remembered in pQInfo->offset.
 *
 * @param orderType  TSDB_ORDER_ASC walks the windows forward, anything else backward
 * @return number of rows copied
 */
static int32_t doCopyToSData(SQInfo *pQInfo, SWindowResInfo *pResultInfo, int32_t orderType) {
  SQueryRuntimeEnv *pRuntimeEnv = &pQInfo->runtimeEnv;
  SQuery           *pQuery = pRuntimeEnv->pQuery;

  int32_t numOfResult = 0;

  qDebug("QInfo:%p start to copy data from windowResInfo to query buf", pQInfo);
  int32_t        totalSet = numOfClosedTimeWindow(pResultInfo);
  SWindowResult *result = pResultInfo->pResult;

  // pick the start index and the walking direction
  bool    ascending = (orderType == TSDB_ORDER_ASC);
  int32_t step = ascending ? 1 : -1;
  int32_t startIdx = ascending ? pQInfo->groupIndex : (totalSet - pQInfo->groupIndex - 1);

  for (int32_t i = startIdx; (i < totalSet) && (i >= 0); i += step) {
    // empty window: move on to the next group
    if (result[i].numOfRows == 0) {
      pQInfo->offset = 0;
      pQInfo->groupIndex += 1;
      continue;
    }

    assert(pQInfo->offset <= 1);

    int32_t numOfRowsToCopy = result[i].numOfRows - pQInfo->offset;
    int32_t oldOffset = pQInfo->offset;

    /*
     * current output space is not enough to keep all the result data of this group, only copy partial results
     * to SQuery object's result buffer
     */
    if (numOfRowsToCopy > pQuery->rec.capacity - numOfResult) {
      numOfRowsToCopy = pQuery->rec.capacity - numOfResult;
      pQInfo->offset += numOfRowsToCopy;
    } else {
      pQInfo->offset = 0;
      pQInfo->groupIndex += 1;
    }

    tFilePage *page = getResBufPage(pRuntimeEnv->pResultBuf, result[i].pos.pageId);

    // column by column copy of the selected rows
    for (int32_t j = 0; j < pQuery->numOfOutput; ++j) {
      int32_t size = pRuntimeEnv->pCtx[j].outputBytes;

      char *src = getPosInResultPage(pRuntimeEnv, j, &result[i], page);
      char *dst = pQuery->sdata[j]->data + numOfResult * size;
      memcpy(dst, src + oldOffset * size, size * numOfRowsToCopy);
    }

    numOfResult += numOfRowsToCopy;
    if (numOfResult == pQuery->rec.capacity) {
      break;  // output buffer is full
    }
  }

  qDebug("QInfo:%p copy data to query buf completed", pQInfo);

#ifdef _DEBUG_VIEW
  displayInterResult(pQuery->sdata, pRuntimeEnv, numOfResult);
#endif
  return numOfResult;
}

/**
 * copyFromWindowResToSData support copy data in ascending/descending order
 * For interval query of both super table and table, copy the data in ascending order, since the output results are
 * ordered in SWindowResutl already. While handling the group by query for both table and super table,
 * all group result are completed already.
 *
 * @param pQInfo
 * @param result
 */
3776
void copyFromWindowResToSData(SQInfo *pQInfo, SWindowResInfo *pResultInfo) {
3777
  SQuery *pQuery = pQInfo->runtimeEnv.pQuery;
3778

3779
  int32_t orderType = (pQuery->pGroupbyExpr != NULL) ? pQuery->pGroupbyExpr->orderType : TSDB_ORDER_ASC;
3780
  int32_t numOfResult = doCopyToSData(pQInfo, pResultInfo, orderType);
3781

3782
  pQuery->rec.rows += numOfResult;
3783

3784
  assert(pQuery->rec.rows <= pQuery->rec.capacity);
3785 3786
}

H
Haojun Liao 已提交
3787
/*
 * For non-interval queries, raise the row count of every window result to the
 * largest numOfRes reported by its functions. TS, TAG and TAGPRJ functions are
 * not taken into account. Interval queries are left untouched.
 */
static void updateWindowResNumOfRes(SQueryRuntimeEnv *pRuntimeEnv) {
  SQuery *pQuery = pRuntimeEnv->pQuery;

  if (QUERY_IS_INTERVAL_QUERY(pQuery)) {
    return;
  }

  SWindowResInfo *pWindowResInfo = &pRuntimeEnv->windowResInfo;

  for (int32_t i = 0; i < pWindowResInfo->size; ++i) {
    SWindowResult *pResult = &pWindowResInfo->pResult[i];

    for (int32_t j = 0; j < pQuery->numOfOutput; ++j) {
      int32_t functionId = pRuntimeEnv->pCtx[j].functionId;

      bool skip = (functionId == TSDB_FUNC_TS) || (functionId == TSDB_FUNC_TAG) || (functionId == TSDB_FUNC_TAGPRJ);
      if (skip) {
        continue;
      }

      pResult->numOfRows = MAX(pResult->numOfRows, pResult->resultInfo[j].numOfRes);
    }
  }
}

H
Haojun Liao 已提交
3809
static void stableApplyFunctionsOnBlock(SQueryRuntimeEnv *pRuntimeEnv, SDataBlockInfo *pDataBlockInfo, SDataStatis *pStatis,
3810
    SArray *pDataBlock, __block_search_fn_t searchFn) {
3811
  SQuery *         pQuery = pRuntimeEnv->pQuery;
3812 3813
  STableQueryInfo* pTableQueryInfo = pQuery->current;
  
3814
  SWindowResInfo * pWindowResInfo = &pTableQueryInfo->windowResInfo;
H
hjxilinx 已提交
3815
  pQuery->pos = QUERY_IS_ASC_QUERY(pQuery)? 0 : pDataBlockInfo->rows - 1;
3816

H
Haojun Liao 已提交
3817
  if (pQuery->numOfFilterCols > 0 || pRuntimeEnv->pTSBuf != NULL || pRuntimeEnv->groupbyNormalCol) {
3818
    rowwiseApplyFunctions(pRuntimeEnv, pStatis, pDataBlockInfo, pWindowResInfo, pDataBlock);
3819
  } else {
3820
    blockwiseApplyFunctions(pRuntimeEnv, pStatis, pDataBlockInfo, pWindowResInfo, searchFn, pDataBlock);
3821 3822 3823
  }
}

3824 3825 3826
/*
 * Check whether there are still results to be returned to the client.
 *
 * @return false once the LIMIT has been reached; otherwise true if the fill
 *         procedure or the window results still hold rows to return
 */
bool queryHasRemainResults(SQueryRuntimeEnv* pRuntimeEnv) {
  SQuery    *pQuery = pRuntimeEnv->pQuery;
  SFillInfo *pFillInfo = pRuntimeEnv->pFillInfo;

  // the limit is exhausted, nothing more to return
  if (pQuery->limit.limit > 0 && pQuery->rec.total >= pQuery->limit.limit) {
    return false;
  }

  bool fillRequired = (pQuery->fillType != TSDB_FILL_NONE) && !isPointInterpoQuery(pQuery);
  if (!fillRequired) {
    // there are results waiting for returned to client.
    return Q_STATUS_EQUAL(pQuery->status, QUERY_COMPLETED) &&
           (pRuntimeEnv->groupbyNormalCol || QUERY_IS_INTERVAL_QUERY(pQuery)) &&
           (pRuntimeEnv->windowResInfo.size > 0);
  }

  // There are results not returned to client yet, so filling operation applied to the remain result is required
  // in the first place.
  int32_t remain = taosNumOfRemainRows(pFillInfo);
  if (remain > 0) {
    return true;
  }

  /*
   * While the code reaches here, there are no results remains now.
   * If query is not completed yet, the gaps between two results blocks need to be handled after next data block
   * is retrieved from TSDB.
   *
   * NOTE: If the result set is not the first block, the gap in front of the result set will be filled. If the result
   * set is the FIRST result block, the gap between the start time of query time window and the timestamp of the
   * first result row in the actual result set will fill nothing.
   */
  if (!Q_STATUS_EQUAL(pQuery->status, QUERY_COMPLETED)) {
    return false;
  }

  int32_t numOfTotal = getFilledNumOfRes(pFillInfo, pQuery->window.ekey, pQuery->rec.capacity);
  return numOfTotal > 0;
}

/**
 * Serialize the current result block into the response payload.
 *
 * Layout written to `data`:
 *   1. for every output column, `bytes * numOfRows` bytes copied from pQuery->sdata[col]
 *   2. the number of entries in pQInfo->arrTableIdInfo, converted with htonl()
 *   3. one STableIdInfo per entry, fields converted with htobe64()/htonl()
 *
 * After copying, if the query status contains QUERY_COMPLETED the status is advanced to
 * QUERY_OVER when nothing is left: for a super table query once tableIndex has passed the
 * number of tables, otherwise once queryHasRemainResults() reports false.
 *
 * @param pQInfo     query handle
 * @param numOfRows  number of result rows to copy for each column
 * @param data       destination buffer; the caller must have sized it for the layout above
 */
static void doCopyQueryResultToMsg(SQInfo *pQInfo, int32_t numOfRows, char *data) {
  SQuery *pQuery = pQInfo->runtimeEnv.pQuery;

  for (int32_t col = 0; col < pQuery->numOfOutput; ++col) {
    int32_t bytes = pQuery->pSelectExpr[col].bytes;

    memmove(data, pQuery->sdata[col]->data, bytes * numOfRows);
    data += bytes * numOfRows;
  }

  // `data` now points right behind the column data and may sit at any alignment, hence the
  // values below are assembled in properly aligned locals and copied out with memcpy instead
  // of being stored through a casted pointer.
  int32_t  numOfTables = (int32_t)taosArrayGetSize(pQInfo->arrTableIdInfo);
  uint32_t numOfTablesNet = htonl(numOfTables);
  memcpy(data, &numOfTablesNet, sizeof(int32_t));
  data += sizeof(int32_t);

  for(int32_t i = 0; i < numOfTables; i++) {
    STableIdInfo* pSrc = taosArrayGet(pQInfo->arrTableIdInfo, i);

    STableIdInfo dst;
    memset(&dst, 0, sizeof(dst));
    dst.uid = htobe64(pSrc->uid);
    dst.tid = htonl(pSrc->tid);
    dst.key = htobe64(pSrc->key);

    memcpy(data, &dst, sizeof(STableIdInfo));
    data += sizeof(STableIdInfo);
  }

  // all data returned, set query over
  if (Q_STATUS_EQUAL(pQuery->status, QUERY_COMPLETED)) {
    if (pQInfo->runtimeEnv.stableQuery) {
      if (pQInfo->tableIndex >= pQInfo->tableqinfoGroupInfo.numOfTables) {
        setQueryStatus(pQuery, QUERY_OVER);
      }
    } else {
      if (!queryHasRemainResults(&pQInfo->runtimeEnv)) {
        setQueryStatus(pQuery, QUERY_OVER);
      }
    }
  }
}

H
Haojun Liao 已提交
3902
/**
 * Generate filled result rows and apply the pending OFFSET to them.
 *
 * Each iteration asks taosGenerateDataBlock() for up to rec.capacity rows, which are
 * produced into pQuery->sdata. Then:
 *   - offset == 0:   the generated rows are returned as they are;
 *   - offset <  ret: the first `offset` rows are dropped by moving the remaining rows to
 *                    the beginning of every pDst[i] buffer, and offset is cleared;
 *   - otherwise:     the whole batch is discarded, offset is decreased by `ret`, and the
 *                    loop continues while queryHasRemainResults() reports more rows.
 *
 * NOTE(review): rows are generated into pQuery->sdata while the offset shift is applied to
 * pDst; this only works when the caller passes pQuery->sdata as pDst -- confirm at call sites.
 *
 * @param pRuntimeEnv  runtime environment of the query
 * @param pDst         per-column result buffers that are shifted when rows are skipped
 * @param numOfFilled  not referenced by this function
 * @return number of rows available in the result buffers after applying the offset
 */
int32_t doFillGapsInResults(SQueryRuntimeEnv* pRuntimeEnv, tFilePage **pDst, int32_t *numOfFilled) {
  SQInfo* pQInfo = GET_QINFO_ADDR(pRuntimeEnv);
  SQuery *pQuery = pRuntimeEnv->pQuery;
  SFillInfo* pFillInfo = pRuntimeEnv->pFillInfo;

  while (1) {
    int32_t ret = taosGenerateDataBlock(pFillInfo, (tFilePage**) pQuery->sdata, pQuery->rec.capacity);

    // todo apply limit output function
    /* reached the start position according to the offset value, return immediately */
    if (pQuery->limit.offset == 0) {
      qDebug("QInfo:%p initial numOfRows:%d, generate filled result:%d rows", pQInfo, pFillInfo->numOfRows, ret);
      return ret;
    }

    if (pQuery->limit.offset < ret) {
      qDebug("QInfo:%p initial numOfRows:%d, generate filled result:%d rows, offset:%" PRId64 ". Discard due to offset, remain:%" PRId64 ", new offset:%d",
             pQInfo, pFillInfo->numOfRows, ret, pQuery->limit.offset, ret - pQuery->limit.offset, 0);

      ret -= pQuery->limit.offset;
      // todo !!!!there exactly number of interpo is not valid.
      // todo refactor move to the beginning of buffer
      for (int32_t i = 0; i < pQuery->numOfOutput; ++i) {
        // source and destination overlap, so memmove is required here
        memmove(pDst[i]->data, pDst[i]->data + pQuery->pSelectExpr[i].bytes * pQuery->limit.offset,
                ret * pQuery->pSelectExpr[i].bytes);
      }

      pQuery->limit.offset = 0;
      return ret;
    } else {
      qDebug("QInfo:%p initial numOfRows:%d, generate filled result:%d rows, offset:%" PRId64 ". Discard due to offset, "
             "remain:%d, new offset:%" PRId64, pQInfo, pFillInfo->numOfRows, ret, pQuery->limit.offset, 0,
          pQuery->limit.offset - ret);

      // the whole batch is consumed by the offset
      pQuery->limit.offset -= ret;
      pQuery->rec.rows = 0;
      ret = 0;
    }

    if (!queryHasRemainResults(pRuntimeEnv)) {
      return ret;
    }
  }
}

3947
/**
 * Write the cost counters collected in pQInfo->runtimeEnv.summary to the debug log.
 *
 * @param pQInfo  query handle whose summary is reported
 */
static void queryCostStatis(SQInfo *pQInfo) {
  SQueryRuntimeEnv *pRuntimeEnv = &pQInfo->runtimeEnv;
  SQueryCostInfo *pSummary = &pRuntimeEnv->summary;

  qDebug("QInfo:%p :cost summary: elapsed time:%"PRId64" us, total blocks:%d, load block statis:%d,"
         " load data block:%d, total rows:%"PRId64 ", check rows:%"PRId64,
         pQInfo, pSummary->elapsedTime, pSummary->totalBlocks, pSummary->loadBlockStatis,
         pSummary->loadBlocks, pSummary->totalRows, pSummary->totalCheckedRows);
}

3957 3958
/**
 * Consume the remaining OFFSET inside the given data block.
 *
 * If the offset equals the number of rows in the block, the whole block is skipped:
 * lastKey is moved one step past the block boundary and the offset is cleared.
 * Otherwise pQuery->pos is set to the first row that must be kept (counted from the
 * front for ascending order, from the back otherwise), lastKey is set to the timestamp
 * of that row, the offset is cleared and the query functions are applied to the block
 * through tableApplyFunctionsOnBlock().
 *
 * @param pRuntimeEnv  runtime environment of the query
 * @param pBlockInfo   description of the data block the query handle currently points to
 */
static void updateOffsetVal(SQueryRuntimeEnv *pRuntimeEnv, SDataBlockInfo *pBlockInfo) {
  SQuery *pQuery = pRuntimeEnv->pQuery;
  STableQueryInfo* pTableQueryInfo = pQuery->current;

  int32_t step = GET_FORWARD_DIRECTION_FACTOR(pQuery->order.order);

  if (pQuery->limit.offset == pBlockInfo->rows) {  // current block is ignored completely
    pTableQueryInfo->lastKey = QUERY_IS_ASC_QUERY(pQuery) ? pBlockInfo->window.ekey + step : pBlockInfo->window.skey + step;
    pQuery->limit.offset = 0;
    return;
  }

  if (QUERY_IS_ASC_QUERY(pQuery)) {
    pQuery->pos = pQuery->limit.offset;
  } else {
    pQuery->pos = pBlockInfo->rows - pQuery->limit.offset - 1;
  }

  assert(pQuery->pos >= 0 && pQuery->pos <= pBlockInfo->rows - 1);

  SArray *pDataBlock = tsdbRetrieveDataBlock(pRuntimeEnv->pQueryHandle, NULL);
  if (pDataBlock == NULL) {
    // NOTE(review): assumes a failed load is reported through terrno, as it is for
    // tsdbNextDataBlock() in skipBlocks() -- confirm against the tsdb read API.
    longjmp(pRuntimeEnv->env, terrno);
  }

  // the first column is read as the timestamp column
  SColumnInfoData *pColInfoData = taosArrayGet(pDataBlock, 0);
  TSKEY *keys = (TSKEY *) pColInfoData->pData;

  // update the last key and clear the offset value
  pTableQueryInfo->lastKey = keys[pQuery->pos];
  pQuery->limit.offset = 0;

  int32_t numOfRes = tableApplyFunctionsOnBlock(pRuntimeEnv, pBlockInfo, NULL, binarySearchForKey, pDataBlock);

  qDebug("QInfo:%p check data block, brange:%" PRId64 "-%" PRId64 ", numOfRows:%d, numOfRes:%d, lastKey:%"PRId64, GET_QINFO_ADDR(pRuntimeEnv),
         pBlockInfo->window.skey, pBlockInfo->window.ekey, pBlockInfo->rows, numOfRes, pQuery->current->lastKey);
}
3992

3993 3994 3995 3996 3997
/**
 * Skip whole data blocks in order to consume the OFFSET of the query.
 *
 * Nothing is done when there is no positive offset or when filter columns exist.
 * Otherwise blocks are iterated: while the offset is larger than the number of rows of
 * the current block, the block is skipped (offset decreased, lastKey moved one step past
 * the block); the first block that can absorb the rest of the offset is handed over to
 * updateOffsetVal() and the iteration stops.
 *
 * Leaves through longjmp(pRuntimeEnv->env, ...) when tsdbNextDataBlock() fails with
 * terrno set, or when the query has been killed.
 *
 * @param pRuntimeEnv  runtime environment of the query
 */
void skipBlocks(SQueryRuntimeEnv *pRuntimeEnv) {
  SQuery *pQuery = pRuntimeEnv->pQuery;

  if (pQuery->limit.offset <= 0 || pQuery->numOfFilterCols > 0) {
    return;
  }

  pQuery->pos = 0;
  int32_t step = GET_FORWARD_DIRECTION_FACTOR(pQuery->order.order);

  STableQueryInfo* pTableQueryInfo = pQuery->current;
  TsdbQueryHandleT pQueryHandle = pRuntimeEnv->pQueryHandle;

  SDataBlockInfo blockInfo = SDATA_BLOCK_INITIALIZER;
  while (true) {
    if (!tsdbNextDataBlock(pQueryHandle)) {
      if (terrno != TSDB_CODE_SUCCESS) {
        longjmp(pRuntimeEnv->env, terrno);
      }
      break;
    }

    if (IS_QUERY_KILLED(GET_QINFO_ADDR(pRuntimeEnv))) {
      finalizeQueryResult(pRuntimeEnv); // clean up allocated resource during query
      longjmp(pRuntimeEnv->env, TSDB_CODE_TSC_QUERY_CANCELLED);
    }

    tsdbRetrieveDataBlockInfo(pQueryHandle, &blockInfo);

    if (pQuery->limit.offset > blockInfo.rows) {
      // the whole block is consumed by the offset
      pQuery->limit.offset -= blockInfo.rows;
      pTableQueryInfo->lastKey = (QUERY_IS_ASC_QUERY(pQuery)) ? blockInfo.window.ekey : blockInfo.window.skey;
      pTableQueryInfo->lastKey += step;

      qDebug("QInfo:%p skip rows:%d, offset:%" PRId64, GET_QINFO_ADDR(pRuntimeEnv), blockInfo.rows,
             pQuery->limit.offset);
    } else {  // find the appropriate start position in current block
      updateOffsetVal(pRuntimeEnv, &blockInfo);
      break;
    }
  }
}
4035

H
Haojun Liao 已提交
4036
/**
 * Consume the OFFSET of an interval query by skipping time windows.
 *
 * `*start` is initialised with the lastKey of the current table. Nothing else is done when
 * there is no positive offset, when filter columns exist, or when a TS buffer or fill info
 * is attached to the runtime environment. Otherwise data blocks are iterated and the
 * offset is decreased by one for every time window that ends inside the current block.
 * When the offset reaches zero, `*start`, pQuery->window.skey and the prevSKey of the
 * window result info are moved to the first window that has to be returned; if that window
 * overlaps the current block, the block is loaded and the query functions are applied to it.
 *
 * Leaves through longjmp(pRuntimeEnv->env, terrno) when reading from TSDB fails.
 *
 * @param pRuntimeEnv  runtime environment of the query
 * @param start        [out] timestamp the query should start from
 * @return always true
 */
static bool skipTimeInterval(SQueryRuntimeEnv *pRuntimeEnv, TSKEY* start) {
  SQuery *pQuery = pRuntimeEnv->pQuery;
  *start = pQuery->current->lastKey;

  // if queried with value filter, do NOT forward query start position
  if (pQuery->limit.offset <= 0 || pQuery->numOfFilterCols > 0 || pRuntimeEnv->pTSBuf != NULL || pRuntimeEnv->pFillInfo != NULL) {
    return true;
  }

  /*
   * 1. for interval without interpolation query we forward pQuery->intervalTime at a time for
   *    pQuery->limit.offset times. Since hole exists, pQuery->intervalTime*pQuery->limit.offset value is
   *    not valid. otherwise, we only forward pQuery->limit.offset number of points
   */
  assert(pRuntimeEnv->windowResInfo.prevSKey == TSKEY_INITIAL_VAL);

  STimeWindow w = TSWINDOW_INITIALIZER;

  SWindowResInfo *pWindowResInfo = &pRuntimeEnv->windowResInfo;
  STableQueryInfo *pTableQueryInfo = pQuery->current;

  SDataBlockInfo blockInfo = SDATA_BLOCK_INITIALIZER;
  while (true) {
    if (!tsdbNextDataBlock(pRuntimeEnv->pQueryHandle)) {
      if (terrno != TSDB_CODE_SUCCESS) {
        longjmp(pRuntimeEnv->env, terrno);
      }
      break;
    }

    tsdbRetrieveDataBlockInfo(pRuntimeEnv->pQueryHandle, &blockInfo);

    if (QUERY_IS_ASC_QUERY(pQuery)) {
      // the aligned start window is only computed once, for the first block
      if (pWindowResInfo->prevSKey == TSKEY_INITIAL_VAL) {
        getAlignQueryTimeWindow(pQuery, blockInfo.window.skey, blockInfo.window.skey, pQuery->window.ekey, &w);
        pWindowResInfo->startTime = w.skey;
        pWindowResInfo->prevSKey = w.skey;
      }
    } else {
      getAlignQueryTimeWindow(pQuery, blockInfo.window.ekey, pQuery->window.ekey, blockInfo.window.ekey, &w);

      pWindowResInfo->startTime = pQuery->window.skey;
      pWindowResInfo->prevSKey = w.skey;
    }

    // the first time window
    STimeWindow win = getActiveTimeWindow(pWindowResInfo, pWindowResInfo->prevSKey, pQuery);

    while (pQuery->limit.offset > 0) {
      // the current window ends inside this block: it counts as one skipped result
      if ((win.ekey <= blockInfo.window.ekey && QUERY_IS_ASC_QUERY(pQuery)) ||
          (win.ekey >= blockInfo.window.skey && !QUERY_IS_ASC_QUERY(pQuery))) {
        pQuery->limit.offset -= 1;
        pWindowResInfo->prevSKey = win.skey;
      }

      STimeWindow tw = win;
      GET_NEXT_TIMEWINDOW(pQuery, &tw);

      if (pQuery->limit.offset == 0) {
        if ((tw.skey <= blockInfo.window.ekey && QUERY_IS_ASC_QUERY(pQuery)) ||
            (tw.ekey >= blockInfo.window.skey && !QUERY_IS_ASC_QUERY(pQuery))) {
          // load the data block and check data remaining in current data block
          // TODO optimize performance
          SArray *pDataBlock = tsdbRetrieveDataBlock(pRuntimeEnv->pQueryHandle, NULL);
          if (pDataBlock == NULL) {
            // NOTE(review): assumes a failed load is reported through terrno -- confirm
            longjmp(pRuntimeEnv->env, terrno);
          }

          SColumnInfoData *pColInfoData = taosArrayGet(pDataBlock, 0);

          tw = win;
          int32_t startPos =
              getNextQualifiedWindow(pRuntimeEnv, &tw, &blockInfo, pColInfoData->pData, binarySearchForKey, -1);
          assert(startPos >= 0);

          // set the abort info
          pQuery->pos = startPos;

          // reset the query start timestamp
          pTableQueryInfo->win.skey = ((TSKEY *)pColInfoData->pData)[startPos];
          pQuery->window.skey = pTableQueryInfo->win.skey;
          *start = pTableQueryInfo->win.skey;

          pWindowResInfo->prevSKey = tw.skey;
          int32_t index = pRuntimeEnv->windowResInfo.curIndex;

          int32_t numOfRes = tableApplyFunctionsOnBlock(pRuntimeEnv, &blockInfo, NULL, binarySearchForKey, pDataBlock);
          pRuntimeEnv->windowResInfo.curIndex = index;  // restore the window index

          qDebug("QInfo:%p check data block, brange:%" PRId64 "-%" PRId64 ", numOfRows:%d, numOfRes:%d, lastKey:%"PRId64,
                 GET_QINFO_ADDR(pRuntimeEnv), blockInfo.window.skey, blockInfo.window.ekey, blockInfo.rows, numOfRes, pQuery->current->lastKey);

          return true;
        } else { // the next window does not overlap this block: only move the start position
          *start = tw.skey;
          pQuery->window.skey = tw.skey;
          pWindowResInfo->prevSKey = tw.skey;
          return true;
        }
      }

      /*
       * If the next time window still starts from current data block,
       * load the primary timestamp column first, and then find the start position for the next queried time window.
       * Note that only the primary timestamp column is required.
       * TODO: Optimize for this cases. All data blocks are not needed to be loaded, only if the first actually required
       * time window resides in current data block.
       */
      if ((tw.skey <= blockInfo.window.ekey && QUERY_IS_ASC_QUERY(pQuery)) ||
          (tw.ekey >= blockInfo.window.skey && !QUERY_IS_ASC_QUERY(pQuery))) {
        SArray *pDataBlock = tsdbRetrieveDataBlock(pRuntimeEnv->pQueryHandle, NULL);
        if (pDataBlock == NULL) {
          // NOTE(review): assumes a failed load is reported through terrno -- confirm
          longjmp(pRuntimeEnv->env, terrno);
        }

        SColumnInfoData *pColInfoData = taosArrayGet(pDataBlock, 0);

        tw = win;
        int32_t startPos =
            getNextQualifiedWindow(pRuntimeEnv, &tw, &blockInfo, pColInfoData->pData, binarySearchForKey, -1);
        assert(startPos >= 0);

        // set the abort info
        pQuery->pos = startPos;
        pTableQueryInfo->lastKey = ((TSKEY *)pColInfoData->pData)[startPos];
        pWindowResInfo->prevSKey = tw.skey;
        win = tw;
      } else {
        break;  // offset is not 0, and next time window begins or ends in the next block.
      }
    }
  }

  return true;
}

B
Bomin Zhang 已提交
4164
/**
 * Create the TSDB query handle for the query and store it in runtimeEnv.pQueryHandle.
 *
 * No handle is created (TSDB_CODE_SUCCESS is returned right away) for tag-only queries,
 * and for super table queries that are neither interval queries nor fixed-output queries.
 *
 * For a query on a single table in ascending order that is not an interval, group-by
 * normal column or fixed-output query, the time window of the first table of the first
 * group is used instead of pQuery->window.
 *
 * The handle is created by tsdbQueryLastRow(), tsdbQueryRowsInExternalWindow() or
 * tsdbQueryTables(), depending on the kind of the query.
 *
 * @param tsdb           tsdb instance handed to the tsdbQuery* functions
 * @param pQInfo         query handle
 * @param isSTableQuery  true when the query is issued on a super table
 * @return the value of terrno after the handle creation; terrno is reset to
 *         TSDB_CODE_SUCCESS before the creation is attempted
 */
static int32_t setupQueryHandle(void* tsdb, SQInfo* pQInfo, bool isSTableQuery) {
  SQueryRuntimeEnv *pRuntimeEnv = &pQInfo->runtimeEnv;
  SQuery *pQuery = pQInfo->runtimeEnv.pQuery;

  if (onlyQueryTags(pQuery)) {
    return TSDB_CODE_SUCCESS;
  }

  if (isSTableQuery && (!QUERY_IS_INTERVAL_QUERY(pQuery)) && (!isFixedOutputQuery(pRuntimeEnv))) {
    return TSDB_CODE_SUCCESS;
  }

  STsdbQueryCond cond = {
    .twindow = pQuery->window,
    .order   = pQuery->order.order,
    .colList = pQuery->colList,
    .numOfCols = pQuery->numOfCols,
  };

  if (!isSTableQuery
    && (pQInfo->tableqinfoGroupInfo.numOfTables == 1)
    && (cond.order == TSDB_ORDER_ASC)
    && (!QUERY_IS_INTERVAL_QUERY(pQuery))
    && (!isGroupbyNormalCol(pQuery->pGroupbyExpr))
    && (!isFixedOutputQuery(pRuntimeEnv))
  ) {
    // use the time window kept for the only table of this query
    SArray* pa = GET_TABLEGROUP(pQInfo, 0);
    STableQueryInfo* pCheckInfo = taosArrayGetP(pa, 0);
    cond.twindow = pCheckInfo->win;
  }

  // terrno is the only error channel of the tsdbQuery* calls used below
  terrno = TSDB_CODE_SUCCESS;
  if (isFirstLastRowQuery(pQuery)) {
    pRuntimeEnv->pQueryHandle = tsdbQueryLastRow(tsdb, &cond, &pQInfo->tableGroupInfo, pQInfo);
  } else if (isPointInterpoQuery(pQuery)) {
    pRuntimeEnv->pQueryHandle = tsdbQueryRowsInExternalWindow(tsdb, &cond, &pQInfo->tableGroupInfo, pQInfo);
  } else {
    pRuntimeEnv->pQueryHandle = tsdbQueryTables(tsdb, &cond, &pQInfo->tableGroupInfo, pQInfo);
  }

  return terrno;
}

4207 4208 4209 4210 4211 4212 4213 4214 4215 4216 4217 4218 4219
/**
 * Build the fill column descriptors, one for every output column of the query.
 *
 * Every entry carries the byte width, type and function id of the matching select
 * expression, the fill value taken from pQuery->fillVal, and the byte offset of the
 * column, which is the sum of the widths of all preceding columns.
 *
 * @param pQuery  query whose output columns are described
 * @return array of pQuery->numOfOutput descriptors allocated with calloc(), or NULL if
 *         the allocation fails
 */
static SFillColInfo* taosCreateFillColInfo(SQuery* pQuery) {
  int32_t numOfCols = pQuery->numOfOutput;
  int32_t offset = 0;

  SFillColInfo* pFillCol = calloc(numOfCols, sizeof(SFillColInfo));
  if (pFillCol == NULL) {  // out of memory: do not touch the array
    return NULL;
  }

  for(int32_t i = 0; i < numOfCols; ++i) {
    SExprInfo* pExprInfo = &pQuery->pSelectExpr[i];

    pFillCol[i].col.bytes  = pExprInfo->bytes;
    pFillCol[i].col.type   = pExprInfo->type;
    pFillCol[i].col.offset = offset;
    pFillCol[i].flag       = TSDB_COL_NORMAL;    // always a normal column for table query
    pFillCol[i].functionId = pExprInfo->base.functionId;
    pFillCol[i].fillVal.i = pQuery->fillVal[i];

    offset += pExprInfo->bytes;
  }

  return pFillCol;
}

4228
/**
 * Initialise the runtime environment of a query before it is executed.
 *
 * Steps, in order:
 *   1. read the timestamp precision from the tsdb configuration and cache the
 *      top/bottom and tag-output flags;
 *   2. create the TSDB query handle (setupQueryHandle);
 *   3. fill in the remaining runtime fields and, when a TS buffer is given, set its
 *      traverse order;
 *   4. set up the runtime environment (setupQueryRuntimeEnv);
 *   5. create the disk based result buffer and the window result info:
 *        - super table query that does not only read tags: the buffer is always created,
 *          the window result info only for non-interval queries;
 *        - otherwise, for group-by-normal-column or interval queries: both are created;
 *   6. create the fill info for fill queries that are not point interpolation queries;
 *   7. mark the query as QUERY_NOT_COMPLETED.
 *
 * @param pQInfo         query handle to initialise
 * @param pTsBuf         TS buffer, may be NULL
 * @param tsdb           tsdb instance the query runs on
 * @param vgId           vgroup id stored in the query handle
 * @param isSTableQuery  true when the query is issued on a super table
 * @return TSDB_CODE_SUCCESS, or the error code of the first step that failed
 */
int32_t doInitQInfo(SQInfo *pQInfo, STSBuf *pTsBuf, void *tsdb, int32_t vgId, bool isSTableQuery) {
  SQueryRuntimeEnv *pRuntimeEnv = &pQInfo->runtimeEnv;

  int32_t code = TSDB_CODE_SUCCESS;
  SQuery *pQuery = pQInfo->runtimeEnv.pQuery;

  pQuery->precision = tsdbGetCfg(tsdb)->precision;
  pRuntimeEnv->topBotQuery = isTopBottomQuery(pQuery);
  pRuntimeEnv->hasTagResults = hasTagValOutput(pQuery);

  setScanLimitationByResultBuffer(pQuery);
  changeExecuteScanOrder(pQInfo, false);

  code = setupQueryHandle(tsdb, pQInfo, isSTableQuery);
  if (code != TSDB_CODE_SUCCESS) {
    return code;
  }

  pQInfo->tsdb = tsdb;
  pQInfo->vgId = vgId;

  pRuntimeEnv->pQuery = pQuery;
  pRuntimeEnv->pTSBuf = pTsBuf;
  pRuntimeEnv->cur.vgroupIndex = -1;
  pRuntimeEnv->stableQuery = isSTableQuery;
  pRuntimeEnv->prevGroupId = INT32_MIN;
  pRuntimeEnv->groupbyNormalCol = isGroupbyNormalCol(pQuery->pGroupbyExpr);

  if (pTsBuf != NULL) {
    // ASC when the query order matches the order of the TS buffer, DESC otherwise
    int16_t order = (pQuery->order.order == pRuntimeEnv->pTSBuf->tsOrder) ? TSDB_ORDER_ASC : TSDB_ORDER_DESC;
    tsBufSetTraverseOrder(pRuntimeEnv->pTSBuf, order);
  }

  // create runtime environment
  code = setupQueryRuntimeEnv(pRuntimeEnv, pQuery->order.order);
  if (code != TSDB_CODE_SUCCESS) {
    return code;
  }

  int32_t ps = DEFAULT_PAGE_SIZE;
  int32_t rowsize = 0;
  getIntermediateBufInfo(pRuntimeEnv, &ps, &rowsize);
  int32_t TWOMB = 1024*1024*2;  // handed to createDiskbasedResultBuffer() below

  if (isSTableQuery && !onlyQueryTags(pRuntimeEnv->pQuery)) {
    code = createDiskbasedResultBuffer(&pRuntimeEnv->pResultBuf, rowsize, ps, TWOMB, pQInfo);
    if (code != TSDB_CODE_SUCCESS) {
      return code;
    }

    if (!QUERY_IS_INTERVAL_QUERY(pQuery)) {
      int16_t type = TSDB_DATA_TYPE_NULL;
      int32_t threshold = 0;

      if (pRuntimeEnv->groupbyNormalCol) {  // group by columns not tags;
        type = getGroupbyColumnType(pQuery, pQuery->pGroupbyExpr);
        threshold = 4000;
      } else {
        type = TSDB_DATA_TYPE_INT;  // group id
        threshold = GET_NUM_OF_TABLEGROUP(pQInfo);
        if (threshold < 8) {
          threshold = 8;
        }
      }

      code = initWindowResInfo(&pRuntimeEnv->windowResInfo, pRuntimeEnv, 8, threshold, type);
      if (code != TSDB_CODE_SUCCESS) {
        return code;
      }
    }
  } else if (pRuntimeEnv->groupbyNormalCol || QUERY_IS_INTERVAL_QUERY(pQuery)) {
    int32_t numOfResultRows = getInitialPageNum(pQInfo);
    getIntermediateBufInfo(pRuntimeEnv, &ps, &rowsize);
    code = createDiskbasedResultBuffer(&pRuntimeEnv->pResultBuf, rowsize, ps, TWOMB, pQInfo);
    if (code != TSDB_CODE_SUCCESS) {
      return code;
    }

    int16_t type = TSDB_DATA_TYPE_NULL;
    if (pRuntimeEnv->groupbyNormalCol) {
      type = getGroupbyColumnType(pQuery, pQuery->pGroupbyExpr);
    } else {
      type = TSDB_DATA_TYPE_TIMESTAMP;
    }

    code = initWindowResInfo(&pRuntimeEnv->windowResInfo, pRuntimeEnv, numOfResultRows, 4096, type);
    if (code != TSDB_CODE_SUCCESS) {
      return code;
    }
  }

  if (pQuery->fillType != TSDB_FILL_NONE && !isPointInterpoQuery(pQuery)) {
    SFillColInfo* pColInfo = taosCreateFillColInfo(pQuery);
    STimeWindow w = TSWINDOW_INITIALIZER;

    // skey/ekey may be given in either order; align on the normalised range
    TSKEY sk = MIN(pQuery->window.skey, pQuery->window.ekey);
    TSKEY ek = MAX(pQuery->window.skey, pQuery->window.ekey);
    getAlignQueryTimeWindow(pQuery, pQuery->window.skey, sk, ek, &w);

    pRuntimeEnv->pFillInfo = taosInitFillInfo(pQuery->order.order, w.skey, 0, pQuery->rec.capacity, pQuery->numOfOutput,
                                              pQuery->slidingTime, pQuery->slidingTimeUnit, pQuery->precision,
                                              pQuery->fillType, pColInfo);
  }

  setQueryStatus(pQuery, QUERY_NOT_COMPLETED);
  return TSDB_CODE_SUCCESS;
}

4336
/**
 * Clear the `complete` flag in the result info of every output function context, so the
 * functions are evaluated again for the next table.
 *
 * @param pRuntimeEnv  runtime environment of the query
 */
static void enableExecutionForNextTable(SQueryRuntimeEnv *pRuntimeEnv) {
  SQuery *pQuery = pRuntimeEnv->pQuery;

  for (int32_t i = 0; i < pQuery->numOfOutput; ++i) {
    SResultInfo *pResInfo = GET_RES_INFO(&pRuntimeEnv->pCtx[i]);
    if (pResInfo != NULL) {
      pResInfo->complete = false;
    }
  }
}

H
Haojun Liao 已提交
4347 4348 4349 4350 4351 4352 4353 4354 4355 4356 4357 4358 4359 4360 4361 4362 4363
/**
 * Prepare the execution state for one data block of one table.
 *
 * Interval query: the interval query range is set from the start key of the block, and
 * setAdditionalInfo() is called when tag results are produced or a TS buffer exists.
 * Any other query: the execution context of the table's group is set, using the key one
 * step past the end key of the block.
 *
 * @param pQInfo           query handle
 * @param pTableQueryInfo  query info of the table the block belongs to
 * @param pBlockInfo       description of the current data block
 */
static FORCE_INLINE void setEnvForEachBlock(SQInfo* pQInfo, STableQueryInfo* pTableQueryInfo, SDataBlockInfo* pBlockInfo) {
  SQueryRuntimeEnv* pEnv = &pQInfo->runtimeEnv;
  SQuery*           pCurQuery = pEnv->pQuery;

  if (QUERY_IS_INTERVAL_QUERY(pCurQuery)) {
    // interval query: the range starts at the first key of the block
    TSKEY blockStartKey = pBlockInfo->window.skey;
    setIntervalQueryRange(pQInfo, blockStartKey);

    bool needAdditionalInfo = (pEnv->hasTagResults || pEnv->pTSBuf != NULL);
    if (needAdditionalInfo) {
      setAdditionalInfo(pQInfo, pTableQueryInfo->pTable, pTableQueryInfo);
    }

    return;
  }

  // the direction factor is only needed outside of interval queries
  int32_t direction = GET_FORWARD_DIRECTION_FACTOR(pCurQuery->order.order);
  setExecutionContext(pQInfo, pTableQueryInfo->groupIndex, pBlockInfo->window.ekey + direction);
}

H
Haojun Liao 已提交
4364
/**
 * Scan all data blocks delivered by the query handle of the current scan and apply the
 * query functions to them.
 *
 * The primary handle is used during the master scan, pSecQueryHandle otherwise. For every
 * block the owning table is looked up by blockInfo.tid in tableqinfoGroupInfo.map; the
 * scan stops when the table is not found. Blocks for which loadDataBlockOnDemand()
 * answers BLK_DATA_DISCARD are skipped after lastKey has been moved one step past them.
 *
 * Leaves through longjmp(pRuntimeEnv->env, ...) when tsdbNextDataBlock() fails with
 * terrno set, or when the query has been killed.
 *
 * @param pQInfo  query handle
 * @return difference of the two taosGetTimestampMs() readings taken around the scan
 */
static int64_t scanMultiTableDataBlocks(SQInfo *pQInfo) {
  SQueryRuntimeEnv *pRuntimeEnv = &pQInfo->runtimeEnv;
  SQuery*           pQuery = pRuntimeEnv->pQuery;
  SQueryCostInfo*   summary  = &pRuntimeEnv->summary;

  int64_t st = taosGetTimestampMs();

  TsdbQueryHandleT pQueryHandle = IS_MASTER_SCAN(pRuntimeEnv)? pRuntimeEnv->pQueryHandle : pRuntimeEnv->pSecQueryHandle;
  SDataBlockInfo blockInfo = SDATA_BLOCK_INITIALIZER;

  int32_t step = GET_FORWARD_DIRECTION_FACTOR(pQuery->order.order);

  while (true) {
    if (!tsdbNextDataBlock(pQueryHandle)) {
      if (terrno != TSDB_CODE_SUCCESS) {
        longjmp(pRuntimeEnv->env, terrno);
      }
      break;
    }

    summary->totalBlocks += 1;

    if (IS_QUERY_KILLED(pQInfo)) {
      longjmp(pRuntimeEnv->env, TSDB_CODE_TSC_QUERY_CANCELLED);
    }

    tsdbRetrieveDataBlockInfo(pQueryHandle, &blockInfo);
    STableQueryInfo **pTableQueryInfo = (STableQueryInfo**) taosHashGet(pQInfo->tableqinfoGroupInfo.map, &blockInfo.tid, sizeof(blockInfo.tid));
    if(pTableQueryInfo == NULL) {  // the block belongs to a table unknown to this query
      break;
    }

    pQuery->current = *pTableQueryInfo;
    CHECK_QUERY_TIME_RANGE(pQuery, *pTableQueryInfo);

    if (!pRuntimeEnv->groupbyNormalCol) {
      setEnvForEachBlock(pQInfo, *pTableQueryInfo, &blockInfo);
    }

    SDataStatis *pStatis = NULL;
    SArray *pDataBlock = NULL;
    if (loadDataBlockOnDemand(pRuntimeEnv, pQueryHandle, &blockInfo, &pStatis, &pDataBlock) == BLK_DATA_DISCARD) {
      // the block is not needed: move the last key past it and go on with the next block
      pQuery->current->lastKey = QUERY_IS_ASC_QUERY(pQuery)? blockInfo.window.ekey + step:blockInfo.window.skey + step;
      continue;
    }

    summary->totalRows += blockInfo.rows;
    stableApplyFunctionsOnBlock(pRuntimeEnv, &blockInfo, pStatis, pDataBlock, binarySearchForKey);

    qDebug("QInfo:%p check data block, uid:%"PRId64", tid:%d, brange:%" PRId64 "-%" PRId64 ", numOfRows:%d, lastKey:%" PRId64,
           pQInfo, blockInfo.uid, blockInfo.tid, blockInfo.window.skey, blockInfo.window.ekey, blockInfo.rows, pQuery->current->lastKey);
  }

  updateWindowResNumOfRes(pRuntimeEnv);

  int64_t et = taosGetTimestampMs();
  return et - st;
}

4423 4424
/**
 * Prepare the runtime environment for querying one single table of the first table group.
 *
 * The query status is reset to QUERY_NOT_COMPLETED, tag values are set when tag results
 * are produced or a TS buffer exists, and the current query handle (if any) is replaced
 * by a new one that covers only the selected table, for the range
 * [lastKey, win.ekey] of that table. With a TS buffer attached, the buffer is positioned
 * either at the start element of the tag value (cur.vgroupIndex == -1) or at the saved
 * cursor. Finally the output buffers of the function contexts are initialised.
 *
 * Leaves through longjmp(pRuntimeEnv->env, terrno) when the handle cannot be created.
 *
 * @param pQInfo  query handle
 * @param index   position of the table inside the first table group
 * @return false if no element with the tag value exists in the TS buffer (the output
 *         buffers are not initialised in this case), true otherwise
 */
static bool multiTableMultioutputHelper(SQInfo *pQInfo, int32_t index) {
  SQueryRuntimeEnv *pRuntimeEnv = &pQInfo->runtimeEnv;
  SQuery *          pQuery = pRuntimeEnv->pQuery;

  setQueryStatus(pQuery, QUERY_NOT_COMPLETED);
  SArray *group = GET_TABLEGROUP(pQInfo, 0);
  STableQueryInfo* pCheckInfo = taosArrayGetP(group, index);

  if (pRuntimeEnv->hasTagResults || pRuntimeEnv->pTSBuf != NULL) {
    setTagVal(pRuntimeEnv, pCheckInfo->pTable, pQInfo->tsdb);
  }

  STableId* id = TSDB_TABLEID(pCheckInfo->pTable);
  qDebug("QInfo:%p query on (%d): uid:%" PRIu64 ", tid:%d, qrange:%" PRId64 "-%" PRId64, pQInfo, index,
         id->uid, id->tid, pCheckInfo->lastKey, pCheckInfo->win.ekey);

  STsdbQueryCond cond = {
      .twindow   = {pCheckInfo->lastKey, pCheckInfo->win.ekey},
      .order     = pQuery->order.order,
      .colList   = pQuery->colList,
      .numOfCols = pQuery->numOfCols,
  };

  // todo refactor
  // build a temporary group list that holds one group with the current table only
  SArray *g1 = taosArrayInit(1, POINTER_BYTES);
  SArray *tx = taosArrayInit(1, sizeof(STableKeyInfo));

  STableKeyInfo info = {.pTable = pCheckInfo->pTable, .lastKey = pCheckInfo->lastKey};
  taosArrayPush(tx, &info);

  taosArrayPush(g1, &tx);
  STableGroupInfo gp = {.numOfTables = 1, .pGroupList = g1};

  // include only current table
  if (pRuntimeEnv->pQueryHandle != NULL) {
    tsdbCleanupQueryHandle(pRuntimeEnv->pQueryHandle);
    pRuntimeEnv->pQueryHandle = NULL;
  }

  pRuntimeEnv->pQueryHandle = tsdbQueryTables(pQInfo->tsdb, &cond, &gp, pQInfo);

  // the temporary arrays are released before the result is checked, so the error path
  // below does not leak them
  taosArrayDestroy(tx);
  taosArrayDestroy(g1);
  if (pRuntimeEnv->pQueryHandle == NULL) {
    longjmp(pRuntimeEnv->env, terrno);
  }

  if (pRuntimeEnv->pTSBuf != NULL) {
    if (pRuntimeEnv->cur.vgroupIndex == -1) {
      int64_t tag = pRuntimeEnv->pCtx[0].tag.i64Key;
      STSElem elem = tsBufGetElemStartPos(pRuntimeEnv->pTSBuf, 0, tag);

      // failed to find data with the specified tag value
      if (elem.vnode < 0) {
        return false;
      }
    } else {
      tsBufSetCursor(pRuntimeEnv->pTSBuf, &pRuntimeEnv->cur);
    }
  }

  initCtxOutputBuf(pRuntimeEnv);
  return true;
}

/**
 * super table query handler
 * 1. super table projection query, group-by on normal columns query, ts-comp query
 * 2. point interpolation query, last row query
 *
 * @param pQInfo
 */
4494
static void sequentialTableProcess(SQInfo *pQInfo) {
4495
  SQueryRuntimeEnv *pRuntimeEnv = &pQInfo->runtimeEnv;
4496
  SQuery *          pQuery = pRuntimeEnv->pQuery;
4497
  setQueryStatus(pQuery, QUERY_COMPLETED);
4498

H
Haojun Liao 已提交
4499
  size_t numOfGroups = GET_NUM_OF_TABLEGROUP(pQInfo);
4500

H
Haojun Liao 已提交
4501
  if (isPointInterpoQuery(pQuery) || isFirstLastRowQuery(pQuery)) {
4502 4503
    resetCtxOutputBuf(pRuntimeEnv);
    assert(pQuery->limit.offset == 0 && pQuery->limit.limit != 0);
4504

4505
    while (pQInfo->groupIndex < numOfGroups) {
H
Haojun Liao 已提交
4506
      SArray* group = taosArrayGetP(pQInfo->tableGroupInfo.pGroupList, pQInfo->groupIndex);
4507

4508
      qDebug("QInfo:%p last_row query on group:%d, total group:%zu, current group:%p", pQInfo, pQInfo->groupIndex,
dengyihao's avatar
dengyihao 已提交
4509
             numOfGroups, group);
H
Haojun Liao 已提交
4510 4511 4512 4513 4514 4515 4516 4517 4518 4519 4520 4521 4522 4523 4524 4525 4526 4527 4528 4529

      STsdbQueryCond cond = {
          .twindow = pQuery->window,
          .colList = pQuery->colList,
          .order   = pQuery->order.order,
          .numOfCols = pQuery->numOfCols,
      };

      SArray *g1 = taosArrayInit(1, POINTER_BYTES);
      SArray *tx = taosArrayClone(group);
      taosArrayPush(g1, &tx);
      
      STableGroupInfo gp = {.numOfTables = taosArrayGetSize(tx), .pGroupList = g1};

      // include only current table
      if (pRuntimeEnv->pQueryHandle != NULL) {
        tsdbCleanupQueryHandle(pRuntimeEnv->pQueryHandle);
        pRuntimeEnv->pQueryHandle = NULL;
      }
      
4530
      if (isFirstLastRowQuery(pQuery)) {
H
Haojun Liao 已提交
4531
        pRuntimeEnv->pQueryHandle = tsdbQueryLastRow(pQInfo->tsdb, &cond, &gp, pQInfo);
H
Haojun Liao 已提交
4532
      } else {
H
Haojun Liao 已提交
4533
        pRuntimeEnv->pQueryHandle = tsdbQueryRowsInExternalWindow(pQInfo->tsdb, &cond, &gp, pQInfo);
4534
      }
B
Bomin Zhang 已提交
4535 4536 4537 4538 4539 4540

      taosArrayDestroy(tx);
      taosArrayDestroy(g1);
      if (pRuntimeEnv->pQueryHandle == NULL) {
        longjmp(pRuntimeEnv->env, terrno);
      }
H
Haojun Liao 已提交
4541

H
Haojun Liao 已提交
4542
      initCtxOutputBuf(pRuntimeEnv);
4543
      
4544
      SArray* s = tsdbGetQueriedTableList(pRuntimeEnv->pQueryHandle);
4545 4546
      assert(taosArrayGetSize(s) >= 1);
      
4547
      setTagVal(pRuntimeEnv, taosArrayGetP(s, 0), pQInfo->tsdb);
4548 4549 4550
      if (isFirstLastRowQuery(pQuery)) {
        assert(taosArrayGetSize(s) == 1);
      }
H
Haojun Liao 已提交
4551

dengyihao's avatar
dengyihao 已提交
4552
      taosArrayDestroy(s);
H
Haojun Liao 已提交
4553

H
Haojun Liao 已提交
4554
      // here we simply set the first table as current table
4555 4556 4557
      SArray* first = GET_TABLEGROUP(pQInfo, pQInfo->groupIndex);
      pQuery->current = taosArrayGetP(first, 0);

4558
      scanOneTableDataBlocks(pRuntimeEnv, pQuery->current->lastKey);
H
Haojun Liao 已提交
4559 4560 4561 4562 4563 4564 4565 4566 4567 4568 4569 4570
      
      int64_t numOfRes = getNumOfResult(pRuntimeEnv);
      if (numOfRes > 0) {
        pQuery->rec.rows += numOfRes;
        forwardCtxOutputBuf(pRuntimeEnv, numOfRes);
      }
      
      skipResults(pRuntimeEnv);
      pQInfo->groupIndex += 1;

      // enable execution for next table, when handling the projection query
      enableExecutionForNextTable(pRuntimeEnv);
4571 4572 4573 4574 4575 4576

      if (pQuery->rec.rows >= pQuery->rec.capacity) {
        setQueryStatus(pQuery, QUERY_RESBUF_FULL);
        break;
      }
    }
H
Haojun Liao 已提交
4577
  } else if (pRuntimeEnv->groupbyNormalCol) { // group-by on normal columns query
4578
    while (pQInfo->groupIndex < numOfGroups) {
H
Haojun Liao 已提交
4579
      SArray* group = taosArrayGetP(pQInfo->tableGroupInfo.pGroupList, pQInfo->groupIndex);
4580

4581
      qDebug("QInfo:%p group by normal columns group:%d, total group:%zu", pQInfo, pQInfo->groupIndex, numOfGroups);
4582 4583 4584 4585 4586 4587 4588 4589 4590 4591 4592 4593 4594 4595 4596 4597 4598 4599 4600 4601

      STsdbQueryCond cond = {
          .twindow = pQuery->window,
          .colList = pQuery->colList,
          .order   = pQuery->order.order,
          .numOfCols = pQuery->numOfCols,
      };

      SArray *g1 = taosArrayInit(1, POINTER_BYTES);
      SArray *tx = taosArrayClone(group);
      taosArrayPush(g1, &tx);

      STableGroupInfo gp = {.numOfTables = taosArrayGetSize(tx), .pGroupList = g1};

      // include only current table
      if (pRuntimeEnv->pQueryHandle != NULL) {
        tsdbCleanupQueryHandle(pRuntimeEnv->pQueryHandle);
        pRuntimeEnv->pQueryHandle = NULL;
      }

H
Haojun Liao 已提交
4602
      // no need to update the lastkey for each table
4603
      pRuntimeEnv->pQueryHandle = tsdbQueryTables(pQInfo->tsdb, &cond, &gp, pQInfo);
H
Haojun Liao 已提交
4604

B
Bomin Zhang 已提交
4605 4606
      taosArrayDestroy(g1);
      taosArrayDestroy(tx);
B
Bomin Zhang 已提交
4607 4608 4609
      if (pRuntimeEnv->pQueryHandle == NULL) {
        longjmp(pRuntimeEnv->env, terrno);
      }
4610

4611
      SArray* s = tsdbGetQueriedTableList(pRuntimeEnv->pQueryHandle);
4612 4613
      assert(taosArrayGetSize(s) >= 1);

4614
      setTagVal(pRuntimeEnv, taosArrayGetP(s, 0), pQInfo->tsdb);
4615 4616 4617 4618 4619 4620 4621 4622

      // here we simply set the first table as current table
      scanMultiTableDataBlocks(pQInfo);
      pQInfo->groupIndex += 1;

      SWindowResInfo *pWindowResInfo = &pRuntimeEnv->windowResInfo;

        // no results generated for current group, continue to try the next group
dengyihao's avatar
dengyihao 已提交
4623
      taosArrayDestroy(s); 
4624 4625 4626 4627 4628 4629 4630 4631 4632 4633 4634 4635 4636 4637
      if (pWindowResInfo->size <= 0) {
        continue;
      }

      for (int32_t i = 0; i < pWindowResInfo->size; ++i) {
        SWindowStatus *pStatus = &pWindowResInfo->pResult[i].status;
        pStatus->closed = true;  // enable return all results for group by normal columns

        SWindowResult *pResult = &pWindowResInfo->pResult[i];
        for (int32_t j = 0; j < pQuery->numOfOutput; ++j) {
          pResult->numOfRows = MAX(pResult->numOfRows, pResult->resultInfo[j].numOfRes);
        }
      }

4638
      qDebug("QInfo:%p generated groupby columns results %d rows for group %d completed", pQInfo, pWindowResInfo->size,
4639 4640 4641 4642 4643 4644 4645
          pQInfo->groupIndex);
      int32_t currentGroupIndex = pQInfo->groupIndex;

      pQuery->rec.rows = 0;
      pQInfo->groupIndex = 0;

      ensureOutputBufferSimple(pRuntimeEnv, pWindowResInfo->size);
4646
      copyFromWindowResToSData(pQInfo, pWindowResInfo);
4647 4648 4649 4650 4651 4652

      pQInfo->groupIndex = currentGroupIndex;  //restore the group index
      assert(pQuery->rec.rows == pWindowResInfo->size);

      clearClosedTimeWindow(pRuntimeEnv);
      break;
4653 4654 4655
    }
  } else {
    /*
4656
     * 1. super table projection query, 2. ts-comp query
4657 4658 4659
     * if the subgroup index is larger than 0, results generated by group by tbname,k is existed.
     * we need to return it to client in the first place.
     */
4660
    if (pQInfo->groupIndex > 0) {
4661
      copyFromWindowResToSData(pQInfo, &pRuntimeEnv->windowResInfo);
4662
      pQuery->rec.total += pQuery->rec.rows;
4663

4664
      if (pQuery->rec.rows > 0) {
4665 4666 4667
        return;
      }
    }
4668

4669
    // all data have returned already
4670
    if (pQInfo->tableIndex >= pQInfo->tableqinfoGroupInfo.numOfTables) {
4671 4672
      return;
    }
4673

4674 4675
    resetCtxOutputBuf(pRuntimeEnv);
    resetTimeWindowInfo(pRuntimeEnv, &pRuntimeEnv->windowResInfo);
4676

H
Haojun Liao 已提交
4677
    SArray *group = GET_TABLEGROUP(pQInfo, 0);
4678 4679
    assert(taosArrayGetSize(group) == pQInfo->tableqinfoGroupInfo.numOfTables &&
           1 == taosArrayGetSize(pQInfo->tableqinfoGroupInfo.pGroupList));
4680

4681
    while (pQInfo->tableIndex < pQInfo->tableqinfoGroupInfo.numOfTables) {
H
Haojun Liao 已提交
4682
      if (IS_QUERY_KILLED(pQInfo)) {
H
Haojun Liao 已提交
4683
        longjmp(pRuntimeEnv->env, TSDB_CODE_TSC_QUERY_CANCELLED);
4684
      }
4685

4686
      pQuery->current = taosArrayGetP(group, pQInfo->tableIndex);
4687
      if (!multiTableMultioutputHelper(pQInfo, pQInfo->tableIndex)) {
4688
        pQInfo->tableIndex++;
4689 4690
        continue;
      }
4691

H
hjxilinx 已提交
4692
      // TODO handle the limit offset problem
4693
      if (pQuery->numOfFilterCols == 0 && pQuery->limit.offset > 0) {
4694 4695
        if (Q_STATUS_EQUAL(pQuery->status, QUERY_COMPLETED)) {
          pQInfo->tableIndex++;
4696 4697 4698
          continue;
        }
      }
4699

4700
      scanOneTableDataBlocks(pRuntimeEnv, pQuery->current->lastKey);
4701
      skipResults(pRuntimeEnv);
4702

4703
      // the limitation of output result is reached, set the query completed
4704
      if (limitResults(pRuntimeEnv)) {
4705
        pQInfo->tableIndex = pQInfo->tableqinfoGroupInfo.numOfTables;
4706 4707
        break;
      }
4708

4709 4710
      // enable execution for next table, when handling the projection query
      enableExecutionForNextTable(pRuntimeEnv);
4711

4712
      if (Q_STATUS_EQUAL(pQuery->status, QUERY_COMPLETED)) {
4713 4714 4715 4716 4717 4718
        /*
         * query range is identical in terms of all meters involved in query,
         * so we need to restore them at the *beginning* of query on each meter,
         * not the consecutive query on meter on which is aborted due to buffer limitation
         * to ensure that, we can reset the query range once query on a meter is completed.
         */
4719
        pQInfo->tableIndex++;
weixin_48148422's avatar
weixin_48148422 已提交
4720

H
Haojun Liao 已提交
4721
        STableIdInfo tidInfo = {0};
4722

H
Haojun Liao 已提交
4723 4724 4725
        STableId* id = TSDB_TABLEID(pQuery->current->pTable);
        tidInfo.uid = id->uid;
        tidInfo.tid = id->tid;
weixin_48148422's avatar
weixin_48148422 已提交
4726
        tidInfo.key = pQuery->current->lastKey;
weixin_48148422's avatar
weixin_48148422 已提交
4727 4728
        taosArrayPush(pQInfo->arrTableIdInfo, &tidInfo);

4729
        // if the buffer is full or group by each table, we need to jump out of the loop
H
Haojun Liao 已提交
4730
        if (Q_STATUS_EQUAL(pQuery->status, QUERY_RESBUF_FULL)) {
4731 4732
          break;
        }
4733

4734
      } else {
4735
        // all data in the result buffer are skipped due to the offset, continue to retrieve data from current meter
4736 4737
        if (pQuery->rec.rows == 0) {
          assert(!Q_STATUS_EQUAL(pQuery->status, QUERY_RESBUF_FULL));
4738 4739
          continue;
        } else {
4740 4741 4742
          // buffer is full, wait for the next round to retrieve data from current meter
          assert(Q_STATUS_EQUAL(pQuery->status, QUERY_RESBUF_FULL));
          break;
4743 4744 4745
        }
      }
    }
H
Haojun Liao 已提交
4746

4747
    if (pQInfo->tableIndex >= pQInfo->tableqinfoGroupInfo.numOfTables) {
H
Haojun Liao 已提交
4748 4749
      setQueryStatus(pQuery, QUERY_COMPLETED);
    }
4750
  }
4751

4752 4753 4754 4755 4756 4757 4758 4759 4760 4761 4762 4763
  /*
   * 1. super table projection query, group-by on normal columns query, ts-comp query
   * 2. point interpolation query, last row query
   *
   * group-by on normal columns query and last_row query do NOT invoke the finalizer here,
   * since the finalize stage will be done at the client side.
   *
   * projection query, point interpolation query do not need the finalizer.
   *
   * Only the ts-comp query requires the finalizer function to be executed here.
   */
  if (isTSCompQuery(pQuery)) {
H
hjxilinx 已提交
4764
    finalizeQueryResult(pRuntimeEnv);
4765
  }
4766

4767 4768 4769
  if (pRuntimeEnv->pTSBuf != NULL) {
    pRuntimeEnv->cur = pRuntimeEnv->pTSBuf->cur;
  }
4770

4771
  qDebug(
B
Bomin Zhang 已提交
4772
      "QInfo %p numOfTables:%"PRIu64", index:%d, numOfGroups:%zu, %"PRId64" points returned, total:%"PRId64", offset:%" PRId64,
4773
      pQInfo, pQInfo->tableqinfoGroupInfo.numOfTables, pQInfo->tableIndex, numOfGroups, pQuery->rec.rows, pQuery->rec.total,
4774
      pQuery->limit.offset);
4775 4776
}

4777 4778 4779 4780
/**
 * Switch the runtime environment from the master scan to the reverse scan.
 *
 * The scan flag is set to REVERSE_SCAN, the query window keys are swapped, the
 * query order is flipped, and a secondary tsdb query handle is created for the
 * flipped window/order. If that handle cannot be created, the function does not
 * return: it performs longjmp(pRuntimeEnv->env, terrno).
 *
 * @param pQInfo  query handle
 */
static void doSaveContext(SQInfo *pQInfo) {
  SQueryRuntimeEnv *pRuntimeEnv = &pQInfo->runtimeEnv;
  SQuery *          pQuery = pRuntimeEnv->pQuery;

  SET_REVERSE_SCAN_FLAG(pRuntimeEnv);
  SWAP(pQuery->window.skey, pQuery->window.ekey, TSKEY);
  SWITCH_ORDER(pQuery->order.order);

  // keep the order of the ts buffer cursor in line with the flipped query order
  if (pRuntimeEnv->pTSBuf != NULL) {
    pRuntimeEnv->pTSBuf->cur.order = pQuery->order.order;
  }

  // built after the swap/flip above, so it describes the reverse scan
  STsdbQueryCond cond = {
      .twindow = pQuery->window,
      .order   = pQuery->order.order,
      .colList = pQuery->colList,
      .numOfCols = pQuery->numOfCols,
  };

  // clean unused handle; clear the pointer so that no stale handle is left behind,
  // in the same way pQueryHandle is handled elsewhere in this file
  if (pRuntimeEnv->pSecQueryHandle != NULL) {
    tsdbCleanupQueryHandle(pRuntimeEnv->pSecQueryHandle);
    pRuntimeEnv->pSecQueryHandle = NULL;
  }

  setQueryStatus(pQuery, QUERY_NOT_COMPLETED);
  switchCtxOrder(pRuntimeEnv);
  disableFuncInReverseScan(pQInfo);
  setupQueryRangeForReverseScan(pQInfo);

  pRuntimeEnv->prevGroupId = INT32_MIN;
  pRuntimeEnv->pSecQueryHandle = tsdbQueryTables(pQInfo->tsdb, &cond, &pQInfo->tableGroupInfo, pQInfo);
  if (pRuntimeEnv->pSecQueryHandle == NULL) {
    longjmp(pRuntimeEnv->env, terrno);
  }
}

4813 4814 4815 4816
/**
 * Switch the runtime environment back to the master scan after a reverse scan:
 * the query window keys are swapped back, the order of the ts buffer cursor and
 * of the function contexts is flipped, and the scan flag is set to MASTER_SCAN.
 *
 * NOTE(review): doSaveContext() also flips pQuery->order.order, which is not
 * flipped back here unless switchCtxOrder() does so - confirm this is intended.
 *
 * @param pQInfo  query handle
 */
static void doRestoreContext(SQInfo *pQInfo) {
  SQueryRuntimeEnv *pRuntimeEnv = &pQInfo->runtimeEnv;
  SQuery *          pQuery = pRuntimeEnv->pQuery;

  SWAP(pQuery->window.skey, pQuery->window.ekey, TSKEY);

  if (pRuntimeEnv->pTSBuf != NULL) {
    SWITCH_ORDER(pRuntimeEnv->pTSBuf->cur.order);
  }

  switchCtxOrder(pRuntimeEnv);
  SET_MASTER_SCAN_FLAG(pRuntimeEnv);
}

4827 4828 4829
/**
 * Close all time window results once a scan is finished.
 *
 * For an interval query the window results of every table in every group are
 * closed; otherwise the window results kept in the runtime environment (the
 * group results) are closed.
 *
 * @param pQInfo  query handle
 */
static void doCloseAllTimeWindowAfterScan(SQInfo *pQInfo) {
  SQuery *pQuery = pQInfo->runtimeEnv.pQuery;

  if (QUERY_IS_INTERVAL_QUERY(pQuery)) {
    size_t numOfGroup = GET_NUM_OF_TABLEGROUP(pQInfo);

    // size_t indices: both bounds are size_t, which avoids a signed/unsigned comparison
    for (size_t i = 0; i < numOfGroup; ++i) {
      SArray *group = GET_TABLEGROUP(pQInfo, i);

      size_t num = taosArrayGetSize(group);
      for (size_t j = 0; j < num; ++j) {
        STableQueryInfo* item = taosArrayGetP(group, j);
        closeAllTimeWindow(&item->windowResInfo);
      }
    }
  } else {  // close results for group result
    closeAllTimeWindow(&pQInfo->runtimeEnv.windowResInfo);
  }
}

/**
 * Execute a query over multiple tables: a master scan, an optional reverse
 * scan, and finally the copy of the results into the output buffer.
 *
 * When pQInfo->groupIndex > 0 no scan is performed and only the remaining
 * results are copied into the output buffer.
 *
 * If an error code is set in pQInfo or the query is killed, the results are
 * finalized and the function leaves through longjmp(pRuntimeEnv->env, ...).
 * NOTE(review): the longjmp value is always TSDB_CODE_TSC_QUERY_CANCELLED,
 * even when pQInfo->code carries a different error - confirm this is intended.
 *
 * @param pQInfo  query handle
 */
static void multiTableQueryProcess(SQInfo *pQInfo) {
  SQueryRuntimeEnv *pRuntimeEnv = &pQInfo->runtimeEnv;
  SQuery *          pQuery = pRuntimeEnv->pQuery;

  if (pQInfo->groupIndex > 0) {
    /*
     * if the groupIndex > 0, the query process must have been completed already,
     * we only need to copy the data into the output buffer
     */
    if (QUERY_IS_INTERVAL_QUERY(pQuery)) {
      copyResToQueryResultBuf(pQInfo, pQuery);
#ifdef _DEBUG_VIEW
      displayInterResult(pQuery->sdata, pRuntimeEnv, pQuery->sdata[0]->num);
#endif
    } else {
      copyFromWindowResToSData(pQInfo, &pRuntimeEnv->windowResInfo);
    }

    qDebug("QInfo:%p current:%"PRId64", total:%"PRId64"", pQInfo, pQuery->rec.rows, pQuery->rec.total);
    return;
  }

  qDebug("QInfo:%p query start, qrange:%" PRId64 "-%" PRId64 ", order:%d, forward scan start", pQInfo,
         pQuery->window.skey, pQuery->window.ekey, pQuery->order.order);

  // do check all qualified data blocks
  int64_t el = scanMultiTableDataBlocks(pQInfo);
  qDebug("QInfo:%p master scan completed, elapsed time: %" PRId64 "ms, reverse scan start", pQInfo, el);

  // query error occurred or query is killed, abort current execution
  if (pQInfo->code != TSDB_CODE_SUCCESS || IS_QUERY_KILLED(pQInfo)) {
    qDebug("QInfo:%p query killed or error occurred, code:%s, abort", pQInfo, tstrerror(pQInfo->code));
    finalizeQueryResult(pRuntimeEnv); // clean up allocated resource during query
    longjmp(pRuntimeEnv->env, TSDB_CODE_TSC_QUERY_CANCELLED);
  }

  // close all time window results
  doCloseAllTimeWindowAfterScan(pQInfo);

  if (needReverseScan(pQuery)) {
    // flip window/order for the reverse scan and flip them back afterwards
    doSaveContext(pQInfo);

    el = scanMultiTableDataBlocks(pQInfo);
    qDebug("QInfo:%p reversed scan completed, elapsed time: %" PRId64 "ms", pQInfo, el);

    doRestoreContext(pQInfo);
  } else {
    qDebug("QInfo:%p no need to do reversed scan, query completed", pQInfo);
  }

  setQueryStatus(pQuery, QUERY_COMPLETED);

  // check again, the reverse scan may have failed or the query may have been killed meanwhile
  if (pQInfo->code != TSDB_CODE_SUCCESS || IS_QUERY_KILLED(pQInfo)) {
    qDebug("QInfo:%p query killed or error occurred, code:%s, abort", pQInfo, tstrerror(pQInfo->code));
    finalizeQueryResult(pRuntimeEnv); // clean up allocated resource during query
    longjmp(pRuntimeEnv->env, TSDB_CODE_TSC_QUERY_CANCELLED);
  }

  if (QUERY_IS_INTERVAL_QUERY(pQuery) || isSumAvgRateQuery(pQuery)) {
    // results are copied only if the merge into the group results succeeds
    if (mergeIntoGroupResult(pQInfo) == TSDB_CODE_SUCCESS) {
      copyResToQueryResultBuf(pQInfo, pQuery);

#ifdef _DEBUG_VIEW
      displayInterResult(pQuery->sdata, pRuntimeEnv, pQuery->sdata[0]->num);
#endif
    }
  } else {  // not an interval query
    copyFromWindowResToSData(pQInfo, &pRuntimeEnv->windowResInfo);
  }

  // handle the limitation of output buffer
  qDebug("QInfo:%p points returned:%" PRId64 ", total:%" PRId64, pQInfo, pQuery->rec.rows, pQuery->rec.total + pQuery->rec.rows);
}

/*
 * in each query, this function will be called only once, no retry for further result.
 *
 * select count(*)/top(field,k)/avg(field name) from table_name [where ts>now-1a];
 * select count(*) from table_name group by status_column;
 */
H
hjxilinx 已提交
4927
static void tableFixedOutputProcess(SQInfo *pQInfo, STableQueryInfo* pTableInfo) {
4928
  SQueryRuntimeEnv *pRuntimeEnv = &pQInfo->runtimeEnv;
H
hjxilinx 已提交
4929 4930
  
  SQuery *pQuery = pRuntimeEnv->pQuery;
H
Haojun Liao 已提交
4931
  if (!pRuntimeEnv->topBotQuery && pQuery->limit.offset > 0) {  // no need to execute, since the output will be ignore.
H
Haojun Liao 已提交
4932 4933 4934
    return;
  }
  
H
hjxilinx 已提交
4935 4936
  pQuery->current = pTableInfo;  // set current query table info
  
4937
  scanOneTableDataBlocks(pRuntimeEnv, pTableInfo->lastKey);
H
hjxilinx 已提交
4938
  finalizeQueryResult(pRuntimeEnv);
4939

H
Haojun Liao 已提交
4940
  if (IS_QUERY_KILLED(pQInfo)) {
H
Haojun Liao 已提交
4941 4942
    finalizeQueryResult(pRuntimeEnv); // clean up allocated resource during query
    longjmp(pRuntimeEnv->env, TSDB_CODE_TSC_QUERY_CANCELLED);
4943
  }
4944

H
Haojun Liao 已提交
4945
  // since the numOfRows must be identical for all sql functions that are allowed to be executed simutaneously.
4946
  pQuery->rec.rows = getNumOfResult(pRuntimeEnv);
4947

4948
  skipResults(pRuntimeEnv);
4949
  limitResults(pRuntimeEnv);
4950 4951
}

H
hjxilinx 已提交
4952
/**
 * Single table query whose functions generate multiple output rows.
 *
 * The table is scanned repeatedly until at least one row survives the offset
 * handling or the query is completed. When the query is completed, the id and
 * the last key of the table are appended to pQInfo->arrTableIdInfo.
 *
 * @param pQInfo      query handle
 * @param pTableInfo  the table to be scanned; becomes pQuery->current
 */
static void tableMultiOutputProcess(SQInfo *pQInfo, STableQueryInfo* pTableInfo) {
  SQueryRuntimeEnv *pRuntimeEnv = &pQInfo->runtimeEnv;

  SQuery *pQuery = pRuntimeEnv->pQuery;
  pQuery->current = pTableInfo;

  // for ts_comp query, re-initialization is not allowed
  if (!isTSCompQuery(pQuery)) {
    resetCtxOutputBuf(pRuntimeEnv);
  }

  // skip blocks without loading the actual data block from file if no filter condition is present
  skipBlocks(&pQInfo->runtimeEnv);
  if (pQuery->limit.offset > 0 && pQuery->numOfFilterCols == 0) {
    // the offset is still not consumed although no filter is involved: nothing to return
    setQueryStatus(pQuery, QUERY_COMPLETED);
    return;
  }

  while (1) {
    scanOneTableDataBlocks(pRuntimeEnv, pQuery->current->lastKey);
    finalizeQueryResult(pRuntimeEnv);

    pQuery->rec.rows = getNumOfResult(pRuntimeEnv);
    if (pQuery->limit.offset > 0 && pQuery->numOfFilterCols > 0 && pQuery->rec.rows > 0) {
      skipResults(pRuntimeEnv);
    }

    /*
     * 1. if pQuery->rec.rows == 0, pQuery->limit.offset >= 0, still need to check data
     * 2. if pQuery->rec.rows > 0, pQuery->limit.offset must be 0
     */
    if (pQuery->rec.rows > 0 || Q_STATUS_EQUAL(pQuery->status, QUERY_COMPLETED)) {
      break;
    }

    qDebug("QInfo:%p skip current result, offset:%" PRId64 ", next qrange:%" PRId64 "-%" PRId64,
           pQInfo, pQuery->limit.offset, pQuery->current->lastKey, pQuery->current->win.ekey);

    resetCtxOutputBuf(pRuntimeEnv);
  }

  limitResults(pRuntimeEnv);
  if (Q_STATUS_EQUAL(pQuery->status, QUERY_RESBUF_FULL)) {
    qDebug("QInfo:%p query paused due to output limitation, next qrange:%" PRId64 "-%" PRId64, pQInfo,
        pQuery->current->lastKey, pQuery->window.ekey);
  } else if (Q_STATUS_EQUAL(pQuery->status, QUERY_COMPLETED)) {
    // Zero-initialized, as in sequentialTableProcess(): the whole struct (padding included)
    // is copied into the array, so no indeterminate bytes must be left in it.
    STableIdInfo tidInfo = {0};
    STableId* id = TSDB_TABLEID(pQuery->current->pTable);

    tidInfo.uid = id->uid;
    tidInfo.tid = id->tid;
    tidInfo.key = pQuery->current->lastKey;
    taosArrayPush(pQInfo->arrTableIdInfo, &tidInfo);
  }

  if (!isTSCompQuery(pQuery)) {
    assert(pQuery->rec.rows <= pQuery->rec.capacity);
  }
}

H
Haojun Liao 已提交
5012
/**
 * Scan the current table from the given key until the query is completed or
 * the result buffer is full. After each scan the results are finalized and,
 * when no fill is involved, closed time windows are dropped to consume the
 * pending offset.
 *
 * @param pRuntimeEnv  runtime environment of the query
 * @param start        key passed to scanOneTableDataBlocks() for every scan
 */
static void tableIntervalProcessImpl(SQueryRuntimeEnv *pRuntimeEnv, TSKEY start) {
  SQuery *pQuery = pRuntimeEnv->pQuery;

  while (1) {
    scanOneTableDataBlocks(pRuntimeEnv, start);

    assert(!Q_STATUS_EQUAL(pQuery->status, QUERY_NOT_COMPLETED));
    finalizeQueryResult(pRuntimeEnv);

    // here we can ignore the records in case of no interpolation
    // todo handle offset, in case of top/bottom interval query
    if ((pQuery->numOfFilterCols > 0 || pRuntimeEnv->pTSBuf != NULL) && pQuery->limit.offset > 0 &&
        pQuery->fillType == TSDB_FILL_NONE) {
      // numOfClosed <= 0 means that the current scan did not generate any closed result
      int32_t numOfClosed = numOfClosedTimeWindow(&pRuntimeEnv->windowResInfo);

      // drop as many closed windows as the remaining offset allows
      int32_t c = MIN(numOfClosed, pQuery->limit.offset);
      clearFirstNTimeWindow(pRuntimeEnv, c);
      pQuery->limit.offset -= c;
    }

    if (Q_STATUS_EQUAL(pQuery->status, QUERY_COMPLETED | QUERY_RESBUF_FULL)) {
      break;
    }
  }
}

5039
// handle time interval query on table
H
hjxilinx 已提交
5040
static void tableIntervalProcess(SQInfo *pQInfo, STableQueryInfo* pTableInfo) {
5041 5042
  SQueryRuntimeEnv *pRuntimeEnv = &(pQInfo->runtimeEnv);

H
hjxilinx 已提交
5043 5044
  SQuery *pQuery = pRuntimeEnv->pQuery;
  pQuery->current = pTableInfo;
5045

H
Haojun Liao 已提交
5046
  int32_t numOfFilled = 0;
H
Haojun Liao 已提交
5047 5048
  TSKEY newStartKey = TSKEY_INITIAL_VAL;
  
5049
  // skip blocks without load the actual data block from file if no filter condition present
H
Haojun Liao 已提交
5050
  skipTimeInterval(pRuntimeEnv, &newStartKey);
5051
  if (pQuery->limit.offset > 0 && pQuery->numOfFilterCols == 0 && pRuntimeEnv->pFillInfo == NULL) {
5052 5053 5054 5055
    setQueryStatus(pQuery, QUERY_COMPLETED);
    return;
  }

5056
  while (1) {
H
Haojun Liao 已提交
5057
    tableIntervalProcessImpl(pRuntimeEnv, newStartKey);
5058

H
Haojun Liao 已提交
5059
    if (QUERY_IS_INTERVAL_QUERY(pQuery)) {
5060
      pQInfo->groupIndex = 0;  // always start from 0
5061
      pQuery->rec.rows = 0;
5062
      copyFromWindowResToSData(pQInfo, &pRuntimeEnv->windowResInfo);
5063

5064
      clearFirstNTimeWindow(pRuntimeEnv, pQInfo->groupIndex);
5065
    }
5066

5067
    // the offset is handled at prepare stage if no interpolation involved
5068
    if (pQuery->fillType == TSDB_FILL_NONE || pQuery->rec.rows == 0) {
5069
      limitResults(pRuntimeEnv);
5070 5071
      break;
    } else {
H
Haojun Liao 已提交
5072
      taosFillSetStartInfo(pRuntimeEnv->pFillInfo, pQuery->rec.rows, pQuery->window.ekey);
5073
      taosFillCopyInputDataFromFilePage(pRuntimeEnv->pFillInfo, (tFilePage**) pQuery->sdata);
H
Haojun Liao 已提交
5074
      numOfFilled = 0;
5075
      
H
Haojun Liao 已提交
5076
      pQuery->rec.rows = doFillGapsInResults(pRuntimeEnv, (tFilePage **)pQuery->sdata, &numOfFilled);
5077
      if (pQuery->rec.rows > 0 || Q_STATUS_EQUAL(pQuery->status, QUERY_COMPLETED)) {
5078
        limitResults(pRuntimeEnv);
5079 5080
        break;
      }
5081

5082
      // no result generated yet, continue retrieve data
5083
      pQuery->rec.rows = 0;
5084 5085
    }
  }
5086

5087
  // all data scanned, the group by normal column can return
H
Haojun Liao 已提交
5088
  if (pRuntimeEnv->groupbyNormalCol) {  // todo refactor with merge interval time result
5089
    pQInfo->groupIndex = 0;
5090
    pQuery->rec.rows = 0;
5091
    copyFromWindowResToSData(pQInfo, &pRuntimeEnv->windowResInfo);
5092
    clearFirstNTimeWindow(pRuntimeEnv, pQInfo->groupIndex);
5093 5094 5095
  }
}

5096 5097 5098 5099
/**
 * Entry of the query on a single table.
 *
 * If results from a previous invocation remain, they are returned first: either
 * by continuing the fill procedure or by copying the remaining window results.
 * Otherwise the query is dispatched to the interval, fixed-output or
 * multi-output processor, and the elapsed time is added to the summary.
 *
 * @param pQInfo  query handle; must reference exactly one table
 */
static void tableQueryImpl(SQInfo *pQInfo) {
  SQueryRuntimeEnv *pRuntimeEnv = &pQInfo->runtimeEnv;
  SQuery *          pQuery = pRuntimeEnv->pQuery;

  if (queryHasRemainResults(pRuntimeEnv)) {

    if (pQuery->fillType != TSDB_FILL_NONE) {
      /*
       * There are remaining results that have not been returned yet due to the result interpolation.
       * So we stay in this procedure instead of launching the retrieve procedure for the next results.
       */
      int32_t numOfFilled = 0;
      pQuery->rec.rows = doFillGapsInResults(pRuntimeEnv, (tFilePage **)pQuery->sdata, &numOfFilled);

      if (pQuery->rec.rows > 0) {
        limitResults(pRuntimeEnv);
      }

      qDebug("QInfo:%p current:%" PRId64 " returned, total:%" PRId64, pQInfo, pQuery->rec.rows, pQuery->rec.total);
      return;
    } else {
      pQuery->rec.rows = 0;
      pQInfo->groupIndex = 0;  // always start from 0

      if (pRuntimeEnv->windowResInfo.size > 0) {
        copyFromWindowResToSData(pQInfo, &pRuntimeEnv->windowResInfo);
        clearFirstNTimeWindow(pRuntimeEnv, pQInfo->groupIndex);

        if (pQuery->rec.rows > 0) {
          qDebug("QInfo:%p %"PRId64" rows returned from group results, total:%"PRId64"", pQInfo, pQuery->rec.rows, pQuery->rec.total);

          // no window results remain
          if (pRuntimeEnv->windowResInfo.size <= 0) {
            qDebug("QInfo:%p query over, %"PRId64" rows are returned", pQInfo, pQuery->rec.total);
          }

          return;
        }
      }
    }
  }

  // number of points returned during this query
  pQuery->rec.rows = 0;
  int64_t st = taosGetTimestampUs();

  assert(pQInfo->tableqinfoGroupInfo.numOfTables == 1);
  SArray* g = GET_TABLEGROUP(pQInfo, 0);
  STableQueryInfo* item = taosArrayGetP(g, 0);

  // group by normal column, sliding window query, interval query are handled by interval query processor
  if (QUERY_IS_INTERVAL_QUERY(pQuery) || pRuntimeEnv->groupbyNormalCol) {  // interval (down sampling operation)
    tableIntervalProcess(pQInfo, item);
  } else if (isFixedOutputQuery(pRuntimeEnv)) {
    tableFixedOutputProcess(pQInfo, item);
  } else {  // diff/add/multiply/subtract/division
    assert(pQuery->checkBuffer == 1);
    tableMultiOutputProcess(pQInfo, item);
  }

  // record the total elapsed time
  pRuntimeEnv->summary.elapsedTime += (taosGetTimestampUs() - st);
  assert(pQInfo->tableqinfoGroupInfo.numOfTables == 1);
}

5161
/**
 * Entry of the query on a super table.
 *
 * Interval queries, and fixed-output queries that are neither point
 * interpolation, group-by normal column nor first/last row queries, are handled
 * by multiTableQueryProcess(); every other query is handled by
 * sequentialTableProcess(). The elapsed time is added to the summary.
 *
 * @param pQInfo  query handle
 */
static void stableQueryImpl(SQInfo *pQInfo) {
  SQueryRuntimeEnv* pRuntimeEnv = &pQInfo->runtimeEnv;
  SQuery *pQuery = pRuntimeEnv->pQuery;
  pQuery->rec.rows = 0;

  int64_t st = taosGetTimestampUs();

  if (QUERY_IS_INTERVAL_QUERY(pQuery) ||
      (isFixedOutputQuery(pRuntimeEnv) && (!isPointInterpoQuery(pQuery)) && !pRuntimeEnv->groupbyNormalCol &&
      !isFirstLastRowQuery(pQuery))) {
    multiTableQueryProcess(pQInfo);
  } else {
    assert((pQuery->checkBuffer == 1 && pQuery->intervalTime == 0) || isPointInterpoQuery(pQuery) ||
            isFirstLastRowQuery(pQuery) || pRuntimeEnv->groupbyNormalCol);

    sequentialTableProcess(pQInfo);
  }

  // record the total elapsed time
  pQInfo->runtimeEnv.summary.elapsedTime += (taosGetTimestampUs() - st);
}

5184
/**
 * Look up the position of the column referenced by an expression.
 *
 * A tag column is searched in pTagCols (pQueryMsg->numOfTags entries), any other
 * column in pQueryMsg->colList (pQueryMsg->numOfCols entries).
 *
 * @param pQueryMsg  query message that supplies the column list and the counters
 * @param pExprMsg   expression whose colInfo identifies the column
 * @param pTagCols   tag column list
 * @return the index of the column in its list, -1 for the table name pseudo
 *         column, or INT32_MIN if the column does not exist. The last case also
 *         trips an assertion in debug builds.
 */
static int32_t getColumnIndexInSource(SQueryTableMsg *pQueryMsg, SSqlFuncMsg *pExprMsg, SColumnInfo* pTagCols) {
  int32_t j = 0;

  if (TSDB_COL_IS_TAG(pExprMsg->colInfo.flag)) {
    if (pExprMsg->colInfo.colId == TSDB_TBNAME_COLUMN_INDEX) {
      return -1;
    }

    while(j < pQueryMsg->numOfTags) {
      if (pExprMsg->colInfo.colId == pTagCols[j].colId) {
        return j;
      }

      j += 1;
    }

  } else {
    while (j < pQueryMsg->numOfCols) {
      if (pExprMsg->colInfo.colId == pQueryMsg->colList[j].colId) {
        return j;
      }

      j += 1;
    }
  }

  assert(0);

  // Reached only when assertions are compiled out: return a well-defined "not found"
  // value instead of flowing off the end of a non-void function.
  return INT32_MIN;
}

/**
 * Check that the column referenced by an expression can be located.
 *
 * @return true if the column is found (or is the table name pseudo column),
 *         false if getColumnIndexInSource() reports it as missing.
 */
bool validateExprColumnInfo(SQueryTableMsg *pQueryMsg, SSqlFuncMsg *pExprMsg, SColumnInfo* pTagCols) {
  int32_t j = getColumnIndexInSource(pQueryMsg, pExprMsg, pTagCols);
  return j != INT32_MIN && (j < pQueryMsg->numOfCols || j < pQueryMsg->numOfTags);
}

5218
/**
 * Sanity check of the scalar fields of a query message.
 *
 * Rejected are: a negative interval time, a non-positive number of tables, a
 * negative number of group-by columns, and a number of output columns that is
 * not in the range [1, TSDB_MAX_COLUMNS]. Each rejection is logged.
 *
 * @param pQueryMsg  query message to check
 * @return true if all checked fields are in range, false otherwise
 */
static bool validateQueryMsg(SQueryTableMsg *pQueryMsg) {
  if (pQueryMsg->intervalTime < 0) {
    qError("qmsg:%p illegal value of interval time %" PRId64, pQueryMsg, pQueryMsg->intervalTime);
    return false;
  }

  if (pQueryMsg->numOfTables <= 0) {
    qError("qmsg:%p illegal value of numOfTables %d", pQueryMsg, pQueryMsg->numOfTables);
    return false;
  }

  if (pQueryMsg->numOfGroupCols < 0) {
    qError("qmsg:%p illegal value of numOfGroupbyCols %d", pQueryMsg, pQueryMsg->numOfGroupCols);
    return false;
  }

  if (pQueryMsg->numOfOutput > TSDB_MAX_COLUMNS || pQueryMsg->numOfOutput <= 0) {
    qError("qmsg:%p illegal value of output columns %d", pQueryMsg, pQueryMsg->numOfOutput);
    return false;
  }

  return true;
}

/**
 * Validate the number of source columns/tags of a query message.
 *
 * Negative counts, or more than TSDB_MAX_COLUMNS columns and tags together, are rejected.
 * A query without any source column or tag is accepted only if every output expression is
 * a tag projection, or a TID_TAG / COUNT function applied to the table-name pseudo column.
 *
 * @param pQueryMsg  query message (counts already in host byte order)
 * @param pExprMsg   array of pQueryMsg->numOfOutput expression pointers
 */
static bool validateQuerySourceCols(SQueryTableMsg *pQueryMsg, SSqlFuncMsg** pExprMsg) {
  const int32_t total = pQueryMsg->numOfCols + pQueryMsg->numOfTags;

  if (pQueryMsg->numOfCols < 0 || pQueryMsg->numOfTags < 0 || total > TSDB_MAX_COLUMNS) {
    qError("qmsg:%p illegal value of numOfCols %d numOfTags:%d", pQueryMsg, pQueryMsg->numOfCols, pQueryMsg->numOfTags);
    return false;
  }

  if (total != 0) {
    return true;
  }

  // no source column at all: only a few expressions can be evaluated
  for (int32_t k = 0; k < pQueryMsg->numOfOutput; ++k) {
    SSqlFuncMsg* pFunc = pExprMsg[k];

    const bool onTbname = (pFunc->colInfo.colId == TSDB_TBNAME_COLUMN_INDEX);
    const bool allowed  = (pFunc->functionId == TSDB_FUNC_TAGPRJ) ||
                          (onTbname && (pFunc->functionId == TSDB_FUNC_TID_TAG || pFunc->functionId == TSDB_FUNC_COUNT));
    if (!allowed) {
      return false;
    }
  }

  return true;
}

5264
/**
 * Read pQueryMsg->numOfTables STableIdInfo records starting at pMsg, convert their
 * tid/uid/key fields in place from network byte order, and append each record to a newly
 * created array stored in *pTableIdList.
 *
 * @return pointer to the first byte after the last record that was consumed
 */
static char *createTableIdList(SQueryTableMsg *pQueryMsg, char *pMsg, SArray **pTableIdList) {
  assert(pQueryMsg->numOfTables > 0);

  *pTableIdList = taosArrayInit(pQueryMsg->numOfTables, sizeof(STableIdInfo));

  STableIdInfo* pEntry = (STableIdInfo *)pMsg;
  for (int32_t k = 0; k < pQueryMsg->numOfTables; ++k, ++pEntry) {
    pEntry->tid = htonl(pEntry->tid);
    pEntry->uid = htobe64(pEntry->uid);
    pEntry->key = htobe64(pEntry->key);

    taosArrayPush(*pTableIdList, pEntry);
  }

  return (char *)pEntry;
}
5282

5283
/**
H
hjxilinx 已提交
5284
 * pQueryMsg->head has been converted before this function is called.
5285
 *
H
hjxilinx 已提交
5286
 * @param pQueryMsg
5287 5288 5289 5290
 * @param pTableIdList
 * @param pExpr
 * @return
 */
5291
static int32_t convertQueryMsg(SQueryTableMsg *pQueryMsg, SArray **pTableIdList, SSqlFuncMsg ***pExpr,
weixin_48148422's avatar
weixin_48148422 已提交
5292
                               char **tagCond, char** tbnameCond, SColIndex **groupbyCols, SColumnInfo** tagCols) {
5293 5294
  int32_t code = TSDB_CODE_SUCCESS;

5295 5296 5297 5298 5299 5300 5301 5302
  pQueryMsg->numOfTables = htonl(pQueryMsg->numOfTables);

  pQueryMsg->window.skey = htobe64(pQueryMsg->window.skey);
  pQueryMsg->window.ekey = htobe64(pQueryMsg->window.ekey);
  pQueryMsg->intervalTime = htobe64(pQueryMsg->intervalTime);
  pQueryMsg->slidingTime = htobe64(pQueryMsg->slidingTime);
  pQueryMsg->limit = htobe64(pQueryMsg->limit);
  pQueryMsg->offset = htobe64(pQueryMsg->offset);
H
hjxilinx 已提交
5303

5304 5305
  pQueryMsg->order = htons(pQueryMsg->order);
  pQueryMsg->orderColId = htons(pQueryMsg->orderColId);
H
Haojun Liao 已提交
5306
  pQueryMsg->queryType = htonl(pQueryMsg->queryType);
weixin_48148422's avatar
weixin_48148422 已提交
5307
  pQueryMsg->tagNameRelType = htons(pQueryMsg->tagNameRelType);
5308 5309

  pQueryMsg->numOfCols = htons(pQueryMsg->numOfCols);
5310
  pQueryMsg->numOfOutput = htons(pQueryMsg->numOfOutput);
H
hjxilinx 已提交
5311
  pQueryMsg->numOfGroupCols = htons(pQueryMsg->numOfGroupCols);
5312 5313 5314
  pQueryMsg->tagCondLen = htons(pQueryMsg->tagCondLen);
  pQueryMsg->tsOffset = htonl(pQueryMsg->tsOffset);
  pQueryMsg->tsLen = htonl(pQueryMsg->tsLen);
H
hjxilinx 已提交
5315
  pQueryMsg->tsNumOfBlocks = htonl(pQueryMsg->tsNumOfBlocks);
5316
  pQueryMsg->tsOrder = htonl(pQueryMsg->tsOrder);
5317
  pQueryMsg->numOfTags = htonl(pQueryMsg->numOfTags);
5318

5319
  // query msg safety check
5320
  if (!validateQueryMsg(pQueryMsg)) {
5321 5322
    code = TSDB_CODE_QRY_INVALID_MSG;
    goto _cleanup;
5323 5324
  }

H
hjxilinx 已提交
5325 5326
  char *pMsg = (char *)(pQueryMsg->colList) + sizeof(SColumnInfo) * pQueryMsg->numOfCols;
  for (int32_t col = 0; col < pQueryMsg->numOfCols; ++col) {
5327 5328
    SColumnInfo *pColInfo = &pQueryMsg->colList[col];

H
hjxilinx 已提交
5329
    pColInfo->colId = htons(pColInfo->colId);
5330
    pColInfo->type = htons(pColInfo->type);
H
hjxilinx 已提交
5331 5332
    pColInfo->bytes = htons(pColInfo->bytes);
    pColInfo->numOfFilters = htons(pColInfo->numOfFilters);
5333

H
hjxilinx 已提交
5334
    assert(pColInfo->type >= TSDB_DATA_TYPE_BOOL && pColInfo->type <= TSDB_DATA_TYPE_NCHAR);
5335

H
hjxilinx 已提交
5336
    int32_t numOfFilters = pColInfo->numOfFilters;
5337
    if (numOfFilters > 0) {
H
hjxilinx 已提交
5338
      pColInfo->filters = calloc(numOfFilters, sizeof(SColumnFilterInfo));
5339 5340 5341
    }

    for (int32_t f = 0; f < numOfFilters; ++f) {
5342 5343 5344 5345
      SColumnFilterInfo *pFilterMsg = (SColumnFilterInfo *)pMsg;
      
      SColumnFilterInfo *pColFilter = &pColInfo->filters[f];
      pColFilter->filterstr = htons(pFilterMsg->filterstr);
5346 5347 5348

      pMsg += sizeof(SColumnFilterInfo);

5349 5350
      if (pColFilter->filterstr) {
        pColFilter->len = htobe64(pFilterMsg->len);
5351

5352
        pColFilter->pz = (int64_t) calloc(1, pColFilter->len + 1 * TSDB_NCHAR_SIZE); // note: null-terminator
5353 5354
        memcpy((void *)pColFilter->pz, pMsg, pColFilter->len);
        pMsg += (pColFilter->len + 1);
5355
      } else {
5356 5357
        pColFilter->lowerBndi = htobe64(pFilterMsg->lowerBndi);
        pColFilter->upperBndi = htobe64(pFilterMsg->upperBndi);
5358 5359
      }

5360 5361
      pColFilter->lowerRelOptr = htons(pFilterMsg->lowerRelOptr);
      pColFilter->upperRelOptr = htons(pFilterMsg->upperRelOptr);
5362 5363 5364
    }
  }

5365 5366
  *pExpr = calloc(pQueryMsg->numOfOutput, POINTER_BYTES);
  SSqlFuncMsg *pExprMsg = (SSqlFuncMsg *)pMsg;
5367

5368
  for (int32_t i = 0; i < pQueryMsg->numOfOutput; ++i) {
5369
    (*pExpr)[i] = pExprMsg;
5370

5371
    pExprMsg->colInfo.colIndex = htons(pExprMsg->colInfo.colIndex);
5372 5373 5374 5375
    pExprMsg->colInfo.colId = htons(pExprMsg->colInfo.colId);
    pExprMsg->colInfo.flag = htons(pExprMsg->colInfo.flag);
    pExprMsg->functionId = htons(pExprMsg->functionId);
    pExprMsg->numOfParams = htons(pExprMsg->numOfParams);
5376

5377
    pMsg += sizeof(SSqlFuncMsg);
5378 5379

    for (int32_t j = 0; j < pExprMsg->numOfParams; ++j) {
5380
      pExprMsg->arg[j].argType = htons(pExprMsg->arg[j].argType);
5381 5382 5383 5384
      pExprMsg->arg[j].argBytes = htons(pExprMsg->arg[j].argBytes);

      if (pExprMsg->arg[j].argType == TSDB_DATA_TYPE_BINARY) {
        pExprMsg->arg[j].argValue.pz = pMsg;
5385
        pMsg += pExprMsg->arg[j].argBytes;  // one more for the string terminated char.
5386 5387 5388 5389 5390
      } else {
        pExprMsg->arg[j].argValue.i64 = htobe64(pExprMsg->arg[j].argValue.i64);
      }
    }

H
Haojun Liao 已提交
5391 5392
    int16_t functionId = pExprMsg->functionId;
    if (functionId == TSDB_FUNC_TAG || functionId == TSDB_FUNC_TAGPRJ || functionId == TSDB_FUNC_TAG_DUMMY) {
5393
      if (pExprMsg->colInfo.flag != TSDB_COL_TAG) {  // ignore the column  index check for arithmetic expression.
5394 5395
        code = TSDB_CODE_QRY_INVALID_MSG;
        goto _cleanup;
5396 5397
      }
    } else {
5398
//      if (!validateExprColumnInfo(pQueryMsg, pExprMsg)) {
5399
//        return TSDB_CODE_QRY_INVALID_MSG;
5400
//      }
5401 5402
    }

5403
    pExprMsg = (SSqlFuncMsg *)pMsg;
5404
  }
5405

5406
  if (!validateQuerySourceCols(pQueryMsg, *pExpr)) {
5407
    code = TSDB_CODE_QRY_INVALID_MSG;
dengyihao's avatar
dengyihao 已提交
5408
    goto _cleanup;
5409
  }
5410

H
hjxilinx 已提交
5411
  pMsg = createTableIdList(pQueryMsg, pMsg, pTableIdList);
5412

H
hjxilinx 已提交
5413
  if (pQueryMsg->numOfGroupCols > 0) {  // group by tag columns
5414
    *groupbyCols = malloc(pQueryMsg->numOfGroupCols * sizeof(SColIndex));
5415 5416 5417 5418
    if (*groupbyCols == NULL) {
      code = TSDB_CODE_QRY_OUT_OF_MEMORY;
      goto _cleanup;
    }
5419 5420 5421

    for (int32_t i = 0; i < pQueryMsg->numOfGroupCols; ++i) {
      (*groupbyCols)[i].colId = *(int16_t *)pMsg;
5422
      pMsg += sizeof((*groupbyCols)[i].colId);
5423 5424

      (*groupbyCols)[i].colIndex = *(int16_t *)pMsg;
5425 5426
      pMsg += sizeof((*groupbyCols)[i].colIndex);

5427
      (*groupbyCols)[i].flag = *(int16_t *)pMsg;
5428 5429 5430 5431 5432
      pMsg += sizeof((*groupbyCols)[i].flag);

      memcpy((*groupbyCols)[i].name, pMsg, tListLen(groupbyCols[i]->name));
      pMsg += tListLen((*groupbyCols)[i].name);
    }
5433

H
hjxilinx 已提交
5434 5435
    pQueryMsg->orderByIdx = htons(pQueryMsg->orderByIdx);
    pQueryMsg->orderType = htons(pQueryMsg->orderType);
5436 5437
  }

5438 5439
  pQueryMsg->fillType = htons(pQueryMsg->fillType);
  if (pQueryMsg->fillType != TSDB_FILL_NONE) {
5440
    pQueryMsg->fillVal = (uint64_t)(pMsg);
5441 5442

    int64_t *v = (int64_t *)pMsg;
5443
    for (int32_t i = 0; i < pQueryMsg->numOfOutput; ++i) {
5444 5445
      v[i] = htobe64(v[i]);
    }
5446

5447
    pMsg += sizeof(int64_t) * pQueryMsg->numOfOutput;
5448
  }
5449

5450 5451 5452 5453
  if (pQueryMsg->numOfTags > 0) {
    (*tagCols) = calloc(1, sizeof(SColumnInfo) * pQueryMsg->numOfTags);
    for (int32_t i = 0; i < pQueryMsg->numOfTags; ++i) {
      SColumnInfo* pTagCol = (SColumnInfo*) pMsg;
5454

5455 5456 5457 5458
      pTagCol->colId = htons(pTagCol->colId);
      pTagCol->bytes = htons(pTagCol->bytes);
      pTagCol->type  = htons(pTagCol->type);
      pTagCol->numOfFilters = 0;
5459

5460
      (*tagCols)[i] = *pTagCol;
5461
      pMsg += sizeof(SColumnInfo);
5462
    }
H
hjxilinx 已提交
5463
  }
5464

5465 5466 5467 5468 5469 5470
  // the tag query condition expression string is located at the end of query msg
  if (pQueryMsg->tagCondLen > 0) {
    *tagCond = calloc(1, pQueryMsg->tagCondLen);
    memcpy(*tagCond, pMsg, pQueryMsg->tagCondLen);
    pMsg += pQueryMsg->tagCondLen;
  }
5471

weixin_48148422's avatar
weixin_48148422 已提交
5472
  if (*pMsg != 0) {
5473
    size_t len = strlen(pMsg) + 1;
5474

5475
    *tbnameCond = malloc(len);
5476 5477 5478 5479 5480
    if (*tbnameCond == NULL) {
      code = TSDB_CODE_QRY_OUT_OF_MEMORY;
      goto _cleanup;
    }

weixin_48148422's avatar
weixin_48148422 已提交
5481
    strcpy(*tbnameCond, pMsg);
5482
    pMsg += len;
weixin_48148422's avatar
weixin_48148422 已提交
5483
  }
5484

5485
  qDebug("qmsg:%p query %d tables, type:%d, qrange:%" PRId64 "-%" PRId64 ", numOfGroupbyTagCols:%d, order:%d, "
H
Haojun Liao 已提交
5486 5487
         "outputCols:%d, numOfCols:%d, interval:%" PRId64 ", fillType:%d, comptsLen:%d, compNumOfBlocks:%d, limit:%" PRId64 ", offset:%" PRId64,
         pQueryMsg, pQueryMsg->numOfTables, pQueryMsg->queryType, pQueryMsg->window.skey, pQueryMsg->window.ekey, pQueryMsg->numOfGroupCols,
5488
         pQueryMsg->order, pQueryMsg->numOfOutput, pQueryMsg->numOfCols, pQueryMsg->intervalTime,
H
Haojun Liao 已提交
5489
         pQueryMsg->fillType, pQueryMsg->tsLen, pQueryMsg->tsNumOfBlocks, pQueryMsg->limit, pQueryMsg->offset);
5490 5491

  return TSDB_CODE_SUCCESS;
dengyihao's avatar
dengyihao 已提交
5492 5493

_cleanup:
S
Shengliang Guan 已提交
5494
  taosTFree(*pExpr);
dengyihao's avatar
dengyihao 已提交
5495 5496
  taosArrayDestroy(*pTableIdList);
  *pTableIdList = NULL;
S
Shengliang Guan 已提交
5497 5498 5499 5500
  taosTFree(*tbnameCond);
  taosTFree(*groupbyCols);
  taosTFree(*tagCols);
  taosTFree(*tagCond);
5501 5502

  return code;
5503 5504
}

H
hjxilinx 已提交
5505
/**
 * Build the expression tree of an arithmetic expression from the binary form carried in
 * the first argument of pArithExprInfo->base and store it in pArithExprInfo->pExpr.
 *
 * @return TSDB_CODE_SUCCESS on success, the code caught from exprTreeFromBinary when it
 *         raises, or TSDB_CODE_QRY_APP_ERROR when no tree was produced.
 */
static int32_t buildAirthmeticExprFromMsg(SExprInfo *pArithExprInfo, SQueryTableMsg *pQueryMsg) {
  qDebug("qmsg:%p create arithmetic expr from binary string: %s", pQueryMsg, pArithExprInfo->base.arg[0].argValue.pz);

  tExprNode* pRoot = NULL;
  TRY(TSDB_MAX_TAGS) {
    pRoot = exprTreeFromBinary(pArithExprInfo->base.arg[0].argValue.pz, pArithExprInfo->base.arg[0].argBytes);
  } CATCH( code ) {
    CLEANUP_EXECUTE();
    qError("qmsg:%p failed to create arithmetic expression string from:%s, reason: %s", pQueryMsg, pArithExprInfo->base.arg[0].argValue.pz, tstrerror(code));
    return code;
  } END_TRY

  if (pRoot != NULL) {
    pArithExprInfo->pExpr = pRoot;
    return TSDB_CODE_SUCCESS;
  }

  qError("qmsg:%p failed to create arithmetic expression string from:%s", pQueryMsg, pArithExprInfo->base.arg[0].argValue.pz);
  return TSDB_CODE_QRY_APP_ERROR;
}

H
Haojun Liao 已提交
5526
static int32_t createQFunctionExprFromMsg(SQueryTableMsg *pQueryMsg, SExprInfo **pExprInfo, SSqlFuncMsg **pExprMsg,
5527 5528
    SColumnInfo* pTagCols) {
  *pExprInfo = NULL;
H
hjxilinx 已提交
5529
  int32_t code = TSDB_CODE_SUCCESS;
5530

H
Haojun Liao 已提交
5531
  SExprInfo *pExprs = (SExprInfo *)calloc(pQueryMsg->numOfOutput, sizeof(SExprInfo));
5532
  if (pExprs == NULL) {
5533
    return TSDB_CODE_QRY_OUT_OF_MEMORY;
5534 5535 5536 5537 5538
  }

  bool    isSuperTable = QUERY_IS_STABLE_QUERY(pQueryMsg->queryType);
  int16_t tagLen = 0;

5539
  for (int32_t i = 0; i < pQueryMsg->numOfOutput; ++i) {
5540
    pExprs[i].base = *pExprMsg[i];
5541
    pExprs[i].bytes = 0;
5542 5543 5544 5545

    int16_t type = 0;
    int16_t bytes = 0;

5546
    // parse the arithmetic expression
5547
    if (pExprs[i].base.functionId == TSDB_FUNC_ARITHM) {
5548
      code = buildAirthmeticExprFromMsg(&pExprs[i], pQueryMsg);
5549

5550
      if (code != TSDB_CODE_SUCCESS) {
S
Shengliang Guan 已提交
5551
        taosTFree(pExprs);
5552
        return code;
5553 5554
      }

5555
      type  = TSDB_DATA_TYPE_DOUBLE;
5556
      bytes = tDataTypeDesc[type].nSize;
H
Haojun Liao 已提交
5557
    } else if (pExprs[i].base.colInfo.colId == TSDB_TBNAME_COLUMN_INDEX && pExprs[i].base.functionId == TSDB_FUNC_TAGPRJ) {  // parse the normal column
H
Haojun Liao 已提交
5558 5559 5560
      SSchema s = tGetTableNameColumnSchema();
      type  = s.type;
      bytes = s.bytes;
B
Bomin Zhang 已提交
5561
    } else{
5562
      int32_t j = getColumnIndexInSource(pQueryMsg, &pExprs[i].base, pTagCols);
dengyihao's avatar
dengyihao 已提交
5563
      assert(j < pQueryMsg->numOfCols || j < pQueryMsg->numOfTags);
H
Haojun Liao 已提交
5564

dengyihao's avatar
dengyihao 已提交
5565
      if (pExprs[i].base.colInfo.colId != TSDB_TBNAME_COLUMN_INDEX && j >= 0) {
H
Haojun Liao 已提交
5566 5567 5568 5569
        SColumnInfo* pCol = (TSDB_COL_IS_TAG(pExprs[i].base.colInfo.flag))? &pTagCols[j]:&pQueryMsg->colList[j];
        type = pCol->type;
        bytes = pCol->bytes;
      } else {
H
Haojun Liao 已提交
5570
        SSchema s = tGetTableNameColumnSchema();
H
hjxilinx 已提交
5571

H
Haojun Liao 已提交
5572 5573 5574
        type  = s.type;
        bytes = s.bytes;
      }
5575 5576
    }

5577 5578
    int32_t param = pExprs[i].base.arg[0].argValue.i64;
    if (getResultDataInfo(type, bytes, pExprs[i].base.functionId, param, &pExprs[i].type, &pExprs[i].bytes,
5579
                          &pExprs[i].interBytes, 0, isSuperTable) != TSDB_CODE_SUCCESS) {
S
Shengliang Guan 已提交
5580
      taosTFree(pExprs);
5581
      return TSDB_CODE_QRY_INVALID_MSG;
5582 5583
    }

5584
    if (pExprs[i].base.functionId == TSDB_FUNC_TAG_DUMMY || pExprs[i].base.functionId == TSDB_FUNC_TS_DUMMY) {
5585
      tagLen += pExprs[i].bytes;
5586
    }
5587
    assert(isValidDataType(pExprs[i].type));
5588 5589 5590
  }

  // TODO refactor
5591
  for (int32_t i = 0; i < pQueryMsg->numOfOutput; ++i) {
5592 5593
    pExprs[i].base = *pExprMsg[i];
    int16_t functId = pExprs[i].base.functionId;
5594

5595
    if (functId == TSDB_FUNC_TOP || functId == TSDB_FUNC_BOTTOM) {
5596
      int32_t j = getColumnIndexInSource(pQueryMsg, &pExprs[i].base, pTagCols);
5597 5598 5599 5600 5601
      assert(j < pQueryMsg->numOfCols);

      SColumnInfo *pCol = &pQueryMsg->colList[j];

      int32_t ret =
5602
          getResultDataInfo(pCol->type, pCol->bytes, functId, pExprs[i].base.arg[0].argValue.i64,
5603
                            &pExprs[i].type, &pExprs[i].bytes, &pExprs[i].interBytes, tagLen, isSuperTable);
5604 5605 5606
      assert(ret == TSDB_CODE_SUCCESS);
    }
  }
5607
  *pExprInfo = pExprs;
5608 5609 5610 5611

  return TSDB_CODE_SUCCESS;
}

5612
/**
 * Create the group-by descriptor of a query.
 *
 * @param pQueryMsg  query message; numOfGroupCols, orderType and orderByIdx are read
 * @param pColIndex  array of pQueryMsg->numOfGroupCols group-by columns, copied into the result
 * @param code       out: set to TSDB_CODE_QRY_OUT_OF_MEMORY when an allocation fails;
 *                   left untouched otherwise
 * @return the new descriptor, or NULL when the query has no group-by column or when an
 *         allocation failed (see *code)
 */
static SSqlGroupbyExpr *createGroupbyExprFromMsg(SQueryTableMsg *pQueryMsg, SColIndex *pColIndex, int32_t *code) {
  if (pQueryMsg->numOfGroupCols == 0) {
    return NULL;
  }

  // using group by tag columns
  SSqlGroupbyExpr *pGroupbyExpr = (SSqlGroupbyExpr *)calloc(1, sizeof(SSqlGroupbyExpr));
  if (pGroupbyExpr == NULL) {
    *code = TSDB_CODE_QRY_OUT_OF_MEMORY;
    return NULL;
  }

  pGroupbyExpr->numOfGroupCols = pQueryMsg->numOfGroupCols;
  pGroupbyExpr->orderType = pQueryMsg->orderType;
  pGroupbyExpr->orderIndex = pQueryMsg->orderByIdx;

  pGroupbyExpr->columnInfo = taosArrayInit(pQueryMsg->numOfGroupCols, sizeof(SColIndex));
  if (pGroupbyExpr->columnInfo == NULL) {
    // report the failure the same way as the descriptor allocation above
    free(pGroupbyExpr);
    *code = TSDB_CODE_QRY_OUT_OF_MEMORY;
    return NULL;
  }

  for(int32_t i = 0; i < pQueryMsg->numOfGroupCols; ++i) {
    taosArrayPush(pGroupbyExpr->columnInfo, &pColIndex[i]);
  }

  return pGroupbyExpr;
}

5636
/**
 * Build pQuery->pFilterInfo from the per-column filters in pQuery->colList.
 *
 * Every column with at least one filter gets one SSingleColumnFilterInfo entry; for each of
 * its filters a comparison callback is selected from the range-filter table (lower bound
 * combined with upper bound) or from the value-filter table (single relation).
 *
 * @param pQInfo  only used as an identifier in log messages
 * @param pQuery  query whose numOfFilterCols / pFilterInfo are filled in. numOfFilterCols is
 *                incremented, not reset, so it is presumably zero on entry -- TODO confirm.
 * @return TSDB_CODE_SUCCESS, TSDB_CODE_QRY_INVALID_MSG for an unusable filter, or
 *         TSDB_CODE_QRY_OUT_OF_MEMORY when an allocation fails
 */
static int32_t createFilterInfo(void *pQInfo, SQuery *pQuery) {
  for (int32_t i = 0; i < pQuery->numOfCols; ++i) {
    if (pQuery->colList[i].numOfFilters > 0) {
      pQuery->numOfFilterCols++;
    }
  }

  if (pQuery->numOfFilterCols == 0) {
    return TSDB_CODE_SUCCESS;
  }

  pQuery->pFilterInfo = calloc(1, sizeof(SSingleColumnFilterInfo) * pQuery->numOfFilterCols);
  if (pQuery->pFilterInfo == NULL) {
    // keep the count consistent with the (missing) array for whoever cleans up
    pQuery->numOfFilterCols = 0;
    return TSDB_CODE_QRY_OUT_OF_MEMORY;
  }

  for (int32_t i = 0, j = 0; i < pQuery->numOfCols; ++i) {
    if (pQuery->colList[i].numOfFilters > 0) {
      SSingleColumnFilterInfo *pFilterInfo = &pQuery->pFilterInfo[j];

      pFilterInfo->info = pQuery->colList[i];

      pFilterInfo->numOfFilters = pQuery->colList[i].numOfFilters;
      pFilterInfo->pFilters = calloc(pFilterInfo->numOfFilters, sizeof(SColumnFilterElem));
      if (pFilterInfo->pFilters == NULL) {
        pFilterInfo->numOfFilters = 0;
        return TSDB_CODE_QRY_OUT_OF_MEMORY;
      }

      for (int32_t f = 0; f < pFilterInfo->numOfFilters; ++f) {
        SColumnFilterElem *pSingleColFilter = &pFilterInfo->pFilters[f];
        pSingleColFilter->filterInfo = pQuery->colList[i].filters[f];

        int32_t lower = pSingleColFilter->filterInfo.lowerRelOptr;
        int32_t upper = pSingleColFilter->filterInfo.upperRelOptr;

        if (lower == TSDB_RELATION_INVALID && upper == TSDB_RELATION_INVALID) {
          qError("QInfo:%p invalid filter info", pQInfo);
          return TSDB_CODE_QRY_INVALID_MSG;
        }

        int16_t type  = pQuery->colList[i].type;
        int16_t bytes = pQuery->colList[i].bytes;

        // todo refactor
        __filter_func_t *rangeFilterArray = getRangeFilterFuncArray(type);
        __filter_func_t *filterArray = getValueFilterFuncArray(type);

        if (rangeFilterArray == NULL && filterArray == NULL) {
          qError("QInfo:%p failed to get filter function, invalid data type:%d", pQInfo, type);
          return TSDB_CODE_QRY_INVALID_MSG;
        }

        if ((lower == TSDB_RELATION_GREATER_EQUAL || lower == TSDB_RELATION_GREATER) &&
            (upper == TSDB_RELATION_LESS_EQUAL || upper == TSDB_RELATION_LESS)) {
          // two-sided range: the slot depends on which bounds are inclusive
          assert(rangeFilterArray != NULL);
          if (lower == TSDB_RELATION_GREATER_EQUAL) {
            if (upper == TSDB_RELATION_LESS_EQUAL) {
              pSingleColFilter->fp = rangeFilterArray[4];
            } else {
              pSingleColFilter->fp = rangeFilterArray[2];
            }
          } else {
            if (upper == TSDB_RELATION_LESS_EQUAL) {
              pSingleColFilter->fp = rangeFilterArray[3];
            } else {
              pSingleColFilter->fp = rangeFilterArray[1];
            }
          }
        } else {  // set callback filter function
          assert(filterArray != NULL);
          if (lower != TSDB_RELATION_INVALID) {
            pSingleColFilter->fp = filterArray[lower];

            if (upper != TSDB_RELATION_INVALID) {
              qError("pQInfo:%p failed to get filter function, invalid filter condition: %d", pQInfo, type);
              return TSDB_CODE_QRY_INVALID_MSG;
            }
          } else {
            pSingleColFilter->fp = filterArray[upper];
          }
        }
        assert(pSingleColFilter->fp != NULL);
        pSingleColFilter->bytes = bytes;
      }

      j++;
    }
  }

  return TSDB_CODE_SUCCESS;
}

5723
/**
 * Recompute colInfo.colIndex of every output expression so that it is the position of the
 * referenced column in pQuery->colList (regular columns) or in pQuery->tagColList (tags).
 * Arithmetic expressions are skipped. A column that cannot be found trips an assertion; for
 * tags the table-name pseudo column is tolerated and keeps its index.
 */
static void doUpdateExprColumnIndex(SQuery *pQuery) {
  // test the pointer itself before looking through it
  assert(pQuery != NULL && pQuery->pSelectExpr != NULL);

  for (int32_t k = 0; k < pQuery->numOfOutput; ++k) {
    SSqlFuncMsg *pSqlExprMsg = &pQuery->pSelectExpr[k].base;
    if (pSqlExprMsg->functionId == TSDB_FUNC_ARITHM) {
      continue;
    }

    // todo opt performance
    SColIndex *pColIndex = &pSqlExprMsg->colInfo;
    if (!TSDB_COL_IS_TAG(pColIndex->flag)) {
      int32_t f = 0;
      for (f = 0; f < pQuery->numOfCols; ++f) {
        if (pColIndex->colId == pQuery->colList[f].colId) {
          pColIndex->colIndex = f;
          break;
        }
      }

      assert (f < pQuery->numOfCols);
    } else {
      int32_t f = 0;
      for (f = 0; f < pQuery->numOfTags; ++f) {
        if (pColIndex->colId == pQuery->tagColList[f].colId) {
          pColIndex->colIndex = f;
          break;
        }
      }

      assert(f < pQuery->numOfTags || pColIndex->colId == TSDB_TBNAME_COLUMN_INDEX);
    }
  }
}

5758
/**
 * Three-way comparison of two STableIdInfo records by their uid field.
 *
 * @return 1 when a's uid is larger, -1 when it is smaller, 0 when both are equal
 */
static int compareTableIdInfo(const void* a, const void* b) {
  const STableIdInfo* lhs = (const STableIdInfo*)a;
  const STableIdInfo* rhs = (const STableIdInfo*)b;

  if (lhs->uid == rhs->uid) {
    return 0;
  }

  return (lhs->uid > rhs->uid) ? 1 : -1;
}

dengyihao's avatar
dengyihao 已提交
5766 5767
static void freeQInfo(SQInfo *pQInfo);

H
Haojun Liao 已提交
5768 5769 5770 5771 5772
/**
 * Choose the capacity (in rows) of the result buffer and the threshold derived from it.
 *
 * Projection queries get as many rows as fit into the minimum message size, but never
 * fewer than the minimum row count; all other queries use a fixed, smaller capacity.
 * The threshold is 85% of the capacity in both cases.
 */
static void calResultBufSize(SQuery* pQuery) {
  const int32_t minMsgBytes    = 1024 * (1024 + 512);  // bytes
  const int32_t minMsgRows     = 8192;
  const float   thresholdRatio = 0.85;

  int32_t rows = 4096;  // in case of non-prj query, a smaller output buffer will be used.

  if (isProjQuery(pQuery)) {
    rows = minMsgBytes / pQuery->rowSize;
    if (rows < minMsgRows) {
      rows = minMsgRows;
    }
  }

  pQuery->rec.capacity  = rows;
  pQuery->rec.threshold = rows * thresholdRatio;
}

weixin_48148422's avatar
weixin_48148422 已提交
5787
static SQInfo *createQInfoImpl(SQueryTableMsg *pQueryMsg, SArray* pTableIdList, SSqlGroupbyExpr *pGroupbyExpr, SExprInfo *pExprs,
5788
                               STableGroupInfo *pTableGroupInfo, SColumnInfo* pTagCols) {
B
Bomin Zhang 已提交
5789 5790 5791
  int16_t numOfCols = pQueryMsg->numOfCols;
  int16_t numOfOutput = pQueryMsg->numOfOutput;

5792 5793
  SQInfo *pQInfo = (SQInfo *)calloc(1, sizeof(SQInfo));
  if (pQInfo == NULL) {
B
Bomin Zhang 已提交
5794
    goto _cleanup_qinfo;
5795
  }
5796

B
Bomin Zhang 已提交
5797 5798 5799
  // to make sure third party won't overwrite this structure
  pQInfo->signature = pQInfo;
  pQInfo->tableGroupInfo = *pTableGroupInfo;
5800 5801

  SQuery *pQuery = calloc(1, sizeof(SQuery));
B
Bomin Zhang 已提交
5802 5803 5804
  if (pQuery == NULL) {
    goto _cleanup_query;
  }
5805 5806
  pQInfo->runtimeEnv.pQuery = pQuery;

5807
  pQuery->numOfCols       = numOfCols;
H
hjxilinx 已提交
5808
  pQuery->numOfOutput     = numOfOutput;
5809 5810 5811
  pQuery->limit.limit     = pQueryMsg->limit;
  pQuery->limit.offset    = pQueryMsg->offset;
  pQuery->order.order     = pQueryMsg->order;
5812
  pQuery->order.orderColId = pQueryMsg->orderColId;
5813 5814 5815 5816
  pQuery->pSelectExpr     = pExprs;
  pQuery->pGroupbyExpr    = pGroupbyExpr;
  pQuery->intervalTime    = pQueryMsg->intervalTime;
  pQuery->slidingTime     = pQueryMsg->slidingTime;
5817
  pQuery->slidingTimeUnit = pQueryMsg->slidingTimeUnit;
5818
  pQuery->fillType        = pQueryMsg->fillType;
5819
  pQuery->numOfTags       = pQueryMsg->numOfTags;
B
Bomin Zhang 已提交
5820
  pQuery->tagColList      = pTagCols;
H
Haojun Liao 已提交
5821

5822
  pQuery->colList = calloc(numOfCols, sizeof(SSingleColumnFilterInfo));
5823
  if (pQuery->colList == NULL) {
5824
    goto _cleanup;
5825
  }
5826

H
hjxilinx 已提交
5827
  for (int16_t i = 0; i < numOfCols; ++i) {
5828
    pQuery->colList[i] = pQueryMsg->colList[i];
5829
    pQuery->colList[i].filters = tscFilterInfoClone(pQueryMsg->colList[i].filters, pQuery->colList[i].numOfFilters);
H
hjxilinx 已提交
5830
  }
5831

5832
  // calculate the result row size
5833 5834 5835
  for (int16_t col = 0; col < numOfOutput; ++col) {
    assert(pExprs[col].bytes > 0);
    pQuery->rowSize += pExprs[col].bytes;
5836
  }
5837

5838
  doUpdateExprColumnIndex(pQuery);
5839

5840
  int32_t ret = createFilterInfo(pQInfo, pQuery);
5841
  if (ret != TSDB_CODE_SUCCESS) {
5842
    goto _cleanup;
5843 5844 5845
  }

  // prepare the result buffer
5846
  pQuery->sdata = (tFilePage **)calloc(pQuery->numOfOutput, POINTER_BYTES);
5847
  if (pQuery->sdata == NULL) {
5848
    goto _cleanup;
5849 5850
  }

H
Haojun Liao 已提交
5851
  calResultBufSize(pQuery);
5852

5853
  for (int32_t col = 0; col < pQuery->numOfOutput; ++col) {
5854
    assert(pExprs[col].interBytes >= pExprs[col].bytes);
5855 5856

    // allocate additional memory for interResults that are usually larger then final results
5857 5858
    size_t size = (pQuery->rec.capacity + 1) * pExprs[col].bytes + pExprs[col].interBytes + sizeof(tFilePage);
    pQuery->sdata[col] = (tFilePage *)calloc(1, size);
5859
    if (pQuery->sdata[col] == NULL) {
5860
      goto _cleanup;
5861 5862 5863
    }
  }

5864
  if (pQuery->fillType != TSDB_FILL_NONE) {
5865 5866
    pQuery->fillVal = malloc(sizeof(int64_t) * pQuery->numOfOutput);
    if (pQuery->fillVal == NULL) {
5867
      goto _cleanup;
5868 5869 5870
    }

    // the first column is the timestamp
5871
    memcpy(pQuery->fillVal, (char *)pQueryMsg->fillVal, pQuery->numOfOutput * sizeof(int64_t));
5872 5873
  }

dengyihao's avatar
dengyihao 已提交
5874 5875 5876 5877 5878 5879
  size_t numOfGroups = 0;
  if (pTableGroupInfo->pGroupList != NULL) {
    numOfGroups = taosArrayGetSize(pTableGroupInfo->pGroupList);

    pQInfo->tableqinfoGroupInfo.pGroupList = taosArrayInit(numOfGroups, POINTER_BYTES);
    pQInfo->tableqinfoGroupInfo.numOfTables = pTableGroupInfo->numOfTables;
H
Haojun Liao 已提交
5880
    pQInfo->tableqinfoGroupInfo.map = taosHashInit(pTableGroupInfo->numOfTables,
H
Haojun Liao 已提交
5881
                                                   taosGetDefaultHashFunction(TSDB_DATA_TYPE_INT), true, false);
H
Haojun Liao 已提交
5882
  }
5883

weixin_48148422's avatar
weixin_48148422 已提交
5884 5885
  int tableIndex = 0;
  STimeWindow window = pQueryMsg->window;
5886
  taosArraySort(pTableIdList, compareTableIdInfo);
5887

H
Haojun Liao 已提交
5888
  pQInfo->runtimeEnv.interBufSize = getOutputInterResultBufSize(pQuery);
H
Haojun Liao 已提交
5889 5890 5891
  pQInfo->pBuf = calloc(pTableGroupInfo->numOfTables, sizeof(STableQueryInfo));
  int32_t index = 0;

H
hjxilinx 已提交
5892
  for(int32_t i = 0; i < numOfGroups; ++i) {
5893
    SArray* pa = taosArrayGetP(pTableGroupInfo->pGroupList, i);
5894

H
Haojun Liao 已提交
5895
    size_t s = taosArrayGetSize(pa);
5896
    SArray* p1 = taosArrayInit(s, POINTER_BYTES);
B
Bomin Zhang 已提交
5897 5898 5899
    if (p1 == NULL) {
      goto _cleanup;
    }
5900

H
hjxilinx 已提交
5901
    for(int32_t j = 0; j < s; ++j) {
H
Haojun Liao 已提交
5902 5903
      STableKeyInfo* info = taosArrayGet(pa, j);
      STableId* id = TSDB_TABLEID(info->pTable);
5904

H
Haojun Liao 已提交
5905
      STableIdInfo* pTableId = taosArraySearch(pTableIdList, id, compareTableIdInfo);
weixin_48148422's avatar
weixin_48148422 已提交
5906 5907 5908
      if (pTableId != NULL ) {
        window.skey = pTableId->key;
      } else {
B
Bomin Zhang 已提交
5909
        window.skey = pQueryMsg->window.skey;
weixin_48148422's avatar
weixin_48148422 已提交
5910
      }
5911

H
Haojun Liao 已提交
5912
      void* buf = pQInfo->pBuf + index * sizeof(STableQueryInfo);
H
Haojun Liao 已提交
5913
      STableQueryInfo* item = createTableQueryInfo(&pQInfo->runtimeEnv, info->pTable, window, buf);
B
Bomin Zhang 已提交
5914 5915 5916
      if (item == NULL) {
        goto _cleanup;
      }
H
Haojun Liao 已提交
5917

5918
      item->groupIndex = i;
H
hjxilinx 已提交
5919
      taosArrayPush(p1, &item);
H
Haojun Liao 已提交
5920 5921
      taosHashPut(pQInfo->tableqinfoGroupInfo.map, &id->tid, sizeof(id->tid), &item, POINTER_BYTES);
      index += 1;
H
hjxilinx 已提交
5922
    }
5923

5924
    taosArrayPush(pQInfo->tableqinfoGroupInfo.pGroupList, &p1);
H
hjxilinx 已提交
5925
  }
5926

weixin_48148422's avatar
weixin_48148422 已提交
5927
  pQInfo->arrTableIdInfo = taosArrayInit(tableIndex, sizeof(STableIdInfo));
5928 5929
  pQInfo->dataReady = QUERY_RESULT_NOT_READY;
  pthread_mutex_init(&pQInfo->lock, NULL);
weixin_48148422's avatar
weixin_48148422 已提交
5930

5931
  pQuery->pos = -1;
5932
  pQuery->window = pQueryMsg->window;
5933
  colIdCheck(pQuery);
5934

5935
  qDebug("qmsg:%p QInfo:%p created", pQueryMsg, pQInfo);
5936 5937
  return pQInfo;

B
Bomin Zhang 已提交
5938
_cleanup_qinfo:
H
Haojun Liao 已提交
5939
  tsdbDestroyTableGroup(pTableGroupInfo);
B
Bomin Zhang 已提交
5940 5941

_cleanup_query:
5942 5943 5944 5945
  if (pGroupbyExpr != NULL) {
    taosArrayDestroy(pGroupbyExpr->columnInfo);
    free(pGroupbyExpr);
  }
H
Haojun Liao 已提交
5946

S
Shengliang Guan 已提交
5947
  taosTFree(pTagCols);
B
Bomin Zhang 已提交
5948 5949 5950 5951 5952 5953
  for (int32_t i = 0; i < numOfOutput; ++i) {
    SExprInfo* pExprInfo = &pExprs[i];
    if (pExprInfo->pExpr != NULL) {
      tExprTreeDestroy(&pExprInfo->pExpr, NULL);
    }
  }
H
Haojun Liao 已提交
5954

S
Shengliang Guan 已提交
5955
  taosTFree(pExprs);
B
Bomin Zhang 已提交
5956

5957
_cleanup:
dengyihao's avatar
dengyihao 已提交
5958
  freeQInfo(pQInfo);
5959 5960 5961
  return NULL;
}

H
hjxilinx 已提交
5962
static bool isValidQInfo(void *param) {
H
hjxilinx 已提交
5963 5964 5965 5966
  SQInfo *pQInfo = (SQInfo *)param;
  if (pQInfo == NULL) {
    return false;
  }
5967

H
hjxilinx 已提交
5968 5969 5970 5971
  /*
   * pQInfo->signature may be changed by another thread, so we assign value of signature
   * into local variable, then compare by using local variable
   */
5972
  uint64_t sig = (uint64_t)pQInfo->signature;
H
hjxilinx 已提交
5973 5974 5975
  return (sig == (uint64_t)pQInfo);
}

5976
/*
 * Finish the initialization of a freshly created QInfo.
 *
 * pQueryMsg : the (already converted) query message; tsLen/tsOffset describe an optional
 *             block of compressed timestamps appended to the message.
 * tsdb/vgId : forwarded to doInitQInfo().
 * pQInfo    : the query handle to initialize.
 * isSTable  : forwarded to doInitQInfo().
 *
 * Returns TSDB_CODE_SUCCESS when the query is ready (or trivially completed because the
 * time range is empty / no table qualified). On a doInitQInfo() failure the QInfo is
 * released with freeQInfo() and the error code is returned; the caller must not touch
 * pQInfo afterwards.
 */
static int32_t initQInfo(SQueryTableMsg *pQueryMsg, void *tsdb, int32_t vgId, SQInfo *pQInfo, bool isSTable) {
  int32_t code = TSDB_CODE_SUCCESS;
  SQuery *pQuery = pQInfo->runtimeEnv.pQuery;

  /*
   * The trivial "nothing to do" checks are performed before the timestamp buffer is
   * built: on these paths the buffer was never handed over to anybody, so creating it
   * first leaked it.
   */
  if ((QUERY_IS_ASC_QUERY(pQuery) && (pQuery->window.skey > pQuery->window.ekey)) ||
      (!QUERY_IS_ASC_QUERY(pQuery) && (pQuery->window.ekey > pQuery->window.skey))) {
    qDebug("QInfo:%p no result in time range %" PRId64 "-%" PRId64 ", order %d", pQInfo, pQuery->window.skey,
           pQuery->window.ekey, pQuery->order.order);
    setQueryStatus(pQuery, QUERY_COMPLETED);
    pQInfo->tableqinfoGroupInfo.numOfTables = 0;
    return TSDB_CODE_SUCCESS;
  }

  if (pQInfo->tableqinfoGroupInfo.numOfTables == 0) {
    qDebug("QInfo:%p no table qualified for tag filter, abort query", pQInfo);
    setQueryStatus(pQuery, QUERY_COMPLETED);
    return TSDB_CODE_SUCCESS;
  }

  STSBuf *pTSBuf = NULL;
  if (pQueryMsg->tsLen > 0) {  // open new file to save the result
    char *tsBlock = (char *) pQueryMsg + pQueryMsg->tsOffset;
    pTSBuf = tsBufCreateFromCompBlocks(tsBlock, pQueryMsg->tsNumOfBlocks, pQueryMsg->tsLen, pQueryMsg->tsOrder);

    // position the buffer cursor on its first element
    tsBufResetPos(pTSBuf);
    bool ret = tsBufNextPos(pTSBuf);
    UNUSED(ret);
  }

  // filter the qualified
  if ((code = doInitQInfo(pQInfo, pTSBuf, tsdb, vgId, isSTable)) != TSDB_CODE_SUCCESS) {
    goto _error;
  }

  return code;

_error:
  // table query ref will be decrease during error handling
  freeQInfo(pQInfo);
  return code;
}

B
Bomin Zhang 已提交
6018 6019 6020 6021 6022 6023 6024 6025 6026 6027 6028 6029
/*
 * Release an array of numOfFilters column filters.
 * For every entry whose filterstr flag is set, the buffer referenced by pz is freed
 * before the array itself is released. A NULL array is ignored.
 */
static void freeColumnFilterInfo(SColumnFilterInfo* pFilter, int32_t numOfFilters) {
    if (pFilter != NULL) {
      int32_t idx = 0;
      while (idx < numOfFilters) {
        SColumnFilterInfo* cur = &pFilter[idx];
        if (cur->filterstr) {
          free((void*)(cur->pz));
        }
        idx++;
      }

      free(pFilter);
    }
}

H
hjxilinx 已提交
6030 6031 6032 6033
/*
 * Destroy a QInfo object and everything it owns.
 *
 * The handle is validated first; an invalid/NULL handle is ignored. The query is marked
 * as killed, the runtime environment is torn down, and then the output buffers, filter
 * info, select expressions, fill values, table groups, group-by expression, column list
 * and finally the SQuery and SQInfo objects themselves are released. The signature is
 * cleared before the final free so that isValidQInfo() rejects the stale pointer.
 *
 * This function is also reached from the error labels of createQInfoImpl(), i.e. it may
 * see a partially built object. The sdata and pFilterInfo arrays are therefore checked
 * for NULL before being indexed (NOTE(review): which fields can still be NULL on those
 * paths depends on code outside this function).
 */
static void freeQInfo(SQInfo *pQInfo) {
  if (!isValidQInfo(pQInfo)) {
    return;
  }

  SQuery *pQuery = pQInfo->runtimeEnv.pQuery;
  setQueryKilled(pQInfo);

  qDebug("QInfo:%p start to free QInfo", pQInfo);
  if (pQuery->sdata != NULL) {
    for (int32_t col = 0; col < pQuery->numOfOutput; ++col) {
      taosTFree(pQuery->sdata[col]);
    }
  }

  teardownQueryRuntimeEnv(&pQInfo->runtimeEnv);

  if (pQuery->pFilterInfo != NULL) {
    for (int32_t i = 0; i < pQuery->numOfFilterCols; ++i) {
      SSingleColumnFilterInfo *pColFilter = &pQuery->pFilterInfo[i];
      if (pColFilter->numOfFilters > 0) {
        taosTFree(pColFilter->pFilters);
      }
    }
  }

  if (pQuery->pSelectExpr != NULL) {
    for (int32_t i = 0; i < pQuery->numOfOutput; ++i) {
      SExprInfo* pExprInfo = &pQuery->pSelectExpr[i];

      if (pExprInfo->pExpr != NULL) {
        tExprTreeDestroy(&pExprInfo->pExpr, NULL);
      }
    }

    taosTFree(pQuery->pSelectExpr);
  }

  if (pQuery->fillVal != NULL) {
    taosTFree(pQuery->fillVal);
  }

  // todo refactor, extract method to destroytableDataInfo
  if (pQInfo->tableqinfoGroupInfo.pGroupList != NULL) {
    int32_t numOfGroups = GET_NUM_OF_TABLEGROUP(pQInfo);
    for (int32_t i = 0; i < numOfGroups; ++i) {
      SArray *p = GET_TABLEGROUP(pQInfo, i);

      size_t num = taosArrayGetSize(p);
      for(int32_t j = 0; j < num; ++j) {
        STableQueryInfo* item = taosArrayGetP(p, j);
        destroyTableQueryInfo(item);
      }

      taosArrayDestroy(p);
    }
  }

  taosTFree(pQInfo->pBuf);
  taosArrayDestroy(pQInfo->tableqinfoGroupInfo.pGroupList);
  taosHashCleanup(pQInfo->tableqinfoGroupInfo.map);
  tsdbDestroyTableGroup(&pQInfo->tableGroupInfo);
  taosArrayDestroy(pQInfo->arrTableIdInfo);

  if (pQuery->pGroupbyExpr != NULL) {
    taosArrayDestroy(pQuery->pGroupbyExpr->columnInfo);
    taosTFree(pQuery->pGroupbyExpr);
  }

  taosTFree(pQuery->tagColList);
  taosTFree(pQuery->pFilterInfo);

  if (pQuery->colList != NULL) {
    for (int32_t i = 0; i < pQuery->numOfCols; i++) {
      SColumnInfo* column = pQuery->colList + i;
      freeColumnFilterInfo(column->filters, column->numOfFilters);
    }
    taosTFree(pQuery->colList);
  }

  taosTFree(pQuery->sdata);
  taosTFree(pQuery);

  // invalidate the handle before the memory is returned
  pQInfo->signature = 0;

  qDebug("QInfo:%p QInfo is freed", pQInfo);

  taosTFree(pQInfo);
}

H
hjxilinx 已提交
6115
/*
 * Compute the number of payload bytes that will be sent back for the current result.
 *
 * For a ts-comp query with at least one row, the result lives in a file whose path is
 * stored in sdata[0]->data: the file size is returned and also written to *numOfRows.
 * If the file cannot be stat'ed, an error is logged and 0 is returned.
 * For every other case the size is rowSize * (*numOfRows).
 *
 * TODO handle the case that the file is too large to send back one time
 */
static size_t getResultSize(SQInfo *pQInfo, int64_t *numOfRows) {
  SQuery *pQuery = pQInfo->runtimeEnv.pQuery;

  // ordinary result set (or an empty ts-comp result): fixed-width rows
  if (!isTSCompQuery(pQuery) || (*numOfRows) <= 0) {
    return pQuery->rowSize * (*numOfRows);
  }

  struct stat fileStat;
  if (stat(pQuery->sdata[0]->data, &fileStat) != 0) {
    qError("QInfo:%p failed to get file info, path:%s, reason:%s", pQInfo, pQuery->sdata[0]->data, strerror(errno));
    return 0;
  }

  // for a ts-comp query the "row count" is replaced by the file size
  *numOfRows = fileStat.st_size;
  return fileStat.st_size;
}
6136

H
hjxilinx 已提交
6137 6138 6139
/*
 * Copy the current query result into the response buffer `data`.
 *
 * For a ts-comp query the result is stored in a temporary file (path in sdata[0]->data):
 * the whole file is read into `data`, then the file is closed and unlinked. For all
 * other queries doCopyQueryResultToMsg() performs the copy.
 *
 * Afterwards the running total is advanced by rec.rows and, when a positive limit has
 * been reached exactly, the query status is set to QUERY_OVER.
 *
 * Always returns TSDB_CODE_SUCCESS; I/O failures are only logged
 * (todo: return the error code to the client).
 */
static int32_t doDumpQueryResult(SQInfo *pQInfo, char *data) {
  // the remained number of retrieved rows, not the interpolated result
  SQuery *pQuery = pQInfo->runtimeEnv.pQuery;

  // load data from file to msg buffer
  if (isTSCompQuery(pQuery)) {
    int32_t fd = open(pQuery->sdata[0]->data, O_RDONLY, 0666);

    // make sure file exist
    if (FD_VALID(fd)) {
      int32_t s = lseek(fd, 0, SEEK_END);
      qDebug("QInfo:%p ts comp data return, file:%s, size:%d", pQInfo, pQuery->sdata[0]->data, s);

      if (s < 0 || lseek(fd, 0, SEEK_SET) < 0) {
        // a negative size must never be handed to read(): it would be converted to a huge byte count
        qError("QInfo:%p failed to seek ts-comp file, path:%s, reason:%s", pQInfo, pQuery->sdata[0]->data,
               strerror(errno));
      } else if (s > 0) {
        int64_t nread = read(fd, data, s);
        if (nread != s) {
          // todo handle error
          qError("QInfo:%p failed to read ts-comp file, path:%s, read:%" PRId64 ", expected:%d", pQInfo,
                 pQuery->sdata[0]->data, nread, s);
        }
      }

      close(fd);
      unlink(pQuery->sdata[0]->data);
    } else {
      // todo return the error code to client and handle invalid fd
      qError("QInfo:%p failed to open tmp file to send ts-comp data to client, path:%s, reason:%s", pQInfo,
             pQuery->sdata[0]->data, strerror(errno));
      if (fd != -1) {
        close(fd);
      }
    }

    // all data returned, set query over
    if (Q_STATUS_EQUAL(pQuery->status, QUERY_COMPLETED)) {
      setQueryStatus(pQuery, QUERY_OVER);
    }
  } else {
    doCopyQueryResultToMsg(pQInfo, pQuery->rec.rows, data);
  }

  pQuery->rec.total += pQuery->rec.rows;
  qDebug("QInfo:%p current numOfRes rows:%" PRId64 ", total:%" PRId64, pQInfo, pQuery->rec.rows, pQuery->rec.total);

  if (pQuery->limit.limit > 0 && pQuery->limit.limit == pQuery->rec.total) {
    qDebug("QInfo:%p results limitation reached, limitation:%"PRId64, pQInfo, pQuery->limit.limit);
    setQueryStatus(pQuery, QUERY_OVER);
  }

  return TSDB_CODE_SUCCESS;
}

6187 6188 6189 6190 6191 6192 6193
/*
 * Per-vnode query handle manager, created by qOpenQueryMgmt() and released by
 * qCleanupQueryMgmt().
 */
typedef struct SQueryMgmt {
  SCacheObj      *qinfoPool;      // query handle pool
  int32_t         vgId;           // vnode group id this manager was opened for
  bool            closed;         // set by qQueryMgmtNotifyClosed(); new registrations/acquires are rejected afterwards
  pthread_mutex_t lock;           // initialized on open, destroyed on cleanup; the lock/unlock calls in this file are commented out
} SQueryMgmt;

6194
/*
 * Build a query handle from a query message.
 *
 * tsdb      : storage engine handle, must not be NULL.
 * vgId      : vnode group id, forwarded to initQInfo().
 * pQueryMsg : the query message, must not be NULL; it is converted in place by
 *             convertQueryMsg().
 * pQInfo    : out parameter. Receives the new handle on success and is set to NULL
 *             whenever a non-success code is returned.
 *
 * Steps: convert the message, validate the table list, build the select/group-by
 * expressions, resolve the table group according to the query type, create the QInfo
 * and initialize it. All temporaries are released at the _over label regardless of the
 * outcome. pExprs, pGroupbyExpr and pTagColumnInfo are handed to createQInfoImpl()
 * (which releases them itself on failure), hence they are reset to NULL right after
 * the call.
 *
 * NOTE(review): when a failure happens after createQFunctionExprFromMsg() succeeded but
 * before createQInfoImpl() is called, pExprs is released with a plain free(); any
 * expression trees hanging off its entries are not destroyed here.
 */
int32_t qCreateQueryInfo(void* tsdb, int32_t vgId, SQueryTableMsg* pQueryMsg, qinfo_t* pQInfo) {
  assert(pQueryMsg != NULL && tsdb != NULL);

  int32_t code = TSDB_CODE_SUCCESS;

  char            *tagCond  = NULL;
  char            *tbnameCond = NULL;
  SArray          *pTableIdList = NULL;
  SSqlFuncMsg    **pExprMsg = NULL;
  SExprInfo       *pExprs   = NULL;
  SColIndex       *pGroupColIndex = NULL;
  SColumnInfo     *pTagColumnInfo = NULL;
  SSqlGroupbyExpr *pGroupbyExpr   = NULL;

  code = convertQueryMsg(pQueryMsg, &pTableIdList, &pExprMsg, &tagCond, &tbnameCond, &pGroupColIndex, &pTagColumnInfo);
  if (code != TSDB_CODE_SUCCESS) {
    goto _over;
  }

  if (pQueryMsg->numOfTables <= 0) {
    qError("Invalid number of tables to query, numOfTables:%d", pQueryMsg->numOfTables);
    code = TSDB_CODE_QRY_INVALID_MSG;
    goto _over;
  }

  if (pTableIdList == NULL || taosArrayGetSize(pTableIdList) == 0) {
    qError("qmsg:%p, SQueryTableMsg wrong format", pQueryMsg);
    code = TSDB_CODE_QRY_INVALID_MSG;
    goto _over;
  }

  if ((code = createQFunctionExprFromMsg(pQueryMsg, &pExprs, pExprMsg, pTagColumnInfo)) != TSDB_CODE_SUCCESS) {
    goto _over;
  }

  pGroupbyExpr = createGroupbyExprFromMsg(pQueryMsg, pGroupColIndex, &code);
  if ((pGroupbyExpr == NULL && pQueryMsg->numOfGroupCols != 0) || code != TSDB_CODE_SUCCESS) {
    // never report success without having produced a handle
    if (code == TSDB_CODE_SUCCESS) {
      code = TSDB_CODE_QRY_OUT_OF_MEMORY;
    }
    goto _over;
  }

  bool isSTableQuery = false;
  STableGroupInfo tableGroupInfo = {0};
  int64_t st = taosGetTimestampUs();

  if (TSDB_QUERY_HAS_TYPE(pQueryMsg->queryType, TSDB_QUERY_TYPE_TABLE_QUERY)) {
    STableIdInfo *id = taosArrayGet(pTableIdList, 0);

    qDebug("qmsg:%p query normal table, uid:%"PRId64", tid:%d", pQueryMsg, id->uid, id->tid);
    if ((code = tsdbGetOneTableGroup(tsdb, id->uid, pQueryMsg->window.skey, &tableGroupInfo)) != TSDB_CODE_SUCCESS) {
      goto _over;
    }
  } else if (TSDB_QUERY_HAS_TYPE(pQueryMsg->queryType, TSDB_QUERY_TYPE_MULTITABLE_QUERY|TSDB_QUERY_TYPE_STABLE_QUERY)) {
    isSTableQuery = true;

    // also note there's possibility that only one table in the super table
    if (!TSDB_QUERY_HAS_TYPE(pQueryMsg->queryType, TSDB_QUERY_TYPE_MULTITABLE_QUERY)) {
      STableIdInfo *id = taosArrayGet(pTableIdList, 0);

      // group by normal column, do not pass the group by condition to tsdb to group table into different group
      int32_t numOfGroupByCols = pQueryMsg->numOfGroupCols;
      if (pQueryMsg->numOfGroupCols == 1 && !TSDB_COL_IS_TAG(pGroupColIndex->flag)) {
        numOfGroupByCols = 0;
      }

      qDebug("qmsg:%p query stable, uid:%"PRId64", tid:%d", pQueryMsg, id->uid, id->tid);
      code = tsdbQuerySTableByTagCond(tsdb, id->uid, pQueryMsg->window.skey, tagCond, pQueryMsg->tagCondLen,
          pQueryMsg->tagNameRelType, tbnameCond, &tableGroupInfo, pGroupColIndex, numOfGroupByCols);

      if (code != TSDB_CODE_SUCCESS) {
        qError("qmsg:%p failed to query stable, reason: %s", pQueryMsg, tstrerror(code));
        goto _over;
      }
    } else {
      code = tsdbGetTableGroupFromIdList(tsdb, pTableIdList, &tableGroupInfo);
      if (code != TSDB_CODE_SUCCESS) {
        goto _over;
      }

      qDebug("qmsg:%p query on %zu tables in one group from client", pQueryMsg, tableGroupInfo.numOfTables);
    }

    int64_t el = taosGetTimestampUs() - st;
    qDebug("qmsg:%p tag filter completed, numOfTables:%zu, elapsed time:%"PRId64"us", pQueryMsg, tableGroupInfo.numOfTables, el);
  } else {
    /*
     * The query type comes from the message itself. An assert() would disappear in
     * NDEBUG builds and let the function continue with an empty table group, so the
     * message is rejected explicitly instead.
     */
    qError("qmsg:%p invalid query type, SQueryTableMsg wrong format", pQueryMsg);
    code = TSDB_CODE_QRY_INVALID_MSG;
    goto _over;
  }

  (*pQInfo) = createQInfoImpl(pQueryMsg, pTableIdList, pGroupbyExpr, pExprs, &tableGroupInfo, pTagColumnInfo);
  // ownership moved into createQInfoImpl(); make sure _over does not free them again
  pExprs = NULL;
  pGroupbyExpr = NULL;
  pTagColumnInfo = NULL;

  if ((*pQInfo) == NULL) {
    code = TSDB_CODE_QRY_OUT_OF_MEMORY;
    goto _over;
  }

  code = initQInfo(pQueryMsg, tsdb, vgId, *pQInfo, isSTableQuery);

_over:
  free(tagCond);
  free(tbnameCond);
  free(pGroupColIndex);
  if (pGroupbyExpr != NULL) {
    taosArrayDestroy(pGroupbyExpr->columnInfo);
    free(pGroupbyExpr);
  }
  free(pTagColumnInfo);
  free(pExprs);
  free(pExprMsg);
  taosArrayDestroy(pTableIdList);

  for (int32_t i = 0; i < pQueryMsg->numOfCols; i++) {
    SColumnInfo* column = pQueryMsg->colList + i;
    freeColumnFilterInfo(column->filters, column->numOfFilters);
  }

  //pQInfo already freed in initQInfo, but *pQInfo may not pointer to null;
  if (code != TSDB_CODE_SUCCESS) {
    *pQInfo = NULL;
  }

  // if failed to add ref for all tables in this query, abort current query
  return code;
}

H
Haojun Liao 已提交
6320
/*
 * Public destructor for a query handle: logs completion, prints the query cost
 * summary and releases the handle. Invalid or NULL handles are ignored.
 */
void qDestroyQueryInfo(qinfo_t qHandle) {
  SQInfo* pQInfo = (SQInfo*) qHandle;

  if (isValidQInfo(pQInfo)) {
    qDebug("QInfo:%p query completed", pQInfo);
    queryCostStatis(pQInfo);   // print the query cost summary
    freeQInfo(pQInfo);
  }
}

6331 6332 6333 6334 6335 6336 6337 6338 6339 6340
/*
 * Mark the result of the query as ready and report whether a retrieve request is
 * already waiting for it.
 *
 * Both the dataReady update and the rspContext test happen under pQInfo->lock.
 * Returns true when rspContext is non-NULL.
 */
static bool doBuildResCheck(SQInfo* pQInfo) {
  pthread_mutex_lock(&pQInfo->lock);

  pQInfo->dataReady = QUERY_RESULT_READY;
  bool hasPendingRsp = (pQInfo->rspContext != NULL);

  pthread_mutex_unlock(&pQInfo->lock);

  // note: clearing of the qhandle owner is currently disabled
  return hasPendingRsp;
}

6348
/*
 * Execute (or resume) the query attached to the handle.
 *
 * The function returns early, after marking the result ready, when the query has been
 * killed or no table is involved. Otherwise a setjmp() recovery point is installed in
 * the runtime environment: a non-zero value delivered to it is recorded as the query
 * error code. Then one of the three executors is run: tag-only, super-table or
 * single-table.
 *
 * The return value is that of doBuildResCheck(): true when a retrieve request is
 * already waiting for the result.
 */
bool qTableQuery(qinfo_t qinfo) {
  SQInfo *pQInfo = (SQInfo *)qinfo;
  assert(pQInfo && pQInfo->signature == pQInfo);

  // note: exclusive-owner tracking of the qhandle is currently disabled

  if (IS_QUERY_KILLED(pQInfo)) {
    qDebug("QInfo:%p it is already killed, abort", pQInfo);
    return doBuildResCheck(pQInfo);
  }

  if (pQInfo->tableqinfoGroupInfo.numOfTables == 0) {
    qDebug("QInfo:%p no table exists for query, abort", pQInfo);
    setQueryStatus(pQInfo->runtimeEnv.pQuery, QUERY_COMPLETED);
    return doBuildResCheck(pQInfo);
  }

  // error occurs, record the error code and return to client
  int32_t jmpCode = setjmp(pQInfo->runtimeEnv.env);
  if (jmpCode != TSDB_CODE_SUCCESS) {
    pQInfo->code = jmpCode;
    qDebug("QInfo:%p query abort due to error/cancel occurs, code:%s", pQInfo, tstrerror(pQInfo->code));
    return doBuildResCheck(pQInfo);
  }

  qDebug("QInfo:%p query task is launched", pQInfo);

  SQueryRuntimeEnv* pRuntimeEnv = &pQInfo->runtimeEnv;
  if (onlyQueryTags(pRuntimeEnv->pQuery)) {
    // tag-only queries never open a data query handle
    assert(pRuntimeEnv->pQueryHandle == NULL);
    buildTagQueryResult(pQInfo);
  } else if (pRuntimeEnv->stableQuery) {
    stableQueryImpl(pQInfo);
  } else {
    tableQueryImpl(pQInfo);
  }

  SQuery* pQuery = pRuntimeEnv->pQuery;
  if (IS_QUERY_KILLED(pQInfo)) {
    qDebug("QInfo:%p query is killed", pQInfo);
  } else if (pQuery->rec.rows == 0) {
    qDebug("QInfo:%p over, %zu tables queried, %"PRId64" rows are returned", pQInfo, pQInfo->tableqinfoGroupInfo.numOfTables, pQuery->rec.total);
  } else {
    qDebug("QInfo:%p query paused, %" PRId64 " rows returned, numOfTotal:%" PRId64 " rows",
           pQInfo, pQuery->rec.rows, pQuery->rec.total + pQuery->rec.rows);
  }

  return doBuildResCheck(pQInfo);
}

6404
/*
 * Ask whether the result of a query can be built right now.
 *
 * qinfo       : query handle.
 * buildRes    : out; true when the data is ready, false otherwise.
 * pRspContext : stored in the handle when the data is not ready yet, so that the
 *               response can be produced later.
 *
 * Returns TSDB_CODE_QRY_INVALID_QHANDLE for a bad handle, otherwise the query's
 * current code. For a killed query the code is returned immediately and *buildRes is
 * left untouched.
 */
int32_t qRetrieveQueryResultInfo(qinfo_t qinfo, bool* buildRes, void* pRspContext) {
  SQInfo *pQInfo = (SQInfo *)qinfo;

  if (pQInfo == NULL || !isValidQInfo(pQInfo)) {
    return TSDB_CODE_QRY_INVALID_QHANDLE;
  }

  SQuery *pQuery = pQInfo->runtimeEnv.pQuery;
  if (IS_QUERY_KILLED(pQInfo)) {
    qDebug("QInfo:%p query is killed, code:%d", pQInfo, pQInfo->code);
    return pQInfo->code;
  }

  pthread_mutex_lock(&pQInfo->lock);

  bool ready = (pQInfo->dataReady == QUERY_RESULT_READY);
  *buildRes = ready;

  if (ready) {
    qDebug("QInfo:%p retrieve result info, rowsize:%d, rows:%"PRId64", code:%d", pQInfo, pQuery->rowSize, pQuery->rec.rows,
           pQInfo->code);
  } else {
    // remember the request; the result is delivered once the query pauses or ends
    qDebug("QInfo:%p retrieve req set query return result after paused", pQInfo);
    pQInfo->rspContext = pRspContext;
  }

  int32_t code = pQInfo->code;
  pthread_mutex_unlock(&pQInfo->lock);

  return code;
}
6433

H
hjxilinx 已提交
6434
/*
 * Tell whether the client may retrieve more results from this query.
 *
 * Returns false for an invalid handle, for a query that carries an error code and for
 * a query whose status is QUERY_OVER. Returns true when the status is
 * QUERY_RESBUF_FULL or QUERY_COMPLETED. Any other status is a programming error
 * (asserted) and yields false.
 */
bool qHasMoreResultsToRetrieve(qinfo_t qinfo) {
  SQInfo *pQInfo = (SQInfo *)qinfo;

  // the two failure cases are logged separately: pQInfo->code must not be read from an invalid/NULL handle
  if (!isValidQInfo(pQInfo)) {
    qDebug("QInfo:%p invalid qhandle, abort query", pQInfo);
    return false;
  }

  if (pQInfo->code != TSDB_CODE_SUCCESS) {
    qDebug("QInfo:%p error occurs, abort query, code:%x", pQInfo, pQInfo->code);
    return false;
  }

  SQuery *pQuery = pQInfo->runtimeEnv.pQuery;

  bool ret = false;
  if (Q_STATUS_EQUAL(pQuery->status, QUERY_OVER)) {
    ret = false;
  } else if (Q_STATUS_EQUAL(pQuery->status, QUERY_RESBUF_FULL)) {
    ret = true;
  } else if (Q_STATUS_EQUAL(pQuery->status, QUERY_COMPLETED)) {
    ret = true;
  } else {
    assert(0);
  }

  if (ret) {
    qDebug("QInfo:%p has more results waits for client retrieve", pQInfo);
  }

  return ret;
}

6462
/*
 * Build the retrieve response for the current result of a query.
 *
 * qinfo        : query handle.
 * pRsp         : out; response message allocated with rpcMallocCont().
 * contLen      : out; total length of the response (header + payload + table id info).
 * continueExec : out; true when the query has more results to produce, false
 *                otherwise (in which case the query is marked killed). Not written
 *                when the function fails early.
 *
 * Returns TSDB_CODE_QRY_INVALID_QHANDLE for a bad handle, TSDB_CODE_QRY_OUT_OF_MEMORY
 * when the response cannot be allocated, otherwise the result of the dump / the
 * query's own code.
 */
int32_t qDumpRetrieveResult(qinfo_t qinfo, SRetrieveTableRsp **pRsp, int32_t *contLen, bool* continueExec) {
  SQInfo *pQInfo = (SQInfo *)qinfo;

  if (pQInfo == NULL || !isValidQInfo(pQInfo)) {
    return TSDB_CODE_QRY_INVALID_QHANDLE;
  }

  SQueryRuntimeEnv* pRuntimeEnv = &pQInfo->runtimeEnv;
  SQuery *pQuery = pRuntimeEnv->pQuery;

  // payload = result rows + one int32 + one STableIdInfo per recorded table
  size_t payloadLen = getResultSize(pQInfo, &pQuery->rec.rows);
  payloadLen += sizeof(int32_t);
  payloadLen += sizeof(STableIdInfo) * taosArrayGetSize(pQInfo->arrTableIdInfo);

  *contLen = payloadLen + sizeof(SRetrieveTableRsp);

  // todo proper handle failed to allocate memory,
  // current solution only avoid crash, but cannot return error code to client
  *pRsp = (SRetrieveTableRsp *)rpcMallocCont(*contLen);
  if (*pRsp == NULL) {
    return TSDB_CODE_QRY_OUT_OF_MEMORY;
  }

  SRetrieveTableRsp *rsp = *pRsp;
  rsp->numOfRows = htonl(pQuery->rec.rows);

  int32_t code = pQInfo->code;
  if (code != TSDB_CODE_SUCCESS) {
    rsp->useconds = 0;
    rsp->offset = 0;
  } else {
    rsp->offset = htobe64(pQuery->limit.offset);
    rsp->useconds = htobe64(pRuntimeEnv->summary.elapsedTime);
  }

  rsp->precision = htons(pQuery->precision);
  if (pQuery->rec.rows > 0 && code == TSDB_CODE_SUCCESS) {
    code = doDumpQueryResult(pQInfo, rsp->data);
  } else {
    setQueryStatus(pQuery, QUERY_OVER);
    code = pQInfo->code;
  }

  // this result has been consumed: forget the pending request and the ready flag
  pQInfo->rspContext = NULL;
  pQInfo->dataReady = QUERY_RESULT_NOT_READY;

  if (IS_QUERY_KILLED(pQInfo) || Q_STATUS_EQUAL(pQuery->status, QUERY_OVER)) {
    rsp->completed = 1;  // notify no more result to client
  }

  if (qHasMoreResultsToRetrieve(pQInfo)) {
    *continueExec = true;
  } else {
    // nothing more can be retrieved from this handle: mark the query as killed
    *continueExec = false;
    qKillQuery(pQInfo);
  }

  return code;
}
H
hjxilinx 已提交
6520

H
Haojun Liao 已提交
6521
/*
 * Mark a query as killed.
 * Returns TSDB_CODE_SUCCESS, or TSDB_CODE_QRY_INVALID_QHANDLE when the handle is NULL
 * or fails validation.
 */
int32_t qKillQuery(qinfo_t qinfo) {
  SQInfo *pQInfo = (SQInfo *)qinfo;

  if (pQInfo != NULL && isValidQInfo(pQInfo)) {
    setQueryKilled(pQInfo);
    return TSDB_CODE_SUCCESS;
  }

  return TSDB_CODE_QRY_INVALID_QHANDLE;
}

6532 6533 6534 6535 6536 6537 6538 6539 6540 6541 6542 6543 6544 6545 6546 6547
/*
 * Write one tag value into the result buffer.
 *
 * output : destination buffer.
 * val    : source value, or NULL for a null tag.
 * type   : TSDB data type of the value.
 * bytes  : width of the value for fixed-length types.
 *
 * BINARY/NCHAR values are copied with their own length (varDataTLen); every other type
 * copies exactly `bytes` bytes. A NULL value is written as the type's null
 * representation.
 */
static void doSetTagValueToResultBuf(char* output, const char* val, int16_t type, int16_t bytes) {
  bool isVarType = (type == TSDB_DATA_TYPE_BINARY || type == TSDB_DATA_TYPE_NCHAR);

  if (val == NULL) {
    if (isVarType) {
      setVardataNull(output, type);
    } else {
      setNull(output, type, bytes);
    }
    return;
  }

  if (isVarType) {
    memcpy(output, val, varDataTLen(val));
  } else {  // todo here stop will cause client crash
    memcpy(output, val, bytes);
  }
}

H
hjxilinx 已提交
6548 6549 6550
/*
 * Produce the result of a query that only touches tags / table names.
 *
 * Exactly zero or one table group is expected. Depending on the function of the first
 * select expression, one of three results is built into pQuery->sdata:
 *   - TSDB_FUNC_TID_TAG : one var-length record per table holding uid, tid, vgId and
 *                         the tag value (or table name);
 *   - TSDB_FUNC_COUNT   : a single int64 holding the number of tables;
 *   - otherwise         : one row per table with the requested tags / table name,
 *                         honouring limit and offset.
 *
 * pQInfo->tableIndex is the cursor into the table list and survives across calls, so
 * the function can be resumed when the output buffer (rec.capacity rows) is full.
 * On return rec.rows holds the number of rows produced by this call and the query
 * status is set to QUERY_COMPLETED.
 */
static void buildTagQueryResult(SQInfo* pQInfo) {
  SQueryRuntimeEnv *pRuntimeEnv = &pQInfo->runtimeEnv;
  SQuery *          pQuery = pRuntimeEnv->pQuery;

  size_t numOfGroup = GET_NUM_OF_TABLEGROUP(pQInfo);
  assert(numOfGroup == 0 || numOfGroup == 1);

  if (numOfGroup == 0) {
    return;
  }

  SArray* pa = GET_TABLEGROUP(pQInfo, 0);

  size_t num = taosArrayGetSize(pa);
  assert(num == pQInfo->tableqinfoGroupInfo.numOfTables);

  int32_t count = 0;
  int32_t functionId = pQuery->pSelectExpr[0].base.functionId;
  if (functionId == TSDB_FUNC_TID_TAG) { // return the tags & table Id
    assert(pQuery->numOfOutput == 1);

    SExprInfo* pExprInfo = &pQuery->pSelectExpr[0];
    int32_t rsize = pExprInfo->bytes;
    count = 0;

    int16_t bytes = pExprInfo->bytes;
    int16_t type = pExprInfo->type;

    // prefer the type/width declared in the tag column list when the column is found there
    for(int32_t i = 0; i < pQuery->numOfTags; ++i) {
      if (pQuery->tagColList[i].colId == pExprInfo->base.colInfo.colId) {
        bytes = pQuery->tagColList[i].bytes;
        type = pQuery->tagColList[i].type;
        break;
      }
    }

    while(pQInfo->tableIndex < num && count < pQuery->rec.capacity) {
      int32_t i = pQInfo->tableIndex++;
      STableQueryInfo *item = taosArrayGetP(pa, i);

      /*
       * The output slot is addressed by the number of rows produced in THIS call, not
       * by the absolute table index: when the function is resumed, tableIndex is
       * already past the first batch while the output buffer starts again at row 0.
       */
      char *output = pQuery->sdata[0]->data + count * rsize;
      varDataSetLen(output, rsize - VARSTR_HEADER_SIZE);

      output = varDataVal(output);
      STableId* id = TSDB_TABLEID(item->pTable);

      *(int64_t *)output = id->uid;  // memory align problem, todo serialize
      output += sizeof(id->uid);

      *(int32_t *)output = id->tid;
      output += sizeof(id->tid);

      *(int32_t *)output = pQInfo->vgId;
      output += sizeof(pQInfo->vgId);

      if (pExprInfo->base.colInfo.colId == TSDB_TBNAME_COLUMN_INDEX) {
        char* data = tsdbGetTableName(item->pTable);
        memcpy(output, data, varDataTLen(data));
      } else {
        char* data = tsdbGetTableTagVal(item->pTable, pExprInfo->base.colInfo.colId, type, bytes);
        doSetTagValueToResultBuf(output, data, type, bytes);
      }

      count += 1;
    }

    qDebug("QInfo:%p create (tableId, tag) info completed, rows:%d", pQInfo, count);

  } else if (functionId == TSDB_FUNC_COUNT) {// handle the "count(tbname)" query
    *(int64_t*) pQuery->sdata[0]->data = num;

    count = 1;
    pQInfo->tableIndex = num;  //set query completed
    qDebug("QInfo:%p create count(tbname) query, res:%d rows:1", pQInfo, count);
  } else {  // return only the tags|table name etc.
    count = 0;
    SSchema tbnameSchema = tGetTableNameColumnSchema();

    // never produce more rows than the buffer holds or than the limit allows
    int32_t maxNumOfTables = pQuery->rec.capacity;
    if (pQuery->limit.limit >= 0 && pQuery->limit.limit < pQuery->rec.capacity) {
      maxNumOfTables = pQuery->limit.limit;
    }

    while(pQInfo->tableIndex < num && count < maxNumOfTables) {
      int32_t i = pQInfo->tableIndex++;

      // discard current result due to offset
      if (pQuery->limit.offset > 0) {
        pQuery->limit.offset -= 1;
        continue;
      }

      SExprInfo* pExprInfo = pQuery->pSelectExpr;
      STableQueryInfo* item = taosArrayGetP(pa, i);

      char *data = NULL, *dst = NULL;
      int16_t type = 0, bytes = 0;
      for(int32_t j = 0; j < pQuery->numOfOutput; ++j) {

        if (pExprInfo[j].base.colInfo.colId == TSDB_TBNAME_COLUMN_INDEX) {
          bytes = tbnameSchema.bytes;
          type = tbnameSchema.type;

          data = tsdbGetTableName(item->pTable);
          dst = pQuery->sdata[j]->data + count * tbnameSchema.bytes;
        } else {
          type = pExprInfo[j].type;
          bytes = pExprInfo[j].bytes;

          data = tsdbGetTableTagVal(item->pTable, pExprInfo[j].base.colInfo.colId, type, bytes);
          dst = pQuery->sdata[j]->data + count * pExprInfo[j].bytes;

        }

        doSetTagValueToResultBuf(dst, data, type, bytes);
      }
      count += 1;
    }

    qDebug("QInfo:%p create tag values results completed, rows:%d", pQInfo, count);
  }

  pQuery->rec.rows = count;
  setQueryStatus(pQuery, QUERY_COMPLETED);
}

6674 6675 6676 6677 6678 6679 6680
/*
 * Return the response context stored in the query handle (NULL when no retrieve
 * request is pending). The handle must not be NULL.
 */
void* qGetResultRetrieveMsg(qinfo_t qinfo) {
  SQInfo* info = (SQInfo*) qinfo;
  assert(info != NULL);

  void* rspMsg = info->rspContext;
  return rspMsg;
}

6681 6682 6683 6684 6685 6686 6687
/*
 * Release callback handed to taosCacheInit() by qOpenQueryMgmt().
 * qhandle points to a slot holding the query handle; the query is killed and then
 * destroyed. A NULL slot or an empty slot is ignored.
 */
void freeqinfoFn(void *qhandle) {
  void** slot = qhandle;

  if (slot != NULL && *slot != NULL) {
    qKillQuery(*slot);
    qDestroyQueryInfo(*slot);
  }
}

void* qOpenQueryMgmt(int32_t vgId) {
H
Haojun Liao 已提交
6692
  const int32_t REFRESH_HANDLE_INTERVAL = 60; // every 30 seconds, refresh handle pool
6693 6694 6695 6696

  char cacheName[128] = {0};
  sprintf(cacheName, "qhandle_%d", vgId);

6697
  SQueryMgmt* pQueryMgmt = calloc(1, sizeof(SQueryMgmt));
6698

6699 6700 6701 6702 6703
  pQueryMgmt->qinfoPool = taosCacheInit(TSDB_DATA_TYPE_BIGINT, REFRESH_HANDLE_INTERVAL, true, freeqinfoFn, cacheName);
  pQueryMgmt->closed    = false;
  pQueryMgmt->vgId      = vgId;

  pthread_mutex_init(&pQueryMgmt->lock, NULL);
6704 6705

  qDebug("vgId:%d, open querymgmt success", vgId);
6706
  return pQueryMgmt;
6707 6708
}

H
Haojun Liao 已提交
6709
/*
 * Callback passed to taosCacheRefresh() by qQueryMgmtNotifyClosed(): `handle` points
 * to a slot holding a query handle, which is marked as killed.
 */
static void queryMgmtKillQueryFn(void* handle) {
  void* qinfo = *(void**)handle;
  qKillQuery(qinfo);
}

void qQueryMgmtNotifyClosed(void* pQMgmt) {
6715 6716 6717 6718 6719 6720 6721
  if (pQMgmt == NULL) {
    return;
  }

  SQueryMgmt* pQueryMgmt = pQMgmt;
  qDebug("vgId:%d, set querymgmt closed, wait for all queries cancelled", pQueryMgmt->vgId);

H
Haojun Liao 已提交
6722
//  pthread_mutex_lock(&pQueryMgmt->lock);
6723
  pQueryMgmt->closed = true;
H
Haojun Liao 已提交
6724
//  pthread_mutex_unlock(&pQueryMgmt->lock);
6725

H
Haojun Liao 已提交
6726
  taosCacheRefresh(pQueryMgmt->qinfoPool, queryMgmtKillQueryFn);
6727 6728 6729 6730 6731 6732 6733 6734 6735 6736 6737 6738 6739 6740 6741 6742 6743
}

/*
 * Destroy a query manager: the handle pool is cleaned up, the mutex destroyed and the
 * manager freed. The manager must already be closed (asserted). NULL is ignored.
 */
void qCleanupQueryMgmt(void* pQMgmt) {
  SQueryMgmt* pQueryMgmt = pQMgmt;
  if (pQueryMgmt == NULL) {
    return;
  }

  // keep the id for the final log line, the manager is gone by then
  int32_t vgId = pQueryMgmt->vgId;

  assert(pQueryMgmt->closed);

  // detach the pool from the manager before it is cleaned up
  SCacheObj* pool = pQueryMgmt->qinfoPool;
  pQueryMgmt->qinfoPool = NULL;
  taosCacheCleanup(pool);

  pthread_mutex_destroy(&pQueryMgmt->lock);
  taosTFree(pQueryMgmt);

  qDebug("vgId:%d queryMgmt cleanup completed", vgId);
}

6749
void** qRegisterQInfo(void* pMgmt, uint64_t qInfo) {
6750 6751 6752 6753
  if (pMgmt == NULL) {
    return NULL;
  }

6754 6755
  const int32_t DEFAULT_QHANDLE_LIFE_SPAN = tsShellActivityTimer * 2;

6756 6757
  SQueryMgmt *pQueryMgmt = pMgmt;
  if (pQueryMgmt->qinfoPool == NULL) {
6758
    qError("QInfo:%p failed to add qhandle into qMgmt, since qMgmt is closed", (void *)qInfo);
6759 6760 6761
    return NULL;
  }

H
Haojun Liao 已提交
6762
//  pthread_mutex_lock(&pQueryMgmt->lock);
6763
  if (pQueryMgmt->closed) {
H
Haojun Liao 已提交
6764
//    pthread_mutex_unlock(&pQueryMgmt->lock);
6765
    qError("QInfo:%p failed to add qhandle into cache, since qMgmt is colsing", (void *)qInfo);
6766 6767
    return NULL;
  } else {
6768 6769 6770
    uint64_t handleVal = (uint64_t) qInfo;

    void** handle = taosCachePut(pQueryMgmt->qinfoPool, &handleVal, sizeof(int64_t), &qInfo, POINTER_BYTES, DEFAULT_QHANDLE_LIFE_SPAN);
H
Haojun Liao 已提交
6771
//    pthread_mutex_unlock(&pQueryMgmt->lock);
6772 6773 6774 6775 6776

    return handle;
  }
}

6777
void** qAcquireQInfo(void* pMgmt, uint64_t key) {
6778 6779 6780 6781 6782 6783
  SQueryMgmt *pQueryMgmt = pMgmt;

  if (pQueryMgmt->qinfoPool == NULL || pQueryMgmt->closed) {
    return NULL;
  }

6784
  void** handle = taosCacheAcquireByKey(pQueryMgmt->qinfoPool, &key, sizeof(uint64_t));
6785 6786 6787 6788 6789 6790 6791
  if (handle == NULL || *handle == NULL) {
    return NULL;
  } else {
    return handle;
  }
}

H
Haojun Liao 已提交
6792
void** qReleaseQInfo(void* pMgmt, void* pQInfo, bool freeHandle) {
6793 6794 6795 6796 6797
  SQueryMgmt *pQueryMgmt = pMgmt;
  if (pQueryMgmt->qinfoPool == NULL) {
    return NULL;
  }

H
Haojun Liao 已提交
6798
  taosCacheRelease(pQueryMgmt->qinfoPool, pQInfo, freeHandle);
6799 6800 6801
  return 0;
}

6802